1212import re
1313from typing import List
1414
15-
1615Base = declarative_base ()
1716engine = create_engine ('sqlite:///patentsview.db' )
1817
@@ -94,8 +93,8 @@ class CitedPatent(Base):
9493 )
9594
9695 id = Column (Integer , primary_key = True )
97- citing_patent_number = Column (String , ForeignKey ('patents.patent_number' ))
98- cited_patent_number = Column (String , ForeignKey ('patents.patent_number' ))
96+ citing_patent_number = Column (String ) # , ForeignKey('patents.patent_number'))
97+ cited_patent_number = Column (String ) # , ForeignKey('patents.patent_number'))
9998
10099 def __init__ (self , patent_number , cited_patent_number ):
101100 self .citing_patent_number = patent_number
@@ -107,12 +106,10 @@ def __init__(self, patent_number, cited_patent_number):
107106dbSession = sessionmaker (bind = engine )
108107session = dbSession ()
109108
110-
111109# setting for searching for company name
112110# e.g.: "_eq", "_begins", etc.
113111COMPANY_SEARCH_CRITERIA = '_eq'
114112
115-
116113# Application Variables
117114search_base_url = "https://dev.patentsview.org/"
118115patent_search_endpoint = search_base_url + "api/patents/query"
@@ -227,7 +224,7 @@ def insert_names(file_path):
227224 index = df .columns .get_loc ("Name 1" )
228225 primary_name = row [index ]
229226 primary_id = session .query (Company .id ).filter_by (name = primary_name ).scalar ()
230- alternate_names = [name for name in row [index + 1 :] if type (name ) == str ]
227+ alternate_names = [name for name in row [index + 1 :] if type (name ) == str ]
231228 insert_alternate_names (primary_id , alternate_names , False )
232229 session .commit ()
233230
@@ -244,7 +241,7 @@ def get_company_primary_id(name):
244241
245242def fetch_all_cited_patent_numbers_for_all_patents_in_db (verbose = False ):
246243 l = []
247- for number in session .query (Patent .patent_number ).all ():
244+ for number in session .query (Patent .patent_number ).distinct (). all ():
248245 l .append (number .patent_number )
249246 add_cited_patent_numbers (l , verbose = verbose )
250247
@@ -259,7 +256,7 @@ def add_cited_patents(limit=25, verbose=False):
259256 patents_in_db = session .query (Patent .patent_number )
260257 cited_patents_to_add = [x .cited_patent_number for x in session .query (CitedPatent .cited_patent_number )\
261258 .filter (~ CitedPatent .cited_patent_number .in_ (patents_in_db )).all ()]
262- for patents in fetch_patents_by_number (cited_patents_to_add , results_format , limit = limit , verbose = verbose ):
259+ for patents in fetch_patents_by_number (cited_patents_to_add , results_format , limit = limit , verbose = verbose ):
263260 add_patents (patents )
264261
265262
@@ -376,31 +373,31 @@ def add_patents(patents):
376373 else :
377374 # TODO find a company/patent that satisfies this path so that this can be tested
378375 # TODO handle case where there is no assignee organization, just an individual's first & last name
379- result = session .query (AlternateName .id , AlternateName .company_id )\
376+ result = session .query (AlternateName .id , AlternateName .company_id ) \
380377 .filter (func .lower (AlternateName .name ) == assignee_organization ).first ()
381378 if result :
382379 assignee_id = result .company_id
383380 assignee_alternate_id = result .id
384381
385- # If it is, add the record
386- if assignee_id :
387- p_obj = Patent ( patent_number = p [ "patent_number" ] ,
388- patent_title = p ["patent_title " ],
389- company_id = assignee_id ,
390- year = p [ "patent_year" ] ,
391- grant_date = p [ "patent_date" ] ,
392- uspc_class = uspc_main_classes ,
393- assignee_first_name = assignee_first_name ,
394- assignee_last_name = assignee_last_name ,
395- company_alternate_name_id = assignee_alternate_id
396- )
397-
398- # Check if the patent is already in the database; add it if it is not
399- # TODO: change this so that the database is not read so frequently from disk
400- if session . query ( Patent )\
401- . filter_by ( patent_number = p [ "patent_number" ], company_id = assignee_id ,
402- company_alternate_name_id = assignee_alternate_id ).first () is None :
403- patent_objects .append (p_obj )
382+ p_obj = Patent ( patent_number = p [ "patent_number" ],
383+ patent_title = p [ "patent_title" ],
384+ company_id = assignee_id ,
385+ year = p ["patent_year " ],
386+ grant_date = p [ "patent_date" ] ,
387+ uspc_class = uspc_main_classes ,
388+ assignee_first_name = assignee_first_name ,
389+ assignee_last_name = assignee_last_name ,
390+ company_alternate_name_id = assignee_alternate_id
391+ )
392+
393+ # Check if the patent is already in the database; add it if it is not
394+ # TODO: change this so that the database is not read so frequently from disk
395+ if session . query ( Patent ). filter_by ( patent_number = p [ "patent_number" ],
396+ company_id = assignee_id ,
397+ company_alternate_name_id = assignee_alternate_id ,
398+ assignee_first_name = assignee_first_name ,
399+ assignee_last_name = assignee_last_name , ).first () is None :
400+ patent_objects .append (p_obj )
404401
405402 # Save the patents
406403 session .bulk_save_objects (patent_objects )
@@ -409,7 +406,8 @@ def add_patents(patents):
409406
410407def fetch_patents_for_all_companies_in_db (resume_from_company_id = None , verbose = False ):
411408 if resume_from_company_id and type (resume_from_company_id ) == int :
412- company_query = session .query (Company .id ).filter (Company .id >= resume_from_company_id ).order_by (Company .id .asc ()).all ()
409+ company_query = session .query (Company .id ).filter (Company .id >= resume_from_company_id ).order_by (
410+ Company .id .asc ()).all ()
413411 else :
414412 company_query = session .query (Company .id ).order_by (Company .id .asc ()).all ()
415413
@@ -449,7 +447,6 @@ def main():
449447 end_date = options .end_date [0 ]
450448
451449 # TODO: implement functionality that uses the Start and End dates
452- """
453450 if options .fetch_patents_for_all_companies :
454451 company_id = options .resume_from_company_id
455452 if company_id :
@@ -460,14 +457,13 @@ def main():
460457 fetch_patents_for_all_companies_in_db ()
461458
462459 fetch_all_cited_patent_numbers_for_all_patents_in_db ()
463- """
464- add_cited_patents ()
460+ add_cited_patents (verbose = True )
465461
466462
467463def get_options ():
468464 parser = argparse .ArgumentParser (description = "A script that calls the PatentsView API." ,
469- # formatter_class=argparse.RawDescriptionHelpFormatter
470- )
465+ # formatter_class=argparse.RawDescriptionHelpFormatter
466+ )
471467
472468 parser .add_argument (
473469 '-p' , '--path' , type = str , metavar = "path" ,
0 commit comments