Skip to content

Commit 2f00eb6

Browse files
committed
Add add_cited_patents() and refactor cited patent functions
1 parent d629313 commit 2f00eb6

File tree

1 file changed

+52
-14
lines changed

1 file changed

+52
-14
lines changed

src/main.py

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,34 @@ def get_company_primary_id(name):
242242
return None
243243

244244

245-
def add_cited_patents(patents_list, limit=25, verbose=False):
245+
def fetch_all_cited_patent_numbers_for_all_patents_in_db(verbose=False):
    """Fetch and store cited patent numbers for every patent in the database.

    Reads every ``patent_number`` returned by the ``Patent`` query and hands
    the complete list to ``add_cited_patent_numbers``.

    Args:
        verbose: Forwarded unchanged to ``add_cited_patent_numbers``.
    """
    patent_numbers = [
        row.patent_number for row in session.query(Patent.patent_number).all()
    ]
    add_cited_patent_numbers(patent_numbers, verbose=verbose)
250+
251+
252+
def add_cited_patents(limit=25, verbose=False):
    """Populate the patents table for cited patents that are not stored yet.

    Collects the cited patent numbers in ``CitedPatent`` that have no matching
    ``Patent.patent_number``, requests them through
    ``fetch_patents_by_number`` and passes every yielded batch to
    ``add_patents``.

    Args:
        limit: Forwarded unchanged to ``fetch_patents_by_number``.
        verbose: Forwarded unchanged to ``fetch_patents_by_number``.
    """
    # TODO refactor this function to accept a list of patents
    results_format = (
        '["patent_number","patent_date","patent_year","assignee_organization","app_date",'
        '"patent_title","uspc_mainclass_id","assignee_first_name","assignee_last_name"]'
    )
    patents_in_db = session.query(Patent.patent_number)
    # A patent cited by several patents appears once per citation row, so
    # de-duplicate in the query instead of requesting the same number repeatedly.
    missing_cited_query = (
        session.query(CitedPatent.cited_patent_number)
        .filter(~CitedPatent.cited_patent_number.in_(patents_in_db))
        .distinct()
    )
    cited_patents_to_add = [row.cited_patent_number for row in missing_cited_query.all()]
    if not cited_patents_to_add:
        # Nothing is missing: skip building and sending an empty query.
        return
    for patents in fetch_patents_by_number(cited_patents_to_add, results_format, limit=limit, verbose=verbose):
        add_patents(patents)
264+
265+
266+
def add_cited_patent_numbers(patents_list, limit=25, verbose=False):
    """Fetch the cited patent numbers for ``patents_list`` and store them.

    Each batch yielded by ``fetch_patents_by_number`` is handed to
    ``add_cited_patent_numbers_to_db`` as soon as it is produced.

    Args:
        patents_list: Patent numbers to look up.
        limit: Forwarded unchanged to ``fetch_patents_by_number``.
        verbose: Forwarded unchanged to ``fetch_patents_by_number``.
    """
    requested_fields = '["patent_number","cited_patent_number"]'
    batches = fetch_patents_by_number(
        patents_list,
        requested_fields,
        limit=limit,
        verbose=verbose,
    )
    for batch in batches:
        add_cited_patent_numbers_to_db(batch)
270+
271+
272+
def fetch_patents_by_number(patents_list, results_format, limit=25, verbose=False):
247273
q_list = ['"%s"' % patent_number for patent_number in patents_list]
248274
q_str = '{"patent_number":[%s]}' % ",".join(q_list)
249275

@@ -260,6 +286,7 @@ def add_cited_patents(patents_list, limit=25, verbose=False):
260286
response = patentsview_get_request(patent_search_endpoint, q_str, results_format, verbose=verbose)
261287
results = json.loads(response)
262288
patents = results['patents']
289+
yield patents
263290
else:
264291
if limit and ((endpoint_length // 2000) < (endpoint_length // limit)):
265292
number_of_chunks = endpoint_length // limit + 1
@@ -274,16 +301,29 @@ def add_cited_patents(patents_list, limit=25, verbose=False):
274301
q_str = '{"patent_number":[%s]}' % ",".join(q_list[start_index:end_index])
275302
response = patentsview_get_request(patent_search_endpoint, q_str, results_format, verbose=verbose)
276303
results = json.loads(response)
277-
print(results)
304+
if verbose:
305+
print(results)
278306
if results['patents']:
279307
patents += results['patents']
280308

309+
# This is to potentially avoid a "Segmentation Fault (core dumped)" error
310+
311+
# TODO change this to an implementation that is more programmatic
312+
if len(patents) >= 1000:
313+
yield patents
314+
patents = []
315+
yield patents
316+
317+
318+
def add_cited_patent_numbers_to_db(citing_patent_numbers: List) -> None:
319+
print("Adding cited patent numbers to db.")
281320
# Patents that are already in the db
282-
cited_patents_in_db = [(x.citing_patent_number, x.cited_patent_number) for x in session.query(CitedPatent).all()]
321+
cited_patents_in_db = [(x.citing_patent_number, x.cited_patent_number) for x in
322+
session.query(CitedPatent).all()]
283323
# Patents fetched
284324
cited_patent_objects = []
285325
# Add ALL cited patents to cited_patent_objects list
286-
for patent in patents:
326+
for patent in citing_patent_numbers:
287327
patent_number = patent["patent_number"]
288328
for cited_patent_number in patent["cited_patents"]:
289329
# Check if there are cited patents in the results and if they are already in the database
@@ -367,7 +407,7 @@ def add_patents(patents):
367407
session.commit()
368408

369409

370-
def fetch_patents_for_all_companies_in_db(resume_from_company_id=None):
410+
def fetch_patents_for_all_companies_in_db(resume_from_company_id=None, verbose=False):
371411
if resume_from_company_id and type(resume_from_company_id) == int:
372412
company_query = session.query(Company.id).filter(Company.id >= resume_from_company_id).order_by(Company.id.asc()).all()
373413
else:
@@ -380,23 +420,16 @@ def fetch_patents_for_all_companies_in_db(resume_from_company_id=None):
380420
alternate_names = session.query(AlternateName.name, AlternateName.id).filter_by(company_id=company_id).all()
381421

382422
for org in primary_names:
383-
patents = get_all_company_patents(org[0], verbose=True)
423+
patents = get_all_company_patents(org[0], verbose=verbose)
384424
if patents:
385425
add_patents(patents)
386426

387427
for org, alternate_name_id in alternate_names:
388-
patents = get_all_company_patents(org, verbose=True)
428+
patents = get_all_company_patents(org, verbose=verbose)
389429
if patents:
390430
add_patents(patents)
391431

392432

393-
def fetch_all_cited_patent_numbers_for_all_patents_in_db():
394-
l = []
395-
for number in session.query(Patent.patent_number).all():
396-
l.append(number.patent_number)
397-
add_cited_patents(l, verbose=True)
398-
399-
400433
def main():
401434
options = get_options()
402435

@@ -416,6 +449,7 @@ def main():
416449
end_date = options.end_date[0]
417450

418451
# TODO: implement functionality that uses the Start and End dates
452+
"""
419453
if options.fetch_patents_for_all_companies:
420454
company_id = options.resume_from_company_id
421455
if company_id:
@@ -425,6 +459,10 @@ def main():
425459
print("Fetching patents for all companies in the database.")
426460
fetch_patents_for_all_companies_in_db()
427461
462+
fetch_all_cited_patent_numbers_for_all_patents_in_db()
463+
"""
464+
add_cited_patents()
465+
428466

429467
def get_options():
430468
parser = argparse.ArgumentParser(description="A script that calls the PatentsView API.",

0 commit comments

Comments
 (0)