|
 graph = Neo4jGraph()


-def extract_graph_from_file(uri, userName, password, files):
+def extract_graph_from_file(uri, userName, password, file):
     try:
-        for file in files:
-            start_time = datetime.now()
-            job_status = "In-Progess"
-            file_type = file.filename.split('.')[1]
-            file_size = round(file.size/(1<<10), 2)
-            file_name = file.filename
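+        # capture upload metadata up front; these values seed the Source bookkeeping node below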
+        start_time = datetime.now()
+        job_status = "In-Progress"
+        file_type = file.filename.split('.')[-1]  # last token, so multi-dot names still yield the extension
+        file_size = round(file.size / (1 << 10), 2)  # file.size is bytes; shift by 10 converts to KB
+        file_name = file.filename
|
-            diffbot_api_key = os.environ.get('DIFFBOT_API_KEY')
-            diffbot_nlp = DiffbotGraphTransformer(diffbot_api_key=diffbot_api_key)
-
-            graph = Neo4jGraph(url=uri, username=userName, password=password)
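+        # Diffbot turns raw text into graph documents; Neo4jGraph connects to the target database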
+        diffbot_api_key = os.environ.get('DIFFBOT_API_KEY')
+        diffbot_nlp = DiffbotGraphTransformer(diffbot_api_key=diffbot_api_key)
+
+        graph = Neo4jGraph(url=uri, username=userName, password=password)
|
-            metadata = {"source": "local","filename": file.filename, "filesize":file.size }
-
-            source_node = "fileName: '{}'"
-            update_node_prop = "SET s.fileSize = '{} KB', s.fileType = '{}' ,s.createdAt ='{}',s.status = '{}',s.nodeCount= 0, s.relationshipCount = 0"
-            #create source node as file name if not exist
-            graph.query('MERGE(s:Source {'+source_node.format(file_name)+'}) '+update_node_prop.format(file_size,file_type,start_time,job_status))
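+        # MERGE keyed on fileName keeps the Source node idempotent across re-uploads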
+        metadata = {"source": "local", "filename": file.filename, "filesize": file.size}
+
+        source_node = "fileName: '{}'"
+        update_node_prop = "SET s.fileSize = '{} KB', s.fileType = '{}', s.createdAt = '{}', s.status = '{}', s.nodeCount = 0, s.relationshipCount = 0"
+        # create the Source node for this file if it does not already exist
+        graph.query('MERGE (s:Source {' + source_node.format(file_name) + '}) ' + update_node_prop.format(file_size, file_type, start_time, job_status))
|
-            with open('temp.pdf','wb') as f:
-                f.write(file.file.read())
-            loader = PyPDFLoader('temp.pdf')
-            pages = loader.load_and_split()
-
-            for i in range(0,len(pages)):
-                pages[i]=Document(page_content=pages[i].page_content.replace('\n',' '), metadata=metadata)
-
-            graph_documents = diffbot_nlp.convert_to_graph_documents(pages)
-            # print(graph_documents)
-            graph.add_graph_documents(graph_documents)
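+        # spool the upload to a local temp file so PyPDFLoader can read it from disk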
+        with open('temp.pdf', 'wb') as f:
+            f.write(file.file.read())
+        loader = PyPDFLoader('temp.pdf')
+        pages = loader.load_and_split()
+
+        # flatten newlines so Diffbot sees each page as continuous text, and attach the file metadata
+        for i in range(0, len(pages)):
+            pages[i] = Document(page_content=pages[i].page_content.replace('\n', ' '), metadata=metadata)
+
+        graph_documents = diffbot_nlp.convert_to_graph_documents(pages)
+        graph.add_graph_documents(graph_documents)
|
-            graph.refresh_schema()
-            # count the length list of nodes and relationship based on document generated by Diffbot
-            nodes_created =len(graph_documents[0].nodes)
-            relationships_created = len(graph_documents[0].relationships)
-
-            end_time = datetime.now()
-            processed_time = end_time - start_time
-            job_status = "Completed"
-            error_message =""
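+        # convert_to_graph_documents returns one graph document per input page, so totals should span the whole list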
+        graph.refresh_schema()
+        # count the nodes and relationships created from the generated graph documents
+        nodes_created = sum(len(doc.nodes) for doc in graph_documents)
+        relationships_created = sum(len(doc.relationships) for doc in graph_documents)
+
+        end_time = datetime.now()
+        processed_time = end_time - start_time
+        job_status = "Completed"
+        error_message = ""
|
-            update_node_prop = "SET s.fileSize = '{} KB', s.fileType = '{}' ,s.createdAt ='{}', s.updatedAt = '{}', s.processingTime = '{}',s.status = '{}', s.errorMessgae = '{}',s.nodeCount= {}, s.relationshipCount = {}"
-            graph.query('MERGE(s:Source {'+source_node.format(file_name)+'}) '+update_node_prop.format(file_size,file_type,start_time,end_time,round(processed_time.total_seconds(),2),job_status,error_message,nodes_created,relationships_created))
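+        # write the final status, timings, and counts back onto the Source node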
+        update_node_prop = "SET s.fileSize = '{} KB', s.fileType = '{}', s.createdAt = '{}', s.updatedAt = '{}', s.processingTime = '{}', s.status = '{}', s.errorMessage = '{}', s.nodeCount = {}, s.relationshipCount = {}"
+        graph.query('MERGE (s:Source {' + source_node.format(file_name) + '}) ' + update_node_prop.format(file_size, file_type, start_time, end_time, round(processed_time.total_seconds(), 2), job_status, error_message, nodes_created, relationships_created))
|
-            output = {
-                "nodeCount": nodes_created,
-                "relationshipCount": relationships_created,
-                "processingTime": round(processed_time.total_seconds(),2),
-                "status" : job_status
-            }
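+        # summary payload returned to the client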
+        output = {
+            "fileSize": f"{file_size} KB",  # file_size is a float, so format it rather than concatenating with a str
+            "nodeCount": nodes_created,
+            "relationshipCount": relationships_created,
+            "processingTime": round(processed_time.total_seconds(), 2),
+            "status": job_status
+        }
|
         return JSONResponse(content=jsonable_encoder(output))
     except Exception as e:
|