diff --git a/README.md b/README.md index 2d6eaabaa..29d54b452 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,31 @@ Transform unstructured data (PDFs, DOCs, TXT, YouTube videos, web pages, etc.) i This application allows you to upload files from various sources (local machine, GCS, S3 bucket, or web sources), choose your preferred LLM model, and generate a Knowledge Graph. ---- +## Getting Started + +### **Prerequisites** +- **Python 3.12 or higher** (for local/separate backend deployment) +- Neo4j Database **5.23 or later** with APOC installed. + - **Neo4j Aura** databases (including the free tier) are supported. + - If using **Neo4j Desktop**, you will need to deploy the backend and frontend separately (docker-compose is not supported). + +#### **Backend Setup** +1. Create the `.env` file in the `backend` folder by copying `backend/example.env`. +2. Preconfigure user credentials in the `.env` file to bypass the login dialog: + ```bash + NEO4J_URI= + NEO4J_USERNAME= + NEO4J_PASSWORD= + NEO4J_DATABASE= + ``` +3. Run: + ```bash + cd backend + python3.12 -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + pip install -r requirements.txt -c constraints.txt + uvicorn score:app --reload + ``` ## Key Features diff --git a/backend/Dockerfile b/backend/Dockerfile index b031e4425..1b97767b1 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-slim +FROM python:3.12-slim WORKDIR /code ENV PORT 8000 EXPOSE 8000 diff --git a/backend/README.md b/backend/README.md index 1ab091216..4667f47f6 100644 --- a/backend/README.md +++ b/backend/README.md @@ -1,6 +1,11 @@ # Project Overview Welcome to our project! This project is built using FastAPI framework to create a fast and modern API with Python. +## Prerequisites + +- Python 3.12 or higher +- pip (Python package manager) + ## Feature API Endpoint : This project provides various API endpoint to perform specific tasks. 
Data Validation : Utilize FastAPI data validation and serialization feature. @@ -16,9 +21,14 @@ Follow these steps to set up and run the project locally: > cd llm-graph-builder -2. Install Dependency : +2. Create a virtual environment (recommended): + +> python3.12 -m venv venv +> source venv/bin/activate # On Windows: venv\Scripts\activate + +3. Install Dependency : -> pip install -t requirements.txt +> pip install -r requirements.txt -c constraints.txt ## Run backend project using unicorn Run the server: diff --git a/backend/requirements.txt b/backend/requirements.txt index ffb6f04a9..8a5d3bc79 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,65 +1,64 @@ -accelerate==1.7.0 -asyncio==3.4.3 -boto3==1.38.36 -botocore==1.38.36 -certifi==2025.6.15 -fastapi==0.115.12 +accelerate==1.12.0 +asyncio==4.0.0 +boto3==1.40.23 +botocore==1.40.23 +certifi==2025.8.3 +fastapi==0.116.1 fastapi-health==0.4.0 fireworks-ai==0.15.12 google-api-core==2.25.1 google-auth==2.40.3 google_auth_oauthlib==1.2.2 google-cloud-core==2.4.3 -json-repair==0.39.1 +json-repair==0.44.1 pip-install==1.3.5 -langchain==0.3.25 -langchain-aws==0.2.25 -langchain-anthropic==0.3.15 -langchain-fireworks==0.3.0 -langchain-community==0.3.25 -langchain-core==0.3.65 -langchain-experimental==0.3.4 -langchain-google-vertexai==2.0.25 -langchain-groq==0.3.2 -langchain-openai==0.3.23 -langchain-text-splitters==0.3.8 -langchain-huggingface==0.3.0 +langchain==1.1.2 +langchain-aws==1.1.0 +langchain-anthropic==1.2.0 +langchain-fireworks==1.1.0 +langchain-community==0.4.1 +langchain-core==1.1.1 +langchain-experimental==0.4.0 +langchain-google-vertexai==3.1.1 +langchain-groq==1.1.0 +langchain-openai==1.1.0 +langchain-text-splitters==1.0.0 +langchain-huggingface==1.1.0 +langchain-classic==1.0.0 langdetect==1.0.9 -langsmith==0.3.45 -langserve==0.3.1 -neo4j-rust-ext==5.28.1.0 +langsmith==0.4.55 +langserve==0.3.3 +neo4j-rust-ext==5.28.2.1 nltk==3.9.1 -openai==1.86.0 -opencv-python==4.11.0.86 
+openai==2.9.0 psutil==7.0.0 -pydantic==2.11.7 -python-dotenv==1.1.0 +pydantic==2.12.5 +python-dotenv==1.1.1 python-magic==0.4.27 PyPDF2==3.0.1 -PyMuPDF==1.26.1 -starlette==0.46.2 -sse-starlette==2.3.6 +PyMuPDF==1.26.4 +starlette==0.47.3 +sse-starlette==3.0.2 starlette-session==0.4.3 tqdm==4.67.1 -unstructured[all-docs] -unstructured==0.17.2 -unstructured-client==0.36.0 +unstructured[all-docs]==0.18.14 +unstructured-client==0.42.3 unstructured-inference==1.0.5 -urllib3==2.4.0 -uvicorn==0.34.3 +urllib3==2.5.0 +uvicorn==0.35.0 gunicorn==23.0.0 wikipedia==1.4.0 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 -youtube-transcript-api==1.1.0 +youtube-transcript-api==1.2.2 zipp==3.23.0 -sentence-transformers==5.0.0 +sentence-transformers==5.1.0 google-cloud-logging==3.12.1 pypandoc==1.15 -graphdatascience==1.15.1 -Secweb==1.18.1 -ragas==0.3.1 +graphdatascience==1.18a1 +Secweb==1.25.2 +ragas==0.3.2 rouge_score==0.1.2 -langchain-neo4j==0.4.0 +langchain-neo4j==0.6.0 pypandoc-binary==1.15 chardet==5.2.0 \ No newline at end of file diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 1a9e24eb3..fe811833a 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -11,12 +11,11 @@ from langchain_neo4j import Neo4jVector from langchain_neo4j import Neo4jChatMessageHistory from langchain_neo4j import GraphCypherQAChain -from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder +from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnableBranch -from langchain.retrievers import ContextualCompressionRetriever -from langchain_community.document_transformers import EmbeddingsRedundantFilter -from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline +from langchain_classic.retrievers import ContextualCompressionRetriever +from 
langchain_classic.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline from langchain_text_splitters import TokenTextSplitter from langchain_core.messages import HumanMessage, AIMessage from langchain_community.chat_message_histories import ChatMessageHistory diff --git a/backend/src/create_chunks.py b/backend/src/create_chunks.py index 523d2b77c..63dc8ada8 100644 --- a/backend/src/create_chunks.py +++ b/backend/src/create_chunks.py @@ -1,5 +1,5 @@ from langchain_text_splitters import TokenTextSplitter -from langchain.docstore.document import Document +from langchain_core.documents import Document from langchain_neo4j import Neo4jGraph import logging from src.document_sources.youtube import get_chunks_with_timestamps, get_calculated_timestamps diff --git a/backend/src/document_sources/youtube.py b/backend/src/document_sources/youtube.py index 1c60a9b85..56ac8c0ef 100644 --- a/backend/src/document_sources/youtube.py +++ b/backend/src/document_sources/youtube.py @@ -1,4 +1,4 @@ -from langchain.docstore.document import Document +from langchain_core.documents import Document from src.shared.llm_graph_builder_exception import LLMGraphBuilderException from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api.proxies import GenericProxyConfig diff --git a/backend/src/llm.py b/backend/src/llm.py index 854e5926f..a52ce1851 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -1,5 +1,5 @@ import logging -from langchain.docstore.document import Document +from langchain_core.documents import Document import os from langchain_openai import ChatOpenAI, AzureChatOpenAI from langchain_google_vertexai import ChatVertexAI diff --git a/backend/src/make_relationships.py b/backend/src/make_relationships.py index bfb945617..f9c191169 100644 --- a/backend/src/make_relationships.py +++ b/backend/src/make_relationships.py @@ -1,6 +1,5 @@ from langchain_neo4j import Neo4jGraph -from langchain.docstore.document import Document 
-from src.shared.common_fn import load_embedding_model,execute_graph_query +from langchain_core.documents import Document from src.shared.common_fn import load_embedding_model,execute_graph_query import logging from typing import List @@ -34,7 +33,6 @@ def merge_relationship_between_chunk_and_entites(graph: Neo4jGraph, graph_docume MERGE (c)-[:HAS_ENTITY]->(n) """ execute_graph_query(graph,unwind_query, params={"batch_data": batch_data}) - execute_graph_query(graph,unwind_query, params={"batch_data": batch_data}) def create_chunk_embeddings(graph, chunkId_chunkDoc_list, file_name): @@ -61,7 +59,6 @@ def create_chunk_embeddings(graph, chunkId_chunkDoc_list, file_name): MERGE (c)-[:PART_OF]->(d) """ execute_graph_query(graph,query_to_create_embedding, params={"fileName":file_name, "data":data_for_query}) - execute_graph_query(graph,query_to_create_embedding, params={"fileName":file_name, "data":data_for_query}) def create_relation_between_chunks(graph, file_name, chunks: List[Document])->list: logging.info("creating FIRST_CHUNK and NEXT_CHUNK relationships between chunks") @@ -130,7 +127,6 @@ def create_relation_between_chunks(graph, file_name, chunks: List[Document])->li MERGE (c)-[:PART_OF]->(d) """ execute_graph_query(graph,query_to_create_chunk_and_PART_OF_relation, params={"batch_data": batch_data}) - execute_graph_query(graph,query_to_create_chunk_and_PART_OF_relation, params={"batch_data": batch_data}) query_to_create_FIRST_relation = """ UNWIND $relationships AS relationship @@ -140,7 +136,6 @@ def create_relation_between_chunks(graph, file_name, chunks: List[Document])->li MERGE (d)-[:FIRST_CHUNK]->(c)) """ execute_graph_query(graph,query_to_create_FIRST_relation, params={"f_name": file_name, "relationships": relationships}) - execute_graph_query(graph,query_to_create_FIRST_relation, params={"f_name": file_name, "relationships": relationships}) query_to_create_NEXT_CHUNK_relation = """ UNWIND $relationships AS relationship diff --git a/docs/project_docs.adoc 
b/docs/project_docs.adoc index 22e997217..4721f4d63 100644 --- a/docs/project_docs.adoc +++ b/docs/project_docs.adoc @@ -21,6 +21,11 @@ This document provides comprehensive documentation for the Neo4j llm-graph-build == Local Setup and Execution +Prerequisites: +- Python 3.12 or higher +- Node.js 20 or higher +- Docker (optional, for containerized deployment) + Run Docker Compose to build and start all components: .... docker-compose up --build @@ -38,8 +43,8 @@ yarn run dev ** For backend .... cd backend -python -m venv envName -source envName/bin/activate +python3.12 -m venv venv +source venv/bin/activate pip install -r requirements.txt uvicorn score:app --reload ....