84 changes: 83 additions & 1 deletion apps/local-rag-pdf/README.md
@@ -1 +1,83 @@
# local-rag-deepseek-mongodb
# Local RAG with PDF, Ollama, and MongoDB Atlas

This application demonstrates a Retrieval-Augmented Generation (RAG) pipeline using **Ollama** for local LLMs and embeddings, and **MongoDB Atlas** as the vector store. It allows users to upload a PDF, index its content, and ask questions based on the document's context.

## Features

- **PDF Ingestion**: Upload and parse PDF documents.
- **Chunking & Embedding**: Splits text into manageable chunks and generates embeddings using Ollama.
- **Vector Storage**: Stores embeddings in MongoDB Atlas Vector Search.
- **Context-Aware QA**: Retrieves relevant context to answer user queries using a local LLM.
- **Conversation History**: Maintains context across multiple turns of conversation.

## Prerequisites

Before running this application, ensure you have the following:

1. **Python 3.9+**: Installed on your system.
2. **MongoDB Atlas Cluster**:
- Create a [free account](https://www.mongodb.com/cloud/atlas/register).
- Deploy a cluster (M0 sandbox is sufficient).
- Get your connection string.
3. **Ollama**:
- Download and install [Ollama](https://ollama.com/).
- Pull the required models:
```bash
ollama pull llama3
ollama pull nomic-embed-text
```
*(Note: You can configure different models in `config.yaml`)*

## Installation

1. **Clone the repository** (if you haven't already):
```bash
git clone <repository-url>
cd apps/local-rag-pdf
```

2. **Create a virtual environment**:
```bash
python -m venv .venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
```

3. **Install dependencies**:
```bash
pip install -r requirements.txt
```

4. **Configure the application**:
- Open `config.yaml`.
- Update `mongo_connection_str` with your Atlas connection string.
   - (Optional) Change `llm_model` or `embedding_model` to use different Ollama models; a sample configuration is sketched below.

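A minimal `config.yaml` sketch, assuming the key names that `rag_module.py` requires (`llm_model`, `embedding_model`, `mongo_connection_str`, `database_name`, `collection_name`); all values shown are placeholders:

```yaml
# Sample config.yaml — key names match those required by rag_module.py; values are placeholders.
llm_model: "llama3"
embedding_model: "nomic-embed-text"
mongo_connection_str: "mongodb+srv://<user>:<password>@<cluster>.mongodb.net/?retryWrites=true&w=majority"
database_name: "rag_db"
collection_name: "pdf_chunks"
```
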
## Usage

1. **Run the application**:
```bash
streamlit run app.py
```

2. **Interact with the UI**:
- Upload a PDF file using the sidebar.
- Wait for the ingestion process to complete (check the logs in the terminal).
- Type your question in the chat input box.

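The Streamlit UI is the main entry point, but the RAG module can also be driven directly from Python. A minimal sketch follows; the class name `RAGPipeline` and the import are assumptions (check `rag_module.py` for the actual name), while the constructor and method signatures are taken from the module:

```python
# Hypothetical headless usage of the RAG module (no Streamlit UI).
# "RAGPipeline" is an assumed class name — see rag_module.py for the real one.
from rag_module import RAGPipeline  # assumed import

rag = RAGPipeline(config_file="config.yaml")

# Parse the PDF, split it into chunks, embed them, and store them in MongoDB Atlas.
rag.upload_and_index_pdf("my_document.pdf")

# Ask a question using similarity-threshold retrieval (the module's defaults).
answer = rag.query_with_context(
    "What are the key findings?",
    k=5,
    score_threshold=0.2,
    search_type="similarity",
)
print(answer)
```
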
## Architecture

1. **User** uploads a PDF.
2. **PyMuPDF** extracts text from the PDF.
3. **LangChain** splits the text into chunks.
4. **Ollama** generates vector embeddings for each chunk.
5. **MongoDB Atlas** stores these embeddings.
6. When a **User** asks a question:
- The question is embedded using **Ollama**.
- **MongoDB Atlas** performs a vector search to find relevant chunks.
- The retrieved chunks + the question are sent to the **Ollama** LLM.
   - The LLM generates a response based on the context (see the sketch below).

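The query path above corresponds roughly to the following LangChain sketch. It is illustrative only: the connection string, namespace, and the `vector_index` index name are placeholders or assumptions, and the real `rag_module.py` adds configuration loading, logging, and conversation history on top of this.

```python
# Illustrative sketch of the query path — not the app's exact code.
# Assumptions: the models from this README and an Atlas vector index named "vector_index".
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
from langchain_ollama import ChatOllama, OllamaEmbeddings

embeddings = OllamaEmbeddings(model="nomic-embed-text")
vector_store = MongoDBAtlasVectorSearch.from_connection_string(
    "mongodb+srv://<user>:<password>@<cluster>.mongodb.net",  # placeholder connection string
    namespace="rag_db.pdf_chunks",                            # placeholder "database.collection"
    embedding=embeddings,
    index_name="vector_index",                                # assumed Atlas Vector Search index name
)

# 1. Embed the question and retrieve the most similar chunks from Atlas.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.2},
)

def format_docs(docs):
    """Join the retrieved chunks into a single context string."""
    return "\n\n".join(doc.page_content for doc in docs)

# 2. Stuff the retrieved context and the question into a prompt for the local LLM.
prompt = ChatPromptTemplate.from_template(
    "Answer the question using only the context below.\n\n"
    "Context:\n{context}\n\nQuestion: {question}"
)
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | ChatOllama(model="llama3")
    | StrOutputParser()
)

print(chain.invoke("What is this document about?"))
```
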
## Troubleshooting

- **Connection Error**: Ensure your IP address is added to the IP Access List in MongoDB Atlas Network Access (a quick connectivity check is sketched below).
- **Ollama Error**: Make sure the Ollama service is running locally (`ollama serve`).
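
A quick way to verify Atlas connectivity from the same environment (the connection string is a placeholder):

```python
# Minimal connectivity check for the Atlas cluster.
from pymongo import MongoClient

client = MongoClient("mongodb+srv://<user>:<password>@<cluster>.mongodb.net")
client.admin.command("ping")  # raises an exception if the cluster is unreachable
print("Atlas connection OK")
```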
Binary file added apps/local-rag-pdf/README.md:Zone.Identifier
Binary file not shown.
28 changes: 28 additions & 0 deletions apps/local-rag-pdf/app.py
@@ -63,6 +63,8 @@ def process_query():
conversation_history=conversation_history,
k=st.session_state["retrieval_k"],
score_threshold=st.session_state["retrieval_threshold"],
search_type=st.session_state.get("search_type", "similarity"),
lambda_mult=st.session_state.get("lambda_mult", 0.5),
)
except ValueError as e:
agent_text = str(e)
@@ -141,6 +143,32 @@ def page():
# Display messages and text input
display_messages()

# Sidebar settings
with st.sidebar:
st.header("Retrieval Settings")
search_type = st.radio(
"Search Type",
options=["similarity", "mmr"],
format_func=lambda x: "Similarity"
if x == "similarity"
else "MMR (Diversity)",
index=0,
)

lambda_mult = 0.5
if search_type == "mmr":
lambda_mult = st.slider(
"Diversity (Lambda)",
min_value=0.0,
max_value=1.0,
value=0.5,
step=0.1,
help="0.0 = Maximum Diversity, 1.0 = Maximum Relevance",
)

st.session_state["search_type"] = search_type
st.session_state["lambda_mult"] = lambda_mult

# Accept user input using the new chat input
prompt = st.chat_input("Type your message here...")
if prompt:
Binary file added apps/local-rag-pdf/app.py:Zone.Identifier
Binary file not shown.
Binary file added apps/local-rag-pdf/config.yaml:Zone.Identifier
Binary file not shown.
81 changes: 60 additions & 21 deletions apps/local-rag-pdf/rag_module.py
@@ -2,15 +2,15 @@
from typing import Optional

import yaml
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_core.globals import set_debug, set_verbose
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pymongo import MongoClient

# Enable verbose debugging
@@ -38,6 +38,18 @@ def __init__(self, config_file: str = "config.yaml"):
config = load_config(config_file)

# Read values from config
required_keys = [
"llm_model",
"embedding_model",
"mongo_connection_str",
"database_name",
"collection_name",
]
for key in required_keys:
if key not in config:
logger.error(f"Missing configuration key: {key}")
raise KeyError(f"Missing configuration key: {key}")

llm_model = config["llm_model"]
embedding_model = config["embedding_model"]
mongo_connection_str = config["mongo_connection_str"]
@@ -96,30 +108,40 @@ def upload_and_index_pdf(self, pdf_file_path: str):
"""
Upload and index a PDF file, chunk its contents, and store the embeddings in MongoDB Atlas.
"""
logger.info(f"Starting ingestion for file: {pdf_file_path}")
docs = PyPDFLoader(file_path=pdf_file_path).load()
try:
logger.info(f"Starting ingestion for file: {pdf_file_path}")
docs = PyMuPDFLoader(file_path=pdf_file_path).load()

if not docs:
logger.warning(f"No content found in file: {pdf_file_path}")
return

logger.info(f"Loaded {len(docs)} pages from {pdf_file_path}")
logger.info(f"Loaded {len(docs)} pages from {pdf_file_path}")

chunks = self.text_splitter.split_documents(docs)
logger.info(f"Split into {len(chunks)} document chunks")
chunks = self.text_splitter.split_documents(docs)
logger.info(f"Split into {len(chunks)} document chunks")

# Optional: Log some sample chunks for verification
for i, chunk in enumerate(chunks[:3]):
logger.debug(f"Chunk {i+1} Content: {chunk.page_content[:200]}...")
# Optional: Log some sample chunks for verification
for i, chunk in enumerate(chunks[:3]):
logger.debug(f"Chunk {i+1} Content: {chunk.page_content[:200]}...")

chunks = filter_complex_metadata(chunks)
chunks = filter_complex_metadata(chunks)

# Add documents to vector store and check embeddings
self.vector_store.add_documents(documents=chunks)
logger.info("Document embeddings stored successfully in MongoDB Atlas.")
# Add documents to vector store and check embeddings
self.vector_store.add_documents(documents=chunks)
logger.info("Document embeddings stored successfully in MongoDB Atlas.")
except Exception as e:
logger.error(f"Failed to ingest PDF file {pdf_file_path}: {e}")
raise

def query_with_context(
self,
query: str,
conversation_history: Optional[list] = None,
k: int = 5,
score_threshold: float = 0.2,
search_type: str = "similarity",
lambda_mult: float = 0.5,
):
"""
Answer a query using the RAG pipeline with verbose debugging and conversation history.
@@ -129,18 +151,35 @@ def query_with_context(
- conversation_history (list): List of previous messages in the conversation.
- k (int): Number of retrieved documents.
- score_threshold (float): Similarity score threshold for retrieval.
- search_type (str): Type of search ("similarity" or "mmr").
- lambda_mult (float): Diversity factor for MMR (0.0 to 1.0).

Returns:
- str: The assistant's response.
"""
if not self.vector_store:
raise ValueError("No vector store found. Please ingest a document first.")

if not self.retriever:
self.retriever = self.vector_store.as_retriever(
search_type="similarity_score_threshold",
search_kwargs={"k": k, "score_threshold": score_threshold},
)
# Reset retriever if search parameters change (simplified approach)
# In a more complex app, we might check if params changed.
# Here we just re-create it to be safe and simple.
search_kwargs = {"k": k}

if search_type == "similarity":
search_kwargs["score_threshold"] = score_threshold
search_type_arg = "similarity_score_threshold"
elif search_type == "mmr":
search_kwargs["lambda_mult"] = lambda_mult
search_type_arg = "mmr"
else:
# Fallback
search_kwargs["score_threshold"] = score_threshold
search_type_arg = "similarity_score_threshold"

self.retriever = self.vector_store.as_retriever(
search_type=search_type_arg,
search_kwargs=search_kwargs,
)

# Generate and log query embeddings
query_embedding = self.embeddings.embed_query(query)
Binary file added apps/local-rag-pdf/rag_module.py:Zone.Identifier
Binary file not shown.
2 changes: 1 addition & 1 deletion apps/local-rag-pdf/requirements.txt
@@ -4,4 +4,4 @@ langchain_ollama
langchain_community
langchain-mongodb
pymongo
pypdf
pymupdf