From 06e5ec0f16dff7502fa7681f36fd6889fe4e783c Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 17 Nov 2025 17:07:34 +0530
Subject: [PATCH 01/20] Add S3 Bedrock BDA ingestion support with user
 confirmation and pymupdf4llm integration

---
 common/requirements.txt              |   3 +-
 common/utils/image_data_extractor.py | 163 +++---------
 common/utils/markdown_parsing.py     |  63 +++++
 common/utils/text_extractors.py      | 254 +++++++++---------
 graphrag-ui/src/pages/Setup.tsx      | 370 +++++++++++++--------------
 5 files changed, 403 insertions(+), 450 deletions(-)
 create mode 100644 common/utils/markdown_parsing.py

diff --git a/common/requirements.txt b/common/requirements.txt
index 562c2f6..f0022f3 100644
--- a/common/requirements.txt
+++ b/common/requirements.txt
@@ -110,7 +110,8 @@ packaging==24.2
 pandas==2.2.3
 #pathtools==0.1.2
 pillow==11.2.1
-PyMuPDF==1.26.4
+#PyMuPDF==1.26.4
+pymupdf4llm==0.2.0
 platformdirs==4.3.8
 pluggy==1.6.0
 prometheus_client==0.22.1
diff --git a/common/utils/image_data_extractor.py b/common/utils/image_data_extractor.py
index bde9c97..74e8d2f 100644
--- a/common/utils/image_data_extractor.py
+++ b/common/utils/image_data_extractor.py
@@ -11,155 +11,54 @@
 
 logger = logging.getLogger(__name__)
 
-
-
-def describe_image_with_llm(image_input):
+def describe_image_with_llm(file_path):
     """
-    Send image (pixmap or PIL image) to LLM vision model and return description.
-    Uses multimodal_service from config if available, otherwise falls back to completion_service.
-    Currently supports: OpenAI, Azure OpenAI, Google GenAI, and Google VertexAI
+    Describe the image at file_path with the multimodal LLM (sent as a base64 JPEG data URL); returns the description, or an "[Image: ...]" placeholder string on failure.
     """
     try:
+        from PIL import Image as PILImage
+        
         client = get_multimodal_service()
         if not client:
             return "[Image: Failed to create multimodal LLM client]"
-        
+
+        # Normalize to RGB, re-encode as JPEG in memory, and base64-encode it for the data URL below
+        pil_image = PILImage.open(file_path)
         buffer = io.BytesIO()
-        # Convert to RGB if needed for better compatibility
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        b64_img = base64.b64encode(buffer.getvalue()).decode("utf-8")
+        if pil_image.mode != 'RGB':
+            pil_image = pil_image.convert('RGB')
+        pil_image.save(buffer, format="JPEG", quality=95)
+        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
 
-        # Build messages (system + human)
         messages = [
-        SystemMessage(
-            content="You are a helpful assistant that describes images concisely for document analysis."
-        ),
-        HumanMessage(
-            content=[
-                {
-                    "type": "text",
-                    "text": (
-                        "Please describe what you see in this image and "
-                        "if the image has scanned text then extract all the text. "
-                        "if the image has any logo, icon, or branding element, try to describe it with text. "
-                        "Focus on any text, diagrams, charts, or other visual elements."
-                        "If the image is purely a logo, icon, or branding element, start your response with 'LOGO:' or 'ICON:'."
-                    ),
-                },
-                 {
-                     "type": "image_url",
-                     "image_url": {"url": f"data:image/jpeg;base64,{b64_img}"},
-                 },
-            ]
-        ),
+            SystemMessage(
+                content="You are a helpful assistant that describes images concisely for document analysis."
+            ),
+            HumanMessage(
+                content=[
+                    {
+                        "type": "text",
+                        "text": (
+                            "Please describe what you see in this image and "
+                            "if the image has scanned text then extract all the text. "
+                            "If the image has any graph, chart, table, or other diagram, describe it. "
+                        ),
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
+                    },
+                ],
+            ),
         ]
 
-        # Get response from LangChain LLM client
-        # Access the underlying LangChain client
         langchain_client = client.llm
         response = langchain_client.invoke(messages)
 
-        return response.content if hasattr(response, 'content') else str(response)
+        return response.content if hasattr(response, "content") else str(response)
 
     except Exception as e:
         logger.error(f"Failed to describe image with LLM: {str(e)}")
         return "[Image: Error processing image description]"
 
 
-def save_image_and_get_markdown(image_input, context_info="", graphname=None):
-    """
-    Save image locally to static/images/ folder and return markdown reference with description.
-    
-    LEGACY/OLD APPROACH: Used for backward compatibility with JSONL-based loading.
-    Images are saved as files and served via /ui/images/ endpoint with img:// protocol.
-    
-    For NEW direct loading approach, images are stored in Image vertex as base64
-    and served via /ui/image_vertex/ endpoint with image:// protocol.
-    
-    Args:
-        image_input: PIL Image object
-        context_info: Optional context (e.g., "page 3 of invoice.pdf")
-        graphname: Graph name to organize images by graph (optional)
-    
-    Returns:
-        dict with:
-            - 'markdown': Markdown string with img:// reference
-            - 'image_id': Unique identifier for the saved image
-            - 'image_path': Path where image was saved to static/images/
-    """
-    try:
-        # FIRST: Get description from LLM to check if it's a logo
-        description = describe_image_with_llm(image_input)
-        
-        # Check if the image is a logo, icon, or decorative element BEFORE saving
-        # These should be filtered out as they're not content-relevant
-        description_lower = description.lower()
-        logo_indicators = ['logo', 'icon', 'branding', 'watermark', 'trademark', 'company logo', 'brand logo']
-        
-        if any(indicator in description_lower for indicator in logo_indicators):
-            logger.info(f"Detected logo/icon in image, skipping: {description[:100]}")
-            return None
-        
-        # If not a logo, proceed with saving the image
-        # Generate unique image ID using hash of image content
-        buffer = io.BytesIO()
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        image_bytes = buffer.getvalue()
-        
-        # Create hash-based ID (deterministic for same image)
-        image_hash = hashlib.sha256(image_bytes).hexdigest()[:16]
-        image_id = f"{image_hash}.jpg"
-        
-        # Save image to local storage directory organized by graphname
-        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-        
-        # If graphname is provided, organize images by graph
-        if graphname:
-            images_dir = os.path.join(project_root, "static", "images", graphname)
-            # Include graphname in the image reference for URL construction
-            image_reference = f"{graphname}/{image_id}"
-        else:
-            images_dir = os.path.join(project_root, "static", "images")
-            image_reference = image_id
-        
-        os.makedirs(images_dir, exist_ok=True)
-        
-        image_path = os.path.join(images_dir, image_id)
-        
-        # Save image file (skip if already exists with same hash)
-        if not os.path.exists(image_path):
-            with open(image_path, 'wb') as f:
-                f.write(image_bytes)
-            logger.info(f"Saved content image to: {image_path}")
-        else:
-            logger.debug(f"Image already exists: {image_path}")
-        
-        # Generate markdown with custom img:// protocol (will be replaced later)
-        # Format: ![description](img://graphname/image_id) or ![description](img://image_id)
-        markdown = f"![{description}](img://{image_reference})"
-        
-        logger.info(f"Created image reference: {image_reference} with description")
-        
-        return {
-            'markdown': markdown,
-            'image_id': image_reference,
-            'image_path': image_path,
-            'description': description
-        }
-        
-    except Exception as e:
-        logger.error(f"Failed to save image and generate markdown: {str(e)}")
-        # Fallback to text description only
-        fallback_desc = f"[Image: {context_info} - processing failed]"
-        return {
-            'markdown': fallback_desc,
-            'image_id': None,
-            'image_path': None,
-            'description': fallback_desc
-        }
-
-
diff --git a/common/utils/markdown_parsing.py b/common/utils/markdown_parsing.py
new file mode 100644
index 0000000..7c8c476
--- /dev/null
+++ b/common/utils/markdown_parsing.py
@@ -0,0 +1,63 @@
+import re
+import os
+import pymupdf4llm
+
+class MarkdownProcessor:
+    """
+    Helpers for locating markdown image entries (![alt](path)) and rewriting
+    their alt text or target, keyed by image_id (basename without extension).
+    """
+
+    # ![alt](path): group 1 is the alt text (may not contain ']'),
+    # group 2 is the path (may not contain ')' or whitespace).
+    _pattern = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)\)')
+
+    @staticmethod
+    def _image_id(path):
+        """Return the basename of path without its extension."""
+        return os.path.splitext(os.path.basename(path))[0]
+
+    @classmethod
+    def _rewrite(cls, md_text, image_id, build):
+        """Replace images whose id == image_id with build(alt, path); keep the rest."""
+        def repl(m):
+            if cls._image_id(m.group(2)) != image_id:
+                return m.group(0)
+            return build(m.group(1), m.group(2))
+
+        return cls._pattern.sub(repl, md_text)
+
+    @classmethod
+    def extract_images(cls, md_text):
+        """
+        Return a list of {"path": path, "image_id": image_id}, one per image
+        entry in md_text, in document order.
+        image_id = basename without extension
+        """
+        return [
+            {"path": m.group(2), "image_id": cls._image_id(m.group(2))}
+            for m in cls._pattern.finditer(md_text)
+        ]
+
+    @classmethod
+    def insert_description_by_id(cls, md_text, image_id, description):
+        """
+        Replace the alt text of every image whose id == image_id with description.
+
+        Square brackets in description are turned into parentheses: a literal
+        ']' would end the alt text early, so _pattern (and with it
+        replace_path_with_tg_protocol) could no longer match the entry.
+        """
+        safe = str(description).replace('[', '(').replace(']', ')')
+        return cls._rewrite(md_text, image_id, lambda alt, path: f'![{safe}]({path})')
+
+    @classmethod
+    def replace_path_with_tg_protocol(cls, md_text, image_id, tg_reference):
+        """
+        Replace the path of every image whose id == image_id with
+        tg://<tg_reference> (e.g. tg_reference='Graphs_image_1'); alt text is kept.
+        Afterwards the entry's path is the tg:// reference, so image_id generally no longer matches it.
+        """
+        return cls._rewrite(
+            md_text, image_id, lambda alt, path: f'![{alt}](tg://{tg_reference})'
+        )
\ No newline at end of file
diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index da3e22d..b900cae 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -183,137 +183,154 @@ def extract_text_from_file_with_images_as_docs(file_path, graphname=None):
 
 def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
     """
-    Extract PDF as ONE markdown document with inline image references.
+    Extract PDF as ONE markdown document with inline image references using pymupdf4llm.
+    Images are written to a unique per-PDF temporary folder (tg_temp_<id>) so parallel runs do not conflict,
+    and the folder is removed (best effort) before returning. Returns a list: the markdown doc dict, then one dict per image.
     """
+    # Use unique folder per PDF to allow parallel processing without conflicts
+    unique_folder_id = uuid.uuid4().hex[:12]
+    image_output_folder = Path(f"tg_temp_{unique_folder_id}")
+
     try:
-        import fitz  # PyMuPDF
+        import pymupdf4llm
         from PIL import Image as PILImage
+        from common.utils.image_data_extractor import describe_image_with_llm
+        from common.utils.markdown_parsing import MarkdownProcessor
+
+        # Ensure clean slate - remove folder if it exists from failed previous run
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
+
+        # Convert PDF to markdown with extracted image files
+        try:
+            markdown_content = pymupdf4llm.to_markdown(
+                file_path,
+                write_images=True,
+                image_path=str(image_output_folder),  # unique folder per PDF
+                force_text=False,
+                margins=0,
+                image_size_limit=0.08,  # presumably skips images below 8% of the page size — confirm in pymupdf4llm docs
+            )
+        except Exception as e:
+            logger.error(f"pymupdf4llm failed for {file_path}: {e}")
+            # Cleanup folder if it was created
+            if image_output_folder.exists():
+                shutil.rmtree(image_output_folder, ignore_errors=True)
+            return [{
+                "doc_id": base_doc_id,
+                "doc_type": "markdown",
+                "content": f"[PDF extraction failed: {e}]",
+                "position": 0
+            }]
+
+        if not markdown_content or not markdown_content.strip():
+            logger.warning(f"No content extracted from PDF: {file_path}")  # log only; processing continues below
+
+        # Extract image references from markdown
+        image_refs = MarkdownProcessor.extract_images(markdown_content)
+
+        if not image_refs:
+            # cleanup folder anyway
+            if image_output_folder.exists():
+                shutil.rmtree(image_output_folder, ignore_errors=True)
+
+            return [{
+                "doc_id": base_doc_id,
+                "doc_type": "markdown",
+                "content": markdown_content,
+                "position": 0
+            }]
 
-        doc = fitz.open(file_path)
-        markdown_parts = []
         image_entries = []
         image_counter = 0
 
-        for page_num, page in enumerate(doc, start=1):
-            if page_num > 1:
-                markdown_parts.append("\n\n")
-            markdown_parts.append(f"--- Page {page_num} ---\n") #Avoid to be splitted as a single chunk
-
-            blocks = page.get_text("blocks", sort=True)
-            text_blocks_with_pos = []
-
-            for block in blocks:
-                block_type = block[6] if len(block) > 6 else 0
-                if block_type == 0:
-                    text = block[4].strip()
-                    if text:
-                        y_pos = block[1]
-                        text_blocks_with_pos.append({'type': 'text', 'content': text, 'y_pos': y_pos})
-
-            image_list = page.get_images(full=True)
-            images_with_pos = []
-
-            if image_list:
-                for img_index, img_info in enumerate(image_list):
-                    try:
-                        xref = img_info[0]
-                        base_image = doc.extract_image(xref)
-                        image_bytes = base_image["image"]
-                        image_ext = base_image["ext"]
-
-                        img_rects = page.get_image_rects(xref)
-                        y_pos = img_rects[0].y0 if img_rects else 999999
-
-                        pil_image = PILImage.open(io.BytesIO(image_bytes))
-                        if pil_image.width < 100 or pil_image.height < 100:
-                            continue
-
-                        from common.utils.image_data_extractor import describe_image_with_llm
-                        description = describe_image_with_llm(pil_image)
-                        description_lower = description.lower()
-                        logo_indicators = [
-                            'logo:', 'icon:', 'logo', 'icon', 'branding',
-                            'watermark', 'trademark', 'stylized letter',
-                            'stylized text', 'word "', "word '"
-                        ]
-                        if any(indicator in description_lower for indicator in logo_indicators):
-                            continue
-
-                        buffer = io.BytesIO()
-                        if pil_image.mode != 'RGB':
-                            pil_image = pil_image.convert('RGB')
-                        pil_image.save(buffer, format="JPEG", quality=95)
-                        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
-
-                        image_counter += 1
-                        image_doc_id = f"{base_doc_id}_image_{image_counter}"
-
-                        images_with_pos.append({
-                            'type': 'image',
-                            'image_doc_id': image_doc_id,
-                            'description': description,
-                            'y_pos': y_pos,
-                            'image_data': image_base64,
-                            'image_format': image_ext,
-                            'width': pil_image.width,
-                            'height': pil_image.height
-                        })
-                    except Exception as img_error:
-                        logger.warning(f"Failed to extract image on page {page_num}: {img_error}")
-
-            all_elements = text_blocks_with_pos + images_with_pos
-            all_elements.sort(key=lambda x: x['y_pos'])
-
-            for element in all_elements:
-                if element['type'] == 'text':
-                    markdown_parts.append(element['content'])
-                    markdown_parts.append("\n\n")
-                else:
-                    # Add image description as text, then markdown image reference
-                    # Use short alt text in markdown, full description as regular text
-                    markdown_parts.append(f"![{element['description']}](tg://{element['image_doc_id']})\n\n")
-
-                    image_entries.append({
-                        "doc_id": element['image_doc_id'],
-                        "doc_type": "image",
-                        "image_description": element['description'],
-                        "image_data": element['image_data'],
-                        "image_format": element['image_format'],
-                        "parent_doc": base_doc_id,
-                        "page_number": page_num,
-                        "width": element['width'],
-                        "height": element['height'],
-                        "position": int(element['image_doc_id'].split('_')[-1])
-                    })
-
-        doc.close()
-
-        markdown_content = "".join(markdown_parts) if markdown_parts else "" #No content extracted from PDF
-        if not markdown_content:
-            return []
+        for img_ref in image_refs:
+            try:
+                img_path = Path(img_ref["path"])  # convert to Path
+                image_id = img_ref["image_id"]
+
+                # Image description (LLM call); on failure it returns an "[Image: ...]" placeholder string instead of raising
+                description = describe_image_with_llm(str(img_path))
+
+                markdown_content = MarkdownProcessor.insert_description_by_id(
+                    markdown_content,
+                    image_id,
+                    description
+                )
+
+                # Convert image to base64
+                pil_image = PILImage.open(img_path)
+                buffer = io.BytesIO()
+
+                if pil_image.mode != "RGB":
+                    pil_image = pil_image.convert("RGB")
+
+                pil_image.save(buffer, format="JPEG", quality=95)
+                image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+                image_counter += 1
+                image_doc_id = f"{base_doc_id}_image_{image_counter}"
+
+                # Replace file path with tg:// protocol reference in markdown
+                markdown_content = MarkdownProcessor.replace_path_with_tg_protocol(
+                    markdown_content,
+                    image_id,
+                    image_doc_id
+                )
+
+                image_entries.append({
+                    "doc_id": image_doc_id,
+                    "doc_type": "image",
+                    "image_description": description,
+                    "image_data": image_base64,
+                    "image_format": "jpg",  # always re-encoded as JPEG above, whatever the extracted file type
+                    "parent_doc": base_doc_id,
+                    "page_number": 0,  # NOTE(review): constant placeholder; the source page is not derived here
+                    "width": pil_image.width,
+                    "height": pil_image.height,
+                    "position": image_counter
+                })
+
+            except Exception as img_error:
+                logger.warning(f"Failed to process image {img_ref.get('path')}: {img_error}")
+
+        # FINAL CLEANUP — delete folder after processing everything
+        if image_output_folder.exists() and image_output_folder.is_dir():
+            try:
+                shutil.rmtree(image_output_folder)
+                logger.debug(f"Deleted image folder: {image_output_folder}")
+            except Exception as delete_err:
+                logger.warning(f"Failed to delete folder {image_output_folder}: {delete_err}")
 
+        # Build final result
         result = [{
             "doc_id": base_doc_id,
-            "doc_type": "",
+            "doc_type": "markdown",
             "content": markdown_content,
             "position": 0
         }]
         result.extend(image_entries)
+
         return result
 
-    except ImportError:
-        logger.error("PyMuPDF not available")
+    except ImportError as import_err:
+        logger.error(f"Required library missing: {import_err}")
+        # Cleanup on import error
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
         return [{
             "doc_id": base_doc_id,
-            "doc_type": "",
-            "content": "[PDF extraction requires PyMuPDF]",
+            "doc_type": "markdown",
+            "content": "[PDF extraction requires pymupdf4llm and PyMuPDF]",
             "position": 0
         }]
     except Exception as e:
         logger.error(f"Error extracting PDF: {e}")
+        # Cleanup on any other error
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
         raise
 
-
 def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
     """
     Extract standalone image file as ONE markdown document with inline image reference.
@@ -324,25 +341,15 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
 
         pil_image = PILImage.open(file_path)
         if pil_image.width < 100 or pil_image.height < 100:
-            return [{
-                "doc_id": base_doc_id,
-                "doc_type": "",
-                "content": f"[Skipped small image: {file_path.name}]",
-                "position": 0
-            }]
+            pass
 
-        description = describe_image_with_llm(pil_image)
+        description = describe_image_with_llm(str(Path(file_path).absolute()))
         description_lower = description.lower()
         logo_indicators = ['logo:', 'icon:', 'logo', 'icon', 'branding',
                            'watermark', 'trademark', 'stylized letter',
                            'stylized text', 'word "', "word '"]
         if any(indicator in description_lower for indicator in logo_indicators):
-            return [{
-                "doc_id": base_doc_id,
-                "doc_type": "",
-                "content": f"[Skipped logo/icon: {file_path.name}]",
-                "position": 0
-            }]
+            return []
 
         buffer = io.BytesIO()
         if pil_image.mode != 'RGB':
@@ -353,7 +360,6 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
         image_id = f"{base_doc_id}_image_1"
         # Put description as text, then markdown image reference with short alt text
         content = f"![{description}](tg://{image_id})"
-
         return [
             {
                 "doc_id": base_doc_id,
@@ -379,7 +385,7 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
         logger.error(f"Error extracting image: {e}")
         return [{
             "doc_id": base_doc_id,
-            "doc_type": "",
+            "doc_type": "markdown",
             "content": f"[Image extraction failed: {str(e)}]",
             "position": 0
         }]
@@ -441,12 +447,10 @@ def get_doc_type_from_extension(extension):
 
     if extension in ['.html', '.htm']:
         return 'html'
-    elif extension in ['.md']:
-        return 'markdown'
     elif extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']:
         return 'image'
     else:
-        return ''
+        return 'markdown'
 
 
 def get_supported_extensions():
diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx
index b7d357d..2aaee99 100644
--- a/graphrag-ui/src/pages/Setup.tsx
+++ b/graphrag-ui/src/pages/Setup.tsx
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
 import { useNavigate } from "react-router-dom";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
-import { Database, Upload, RefreshCw, Loader2, Trash2, FolderUp, Cloud, ArrowLeft, CloudDownload, CloudLightning } from "lucide-react";
+import { Database, Upload, RefreshCw, Loader2, Trash2, FolderUp, Cloud, ArrowLeft, CloudDownload, CloudCog } from "lucide-react";
 import {
   Dialog,
   DialogContent,
@@ -56,7 +56,6 @@ const Setup = () => {
   const [uploadMessage, setUploadMessage] = useState("");
   const [isIngesting, setIsIngesting] = useState(false);
   const [ingestMessage, setIngestMessage] = useState("");
-  const [activeTab, setActiveTab] = useState("upload");
 
   // Refresh state
   const [refreshOpen, setRefreshOpen] = useState(false);
@@ -67,12 +66,13 @@ const Setup = () => {
   const [isCheckingStatus, setIsCheckingStatus] = useState(false);
   
   // S3 state
+  const [fileFormat, setFileFormat] = useState<"json" | "multi">("json");
   const [awsAccessKey, setAwsAccessKey] = useState("");
   const [awsSecretKey, setAwsSecretKey] = useState("");
+  const [dataPath, setDataPath] = useState("");
   const [inputBucket, setInputBucket] = useState("");
   const [outputBucket, setOutputBucket] = useState("");
   const [regionName, setRegionName] = useState("");
-  const [skipBDAProcessing, setSkipBDAProcessing] = useState(false);
 
   // Cloud Download state
   const [cloudProvider, setCloudProvider] = useState<"s3" | "gcs" | "azure">("s3");
@@ -458,7 +458,7 @@ const Setup = () => {
       }
 
       const createData = await createResponse.json();
-      //console.log("Create ingest response:", createData);
+      console.log("Create ingest response:", createData);
 
       // Step 2: Run ingest
       setIngestMessage("Step 2/2: Running document ingest...");
@@ -484,7 +484,7 @@ const Setup = () => {
       }
 
       const ingestData = await ingestResponse.json();
-      //console.log("Ingest response:", ingestData);
+      console.log("Ingest response:", ingestData);
 
       setIngestMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
     } catch (error: any) {
@@ -495,8 +495,8 @@ const Setup = () => {
     }
   };
 
-  // Ingest files from S3 with Amazon BDA
-  const handleAmazonBDAIngest = async () => {
+  // Ingest files from S3 with Bedrock BDA
+  const handleS3BedrockIngest = async () => {
     if (!ingestGraphName) {
       setIngestMessage("Please select a graph");
       return;
@@ -508,112 +508,92 @@ const Setup = () => {
       return;
     }
 
-    if (skipBDAProcessing) {
-      // When skipping BDA, only output bucket and region are required
-      if (!outputBucket || !regionName) {
-        setIngestMessage("❌ Please provide Output Bucket and Region Name");
-        return;
-      }
-    } else {
-      // When using BDA, all fields are required
+    if (fileFormat === "multi") {
       if (!inputBucket || !outputBucket || !regionName) {
         setIngestMessage("❌ Please provide Input Bucket, Output Bucket, and Region Name");
         return;
       }
-    }
 
-    // Ask for confirmation
-    const confirmMessage = skipBDAProcessing
-      ? `You're skipping Amazon BDA processing and will ingest directly from the output bucket (${outputBucket}). Please confirm to proceed.`
-      : `You're using Amazon BDA for multimodal document processing. This will trigger Amazon BDA to process your documents from the input bucket (${inputBucket}) and store the results in the output bucket (${outputBucket}) and then ingest them into your knowledge graph. Please confirm to proceed.`;
-    
-    const shouldProceed = await confirm(confirmMessage);
-    if (!shouldProceed) {
-      setIngestMessage("Operation cancelled by user.");
-      return;
+      // Ask for confirmation if using Bedrock (multi format)
+      const shouldProceed = await confirm(
+        `Are you using AWS Bedrock for multimodal document processing? This will trigger AWS Bedrock BDA to process your documents from the input bucket (${inputBucket}) and store the results in the output bucket (${outputBucket}).`
+      );
+      if (!shouldProceed) {
+        setIngestMessage("Operation cancelled by user.");
+        return;
+      }
+    } else if (fileFormat === "json") {
+      if (!dataPath) {
+        setIngestMessage("❌ Please provide Data Path (e.g., s3://bucket-name/path/to/data)");
+        return;
+      }
     }
 
     setIsIngesting(true);
+    setIngestMessage("Step 1/2: Creating ingest job...");
 
     try {
       const creds = localStorage.getItem("creds");
-      let loadingInfo: any = {};
 
-      if (skipBDAProcessing) {
-        // Skip BDA processing - create ingest job that reads directly from output bucket
-        const runIngestConfig: any = {
-          data_source: "bda",
+      // Step 1: Create ingest job
+      const createIngestConfig: any = {
+        data_source: "s3",
+        data_source_config: {
           aws_access_key: awsAccessKey,
           aws_secret_key: awsSecretKey,
-          output_bucket: outputBucket,
-          region_name: regionName,
-          bda_jobs:[],
-          loader_config: {
-            doc_id_field: "doc_id",
-            content_field: "content",
-            doc_type: "markdown",
-          },
-          file_format: "multi"
-        };
-
-        setIngestMessage("Step 1/2: Creating ingest job from output bucket...");
-
-        // Run ingest directly
-        loadingInfo = {
-          load_job_id: "load_documents_content_json",
-          data_source_id: runIngestConfig,
-          file_path: outputBucket,
-        };
-        setIngestMessage(`Step 2/2: Running document ingestion for all files in ${outputBucket}...`);
-      } else {
-        // Step 1: Create ingest job with BDA processing
-        const createIngestConfig: any = {
-          data_source: "bda",
-          data_source_config: {
-            aws_access_key: awsAccessKey,
-            aws_secret_key: awsSecretKey,
-            input_bucket: inputBucket,
-            output_bucket: outputBucket,
-            region_name: regionName,
-          },
-          loader_config: {
-            doc_id_field: "doc_id",
-            content_field: "content",
-            doc_type: "markdown",
-          },
-          file_format: "multi"
-        };
+        },
+        loader_config: {
+          doc_id_field: "doc_id",
+          content_field: "content",
+          doc_type: fileFormat === "multi" ? "markdown" : "",
+        },
+        file_format: fileFormat
+      };
 
-        setIngestMessage("Step 1/2: Triggering Amazon BDA processing and creating ingest job...");
+      // Add format-specific configuration
+      if (fileFormat === "multi") {
+        createIngestConfig.data_source_config.input_bucket = inputBucket;
+        createIngestConfig.data_source_config.output_bucket = outputBucket;
+        createIngestConfig.data_source_config.region_name = regionName;
+        setIngestMessage("Step 1/2: Creating ingest job and triggering AWS Bedrock BDA processing...");
+      } else if (fileFormat === "json") {
+        createIngestConfig.loader_config.doc_id_field = "url";
+      }
 
-        const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
-          method: "POST",
-          headers: {
-            "Content-Type": "application/json",
-            Authorization: `Basic ${creds}`,
-          },
-          body: JSON.stringify(createIngestConfig),
-        });
+      const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Basic ${creds}`,
+        },
+        body: JSON.stringify(createIngestConfig),
+      });
 
-        if (!createResponse.ok) {
-          const errorData = await createResponse.json();
-          throw new Error(errorData.detail || `Failed to create ingest job: ${createResponse.statusText}`);
-        }
+      if (!createResponse.ok) {
+        const errorData = await createResponse.json();
+        throw new Error(errorData.detail || `Failed to create ingest job: ${createResponse.statusText}`);
+      }
 
-        const createData = await createResponse.json();
-        //console.log("Create ingest response:", createData);
+      const createData = await createResponse.json();
+      console.log("Create ingest response:", createData);
 
-        // Step 2: Run ingest
-        loadingInfo = {
-          load_job_id: createData.load_job_id,
-          data_source_id: createData.data_source_id,
-          file_path: outputBucket,
-        };
+      // Step 2: Run ingest
+      setIngestMessage("Step 2/2: Running document ingest...");
 
-        const filesToIngest = createData.data_source_id.bda_jobs.map((job: any) => job.jobId.split("/")[-1]);
-        setIngestMessage(`Step 2/2: Running document ingest for ${filesToIngest.length} files in ${outputBucket}...`);
+      // Determine file path based on format
+      let filePath = "";
+      if (fileFormat === "multi") {
+        filePath = outputBucket; // For multi format, use output bucket
+      } else if (fileFormat === "json") {
+        filePath = dataPath; // For json format, use the provided data path
       }
 
+      const loadingInfo = {
+        load_job_id: createData.load_job_id,
+        data_source_id: createData.data_source_id,
+        file_path: filePath,
+      };
+
       const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
         method: "POST",
         headers: {
@@ -629,13 +609,15 @@ const Setup = () => {
       }
 
       const ingestData = await ingestResponse.json();
-      //console.log("Ingest response:", ingestData);
-      const filesIngested = ingestData.summary.map((file: any) => file.file_path);
-
-      setIngestMessage(`✅ Document ingestion completed successfully! Ingested ${filesIngested.length} into your knowledge graph.`);
+      console.log("Ingest response:", ingestData);
 
+      if (fileFormat === "multi") {
+        setIngestMessage(`✅ Data ingested successfully! AWS Bedrock BDA processed documents from ${inputBucket} and loaded results from ${outputBucket}.`);
+      } else {
+        setIngestMessage(`✅ Data ingested successfully! Processed documents from ${dataPath}.`);
+      }
     } catch (error: any) {
-      console.error("Error ingesting files:", error);
+      console.error("Error ingesting S3 data:", error);
       setIngestMessage(`❌ Error: ${error.message}`);
     } finally {
       setIsIngesting(false);
@@ -1121,8 +1103,8 @@ const Setup = () => {
               <label className="block text-sm font-medium mb-2 text-black dark:text-white">
                 Target Graph Name
               </label>
-              <Select value={ingestGraphName} onValueChange={setIngestGraphName} disabled={isIngesting}>
-                <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA" disabled={isIngesting}>
+              <Select value={ingestGraphName} onValueChange={setIngestGraphName}>
+                <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
                   <SelectValue placeholder="Select a graph" />
                 </SelectTrigger>
                 <SelectContent>
@@ -1139,35 +1121,32 @@ const Setup = () => {
                   )}
                 </SelectContent>
               </Select>
+              {ingestGraphName && (
+                <p className="text-xs text-gray-500 dark:text-gray-400 mt-1">
+                  Files will be uploaded to: uploads/{ingestGraphName}/
+                </p>
+              )}
             </div>
 
-            <Tabs value={activeTab} onValueChange={(value) => {
-              // Block tab switching when ingesting
-              if (!isIngesting) {
-                setActiveTab(value);
-              }
-            }} className="w-full">
+            <Tabs defaultValue="upload" className="w-full">
               <TabsList className="grid w-full grid-cols-3">
-                <TabsTrigger value="upload" disabled={isIngesting}>
+                <TabsTrigger value="upload">
                   <FolderUp className="h-4 w-4 mr-2" />
                   Upload Files
                 </TabsTrigger>
-                <TabsTrigger value="cloudDownload" disabled={isIngesting}>
+                <TabsTrigger value="cloudDownload">
                   <CloudDownload className="h-4 w-4 mr-2" />
                   Download from Cloud
                 </TabsTrigger>
-                <TabsTrigger value="AmazonBDA" disabled={isIngesting}>
-                  <CloudLightning className="h-4 w-4 mr-2" />
-                  Use Amazon BDA
+                <TabsTrigger value="s3">
+                  <CloudCog className="h-4 w-4 mr-2" />
+                  Amazon BDA Configuration
                 </TabsTrigger>
               </TabsList>
 
               {/* Upload Data Tab */}
               <TabsContent value="upload" className="space-y-4">
                 <div className="space-y-4">
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Upload local files to the server and ingest them into your knowledge graph.
-                  </p>
                   <div>
                     <label className="block text-sm font-medium mb-2 text-black dark:text-white">
                       Select Files
@@ -1179,9 +1158,9 @@ const Setup = () => {
                       disabled={isUploading}
                       className="dark:border-[#3D3D3D] dark:bg-shadeA"
                     />
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mt-2">
-                      Maximum upload per request: {MAX_UPLOAD_SIZE_MB} MB. {ingestGraphName ? `Upload destination: uploads/${ingestGraphName}/` : ""}
-                    </p>
+                  <p className="text-xs text-gray-500 dark:text-gray-400 mt-2">
+                    Maximum upload per request: {MAX_UPLOAD_SIZE_MB} MB.
+                  </p>
                   </div>
 
                   <div className="flex gap-2">
@@ -1295,9 +1274,6 @@ const Setup = () => {
               {/* Download from Cloud Storage Tab */}
               <TabsContent value="cloudDownload" className="space-y-4">
                 <div className="space-y-4">
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Download files from cloud storage and ingest them into your knowledge graph.
-                  </p>
                   <div>
                     <label className="block text-sm font-medium mb-2 text-black dark:text-white">
                       Cloud Storage Provider
@@ -1487,13 +1463,11 @@ const Setup = () => {
                       </div>
                     </>
                   )}
-                  {ingestGraphName && (
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">
-                      Download destination: downloaded_files_cloud/{ingestGraphName}/
-                    </p>
-                  )}
 
                   <div className="pt-4 border-t border-gray-300 dark:border-[#3D3D3D]">
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">
+                      Files will be downloaded to: downloaded_files_cloud/{ingestGraphName}/
+                    </p>
                     <Button 
                       onClick={handleCloudDownload}
                       disabled={isDownloading}
@@ -1607,12 +1581,23 @@ const Setup = () => {
                 </div>
               </TabsContent>
 
-              {/* Amazon BDA Configuration Tab */}
-              <TabsContent value="AmazonBDA" className="space-y-4">
-                <div className="space-y-4">              
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Process multimodal documents stored in S3 with Amazon Bedrock Data Automation and ingest them into your knowledge graph.
-                  </p>
+              {/* S3 Bedrock Configuration Tab */}
+              <TabsContent value="s3" className="space-y-4">
+                <div className="space-y-4">
+                  <div>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                      File Format
+                    </label>
+                    <Select value={fileFormat} onValueChange={(value: "json" | "multi") => setFileFormat(value)}>
+                      <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
+                        <SelectValue placeholder="Select file format" />
+                      </SelectTrigger>
+                      <SelectContent>
+                        <SelectItem value="json">JSON</SelectItem>
+                        <SelectItem value="multi">Multi</SelectItem>
+                      </SelectContent>
+                    </Select>
+                  </div>
 
                   {/* Common fields */}
                   <div>
@@ -1625,7 +1610,6 @@ const Setup = () => {
                       onChange={(e) => setAwsAccessKey(e.target.value)}
                       placeholder="Enter AWS access key"
                       className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
                     />
                   </div>
 
@@ -1639,74 +1623,76 @@ const Setup = () => {
                       onChange={(e) => setAwsSecretKey(e.target.value)}
                       placeholder="Enter AWS secret key"
                       className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
                     />
                   </div>
 
-                  <div>
-                    <div className="flex items-center justify-between mb-2">
-                      <label className="block text-sm font-medium text-black dark:text-white">
-                        Input Bucket
-                      </label>
-                      <label className="flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400 cursor-pointer">
-                        <input
-                          type="checkbox"
-                          checked={skipBDAProcessing}
-                          onChange={(e) => setSkipBDAProcessing(e.target.checked)}
-                          disabled={isIngesting}
-                          className="h-4 w-4 rounded border-gray-300 dark:border-gray-600"
-                        />
-                        <span>Skip BDA (ingest existing BDA output bucket directly)</span>
+                  {/* Conditional fields based on file format */}
+                  {fileFormat === "json" ? (
+                    <div>
+                      <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                        Data Path
                       </label>
+                      <Input
+                        type="text"
+                        value={dataPath}
+                        onChange={(e) => setDataPath(e.target.value)}
+                        placeholder="s3://bucket-name/path/to/data"
+                        className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                      />
                     </div>
-                    <Input
-                      type="text"
-                      value={inputBucket}
-                      onChange={(e) => setInputBucket(e.target.value)}
-                      placeholder="Enter input bucket name"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting || skipBDAProcessing}
-                    />
-                  </div>
-
-                  <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      Output Bucket
-                    </label>
-                    <Input
-                      type="text"
-                      value={outputBucket}
-                      onChange={(e) => setOutputBucket(e.target.value)}
-                      placeholder="Enter output bucket name"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
-                    />
-                  </div>
+                  ) : (
+                    <>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                          Input Bucket
+                        </label>
+                        <Input
+                          type="text"
+                          value={inputBucket}
+                          onChange={(e) => setInputBucket(e.target.value)}
+                          placeholder="Enter input bucket name"
+                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                        />
+                      </div>
 
-                  <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      Region Name
-                    </label>
-                    <Input
-                      type="text"
-                      value={regionName}
-                      onChange={(e) => setRegionName(e.target.value)}
-                      placeholder="e.g., us-east-1"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
-                    />
-                  </div>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                          Output Bucket
+                        </label>
+                        <Input
+                          type="text"
+                          value={outputBucket}
+                          onChange={(e) => setOutputBucket(e.target.value)}
+                          placeholder="Enter output bucket name"
+                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                        />
+                      </div>
 
-                  {ingestGraphName && (
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">
-                      Processing destination: Input bucket ({inputBucket || "not specified"}) → Output bucket ({outputBucket || "not specified"}) → Knowledge graph ({ingestGraphName})
-                    </p>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                          Region Name
+                        </label>
+                        <Input
+                          type="text"
+                          value={regionName}
+                          onChange={(e) => setRegionName(e.target.value)}
+                          placeholder="e.g., us-east-1"
+                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                        />
+                      </div>
+                    </>
                   )}
 
-                  {/* Ingest S3 Files with Amazon BDA Section */}
+                  {/* Ingest S3 Bedrock Data Section */}
                   <div className="border-t border-gray-300 dark:border-[#3D3D3D] pt-4 mt-4">
+                    <h3 className="text-sm font-medium mb-2 text-black dark:text-white">
+                      Ingest S3 Data into Knowledge Graph
+                    </h3>
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
+                      Process S3 data and add it to the knowledge graph using AWS Bedrock BDA for multimodal documents
+                    </p>
                     <Button
-                      onClick={handleAmazonBDAIngest}
+                      onClick={handleS3BedrockIngest}
                       disabled={isIngesting}
                       className="gradient text-white w-full"
                     >
@@ -1718,7 +1704,7 @@ const Setup = () => {
                       ) : (
                         <>
                           <Database className="h-4 w-4 mr-2" />
-                          Ingest from S3 Bucket into {ingestGraphName}
+                          Ingest from S3 into {ingestGraphName}
                         </>
                       )}
                     </Button>
@@ -1771,7 +1757,7 @@ const Setup = () => {
             <DialogHeader>
               <DialogTitle className="text-black dark:text-white">Refresh Knowledge Graph</DialogTitle>
               <DialogDescription className="text-gray-600 dark:text-[#D9D9D9]">
-                Rebuild the graph content and rerun community detection for your knowledge graph
+                Rebuild the graph content of your knowledge graph
               </DialogDescription>
             </DialogHeader>
 
@@ -1780,8 +1766,8 @@ const Setup = () => {
                 <label className="block text-sm font-medium mb-2 text-black dark:text-white">
                   Select Graph to Refresh
                 </label>
-                <Select value={refreshGraphName} onValueChange={setRefreshGraphName} disabled={isRefreshing || isRebuildRunning || isCheckingStatus}>
-                  <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA" disabled={isRefreshing || isRebuildRunning || isCheckingStatus}>
+                <Select value={refreshGraphName} onValueChange={setRefreshGraphName}>
+                  <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
                     <SelectValue placeholder="Select a graph" />
                   </SelectTrigger>
                   <SelectContent>
@@ -1805,7 +1791,7 @@ const Setup = () => {
                   ⚠️ Warning
                 </p>
                 <p className="text-sm text-yellow-700 dark:text-yellow-300 mt-1">
-                  This operation will process new documents and rerun community detection that will interrupt related queries.
+                  This operation will rebuild the graph content that will interrupt related queries. 
                   Please confirm to proceed.
                 </p>
               </div>

From 788fe2ac6b39b17c99c7fab82d717be89347697e Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Tue, 18 Nov 2025 18:27:24 +0530
Subject: [PATCH 02/20] Update README for OpenAI and Bedrock config, add
 pymupdf4llm license

---
 README.md                         |  78 ++--
 licenses/pymupdf4llm-AGPL-3.0.txt | 661 ++++++++++++++++++++++++++++++
 2 files changed, 704 insertions(+), 35 deletions(-)
 create mode 100644 licenses/pymupdf4llm-AGPL-3.0.txt

diff --git a/README.md b/README.md
index 8c38f6c..13c88b3 100644
--- a/README.md
+++ b/README.md
@@ -103,24 +103,23 @@ Organizing the data as a knowledge graph allows a chatbot to access accurate, fa
 ### Quick Start
 
 #### Use TigerGraph Docker-Based Instance
-Set your LLM Provider (supported `openai` or `gemini`) api key as environment varabiel LLM_API_KEY and use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
+Set your OpenAI API key as the environment variable `OPENAI_API_KEY` and use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
 ```
-curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | bash
+curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | sh
 ```
 
 The GraphRAG instances will be deployed at `./graphrag` folder and TigerGraph instance will be available at `http://localhost:14240`.
-To change installation folder, use `bash -s -- <graphrag_folder> <llm_provider>` instead of `bash` at the end of the above command.
-
-> Note: for other LLM providers, manually update `configs/server_config.json` accordingly and re-run `docker compose up -d`
+To change the installation folder, use `sh -s -- <graphrag_folder>` instead of `sh` at the end of the above command.
 
 #### Use Pre-Installed TigerGraph Instance
-Similar to the above setup, and use the following command for a one-step quick deployment connecting to a pre-installed TigerGraph with default configurations:
+
+Use the following command for a one-step quick deployment that connects to a pre-installed TigerGraph instance with default configurations:
 ```
-curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | bash
+curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | sh
 ```
 
 The GraphRAG instances will be deployed at `./graphrag` folder and connect to TigerGraph instance at `http://localhost:14240` by default.
-To change installation folder, TigerGraph instance location or username/password, use `bash -s -- <graphrag_folder> <llm_provider> <tg_host> <tg_port> <tg_username> <tg_password>` instead of `bash` at the end of the above command.
+To change the installation folder, TigerGraph instance location, or username/password, use `sh -s -- <graphrag_folder> <tg_host> <tg_port> <tg_username> <tg_password>` instead of `sh` at the end of the above command.
 
 [Go back to top](#top)
 
@@ -152,7 +151,7 @@ Here’s what the folder structure looks like:
 
 ##### Step 3: Adjust configurations
 
-Edit `llm_config` section of `configs/server_config.json` and replace `<YOUR_LLM_API_KEY>` to your own LLM_API_KEY for the LLM provider. 
+Edit the `llm_config` section of `configs/server_config.json` and replace `<YOUR_OPENAI_API_KEY>` with your own OpenAI API key.
  
 > If desired, you can also change the model to be used for the embedding service and completion service to your preferred models to adjust the output from the LLM service.
 
@@ -470,23 +469,27 @@ In addition to the `OPENAI_API_KEY`, `llm_model` and `model_name` can be edited
 ```json
 {
     "llm_config": {
+        "authentication_configuration": {
+            "OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
+        },
         "embedding_service": {
-            "embedding_model_service": "openai",
             "model_name": "text-embedding-3-small",
-            "authentication_configuration": {
-                "OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
-            }
+            "embedding_model_service": "openai"
         },
         "completion_service": {
             "llm_service": "openai",
             "llm_model": "gpt-4.1-mini",
-            "authentication_configuration": {
-                "OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
-            },
             "model_kwargs": {
                 "temperature": 0
             },
             "prompt_path": "./common/prompts/openai_gpt4/"
+        },
+        "multimodal_service": {
+            "llm_service": "openai",
+            "llm_model": "gpt-4o-mini",
+            "model_kwargs": {
+                "temperature": 0
+            }
         }
     }
 }
@@ -546,7 +549,7 @@ And your JSON config should follow as:
             "model_kwargs": {
                 "temperature": 0
             },
-            "prompt_path": "./common/prompts/gcp_vertexai_palm/"
+            "prompt_path": "./app/prompts/gcp_vertexai_palm/"
         }
     }
 }
@@ -583,7 +586,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
             "model_kwargs": {
                 "temperature": 0
             },
-            "prompt_path": "./common/prompts/azure_open_ai_gpt35_turbo_instruct/"
+            "prompt_path": "./app/prompts/azure_open_ai_gpt35_turbo_instruct/"
         }
     }
 }
@@ -594,27 +597,32 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
 ```json
 {
     "llm_config": {
+        "authentication_configuration": {
+            "AWS_ACCESS_KEY_ID": "YOUR_AWS_ACCESS_KEY",
+            "AWS_SECRET_ACCESS_KEY": "YOUR_AWS_SECRET_KEY",
+            "AWS_REGION_NAME": "us-west-2"
+        },
         "embedding_service": {
+            "model_name": "amazon.titan-embed-text-v1",
             "embedding_model_service": "bedrock",
-            "model_name":"amazon.titan-embed-text-v2",
-            "region_name":"us-west-2",
-            "authentication_configuration": {
-                "AWS_ACCESS_KEY_ID": "ACCESS_KEY",
-                "AWS_SECRET_ACCESS_KEY": "SECRET"
-            }
+            "dimensions": 1536
         },
         "completion_service": {
             "llm_service": "bedrock",
-            "llm_model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
-            "region_name":"us-west-2",
-            "authentication_configuration": {
-                "AWS_ACCESS_KEY_ID": "ACCESS_KEY",
-                "AWS_SECRET_ACCESS_KEY": "SECRET"
-            },
+            "llm_model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
             "model_kwargs": {
                 "temperature": 0,
+                "max_tokens": 4096
             },
-            "prompt_path": "./common/prompts/aws_bedrock_claude3haiku/"
+            "prompt_path": "./common/prompts/openai_gpt4/"
+        },
+        "multimodal_service": {
+            "llm_service": "bedrock",
+            "llm_model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
+            "model_kwargs": {
+                "temperature": 0,
+                "max_tokens": 4096
+            }
         }
     }
 }
@@ -640,7 +648,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
             "model_kwargs": {
                 "temperature": 0.0000001
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/openai_gpt4/"
         }
     }
 }
@@ -670,7 +678,7 @@ Example configuration for a model on Hugging Face with a dedicated endpoint is s
             "model_kwargs": {
                 "temperature": 0.1
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/openai_gpt4/"
         }
     }
 }
@@ -697,7 +705,7 @@ Example configuration for a model on Hugging Face with a serverless endpoint is
             "model_kwargs": {
                 "temperature": 0.1
             },
-            "prompt_path": "./common/prompts/llama_70b/"
+            "prompt_path": "./app/prompts/llama_70b/"
         }
     }
 }
@@ -724,7 +732,7 @@ Example configuration for a model on Hugging Face with a serverless endpoint is
             "model_kwargs": {
                 "temperature": 0.1
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/openai_gpt4/"
         }
     }
 }
diff --git a/licenses/pymupdf4llm-AGPL-3.0.txt b/licenses/pymupdf4llm-AGPL-3.0.txt
new file mode 100644
index 0000000..0ad25db
--- /dev/null
+++ b/licenses/pymupdf4llm-AGPL-3.0.txt
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.

From 58a86d11200bdd0f469fbcada816f83f18e04030 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Tue, 18 Nov 2025 18:32:55 +0530
Subject: [PATCH 03/20] Update README for OpenAI and Bedrock config, add
 pymupdf4llm license

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 13c88b3..9469ad6 100644
--- a/README.md
+++ b/README.md
@@ -482,7 +482,7 @@ In addition to the `OPENAI_API_KEY`, `llm_model` and `model_name` can be edited
             "model_kwargs": {
                 "temperature": 0
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/openai_gpt4/"
         },
         "multimodal_service": {
             "llm_service": "openai",
@@ -614,7 +614,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
                 "temperature": 0,
                 "max_tokens": 4096
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/aws_bedrock_claude3haiku/"
         },
         "multimodal_service": {
             "llm_service": "bedrock",

From c20aff8fc3f48a39778d17beb023c1cf0f9e0acb Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Tue, 18 Nov 2025 22:55:22 +0530
Subject: [PATCH 04/20] Fix prompt_path to use ./common/prompts/ for OpenAI and
 Bedrock

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9469ad6..13c88b3 100644
--- a/README.md
+++ b/README.md
@@ -482,7 +482,7 @@ In addition to the `OPENAI_API_KEY`, `llm_model` and `model_name` can be edited
             "model_kwargs": {
                 "temperature": 0
             },
-            "prompt_path": "./app/prompts/openai_gpt4/"
+            "prompt_path": "./common/prompts/openai_gpt4/"
         },
         "multimodal_service": {
             "llm_service": "openai",
@@ -614,7 +614,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
                 "temperature": 0,
                 "max_tokens": 4096
             },
-            "prompt_path": "./app/prompts/aws_bedrock_claude3haiku/"
+            "prompt_path": "./common/prompts/openai_gpt4/"
         },
         "multimodal_service": {
             "llm_service": "bedrock",

From 5a0f87cd9638f78b8c52f9ae1e9c8c5e6fde60fe Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Fri, 21 Nov 2025 20:57:53 +0530
Subject: [PATCH 05/20] bug fixes

---
 graphrag/app/routers/ui.py          |  1 +
 graphrag/app/supportai/supportai.py | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/graphrag/app/routers/ui.py b/graphrag/app/routers/ui.py
index 9637347..114b489 100644
--- a/graphrag/app/routers/ui.py
+++ b/graphrag/app/routers/ui.py
@@ -395,6 +395,7 @@ async def serve_image_from_vertex(
         LogWriter.info(f"Serving image {image_id} from graph {graphname}")
 
         # Fetch the Image vertex by ID
+        # TigerGraph loading job uses gsql_lower() so all IDs are stored in lowercase
         image_vertices = conn.getVerticesById('Image', [image_id.lower()])
         
         if not image_vertices:
diff --git a/graphrag/app/supportai/supportai.py b/graphrag/app/supportai/supportai.py
index d2efe8a..6b93df0 100644
--- a/graphrag/app/supportai/supportai.py
+++ b/graphrag/app/supportai/supportai.py
@@ -337,9 +337,9 @@ def create_ingest(
     conn: TigerGraphConnection,
 ):
     # Check for invalid combination of multi format and non-s3 data source
-    if ingest_config.data_source.lower() in ["bda", "server"] and ingest_config.get("file_format", "").lower() != "multi":
-        logger.warning(f"File format {ingest_config.get('file_format', '').lower()} is not supported for data source {ingest_config.data_source.lower()}")
-        ingest_config["file_format"] = "multi"
+    if ingest_config.data_source.lower() in ["bda", "server"] and ingest_config.file_format.lower() != "multi":
+        logger.warning(f"File format {ingest_config.file_format.lower()} is not supported for data source {ingest_config.data_source.lower()}")
+        ingest_config.file_format = "multi"
 
     res_ingest_config = {"data_source": ingest_config.data_source.lower()}
     res_ingest_config["file_format"] = ingest_config.file_format.lower()
@@ -481,9 +481,9 @@ def create_ingest(
         except Exception as e:
             raise Exception(f"Error during Amazon BDA preprocessing: {e}")
     elif ingest_config.data_source.lower() == "server":
-        data_path = ingest_config.data_source_config.get("data_path", None)
+        data_path = ingest_config.data_source_config.get("folder_path", None)
         if data_path is None:
-            raise Exception("Data path not provided for server processing")
+            raise Exception("Folder path not provided for server processing")
         try:
             extractor = TextExtractor()
             server_processing_result = extractor.process_folder(data_path, graphname=graphname)
@@ -652,7 +652,10 @@ def ingest(
                 data_source_id = ingest_config.get("data_source_id", "DocumentContent")
                 if ingest_config.get("server_jobs"):
                     for doc_data in ingest_config.get("server_jobs"):
-                        if not doc_data.get("doc_id") or not doc_data.get("content"):
+                        if not doc_data.get("doc_id"):
+                            continue
+                        # Skip documents with neither content nor image_data
+                        if not doc_data.get("content") and not doc_data.get("image_data"):
                             continue
                         if doc_data.get("image_data"):
                             payload = {
@@ -660,8 +663,11 @@ def ingest(
                                 "doc_type": "image",
                                 "image_data": doc_data.get("image_data", ""),
                                 "image_format": doc_data.get("image_format", "jpg"),
+                                "image_description": doc_data.get("image_description", ""),
                                 "parent_doc": doc_data.get("parent_doc", ""),
                                 "page_number": doc_data.get("page_number", 0),
+                                "width": doc_data.get("width", 0),
+                                "height": doc_data.get("height", 0),
                                 "position": doc_data.get("position", 0),
                                 "content": ""
                             }

From 3bfe5c1ddf10eb650e6fd9729703023aba0a21d4 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 24 Nov 2025 16:01:07 +0530
Subject: [PATCH 06/20] Fix PDF extractions

---
 common/utils/text_extractors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index b900cae..21dc2ff 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -207,9 +207,9 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
                 file_path,
                 write_images=True,
                 image_path=str(image_output_folder),  # unique folder per PDF
-                force_text=False,
                 margins=0,
                 image_size_limit=0.08,
+                table_strategy="lines"
             )
         except Exception as e:
             logger.error(f"pymupdf4llm failed for {file_path}: {e}")

From a660bb72999a55f449c09f35278060bc14217571 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 24 Nov 2025 20:03:54 +0530
Subject: [PATCH 07/20] Fix PDF extraction threading issue: add lock for
 pymupdf4llm (not thread-safe)

---
 common/utils/text_extractors.py | 59 +++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index 21dc2ff..ec5b140 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -8,6 +8,7 @@
 import uuid
 import base64
 import io
+import threading
 from pathlib import Path
 import shutil
 import asyncio
@@ -15,6 +16,9 @@
 
 logger = logging.getLogger(__name__)
 
+# Global lock for pymupdf4llm calls (not thread-safe)
+_pymupdf4llm_lock = threading.Lock()
+
 
 class TextExtractor:
     """Class for handling text extraction from various file formats and cleanup."""
@@ -202,26 +206,39 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
             shutil.rmtree(image_output_folder, ignore_errors=True)
 
         # Convert PDF to markdown with extracted image files
-        try:
-            markdown_content = pymupdf4llm.to_markdown(
-                file_path,
-                write_images=True,
-                image_path=str(image_output_folder),  # unique folder per PDF
-                margins=0,
-                image_size_limit=0.08,
-                table_strategy="lines"
-            )
-        except Exception as e:
-            logger.error(f"pymupdf4llm failed for {file_path}: {e}")
-            # Cleanup folder if it was created
-            if image_output_folder.exists():
-                shutil.rmtree(image_output_folder, ignore_errors=True)
-            return [{
-                "doc_id": base_doc_id,
-                "doc_type": "markdown",
-                "content": f"[PDF extraction failed: {e}]",
-                "position": 0
-            }]
+        # Use lock because pymupdf4llm's table extraction is not thread-safe
+        # See: https://github.com/pymupdf/PyMuPDF/issues/3241
+        with _pymupdf4llm_lock:
+            try:
+                markdown_content = pymupdf4llm.to_markdown(
+                    file_path,
+                    write_images=True,
+                    image_path=str(image_output_folder),  # unique folder per PDF
+                    margins=0,
+                    image_size_limit=0.08,
+                )
+            except Exception:
+                # Retry with table_strategy="lines" if first attempt fails
+                try:
+                    markdown_content = pymupdf4llm.to_markdown(
+                        file_path,
+                        write_images=True,
+                        image_path=str(image_output_folder),  # unique folder per PDF
+                        margins=0,
+                        image_size_limit=0.08,
+                        table_strategy="lines",
+                    )
+                except Exception as e:
+                    logger.error(f"pymupdf4llm failed for {file_path}: {e}")
+                    # Cleanup folder if it was created
+                    if image_output_folder.exists():
+                        shutil.rmtree(image_output_folder, ignore_errors=True)
+                    return [{
+                        "doc_id": base_doc_id,
+                        "doc_type": "markdown",
+                        "content": f"[PDF extraction failed: {e}]",
+                        "position": 0
+                    }]
 
         if not markdown_content or not markdown_content.strip():
             logger.warning(f"No content extracted from PDF: {file_path}")
@@ -461,4 +478,4 @@ def get_supported_extensions():
 def is_supported_file(file_path):
     """Check if a file is supported for text extraction."""
     extension = Path(file_path).suffix.lower()
-    return extension in get_supported_extensions()
+    return extension in get_supported_extensions()
\ No newline at end of file

From 27438590a99c916b9275fc329d54519517ee56f3 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 17 Nov 2025 17:07:34 +0530
Subject: [PATCH 08/20] Add S3 Bedrock BDA ingestion support with user
 confirmation and pymupdf4llm integration

---
 common/requirements.txt              |   3 +-
 common/utils/image_data_extractor.py | 163 +++---------
 common/utils/markdown_parsing.py     |  63 +++++
 common/utils/text_extractors.py      | 254 +++++++++---------
 graphrag-ui/src/pages/Setup.tsx      | 370 +++++++++++++--------------
 5 files changed, 403 insertions(+), 450 deletions(-)
 create mode 100644 common/utils/markdown_parsing.py

diff --git a/common/requirements.txt b/common/requirements.txt
index 562c2f6..f0022f3 100644
--- a/common/requirements.txt
+++ b/common/requirements.txt
@@ -110,7 +110,8 @@ packaging==24.2
 pandas==2.2.3
 #pathtools==0.1.2
 pillow==11.2.1
-PyMuPDF==1.26.4
+#PyMuPDF==1.26.4
+pymupdf4llm==0.2.0
 platformdirs==4.3.8
 pluggy==1.6.0
 prometheus_client==0.22.1
diff --git a/common/utils/image_data_extractor.py b/common/utils/image_data_extractor.py
index bde9c97..74e8d2f 100644
--- a/common/utils/image_data_extractor.py
+++ b/common/utils/image_data_extractor.py
@@ -11,155 +11,54 @@
 
 logger = logging.getLogger(__name__)
 
-
-
-def describe_image_with_llm(image_input):
+def describe_image_with_llm(file_path):
     """
-    Send image (pixmap or PIL image) to LLM vision model and return description.
-    Uses multimodal_service from config if available, otherwise falls back to completion_service.
-    Currently supports: OpenAI, Azure OpenAI, Google GenAI, and Google VertexAI
+    Read the image at file_path, re-encode it as a base64 JPEG, and return the multimodal LLM's description of it.
     """
     try:
+        from PIL import Image as PILImage
+        
         client = get_multimodal_service()
         if not client:
             return "[Image: Failed to create multimodal LLM client]"
-        
+
+        # Read image and convert to base64
+        pil_image = PILImage.open(file_path)
         buffer = io.BytesIO()
-        # Convert to RGB if needed for better compatibility
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        b64_img = base64.b64encode(buffer.getvalue()).decode("utf-8")
+        if pil_image.mode != 'RGB':
+            pil_image = pil_image.convert('RGB')
+        pil_image.save(buffer, format="JPEG", quality=95)
+        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
 
-        # Build messages (system + human)
         messages = [
-        SystemMessage(
-            content="You are a helpful assistant that describes images concisely for document analysis."
-        ),
-        HumanMessage(
-            content=[
-                {
-                    "type": "text",
-                    "text": (
-                        "Please describe what you see in this image and "
-                        "if the image has scanned text then extract all the text. "
-                        "if the image has any logo, icon, or branding element, try to describe it with text. "
-                        "Focus on any text, diagrams, charts, or other visual elements."
-                        "If the image is purely a logo, icon, or branding element, start your response with 'LOGO:' or 'ICON:'."
-                    ),
-                },
-                 {
-                     "type": "image_url",
-                     "image_url": {"url": f"data:image/jpeg;base64,{b64_img}"},
-                 },
-            ]
-        ),
+            SystemMessage(
+                content="You are a helpful assistant that describes images concisely for document analysis."
+            ),
+            HumanMessage(
+                content=[
+                    {
+                        "type": "text",
+                        "text": (
+                            "Please describe what you see in this image and "
+                            "if the image has scanned text then extract all the text. "
+                            "If the image has any graph, chart, table, or other diagram, describe it. "
+                        ),
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
+                    },
+                ],
+            ),
         ]
 
-        # Get response from LangChain LLM client
-        # Access the underlying LangChain client
         langchain_client = client.llm
         response = langchain_client.invoke(messages)
 
-        return response.content if hasattr(response, 'content') else str(response)
+        return response.content if hasattr(response, "content") else str(response)
 
     except Exception as e:
         logger.error(f"Failed to describe image with LLM: {str(e)}")
         return "[Image: Error processing image description]"
 
 
-def save_image_and_get_markdown(image_input, context_info="", graphname=None):
-    """
-    Save image locally to static/images/ folder and return markdown reference with description.
-    
-    LEGACY/OLD APPROACH: Used for backward compatibility with JSONL-based loading.
-    Images are saved as files and served via /ui/images/ endpoint with img:// protocol.
-    
-    For NEW direct loading approach, images are stored in Image vertex as base64
-    and served via /ui/image_vertex/ endpoint with image:// protocol.
-    
-    Args:
-        image_input: PIL Image object
-        context_info: Optional context (e.g., "page 3 of invoice.pdf")
-        graphname: Graph name to organize images by graph (optional)
-    
-    Returns:
-        dict with:
-            - 'markdown': Markdown string with img:// reference
-            - 'image_id': Unique identifier for the saved image
-            - 'image_path': Path where image was saved to static/images/
-    """
-    try:
-        # FIRST: Get description from LLM to check if it's a logo
-        description = describe_image_with_llm(image_input)
-        
-        # Check if the image is a logo, icon, or decorative element BEFORE saving
-        # These should be filtered out as they're not content-relevant
-        description_lower = description.lower()
-        logo_indicators = ['logo', 'icon', 'branding', 'watermark', 'trademark', 'company logo', 'brand logo']
-        
-        if any(indicator in description_lower for indicator in logo_indicators):
-            logger.info(f"Detected logo/icon in image, skipping: {description[:100]}")
-            return None
-        
-        # If not a logo, proceed with saving the image
-        # Generate unique image ID using hash of image content
-        buffer = io.BytesIO()
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        image_bytes = buffer.getvalue()
-        
-        # Create hash-based ID (deterministic for same image)
-        image_hash = hashlib.sha256(image_bytes).hexdigest()[:16]
-        image_id = f"{image_hash}.jpg"
-        
-        # Save image to local storage directory organized by graphname
-        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-        
-        # If graphname is provided, organize images by graph
-        if graphname:
-            images_dir = os.path.join(project_root, "static", "images", graphname)
-            # Include graphname in the image reference for URL construction
-            image_reference = f"{graphname}/{image_id}"
-        else:
-            images_dir = os.path.join(project_root, "static", "images")
-            image_reference = image_id
-        
-        os.makedirs(images_dir, exist_ok=True)
-        
-        image_path = os.path.join(images_dir, image_id)
-        
-        # Save image file (skip if already exists with same hash)
-        if not os.path.exists(image_path):
-            with open(image_path, 'wb') as f:
-                f.write(image_bytes)
-            logger.info(f"Saved content image to: {image_path}")
-        else:
-            logger.debug(f"Image already exists: {image_path}")
-        
-        # Generate markdown with custom img:// protocol (will be replaced later)
-        # Format: ![description](img://graphname/image_id) or ![description](img://image_id)
-        markdown = f"![{description}](img://{image_reference})"
-        
-        logger.info(f"Created image reference: {image_reference} with description")
-        
-        return {
-            'markdown': markdown,
-            'image_id': image_reference,
-            'image_path': image_path,
-            'description': description
-        }
-        
-    except Exception as e:
-        logger.error(f"Failed to save image and generate markdown: {str(e)}")
-        # Fallback to text description only
-        fallback_desc = f"[Image: {context_info} - processing failed]"
-        return {
-            'markdown': fallback_desc,
-            'image_id': None,
-            'image_path': None,
-            'description': fallback_desc
-        }
-
-
diff --git a/common/utils/markdown_parsing.py b/common/utils/markdown_parsing.py
new file mode 100644
index 0000000..7c8c476
--- /dev/null
+++ b/common/utils/markdown_parsing.py
@@ -0,0 +1,63 @@
+import re
+import os
+import pymupdf4llm
+
+class MarkdownProcessor:
+    """
+    A helper class to extract markdown image entries and
+    update descriptions based on image_id.
+    """
+
+    # regex for markdown images: ![alt](path)
+    _pattern = re.compile(r'!\[([^\]]*)\]\(([^)\s]+)\)')
+
+    @classmethod
+    def extract_images(cls, md_text):
+        """
+        Returns list of {"path": path, "image_id": image_id}
+        image_id = basename without extension
+        """
+        images = []
+        for m in cls._pattern.finditer(md_text):
+            path = m.group(2)
+            basename = os.path.basename(path)
+            image_id = os.path.splitext(basename)[0]
+            images.append({"path": path, "image_id": image_id})
+        return images
+
+    @classmethod
+    def insert_description_by_id(cls, md_text, image_id, description):
+        """
+        Replace the alt text of every image whose file name (extension stripped) equals image_id.
+        """
+
+        def repl(m):
+            old_path = m.group(2)
+            candidate_id = os.path.splitext(os.path.basename(old_path))[0]
+
+            if candidate_id == image_id:
+                # Insert new description
+                return f'![{description}]({old_path})'
+
+            return m.group(0)
+
+        return cls._pattern.sub(repl, md_text)
+
+    @classmethod
+    def replace_path_with_tg_protocol(cls, md_text, image_id, tg_reference):
+        """
+        Replace the path of every image whose file name (extension stripped) equals image_id with a tg:// reference.
+        tg_reference is the bare ID, e.g. 'Graphs_image_1'; the 'tg://' prefix is added here.
+        """
+        def repl(m):
+            old_path = m.group(2)
+            candidate_id = os.path.splitext(os.path.basename(old_path))[0]
+
+            if candidate_id == image_id:
+                # Replace path with tg:// protocol reference
+                alt_text = m.group(1)
+                return f'![{alt_text}](tg://{tg_reference})'
+
+            return m.group(0)
+
+        return cls._pattern.sub(repl, md_text)
\ No newline at end of file
diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index da3e22d..b900cae 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -183,137 +183,154 @@ def extract_text_from_file_with_images_as_docs(file_path, graphname=None):
 
 def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
     """
-    Extract PDF as ONE markdown document with inline image references.
+    Extract PDF as ONE markdown document with inline image references using pymupdf4llm.
+    Uses a unique temporary folder per PDF to allow parallel processing.
+    The extracted image folder is deleted after processing.
     """
+    # Use unique folder per PDF to allow parallel processing without conflicts
+    unique_folder_id = uuid.uuid4().hex[:12]
+    image_output_folder = Path(f"tg_temp_{unique_folder_id}")
+
     try:
-        import fitz  # PyMuPDF
+        import pymupdf4llm
         from PIL import Image as PILImage
+        from common.utils.image_data_extractor import describe_image_with_llm
+        from common.utils.markdown_parsing import MarkdownProcessor
+
+        # Ensure clean slate - remove folder if it exists from failed previous run
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
+
+        # Convert PDF to markdown with extracted image files
+        try:
+            markdown_content = pymupdf4llm.to_markdown(
+                file_path,
+                write_images=True,
+                image_path=str(image_output_folder),  # unique folder per PDF
+                force_text=False,
+                margins=0,
+                image_size_limit=0.08,
+            )
+        except Exception as e:
+            logger.error(f"pymupdf4llm failed for {file_path}: {e}")
+            # Cleanup folder if it was created
+            if image_output_folder.exists():
+                shutil.rmtree(image_output_folder, ignore_errors=True)
+            return [{
+                "doc_id": base_doc_id,
+                "doc_type": "markdown",
+                "content": f"[PDF extraction failed: {e}]",
+                "position": 0
+            }]
+
+        if not markdown_content or not markdown_content.strip():
+            logger.warning(f"No content extracted from PDF: {file_path}")
+
+        # Extract image references from markdown
+        image_refs = MarkdownProcessor.extract_images(markdown_content)
+
+        if not image_refs:
+            # cleanup folder anyway
+            if image_output_folder.exists():
+                shutil.rmtree(image_output_folder, ignore_errors=True)
+
+            return [{
+                "doc_id": base_doc_id,
+                "doc_type": "markdown",
+                "content": markdown_content,
+                "position": 0
+            }]
 
-        doc = fitz.open(file_path)
-        markdown_parts = []
         image_entries = []
         image_counter = 0
 
-        for page_num, page in enumerate(doc, start=1):
-            if page_num > 1:
-                markdown_parts.append("\n\n")
-            markdown_parts.append(f"--- Page {page_num} ---\n") #Avoid to be splitted as a single chunk
-
-            blocks = page.get_text("blocks", sort=True)
-            text_blocks_with_pos = []
-
-            for block in blocks:
-                block_type = block[6] if len(block) > 6 else 0
-                if block_type == 0:
-                    text = block[4].strip()
-                    if text:
-                        y_pos = block[1]
-                        text_blocks_with_pos.append({'type': 'text', 'content': text, 'y_pos': y_pos})
-
-            image_list = page.get_images(full=True)
-            images_with_pos = []
-
-            if image_list:
-                for img_index, img_info in enumerate(image_list):
-                    try:
-                        xref = img_info[0]
-                        base_image = doc.extract_image(xref)
-                        image_bytes = base_image["image"]
-                        image_ext = base_image["ext"]
-
-                        img_rects = page.get_image_rects(xref)
-                        y_pos = img_rects[0].y0 if img_rects else 999999
-
-                        pil_image = PILImage.open(io.BytesIO(image_bytes))
-                        if pil_image.width < 100 or pil_image.height < 100:
-                            continue
-
-                        from common.utils.image_data_extractor import describe_image_with_llm
-                        description = describe_image_with_llm(pil_image)
-                        description_lower = description.lower()
-                        logo_indicators = [
-                            'logo:', 'icon:', 'logo', 'icon', 'branding',
-                            'watermark', 'trademark', 'stylized letter',
-                            'stylized text', 'word "', "word '"
-                        ]
-                        if any(indicator in description_lower for indicator in logo_indicators):
-                            continue
-
-                        buffer = io.BytesIO()
-                        if pil_image.mode != 'RGB':
-                            pil_image = pil_image.convert('RGB')
-                        pil_image.save(buffer, format="JPEG", quality=95)
-                        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
-
-                        image_counter += 1
-                        image_doc_id = f"{base_doc_id}_image_{image_counter}"
-
-                        images_with_pos.append({
-                            'type': 'image',
-                            'image_doc_id': image_doc_id,
-                            'description': description,
-                            'y_pos': y_pos,
-                            'image_data': image_base64,
-                            'image_format': image_ext,
-                            'width': pil_image.width,
-                            'height': pil_image.height
-                        })
-                    except Exception as img_error:
-                        logger.warning(f"Failed to extract image on page {page_num}: {img_error}")
-
-            all_elements = text_blocks_with_pos + images_with_pos
-            all_elements.sort(key=lambda x: x['y_pos'])
-
-            for element in all_elements:
-                if element['type'] == 'text':
-                    markdown_parts.append(element['content'])
-                    markdown_parts.append("\n\n")
-                else:
-                    # Add image description as text, then markdown image reference
-                    # Use short alt text in markdown, full description as regular text
-                    markdown_parts.append(f"![{element['description']}](tg://{element['image_doc_id']})\n\n")
-
-                    image_entries.append({
-                        "doc_id": element['image_doc_id'],
-                        "doc_type": "image",
-                        "image_description": element['description'],
-                        "image_data": element['image_data'],
-                        "image_format": element['image_format'],
-                        "parent_doc": base_doc_id,
-                        "page_number": page_num,
-                        "width": element['width'],
-                        "height": element['height'],
-                        "position": int(element['image_doc_id'].split('_')[-1])
-                    })
-
-        doc.close()
-
-        markdown_content = "".join(markdown_parts) if markdown_parts else "" #No content extracted from PDF
-        if not markdown_content:
-            return []
+        for img_ref in image_refs:
+            try:
+                img_path = Path(img_ref["path"])  # convert to Path
+                image_id = img_ref["image_id"]
+
+                # Image description
+                description = describe_image_with_llm(str(img_path))
+
+                markdown_content = MarkdownProcessor.insert_description_by_id(
+                    markdown_content,
+                    image_id,
+                    description
+                )
+
+                # Convert image to base64
+                pil_image = PILImage.open(img_path)
+                buffer = io.BytesIO()
+
+                if pil_image.mode != "RGB":
+                    pil_image = pil_image.convert("RGB")
+
+                pil_image.save(buffer, format="JPEG", quality=95)
+                image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+                image_counter += 1
+                image_doc_id = f"{base_doc_id}_image_{image_counter}"
+
+                # Replace file path with tg:// protocol reference in markdown
+                markdown_content = MarkdownProcessor.replace_path_with_tg_protocol(
+                    markdown_content,
+                    image_id,
+                    image_doc_id
+                )
+
+                image_entries.append({
+                    "doc_id": image_doc_id,
+                    "doc_type": "image",
+                    "image_description": description,
+                    "image_data": image_base64,
+                    "image_format": "jpg",
+                    "parent_doc": base_doc_id,
+                    "page_number": 0,
+                    "width": pil_image.width,
+                    "height": pil_image.height,
+                    "position": image_counter
+                })
+
+            except Exception as img_error:
+                logger.warning(f"Failed to process image {img_ref.get('path')}: {img_error}")
+
+        # FINAL CLEANUP — delete folder after processing everything
+        if image_output_folder.exists() and image_output_folder.is_dir():
+            try:
+                shutil.rmtree(image_output_folder)
+                logger.debug(f"Deleted image folder: {image_output_folder}")
+            except Exception as delete_err:
+                logger.warning(f"Failed to delete folder {image_output_folder}: {delete_err}")
 
+        # Build final result
         result = [{
             "doc_id": base_doc_id,
-            "doc_type": "",
+            "doc_type": "markdown",
             "content": markdown_content,
             "position": 0
         }]
         result.extend(image_entries)
+
         return result
 
-    except ImportError:
-        logger.error("PyMuPDF not available")
+    except ImportError as import_err:
+        logger.error(f"Required library missing: {import_err}")
+        # Cleanup on import error
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
         return [{
             "doc_id": base_doc_id,
-            "doc_type": "",
-            "content": "[PDF extraction requires PyMuPDF]",
+            "doc_type": "markdown",
+            "content": "[PDF extraction requires pymupdf4llm and PyMuPDF]",
             "position": 0
         }]
     except Exception as e:
         logger.error(f"Error extracting PDF: {e}")
+        # Cleanup on any other error
+        if image_output_folder.exists():
+            shutil.rmtree(image_output_folder, ignore_errors=True)
         raise
 
-
 def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
     """
     Extract standalone image file as ONE markdown document with inline image reference.
@@ -324,25 +341,15 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
 
         pil_image = PILImage.open(file_path)
         if pil_image.width < 100 or pil_image.height < 100:
-            return [{
-                "doc_id": base_doc_id,
-                "doc_type": "",
-                "content": f"[Skipped small image: {file_path.name}]",
-                "position": 0
-            }]
+            pass
 
-        description = describe_image_with_llm(pil_image)
+        description = describe_image_with_llm(str(Path(file_path).absolute()))
         description_lower = description.lower()
         logo_indicators = ['logo:', 'icon:', 'logo', 'icon', 'branding',
                            'watermark', 'trademark', 'stylized letter',
                            'stylized text', 'word "', "word '"]
         if any(indicator in description_lower for indicator in logo_indicators):
-            return [{
-                "doc_id": base_doc_id,
-                "doc_type": "",
-                "content": f"[Skipped logo/icon: {file_path.name}]",
-                "position": 0
-            }]
+            return []
 
         buffer = io.BytesIO()
         if pil_image.mode != 'RGB':
@@ -353,7 +360,6 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
         image_id = f"{base_doc_id}_image_1"
         # Put description as text, then markdown image reference with short alt text
         content = f"![{description}](tg://{image_id})"
-
         return [
             {
                 "doc_id": base_doc_id,
@@ -379,7 +385,7 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
         logger.error(f"Error extracting image: {e}")
         return [{
             "doc_id": base_doc_id,
-            "doc_type": "",
+            "doc_type": "markdown",
             "content": f"[Image extraction failed: {str(e)}]",
             "position": 0
         }]
@@ -441,12 +447,10 @@ def get_doc_type_from_extension(extension):
 
     if extension in ['.html', '.htm']:
         return 'html'
-    elif extension in ['.md']:
-        return 'markdown'
     elif extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']:
         return 'image'
     else:
-        return ''
+        return 'markdown'
 
 
 def get_supported_extensions():
diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx
index b7d357d..2aaee99 100644
--- a/graphrag-ui/src/pages/Setup.tsx
+++ b/graphrag-ui/src/pages/Setup.tsx
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
 import { useNavigate } from "react-router-dom";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
-import { Database, Upload, RefreshCw, Loader2, Trash2, FolderUp, Cloud, ArrowLeft, CloudDownload, CloudLightning } from "lucide-react";
+import { Database, Upload, RefreshCw, Loader2, Trash2, FolderUp, Cloud, ArrowLeft, CloudDownload, CloudCog } from "lucide-react";
 import {
   Dialog,
   DialogContent,
@@ -56,7 +56,6 @@ const Setup = () => {
   const [uploadMessage, setUploadMessage] = useState("");
   const [isIngesting, setIsIngesting] = useState(false);
   const [ingestMessage, setIngestMessage] = useState("");
-  const [activeTab, setActiveTab] = useState("upload");
 
   // Refresh state
   const [refreshOpen, setRefreshOpen] = useState(false);
@@ -67,12 +66,13 @@ const Setup = () => {
   const [isCheckingStatus, setIsCheckingStatus] = useState(false);
   
   // S3 state
+  const [fileFormat, setFileFormat] = useState<"json" | "multi">("json");
   const [awsAccessKey, setAwsAccessKey] = useState("");
   const [awsSecretKey, setAwsSecretKey] = useState("");
+  const [dataPath, setDataPath] = useState("");
   const [inputBucket, setInputBucket] = useState("");
   const [outputBucket, setOutputBucket] = useState("");
   const [regionName, setRegionName] = useState("");
-  const [skipBDAProcessing, setSkipBDAProcessing] = useState(false);
 
   // Cloud Download state
   const [cloudProvider, setCloudProvider] = useState<"s3" | "gcs" | "azure">("s3");
@@ -458,7 +458,7 @@ const Setup = () => {
       }
 
       const createData = await createResponse.json();
-      //console.log("Create ingest response:", createData);
+      console.log("Create ingest response:", createData);
 
       // Step 2: Run ingest
       setIngestMessage("Step 2/2: Running document ingest...");
@@ -484,7 +484,7 @@ const Setup = () => {
       }
 
       const ingestData = await ingestResponse.json();
-      //console.log("Ingest response:", ingestData);
+      console.log("Ingest response:", ingestData);
 
       setIngestMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
     } catch (error: any) {
@@ -495,8 +495,8 @@ const Setup = () => {
     }
   };
 
-  // Ingest files from S3 with Amazon BDA
-  const handleAmazonBDAIngest = async () => {
+  // Ingest files from S3 with Bedrock BDA
+  const handleS3BedrockIngest = async () => {
     if (!ingestGraphName) {
       setIngestMessage("Please select a graph");
       return;
@@ -508,112 +508,92 @@ const Setup = () => {
       return;
     }
 
-    if (skipBDAProcessing) {
-      // When skipping BDA, only output bucket and region are required
-      if (!outputBucket || !regionName) {
-        setIngestMessage("❌ Please provide Output Bucket and Region Name");
-        return;
-      }
-    } else {
-      // When using BDA, all fields are required
+    if (fileFormat === "multi") {
       if (!inputBucket || !outputBucket || !regionName) {
         setIngestMessage("❌ Please provide Input Bucket, Output Bucket, and Region Name");
         return;
       }
-    }
 
-    // Ask for confirmation
-    const confirmMessage = skipBDAProcessing
-      ? `You're skipping Amazon BDA processing and will ingest directly from the output bucket (${outputBucket}). Please confirm to proceed.`
-      : `You're using Amazon BDA for multimodal document processing. This will trigger Amazon BDA to process your documents from the input bucket (${inputBucket}) and store the results in the output bucket (${outputBucket}) and then ingest them into your knowledge graph. Please confirm to proceed.`;
-    
-    const shouldProceed = await confirm(confirmMessage);
-    if (!shouldProceed) {
-      setIngestMessage("Operation cancelled by user.");
-      return;
+      // Ask for confirmation if using Bedrock (multi format)
+      const shouldProceed = await confirm(
+        `Are you using AWS Bedrock for multimodal document processing? This will trigger AWS Bedrock BDA to process your documents from the input bucket (${inputBucket}) and store the results in the output bucket (${outputBucket}).`
+      );
+      if (!shouldProceed) {
+        setIngestMessage("Operation cancelled by user.");
+        return;
+      }
+    } else if (fileFormat === "json") {
+      if (!dataPath) {
+        setIngestMessage("❌ Please provide Data Path (e.g., s3://bucket-name/path/to/data)");
+        return;
+      }
     }
 
     setIsIngesting(true);
+    setIngestMessage("Step 1/2: Creating ingest job...");
 
     try {
       const creds = localStorage.getItem("creds");
-      let loadingInfo: any = {};
 
-      if (skipBDAProcessing) {
-        // Skip BDA processing - create ingest job that reads directly from output bucket
-        const runIngestConfig: any = {
-          data_source: "bda",
+      // Step 1: Create ingest job
+      const createIngestConfig: any = {
+        data_source: "s3",
+        data_source_config: {
           aws_access_key: awsAccessKey,
           aws_secret_key: awsSecretKey,
-          output_bucket: outputBucket,
-          region_name: regionName,
-          bda_jobs:[],
-          loader_config: {
-            doc_id_field: "doc_id",
-            content_field: "content",
-            doc_type: "markdown",
-          },
-          file_format: "multi"
-        };
-
-        setIngestMessage("Step 1/2: Creating ingest job from output bucket...");
-
-        // Run ingest directly
-        loadingInfo = {
-          load_job_id: "load_documents_content_json",
-          data_source_id: runIngestConfig,
-          file_path: outputBucket,
-        };
-        setIngestMessage(`Step 2/2: Running document ingestion for all files in ${outputBucket}...`);
-      } else {
-        // Step 1: Create ingest job with BDA processing
-        const createIngestConfig: any = {
-          data_source: "bda",
-          data_source_config: {
-            aws_access_key: awsAccessKey,
-            aws_secret_key: awsSecretKey,
-            input_bucket: inputBucket,
-            output_bucket: outputBucket,
-            region_name: regionName,
-          },
-          loader_config: {
-            doc_id_field: "doc_id",
-            content_field: "content",
-            doc_type: "markdown",
-          },
-          file_format: "multi"
-        };
+        },
+        loader_config: {
+          doc_id_field: "doc_id",
+          content_field: "content",
+          doc_type: fileFormat === "multi" ? "markdown" : "",
+        },
+        file_format: fileFormat
+      };
 
-        setIngestMessage("Step 1/2: Triggering Amazon BDA processing and creating ingest job...");
+      // Add format-specific configuration
+      if (fileFormat === "multi") {
+        createIngestConfig.data_source_config.input_bucket = inputBucket;
+        createIngestConfig.data_source_config.output_bucket = outputBucket;
+        createIngestConfig.data_source_config.region_name = regionName;
+        setIngestMessage("Step 1/2: Creating ingest job and triggering AWS Bedrock BDA processing...");
+      } else if (fileFormat === "json") {
+        createIngestConfig.loader_config.doc_id_field = "url";
+      }
 
-        const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
-          method: "POST",
-          headers: {
-            "Content-Type": "application/json",
-            Authorization: `Basic ${creds}`,
-          },
-          body: JSON.stringify(createIngestConfig),
-        });
+      const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Basic ${creds}`,
+        },
+        body: JSON.stringify(createIngestConfig),
+      });
 
-        if (!createResponse.ok) {
-          const errorData = await createResponse.json();
-          throw new Error(errorData.detail || `Failed to create ingest job: ${createResponse.statusText}`);
-        }
+      if (!createResponse.ok) {
+        const errorData = await createResponse.json();
+        throw new Error(errorData.detail || `Failed to create ingest job: ${createResponse.statusText}`);
+      }
 
-        const createData = await createResponse.json();
-        //console.log("Create ingest response:", createData);
+      const createData = await createResponse.json();
+      console.log("Create ingest response:", createData);
 
-        // Step 2: Run ingest
-        loadingInfo = {
-          load_job_id: createData.load_job_id,
-          data_source_id: createData.data_source_id,
-          file_path: outputBucket,
-        };
+      // Step 2: Run ingest
+      setIngestMessage("Step 2/2: Running document ingest...");
 
-        const filesToIngest = createData.data_source_id.bda_jobs.map((job: any) => job.jobId.split("/")[-1]);
-        setIngestMessage(`Step 2/2: Running document ingest for ${filesToIngest.length} files in ${outputBucket}...`);
+      // Determine file path based on format
+      let filePath = "";
+      if (fileFormat === "multi") {
+        filePath = outputBucket; // For multi format, use output bucket
+      } else if (fileFormat === "json") {
+        filePath = dataPath; // For json format, use the provided data path
       }
 
+      const loadingInfo = {
+        load_job_id: createData.load_job_id,
+        data_source_id: createData.data_source_id,
+        file_path: filePath,
+      };
+
       const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
         method: "POST",
         headers: {
@@ -629,13 +609,15 @@ const Setup = () => {
       }
 
       const ingestData = await ingestResponse.json();
-      //console.log("Ingest response:", ingestData);
-      const filesIngested = ingestData.summary.map((file: any) => file.file_path);
-
-      setIngestMessage(`✅ Document ingestion completed successfully! Ingested ${filesIngested.length} into your knowledge graph.`);
+      console.log("Ingest response:", ingestData);
 
+      if (fileFormat === "multi") {
+        setIngestMessage(`✅ Data ingested successfully! AWS Bedrock BDA processed documents from ${inputBucket} and loaded results from ${outputBucket}.`);
+      } else {
+        setIngestMessage(`✅ Data ingested successfully! Processed documents from ${dataPath}.`);
+      }
     } catch (error: any) {
-      console.error("Error ingesting files:", error);
+      console.error("Error ingesting S3 data:", error);
       setIngestMessage(`❌ Error: ${error.message}`);
     } finally {
       setIsIngesting(false);
@@ -1121,8 +1103,8 @@ const Setup = () => {
               <label className="block text-sm font-medium mb-2 text-black dark:text-white">
                 Target Graph Name
               </label>
-              <Select value={ingestGraphName} onValueChange={setIngestGraphName} disabled={isIngesting}>
-                <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA" disabled={isIngesting}>
+              <Select value={ingestGraphName} onValueChange={setIngestGraphName}>
+                <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
                   <SelectValue placeholder="Select a graph" />
                 </SelectTrigger>
                 <SelectContent>
@@ -1139,35 +1121,32 @@ const Setup = () => {
                   )}
                 </SelectContent>
               </Select>
+              {ingestGraphName && (
+                <p className="text-xs text-gray-500 dark:text-gray-400 mt-1">
+                  Files will be uploaded to: uploads/{ingestGraphName}/
+                </p>
+              )}
             </div>
 
-            <Tabs value={activeTab} onValueChange={(value) => {
-              // Block tab switching when ingesting
-              if (!isIngesting) {
-                setActiveTab(value);
-              }
-            }} className="w-full">
+            <Tabs defaultValue="upload" className="w-full">
               <TabsList className="grid w-full grid-cols-3">
-                <TabsTrigger value="upload" disabled={isIngesting}>
+                <TabsTrigger value="upload">
                   <FolderUp className="h-4 w-4 mr-2" />
                   Upload Files
                 </TabsTrigger>
-                <TabsTrigger value="cloudDownload" disabled={isIngesting}>
+                <TabsTrigger value="cloudDownload">
                   <CloudDownload className="h-4 w-4 mr-2" />
                   Download from Cloud
                 </TabsTrigger>
-                <TabsTrigger value="AmazonBDA" disabled={isIngesting}>
-                  <CloudLightning className="h-4 w-4 mr-2" />
-                  Use Amazon BDA
+                <TabsTrigger value="s3">
+                  <CloudCog className="h-4 w-4 mr-2" />
+                  Amazon BDA Configuration
                 </TabsTrigger>
               </TabsList>
 
               {/* Upload Data Tab */}
               <TabsContent value="upload" className="space-y-4">
                 <div className="space-y-4">
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Upload local files to the server and ingest them into your knowledge graph.
-                  </p>
                   <div>
                     <label className="block text-sm font-medium mb-2 text-black dark:text-white">
                       Select Files
@@ -1179,9 +1158,9 @@ const Setup = () => {
                       disabled={isUploading}
                       className="dark:border-[#3D3D3D] dark:bg-shadeA"
                     />
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mt-2">
-                      Maximum upload per request: {MAX_UPLOAD_SIZE_MB} MB. {ingestGraphName ? `Upload destination: uploads/${ingestGraphName}/` : ""}
-                    </p>
+                  <p className="text-xs text-gray-500 dark:text-gray-400 mt-2">
+                    Maximum upload per request: {MAX_UPLOAD_SIZE_MB} MB.
+                  </p>
                   </div>
 
                   <div className="flex gap-2">
@@ -1295,9 +1274,6 @@ const Setup = () => {
               {/* Download from Cloud Storage Tab */}
               <TabsContent value="cloudDownload" className="space-y-4">
                 <div className="space-y-4">
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Download files from cloud storage and ingest them into your knowledge graph.
-                  </p>
                   <div>
                     <label className="block text-sm font-medium mb-2 text-black dark:text-white">
                       Cloud Storage Provider
@@ -1487,13 +1463,11 @@ const Setup = () => {
                       </div>
                     </>
                   )}
-                  {ingestGraphName && (
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">
-                      Download destination: downloaded_files_cloud/{ingestGraphName}/
-                    </p>
-                  )}
 
                   <div className="pt-4 border-t border-gray-300 dark:border-[#3D3D3D]">
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">
+                      Files will be downloaded to: downloaded_files_cloud/{ingestGraphName}/
+                    </p>
                     <Button 
                       onClick={handleCloudDownload}
                       disabled={isDownloading}
@@ -1607,12 +1581,23 @@ const Setup = () => {
                 </div>
               </TabsContent>
 
-              {/* Amazon BDA Configuration Tab */}
-              <TabsContent value="AmazonBDA" className="space-y-4">
-                <div className="space-y-4">              
-                  <p className="text-sm font-medium text-gray-500 dark:text-gray-400 mb-3">
-                    Process multimodal documents stored in S3 with Amazon Bedrock Data Automation and ingest them into your knowledge graph.
-                  </p>
+              {/* S3 Bedrock Configuration Tab */}
+              <TabsContent value="s3" className="space-y-4">
+                <div className="space-y-4">
+                  <div>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                      File Format
+                    </label>
+                    <Select value={fileFormat} onValueChange={(value: "json" | "multi") => setFileFormat(value)}>
+                      <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
+                        <SelectValue placeholder="Select file format" />
+                      </SelectTrigger>
+                      <SelectContent>
+                        <SelectItem value="json">JSON</SelectItem>
+                        <SelectItem value="multi">Multi</SelectItem>
+                      </SelectContent>
+                    </Select>
+                  </div>
 
                   {/* Common fields */}
                   <div>
@@ -1625,7 +1610,6 @@ const Setup = () => {
                       onChange={(e) => setAwsAccessKey(e.target.value)}
                       placeholder="Enter AWS access key"
                       className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
                     />
                   </div>
 
@@ -1639,74 +1623,76 @@ const Setup = () => {
                       onChange={(e) => setAwsSecretKey(e.target.value)}
                       placeholder="Enter AWS secret key"
                       className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
                     />
                   </div>
 
-                  <div>
-                    <div className="flex items-center justify-between mb-2">
-                      <label className="block text-sm font-medium text-black dark:text-white">
-                        Input Bucket
-                      </label>
-                      <label className="flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400 cursor-pointer">
-                        <input
-                          type="checkbox"
-                          checked={skipBDAProcessing}
-                          onChange={(e) => setSkipBDAProcessing(e.target.checked)}
-                          disabled={isIngesting}
-                          className="h-4 w-4 rounded border-gray-300 dark:border-gray-600"
-                        />
-                        <span>Skip BDA (ingest existing BDA output bucket directly)</span>
+                  {/* Conditional fields based on file format */}
+                  {fileFormat === "json" ? (
+                    <div>
+                      <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                        Data Path
                       </label>
+                      <Input
+                        type="text"
+                        value={dataPath}
+                        onChange={(e) => setDataPath(e.target.value)}
+                        placeholder="s3://bucket-name/path/to/data"
+                        className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                      />
                     </div>
-                    <Input
-                      type="text"
-                      value={inputBucket}
-                      onChange={(e) => setInputBucket(e.target.value)}
-                      placeholder="Enter input bucket name"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting || skipBDAProcessing}
-                    />
-                  </div>
-
-                  <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      Output Bucket
-                    </label>
-                    <Input
-                      type="text"
-                      value={outputBucket}
-                      onChange={(e) => setOutputBucket(e.target.value)}
-                      placeholder="Enter output bucket name"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
-                    />
-                  </div>
+                  ) : (
+                    <>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                          Input Bucket
+                        </label>
+                        <Input
+                          type="text"
+                          value={inputBucket}
+                          onChange={(e) => setInputBucket(e.target.value)}
+                          placeholder="Enter input bucket name"
+                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                        />
+                      </div>
 
-                  <div>
-                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
-                      Region Name
-                    </label>
-                    <Input
-                      type="text"
-                      value={regionName}
-                      onChange={(e) => setRegionName(e.target.value)}
-                      placeholder="e.g., us-east-1"
-                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
-                      disabled={isIngesting}
-                    />
-                  </div>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                          Output Bucket
+                        </label>
+                        <Input
+                          type="text"
+                          value={outputBucket}
+                          onChange={(e) => setOutputBucket(e.target.value)}
+                          placeholder="Enter output bucket name"
+                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                        />
+                      </div>
 
-                  {ingestGraphName && (
-                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-2">
-                      Processing destination: Input bucket ({inputBucket || "not specified"}) → Output bucket ({outputBucket || "not specified"}) → Knowledge graph ({ingestGraphName})
-                    </p>
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                          Region Name
+                        </label>
+                        <Input
+                          type="text"
+                          value={regionName}
+                          onChange={(e) => setRegionName(e.target.value)}
+                          placeholder="e.g., us-east-1"
+                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                        />
+                      </div>
+                    </>
                   )}
 
-                  {/* Ingest S3 Files with Amazon BDA Section */}
+                  {/* Ingest S3 Bedrock Data Section */}
                   <div className="border-t border-gray-300 dark:border-[#3D3D3D] pt-4 mt-4">
+                    <h3 className="text-sm font-medium mb-2 text-black dark:text-white">
+                      Ingest S3 Data into Knowledge Graph
+                    </h3>
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
+                      Process S3 data and add it to the knowledge graph using AWS Bedrock BDA for multimodal documents
+                    </p>
                     <Button
-                      onClick={handleAmazonBDAIngest}
+                      onClick={handleS3BedrockIngest}
                       disabled={isIngesting}
                       className="gradient text-white w-full"
                     >
@@ -1718,7 +1704,7 @@ const Setup = () => {
                       ) : (
                         <>
                           <Database className="h-4 w-4 mr-2" />
-                          Ingest from S3 Bucket into {ingestGraphName}
+                          Ingest from S3 into {ingestGraphName}
                         </>
                       )}
                     </Button>
@@ -1771,7 +1757,7 @@ const Setup = () => {
             <DialogHeader>
               <DialogTitle className="text-black dark:text-white">Refresh Knowledge Graph</DialogTitle>
               <DialogDescription className="text-gray-600 dark:text-[#D9D9D9]">
-                Rebuild the graph content and rerun community detection for your knowledge graph
+                Rebuild the graph content of your knowledge graph
               </DialogDescription>
             </DialogHeader>
 
@@ -1780,8 +1766,8 @@ const Setup = () => {
                 <label className="block text-sm font-medium mb-2 text-black dark:text-white">
                   Select Graph to Refresh
                 </label>
-                <Select value={refreshGraphName} onValueChange={setRefreshGraphName} disabled={isRefreshing || isRebuildRunning || isCheckingStatus}>
-                  <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA" disabled={isRefreshing || isRebuildRunning || isCheckingStatus}>
+                <Select value={refreshGraphName} onValueChange={setRefreshGraphName}>
+                  <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
                     <SelectValue placeholder="Select a graph" />
                   </SelectTrigger>
                   <SelectContent>
@@ -1805,7 +1791,7 @@ const Setup = () => {
                   ⚠️ Warning
                 </p>
                 <p className="text-sm text-yellow-700 dark:text-yellow-300 mt-1">
-                  This operation will process new documents and rerun community detection that will interrupt related queries.
+                  This operation will rebuild the graph content that will interrupt related queries. 
                   Please confirm to proceed.
                 </p>
               </div>

From ddae372fcf764f40f1a051b5a3d6f67c83a2a874 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Tue, 18 Nov 2025 18:27:24 +0530
Subject: [PATCH 09/20] Update README for OpenAI and Bedrock config, add
 pymupdf4llm license

---
 README.md                         |  78 ++--
 licenses/pymupdf4llm-AGPL-3.0.txt | 661 ++++++++++++++++++++++++++++++
 2 files changed, 704 insertions(+), 35 deletions(-)
 create mode 100644 licenses/pymupdf4llm-AGPL-3.0.txt

diff --git a/README.md b/README.md
index 8c38f6c..13c88b3 100644
--- a/README.md
+++ b/README.md
@@ -103,24 +103,23 @@ Organizing the data as a knowledge graph allows a chatbot to access accurate, fa
 ### Quick Start
 
 #### Use TigerGraph Docker-Based Instance
-Set your LLM Provider (supported `openai` or `gemini`) api key as environment varabiel LLM_API_KEY and use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
+Set your OpenAI API key as the environment variable OPENAI_API_KEY and use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
 ```
-curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | bash
+curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | sh
 ```
 
 The GraphRAG instances will be deployed at `./graphrag` folder and TigerGraph instance will be available at `http://localhost:14240`.
-To change installation folder, use `bash -s -- <graphrag_folder> <llm_provider>` instead of `bash` at the end of the above command.
-
-> Note: for other LLM providers, manually update `configs/server_config.json` accordingly and re-run `docker compose up -d`
+To change installation folder, use `sh -s -- <graphrag_folder>` instead of `sh` at the end of the above command.
 
 #### Use Pre-Installed TigerGraph Instance
-Similar to the above setup, and use the following command for a one-step quick deployment connecting to a pre-installed TigerGraph with default configurations:
+
+Use the following command for a one-step quick deployment that connects to a pre-installed TigerGraph instance with default configurations:
 ```
-curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | bash
+curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | sh
 ```
 
 The GraphRAG instances will be deployed at `./graphrag` folder and connect to TigerGraph instance at `http://localhost:14240` by default.
-To change installation folder, TigerGraph instance location or username/password, use `bash -s -- <graphrag_folder> <llm_provider> <tg_host> <tg_port> <tg_username> <tg_password>` instead of `bash` at the end of the above command.
+To change installation folder, TigerGraph instance location or username/password, use `sh -s -- <graphrag_loc> <tg_host> <tg_port> <tg_username> <tg_password>` instead of `sh` at the end of the above command.
 
 [Go back to top](#top)
 
@@ -152,7 +151,7 @@ Here’s what the folder structure looks like:
 
 ##### Step 3: Adjust configurations
 
-Edit `llm_config` section of `configs/server_config.json` and replace `<YOUR_LLM_API_KEY>` to your own LLM_API_KEY for the LLM provider. 
+Edit the `llm_config` section of `configs/server_config.json` and replace `<YOUR_OPENAI_API_KEY>` with your own OPENAI_API_KEY.
  
 > If desired, you can also change the model to be used for the embedding service and completion service to your preferred models to adjust the output from the LLM service.
 
@@ -470,23 +469,27 @@ In addition to the `OPENAI_API_KEY`, `llm_model` and `model_name` can be edited
 ```json
 {
     "llm_config": {
+        "authentication_configuration": {
+            "OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
+        },
         "embedding_service": {
-            "embedding_model_service": "openai",
             "model_name": "text-embedding-3-small",
-            "authentication_configuration": {
-                "OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
-            }
+            "embedding_model_service": "openai"
         },
         "completion_service": {
             "llm_service": "openai",
             "llm_model": "gpt-4.1-mini",
-            "authentication_configuration": {
-                "OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
-            },
             "model_kwargs": {
                 "temperature": 0
             },
             "prompt_path": "./common/prompts/openai_gpt4/"
+        },
+        "multimodal_service": {
+            "llm_service": "openai",
+            "llm_model": "gpt-4o-mini",
+            "model_kwargs": {
+                "temperature": 0
+            }
         }
     }
 }
@@ -546,7 +549,7 @@ And your JSON config should follow as:
             "model_kwargs": {
                 "temperature": 0
             },
-            "prompt_path": "./common/prompts/gcp_vertexai_palm/"
+            "prompt_path": "./app/prompts/gcp_vertexai_palm/"
         }
     }
 }
@@ -583,7 +586,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
             "model_kwargs": {
                 "temperature": 0
             },
-            "prompt_path": "./common/prompts/azure_open_ai_gpt35_turbo_instruct/"
+            "prompt_path": "./app/prompts/azure_open_ai_gpt35_turbo_instruct/"
         }
     }
 }
@@ -594,27 +597,32 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
 ```json
 {
     "llm_config": {
+        "authentication_configuration": {
+            "AWS_ACCESS_KEY_ID": "YOUR_AWS_ACCESS_KEY",
+            "AWS_SECRET_ACCESS_KEY": "YOUR_AWS_SECRET_KEY",
+            "AWS_REGION_NAME": "us-west-2"
+        },
         "embedding_service": {
+            "model_name": "amazon.titan-embed-text-v1",
             "embedding_model_service": "bedrock",
-            "model_name":"amazon.titan-embed-text-v2",
-            "region_name":"us-west-2",
-            "authentication_configuration": {
-                "AWS_ACCESS_KEY_ID": "ACCESS_KEY",
-                "AWS_SECRET_ACCESS_KEY": "SECRET"
-            }
+            "dimensions": 1536
         },
         "completion_service": {
             "llm_service": "bedrock",
-            "llm_model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
-            "region_name":"us-west-2",
-            "authentication_configuration": {
-                "AWS_ACCESS_KEY_ID": "ACCESS_KEY",
-                "AWS_SECRET_ACCESS_KEY": "SECRET"
-            },
+            "llm_model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
             "model_kwargs": {
                 "temperature": 0,
+                "max_tokens": 4096
             },
-            "prompt_path": "./common/prompts/aws_bedrock_claude3haiku/"
+            "prompt_path": "./common/prompts/openai_gpt4/"
+        },
+        "multimodal_service": {
+            "llm_service": "bedrock",
+            "llm_model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
+            "model_kwargs": {
+                "temperature": 0,
+                "max_tokens": 4096
+            }
         }
     }
 }
@@ -640,7 +648,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
             "model_kwargs": {
                 "temperature": 0.0000001
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/openai_gpt4/"
         }
     }
 }
@@ -670,7 +678,7 @@ Example configuration for a model on Hugging Face with a dedicated endpoint is s
             "model_kwargs": {
                 "temperature": 0.1
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/openai_gpt4/"
         }
     }
 }
@@ -697,7 +705,7 @@ Example configuration for a model on Hugging Face with a serverless endpoint is
             "model_kwargs": {
                 "temperature": 0.1
             },
-            "prompt_path": "./common/prompts/llama_70b/"
+            "prompt_path": "./app/prompts/llama_70b/"
         }
     }
 }
@@ -724,7 +732,7 @@ Example configuration for a model on Hugging Face with a serverless endpoint is
             "model_kwargs": {
                 "temperature": 0.1
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/openai_gpt4/"
         }
     }
 }
diff --git a/licenses/pymupdf4llm-AGPL-3.0.txt b/licenses/pymupdf4llm-AGPL-3.0.txt
new file mode 100644
index 0000000..0ad25db
--- /dev/null
+++ b/licenses/pymupdf4llm-AGPL-3.0.txt
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.

From feb734562998f256f6d7ebbbaa8d5a19233d2bb6 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Tue, 18 Nov 2025 18:32:55 +0530
Subject: [PATCH 10/20] Update README prompt_path for OpenAI and Bedrock config

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 13c88b3..9469ad6 100644
--- a/README.md
+++ b/README.md
@@ -482,7 +482,7 @@ In addition to the `OPENAI_API_KEY`, `llm_model` and `model_name` can be edited
             "model_kwargs": {
                 "temperature": 0
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/openai_gpt4/"
         },
         "multimodal_service": {
             "llm_service": "openai",
@@ -614,7 +614,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
                 "temperature": 0,
                 "max_tokens": 4096
             },
-            "prompt_path": "./common/prompts/openai_gpt4/"
+            "prompt_path": "./app/prompts/aws_bedrock_claude3haiku/"
         },
         "multimodal_service": {
             "llm_service": "bedrock",

From 7a6789665e17bf5ebe285251b130661a5d96c10c Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Tue, 18 Nov 2025 22:55:22 +0530
Subject: [PATCH 11/20] Fix prompt_path to use ./common/prompts/ for OpenAI and
 Bedrock

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9469ad6..13c88b3 100644
--- a/README.md
+++ b/README.md
@@ -482,7 +482,7 @@ In addition to the `OPENAI_API_KEY`, `llm_model` and `model_name` can be edited
             "model_kwargs": {
                 "temperature": 0
             },
-            "prompt_path": "./app/prompts/openai_gpt4/"
+            "prompt_path": "./common/prompts/openai_gpt4/"
         },
         "multimodal_service": {
             "llm_service": "openai",
@@ -614,7 +614,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
                 "temperature": 0,
                 "max_tokens": 4096
             },
-            "prompt_path": "./app/prompts/aws_bedrock_claude3haiku/"
+            "prompt_path": "./common/prompts/openai_gpt4/"
         },
         "multimodal_service": {
             "llm_service": "bedrock",

From 7f51feaf1a491736338da8fee4713e87e863816a Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Fri, 21 Nov 2025 20:57:53 +0530
Subject: [PATCH 12/20] Fix ingest config access, server folder_path, image docs

---
 graphrag/app/routers/ui.py          |  1 +
 graphrag/app/supportai/supportai.py | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/graphrag/app/routers/ui.py b/graphrag/app/routers/ui.py
index 9637347..114b489 100644
--- a/graphrag/app/routers/ui.py
+++ b/graphrag/app/routers/ui.py
@@ -395,6 +395,7 @@ async def serve_image_from_vertex(
         LogWriter.info(f"Serving image {image_id} from graph {graphname}")
 
         # Fetch the Image vertex by ID
+        # TigerGraph loading job uses gsql_lower() so all IDs are stored in lowercase
         image_vertices = conn.getVerticesById('Image', [image_id.lower()])
         
         if not image_vertices:
diff --git a/graphrag/app/supportai/supportai.py b/graphrag/app/supportai/supportai.py
index d2efe8a..6b93df0 100644
--- a/graphrag/app/supportai/supportai.py
+++ b/graphrag/app/supportai/supportai.py
@@ -337,9 +337,9 @@ def create_ingest(
     conn: TigerGraphConnection,
 ):
     # Check for invalid combination of multi format and non-s3 data source
-    if ingest_config.data_source.lower() in ["bda", "server"] and ingest_config.get("file_format", "").lower() != "multi":
-        logger.warning(f"File format {ingest_config.get('file_format', '').lower()} is not supported for data source {ingest_config.data_source.lower()}")
-        ingest_config["file_format"] = "multi"
+    if ingest_config.data_source.lower() in ["bda", "server"] and ingest_config.file_format.lower() != "multi":
+        logger.warning(f"File format {ingest_config.file_format.lower()} is not supported for data source {ingest_config.data_source.lower()}")
+        ingest_config.file_format = "multi"
 
     res_ingest_config = {"data_source": ingest_config.data_source.lower()}
     res_ingest_config["file_format"] = ingest_config.file_format.lower()
@@ -481,9 +481,9 @@ def create_ingest(
         except Exception as e:
             raise Exception(f"Error during Amazon BDA preprocessing: {e}")
     elif ingest_config.data_source.lower() == "server":
-        data_path = ingest_config.data_source_config.get("data_path", None)
+        data_path = ingest_config.data_source_config.get("folder_path", None)
         if data_path is None:
-            raise Exception("Data path not provided for server processing")
+            raise Exception("Folder path not provided for server processing")
         try:
             extractor = TextExtractor()
             server_processing_result = extractor.process_folder(data_path, graphname=graphname)
@@ -652,7 +652,10 @@ def ingest(
                 data_source_id = ingest_config.get("data_source_id", "DocumentContent")
                 if ingest_config.get("server_jobs"):
                     for doc_data in ingest_config.get("server_jobs"):
-                        if not doc_data.get("doc_id") or not doc_data.get("content"):
+                        if not doc_data.get("doc_id"):
+                            continue
+                        # Skip documents with neither content nor image_data
+                        if not doc_data.get("content") and not doc_data.get("image_data"):
                             continue
                         if doc_data.get("image_data"):
                             payload = {
@@ -660,8 +663,11 @@ def ingest(
                                 "doc_type": "image",
                                 "image_data": doc_data.get("image_data", ""),
                                 "image_format": doc_data.get("image_format", "jpg"),
+                                "image_description": doc_data.get("image_description", ""),
                                 "parent_doc": doc_data.get("parent_doc", ""),
                                 "page_number": doc_data.get("page_number", 0),
+                                "width": doc_data.get("width", 0),
+                                "height": doc_data.get("height", 0),
                                 "position": doc_data.get("position", 0),
                                 "content": ""
                             }

From a2b8d90e63c2647678b53cef9b952af05d73c5ae Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 24 Nov 2025 20:00:28 +0530
Subject: [PATCH 13/20] Add local temp file storage for ingestion review

---
 common/utils/text_extractors.py     |  59 +++--
 graphrag-ui/src/pages/Setup.tsx     | 319 +++++++++++++++++++++++++---
 graphrag/app/routers/ui.py          | 136 ++++++++++++
 graphrag/app/supportai/supportai.py |  70 +++++-
 4 files changed, 530 insertions(+), 54 deletions(-)

diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index b900cae..ec5b140 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -8,6 +8,7 @@
 import uuid
 import base64
 import io
+import threading
 from pathlib import Path
 import shutil
 import asyncio
@@ -15,6 +16,9 @@
 
 logger = logging.getLogger(__name__)
 
+# Global lock for pymupdf4llm calls (not thread-safe)
+_pymupdf4llm_lock = threading.Lock()
+
 
 class TextExtractor:
     """Class for handling text extraction from various file formats and cleanup."""
@@ -202,26 +206,39 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
             shutil.rmtree(image_output_folder, ignore_errors=True)
 
         # Convert PDF to markdown with extracted image files
-        try:
-            markdown_content = pymupdf4llm.to_markdown(
-                file_path,
-                write_images=True,
-                image_path=str(image_output_folder),  # unique folder per PDF
-                force_text=False,
-                margins=0,
-                image_size_limit=0.08,
-            )
-        except Exception as e:
-            logger.error(f"pymupdf4llm failed for {file_path}: {e}")
-            # Cleanup folder if it was created
-            if image_output_folder.exists():
-                shutil.rmtree(image_output_folder, ignore_errors=True)
-            return [{
-                "doc_id": base_doc_id,
-                "doc_type": "markdown",
-                "content": f"[PDF extraction failed: {e}]",
-                "position": 0
-            }]
+        # Use lock because pymupdf4llm's table extraction is not thread-safe
+        # See: https://github.com/pymupdf/PyMuPDF/issues/3241
+        with _pymupdf4llm_lock:
+            try:
+                markdown_content = pymupdf4llm.to_markdown(
+                    file_path,
+                    write_images=True,
+                    image_path=str(image_output_folder),  # unique folder per PDF
+                    margins=0,
+                    image_size_limit=0.08,
+                )
+            except Exception:
+                # Retry with table_strategy="lines" if first attempt fails
+                try:
+                    markdown_content = pymupdf4llm.to_markdown(
+                        file_path,
+                        write_images=True,
+                        image_path=str(image_output_folder),  # unique folder per PDF
+                        margins=0,
+                        image_size_limit=0.08,
+                        table_strategy="lines",
+                    )
+                except Exception as e:
+                    logger.error(f"pymupdf4llm failed for {file_path}: {e}")
+                    # Cleanup folder if it was created
+                    if image_output_folder.exists():
+                        shutil.rmtree(image_output_folder, ignore_errors=True)
+                    return [{
+                        "doc_id": base_doc_id,
+                        "doc_type": "markdown",
+                        "content": f"[PDF extraction failed: {e}]",
+                        "position": 0
+                    }]
 
         if not markdown_content or not markdown_content.strip():
             logger.warning(f"No content extracted from PDF: {file_path}")
@@ -461,4 +478,4 @@ def get_supported_extensions():
 def is_supported_file(file_path):
     """Check if a file is supported for text extraction."""
     extension = Path(file_path).suffix.lower()
-    return extension in get_supported_extensions()
+    return extension in get_supported_extensions()
\ No newline at end of file
diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx
index 2aaee99..c844896 100644
--- a/graphrag-ui/src/pages/Setup.tsx
+++ b/graphrag-ui/src/pages/Setup.tsx
@@ -56,6 +56,12 @@ const Setup = () => {
   const [uploadMessage, setUploadMessage] = useState("");
   const [isIngesting, setIsIngesting] = useState(false);
   const [ingestMessage, setIngestMessage] = useState("");
+  
+  // Ingestion temp files state
+  const [tempSessionId, setTempSessionId] = useState<string | null>(null);
+  const [tempFiles, setTempFiles] = useState<any[]>([]);
+  const [showTempFiles, setShowTempFiles] = useState(false);
+  const [ingestJobData, setIngestJobData] = useState<any>(null);
 
   // Refresh state
   const [refreshOpen, setRefreshOpen] = useState(false);
@@ -416,6 +422,125 @@ const Setup = () => {
     }
   };
 
+  // Fetch temp processed files
+  const fetchTempFiles = async (sessionId: string) => {
+    if (!ingestGraphName || !sessionId) return;
+
+    try {
+      const creds = localStorage.getItem("creds");
+      const response = await fetch(`/ui/${ingestGraphName}/ingestion_temp/list?session_id=${sessionId}`, {
+        headers: { Authorization: `Basic ${creds}` },
+      });
+      const data = await response.json();
+      if (data.status === "success" && data.sessions.length > 0) {
+        setTempFiles(data.sessions[0].files || []);
+        setShowTempFiles(true);
+      }
+    } catch (error) {
+      console.error("Error fetching temp files:", error);
+    }
+  };
+
+  // Delete a specific temp file
+  const handleDeleteTempFile = async (filename: string) => {
+    if (!ingestGraphName || !tempSessionId) return;
+
+    try {
+      const creds = localStorage.getItem("creds");
+      const response = await fetch(
+        `/ui/${ingestGraphName}/ingestion_temp/delete?session_id=${tempSessionId}&filename=${encodeURIComponent(filename)}`,
+        {
+          method: "DELETE",
+          headers: { Authorization: `Basic ${creds}` },
+        }
+      );
+      const data = await response.json();
+      if (data.status === "success") {
+        setIngestMessage(`✅ ${data.message}`);
+        // Refresh the temp files list
+        await fetchTempFiles(tempSessionId);
+      }
+    } catch (error: any) {
+      setIngestMessage(`❌ Error: ${error.message}`);
+    }
+  };
+
+  // Delete all temp files for session
+  const handleDeleteAllTempFiles = async () => {
+    if (!ingestGraphName || !tempSessionId) return;
+
+    try {
+      const creds = localStorage.getItem("creds");
+      const response = await fetch(
+        `/ui/${ingestGraphName}/ingestion_temp/delete?session_id=${tempSessionId}`,
+        {
+          method: "DELETE",
+          headers: { Authorization: `Basic ${creds}` },
+        }
+      );
+      const data = await response.json();
+      if (data.status === "success") {
+        setIngestMessage(`✅ ${data.message}`);
+        setTempFiles([]);
+        setShowTempFiles(false);
+        setTempSessionId(null);
+      }
+    } catch (error: any) {
+      setIngestMessage(`❌ Error: ${error.message}`);
+    }
+  };
+
+  // Run final ingest after user reviews temp files
+  const handleRunIngest = async () => {
+    if (!ingestJobData) {
+      setIngestMessage("❌ No ingest job data available");
+      return;
+    }
+
+    setIsIngesting(true);
+    setIngestMessage("Running final document ingest...");
+
+    try {
+      const creds = localStorage.getItem("creds");
+
+      const loadingInfo = {
+        load_job_id: ingestJobData.load_job_id,
+        data_source_id: ingestJobData.data_source_id,
+        file_path: ingestJobData.data_path,
+      };
+
+      const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Basic ${creds}`,
+        },
+        body: JSON.stringify(loadingInfo),
+      });
+
+      if (!ingestResponse.ok) {
+        const errorData = await ingestResponse.json();
+        throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
+      }
+
+      const ingestData = await ingestResponse.json();
+      console.log("Ingest response:", ingestData);
+
+      setIngestMessage(`✅ Data ingested successfully! Processed ${tempFiles.length} documents.`);
+      
+      // Clear temp state
+      setTempFiles([]);
+      setShowTempFiles(false);
+      setTempSessionId(null);
+      setIngestJobData(null);
+    } catch (error: any) {
+      console.error("Error running ingest:", error);
+      setIngestMessage(`❌ Error: ${error.message}`);
+    } finally {
+      setIsIngesting(false);
+    }
+  };
+
   // Ingest files into knowledge graph (uploaded or downloaded)
   const handleIngestDocuments = async (sourceType: "uploaded" | "downloaded" = "uploaded") => {
     if (!ingestGraphName) {
@@ -460,37 +585,53 @@ const Setup = () => {
       const createData = await createResponse.json();
       console.log("Create ingest response:", createData);
 
-      // Step 2: Run ingest
-      setIngestMessage("Step 2/2: Running document ingest...");
-
-      const loadingInfo = {
-        load_job_id: createData.load_job_id,
-        data_source_id: createData.data_source_id,
-        file_path: createData.data_path || createData.file_path, // Handle both field names
-      };
+      // Check if temp files were created (for server data source)
+      const sessionId = createData.data_source_id?.temp_session_id;
+      
+      if (sessionId) {
+        // Files are saved to temp storage - show them for review
+        setTempSessionId(sessionId);
+        setIngestJobData({
+          load_job_id: createData.load_job_id,
+          data_source_id: createData.data_source_id,
+          data_path: createData.data_path || createData.file_path,
+        });
+        setIngestMessage(`✅ Processed ${createData.data_source_id.file_count} files. Review them below before ingesting.`);
+        await fetchTempFiles(sessionId);
+        setIsIngesting(false);
+      } else {
+        // No temp files (e.g., S3 Bedrock) - proceed directly to ingest
+        setIngestMessage("Step 2/2: Running document ingest...");
 
-      const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/json",
-          Authorization: `Basic ${creds}`,
-        },
-        body: JSON.stringify(loadingInfo),
-      });
+        const loadingInfo = {
+          load_job_id: createData.load_job_id,
+          data_source_id: createData.data_source_id,
+          file_path: createData.data_path || createData.file_path,
+        };
 
-      if (!ingestResponse.ok) {
-        const errorData = await ingestResponse.json();
-        throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
-      }
+        const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Authorization: `Basic ${creds}`,
+          },
+          body: JSON.stringify(loadingInfo),
+        });
+
+        if (!ingestResponse.ok) {
+          const errorData = await ingestResponse.json();
+          throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
+        }
 
-      const ingestData = await ingestResponse.json();
-      console.log("Ingest response:", ingestData);
+        const ingestData = await ingestResponse.json();
+        console.log("Ingest response:", ingestData);
 
-      setIngestMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
+        setIngestMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
+        setIsIngesting(false);
+      }
     } catch (error: any) {
       console.error("Error ingesting data:", error);
       setIngestMessage(`❌ Error: ${error.message}`);
-    } finally {
       setIsIngesting(false);
     }
   };
@@ -1237,6 +1378,71 @@ const Setup = () => {
                           {ingestMessage}
                         </div>
                       )}
+
+                      {/* Processed Temp Files - Review before ingesting */}
+                      {showTempFiles && tempFiles.length > 0 && (
+                        <div className="mt-4 border border-gray-300 dark:border-[#3D3D3D] rounded-lg p-4">
+                          <div className="flex items-center justify-between mb-3">
+                            <h3 className="text-sm font-medium text-black dark:text-white">
+                              Processed Files ({tempFiles.length})
+                            </h3>
+                            <Button
+                              onClick={handleDeleteAllTempFiles}
+                              variant="outline"
+                              size="sm"
+                              className="dark:border-[#3D3D3D]"
+                            >
+                              <Trash2 className="h-3 w-3 mr-1" />
+                              Clear All
+                            </Button>
+                          </div>
+                          <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
+                            Review the processed files below. You can delete any file before ingesting.
+                          </p>
+                          <div className="space-y-2 max-h-64 overflow-y-auto mb-3">
+                            {tempFiles.map((file, index) => (
+                              <div
+                                key={index}
+                                className="flex items-center justify-between p-2 bg-gray-50 dark:bg-shadeA rounded"
+                              >
+                                <div className="flex-1 min-w-0">
+                                  <p className="text-sm text-black dark:text-white truncate">
+                                    {file.doc_id}
+                                  </p>
+                                  <p className="text-xs text-gray-500 dark:text-gray-400">
+                                    {(file.size / 1024).toFixed(2)} KB
+                                  </p>
+                                </div>
+                                <Button
+                                  onClick={() => handleDeleteTempFile(file.filename)}
+                                  variant="outline"
+                                  size="sm"
+                                  className="ml-2 dark:border-[#3D3D3D]"
+                                >
+                                  <Trash2 className="h-3 w-3" />
+                                </Button>
+                              </div>
+                            ))}
+                          </div>
+                          <Button
+                            onClick={handleRunIngest}
+                            disabled={isIngesting}
+                            className="gradient text-white w-full"
+                          >
+                            {isIngesting ? (
+                              <>
+                                <Loader2 className="h-4 w-4 mr-2 animate-spin" />
+                                Ingesting...
+                              </>
+                            ) : (
+                              <>
+                                <Database className="h-4 w-4 mr-2" />
+                                Run Final Ingest
+                              </>
+                            )}
+                          </Button>
+                        </div>
+                      )}
                     </div>
                   )}
 
@@ -1576,6 +1782,71 @@ const Setup = () => {
                           {ingestMessage}
                         </div>
                       )}
+
+                      {/* Processed Temp Files - Review before ingesting */}
+                      {showTempFiles && tempFiles.length > 0 && (
+                        <div className="mt-4 border border-gray-300 dark:border-[#3D3D3D] rounded-lg p-4">
+                          <div className="flex items-center justify-between mb-3">
+                            <h3 className="text-sm font-medium text-black dark:text-white">
+                              Processed Files ({tempFiles.length})
+                            </h3>
+                            <Button
+                              onClick={handleDeleteAllTempFiles}
+                              variant="outline"
+                              size="sm"
+                              className="dark:border-[#3D3D3D]"
+                            >
+                              <Trash2 className="h-3 w-3 mr-1" />
+                              Clear All
+                            </Button>
+                          </div>
+                          <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
+                            Review the processed files below. You can delete any file before ingesting.
+                          </p>
+                          <div className="space-y-2 max-h-64 overflow-y-auto mb-3">
+                            {tempFiles.map((file, index) => (
+                              <div
+                                key={index}
+                                className="flex items-center justify-between p-2 bg-gray-50 dark:bg-shadeA rounded"
+                              >
+                                <div className="flex-1 min-w-0">
+                                  <p className="text-sm text-black dark:text-white truncate">
+                                    {file.doc_id}
+                                  </p>
+                                  <p className="text-xs text-gray-500 dark:text-gray-400">
+                                    {(file.size / 1024).toFixed(2)} KB
+                                  </p>
+                                </div>
+                                <Button
+                                  onClick={() => handleDeleteTempFile(file.filename)}
+                                  variant="outline"
+                                  size="sm"
+                                  className="ml-2 dark:border-[#3D3D3D]"
+                                >
+                                  <Trash2 className="h-3 w-3" />
+                                </Button>
+                              </div>
+                            ))}
+                          </div>
+                          <Button
+                            onClick={handleRunIngest}
+                            disabled={isIngesting}
+                            className="gradient text-white w-full"
+                          >
+                            {isIngesting ? (
+                              <>
+                                <Loader2 className="h-4 w-4 mr-2 animate-spin" />
+                                Ingesting...
+                              </>
+                            ) : (
+                              <>
+                                <Database className="h-4 w-4 mr-2" />
+                                Run Final Ingest
+                              </>
+                            )}
+                          </Button>
+                        </div>
+                      )}
                     </div>
                   )}
                 </div>
diff --git a/graphrag/app/routers/ui.py b/graphrag/app/routers/ui.py
index 114b489..9b012ec 100644
--- a/graphrag/app/routers/ui.py
+++ b/graphrag/app/routers/ui.py
@@ -1380,3 +1380,139 @@ async def delete_cloud_downloads(
         logger.debug_pii(f"Delete error trace:\n{exc}")
         raise HTTPException(status_code=500, detail=f"Error deleting files: {str(e)}")
 
+
+# Ingestion Temp Files Endpoints
+
+@router.get(route_prefix + "/{graphname}/ingestion_temp/list")
+async def list_ingestion_temp_files(
+    graphname: str,
+    credentials: Annotated[HTTPBase, Depends(security)],
+    session_id: str = None,
+):
+    """
+    List processed files in the ingestion temp folder for a specific graph.
+
+    Only the session named by ``session_id`` is listed; without it, or when that
+    session folder does not exist, ``sessions`` is empty. Each ``.json`` file is
+    reported with its name, size, mtime and the ``doc_id`` stored inside it.
+    Raises HTTPException 400 for an unsafe ``session_id``, 500 on other failures.
+    """
+    # session_id is client-supplied and becomes a path component below, so it must
+    # be a single component that cannot point outside this graph's temp folder.
+    if session_id and (os.path.basename(session_id) != session_id or session_id in (".", "..")):
+        raise HTTPException(status_code=400, detail="Invalid session_id")
+
+    try:
+        base_temp_dir = os.path.join("uploads", "ingestion_temp", graphname)
+        sessions = []
+        total_files = 0
+
+        # If session_id provided, list only that session
+        session_dir = os.path.join(base_temp_dir, session_id) if session_id else None
+        if session_dir and os.path.isdir(session_dir):
+            files = []
+            for filename in os.listdir(session_dir):
+                filepath = os.path.join(session_dir, filename)
+                if not (os.path.isfile(filepath) and filename.endswith('.json')):
+                    continue
+                file_stat = os.stat(filepath)
+                # Read doc_id from file; unreadable or malformed files are still listed
+                try:
+                    with open(filepath, 'r', encoding='utf-8') as f:
+                        doc_id = json.load(f).get('doc_id', 'unknown')
+                except (OSError, ValueError, AttributeError):
+                    doc_id = 'unknown'
+
+                files.append({
+                    "filename": filename,
+                    "doc_id": doc_id,
+                    "size": file_stat.st_size,
+                    "modified": file_stat.st_mtime,
+                })
+            sessions.append({
+                "session_id": session_id,
+                "files": files,
+                "file_count": len(files),
+            })
+            total_files = len(files)
+
+        return {
+            "status": "success",
+            "graphname": graphname,
+            "sessions": sessions,
+            "total_files": total_files,
+        }
+
+    except Exception as e:
+        exc = traceback.format_exc()
+        logger.error(f"Error listing ingestion temp files for graph {graphname}: {e}")
+        logger.debug_pii(f"List error trace:\n{exc}")
+        raise HTTPException(status_code=500, detail=f"Error listing temp files: {str(e)}")
+
+
+@router.delete(route_prefix + "/{graphname}/ingestion_temp/delete")
+async def delete_ingestion_temp_files(
+    graphname: str,
+    credentials: Annotated[HTTPBase, Depends(security)],
+    session_id: str = None,
+    filename: str = None,
+):
+    """
+    Delete one file (``filename`` given) or a whole session folder from ingestion temp storage.
+
+    Removing a session's last file also removes the now-empty session folder.
+    Raises HTTPException 400 for a missing or unsafe ``session_id``/``filename``,
+    404 for an unknown ``filename``, and 500 on any other failure.
+    """
+    try:
+        if not session_id:
+            raise HTTPException(status_code=400, detail="session_id is required")
+        # Both values are client-supplied and end up in a path that gets deleted,
+        # so each must be a single path component (no separators, no "." / "..").
+        for label, value in (("session_id", session_id), ("filename", filename)):
+            if value and (os.path.basename(value) != value or value in (".", "..")):
+                raise HTTPException(status_code=400, detail=f"Invalid {label}")
+
+        session_dir = os.path.join("uploads", "ingestion_temp", graphname, session_id)
+        if not os.path.exists(session_dir):
+            return {
+                "status": "success",
+                "message": f"No temp files found for session {session_id}",
+                "deleted_files": [],
+            }
+        deleted_files = []
+        if filename:
+            # Delete specific file
+            file_path = os.path.join(session_dir, filename)
+            if not os.path.isfile(file_path):
+                raise HTTPException(status_code=404, detail=f"File {filename} not found")
+            os.remove(file_path)
+            deleted_files.append(filename)
+            logger.info(f"Deleted temp file {filename} from session {session_id}")
+            # If session folder is now empty, remove it
+            if not os.listdir(session_dir):
+                os.rmdir(session_dir)
+                logger.info(f"Removed empty session folder {session_id}")
+        else:
+            # Delete entire session folder; record its files first for the response
+            import shutil
+            deleted_files = [
+                n for n in os.listdir(session_dir) if os.path.isfile(os.path.join(session_dir, n))
+            ]
+            shutil.rmtree(session_dir)
+            logger.info(f"Deleted session folder {session_id} for graph {graphname}")
+
+        return {
+            "status": "success",
+            "message": f"Successfully deleted {len(deleted_files)} file(s)",
+            "deleted_files": deleted_files,
+            "session_id": session_id,
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        exc = traceback.format_exc()
+        logger.error(f"Error deleting ingestion temp files for graph {graphname}: {e}")
+        logger.debug_pii(f"Delete error trace:\n{exc}")
+        raise HTTPException(status_code=500, detail=f"Error deleting temp files: {str(e)}")
+
diff --git a/graphrag/app/supportai/supportai.py b/graphrag/app/supportai/supportai.py
index 6b93df0..88542dc 100644
--- a/graphrag/app/supportai/supportai.py
+++ b/graphrag/app/supportai/supportai.py
@@ -489,14 +489,37 @@ def create_ingest(
             server_processing_result = extractor.process_folder(data_path, graphname=graphname)
             if server_processing_result.get("statusCode") != 200:
                 raise Exception(f"Server folder processing failed: {server_processing_result}")
-            else:
-                logger.info(f"Server folder processing completed successfully: {server_processing_result}")
-
-            res_ingest_config["server_jobs"] = server_processing_result.get("documents", [])
+            
+            # Log only summary, NOT the full documents to avoid memory logging
+            logger.info(f"Server folder processing completed: {server_processing_result.get('message')}")
+
+            # Save processed documents to temporary folder instead of keeping in memory
+            temp_session_id = str(uuid.uuid4())
+            temp_folder = os.path.join("uploads", "ingestion_temp", graphname, temp_session_id)
+            os.makedirs(temp_folder, exist_ok=True)
+            
+            documents = server_processing_result.get("documents", [])
+            doc_count = len(documents)
+            
+            # Save each document as a separate JSON file
+            for idx, doc_data in enumerate(documents):
+                doc_filename = f"doc_{idx}_{doc_data.get('doc_id', 'unknown')}.json"
+                doc_filepath = os.path.join(temp_folder, doc_filename)
+                with open(doc_filepath, 'w', encoding='utf-8') as f:
+                    json.dump(doc_data, f, ensure_ascii=False, indent=2)
+            
+            # Clear documents from memory immediately after saving
+            documents.clear()
+            server_processing_result.clear()
+            
+            logger.info(f"Saved {doc_count} processed documents to {temp_folder}")
+            
+            res_ingest_config["temp_session_id"] = temp_session_id
+            res_ingest_config["temp_folder"] = temp_folder
+            res_ingest_config["file_count"] = doc_count
             res_ingest_config["data_source_id"] = "DocumentContent"
-            # Use a placeholder path that doesn't start with "/" to avoid pyTigerGraph treating it as a file
-            # The actual folder path is stored in server_jobs, this is just for the API call
-            res["data_path"] = "in_response"
+            # Use a placeholder path to indicate temp storage
+            res["data_path"] = "in_temp_storage"
             res["data_source_id"] = res_ingest_config
         except Exception as e:
             raise Exception(f"Error during server folder processing: {e}")
@@ -650,13 +673,30 @@ def ingest(
             try:
                 processed_files = []
                 data_source_id = ingest_config.get("data_source_id", "DocumentContent")
-                if ingest_config.get("server_jobs"):
-                    for doc_data in ingest_config.get("server_jobs"):
+                
+                # Read from temporary folder
+                temp_folder = ingest_config.get("temp_folder")
+                if not temp_folder or not os.path.exists(temp_folder):
+                    raise Exception(f"Temporary folder not found: {temp_folder}")
+                
+                # Read all JSON files from temp folder
+                json_files = [f for f in os.listdir(temp_folder) if f.endswith('.json')]
+                logger.info(f"Reading {len(json_files)} documents from {temp_folder}")
+                
+                for json_filename in json_files:
+                    json_filepath = os.path.join(temp_folder, json_filename)
+                    try:
+                        with open(json_filepath, 'r', encoding='utf-8') as f:
+                            doc_data = json.load(f)
+                        
                         if not doc_data.get("doc_id"):
+                            logger.warning(f"Skipping invalid document: {json_filename}")
                             continue
                         # Skip documents with neither content nor image_data
                         if not doc_data.get("content") and not doc_data.get("image_data"):
+                            logger.warning(f"Skipping document with no content: {json_filename}")
                             continue
+                            
                         if doc_data.get("image_data"):
                             payload = {
                                 "doc_id": doc_data.get("doc_id", ""),
@@ -684,6 +724,18 @@ def ingest(
                             'parent_doc': doc_data.get("parent_doc", ""),
                         })
                         logger.info(f"Data uploading done for doc_id: {doc_data.get('doc_id', 'unknown')}")
+                    except Exception as file_error:
+                        logger.error(f"Error processing file {json_filename}: {file_error}")
+                        continue
+                
+                # Clean up temp folder after successful ingestion
+                try:
+                    import shutil
+                    shutil.rmtree(temp_folder)
+                    logger.info(f"Cleaned up temporary folder: {temp_folder}")
+                except Exception as cleanup_error:
+                    logger.warning(f"Failed to cleanup temp folder {temp_folder}: {cleanup_error}")
+                    
             except Exception as e:
                 raise Exception(f"Error during server markdown extraction and TigerGraph loading: {e}")
             return {

From 38619e045a0411bd25132c2c325f7afaea9c0121 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 24 Nov 2025 20:31:36 +0530
Subject: [PATCH 14/20] Add direct ingestion option with checkbox to skip file
 review

---
 graphrag-ui/src/pages/Setup.tsx | 37 ++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx
index c844896..e86eefb 100644
--- a/graphrag-ui/src/pages/Setup.tsx
+++ b/graphrag-ui/src/pages/Setup.tsx
@@ -62,6 +62,7 @@ const Setup = () => {
   const [tempFiles, setTempFiles] = useState<any[]>([]);
   const [showTempFiles, setShowTempFiles] = useState(false);
   const [ingestJobData, setIngestJobData] = useState<any>(null);
+  const [directIngestion, setDirectIngestion] = useState(false);
 
   // Refresh state
   const [refreshOpen, setRefreshOpen] = useState(false);
@@ -588,8 +589,8 @@ const Setup = () => {
       // Check if temp files were created (for server data source)
       const sessionId = createData.data_source_id?.temp_session_id;
       
-      if (sessionId) {
-        // Files are saved to temp storage - show them for review
+      if (sessionId && !directIngestion) {
+        // Files are saved to temp storage - show them for review (only if not direct ingestion)
         setTempSessionId(sessionId);
         setIngestJobData({
           load_job_id: createData.load_job_id,
@@ -600,7 +601,7 @@ const Setup = () => {
         await fetchTempFiles(sessionId);
         setIsIngesting(false);
       } else {
-        // No temp files (e.g., S3 Bedrock) - proceed directly to ingest
+        // No temp files (e.g., S3 Bedrock) OR direct ingestion enabled - proceed directly to ingest
         setIngestMessage("Step 2/2: Running document ingest...");
 
         const loadingInfo = {
@@ -1350,6 +1351,21 @@ const Setup = () => {
                       <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
                         Process uploaded files and add them to the knowledge graph
                       </p>
+                      
+                      {/* Direct Ingestion Checkbox */}
+                      <div className="flex items-center mb-3">
+                        <input
+                          type="checkbox"
+                          id="directIngestion"
+                          checked={directIngestion}
+                          onChange={(e) => setDirectIngestion(e.target.checked)}
+                          className="mr-2 h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
+                        />
+                        <label htmlFor="directIngestion" className="text-sm text-gray-700 dark:text-gray-300">
+                          Direct Ingestion (skip file review)
+                        </label>
+                      </div>
+                      
                       <Button
                         onClick={() => handleIngestDocuments("uploaded")}
                         disabled={isIngesting}
@@ -1754,6 +1770,21 @@ const Setup = () => {
                       <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
                         Process downloaded files and add them to the knowledge graph
                       </p>
+                      
+                      {/* Direct Ingestion Checkbox */}
+                      <div className="flex items-center mb-3">
+                        <input
+                          type="checkbox"
+                          id="directIngestionDownloaded"
+                          checked={directIngestion}
+                          onChange={(e) => setDirectIngestion(e.target.checked)}
+                          className="mr-2 h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
+                        />
+                        <label htmlFor="directIngestionDownloaded" className="text-sm text-gray-700 dark:text-gray-300">
+                          Direct Ingestion (skip file review)
+                        </label>
+                      </div>
+                      
                       <Button
                         onClick={() => handleIngestDocuments("downloaded")}
                         disabled={isIngesting}

From 3bbe8bc4d5dc48fc7547c690dbbfcdcaec21ea82 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Wed, 26 Nov 2025 20:37:25 +0530
Subject: [PATCH 15/20] Auto-process files on upload/download, delete temp
 files with original files

---
 graphrag-ui/src/pages/Setup.tsx | 406 +++++++++++++++++---------------
 1 file changed, 219 insertions(+), 187 deletions(-)

diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx
index e86eefb..17f952c 100644
--- a/graphrag-ui/src/pages/Setup.tsx
+++ b/graphrag-ui/src/pages/Setup.tsx
@@ -56,7 +56,7 @@ const Setup = () => {
   const [uploadMessage, setUploadMessage] = useState("");
   const [isIngesting, setIsIngesting] = useState(false);
   const [ingestMessage, setIngestMessage] = useState("");
-  
+
   // Ingestion temp files state
   const [tempSessionId, setTempSessionId] = useState<string | null>(null);
   const [tempFiles, setTempFiles] = useState<any[]>([]);
@@ -166,13 +166,19 @@ const Setup = () => {
 
       const data = await response.json();
       if (data.status === "success") {
-        setUploadMessage(`✅ ${data.message}`);
+        setUploadMessage(`✅ ${data.message} Processing...`);
         setSelectedFiles(null);
         await fetchUploadedFiles();
+        
+        // Step 2: Call create_ingest to process uploaded files
+        console.log("Calling handleCreateIngestAfterUpload from main upload...");
+        await handleCreateIngestAfterUpload("uploaded");
+        console.log("handleCreateIngestAfterUpload completed");
       } else {
         setUploadMessage(`⚠️ ${data.message}`);
       }
     } catch (error: any) {
+      console.error("Upload error:", error);
       setUploadMessage(`❌ Error: ${error.message}`);
     } finally {
       setIsUploading(false);
@@ -226,14 +232,20 @@ const Setup = () => {
 
       // Show final result
       if (failedCount === 0) {
-        setUploadMessage(`✅ Successfully uploaded all ${uploadedCount} files (uploaded individually).`);
+        setUploadMessage(`✅ Successfully uploaded all ${uploadedCount} files. Processing...`);
       } else {
-        setUploadMessage(`⚠️ Uploaded ${uploadedCount} files successfully, ${failedCount} failed.`);
+        setUploadMessage(`⚠️ Uploaded ${uploadedCount} files successfully, ${failedCount} failed. Processing...`);
       }
       
       setSelectedFiles(null);
       await fetchUploadedFiles();
+      
+      // Step 2: Call create_ingest to process uploaded files
+      console.log("Calling handleCreateIngestAfterUpload...");
+      await handleCreateIngestAfterUpload("uploaded");
+      console.log("handleCreateIngestAfterUpload completed");
     } catch (error: any) {
+      console.error("Upload error:", error);
       setUploadMessage(`❌ Batch upload error: ${error.message}`);
     } finally {
       setIsUploading(false);
@@ -244,8 +256,19 @@ const Setup = () => {
   const handleDeleteFile = async (filename: string) => {
     if (!ingestGraphName) return;
 
+    console.log("Deleting file:", filename);
+    console.log("tempSessionId:", tempSessionId);
+
     try {
       const creds = localStorage.getItem("creds");
+      
+      // Also delete corresponding temp files FIRST if session exists
+      if (tempSessionId) {
+        console.log("Calling handleDeleteTempFilesForOriginal...");
+        await handleDeleteTempFilesForOriginal(filename);
+      }
+      
+      // Then delete original file
       const response = await fetch(
         `/ui/${ingestGraphName}/uploads?filename=${encodeURIComponent(filename)}`,
         {
@@ -254,9 +277,11 @@ const Setup = () => {
         }
       );
       const data = await response.json();
+      
       setUploadMessage(`✅ ${data.message}`);
       await fetchUploadedFiles();
     } catch (error: any) {
+      console.error("Delete error:", error);
       setUploadMessage(`❌ Error: ${error.message}`);
     }
   };
@@ -275,6 +300,12 @@ const Setup = () => {
         headers: { Authorization: `Basic ${creds}` },
       });
       const data = await response.json();
+      
+      // Also clear temp session
+      if (tempSessionId) {
+        await handleDeleteAllTempFiles();
+      }
+      
       setUploadMessage(`✅ ${data.message}`);
       await fetchUploadedFiles();
     } catch (error: any) {
@@ -367,8 +398,11 @@ const Setup = () => {
 
       const data = await response.json();
       if (data.status === "success") {
-        setDownloadMessage(`✅ ${data.message}`);
+        setDownloadMessage(`✅ ${data.message}. Processing...`);
         await fetchDownloadedFiles();
+        
+        // Step 2: Call create_ingest to process downloaded files
+        await handleCreateIngestAfterUpload("downloaded");
       } else if (data.status === "warning") {
         setDownloadMessage(`⚠️ ${data.message}`);
       } else {
@@ -395,6 +429,12 @@ const Setup = () => {
         }
       );
       const data = await response.json();
+      
+      // Also delete corresponding temp files if session exists
+      if (tempSessionId) {
+        await handleDeleteTempFilesForOriginal(filename);
+      }
+      
       setDownloadMessage(`✅ ${data.message}`);
       await fetchDownloadedFiles();
     } catch (error: any) {
@@ -491,6 +531,60 @@ const Setup = () => {
     }
   };
 
+  /**
+   * Delete the processed temp files that were produced from one original file.
+   *
+   * Temp files are expected to be named `doc_{idx}_{baseName}*.json`. Failures are
+   * logged and never thrown, so the caller's own delete can still proceed.
+   */
+  const handleDeleteTempFilesForOriginal = async (originalFilename: string) => {
+    if (!ingestGraphName || !tempSessionId) {
+      return;
+    }
+
+    try {
+      // Extract base name without extension (e.g., "document.pdf" -> "document")
+      const baseName = originalFilename.replace(/\.[^/.]+$/, "");
+      const creds = localStorage.getItem("creds");
+      const authHeaders = { Authorization: `Basic ${creds}` };
+      const sessionParam = encodeURIComponent(tempSessionId);
+
+      // Fetch temp files to find matches
+      const response = await fetch(
+        `/ui/${ingestGraphName}/ingestion_temp/list?session_id=${sessionParam}`,
+        { headers: authHeaders }
+      );
+      if (!response.ok) {
+        throw new Error(`Failed to list temp files: ${response.statusText}`);
+      }
+      const data = await response.json();
+      const files: any[] = data.sessions?.[0]?.files || [];
+
+      // Anchor the match on the `doc_{idx}_` prefix: a plain substring test would also
+      // hit unrelated documents (e.g. "report" inside "annual_report") or the index itself.
+      const matchingFiles = files.filter((f: any) => {
+        const match = /^doc_\d+_(.*)$/.exec(f.filename);
+        return match !== null && match[1].startsWith(baseName);
+      });
+
+      // Delete each matching file
+      for (const file of matchingFiles) {
+        const deleteResponse = await fetch(
+          `/ui/${ingestGraphName}/ingestion_temp/delete?session_id=${sessionParam}&filename=${encodeURIComponent(file.filename)}`,
+          {
+            method: "DELETE",
+            headers: authHeaders,
+          }
+        );
+        if (!deleteResponse.ok) {
+          console.error("Failed to delete temp file:", file.filename, deleteResponse.statusText);
+        }
+      }
+    } catch (error: any) {
+      console.error("Error deleting temp files:", error);
+    }
+  };
+
   // Run final ingest after user reviews temp files
   const handleRunIngest = async () => {
     if (!ingestJobData) {
@@ -602,32 +696,32 @@ const Setup = () => {
         setIsIngesting(false);
       } else {
         // No temp files (e.g., S3 Bedrock) OR direct ingestion enabled - proceed directly to ingest
-        setIngestMessage("Step 2/2: Running document ingest...");
+      setIngestMessage("Step 2/2: Running document ingest...");
 
-        const loadingInfo = {
-          load_job_id: createData.load_job_id,
-          data_source_id: createData.data_source_id,
+      const loadingInfo = {
+        load_job_id: createData.load_job_id,
+        data_source_id: createData.data_source_id,
           file_path: createData.data_path || createData.file_path,
-        };
+      };
 
-        const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
-          method: "POST",
-          headers: {
-            "Content-Type": "application/json",
-            Authorization: `Basic ${creds}`,
-          },
-          body: JSON.stringify(loadingInfo),
-        });
+      const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Basic ${creds}`,
+        },
+        body: JSON.stringify(loadingInfo),
+      });
 
-        if (!ingestResponse.ok) {
-          const errorData = await ingestResponse.json();
-          throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
-        }
+      if (!ingestResponse.ok) {
+        const errorData = await ingestResponse.json();
+        throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
+      }
 
-        const ingestData = await ingestResponse.json();
-        console.log("Ingest response:", ingestData);
+      const ingestData = await ingestResponse.json();
+      console.log("Ingest response:", ingestData);
 
-        setIngestMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
+      setIngestMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
         setIsIngesting(false);
       }
     } catch (error: any) {
@@ -637,6 +731,88 @@ const Setup = () => {
     }
   };
 
+  /**
+   * Process freshly uploaded/downloaded files by calling create_ingest on the graph's
+   * server folder. Called automatically after an upload or cloud download finishes.
+   * The resulting job is stored for a later ingest and, when "Direct Ingestion" is
+   * checked, ingested immediately. Errors are reported in the status message.
+   */
+  const handleCreateIngestAfterUpload = async (sourceType: "uploaded" | "downloaded" = "uploaded") => {
+    if (!ingestGraphName) return;
+
+    // Report progress in the panel that triggered the processing.
+    const setMessage = sourceType === "uploaded" ? setUploadMessage : setDownloadMessage;
+    const folderPath = sourceType === "uploaded"
+      ? `uploads/${ingestGraphName}`
+      : `downloaded_files_cloud/${ingestGraphName}`;
+
+    try {
+      const creds = localStorage.getItem("creds");
+      const jsonHeaders = {
+        "Content-Type": "application/json",
+        Authorization: `Basic ${creds}`,
+      };
+
+      // Call create_ingest to process files
+      const createIngestConfig = {
+        data_source: "server",
+        data_source_config: { folder_path: folderPath },
+        loader_config: {},
+        file_format: "multi",
+      };
+      const createResponse = await fetch(`/ui/${ingestGraphName}/create_ingest`, {
+        method: "POST",
+        headers: jsonHeaders,
+        body: JSON.stringify(createIngestConfig),
+      });
+      if (!createResponse.ok) {
+        const errorData = await createResponse.json();
+        throw new Error(errorData.detail || `Failed to create ingest job: ${createResponse.statusText}`);
+      }
+
+      const createData = await createResponse.json();
+      const sessionId = createData.data_source_id?.temp_session_id;
+      if (!sessionId) {
+        console.warn("No session ID returned from create_ingest");
+        return;
+      }
+      // Save session ID and job data for a later ingest
+      const jobData = {
+        load_job_id: createData.load_job_id,
+        data_source_id: createData.data_source_id,
+        data_path: createData.data_path || createData.file_path,
+      };
+      setTempSessionId(sessionId);
+      setIngestJobData(jobData);
+      if (!directIngestion) {
+        setMessage(`✅ Successfully processed ${createData.data_source_id.file_count} files. Ready for ingestion.`);
+        return;
+      }
+
+      // Direct ingestion: send the ingest request from the local jobData. Calling
+      // handleRunIngest() here would read the ingestJobData state from this render,
+      // which does not yet contain the value passed to setIngestJobData().
+      setMessage("Running direct ingestion...");
+      const ingestResponse = await fetch(`/ui/${ingestGraphName}/ingest`, {
+        method: "POST",
+        headers: jsonHeaders,
+        body: JSON.stringify({
+          load_job_id: jobData.load_job_id,
+          data_source_id: jobData.data_source_id,
+          file_path: jobData.data_path,
+        }),
+      });
+      if (!ingestResponse.ok) {
+        const errorData = await ingestResponse.json();
+        throw new Error(errorData.detail || `Failed to run ingest: ${ingestResponse.statusText}`);
+      }
+      setMessage(`✅ Data ingested successfully! Processed documents from ${folderPath}/`);
+    } catch (error: any) {
+      console.error("Error in create_ingest:", error);
+      setMessage(`❌ Processing error: ${error.message}`);
+    }
+  };
+
   // Ingest files from S3 with Bedrock BDA
   const handleS3BedrockIngest = async () => {
     if (!ingestGraphName) {
@@ -1303,6 +1479,20 @@ const Setup = () => {
                   <p className="text-xs text-gray-500 dark:text-gray-400 mt-2">
                     Maximum upload per request: {MAX_UPLOAD_SIZE_MB} MB.
                   </p>
+                  
+                  {/* Direct Ingestion Checkbox */}
+                  <div className="flex items-center mt-3 mb-2">
+                    <input
+                      type="checkbox"
+                      id="directIngestion"
+                      checked={directIngestion}
+                      onChange={(e) => setDirectIngestion(e.target.checked)}
+                      className="mr-2 h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
+                    />
+                    <label htmlFor="directIngestion" className="text-sm text-gray-700 dark:text-gray-300">
+                      Direct Ingestion (upload + process + ingest all at once)
+                    </label>
+                  </div>
                   </div>
 
                   <div className="flex gap-2">
@@ -1352,23 +1542,9 @@ const Setup = () => {
                         Process uploaded files and add them to the knowledge graph
                       </p>
                       
-                      {/* Direct Ingestion Checkbox */}
-                      <div className="flex items-center mb-3">
-                        <input
-                          type="checkbox"
-                          id="directIngestion"
-                          checked={directIngestion}
-                          onChange={(e) => setDirectIngestion(e.target.checked)}
-                          className="mr-2 h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
-                        />
-                        <label htmlFor="directIngestion" className="text-sm text-gray-700 dark:text-gray-300">
-                          Direct Ingestion (skip file review)
-                        </label>
-                      </div>
-                      
                       <Button
-                        onClick={() => handleIngestDocuments("uploaded")}
-                        disabled={isIngesting}
+                        onClick={handleRunIngest}
+                        disabled={isIngesting || !tempSessionId}
                         className="gradient text-white w-full"
                       >
                         {isIngesting ? (
@@ -1394,71 +1570,6 @@ const Setup = () => {
                           {ingestMessage}
                         </div>
                       )}
-
-                      {/* Processed Temp Files - Review before ingesting */}
-                      {showTempFiles && tempFiles.length > 0 && (
-                        <div className="mt-4 border border-gray-300 dark:border-[#3D3D3D] rounded-lg p-4">
-                          <div className="flex items-center justify-between mb-3">
-                            <h3 className="text-sm font-medium text-black dark:text-white">
-                              Processed Files ({tempFiles.length})
-                            </h3>
-                            <Button
-                              onClick={handleDeleteAllTempFiles}
-                              variant="outline"
-                              size="sm"
-                              className="dark:border-[#3D3D3D]"
-                            >
-                              <Trash2 className="h-3 w-3 mr-1" />
-                              Clear All
-                            </Button>
-                          </div>
-                          <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
-                            Review the processed files below. You can delete any file before ingesting.
-                          </p>
-                          <div className="space-y-2 max-h-64 overflow-y-auto mb-3">
-                            {tempFiles.map((file, index) => (
-                              <div
-                                key={index}
-                                className="flex items-center justify-between p-2 bg-gray-50 dark:bg-shadeA rounded"
-                              >
-                                <div className="flex-1 min-w-0">
-                                  <p className="text-sm text-black dark:text-white truncate">
-                                    {file.doc_id}
-                                  </p>
-                                  <p className="text-xs text-gray-500 dark:text-gray-400">
-                                    {(file.size / 1024).toFixed(2)} KB
-                                  </p>
-                                </div>
-                                <Button
-                                  onClick={() => handleDeleteTempFile(file.filename)}
-                                  variant="outline"
-                                  size="sm"
-                                  className="ml-2 dark:border-[#3D3D3D]"
-                                >
-                                  <Trash2 className="h-3 w-3" />
-                                </Button>
-                              </div>
-                            ))}
-                          </div>
-                          <Button
-                            onClick={handleRunIngest}
-                            disabled={isIngesting}
-                            className="gradient text-white w-full"
-                          >
-                            {isIngesting ? (
-                              <>
-                                <Loader2 className="h-4 w-4 mr-2 animate-spin" />
-                                Ingesting...
-                              </>
-                            ) : (
-                              <>
-                                <Database className="h-4 w-4 mr-2" />
-                                Run Final Ingest
-                              </>
-                            )}
-                          </Button>
-                        </div>
-                      )}
                     </div>
                   )}
 
@@ -1771,23 +1882,9 @@ const Setup = () => {
                         Process downloaded files and add them to the knowledge graph
                       </p>
                       
-                      {/* Direct Ingestion Checkbox */}
-                      <div className="flex items-center mb-3">
-                        <input
-                          type="checkbox"
-                          id="directIngestionDownloaded"
-                          checked={directIngestion}
-                          onChange={(e) => setDirectIngestion(e.target.checked)}
-                          className="mr-2 h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
-                        />
-                        <label htmlFor="directIngestionDownloaded" className="text-sm text-gray-700 dark:text-gray-300">
-                          Direct Ingestion (skip file review)
-                        </label>
-                      </div>
-                      
                       <Button
-                        onClick={() => handleIngestDocuments("downloaded")}
-                        disabled={isIngesting}
+                        onClick={handleRunIngest}
+                        disabled={isIngesting || !tempSessionId}
                         className="gradient text-white w-full"
                       >
                         {isIngesting ? (
@@ -1813,71 +1910,6 @@ const Setup = () => {
                           {ingestMessage}
                         </div>
                       )}
-
-                      {/* Processed Temp Files - Review before ingesting */}
-                      {showTempFiles && tempFiles.length > 0 && (
-                        <div className="mt-4 border border-gray-300 dark:border-[#3D3D3D] rounded-lg p-4">
-                          <div className="flex items-center justify-between mb-3">
-                            <h3 className="text-sm font-medium text-black dark:text-white">
-                              Processed Files ({tempFiles.length})
-                            </h3>
-                            <Button
-                              onClick={handleDeleteAllTempFiles}
-                              variant="outline"
-                              size="sm"
-                              className="dark:border-[#3D3D3D]"
-                            >
-                              <Trash2 className="h-3 w-3 mr-1" />
-                              Clear All
-                            </Button>
-                          </div>
-                          <p className="text-xs text-gray-500 dark:text-gray-400 mb-3">
-                            Review the processed files below. You can delete any file before ingesting.
-                          </p>
-                          <div className="space-y-2 max-h-64 overflow-y-auto mb-3">
-                            {tempFiles.map((file, index) => (
-                              <div
-                                key={index}
-                                className="flex items-center justify-between p-2 bg-gray-50 dark:bg-shadeA rounded"
-                              >
-                                <div className="flex-1 min-w-0">
-                                  <p className="text-sm text-black dark:text-white truncate">
-                                    {file.doc_id}
-                                  </p>
-                                  <p className="text-xs text-gray-500 dark:text-gray-400">
-                                    {(file.size / 1024).toFixed(2)} KB
-                                  </p>
-                                </div>
-                                <Button
-                                  onClick={() => handleDeleteTempFile(file.filename)}
-                                  variant="outline"
-                                  size="sm"
-                                  className="ml-2 dark:border-[#3D3D3D]"
-                                >
-                                  <Trash2 className="h-3 w-3" />
-                                </Button>
-                              </div>
-                            ))}
-                          </div>
-                          <Button
-                            onClick={handleRunIngest}
-                            disabled={isIngesting}
-                            className="gradient text-white w-full"
-                          >
-                            {isIngesting ? (
-                              <>
-                                <Loader2 className="h-4 w-4 mr-2 animate-spin" />
-                                Ingesting...
-                              </>
-                            ) : (
-                              <>
-                                <Database className="h-4 w-4 mr-2" />
-                                Run Final Ingest
-                              </>
-                            )}
-                          </Button>
-                        </div>
-                      )}
                     </div>
                   )}
                 </div>

From dd5772453ee8e676b2e1af9bdc6ffa9eee495254 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 1 Dec 2025 17:16:28 +0530
Subject: [PATCH 16/20] Merge latest main and consolidate markdown_parsing.py
 into text_extractors.py

---
 common/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/requirements.txt b/common/requirements.txt
index f0022f3..3bbd096 100644
--- a/common/requirements.txt
+++ b/common/requirements.txt
@@ -108,7 +108,7 @@ ordered-set==4.1.0
 orjson==3.10.18
 packaging==24.2
 pandas==2.2.3
-#pathtools==0.1.2
+pathtools==0.1.2
 pillow==11.2.1
 #PyMuPDF==1.26.4
 pymupdf4llm==0.2.0

From 5d474687274707e3004b2ca1669ea346898bdd99 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Mon, 1 Dec 2025 20:44:56 +0530
Subject: [PATCH 17/20] Fix merge conflict resolution: add missing try block
 and remove incorrect temp_folder cleanup

---
 graphrag/app/supportai/supportai.py | 79 +++++++++++++----------------
 1 file changed, 36 insertions(+), 43 deletions(-)

diff --git a/graphrag/app/supportai/supportai.py b/graphrag/app/supportai/supportai.py
index 17e1d2a..c2030cd 100644
--- a/graphrag/app/supportai/supportai.py
+++ b/graphrag/app/supportai/supportai.py
@@ -675,50 +675,43 @@ def ingest(
                 data_source_id = ingest_config.get("data_source_id", "DocumentContent")
                 if ingest_config.get("server_jobs"):
                     for doc_data in ingest_config.get("server_jobs"):
-                        if not doc_data.get("doc_id"):
+                        try:
+                            if not doc_data.get("doc_id"):
+                                continue
+                            # Skip documents with neither content nor image_data
+                            if not doc_data.get("content") and not doc_data.get("image_data"):
+                                continue
+                                
+                            if doc_data.get("image_data"):
+                                payload = {
+                                    "doc_id": doc_data.get("doc_id", ""),
+                                    "doc_type": "image",
+                                    "image_data": doc_data.get("image_data", ""),
+                                    "image_format": doc_data.get("image_format", "jpg"),
+                                    "image_description": doc_data.get("image_description", ""),
+                                    "parent_doc": doc_data.get("parent_doc", ""),
+                                    "page_number": doc_data.get("page_number", 0),
+                                    "width": doc_data.get("width", 0),
+                                    "height": doc_data.get("height", 0),
+                                    "position": doc_data.get("position", 0),
+                                    "content": ""
+                                }
+                            else:
+                                payload = {
+                                    "doc_id": doc_data.get("doc_id", ""),
+                                    "doc_type": doc_data.get("doc_type", "markdown"),
+                                    "content": doc_data.get("content", "")
+                                }
+                            payload_json = json.dumps(payload)
+                            conn.runLoadingJobWithData(payload_json, data_source_id, loader_info.load_job_id)
+                            processed_files.append({
+                                'file_path': doc_data.get("doc_id", ""),
+                                'parent_doc': doc_data.get("parent_doc", ""),
+                            })
+                            logger.info(f"Data uploading done for doc_id: {doc_data.get('doc_id', 'unknown')}")
+                        except Exception as file_error:
+                            logger.error(f"Error processing document {doc_data.get('doc_id', 'unknown')}: {file_error}")
                             continue
-                        # Skip documents with neither content nor image_data
-                        if not doc_data.get("content") and not doc_data.get("image_data"):
-                            continue
-                            
-                        if doc_data.get("image_data"):
-                            payload = {
-                                "doc_id": doc_data.get("doc_id", ""),
-                                "doc_type": "image",
-                                "image_data": doc_data.get("image_data", ""),
-                                "image_format": doc_data.get("image_format", "jpg"),
-                                "image_description": doc_data.get("image_description", ""),
-                                "parent_doc": doc_data.get("parent_doc", ""),
-                                "page_number": doc_data.get("page_number", 0),
-                                "width": doc_data.get("width", 0),
-                                "height": doc_data.get("height", 0),
-                                "position": doc_data.get("position", 0),
-                                "content": ""
-                            }
-                        else:
-                            payload = {
-                                "doc_id": doc_data.get("doc_id", ""),
-                                "doc_type": doc_data.get("doc_type", "markdown"),
-                                "content": doc_data.get("content", "")
-                            }
-                        payload_json = json.dumps(payload)
-                        conn.runLoadingJobWithData(payload_json, data_source_id, loader_info.load_job_id)
-                        processed_files.append({
-                            'file_path': doc_data.get("doc_id", ""),
-                            'parent_doc': doc_data.get("parent_doc", ""),
-                        })
-                        logger.info(f"Data uploading done for doc_id: {doc_data.get('doc_id', 'unknown')}")
-                    except Exception as file_error:
-                        logger.error(f"Error processing file {json_filename}: {file_error}")
-                        continue
-                
-                # Clean up temp folder after successful ingestion
-                try:
-                    import shutil
-                    shutil.rmtree(temp_folder)
-                    logger.info(f"Cleaned up temporary folder: {temp_folder}")
-                except Exception as cleanup_error:
-                    logger.warning(f"Failed to cleanup temp folder {temp_folder}: {cleanup_error}")
                     
             except Exception as e:
                 raise Exception(f"Error during server markdown extraction and TigerGraph loading: {e}")

From 7fd1ab28e7ddc90d7105221eb2506ca5737dc6b4 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Tue, 2 Dec 2025 22:09:08 +0530
Subject: [PATCH 18/20] Supportai merge issue fix for temp file ingestion

---
 graphrag/app/supportai/supportai.py | 99 ++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 38 deletions(-)

diff --git a/graphrag/app/supportai/supportai.py b/graphrag/app/supportai/supportai.py
index c2030cd..88542dc 100644
--- a/graphrag/app/supportai/supportai.py
+++ b/graphrag/app/supportai/supportai.py
@@ -673,45 +673,68 @@ def ingest(
             try:
                 processed_files = []
                 data_source_id = ingest_config.get("data_source_id", "DocumentContent")
-                if ingest_config.get("server_jobs"):
-                    for doc_data in ingest_config.get("server_jobs"):
-                        try:
-                            if not doc_data.get("doc_id"):
-                                continue
-                            # Skip documents with neither content nor image_data
-                            if not doc_data.get("content") and not doc_data.get("image_data"):
-                                continue
-                                
-                            if doc_data.get("image_data"):
-                                payload = {
-                                    "doc_id": doc_data.get("doc_id", ""),
-                                    "doc_type": "image",
-                                    "image_data": doc_data.get("image_data", ""),
-                                    "image_format": doc_data.get("image_format", "jpg"),
-                                    "image_description": doc_data.get("image_description", ""),
-                                    "parent_doc": doc_data.get("parent_doc", ""),
-                                    "page_number": doc_data.get("page_number", 0),
-                                    "width": doc_data.get("width", 0),
-                                    "height": doc_data.get("height", 0),
-                                    "position": doc_data.get("position", 0),
-                                    "content": ""
-                                }
-                            else:
-                                payload = {
-                                    "doc_id": doc_data.get("doc_id", ""),
-                                    "doc_type": doc_data.get("doc_type", "markdown"),
-                                    "content": doc_data.get("content", "")
-                                }
-                            payload_json = json.dumps(payload)
-                            conn.runLoadingJobWithData(payload_json, data_source_id, loader_info.load_job_id)
-                            processed_files.append({
-                                'file_path': doc_data.get("doc_id", ""),
-                                'parent_doc': doc_data.get("parent_doc", ""),
-                            })
-                            logger.info(f"Data uploading done for doc_id: {doc_data.get('doc_id', 'unknown')}")
-                        except Exception as file_error:
-                            logger.error(f"Error processing document {doc_data.get('doc_id', 'unknown')}: {file_error}")
+                
+                # Read from temporary folder
+                temp_folder = ingest_config.get("temp_folder")
+                if not temp_folder or not os.path.exists(temp_folder):
+                    raise Exception(f"Temporary folder not found: {temp_folder}")
+                
+                # Read all JSON files from temp folder
+                json_files = [f for f in os.listdir(temp_folder) if f.endswith('.json')]
+                logger.info(f"Reading {len(json_files)} documents from {temp_folder}")
+                
+                for json_filename in json_files:
+                    json_filepath = os.path.join(temp_folder, json_filename)
+                    try:
+                        with open(json_filepath, 'r', encoding='utf-8') as f:
+                            doc_data = json.load(f)
+                        
+                        if not doc_data.get("doc_id"):
+                            logger.warning(f"Skipping invalid document: {json_filename}")
                             continue
+                        # Skip documents with neither content nor image_data
+                        if not doc_data.get("content") and not doc_data.get("image_data"):
+                            logger.warning(f"Skipping document with no content: {json_filename}")
+                            continue
+                            
+                        if doc_data.get("image_data"):
+                            payload = {
+                                "doc_id": doc_data.get("doc_id", ""),
+                                "doc_type": "image",
+                                "image_data": doc_data.get("image_data", ""),
+                                "image_format": doc_data.get("image_format", "jpg"),
+                                "image_description": doc_data.get("image_description", ""),
+                                "parent_doc": doc_data.get("parent_doc", ""),
+                                "page_number": doc_data.get("page_number", 0),
+                                "width": doc_data.get("width", 0),
+                                "height": doc_data.get("height", 0),
+                                "position": doc_data.get("position", 0),
+                                "content": ""
+                            }
+                        else:
+                            payload = {
+                                "doc_id": doc_data.get("doc_id", ""),
+                                "doc_type": doc_data.get("doc_type", "markdown"),
+                                "content": doc_data.get("content", "")
+                            }
+                        payload_json = json.dumps(payload)
+                        conn.runLoadingJobWithData(payload_json, data_source_id, loader_info.load_job_id)
+                        processed_files.append({
+                            'file_path': doc_data.get("doc_id", ""),
+                            'parent_doc': doc_data.get("parent_doc", ""),
+                        })
+                        logger.info(f"Data uploading done for doc_id: {doc_data.get('doc_id', 'unknown')}")
+                    except Exception as file_error:
+                        logger.error(f"Error processing file {json_filename}: {file_error}")
+                        continue
+                
+                # Clean up temp folder after the loop; this also runs when some files failed above (failures are only logged)
+                try:
+                    import shutil
+                    shutil.rmtree(temp_folder)
+                    logger.info(f"Cleaned up temporary folder: {temp_folder}")
+                except Exception as cleanup_error:
+                    logger.warning(f"Failed to cleanup temp folder {temp_folder}: {cleanup_error}")
                     
             except Exception as e:
                 raise Exception(f"Error during server markdown extraction and TigerGraph loading: {e}")

From aa1ce342988f86d67c12b4bf4ebbb23d79a1fad6 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Wed, 3 Dec 2025 15:23:46 +0530
Subject: [PATCH 19/20] Redesign temp file storage: save immediately during
 file processing instead of after

---
 common/utils/text_extractors.py     | 77 ++++++++++++++++++++++++-----
 graphrag/app/supportai/supportai.py | 35 +++++--------
 2 files changed, 77 insertions(+), 35 deletions(-)

diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index 72e3a0c..eefc451 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -97,10 +97,11 @@ def __init__(self):
             '.jpg': 'image/jpeg'
         }
 
-    async def _process_file_async(self, file_path, folder_path_obj, graphname):
+    async def _process_file_async(self, file_path, folder_path_obj, graphname, temp_folder=None, file_counter=None):
         """
         Async helper to process a single file.
         Runs in thread pool to avoid blocking on I/O operations.
+        If temp_folder is provided, saves documents immediately and returns metadata only.
         """
         try:
             loop = asyncio.get_event_loop()
@@ -112,6 +113,27 @@ async def _process_file_async(self, file_path, folder_path_obj, graphname):
                 graphname
             )
 
+            # If temp_folder provided, save immediately and return metadata only
+            if temp_folder and doc_entries:
+                saved_files = []
+                for idx, doc_data in enumerate(doc_entries):
+                    # Use file_counter for unique naming across all files
+                    counter_val = next(file_counter) if file_counter else idx
+                    doc_filename = f"doc_{counter_val}_{doc_data.get('doc_id', 'unknown')}.json"
+                    doc_filepath = os.path.join(temp_folder, doc_filename)
+                    with open(doc_filepath, 'w', encoding='utf-8') as f:
+                        json.dump(doc_data, f, ensure_ascii=False, indent=2)
+                    saved_files.append(doc_filename)
+                
+                # Return metadata only, not full documents (memory efficient)
+                return {
+                    'success': True,
+                    'file_path': str(file_path),
+                    'saved_files': saved_files,
+                    'num_documents': len(doc_entries)
+                }
+            
+            # No temp_folder, or no documents extracted - return documents in memory (legacy behavior)
             return {
                 'success': True,
                 'file_path': str(file_path),
@@ -127,10 +149,11 @@ async def _process_file_async(self, file_path, folder_path_obj, graphname):
             logger.warning(f"Failed to process file {file_path}: {e}")
             return {'success': False, 'file_path': str(file_path), 'error': str(e)}
 
-    async def _process_folder_async(self, folder_path, graphname=None, max_concurrent=10):
+    async def _process_folder_async(self, folder_path, graphname=None, max_concurrent=10, temp_folder=None):
         """
         Async version of process_folder for parallel file processing.
         This prevents conflicts when multiple users process folders simultaneously.
+        If temp_folder is provided, saves documents immediately to disk instead of holding in memory.
         """
         logger.info(f"Processing local folder ASYNC: {folder_path} for graph: {graphname} (max_concurrent={max_concurrent})")
 
@@ -142,6 +165,11 @@ async def _process_folder_async(self, folder_path, graphname=None, max_concurren
         if not folder_path_obj.is_dir():
             raise Exception(f"Path is not a directory: {folder_path}")
 
+        # Create temp folder if provided
+        if temp_folder:
+            os.makedirs(temp_folder, exist_ok=True)
+            logger.info(f"Saving processed documents to: {temp_folder}")
+
         def safe_walk(path):
             try:
                 for item in path.iterdir():
@@ -166,16 +194,20 @@ def safe_walk(path):
         logger.info(f"Found {len(files_to_process)} files to process")
 
         semaphore = asyncio.Semaphore(max_concurrent)
+        
+        # Shared counter for unique file naming across files (range limits it to 100000 values)
+        file_counter = iter(range(100000)) if temp_folder else None
 
         async def process_with_semaphore(file_path):
             async with semaphore:
-                return await self._process_file_async(file_path, folder_path_obj, graphname)
+                return await self._process_file_async(file_path, folder_path_obj, graphname, temp_folder, file_counter)
 
         tasks = [process_with_semaphore(fp) for fp in files_to_process]
         results = await asyncio.gather(*tasks, return_exceptions=True)
 
         all_documents = []
         processed_files_info = []
+        total_saved_files = []
 
         for result in results:
             if isinstance(result, Exception):
@@ -183,10 +215,15 @@ async def process_with_semaphore(file_path):
                 continue
 
             if result.get('success'):
-                all_documents.extend(result.get('documents', []))
+                # If temp_folder was used, documents are saved to disk
+                if temp_folder:
+                    total_saved_files.extend(result.get('saved_files', []))
+                else:
+                    all_documents.extend(result.get('documents', []))
+                
                 processed_files_info.append({
                     'file_path': result['file_path'],
-                    'num_documents': result.get('num_documents', len(result.get('documents', []))),
+                    'num_documents': result.get('num_documents', 0),
                     'status': 'success'
                 })
             else:
@@ -196,23 +233,39 @@ async def process_with_semaphore(file_path):
                     'error': result.get('error', 'Unknown error')
                 })
 
-        logger.info(f"Processed {len(processed_files_info)} files, extracted {len(all_documents)} total documents")
+        total_docs = len(total_saved_files) if temp_folder else len(all_documents)
+        logger.info(f"Processed {len(processed_files_info)} files, extracted {total_docs} total documents")
 
-        return {
+        response = {
             'statusCode': 200,
-            'message': f'Processed {len(processed_files_info)} files, {len(all_documents)} documents',
-            'documents': all_documents,
+            'message': f'Processed {len(processed_files_info)} files, {total_docs} documents',
             'files': processed_files_info,
-            'num_documents': len(all_documents)
+            'num_documents': total_docs
         }
+        
+        # Only include documents in response if NOT saving to temp_folder
+        if temp_folder:
+            response['saved_to_temp'] = True
+            response['temp_folder'] = temp_folder
+            response['saved_files'] = total_saved_files
+        else:
+            response['documents'] = all_documents
+        
+        return response
 
-    def process_folder(self, folder_path, graphname=None):
+    def process_folder(self, folder_path, graphname=None, temp_folder=None):
         """
         Process local folder with multiple file formats and extract text content.
         Uses async processing internally for parallel file handling.
+        
+        Args:
+            folder_path: Path to the folder containing files to process
+            graphname: Name of the graph (for context)
+            temp_folder: Optional path to save processed documents immediately.
+                        If provided, documents are saved to disk instead of returned in memory.
         """
         logger.info(f"Processing local folder: {folder_path} for graph: {graphname}")
-        return asyncio.run(self._process_folder_async(folder_path, graphname))
+        return asyncio.run(self._process_folder_async(folder_path, graphname, temp_folder=temp_folder))
 
 
 def extract_text_from_file_with_images_as_docs(file_path, graphname=None):
diff --git a/graphrag/app/supportai/supportai.py b/graphrag/app/supportai/supportai.py
index 88542dc..2fb2e45 100644
--- a/graphrag/app/supportai/supportai.py
+++ b/graphrag/app/supportai/supportai.py
@@ -485,34 +485,23 @@ def create_ingest(
         if data_path is None:
             raise Exception("Folder path not provided for server processing")
         try:
-            extractor = TextExtractor()
-            server_processing_result = extractor.process_folder(data_path, graphname=graphname)
-            if server_processing_result.get("statusCode") != 200:
-                raise Exception(f"Server folder processing failed: {server_processing_result}")
-            
-            # Log only summary, NOT the full documents to avoid memory logging
-            logger.info(f"Server folder processing completed: {server_processing_result.get('message')}")
-
-            # Save processed documents to temporary folder instead of keeping in memory
+            # Create temp folder BEFORE processing so extractor can save directly
             temp_session_id = str(uuid.uuid4())
             temp_folder = os.path.join("uploads", "ingestion_temp", graphname, temp_session_id)
-            os.makedirs(temp_folder, exist_ok=True)
             
-            documents = server_processing_result.get("documents", [])
-            doc_count = len(documents)
-            
-            # Save each document as a separate JSON file
-            for idx, doc_data in enumerate(documents):
-                doc_filename = f"doc_{idx}_{doc_data.get('doc_id', 'unknown')}.json"
-                doc_filepath = os.path.join(temp_folder, doc_filename)
-                with open(doc_filepath, 'w', encoding='utf-8') as f:
-                    json.dump(doc_data, f, ensure_ascii=False, indent=2)
+            # Process files and save immediately to temp folder (memory efficient)
+            extractor = TextExtractor()
+            server_processing_result = extractor.process_folder(
+                data_path, 
+                graphname=graphname,
+                temp_folder=temp_folder  # Extractor saves files as it processes
+            )
             
-            # Clear documents from memory immediately after saving
-            documents.clear()
-            server_processing_result.clear()
+            if server_processing_result.get("statusCode") != 200:
+                raise Exception(f"Server folder processing failed: {server_processing_result}")
             
-            logger.info(f"Saved {doc_count} processed documents to {temp_folder}")
+            doc_count = server_processing_result.get("num_documents", 0)
+            logger.info(f"Server folder processing completed: {server_processing_result.get('message')}")
             
             res_ingest_config["temp_session_id"] = temp_session_id
             res_ingest_config["temp_folder"] = temp_folder

From 845fd9133dfd7b3030536b00828ea2e2442b9090 Mon Sep 17 00:00:00 2001
From: Prins Kumar <prins.kumar@agivant.com>
Date: Wed, 3 Dec 2025 17:29:22 +0530
Subject: [PATCH 20/20] Add Server Configuration UI for real-time LLM and
 GraphRAG config updates

---
 common/config.py                |  56 ++++
 graphrag-ui/src/pages/Setup.tsx | 441 +++++++++++++++++++++++++++++++-
 graphrag/app/routers/ui.py      |  71 ++++-
 3 files changed, 566 insertions(+), 2 deletions(-)

diff --git a/common/config.py b/common/config.py
index 703d3f8..be4e7fe 100644
--- a/common/config.py
+++ b/common/config.py
@@ -51,6 +51,86 @@
 
 # Configs
 SERVER_CONFIG = os.getenv("SERVER_CONFIG", "configs/server_config.json")
+
+
+def get_config_file_path():
+    """Return the path of the server config file (the SERVER_CONFIG setting)."""
+    return SERVER_CONFIG
+
+
+def get_current_config():
+    """Return the live in-memory llm_config and graphrag_config.
+
+    The values are the module-level objects themselves, not copies, so
+    mutating them changes the configuration used by the rest of the process.
+    """
+    return {
+        "llm_config": llm_config,
+        "graphrag_config": graphrag_config,
+    }
+
+
+def update_config(new_llm_config: dict = None, new_graphrag_config: dict = None, persist: bool = True):
+    """
+    Replace the in-memory configuration and optionally persist it to file.
+
+    The module-level dicts are updated in place (clear + update), so code that
+    already holds a reference to them sees the new values without a restart.
+
+    Args:
+        new_llm_config: Full replacement for llm_config; None leaves it as is.
+        new_graphrag_config: Full replacement for graphrag_config; None leaves
+            it as is.
+        persist: If True and the config path ends in ".json", also write the
+            supplied sections into that file.
+
+    Returns:
+        True once every requested update has been applied.
+
+    Raises:
+        TypeError: If a supplied config is not a dict (or, when persisting,
+            is not JSON-serializable).
+        OSError, ValueError: If the config file cannot be read, parsed or
+            written.
+
+    On any error the in-memory configuration is left untouched.
+    """
+    # Validate and snapshot first: clear() followed by a failing update()
+    # would wipe the live config, and a caller may pass the live dict itself
+    # (e.g. from get_current_config()), which clear() would empty.
+    replacements = []
+    for name, target, new_value in (
+        ("llm_config", llm_config, new_llm_config),
+        ("graphrag_config", graphrag_config, new_graphrag_config),
+    ):
+        if new_value is None:
+            continue
+        if not isinstance(new_value, dict):
+            raise TypeError(f"{name} must be a dict, got {type(new_value).__name__}")
+        replacements.append((name, target, dict(new_value)))
+
+    # Persist before touching memory so that a file error cannot leave the
+    # running config and the file out of sync.
+    if persist:
+        config_path = get_config_file_path()
+        if config_path.endswith(".json"):
+            with open(config_path, "r") as f:
+                file_config = json.load(f)
+            for name, _, snapshot in replacements:
+                file_config[name] = snapshot
+            # Serialize before opening for write: mode "w" truncates the file,
+            # so a failure inside json.dump() would leave it corrupt.
+            serialized = json.dumps(file_config, indent=2)
+            with open(config_path, "w") as f:
+                f.write(serialized)
+
+    for _, target, snapshot in replacements:
+        target.clear()
+        target.update(snapshot)
+
+    return True
+
+
 PATH_PREFIX = os.getenv("PATH_PREFIX", "")
 PRODUCTION = os.getenv("PRODUCTION", "false").lower() == "true"
 
diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx
index 17f952c..8216e86 100644
--- a/graphrag-ui/src/pages/Setup.tsx
+++ b/graphrag-ui/src/pages/Setup.tsx
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
 import { useNavigate } from "react-router-dom";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
-import { Database, Upload, RefreshCw, Loader2, Trash2, FolderUp, Cloud, ArrowLeft, CloudDownload, CloudCog } from "lucide-react";
+import { Database, Upload, RefreshCw, Loader2, Trash2, FolderUp, Cloud, ArrowLeft, CloudDownload, CloudCog, Settings } from "lucide-react";
 import {
   Dialog,
   DialogContent,
@@ -100,6 +100,26 @@ const Setup = () => {
   const [isDownloading, setIsDownloading] = useState(false);
   const [downloadMessage, setDownloadMessage] = useState("");
 
+  // Server Configuration state
+  const [configOpen, setConfigOpen] = useState(false);
+  const [isLoadingConfig, setIsLoadingConfig] = useState(false);
+  const [isSavingConfig, setIsSavingConfig] = useState(false);
+  const [configMessage, setConfigMessage] = useState("");
+  const [configMessageType, setConfigMessageType] = useState<"success" | "error" | "">("");
+  
+  // LLM Config state
+  const [llmService, setLlmService] = useState("openai");
+  const [llmModel, setLlmModel] = useState("");
+  const [llmApiKey, setLlmApiKey] = useState("");
+  const [llmTemperature, setLlmTemperature] = useState("0");
+  const [embeddingService, setEmbeddingService] = useState("openai");
+  const [embeddingModel, setEmbeddingModel] = useState("");
+  
+  // GraphRAG Config state
+  const [chunkerType, setChunkerType] = useState("semantic");
+  const [extractorType, setExtractorType] = useState("llm");
+  const [reuseEmbedding, setReuseEmbedding] = useState(true);
+
   // Fetch uploaded files
   const fetchUploadedFiles = async () => {
     if (!ingestGraphName) return;
@@ -585,6 +605,153 @@ const Setup = () => {
     }
   };
 
+  // Fetch server configuration
+  const fetchServerConfig = async () => {
+    setIsLoadingConfig(true);
+    setConfigMessage("");
+    setConfigMessageType("");
+    
+    try {
+      const creds = localStorage.getItem("creds");
+      const response = await fetch("/ui/config", {
+        headers: { Authorization: `Basic ${creds}` },
+      });
+      
+      if (!response.ok) {
+        throw new Error("Failed to fetch configuration");
+      }
+      
+      const data = await response.json();
+      
+      if (data.status === "success" && data.config) {
+        const { llm_config, graphrag_config } = data.config;
+        
+        // Set LLM config values
+        if (llm_config) {
+          const completionService = llm_config.completion_service || {};
+          const embeddingServiceConfig = llm_config.embedding_service || {};
+          const authConfig = llm_config.authentication_configuration || {};
+          
+          setLlmService(completionService.llm_service || "openai");
+          setLlmModel(completionService.llm_model || "");
+          setLlmTemperature(String(completionService.model_kwargs?.temperature ?? "0"));
+          setEmbeddingService(embeddingServiceConfig.embedding_model_service || "openai");
+          setEmbeddingModel(embeddingServiceConfig.model_name || "");
+          
+          // Get API key (masked for display)
+          const apiKey = authConfig.OPENAI_API_KEY || authConfig.AZURE_OPENAI_API_KEY || "";
+          setLlmApiKey(apiKey ? "••••••••" : "");
+        }
+        
+        // Set GraphRAG config values
+        if (graphrag_config) {
+          setChunkerType(graphrag_config.chunker || "semantic");
+          setExtractorType(graphrag_config.extractor || "llm");
+          setReuseEmbedding(graphrag_config.reuse_embedding !== false);
+        }
+      }
+    } catch (error: any) {
+      console.error("Error fetching config:", error);
+      setConfigMessage(`❌ Error loading configuration: ${error.message}`);
+      setConfigMessageType("error");
+    } finally {
+      setIsLoadingConfig(false);
+    }
+  };
+
+  // Save server configuration: merge the form values into the server's current config and PUT it back
+  const handleSaveConfig = async () => {
+    setIsSavingConfig(true);
+    setConfigMessage("");
+    setConfigMessageType("");
+
+    try {
+      const creds = localStorage.getItem("creds");
+
+      // First fetch the current config to preserve other settings
+      const fetchResponse = await fetch("/ui/config", {
+        headers: { Authorization: `Basic ${creds}` },
+      });
+
+      if (!fetchResponse.ok) {
+        throw new Error("Failed to fetch current configuration");
+      }
+
+      const currentData = await fetchResponse.json();
+      const currentConfig = currentData.config || {};
+
+      // Build updated config
+      const updatedLlmConfig = {
+        ...currentConfig.llm_config,
+        completion_service: {
+          ...currentConfig.llm_config?.completion_service,
+          llm_service: llmService,
+          llm_model: llmModel,
+          model_kwargs: {
+            ...currentConfig.llm_config?.completion_service?.model_kwargs,
+            temperature: parseFloat(llmTemperature) || 0,
+          },
+        },
+        embedding_service: {
+          ...currentConfig.llm_config?.embedding_service,
+          embedding_model_service: embeddingService,
+          model_name: embeddingModel,
+        },
+      };
+
+      // Only update the API key if the user typed a new one (not the mask); store it under the key the selected service uses
+      if (llmApiKey && !llmApiKey.includes("•")) {
+        const apiKeyName = llmService === "azure" ? "AZURE_OPENAI_API_KEY" : "OPENAI_API_KEY";
+        updatedLlmConfig.authentication_configuration = {
+          ...currentConfig.llm_config?.authentication_configuration,
+          [apiKeyName]: llmApiKey,
+        };
+      }
+
+      const updatedGraphragConfig = {
+        ...currentConfig.graphrag_config,
+        chunker: chunkerType,
+        extractor: extractorType,
+        reuse_embedding: reuseEmbedding,
+      };
+
+      // Save updated config
+      const saveResponse = await fetch("/ui/config", {
+        method: "PUT",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Basic ${creds}`,
+        },
+        body: JSON.stringify({
+          llm_config: updatedLlmConfig,
+          graphrag_config: updatedGraphragConfig,
+        }),
+      });
+
+      if (!saveResponse.ok) {
+        const errorData = await saveResponse.json().catch(() => ({})); // error body may not be JSON
+        throw new Error(errorData.detail || "Failed to save configuration");
+      }
+
+      await saveResponse.json();
+      setConfigMessage("✅ Configuration saved successfully! Changes are now active.");
+      setConfigMessageType("success");
+    } catch (error: any) {
+      console.error("Error saving config:", error);
+      setConfigMessage(`❌ Error saving configuration: ${error.message}`);
+      setConfigMessageType("error");
+    } finally {
+      setIsSavingConfig(false);
+    }
+  };
+
+  // Load config when dialog opens
+  useEffect(() => {
+    if (configOpen) {
+      fetchServerConfig();
+    }
+  }, [configOpen]);
+
   // Run final ingest after user reviews temp files
   const handleRunIngest = async () => {
     if (!ingestJobData) {
@@ -1286,6 +1453,33 @@ const Setup = () => {
 
         </div>
 
+        {/* Server Configuration - Separate row */}
+        <div className="grid grid-cols-1 lg:grid-cols-3 gap-6 mt-6">
+          {/* Section 4: Server Configuration */}
+          <div className="border border-gray-300 dark:border-[#3D3D3D] rounded-lg p-6 bg-white dark:bg-shadeA flex flex-col h-full">
+            <div className="mb-4">
+              <div className="w-12 h-12 rounded-full bg-tigerOrange/10 flex items-center justify-center mb-4">
+                <Settings className="h-6 w-6 text-tigerOrange" />
+              </div>
+              <h2 className="text-lg font-semibold mb-2 text-black dark:text-white">
+                Server Configuration
+              </h2>
+              <p className="text-sm text-gray-600 dark:text-[#D9D9D9] mb-4">
+                Configure LLM settings and GraphRAG options for your server.
+              </p>
+            </div>
+            <div className="mt-auto pt-4 border-t border-gray-300 dark:border-[#3D3D3D]">
+              <Button 
+                className="gradient w-full text-white"
+                onClick={() => setConfigOpen(true)}
+              >
+                <Settings className="h-4 w-4 mr-2" />
+                Configure Server
+              </Button>
+            </div>
+          </div>
+        </div>
+
         {/* Initialize Graph Dialog */}
         <Dialog 
           open={initializeGraphOpen}
@@ -2185,6 +2379,251 @@ const Setup = () => {
           </DialogContent>
         </Dialog>
 
+        {/* Server Configuration Dialog */}
+        <Dialog 
+          open={configOpen} 
+          onOpenChange={(open) => {
+            if (!open && isConfirmDialogOpen) {
+              return;
+            }
+            setConfigOpen(open);
+          }}
+        >
+          <DialogContent 
+            className="sm:max-w-[600px] bg-white dark:bg-background border-gray-300 dark:border-[#3D3D3D] max-h-[80vh] overflow-y-auto"
+            onInteractOutside={(e) => e.preventDefault()}
+          >
+            <DialogHeader>
+              <DialogTitle className="text-black dark:text-white">Server Configuration</DialogTitle>
+              <DialogDescription className="text-gray-600 dark:text-[#D9D9D9]">
+                Configure LLM and GraphRAG settings. Changes take effect immediately without restart.
+              </DialogDescription>
+            </DialogHeader>
+
+            {isLoadingConfig ? (
+              <div className="flex items-center justify-center py-8">
+                <Loader2 className="h-8 w-8 animate-spin text-tigerOrange" />
+                <span className="ml-2 text-gray-600 dark:text-gray-400">Loading configuration...</span>
+              </div>
+            ) : (
+              <Tabs defaultValue="llm" className="w-full">
+                <TabsList className="grid w-full grid-cols-2">
+                  <TabsTrigger value="llm">LLM Configuration</TabsTrigger>
+                  <TabsTrigger value="graphrag">GraphRAG Configuration</TabsTrigger>
+                </TabsList>
+
+                {/* LLM Configuration Tab */}
+                <TabsContent value="llm" className="space-y-4 mt-4">
+                  <div>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                      LLM Service
+                    </label>
+                    <Select value={llmService} onValueChange={setLlmService}>
+                      <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
+                        <SelectValue placeholder="Select LLM service" />
+                      </SelectTrigger>
+                      <SelectContent>
+                        <SelectItem value="openai">OpenAI</SelectItem>
+                        <SelectItem value="azure">Azure OpenAI</SelectItem>
+                        <SelectItem value="bedrock">AWS Bedrock</SelectItem>
+                        <SelectItem value="vertexai">Google VertexAI</SelectItem>
+                        <SelectItem value="genai">Google GenAI</SelectItem>
+                        <SelectItem value="ollama">Ollama</SelectItem>
+                        <SelectItem value="groq">Groq</SelectItem>
+                        <SelectItem value="huggingface">HuggingFace</SelectItem>
+                        <SelectItem value="watsonx">IBM WatsonX</SelectItem>
+                      </SelectContent>
+                    </Select>
+                  </div>
+
+                  <div>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                      LLM Model
+                    </label>
+                    <Input
+                      type="text"
+                      value={llmModel}
+                      onChange={(e) => setLlmModel(e.target.value)}
+                      placeholder="e.g., gpt-4o-mini, gpt-4.1-mini"
+                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                    />
+                  </div>
+
+                  <div>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                      API Key
+                    </label>
+                    <Input
+                      type="password"
+                      value={llmApiKey}
+                      onChange={(e) => setLlmApiKey(e.target.value)}
+                      placeholder="Enter API key (leave blank to keep current)"
+                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                    />
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mt-1">
+                      Leave blank to keep the current API key
+                    </p>
+                  </div>
+
+                  <div>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                      Temperature
+                    </label>
+                    <Input
+                      type="number"
+                      step="0.1"
+                      min="0"
+                      max="2"
+                      value={llmTemperature}
+                      onChange={(e) => setLlmTemperature(e.target.value)}
+                      placeholder="0"
+                      className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                    />
+                  </div>
+
+                  <div className="border-t border-gray-300 dark:border-[#3D3D3D] pt-4 mt-4">
+                    <h4 className="text-sm font-medium mb-3 text-black dark:text-white">Embedding Service</h4>
+                    
+                    <div className="space-y-4">
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                          Embedding Service
+                        </label>
+                        <Select value={embeddingService} onValueChange={setEmbeddingService}>
+                          <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
+                            <SelectValue placeholder="Select embedding service" />
+                          </SelectTrigger>
+                          <SelectContent>
+                            <SelectItem value="openai">OpenAI</SelectItem>
+                            <SelectItem value="azure">Azure OpenAI</SelectItem>
+                            <SelectItem value="bedrock">AWS Bedrock</SelectItem>
+                            <SelectItem value="vertexai">Google VertexAI</SelectItem>
+                            <SelectItem value="genai">Google GenAI</SelectItem>
+                            <SelectItem value="ollama">Ollama</SelectItem>
+                          </SelectContent>
+                        </Select>
+                      </div>
+
+                      <div>
+                        <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                          Embedding Model
+                        </label>
+                        <Input
+                          type="text"
+                          value={embeddingModel}
+                          onChange={(e) => setEmbeddingModel(e.target.value)}
+                          placeholder="e.g., text-embedding-3-small"
+                          className="dark:border-[#3D3D3D] dark:bg-shadeA"
+                        />
+                      </div>
+                    </div>
+                  </div>
+                </TabsContent>
+
+                {/* GraphRAG Configuration Tab */}
+                <TabsContent value="graphrag" className="space-y-4 mt-4">
+                  <div>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                      Chunker Type
+                    </label>
+                    <Select value={chunkerType} onValueChange={setChunkerType}>
+                      <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
+                        <SelectValue placeholder="Select chunker type" />
+                      </SelectTrigger>
+                      <SelectContent>
+                        <SelectItem value="semantic">Semantic</SelectItem>
+                        <SelectItem value="character">Character</SelectItem>
+                        <SelectItem value="recursive">Recursive</SelectItem>
+                        <SelectItem value="regex">Regex</SelectItem>
+                        <SelectItem value="markdown">Markdown</SelectItem>
+                        <SelectItem value="html">HTML</SelectItem>
+                        <SelectItem value="single">Single</SelectItem>
+                      </SelectContent>
+                    </Select>
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mt-1">
+                      Semantic chunking uses AI to create meaningful chunks
+                    </p>
+                  </div>
+
+                  <div>
+                    <label className="block text-sm font-medium mb-2 text-black dark:text-white">
+                      Extractor Type
+                    </label>
+                    <Select value={extractorType} onValueChange={setExtractorType}>
+                      <SelectTrigger className="dark:border-[#3D3D3D] dark:bg-shadeA">
+                        <SelectValue placeholder="Select extractor type" />
+                      </SelectTrigger>
+                      <SelectContent>
+                        <SelectItem value="llm">LLM</SelectItem>
+                        <SelectItem value="graph">Graph</SelectItem>
+                      </SelectContent>
+                    </Select>
+                    <p className="text-xs text-gray-500 dark:text-gray-400 mt-1">
+                      LLM extractor uses AI to extract entities and relationships
+                    </p>
+                  </div>
+
+                  <div className="flex items-center space-x-2 pt-2">
+                    <input
+                      type="checkbox"
+                      id="reuseEmbedding"
+                      checked={reuseEmbedding}
+                      onChange={(e) => setReuseEmbedding(e.target.checked)}
+                      className="h-4 w-4 rounded border-gray-300 text-tigerOrange focus:ring-tigerOrange"
+                    />
+                    <label htmlFor="reuseEmbedding" className="text-sm text-black dark:text-white">
+                      Reuse Embeddings
+                    </label>
+                  </div>
+                  <p className="text-xs text-gray-500 dark:text-gray-400">
+                    When enabled, existing embeddings will be reused instead of regenerating
+                  </p>
+                </TabsContent>
+              </Tabs>
+            )}
+
+            {configMessage && (
+              <div className={`p-3 rounded-lg text-sm ${
+                configMessageType === "success"
+                  ? "bg-green-50 dark:bg-green-900/20 text-green-700 dark:text-green-300"
+                  : configMessageType === "error"
+                  ? "bg-red-50 dark:bg-red-900/20 text-red-700 dark:text-red-300"
+                  : "bg-blue-50 dark:bg-blue-900/20 text-blue-700 dark:text-blue-300"
+              }`}>
+                {configMessage}
+              </div>
+            )}
+
+            <DialogFooter>
+              <Button
+                variant="outline"
+                onClick={() => setConfigOpen(false)}
+                disabled={isSavingConfig}
+                className="dark:border-[#3D3D3D]"
+              >
+                Cancel
+              </Button>
+              <Button
+                onClick={handleSaveConfig}
+                disabled={isSavingConfig || isLoadingConfig}
+                className="gradient text-white"
+              >
+                {isSavingConfig ? (
+                  <>
+                    <Loader2 className="h-4 w-4 mr-2 animate-spin" />
+                    Saving...
+                  </>
+                ) : (
+                  <>
+                    <Settings className="h-4 w-4 mr-2" />
+                    Save Configuration
+                  </>
+                )}
+              </Button>
+            </DialogFooter>
+          </DialogContent>
+        </Dialog>
+
         {/* User Confirmation Dialog */}
         {confirmDialog}
       </div>
diff --git a/graphrag/app/routers/ui.py b/graphrag/app/routers/ui.py
index 9b012ec..4d99c15 100644
--- a/graphrag/app/routers/ui.py
+++ b/graphrag/app/routers/ui.py
@@ -46,7 +46,7 @@
 from pyTigerGraph import TigerGraphConnection
 from tools.validation_utils import MapQuestionToSchemaException
 
-from common.config import db_config, graphrag_config, embedding_service, llm_config, service_status
+from common.config import db_config, graphrag_config, embedding_service, llm_config, service_status, get_current_config, update_config
 from common.db.connections import get_db_connection_pwd_manual
 from common.logs.log import req_id_cv
 from common.logs.logwriter import LogWriter
@@ -1516,3 +1516,92 @@ async def delete_ingestion_temp_files(
         logger.debug_pii(f"Delete error trace:\n{exc}")
         raise HTTPException(status_code=500, detail=f"Error deleting temp files: {str(e)}")
 
+
+# =====================================================
+# Server Configuration Endpoints
+# =====================================================
+
+@router.get(f"{route_prefix}/config")
+def get_server_config_endpoint(
+    creds: Annotated[tuple[list[str], HTTPBasicCredentials], Depends(ui_basic_auth)],
+):
+    """
+    Get the current server configuration (LLM config and GraphRAG config).
+    This returns the in-memory configuration that is actively being used.
+
+    NOTE(review): the config is returned verbatim. The UI reads API keys from
+    llm_config.authentication_configuration in this response, so secrets
+    appear to reach the client in clear text -- consider redacting them here.
+    """
+    try:
+        config = get_current_config()
+
+        return {
+            "status": "success",
+            "config": config
+        }
+    except Exception as e:
+        logger.error(f"Error reading server config: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error reading server configuration: {str(e)}"
+        )
+
+
+@router.put(f"{route_prefix}/config")
+def update_server_config_endpoint(
+    creds: Annotated[tuple[list[str], HTTPBasicCredentials], Depends(ui_basic_auth)],
+    config_update: dict = Body(...),
+):
+    """
+    Update the server configuration (LLM config and/or GraphRAG config).
+    Changes take effect immediately in memory AND are persisted to server_config.json.
+    No container restart required!
+
+    Parameters:
+    - config_update: JSON body containing llm_config and/or graphrag_config to update
+
+    Raises:
+    - HTTPException 400: a supplied section is not a JSON object
+    - HTTPException 500: applying or persisting the update failed
+    """
+    new_llm_config = config_update.get("llm_config")
+    new_graphrag_config = config_update.get("graphrag_config")
+
+    # Reject malformed payloads up front so that only JSON objects are ever
+    # applied to the live configuration; a client error is a 400, not a 500.
+    for section, value in (
+        ("llm_config", new_llm_config),
+        ("graphrag_config", new_graphrag_config),
+    ):
+        if value is not None and not isinstance(value, dict):
+            raise HTTPException(
+                status_code=400,
+                detail=f"'{section}' must be a JSON object"
+            )
+
+    try:
+        # Update in-memory config and persist to file
+        update_config(
+            new_llm_config=new_llm_config,
+            new_graphrag_config=new_graphrag_config,
+            persist=True
+        )
+
+        logger.info("Server configuration updated successfully (in-memory and persisted)")
+
+        # Return the updated config
+        updated_config = get_current_config()
+
+        return {
+            "status": "success",
+            "message": "Configuration updated successfully. Changes are now active.",
+            "config": updated_config
+        }
+    except Exception as e:
+        logger.error(f"Error updating server config: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error updating server configuration: {str(e)}"
+        )
+