From 14d94abdf3119ae94fd87f8158fc694edd5c8882 Mon Sep 17 00:00:00 2001
From: Paula <paula.mihu@arangodb.com>
Date: Tue, 5 Aug 2025 12:30:57 +0200
Subject: [PATCH 1/2] add initial content

---
 .../graphrag/private-llm-triton-tutorial.md   | 198 ++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 site/content/3.13/data-science/graphrag/private-llm-triton-tutorial.md

diff --git a/site/content/3.13/data-science/graphrag/private-llm-triton-tutorial.md b/site/content/3.13/data-science/graphrag/private-llm-triton-tutorial.md
new file mode 100644
index 0000000000..84258e35bb
--- /dev/null
+++ b/site/content/3.13/data-science/graphrag/private-llm-triton-tutorial.md
@@ -0,0 +1,198 @@
+---
+title: How to use GraphRAG with a Private LLM
+menuTitle: Private LLM Tutorial
+weight: 15
+description: >-
+ Learn how to create, configure, and run a full GraphRAG workflow
+ using a private LLM and Triton Inference Server
+---
+{{< tag "ArangoDB Platform" >}}
+
+{{< tip >}}
+The ArangoDB Platform & GenAI Suite is available as a pre-release. To get
+exclusive early access, [get in touch](https://arangodb.com/contact/) with
+the ArangoDB team.
+{{< /tip >}}
+
+## Prerequisite: Get an LLM to host
+
+If you already have an LLM, you can skip this step. If you are new to LLMs
+(Large Language Models), this section explains how to get and prepare an
+open-source LLM.
+
+This tutorial downloads an open-source model from Hugging Face, but you can
+use any other model provider.
+
+### Install the Hugging Face CLI
+
+Follow the official [Hugging Face guide](https://huggingface.co/docs/huggingface_hub/en/guides/cli)
+to install the CLI.
+
+You should now be able to run the `hf --help` command.
+
+### Download a model
+
+Pick the model you want to use. For demonstration purposes, this tutorial is
+using a [Nemotron model](https://huggingface.co/nvidia/OpenReasoning-Nemotron-7B).
+
+You can download it with the following command:
+```
+hf download nvidia/OpenReasoning-Nemotron-7B
+```
+
+Refer to the Hugging Face documentation for more details.
+
+{{< info >}}
+ArangoDB explicitly provides no further guarantees or guidance on the chosen LLM.
+ArangoDB's goal is to work with any LLM available in the market.
+{{< /info >}}
+
+### Export model as ONNX
+
+ONNX is an open standard that defines a common set of operators and a file format
+to represent deep learning models in different frameworks. The Optimum library
+exports a model to ONNX with configuration objects which are supported for many
+architectures and can be easily extended.
+
+Follow the [Hugging Face guideline](https://huggingface.co/docs/transformers/serialization)
+to export the model to the ONNX format via Optimum.
+
+After installing Optimum, run the following command:
+```
+optimum-cli export onnx --model nvidia/OpenReasoning-Nemotron-7B MyModel
+```
+{{< tip >}}
+Replace `MyModel` with a name of your choice for your model.
+{{< /tip >}}
+
+This exports the model into ONNX format, which is currently required.
+
+## Prepare the necessary files
+
+You need two files for the model to work:
+- Triton configuration file: `config.pb.txt`
+- Python backend file: `model.py`
+
+{{< info >}}
+Currently, only the Python backend of Triton is supported for use with the rest of the GenAI services.
+Other operating modes will be added in future versions.
+{{< /info >}}
+
+### Triton configuration file
+
+To ensure compatibility with the Triton service, you need the following configuration
+file `config.pb.txt`, which must be placed next to your Models folder:
+
+```yaml
+name: "MyModel" # Set the name to the one you chose previously
+backend: "python"
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_STRING
+    dims: [-1]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_STRING
+    dims: [-1]
+  }
+]
+instance_group [
+  {
+    count: 1
+    kind: KIND_GPU
+  }
+]
+```
+
+This configuration defines the display name of the Model, specifies the use of
+the Python backend, and sets input and output as string tokens for text generation.
+It also configures the model to use 1 GPU on the Triton server.
+
+### Triton Python backend
+
+Next, you need to implement Python code for the backend to handle the text
+tokenization within the Triton server.
+
+Therefore, place a file named `model.py` in your model folder with the following content:
+
+```python
+import numpy as np
+import json
+import triton_python_backend_utils as pb_utils
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+
+class TritonPythonModel:
+    def initialize(self, args):
+        model_path = args['model_repository'] + "/" + args['model_version'] + "/"
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.model = AutoModelForCausalLM.from_pretrained(model_path)
+        self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer, batch_size=1)
+
+    def execute(self, requests):
+        responses = []
+        for request in requests:
+            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
+            input_text = in_0.as_numpy()[0].decode('utf-8')
+
+            try:
+                input_data = json.loads(input_text)
+            except json.JSONDecodeError:
+                input_data = eval(input_text)
+
+            prompt = self.tokenizer.apply_chat_template(input_data, tokenize=False, add_generation_prompt=True)
+            output = self.pipe(prompt, max_new_tokens=1024, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+            generated_text = output[0]['generated_text'][len(prompt):].strip()
+
+            out_tensor = pb_utils.Tensor("OUTPUT0", np.array([generated_text], dtype=object))
+            responses.append(pb_utils.InferenceResponse(output_tensors=[out_tensor]))
+        return responses
+```
+
+The above code is generic and should work for most CausalLM Models. Check Hugging
+Face Transformers to see if your model supports `AutoModelForCausalLM`.
+If not, you need to adjust this file. You may also need to adjust it if you want
+to fine-tune the configuration of your model. This tutorial prioritizes a
+plug-and-play workflow over fine-tuning for maximum performance, aiming to work
+in most common scenarios.
+
+### Model directory structure
+
+After preparing these files, your directory structure should look similar to this:
+
+```
+.
+├── config.pb.txt
+└── MyModel
+    ├── added_tokens.json
+    ├── chat_template.jinja
+    ├── config.json
+    ├── generation_config.json
+    ├── merges.txt
+    ├── model.onnx
+    ├── model.onnx_data
+    ├── model.py
+    ├── special_tokens_map.json
+    ├── tokenizer_config.json
+    ├── tokenizer.json
+    └── vocab.json
+```
+
+Now you are ready to upload the model.
+
+### Upload the Model to MLflow
+
+First, you need to install the MLflow CLI.
+
+```
+pip install mlflow==2.22.1
+```
+
+{{< warning >}}
+MLflow version 3 introduces a breaking change that affects this workflow, so it is
+important to use MLflow version 2.
+{{< /warning >}}
+

From 6dc2bac817c0d5391d9318899199798051c7bfad Mon Sep 17 00:00:00 2001
From: Simran Spiller <simran@arangodb.com>
Date: Wed, 19 Nov 2025 14:58:21 +0100
Subject: [PATCH 2/2] Use ```py instead of ```python

---
 site/content/ai-suite/reference/mlflow.md     |  4 +-
 .../reference/triton-inference-server.md      |  2 +-
 .../arangodb/3.11/develop/drivers/python.md   | 30 ++++-----
 site/content/ecosystem/drivers/python.md      | 62 +++++++++----------
 4 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/site/content/ai-suite/reference/mlflow.md b/site/content/ai-suite/reference/mlflow.md
index 8de8e83414..520cf28189 100644
--- a/site/content/ai-suite/reference/mlflow.md
+++ b/site/content/ai-suite/reference/mlflow.md
@@ -112,7 +112,7 @@ There are two approaches for programmatic access to your ArangoDB MLflow service
 
 ### Configuration in Python
 
-```python
+```py
 import mlflow
 import os
 
@@ -136,7 +136,7 @@ export MLFLOW_TRACKING_TOKEN="your-bearer-token-here"
 
 Then use MLflow normally in your Python code:
 
-```python
+```py
 import mlflow
 
 # MLflow automatically uses the environment variables
diff --git a/site/content/ai-suite/reference/triton-inference-server.md b/site/content/ai-suite/reference/triton-inference-server.md
index 5320c557e5..7e5857dfa6 100644
--- a/site/content/ai-suite/reference/triton-inference-server.md
+++ b/site/content/ai-suite/reference/triton-inference-server.md
@@ -95,7 +95,7 @@ Triton service. Each model requires the following two files:
 1. **`model.py`**
    Implements the Python backend model. Triton uses this file to load and 
    execute your model for inference.
-   ```python
+   ```py
    class TritonPythonModel:
        def initialize(self, args):
            # Load your model here
diff --git a/site/content/arangodb/3.11/develop/drivers/python.md b/site/content/arangodb/3.11/develop/drivers/python.md
index bea8233d48..b8e39c9dde 100644
--- a/site/content/arangodb/3.11/develop/drivers/python.md
+++ b/site/content/arangodb/3.11/develop/drivers/python.md
@@ -27,7 +27,7 @@ pip install python-arango --upgrade
 
 You can then import the library in your project as follows:
 
-```python
+```py
 from arango import ArangoClient
 ```
 
@@ -37,7 +37,7 @@ The following example shows how to use the driver from connecting to ArangoDB,
 over creating databases, collections, indexes, and documents, to retrieving
 data using queries:
 
-```python
+```py
 from arango import ArangoClient
 
 # Initialize the client for ArangoDB.
@@ -71,7 +71,7 @@ student_names = [document["name"] for document in cursor]
 The following example shows how to create a [named graph](../../graphs/_index.md),
 populate it with vertices and edges, and query it with a graph traversal:
 
-```python
+```py
 from arango import ArangoClient
 
 # Initialize the client for ArangoDB.
@@ -134,7 +134,7 @@ To connect to a database, create an instance of `ArangoClient` which provides a
 connection to the database server. Then call its `db` method and pass the
 database name, user name, and password as parameters.
 
-```python
+```py
 from arango import ArangoClient
 
 # Initialize a client
@@ -149,7 +149,7 @@ sys_db = client.db("_system", username="root", password="qwerty")
 To retrieve a list of all databases on an ArangoDB server, connect to the
 `_system` database and call the `databases()` method.
 
-```python
+```py
 # Retrieve the names of all databases on the server as list of strings
 db_list = sys_db.databases()
 ```
@@ -159,7 +159,7 @@ db_list = sys_db.databases()
 To create a new database, connect to the `_system` database and call
 `create_database()`.
 
-```python
+```py
 # Create a new database named "test".
 sys_db.create_database("test")
 
@@ -174,7 +174,7 @@ To delete an existing database, connect to the `_system` database and call
 parameter. The `_system` database cannot be deleted. Make sure to specify
 the correct database name when you are deleting databases.
 
-```python
+```py
 # Delete the 'test' database
 sys_db.delete_database("test")
 ```
@@ -186,7 +186,7 @@ sys_db.delete_database("test")
 To retrieve a list of collections in a database, connect to the database and
 call `collections()`.
 
-```python
+```py
 # Connect to the database
 db = client.db(db_name, username=user_name, password=pass_word)
 
@@ -198,7 +198,7 @@ collection_list = db.collections()
 
 To create a new collection, connect to the database and call `create_collection()`.
 
-```python
+```py
 # Create a new collection for doctors
 doctors_col = db.create_collection(name="doctors")
 
@@ -212,7 +212,7 @@ To delete a collection, connect to the database and call `delete_collection()`,
 passing the name of the collection to be deleted as a parameter. Make sure to
 specify the correct collection name when you delete collections.
 
-```python
+```py
 # Delete the 'doctors' collection
 db.delete_collection(name="doctors")
 ```
@@ -225,7 +225,7 @@ To create a new document, get a reference to the collection and call its
 `insert()` method, passing the object/document to be created in ArangoDB as
 a parameter.
 
-```python
+```py
 # Get a reference to the 'patients' collection
 patients_col = db.collection(name="patients")
 
@@ -252,7 +252,7 @@ To patch or partially update a document, call the `update()` method of the
 collection and pass the object/document as a parameter. The document must have
 a property named `_key` holding the unique key assigned to the document.
 
-```python
+```py
 # Patch John's patient record by adding a city property to the document
 patients_col.update({ "_key": "741603", "city": "Cleveland" })
 ```
@@ -280,7 +280,7 @@ collection and pass the object/document that fully replaces thee existing
 document as a parameter. The document must have a property named `_key` holding
 the unique key assigned to the document.
 
-```python
+```py
 # Replace John's document
 patients_col.replace({ "_key": "741603", "fullname": "John Doe", "age": 18, "city": "Cleveland" })
 ```
@@ -306,7 +306,7 @@ not specified in the request when the document was fully replaced.
 To delete a document, call the `delete()` method of the collection and pass an
 document containing at least the `_key` attribute as a parameter.
 
-```python
+```py
 # Delete John's document
 patients_col.delete({ "_key": "741603" })
 ```
@@ -319,7 +319,7 @@ To run a query, connect to the desired database and call `aql.execute()`.
 This returns a cursor, which lets you fetch the results in batches. You can
 iterate over the cursor to automatically fetch the data.
 
-```python
+```py
 # Run a query
 cursor = db.aql.execute('FOR i IN 1..@value RETURN i', bind_vars={'value': 3})
 
diff --git a/site/content/ecosystem/drivers/python.md b/site/content/ecosystem/drivers/python.md
index f78ec18a7b..7d9eef0759 100644
--- a/site/content/ecosystem/drivers/python.md
+++ b/site/content/ecosystem/drivers/python.md
@@ -43,7 +43,7 @@ pip install python-arango --upgrade
 
 You can then import the library in your project as follows:
 
-```python
+```py
 from arango import ArangoClient
 ```
 {{< /tab >}}
@@ -60,7 +60,7 @@ pip install python-arango-async --upgrade
 
 You can then import the library in your project as follows:
 
-```python
+```py
 from arangoasync import ArangoClient
 ```
 {{< /tab >}}
@@ -78,7 +78,7 @@ data using queries:
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 from arango import ArangoClient
 
 # Initialize the client for ArangoDB.
@@ -111,7 +111,7 @@ student_names = [document["name"] for document in cursor]
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
 from arangoasync import ArangoClient
 from arangoasync.auth import Auth
 
@@ -150,7 +150,7 @@ async with ArangoClient(hosts="http://localhost:8529") as client:
 You may also use the client without a context manager, but you must ensure to
 close the client when done.
 
-```python
+```py
 from arangoasync import ArangoClient
 from arangoasync.auth import Auth
 
@@ -180,7 +180,7 @@ populate it with nodes and edges, and query it with a graph traversal:
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 from arango import ArangoClient
 
 # Initialize the client for ArangoDB.
@@ -237,7 +237,7 @@ cursor = db.aql.execute(query)
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
 from arangoasync import ArangoClient
 from arangoasync.auth import Auth
 
@@ -309,7 +309,7 @@ database name, user name, and password as parameters.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 from arango import ArangoClient
 
 # Initialize a client
@@ -321,7 +321,7 @@ sys_db = client.db("_system", username="root", password="passwd")
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
 from arangoasync import ArangoClient
 from arangoasync.auth import Auth
 
@@ -344,14 +344,14 @@ To retrieve a list of all databases on an ArangoDB server, connect to the
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Retrieve the names of all databases on the server as list of strings
 db_list = sys_db.databases()
 ```
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Retrieve the names of all databases on the server as list of strings
     db_list = await sys_db.databases()
 ```
@@ -368,7 +368,7 @@ To create a new database, connect to the `_system` database and call
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Create a new database named "test".
 ok = sys_db.create_database("test")
 
@@ -378,7 +378,7 @@ test_db = client.db("test", username="root", password="passwd")
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Create a new database named "test".
     ok = await sys_db.create_database("test")
 
@@ -399,14 +399,14 @@ the correct database name when you are deleting databases.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Delete the 'test' database
 sys_db.delete_database("test")
 ```
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Delete the 'test' database
     ok = await sys_db.delete_database("test")
 ```
@@ -424,7 +424,7 @@ call `collections()`.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Connect to the database
 db = client.db(db_name, username=user_name, password=pass_word)
 
@@ -434,7 +434,7 @@ collection_list = db.collections()
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Connect to the database
     db = await client.db(db_name, auth=Auth(username=user_name, password=pass_word))
 
@@ -452,7 +452,7 @@ To create a new collection, connect to the database and call `create_collection(
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Create a new collection for doctors
 doctors_col = db.create_collection(name="doctors")
 
@@ -462,7 +462,7 @@ patients_col = db.create_collection(name="patients")
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Create a new collection for doctors
     doctors_col = await db.create_collection(name="doctors")
 
@@ -482,14 +482,14 @@ specify the correct collection name when you delete collections.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Delete the 'doctors' collection
 db.delete_collection(name="doctors")
 ```
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Delete the 'doctors' collection
     ok = await db.delete_collection(name="doctors")
 ```
@@ -508,7 +508,7 @@ a parameter.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Get a reference to the 'patients' collection
 patients_col = db.collection(name="patients")
 
@@ -519,7 +519,7 @@ meta2 = patients_col.insert({"name": "John", "age": 18})
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Get a reference to the 'patients' collection
     patients_col = db.collection(name="patients")
 
@@ -552,14 +552,14 @@ a property named `_key` holding the unique key assigned to the document.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Patch John's patient record by adding a city property to the document
 meta = patients_col.update({ "_key": "741603", "city": "Cleveland" })
 ```
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Patch John's patient record by adding a city property to the document
     meta = await patients_col.update({ "_key": "741603", "city": "Cleveland" })
 ```
@@ -593,14 +593,14 @@ the unique key assigned to the document.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Replace John's document
 meta = patients_col.replace({ "_key": "741603", "fullname": "John Doe", "age": 18, "city": "Cleveland" })
 ```
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Replace John's document
     meta = await patients_col.replace({ "_key": "741603", "fullname": "John Doe", "age": 18, "city": "Cleveland" })
 ```
@@ -632,14 +632,14 @@ document containing at least the `_key` attribute as a parameter.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Delete John's document
 patients_col.delete({ "_key": "741603" })
 ```
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Delete John's document
     meta = await patients_col.delete({ "_key": "741603" })
 ```
@@ -658,7 +658,7 @@ iterate over the cursor to automatically fetch the data.
 {{< tabs "python-driver" >}}
 
 {{< tab "python-arango" >}}
-```python
+```py
 # Run a query
 cursor = db.aql.execute('FOR i IN 1..@value RETURN i', bind_vars={'value': 3})
 
@@ -669,7 +669,7 @@ for doc in cursor:
 {{< /tab >}}
 
 {{< tab "python-arango-async" >}}
-```python
+```py
     # Run a query
     cursor = await db.aql.execute('FOR i IN 1..@value RETURN i', bind_vars={'value': 3})