From 14d94abdf3119ae94fd87f8158fc694edd5c8882 Mon Sep 17 00:00:00 2001 From: Paula Date: Tue, 5 Aug 2025 12:30:57 +0200 Subject: [PATCH 1/2] add initial content --- .../graphrag/private-llm-triton-tutorial.md | 198 ++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 site/content/3.13/data-science/graphrag/private-llm-triton-tutorial.md diff --git a/site/content/3.13/data-science/graphrag/private-llm-triton-tutorial.md b/site/content/3.13/data-science/graphrag/private-llm-triton-tutorial.md new file mode 100644 index 0000000000..84258e35bb --- /dev/null +++ b/site/content/3.13/data-science/graphrag/private-llm-triton-tutorial.md @@ -0,0 +1,198 @@ +--- +title: How to use GraphRAG with a Private LLM +menuTitle: Private LLM Tutorial +weight: 15 +description: >- + Learn how to create, configure, and run a full GraphRAG workflow + using a private LLM and Triton Inference Server +--- +{{< tag "ArangoDB Platform" >}} + +{{< tip >}} +The ArangoDB Platform & GenAI Suite is available as a pre-release. To get +exclusive early access, [get in touch](https://arangodb.com/contact/) with +the ArangoDB team. +{{< /tip >}} + +## Prerequisite: Get an LLM to host + +If you already have an LLM, you can skip this step. If you are new to LLMs +(Large Language Models), this section explains how to get and prepare an +open-source LLM. + +This tutorial downloads an open-source model from Hugging Face, but you can +use any other model provider. + +### Install the Hugging Face CLI + +Follow the official [Hugging Face guide](https://huggingface.co/docs/huggingface_hub/en/guides/cli) +to install the CLI. + +You should now be able to run the `hf --help` command. + +### Download a model + +Pick the model you want to use. For demonstration purposes, this tutorial is +using a [Nemotron model](https://huggingface.co/nvidia/OpenReasoning-Nemotron-7B). 
+ +You can download it with the following command: +``` +hf download nvidia/OpenReasoning-Nemotron-7B +``` + +Refer to the Hugging Face documentation for more details. + +{{< info >}} +ArangoDB explicitly provides no further guarantees or guidance on the chosen LLM. +ArangoDB's goal is to work with any LLM available in the market. +{{< /info >}} + +### Export model as ONNX + +ONNX is an open standard that defines a common set of operators and a file format +to represent deep learning models in different frameworks. The Optimum library +exports a model to ONNX with configuration objects which are supported for many +architectures and can be easily extended. + +Follow the [Hugging Face guideline](https://huggingface.co/docs/transformers/serialization) +to export the model as ONNX format via Optimum. + +After installing Optimum, run the following command: +``` +optimum-cli export onnx --model nvidia/OpenReasoning-Nemotron-7B MyModel +``` +{{< tip >}} +Replace `MyModel` with a name of your choice for your model. +{{< /tip >}} + +This exports the model into ONNX format, which is currently required. + +## Prepare the necessary files + +You need two files for the model to work: +- Triton configuration file: `config.pb.txt` +- Python backend file: `model.py` + +{{< info >}} +Currently, only the Python backend of Triton is supported with the rest of the GenAI services. +Other operating modes will be added in future versions. 
+{{< /info >}} + +### Triton configuration file + +To ensure compatibility with the Triton service, you need the following configuration +file `config.pb.txt`, which must be placed next to your model folder: + +```yaml +name: "MyModel" # Set the name to the one you chose previously +backend: "python" +input [ + { + name: "INPUT0" + data_type: TYPE_STRING + dims: [-1] + } +] +output [ + { + name: "OUTPUT0" + data_type: TYPE_STRING + dims: [-1] + } +] +instance_group [ + { + count: 1 + kind: KIND_GPU + } +] +``` + +This configuration defines the display name of the model, specifies the use of +the Python backend, and sets input and output as string tokens for text generation. +It also configures the model to use 1 GPU on the Triton server. + +### Triton Python backend + +Next, you need to implement Python code for the backend to handle the text +tokenization within the Triton server. + +Therefore, place a file named `model.py` in your model folder with the following content: + +```python +import numpy as np +import json +import triton_python_backend_utils as pb_utils +from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline + +class TritonPythonModel: + def initialize(self, args): + model_path = args['model_repository'] + "/" + args['model_version'] + "/" + self.tokenizer = AutoTokenizer.from_pretrained(model_path) + self.model = AutoModelForCausalLM.from_pretrained(model_path) + self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer, batch_size=1) + + def execute(self, requests): + responses = [] + for request in requests: + in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") + input_text = in_0.as_numpy()[0].decode('utf-8') + + try: + input_data = json.loads(input_text) + except json.JSONDecodeError: + input_data = eval(input_text) + + prompt = self.tokenizer.apply_chat_template(input_data, tokenize=False, add_generation_prompt=True) + output = self.pipe(prompt, max_new_tokens=1024, do_sample=True, temperature=0.7, 
top_k=50, top_p=0.95) + generated_text = output[0]['generated_text'][len(prompt):].strip() + + out_tensor = pb_utils.Tensor("OUTPUT0", np.array([generated_text], dtype=object)) + responses.append(pb_utils.InferenceResponse(output_tensors=[out_tensor])) + return responses +``` + +The above code is generic and should work for most CausalLM Models. Check Hugging +Face Transformers to see if your model supports `AutoModelForCausalLM`. +If not, you need to adjust this file. You may also need to adjust it if you want +to fine-tune the configuration of your model. This tutorial prioritizes a +plug-and-play workflow over fine-tuning for maximum performance, aiming to work +in most common scenarios. + +### Model directory structure + +After preparing these files, your directory structure should look similar to this: + +``` +. +├── config.pb.txt +└── MyModel + ├── added_tokens.json + ├── chat_template.jinja + ├── config.json + ├── generation_config.json + ├── merges.txt + ├── model.onnx + ├── model.onnx_data + ├── model.py + ├── special_tokens_map.json + ├── tokenizer_config.json + ├── tokenizer.json + └── vocab.json +``` + +Now you are ready to upload the model. + +### Upload the Model to MLflow + +First, you need to install the CLI. + +``` +pip install mlflow==2.22.1 +``` + +{{< warning >}} +MLflow version 3 introduces a breaking change that affects this workflow, so it is +important to use MLflow version 2. 
+{{< /warning >}} + From 6dc2bac817c0d5391d9318899199798051c7bfad Mon Sep 17 00:00:00 2001 From: Simran Spiller Date: Wed, 19 Nov 2025 14:58:21 +0100 Subject: [PATCH 2/2] Use ```py instead of ```python --- site/content/ai-suite/reference/mlflow.md | 4 +- .../reference/triton-inference-server.md | 2 +- .../arangodb/3.11/develop/drivers/python.md | 30 ++++----- site/content/ecosystem/drivers/python.md | 62 +++++++++---------- 4 files changed, 49 insertions(+), 49 deletions(-) diff --git a/site/content/ai-suite/reference/mlflow.md b/site/content/ai-suite/reference/mlflow.md index 8de8e83414..520cf28189 100644 --- a/site/content/ai-suite/reference/mlflow.md +++ b/site/content/ai-suite/reference/mlflow.md @@ -112,7 +112,7 @@ There are two approaches for programmatic access to your ArangoDB MLflow service ### Configuration in Python -```python +```py import mlflow import os @@ -136,7 +136,7 @@ export MLFLOW_TRACKING_TOKEN="your-bearer-token-here" Then use MLflow normally in your Python code: -```python +```py import mlflow # MLflow automatically uses the environment variables diff --git a/site/content/ai-suite/reference/triton-inference-server.md b/site/content/ai-suite/reference/triton-inference-server.md index 5320c557e5..7e5857dfa6 100644 --- a/site/content/ai-suite/reference/triton-inference-server.md +++ b/site/content/ai-suite/reference/triton-inference-server.md @@ -95,7 +95,7 @@ Triton service. Each model requires the following two files: 1. **`model.py`** Implements the Python backend model. Triton uses this file to load and execute your model for inference. 
- ```python + ```py class TritonPythonModel: def initialize(self, args): # Load your model here diff --git a/site/content/arangodb/3.11/develop/drivers/python.md b/site/content/arangodb/3.11/develop/drivers/python.md index bea8233d48..b8e39c9dde 100644 --- a/site/content/arangodb/3.11/develop/drivers/python.md +++ b/site/content/arangodb/3.11/develop/drivers/python.md @@ -27,7 +27,7 @@ pip install python-arango --upgrade You can then import the library in your project as follows: -```python +```py from arango import ArangoClient ``` @@ -37,7 +37,7 @@ The following example shows how to use the driver from connecting to ArangoDB, over creating databases, collections, indexes, and documents, to retrieving data using queries: -```python +```py from arango import ArangoClient # Initialize the client for ArangoDB. @@ -71,7 +71,7 @@ student_names = [document["name"] for document in cursor] The following example shows how to create a [named graph](../../graphs/_index.md), populate it with vertices and edges, and query it with a graph traversal: -```python +```py from arango import ArangoClient # Initialize the client for ArangoDB. @@ -134,7 +134,7 @@ To connect to a database, create an instance of `ArangoClient` which provides a connection to the database server. Then call its `db` method and pass the database name, user name, and password as parameters. -```python +```py from arango import ArangoClient # Initialize a client @@ -149,7 +149,7 @@ sys_db = client.db("_system", username="root", password="qwerty") To retrieve a list of all databases on an ArangoDB server, connect to the `_system` database and call the `databases()` method. -```python +```py # Retrieve the names of all databases on the server as list of strings db_list = sys_db.databases() ``` @@ -159,7 +159,7 @@ db_list = sys_db.databases() To create a new database, connect to the `_system` database and call `create_database()`. -```python +```py # Create a new database named "test". 
sys_db.create_database("test") @@ -174,7 +174,7 @@ To delete an existing database, connect to the `_system` database and call parameter. The `_system` database cannot be deleted. Make sure to specify the correct database name when you are deleting databases. -```python +```py # Delete the 'test' database sys_db.delete_database("test") ``` @@ -186,7 +186,7 @@ sys_db.delete_database("test") To retrieve a list of collections in a database, connect to the database and call `collections()`. -```python +```py # Connect to the database db = client.db(db_name, username=user_name, password=pass_word) @@ -198,7 +198,7 @@ collection_list = db.collections() To create a new collection, connect to the database and call `create_collection()`. -```python +```py # Create a new collection for doctors doctors_col = db.create_collection(name="doctors") @@ -212,7 +212,7 @@ To delete a collection, connect to the database and call `delete_collection()`, passing the name of the collection to be deleted as a parameter. Make sure to specify the correct collection name when you delete collections. -```python +```py # Delete the 'doctors' collection db.delete_collection(name="doctors") ``` @@ -225,7 +225,7 @@ To create a new document, get a reference to the collection and call its `insert()` method, passing the object/document to be created in ArangoDB as a parameter. -```python +```py # Get a reference to the 'patients' collection patients_col = db.collection(name="patients") @@ -252,7 +252,7 @@ To patch or partially update a document, call the `update()` method of the collection and pass the object/document as a parameter. The document must have a property named `_key` holding the unique key assigned to the document. -```python +```py # Patch John's patient record by adding a city property to the document patients_col.update({ "_key": "741603", "city": "Cleveland" }) ``` @@ -280,7 +280,7 @@ collection and pass the object/document that fully replaces thee existing document as a parameter. 
The document must have a property named `_key` holding the unique key assigned to the document. -```python +```py # Replace John's document patients_col.replace({ "_key": "741603", "fullname": "John Doe", "age": 18, "city": "Cleveland" }) ``` @@ -306,7 +306,7 @@ not specified in the request when the document was fully replaced. To delete a document, call the `delete()` method of the collection and pass an document containing at least the `_key` attribute as a parameter. -```python +```py # Delete John's document patients_col.delete({ "_key": "741603" }) ``` @@ -319,7 +319,7 @@ To run a query, connect to the desired database and call `aql.execute()`. This returns a cursor, which lets you fetch the results in batches. You can iterate over the cursor to automatically fetch the data. -```python +```py # Run a query cursor = db.aql.execute('FOR i IN 1..@value RETURN i', bind_vars={'value': 3}) diff --git a/site/content/ecosystem/drivers/python.md b/site/content/ecosystem/drivers/python.md index f78ec18a7b..7d9eef0759 100644 --- a/site/content/ecosystem/drivers/python.md +++ b/site/content/ecosystem/drivers/python.md @@ -43,7 +43,7 @@ pip install python-arango --upgrade You can then import the library in your project as follows: -```python +```py from arango import ArangoClient ``` {{< /tab >}} @@ -60,7 +60,7 @@ pip install python-arango-async --upgrade You can then import the library in your project as follows: -```python +```py from arangoasync import ArangoClient ``` {{< /tab >}} @@ -78,7 +78,7 @@ data using queries: {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py from arango import ArangoClient # Initialize the client for ArangoDB. 
@@ -111,7 +111,7 @@ student_names = [document["name"] for document in cursor] {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py from arangoasync import ArangoClient from arangoasync.auth import Auth @@ -150,7 +150,7 @@ async with ArangoClient(hosts="http://localhost:8529") as client: You may also use the client without a context manager, but you must ensure to close the client when done. -```python +```py from arangoasync import ArangoClient from arangoasync.auth import Auth @@ -180,7 +180,7 @@ populate it with nodes and edges, and query it with a graph traversal: {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py from arango import ArangoClient # Initialize the client for ArangoDB. @@ -237,7 +237,7 @@ cursor = db.aql.execute(query) {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py from arangoasync import ArangoClient from arangoasync.auth import Auth @@ -309,7 +309,7 @@ database name, user name, and password as parameters. {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py from arango import ArangoClient # Initialize a client @@ -321,7 +321,7 @@ sys_db = client.db("_system", username="root", password="passwd") {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py from arangoasync import ArangoClient from arangoasync.auth import Auth @@ -344,14 +344,14 @@ To retrieve a list of all databases on an ArangoDB server, connect to the {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Retrieve the names of all databases on the server as list of strings db_list = sys_db.databases() ``` {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Retrieve the names of all databases on the server as list of strings db_list = await sys_db.databases() ``` @@ -368,7 +368,7 @@ To create a new database, connect to the `_system` database and call {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Create a new database named "test". 
ok = sys_db.create_database("test") @@ -378,7 +378,7 @@ test_db = client.db("test", username="root", password="passwd") {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Create a new database named "test". ok = await sys_db.create_database("test") @@ -399,14 +399,14 @@ the correct database name when you are deleting databases. {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Delete the 'test' database sys_db.delete_database("test") ``` {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Delete the 'test' database ok = await sys_db.delete_database("test") ``` @@ -424,7 +424,7 @@ call `collections()`. {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Connect to the database db = client.db(db_name, username=user_name, password=pass_word) @@ -434,7 +434,7 @@ collection_list = db.collections() {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Connect to the database db = await client.db(db_name, auth=Auth(username=user_name, password=pass_word)) @@ -452,7 +452,7 @@ To create a new collection, connect to the database and call `create_collection( {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Create a new collection for doctors doctors_col = db.create_collection(name="doctors") @@ -462,7 +462,7 @@ patients_col = db.create_collection(name="patients") {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Create a new collection for doctors doctors_col = await db.create_collection(name="doctors") @@ -482,14 +482,14 @@ specify the correct collection name when you delete collections. {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Delete the 'doctors' collection db.delete_collection(name="doctors") ``` {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Delete the 'doctors' collection ok = await db.delete_collection(name="doctors") ``` @@ -508,7 +508,7 @@ a parameter. 
{{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Get a reference to the 'patients' collection patients_col = db.collection(name="patients") @@ -519,7 +519,7 @@ meta2 = patients_col.insert({"name": "John", "age": 18}) {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Get a reference to the 'patients' collection patients_col = db.collection(name="patients") @@ -552,14 +552,14 @@ a property named `_key` holding the unique key assigned to the document. {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Patch John's patient record by adding a city property to the document meta = patients_col.update({ "_key": "741603", "city": "Cleveland" }) ``` {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Patch John's patient record by adding a city property to the document meta = await patients_col.update({ "_key": "741603", "city": "Cleveland" }) ``` @@ -593,14 +593,14 @@ the unique key assigned to the document. {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Replace John's document meta = patients_col.replace({ "_key": "741603", "fullname": "John Doe", "age": 18, "city": "Cleveland" }) ``` {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Replace John's document meta = await patients_col.replace({ "_key": "741603", "fullname": "John Doe", "age": 18, "city": "Cleveland" }) ``` @@ -632,14 +632,14 @@ document containing at least the `_key` attribute as a parameter. {{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Delete John's document patients_col.delete({ "_key": "741603" }) ``` {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Delete John's document meta = await patients_col.delete({ "_key": "741603" }) ``` @@ -658,7 +658,7 @@ iterate over the cursor to automatically fetch the data. 
{{< tabs "python-driver" >}} {{< tab "python-arango" >}} -```python +```py # Run a query cursor = db.aql.execute('FOR i IN 1..@value RETURN i', bind_vars={'value': 3}) @@ -669,7 +669,7 @@ for doc in cursor: {{< /tab >}} {{< tab "python-arango-async" >}} -```python +```py # Run a query cursor = await db.aql.execute('FOR i IN 1..@value RETURN i', bind_vars={'value': 3})