diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 186654a..b41f9f6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,25 +14,14 @@ jobs: name: Tests on PHP ${{ matrix.php }} - ${{ matrix.dependency-version }} - services: - chroma-wo-auth: - image: chromadb/chroma:1.0.8 - ports: - - 8000:8000 - - chroma-w-auth: - image: chromadb/chroma:1.0.8 - ports: - - 8001:8000 - env: - CHROMA_SERVER_AUTHN_CREDENTIALS: 'test-token' - CHROMA_SERVER_AUTHN_PROVIDER: 'chromadb.auth.token_authn.TokenAuthenticationServerProvider' - CHROMA_AUTH_TOKEN_TRANSPORT_HEADER: 'Authorization' - steps: - name: Checkout uses: actions/checkout@v3 + - name: Install Chroma CLI + run: | + curl -sSL https://raw.githubusercontent.com/chroma-core/chroma/main/rust/cli/install/install.sh | bash + - name: Cache dependencies uses: actions/cache@v3 with: diff --git a/.gitignore b/.gitignore index b6b96f8..63549af 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ *.swp *.swo playground/* -.idea \ No newline at end of file +.idea +.chroma +.vscode \ No newline at end of file diff --git a/README.md b/README.md index 836e255..7fcbf84 100644 --- a/README.md +++ b/README.md @@ -1,592 +1,493 @@ -## ChromaDB PHP +# ChromaDB PHP -**A PHP library for interacting with [Chroma](https://github.com/chroma-core/chroma) vector database seamlessly.** +**A customized, framework-agnostic PHP library for interacting with [Chroma](https://github.com/chroma-core/chroma) vector database seamlessly.** [![Total Downloads](https://img.shields.io/packagist/dt/codewithkyrian/chromadb-php.svg)](https://packagist.org/packages/codewithkyrian/chromadb-php) [![Latest Version on Packagist](https://img.shields.io/packagist/v/codewithkyrian/chromadb-php.svg)](https://packagist.org/packages/codewithkyrian/chromadb-php) [![MIT Licensed](https://img.shields.io/badge/license-mit-blue.svg)](https://github.com/CodeWithKyrian/chromadb-php/blob/main/LICENSE) [![GitHub Tests Action 
Status](https://github.com/CodeWithKyrian/chromadb-php/actions/workflows/test.yml/badge.svg)](https://github.com/CodeWithKyrian/chromadb-php/actions/workflows/test.yml) -> **Note:** This package is framework-agnostic, and can be used in any PHP project. If you're using Laravel however, you -> might want to check out the Laravel-specific package [here](https://github.com/CodeWithKyrian/chromadb-laravel) which -> provides a more Laravel-like experience, and includes a few extra features. +> **Note:** This package is framework-agnostic. If you use **Laravel**, check out [chromadb-laravel](https://github.com/CodeWithKyrian/chromadb-laravel) for a tailored experience. -## Description +## Introduction -[Chroma](https://www.trychroma.com/) is an open-source vector database that allows you to store, search, and analyze high-dimensional data at scale. -It is designed to be fast, scalable, and reliable. It makes it easy to build LLM (Large Language Model) applications and -services that require high-dimensional vector search. - -ChromaDB PHP provides a simple and intuitive interface for interacting with Chroma from PHP. It enables you to: - -- Create, read, update, and delete documents. -- Execute queries and aggregations. -- Manage collections and indexes. -- Handle authentication and authorization. -- Utilize other ChromaDB features seamlessly. -- And more... 
- -## Small Example - -```php -use Codewithkyrian\ChromaDB\ChromaDB; - -$chromaDB = ChromaDB::client(); - -// Check current ChromaDB version -echo $chromaDB->version(); - -// Create a collection -$collection = $chromaDB->createCollection('test-collection'); - -echo $collection->name; // test-collection -echo $collection->id; // xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxx - -// Insert some documents into the collection -$ids = ['test1', 'test2', 'test3']; -$embeddings = [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0], -]; -$metadatas = [ - ['url' => 'https://example.com/test1'], - ['url' => 'https://example.com/test2'], - ['url' => 'https://example.com/test3'], -]; - -$collection->add($ids, $embeddings, $metadatas); - -// Search for similar embeddings -$queryResponse = $collection->query( - queryEmbeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] - ], - nResults: 2 -); - -// Print results -echo $queryResponse->ids[0][0]; // test1 -echo $queryResponse->ids[0][1]; // test2 - - -``` +[Chroma](https://www.trychroma.com/) is an open-source vector database designed to be fast, scalable, and reliable. ChromaDB PHP allows you to interact with Chroma servers seamlessly. It provides a fluent, type-safe API for managing collections, documents, and embeddings, making it easy to build LLM-powered applications in PHP. ## Requirements - PHP 8.1 or higher -- ChromaDB 0.4.0 or higher running in client/server mode +- ChromaDB 1.0 or higher -## Running ChromaDB +## Installation -In order to use this library, you need to have ChromaDB running somewhere. You can either run it locally or in the -cloud. -(Chroma doesn't support cloud yet, but it will soon.) +```bash +composer require codewithkyrian/chromadb-php +``` -For now, ChromaDB can only run in-memory in Python. 
You can however run it in client/server mode by either running the -python -project or using the docker image (recommended). +## Configuration & Setup -To run the docker image, you can use the following command: +### Running ChromaDB +You need a running ChromaDB instance. + +**Docker (Recommended):** ```bash docker run -p 8000:8000 chromadb/chroma ``` -You can also pass in some environment variables using a `.env` file: - +**Chroma CLI:** ```bash -docker run -p 8000:8000 --env-file .env chromadb/chroma +chroma run --path /path/to/data ``` -Or if you prefer using a docker-compose file, you can use the following: +### Connectivity -```yaml -version: '3.9' +Connect to your Chroma server. The default connection is `http://localhost:8000`. -services: - chroma: - image: 'chromadb/chroma' - ports: - - '8000:8000' - volumes: - - chroma-data:/chroma/chroma +```php +use Codewithkyrian\ChromaDB\ChromaDB; -volumes: - chroma-data: - driver: local +// Basic Connection +$client = ChromaDB::local()->connect(); + +// Custom Host/Port +$client = ChromaDB::local() + ->withHost('http://your-server-ip') + ->withPort(8000) + ->withTenant('my-tenant') + ->withDatabase('production_db') + ->connect(); + +// Chroma Cloud / Authentication +$client = ChromaDB::cloud('your-api-key') + ->withTenant('tenant-id') + ->connect(); ``` -And then run it using: +## Embedding Functions -```bash -docker-compose up -d -``` +ChromaDB uses embedding functions to convert text into vectors. You can define which function a collection uses upon creation. -(Check out the [Chroma Documentation](https://docs.trychroma.com/deployment) for more information on how to run -ChromaDB.) +Embedding functions are linked to a collection and used when you call `add`, `update`, `upsert` or `query`. If you add documents *without* embeddings, it is used to generate them automatically. If you query using text, it is used to convert your query text into a vector for search. 
-Either way, you can now access ChromaDB at `http://localhost:8000`. +The library provides lightweight wrappers around popular embedding providers for ease of use: -## Installation +- `OpenAIEmbeddingFunction` +- `JinaEmbeddingFunction` +- `HuggingFaceEmbeddingServerFunction` +- `OllamaEmbeddingFunction` +- `MistralAIEmbeddingFunction` -```bash -composer require codewithkyrian/chromadb-php -``` - -## Usage - -### Connecting to ChromaDB +Example: ```php -use Codewithkyrian\ChromaDB\ChromaDB; +use Codewithkyrian\ChromaDB\Embeddings\OpenAIEmbeddingFunction; -$chroma = ChromaDB::client(); +$ef = new OpenAIEmbeddingFunction('your-openai-api-key'); +$collection = $client->createCollection( + name: 'knowledge-base', + embeddingFunction: $ef +); ``` -By default, ChromaDB will try to connect to `http://localhost:8000` using the default database name `default_database` -and default tenant name `default_tenant`. You can however change these values by constructing the client using the -factory method: +### Custom Functions +You can create your own embedding function by implementing `Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction`. ```php -use Codewithkyrian\ChromaDB\ChromaDB; +use Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction; -$chroma = ChromaDB::factory() - ->withHost('http://localhost') - ->withPort(8000) - ->withDatabase('new_database') - ->withTenant('new_tenant') - ->connect(); +$ef = new class implements EmbeddingFunction { + public function generate(array $texts): array { + // Call your model API here and return float[][] + return [[0.1, 0.2, ...], ...]; + } +}; ``` -If the tenant or database doesn't exist, the package will automatically create them for you. - -### Authentication - -ChromaDB supports static token-based authentication. To use it, you need to start the Chroma server passing the required -environment variables as stated in the documentation. 
If you're using the docker image, you can pass in the environment -variables using the `--env` flag or by using a `.env` file and for the docker-compose file, you can use the `env_file` -option, or pass in the environment variables directly like so: - -```yaml -version: '3.9' - -services: - chroma: - image: 'chromadb/chroma' - ports: - - '8000:8000' - environment: - - CHROMA_SERVER_AUTHN_CREDENTIALS=test-token - - CHROMA_SERVER_AUTHN_PROVIDER=chromadb.auth.token_authn.TokenAuthenticationServerProvider - - ... -``` - -You can then connect to ChromaDB using the factory method: +## Collections -```php -use Codewithkyrian\ChromaDB\ChromaDB; +Collections are where you store and categorize your embeddings and documents. All operations are performed on a specific collection. -$chroma = ChromaDB::factory() - ->withAuthToken('test-token') - ->connect(); -``` +```php +// Create (throws if exists) +$collection = $client->createCollection('my-collection', $ef); -### Getting the version +// Get (throws if missing) +$collection = $client->getCollection('my-collection'); -```php +// Get or Create +$collection = $client->getOrCreateCollection('my-collection', $ef); -echo $chroma->version(); // 0.4.0 +// Fork (creates a copy of an existing collection) +// Note: Forking is only supported for Chroma Cloud, not local Chroma instances +$forkedCollection = $client->forkCollection('my-collection', 'my-collection-fork', $ef); +// Delete +$client->deleteCollection('my-collection'); ``` -### Creating a Collection +## Adding Data -Creating a collection is as simple as calling the `createCollection` method on the client and passing in the name of -the collection. +You can add items to a collection using the structured `Record` class or raw arrays. Both methods represent the same data: -```php +- **IDs** (Required): Unique string identifier. +- **Embeddings**: Vector representation (float array). +- **Documents**: Raw text content. +- **Metadatas**: Key-value pairs for filtering. 
-$collection = $chroma->createCollection('test-collection'); +### Using Arrays +You can pass parallel arrays of IDs, embeddings, metadatas, etc. This is useful for bulk operations. +```php +$collection->add( + ids: ['id1', 'id2'], + documents: ['This is a document about PHP.', 'ChromaDB is great for AI.'], + embeddings: [[0.1, 0.2, 0.3], [0.9, 0.8, 0.7]], + metadatas: [ + ['category' => 'development', 'author' => 'Kyrian'], + ['category' => 'ai', 'is_published' => true] + ] +); ``` -If the collection already exists in the database, the package will throw an exception. - -### Inserting Documents +### Using Records (Fluent API) +The `Record` class provides a fluent interface for building items. It mirrors the array structure but in an object-oriented way. ```php -$ids = ['test1', 'test2', 'test3']; -$embeddings = [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0], -]; -$metadatas = [ - ['url' => 'https://example.com/test1'], - ['url' => 'https://example.com/test2'], - ['url' => 'https://example.com/test3'], -]; - -$collection->add($ids, $embeddings, $metadatas); +use Codewithkyrian\ChromaDB\Types\Record; + +$collection->add([ + // Fluent Factory style + Record::make('id4') + ->withDocument('This is a document about PHP.') + ->withEmbedding([0.1, 0.2, 0.3]) + ->withMetadata(['category' => 'development', 'author' => 'Kyrian']), + + // Constructor style + new Record( + id: 'id7', + document: 'ChromaDB is great for AI.', + embedding: [0.9, 0.8, 0.7], + metadata: ['category' => 'ai', 'is_published' => true] + ), +]); ``` -To insert documents into a collection, you need to provide the following: +If you provide `documents` but *omit* `embeddings`, Chroma uses the collection's **Embedding Function** to generate them. Conversely, you can supply `embeddings` yourself, which is useful if you have an external embedding function or if you want to manually control the embedding process. 
When providing just embeddings and not documents, it's assumed you're storing the documents elsewhere and associating the provided embeddings with those documents using the `ids` or any other metadata. -- `ids`: An array of document ids. The ids must be unique and must be strings. -- `embeddings`: An array of document embeddings. The embeddings must be a 1D array of floats with a consistent length. You - can compute the embeddings using any embedding model of your choice (just make sure that's what you use when querying as - well). -- `metadatas`: An array of document metadatas. The metadatas must be an array of key-value pairs. +> If the supplied embeddings are not the same dimension as the embeddings already indexed in the collection, an exception will be raised. -If you don't have the embeddings, you can pass in the documents and provide an embedding function that will be used to -compute the embeddings for you. +## Retrieval (`get` and `peek`) -### Passing in Embedding Function +Retrieve specific items by ID or filtered metadata without generating embeddings. -To use an embedding function, you need to pass it in as an argument when creating the collection: +### Get +Fetch specific items. ```php -use CodeWithKyrian\ChromaDB\EmbeddingFunction\EmbeddingFunctionInterface; +use Codewithkyrian\ChromaDB\Types\Includes; + +// Fetch by ID +$item = $collection->get(ids: ['id1']); -$embeddingFunction = new OpenAIEmbeddingFunction('api-key', 'org-id', 'model-name'); +// Fetch filtered items (Metadata Filter) +$items = $collection->get( + where: ['category' => 'php'], + include: [Includes::Documents, Includes::Metadatas] +); -$collection = $chroma->createCollection('test-collection', embeddingFunction: $embeddingFunction); +// Fetch items as Record objects +$records = $items->asRecords(); ``` -The embedding function must be an instance of `EmbeddingFunctionInterface`. 
There are a few built-in embedding functions -that you can use: - -- `OpenAIEmbeddingFunction`: This embedding function uses the OpenAI API to compute the embeddings. You can use it like - this: - ```php - use CodeWithKyrian\Chroma\EmbeddingFunction\OpenAIEmbeddingFunction; - - $embeddingFunction = new OpenAIEmbeddingFunction('api-key', 'org-id', 'model-name'); - - $collection = $chromaDB->createCollection('test-collection', embeddingFunction: $embeddingFunction); - ``` - You can get your OpenAI API key and organization id from your [OpenAI dashboard](https://beta.openai.com/), and you - can omit the organization id if your API key doesn't belong to an organization. The model name is optional as well and - defaults to `text-embedding-ada-002` - -- `JinaEmbeddingFunction`: This is a wrapper for the Jina Embedding models. You can use by passing your Jina API key and - the desired model. THis defaults to `jina-embeddings-v2-base-en` - ```php - use Codewithkyrian\ChromaDB\Embeddings\JinaEmbeddingFunction; - - $embeddingFunction = new JinaEmbeddingFunction('api-key'); - - $collection = $chromaDB->createCollection('test-collection', embeddingFunction: $embeddingFunction); - ``` - -- `HuggingFaceEmbeddingServerFunction`: This embedding function is a wrapper around the HuggingFace Text Embedding - Server. Before using it, you need to have - the [HuggingFace Embedding Server](https://github.com/huggingface/text-embeddings-inference) running somewhere locally. 
Here's how you can use it: - ```php - use CodeWithKyrian\Chroma\EmbeddingFunction\HuggingFaceEmbeddingFunction; - - $embeddingFunction = new HuggingFaceEmbeddingFunction('api-key', 'model-name'); - - $collection = $chromaDB->createCollection('test-collection', embeddingFunction: $embeddingFunction); - ``` - -Besides the built-in embedding functions, you can also create your own embedding function by implementing -the `EmbeddingFunction` interface (including Anonymous Classes): +### Peek +Preview the first `n` items in the collection. ```php -use CodeWithKyrian\ChromaDB\EmbeddingFunction\EmbeddingFunctionInterface; - -$embeddingFunction = new class implements EmbeddingFunctionInterface { - public function generate(array $texts): array - { - // Compute the embeddings here and return them as an array of arrays - } -}; - -$collection = $chroma->createCollection('test-collection', embeddingFunction: $embeddingFunction); +$preview = $collection->peek(limit: 5); ``` -> The embedding function will be called for each batch of documents that are inserted into the collection, and must be -> provided either when creating the collection or when querying the collection. If you don't provide an embedding -> function, and you don't provide the embeddings, the package will throw an exception. - -### Inserting Documents into a Collection with an Embedding Function +### Specifying Return Data (`include`) +Both `get` and `query` allow you to specify what data to return using the `include` parameter. 
```php -$ids = ['test1', 'test2', 'test3']; -$documents = [ - 'This is a test document', - 'This is another test document', - 'This is yet another test document', -]; -$metadatas = [ - ['url' => 'https://example.com/test1'], - ['url' => 'https://example.com/test2'], - ['url' => 'https://example.com/test3'], -]; - -$collection->add( - ids: $ids, - documents: $documents, - metadatas: $metadatas +use Codewithkyrian\ChromaDB\Types\Includes; + +$collection->get( + ids: ['id1'], + include: [ + Includes::Documents, // Return the document text + Includes::Metadatas, // Return the metadata + Includes::Embeddings // Return the vector + ] ); ``` +> **Note:** `Includes::Distances` is only available when **Querying**, not when using `get()`. -### Getting a Collection +## Querying (Vector Search) -```php -$collection = $chromaDB->getCollection('test-collection'); -``` +Querying is about finding items *semantically similar* to your input. Chroma performs a vector search to find the nearest neighbors. ChromaDB-PHP also provides a powerful, fluent query builder for filtering by metadata and document content. + +### Query by Text + +Provide text strings. Chroma embeds them using the collection's Embedding Function and finds the nearest neighbors. -Or with an embedding function: ```php -$collection = $chromaDB->getCollection('test-collection', embeddingFunction: $embeddingFunction); -``` +$results = $collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5 // Return top 5 matches +); -> Make sure that the embedding function you provide is the same one that was used when creating the collection. +// Get results as ScoredRecord objects +// Returns ScoredRecord[][] (one array of results per query text) +$records = $results->asRecords(); +``` -### Counting the items in a collection +### Query by Embeddings +Provide raw vectors. Useful if you compute embeddings externally. 
```php -$collection->count() // 2 +$results = $collection->query( + queryEmbeddings: [[0.1, 0.2, ...]], + nResults: 5 +); ``` -### Updating a collection +### Specifying Return Data (`include`) + +By default, queries return IDs, Embeddings, Metadatas, and Distances. You can customize this using the `Includes` enum to optimize performance. ```php -$collection->update( - ids: ['test1', 'test2', 'test3'], - embeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], - [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0], - ], - metadatas: [ - ['url' => 'https://example.com/test1'], - ['url' => 'https://example.com/test2'], - ['url' => 'https://example.com/test3'], +use Codewithkyrian\ChromaDB\Types\Includes; + +$collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + include: [ + Includes::Documents, // Return the actual text content + Includes::Distances // Return the similarity score ] ); ``` -### Deleting Documents +### Metadata Filtering (`where`) +You can filter search results based on metadata of the items. The library provides a fluent **Builder** for safety, but also supports raw arrays. 
+ +#### Supported Comparisons ```php -$collection->delete(['test1', 'test2', 'test3']); +// Equals +Where::field('category')->eq('news'); +['category' => ['$eq' => 'news']]; + +// Not Equals +Where::field('status')->ne('archived'); +['status' => ['$ne' => 'archived']]; + +// Greater Than +Where::field('views')->gt(100); +['views' => ['$gt' => 100]]; + +// Less Than +Where::field('rating')->lt(5); +['rating' => ['$lt' => 5]]; + +// Greater Than or Equal To +Where::field('views')->gte(100); +['views' => ['$gte' => 100]]; + +// Less Than or Equal To +Where::field('rating')->lte(5); +['rating' => ['$lte' => 5]]; + +// List inclusion +Where::field('tag')->in(['php', 'laravel']); +['tag' => ['$in' => ['php', 'laravel']]]; + +// List exclusion +Where::field('tag')->nin(['php', 'laravel']); +['tag' => ['$nin' => ['php', 'laravel']]]; + +// Logical AND +Where::all( + Where::field('category')->eq('code'), + Where::field('language')->eq('php') +); +['$and' => [ + ['category' => ['$eq' => 'code']], + ['language' => ['$eq' => 'php']] +]]; + +// Logical OR +Where::any( + Where::field('category')->eq('code'), + Where::field('language')->eq('php') +); +['$or' => [ + ['category' => ['$eq' => 'code']], + ['language' => ['$eq' => 'php']] +]]; ``` -### Querying a Collection +#### Usage ```php -$queryResponse = $collection->query( - queryEmbeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] - ], - nResults: 2 +$collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + where: Where::field('category')->eq('code') +); + +$collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + where: ['category' => ['$eq' => 'code']] ); -echo $queryResponse->ids[0][0]; // test1 -echo $queryResponse->ids[0][1]; // test2 +$collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + where: Where::all( + Where::field('category')->eq('code'), + Where::field('language')->eq('php') + ) +); + +$collection->query( + 
queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + where: ['$and' => [ + ['category' => ['$eq' => 'code']], + ['language' => ['$eq' => 'php']] + ]] +); ``` -To query a collection, you need to provide the following: - -- `queryEmbeddings` (optional): An array of query embeddings. The embeddings must be a 1D array of floats. You - can compute the embeddings using any embedding model of your choice (just make sure that's what you use when inserting - as - well). -- `nResults`: The number of results to return. Defaults to 10. -- `queryTexts` (optional): An array of query texts. The texts must be strings. You can omit this if you provide the - embeddings. Here's - an example: - ```php - $queryResponse = $collection->query( - queryTexts: [ - 'This is a test document' - ], - nResults: 2 - ); - - echo $queryResponse->ids[0][0]; // test1 - echo $queryResponse->ids[0][1]; // test2 - ``` -- `where` (optional): The where clause to use to filter items based on their metadata. Here's an example: - ```php - $queryResponse = $collection->query( - queryEmbeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] - ], - nResults: 2, - where: [ - 'url' => 'https://example.com/test1' - ] - ); - - echo $queryResponse->ids[0][0]; // test1 - ``` - The where clause must be an array of key-value pairs. The key must be a string, and the value can be a string or - an array of valid filter values. 
Here are the valid filters (`$eq`, `$ne`, `$in`, `$nin`, `$gt`, `$gte`, `$lt`, - `$lte`): - - `$eq`: Equals - - `$ne`: Not equals - - `$gt`: Greater than - - `$gte`: Greater than or equal to - - `$lt`: Less than - - `$lte`: Less than or equal to - - Here's an example: - ```php - $queryResponse = $collection->query( - queryEmbeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] - ], - nResults: 2, - where: [ - 'url' => [ - '$eq' => 'https://example.com/test1' - ] - ] - ); - ``` - You can also use multiple filters: - ```php - $queryResponse = $collection->query( - queryEmbeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] - ], - nResults: 2, - where: [ - 'url' => [ - '$eq' => 'https://example.com/test1' - ], - 'title' => [ - '$ne' => 'Test 1' - ] - ] - ); - ``` -- `whereDocument` (optional): The where clause to use to filter items based on their document. Here's an example: - ```php - $queryResponse = $collection->query( - queryEmbeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] - ], - nResults: 2, - whereDocument: [ - 'text' => 'This is a test document' - ] - ); - - echo $queryResponse->ids[0][0]; // test1 - ``` - The where clause must be an array of key-value pairs. The key must be a string, and the value can be a string or - an array of valid filter values. In this case, only two filtering keys are supported - `$contains` - and `$not_contains`. - - Here's an example: - ```php - $queryResponse = $collection->query( - queryEmbeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] - ], - nResults: 2, - whereDocument: [ - 'text' => [ - '$contains' => 'test document' - ] - ] - ); - ``` -- `include` (optional): An array of fields to include in the response. Possible values - are `embeddings`, `documents`, `metadatas` and `distances`. It defaults to `embeddings` - and `metadatas` (`documents` are not included by default because they can be large). 
- ```php - $queryResponse = $collection->query( - queryEmbeddings: [ - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] - ], - nResults: 2, - include: ['embeddings'] - ); - ``` - `distances` is only valid for querying and not for getting. It returns the distances between the query embeddings - and the embeddings of the results. - -Other relevant information about querying and retrieving a collection can be found in the [ChromaDB Documentation](https://docs.trychroma.com/usage-guide). - -### Deleting items in a collection - -To delete the documents in a collection, pass in an array of the ids of the items: +### Full Text Search (`whereDocument`) -```php -$collection->delete(['test1', 'test2']); +Used to filter based on the text content of the document itself. This supports **substring matching** and **Regex**. You can use either the fluent builder or array syntax. + +#### Supported Comparisons -$collection->count() // 1 +```php +// Substring (Contains) +Where::document()->contains('search term') +['$contains' => 'search term'] + +// Substring (Not Contains) +Where::document()->notContains('spam') +['$not_contains' => 'spam'] + +// Regex Matching +Where::document()->matches('^PHP 8\.[0-9]+') +['$regex' => '^PHP 8\.[0-9]+'] + +Where::document()->notMatches('deprecated') +['$not_regex' => 'deprecated'] + +// Logical OR +Where::any( + Where::document()->contains('php'), + Where::document()->contains('laravel') +) +['$or' => [ + ['$contains' => 'php'], + ['$contains' => 'laravel'] +]] + +// Logical AND +Where::all( + Where::document()->contains('php'), + Where::document()->contains('laravel') +) +['$and' => [ + ['$contains' => 'php'], + ['$contains' => 'laravel'] +]] ``` -Passing the ids is optional. 
You can delete items from a collection using a where filter: +#### Usage ```php -$collection->add( - ['test1', 'test2', 'test3'], - [ - [1.0, 2.0, 3.0, 4.0, 5.0], - [6.0, 7.0, 8.0, 9.0, 10.0], - [11.0, 12.0, 13.0, 14.0, 15.0], - ], - [ - ['some' => 'metadata1'], - ['some' => 'metadata2'], - ['some' => 'metadata3'], - ] +$collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + whereDocument: Where::document()->contains('php') ); -$collection->delete( - where: [ - 'some' => 'metadata1' - ] +$collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + whereDocument: ['$contains' => 'php'] ); -$collection->count() // 2 -``` - -### Deleting a collection - -Deleting a collection is as simple as passing in the name of the collection to be deleted. +$collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + whereDocument: Where::any( + Where::document()->contains('php'), + Where::document()->contains('laravel') + ) +); -```php -$chroma->deleteCollection('test_collection'); +$collection->query( + queryTexts: ['How do I use PHP with Chroma?'], + nResults: 5, + whereDocument: ['$or' => [ + ['$contains' => 'php'], + ['$contains' => 'laravel'] + ]] +); ``` -## Testing +## Updating Data -``` -// Run chroma by running the docker compose file in the repo -docker compose up -d +Use `update` to modify existing items (fails if ID missing) or `upsert` to update-or-create. Just like adding, you can either pass an array of records, or a parallel array of IDs, documents, and metadatas. -composer test +```php +// Update using Records +$collection->update([ + Record::make('id1')->withMetadata(['updated' => true]) +]); + +// Upsert using Arrays +$collection->upsert( + ids: ['id_new'], + documents: ['New document content'], + metadatas: [['created' => 'now']] +); ``` -## Contributors - -- [Kyrian Obikwelu](https://github.com/CodeWithKyrian) -- Other contributors are welcome. 
+## Deleting Data -## License - -This project is licensed under the MIT License. See -the [LICENSE](https://github.com/codewithkyrian/chromadb-php/blob/main/LICENSE) file for more information. +Delete by IDs or by filter. +```php +// Delete specific items +$collection->delete(['id1', 'id2']); +// Delete all items matching a filter +$collection->delete(where: Where::field('category')->eq('outdated')); +// Delete all items matching a document content filter +$collection->delete(whereDocument: Where::document()->contains('outdated')); +``` +## Examples +- **[`basic-usage`](examples/basic-usage)** - Simple example demonstrating basic operations: connecting, adding documents, and querying +- **[`document-chunking-cloud`](examples/document-chunking-cloud)** - Document chunking, embedding, and storage in Chroma Cloud with semantic search +## Testing +Run the test suite using Pest. +```bash +composer test +``` +## License +MIT License. See [LICENSE](LICENSE) for more information. diff --git a/composer.json b/composer.json index 0d3f885..8d556fe 100644 --- a/composer.json +++ b/composer.json @@ -1,23 +1,42 @@ { "name": "codewithkyrian/chromadb-php", "description": "A PHP client for the Chroma Open Source Embedding Database", - "keywords": ["chromadb", "php", "embedding", "database", "vectors", "semantic", "search", "chroma", "open-source"], + "keywords": [ + "chromadb", + "php", + "embedding", + "database", + "vectors", + "semantic", + "search", + "chroma", + "open-source" + ], "type": "library", "license": "MIT", "require": { "php": "^8.1", - "guzzlehttp/guzzle": "^7.0" + "psr/http-client": "^1.0", + "psr/http-factory": "^1.1", + "php-http/discovery": "^1.20" }, "require-dev": { "pestphp/pest": "^2.19", "symfony/var-dumper": "^6.3", - "mockery/mockery": "^1.6" + "mockery/mockery": "^1.6", + "symfony/process": "^6.4 || ^7.3 || ^8.0", + "guzzlehttp/guzzle": "^7.10" }, "autoload": { "psr-4": { "Codewithkyrian\\ChromaDB\\": "src/" } }, + "autoload-dev": { + "psr-4": { + 
"Codewithkyrian\\ChromaDB\\Tests\\": "tests/" + } + }, "authors": [ { "name": "Kyrian Obikwelu", @@ -26,11 +45,12 @@ ], "config": { "allow-plugins": { - "pestphp/pest-plugin": true + "pestphp/pest-plugin": true, + "php-http/discovery": true } }, "scripts": { "test": "vendor/bin/pest", "test:coverage": "XDEBUG_MODE=coverage ./vendor/bin/pest --coverage" } -} +} \ No newline at end of file diff --git a/examples/.gitignore b/examples/.gitignore deleted file mode 100644 index 88e99d5..0000000 --- a/examples/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -vendor -composer.lock \ No newline at end of file diff --git a/examples/index.php b/examples/basic-usage/index.php similarity index 64% rename from examples/index.php rename to examples/basic-usage/index.php index 1b3c207..3c09355 100644 --- a/examples/index.php +++ b/examples/basic-usage/index.php @@ -2,7 +2,7 @@ declare(strict_types=1); -require './vendor/autoload.php'; +require __DIR__ . '/../../vendor/autoload.php'; use Codewithkyrian\ChromaDB\ChromaDB; use Codewithkyrian\ChromaDB\Embeddings\JinaEmbeddingFunction; @@ -13,19 +13,22 @@ ->withTenant('test_tenant') ->connect(); -$chroma->deleteAllCollections(); - $embeddingFunction = new OllamaEmbeddingFunction(); -$collection = $chroma->createCollection( +$collection = $chroma->getCollection( name: 'test_collection', embeddingFunction: $embeddingFunction ); +$items = [ + ["id" => 1, "content" => "He seems very happy"], + ["id" => 2, "content" => "He was very sad when we last talked"], + ["id" => 3, "content" => "She made him angry"], +]; $collection->add( - ids: ['1', '2', '3'], - documents: ['He seems very happy', 'He was very sad when we last talked', 'She made him angry'] + ids: array_column($items, 'id'), + documents: array_column($items, 'content') ); $queryResponse = $collection->query( @@ -34,5 +37,3 @@ ); dd($queryResponse->documents[0], $queryResponse->distances[0]); - - diff --git a/examples/composer.json b/examples/composer.json deleted file mode 100644 index 
0ba0596..0000000 --- a/examples/composer.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "name": "kyrian/examples", - "type": "project", - "autoload": { - "psr-4": { - } - }, - "authors": [ - { - "name": "Kyrian Obikwelu", - "email": "koshnawaza@gmail.com" - } - ], - "repositories": [ - { - "type": "path", - "url": "../", - "options": { - "symlink": true - } - } - ], - "require": { - "codewithkyrian/chromadb-php": "@dev", - "symfony/var-dumper": "^6.3" - } -} diff --git a/examples/document-chunking-cloud/README.md b/examples/document-chunking-cloud/README.md new file mode 100644 index 0000000..6de35fa --- /dev/null +++ b/examples/document-chunking-cloud/README.md @@ -0,0 +1,198 @@ +# Document Chunking and Embedding Example + +This example demonstrates how to chunk a document, generate embeddings, and store them in Chroma Cloud for semantic search and retrieval. + +## Overview + +The example performs the following operations: + +1. **Ingestion Mode**: Chunks a document (`document.txt`) into smaller pieces, generates embeddings using Jina AI, and stores them in Chroma Cloud +2. **Query Mode**: Performs semantic search on the stored documents using natural language queries + +## Prerequisites + +- PHP 8.1 or higher +- Chroma Cloud account with API key +- Jina AI API key (for embeddings) +- Composer dependencies installed (`composer install`) + +## Setup + +1. Set your API keys as environment variables: + +```bash +export CHROMA_API_KEY="your-chroma-cloud-api-key" +export JINA_API_KEY="your-jina-api-key" +``` + +Or pass them via CLI arguments (see Usage below). 
+ +## Usage + +### Ingest Mode + +Chunk and store the document to Chroma Cloud: + +```bash +php index.php -mode ingest +``` + +With custom options: + +```bash +php index.php -mode ingest \ + --api-key "your-chroma-api-key" \ + --jina-key "your-jina-api-key" \ + --tenant "my-tenant" \ + --database "my-database" +``` + +### Query Mode + +Search the stored documents: + +```bash +php index.php -mode query --query "What happened at the Dartmouth Workshop?" +``` + +With custom options: + +```bash +php index.php -mode query \ + --query "Who proposed the Turing Test?" \ + --api-key "your-chroma-api-key" \ + --jina-key "your-jina-api-key" \ + --tenant "my-tenant" \ + --database "my-database" +``` + +## CLI Arguments + +| Argument | Description | Default | Required | +|----------|-------------|---------|----------| +| `-mode` | Operation mode: `ingest` or `query` | - | Yes | +| `--query` | Query text for search (query mode only) | "Which event marked the birth of symbolic AI?" | No | +| `--api-key` | Chroma Cloud API key | `CHROMA_API_KEY` env var | Yes | +| `--jina-key` | Jina AI API key for embeddings | `JINA_API_KEY` env var | Yes | +| `--tenant` | Chroma Cloud tenant name | `default_tenant` | No | +| `--database` | Chroma Cloud database name | `default_database` | No | +| `--collection-name` | Collection name to use | `history_of_ai` | No | + +## Example Queries + +Try these example queries to test the semantic search: + +```bash +# Historical events +php index.php -mode query --query "What happened at the Dartmouth Workshop?" + +# People and contributions +php index.php -mode query --query "Who proposed the Turing Test?" + +# Technical breakthroughs +php index.php -mode query --query "What was the significance of AlexNet in 2012?" + +# Concepts and explanations +php index.php -mode query --query "How do Large Language Models and Generative AI work?" + +# Historical figures +php index.php -mode query --query "Who is considered the first computer programmer?" 
+``` + +## How It Works + +### Document Chunking + +The document is chunked based on: +- **CHAPTER markers**: New chapters create new chunks +- **PAGE markers**: New pages create new chunks +- **Text accumulation**: Text between markers is accumulated into chunks + +Each chunk includes: +- Unique ID +- Document text +- Metadata (chapter and page information) + +### Embedding Generation + +- Uses Jina AI's embedding function to convert text chunks into vector embeddings +- Embeddings are generated in batch for efficiency +- All chunks are embedded before storage + +### Storage + +- Chunks are stored in a Chroma Cloud collection +- The collection is recreated on each ingestion (previous data is deleted) +- Each chunk maintains its metadata for filtering and context + +### Querying + +- Natural language queries are converted to embeddings using the same Jina AI function +- Vector similarity search finds the most relevant chunks +- Results include distance scores, documents, and metadata + +## Output + +### Ingest Mode + +``` +--- Chroma Cloud Example: ingest Mode --- +Tenant: default_tenant, Database: default_database +Connected to Chroma Cloud version: 0.1.0 +Starting Ingestion... +Parsed 10 chunks from document. +Embedding and adding 10 items... +Ingestion Complete! +``` + +### Query Mode + +``` +--- Chroma Cloud Example: query Mode --- +Tenant: default_tenant, Database: default_database +Connected to Chroma Cloud version: 0.1.0 +Querying: "What happened at the Dartmouth Workshop?" + +--- Results --- +[0] (Distance: 0.123) +Location: CHAPTER 1: The Dawn of Thinking Machines, PAGE 3 +Content: The 1956 Dartmouth Workshop is widely considered the founding event of AI as a field. John McCarthy, Marvin Minsky, Nathaniel Rochester, and Claude Shannon brought together... +--------------------------- +``` + +## Customization + +### Using a Different Document + +Replace `document.txt` with your own document.
The chunking logic will automatically process it based on CHAPTER and PAGE markers. + +### Using a Different Embedding Function + +Modify `index.php` to use a different embedding function: + +```php +use Codewithkyrian\ChromaDB\Embeddings\OpenAIEmbeddingFunction; + +$ef = new OpenAIEmbeddingFunction($config['openai_key']); +``` + +### Custom Chunking Strategy + +Modify the `chunkDocument()` function to implement your own chunking logic (e.g., by sentence, by paragraph, fixed-size chunks, etc.). + +## Troubleshooting + +**Error: Chroma Cloud API Key is required** +- Set `CHROMA_API_KEY` environment variable or use `--api-key` argument + +**Error: Jina API Key is required** +- Set `JINA_API_KEY` environment variable or use `--jina-key` argument + +**Error: Collection not found** +- Run ingestion mode first to create and populate the collection + +**No results returned** +- Ensure the collection was successfully ingested +- Try different query phrasings +- Check that the query is related to the document content + diff --git a/examples/document-chunking-cloud/document.txt b/examples/document-chunking-cloud/document.txt new file mode 100644 index 0000000..a589576 --- /dev/null +++ b/examples/document-chunking-cloud/document.txt @@ -0,0 +1,25 @@ +THE EVOLUTION OF ARTIFICIAL INTELLIGENCE + +CHAPTER 1: The Dawn of Thinking Machines +PAGE 1 +The quest to create machines that can think is as old as storytelling itself. From the automatons of Greek mythology to the Golems of Jewish folklore, humanity has always dreamed of breathing life into the inanimate. However, it wasn't until the 20th century that the mathematical foundations for Artificial Intelligence were laid. Ada Lovelace, often considered the first computer programmer, speculated that the Analytical Engine might act upon other things besides numbers. +PAGE 2 +In 1950, Alan Turing proposed the famous "Turing Test" as a measure of machine intelligence. He asked, "Can machines think?" 
and suggested that if a machine could converse with a human without being distinguished from another human, it could be said to "think". This period marked the birth of symbolic AI, where researchers believed that intelligence could be reduced to symbol manipulation. +PAGE 3 +The 1956 Dartmouth Workshop is widely considered the founding event of AI as a field. John McCarthy, Marvin Minsky, Nathaniel Rochester, and Claude Shannon brought together researchers to discuss "thinking machines". Optimism was high; Minsky famously predicted that within a generation, the problem of creating 'artificial intelligence' would be substantially solved. + +CHAPTER 2: Deep Learning and Neural Networks +PAGE 1 +While early AI focused on logic and rules, another approach was brewing: connectionism. Inspired by the human brain, artificial neural networks aimed to learn from data rather than following hard-coded instructions. The Perceptron, developed by Frank Rosenblatt in 1958, was an early model of a single neuron, capable of simple binary classification. +PAGE 2 +However, neural networks faced a "winter" in the 1970s and 80s due to computational limitations and the inability to train deep networks. It wasn't until the mid-2000s, with the advent of powerful GPUs and big data, that "Deep Learning" re-emerged. Researchers like Geoffrey Hinton showed that multi-layered networks could learn complex patterns, leading to breakthroughs in image and speech recognition. +PAGE 3 +The turning point came in 2012 with AlexNet, a deep convolutional neural network that dominated the ImageNet competition. This victory demonstrated the undeniable power of deep learning, sparking an explosion of investment and research. Suddenly, computers could see, hear, and translate languages with near-human accuracy. + +CHAPTER 3: The Generative Era +PAGE 1 +In the 2020s, AI shifted from merely analyzing data to creating it. 
Generative AI, powered by architectures like the Transformer (introduced by Google in 2017), enabled models to understand and generate human-like text. The concept of "Attention" allowed these models to weigh the importance of different words in a sentence, capturing context like never before. +PAGE 2 +Large Language Models (LLMs) like GPT-3 and GPT-4 demonstrated emergent abilities. They could write code, compose poetry, solve math problems, and even reason through complex tasks. This era also saw the rise of diffusion models in image generation, allowing users to create stunning visual art from simple text prompts. +PAGE 3 +As we stand on the brink of Artificial General Intelligence (AGI), the focus shifts to alignment and safety. Ensuring that these powerful systems act in accordance with human values is the defining challenge of our time. The journey from the Dartmouth Workshop to ChatGPT has been long, but in many ways, it is just beginning. diff --git a/examples/document-chunking-cloud/index.php b/examples/document-chunking-cloud/index.php new file mode 100644 index 0000000..29a8475 --- /dev/null +++ b/examples/document-chunking-cloud/index.php @@ -0,0 +1,176 @@ +connect(); +echo "Connected to Chroma Cloud version: " . $client->version() . "\n"; + +$ef = new JinaEmbeddingFunction($config['jina_key']); + +try { + if ($mode === 'ingest') { + echo "Starting Ingestion...\n"; + + if (!file_exists($docPath)) + die("Document not found: $docPath\n"); + + $records = chunkDocument($docPath); + echo "Parsed " . count($records) . " chunks from document.\n"; + + try { + $client->deleteCollection($config['collection_name']); + } catch (\Exception $e) { + } // Clean start + $collection = $client->createCollection($config['collection_name'], null, $ef); + + echo "Embedding and adding " . count($records) . 
" items...\n"; + $collection->add($records); + + echo "Ingestion Complete!\n"; + } elseif ($mode === 'query') { + echo "Querying: \"$queryText\"\n"; + + $collection = $client->getCollection($config['collection_name'], $ef); + + $response = $collection->query( + queryTexts: [$queryText], + include: [Includes::Documents, Includes::Metadatas, Includes::Distances], + nResults: 3, + ); + + echo "\n--- Results ---\n"; + $resultRecords = $response->asRecords(); + + foreach ($resultRecords[0] as $index => $record) { + echo "[$index] (Distance: " . ($record->distance ?? 'N/A') . ")\n"; + echo "Location: {$record->metadata['chapter']}, {$record->metadata['page']}\n"; + echo "Content: " . substr($record->document, 0, 150) . "...\n"; + echo "---------------------------\n"; + } + } +} catch (\Exception $e) { + echo "Error: " . $e->getMessage() . "\n"; + exit(1); +} + +/** + * Parses CLI arguments into a configuration array. + * + * @param string[] $args + * @return array{ + * api_key: string, + * tenant: string, + * database: string, + * jina_key: string, + * collection_name: string, + * mode: string|null, + * query: string + * } + */ +function parseConfig(array $args): array +{ + $config = [ + 'api_key' => getenv('CHROMA_API_KEY') ?: '', + 'tenant' => 'default_tenant', + 'database' => 'default_database', + 'jina_key' => getenv('JINA_API_KEY') ?: '', + 'collection_name' => 'history_of_ai', + 'mode' => null, + 'query' => "Which event marked the birth of symbolic AI?", + ]; + + foreach ($args as $i => $arg) { + if ($arg === '-mode') + $config['mode'] = $args[$i + 1] ?? null; + if ($arg === '--query') + $config['query'] = $args[$i + 1] ?? 
$config['query']; + if ($arg === '--api-key') + $config['api_key'] = $args[$i + 1] ?? $config['api_key']; // flag given without a value: keep the env default + if ($arg === '--tenant') + $config['tenant'] = $args[$i + 1] ?? $config['tenant']; + if ($arg === '--database') + $config['database'] = $args[$i + 1] ?? $config['database']; + if ($arg === '--jina-key') + $config['jina_key'] = $args[$i + 1] ?? $config['jina_key']; + } + + if (!$config['api_key']) { + die("Error: Chroma Cloud API Key is required. Set CHROMA_API_KEY env var or pass --api-key.\n"); + } + if (!$config['jina_key']) { + die("Error: Jina API Key is required (for embeddings). Set JINA_API_KEY env var or pass --jina-key.\n"); + } + if (!$config['mode'] || !in_array($config['mode'], ['ingest', 'query'], true)) { + die("Usage: php index.php -mode [ingest|query] [--query \"text\"] [--api-key key] [--jina-key key] ...\n"); + } + + return $config; +} + +/** + * Reads the document and chunks it into Records: text is accumulated until the next CHAPTER or PAGE marker line, then flushed as one Record tagged with the current chapter and page. + * + * @param string $path Path of the text file to read. + * @return Record[] + */ +function chunkDocument(string $path): array +{ + $content = file_get_contents($path); + $lines = explode("\n", $content); + $records = []; + $currentChapter = "Intro"; + $currentPage = "1"; + $buffer = ""; + + $createRecord = function ($text, $chapter, $page) { + return Record::make(uniqid("chunk_")) + ->withDocument($text) + ->withMetadata(['chapter' => $chapter, 'page' => $page]); + }; + + foreach ($lines as $line) { + $line = trim($line); + if ($line === '') + continue; + + if (str_starts_with($line, 'CHAPTER')) { + if ($buffer !== '') + $records[] = $createRecord($buffer, $currentChapter, $currentPage); + $buffer = ""; + $currentChapter = $line; + } elseif (str_starts_with($line, 'PAGE')) { + if ($buffer !== '') + $records[] = $createRecord($buffer, $currentChapter, $currentPage); + $buffer = ""; + $currentPage = $line; + } else { + if ($buffer !== '') + $buffer .= " "; + $buffer .= $line; + } + } + if ($buffer !== '') + $records[] = $createRecord($buffer, $currentChapter, $currentPage); + + return $records; +} + +// Suggested Queries: +// - "What happened at the Dartmouth
Workshop?" +// - "Who proposed the Turing Test?" +// - "What was the significance of AlexNet in 2012?" +// - "How do Large Language Models and Generative AI work?" +// - "Who is considered the first computer programmer?" diff --git a/src/Api.php b/src/Api.php new file mode 100644 index 0000000..976f559 --- /dev/null +++ b/src/Api.php @@ -0,0 +1,504 @@ +sendRequest('GET', '/api/v2/auth/identity'); + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Retrieves a collection by Chroma Resource Name (CRN). + * + * @param string $crn The Chroma Resource Name of the collection. + * @param string $database The database name. + * @param string $tenant The tenant name. + */ + public function getCollectionByCrn(string $crn, string $database, string $tenant): Collection + { + $response = $this->sendRequest('GET', "/api/v2/collections/{$crn}"); + + return Collection::fromArray(json_decode($response->getBody()->getContents(), true), $this, $database, $tenant); + } + + /** + * Returns the health of the server and executor. + */ + public function healthcheck(): array + { + $response = $this->sendRequest('GET', '/api/v2/healthcheck'); + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Returns the current time in nanoseconds since epoch. + */ + public function heartbeat(): array + { + $response = $this->sendRequest('GET', '/api/v2/heartbeat'); + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Returns basic readiness information about the server. 
+ */ + public function preFlightChecks(): mixed + { + $response = $this->sendRequest('GET', '/api/v2/pre-flight-checks'); + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Resets the database and all collections (only authorized users can reset the database) + */ + public function reset(): bool + { + $response = $this->sendRequest('POST', '/api/v2/reset'); + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Returns the version of the ChromaDB server. + */ + public function version(): string + { + $response = $this->sendRequest('GET', '/api/v2/version'); + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Creates a new tenant. + */ + public function createTenant(CreateTenantRequest $request): void + { + $this->sendRequest('POST', '/api/v2/tenants', [ + 'json' => $request->toArray(), + ]); + } + + /** + * Retrieves a tenant by name. + */ + public function getTenant(string $tenant): ?Tenant + { + $response = $this->sendRequest('GET', "/api/v2/tenants/$tenant"); + + $result = json_decode($response->getBody()->getContents(), true); + + return Tenant::fromArray($result); + } + + /** + * Updates a tenant. + */ + public function updateTenant(string $tenant, UpdateTenantRequest $request): void + { + $this->sendRequest('PATCH', "/api/v2/tenants/$tenant", [ + 'json' => $request->toArray(), + ]); + } + + /** + * Creates a new database for a given tenant. + * + * @param string $tenant Tenant ID to associate with the new database + * @param CreateDatabaseRequest $request The request to create the database. + */ + public function createDatabase(string $tenant, CreateDatabaseRequest $request): void + { + $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases", [ + 'json' => $request->toArray() + ]); + } + + /** + * Lists all databases for a tenant. + * + * @param string $tenant The tenant ID to list databases for. 
+ * @param int $limit Optional limit on the number of databases to return. + * @param int $offset Optional offset on the number of databases to return. + * + * @return Database[] + */ + public function listDatabases(string $tenant, ?int $limit = null, ?int $offset = null): array + { + $response = $this->sendRequest('GET', "/api/v2/tenants/$tenant/databases", [ + 'query' => [ + 'limit' => $limit, + 'offset' => $offset, + ], + ]); + + $result = json_decode($response->getBody()->getContents(), true); + + return array_map(fn(array $item) => Database::fromArray($item), $result); + } + + /** + * Retrieves a database by name. + * + * @param string $database The database name to retrieve. + * @param string $tenant The tenant ID to retrieve the database from. + * + * @return Database + */ + public function getDatabase(string $database, string $tenant): Database + { + $response = $this->sendRequest('GET', "/api/v2/tenants/$tenant/databases/$database"); + + $result = json_decode($response->getBody()->getContents(), true); + + return Database::fromArray($result); + } + + /** + * Deletes a database by name. + * + * @param string $database The database name to delete. + * @param string $tenant The tenant ID to delete the database from. + */ + public function deleteDatabase(string $database, string $tenant): void + { + $this->sendRequest('DELETE', "/api/v2/tenants/$tenant/databases/$database"); + } + + /** + * Lists all collections in the specified database. + * + * @param string $database The database name to list collections for. + * @param string $tenant The tenant ID to list collections for. + * @param int $limit Optional limit on the number of collections to return. + * @param int $offset Optional offset on the number of collections to return. 
+ * + * @return Collection[] + */ + public function listCollections(string $database, string $tenant, ?int $limit = null, ?int $offset = null): array + { + $response = $this->sendRequest('GET', "/api/v2/tenants/$tenant/databases/$database/collections", [ + 'query' => [ + 'limit' => $limit, + 'offset' => $offset, + ], + ]); + + $result = json_decode($response->getBody()->getContents(), true); + + return array_map(fn(array $item) => Collection::fromArray($item, $this, $database, $tenant), $result); + } + + /** + * Creates a new collection under the specified database. + * + * @param string $database The database name to create the collection for. + * @param string $tenant The tenant ID to create the collection for. + * @param CreateCollectionRequest $request The request to create the collection. + * + * @return Collection + */ + public function createCollection(string $database, string $tenant, CreateCollectionRequest $request): Collection + { + $response = $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases/$database/collections", [ + 'json' => $request->toArray() + ]); + + $result = json_decode($response->getBody()->getContents(), true); + + return Collection::fromArray($result, $this, $database, $tenant); + } + + /** + * Retrieves a collection by ID or name. + * + * @param string $collectionId The UUID of the collection to retrieve. + * @param string $database The database name to retrieve the collection from. + * @param string $tenant The tenant ID to retrieve the collection from. + * + * @return Collection + */ + public function getCollection(string $collectionId, string $database, string $tenant): Collection + { + $response = $this->sendRequest('GET', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId"); + + $result = json_decode($response->getBody()->getContents(), true); + + return Collection::fromArray($result, $this, $database, $tenant); + } + + /** + * Updates an existing collection's name or metadata. 
+ * + * @param string $collectionId The UUID of the collection to update. + * @param string $database The database name to update the collection in. + * @param string $tenant The tenant ID to update the collection in. + * @param UpdateCollectionRequest $request The request to update the collection. + */ + public function updateCollection(string $collectionId, string $database, string $tenant, UpdateCollectionRequest $request): void + { + $this->sendRequest('PUT', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId", [ + 'json' => $request->toArray(), + ]); + } + + /** + * Forks an existing collection. + * + * @param string $collectionId The UUID of the collection to fork. + * @param string $database The database name to fork the collection from. + * @param string $tenant The tenant ID to fork the collection from. + * @param ForkCollectionRequest $request The request to fork the collection. + * + * @return Collection + */ + public function forkCollection(string $collectionId, string $database, string $tenant, ForkCollectionRequest $request): Collection + { + $response = $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/fork", [ + 'json' => $request->toArray() + ]); + + $result = json_decode($response->getBody()->getContents(), true); + + return Collection::fromArray($result, $this, $database, $tenant); + } + + /** + * Deletes a collection in a given database. + * + * @param string $collectionId The UUID of the collection to delete. + * @param string $database The database name to delete the collection from. + * @param string $tenant The tenant ID to delete the collection from. + */ + public function deleteCollection(string $collectionId, string $database, string $tenant): void + { + $this->sendRequest('DELETE', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId"); + } + + /** + * Retrieves the total number of collections in a given database. 
+ * + * @param string $database The database name to count collections in. + * @param string $tenant The tenant ID to count collections in. + * + * @return int + */ + public function countCollections(string $database, string $tenant): int + { + $response = $this->sendRequest('GET', "/api/v2/tenants/$tenant/databases/$database/collections_count"); + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Adds items to a collection. + * + * @param string $collectionId The UUID of the collection to add items to. + * @param string $database The database name to add items to. + * @param string $tenant The tenant ID to add items to. + * @param AddItemsRequest $request The request to add items to the collection. + */ + public function addCollectionItems(string $collectionId, string $database, string $tenant, AddItemsRequest $request): void + { + $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/add", [ + 'json' => $request->toArray(), + ]); + } + + /** + * Retrieves the number of items in a collection. + * + * @param string $collectionId The UUID of the collection to count items for. + * @param string $database The database name to count items in. + * @param string $tenant The tenant ID to count items in. + * + * @return int + */ + public function countCollectionItems(string $collectionId, string $database, string $tenant): int + { + $response = $this->sendRequest('GET', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/count"); + + return json_decode($response->getBody()->getContents(), true); + } + + /** + * Updates items in a collection. + * + * @param string $collectionId The UUID of the collection to update items in. + * @param string $database The database name to update items in. + * @param string $tenant The tenant ID to update items in. + * @param UpdateItemsRequest $request The request to update items in the collection. 
+ */ + public function updateCollectionItems(string $collectionId, string $database, string $tenant, UpdateItemsRequest $request): void + { + $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/update", [ + 'json' => $request->toArray(), + ]); + } + + /** + * Upserts items in a collection (create if not exists, otherwise update). + * + * @param string $collectionId The UUID of the collection to upsert items in. + * @param string $database The database name to upsert items in. + * @param string $tenant The tenant ID to upsert items in. + * @param AddItemsRequest $request The request to upsert items in the collection. + */ + public function upsertCollectionItems(string $collectionId, string $database, string $tenant, AddItemsRequest $request): void + { + $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/upsert", [ + 'json' => $request->toArray(), + ]); + } + + /** + * Retrieves items from a collection by ID or metadata filter. + * + * @param string $collectionId The UUID of the collection to get items from. + * @param string $database The database name to get items from. + * @param string $tenant The tenant ID to get items from. + * @param GetEmbeddingRequest $request The request to get items from the collection. + * + * @return GetItemsResponse + */ + public function getCollectionItems(string $collectionId, string $database, string $tenant, GetEmbeddingRequest $request): GetItemsResponse + { + $response = $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/get", [ + 'json' => $request->toArray(), + ]); + + $result = json_decode($response->getBody()->getContents(), true); + + return GetItemsResponse::fromArray($result); + } + + /** + * Deletes items from a collection by ID or metadata filter. + * + * @param string $collectionId The UUID of the collection to delete items from. 
+ * @param string $database The database name to delete items from. + * @param string $tenant The tenant ID to delete items from. + * @param DeleteItemsRequest $request The request to delete items from the collection. + */ + public function deleteCollectionItems(string $collectionId, string $database, string $tenant, DeleteItemsRequest $request): void + { + $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/delete", [ + 'json' => $request->toArray(), + ]); + } + + /** + * Query a collection in a variety of ways, including vector search, metadata filtering, and full-text search + * + * @param string $collectionId The UUID of the collection to query. + * @param string $database The database name to query the collection in. + * @param string $tenant The tenant ID to query the collection in. + * @param QueryItemsRequest $request The request to query the collection. + * + * @return QueryItemsResponse + */ + public function queryCollectionItems(string $collectionId, string $database, string $tenant, QueryItemsRequest $request): QueryItemsResponse + { + $response = $this->sendRequest('POST', "/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/query", [ + 'json' => $request->toArray(), + ]); + + $result = json_decode($response->getBody()->getContents(), true); + + return QueryItemsResponse::fromArray($result); + } + + private function sendRequest(string $method, string $path, array $options = []): ResponseInterface + { + $uri = $this->baseUri . $path; + if (isset($options['query'])) { + $uri .= '?' . 
http_build_query($options['query']); + } + + $request = $this->requestFactory->createRequest($method, $uri) + ->withHeader('Content-Type', 'application/json') + ->withHeader('Accept', 'application/json'); + + foreach ($this->headers as $name => $value) { + $request = $request->withHeader($name, $value); + } + + if (isset($options['json'])) { + $body = $this->streamFactory->createStream(json_encode($options['json'])); + $request = $request->withBody($body); + } + + try { + $response = $this->client->sendRequest($request); + } catch (ClientExceptionInterface $e) { + throw new ConnectionException($e->getMessage(), $e->getCode()); + } + + if ($response->getStatusCode() >= 400) { + $this->handleErrorResponse($response); + } + + return $response; + } + + private function handleErrorResponse(ResponseInterface $response): void + { + $statusCode = $response->getStatusCode(); + $body = json_decode($response->getBody()->getContents(), true); + + $errorType = $body['error'] ?? 'UnknownError'; + $message = $body['message'] ?? 'Unknown error occurred'; + + if ($statusCode === 409) { + $errorType = 'UniqueConstraintError'; + } + + throw ChromaException::create($message, $errorType, $statusCode); + } +} diff --git a/src/ChromaDB.php b/src/ChromaDB.php index bbb4af6..3245e0a 100644 --- a/src/ChromaDB.php +++ b/src/ChromaDB.php @@ -4,28 +4,66 @@ namespace Codewithkyrian\ChromaDB; - class ChromaDB { + /** + * Creates a new factory instance to configure a custom ChromaDB Client + */ + public static function factory(): Factory + { + return new Factory(); + } + + /** + * @deprecated Use ChromaDB::local()->connect() or ChromaDB::factory()->connect() instead. + */ public static function client(): Client { return self::factory()->connect(); } /** - * Creates a new factory instance to configure a custom Alchemy Client + * Creates a new factory instance configured for a local/self-hosted ChromaDB instance. 
*/ - public static function factory(): Factory - { - return new Factory(); + public static function local( + string $host = 'http://localhost', + ?int $port = 8000, + ?string $tenant = null, + ?string $database = null + ): Factory { + $factory = self::factory() + ->withHost($host) + ->withPort($port); + + if ($tenant) { + $factory->withTenant($tenant); + } + + if ($database) { + $factory->withDatabase($database); + } + + return $factory; } /** - * Resets the database. This will delete all collections and entries and - * return true if the database was reset successfully. + * Creates a new factory instance configured for Chroma Cloud. */ - public static function reset() : bool + public static function cloud(string $apiKey, ?string $tenant = null, ?string $database = null): Factory { - return (new Factory())->createApiClient()->reset(); + $factory = self::factory() + ->withHost('https://api.trychroma.com') + ->withPort(null) + ->withHeader('X-Chroma-Token', $apiKey); + + if ($tenant) { + $factory->withTenant($tenant); + } + + if ($database) { + $factory->withDatabase($database); + } + + return $factory; } -} \ No newline at end of file +} diff --git a/src/Client.php b/src/Client.php index 9890876..36adb78 100644 --- a/src/Client.php +++ b/src/Client.php @@ -5,37 +5,38 @@ namespace Codewithkyrian\ChromaDB; use Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction; -use Codewithkyrian\ChromaDB\Generated\ChromaApiClient; -use Codewithkyrian\ChromaDB\Generated\Exceptions\ChromaNotFoundException; -use Codewithkyrian\ChromaDB\Generated\Models\Collection; -use Codewithkyrian\ChromaDB\Resources\CollectionResource; +use Codewithkyrian\ChromaDB\Api; +use Codewithkyrian\ChromaDB\Exceptions\NotFoundException; +use Codewithkyrian\ChromaDB\Models\Collection; +use Codewithkyrian\ChromaDB\Requests\CreateDatabaseRequest; +use Codewithkyrian\ChromaDB\Requests\CreateTenantRequest; +use Codewithkyrian\ChromaDB\Requests\CreateCollectionRequest; +use 
Codewithkyrian\ChromaDB\Requests\ForkCollectionRequest; class Client { public function __construct( - public readonly ChromaApiClient $apiClient, - public readonly string $database, - public readonly string $tenant, - ) - { + public readonly Api $api, + public readonly string $database, + public readonly string $tenant, + ) { $this->initDatabaseAndTenant(); } - public function initDatabaseAndTenant(): void { try { - $this->apiClient->getTenant($this->tenant); - } catch (ChromaNotFoundException) { - $createTenantRequest = new Generated\Requests\CreateTenantRequest($this->tenant); - $this->apiClient->createTenant($createTenantRequest); + $this->api->getTenant($this->tenant); + } catch (NotFoundException) { + $createTenantRequest = new CreateTenantRequest($this->tenant); + $this->api->createTenant($createTenantRequest); } try { - $this->apiClient->getDatabase($this->database, $this->tenant); - } catch (ChromaNotFoundException) { - $createDatabaseRequest = new Generated\Requests\CreateDatabaseRequest($this->database); - $this->apiClient->createDatabase($this->tenant, $createDatabaseRequest); + $this->api->getDatabase($this->database, $this->tenant); + } catch (NotFoundException) { + $createDatabaseRequest = new CreateDatabaseRequest($this->database); + $this->api->createDatabase($this->tenant, $createDatabaseRequest); } } @@ -44,7 +45,7 @@ public function initDatabaseAndTenant(): void */ public function version(): string { - return $this->apiClient->version(); + return $this->api->version(); } /** @@ -53,7 +54,7 @@ public function version(): string */ public function heartbeat(): int { - $res = $this->apiClient->heartbeat(); + $res = $this->api->heartbeat(); return $res['nanosecond heartbeat'] ?? 
0; } @@ -65,7 +66,7 @@ public function heartbeat(): int */ public function listCollections(): array { - return $this->apiClient->listCollections($this->database, $this->tenant); + return $this->api->listCollections($this->database, $this->tenant); } @@ -76,22 +77,19 @@ public function listCollections(): array * @param ?array $metadata Optional metadata associated with the collection. * @param ?EmbeddingFunction $embeddingFunction Optional custom embedding function for the collection. * - * @return CollectionResource + * @return Collection */ - public function createCollection(string $name, ?array $metadata = null, ?EmbeddingFunction $embeddingFunction = null): CollectionResource + public function createCollection(string $name, ?array $metadata = null, ?EmbeddingFunction $embeddingFunction = null): Collection { - $request = new Generated\Requests\CreateCollectionRequest($name, $metadata); + $request = new CreateCollectionRequest($name, $metadata); - $collection = $this->apiClient->createCollection($this->database, $this->tenant, $request); + $collection = $this->api->createCollection($this->database, $this->tenant, $request); + if ($embeddingFunction) { + $collection->setEmbeddingFunction($embeddingFunction); + } - return CollectionResource::make( - $collection, - $this->database, - $this->tenant, - $embeddingFunction, - $this->apiClient - ); + return $collection; } /** @@ -101,21 +99,19 @@ public function createCollection(string $name, ?array $metadata = null, ?Embeddi * @param ?array $metadata Optional metadata associated with the collection. * @param ?EmbeddingFunction $embeddingFunction Optional custom embedding function for the collection. 
* - * @return CollectionResource + * @return Collection */ - public function getOrCreateCollection(string $name, ?array $metadata = null, ?EmbeddingFunction $embeddingFunction = null): CollectionResource + public function getOrCreateCollection(string $name, ?array $metadata = null, ?EmbeddingFunction $embeddingFunction = null): Collection { - $request = new Generated\Requests\CreateCollectionRequest($name, $metadata, true); + $request = new CreateCollectionRequest($name, $metadata, true); - $collection = $this->apiClient->createCollection($this->database, $this->tenant, $request); + $collection = $this->api->createCollection($this->database, $this->tenant, $request); - return CollectionResource::make( - $collection, - $this->database, - $this->tenant, - $embeddingFunction, - $this->apiClient - ); + if ($embeddingFunction) { + $collection->setEmbeddingFunction($embeddingFunction); + } + + return $collection; } /** @@ -125,19 +121,40 @@ public function getOrCreateCollection(string $name, ?array $metadata = null, ?Em * @param string $name The name of the collection. * @param ?EmbeddingFunction $embeddingFunction Optional custom embedding function for the collection. * - * @return CollectionResource + * @return Collection */ - public function getCollection(string $name, ?EmbeddingFunction $embeddingFunction = null): CollectionResource + public function getCollection(string $name, ?EmbeddingFunction $embeddingFunction = null): Collection { - $collection = $this->apiClient->getCollection($name, $this->database, $this->tenant); - - return CollectionResource::make( - $collection, - $this->database, - $this->tenant, - $embeddingFunction, - $this->apiClient - ); + $collection = $this->api->getCollection($name, $this->database, $this->tenant); + + if ($embeddingFunction) { + $collection->setEmbeddingFunction($embeddingFunction); + } + + return $collection; + } + + /** + * Forks an existing collection. + * + * @param string $name The name of the collection to fork. 
+ * @param string $newName The name for the forked collection. + * @param ?EmbeddingFunction $embeddingFunction Optional custom embedding function for the forked collection. + * + * @return Collection + */ + public function forkCollection(string $name, string $newName, ?EmbeddingFunction $embeddingFunction = null): Collection + { + $collection = $this->api->getCollection($name, $this->database, $this->tenant); + $request = new ForkCollectionRequest($newName); + + $forkedCollection = $this->api->forkCollection($collection->id, $this->database, $this->tenant, $request); + + if ($embeddingFunction) { + $forkedCollection->setEmbeddingFunction($embeddingFunction); + } + + return $forkedCollection; } /** @@ -147,7 +164,7 @@ public function getCollection(string $name, ?EmbeddingFunction $embeddingFunctio */ public function deleteCollection(string $name): void { - $this->apiClient->deleteCollection($name, $this->database, $this->tenant); + $this->api->deleteCollection($name, $this->database, $this->tenant); } /** @@ -162,6 +179,8 @@ public function deleteAllCollections(): void } } - - -} \ No newline at end of file + public function reset(): bool + { + return $this->api->reset(); + } +} diff --git a/src/Embeddings/HuggingFaceEmbeddingServerFunction.php b/src/Embeddings/HuggingFaceEmbeddingServerFunction.php index fdcd469..244c272 100644 --- a/src/Embeddings/HuggingFaceEmbeddingServerFunction.php +++ b/src/Embeddings/HuggingFaceEmbeddingServerFunction.php @@ -5,37 +5,41 @@ namespace Codewithkyrian\ChromaDB\Embeddings; -use GuzzleHttp\Client; -use GuzzleHttp\Exception\GuzzleException; +use Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction; +use Http\Discovery\Psr17FactoryDiscovery; +use Http\Discovery\Psr18ClientDiscovery; +use Psr\Http\Client\ClientInterface; +use Psr\Http\Message\RequestFactoryInterface; +use Psr\Http\Message\StreamFactoryInterface; class HuggingFaceEmbeddingServerFunction implements EmbeddingFunction { + private ClientInterface $httpClient; + private 
RequestFactoryInterface $requestFactory; + private StreamFactoryInterface $streamFactory; public function __construct( - public readonly string $baseUrl = 'http://localhost:8080', - ) - { + private readonly string $url, + ) { + $this->httpClient = Psr18ClientDiscovery::find(); + $this->requestFactory = Psr17FactoryDiscovery::findRequestFactory(); + $this->streamFactory = Psr17FactoryDiscovery::findStreamFactory(); } public function generate(array $texts): array { - $client = new Client([ - 'base_uri' => $this->baseUrl, - 'headers' => [ - 'Content-Type' => 'application/json', - ] - ]); - - try { - $response = $client->post('embed', [ - 'json' => [ - 'inputs' => $texts, - ] - ]); - } catch (GuzzleException $e) { - throw new \RuntimeException('Failed to generate embeddings', 0, $e); - } - - return json_decode($response->getBody()->getContents(), true); + $request = $this->requestFactory->createRequest('POST', $this->url) + ->withHeader('Content-Type', 'application/json'); + + $body = $this->streamFactory->createStream(json_encode([ + 'inputs' => $texts, + ])); + + $request = $request->withBody($body); + + $response = $this->httpClient->sendRequest($request); + $embeddings = json_decode($response->getBody()->getContents(), true); + + return $embeddings; } } \ No newline at end of file diff --git a/src/Embeddings/JinaEmbeddingFunction.php b/src/Embeddings/JinaEmbeddingFunction.php index b01412d..19f3761 100644 --- a/src/Embeddings/JinaEmbeddingFunction.php +++ b/src/Embeddings/JinaEmbeddingFunction.php @@ -6,27 +6,25 @@ namespace Codewithkyrian\ChromaDB\Embeddings; use Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction; -use GuzzleHttp\Client; -use GuzzleHttp\Exception\GuzzleException; -use Psr\Http\Client\ClientExceptionInterface; +use Http\Discovery\Psr17FactoryDiscovery; +use Http\Discovery\Psr18ClientDiscovery; +use Psr\Http\Client\ClientInterface; +use Psr\Http\Message\RequestFactoryInterface; +use Psr\Http\Message\StreamFactoryInterface; class 
JinaEmbeddingFunction implements EmbeddingFunction { - private Client $client; + private ClientInterface $httpClient; + private RequestFactoryInterface $requestFactory; + private StreamFactoryInterface $streamFactory; public function __construct( - public readonly string $apiKey, - public readonly string $model = 'jina-embeddings-v2-base-en', - ) - { - $this->client = new Client([ - 'base_uri' => 'https://api.jina.ai/v1/', - 'headers' => [ - 'Authorization' => "Bearer $this->apiKey", - 'Content-Type' => 'application/json', - 'Accept-Encoding' => 'identity', - ] - ]); + private readonly string $apiKey, + private readonly string $model = 'jina-embeddings-v2-base-en' + ) { + $this->httpClient = Psr18ClientDiscovery::find(); + $this->requestFactory = Psr17FactoryDiscovery::findRequestFactory(); + $this->streamFactory = Psr17FactoryDiscovery::findStreamFactory(); } /** @@ -34,21 +32,20 @@ public function __construct( */ public function generate(array $texts): array { - try { - $response = $this->client->post('embeddings', [ - 'json' => [ - 'model' => $this->model, - 'input' => $texts, - ] - ]); - - $result = json_decode($response->getBody()->getContents(), true); - $embeddings = $result['data']; - usort($embeddings, fn($a, $b) => $a['index'] <=> $b['index']); - - return array_map(fn($embedding) => $embedding['embedding'], $embeddings); - } catch (ClientExceptionInterface $e) { - throw new \RuntimeException("Error calling Jina AI API: {$e->getMessage()}", 0, $e); - } + $request = $this->requestFactory->createRequest('POST', 'https://api.jina.ai/v1/embeddings') + ->withHeader('Authorization', 'Bearer ' . 
$this->apiKey) + ->withHeader('Content-Type', 'application/json'); + + $body = $this->streamFactory->createStream(json_encode([ + 'model' => $this->model, + 'input' => $texts, + ])); + + $request = $request->withBody($body); + + $response = $this->httpClient->sendRequest($request); + $data = json_decode($response->getBody()->getContents(), true); + + return array_map(fn($item) => $item['embedding'], $data['data']); } } \ No newline at end of file diff --git a/src/Embeddings/MistralAIEmbeddingFunction.php b/src/Embeddings/MistralAIEmbeddingFunction.php index 1884351..b09f377 100644 --- a/src/Embeddings/MistralAIEmbeddingFunction.php +++ b/src/Embeddings/MistralAIEmbeddingFunction.php @@ -5,29 +5,26 @@ namespace Codewithkyrian\ChromaDB\Embeddings; -use GuzzleHttp\Client; -use Psr\Http\Client\ClientExceptionInterface; +use Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction; +use Http\Discovery\Psr17FactoryDiscovery; +use Http\Discovery\Psr18ClientDiscovery; +use Psr\Http\Client\ClientInterface; +use Psr\Http\Message\RequestFactoryInterface; +use Psr\Http\Message\StreamFactoryInterface; class MistralAIEmbeddingFunction implements EmbeddingFunction { - private Client $client; + private ClientInterface $httpClient; + private RequestFactoryInterface $requestFactory; + private StreamFactoryInterface $streamFactory; public function __construct( - public readonly string $apiKey, - public readonly string $organization = '', - public readonly string $model = 'mistral-embed', - ) - { - $headers = [ - 'Authorization' => "Bearer $this->apiKey", - 'Content-Type' => 'application/json', - ]; - - - $this->client = new Client([ - 'base_uri' => 'https://api.mistral.ai/v1/', - 'headers' => $headers - ]); + private readonly string $apiKey, + private readonly string $model = 'mistral-embed' + ) { + $this->httpClient = Psr18ClientDiscovery::find(); + $this->requestFactory = Psr17FactoryDiscovery::findRequestFactory(); + $this->streamFactory = Psr17FactoryDiscovery::findStreamFactory(); } 
/** @@ -35,21 +32,20 @@ public function __construct( */ public function generate(array $texts): array { - try { - $response = $this->client->post('embeddings', [ - 'json' => [ - 'model' => $this->model, - 'input' => $texts, - ] - ]); - - $result = json_decode($response->getBody()->getContents(), true); - $embeddings = $result['data']; - usort($embeddings, fn($a, $b) => $a['index'] <=> $b['index']); - - return array_map(fn($embedding) => $embedding['embedding'], $embeddings); - } catch (ClientExceptionInterface $e) { - throw new \RuntimeException("Error calling MistralAI API: {$e->getMessage()}", 0, $e); - } + $request = $this->requestFactory->createRequest('POST', 'https://api.mistral.ai/v1/embeddings') + ->withHeader('Authorization', 'Bearer ' . $this->apiKey) + ->withHeader('Content-Type', 'application/json'); + + $body = $this->streamFactory->createStream(json_encode([ + 'model' => $this->model, + 'input' => $texts, + ])); + + $request = $request->withBody($body); + + $response = $this->httpClient->sendRequest($request); + $data = json_decode($response->getBody()->getContents(), true); + + return array_map(fn($item) => $item['embedding'], $data['data']); } } diff --git a/src/Embeddings/OllamaEmbeddingFunction.php b/src/Embeddings/OllamaEmbeddingFunction.php index e5684be..e028268 100644 --- a/src/Embeddings/OllamaEmbeddingFunction.php +++ b/src/Embeddings/OllamaEmbeddingFunction.php @@ -2,52 +2,51 @@ declare(strict_types=1); - namespace Codewithkyrian\ChromaDB\Embeddings; -use GuzzleHttp\Client; +use Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction; +use Http\Discovery\Psr17FactoryDiscovery; +use Http\Discovery\Psr18ClientDiscovery; +use Psr\Http\Client\ClientInterface; +use Psr\Http\Message\RequestFactoryInterface; +use Psr\Http\Message\StreamFactoryInterface; class OllamaEmbeddingFunction implements EmbeddingFunction { - private Client $client; + private ClientInterface $httpClient; + private RequestFactoryInterface $requestFactory; + private 
StreamFactoryInterface $streamFactory; public function __construct( - public readonly string $baseUrl = 'http://localhost:11434', - public readonly string $model = 'all-minilm', - ) - { - $this->client = new Client([ - 'base_uri' => $this->baseUrl, - 'headers' => [ - 'Content-Type' => 'application/json', - ] - ]); + private readonly string $baseUrl = 'http://localhost:11434', + private readonly string $model = 'all-minilm', + ) { + $this->httpClient = Psr18ClientDiscovery::find(); + $this->requestFactory = Psr17FactoryDiscovery::findRequestFactory(); + $this->streamFactory = Psr17FactoryDiscovery::findStreamFactory(); } - /** - * @inheritDoc - */ public function generate(array $texts): array { - try { - $embeddings = []; + $embeddings = []; - foreach ($texts as $text) { - $response = $this->client->post('api/embeddings', [ - 'json' => [ - 'prompt' => $text, - 'model' => $this->model, - ] - ]); + foreach ($texts as $text) { + $request = $this->requestFactory->createRequest('POST', $this->baseUrl . 
'/api/embeddings') + ->withHeader('Content-Type', 'application/json'); - $result = json_decode($response->getBody()->getContents(), true); + $body = $this->streamFactory->createStream(json_encode([ + 'prompt' => $text, + 'model' => $this->model, + ])); - $embeddings[] = $result['embedding']; - } + $request = $request->withBody($body); - return $embeddings; - } catch (\Exception $e) { - throw new \RuntimeException('Failed to generate embeddings', 0, $e); + $response = $this->httpClient->sendRequest($request); + $result = json_decode($response->getBody()->getContents(), true); + + $embeddings[] = $result['embedding']; } + + return $embeddings; } } \ No newline at end of file diff --git a/src/Embeddings/OpenAIEmbeddingFunction.php b/src/Embeddings/OpenAIEmbeddingFunction.php index 2170d1b..437dab3 100644 --- a/src/Embeddings/OpenAIEmbeddingFunction.php +++ b/src/Embeddings/OpenAIEmbeddingFunction.php @@ -5,32 +5,27 @@ namespace Codewithkyrian\ChromaDB\Embeddings; -use GuzzleHttp\Client; -use Psr\Http\Client\ClientExceptionInterface; +use Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction; +use Http\Discovery\Psr17FactoryDiscovery; +use Http\Discovery\Psr18ClientDiscovery; +use Psr\Http\Client\ClientInterface; +use Psr\Http\Message\RequestFactoryInterface; +use Psr\Http\Message\StreamFactoryInterface; class OpenAIEmbeddingFunction implements EmbeddingFunction { - private Client $client; + private ClientInterface $httpClient; + private RequestFactoryInterface $requestFactory; + private StreamFactoryInterface $streamFactory; public function __construct( - public readonly string $apiKey, - public readonly string $organization = '', - public readonly string $model = 'text-embedding-ada-002', - ) - { - $headers = [ - 'Authorization' => "Bearer $this->apiKey", - 'Content-Type' => 'application/json', - ]; - - if (!empty($this->organization)) { - $headers['OpenAI-Organization'] = $this->organization; - } - - $this->client = new Client([ - 'base_uri' => 
'https://api.openai.com/v1/', - 'headers' => $headers - ]); + private readonly string $apiKey, + private readonly string $organizationId = '', + private readonly string $model = 'text-embedding-ada-002' + ) { + $this->httpClient = Psr18ClientDiscovery::find(); + $this->requestFactory = Psr17FactoryDiscovery::findRequestFactory(); + $this->streamFactory = Psr17FactoryDiscovery::findStreamFactory(); } /** @@ -38,21 +33,24 @@ public function __construct( */ public function generate(array $texts): array { - try { - $response = $this->client->post('embeddings', [ - 'json' => [ - 'model' => $this->model, - 'input' => $texts, - ] - ]); - - $result = json_decode($response->getBody()->getContents(), true); - $embeddings = $result['data']; - usort($embeddings, fn($a, $b) => $a['index'] <=> $b['index']); - - return array_map(fn($embedding) => $embedding['embedding'], $embeddings); - } catch (ClientExceptionInterface $e) { - throw new \RuntimeException("Error calling OpenAI API: {$e->getMessage()}", 0, $e); + $request = $this->requestFactory->createRequest('POST', 'https://api.openai.com/v1/embeddings') + ->withHeader('Authorization', 'Bearer ' . 
$this->apiKey) + ->withHeader('Content-Type', 'application/json'); + + if (!empty($this->organizationId)) { + $request = $request->withHeader('OpenAI-Organization', $this->organizationId); } + + $body = $this->streamFactory->createStream(json_encode([ + 'model' => $this->model, + 'input' => $texts, + ])); + + $request = $request->withBody($body); + + $response = $this->httpClient->sendRequest($request); + $data = json_decode($response->getBody()->getContents(), true); + + return array_map(fn($item) => $item['embedding'], $data['data']); } } \ No newline at end of file diff --git a/src/Generated/Exceptions/ChromaException.php b/src/Exceptions/ChromaException.php similarity index 50% rename from src/Generated/Exceptions/ChromaException.php rename to src/Exceptions/ChromaException.php index b0286a3..27dd634 100644 --- a/src/Generated/Exceptions/ChromaException.php +++ b/src/Exceptions/ChromaException.php @@ -2,23 +2,21 @@ declare(strict_types=1); - -namespace Codewithkyrian\ChromaDB\Generated\Exceptions; +namespace Codewithkyrian\ChromaDB\Exceptions; class ChromaException extends \Exception { - public static function throwSpecific(string $message, string $type, int $code) + public static function create(string $message, string $type, int $code): self { - throw match ($type) { - 'NotFoundError' => new ChromaNotFoundException($message, $code), - 'AuthorizationError' => new ChromaAuthorizationException($message, $code), - 'ValueError' => new ChromaValueException($message, $code), - 'UniqueConstraintError' => new ChromaUniqueConstraintException($message, $code), - 'DimensionalityError' => new ChromaDimensionalityException($message, $code), - 'InvalidCollection' => new ChromaInvalidCollectionException($message, $code), - 'TypeError' => new ChromaTypeException($message, $code), - 'InvalidArgumentError' => new ChromaInvalidArgumentException($message, $code), + return match ($type) { + 'NotFoundError' => new NotFoundException($message, $code), + 'ValueError' => new 
ValueException($message, $code), + 'UniqueConstraintError' => new UniqueConstraintException($message, $code), + 'DimensionalityError' => new DimensionalityException($message, $code), + 'InvalidCollection' => new InvalidCollectionException($message, $code), + 'TypeError' => new TypeException($message, $code), + 'InvalidArgumentError' => new InvalidArgumentException($message, $code), default => new self($message, $code), }; } diff --git a/src/Exceptions/ConnectionException.php b/src/Exceptions/ConnectionException.php new file mode 100644 index 0000000..8dacab9 --- /dev/null +++ b/src/Exceptions/ConnectionException.php @@ -0,0 +1,10 @@ + */ - protected ChromaApiClient $apiClient; + protected array $headers = []; /** * The url of the client to use for the requests. @@ -60,7 +49,7 @@ public function withHost(string $host): self /** * The port of the client to use for the requests. */ - public function withPort(int $port): self + public function withPort(?int $port): self { $this->port = $port; return $this; @@ -86,47 +75,55 @@ public function withTenant(string $tenant): self /** * The bearer token used to authenticate requests. + * + * @deprecated Use withHeader('X-Chroma-Token', $authToken) instead. */ public function withAuthToken(string $authToken): self { - $this->authToken = $authToken; + return $this->withHeader('X-Chroma-Token', $authToken); + } + + /** + * Add a header to the requests. + */ + public function withHeader(string $name, string $value): self + { + $this->headers[$name] = $value; return $this; } /** - * The http client to use for the requests. + * Add multiple headers to the requests. 
+ * + * @param array $headers */ - public function withHttpClient(\GuzzleHttp\Client $httpClient): self + public function withHeaders(array $headers): self { - $this->httpClient = $httpClient; + $this->headers = array_merge($this->headers, $headers); return $this; } public function connect(): Client { - $this->apiClient = $this->createApiClient(); + $api = $this->createApi(); - return new Client($this->apiClient, $this->database, $this->tenant); + return new Client($api, $this->database, $this->tenant); } - public function createApiClient() : ChromaApiClient + public function createApi(): Api { - $this->baseUrl = $this->host . ':' . $this->port; - - $headers = [ - 'Content-Type' => 'application/json', - 'Accept' => 'application/json', - ]; - - if (!empty($this->authToken)) { - $headers['Authorization'] = 'Bearer ' . $this->authToken; - } - - $this->httpClient ??= new \GuzzleHttp\Client([ - 'base_uri' => $this->baseUrl, - 'headers' => $headers, - ]); - - return new ChromaApiClient($this->httpClient); + $baseUrl = $this->port ? 
"$this->host:$this->port" : $this->host; + + $httpClient = Psr18ClientDiscovery::find(); + $requestFactory = Psr17FactoryDiscovery::findRequestFactory(); + $streamFactory = Psr17FactoryDiscovery::findStreamFactory(); + + return new Api( + $httpClient, + $requestFactory, + $streamFactory, + $baseUrl, + $this->headers + ); } } diff --git a/src/Generated/ChromaApiClient.php b/src/Generated/ChromaApiClient.php deleted file mode 100644 index 99b1bfe..0000000 --- a/src/Generated/ChromaApiClient.php +++ /dev/null @@ -1,356 +0,0 @@ -httpClient->get('/api/v2'); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - return json_decode($response->getBody()->getContents(), true); - } - - - public function version(): string - { - try { - $response = $this->httpClient->get('/api/v2/version'); - - // remove the quo - return trim($response->getBody()->getContents(), '"'); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function heartbeat(): array - { - try { - $response = $this->httpClient->get('/api/v2/heartbeat'); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - return json_decode($response->getBody()->getContents(), true); - } - - public function preFlightChecks(): mixed - { - try { - $response = $this->httpClient->get('/api/v2/pre-flight-checks'); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - return json_decode($response->getBody()->getContents(), true); - } - - - public function createDatabase(string $tenant, CreateDatabaseRequest $request): void - { - try { - $this->httpClient->post("/api/v2/tenants/$tenant/databases", [ - 'json' => $request->toArray() - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function getDatabase(string $database, string $tenant): Database - { - try { - $response = $this->httpClient->get("/api/v2/tenants/$tenant/databases/$database"); 
- } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - - $result = json_decode($response->getBody()->getContents(), true); - - return Database::make($result); - } - - public function createTenant(CreateTenantRequest $request): void - { - try { - $this->httpClient->post('/api/v2/tenants', [ - 'json' => $request->toArray(), - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function getTenant(string $tenant): ?Tenant - { - try { - $response = $this->httpClient->get("/api/v2/tenants/$tenant"); - - $result = json_decode($response->getBody()->getContents(), true); - - return Tenant::make($result); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - - public function listCollections(string $database, string $tenant): array - { - try { - $response = $this->httpClient->get("/api/v2/tenants/$tenant/databases/$database/collections"); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - - $result = json_decode($response->getBody()->getContents(), true); - - return array_map(function (array $item) { - return Collection::make($item); - }, $result); - } - - public function createCollection(string $database, string $tenant, CreateCollectionRequest $request): Collection - { - try { - $response = $this->httpClient->post("/api/v2/tenants/$tenant/databases/$database/collections", [ - 'json' => $request->toArray() - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - - $result = json_decode($response->getBody()->getContents(), true); - - return Collection::make($result); - } - - public function getCollection(string $collectionId, string $database, string $tenant): Collection - { - try { - $response = $this->httpClient->get("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId"); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - - 
$result = json_decode($response->getBody()->getContents(), true); - - return Collection::make($result); - } - - public function updateCollection(string $collectionId, string $database, string $tenant, UpdateCollectionRequest $request): void - { - try { - $response = $this->httpClient->put("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId", [ - 'json' => $request->toArray(), - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function deleteCollection(string $collectionId, string $database, string $tenant): void - { - try { - $this->httpClient->delete("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId"); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function add(string $collectionId, string $database, string $tenant, AddEmbeddingRequest $request): void - { - try { - $this->httpClient->post("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/add", [ - 'json' => $request->toArray(), - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function update(string $collectionId, string $database, string $tenant, UpdateEmbeddingRequest $request): void - { - try { - $this->httpClient->post("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/update", [ - 'json' => $request->toArray(), - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function upsert(string $collectionId, string $database, string $tenant, AddEmbeddingRequest $request): void - { - try { - $this->httpClient->post("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/upsert", [ - 'json' => $request->toArray(), - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function get(string $collectionId, string $database, string $tenant, GetEmbeddingRequest 
$request): GetItemsResponse - { - try { - $response = $this->httpClient->post("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/get", [ - 'json' => $request->toArray(), - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - - $result = json_decode($response->getBody()->getContents(), true); - - return GetItemsResponse::from($result); - } - - public function delete(string $collectionId, string $database, string $tenant, DeleteEmbeddingRequest $request): void - { - try { - $this->httpClient->post("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/delete", [ - 'json' => $request->toArray(), - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - } - - public function count(string $collectionId, string $database, string $tenant): int - { - try { - $response = $this->httpClient->get("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/count"); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - - return json_decode($response->getBody()->getContents(), true); - } - - public function getNearestNeighbors(string $collectionId, string $database, string $tenant, QueryEmbeddingRequest $request): QueryItemsResponse - { - try { - $response = $this->httpClient->post("/api/v2/tenants/$tenant/databases/$database/collections/$collectionId/query", [ - 'json' => $request->toArray(), - ]); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - - $result = json_decode($response->getBody()->getContents(), true); - - return QueryItemsResponse::from($result); - } - - public function reset(): bool - { - try { - $response = $this->httpClient->post('/api/v2/reset'); - } catch (ClientExceptionInterface $e) { - $this->handleChromaApiException($e); - } - - return json_decode($response->getBody()->getContents(), true); - } - - private function 
handleChromaApiException(\Exception|ClientExceptionInterface $e): void - { - if ($e instanceof ConnectException) { - $context = $e->getHandlerContext(); - $message = $context['error'] ?? $e->getMessage(); - $code = $context['errno'] ?? $e->getCode(); - throw new ChromaConnectionException($message, $code); - } - - if ($e instanceof RequestException) { - $errorString = $e->getResponse()->getBody()->getContents(); - - if (preg_match('/(?<={"\"error\"\:\")([^"]*)/', $errorString, $matches)) { - $errorString = $matches[1]; - } - - $error = json_decode($errorString, true); - - if ($error !== null) { - - // If the structure is 'error' => 'NotFoundError("Collection not found")' - if (preg_match( - '/^(?P\w+)\((?P.*)\)$/', - $error['error'] ?? '', - $matches - )) { - if (isset($matches['message'])) { - $error_type = $matches['error_type'] ?? 'UnknownError'; - $message = $matches['message']; - - // Remove trailing and leading quotes - if (str_starts_with($message, "'") && str_ends_with($message, "'")) { - $message = substr($message, 1, -1); - } - - ChromaException::throwSpecific($message, $error_type, $e->getCode()); - } - } - - // If the structure is 'detail' => 'Collection not found' - if (isset($error['detail'])) { - $message = $error['detail']; - $error_type = ChromaException::inferTypeFromMessage($message); - - - ChromaException::throwSpecific($message, $error_type, $e->getCode()); - } - - // If the structure is {'error': 'Error Type', 'message' : 'Error message'} - if (isset($error['error']) && isset($error['message'])) { - ChromaException::throwSpecific($error['message'], $error['error'], $e->getCode()); - } - - // If the structure is 'error' => 'Collection not found' - if (isset($error['error'])) { - $message = $error['error']; - $error_type = ChromaException::inferTypeFromMessage($message); - - ChromaException::throwSpecific($message, $error_type, $e->getCode()); - } - } - } - - throw new ChromaException($e->getMessage(), $e->getCode()); - } -} diff --git 
a/src/Generated/Exceptions/ChromaAuthorizationException.php b/src/Generated/Exceptions/ChromaAuthorizationException.php deleted file mode 100644 index c701225..0000000 --- a/src/Generated/Exceptions/ChromaAuthorizationException.php +++ /dev/null @@ -1,11 +0,0 @@ -message}"; - } - - public function toArray(): array - { - return [ - 'loc' => $this->loc, - 'message' => $this->message, - 'type' => $this->type, - ]; - } - -} diff --git a/src/Generated/Models/Collection.php b/src/Generated/Models/Collection.php deleted file mode 100644 index 0c54df2..0000000 --- a/src/Generated/Models/Collection.php +++ /dev/null @@ -1,36 +0,0 @@ - $this->name, - 'id' => $this->id, - 'metadata' => $this->metadata, - ]; - } -} \ No newline at end of file diff --git a/src/Generated/Requests/AddEmbeddingRequest.php b/src/Generated/Requests/AddEmbeddingRequest.php deleted file mode 100644 index 98a3b1e..0000000 --- a/src/Generated/Requests/AddEmbeddingRequest.php +++ /dev/null @@ -1,68 +0,0 @@ -> - */ - public readonly ?array $metadatas, - - /** - * IDs of the items to add. - * - * @var string[] - */ - public readonly array $ids, - - /** - * Optional documents of the items to add. - * - * @var string[] - */ - public readonly ?array $documents, - - public readonly ?array $images, - - ) - { - } - - public static function create(array $data): self - { - return new self( - embeddings: $data['embeddings'] ?? null, - metadatas: $data['metadatas'] ?? null, - ids: $data['ids'], - documents: $data['documents'] ?? null, - images: $data['images'] ?? 
null, - ); - } - - public function toArray(): array - { - return [ - 'embeddings' => $this->embeddings, - 'metadatas' => $this->metadatas, - 'ids' => $this->ids, - 'documents' => $this->documents, - ]; - } -} \ No newline at end of file diff --git a/src/Generated/Requests/DeleteEmbeddingRequest.php b/src/Generated/Requests/DeleteEmbeddingRequest.php deleted file mode 100644 index 6682f56..0000000 --- a/src/Generated/Requests/DeleteEmbeddingRequest.php +++ /dev/null @@ -1,53 +0,0 @@ - - */ - public readonly ?array $where, - - /** - * Optional query condition to filter items to delete based on document content. - * - * @var array - */ - public readonly ?array $whereDocument, - ) - { - } - - public static function create(array $data): self - { - return new self( - ids: $data['ids'] ?? null, - where: $data['where'] ?? null, - whereDocument: $data['where_document'] ?? null, - ); - } - - public function toArray(): array - { - return [ - 'ids' => $this->ids, - 'where' => $this->where, - 'where_document' => $this->whereDocument, - ]; - } - -} \ No newline at end of file diff --git a/src/Generated/Requests/GetEmbeddingRequest.php b/src/Generated/Requests/GetEmbeddingRequest.php deleted file mode 100644 index 9c7dcd4..0000000 --- a/src/Generated/Requests/GetEmbeddingRequest.php +++ /dev/null @@ -1,85 +0,0 @@ - - */ - public readonly ?array $where= null, - - /** - * Optional where clause to filter items by. - * - * @var array - */ - public readonly ?array $whereDocument= null, - - /** - * Sort items. - */ - public readonly ?string $sort= null, - - /** - * Optional limit on the number of items to get. - */ - public readonly ?int $limit= null, - - /** - * Optional offset on the number of items to get. - */ - public readonly ?int $offset= null, - - /** - * Optional list of items to include in the response. 
- * - * @var string[] - */ - public readonly ?array $include= null, - ) - { - } - - public static function create(array $data): self - { - return new self( - ids: $data['ids'] ?? null, - where: $data['where'] ?? null, - whereDocument: $data['where_document'] ?? null, - sort: $data['sort'] ?? null, - limit: $data['limit'] ?? null, - offset: $data['offset'] ?? null, - include: $data['include'] ?? null, - ); - } - - public function toArray(): array - { - return [ - 'ids' => $this->ids, - 'where' => $this->where, - 'whereDocument' => $this->whereDocument, - 'sort' => $this->sort, - 'limit' => $this->limit, - 'offset' => $this->offset, - 'include' => $this->include, - ]; - } -} \ No newline at end of file diff --git a/src/Generated/Requests/QueryEmbeddingRequest.php b/src/Generated/Requests/QueryEmbeddingRequest.php deleted file mode 100644 index dd859ef..0000000 --- a/src/Generated/Requests/QueryEmbeddingRequest.php +++ /dev/null @@ -1,68 +0,0 @@ - - */ - public readonly ?array $where, - - /** - * Optional query condition to filter results based on document content. - * - * @var array - */ - public readonly ?array $whereDocument, - - /** - * Optional query condition to filter results based on embedding content. - * - * @var float[][] - */ - public readonly ?array $queryEmbeddings, - - /** - * Optional number of results to return. Defaults to 10. - */ - public readonly ?int $nResults, - - /** - * Optional list of items to include in the response. - * - * @var string[] - */ - public readonly ?array $include, - ) - { - } - - public static function create(array $data): self - { - return new self( - where: $data['where'] ?? null, - whereDocument: $data['where_document'] ?? null, - queryEmbeddings: $data['query_embeddings'] ?? null, - nResults: $data['n_results'] ?? null, - include: $data['include'] ?? 
null, - ); - } - - public function toArray(): array - { - return array_filter([ - 'where' => $this->where, - 'where_document' => $this->whereDocument, - 'query_embeddings' => $this->queryEmbeddings, - 'n_results' => $this->nResults, - 'include' => $this->include, - ], fn($value) => $value !== null); - } -} \ No newline at end of file diff --git a/src/Generated/Requests/UpdateCollectionRequest.php b/src/Generated/Requests/UpdateCollectionRequest.php deleted file mode 100644 index 73a8af6..0000000 --- a/src/Generated/Requests/UpdateCollectionRequest.php +++ /dev/null @@ -1,42 +0,0 @@ - - */ - public readonly ?array $newMetadata, - - ) - { - } - - public static function create(array $data): self - { - return new self( - newName: $data['new_name'] ?? null, - newMetadata: $data['new_metadata'] ?? null, - ); - } - - public function toArray(): array - { - return array_filter([ - 'new_name' => $this->newName, - 'new_metadata' => $this->newMetadata, - ]); - } -} \ No newline at end of file diff --git a/src/Generated/Requests/UpdateEmbeddingRequest.php b/src/Generated/Requests/UpdateEmbeddingRequest.php deleted file mode 100644 index 144fbfa..0000000 --- a/src/Generated/Requests/UpdateEmbeddingRequest.php +++ /dev/null @@ -1,70 +0,0 @@ -[] - */ - public readonly ?array $metadatas, - - /** - * Optional documents of the items to update. - * - * @var string[] - */ - public readonly ?array $documents, - - /** - * Optional uris of the items to update. - * - * @var string[] - */ - public readonly ?array $images, - ) - { - } - - public static function create(array $data): self - { - return new self( - embeddings: $data['embeddings'] ?? null, - ids: $data['ids'], - metadatas: $data['metadatas'] ?? null, - documents: $data['documents'] ?? null, - images: $data['images'] ?? 
null, - ); - } - - public function toArray(): array - { - return array_filter([ - 'embeddings' => $this->embeddings, - 'ids' => $this->ids, - 'metadatas' => $this->metadatas, - 'documents' => $this->documents, - ]); - } -} \ No newline at end of file diff --git a/src/Generated/Responses/QueryItemsResponse.php b/src/Generated/Responses/QueryItemsResponse.php deleted file mode 100644 index 3142d16..0000000 --- a/src/Generated/Responses/QueryItemsResponse.php +++ /dev/null @@ -1,93 +0,0 @@ -[][] - */ - public readonly ?array $metadatas, - - /** - * List of documents of the items. - * - * @var string[][] - */ - public readonly ?array $documents, - - /** - * List of data of the items. - * - * @var string[][] - */ - public readonly ?array $data, - - /** - * List of uris of the items. - * - * @var string[][] - */ - public readonly ?array $uris, - - /** - * List of distances of the items. - * - * @var float[][] - */ - public readonly ?array $distances, - ) - { - } - - public static function from(array $data): self - { - return new self( - ids: $data['ids'], - embeddings: $data['embeddings'] ?? null, - metadatas: $data['metadatas'] ?? null, - documents: $data['documents'] ?? null, - data: $data['data'] ?? null, - uris: $data['uris'] ?? null, - distances: $data['distances'] ?? null, - ); - } - - public function toArray(): array - { - return array_filter([ - 'ids' => $this->ids, - 'embeddings' => $this->embeddings, - 'metadatas' => $this->metadatas, - 'documents' => $this->documents, - 'data' => $this->data, - 'uris' => $this->uris, - 'distances' => $this->distances, - ]); - } - -} \ No newline at end of file diff --git a/src/Models/Collection.php b/src/Models/Collection.php new file mode 100644 index 0000000..e5b64ef --- /dev/null +++ b/src/Models/Collection.php @@ -0,0 +1,535 @@ + $this->name, + 'id' => $this->id, + 'metadata' => $this->metadata, + ]; + } + + /** + * Add items to the collection. 
+ * + * @param string[]|Record[] $ids The IDs of the items to add, or an array of Record objects. + * @param number[][]|null $embeddings The embeddings of the items to add (optional). + * @param array>|null $metadatas The metadatas of the items to add (optional). + * @param string[]|null $documents The documents of the items to add (optional). + * @return void + */ + public function add( + array $ids, + ?array $embeddings = null, + ?array $metadatas = null, + ?array $documents = null, + ): void { + if (!empty($ids) && $ids[0] instanceof Record) { + $records = $ids; + $ids = []; + $embeddings = []; + $metadatas = []; + $documents = []; + + foreach ($records as $record) { + $ids[] = $record->id; + $embeddings[] = $record->embedding; + $metadatas[] = $record->metadata; + $documents[] = $record->document; + } + } + + $preparedEmbeddings = $this->prepareEmbeddings($embeddings, $documents); + + $validated = $this->validate( + ids: $ids, + embeddings: $preparedEmbeddings, + metadatas: $metadatas, + documents: $documents, + requireEmbeddingsOrDocuments: true, + ); + + $request = new AddItemsRequest( + embeddings: $validated['embeddings'], + metadatas: $validated['metadatas'], + ids: $validated['ids'], + documents: $validated['documents'], + ); + + $this->api->addCollectionItems($this->id, $this->database, $this->tenant, $request); + } + + /** + * Update the embeddings, documents, and/or metadatas of existing items. + * + * @param string[]|Record[] $ids The IDs of the items to update, or an array of Record objects. + * @param number[][]|null $embeddings The embeddings of the items to update (optional). + * @param array>|null $metadatas The metadatas of the items to update (optional). + * @param string[]|null $documents The documents of the items to update (optional). 
+ * + */ + public function update( + array $ids, + ?array $embeddings = null, + ?array $metadatas = null, + ?array $documents = null, + ) { + if (!empty($ids) && $ids[0] instanceof Record) { + $records = $ids; + $ids = []; + $embeddings = []; + $metadatas = []; + $documents = []; + + foreach ($records as $record) { + $ids[] = $record->id; + $embeddings[] = $record->embedding; + $metadatas[] = $record->metadata; + $documents[] = $record->document; + } + } + + $preparedEmbeddings = $this->prepareEmbeddings($embeddings, $documents); + + $validated = $this->validate( + ids: $ids, + embeddings: $preparedEmbeddings, + metadatas: $metadatas, + documents: $documents, + requireEmbeddingsOrDocuments: false, + ); + + $request = new UpdateItemsRequest( + embeddings: $validated['embeddings'], + ids: $validated['ids'], + metadatas: $validated['metadatas'], + documents: $validated['documents'], + ); + + $this->api->updateCollectionItems($this->id, $this->database, $this->tenant, $request); + } + + /** + * Upsert items in the collection. + * + * @param string[]|Record[] $ids The IDs of the items to upsert, or an array of Record objects. + * @param number[][]|null $embeddings The embeddings of the items to upsert (optional). + * @param array>|null $metadatas The metadatas of the items to upsert (optional). + * @param string[]|null $documents The documents of the items to upsert (optional). 
+ * + */ + public function upsert( + array $ids, + ?array $embeddings = null, + ?array $metadatas = null, + ?array $documents = null, + ): void { + if (!empty($ids) && $ids[0] instanceof Record) { + $records = $ids; + $ids = []; + $embeddings = []; + $metadatas = []; + $documents = []; + + foreach ($records as $record) { + $ids[] = $record->id; + $embeddings[] = $record->embedding; + $metadatas[] = $record->metadata; + $documents[] = $record->document; + } + } + + $preparedEmbeddings = $this->prepareEmbeddings($embeddings, $documents); + + $validated = $this->validate( + ids: $ids, + embeddings: $preparedEmbeddings, + metadatas: $metadatas, + documents: $documents, + requireEmbeddingsOrDocuments: true, + ); + + $request = new AddItemsRequest( + embeddings: $validated['embeddings'], + metadatas: $validated['metadatas'], + ids: $validated['ids'], + documents: $validated['documents'], + ); + + $this->api->upsertCollectionItems($this->id, $this->database, $this->tenant, $request); + } + + /** + * Count the number of items in the collection. + */ + public function count(): int + { + return $this->api->countCollectionItems($this->id, $this->database, $this->tenant); + } + + /** + * Get items from the collection. + * + * @param array|null $ids The IDs of the items to get (optional). + * @param array|null $where The where clause to filter items by (optional). + * @param array|null $whereDocument The where clause to filter items by (optional). + * @param int|null $limit The limit on the number of items to get (optional). + * @param int|null $offset The offset on the number of items to get (optional). + * @param string[]|Includes[]|null $include The list of fields to include in the response (optional). 
+ */ + public function get( + ?array $ids = null, + ?array $where = null, + ?array $whereDocument = null, + ?int $limit = null, + ?int $offset = null, + ?array $include = null + ): GetItemsResponse { + $include ??= ['embeddings', 'metadatas', 'distances']; + + $include = array_map(fn($i) => $i instanceof Includes ? $i->value : $i, $include); + + $request = new GetEmbeddingRequest( + ids: $ids, + where: $where, + whereDocument: $whereDocument, + limit: $limit, + offset: $offset, + include: $include, + ); + + return $this->api->getCollectionItems($this->id, $this->database, $this->tenant, $request); + } + + /** + * Retrieves a preview of records from the collection. + * + * @param int $limit The number of entries to return. Defaults to 10. + * @param string[]|Includes[]|null $include The list of fields to include in the response (optional). + */ + public function peek(int $limit = 10, ?array $include = null): GetItemsResponse + { + $include ??= ['embeddings', 'metadatas', 'distances']; + + $include = array_map(fn($i) => $i instanceof Includes ? $i->value : $i, $include); + + $request = new GetEmbeddingRequest( + limit: $limit, + include: $include, + ); + + return $this->api->getCollectionItems($this->id, $this->database, $this->tenant, $request); + } + + /** + * Deletes items from the collection. + * + * @param ?array $ids The IDs of the items to delete. + * @param ?array $where The where clause to filter items to delete based on metadata values (optional). + * @param ?array $whereDocument The where clause to filter to delete based on document content (optional). + */ + public function delete(?array $ids = null, ?array $where = null, ?array $whereDocument = null): void + { + $request = new DeleteItemsRequest( + ids: $ids, + where: $where, + whereDocument: $whereDocument, + ); + + $this->api->deleteCollectionItems($this->id, $this->database, $this->tenant, $request); + } + + /** + * Performs similarity search on the collection. 
+ * + * @param number[][]|null $queryEmbeddings The embeddings of the query (optional). + * @param string[]|null $queryTexts The texts of the query (optional). + * @param int $nResults The number of results to return (optional). + * @param ?array $where The where clause to filter items to search based on metadata values (optional). + * @param ?array $whereDocument The where clause to filter to search based on document content (optional). + * @param string[]|Includes[]|null $include The list of fields to include in the response (optional). + */ + public function query( + ?array $queryEmbeddings = null, + ?array $queryTexts = null, + int $nResults = 10, + ?array $where = null, + ?array $whereDocument = null, + ?array $include = null + ): QueryItemsResponse { + $include ??= ['embeddings', 'metadatas', 'distances']; + + $include = array_map(fn($i) => $i instanceof Includes ? $i->value : $i, $include); + + if ($nResults <= 0) { + throw new InvalidArgumentException('Expected nResults to be a positive integer'); + } + + if ( + !(($queryEmbeddings != null xor $queryTexts != null)) + ) { + throw new InvalidArgumentException( + 'You must provide only one of queryEmbeddings or queryTexts' + ); + } + + $finalEmbeddings = $this->prepareEmbeddings($queryEmbeddings, $queryTexts); + + if ($finalEmbeddings !== null) { + foreach ($finalEmbeddings as $i => $embedding) { + if (!is_array($embedding)) { + throw new InvalidArgumentException(sprintf( + "Expected query embedding at index %d to be an array, got %s", + $i, + gettype($embedding) + )); + } + + foreach ($embedding as $j => $value) { + if (!is_float($value) && !is_int($value)) { + throw new InvalidArgumentException(sprintf( + "Expected query embedding value at index %d.%d to be a float, got %s", + $i, + $j, + gettype($value) + )); + } + } + } + } + + $request = new QueryItemsRequest( + where: $where, + whereDocument: $whereDocument, + queryEmbeddings: $finalEmbeddings, + nResults: $nResults, + include: $include, + ); + + return 
$this->api->queryCollectionItems($this->id, $this->database, $this->tenant, $request); + } + + /** + * Modify the collection name or metadata. + */ + public function modify(string $name, array $metadata): void + { + $request = new UpdateCollectionRequest($name, $metadata); + + $this->api->updateCollection($this->id, $this->database, $this->tenant, $request); + } + + public function setEmbeddingFunction(EmbeddingFunction $embeddingFunction): void + { + $this->embeddingFunction = $embeddingFunction; + } + + /** + * Prepares embeddings by generating missing ones in batch. + * + * @param array|null $embeddings Existing embeddings (may contain nulls for missing ones) + * @param array|null $texts Texts to generate embeddings from (documents or queryTexts) + * @return array|null Prepared embeddings array with all nulls filled in, or null if texts is null + */ + protected function prepareEmbeddings(?array $embeddings, ?array $texts): ?array + { + if ($texts === null) { + return $embeddings; + } + + if (empty($texts)) { + return $embeddings; + } + + if ($embeddings === null || empty($embeddings)) { + return $this->embeddingFunction->generate($texts); + } + + $missingIndices = []; + $textsToEmbed = []; + + foreach ($embeddings as $i => $embedding) { + if ($embedding === null) { + if (!isset($texts[$i]) || $texts[$i] === null) { + throw new InvalidArgumentException(sprintf('Cannot generate embedding at index %d: no text provided', $i)); + } + $missingIndices[] = $i; + $textsToEmbed[] = $texts[$i]; + } + } + + if (empty($missingIndices)) { + return $embeddings; + } + + $generatedEmbeddings = $this->embeddingFunction->generate($textsToEmbed); + + $finalEmbeddings = []; + $generatedIndex = 0; + + foreach ($embeddings as $i => $embedding) { + if ($embedding === null) { + $finalEmbeddings[] = $generatedEmbeddings[$generatedIndex++]; + } else { + $finalEmbeddings[] = $embedding; + } + } + + return $finalEmbeddings; + } + + /** + * Validates the inputs to the add, upsert, and update 
methods. + * + * @return array{ + * ids: string[], + * embeddings: int[][], + * metadatas: array[], + * documents: string[] + * } + */ + protected function validate( + array $ids, + ?array $embeddings, + ?array $metadatas, + ?array $documents, + bool $requireEmbeddingsOrDocuments + ): array { + + if ($requireEmbeddingsOrDocuments) { + if ($embeddings === null && $documents === null) { + throw new InvalidArgumentException( + 'You must provide embeddings or documents' + ); + } + } + + if ( + $embeddings != null && count($embeddings) != count($ids) + || $metadatas != null && count($metadatas) != count($ids) + || $documents != null && count($documents) != count($ids) + ) { + throw new InvalidArgumentException( + 'The number of ids, embeddings, metadatas, and documents must be the same' + ); + } + + // Validate metadatas + if ($metadatas !== null) { + foreach ($metadatas as $i => $metadata) { + if ($metadata !== null && !is_array($metadata)) { + throw new InvalidArgumentException(sprintf( + "Expected metadata at index %d to be an array, got %s", + $i, + gettype($metadata) + )); + } + } + } + + // Validate embeddings + if ($embeddings !== null) { + foreach ($embeddings as $i => $embedding) { + if (!is_array($embedding)) { + throw new InvalidArgumentException(sprintf( + "Expected embedding at index %d to be an array, got %s", + $i, + gettype($embedding) + )); + } + + foreach ($embedding as $j => $value) { + if (!is_float($value) && !is_int($value)) { + throw new InvalidArgumentException(sprintf( + "Expected embedding value at index %d.%d to be a number, got %s", + $i, + $j, + gettype($value) + )); + } + } + } + } + + // Validate ids + $ids = array_map(function ($id) { + if (is_object($id) && method_exists($id, '__toString')) { + $id = (string) $id; + } + if (!is_string($id)) { + throw new InvalidArgumentException('Expected IDs to be strings, got ' . 
gettype($id)); + } + if ($id === '') { + throw new InvalidArgumentException('Expected IDs to be an array of non-empty strings'); + } + return $id; + }, $ids); + + // Validate unique ids + $uniqueIds = array_unique($ids); + if (count($uniqueIds) !== count($ids)) { + $duplicateIds = array_filter($ids, function ($id) use ($ids) { + return count(array_keys($ids, $id)) > 1; + }); + throw new InvalidArgumentException('Expected IDs to be unique, found duplicates for: ' . implode(', ', array_unique($duplicateIds))); + } + + return [ + 'ids' => $ids, + 'embeddings' => $embeddings, + 'metadatas' => $metadatas, + 'documents' => $documents, + ]; + } +} diff --git a/src/Generated/Models/Database.php b/src/Models/Database.php similarity index 85% rename from src/Generated/Models/Database.php rename to src/Models/Database.php index 3ae8620..16a4391 100644 --- a/src/Generated/Models/Database.php +++ b/src/Models/Database.php @@ -3,7 +3,7 @@ declare(strict_types=1); -namespace Codewithkyrian\ChromaDB\Generated\Models; +namespace Codewithkyrian\ChromaDB\Models; class Database { @@ -22,11 +22,10 @@ public function __construct( * Tenant of the database. 
*/ public readonly ?string $tenant, - ) - { + ) { } - public static function make(array $data): self + public static function fromArray(array $data): self { return new self( id: $data['id'], @@ -43,5 +42,4 @@ public function toArray(): array 'tenant' => $this->tenant, ]; } - -} \ No newline at end of file +} diff --git a/src/Generated/Models/Tenant.php b/src/Models/Tenant.php similarity index 77% rename from src/Generated/Models/Tenant.php rename to src/Models/Tenant.php index 1755bb3..457ca8a 100644 --- a/src/Generated/Models/Tenant.php +++ b/src/Models/Tenant.php @@ -3,7 +3,7 @@ declare(strict_types=1); -namespace Codewithkyrian\ChromaDB\Generated\Models; +namespace Codewithkyrian\ChromaDB\Models; class Tenant { @@ -14,11 +14,10 @@ public function __construct( * @var string */ public readonly string $name, - ) - { + ) { } - public static function make(array $data): self + public static function fromArray(array $data): self { return new self( name: $data['name'], @@ -31,4 +30,4 @@ public function toArray(): array 'name' => $this->name, ]; } -} \ No newline at end of file +} diff --git a/src/Query/Where.php b/src/Query/Where.php new file mode 100644 index 0000000..15b8108 --- /dev/null +++ b/src/Query/Where.php @@ -0,0 +1,40 @@ + $conditions]; + } + + /** + * Combine multiple conditions with logical OR. 
+ */ + public static function any(array ...$conditions): array + { + return ['$or' => $conditions]; + } +} diff --git a/src/Query/WhereDocument.php b/src/Query/WhereDocument.php new file mode 100644 index 0000000..3374e29 --- /dev/null +++ b/src/Query/WhereDocument.php @@ -0,0 +1,38 @@ + $value]; + } + + public function notContains(string $value): array + { + return ['$not_contains' => $value]; + } + + public function matches(string $value): array + { + return ['$regex' => $value]; + } + + public function notMatches(string $value): array + { + return ['$not_regex' => $value]; + } + + public function regex(string $value): array + { + return ['$regex' => $value]; + } + + public function notRegex(string $value): array + { + return ['$not_regex' => $value]; + } +} diff --git a/src/Query/WhereField.php b/src/Query/WhereField.php new file mode 100644 index 0000000..a018996 --- /dev/null +++ b/src/Query/WhereField.php @@ -0,0 +1,52 @@ +field => ['$eq' => $value]]; + } + + public function ne(string|int|float|bool $value): array + { + return [$this->field => ['$ne' => $value]]; + } + + public function gt(int|float $value): array + { + return [$this->field => ['$gt' => $value]]; + } + + public function gte(int|float $value): array + { + return [$this->field => ['$gte' => $value]]; + } + + public function lt(int|float $value): array + { + return [$this->field => ['$lt' => $value]]; + } + + public function lte(int|float $value): array + { + return [$this->field => ['$lte' => $value]]; + } + + public function in(array $values): array + { + return [$this->field => ['$in' => $values]]; + } + + public function notIn(array $values): array + { + return [$this->field => ['$nin' => $values]]; + } +} diff --git a/src/Requests/AddItemsRequest.php b/src/Requests/AddItemsRequest.php new file mode 100644 index 0000000..cd61599 --- /dev/null +++ b/src/Requests/AddItemsRequest.php @@ -0,0 +1,45 @@ +> $metadatas Optional metadatas of the items to add. 
+ * @param string[] $ids IDs of the items to add. + * @param string[] $documents Optional documents of the items to add. + */ + public function __construct( + public readonly array $ids, + public readonly ?array $embeddings = null, + public readonly ?array $metadatas = null, + public readonly ?array $documents = null, + ) {} + + public static function fromArray(array $data): self + { + return new self( + embeddings: $data['embeddings'] ?? null, + metadatas: $data['metadatas'] ?? null, + ids: $data['ids'], + documents: $data['documents'] ?? null, + ); + } + + public function toArray(): array + { + return array_filter([ + 'embeddings' => $this->embeddings, + 'metadatas' => $this->metadatas, + 'ids' => $this->ids, + 'documents' => $this->documents, + ], fn($value) => $value !== null); + } +} diff --git a/src/Generated/Requests/CreateCollectionRequest.php b/src/Requests/CreateCollectionRequest.php similarity index 63% rename from src/Generated/Requests/CreateCollectionRequest.php rename to src/Requests/CreateCollectionRequest.php index c21d946..8f216b5 100644 --- a/src/Generated/Requests/CreateCollectionRequest.php +++ b/src/Requests/CreateCollectionRequest.php @@ -3,35 +3,25 @@ declare(strict_types=1); -namespace Codewithkyrian\ChromaDB\Generated\Requests; +namespace Codewithkyrian\ChromaDB\Requests; /** * Request model for creating a collection. */ class CreateCollectionRequest { + /** + * @param string $name The name of the collection + * @param array $metadata The metadata of the collection + * @param bool $getOrCreate If true, will return existing collection if it exists, otherwise will throw an exception. + */ public function __construct( - /** - * The name of the collection - */ public readonly string $name, - - /** - * The metadata of the collection - * - * @var array - */ public readonly ?array $metadata, - - /** - * If true, will return existing collection if it exists. 
- */ public readonly bool $getOrCreate = false, - ) - { - } + ) {} - public static function create(array $data): self + public static function fromArray(array $data): self { return new self( name: $data['name'], @@ -48,4 +38,4 @@ public function toArray(): array 'get_or_create' => $this->getOrCreate, ]; } -} \ No newline at end of file +} diff --git a/src/Generated/Requests/CreateDatabaseRequest.php b/src/Requests/CreateDatabaseRequest.php similarity index 64% rename from src/Generated/Requests/CreateDatabaseRequest.php rename to src/Requests/CreateDatabaseRequest.php index a42f67c..f6de833 100644 --- a/src/Generated/Requests/CreateDatabaseRequest.php +++ b/src/Requests/CreateDatabaseRequest.php @@ -3,17 +3,18 @@ declare(strict_types=1); -namespace Codewithkyrian\ChromaDB\Generated\Requests; +namespace Codewithkyrian\ChromaDB\Requests; class CreateDatabaseRequest { + /** + * @param string $name The name of the database + */ public function __construct( public readonly string $name, - ) - { - } + ) {} - public static function create(array $data): self + public static function fromArray(array $data): self { return new self( name: $data['name'], @@ -26,4 +27,4 @@ public function toArray(): array 'name' => $this->name, ]; } -} \ No newline at end of file +} diff --git a/src/Generated/Requests/CreateTenantRequest.php b/src/Requests/CreateTenantRequest.php similarity index 65% rename from src/Generated/Requests/CreateTenantRequest.php rename to src/Requests/CreateTenantRequest.php index 689a9d1..b827bbf 100644 --- a/src/Generated/Requests/CreateTenantRequest.php +++ b/src/Requests/CreateTenantRequest.php @@ -3,17 +3,18 @@ declare(strict_types=1); -namespace Codewithkyrian\ChromaDB\Generated\Requests; +namespace Codewithkyrian\ChromaDB\Requests; class CreateTenantRequest { + /** + * @param string $name The name of the tenant + */ public function __construct( public readonly string $name, - ) - { - } + ) {} - public static function create(array $data): self + public static 
function fromArray(array $data): self { return new self( name: $data['name'], @@ -26,4 +27,4 @@ public function toArray(): array 'name' => $this->name, ]; } -} \ No newline at end of file +} diff --git a/src/Requests/DeleteItemsRequest.php b/src/Requests/DeleteItemsRequest.php new file mode 100644 index 0000000..06c734d --- /dev/null +++ b/src/Requests/DeleteItemsRequest.php @@ -0,0 +1,38 @@ + $where Optional query condition to filter items to delete based on metadata values. + * @param array $whereDocument Optional query condition to filter items to delete based on document content. + */ + public function __construct( + public readonly ?array $ids = null, + public readonly ?array $where = null, + public readonly ?array $whereDocument = null, + ) {} + + public static function fromArray(array $data): self + { + return new self( + ids: $data['ids'] ?? null, + where: $data['where'] ?? null, + whereDocument: $data['where_document'] ?? null, + ); + } + + public function toArray(): array + { + return array_filter([ + 'ids' => $this->ids, + 'where' => $this->where, + 'where_document' => $this->whereDocument, + ], fn($value) => $value !== null); + } +} diff --git a/src/Requests/ForkCollectionRequest.php b/src/Requests/ForkCollectionRequest.php new file mode 100644 index 0000000..29a4540 --- /dev/null +++ b/src/Requests/ForkCollectionRequest.php @@ -0,0 +1,33 @@ + $this->newName, + ]; + } +} diff --git a/src/Requests/GetEmbeddingRequest.php b/src/Requests/GetEmbeddingRequest.php new file mode 100644 index 0000000..03d4e97 --- /dev/null +++ b/src/Requests/GetEmbeddingRequest.php @@ -0,0 +1,58 @@ + $where Optional where clause to filter items by. + * @param array $whereDocument Optional where clause to filter items by. + * @param string $sort Optional sort items. + * @param int $limit Optional limit on the number of items to get. + * @param int $offset Optional offset on the number of items to get. 
+ * @param string[] $include Optional list of items to include in the response. + */ + public function __construct( + public readonly ?array $ids = null, + public readonly ?array $where = null, + public readonly ?array $whereDocument = null, + public readonly ?string $sort = null, + public readonly ?int $limit = null, + public readonly ?int $offset = null, + public readonly ?array $include = null, + ) { + } + + public static function fromArray(array $data): self + { + return new self( + ids: $data['ids'] ?? null, + where: $data['where'] ?? null, + whereDocument: $data['where_document'] ?? null, + sort: $data['sort'] ?? null, + limit: $data['limit'] ?? null, + offset: $data['offset'] ?? null, + include: $data['include'] ?? null, + ); + } + + public function toArray(): array + { + return array_filter([ + 'ids' => $this->ids, + 'where' => $this->where, + 'where_document' => $this->whereDocument, + 'sort' => $this->sort, + 'limit' => $this->limit, + 'offset' => $this->offset, + 'include' => $this->include, + ], fn($value) => $value !== null); + } +} diff --git a/src/Requests/QueryItemsRequest.php b/src/Requests/QueryItemsRequest.php new file mode 100644 index 0000000..0cb8fe3 --- /dev/null +++ b/src/Requests/QueryItemsRequest.php @@ -0,0 +1,46 @@ + $where Optional query condition to filter results based on metadata values. + * @param array $whereDocument Optional query condition to filter results based on document content. + * @param float[][] $queryEmbeddings Optional query condition to filter results based on embedding content. + * @param int $nResults Optional number of results to return. Defaults to 10. + * @param string[] $include Optional list of items to include in the response. 
+ */ + public function __construct( + public readonly ?array $where = null, + public readonly ?array $whereDocument = null, + public readonly ?array $queryEmbeddings = null, + public readonly ?int $nResults = null, + public readonly ?array $include = null, + ) {} + + public static function fromArray(array $data): self + { + return new self( + where: $data['where'] ?? null, + whereDocument: $data['where_document'] ?? null, + queryEmbeddings: $data['query_embeddings'] ?? null, + nResults: $data['n_results'] ?? null, + include: $data['include'] ?? null, + ); + } + + public function toArray(): array + { + return array_filter([ + 'where' => $this->where, + 'where_document' => $this->whereDocument, + 'query_embeddings' => $this->queryEmbeddings, + 'n_results' => $this->nResults, + 'include' => $this->include, + ], fn($value) => $value !== null); + } +} diff --git a/src/Requests/UpdateCollectionRequest.php b/src/Requests/UpdateCollectionRequest.php new file mode 100644 index 0000000..0e8d347 --- /dev/null +++ b/src/Requests/UpdateCollectionRequest.php @@ -0,0 +1,34 @@ +|null $metadata New metadata of the collection. + */ + public function __construct( + public readonly ?string $name, + public readonly ?array $metadata, + ) {} + + public static function fromArray(array $data): self + { + return new self( + name: $data['new_name'] ?? null, + metadata: $data['new_metadata'] ?? 
null, + ); + } + + public function toArray(): array + { + return array_filter([ + 'new_name' => $this->name, + 'new_metadata' => $this->metadata, + ]); + } +} diff --git a/src/Generated/Responses/GetItemsResponse.php b/src/Requests/UpdateItemsRequest.php similarity index 50% rename from src/Generated/Responses/GetItemsResponse.php rename to src/Requests/UpdateItemsRequest.php index e9a4de6..c5b06bc 100644 --- a/src/Generated/Responses/GetItemsResponse.php +++ b/src/Requests/UpdateItemsRequest.php @@ -3,51 +3,29 @@ declare(strict_types=1); -namespace Codewithkyrian\ChromaDB\Generated\Responses; +namespace Codewithkyrian\ChromaDB\Requests; -/** - * Response model for getting items from collection. - */ -class GetItemsResponse +class UpdateItemsRequest { + /** + * @param float[][] $embeddings Optional embeddings of the items to update. + * @param string[] $ids IDs of the items to update. + * @param array $metadatas Optional metadatas of the items to update. + * @param string[] $documents Optional documents of the items to update. + */ public function __construct( - /** - * List of ids of the items. - * - * @var string[] - */ - public readonly array $ids, - - /** - * List of metadata of the items. - * - * @var array[] - */ - public readonly ?array $metadatas, - - /** - * List of embeddings of the items. - * - * @var float[][] - */ public readonly ?array $embeddings, - - /** - * List of documents of the items. - * - * @var string[] - */ + public readonly array $ids, + public readonly ?array $metadatas, public readonly ?array $documents, - ) - { - } + ) {} - public static function from(array $data): self + public static function fromArray(array $data): self { return new self( + embeddings: $data['embeddings'] ?? null, ids: $data['ids'], metadatas: $data['metadatas'] ?? null, - embeddings: $data['embeddings'] ?? null, documents: $data['documents'] ?? 
null, ); } @@ -55,10 +33,10 @@ public static function from(array $data): self public function toArray(): array { return array_filter([ + 'embeddings' => $this->embeddings, 'ids' => $this->ids, 'metadatas' => $this->metadatas, - 'embeddings' => $this->embeddings, 'documents' => $this->documents, ]); } -} \ No newline at end of file +} diff --git a/src/Requests/UpdateTenantRequest.php b/src/Requests/UpdateTenantRequest.php new file mode 100644 index 0000000..7406513 --- /dev/null +++ b/src/Requests/UpdateTenantRequest.php @@ -0,0 +1,27 @@ + $this->name, + ]; + } +} diff --git a/src/Resources/CollectionResource.php b/src/Resources/CollectionResource.php deleted file mode 100644 index b79c3cc..0000000 --- a/src/Resources/CollectionResource.php +++ /dev/null @@ -1,409 +0,0 @@ -name, - id: $collection->id, - metadata: $collection->metadata, - database: $database, - tenant: $tenant, - embeddingFunction: $embeddingFunction, - apiClient: $apiClient, - ); - } - - /** - * Add items to the collection. - * - * @param array $ids The IDs of the items to add. - * @param ?array $embeddings The embeddings of the items to add (optional). - * @param ?array $metadatas The metadatas of the items to add (optional). - * @param ?array $documents The documents of the items to add (optional). - * @param ?array $images The base64 encoded images of the items to add (optional). 
- * @return void - */ - public function add( - array $ids, - ?array $embeddings = null, - ?array $metadatas = null, - ?array $documents = null, - ?array $images = null - ): void { - $validated = $this->validate( - ids: $ids, - embeddings: $embeddings, - metadatas: $metadatas, - documents: $documents, - images: $images, - requireEmbeddingsOrDocuments: true, - ); - - - $request = new AddEmbeddingRequest( - embeddings: $validated['embeddings'], - metadatas: $validated['metadatas'], - ids: $validated['ids'], - documents: $validated['documents'], - images: $validated['images'], - ); - - - $this->apiClient->add($this->id, $this->database, $this->tenant, $request); - } - - - /** - * Update the embeddings, documents, and/or metadatas of existing items. - * - * @param array $ids The IDs of the items to update. - * @param ?array $embeddings The embeddings of the items to update (optional). - * @param ?array $metadatas The metadatas of the items to update (optional). - * @param ?array $documents The documents of the items to update (optional). - * @param ?array $images The base64 encoded images of the items to update (optional). - * - */ - public function update( - array $ids, - ?array $embeddings = null, - ?array $metadatas = null, - ?array $documents = null, - ?array $images = null - ) { - $validated = $this->validate( - ids: $ids, - embeddings: $embeddings, - metadatas: $metadatas, - documents: $documents, - images: $images, - requireEmbeddingsOrDocuments: false, - ); - - $request = new UpdateEmbeddingRequest( - embeddings: $validated['embeddings'], - ids: $validated['ids'], - metadatas: $validated['metadatas'], - documents: $validated['documents'], - images: $validated['images'], - ); - - $this->apiClient->update($this->id, $this->database, $this->tenant, $request); - } - - /** - * Upsert items in the collection. - * - * @param array $ids The IDs of the items to upsert. - * @param ?array $embeddings The embeddings of the items to upsert (optional). 
- * @param ?array $metadatas The metadatas of the items to upsert (optional). - * @param ?array $documents The documents of the items to upsert (optional). - * @param ?array $images The base64 encoded images of the items to upsert (optional). - * - */ - public function upsert( - array $ids, - ?array $embeddings = null, - ?array $metadatas = null, - ?array $documents = null, - ?array $images = null - ): void { - $validated = $this->validate( - ids: $ids, - embeddings: $embeddings, - metadatas: $metadatas, - documents: $documents, - images: $images, - requireEmbeddingsOrDocuments: true, - ); - - $request = new AddEmbeddingRequest( - embeddings: $validated['embeddings'], - metadatas: $validated['metadatas'], - ids: $validated['ids'], - documents: $validated['documents'], - images: $validated['images'], - ); - - $this->apiClient->upsert($this->id, $this->database, $this->tenant, $request); - } - - /** - * Count the number of items in the collection. - */ - public function count(): int - { - return $this->apiClient->count($this->id, $this->database, $this->tenant); - } - - /** - * Returns the first `$limit` entries of the collection. - * - * @param int $limit The number of entries to return. Defaults to 10. - * @param string[] $include The list of fields to include in the response (optional). - */ - public function peek( - int $limit = 10, - ?array $include = null - ): GetItemsResponse { - $include ??= ['embeddings', 'metadatas', 'distances']; - - $request = new GetEmbeddingRequest( - limit: $limit, - include: $include, - ); - - return $this->apiClient->get($this->id, $this->database, $this->tenant, $request); - } - - /** - * Get items from the collection. - * - * @param array $ids The IDs of the items to get (optional). - * @param array $where The where clause to filter items by (optional). - * @param array $whereDocument The where clause to filter items by (optional). - * @param int $limit The limit on the number of items to get (optional). 
- * @param int $offset The offset on the number of items to get (optional). - * @param string[] $include The list of fields to include in the response (optional). - */ - public function get( - ?array $ids = null, - ?array $where = null, - ?array $whereDocument = null, - ?int $limit = null, - ?int $offset = null, - ?array $include = null - ): GetItemsResponse { - $include ??= ['embeddings', 'metadatas', 'distances']; - - $request = new GetEmbeddingRequest( - ids: $ids, - where: $where, - whereDocument: $whereDocument, - limit: $limit, - offset: $offset, - include: $include, - ); - - return $this->apiClient->get($this->id, $this->database, $this->tenant, $request); - } - - /** - * Deletes items from the collection. - * - * @param ?array $ids The IDs of the items to delete. - * @param ?array $where The where clause to filter items to delete based on metadata values (optional). - * @param ?array $whereDocument The where clause to filter to delete based on document content (optional). - */ - public function delete(?array $ids = null, ?array $where = null, ?array $whereDocument = null): void - { - $request = new DeleteEmbeddingRequest( - ids: $ids, - where: $where, - whereDocument: $whereDocument, - ); - - $this->apiClient->delete($this->id, $this->database, $this->tenant, $request); - } - - /** - * Performs a query on the collection using the specified parameters. 
- * - * - */ - public function query( - ?array $queryEmbeddings = null, - ?array $queryTexts = null, - ?array $queryImages = null, - int $nResults = 10, - ?array $where = null, - ?array $whereDocument = null, - ?array $include = null - ): QueryItemsResponse { - $include ??= ['embeddings', 'metadatas', 'distances']; - - if ( - !(($queryEmbeddings != null xor $queryTexts != null xor $queryImages != null)) - ) { - throw new \InvalidArgumentException( - 'You must provide only one of queryEmbeddings, queryTexts, queryImages, or queryUris' - ); - } - - $finalEmbeddings = []; - - if ($queryEmbeddings == null) { - if ($this->embeddingFunction == null) { - throw new \InvalidArgumentException( - 'You must provide an embedding function if you did not provide embeddings' - ); - } elseif ($queryTexts != null) { - $finalEmbeddings = $this->embeddingFunction->generate($queryTexts); - } elseif ($queryImages != null) { - $finalEmbeddings = $this->embeddingFunction->generate($queryImages); - } else { - throw new \InvalidArgumentException( - 'If you did not provide embeddings, you must provide documents or images' - ); - } - } else { - $finalEmbeddings = $queryEmbeddings; - } - - - $request = new QueryEmbeddingRequest( - where: $where, - whereDocument: $whereDocument, - queryEmbeddings: $finalEmbeddings, - nResults: $nResults, - include: $include, - ); - - return $this->apiClient->getNearestNeighbors($this->id, $this->database, $this->tenant, $request); - } - - - /** - * Modify the collection name or metadata. - */ - public function modify(string $name, array $metadata): void - { - $request = new UpdateCollectionRequest($name, $metadata); - - $this->apiClient->updateCollection($this->id, $this->database, $this->tenant, $request); - } - - /** - * Validates the inputs to the add, upsert, and update methods. 
- * - * @return array{ids: string[], embeddings: int[][], metadatas: array[], documents: string[], images: string[], uris: string[]} - */ - protected - function validate( - array $ids, - ?array $embeddings, - ?array $metadatas, - ?array $documents, - ?array $images, - bool $requireEmbeddingsOrDocuments - ): array { - - if ($requireEmbeddingsOrDocuments) { - if ($embeddings === null && $documents === null && $images === null) { - throw new \InvalidArgumentException( - 'You must provide embeddings, documents, or images' - ); - } - } - - if ( - $embeddings != null && count($embeddings) != count($ids) - || $metadatas != null && count($metadatas) != count($ids) - || $documents != null && count($documents) != count($ids) - || $images != null && count($images) != count($ids) - ) { - throw new \InvalidArgumentException( - 'The number of ids, embeddings, metadatas, documents, and images must be the same' - ); - } - - if ($embeddings == null) { - if ($this->embeddingFunction == null) { - throw new \InvalidArgumentException( - 'You must provide an embedding function if you did not provide embeddings' - ); - } elseif ($documents != null) { - $finalEmbeddings = $this->embeddingFunction->generate($documents); - } elseif ($images != null) { - $finalEmbeddings = $this->embeddingFunction->generate($images); - } else { - throw new \InvalidArgumentException( - 'If you did not provide embeddings, you must provide documents or images' - ); - } - } else { - $finalEmbeddings = $embeddings; - } - - $ids = array_map(function ($id) { - $id = (string)$id; - if ($id === '') { - throw new \InvalidArgumentException('Expected IDs to be non-empty strings'); - } - return $id; - }, $ids); - - $uniqueIds = array_unique($ids); - if (count($uniqueIds) !== count($ids)) { - $duplicateIds = array_filter($ids, function ($id) use ($ids) { - return count(array_keys($ids, $id)) > 1; - }); - throw new \InvalidArgumentException('Expected IDs to be unique, found duplicates for: ' . 
implode(', ', $duplicateIds)); - } - - - return [ - 'ids' => $ids, - 'embeddings' => $finalEmbeddings, - 'metadatas' => $metadatas, - 'documents' => $documents, - 'images' => $images, - ]; - } -} diff --git a/src/Responses/GetItemsResponse.php b/src/Responses/GetItemsResponse.php new file mode 100644 index 0000000..0fae5df --- /dev/null +++ b/src/Responses/GetItemsResponse.php @@ -0,0 +1,65 @@ +[]|null $metadatas List of metadata of the items. + * @param float[][]|null $embeddings List of embeddings of the items. + * @param string[]|null $documents List of documents of the items. + */ + public function __construct( + public readonly array $ids, + public readonly ?array $metadatas, + public readonly ?array $embeddings, + public readonly ?array $documents, + ) { + } + + public static function fromArray(array $data): self + { + return new self( + ids: $data['ids'], + metadatas: $data['metadatas'] ?? null, + embeddings: $data['embeddings'] ?? null, + documents: $data['documents'] ?? null, + ); + } + + public function toArray(): array + { + return array_filter([ + 'ids' => $this->ids, + 'metadatas' => $this->metadatas, + 'embeddings' => $this->embeddings, + 'documents' => $this->documents, + ]); + } + + /** + * @return Record[] + */ + public function asRecords(): array + { + $records = []; + foreach ($this->ids as $index => $id) { + $records[] = new Record( + id: $id, + embedding: $this->embeddings[$index] ?? null, + metadata: $this->metadatas[$index] ?? null, + document: $this->documents[$index] ?? null, + ); + } + return $records; + } +} diff --git a/src/Responses/QueryItemsResponse.php b/src/Responses/QueryItemsResponse.php new file mode 100644 index 0000000..8dafbc7 --- /dev/null +++ b/src/Responses/QueryItemsResponse.php @@ -0,0 +1,82 @@ +[][]|null $metadatas List of metadatas of the items. + * @param string[][]|null $documents List of documents of the items. + * @param string[][]|null $data List of data of the items. 
+ * @param string[][]|null $uris List of uris of the items. + * @param float[][]|null $distances List of distances of the items. + */ + public readonly array $ids, + public readonly ?array $embeddings, + public readonly ?array $metadatas, + public readonly ?array $documents, + public readonly ?array $data, + public readonly ?array $uris, + public readonly ?array $distances, + ) { + } + + public static function fromArray(array $data): self + { + return new self( + ids: $data['ids'], + embeddings: $data['embeddings'] ?? null, + metadatas: $data['metadatas'] ?? null, + documents: $data['documents'] ?? null, + data: $data['data'] ?? null, + uris: $data['uris'] ?? null, + distances: $data['distances'] ?? null, + ); + } + + public function toArray(): array + { + return array_filter([ + 'ids' => $this->ids, + 'embeddings' => $this->embeddings, + 'metadatas' => $this->metadatas, + 'documents' => $this->documents, + 'data' => $this->data, + 'uris' => $this->uris, + 'distances' => $this->distances, + ]); + } + + /** + * @return ScoredRecord[][] + */ + public function asRecords(): array + { + $records = []; + foreach ($this->ids as $queryIndex => $ids) { + $queryRecords = []; + foreach ($ids as $resultIndex => $id) { + $queryRecords[] = new ScoredRecord( + id: $id, + embedding: $this->embeddings[$queryIndex][$resultIndex] ?? null, + metadata: $this->metadatas[$queryIndex][$resultIndex] ?? null, + document: $this->documents[$queryIndex][$resultIndex] ?? null, + distance: $this->distances[$queryIndex][$resultIndex] ?? null, + ); + } + $records[] = $queryRecords; + } + return $records; + } +} diff --git a/src/Types/Includes.php b/src/Types/Includes.php new file mode 100644 index 0000000..c8cd6ae --- /dev/null +++ b/src/Types/Includes.php @@ -0,0 +1,14 @@ +|null $metadata The metadata of the item. + * @param string|null $document The document content of the item. + * @param string|null $uri The URI of the item. 
+ */ + public function __construct( + public string $id, + public ?array $embedding = null, + public ?array $metadata = null, + public ?string $document = null, + public ?string $uri = null, + ) { + } + + public static function make(string $id): self + { + return new self($id); + } + + public function withEmbedding(array $embedding): self + { + $this->embedding = $embedding; + return $this; + } + + public function withMetadata(array $metadata): self + { + $this->metadata = $metadata; + return $this; + } + + public function withDocument(string $document): self + { + $this->document = $document; + return $this; + } + + public function withUri(string $uri): self + { + $this->uri = $uri; + return $this; + } +} diff --git a/src/Types/ScoredRecord.php b/src/Types/ScoredRecord.php new file mode 100644 index 0000000..6bcdd5e --- /dev/null +++ b/src/Types/ScoredRecord.php @@ -0,0 +1,33 @@ +|null $metadata The metadata of the item. + * @param string|null $document The document content of the item. + * @param string|null $uri The URI of the item. 
+ * @param float|null $distance The distance of the item + */ + public function __construct( + string $id, + ?array $embedding = null, + ?array $metadata = null, + ?string $document = null, + ?string $uri = null, + public ?float $distance = null, + ) { + parent::__construct($id, $embedding, $metadata, $document, $uri); + } + + public function withDistance(float $distance): self + { + $this->distance = $distance; + return $this; + } +} diff --git a/tests/ChromaDB.php b/tests/ChromaDB.php deleted file mode 100644 index 12624f2..0000000 --- a/tests/ChromaDB.php +++ /dev/null @@ -1,57 +0,0 @@ -toBeInstanceOf(Client::class); -}); - -it('can connect to a chroma server using factory', function () { - $client = ChromaDB::factory() - ->withHost('http://localhost') - ->withPort(8000) - ->connect(); - - expect($client)->toBeInstanceOf(Client::class); -}); - -test('can connect to an API token authenticated chroma server', function () { - $client = ChromaDB::factory() - ->withPort(8001) - ->withAuthToken('test-token') - ->connect(); - - expect($client)->toBeInstanceOf(Client::class); -}); - -/* -NOTE: Currently token-based authentication is broken in the current ChromaDB versions - -it('cannot connect to an API token authenticated chroma server with wrong token', function () { - ChromaDB::factory() - ->withPort(8001) - ->withAuthToken('wrong-token') - ->connect(); -})->throws(ChromaAuthorizationException::class); - -it('throws exception when connecting to API token authenticated chroma server with no token', function () { - ChromaDB::factory() - ->withPort(8001) - ->connect(); -})->throws(ChromaAuthorizationException::class); - -*/ - -it('throws a connection exception when connecting to a non-existent chroma server', function () { - ChromaDB::factory() - ->withHost('http://localhost') - ->withPort(8002) - ->connect(); -})->throws(ChromaConnectionException::class); diff --git a/tests/Feature/ApiTest.php b/tests/Feature/ApiTest.php new file mode 100644 index 0000000..2c82464 --- 
/dev/null +++ b/tests/Feature/ApiTest.php @@ -0,0 +1,367 @@ +api = ChromaDB::factory() + ->withHeader('X-Chroma-Token', 'test-token') + ->createApi(); +}); + +afterEach(function () { + $this->api->reset(); +}); + +it('can get user identity', function () { + $identity = $this->api->getUserIdentity(); + + expect($identity)->toBeArray() + ->and($identity)->toHaveKey('user_id') + ->and($identity)->toHaveKey('tenant'); +}); + +it('can check health', function () { + $health = $this->api->healthcheck(); + + expect($health)->toBeArray(); +}); + +it('can check heartbeat', function () { + $heartbeat = $this->api->heartbeat(); + + expect($heartbeat)->toBeArray() + ->and($heartbeat)->toHaveKey('nanosecond heartbeat'); +}); + +it('can check pre-flight checks', function () { + $checks = $this->api->preFlightChecks(); + + expect($checks)->toBeArray(); +}); + +it('can get version', function () { + $version = $this->api->version(); + + expect($version)->toBeString(); +}); + +it('can create a tenant', function () { + $tenantName = 'test-tenant-' . uniqid(); + $this->api->createTenant(new CreateTenantRequest($tenantName)); + + $tenant = $this->api->getTenant($tenantName); + + expect($tenant->name)->toBe($tenantName); +}); + +it('cannot create a duplicate tenant', function () { + $tenantName = 'test-tenant-' . uniqid(); + $this->api->createTenant(new CreateTenantRequest($tenantName)); + + $this->api->createTenant(new CreateTenantRequest($tenantName)); +})->throws(UniqueConstraintException::class, 'already exists'); + +it('can get a tenant', function () { + $tenantName = 'test-tenant-' . 
uniqid(); + $this->api->createTenant(new CreateTenantRequest($tenantName)); + + $tenant = $this->api->getTenant($tenantName); + + expect($tenant->name)->toBe($tenantName); +}); + +it('cannot get a non-existent tenant', function () { + $this->api->getTenant('non-existent-tenant'); +})->throws(NotFoundException::class, 'Tenant [non-existent-tenant] not found'); + +it('can create a database', function () { + $dbName = 'test-db-' . uniqid(); + $this->api->createDatabase('default_tenant', new CreateDatabaseRequest($dbName)); + + $database = $this->api->getDatabase($dbName, 'default_tenant'); + expect($database->name)->toBe($dbName); +}); + +it('cannot create a duplicate database', function () { + $dbName = 'test-db-' . uniqid(); + $this->api->createDatabase('default_tenant', new CreateDatabaseRequest($dbName)); + + $this->api->createDatabase('default_tenant', new CreateDatabaseRequest($dbName)); +})->throws(UniqueConstraintException::class, 'already exists'); + +it('can list databases', function () { + $dbName = 'test-db-' . uniqid(); + $this->api->createDatabase('default_tenant', new CreateDatabaseRequest($dbName)); + + $databases = $this->api->listDatabases('default_tenant'); + expect($databases)->toBeArray(); +}); + +it('can get a database', function () { + $dbName = 'test-db-' . uniqid(); + $this->api->createDatabase('default_tenant', new CreateDatabaseRequest($dbName)); + + $database = $this->api->getDatabase($dbName, 'default_tenant'); + expect($database->name)->toBe($dbName); +}); + +it('cannot get a non-existent database', function () { + $this->api->getDatabase('non-existent-db', 'default_tenant'); +})->throws(NotFoundException::class, 'Database [non-existent-db] not found'); + +it('can delete a database', function () { + $dbName = 'test-db-' . 
uniqid(); + $this->api->createDatabase('default_tenant', new CreateDatabaseRequest($dbName)); + + $this->api->deleteDatabase($dbName, 'default_tenant'); + + $databases = $this->api->listDatabases('default_tenant'); + $names = array_map(fn($db) => $db->name, $databases); + expect($names)->not->toContain($dbName); +}); + +it('can create a collection', function () { + $collectionName = 'test-collection-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + expect($collection->name)->toBe($collectionName); +}); + +it('cannot create a duplicate collection', function () { + $collectionName = 'test-collection-' . uniqid(); + $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); +})->throws(UniqueConstraintException::class, 'already exists'); + +it('cannot create a collection with an invalid name', function () { + $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest('Invalid Name With Spaces', null)); +})->throws(InvalidArgumentException::class, "Expected a name containing 3-512 characters"); + +it('can list collections', function () { + $collectionName = 'test-collection-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $collections = $this->api->listCollections('default_database', 'default_tenant'); + expect($collections)->toBeArray(); +}); + +it('can get a collection', function () { + $collectionName = 'test-collection-' . 
uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $fetchedCollection = $this->api->getCollection($collectionName, 'default_database', 'default_tenant'); + expect($fetchedCollection->id)->toBe($collection->id); +}); + +it('cannot get a non-existent collection', function () { + $this->api->getCollection('non-existent-collection', 'default_database', 'default_tenant'); +})->throws(NotFoundException::class); + +it('can update a collection', function () { + $collectionName = 'test-collection-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $collections = $this->api->listCollections('default_database', 'default_tenant'); + $ids = array_map(fn($c) => $c->id, $collections); + expect($ids)->toContain($collection->id); + + $updatedName = 'updated-' . $collectionName; + $this->api->updateCollection($collection->id, 'default_database', 'default_tenant', new UpdateCollectionRequest($updatedName, ['new' => 'metadata'])); + + $updatedCollection = $this->api->getCollection($updatedName, 'default_database', 'default_tenant'); + expect($updatedCollection->name)->toBe($updatedName) + ->and($updatedCollection->metadata)->toBe(['new' => 'metadata']); +}); + +it('can fork a collection', function () { + if (str_contains($this->api->baseUri, 'localhost') || !str_contains($this->api->baseUri, 'api.trychroma.com')) { + test()->markTestSkipped('Collection forking is not supported for local Chroma'); + } + + $collectionName = 'test-collection-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, ['original' => 'metadata'])); + + $forkedName = 'forked-' . 
$collectionName; + $forkedCollection = $this->api->forkCollection($collection->id, 'default_database', 'default_tenant', new ForkCollectionRequest($forkedName)); + + expect($forkedCollection->name)->toBe($forkedName) + ->and($forkedCollection->id)->not->toBe($collection->id); + + $collections = $this->api->listCollections('default_database', 'default_tenant'); + $names = array_map(fn($c) => $c->name, $collections); + expect($names)->toContain($collectionName) + ->and($names)->toContain($forkedName); +}); + +it('can delete a collection', function () { + $collectionName = 'test-collection-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->deleteCollection($collectionName, 'default_database', 'default_tenant'); + + $collections = $this->api->listCollections('default_database', 'default_tenant'); + $ids = array_map(fn($c) => $c->id, $collections); + expect($ids)->not->toContain($collection->id); +}); + +it('cannot delete a non-existent collection', function () { + $this->api->deleteCollection('non-existent-collection', 'default_database', 'default_tenant'); +})->throws(NotFoundException::class); + +it('can count collections', function () { + $initialCount = $this->api->countCollections('default_database', 'default_tenant'); + + $collectionName1 = 'test-collection-' . uniqid(); + $collectionName2 = 'test-collection-' . uniqid(); + + $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName1, null)); + $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName2, null)); + + $newCount = $this->api->countCollections('default_database', 'default_tenant'); + + expect($newCount)->toBe($initialCount + 2); +}); + +it('can add items to a collection', function () { + $collectionName = 'test-items-' . 
uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->addCollectionItems($collection->id, 'default_database', 'default_tenant', new AddItemsRequest( + ids: ['id1', 'id2'], + embeddings: [[1.1, 2.2], [3.3, 4.4]], + metadatas: [['key' => 'value1'], ['key' => 'value2']], + documents: ['doc1', 'doc2'], + )); + + $count = $this->api->countCollectionItems($collection->id, 'default_database', 'default_tenant'); + expect($count)->toBe(2); +}); + +it('can count items in a collection', function () { + $collectionName = 'test-items-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->addCollectionItems($collection->id, 'default_database', 'default_tenant', new AddItemsRequest( + ids: ['id1'], + embeddings: [[1.1, 2.2]], + metadatas: [['key' => 'value1']], + documents: ['doc1'], + )); + + $count = $this->api->countCollectionItems($collection->id, 'default_database', 'default_tenant'); + expect($count)->toBe(1); +}); + +it('can get items from a collection', function () { + $collectionName = 'test-items-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->addCollectionItems($collection->id, 'default_database', 'default_tenant', new AddItemsRequest( + ids: ['id1', 'id2'], + embeddings: [[1.1, 2.2], [3.3, 4.4]], + metadatas: [['key' => 'value1'], ['key' => 'value2']], + documents: ['doc1', 'doc2'], + )); + + $items = $this->api->getCollectionItems($collection->id, 'default_database', 'default_tenant', new GetEmbeddingRequest( + ids: ['id1'], + )); + expect($items->ids)->toContain('id1') + ->and($items->ids)->not->toContain('id2'); +}); + +it('can query items in a collection', function () { + $collectionName = 'test-items-' . 
uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->addCollectionItems($collection->id, 'default_database', 'default_tenant', new AddItemsRequest( + ids: ['id1'], + embeddings: [[1.1, 2.2]], + metadatas: [['key' => 'value1']], + documents: ['doc1'], + )); + + $query = $this->api->queryCollectionItems($collection->id, 'default_database', 'default_tenant', new QueryItemsRequest( + queryEmbeddings: [[1.1, 2.2]], + nResults: 1, + )); + expect($query->ids[0])->toContain('id1'); +}); + +it('can update items in a collection', function () { + $collectionName = 'test-items-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->addCollectionItems($collection->id, 'default_database', 'default_tenant', new AddItemsRequest( + ids: ['id1'], + embeddings: [[1.1, 2.2]], + metadatas: [['key' => 'value1']], + documents: ['doc1'], + )); + + $this->api->updateCollectionItems($collection->id, 'default_database', 'default_tenant', new UpdateItemsRequest( + embeddings: [[1.2, 2.3]], + ids: ['id1'], + metadatas: [['key' => 'updated_value1']], + documents: ['updated_doc1'], + )); + $updatedItem = $this->api->getCollectionItems($collection->id, 'default_database', 'default_tenant', new GetEmbeddingRequest(ids: ['id1'])); + expect($updatedItem->metadatas[0])->toBe(['key' => 'updated_value1']); +}); + +it('can upsert items in a collection', function () { + $collectionName = 'test-items-' . 
uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->addCollectionItems($collection->id, 'default_database', 'default_tenant', new AddItemsRequest( + ids: ['id1'], + embeddings: [[1.1, 2.2]], + metadatas: [['key' => 'value1']], + documents: ['doc1'], + )); + + $this->api->upsertCollectionItems($collection->id, 'default_database', 'default_tenant', new AddItemsRequest( + embeddings: [[1.3, 2.4], [5.5, 6.6]], + metadatas: [['key' => 'upserted_value1'], ['key' => 'value3']], + ids: ['id1', 'id3'], + documents: ['upserted_doc1', 'doc3'], + )); + $count = $this->api->countCollectionItems($collection->id, 'default_database', 'default_tenant'); + expect($count)->toBe(2); +}); + +it('can delete items from a collection', function () { + $collectionName = 'test-items-' . uniqid(); + $collection = $this->api->createCollection('default_database', 'default_tenant', new CreateCollectionRequest($collectionName, null)); + + $this->api->addCollectionItems($collection->id, 'default_database', 'default_tenant', new AddItemsRequest( + ids: ['id1', 'id2'], + embeddings: [[1.1, 2.2], [3.3, 4.4]], + metadatas: [['key' => 'value1'], ['key' => 'value2']], + documents: ['doc1', 'doc2'], + )); + + $this->api->deleteCollectionItems($collection->id, 'default_database', 'default_tenant', new DeleteItemsRequest( + ids: ['id1'], + )); + $count = $this->api->countCollectionItems($collection->id, 'default_database', 'default_tenant'); + expect($count)->toBe(1); +}); diff --git a/tests/Feature/ChromaFacadeTest.php b/tests/Feature/ChromaFacadeTest.php new file mode 100644 index 0000000..5488c34 --- /dev/null +++ b/tests/Feature/ChromaFacadeTest.php @@ -0,0 +1,65 @@ +toBeInstanceOf(Client::class); +}); + +it('can connect to a chroma server using factory', function () { + $client = ChromaDB::factory() + ->withHost('http://localhost') + ->withPort(8000) + ->withHeader('X-Chroma-Token', 
'test-token') + ->connect(); + + expect($client)->toBeInstanceOf(Client::class); +}); + +it('throws a connection exception when connecting to a non-existent chroma server', function () { + ChromaDB::factory() + ->withHost('http://localhost') + ->withPort(8002) + ->connect(); +})->throws(ConnectionException::class); + +it('can create a cloud factory', function () { + $factory = ChromaDB::cloud('test-api-key'); + + expect($factory)->toBeInstanceOf(Factory::class); + + $reflection = new ReflectionClass($factory); + $host = $reflection->getProperty('host')->getValue($factory); + $port = $reflection->getProperty('port')->getValue($factory); + $headers = $reflection->getProperty('headers')->getValue($factory); + + expect($host)->toBe('https://api.trychroma.com') + ->and($port)->toBeNull() + ->and($headers)->toBe(['X-Chroma-Token' => 'test-api-key']); +}); + +it('can create a local factory', function () { + $factory = ChromaDB::local('http://custom-host', 1234, 'test-tenant', 'test-db'); + + expect($factory)->toBeInstanceOf(Factory::class); + + $reflection = new ReflectionClass($factory); + $host = $reflection->getProperty('host')->getValue($factory); + $port = $reflection->getProperty('port')->getValue($factory); + $tenant = $reflection->getProperty('tenant')->getValue($factory); + $database = $reflection->getProperty('database')->getValue($factory); + + expect($host)->toBe('http://custom-host') + ->and($port)->toBe(1234) + ->and($tenant)->toBe('test-tenant') + ->and($database)->toBe('test-db'); +}); diff --git a/tests/Feature/ClientTest.php b/tests/Feature/ClientTest.php new file mode 100644 index 0000000..e4c65c8 --- /dev/null +++ b/tests/Feature/ClientTest.php @@ -0,0 +1,155 @@ +client = ChromaDB::factory() + ->withHeader('X-Chroma-Token', 'test-token') + ->withDatabase('test_database') + ->withTenant('test_tenant') + ->connect(); + + $this->client->deleteAllCollections(); + + $this->embeddingFunction = new class implements EmbeddingFunction { + public function 
generate(array $texts): array + { + return array_map(function ($text) { + return [1.0, 2.0, 3.0, 4.0, 5.0]; + }, $texts); + } + }; + + $this->collection = $this->client->createCollection( + name: 'test_collection', + embeddingFunction: $this->embeddingFunction + ); +}); + +afterEach(function () { + $this->client->reset(); +}); + +it('can get the version', function () { + $version = $this->client->version(); + + expect($version) + ->toBeString() + ->toMatch('/^[0-9]+\.[0-9]+\.[0-9]+$/'); +}); + +it('can get the heartbeat', function () { + $heartbeat = $this->client->heartbeat(); + + expect($heartbeat) + ->toBeInt() + ->toBeGreaterThan(0); +}); + +it('can list collections', function () { + $collections = $this->client->listCollections(); + + expect($collections) + ->toBeArray() + ->toHaveCount(1); + + $this->client->createCollection('test_collection_2'); + + $collections = $this->client->listCollections(); + + expect($collections) + ->toBeArray() + ->toHaveCount(2); +}); + + +it('can create or get collections', function () { + $collection = $this->client->getOrCreateCollection('test_collection'); + + expect($collection) + ->toBeInstanceOf(Collection::class) + ->toHaveProperty('name', 'test_collection'); + + $collection = $this->client->getOrCreateCollection('test_collection_2'); + + expect($collection) + ->toBeInstanceOf(Collection::class) + ->toHaveProperty('name', 'test_collection_2'); +}); + +it('can get a collection', function () { + $collection = $this->client->getCollection('test_collection'); + + expect($collection) + ->toBeInstanceOf(Collection::class) + ->toHaveProperty('name', 'test_collection'); +}); + +it('cannot get a collection that does not exist', function () { + $this->client->getCollection('test_collection_2'); +})->throws(NotFoundException::class); + +it('can modify a collection name or metadata', function () { + $this->collection->modify('test_collection_2', ['test' => 'test_2']); + + $collection = 
$this->client->getCollection('test_collection_2'); + + expect($collection->name) + ->toBe('test_collection_2') + ->and($collection->metadata) + ->toMatchArray(['test' => 'test_2']); +}); + +it('can fork a collection', function () { + if (str_contains($this->client->api->baseUri, 'localhost') || !str_contains($this->client->api->baseUri, 'api.trychroma.com')) { + test()->markTestSkipped('Collection forking is not supported for local Chroma'); + } + + $forkedCollection = $this->client->forkCollection('test_collection', 'test_collection_fork', $this->embeddingFunction); + + expect($forkedCollection) + ->toBeInstanceOf(Collection::class) + ->toHaveProperty('name', 'test_collection_fork') + ->and($forkedCollection->id)->not->toBe($this->collection->id); + + $collections = $this->client->listCollections(); + $names = array_map(fn($c) => $c->name, $collections); + expect($names)->toContain('test_collection') + ->and($names)->toContain('test_collection_fork'); +}); + +it('can delete a collection', function () { + $this->client->deleteCollection('test_collection'); + + expect(fn() => $this->client->getCollection('test_collection')) + ->toThrow(NotFoundException::class); +}); + +it('can delete all collections', function () { + $this->client->createCollection('test_collection_2'); + + $collections = $this->client->listCollections(); + + expect($collections) + ->toBeArray() + ->toHaveCount(2); + + $this->client->deleteAllCollections(); + + $collections = $this->client->listCollections(); + + expect($collections) + ->toBeArray() + ->toHaveCount(0); +}); + +it('cannot delete a collection that does not exist', function () { + $this->client->deleteCollection('test_collection_2'); +})->throws(NotFoundException::class); diff --git a/tests/Client.php b/tests/Feature/CollectionTest.php similarity index 66% rename from tests/Client.php rename to tests/Feature/CollectionTest.php index 4c51896..3e0848d 100644 --- a/tests/Client.php +++ b/tests/Feature/CollectionTest.php @@ -2,18 +2,19 @@ 
declare(strict_types=1); +namespace Codewithkyrian\ChromaDB\Tests\Feature; + use Codewithkyrian\ChromaDB\ChromaDB; use Codewithkyrian\ChromaDB\Embeddings\EmbeddingFunction; -use Codewithkyrian\ChromaDB\Generated\Exceptions\ChromaDimensionalityException; -use Codewithkyrian\ChromaDB\Generated\Exceptions\ChromaException; -use Codewithkyrian\ChromaDB\Generated\Exceptions\ChromaTypeException; -use Codewithkyrian\ChromaDB\Generated\Exceptions\ChromaValueException; -use Codewithkyrian\ChromaDB\Generated\Exceptions\ChromaInvalidArgumentException; -use Codewithkyrian\ChromaDB\Generated\Exceptions\ChromaNotFoundException; -use Codewithkyrian\ChromaDB\Resources\CollectionResource; +use Codewithkyrian\ChromaDB\Exceptions\ChromaException; +use Codewithkyrian\ChromaDB\Exceptions\InvalidArgumentException; +use Codewithkyrian\ChromaDB\Types\Includes; +use Codewithkyrian\ChromaDB\Types\Record; +use Codewithkyrian\ChromaDB\Types\ScoredRecord; beforeEach(function () { $this->client = ChromaDB::factory() + ->withHeader('X-Chroma-Token', 'test-token') ->withDatabase('test_database') ->withTenant('test_tenant') ->connect(); @@ -30,114 +31,18 @@ public function generate(array $texts): array }; $this->collection = $this->client->createCollection( - name: 'test_collection', + name: 'collection_ops_test', embeddingFunction: $this->embeddingFunction ); }); - -it('can get the version', function () { - $version = $this->client->version(); - - expect($version) - ->toBeString() - ->toMatch('/^[0-9]+\.[0-9]+\.[0-9]+$/'); -}); - -it('can get the heartbeat', function () { - $heartbeat = $this->client->heartbeat(); - - expect($heartbeat) - ->toBeInt() - ->toBeGreaterThan(0); -}); - -it('can list collections', function () { - $collections = $this->client->listCollections(); - - expect($collections) - ->toBeArray() - ->toHaveCount(1); - - $this->client->createCollection('test_collection_2'); - - $collections = $this->client->listCollections(); - - expect($collections) - ->toBeArray() - 
->toHaveCount(2); -}); - - -it('can create or get collections', function () { - $collection = $this->client->getOrCreateCollection('test_collection'); - - expect($collection) - ->toBeInstanceOf(CollectionResource::class) - ->toHaveProperty('name', 'test_collection'); - - $collection = $this->client->getOrCreateCollection('test_collection_2'); - - expect($collection) - ->toBeInstanceOf(CollectionResource::class) - ->toHaveProperty('name', 'test_collection_2'); -}); - -it('can get a collection', function () { - $collection = $this->client->getCollection('test_collection'); - - expect($collection) - ->toBeInstanceOf(CollectionResource::class) - ->toHaveProperty('name', 'test_collection'); -}); - -it('throws a value error when getting a collection that does not exist', function () { - $this->client->getCollection('test_collection_2'); -})->throws(ChromaNotFoundException::class); - -it('can modify a collection name or metadata', function () { - $this->collection->modify('test_collection_2', ['test' => 'test_2']); - - $collection = $this->client->getCollection('test_collection_2'); - - expect($collection->name) - ->toBe('test_collection_2') - ->and($collection->metadata) - ->toMatchArray(['test' => 'test_2']); -}); - -it('can delete a collection', function () { - $this->client->deleteCollection('test_collection'); - - expect(fn() => $this->client->getCollection('test_collection')) - ->toThrow(ChromaNotFoundException::class); -}); - -it('can delete all collections', function () { - $this->client->createCollection('test_collection_2'); - - $collections = $this->client->listCollections(); - - expect($collections) - ->toBeArray() - ->toHaveCount(2); - - $this->client->deleteAllCollections(); - - $collections = $this->client->listCollections(); - - expect($collections) - ->toBeArray() - ->toHaveCount(0); +afterEach(function () { + $this->client->reset(); }); -it('throws a value error when deleting a collection that does not exist', function () { - 
$this->client->deleteCollection('test_collection_2'); -})->throws(ChromaNotFoundException::class); - it('can add single embeddings to a collection', function () { $ids = ['test1']; - $embeddings = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]; + $embeddings = [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]]; $metadatas = [['test' => 'test']]; $this->collection->add($ids, $embeddings, $metadatas); @@ -169,7 +74,7 @@ public function generate(array $texts): array it('cannot add single embeddings to a collection with a different dimensionality', function () { $ids = ['test1']; - $embeddings = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]; + $embeddings = [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]]; $metadatas = [['test' => 'test']]; $this->collection->add($ids, $embeddings, $metadatas); @@ -177,15 +82,125 @@ public function generate(array $texts): array // Dimensionality is now 10. Other embeddings must have the same dimensionality. $ids = ['test2']; - $embeddings = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]; + $embeddings = [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]]; $metadatas = [['test' => 'test2']]; $this->collection->add($ids, $embeddings, $metadatas); -})->throws(ChromaInvalidArgumentException::class, 'Collection expecting embedding with dimension of 10, got 11'); +})->throws(InvalidArgumentException::class, 'Collection expecting embedding with dimension of 10, got 11'); + +it('can add items to collection using record objects', function () { + $records = [ + Record::make('1') + ->withEmbedding([1.0, 2.0, 3.0, 4.0, 5.0]) + ->withMetadata(['test' => 'creation']), + ]; + + $this->collection->add($records); + + $item = $this->collection->get(ids: ['1']); + expect($item->ids)->toBe(['1']) + ->and($item->metadatas[0])->toBe(['test' => 'creation']); +}); + +it('can add batch embeddings to a collection', function () { + $ids = ['test1', 'test2', 'test3']; + $embeddings = [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], + [11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 
17.0, 18.0, 19.0, 20.0], + [21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0], + ]; + $metadatas = [ + ['some' => 'metadata1'], + ['some' => 'metadata2'], + ['some' => 'metadata3'], + ]; + + $this->collection->add($ids, $embeddings, $metadatas); + + expect($this->collection->count())->toBe(3); + + $getResponse = $this->collection->get($ids); + + expect($getResponse->ids) + ->toMatchArray($ids) + ->and($getResponse->embeddings) + ->toMatchArray($embeddings) + ->and($getResponse->metadatas) + ->toMatchArray($metadatas); +}); + +it('cannot add batch embeddings with different dimensionality to a collection', function () { + $ids = ['test1', 'test2', 'test3']; + $embeddings = [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], + [11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0], + [21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0], + ]; + $metadatas = [ + ['some' => 'metadata1'], + ['some' => 'metadata2'], + ['some' => 'metadata3'], + ]; + + $this->collection->add($ids, $embeddings, $metadatas); +})->throws(InvalidArgumentException::class); + +it('can add batch documents to a collection', function () { + $ids = ['test1', 'test2', 'test3']; + $documents = [ + 'This is a test document', + 'This is another test document', + 'This is a third test document', + ]; + $metadatas = [ + ['some' => 'metadata1'], + ['some' => 'metadata2'], + ['some' => 'metadata3'], + ]; + + $this->collection->add( + $ids, + metadatas: $metadatas, + documents: $documents + ); + + expect($this->collection->count())->toBe(3); + + $getResponse = $this->collection->get($ids, include: ['documents', 'metadatas']); + + expect($getResponse->ids) + ->toMatchArray($ids) + ->and($getResponse->documents) + ->toMatchArray($documents) + ->and($getResponse->metadatas) + ->toMatchArray($metadatas); +}); + +it('cannot add items with mismatched lengths', function () { + $this->collection->add( + ids: ['1', '2'], + embeddings: [[1.0, 2.0, 3.0, 4.0, 5.0]] + ); 
+})->throws(InvalidArgumentException::class, 'The number of ids, embeddings, metadatas, and documents must be the same'); + +it('cannot add items with invalid IDs', function () { + $this->collection->add( + ids: [''], // Empty string ID + embeddings: [[1.0, 2.0, 3.0, 4.0, 5.0]] + ); +})->throws(InvalidArgumentException::class, 'Expected IDs to be an array of non-empty strings'); + +it('cannot add items without embeddings or documents', function () { + $this->collection->add( + ids: ['1'], + embeddings: null, + documents: null + ); +})->throws(InvalidArgumentException::class, 'You must provide embeddings or documents'); it('can upsert single embeddings to a collection', function () { $ids = ['test1']; - $embeddings = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]; + $embeddings = [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]]; $metadatas = [['test' => 'test']]; $this->collection->upsert($ids, $embeddings, $metadatas); @@ -200,7 +215,7 @@ public function generate(array $texts): array it('can update single embeddings in a collection', function () { $ids = ['test1']; - $embeddings = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]; + $embeddings = [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]]; $metadatas = [['test' => 'test']]; $this->collection->add($ids, $embeddings, $metadatas); @@ -255,78 +270,42 @@ public function generate(array $texts): array ->toMatchArray($newMetadatas); }); -it('can add batch embeddings to a collection', function () { - $ids = ['test1', 'test2', 'test3']; - $embeddings = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [11, 12, 13, 14, 15, 16, 17, 18, 19, 20], - [21, 22, 23, 24, 25, 26, 27, 28, 29, 30], - ]; - $metadatas = [ - ['some' => 'metadata1'], - ['some' => 'metadata2'], - ['some' => 'metadata3'], - ]; - - $this->collection->add($ids, $embeddings, $metadatas); - - expect($this->collection->count())->toBe(3); - - $getResponse = $this->collection->get($ids); - - expect($getResponse->ids) - ->toMatchArray($ids) - ->and($getResponse->embeddings) - 
->toMatchArray($embeddings) - ->and($getResponse->metadatas) - ->toMatchArray($metadatas); +it('can update items in collection using record objects', function () { + $this->collection->add([ + Record::make('1') + ->withEmbedding([1.0, 2.0, 3.0, 4.0, 5.0]) + ->withMetadata(['v' => 1]), + ]); + + $this->collection->update([ + Record::make('1') + ->withEmbedding([1.0, 2.0, 3.0, 4.0, 5.0]) + ->withMetadata(['v' => 2]), + ]); + + $item = $this->collection->get(ids: ['1']); + expect($item->metadatas[0])->toBe(['v' => 2]); }); -it('cannot add batch embeddings with different dimensionality to a collection', function () { - $ids = ['test1', 'test2', 'test3']; - $embeddings = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [11, 12, 13, 14, 15, 16, 17, 18, 19], - [21, 22, 23, 24, 25, 26, 27, 28], - ]; - $metadatas = [ - ['some' => 'metadata1'], - ['some' => 'metadata2'], - ['some' => 'metadata3'], - ]; - - $this->collection->add($ids, $embeddings, $metadatas); -})->throws(ChromaInvalidArgumentException::class); - -it('can add batch documents to a collection', function () { - $ids = ['test1', 'test2', 'test3']; - $documents = [ - 'This is a test document', - 'This is another test document', - 'This is a third test document', - ]; - $metadatas = [ - ['some' => 'metadata1'], - ['some' => 'metadata2'], - ['some' => 'metadata3'], - ]; - - $this->collection->add( - $ids, - metadatas: $metadatas, - documents: $documents - ); - - expect($this->collection->count())->toBe(3); - - $getResponse = $this->collection->get($ids, include: ['documents', 'metadatas']); - - expect($getResponse->ids) - ->toMatchArray($ids) - ->and($getResponse->documents) - ->toMatchArray($documents) - ->and($getResponse->metadatas) - ->toMatchArray($metadatas); +it('can upsert items in collection using record objects', function () { + $this->collection->add([ + Record::make('1') + ->withEmbedding([1.0, 2.0, 3.0, 4.0, 5.0]) + ->withMetadata(['v' => 1]), + ]); + + $this->collection->upsert([ + Record::make('1') + 
->withEmbedding([1.0, 2.0, 3.0, 4.0, 5.0]) + ->withMetadata(['v' => 5]), // Update + Record::make('2') + ->withEmbedding([6.0, 7.0, 8.0, 9.0, 10.0]) + ->withMetadata(['v' => 10]), // Insert + ]); + + $res = $this->collection->get(); + expect($res->ids)->toHaveCount(2) + ->and($this->collection->get(['1'])->metadatas[0])->toBe(['v' => 5]); }); @@ -373,6 +352,19 @@ public function generate(array $texts): array ->toMatchArray([0.0, 0.0]); }); +it('cannot query with negative nResults', function () { + $this->collection->query( + queryTexts: ['test'], + nResults: -1 + ); +})->throws(InvalidArgumentException::class, 'Expected nResults to be a positive integer'); + +it('cannot query with invalid embedding format', function () { + $this->collection->query( + queryEmbeddings: [['invalid']] + ); +})->throws(InvalidArgumentException::class, 'Expected query embedding value at index 0.0 to be a float'); + it('can get a collection by id', function () { $ids = ['test1', 'test2', 'test3']; $embeddings = [ @@ -431,6 +423,50 @@ public function generate(array $texts): array ->toBe('test1'); }); +it('cannot get a collection by where with an invalid operator', function () { + $ids = ['test1', 'test2', 'test3']; + $embeddings = [ + [1.0, 2.0, 3.0, 4.0, 5.0], + [6.0, 7.0, 8.0, 9.0, 10.0], + [11.0, 12.0, 13.0, 14.0, 15.0], + ]; + $metadatas = [ + ['some' => 'metadata1'], + ['some' => 'metadata2'], + ['some' => 'metadata3'], + ]; + + $this->collection->add($ids, $embeddings, $metadatas); + + expect($this->collection->count())->toBe(3); + + $collectionItems = $this->collection->get( + where: [ + 'some' => ['$invalid' => 'metadata1'] + ] + ); +})->throws(ChromaException::class); + +it('can retrieve items as record objects', function () { + $this->collection->add([ + Record::make('1') + ->withEmbedding([1.0, 2.0, 3.0, 4.0, 5.0]) + ->withDocument('test doc'), + ]); + + $response = $this->collection->get( + ids: ['1'], + include: [Includes::Documents, Includes::Embeddings] + ); + + $records = 
$response->asRecords(); + + expect($records)->toHaveCount(1) + ->and($records[0])->toBeInstanceOf(Record::class) + ->and($records[0]->id)->toBe('1') + ->and($records[0]->document)->toBe('test doc'); +}); + it('can query a collection using query texts', function () { $ids = ['test1', 'test2', 'test3']; $documents = [ @@ -461,29 +497,23 @@ public function generate(array $texts): array ->toBeIn(['test1', 'test2', 'test3']); }); -it('throws a value error when getting a collection by where with an invalid operator', function () { - $ids = ['test1', 'test2', 'test3']; - $embeddings = [ - [1.0, 2.0, 3.0, 4.0, 5.0], - [6.0, 7.0, 8.0, 9.0, 10.0], - [11.0, 12.0, 13.0, 14.0, 15.0], - ]; - $metadatas = [ - ['some' => 'metadata1'], - ['some' => 'metadata2'], - ['some' => 'metadata3'], - ]; +it('can retrieve query results as record objects', function () { + $this->collection->add([ + Record::make('1')->withEmbedding([1.0, 2.0, 3.0, 4.0, 5.0]), + ]); - $this->collection->add($ids, $embeddings, $metadatas); + $response = $this->collection->query( + queryEmbeddings: [[1.0, 2.0, 3.0, 4.0, 5.0]], + nResults: 1 + ); - expect($this->collection->count())->toBe(3); + $records = $response->asRecords(); - $collectionItems = $this->collection->get( - where: [ - 'some' => ['$invalid' => 'metadata1'] - ] - ); -})->throws(ChromaException::class); + expect($records)->toHaveCount(1) + ->and($records[0][0])->toBeInstanceOf(ScoredRecord::class) + ->and($records[0][0]->id)->toBe('1') + ->and($records[0][0]->distance)->toBeLessThan(0.001); +}); it('can delete a collection by id', function () { $ids = ['test1', 'test2', 'test3']; diff --git a/tests/Feature/QueryFilteringTest.php b/tests/Feature/QueryFilteringTest.php new file mode 100644 index 0000000..60ca263 --- /dev/null +++ b/tests/Feature/QueryFilteringTest.php @@ -0,0 +1,201 @@ +client = ChromaDB::factory() + ->withHeader('X-Chroma-Token', 'test-token') + ->withDatabase('test_database') + ->withTenant('test_tenant') + ->connect(); + + 
$this->client->deleteAllCollections(); + + $this->collection = $this->client->createCollection('filtering_test_collection'); + + $this->collection->add([ + new Record('1', embedding: [0.1], metadata: ['cat' => 'A', 'val' => 10], document: 'php framework'), + new Record('2', embedding: [0.2], metadata: ['cat' => 'B', 'val' => 20], document: 'laravel framework'), + new Record('3', embedding: [0.3], metadata: ['cat' => 'A', 'val' => 30], document: 'symfony framework'), + new Record('4', embedding: [0.4], metadata: ['cat' => 'C', 'val' => 40], document: 'react library'), + new Record('5', embedding: [0.5], metadata: ['cat' => 'B', 'val' => 50], document: 'vue library'), + ]); +}); + +afterEach(function () { + $this->client->reset(); +}); + +describe('metadata filtering operators', function () { + it('filters by equality (eq)', function () { + $res = $this->collection->get(where: Where::field('cat')->eq('A')); + expect($res->ids)->toHaveCount(2)->toContain('1', '3'); + }); + + it('filters by inequality (ne)', function () { + $res = $this->collection->get(where: Where::field('cat')->ne('A')); + expect($res->ids)->toHaveCount(3)->toContain('2', '4', '5'); + }); + + it('filters by greater than (gt)', function () { + $res = $this->collection->get(where: Where::field('val')->gt(30)); + expect($res->ids)->toHaveCount(2)->toContain('4', '5'); + }); + + it('filters by greater than or equal (gte)', function () { + $res = $this->collection->get(where: Where::field('val')->gte(30)); + expect($res->ids)->toHaveCount(3)->toContain('3', '4', '5'); + }); + + it('filters by less than (lt)', function () { + $res = $this->collection->get(where: Where::field('val')->lt(20)); + expect($res->ids)->toHaveCount(1)->toContain('1'); + }); + + it('filters by less than or equal (lte)', function () { + $res = $this->collection->get(where: Where::field('val')->lte(20)); + expect($res->ids)->toHaveCount(2)->toContain('1', '2'); + }); + + it('filters by inclusion (in)', function () { + $res = 
$this->collection->get(where: Where::field('cat')->in(['A', 'C'])); + expect($res->ids)->toHaveCount(3)->toContain('1', '3', '4'); + }); + + it('filters by exclusion (nin)', function () { + $res = $this->collection->get(where: Where::field('cat')->notIn(['A', 'C'])); + expect($res->ids)->toHaveCount(2)->toContain('2', '5'); + }); + + /* Where::all() and Where::any() combine several field conditions into one logical AND / OR clause. */ it('filters by logical AND', function () { + $res = $this->collection->get(where: Where::all( + Where::field('cat')->eq('A'), + Where::field('val')->gt(20) + )); + expect($res->ids)->toBe(['3']); + }); + + it('filters by logical OR', function () { + $res = $this->collection->get(where: Where::any( + Where::field('cat')->eq('C'), + Where::field('val')->gt(40) + )); + expect($res->ids)->toHaveCount(2)->toContain('4', '5'); + }); + + /* The remaining cases in this group pass hand-written arrays instead of Where builders and expect get() to throw with the message 'Invalid where clause'. */ it('cannot get with an invalid operator', function () { + $this->collection->get(where: ['cat' => ['$invalid' => 'A']]); + })->throws(InvalidArgumentException::class, 'Invalid where clause'); + + /* NOTE(review): this $and case expects ChromaException while the parallel $or case below expects InvalidArgumentException - confirm the difference is intended. */ it('cannot get with a non-list to $and', function () { + $this->collection->get(where: ['$and' => 'invalid']); + })->throws(ChromaException::class, 'Invalid where clause'); + + it('cannot get with a non-list to $or', function () { + $this->collection->get(where: ['$or' => 'invalid']); + })->throws(InvalidArgumentException::class, 'Invalid where clause'); + + it('cannot get with a list of Where objects directly', function () { + $this->collection->get(where: [ + Where::field('cat')->eq('A'), + Where::field('val')->eq(10) + ]); + })->throws(InvalidArgumentException::class, 'Invalid where clause'); + + it('cannot get with a random array structure', function () { + $this->collection->get(where: ['random' => ['junk']]); + })->throws(InvalidArgumentException::class, 'Invalid where clause'); +}); + +/* Same kind of checks as above, but for whereDocument clauses built with Where::document(). */ describe('document content filtering', function () { + it('filters by contains', function () { + $res = $this->collection->get(whereDocument: Where::document()->contains('framework')); + 
expect($res->ids)->toHaveCount(3)->toContain('1', '2', '3'); + }); + + it('filters by not contains', function () { + $res = $this->collection->get(whereDocument: Where::document()->notContains('framework')); + expect($res->ids)->toHaveCount(2)->toContain('4', '5'); + }); + + /* matches() and notMatches() take a regex pattern: '^php' is anchored at the start of the text, 'framework$' at the end. */ it('filters by regex matches', function () { + $res = $this->collection->get(whereDocument: Where::document()->matches('^php')); + expect($res->ids)->toBe(['1']); + }); + + it('filters by regex not matches', function () { + $res = $this->collection->get(whereDocument: Where::document()->notMatches('framework$')); + expect($res->ids)->toHaveCount(2)->toContain('4', '5'); + }); + + it('filters by logical OR with document', function () { + $res = $this->collection->get(whereDocument: Where::any( + Where::document()->contains('laravel'), + Where::document()->contains('vue') + )); + expect($res->ids)->toHaveCount(2)->toContain('2', '5'); + }); + + it('filters by logical AND with document', function () { + $res = $this->collection->get(whereDocument: Where::all( + Where::document()->contains('framework'), + Where::document()->contains('php') + )); + expect($res->ids)->toBe(['1']); + }); + + /* Malformed hand-written whereDocument arrays must be rejected with the message 'Invalid where document clause'. NOTE(review): the expected exception class alternates between ChromaException and InvalidArgumentException across these cases - confirm each one. */ it('cannot get with an invalid document operator', function () { + $this->collection->get(whereDocument: ['$invalid' => 'A']); + })->throws(ChromaException::class, 'Invalid where document clause'); + + it('cannot get with a non-list to $and in whereDocument', function () { + $this->collection->get(whereDocument: ['$and' => 'invalid']); + })->throws(InvalidArgumentException::class, 'Invalid where document clause'); + + it('cannot get with a non-list to $or in whereDocument', function () { + $this->collection->get(whereDocument: ['$or' => 'invalid']); + })->throws(InvalidArgumentException::class, 'Invalid where document clause'); + + it('cannot get with a list of Where objects directly in whereDocument', function () { + $this->collection->get(whereDocument: [ + Where::document()->contains('A'), + 
Where::document()->contains('B') + ]); + })->throws(ChromaException::class, 'Invalid where document clause'); + + it('cannot get with a random array structure in whereDocument', function () { + $this->collection->get(whereDocument: ['random' => ['junk']]); + })->throws(InvalidArgumentException::class, 'Invalid where document clause'); +}); + +/* A single query embedding is sent and the assertion reads ids[0], presumably the id list for that first embedding - confirm against the query response type. */ it('can query with filtering', function () { + $res = $this->collection->query( + queryEmbeddings: [[0.1]], + nResults: 5, + where: Where::field('cat')->eq('A') + ); + expect($res->ids[0])->toHaveCount(2)->toContain('1', '3'); +}); + +/* Deletes by metadata filter, then re-reads the whole collection and checks both the removed ids and the remaining count. */ it('can delete with filtering', function () { + $this->collection->delete(where: Where::field('cat')->eq('B')); + + $res = $this->collection->get(); + expect($res->ids)->not->toContain('2', '5') + ->and($res->ids)->toHaveCount(3); +}); + +/* NOTE(review): unlike the metadata delete test there is no remaining-count assertion here, and a negated toContain with two needles likely passes as soon as either id is absent - verify against the Pest docs and consider tightening. */ it('can delete with document filtering', function () { + $this->collection->delete(whereDocument: Where::document()->contains('library')); + + $res = $this->collection->get(); + expect($res->ids)->not->toContain('4', '5'); +}); diff --git a/tests/Fixtures/ChromaServer.php b/tests/Fixtures/ChromaServer.php new file mode 100644 index 0000000..3816f74 --- /dev/null +++ b/tests/Fixtures/ChromaServer.php @@ -0,0 +1,58 @@ +isRunning()) { + return; + } + + /* Something already accepts connections on the port: do not spawn a second server. */ if (self::isPortInUse($port)) { + echo "Port $port is already in use. 
Assuming Chroma is running externally.\n"; + return; + } + + /* Launch the chroma CLI with the fixture config. The config path is relative, so this presumably relies on the working directory being the project root - confirm. NOTE(review): $port is only used for the readiness probe below; the listen port itself comes from chroma.yaml. */ self::$process = new Process(['chroma', 'run', 'tests/Fixtures/chroma.yaml']); + + self::$process->start(); + + /* Poll the port up to 20 times, sleeping 0.5 s after each failed probe, before giving up. */ $retries = 20; + while ($retries > 0) { + if (self::isPortInUse($port)) { + return; + } + usleep(500000); // 0.5 seconds + $retries--; + } + + /* NOTE(review): the spawned process is not stopped on this failure path. */ throw new \RuntimeException("Failed to start Chroma server on port $port."); + } + + /** Stops the spawned process and clears the handle, but only while the process is still running. NOTE(review): if the process already exited, self::$process stays set. */ public static function stop(): void + { + if (self::$process !== null && self::$process->isRunning()) { + self::$process->stop(); + self::$process = null; + } + } + + /** Returns true when a socket to localhost on the given port can be opened; the @ silences the warning fsockopen raises when the connection fails. */ private static function isPortInUse(int $port): bool + { + $connection = @fsockopen('localhost', $port); + + if (is_resource($connection)) { + fclose($connection); + return true; + } + + return false; + } + +} diff --git a/tests/Fixtures/chroma.yaml b/tests/Fixtures/chroma.yaml new file mode 100644 index 0000000..52bf70f --- /dev/null +++ b/tests/Fixtures/chroma.yaml @@ -0,0 +1,5 @@ +# HTTP server settings +port: 8000 +listen_address: "0.0.0.0" +allow_reset: true +persist_path: ".chroma" diff --git a/tests/Pest.php b/tests/Pest.php new file mode 100644 index 0000000..4b2d749 --- /dev/null +++ b/tests/Pest.php @@ -0,0 +1,12 @@ +beforeAll(function () { + /* Hooks registered for the tests under 'Feature': start the fixture server beforehand and stop it afterwards. */ ChromaServer::start(); + }) + ->afterAll(function () { + ChromaServer::stop(); + }) + ->in('Feature');