Skip to content

Commit 92d983e

Browse files
authored
Added Vectorizers and setup CI (#1)
* Added Vectorizers and setup CI * Updated testsuite name * Added CODECOV_TOKEN
1 parent f736ee4 commit 92d983e

File tree

9 files changed

+615
-1
lines changed

9 files changed

+615
-1
lines changed

.github/workflows/tests.yml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
name: Tests
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
9+
concurrency:
10+
group: ${{ github.workflow }}-${{ github.ref }}
11+
cancel-in-progress: true
12+
13+
jobs:
14+
tests:
15+
name: Unit tests
16+
runs-on: ubuntu-latest
17+
strategy:
18+
fail-fast: false
19+
steps:
20+
- name: Checkout repository
21+
uses: actions/checkout@v3
22+
23+
- name: Setup PHP with Composer and extensions
24+
uses: shivammathur/setup-php@v2
25+
with:
26+
php-version: '8.0'
27+
coverage: xdebug
28+
29+
- name: Install Composer dependencies
30+
uses: ramsey/composer-install@v2
31+
with:
32+
dependency-versions: highest
33+
34+
- name: Run unit tests
35+
run: vendor/bin/phpunit --testsuite Unit --verbose --coverage-clover build/logs/clover.xml --coverage-filter ./src
36+
37+
- name: Upload codecov coverage
38+
uses: codecov/codecov-action@v3
39+
with:
40+
fail_ci_if_error: false
41+
files: build/logs/clover.xml
42+
verbose: true
43+
env:
44+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
composer.lock
22
.idea/
3-
vendor/
3+
vendor/
4+
.phpunit.result.cache

phpunit.xml.dist

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<phpunit
3+
colors="true"
4+
processIsolation="false"
5+
stopOnError="false"
6+
stopOnFailure="false"
7+
beStrictAboutTestsThatDoNotTestAnything="true"
8+
>
9+
<testsuites>
10+
<testsuite name="Unit">
11+
<directory>tests/Unit/</directory>
12+
</testsuite>
13+
</testsuites>
14+
15+
<coverage>
16+
<report>
17+
<clover outputFile="build/logs/clover.xml"/>
18+
</report>
19+
</coverage>
20+
</phpunit>

src/Vectorizer/Factory.php

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?php
2+
3+
namespace Vladvildanov\PredisVl\Vectorizer;
4+
5+
use Exception;
6+
7+
class Factory implements FactoryInterface
{
    /**
     * Maps lower-cased vectorizer names to the classes instantiated for them.
     *
     * @var array|string[]
     */
    private array $classMappings = [
        'openai' => OpenAIVectorizer::class,
    ];

    /**
     * Allows to provide additional class mappings for custom vectorizers.
     *
     * Example: ['foo' => Foo::class, 'bar' => Bar::class]
     *
     * Names are lower-cased on registration because createVectorizer() lower-cases
     * the requested name before looking it up; a mapping stored under a mixed-case
     * key would otherwise never be found. A mapping whose lower-cased name matches
     * an existing one replaces it.
     *
     * @param array $additionalMappings Vectorizer name => class name pairs.
     */
    public function __construct(array $additionalMappings = [])
    {
        foreach ($additionalMappings as $name => $class) {
            // Same normalisation as the lookup side, so both always agree.
            $this->classMappings[strtolower((string) $name)] = $class;
        }
    }

    /**
     * Creates the vectorizer registered under the given name (case-insensitive).
     *
     * The mapped class is instantiated with the model as first and the
     * configuration as second constructor argument.
     *
     * @param string $vectorizer Name the vectorizer class was registered under.
     * @param string|null $model Model name handed to the vectorizer constructor.
     * @param array $configuration Configuration handed to the vectorizer constructor.
     * @return VectorizerInterface
     * @throws Exception When no class is registered under the given name.
     */
    public function createVectorizer(
        string $vectorizer,
        ?string $model = null,
        array $configuration = []
    ): VectorizerInterface {
        $name = strtolower($vectorizer);

        if (!array_key_exists($name, $this->classMappings)) {
            throw new Exception('Given vectorizer does not exists.');
        }

        $class = $this->classMappings[$name];

        return new $class($model, $configuration);
    }
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?php
2+
3+
namespace Vladvildanov\PredisVl\Vectorizer;
4+
5+
interface FactoryInterface
{
    /**
     * Creates Vectorizer with given configuration.
     *
     * @param string $vectorizer Name identifying which vectorizer to create.
     * @param string|null $model Model name, or null to leave the choice to the vectorizer.
     * @param array $configuration Vectorizer-specific configuration.
     * @return VectorizerInterface
     */
    public function createVectorizer(
        string $vectorizer,
        ?string $model = null,
        array $configuration = []
    ): VectorizerInterface;
}
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
<?php
2+
3+
namespace Vladvildanov\PredisVl\Vectorizer;
4+
5+
use Exception;
6+
use GuzzleHttp\Client;
7+
use Psr\Http\Message\ResponseInterface;
8+
9+
class OpenAIVectorizer implements VectorizerInterface
{
    /**
     * HTTP client used to post requests to the embeddings endpoint.
     *
     * @var Client
     */
    private Client $client;

    /**
     * Model name sent with every request.
     *
     * @var string
     */
    private string $model;

    /**
     * Endpoint the requests are posted to.
     *
     * @var string
     */
    private string $apiUrl;

    /**
     * API configuration accepts token and optional request parameters specified in documentation.
     *
     * Example: $apiConfiguration = ['token' => $token, 'requestParams' => [...]]
     *
     * The endpoint is read from the OPENAI_API_URL environment variable and falls
     * back to https://api.openai.com/v1/embeddings when it is unset or empty.
     *
     * @link https://platform.openai.com/docs/api-reference/embeddings/create
     * @param string|null $model Model name; "text-embedding-ada-002" is used when null.
     * @param array $apiConfiguration Optional 'token' and 'requestParams' entries.
     * @param Client|null $client HTTP client; a default Client is created when null.
     */
    public function __construct(
        ?string $model = null,
        private array $apiConfiguration = [],
        ?Client $client = null
    ) {
        $this->client = $client ?? new Client();
        $this->model = $model ?? 'text-embedding-ada-002';
        // "?:" on purpose: getenv() returns false when the variable is unset, and an
        // empty value should select the default endpoint as well.
        $this->apiUrl = getenv('OPENAI_API_URL') ?: 'https://api.openai.com/v1/embeddings';
    }

    /**
     * Requests an embedding for a single text.
     *
     * The complete decoded JSON response body is returned as an associative array.
     *
     * @inheritDoc
     * @throws Exception|\GuzzleHttp\Exception\GuzzleException|\JsonException
     */
    public function embed(string $text): array
    {
        return $this->requestEmbeddings($text);
    }

    /**
     * Requests embeddings for several texts in one API call.
     *
     * The complete decoded JSON response body is returned as an associative array.
     *
     * @inheritDoc
     * @throws Exception|\GuzzleHttp\Exception\GuzzleException|\JsonException
     */
    public function batchEmbed(array $texts): array
    {
        return $this->requestEmbeddings($texts);
    }

    /**
     * @inheritDoc
     */
    public function getModel(): string
    {
        return $this->model;
    }

    /**
     * @inheritDoc
     */
    public function getConfiguration(): array
    {
        return $this->apiConfiguration;
    }

    /**
     * Posts the given input together with the model name and decodes the JSON response.
     *
     * Shared by embed() and batchEmbed(), which differ only in the type of the input.
     *
     * @param string|string[] $input Single text or list of texts.
     * @return array Decoded response body.
     * @throws Exception|\GuzzleHttp\Exception\GuzzleException|\JsonException
     */
    private function requestEmbeddings(string|array $input): array
    {
        $jsonResponse = $this->sendRequest([
            'model' => $this->model,
            'input' => $input,
        ])->getBody()->getContents();

        return json_decode($jsonResponse, true, 512, JSON_THROW_ON_ERROR);
    }

    /**
     * Returns API token associated with current vectorizer.
     *
     * The 'token' entry of the API configuration wins; the OPENAI_API_TOKEN
     * environment variable is used when that entry is missing or null.
     *
     * @return string
     * @throws Exception When neither source provides a token.
     */
    private function getApiToken(): string
    {
        // isset() instead of array_key_exists(): a null token must fall through to the
        // environment variable rather than violate the string return type.
        if (isset($this->apiConfiguration['token'])) {
            return $this->apiConfiguration['token'];
        }

        $token = getenv('OPENAI_API_TOKEN');

        if (false !== $token) {
            return $token;
        }

        throw new Exception(
            'API token should be provided in API configuration or as an environment variable.'
        );
    }

    /**
     * Sends an actual request to API endpoint.
     *
     * Entries of the 'requestParams' configuration are merged over the request body,
     * so a key present in both takes its value from 'requestParams'.
     *
     * @param array $requestBody
     * @return ResponseInterface
     * @throws Exception|\GuzzleHttp\Exception\GuzzleException
     */
    private function sendRequest(array $requestBody): ResponseInterface
    {
        $requestParams = $this->apiConfiguration['requestParams'] ?? [];
        $requestBody = array_merge($requestBody, $requestParams);

        return $this->client->post($this->apiUrl, [
            'headers' => [
                'Authorization' => 'Bearer ' . $this->getApiToken(),
                'Content-Type' => 'application/json',
                'Accept' => 'application/json',
            ],
            'json' => $requestBody
        ]);
    }
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<?php
2+
3+
namespace Vladvildanov\PredisVl\Vectorizer;
4+
5+
interface VectorizerInterface
{
    /**
     * Converts a text into its vector representation.
     *
     * @param string $text Text to vectorize.
     * @return float[]|string[]
     */
    public function embed(string $text): array;

    /**
     * Converts multiple text chunks into a single vector representation.
     *
     * @param string[] $texts Text chunks to vectorize.
     * @return float[]|string[]
     */
    public function batchEmbed(array $texts): array;

    /**
     * Returns the name of the model the vectorizer works with.
     *
     * @return string
     */
    public function getModel(): string;

    /**
     * Returns the configuration of the vectorizer.
     *
     * @return array
     */
    public function getConfiguration(): array;
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
<?php
2+
3+
namespace Unit\Vectorizer;
4+
5+
use Exception;
6+
use PHPUnit\Framework\TestCase;
7+
use Vladvildanov\PredisVl\Vectorizer\Factory;
8+
use Vladvildanov\PredisVl\Vectorizer\OpenAIVectorizer;
9+
10+
class FactoryTest extends TestCase
{
    /**
     * The built-in "openai" name yields an OpenAI vectorizer carrying the requested model.
     *
     * @return void
     * @throws Exception
     */
    public function testCreateVectorizer(): void
    {
        $factory = new Factory();
        $vectorizer = $factory->createVectorizer('openai', 'test model');

        $this->assertInstanceOf(OpenAIVectorizer::class, $vectorizer);
        $this->assertSame('test model', $vectorizer->getModel());
    }

    /**
     * The vectorizer name is matched regardless of letter case.
     *
     * @return void
     * @throws Exception
     */
    public function testCreateVectorizerIsCaseInsensitive(): void
    {
        $factory = new Factory();
        $vectorizer = $factory->createVectorizer('OpenAI', 'test model');

        $this->assertInstanceOf(OpenAIVectorizer::class, $vectorizer);
        $this->assertSame('test model', $vectorizer->getModel());
    }

    /**
     * A mapping passed to the constructor is resolvable under its own name and
     * leaves the built-in mapping available.
     *
     * A name different from the built-in one is used on purpose: re-registering
     * "openai" would pass even if additional mappings were ignored entirely.
     *
     * @return void
     * @throws Exception
     */
    public function testCreateVectorizerWithAdditionalMappings(): void
    {
        $factory = new Factory(['custom' => OpenAIVectorizer::class]);
        $vectorizer = $factory->createVectorizer('custom', 'test model');

        $this->assertInstanceOf(OpenAIVectorizer::class, $vectorizer);
        $this->assertSame('test model', $vectorizer->getModel());
        $this->assertInstanceOf(OpenAIVectorizer::class, $factory->createVectorizer('openai'));
    }

    /**
     * Requesting a name nobody registered is reported with an exception.
     *
     * @return void
     * @throws Exception
     */
    public function testCreateVectorizerThrowsErrorOnNonExistingVectorizer(): void
    {
        $factory = new Factory();

        $this->expectException(Exception::class);
        $this->expectExceptionMessage('Given vectorizer does not exists.');

        $factory->createVectorizer('foobar');
    }
}

0 commit comments

Comments
 (0)