Skip to content

Commit a86b753

Browse files
committed
refactor(store): improve code quality and fix PHPStan errors in HybridStore
- Extract RRF logic into dedicated ReciprocalRankFusion class
- Introduce TextSearchStrategyInterface for pluggable search strategies
- Remove debug code (file_put_contents calls)
- Replace empty() with strict comparisons ([] !==) per PHPStan rules
- Add missing PHPDoc types for array parameters
- Mark properties as readonly for immutability
- Extract helper methods (buildTsvectorColumns, createSearchTextTrigger)
- Use NullVector for results without embeddings
- Update tests to reflect new setup() execution order
1 parent 38c15ee commit a86b753

File tree

8 files changed

+1726
-556
lines changed

8 files changed

+1726
-556
lines changed

src/store/src/Bridge/Postgres/HybridStore.php

Lines changed: 454 additions & 437 deletions
Large diffs are not rendered by default.
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Bridge\Postgres\TextSearch;
13+
14+
/**
 * PostgreSQL native full-text search strategy using ts_rank_cd.
 *
 * This is the default strategy that works with any PostgreSQL installation
 * without requiring additional extensions.
 *
 * Every SQL fragment is assembled with sprintf(): table name, content field
 * and language are interpolated verbatim, so they must come from trusted
 * configuration. Only the search text is referenced through a placeholder
 * ($queryParam) instead of being embedded in the SQL.
 *
 * @author Ahmed EBEN HASSINE <ahmedbhs123@gmail.com>
 */
final class PostgresTextSearchStrategy implements TextSearchStrategyInterface
{
    private const CTE_ALIAS = 'fts_search';
    private const RANK_COLUMN = 'fts_rank';
    private const SCORE_COLUMN = 'fts_score';

    /**
     * Returns the DDL statements that prepare a table for full-text search.
     *
     * Two statements are produced, in this order: one adding a stored generated
     * "content_tsv" tsvector column computed from the content field, and one
     * creating a GIN index on that column. Both use IF NOT EXISTS.
     *
     * @param string $tableName        Table to alter; interpolated verbatim (may be schema-qualified)
     * @param string $contentFieldName Column or SQL expression fed to to_tsvector(); interpolated verbatim
     * @param string $language         Text search configuration name, placed inside a single-quoted SQL literal
     *
     * @return string[] The SQL statements to execute
     */
    public function getSetupSql(string $tableName, string $contentFieldName, string $language): array
    {
        return [
            // Add tsvector column if not exists
            \sprintf(
                "ALTER TABLE %s ADD COLUMN IF NOT EXISTS content_tsv tsvector
                    GENERATED ALWAYS AS (to_tsvector('%s', %s)) STORED",
                $tableName,
                $language,
                $contentFieldName,
            ),
            // Create GIN index for full-text search. The index name must be a bare
            // identifier: CREATE INDEX does not accept a schema-qualified name.
            \sprintf(
                'CREATE INDEX IF NOT EXISTS %s ON %s USING gin(content_tsv)',
                $this->buildIndexName($tableName),
                $tableName,
            ),
        ];
    }

    /**
     * Builds the common table expression that runs the full-text search.
     *
     * The CTE selects id, metadata and the content field, the ts_rank_cd score
     * and a ROW_NUMBER() rank ordered by that score descending, restricted to
     * rows whose content_tsv matches the plainto_tsquery of the query.
     *
     * @param string $tableName        Table to search; interpolated verbatim
     * @param string $contentFieldName Content column added to the select list; interpolated verbatim
     * @param string $language         Text search configuration name, placed inside single-quoted SQL literals
     * @param string $queryParam       Placeholder for the search text; it appears three times in the generated SQL
     *
     * @return string The CTE definition ("<alias> AS (...)"), without a leading WITH keyword
     */
    public function buildSearchCte(
        string $tableName,
        string $contentFieldName,
        string $language,
        string $queryParam = ':query',
    ): string {
        return \sprintf(
            "%s AS (
                SELECT
                    id,
                    metadata,
                    %s,
                    ts_rank_cd(content_tsv, plainto_tsquery('%s', %s)) AS %s,
                    ROW_NUMBER() OVER (
                        ORDER BY ts_rank_cd(content_tsv, plainto_tsquery('%s', %s)) DESC
                    ) AS %s
                FROM %s
                WHERE content_tsv @@ plainto_tsquery('%s', %s)
            )",
            self::CTE_ALIAS,
            $contentFieldName,
            $language,
            $queryParam,
            self::SCORE_COLUMN,
            $language,
            $queryParam,
            self::RANK_COLUMN,
            $tableName,
            $language,
            $queryParam,
        );
    }

    /**
     * Name of the CTE produced by buildSearchCte().
     */
    public function getCteAlias(): string
    {
        return self::CTE_ALIAS;
    }

    /**
     * Name of the ROW_NUMBER() rank column exposed by the CTE.
     */
    public function getRankColumn(): string
    {
        return self::RANK_COLUMN;
    }

    /**
     * Name of the ts_rank_cd score column exposed by the CTE.
     */
    public function getScoreColumn(): string
    {
        return self::SCORE_COLUMN;
    }

    /**
     * Wraps a score column in an expression capped at 1.0.
     *
     * @param string $scoreColumn Column or SQL expression holding the raw score; interpolated verbatim
     */
    public function getNormalizedScoreExpression(string $scoreColumn): string
    {
        // ts_rank_cd returns values typically between 0 and 1, but can exceed 1
        // We cap it at 1.0 for normalization
        return \sprintf('LEAST(%s, 1.0)', $scoreColumn);
    }

    /**
     * @return string[] Always empty: no additional extensions required
     */
    public function getRequiredExtensions(): array
    {
        return [];
    }

    /**
     * Always reports the strategy as available; the connection is not queried.
     */
    public function isAvailable(\PDO $connection): bool
    {
        return true;
    }

    /**
     * Derives a bare index identifier from a possibly qualified or quoted table name.
     *
     * Runs of characters other than letters, digits, "_" and "$" are replaced by a
     * single "_", so "public.docs" yields "public_docs_content_tsv_idx" while a
     * plain name such as "docs" still yields "docs_content_tsv_idx".
     */
    private function buildIndexName(string $tableName): string
    {
        $identifier = preg_replace('/[^\p{L}\p{N}_$]+/u', '_', $tableName);

        // preg_replace() returns null on failure (e.g. invalid UTF-8): keep the raw name then.
        return ($identifier ?? $tableName) . '_content_tsv_idx';
    }
}
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Bridge\Postgres;
13+
14+
/**
 * Reciprocal Rank Fusion (RRF) calculator for combining multiple search rankings.
 *
 * RRF is a method to combine results from multiple search algorithms by their ranks.
 * This implementation additionally scales each term by the source's normalized score:
 *
 *     score = Σ (weight_i * score_i / (k + rank_i))
 *
 * @see https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf
 *
 * @author Ahmed EBEN HASSINE <ahmedbhs123@gmail.com>
 */
final class ReciprocalRankFusion
{
    /**
     * @param int  $k               RRF constant (default: 60). Higher values give more equal weighting between results.
     * @param bool $normalizeScores Whether to normalize scores to 0-100 range (default: true)
     */
    public function __construct(
        private readonly int $k = 60,
        private readonly bool $normalizeScores = true,
    ) {
    }

    /**
     * Calculate RRF score for a single result with multiple rankings.
     *
     * Entries whose rank is null (the result is absent from that source) are
     * skipped. When normalization is enabled, the summed score is normalized once.
     *
     * @param array<string, array{rank: int|null, score: float, weight: float}> $rankings
     *                                                                                    Each entry contains: rank (1-based, or null), score (normalized 0-1), weight (0-1)
     *
     * @return float The combined RRF score
     */
    public function calculate(array $rankings): float
    {
        $score = 0.0;

        foreach ($rankings as $ranking) {
            if (null === $ranking['rank']) {
                continue;
            }

            $score += (1.0 / ($this->k + $ranking['rank'])) * $ranking['score'] * $ranking['weight'];
        }

        return $this->normalizeScores ? $this->normalize($score) : $score;
    }

    /**
     * Calculate individual contribution for a ranking.
     *
     * The contribution is normalized on its own when normalization is enabled.
     *
     * @param int   $rank   The rank (1-based position)
     * @param float $score  The normalized score (0-1)
     * @param float $weight The weight for this ranking source (0-1)
     */
    public function calculateContribution(int $rank, float $score, float $weight): float
    {
        $contribution = (1.0 / ($this->k + $rank)) * $score * $weight;

        return $this->normalizeScores ? $this->normalize($contribution) : $contribution;
    }

    /**
     * Normalize a score to 0-100 range.
     *
     * The reference maximum is 1/(k+1): the contribution of a rank-1 result whose
     * score and weight are both 1. Inputs above that reference map above 100.
     */
    public function normalize(float $score): float
    {
        $maxScore = 1.0 / ($this->k + 1);

        return ($score / $maxScore) * 100;
    }

    /**
     * Denormalize a score from 0-100 range back to raw RRF score.
     *
     * Inverse of normalize(), using the same 1/(k+1) reference.
     */
    public function denormalize(float $normalizedScore): float
    {
        $maxScore = 1.0 / ($this->k + 1);

        return ($normalizedScore / 100) * $maxScore;
    }

    /**
     * Build SQL expression for RRF calculation.
     *
     * All string arguments are interpolated verbatim. $scoreExpr is not wrapped in
     * parentheses, so pass a column or an already self-contained expression.
     *
     * @param string $rankColumn  The column containing the rank
     * @param string $scoreExpr   SQL expression for the normalized score (0-1)
     * @param float  $weight      The weight for this ranking source
     * @param string $nullDefault Default value when rank is NULL (default: '0.0')
     */
    public function buildSqlExpression(
        string $rankColumn,
        string $scoreExpr,
        float $weight,
        string $nullDefault = '0.0',
    ): string {
        // %F (not %f) formats the float independently of the current locale, so the
        // decimal separator is always "." and the generated SQL stays valid.
        return \sprintf(
            'COALESCE(1.0 / (%d + %s) * %s * %F, %s)',
            $this->k,
            $rankColumn,
            $scoreExpr,
            $weight,
            $nullDefault,
        );
    }

    /**
     * Build SQL expression for combining multiple RRF contributions.
     *
     * Each source is rendered with buildSqlExpression() (default NULL fallback)
     * and the results are joined with " + " inside one pair of parentheses.
     *
     * @param array<array{rank_column: string, score_expr: string, weight: float}> $sources
     */
    public function buildCombinedSqlExpression(array $sources): string
    {
        $expressions = [];

        foreach ($sources as $source) {
            $expressions[] = $this->buildSqlExpression(
                $source['rank_column'],
                $source['score_expr'],
                $source['weight'],
            );
        }

        return '(' . implode(' + ', $expressions) . ')';
    }

    /**
     * The RRF constant k used in every computation.
     */
    public function getK(): int
    {
        return $this->k;
    }

    /**
     * Whether calculate() and calculateContribution() return 0-100 normalized scores.
     */
    public function isNormalized(): bool
    {
        return $this->normalizeScores;
    }
}

0 commit comments

Comments
 (0)