Skip to content

Commit 3f2a9d4

Browse files
committed
Add prefix cache match scorer
1 parent 3b01d20 commit 3f2a9d4

File tree

3 files changed

+251
-0
lines changed

3 files changed

+251
-0
lines changed

pkg/epp/datalayer/plugins/data_types.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
117
package plugins
218

319
import (
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package scorer
18+
19+
import (
20+
"context"
21+
"encoding/json"
22+
23+
k8stypes "k8s.io/apimachinery/pkg/types"
24+
dplugins "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/plugins"
25+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
26+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
27+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
28+
)
29+
30+
// PrefixCacheMatchScorerType is the plugin type identifier of PrefixCacheScorer.
// NewPrefixCacheScorer uses it as both the Type and the default Name.
const PrefixCacheMatchScorerType = "prefix-cache-match-scorer"
33+
34+
// ServerID is a named type whose underlying type is k8stypes.NamespacedName.
// NOTE(review): nothing in the visible code references ServerID — confirm it
// is used elsewhere in the package before keeping it.
type ServerID k8stypes.NamespacedName
35+
36+
// compile-time type assertion
37+
var _ framework.Scorer = &PrefixCacheScorer{}
38+
39+
// PrefixCacheScorerFactory defines the factory function for PrefixCacheScorer.
40+
func PrefixCacheScorerFactory(name string, _ json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) {
41+
return NewPrefixCacheScorer().WithName(name), nil
42+
}
43+
44+
// NewPrefixCacheScorer initializes a new PrefixCacheScorer and returns its pointer.
45+
func NewPrefixCacheScorer() *PrefixCacheScorer {
46+
return &PrefixCacheScorer{
47+
tn: plugins.TypedName{Type: PrefixCacheMatchScorerType, Name: PrefixCacheMatchScorerType},
48+
}
49+
}
50+
51+
// PrefixCacheScorer scores list of candidate pods based on Lora affinity and availability.
52+
type PrefixCacheScorer struct {
53+
tn plugins.TypedName
54+
}
55+
56+
// TypedName returns the type and name tuple of this plugin instance.
57+
func (s *PrefixCacheScorer) TypedName() plugins.TypedName {
58+
return s.tn
59+
}
60+
61+
// Consumes returns the list of data that is consumed by the plugin.
62+
func (s *PrefixCacheScorer) Consumes() map[string]any {
63+
return map[string]any{}
64+
}
65+
66+
// WithName sets the name of the scorer.
67+
func (s *PrefixCacheScorer) WithName(name string) *PrefixCacheScorer {
68+
s.tn.Name = name
69+
return s
70+
}
71+
72+
func (s *PrefixCacheScorer) Score(_ context.Context, cycleState *types.CycleState, _ *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
73+
// calculate the scores of pods
74+
scores := make(map[types.Pod]float64, len(pods))
75+
76+
for _, pod := range pods {
77+
matchPercent, ok := pod.Get(dplugins.PrefixCacheMatchPrecentKey)
78+
if !ok {
79+
scores[pod] = 0.0
80+
continue
81+
}
82+
scores[pod] = matchPercent.(*dplugins.PrefixCacheMatchPercent).MatchPercentage()
83+
}
84+
return scores
85+
}
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package scorer
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/stretchr/testify/assert"
24+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
25+
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
26+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
27+
dplugins "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/plugins"
28+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
29+
)
30+
31+
// mockPod is a mock implementation of types.Pod for testing purposes.
32+
type mockPod struct {
33+
data map[string]datalayer.Cloneable
34+
}
35+
36+
func newMockPod() *mockPod {
37+
return &mockPod{
38+
data: make(map[string]datalayer.Cloneable),
39+
}
40+
}
41+
42+
func (p *mockPod) Get(key string) (datalayer.Cloneable, bool) {
43+
val, ok := p.data[key]
44+
return val, ok
45+
}
46+
47+
func (p *mockPod) Put(key string, value datalayer.Cloneable) {
48+
p.data[key] = value
49+
}
50+
51+
func (p *mockPod) GetPod() *backend.Pod {
52+
return nil
53+
}
54+
55+
func (p *mockPod) GetMetrics() *backendmetrics.MetricsState {
56+
return nil
57+
}
58+
59+
func (p *mockPod) String() string {
60+
return ""
61+
}
62+
63+
func (p *mockPod) Keys() []string {
64+
keys := make([]string, 0, len(p.data))
65+
for k := range p.data {
66+
keys = append(keys, k)
67+
}
68+
return keys
69+
}
70+
71+
func TestPrefixCacheScorer_Score(t *testing.T) {
72+
pod1 := newMockPod()
73+
pod1.Put(dplugins.PrefixCacheMatchPrecentKey, dplugins.NewPrefixCacheMatchPercent(50.0))
74+
75+
pod2 := newMockPod()
76+
pod2.Put(dplugins.PrefixCacheMatchPrecentKey, dplugins.NewPrefixCacheMatchPercent(100.0))
77+
78+
pod3 := newMockPod()
79+
80+
testCases := []struct {
81+
name string
82+
pods []types.Pod
83+
expected map[types.Pod]float64
84+
}{
85+
{
86+
name: "pods with prefix cache match percent",
87+
pods: []types.Pod{pod1, pod2},
88+
expected: map[types.Pod]float64{
89+
pod1: 50.0,
90+
pod2: 100.0,
91+
},
92+
},
93+
{
94+
name: "pod without prefix cache match percent",
95+
pods: []types.Pod{pod3},
96+
expected: map[types.Pod]float64{
97+
pod3: 0.0,
98+
},
99+
},
100+
{
101+
name: "mixed pods",
102+
pods: []types.Pod{pod1, pod3},
103+
expected: map[types.Pod]float64{
104+
pod1: 50.0,
105+
pod3: 0.0,
106+
},
107+
},
108+
{
109+
name: "empty pods list",
110+
pods: []types.Pod{},
111+
expected: map[types.Pod]float64{},
112+
},
113+
}
114+
115+
scorer := NewPrefixCacheScorer()
116+
117+
for _, tc := range testCases {
118+
t.Run(tc.name, func(t *testing.T) {
119+
scores := scorer.Score(context.Background(), nil, nil, tc.pods)
120+
assert.Equal(t, tc.expected, scores)
121+
})
122+
}
123+
}
124+
125+
func TestNewPrefixCacheScorer(t *testing.T) {
126+
scorer := NewPrefixCacheScorer()
127+
assert.NotNil(t, scorer)
128+
assert.Equal(t, PrefixCacheMatchScorerType, scorer.tn.Type)
129+
assert.Equal(t, PrefixCacheMatchScorerType, scorer.tn.Name)
130+
}
131+
132+
func TestPrefixCacheScorer_WithName(t *testing.T) {
133+
scorer := NewPrefixCacheScorer()
134+
customName := "custom-scorer"
135+
scorer.WithName(customName)
136+
assert.Equal(t, customName, scorer.TypedName().Name)
137+
}
138+
139+
func TestPrefixCacheScorer_TypedName(t *testing.T) {
140+
scorer := NewPrefixCacheScorer()
141+
tn := scorer.TypedName()
142+
assert.Equal(t, PrefixCacheMatchScorerType, tn.Type)
143+
assert.Equal(t, PrefixCacheMatchScorerType, tn.Name)
144+
}
145+
146+
func TestPrefixCacheScorer_Consumes(t *testing.T) {
147+
scorer := NewPrefixCacheScorer()
148+
consumes := scorer.Consumes()
149+
assert.Empty(t, consumes)
150+
}

0 commit comments

Comments
 (0)