Skip to content

Commit cbbfd0e

Browse files
authored
add metrics for model rewrite. (#1938)
1 parent 7aa9c35 commit cbbfd0e

File tree

7 files changed

+79
-12
lines changed

7 files changed

+79
-12
lines changed

pkg/epp/datastore/datastore.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ type Datastore interface {
6363
// InferenceModelRewrite operations
6464
ModelRewriteSet(infModelRewrite *v1alpha2.InferenceModelRewrite)
6565
ModelRewriteDelete(namespacedName types.NamespacedName)
66-
ModelRewriteGet(modelName string) *v1alpha2.InferenceModelRewriteRule
66+
ModelRewriteGet(modelName string) (*v1alpha2.InferenceModelRewriteRule, string)
6767
ModelRewriteGetAll() []*v1alpha2.InferenceModelRewrite
6868

6969
// PodList lists pods matching the given predicate.
@@ -225,7 +225,7 @@ func (ds *datastore) ModelRewriteDelete(namespacedName types.NamespacedName) {
225225
ds.modelRewrites.delete(namespacedName)
226226
}
227227

228-
func (ds *datastore) ModelRewriteGet(modelName string) *v1alpha2.InferenceModelRewriteRule {
228+
func (ds *datastore) ModelRewriteGet(modelName string) (*v1alpha2.InferenceModelRewriteRule, string) {
229229
ds.mu.RLock()
230230
defer ds.mu.RUnlock()
231231
return ds.modelRewrites.getRule(modelName)

pkg/epp/datastore/modelrewritestore.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,18 @@ func (ms *modelRewriteStore) deleteInternal(n string) {
120120

121121
// getRule returns the single, highest-precedence rule for a given model name.
122122
// It prioritizes exact matches over generic ones, and among those, the oldest rule wins.
123-
func (ms *modelRewriteStore) getRule(modelName string) *v1alpha2.InferenceModelRewriteRule {
123+
// It also returns the name of the InferenceModelRewrite resource that provided the rule.
124+
func (ms *modelRewriteStore) getRule(modelName string) (*v1alpha2.InferenceModelRewriteRule, string) {
124125
// Exact matches have the highest precedence.
125126
if rulesWithMd, ok := ms.rulesByExactModelMatch[modelName]; ok && len(rulesWithMd) > 0 {
126-
return &rulesWithMd[0].rule // The list is pre-sorted, so the first element is the oldest.
127+
return &rulesWithMd[0].rule, rulesWithMd[0].parentName() // The list is pre-sorted, so the first element is the oldest.
127128
}
128129

129130
// If no exact match, fall back to the oldest generic rule.
130131
if len(ms.genericRules) > 0 {
131-
return &ms.genericRules[0].rule // The list is pre-sorted.
132+
return &ms.genericRules[0].rule, ms.genericRules[0].parentName() // The list is pre-sorted.
132133
}
133-
return nil
134+
return nil, ""
134135
}
135136

136137
// getAll returns a slice of all InferenceModelRewrite objects currently in the store.

pkg/epp/datastore/modelrewritestore_test.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,55 +73,63 @@ func TestModelRewriteStore(t *testing.T) {
7373
op func(store *modelRewriteStore)
7474
modelToGet string
7575
wantRule *v1alpha2.InferenceModelRewriteRule
76+
wantName string
7677
wantGetAll []*v1alpha2.InferenceModelRewrite
7778
}{
7879
{
7980
name: "Simple exact match",
8081
initialState: []*v1alpha2.InferenceModelRewrite{rewriteOld},
8182
modelToGet: "model1",
8283
wantRule: &ruleModel1V1,
84+
wantName: rewriteOld.Name,
8385
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteOld},
8486
},
8587
{
8688
name: "Simple generic match",
8789
initialState: []*v1alpha2.InferenceModelRewrite{rewriteGenericOld},
8890
modelToGet: "model2", // A different model to test generic fallback
8991
wantRule: &ruleGeneric,
92+
wantName: rewriteGenericOld.Name,
9093
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteGenericOld},
9194
},
9295
{
9396
name: "No match",
9497
initialState: []*v1alpha2.InferenceModelRewrite{rewriteOld},
9598
modelToGet: "model2",
9699
wantRule: nil,
100+
wantName: "",
97101
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteOld},
98102
},
99103
{
100104
name: "Precedence: Exact match wins over generic",
101105
initialState: []*v1alpha2.InferenceModelRewrite{rewriteOld, rewriteGenericOld},
102106
modelToGet: "model1",
103107
wantRule: &ruleModel1V1,
108+
wantName: rewriteOld.Name,
104109
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteOld, rewriteGenericOld},
105110
},
106111
{
107112
name: "Precedence: Fallback to generic when no exact match",
108113
initialState: []*v1alpha2.InferenceModelRewrite{rewriteOld, rewriteGenericOld},
109114
modelToGet: "model2",
110115
wantRule: &ruleGeneric,
116+
wantName: rewriteGenericOld.Name,
111117
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteOld, rewriteGenericOld},
112118
},
113119
{
114120
name: "Precedence: Oldest exact match wins",
115121
initialState: []*v1alpha2.InferenceModelRewrite{rewriteNew, rewriteOld},
116122
modelToGet: "model1",
117123
wantRule: &ruleModel1V1,
124+
wantName: rewriteOld.Name,
118125
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteNew, rewriteOld},
119126
},
120127
{
121128
name: "Precedence: Oldest generic match wins",
122129
initialState: []*v1alpha2.InferenceModelRewrite{rewriteGenericNew, rewriteGenericOld},
123130
modelToGet: "any-model",
124131
wantRule: &ruleGeneric,
132+
wantName: rewriteGenericOld.Name,
125133
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteGenericNew, rewriteGenericOld},
126134
},
127135
{
@@ -132,6 +140,7 @@ func TestModelRewriteStore(t *testing.T) {
132140
},
133141
modelToGet: "model1",
134142
wantRule: &ruleGeneric, // Falls back to generic
143+
wantName: rewriteGenericOld.Name,
135144
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteGenericOld},
136145
},
137146
{
@@ -142,6 +151,7 @@ func TestModelRewriteStore(t *testing.T) {
142151
},
143152
modelToGet: "model1",
144153
wantRule: &ruleModel1V1,
154+
wantName: rewriteOld.Name,
145155
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteOld},
146156
},
147157
{
@@ -152,6 +162,7 @@ func TestModelRewriteStore(t *testing.T) {
152162
},
153163
modelToGet: "model1",
154164
wantRule: &ruleModel1V2,
165+
wantName: rewriteUpdated.Name,
155166
wantGetAll: []*v1alpha2.InferenceModelRewrite{rewriteUpdated},
156167
},
157168
}
@@ -167,10 +178,13 @@ func TestModelRewriteStore(t *testing.T) {
167178
tc.op(store)
168179
}
169180

170-
gotRule := store.getRule(tc.modelToGet)
181+
gotRule, gotName := store.getRule(tc.modelToGet)
171182
if diff := cmp.Diff(tc.wantRule, gotRule); diff != "" {
172183
t.Errorf("GetRule() mismatch (-want +got):\n%s", diff)
173184
}
185+
if gotName != tc.wantName {
186+
t.Errorf("GetRule() returned incorrect name: got %s, want %s", gotName, tc.wantName)
187+
}
174188

175189
if tc.wantGetAll != nil {
176190
gotAll := store.getAll()

pkg/epp/metrics/metrics.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,16 @@ var (
373373
},
374374
[]string{"fairness_id", "priority"},
375375
)
376+
377+
// Inference Model Rewrite Metrics
378+
inferenceModelRewriteDecisionsTotal = prometheus.NewCounterVec(
379+
prometheus.CounterOpts{
380+
Subsystem: InferenceExtension,
381+
Name: "model_rewrite_decisions_total",
382+
Help: metricsutil.HelpMsgWithStability("Total number of inference model rewrite decisions.", compbasemetrics.ALPHA),
383+
},
384+
[]string{"model_rewrite_name", "model_name", "target_model"},
385+
)
376386
)
377387

378388
var registerMetrics sync.Once
@@ -416,6 +426,7 @@ func Register(customCollectors ...prometheus.Collector) {
416426
metrics.Registry.MustRegister(PrefixCacheHitLength)
417427
metrics.Registry.MustRegister(flowControlRequestQueueDuration)
418428
metrics.Registry.MustRegister(flowControlQueueSize)
429+
metrics.Registry.MustRegister(inferenceModelRewriteDecisionsTotal)
419430
for _, collector := range customCollectors {
420431
metrics.Registry.MustRegister(collector)
421432
}
@@ -460,6 +471,7 @@ func Reset() {
460471
PrefixCacheHitLength.Reset()
461472
flowControlRequestQueueDuration.Reset()
462473
flowControlQueueSize.Reset()
474+
inferenceModelRewriteDecisionsTotal.Reset()
463475
}
464476

465477
// RecordRequestCounter records the number of requests.
@@ -737,3 +749,8 @@ func SetTTFTSLOThreshold(modelName, targetModelName string, threshold float64) {
737749
func SetTPOTSLOThreshold(modelName, targetModelName string, threshold float64) {
738750
inferenceGauges.With(prometheus.Labels{"model_name": modelName, "target_model_name": targetModelName, "type": "tpot_slo_threshold"}).Set(threshold)
739751
}
752+
753+
// RecordInferenceModelRewriteDecision records the routing decision for InferenceModelRewrite.
754+
func RecordInferenceModelRewriteDecision(modelRewriteName, modelName, targetModel string) {
755+
inferenceModelRewriteDecisionsTotal.WithLabelValues(modelRewriteName, modelName, targetModel).Inc()
756+
}

pkg/epp/metrics/metrics_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,3 +887,36 @@ func TestFlowControlQueueSizeMetric(t *testing.T) {
887887
require.NoError(t, err, "Failed to get gauge value for non-existent user-c/100")
888888
require.Equal(t, 0.0, val, "Gauge value for non-existent labels should be 0")
889889
}
890+
891+
func TestInferenceModelRewriteDecisionsTotalMetric(t *testing.T) {
892+
Reset()
893+
894+
RecordInferenceModelRewriteDecision("rewrite-rule-1", "model-a", "model-b")
895+
RecordInferenceModelRewriteDecision("rewrite-rule-1", "model-a", "model-b")
896+
RecordInferenceModelRewriteDecision("rewrite-rule-2", "model-c", "model-d")
897+
898+
testCases := []struct {
899+
name string
900+
labels prometheus.Labels
901+
expectCount float64
902+
}{
903+
{
904+
name: "rewrite-rule-1, model-a -> model-b",
905+
labels: prometheus.Labels{"model_rewrite_name": "rewrite-rule-1", "model_name": "model-a", "target_model": "model-b"},
906+
expectCount: 2,
907+
},
908+
{
909+
name: "rewrite-rule-2, model-c -> model-d",
910+
labels: prometheus.Labels{"model_rewrite_name": "rewrite-rule-2", "model_name": "model-c", "target_model": "model-d"},
911+
expectCount: 1,
912+
},
913+
}
914+
915+
for _, tc := range testCases {
916+
t.Run(tc.name, func(t *testing.T) {
917+
val, err := testutil.GetCounterMetricValue(inferenceModelRewriteDecisionsTotal.With(tc.labels))
918+
require.NoError(t, err, "Failed to get counter value for labels %v", tc.labels)
919+
require.Equal(t, tc.expectCount, val, "Counter value mismatch for labels %v", tc.labels)
920+
})
921+
}
922+
}

pkg/epp/requestcontrol/director.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ type Datastore interface {
5151
PoolGet() (*datalayer.EndpointPool, error)
5252
ObjectiveGet(objectiveName string) *v1alpha2.InferenceObjective
5353
PodList(predicate func(backendmetrics.PodMetrics) bool) []backendmetrics.PodMetrics
54-
ModelRewriteGet(modelName string) *v1alpha2.InferenceModelRewriteRule
54+
// ModelRewriteGet returns the rewrite rule for a given model name and the name of the InferenceModelRewrite object.
55+
ModelRewriteGet(modelName string) (*v1alpha2.InferenceModelRewriteRule, string)
5556
}
5657

5758
// Scheduler defines the interface required by the Director for scheduling.
@@ -195,11 +196,12 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
195196
}
196197

197198
func (d *Director) applyWeightedModelRewrite(reqCtx *handlers.RequestContext) {
198-
rewriteRule := d.datastore.ModelRewriteGet(reqCtx.IncomingModelName)
199+
rewriteRule, modelRewriteName := d.datastore.ModelRewriteGet(reqCtx.IncomingModelName)
199200
if rewriteRule == nil {
200201
return
201202
}
202203
reqCtx.TargetModelName = d.selectWeightedModel(rewriteRule.Targets)
204+
metrics.RecordInferenceModelRewriteDecision(modelRewriteName, reqCtx.IncomingModelName, reqCtx.TargetModelName)
203205
}
204206

205207
func (d *Director) selectWeightedModel(models []v1alpha2.TargetModel) string {

pkg/epp/requestcontrol/director_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ func (m mockProducedDataType) Clone() datalayer.Cloneable {
169169
return mockProducedDataType{value: m.value}
170170
}
171171

172-
func (ds *mockDatastore) ModelRewriteGet(modelName string) *v1alpha2.InferenceModelRewriteRule {
172+
func (ds *mockDatastore) ModelRewriteGet(modelName string) (*v1alpha2.InferenceModelRewriteRule, string) {
173173
// This mock implementation simulates the precedence logic for simplicity.
174174
// It finds the oldest rewrite that has a rule matching the modelName.
175175
var matchingRewrites []*v1alpha2.InferenceModelRewrite
@@ -185,7 +185,7 @@ func (ds *mockDatastore) ModelRewriteGet(modelName string) *v1alpha2.InferenceMo
185185
}
186186

187187
if len(matchingRewrites) == 0 {
188-
return nil
188+
return nil, ""
189189
}
190190

191191
// Sort by timestamp to find the oldest.
@@ -194,7 +194,7 @@ func (ds *mockDatastore) ModelRewriteGet(modelName string) *v1alpha2.InferenceMo
194194
})
195195

196196
// Return the first rule from the oldest rewrite.
197-
return &matchingRewrites[0].Spec.Rules[0]
197+
return &matchingRewrites[0].Spec.Rules[0], matchingRewrites[0].Name
198198
}
199199

200200
func TestDirector_HandleRequest(t *testing.T) {

0 commit comments

Comments
 (0)