diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index ad0760886..beef52f60 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -68,12 +68,17 @@ import ( testresponsereceived "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol/plugins/test/responsereceived" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/saturationdetector" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/scorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/loraaffinityscorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/maxscorepicker" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/predictedlatencyprofilehandler" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/prefixcachescorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/queuescorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/randompicker" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/singleprofilehandler" testfilter "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/test/filter" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/weightedrandompicker" runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" @@ -423,17 +428,17 @@ func setupDatastore(setupLog logr.Logger, ctx context.Context, epFactory datalay // registerInTreePlugins registers the factory functions of all known plugins func (r *Runner) registerInTreePlugins() { - plugins.Register(prefix.PrefixCachePluginType, prefix.PrefixCachePluginFactory) - plugins.Register(picker.MaxScorePickerType, picker.MaxScorePickerFactory) - plugins.Register(picker.RandomPickerType, picker.RandomPickerFactory) - plugins.Register(picker.WeightedRandomPickerType, picker.WeightedRandomPickerFactory) - plugins.Register(profile.SingleProfileHandlerType, profile.SingleProfileHandlerFactory) - plugins.Register(scorer.KvCacheUtilizationScorerType, scorer.KvCacheUtilizationScorerFactory) - plugins.Register(scorer.QueueScorerType, scorer.QueueScorerFactory) - plugins.Register(scorer.LoraAffinityScorerType, scorer.LoraAffinityScorerFactory) + plugins.Register(prefixcachescorer.PrefixCachePluginType, prefixcachescorer.PrefixCachePluginFactory) + plugins.Register(maxscorepicker.MaxScorePickerType, maxscorepicker.MaxScorePickerFactory) + plugins.Register(randompicker.RandomPickerType, randompicker.RandomPickerFactory) + plugins.Register(weightedrandompicker.WeightedRandomPickerType, weightedrandompicker.WeightedRandomPickerFactory) + plugins.Register(singleprofilehandler.SingleProfileHandlerType, singleprofilehandler.SingleProfileHandlerFactory) + plugins.Register(kvcacheutilizationscorer.KvCacheUtilizationScorerType, kvcacheutilizationscorer.KvCacheUtilizationScorerFactory) + plugins.Register(queuescorer.QueueScorerType, queuescorer.QueueScorerFactory) + plugins.Register(loraaffinityscorer.LoraAffinityScorerType, loraaffinityscorer.LoraAffinityScorerFactory) // Latency predictor plugins - plugins.Register(slo_aware_router.SLOAwareRouterPluginType, slo_aware_router.SLOAwareRouterFactory) - plugins.Register(profile.SLOAwareProfileHandlerType, profile.SLOAwareProfileHandlerFactory) + plugins.Register(predictedlatencyscorer.SLOAwareRouterPluginType, predictedlatencyscorer.SLOAwareRouterFactory) + plugins.Register(predictedlatencyprofilehandler.SLOAwareProfileHandlerType, predictedlatencyprofilehandler.SLOAwareProfileHandlerFactory) // register filter for test purpose only (used in conformance tests) plugins.Register(testfilter.HeaderBasedTestingFilterType, testfilter.HeaderBasedTestingFilterFactory) // register response received plugin for test purpose only (used in conformance tests) diff --git a/hack/boilerplate/boilerplate.py b/hack/boilerplate/boilerplate.py index 013fb427e..7f42c2799 100755 --- a/hack/boilerplate/boilerplate.py +++ b/hack/boilerplate/boilerplate.py @@ -167,7 +167,7 @@ def file_extension(filename): '_output', 'pkg/epp/backend/metrics/pod_metrics_test.go', 'pkg/epp/saturationdetector/config.go', - 'pkg/epp/scheduling/framework/plugins/multi/prefix/indexer_test.go', + 'pkg/epp/scheduling/framework/plugins/prefixcachescorer/indexer_test.go', 'pkg/epp/util/env/env_test.go', 'staging/src/k8s.io/kubectl/pkg/generated/bindata.go', 'test/e2e/generated/bindata.go', diff --git a/pkg/epp/config/loader/configloader.go b/pkg/epp/config/loader/configloader.go index 56f05dcfc..c46c4b0c6 100644 --- a/pkg/epp/config/loader/configloader.go +++ b/pkg/epp/config/loader/configloader.go @@ -32,7 +32,7 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/saturationdetector" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/singleprofilehandler" ) var scheme = runtime.NewScheme() @@ -130,7 +130,7 @@ func loadSchedulerConfig(configProfiles []configapi.SchedulingProfile, handle pl return nil, errors.New("no profile handler was specified") } - if profileHandler.TypedName().Type == profile.SingleProfileHandlerType && len(profiles) > 1 { + if profileHandler.TypedName().Type == singleprofilehandler.SingleProfileHandlerType && len(profiles) > 1 { return nil, errors.New("single profile handler is intended to be used with a single profile, but multiple profiles were specified") } diff --git a/pkg/epp/config/loader/configloader_test.go b/pkg/epp/config/loader/configloader_test.go index a23f22316..0629e18e8 100644 --- a/pkg/epp/config/loader/configloader_test.go +++ b/pkg/epp/config/loader/configloader_test.go @@ -33,9 +33,11 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/saturationdetector" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/maxscorepicker" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/prefixcachescorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/randompicker" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/singleprofilehandler" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/weightedrandompicker" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" "sigs.k8s.io/gateway-api-inference-extension/test/utils" @@ -226,12 +228,12 @@ func TestLoadRawConfigurationWithDefaults(t *testing.T) { Parameters: json.RawMessage("{\"threshold\":10}"), }, { - Name: profile.SingleProfileHandlerType, - Type: profile.SingleProfileHandlerType, + Name: singleprofilehandler.SingleProfileHandlerType, + Type: singleprofilehandler.SingleProfileHandlerType, }, { - Name: picker.MaxScorePickerType, - Type: picker.MaxScorePickerType, + Name: maxscorepicker.MaxScorePickerType, + Type: maxscorepicker.MaxScorePickerType, }, }, SchedulingProfiles: []configapi.SchedulingProfile{ @@ -492,11 +494,11 @@ func registerNeededFeatureGates() { } func registerNeededPlgugins() { - plugins.Register(prefix.PrefixCachePluginType, prefix.PrefixCachePluginFactory) - plugins.Register(picker.MaxScorePickerType, picker.MaxScorePickerFactory) - plugins.Register(picker.RandomPickerType, picker.RandomPickerFactory) - plugins.Register(picker.WeightedRandomPickerType, picker.WeightedRandomPickerFactory) - plugins.Register(profile.SingleProfileHandlerType, profile.SingleProfileHandlerFactory) + plugins.Register(prefixcachescorer.PrefixCachePluginType, prefixcachescorer.PrefixCachePluginFactory) + plugins.Register(maxscorepicker.MaxScorePickerType, maxscorepicker.MaxScorePickerFactory) + plugins.Register(randompicker.RandomPickerType, randompicker.RandomPickerFactory) + plugins.Register(weightedrandompicker.WeightedRandomPickerType, weightedrandompicker.WeightedRandomPickerFactory) + plugins.Register(singleprofilehandler.SingleProfileHandlerType, singleprofilehandler.SingleProfileHandlerFactory) } func TestNewDetector(t *testing.T) { diff --git a/pkg/epp/config/loader/defaults.go b/pkg/epp/config/loader/defaults.go index 17075dd05..a2c83a33b 100644 --- a/pkg/epp/config/loader/defaults.go +++ b/pkg/epp/config/loader/defaults.go @@ -22,8 +22,9 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/saturationdetector" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/maxscorepicker" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/pickershared" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/singleprofilehandler" ) const ( @@ -115,10 +116,10 @@ func setDefaultsPhaseTwo(cfg *configapi.EndpointPickerConfig, handle plugins.Han } } if !profileHandlerFound { - handle.AddPlugin(profile.SingleProfileHandlerType, profile.NewSingleProfileHandler()) + handle.AddPlugin(singleprofilehandler.SingleProfileHandlerType, singleprofilehandler.NewSingleProfileHandler()) cfg.Plugins = append(cfg.Plugins, - configapi.PluginSpec{Name: profile.SingleProfileHandlerType, - Type: profile.SingleProfileHandlerType, + configapi.PluginSpec{Name: singleprofilehandler.SingleProfileHandlerType, + Type: singleprofilehandler.SingleProfileHandlerType, }) } } @@ -131,8 +132,8 @@ func setDefaultsPhaseTwo(cfg *configapi.EndpointPickerConfig, handle plugins.Han } } if maxScorePicker == "" { - handle.AddPlugin(picker.MaxScorePickerType, picker.NewMaxScorePicker(picker.DefaultMaxNumOfEndpoints)) - maxScorePicker = picker.MaxScorePickerType + handle.AddPlugin(maxscorepicker.MaxScorePickerType, maxscorepicker.NewMaxScorePicker(pickershared.DefaultMaxNumOfEndpoints)) + maxScorePicker = maxscorepicker.MaxScorePickerType cfg.Plugins = append(cfg.Plugins, configapi.PluginSpec{Name: maxScorePicker, Type: maxScorePicker}) } diff --git a/pkg/epp/scheduling/framework/plugins/README.md b/pkg/epp/scheduling/framework/plugins/README.md index 56ca315e6..4491bcd11 100644 --- a/pkg/epp/scheduling/framework/plugins/README.md +++ b/pkg/epp/scheduling/framework/plugins/README.md @@ -2,14 +2,11 @@ This package contains the scheduling plugin implementations. -Plugins are organized by the following rule. Follow this rule when adding a new -plugin. +Plugins are grouped by plugin name to make it easy to find a specific feature. +Each plugin lives in its own directory (for example `prefixcachescorer/`, +`predictedlatencyscorer/`, `maxscorepicker/`). Shared helpers that are used by +multiple plugins live in supporting packages such as `pickershared/`, and test +helpers stay under `test/`. -``` -plugins/ -|__ filter/(Plugins that implement the Filter interface only.) -|__ scorer/ (Plugins that implement the Scorer interface only.) -|__ picker/(Plugins that implement the Picker interface only.) -|__ multi/ (Plugins that implement multiple plugin interfaces.) -|____prefix/ (Prefix cache aware scheduling plugin.) -``` +When adding a new plugin, create a new directory named after the plugin and keep +all of its code and tests inside that directory. diff --git a/pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization.go b/pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer/kvcache_utilization.go similarity index 98% rename from pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization.go rename to pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer/kvcache_utilization.go index d78bcc9ed..f2122f30b 100644 --- a/pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization.go +++ b/pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer/kvcache_utilization.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package scorer +package kvcacheutilizationscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization_test.go b/pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer/kvcache_utilization_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization_test.go rename to pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer/kvcache_utilization_test.go index 76aaeee31..d2e956440 100644 --- a/pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization_test.go +++ b/pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer/kvcache_utilization_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package scorer +package kvcacheutilizationscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/scorer/lora_affinity.go b/pkg/epp/scheduling/framework/plugins/loraaffinityscorer/lora_affinity.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/scorer/lora_affinity.go rename to pkg/epp/scheduling/framework/plugins/loraaffinityscorer/lora_affinity.go index 80524798d..d629c9c90 100644 --- a/pkg/epp/scheduling/framework/plugins/scorer/lora_affinity.go +++ b/pkg/epp/scheduling/framework/plugins/loraaffinityscorer/lora_affinity.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package scorer +package loraaffinityscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/scorer/lora_affinity_test.go b/pkg/epp/scheduling/framework/plugins/loraaffinityscorer/lora_affinity_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/scorer/lora_affinity_test.go rename to pkg/epp/scheduling/framework/plugins/loraaffinityscorer/lora_affinity_test.go index 418bafde0..2efe031a8 100644 --- a/pkg/epp/scheduling/framework/plugins/scorer/lora_affinity_test.go +++ b/pkg/epp/scheduling/framework/plugins/loraaffinityscorer/lora_affinity_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package scorer +package loraaffinityscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/picker/max_score_picker.go b/pkg/epp/scheduling/framework/plugins/maxscorepicker/max_score_picker.go similarity index 90% rename from pkg/epp/scheduling/framework/plugins/picker/max_score_picker.go rename to pkg/epp/scheduling/framework/plugins/maxscorepicker/max_score_picker.go index 33e99bd06..6524ad357 100644 --- a/pkg/epp/scheduling/framework/plugins/picker/max_score_picker.go +++ b/pkg/epp/scheduling/framework/plugins/maxscorepicker/max_score_picker.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package picker +package maxscorepicker import ( "context" @@ -26,6 +26,7 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/pickershared" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) @@ -39,7 +40,7 @@ var _ framework.Picker = &MaxScorePicker{} // MaxScorePickerFactory defines the factory function for MaxScorePicker. func MaxScorePickerFactory(name string, rawParameters json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) { - parameters := pickerParameters{MaxNumOfEndpoints: DefaultMaxNumOfEndpoints} + parameters := pickershared.Parameters{MaxNumOfEndpoints: pickershared.DefaultMaxNumOfEndpoints} if rawParameters != nil { if err := json.Unmarshal(rawParameters, ¶meters); err != nil { return nil, fmt.Errorf("failed to parse the parameters of the '%s' picker - %w", MaxScorePickerType, err) @@ -51,9 +52,7 @@ func MaxScorePickerFactory(name string, rawParameters json.RawMessage, _ plugins // NewMaxScorePicker initializes a new MaxScorePicker and returns its pointer. func NewMaxScorePicker(maxNumOfEndpoints int) *MaxScorePicker { - if maxNumOfEndpoints <= 0 { - maxNumOfEndpoints = DefaultMaxNumOfEndpoints // on invalid configuration value, fallback to default value - } + maxNumOfEndpoints = pickershared.NormalizeMaxNumOfEndpoints(maxNumOfEndpoints) return &MaxScorePicker{ typedName: plugins.TypedName{Type: MaxScorePickerType, Name: MaxScorePickerType}, @@ -84,7 +83,7 @@ func (p *MaxScorePicker) Pick(ctx context.Context, cycleState *types.CycleState, "num-of-candidates", len(scoredPods), "scored-pods", scoredPods) // Shuffle in-place - needed for random tie break when scores are equal - shuffleScoredPods(scoredPods) + pickershared.ShuffleScoredPods(scoredPods) slices.SortStableFunc(scoredPods, func(i, j *types.ScoredPod) int { // highest score first if i.Score > j.Score { diff --git a/pkg/epp/scheduling/framework/plugins/maxscorepicker/max_score_picker_test.go b/pkg/epp/scheduling/framework/plugins/maxscorepicker/max_score_picker_test.go new file mode 100644 index 000000000..fadfba2c5 --- /dev/null +++ b/pkg/epp/scheduling/framework/plugins/maxscorepicker/max_score_picker_test.go @@ -0,0 +1,137 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package maxscorepicker + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + k8stypes "k8s.io/apimachinery/pkg/types" + + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" +) + +func TestPickMaxScorePicker(t *testing.T) { + pod1 := &types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}} + pod2 := &types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}} + pod3 := &types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}} + + tests := []struct { + name string + picker framework.Picker + input []*types.ScoredPod + output []types.Pod + tieBreakCandidates int // tie break is random, specify how many candidate with max score + }{ + { + name: "Single max score", + picker: NewMaxScorePicker(1), + input: []*types.ScoredPod{ + {Pod: pod1, Score: 10}, + {Pod: pod2, Score: 25}, + {Pod: pod3, Score: 15}, + }, + output: []types.Pod{ + &types.ScoredPod{Pod: pod2, Score: 25}, + }, + }, + { + name: "Multiple max scores, all are equally scored", + picker: NewMaxScorePicker(2), + input: []*types.ScoredPod{ + {Pod: pod1, Score: 50}, + {Pod: pod2, Score: 50}, + {Pod: pod3, Score: 30}, + }, + output: []types.Pod{ + &types.ScoredPod{Pod: pod1, Score: 50}, + &types.ScoredPod{Pod: pod2, Score: 50}, + }, + tieBreakCandidates: 2, + }, + { + name: "Multiple results sorted by highest score, more pods than needed", + picker: NewMaxScorePicker(2), + input: []*types.ScoredPod{ + {Pod: pod1, Score: 20}, + {Pod: pod2, Score: 25}, + {Pod: pod3, Score: 30}, + }, + output: []types.Pod{ + &types.ScoredPod{Pod: pod3, Score: 30}, + &types.ScoredPod{Pod: pod2, Score: 25}, + }, + }, + { + name: "Multiple results sorted by highest score, less pods than needed", + picker: NewMaxScorePicker(4), // picker is required to return 4 pods at most, but we have only 3. + input: []*types.ScoredPod{ + {Pod: pod1, Score: 20}, + {Pod: pod2, Score: 25}, + {Pod: pod3, Score: 30}, + }, + output: []types.Pod{ + &types.ScoredPod{Pod: pod3, Score: 30}, + &types.ScoredPod{Pod: pod2, Score: 25}, + &types.ScoredPod{Pod: pod1, Score: 20}, + }, + }, + { + name: "Multiple results sorted by highest score, num of pods exactly needed", + picker: NewMaxScorePicker(3), // picker is required to return 3 pods at most, we have only 3. + input: []*types.ScoredPod{ + {Pod: pod1, Score: 30}, + {Pod: pod2, Score: 25}, + {Pod: pod3, Score: 30}, + }, + output: []types.Pod{ + &types.ScoredPod{Pod: pod1, Score: 30}, + &types.ScoredPod{Pod: pod3, Score: 30}, + &types.ScoredPod{Pod: pod2, Score: 25}, + }, + tieBreakCandidates: 2, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + result := test.picker.Pick(context.Background(), types.NewCycleState(), test.input) + got := result.TargetPods + + if test.tieBreakCandidates > 0 { + testMaxScoredPods := test.output[:test.tieBreakCandidates] + gotMaxScoredPods := got[:test.tieBreakCandidates] + diff := cmp.Diff(testMaxScoredPods, gotMaxScoredPods, cmpopts.SortSlices(func(a, b types.Pod) bool { + return a.String() < b.String() // predictable order within the pods with equal scores + })) + if diff != "" { + t.Errorf("Unexpected output (-want +got): %v", diff) + } + test.output = test.output[test.tieBreakCandidates:] + got = got[test.tieBreakCandidates:] + } + + if diff := cmp.Diff(test.output, got); diff != "" { + t.Errorf("Unexpected output (-want +got): %v", diff) + } + }) + } +} diff --git a/pkg/epp/scheduling/framework/plugins/picker/common.go b/pkg/epp/scheduling/framework/plugins/pickershared/common.go similarity index 66% rename from pkg/epp/scheduling/framework/plugins/picker/common.go rename to pkg/epp/scheduling/framework/plugins/pickershared/common.go index c8655840f..ffa9c0f81 100644 --- a/pkg/epp/scheduling/framework/plugins/picker/common.go +++ b/pkg/epp/scheduling/framework/plugins/pickershared/common.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package picker +package pickershared import ( "math/rand/v2" @@ -23,16 +23,23 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" ) -const ( - DefaultMaxNumOfEndpoints = 1 // common default to all pickers -) +const DefaultMaxNumOfEndpoints = 1 // common default to all pickers -// pickerParameters defines the common parameters for all pickers -type pickerParameters struct { +// Parameters defines the common parameters for all pickers. +type Parameters struct { MaxNumOfEndpoints int `json:"maxNumOfEndpoints"` } -func shuffleScoredPods(scoredPods []*types.ScoredPod) { +// NormalizeMaxNumOfEndpoints applies the shared default when the provided value is invalid. +func NormalizeMaxNumOfEndpoints(maxNumOfEndpoints int) int { + if maxNumOfEndpoints <= 0 { + return DefaultMaxNumOfEndpoints + } + return maxNumOfEndpoints +} + +// ShuffleScoredPods randomizes the order of scored pods in place. +func ShuffleScoredPods(scoredPods []*types.ScoredPod) { // Rand package is not safe for concurrent use, so we create a new instance. // Source: https://pkg.go.dev/math/rand/v2#pkg-overview randomGenerator := rand.New(rand.NewPCG(uint64(time.Now().UnixNano()), 0)) diff --git a/pkg/epp/scheduling/framework/plugins/profile/slo_aware_profile_handler.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyprofilehandler/slo_aware_profile_handler.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/profile/slo_aware_profile_handler.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyprofilehandler/slo_aware_profile_handler.go index f66fb118e..90dbe9918 100644 --- a/pkg/epp/scheduling/framework/plugins/profile/slo_aware_profile_handler.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyprofilehandler/slo_aware_profile_handler.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package profile +package predictedlatencyprofilehandler import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/config.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/config.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/config.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/config.go index bbfd77223..120fcf71e 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/config.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/config.go @@ -15,7 +15,7 @@ limitations under the License. */ // Package requestcontrol contains helpers to decouple latency-predictor logic. -package slo_aware_router +package predictedlatencyscorer import ( "os" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/headers.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/headers.go similarity index 98% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/headers.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/headers.go index da0a86f20..68a4a5792 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/headers.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/headers.go @@ -15,7 +15,7 @@ limitations under the License. */ // Package requestcontrol contains helpers to decouple latency-predictor logic. -package slo_aware_router +package predictedlatencyscorer import ( "strconv" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/helpers.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/helpers.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/helpers.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/helpers.go index c28ec6f47..66175a49e 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/helpers.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/helpers.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/latencypredictor_helper.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/latencypredictor_helper.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/latencypredictor_helper.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/latencypredictor_helper.go index 7482de93c..a7a2608b9 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/latencypredictor_helper.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/latencypredictor_helper.go @@ -15,7 +15,7 @@ limitations under the License. */ // Package requestcontrol contains helpers to decouple latency-predictor logic. -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/latencypredictor_helper_test.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/latencypredictor_helper_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/latencypredictor_helper_test.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/latencypredictor_helper_test.go index 92227cba6..9c915c187 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/latencypredictor_helper_test.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/latencypredictor_helper_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/prediction.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/prediction.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/prediction.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/prediction.go index f31932538..f0670c729 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/prediction.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/prediction.go @@ -15,7 +15,7 @@ limitations under the License. */ // Package requestcontrol contains helpers to decouple latency-predictor logic. -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/requestcontrol_hooks.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/requestcontrol_hooks.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/requestcontrol_hooks.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/requestcontrol_hooks.go index d91aac2cf..0b1a3a539 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/requestcontrol_hooks.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/requestcontrol_hooks.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/requestcontrol_hooks_test.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/requestcontrol_hooks_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/requestcontrol_hooks_test.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/requestcontrol_hooks_test.go index 5aaf1a2a2..61638dcf1 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/requestcontrol_hooks_test.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/requestcontrol_hooks_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/running_request_queue.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/running_request_queue.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/running_request_queue.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/running_request_queue.go index 37017fbdc..253fd48ae 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/running_request_queue.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/running_request_queue.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "container/heap" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/running_request_queue_test.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/running_request_queue_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/running_request_queue_test.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/running_request_queue_test.go index ef34c84b5..2f45ad36f 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/running_request_queue_test.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/running_request_queue_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "fmt" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/sampler.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/sampler.go similarity index 98% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/sampler.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/sampler.go index 13d2543a6..84ad78fa3 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/sampler.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/sampler.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "hash/fnv" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer.go similarity index 96% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer.go index c72ec5874..8b0541d4d 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "context" @@ -30,7 +30,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix" + prefixcache "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/prefixcachescorer" schedulingtypes "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" latencypredictor "sigs.k8s.io/gateway-api-inference-extension/sidecars/latencypredictorasync" @@ -313,8 +313,8 @@ func (t *SLOAwareRouter) getOrMakeSLORequestContext(request *schedulingtypes.LLM func (s *SLOAwareRouter) getPrefixCacheScoreForPod(ctx context.Context, cycleState *schedulingtypes.CycleState, pod schedulingtypes.Pod) float64 { log.FromContext(ctx).V(logutil.DEBUG).Info("Running getPrefixCacheScoreForPod, getting prefix cache score for pod", "pod", pod.GetPod().String()) - plugintype := prefix.PrefixCachePluginType - pluginname := prefix.PrefixCachePluginType + plugintype := prefixcache.PrefixCachePluginType + pluginname := prefixcache.PrefixCachePluginType cycleStateKey := (plugins.TypedName{Type: plugintype, Name: pluginname}).String() stateData, err := cycleState.Read(plugins.StateKey(cycleStateKey)) @@ -326,7 +326,7 @@ func (s *SLOAwareRouter) getPrefixCacheScoreForPod(ctx context.Context, cycleSta return 0.0 } - prefixCacheState, ok := stateData.(*prefix.SchedulingContextState) + prefixCacheState, ok := stateData.(*prefixcache.SchedulingContextState) if !ok { // This should not happen if the plugin is configured correctly. log.FromContext(ctx).Error(fmt.Errorf("unexpected state type: %T", stateData), "failed to read prefix cache state") @@ -340,7 +340,7 @@ func (s *SLOAwareRouter) getPrefixCacheScoreForPod(ctx context.Context, cycleSta return 0.0 } - matchLen := prefixCacheState.PrefixCacheServers[prefix.ServerID(pod.GetPod().NamespacedName)] + matchLen := prefixCacheState.PrefixCacheServers[prefixcache.ServerID(pod.GetPod().NamespacedName)] log.FromContext(ctx).V(logutil.DEBUG).Info("Prefix cache score for pod", "pod", pod.GetPod().String(), "matchLen", matchLen, "totalPrefixes", total) return float64(matchLen) / float64(total) } diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer_helpers.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer_helpers.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer_helpers.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer_helpers.go index f5c7f64d9..1e3bbdf35 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer_helpers.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer_helpers.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer_test.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer_test.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer_test.go index 21aaa375d..5417ddabd 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/scorer_test.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/scorer_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/selection.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/selection.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/selection.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/selection.go index 0e1c9eca7..eddfcfa30 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/selection.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/selection.go @@ -15,7 +15,7 @@ limitations under the License. */ // Package requestcontrol contains helpers to decouple latency-predictor logic. -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/selection_helpers.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/selection_helpers.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/selection_helpers.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/selection_helpers.go index 012cd70ad..bc0ae1008 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/selection_helpers.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/selection_helpers.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package slo_aware_router +package predictedlatencyscorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/types.go b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/types.go similarity index 98% rename from pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/types.go rename to pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/types.go index eec531a30..2b4f535f4 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/types.go +++ b/pkg/epp/scheduling/framework/plugins/predictedlatencyscorer/types.go @@ -15,7 +15,7 @@ limitations under the License. */ // Package requestcontrol contains helpers to decouple latency-predictor logic. -package slo_aware_router +package predictedlatencyscorer import schedulingtypes "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/OWNERS b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/OWNERS similarity index 100% rename from pkg/epp/scheduling/framework/plugins/multi/prefix/OWNERS rename to pkg/epp/scheduling/framework/plugins/prefixcachescorer/OWNERS diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/indexer.go b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/indexer.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/prefix/indexer.go rename to pkg/epp/scheduling/framework/plugins/prefixcachescorer/indexer.go index 98de269bc..977a4e615 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/prefix/indexer.go +++ b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/indexer.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package prefix +package prefixcachescorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/indexer_test.go b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/indexer_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/prefix/indexer_test.go rename to pkg/epp/scheduling/framework/plugins/prefixcachescorer/indexer_test.go index c512c1a06..b3c62d8b7 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/prefix/indexer_test.go +++ b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/indexer_test.go @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -package prefix +package prefixcachescorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/plugin.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go rename to pkg/epp/scheduling/framework/plugins/prefixcachescorer/plugin.go index 2a1a3a8b2..7beea32ee 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go +++ b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/plugin.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package prefix +package prefixcachescorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/plugin_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go rename to pkg/epp/scheduling/framework/plugins/prefixcachescorer/plugin_test.go index f0feeef68..b0fab6ab0 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go +++ b/pkg/epp/scheduling/framework/plugins/prefixcachescorer/plugin_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package prefix +package prefixcachescorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/scorer/queue.go b/pkg/epp/scheduling/framework/plugins/queuescorer/queue.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/scorer/queue.go rename to pkg/epp/scheduling/framework/plugins/queuescorer/queue.go index e2a07b0bd..00e555de4 100644 --- a/pkg/epp/scheduling/framework/plugins/scorer/queue.go +++ b/pkg/epp/scheduling/framework/plugins/queuescorer/queue.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package scorer +package queuescorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/scorer/queue_test.go b/pkg/epp/scheduling/framework/plugins/queuescorer/queue_test.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/scorer/queue_test.go rename to pkg/epp/scheduling/framework/plugins/queuescorer/queue_test.go index ce8193679..bd4cd5509 100644 --- a/pkg/epp/scheduling/framework/plugins/scorer/queue_test.go +++ b/pkg/epp/scheduling/framework/plugins/queuescorer/queue_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package scorer +package queuescorer import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/picker/random_picker.go b/pkg/epp/scheduling/framework/plugins/randompicker/random_picker.go similarity index 89% rename from pkg/epp/scheduling/framework/plugins/picker/random_picker.go rename to pkg/epp/scheduling/framework/plugins/randompicker/random_picker.go index 10ad68469..abd13311e 100644 --- a/pkg/epp/scheduling/framework/plugins/picker/random_picker.go +++ b/pkg/epp/scheduling/framework/plugins/randompicker/random_picker.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package picker +package randompicker import ( "context" @@ -25,6 +25,7 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/pickershared" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) @@ -38,7 +39,7 @@ var _ framework.Picker = &RandomPicker{} // RandomPickerFactory defines the factory function for RandomPicker. func RandomPickerFactory(name string, rawParameters json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) { - parameters := pickerParameters{MaxNumOfEndpoints: DefaultMaxNumOfEndpoints} + parameters := pickershared.Parameters{MaxNumOfEndpoints: pickershared.DefaultMaxNumOfEndpoints} if rawParameters != nil { if err := json.Unmarshal(rawParameters, ¶meters); err != nil { return nil, fmt.Errorf("failed to parse the parameters of the '%s' picker - %w", RandomPickerType, err) @@ -50,13 +51,9 @@ func RandomPickerFactory(name string, rawParameters json.RawMessage, _ plugins.H // NewRandomPicker initializes a new RandomPicker and returns its pointer. func NewRandomPicker(maxNumOfEndpoints int) *RandomPicker { - if maxNumOfEndpoints <= 0 { - maxNumOfEndpoints = DefaultMaxNumOfEndpoints // on invalid configuration value, fallback to default value - } - return &RandomPicker{ typedName: plugins.TypedName{Type: RandomPickerType, Name: RandomPickerType}, - maxNumOfEndpoints: maxNumOfEndpoints, + maxNumOfEndpoints: pickershared.NormalizeMaxNumOfEndpoints(maxNumOfEndpoints), } } @@ -83,7 +80,7 @@ func (p *RandomPicker) Pick(ctx context.Context, _ *types.CycleState, scoredPods "num-of-candidates", len(scoredPods), "scored-pods", scoredPods) // Shuffle in-place - shuffleScoredPods(scoredPods) + pickershared.ShuffleScoredPods(scoredPods) // if we have enough pods to return keep only the relevant subset if p.maxNumOfEndpoints < len(scoredPods) { diff --git a/pkg/epp/scheduling/framework/plugins/profile/single_profile_handler.go b/pkg/epp/scheduling/framework/plugins/singleprofilehandler/single_profile_handler.go similarity index 99% rename from pkg/epp/scheduling/framework/plugins/profile/single_profile_handler.go rename to pkg/epp/scheduling/framework/plugins/singleprofilehandler/single_profile_handler.go index 3efaa7bd3..be862cef4 100644 --- a/pkg/epp/scheduling/framework/plugins/profile/single_profile_handler.go +++ b/pkg/epp/scheduling/framework/plugins/singleprofilehandler/single_profile_handler.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package profile +package singleprofilehandler import ( "context" diff --git a/pkg/epp/scheduling/framework/plugins/picker/weighted_random_picker.go b/pkg/epp/scheduling/framework/plugins/weightedrandompicker/weighted_random_picker.go similarity index 90% rename from pkg/epp/scheduling/framework/plugins/picker/weighted_random_picker.go rename to pkg/epp/scheduling/framework/plugins/weightedrandompicker/weighted_random_picker.go index 540ede43c..455dd4333 100644 --- a/pkg/epp/scheduling/framework/plugins/picker/weighted_random_picker.go +++ b/pkg/epp/scheduling/framework/plugins/weightedrandompicker/weighted_random_picker.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package picker +package weightedrandompicker import ( "context" @@ -30,6 +30,8 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/pickershared" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/randompicker" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) @@ -49,7 +51,7 @@ var _ framework.Picker = &WeightedRandomPicker{} // WeightedRandomPickerFactory defines the factory function for WeightedRandomPicker. func WeightedRandomPickerFactory(name string, rawParameters json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) { - parameters := pickerParameters{MaxNumOfEndpoints: DefaultMaxNumOfEndpoints} + parameters := pickershared.Parameters{MaxNumOfEndpoints: pickershared.DefaultMaxNumOfEndpoints} if rawParameters != nil { if err := json.Unmarshal(rawParameters, ¶meters); err != nil { return nil, fmt.Errorf("failed to parse the parameters of the '%s' picker - %w", WeightedRandomPickerType, err) @@ -61,14 +63,12 @@ func WeightedRandomPickerFactory(name string, rawParameters json.RawMessage, _ p // NewWeightedRandomPicker initializes a new WeightedRandomPicker and returns its pointer. func NewWeightedRandomPicker(maxNumOfEndpoints int) *WeightedRandomPicker { - if maxNumOfEndpoints <= 0 { - maxNumOfEndpoints = DefaultMaxNumOfEndpoints // on invalid configuration value, fallback to default value - } + maxNumOfEndpoints = pickershared.NormalizeMaxNumOfEndpoints(maxNumOfEndpoints) return &WeightedRandomPicker{ typedName: plugins.TypedName{Type: WeightedRandomPickerType, Name: WeightedRandomPickerType}, maxNumOfEndpoints: maxNumOfEndpoints, - randomPicker: NewRandomPicker(maxNumOfEndpoints), + randomPicker: randompicker.NewRandomPicker(maxNumOfEndpoints), } } @@ -88,7 +88,7 @@ func NewWeightedRandomPicker(maxNumOfEndpoints int) *WeightedRandomPicker { type WeightedRandomPicker struct { typedName plugins.TypedName maxNumOfEndpoints int - randomPicker *RandomPicker // fallback for zero weights + randomPicker *randompicker.RandomPicker // fallback for zero weights } // WithName sets the name of the picker. diff --git a/pkg/epp/scheduling/framework/plugins/picker/picker_test.go b/pkg/epp/scheduling/framework/plugins/weightedrandompicker/weighted_random_picker_test.go similarity index 56% rename from pkg/epp/scheduling/framework/plugins/picker/picker_test.go rename to pkg/epp/scheduling/framework/plugins/weightedrandompicker/weighted_random_picker_test.go index 022328efd..c64a4047b 100644 --- a/pkg/epp/scheduling/framework/plugins/picker/picker_test.go +++ b/pkg/epp/scheduling/framework/plugins/weightedrandompicker/weighted_random_picker_test.go @@ -14,129 +14,19 @@ See the License for the specific language governing permissions and limitations under the License. */ -package picker +package weightedrandompicker import ( "context" "math" "testing" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" k8stypes "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" ) -func TestPickMaxScorePicker(t *testing.T) { - pod1 := &types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}} - pod2 := &types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}} - pod3 := &types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}} - - tests := []struct { - name string - picker framework.Picker - input []*types.ScoredPod - output []types.Pod - tieBreakCandidates int // tie break is random, specify how many candidate with max score - }{ - { - name: "Single max score", - picker: NewMaxScorePicker(1), - input: []*types.ScoredPod{ - {Pod: pod1, Score: 10}, - {Pod: pod2, Score: 25}, - {Pod: pod3, Score: 15}, - }, - output: []types.Pod{ - &types.ScoredPod{Pod: pod2, Score: 25}, - }, - }, - { - name: "Multiple max scores, all are equally scored", - picker: NewMaxScorePicker(2), - input: []*types.ScoredPod{ - {Pod: pod1, Score: 50}, - {Pod: pod2, Score: 50}, - {Pod: pod3, Score: 30}, - }, - output: []types.Pod{ - &types.ScoredPod{Pod: pod1, Score: 50}, - &types.ScoredPod{Pod: pod2, Score: 50}, - }, - tieBreakCandidates: 2, - }, - { - name: "Multiple results sorted by highest score, more pods than needed", - picker: NewMaxScorePicker(2), - input: []*types.ScoredPod{ - {Pod: pod1, Score: 20}, - {Pod: pod2, Score: 25}, - {Pod: pod3, Score: 30}, - }, - output: []types.Pod{ - &types.ScoredPod{Pod: pod3, Score: 30}, - &types.ScoredPod{Pod: pod2, Score: 25}, - }, - }, - { - name: "Multiple results sorted by highest score, less pods than needed", - picker: NewMaxScorePicker(4), // picker is required to return 4 pods at most, but we have only 3. - input: []*types.ScoredPod{ - {Pod: pod1, Score: 20}, - {Pod: pod2, Score: 25}, - {Pod: pod3, Score: 30}, - }, - output: []types.Pod{ - &types.ScoredPod{Pod: pod3, Score: 30}, - &types.ScoredPod{Pod: pod2, Score: 25}, - &types.ScoredPod{Pod: pod1, Score: 20}, - }, - }, - { - name: "Multiple results sorted by highest score, num of pods exactly needed", - picker: NewMaxScorePicker(3), // picker is required to return 3 pods at most, we have only 3. - input: []*types.ScoredPod{ - {Pod: pod1, Score: 30}, - {Pod: pod2, Score: 25}, - {Pod: pod3, Score: 30}, - }, - output: []types.Pod{ - &types.ScoredPod{Pod: pod1, Score: 30}, - &types.ScoredPod{Pod: pod3, Score: 30}, - &types.ScoredPod{Pod: pod2, Score: 25}, - }, - tieBreakCandidates: 2, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - result := test.picker.Pick(context.Background(), types.NewCycleState(), test.input) - got := result.TargetPods - - if test.tieBreakCandidates > 0 { - testMaxScoredPods := test.output[:test.tieBreakCandidates] - gotMaxScoredPods := got[:test.tieBreakCandidates] - diff := cmp.Diff(testMaxScoredPods, gotMaxScoredPods, cmpopts.SortSlices(func(a, b types.Pod) bool { - return a.String() < b.String() // predictable order within the pods with equal scores - })) - if diff != "" { - t.Errorf("Unexpected output (-want +got): %v", diff) - } - test.output = test.output[test.tieBreakCandidates:] - got = got[test.tieBreakCandidates:] - } - - if diff := cmp.Diff(test.output, got); diff != "" { - t.Errorf("Unexpected output (-want +got): %v", diff) - } - }) - } -} - func TestPickWeightedRandomPicker(t *testing.T) { const ( testIterations = 10000 diff --git a/pkg/epp/scheduling/scheduler_test.go b/pkg/epp/scheduling/scheduler_test.go index c197096ba..ae7d2cb3f 100644 --- a/pkg/epp/scheduling/scheduler_test.go +++ b/pkg/epp/scheduling/scheduler_test.go @@ -27,19 +27,22 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" // Import config for thresholds "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/scorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/loraaffinityscorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/maxscorepicker" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/pickershared" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/prefixcachescorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/queuescorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/singleprofilehandler" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" ) // Tests the default scheduler configuration and expected behavior. func TestSchedule(t *testing.T) { - kvCacheUtilizationScorer := scorer.NewKVCacheUtilizationScorer() - queueingScorer := scorer.NewQueueScorer() - prefixCacheScorer := prefix.New(context.Background(), prefix.DefaultConfig) - loraAffinityScorer := scorer.NewLoraAffinityScorer() + kvCacheUtilizationScorer := kvcacheutilizationscorer.NewKVCacheUtilizationScorer() + queueingScorer := queuescorer.NewQueueScorer() + prefixCacheScorer := prefixcachescorer.New(context.Background(), prefixcachescorer.DefaultConfig) + loraAffinityScorer := loraaffinityscorer.NewLoraAffinityScorer() defaultProfile := framework.NewSchedulerProfile(). WithScorers(framework.NewWeightedScorer(kvCacheUtilizationScorer, 1), @@ -47,9 +50,9 @@ func TestSchedule(t *testing.T) { framework.NewWeightedScorer(prefixCacheScorer, 1), framework.NewWeightedScorer(loraAffinityScorer, 1), ). - WithPicker(picker.NewMaxScorePicker(picker.DefaultMaxNumOfEndpoints)) + WithPicker(maxscorepicker.NewMaxScorePicker(pickershared.DefaultMaxNumOfEndpoints)) - profileHandler := profile.NewSingleProfileHandler() + profileHandler := singleprofilehandler.NewSingleProfileHandler() schedulerConfig := NewSchedulerConfig(profileHandler, map[string]*framework.SchedulerProfile{"default": defaultProfile}) diff --git a/site-src/guides/epp-configuration/prefix-aware.md b/site-src/guides/epp-configuration/prefix-aware.md index 07913f2ec..efa65eb77 100644 --- a/site-src/guides/epp-configuration/prefix-aware.md +++ b/site-src/guides/epp-configuration/prefix-aware.md @@ -1,6 +1,6 @@ # Prefix Cache Aware Plugin Configuration -The [prefix cache plugin](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/7617439188b410670ed0f1ff805a3b7f9918a75b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go#L63) +The [prefix cache plugin](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/epp/scheduling/framework/plugins/prefixcachescorer/plugin.go#L63) takes advantage of the prefix caching (e.g., [vllm APC](https://docs.vllm.ai/en/latest/features/automatic_prefix_caching.html)) feature of model servers, and optimizes request scheduling by placing requests sharing the longest prefixes to the same server as much as possible, while balancing the server load by considering kv-cache diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go index 59bf12cd4..820991c5e 100644 --- a/test/integration/epp/hermetic_test.go +++ b/test/integration/epp/hermetic_test.go @@ -69,10 +69,13 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/saturationdetector" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/scorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/kvcacheutilizationscorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/loraaffinityscorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/maxscorepicker" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/pickershared" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/prefixcachescorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/queuescorer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/singleprofilehandler" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" requtil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/request" @@ -1215,10 +1218,10 @@ func BeforeSuite() func() { serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0) - kvCacheUtilizationScorer := scorer.NewKVCacheUtilizationScorer() - queueingScorer := scorer.NewQueueScorer() - prefixCacheScorer := prefix.New(context.Background(), prefix.DefaultConfig) - loraAffinityScorer := scorer.NewLoraAffinityScorer() + kvCacheUtilizationScorer := kvcacheutilizationscorer.NewKVCacheUtilizationScorer() + queueingScorer := queuescorer.NewQueueScorer() + prefixCacheScorer := prefixcachescorer.New(context.Background(), prefixcachescorer.DefaultConfig) + loraAffinityScorer := loraaffinityscorer.NewLoraAffinityScorer() defaultProfile := framework.NewSchedulerProfile(). WithScorers(framework.NewWeightedScorer(kvCacheUtilizationScorer, 1), @@ -1226,9 +1229,9 @@ func BeforeSuite() func() { framework.NewWeightedScorer(prefixCacheScorer, 1), framework.NewWeightedScorer(loraAffinityScorer, 1), ). - WithPicker(picker.NewMaxScorePicker(picker.DefaultMaxNumOfEndpoints)) + WithPicker(maxscorepicker.NewMaxScorePicker(pickershared.DefaultMaxNumOfEndpoints)) - profileHandler := profile.NewSingleProfileHandler() + profileHandler := singleprofilehandler.NewSingleProfileHandler() schedulerConfig := scheduling.NewSchedulerConfig(profileHandler, map[string]*framework.SchedulerProfile{"default": defaultProfile}) scheduler := scheduling.NewSchedulerWithConfig(schedulerConfig)