@@ -299,6 +299,16 @@ var (
299299 []string {},
300300 )
301301
302+ // SchedulerAttemptsTotal counts total number of scheduling attempts, labeled by status.
303+ SchedulerAttemptsTotal = prometheus .NewCounterVec (
304+ prometheus.CounterOpts {
305+ Subsystem : InferenceExtension ,
306+ Name : "scheduler_attempts_total" ,
307+ Help : metricsutil .HelpMsgWithStability ("Total number of scheduling attempts." , compbasemetrics .ALPHA ),
308+ },
309+ []string {"status" }, // "success", "failure"
310+ )
311+
302312 PluginProcessingLatencies = prometheus .NewHistogramVec (
303313 prometheus.HistogramOpts {
304314 Subsystem : InferenceExtension ,
@@ -409,6 +419,7 @@ func Register(customCollectors ...prometheus.Collector) {
409419 metrics .Registry .MustRegister (inferencePoolAvgQueueSize )
410420 metrics .Registry .MustRegister (inferencePoolReadyPods )
411421 metrics .Registry .MustRegister (SchedulerE2ELatency )
422+ metrics .Registry .MustRegister (SchedulerAttemptsTotal )
412423 metrics .Registry .MustRegister (PluginProcessingLatencies )
413424 metrics .Registry .MustRegister (InferenceExtensionInfo )
414425 metrics .Registry .MustRegister (PrefixCacheSize )
@@ -453,6 +464,7 @@ func Reset() {
453464 inferencePoolAvgQueueSize .Reset ()
454465 inferencePoolReadyPods .Reset ()
455466 SchedulerE2ELatency .Reset ()
467+ SchedulerAttemptsTotal .Reset ()
456468 PluginProcessingLatencies .Reset ()
457469 InferenceExtensionInfo .Reset ()
458470 PrefixCacheSize .Reset ()
@@ -462,7 +474,7 @@ func Reset() {
462474 flowControlQueueSize .Reset ()
463475}
464476
465- // RecordRequstCounter records the number of requests.
477+ // RecordRequestCounter records the number of requests.
466478func RecordRequestCounter (modelName , targetModelName string ) {
467479 requestCounter .WithLabelValues (modelName , targetModelName ).Inc ()
468480}
@@ -684,6 +696,20 @@ func RecordSchedulerE2ELatency(duration time.Duration) {
684696 SchedulerE2ELatency .WithLabelValues ().Observe (duration .Seconds ())
685697}
686698
699+ // RecordSchedulerAttempt records a scheduling attempt with status.
700+ func RecordSchedulerAttempt (err error ) {
701+ if err != nil {
702+ SchedulerAttemptsTotal .WithLabelValues (SchedulerStatusFailure ).Inc ()
703+ } else {
704+ SchedulerAttemptsTotal .WithLabelValues (SchedulerStatusSuccess ).Inc ()
705+ }
706+ }
707+
// Values used for the "status" label of SchedulerAttemptsTotal.
const (
	// SchedulerStatusSuccess labels a scheduling attempt that returned no error.
	SchedulerStatusSuccess = "success"
	// SchedulerStatusFailure labels a scheduling attempt that returned an error.
	SchedulerStatusFailure = "failure"
)
712+
687713// RecordPluginProcessingLatency records the processing latency for a plugin.
688714func RecordPluginProcessingLatency (extensionPoint , pluginType , pluginName string , duration time.Duration ) {
689715 PluginProcessingLatencies .WithLabelValues (extensionPoint , pluginType , pluginName ).Observe (duration .Seconds ())
0 commit comments