From cd683b706e2e6c2f7ccd0c60702db9f6c7a34115 Mon Sep 17 00:00:00 2001
From: Ahmet Oeztuerk <Ahmet.Oeztuerk@consol.de>
Date: Fri, 28 Nov 2025 16:52:52 +0100
Subject: [PATCH 1/3] - add idle time to the cpu utilization - log error
 explanations before returning default values in cpu utilization - WIP: wait
 during the cpu utilization check if the gathered points are not far enough
 apart for the range. The checks will now return a valid value instead of the
 zeros that are currently causing problems

---
 docs/checks/commands/check_cpu_utilization.md | 23 +++++++------
 pkg/snclient/check_cpu_utilization.go         | 34 ++++++++++++++++---
 2 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/docs/checks/commands/check_cpu_utilization.md b/docs/checks/commands/check_cpu_utilization.md
index 1a5b7051..45e2d059 100644
--- a/docs/checks/commands/check_cpu_utilization.md
+++ b/docs/checks/commands/check_cpu_utilization.md
@@ -20,8 +20,8 @@ Checks the cpu utilization metrics.
 
 ### Default Check
 
-    check_cpu_utilization
-    OK - user: 29% - system: 11% - iowait: 3% - steal: 0% - guest: 0% |'user'=28.83%;;;0;...
+    check_cpu_utilization
+    OK - user: 2% - system: 1% - iowait: 0% - steal: 0% - guest: 0 - idle: 96% |'total'=3.4%;90;95;0; 'user'=2.11%;;;0;...
 
 ### Example using NRPE and Naemon
 
@@ -41,15 +41,15 @@ Naemon Config
 
 ## Argument Defaults
 
-| Argument      | Default Value                                                                                       |
-| ------------- | --------------------------------------------------------------------------------------------------- |
-| warning       | total > 90                                                                                          |
-| critical      | total > 95                                                                                          |
-| empty-state   | 0 (OK)                                                                                              |
-| empty-syntax  |                                                                                                     |
-| top-syntax    | \${status} - \${list}                                                                               |
-| ok-syntax     |                                                                                                     |
-| detail-syntax | user: \${user}% - system: \${system}% - iowait: \${iowait}% - steal: \${steal}% - guest: \${guest}% |
+| Argument      | Default Value                                                                                         |
+| ------------- | ----------------------------------------------------------------------------------------------------- |
+| warning       | total > 90                                                                                            |
+| critical      | total > 95                                                                                            |
+| empty-state   | 0 (OK)                                                                                                |
+| empty-syntax  |                                                                                                       |
+| top-syntax    | \${status} - \${list}                                                                                 |
+| ok-syntax     |                                                                                                       |
+| detail-syntax | user: \${user}% - system: \${system}% - iowait: \${iowait}% - steal: \${steal}% - guest: \${guest} - idle: %{idle}% |
 
 ## Check Specific Arguments
 
@@ -71,3 +71,4 @@ these can be used in filters and thresholds (along with the default attributes):
 | iowait    | IOWait cpu utilization in percent                    |
 | steal     | Steal cpu utilization in percent                     |
 | guest     | Guest cpu utilization in percent                     |
+| idle      | Idle cpu utilization in percent                      |
diff --git a/pkg/snclient/check_cpu_utilization.go b/pkg/snclient/check_cpu_utilization.go
index db277442..21a89d38 100644
--- a/pkg/snclient/check_cpu_utilization.go
+++ b/pkg/snclient/check_cpu_utilization.go
@@ -21,6 +21,7 @@ type CPUUtilizationResult struct {
 	iowait float64
 	steal  float64
 	guest  float64
+	idle   float64
 }
 
 type CheckCPUUtilization struct {
@@ -49,7 +50,7 @@ func (l *CheckCPUUtilization) Build() *CheckData {
 		defaultWarning:  "total > 90",
 		defaultCritical: "total > 95",
 		topSyntax:       "${status} - ${list}",
-		detailSyntax:    "user: ${user}% - system: ${system}% - iowait: ${iowait}% - steal: ${steal}% - guest: ${guest}%",
+		detailSyntax:    "user: ${user}% - system: ${system}% - iowait: ${iowait}% - steal: ${steal}% - guest: ${guest} - idle: %{idle}%",
 		attributes: []CheckAttribute{
 			{name: "total", description: "Sum of user,system,iowait,steal and guest in percent", unit: UPercent},
 			{name: "user", description: "User cpu utilization in percent", unit: UPercent},
@@ -57,10 +58,11 @@ func (l *CheckCPUUtilization) Build() *CheckData {
 			{name: "iowait", description: "IOWait cpu utilization in percent", unit: UPercent},
 			{name: "steal", description: "Steal cpu utilization in percent", unit: UPercent},
 			{name: "guest", description: "Guest cpu utilization in percent", unit: UPercent},
+			{name: "idle", description: "Idle cpu utilization in percent", unit: UPercent},
 		},
 		exampleDefault: `
-    check_cpu_utilization
-    OK - user: 29% - system: 11% - iowait: 3% - steal: 0% - guest: 0% |'user'=28.83%;;;0;...
+	check_cpu_utilization
+OK - user: 2% - system: 1% - iowait: 0% - steal: 0% - guest: 0 - idle: 96% |'total'=3.4%;90;95;0; 'user'=2.11%;;;0;...
 	`,
 		exampleArgs: `'warn=total > 90%' 'crit=total > 95%'`,
 	}
@@ -86,6 +88,7 @@ func (l *CheckCPUUtilization) Check(_ context.Context, snc *Agent, check *CheckD
 	return check.Finalize()
 }
 
+//nolint:funlen // The function is simple enough, the length comes from many fields to add.
 func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLookBack uint64) {
 	entry := map[string]string{
 		"total":  "0",
@@ -94,6 +97,7 @@ func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLoo
 		"iowait": "0",
 		"steal":  "0",
 		"guest":  "0",
+		"idle":   "0",
 	}
 	check.listData = append(check.listData, entry)
 
@@ -108,6 +112,7 @@ func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLoo
 	entry["iowait"] = fmt.Sprintf("%.f", cpuMetrics.iowait)
 	entry["steal"] = fmt.Sprintf("%.f", cpuMetrics.steal)
 	entry["guest"] = fmt.Sprintf("%.f", cpuMetrics.guest)
+	entry["idle"] = fmt.Sprintf("%.f", cpuMetrics.idle)
 
 	check.result.Metrics = append(check.result.Metrics,
 		&CheckMetric{
@@ -158,6 +163,14 @@ func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLoo
 			Critical: check.critThreshold,
 			Min:      &Zero,
 		},
+		&CheckMetric{
+			Name:     "idle",
+			Value:    utils.ToPrecision(cpuMetrics.idle, 2),
+			Unit:     "%",
+			Warning:  check.warnThreshold,
+			Critical: check.critThreshold,
+			Min:      &Zero,
+		},
 	)
 }
 
@@ -180,15 +193,27 @@ func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizati
 	cpuinfo1 := counter1.GetLast()
 	cpuinfo2 := counter2.GetAt(time.Now().Add(-time.Duration(scanLookBack64) * time.Second))
 	if cpuinfo1 == nil || cpuinfo2 == nil {
+		log.Errorf("Either the latest cpuinfo counter, or the cpuinfo counter from %d seconds ago seem to be null", scanLookBack)
+
 		return nil, false
 	}
 
 	if cpuinfo1.UnixMilli < cpuinfo2.UnixMilli {
+		log.Errorf("The last cpuinfo counters have a smaller timestamp: %d than the one that was found near %d seconds ago: %d", cpuinfo1.UnixMilli, scanLookBack, cpuinfo2.UnixMilli)
+
 		return nil, false
 	}
 	duration := float64(cpuinfo1.UnixMilli - cpuinfo2.UnixMilli)
+
 	if duration <= 0 {
-		return nil, false
+		// This case might happen if there is not enough recorded counters to make up the look back time
+		// We need to wait until that duration difference can be achieved
+		secondsToSleep := min(scanLookBack, 5)
+
+		log.Tracef("Waiting %d seconds and returning that value, as cpu utilization metrics for the last %d seconds is not available yet.", secondsToSleep, scanLookBack)
+		time.Sleep(time.Second * time.Duration(convert.Int32(secondsToSleep)))
+
+		return l.getMetrics(secondsToSleep - 1)
 	}
 	duration /= 1e3 // cpu times are measured in seconds
 
@@ -213,6 +238,7 @@ func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizati
 	res.iowait = (((info1.Iowait - info2.Iowait) / duration) * 100) / float64(numCPU)
 	res.steal = (((info1.Steal - info2.Steal) / duration) * 100) / float64(numCPU)
 	res.guest = (((info1.Guest - info2.Guest) / duration) * 100) / float64(numCPU)
+	res.idle = (((info1.Idle - info2.Idle) / duration) * 100) / float64(numCPU)
 	res.total = (res.user + res.system + res.iowait)
 
 	return res, true

From 08d693c69b0ac23e43f45fc79504fba318e67d2c Mon Sep 17 00:00:00 2001
From: Ahmet Oeztuerk <Ahmet.Oeztuerk@consol.de>
Date: Tue, 2 Dec 2025 15:09:13 +0100
Subject: [PATCH 2/3] feature: cpu idle time in check_cpu_utilization

Add more features to the Counter type: it now stores additional values
such as oldest, timesSet, retention and interval. Implement the GetFirst()
and CheckRetention() functions and use them in check_cpu_utilization.

The cpu utilization check can now detect if the maximum counter retention
time is too small for the query, or if the time span between the earliest
and the latest counter value is smaller than the query interval. It also
reports in the log if the time span found by the counter search is
considerably smaller than the query interval.
---
 pkg/counter/counter.go                | 106 ++++++++++++++++++++------
 pkg/counter/counter_test.go           |  94 +++++++++++++++++++++++
 pkg/snclient/check_cpu_utilization.go |  89 ++++++++++++++-------
 3 files changed, 238 insertions(+), 51 deletions(-)

diff --git a/pkg/counter/counter.go b/pkg/counter/counter.go
index 3c515112..f86da1bf 100644
--- a/pkg/counter/counter.go
+++ b/pkg/counter/counter.go
@@ -1,6 +1,7 @@
 package counter
 
 import (
+	"fmt"
 	"math"
 	"sync"
 	"time"
@@ -9,10 +10,14 @@ import (
 // Counter is the container for a single timeseries of performance values
 // it used a fixed size storage backend
 type Counter struct {
-	lock    sync.RWMutex // lock for concurrent access
-	data    []Value      // array of values
-	current int64        // position of last inserted value
-	size    int64        // number of values for this series
+	lock      sync.RWMutex  // lock for concurrent access
+	data      []Value       // array of values. size determined by the retention and interval
+	current   int64         // position of last inserted value
+	oldest    int64         // position of the earliest inserted value
+	size      int64         // number of values for this series
+	timesSet  int64         // number of times a value was set in this counter
+	retention time.Duration // the time span this counter can hold, interval * size
+	interval  time.Duration // the interval time that new values are designed to be added
 }
 
 // Value is a single entry of a Counter
@@ -27,27 +32,40 @@ func NewCounter(retentionTime, interval time.Duration) *Counter {
 	retentionMilli := retentionTime.Milliseconds()
 	intervalMilli := interval.Milliseconds()
 
-	// round retention time to a multiple of interval
-	retention := int64(math.Ceil(float64(retentionMilli)/float64(intervalMilli))) * intervalMilli
-	size := retention / intervalMilli
+	// round retentionMili time to a multiple of interval
+	retentionMili := int64(math.Ceil(float64(retentionMilli)/float64(intervalMilli))) * intervalMilli
+	size := retentionMili / intervalMilli
 
 	return &Counter{
-		lock:    sync.RWMutex{},
-		data:    make([]Value, size),
-		size:    size,
-		current: -1,
+		lock:      sync.RWMutex{},
+		data:      make([]Value, size),
+		size:      size,
+		current:   -1,
+		oldest:    -1,
+		retention: time.Duration(retentionMili) * time.Millisecond,
+		interval:  interval,
+		timesSet:  0,
 	}
 }
 
 // Set adds a new value with current timestamp
 func (c *Counter) Set(val any) {
 	c.lock.Lock()
+	// setting a value for the first time
+	if c.oldest == -1 {
+		c.oldest = 0
+	}
 	c.current++
 	if c.current == c.size {
 		c.current = 0
 	}
 	c.data[c.current].UnixMilli = time.Now().UTC().UnixMilli()
 	c.data[c.current].Value = val
+	c.timesSet++
+	// if we already filled the array, and started overwriting, the oldest index just got overwritten
+	if c.timesSet > c.size {
+		c.oldest = (c.current + 1) % c.size
+	}
 	c.lock.Unlock()
 }
 
@@ -66,7 +84,9 @@ func (c *Counter) AvgForDuration(duration time.Duration) float64 {
 	if idx == -1 {
 		return 0
 	}
-	for seen := int64(0); seen <= c.size; seen++ {
+
+	//nolint:intrange // tracking the seen elements is easier
+	for seen := int64(0); seen < c.size; seen++ {
 		if c.data[idx].UnixMilli > useAfter {
 			if val, ok := c.data[idx].Value.(float64); ok {
 				sum += val
@@ -145,35 +165,73 @@ func (c *Counter) getLast() *Value {
 	return &c.data[c.current]
 }
 
-// GetAt returns first value closest to given date
-func (c *Counter) GetAt(useAfter time.Time) *Value {
+// GetFirst returns first (earliest) value
+func (c *Counter) GetFirst() *Value {
+	c.lock.RLock()
+	defer c.lock.RUnlock()
+
+	return c.getFirst()
+}
+
+func (c *Counter) getFirst() *Value {
+	// no value has been set yet, so there is no oldest entry
+	if c.oldest == -1 {
+		return nil
+	}
+
+	return &c.data[c.oldest]
+}
+
+// GetAt returns the earliest value whose timestamp is >= lowerBound
+func (c *Counter) GetAt(lowerBound time.Time) *Value {
 	c.lock.RLock()
 	defer c.lock.RUnlock()
 
-	return c.getAt(useAfter)
+	return c.getAt(lowerBound)
 }
 
-func (c *Counter) getAt(useAfter time.Time) *Value {
-	useAfterUnix := useAfter.UTC().UnixMilli()
+// getAt returns the earliest value whose timestamp is >= lowerBound, or nil if there is none
+func (c *Counter) getAt(lowerBound time.Time) *Value {
+	useAfterUnix := lowerBound.UTC().UnixMilli()
+
+	// the counter is not yet populated
 	idx := c.current
 	if idx == -1 {
 		return nil
 	}
 
-	var last *Value
-	for seen := int64(0); seen <= c.size; seen++ {
-		val := &c.data[idx]
-		if val.UnixMilli < useAfterUnix {
-			return last
+	var previouslyComparedValue *Value
+	for valuesSeen := int64(0); valuesSeen <= c.size; valuesSeen++ {
+		currentValue := &c.data[idx]
+		if currentValue.UnixMilli < useAfterUnix {
+			return previouslyComparedValue
 		}
-		last = val
+
+		previouslyComparedValue = currentValue
 		idx--
 		if idx < 0 {
 			idx = c.size - 1
 		}
 	}
 
-	return last
+	return previouslyComparedValue
+}
+
+// CheckRetention returns an error if the counter retention, extended by intervalExtensionCount intervals (0 for none), is shorter than targetRetention.
+func (c *Counter) CheckRetention(targetRetention time.Duration, intervalExtensionCount int64) error {
+	extendedRetentionRange := c.retention + time.Duration(intervalExtensionCount)*c.interval
+
+	if extendedRetentionRange < targetRetention {
+		if intervalExtensionCount == 0 {
+			return fmt.Errorf("counter retention range is %f seconds, less than the target retention range of %f seconds",
+				extendedRetentionRange.Seconds(), targetRetention.Seconds())
+		}
+
+		return fmt.Errorf("counter retention range is %f seconds, even when extended by %d intervals to be %f seconds, it is less than target retention range of %f seconds",
+			c.retention.Seconds(), intervalExtensionCount, extendedRetentionRange.Seconds(), targetRetention.Seconds())
+	}
+
+	return nil
+}
 
 // Float64 returns value as float64
diff --git a/pkg/counter/counter_test.go b/pkg/counter/counter_test.go
index 068a17a7..f53ccbee 100644
--- a/pkg/counter/counter_test.go
+++ b/pkg/counter/counter_test.go
@@ -5,6 +5,7 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 func TestCounter(t *testing.T) {
@@ -55,3 +56,96 @@ func TestCounter(t *testing.T) {
 	set.Delete("test", "key")
 	assert.Emptyf(t, set.counter, "set is empty now")
 }
+
+func TestCounter2(t *testing.T) {
+	set := NewCounterSet()
+
+	retention := time.Millisecond * 4500
+	interval := time.Second
+	set.Create("test", "key", retention, interval)
+
+	// empty counter
+	counter := set.Get("test", "key")
+	latest := counter.getLast()
+	oldest := counter.getFirst()
+	assert.Nil(t, latest, "calling latest on empty counter should return nil")
+	assert.Nil(t, oldest, "calling oldest on empty counter should return nil")
+
+	// check the retention for 4 seconds
+	retentionCheck1 := counter.CheckRetention(time.Second*4, 0)
+	require.NoError(t, retentionCheck1, "the counter should be able to hold 4 seconds")
+
+	// check the retention for 5 seconds
+	retentionCheck2 := counter.CheckRetention(time.Second*5, 0)
+	require.NoError(t, retentionCheck2, "the counter should be able to hold 5 seconds")
+
+	// check the retention for 6 seconds
+	retentionCheck3 := counter.CheckRetention(time.Second*6, 0)
+	require.Error(t, retentionCheck3, "the counter should not be able to hold 6 seconds")
+
+	// check the retention for 10 seconds
+	retentionCheck4 := counter.CheckRetention(time.Second*10, 0)
+	require.Error(t, retentionCheck4, "the counter should not be able to hold 10 seconds")
+
+	// check the retention for 1 minute with 10 extensions
+	retentionCheck5 := counter.CheckRetention(time.Minute, 10)
+	require.Error(t, retentionCheck5, "the counter should not be able to hold 1 minute with 10 interval extensions")
+
+	// check the retention for 1 minute with 100 extensions
+	retentionCheck6 := counter.CheckRetention(time.Minute, 100)
+	require.NoError(t, retentionCheck6, "the counter should be able to hold 1 minute with 10 interval extensions")
+
+	// 1 _ _ _ _
+	counter.Set(float64(1))
+	latest = counter.getLast()
+	oldest = counter.getFirst()
+	assert.InEpsilon(t, float64(1), latest.Value, 0.001, "latest element should be 1")
+	assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1")
+
+	// 1 2 _ _ _
+	counter.Set(float64(2))
+	latest = counter.getLast()
+	oldest = counter.getFirst()
+	assert.InEpsilon(t, float64(2), latest.Value, 0.001, "latest element should be 2")
+	assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1")
+
+	// 1 2 3 _ _
+	counter.Set(float64(3))
+	latest = counter.getLast()
+	oldest = counter.getFirst()
+	assert.InEpsilon(t, float64(3), latest.Value, 0.001, "latest element should be 3")
+	assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1")
+
+	// 1 2 3 4 _
+	counter.Set(float64(4))
+	latest = counter.getLast()
+	oldest = counter.getFirst()
+	assert.InEpsilon(t, float64(4), latest.Value, 0.001, "latest element should be 4")
+	assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1")
+
+	// 1 2 3 4 5
+	counter.Set(float64(5))
+	latest = counter.getLast()
+	oldest = counter.getFirst()
+	assert.InEpsilon(t, float64(5), latest.Value, 0.001, "latest element should be 5")
+	assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1")
+
+	// check the average now
+	avg := counter.AvgForDuration(time.Minute)
+	assert.InEpsilon(t, 3, avg, 0.001, "average of 1,2,3,4,5 is 3")
+
+	// started overwriting from the first index, the c.oldest should update
+	// 6 2 3 4 5
+	counter.Set(float64(6))
+	latest = counter.getLast()
+	oldest = counter.getFirst()
+	assert.InEpsilon(t, float64(6), latest.Value, 0.001, "latest element should be 6")
+	assert.InEpsilon(t, float64(2), oldest.Value, 0.001, "oldest element should be 2")
+
+	// 6 7 3 4 5
+	counter.Set(float64(7))
+	latest = counter.getLast()
+	oldest = counter.getFirst()
+	assert.InEpsilon(t, float64(7), latest.Value, 0.001, "latest element should be 7")
+	assert.InEpsilon(t, float64(3), oldest.Value, 0.001, "oldest element should be 3")
+}
diff --git a/pkg/snclient/check_cpu_utilization.go b/pkg/snclient/check_cpu_utilization.go
index 21a89d38..85337698 100644
--- a/pkg/snclient/check_cpu_utilization.go
+++ b/pkg/snclient/check_cpu_utilization.go
@@ -174,54 +174,88 @@ func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLoo
 	)
 }
 
+//nolint:funlen // moving these statements to new helper functions would be illogical
 func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizationResult, ok bool) {
 	res = &CPUUtilizationResult{}
 
-	counter1 := l.snc.Counter.Get("cpuinfo", "info")
-	counter2 := l.snc.Counter.Get("cpuinfo", "info")
-	if counter1 == nil || counter2 == nil {
+	counter := l.snc.Counter.Get("cpuinfo", "info")
+	if counter == nil {
 		return nil, false
 	}
 
 	scanLookBack64, err := convert.Int64E(scanLookBack)
 	if err != nil {
-		log.Warnf("failed to convert scan look back: %s", err.Error())
+		log.Warnf("failed to convert scan look back to int64: %s", err.Error())
 
 		return nil, false
 	}
 
-	cpuinfo1 := counter1.GetLast()
-	cpuinfo2 := counter2.GetAt(time.Now().Add(-time.Duration(scanLookBack64) * time.Second))
-	if cpuinfo1 == nil || cpuinfo2 == nil {
-		log.Errorf("Either the latest cpuinfo counter, or the cpuinfo counter from %d seconds ago seem to be null", scanLookBack)
+	if err = counter.CheckRetention(time.Second*time.Duration(scanLookBack64), 0); err != nil {
+		log.Tracef("cpuinfo counter cant hold the query range: %s", err.Error())
+	}
+
+	if err = counter.CheckRetention(time.Second*time.Duration(scanLookBack64), 100); err != nil {
+		log.Warnf("cpuinfo counter cant hold the query range even when extended: %s", err.Error())
+
+		return nil, false
+	}
+
+	cpuinfoLatest := counter.GetLast()
+	cpuinfoOldest := counter.GetFirst()
+	if cpuinfoLatest == nil {
+		log.Warnf("latest cpuinfo value seems to be null. counter might not be populated yet.")
+
+		return nil, false
+	}
+	if cpuinfoOldest == nil {
+		log.Warnf("oldest cpuinfo value seems to be null. counter might not be populated yet.")
 
 		return nil, false
 	}
 
-	if cpuinfo1.UnixMilli < cpuinfo2.UnixMilli {
-		log.Errorf("The last cpuinfo counters have a smaller timestamp: %d than the one that was found near %d seconds ago: %d", cpuinfo1.UnixMilli, scanLookBack, cpuinfo2.UnixMilli)
+	cpuinfoCounterDuration := cpuinfoLatest.UnixMilli - cpuinfoOldest.UnixMilli
+	if cpuinfoCounterDuration < scanLookBack64*1000 {
+		log.Tracef("cpuinfo counter has %d ms between its latest and oldest value, cannot properly provide %d s range of query", cpuinfoCounterDuration, scanLookBack)
+
+		// Optionally we can wait on this thread while other threads fill the counter up.
+	}
+
+	cpuinfoLookBackAgo := counter.GetAt(time.Now().Add(-time.Duration(scanLookBack64) * time.Second))
+	if cpuinfoLookBackAgo == nil {
+		log.Warnf("cpuinfo value search with lower bound of now-%d seconds returned null", scanLookBack)
 
 		return nil, false
 	}
-	duration := float64(cpuinfo1.UnixMilli - cpuinfo2.UnixMilli)
 
-	if duration <= 0 {
-		// This case might happen if there is not enough recorded counters to make up the look back time
-		// We need to wait until that duration difference can be achieved
-		secondsToSleep := min(scanLookBack, 5)
+	duration := float64(cpuinfoLatest.UnixMilli - cpuinfoLookBackAgo.UnixMilli)
+	acceptableDurationMultipler := 0.5
+	minimumAcceptableDuration := float64(scanLookBack) * 1000 * acceptableDurationMultipler
+
+	switch {
+	case duration <= 0:
+		// This case might happen if there is only one counter value so far
+		log.Tracef("counter query from now-%d seconds <-> latest returned a range of %f ms. This is not positive, there might not be enough values recorded yet.", scanLookBack, duration)
 
-		log.Tracef("Waiting %d seconds and returning that value, as cpu utilization metrics for the last %d seconds is not available yet.", secondsToSleep, scanLookBack)
-		time.Sleep(time.Second * time.Duration(convert.Int32(secondsToSleep)))
+		return nil, false
+	case duration < minimumAcceptableDuration:
+		log.Tracef("counter query from now-%d seconds <-> latest returned a range of %f ms. This is not bellow the acceptable range, the data may be unrepresentative. "+
+			"acceptableDurationMultipler * scanLookBack seconds : %f * %f = %f ",
+			scanLookBack, duration, acceptableDurationMultipler, float64(scanLookBack), minimumAcceptableDuration)
 
-		return l.getMetrics(secondsToSleep - 1)
+		// Optionally we can return an empty result here
+	case duration <= float64(scanLookBack)*1000:
+		log.Tracef("counter query from now-%d seconds <-> latest returned a range of %f ms. This is in the acceptable range, the data should be representative. "+
+			"acceptableDurationMultipler * scanLookBack seconds : %f * %f = %f ",
+			scanLookBack, duration, acceptableDurationMultipler, float64(scanLookBack), minimumAcceptableDuration)
+	default:
+		log.Tracef("counter query from now-%d seconds <-> latest returned a range of %f ms. This is higher than the query range and something must have gone wrong.", scanLookBack, duration)
 	}
-	duration /= 1e3 // cpu times are measured in seconds
 
-	info1, ok := cpuinfo1.Value.(*cpuinfo.TimesStat)
+	info1, ok := cpuinfoLatest.Value.(*cpuinfo.TimesStat)
 	if !ok {
 		return nil, false
 	}
-	info2, ok := cpuinfo2.Value.(*cpuinfo.TimesStat)
+	info2, ok := cpuinfoLookBackAgo.Value.(*cpuinfo.TimesStat)
 	if !ok {
 		return nil, false
 	}
@@ -233,12 +267,13 @@ func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizati
 		return nil, false
 	}
 
-	res.user = (((info1.User - info2.User) / duration) * 100) / float64(numCPU)
-	res.system = (((info1.System - info2.System) / duration) * 100) / float64(numCPU)
-	res.iowait = (((info1.Iowait - info2.Iowait) / duration) * 100) / float64(numCPU)
-	res.steal = (((info1.Steal - info2.Steal) / duration) * 100) / float64(numCPU)
-	res.guest = (((info1.Guest - info2.Guest) / duration) * 100) / float64(numCPU)
-	res.idle = (((info1.Idle - info2.Idle) / duration) * 100) / float64(numCPU)
+	durationInS := duration / 1e3 // cpu times are measured in seconds
+	res.user = (((info1.User - info2.User) / durationInS * 100) / float64(numCPU))
+	res.system = (((info1.System - info2.System) / durationInS * 100) / float64(numCPU))
+	res.iowait = (((info1.Iowait - info2.Iowait) / durationInS * 100) / float64(numCPU))
+	res.steal = (((info1.Steal - info2.Steal) / durationInS * 100) / float64(numCPU))
+	res.guest = (((info1.Guest - info2.Guest) / durationInS * 100) / float64(numCPU))
+	res.idle = (((info1.Idle - info2.Idle) / durationInS * 100) / float64(numCPU))
 	res.total = (res.user + res.system + res.iowait)
 
 	return res, true

From bbd3a747163cfc7dc54c331da00f964765dcdce3 Mon Sep 17 00:00:00 2001
From: Ahmet Oeztuerk <Ahmet.Oeztuerk@consol.de>
Date: Thu, 4 Dec 2025 15:01:21 +0100
Subject: [PATCH 3/3] address comments

fix typos and use range loops instead of loop variable
---
 pkg/counter/counter.go                | 17 ++++++++---------
 pkg/snclient/check_cpu_utilization.go |  4 ++--
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/pkg/counter/counter.go b/pkg/counter/counter.go
index f86da1bf..4eeaf5af 100644
--- a/pkg/counter/counter.go
+++ b/pkg/counter/counter.go
@@ -29,12 +29,12 @@ type Value struct {
 // NewCounter creates a new Counter with given retention time and interval
 func NewCounter(retentionTime, interval time.Duration) *Counter {
 	// round retention and interval to milliseconds
-	retentionMilli := retentionTime.Milliseconds()
-	intervalMilli := interval.Milliseconds()
+	retentionMili := retentionTime.Milliseconds()
+	intervalMili := interval.Milliseconds()
 
-	// round retentionMili time to a multiple of interval
-	retentionMili := int64(math.Ceil(float64(retentionMilli)/float64(intervalMilli))) * intervalMilli
-	size := retentionMili / intervalMilli
+	// round retentionMili to a multiple of interval
+	retentionMiliRounded := int64(math.Ceil(float64(retentionMili)/float64(intervalMili))) * intervalMili
+	size := retentionMiliRounded / intervalMili
 
 	return &Counter{
 		lock:      sync.RWMutex{},
@@ -42,7 +42,7 @@ func NewCounter(retentionTime, interval time.Duration) *Counter {
 		size:      size,
 		current:   -1,
 		oldest:    -1,
-		retention: time.Duration(retentionMili) * time.Millisecond,
+		retention: time.Duration(retentionMiliRounded) * time.Millisecond,
 		interval:  interval,
 		timesSet:  0,
 	}
@@ -85,8 +85,7 @@ func (c *Counter) AvgForDuration(duration time.Duration) float64 {
 		return 0
 	}
 
-	//nolint:intrange // tracking the seen elements is easier
-	for seen := int64(0); seen < c.size; seen++ {
+	for range c.size {
 		if c.data[idx].UnixMilli > useAfter {
 			if val, ok := c.data[idx].Value.(float64); ok {
 				sum += val
@@ -201,7 +200,7 @@ func (c *Counter) getAt(lowerBound time.Time) *Value {
 	}
 
 	var previouslyComparedValue *Value
-	for valuesSeen := int64(0); valuesSeen <= c.size; valuesSeen++ {
+	for range c.size {
 		currentValue := &c.data[idx]
 		if currentValue.UnixMilli < useAfterUnix {
 			return previouslyComparedValue
diff --git a/pkg/snclient/check_cpu_utilization.go b/pkg/snclient/check_cpu_utilization.go
index 85337698..f1c20ec1 100644
--- a/pkg/snclient/check_cpu_utilization.go
+++ b/pkg/snclient/check_cpu_utilization.go
@@ -191,11 +191,11 @@ func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizati
 	}
 
 	if err = counter.CheckRetention(time.Second*time.Duration(scanLookBack64), 0); err != nil {
-		log.Tracef("cpuinfo counter cant hold the query range: %s", err.Error())
+		log.Tracef("cpuinfo counter can not hold the query range: %s", err.Error())
 	}
 
 	if err = counter.CheckRetention(time.Second*time.Duration(scanLookBack64), 100); err != nil {
-		log.Warnf("cpuinfo counter cant hold the query range even when extended: %s", err.Error())
+		log.Warnf("cpuinfo counter can not hold the query range even when extended: %s", err.Error())
 
 		return nil, false
 	}