From cd683b706e2e6c2f7ccd0c60702db9f6c7a34115 Mon Sep 17 00:00:00 2001 From: Ahmet Oeztuerk Date: Fri, 28 Nov 2025 16:52:52 +0100 Subject: [PATCH 1/3] - add idle time for the cpu utilization - log error explanations before returning default values in cpu utilization - WIP: wait during the cpu utilization check if the gathered points are not enough apart for the range. now the checks will return a valid value instead of zeros, causing problems rn --- docs/checks/commands/check_cpu_utilization.md | 23 +++++++------ pkg/snclient/check_cpu_utilization.go | 34 ++++++++++++++++--- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/docs/checks/commands/check_cpu_utilization.md b/docs/checks/commands/check_cpu_utilization.md index 1a5b7051..45e2d059 100644 --- a/docs/checks/commands/check_cpu_utilization.md +++ b/docs/checks/commands/check_cpu_utilization.md @@ -20,8 +20,8 @@ Checks the cpu utilization metrics. ### Default Check - check_cpu_utilization - OK - user: 29% - system: 11% - iowait: 3% - steal: 0% - guest: 0% |'user'=28.83%;;;0;... + check_cpu_utilization +OK - user: 2% - system: 1% - iowait: 0% - steal: 0% - guest: 0 - idle: 96% |'total'=3.4%;90;95;0; 'user'=2.11%;;;0;... ### Example using NRPE and Naemon @@ -41,15 +41,15 @@ Naemon Config ## Argument Defaults -| Argument | Default Value | -| ------------- | --------------------------------------------------------------------------------------------------- | -| warning | total > 90 | -| critical | total > 95 | -| empty-state | 0 (OK) | -| empty-syntax | | -| top-syntax | \${status} - \${list} | -| ok-syntax | | -| detail-syntax | user: \${user}% - system: \${system}% - iowait: \${iowait}% - steal: \${steal}% - guest: \${guest}% | +| Argument | Default Value | +| ------------- | ----------------------------------------------------------------------------------------------------- | +| warning | total > 90 | +| critical | total > 95 | +| empty-state | 0 (OK) | +| empty-syntax | | +| top-syntax | \${status} - \${list} | +| ok-syntax | | +| detail-syntax | user: \${user}% - system: \${system}% - iowait: \${iowait}% - steal: \${steal}% - guest: \${guest} - idle: %{idle}% | ## Check Specific Arguments @@ -71,3 +71,4 @@ these can be used in filters and thresholds (along with the default attributes): | iowait | IOWait cpu utilization in percent | | steal | Steal cpu utilization in percent | | guest | Guest cpu utilization in percent | +| idle | Idle cpu utilization in percent | diff --git a/pkg/snclient/check_cpu_utilization.go b/pkg/snclient/check_cpu_utilization.go index db277442..21a89d38 100644 --- a/pkg/snclient/check_cpu_utilization.go +++ b/pkg/snclient/check_cpu_utilization.go @@ -21,6 +21,7 @@ type CPUUtilizationResult struct { iowait float64 steal float64 guest float64 + idle float64 } type CheckCPUUtilization struct { @@ -49,7 +50,7 @@ func (l *CheckCPUUtilization) Build() *CheckData { defaultWarning: "total > 90", defaultCritical: "total > 95", topSyntax: "${status} - ${list}", - detailSyntax: "user: ${user}% - system: ${system}% - iowait: ${iowait}% - steal: ${steal}% - guest: ${guest}%", + detailSyntax: "user: ${user}% - system: ${system}% - iowait: ${iowait}% - steal: ${steal}% - guest: ${guest} - idle: %{idle}%", attributes: []CheckAttribute{ {name: "total", description: "Sum of user,system,iowait,steal and guest in percent", unit: UPercent}, {name: "user", description: "User cpu utilization in percent", unit: UPercent}, @@ -57,10 +58,11 @@ func (l *CheckCPUUtilization) Build() *CheckData { {name: "iowait", description: "IOWait cpu utilization in percent", unit: UPercent}, {name: "steal", description: "Steal cpu utilization in percent", unit: UPercent}, {name: "guest", description: "Guest cpu utilization in percent", unit: UPercent}, + {name: "idle", description: "Idle cpu utilization in percent", unit: UPercent}, }, exampleDefault: ` - check_cpu_utilization - OK - user: 29% - system: 11% - iowait: 3% - steal: 0% - guest: 0% |'user'=28.83%;;;0;... + check_cpu_utilization +OK - user: 2% - system: 1% - iowait: 0% - steal: 0% - guest: 0 - idle: 96% |'total'=3.4%;90;95;0; 'user'=2.11%;;;0;... `, exampleArgs: `'warn=total > 90%' 'crit=total > 95%'`, } @@ -86,6 +88,7 @@ func (l *CheckCPUUtilization) Check(_ context.Context, snc *Agent, check *CheckD return check.Finalize() } +//nolint:funlen // The function is simple enough, the length comes from many fields to add. func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLookBack uint64) { entry := map[string]string{ "total": "0", @@ -94,6 +97,7 @@ func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLoo "iowait": "0", "steal": "0", "guest": "0", + "idle": "0", } check.listData = append(check.listData, entry) @@ -108,6 +112,7 @@ func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLoo entry["iowait"] = fmt.Sprintf("%.f", cpuMetrics.iowait) entry["steal"] = fmt.Sprintf("%.f", cpuMetrics.steal) entry["guest"] = fmt.Sprintf("%.f", cpuMetrics.guest) + entry["idle"] = fmt.Sprintf("%.f", cpuMetrics.idle) check.result.Metrics = append(check.result.Metrics, &CheckMetric{ @@ -158,6 +163,14 @@ func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLoo Critical: check.critThreshold, Min: &Zero, }, + &CheckMetric{ + Name: "idle", + Value: utils.ToPrecision(cpuMetrics.idle, 2), + Unit: "%", + Warning: check.warnThreshold, + Critical: check.critThreshold, + Min: &Zero, + }, ) } @@ -180,15 +193,27 @@ func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizati cpuinfo1 := counter1.GetLast() cpuinfo2 := counter2.GetAt(time.Now().Add(-time.Duration(scanLookBack64) * time.Second)) if cpuinfo1 == nil || cpuinfo2 == nil { + log.Errorf("Either the latest cpuinfo counter, or the cpuinfo counter from %d seconds ago seem to be null", scanLookBack) + return nil, false } if cpuinfo1.UnixMilli < cpuinfo2.UnixMilli { + log.Errorf("The last cpuinfo counters have a smaller timestamp: %d than the one that was found near %d seconds ago: %d", cpuinfo1.UnixMilli, scanLookBack, cpuinfo2.UnixMilli) + return nil, false } duration := float64(cpuinfo1.UnixMilli - cpuinfo2.UnixMilli) + if duration <= 0 { - return nil, false + // This case might happen if there is not enough recorded counters to make up the look back time + // We need to wait until that duration difference can be achieved + secondsToSleep := min(scanLookBack, 5) + + log.Tracef("Waiting %d seconds and returning that value, as cpu utilization metrics for the last %d seconds is not available yet.", secondsToSleep, scanLookBack) + time.Sleep(time.Second * time.Duration(convert.Int32(secondsToSleep))) + + return l.getMetrics(secondsToSleep - 1) } duration /= 1e3 // cpu times are measured in seconds @@ -213,6 +238,7 @@ func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizati res.iowait = (((info1.Iowait - info2.Iowait) / duration) * 100) / float64(numCPU) res.steal = (((info1.Steal - info2.Steal) / duration) * 100) / float64(numCPU) res.guest = (((info1.Guest - info2.Guest) / duration) * 100) / float64(numCPU) + res.idle = (((info1.Idle - info2.Idle) / duration) * 100) / float64(numCPU) res.total = (res.user + res.system + res.iowait) return res, true From 08d693c69b0ac23e43f45fc79504fba318e67d2c Mon Sep 17 00:00:00 2001 From: Ahmet Oeztuerk Date: Tue, 2 Dec 2025 15:09:13 +0100 Subject: [PATCH 2/3] feature: cpu idle time in check_cpu_utilization Add some more features in the counter class, it stores more values like oldest, timesSet, retention and interval. Implement getFirst() and checkRetention() functions and use them in the check_cpu_utilization. Cpu utilization check can now detect if the maximum counter retention time is too small for the query, or if the current counter interval between earliest and the latest value is smaller than the interval. It will also report and complain in the log if the counter search interval being relatively smaller than the query interval. --- pkg/counter/counter.go | 106 ++++++++++++++++++++------ pkg/counter/counter_test.go | 94 +++++++++++++++++++++++ pkg/snclient/check_cpu_utilization.go | 89 ++++++++++++++------- 3 files changed, 238 insertions(+), 51 deletions(-) diff --git a/pkg/counter/counter.go b/pkg/counter/counter.go index 3c515112..f86da1bf 100644 --- a/pkg/counter/counter.go +++ b/pkg/counter/counter.go @@ -1,6 +1,7 @@ package counter import ( + "fmt" "math" "sync" "time" @@ -9,10 +10,14 @@ import ( // Counter is the container for a single timeseries of performance values // it used a fixed size storage backend type Counter struct { - lock sync.RWMutex // lock for concurrent access - data []Value // array of values - current int64 // position of last inserted value - size int64 // number of values for this series + lock sync.RWMutex // lock for concurrent access + data []Value // array of values. size determined by the retention and interval + current int64 // position of last inserted value + oldest int64 // position of the earliest inserted value + size int64 // number of values for this series + timesSet int64 // number of times a value was set in this counter + retention time.Duration // the time span this counter can hold, interval * size + interval time.Duration // the interval time that new values are designed to be added } // Value is a single entry of a Counter @@ -27,27 +32,40 @@ func NewCounter(retentionTime, interval time.Duration) *Counter { retentionMilli := retentionTime.Milliseconds() intervalMilli := interval.Milliseconds() - // round retention time to a multiple of interval - retention := int64(math.Ceil(float64(retentionMilli)/float64(intervalMilli))) * intervalMilli - size := retention / intervalMilli + // round retentionMili time to a multiple of interval + retentionMili := int64(math.Ceil(float64(retentionMilli)/float64(intervalMilli))) * intervalMilli + size := retentionMili / intervalMilli return &Counter{ - lock: sync.RWMutex{}, - data: make([]Value, size), - size: size, - current: -1, + lock: sync.RWMutex{}, + data: make([]Value, size), + size: size, + current: -1, + oldest: -1, + retention: time.Duration(retentionMili) * time.Millisecond, + interval: interval, + timesSet: 0, } } // Set adds a new value with current timestamp func (c *Counter) Set(val any) { c.lock.Lock() + // setting a value for the first time + if c.oldest == -1 { + c.oldest = 0 + } c.current++ if c.current == c.size { c.current = 0 } c.data[c.current].UnixMilli = time.Now().UTC().UnixMilli() c.data[c.current].Value = val + c.timesSet++ + // if we already filled the array, and started overwriting, the oldest index just got overwritten + if c.timesSet > c.size { + c.oldest = (c.current + 1) % c.size + } c.lock.Unlock() } @@ -66,7 +84,9 @@ func (c *Counter) AvgForDuration(duration time.Duration) float64 { if idx == -1 { return 0 } - for seen := int64(0); seen <= c.size; seen++ { + + //nolint:intrange // tracking the seen elements is easier + for seen := int64(0); seen < c.size; seen++ { if c.data[idx].UnixMilli > useAfter { if val, ok := c.data[idx].Value.(float64); ok { sum += val @@ -145,35 +165,73 @@ func (c *Counter) getLast() *Value { return &c.data[c.current] } -// GetAt returns first value closest to given date -func (c *Counter) GetAt(useAfter time.Time) *Value { +// GetFirst returns first (earliest) value +func (c *Counter) GetFirst() *Value { + c.lock.RLock() + defer c.lock.RUnlock() + + return c.getFirst() +} + +func (c *Counter) getFirst() *Value { + // the latest added item had index c.current + if c.oldest == -1 { + return nil + } + + return &c.data[c.oldest] +} + +// GetAt returns first value with >= timestamp than lowerBound +func (c *Counter) GetAt(lowerBound time.Time) *Value { c.lock.RLock() defer c.lock.RUnlock() - return c.getAt(useAfter) + return c.getAt(lowerBound) } -func (c *Counter) getAt(useAfter time.Time) *Value { - useAfterUnix := useAfter.UTC().UnixMilli() +// Gets the first counter that has a >= timestamp than lowerBound +func (c *Counter) getAt(lowerBound time.Time) *Value { + useAfterUnix := lowerBound.UTC().UnixMilli() + + // the counter is not yet populated idx := c.current if idx == -1 { return nil } - var last *Value - for seen := int64(0); seen <= c.size; seen++ { - val := &c.data[idx] - if val.UnixMilli < useAfterUnix { - return last + var previouslyComparedValue *Value + for valuesSeen := int64(0); valuesSeen <= c.size; valuesSeen++ { + currentValue := &c.data[idx] + if currentValue.UnixMilli < useAfterUnix { + return previouslyComparedValue } - last = val + + previouslyComparedValue = currentValue idx-- if idx < 0 { idx = c.size - 1 } } - return last + return previouslyComparedValue +} + +// checks if the counter can fit the targetRetention. optionally extend the interval by count in the check +func (c *Counter) CheckRetention(targetRetention time.Duration, intervalExtensionCount int64) error { + extendedRetentionRange := c.retention + time.Duration(intervalExtensionCount)*c.interval + + if extendedRetentionRange < targetRetention { + if intervalExtensionCount == 0 { + return fmt.Errorf("counter retention range is %f seconds, less than the target retention range of %f seconds", + extendedRetentionRange.Seconds(), targetRetention.Seconds()) + } + + return fmt.Errorf("counter retention range is %f seconds, even when extended by %d intervals to be %f seconds, it is less than target retention range of %f seconds", + c.interval.Seconds(), intervalExtensionCount, extendedRetentionRange.Seconds(), targetRetention.Seconds()) + } + + return nil } // Float64 returns value as float64 diff --git a/pkg/counter/counter_test.go b/pkg/counter/counter_test.go index 068a17a7..f53ccbee 100644 --- a/pkg/counter/counter_test.go +++ b/pkg/counter/counter_test.go @@ -5,6 +5,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestCounter(t *testing.T) { @@ -55,3 +56,96 @@ func TestCounter(t *testing.T) { set.Delete("test", "key") assert.Emptyf(t, set.counter, "set is empty now") } + +func TestCounter2(t *testing.T) { + set := NewCounterSet() + + retention := time.Millisecond * 4500 + interval := time.Second + set.Create("test", "key", retention, interval) + + // empty counter + counter := set.Get("test", "key") + latest := counter.getLast() + oldest := counter.getFirst() + assert.Nil(t, latest, "calling latest on empty counter should return nil") + assert.Nil(t, oldest, "calling oldest on empty counter should return nil") + + // check the retention for 4 seconds + retentionCheck1 := counter.CheckRetention(time.Second*4, 0) + require.NoError(t, retentionCheck1, "the counter should be able to hold 4 seconds") + + // check the retention for 5 seconds + retentionCheck2 := counter.CheckRetention(time.Second*5, 0) + require.NoError(t, retentionCheck2, "the counter should be able to hold 5 seconds") + + // check the retention for 6 seconds + retentionCheck3 := counter.CheckRetention(time.Second*6, 0) + require.Error(t, retentionCheck3, "the counter should not be able to hold 6 seconds") + + // check the retention for 10 seconds + retentionCheck4 := counter.CheckRetention(time.Second*10, 0) + require.Error(t, retentionCheck4, "the counter should not be able to hold 10 seconds") + + // check the retention for 1 minute with 10 extensions + retentionCheck5 := counter.CheckRetention(time.Minute, 10) + require.Error(t, retentionCheck5, "the counter should not be able to hold 1 minute with 10 interval extensions") + + // check the retention for 1 minute with 100 extensions + retentionCheck6 := counter.CheckRetention(time.Minute, 100) + require.NoError(t, retentionCheck6, "the counter should be able to hold 1 minute with 10 interval extensions") + + // 1 _ _ _ _ + counter.Set(float64(1)) + latest = counter.getLast() + oldest = counter.getFirst() + assert.InEpsilon(t, float64(1), latest.Value, 0.001, "latest element should be 1") + assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1") + + // 1 2 _ _ _ + counter.Set(float64(2)) + latest = counter.getLast() + oldest = counter.getFirst() + assert.InEpsilon(t, float64(2), latest.Value, 0.001, "latest element should be 2") + assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1") + + // 1 2 3 _ _ + counter.Set(float64(3)) + latest = counter.getLast() + oldest = counter.getFirst() + assert.InEpsilon(t, float64(3), latest.Value, 0.001, "latest element should be 3") + assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1") + + // 1 2 3 4 _ + counter.Set(float64(4)) + latest = counter.getLast() + oldest = counter.getFirst() + assert.InEpsilon(t, float64(4), latest.Value, 0.001, "latest element should be 4") + assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1") + + // 1 2 3 4 5 + counter.Set(float64(5)) + latest = counter.getLast() + oldest = counter.getFirst() + assert.InEpsilon(t, float64(5), latest.Value, 0.001, "latest element should be 5") + assert.InEpsilon(t, float64(1), oldest.Value, 0.001, "oldest element should be 1") + + // check the average now + avg := counter.AvgForDuration(time.Minute) + assert.InEpsilon(t, 3, avg, 0.001, "average of 1,2,3,4,5 is 3") + + // started overwriting from the first index, the c.oldest should update + // 6 2 3 4 5 + counter.Set(float64(6)) + latest = counter.getLast() + oldest = counter.getFirst() + assert.InEpsilon(t, float64(6), latest.Value, 0.001, "latest element should be 6") + assert.InEpsilon(t, float64(2), oldest.Value, 0.001, "oldest element should be 2") + + // 6 7 3 4 5 + counter.Set(float64(7)) + latest = counter.getLast() + oldest = counter.getFirst() + assert.InEpsilon(t, float64(7), latest.Value, 0.001, "latest element should be 7") + assert.InEpsilon(t, float64(3), oldest.Value, 0.001, "oldest element should be 3") +} diff --git a/pkg/snclient/check_cpu_utilization.go b/pkg/snclient/check_cpu_utilization.go index 21a89d38..85337698 100644 --- a/pkg/snclient/check_cpu_utilization.go +++ b/pkg/snclient/check_cpu_utilization.go @@ -174,54 +174,88 @@ func (l *CheckCPUUtilization) addCPUUtilizationMetrics(check *CheckData, scanLoo ) } +//nolint:funlen // moving these statements to new helper functions would be illogical func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizationResult, ok bool) { res = &CPUUtilizationResult{} - counter1 := l.snc.Counter.Get("cpuinfo", "info") - counter2 := l.snc.Counter.Get("cpuinfo", "info") - if counter1 == nil || counter2 == nil { + counter := l.snc.Counter.Get("cpuinfo", "info") + if counter == nil { return nil, false } scanLookBack64, err := convert.Int64E(scanLookBack) if err != nil { - log.Warnf("failed to convert scan look back: %s", err.Error()) + log.Warnf("failed to convert scan look back to int64: %s", err.Error()) return nil, false } - cpuinfo1 := counter1.GetLast() - cpuinfo2 := counter2.GetAt(time.Now().Add(-time.Duration(scanLookBack64) * time.Second)) - if cpuinfo1 == nil || cpuinfo2 == nil { - log.Errorf("Either the latest cpuinfo counter, or the cpuinfo counter from %d seconds ago seem to be null", scanLookBack) + if err = counter.CheckRetention(time.Second*time.Duration(scanLookBack64), 0); err != nil { + log.Tracef("cpuinfo counter cant hold the query range: %s", err.Error()) + } + + if err = counter.CheckRetention(time.Second*time.Duration(scanLookBack64), 100); err != nil { + log.Warnf("cpuinfo counter cant hold the query range even when extended: %s", err.Error()) + + return nil, false + } + + cpuinfoLatest := counter.GetLast() + cpuinfoOldest := counter.GetFirst() + if cpuinfoLatest == nil { + log.Warnf("latest cpuinfo value seems to be null. counter might not be populated yet.") + + return nil, false + } + if cpuinfoOldest == nil { + log.Warnf("oldest cpuinfo value seems to be null. counter might not be populated yet.") return nil, false } - if cpuinfo1.UnixMilli < cpuinfo2.UnixMilli { - log.Errorf("The last cpuinfo counters have a smaller timestamp: %d than the one that was found near %d seconds ago: %d", cpuinfo1.UnixMilli, scanLookBack, cpuinfo2.UnixMilli) + cpuinfoCounterDuration := cpuinfoLatest.UnixMilli - cpuinfoOldest.UnixMilli + if cpuinfoCounterDuration < scanLookBack64*1000 { + log.Tracef("cpuinfo counter has %d ms between its latest and oldest value, cannot properly provide %d s range of query", cpuinfoCounterDuration, scanLookBack) + + // Optionally we can wait on this thread while other threads fill the counter up. + } + + cpuinfoLookBackAgo := counter.GetAt(time.Now().Add(-time.Duration(scanLookBack64) * time.Second)) + if cpuinfoLookBackAgo == nil { + log.Warnf("cpuinfo value search with lower bound of now-%d seconds returned null", scanLookBack) return nil, false } - duration := float64(cpuinfo1.UnixMilli - cpuinfo2.UnixMilli) - if duration <= 0 { - // This case might happen if there is not enough recorded counters to make up the look back time - // We need to wait until that duration difference can be achieved - secondsToSleep := min(scanLookBack, 5) + duration := float64(cpuinfoLatest.UnixMilli - cpuinfoLookBackAgo.UnixMilli) + acceptableDurationMultipler := 0.5 + minimumAcceptableDuration := float64(scanLookBack) * 1000 * acceptableDurationMultipler + + switch { + case duration <= 0: + // This case might happen if there is only one counter value so far + log.Tracef("counter query from now-%d seconds <-> latest returned a range of %f ms. This is not positive, there might not be enough values recorded yet.", scanLookBack, duration) - log.Tracef("Waiting %d seconds and returning that value, as cpu utilization metrics for the last %d seconds is not available yet.", secondsToSleep, scanLookBack) - time.Sleep(time.Second * time.Duration(convert.Int32(secondsToSleep))) + return nil, false + case duration < minimumAcceptableDuration: + log.Tracef("counter query from now-%d seconds <-> latest returned a range of %f ms. This is not bellow the acceptable range, the data may be unrepresentative. "+ + "acceptableDurationMultipler * scanLookBack seconds : %f * %f = %f ", + scanLookBack, duration, acceptableDurationMultipler, float64(scanLookBack), minimumAcceptableDuration) - return l.getMetrics(secondsToSleep - 1) + // Optionally we can return an empty result here + case duration <= float64(scanLookBack)*1000: + log.Tracef("counter query from now-%d seconds <-> latest returned a range of %f ms. This is in the acceptable range, the data should be representative. "+ + "acceptableDurationMultipler * scanLookBack seconds : %f * %f = %f ", + scanLookBack, duration, acceptableDurationMultipler, float64(scanLookBack), minimumAcceptableDuration) + default: + log.Tracef("counter query from now-%d seconds <-> latest returned a range of %f ms. This is higher than the query range and something must have gone wrong.", scanLookBack, duration) } - duration /= 1e3 // cpu times are measured in seconds - info1, ok := cpuinfo1.Value.(*cpuinfo.TimesStat) + info1, ok := cpuinfoLatest.Value.(*cpuinfo.TimesStat) if !ok { return nil, false } - info2, ok := cpuinfo2.Value.(*cpuinfo.TimesStat) + info2, ok := cpuinfoLookBackAgo.Value.(*cpuinfo.TimesStat) if !ok { return nil, false } @@ -233,12 +267,13 @@ func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizati return nil, false } - res.user = (((info1.User - info2.User) / duration) * 100) / float64(numCPU) - res.system = (((info1.System - info2.System) / duration) * 100) / float64(numCPU) - res.iowait = (((info1.Iowait - info2.Iowait) / duration) * 100) / float64(numCPU) - res.steal = (((info1.Steal - info2.Steal) / duration) * 100) / float64(numCPU) - res.guest = (((info1.Guest - info2.Guest) / duration) * 100) / float64(numCPU) - res.idle = (((info1.Idle - info2.Idle) / duration) * 100) / float64(numCPU) + durationInS := duration / 1e3 // cpu times are measured in seconds + res.user = (((info1.User - info2.User) / durationInS * 100) / float64(numCPU)) + res.system = (((info1.System - info2.System) / durationInS * 100) / float64(numCPU)) + res.iowait = (((info1.Iowait - info2.Iowait) / durationInS * 100) / float64(numCPU)) + res.steal = (((info1.Steal - info2.Steal) / durationInS * 100) / float64(numCPU)) + res.guest = (((info1.Guest - info2.Guest) / durationInS * 100) / float64(numCPU)) + res.idle = (((info1.Idle - info2.Idle) / durationInS * 100) / float64(numCPU)) res.total = (res.user + res.system + res.iowait) return res, true From bbd3a747163cfc7dc54c331da00f964765dcdce3 Mon Sep 17 00:00:00 2001 From: Ahmet Oeztuerk Date: Thu, 4 Dec 2025 15:01:21 +0100 Subject: [PATCH 3/3] address comments fix typos and use range loops instead of loop variable --- pkg/counter/counter.go | 17 ++++++++--------- pkg/snclient/check_cpu_utilization.go | 4 ++-- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/pkg/counter/counter.go b/pkg/counter/counter.go index f86da1bf..4eeaf5af 100644 --- a/pkg/counter/counter.go +++ b/pkg/counter/counter.go @@ -29,12 +29,12 @@ type Value struct { // NewCounter creates a new Counter with given retention time and interval func NewCounter(retentionTime, interval time.Duration) *Counter { // round retention and interval to milliseconds - retentionMilli := retentionTime.Milliseconds() - intervalMilli := interval.Milliseconds() + retentionMili := retentionTime.Milliseconds() + intervalMili := interval.Milliseconds() - // round retentionMili time to a multiple of interval - retentionMili := int64(math.Ceil(float64(retentionMilli)/float64(intervalMilli))) * intervalMilli - size := retentionMili / intervalMilli + // round retentionMili to a multiple of interval + retentionMiliRounded := int64(math.Ceil(float64(retentionMili)/float64(intervalMili))) * intervalMili + size := retentionMiliRounded / intervalMili return &Counter{ lock: sync.RWMutex{}, @@ -42,7 +42,7 @@ func NewCounter(retentionTime, interval time.Duration) *Counter { size: size, current: -1, oldest: -1, - retention: time.Duration(retentionMili) * time.Millisecond, + retention: time.Duration(retentionMiliRounded) * time.Millisecond, interval: interval, timesSet: 0, } @@ -85,8 +85,7 @@ func (c *Counter) AvgForDuration(duration time.Duration) float64 { return 0 } - //nolint:intrange // tracking the seen elements is easier - for seen := int64(0); seen < c.size; seen++ { + for range c.size { if c.data[idx].UnixMilli > useAfter { if val, ok := c.data[idx].Value.(float64); ok { sum += val @@ -201,7 +200,7 @@ func (c *Counter) getAt(lowerBound time.Time) *Value { } var previouslyComparedValue *Value - for valuesSeen := int64(0); valuesSeen <= c.size; valuesSeen++ { + for range c.size { currentValue := &c.data[idx] if currentValue.UnixMilli < useAfterUnix { return previouslyComparedValue diff --git a/pkg/snclient/check_cpu_utilization.go b/pkg/snclient/check_cpu_utilization.go index 85337698..f1c20ec1 100644 --- a/pkg/snclient/check_cpu_utilization.go +++ b/pkg/snclient/check_cpu_utilization.go @@ -191,11 +191,11 @@ func (l *CheckCPUUtilization) getMetrics(scanLookBack uint64) (res *CPUUtilizati } if err = counter.CheckRetention(time.Second*time.Duration(scanLookBack64), 0); err != nil { - log.Tracef("cpuinfo counter cant hold the query range: %s", err.Error()) + log.Tracef("cpuinfo counter can not hold the query range: %s", err.Error()) } if err = counter.CheckRetention(time.Second*time.Duration(scanLookBack64), 100); err != nil { - log.Warnf("cpuinfo counter cant hold the query range even when extended: %s", err.Error()) + log.Warnf("cpuinfo counter can not hold the query range even when extended: %s", err.Error()) return nil, false }