Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions docs/checks/commands/check_cpu_utilization.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ Checks the cpu utilization metrics.

### Default Check

check_cpu_utilization
OK - user: 29% - system: 11% - iowait: 3% - steal: 0% - guest: 0% |'user'=28.83%;;;0;...
check_cpu_utilization
OK - user: 2% - system: 1% - iowait: 0% - steal: 0% - guest: 0 - idle: 96% |'total'=3.4%;90;95;0; 'user'=2.11%;;;0;...

### Example using NRPE and Naemon

Expand All @@ -41,15 +41,15 @@ Naemon Config

## Argument Defaults

| Argument | Default Value |
| ------------- | --------------------------------------------------------------------------------------------------- |
| warning | total > 90 |
| critical | total > 95 |
| empty-state | 0 (OK) |
| empty-syntax | |
| top-syntax | \${status} - \${list} |
| ok-syntax | |
| detail-syntax | user: \${user}% - system: \${system}% - iowait: \${iowait}% - steal: \${steal}% - guest: \${guest}% |
| Argument | Default Value |
| ------------- | ----------------------------------------------------------------------------------------------------- |
| warning | total > 90 |
| critical | total > 95 |
| empty-state | 0 (OK) |
| empty-syntax | |
| top-syntax | \${status} - \${list} |
| ok-syntax | |
| detail-syntax | user: \${user}% - system: \${system}% - iowait: \${iowait}% - steal: \${steal}% - guest: \${guest} - idle: %{idle}% |

## Check Specific Arguments

Expand All @@ -71,3 +71,4 @@ these can be used in filters and thresholds (along with the default attributes):
| iowait | IOWait cpu utilization in percent |
| steal | Steal cpu utilization in percent |
| guest | Guest cpu utilization in percent |
| idle | Idle cpu utilization in percent |
109 changes: 83 additions & 26 deletions pkg/counter/counter.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package counter

import (
"fmt"
"math"
"sync"
"time"
Expand All @@ -9,10 +10,14 @@ import (
// Counter is the container for a single timeseries of performance values.
// It uses a fixed size storage backend: once all slots are used, new values
// overwrite the oldest ones.
type Counter struct {
	lock      sync.RWMutex  // lock for concurrent access
	data      []Value       // array of values, its size is determined by retention and interval
	current   int64         // position of last inserted value, -1 as long as nothing was set
	oldest    int64         // position of the earliest inserted value, -1 as long as nothing was set
	size      int64         // number of values for this series
	timesSet  int64         // number of times a value was set in this counter
	retention time.Duration // the time span this counter can hold, interval * size
	interval  time.Duration // the interval at which new values are designed to be added
}

// Value is a single entry of a Counter
Expand All @@ -24,30 +29,43 @@ type Value struct {
// NewCounter creates a new Counter with given retention time and interval.
//
// Both durations are reduced to whole milliseconds and the retention time is
// rounded up to the next multiple of the interval, so the counter has room for
// ceil(retentionTime / interval) values. An interval below one millisecond is
// treated as one millisecond and the counter always has at least one slot.
// The returned counter is empty: current and oldest are both -1.
func NewCounter(retentionTime, interval time.Duration) *Counter {
	// reduce retention and interval to whole milliseconds
	retentionMilli := retentionTime.Milliseconds()
	// an interval below one millisecond would otherwise divide by zero
	intervalMilli := max(interval.Milliseconds(), 1)

	// number of slots: retention divided by interval, rounded up. Keep at
	// least one slot so Set always has a place to write to.
	size := max(int64(math.Ceil(float64(retentionMilli)/float64(intervalMilli))), 1)

	return &Counter{
		lock:    sync.RWMutex{},
		data:    make([]Value, size),
		size:    size,
		current: -1,
		oldest:  -1,
		// retention rounded to a multiple of interval
		retention: time.Duration(size*intervalMilli) * time.Millisecond,
		interval:  interval,
		timesSet:  0,
	}
}

// Set adds a new value with current timestamp.
//
// The value is written to the slot following the most recent one, wrapping
// around to the start of the storage once its end is reached. From then on
// every call overwrites the oldest entry and moves the oldest position one
// slot further.
func (c *Counter) Set(val any) {
	c.lock.Lock()
	// deferred so the lock is released even if the write below panics
	defer c.lock.Unlock()

	// setting a value for the first time: the oldest entry will be in slot 0
	if c.oldest == -1 {
		c.oldest = 0
	}

	// advance the write position, wrapping around at the end of the storage
	next := c.current + 1
	if next == c.size {
		next = 0
	}
	c.current = next

	slot := &c.data[next]
	slot.UnixMilli = time.Now().UTC().UnixMilli()
	slot.Value = val

	// if the storage was already full, the previous oldest entry just got
	// overwritten and the slot after the current one holds the oldest value
	c.timesSet++
	if c.timesSet > c.size {
		c.oldest = (next + 1) % c.size
	}
}

Expand All @@ -66,7 +84,8 @@ func (c *Counter) AvgForDuration(duration time.Duration) float64 {
if idx == -1 {
return 0
}
for seen := int64(0); seen <= c.size; seen++ {

for range c.size {
if c.data[idx].UnixMilli > useAfter {
if val, ok := c.data[idx].Value.(float64); ok {
sum += val
Expand Down Expand Up @@ -145,35 +164,73 @@ func (c *Counter) getLast() *Value {
return &c.data[c.current]
}

// GetAt returns first value closest to given date
func (c *Counter) GetAt(useAfter time.Time) *Value {
// GetFirst returns first (earliest) value
func (c *Counter) GetFirst() *Value {
c.lock.RLock()
defer c.lock.RUnlock()

return c.getFirst()
}

// getFirst returns a pointer to the entry at the oldest position, or nil as
// long as the oldest position is still -1. It does not take the lock itself.
func (c *Counter) getFirst() *Value {
	first := c.oldest
	// -1 marks a counter without an oldest entry yet
	if first == -1 {
		return nil
	}

	return &c.data[first]
}

// GetAt returns the first value with a timestamp >= lowerBound, searching
// backwards from the most recent value. It returns nil if the counter is empty
// or if the most recent value is already older than lowerBound.
// It holds the read lock while searching.
func (c *Counter) GetAt(lowerBound time.Time) *Value {
	c.lock.RLock()
	defer c.lock.RUnlock()

	return c.getAt(lowerBound)
}

func (c *Counter) getAt(useAfter time.Time) *Value {
useAfterUnix := useAfter.UTC().UnixMilli()
// Gets the first counter that has a >= timestamp than lowerBound
func (c *Counter) getAt(lowerBound time.Time) *Value {
useAfterUnix := lowerBound.UTC().UnixMilli()

// the counter is not yet populated
idx := c.current
if idx == -1 {
return nil
}

var last *Value
for seen := int64(0); seen <= c.size; seen++ {
val := &c.data[idx]
if val.UnixMilli < useAfterUnix {
return last
var previouslyComparedValue *Value
for range c.size {
currentValue := &c.data[idx]
if currentValue.UnixMilli < useAfterUnix {
return previouslyComparedValue
}
last = val

previouslyComparedValue = currentValue
idx--
if idx < 0 {
idx = c.size - 1
}
}

return last
return previouslyComparedValue
}

// CheckRetention checks if the counter can fit the targetRetention.
//
// The retention range of the counter is optionally extended by
// intervalExtensionCount intervals before it is compared. It returns nil if
// the (extended) range is at least targetRetention and a descriptive error
// otherwise.
func (c *Counter) CheckRetention(targetRetention time.Duration, intervalExtensionCount int64) error {
	extendedRetentionRange := c.retention + time.Duration(intervalExtensionCount)*c.interval
	if extendedRetentionRange >= targetRetention {
		return nil
	}

	// without extension the extended range equals the plain retention range
	if intervalExtensionCount == 0 {
		return fmt.Errorf("counter retention range is %f seconds, less than the target retention range of %f seconds",
			extendedRetentionRange.Seconds(), targetRetention.Seconds())
	}

	// report the plain retention range first, then the extended one
	return fmt.Errorf("counter retention range is %f seconds, even when extended by %d intervals to be %f seconds, it is less than target retention range of %f seconds",
		c.retention.Seconds(), intervalExtensionCount, extendedRetentionRange.Seconds(), targetRetention.Seconds())
}

// Float64 returns value as float64
Expand Down
94 changes: 94 additions & 0 deletions pkg/counter/counter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestCounter(t *testing.T) {
Expand Down Expand Up @@ -55,3 +56,96 @@ func TestCounter(t *testing.T) {
set.Delete("test", "key")
assert.Emptyf(t, set.counter, "set is empty now")
}

// TestCounter2 verifies the retention checks of a counter and that the latest
// and oldest values are tracked correctly while the storage fills up and then
// starts overwriting its oldest entries.
func TestCounter2(t *testing.T) {
	set := NewCounterSet()

	// 4.5 seconds of retention are rounded up to a multiple of the interval,
	// so the counter has 5 slots and covers 5 seconds
	retention := time.Millisecond * 4500
	interval := time.Second
	set.Create("test", "key", retention, interval)

	// empty counter
	counter := set.Get("test", "key")
	assert.Nil(t, counter.getLast(), "calling latest on empty counter should return nil")
	assert.Nil(t, counter.getFirst(), "calling oldest on empty counter should return nil")

	retentionChecks := []struct {
		target     time.Duration
		extensions int64
		wantErr    bool
		msg        string
	}{
		{time.Second * 4, 0, false, "the counter should be able to hold 4 seconds"},
		{time.Second * 5, 0, false, "the counter should be able to hold 5 seconds"},
		{time.Second * 6, 0, true, "the counter should not be able to hold 6 seconds"},
		{time.Second * 10, 0, true, "the counter should not be able to hold 10 seconds"},
		{time.Minute, 10, true, "the counter should not be able to hold 1 minute with 10 interval extensions"},
		{time.Minute, 100, false, "the counter should be able to hold 1 minute with 100 interval extensions"},
	}
	for _, check := range retentionChecks {
		err := counter.CheckRetention(check.target, check.extensions)
		if check.wantErr {
			require.Error(t, err, check.msg)

			continue
		}
		require.NoError(t, err, check.msg)
	}

	// setAndVerify adds val to the counter and checks that it became the
	// latest value while the oldest value is wantOldest
	setAndVerify := func(val, wantOldest float64) {
		t.Helper()

		counter.Set(val)
		latest := counter.getLast()
		oldest := counter.getFirst()
		require.NotNil(t, latest, "latest element should be available after setting a value")
		require.NotNil(t, oldest, "oldest element should be available after setting a value")
		assert.InEpsilonf(t, val, latest.Value, 0.001, "latest element should be %v", val)
		assert.InEpsilonf(t, wantOldest, oldest.Value, 0.001, "oldest element should be %v", wantOldest)
	}

	// fill the storage step by step, the oldest value stays 1
	// 1 _ _ _ _  ->  1 2 3 4 5
	for _, val := range []float64{1, 2, 3, 4, 5} {
		setAndVerify(val, 1)
	}

	// check the average now
	avg := counter.AvgForDuration(time.Minute)
	assert.InEpsilon(t, 3, avg, 0.001, "average of 1,2,3,4,5 is 3")

	// started overwriting from the first index, the c.oldest should update
	// 6 2 3 4 5
	setAndVerify(6, 2)

	// 6 7 3 4 5
	setAndVerify(7, 3)
}
Loading