Skip to content

Commit ee35d85

Browse files
chore: use percentages for alert config
1 parent a975810 commit ee35d85

File tree

4 files changed

+54
-48
lines changed

4 files changed

+54
-48
lines changed

coderd/agentapi/api.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,14 @@ func New(opts Options) *API {
120120
NotificationsEnqueuer: opts.NotificationsEnqueuer,
121121
Debounce: 5 * time.Minute,
122122

123-
// These values assume a window of 20
124123
Config: resourcesmonitor.Config{
125-
MinimumNOKsToAlert: 4,
126-
ConsecutiveNOKsToAlert: 10,
124+
NumDatapoints: 20,
125+
CollectionInterval: 10 * time.Second,
126+
127+
Alert: resourcesmonitor.AlertConfig{
128+
MinimumNOKsPercent: 20,
129+
ConsecutiveNOKsPercent: 50,
130+
},
127131
},
128132
}
129133

coderd/agentapi/resources_monitoring.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context
4848

4949
return &proto.GetResourcesMonitoringConfigurationResponse{
5050
Config: &proto.GetResourcesMonitoringConfigurationResponse_Config{
51-
CollectionIntervalSeconds: 10,
52-
NumDatapoints: 20,
51+
CollectionIntervalSeconds: int32(a.Config.CollectionInterval.Seconds()),
52+
NumDatapoints: a.Config.NumDatapoints,
5353
},
5454
Memory: func() *proto.GetResourcesMonitoringConfigurationResponse_Memory {
5555
if memoryErr != nil {

coderd/agentapi/resources_monitoring_test.go

Lines changed: 17 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,13 @@ func resourceMonitorAPI(t *testing.T) (*agentapi.ResourcesMonitoringAPI, databas
6565
Database: db,
6666
NotificationsEnqueuer: notifyEnq,
6767
Config: resourcesmonitor.Config{
68-
MinimumNOKsToAlert: 4,
69-
ConsecutiveNOKsToAlert: 10,
68+
NumDatapoints: 20,
69+
CollectionInterval: 10 * time.Second,
70+
71+
Alert: resourcesmonitor.AlertConfig{
72+
MinimumNOKsPercent: 20,
73+
ConsecutiveNOKsPercent: 50,
74+
},
7075
},
7176
Debounce: 1 * time.Minute,
7277
}, user, clock, notifyEnq
@@ -87,7 +92,7 @@ func TestMemoryResourceMonitorDebounce(t *testing.T) {
8792
// 5. OK -> NOK |> sends a notification as debounce period exceeded
8893

8994
api, user, clock, notifyEnq := resourceMonitorAPI(t)
90-
api.Config.ConsecutiveNOKsToAlert = 1
95+
api.Config.Alert.ConsecutiveNOKsPercent = 100
9196

9297
// Given: A monitor in an OK state
9398
dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
@@ -281,8 +286,6 @@ func TestMemoryResourceMonitor(t *testing.T) {
281286
t.Parallel()
282287

283288
api, user, clock, notifyEnq := resourceMonitorAPI(t)
284-
api.Config.MinimumNOKsToAlert = 4
285-
api.Config.ConsecutiveNOKsToAlert = 10
286289

287290
datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.memoryUsage))
288291
collectedAt := clock.Now()
@@ -327,8 +330,8 @@ func TestMemoryResourceMonitorMissingData(t *testing.T) {
327330
t.Parallel()
328331

329332
api, _, clock, notifyEnq := resourceMonitorAPI(t)
330-
api.Config.ConsecutiveNOKsToAlert = 2
331-
api.Config.MinimumNOKsToAlert = 10
333+
api.Config.Alert.ConsecutiveNOKsPercent = 50
334+
api.Config.Alert.MinimumNOKsPercent = 100
332335

333336
// Given: A monitor in an OK state.
334337
dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
@@ -376,8 +379,8 @@ func TestMemoryResourceMonitorMissingData(t *testing.T) {
376379
t.Parallel()
377380

378381
api, _, clock, _ := resourceMonitorAPI(t)
379-
api.Config.ConsecutiveNOKsToAlert = 2
380-
api.Config.MinimumNOKsToAlert = 10
382+
api.Config.Alert.ConsecutiveNOKsPercent = 50
383+
api.Config.Alert.MinimumNOKsPercent = 100
381384

382385
// Given: A monitor in a NOK state.
383386
dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
@@ -448,7 +451,6 @@ func TestVolumeResourceMonitorDebounce(t *testing.T) {
448451
secondVolumePath := "/dev/coder"
449452

450453
api, _, clock, notifyEnq := resourceMonitorAPI(t)
451-
api.Config.MinimumNOKsToAlert = 1
452454

453455
// Given:
454456
// - First monitor in an OK state
@@ -627,17 +629,13 @@ func TestVolumeResourceMonitor(t *testing.T) {
627629
previousState database.WorkspaceAgentMonitorState
628630
expectState database.WorkspaceAgentMonitorState
629631
shouldNotify bool
630-
minimumNOKs int
631-
consecutiveNOKs int
632632
}{
633633
{
634634
name: "WhenOK/NeverExceedsThreshold",
635635
volumePath: "/home/coder",
636636
volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
637637
volumeTotal: 10,
638638
thresholdPercent: 80,
639-
consecutiveNOKs: 4,
640-
minimumNOKs: 10,
641639
previousState: database.WorkspaceAgentMonitorStateOK,
642640
expectState: database.WorkspaceAgentMonitorStateOK,
643641
shouldNotify: false,
@@ -648,8 +646,6 @@ func TestVolumeResourceMonitor(t *testing.T) {
648646
volumeUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
649647
volumeTotal: 10,
650648
thresholdPercent: 80,
651-
consecutiveNOKs: 4,
652-
minimumNOKs: 10,
653649
previousState: database.WorkspaceAgentMonitorStateOK,
654650
expectState: database.WorkspaceAgentMonitorStateOK,
655651
shouldNotify: false,
@@ -660,8 +656,6 @@ func TestVolumeResourceMonitor(t *testing.T) {
660656
volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
661657
volumeTotal: 10,
662658
thresholdPercent: 80,
663-
consecutiveNOKs: 4,
664-
minimumNOKs: 10,
665659
previousState: database.WorkspaceAgentMonitorStateOK,
666660
expectState: database.WorkspaceAgentMonitorStateNOK,
667661
shouldNotify: true,
@@ -672,8 +666,6 @@ func TestVolumeResourceMonitor(t *testing.T) {
672666
volumeUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
673667
volumeTotal: 10,
674668
thresholdPercent: 80,
675-
minimumNOKs: 4,
676-
consecutiveNOKs: 10,
677669
previousState: database.WorkspaceAgentMonitorStateOK,
678670
expectState: database.WorkspaceAgentMonitorStateNOK,
679671
shouldNotify: true,
@@ -684,8 +676,6 @@ func TestVolumeResourceMonitor(t *testing.T) {
684676
volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
685677
volumeTotal: 10,
686678
thresholdPercent: 80,
687-
consecutiveNOKs: 4,
688-
minimumNOKs: 10,
689679
previousState: database.WorkspaceAgentMonitorStateNOK,
690680
expectState: database.WorkspaceAgentMonitorStateOK,
691681
shouldNotify: false,
@@ -696,8 +686,6 @@ func TestVolumeResourceMonitor(t *testing.T) {
696686
volumeUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
697687
volumeTotal: 10,
698688
thresholdPercent: 80,
699-
consecutiveNOKs: 4,
700-
minimumNOKs: 10,
701689
previousState: database.WorkspaceAgentMonitorStateNOK,
702690
expectState: database.WorkspaceAgentMonitorStateNOK,
703691
shouldNotify: false,
@@ -708,8 +696,6 @@ func TestVolumeResourceMonitor(t *testing.T) {
708696
volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
709697
volumeTotal: 10,
710698
thresholdPercent: 80,
711-
consecutiveNOKs: 4,
712-
minimumNOKs: 10,
713699
previousState: database.WorkspaceAgentMonitorStateNOK,
714700
expectState: database.WorkspaceAgentMonitorStateNOK,
715701
shouldNotify: false,
@@ -720,8 +706,6 @@ func TestVolumeResourceMonitor(t *testing.T) {
720706
volumeUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
721707
volumeTotal: 10,
722708
thresholdPercent: 80,
723-
minimumNOKs: 4,
724-
consecutiveNOKs: 10,
725709
previousState: database.WorkspaceAgentMonitorStateNOK,
726710
expectState: database.WorkspaceAgentMonitorStateNOK,
727711
shouldNotify: false,
@@ -735,8 +719,6 @@ func TestVolumeResourceMonitor(t *testing.T) {
735719
t.Parallel()
736720

737721
api, user, clock, notifyEnq := resourceMonitorAPI(t)
738-
api.Config.MinimumNOKsToAlert = tt.minimumNOKs
739-
api.Config.ConsecutiveNOKsToAlert = tt.consecutiveNOKs
740722

741723
datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.volumeUsage))
742724
collectedAt := clock.Now()
@@ -785,7 +767,7 @@ func TestVolumeResourceMonitorMultiple(t *testing.T) {
785767
t.Parallel()
786768

787769
api, _, clock, notifyEnq := resourceMonitorAPI(t)
788-
api.Config.ConsecutiveNOKsToAlert = 1
770+
api.Config.Alert.ConsecutiveNOKsPercent = 100
789771

790772
// Given: two different volume resource monitors
791773
dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
@@ -843,8 +825,8 @@ func TestVolumeResourceMonitorMissingData(t *testing.T) {
843825
volumePath := "/home/coder"
844826

845827
api, _, clock, notifyEnq := resourceMonitorAPI(t)
846-
api.Config.ConsecutiveNOKsToAlert = 2
847-
api.Config.MinimumNOKsToAlert = 10
828+
api.Config.Alert.ConsecutiveNOKsPercent = 50
829+
api.Config.Alert.MinimumNOKsPercent = 100
848830

849831
// Given: A monitor in an OK state.
850832
dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
@@ -902,8 +884,8 @@ func TestVolumeResourceMonitorMissingData(t *testing.T) {
902884
volumePath := "/home/coder"
903885

904886
api, _, clock, _ := resourceMonitorAPI(t)
905-
api.Config.ConsecutiveNOKsToAlert = 2
906-
api.Config.MinimumNOKsToAlert = 10
887+
api.Config.Alert.ConsecutiveNOKsPercent = 50
888+
api.Config.Alert.MinimumNOKsPercent = 100
907889

908890
// Given: A monitor in a NOK state.
909891
dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{

coderd/agentapi/resourcesmonitor/resources_monitor.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package resourcesmonitor
22

33
import (
4+
"math"
5+
"time"
6+
47
"github.com/coder/coder/v2/agent/proto"
58
"github.com/coder/coder/v2/coderd/database"
69
"github.com/coder/coder/v2/coderd/util/slice"
@@ -14,14 +17,25 @@ const (
1417
StateUnknown
1518
)
1619

20+
// AlertConfig holds the thresholds NextState compares against when deciding
// whether a monitor should be in an alert (NOK) state. Both thresholds are
// whole-number percentages of the datapoints passed to NextState, and each is
// met when the measured percentage is greater than or equal to it.
type AlertConfig struct {
21+
// ConsecutiveNOKsPercent is the percentage of datapoints in a row
22+
// that must be NOK to put the monitor in an alert state.
23+
ConsecutiveNOKsPercent int
24+
25+
// MinimumNOKsPercent is the percentage of datapoints in a window
26+
// that must be NOK to put the monitor in an alert state.
27+
MinimumNOKsPercent int
28+
}
29+
1730
// Config describes how resource-monitoring datapoints are collected and
// which thresholds are used to decide when a monitor should alert.
type Config struct {
18-
// How many datapoints in a row are required to
19-
// put the monitor in an alert state.
20-
ConsecutiveNOKsToAlert int
31+
// NumDatapoints is how many datapoints the agent should send.
32+
NumDatapoints int32
2133

22-
// How many datapoints in total are required to
23-
// put the monitor in an alert state.
24-
MinimumNOKsToAlert int
34+
// CollectionInterval is how long to wait between
35+
// collecting each datapoint.
36+
CollectionInterval time.Duration
37+
38+
// Alert holds the percentage thresholds NextState checks
// to decide whether the monitor is in an alert state.
Alert AlertConfig
2539
}
2640

2741
func CalculateMemoryUsageStates(
@@ -75,10 +89,11 @@ func CalculateVolumeUsageStates(
7589
}
7690

7791
func NextState(c Config, oldState database.WorkspaceAgentMonitorState, states []State) database.WorkspaceAgentMonitorState {
92+
7893
// If there are enough consecutive NOK states, we should be in an
7994
// alert state.
8095
consecutiveNOKs := slice.CountConsecutive(StateNOK, states...)
81-
if consecutiveNOKs >= c.ConsecutiveNOKsToAlert {
96+
if percent(consecutiveNOKs, len(states)) >= c.Alert.ConsecutiveNOKsPercent {
8297
return database.WorkspaceAgentMonitorStateNOK
8398
}
8499

@@ -96,7 +111,7 @@ func NextState(c Config, oldState database.WorkspaceAgentMonitorState, states []
96111
}
97112

98113
// If there are enough NOK datapoints, we should be in an alert state.
99-
if nokCount >= c.MinimumNOKsToAlert {
114+
if percent(nokCount, len(states)) >= c.Alert.MinimumNOKsPercent {
100115
return database.WorkspaceAgentMonitorStateNOK
101116
}
102117

@@ -108,3 +123,8 @@ func NextState(c Config, oldState database.WorkspaceAgentMonitorState, states []
108123
// Otherwise we stay in the same state as last.
109124
return oldState
110125
}
126+
127+
// percent returns numerator as a percentage of denominator, rounded to the
// nearest whole number (halves round away from zero, as math.Round does).
//
// A zero denominator yields 0 instead of dividing by zero. Without the guard
// the division produces NaN or ±Inf, and converting a non-representable
// floating-point value to int is implementation-dependent in Go, so callers
// comparing the result against a threshold could behave differently from one
// architecture to another when given an empty set of datapoints.
func percent[T int](numerator, denominator T) int {
	if denominator == 0 {
		return 0
	}

	// Multiply after converting to float64 so a large numerator cannot
	// overflow integer arithmetic before the division happens.
	ratio := float64(numerator) * 100 / float64(denominator)
	return int(math.Round(ratio))
}

0 commit comments

Comments
 (0)