@@ -13,13 +13,16 @@ import (
13
13
14
14
"github.com/google/uuid"
15
15
"github.com/prometheus/client_golang/prometheus"
16
+ promClient "github.com/prometheus/client_model/go"
16
17
"github.com/stretchr/testify/assert"
17
18
"github.com/stretchr/testify/require"
18
19
"tailscale.com/tailcfg"
19
20
20
21
"cdr.dev/slog"
21
22
"cdr.dev/slog/sloggers/slogtest"
22
23
24
+ "github.com/coder/quartz"
25
+
23
26
agentproto "github.com/coder/coder/v2/agent/proto"
24
27
"github.com/coder/coder/v2/coderd/agentmetrics"
25
28
"github.com/coder/coder/v2/coderd/coderdtest"
@@ -38,7 +41,6 @@ import (
38
41
"github.com/coder/coder/v2/tailnet"
39
42
"github.com/coder/coder/v2/tailnet/tailnettest"
40
43
"github.com/coder/coder/v2/testutil"
41
- "github.com/coder/quartz"
42
44
)
43
45
44
46
func TestActiveUsers (t * testing.T ) {
@@ -518,11 +520,10 @@ func TestAgentStats(t *testing.T) {
518
520
defer agent2 .DRPCConn ().Close ()
519
521
defer agent3 .DRPCConn ().Close ()
520
522
521
- registry := prometheus .NewRegistry ()
522
-
523
523
// given
524
+ const workspaceCount = 3
524
525
var i int64
525
- for i = 0 ; i < 3 ; i ++ {
526
+ for i = 0 ; i < workspaceCount ; i ++ {
526
527
_ , err = agent1 .UpdateStats (ctx , & agentproto.UpdateStatsRequest {
527
528
Stats : & agentproto.Stats {
528
529
TxBytes : 1 + i , RxBytes : 2 + i ,
@@ -559,58 +560,96 @@ func TestAgentStats(t *testing.T) {
559
560
// to be posted after this.
560
561
closeBatcher ()
561
562
562
- // when
563
- //
564
- // Set initialCreateAfter to some time in the past, so that AgentStats would include all above PostStats,
565
- // and it doesn't depend on the real time.
566
- closeFunc , err := prometheusmetrics .AgentStats (ctx , slogtest .Make (t , & slogtest.Options {
567
- IgnoreErrors : true ,
568
- }), registry , db , time .Now ().Add (- time .Minute ), time .Millisecond , agentmetrics .LabelAll , false )
569
- require .NoError (t , err )
570
- t .Cleanup (closeFunc )
563
+ tests := []struct {
564
+ name string
565
+ aggregateByLabels []string
566
+ goldenFile string
567
+ metricKeyFn func (metric * promClient.MetricFamily , sample * promClient.Metric ) string
568
+ }{
569
+ {
570
+ name : "unaggregated" ,
571
+ aggregateByLabels : agentmetrics .LabelAll ,
572
+ goldenFile : "testdata/agent-stats.json" ,
573
+ metricKeyFn : func (metric * promClient.MetricFamily , sample * promClient.Metric ) string {
574
+ return fmt .Sprintf ("%s:%s:%s:%s" , sample .Label [1 ].GetValue (), sample .Label [2 ].GetValue (), sample .Label [0 ].GetValue (), metric .GetName ())
575
+ },
576
+ },
577
+ {
578
+ name : "single label aggregation" ,
579
+ aggregateByLabels : []string {agentmetrics .LabelUsername },
580
+ goldenFile : "testdata/agent-stats-aggregated.json" ,
581
+ metricKeyFn : func (metric * promClient.MetricFamily , sample * promClient.Metric ) string {
582
+ return fmt .Sprintf ("%s:%s" , sample .Label [0 ].GetValue (), metric .GetName ())
583
+ },
584
+ },
585
+ }
571
586
572
- // then
573
- goldenFile , err := os .ReadFile ("testdata/agent-stats.json" )
574
- require .NoError (t , err )
575
- golden := map [string ]int {}
576
- err = json .Unmarshal (goldenFile , & golden )
577
- require .NoError (t , err )
587
+ for _ , tc := range tests {
588
+ t .Run (tc .name , func (t * testing.T ) {
589
+ t .Parallel ()
578
590
579
- collected := map [string ]int {}
580
- var executionSeconds bool
581
- assert .Eventually (t , func () bool {
582
- metrics , err := registry .Gather ()
583
- assert .NoError (t , err )
591
+ // when
592
+ //
593
+ // Set initialCreateAfter to some time in the past, so that AgentStats would include all above PostStats,
594
+ // and it doesn't depend on the real time.
595
+ registry := prometheus .NewRegistry ()
596
+ closeFunc , err := prometheusmetrics .AgentStats (ctx , slogtest .Make (t , & slogtest.Options {
597
+ IgnoreErrors : true ,
598
+ }), registry , db , time .Now ().Add (- time .Minute ), time .Millisecond , tc .aggregateByLabels , false ) // TODO: make conditional on ExperimentWorkspaceUsage like in server.go?
599
+ require .NoError (t , err )
600
+ t .Cleanup (closeFunc )
584
601
585
- if len (metrics ) < 1 {
586
- return false
587
- }
602
+ // then
603
+ goldenFile , err := os .ReadFile (tc .goldenFile )
604
+ require .NoError (t , err )
605
+ golden := map [string ]float64 {}
606
+ err = json .Unmarshal (goldenFile , & golden )
607
+ require .NoError (t , err )
588
608
589
- for _ , metric := range metrics {
590
- switch metric .GetName () {
591
- case "coderd_prometheusmetrics_agentstats_execution_seconds" :
592
- executionSeconds = true
593
- case "coderd_agentstats_connection_count" ,
594
- "coderd_agentstats_connection_median_latency_seconds" ,
595
- "coderd_agentstats_rx_bytes" ,
596
- "coderd_agentstats_tx_bytes" ,
597
- "coderd_agentstats_session_count_jetbrains" ,
598
- "coderd_agentstats_session_count_reconnecting_pty" ,
599
- "coderd_agentstats_session_count_ssh" ,
600
- "coderd_agentstats_session_count_vscode" :
601
- for _ , m := range metric .Metric {
602
- // username:workspace:agent:metric = value
603
- collected [m .Label [1 ].GetValue ()+ ":" + m .Label [2 ].GetValue ()+ ":" + m .Label [0 ].GetValue ()+ ":" + metric .GetName ()] = int (m .Gauge .GetValue ())
609
+ collected := map [string ]float64 {}
610
+ // sampleCount := map[string]int{}
611
+ var executionSeconds bool
612
+ assert .Eventually (t , func () bool {
613
+ metrics , err := registry .Gather ()
614
+ assert .NoError (t , err )
615
+
616
+ if len (metrics ) < 1 {
617
+ return false
604
618
}
605
- default :
606
- require .FailNowf (t , "unexpected metric collected" , "metric: %s" , metric .GetName ())
607
- }
608
- }
609
- return executionSeconds && reflect .DeepEqual (golden , collected )
610
- }, testutil .WaitShort , testutil .IntervalFast )
611
619
612
- // Keep this assertion, so that "go test" can print differences instead of "Condition never satisfied"
613
- assert .EqualValues (t , golden , collected )
620
+ for _ , metric := range metrics {
621
+ switch metric .GetName () {
622
+ // This metric is not aggregated, but we need to validate we saw it, at least.
623
+ case "coderd_prometheusmetrics_agentstats_execution_seconds" :
624
+ executionSeconds = true
625
+ case "coderd_agentstats_connection_count" ,
626
+ "coderd_agentstats_connection_median_latency_seconds" ,
627
+ "coderd_agentstats_rx_bytes" ,
628
+ "coderd_agentstats_tx_bytes" ,
629
+ "coderd_agentstats_session_count_jetbrains" ,
630
+ "coderd_agentstats_session_count_reconnecting_pty" ,
631
+ "coderd_agentstats_session_count_ssh" ,
632
+ "coderd_agentstats_session_count_vscode" :
633
+ for _ , sample := range metric .Metric {
634
+ key := tc .metricKeyFn (metric , sample )
635
+ collected [key ] = sample .Gauge .GetValue ()
636
+ // if _, ok := sampleCount[key]; !ok {
637
+ // sampleCount[key] = 0
638
+ // }
639
+ // sampleCount[key]++
640
+ }
641
+ default :
642
+ require .FailNowf (t , "unexpected metric collected" , "metric: %s" , metric .GetName ())
643
+ }
644
+ }
645
+
646
+ return executionSeconds && reflect .DeepEqual (golden , collected )
647
+ }, testutil .WaitShort , testutil .IntervalFast )
648
+
649
+ // Keep this assertion, so that "go test" can print differences instead of "Condition never satisfied"
650
+ assert .EqualValues (t , golden , collected )
651
+ })
652
+ }
614
653
}
615
654
616
655
func TestExperimentsMetric (t * testing.T ) {
0 commit comments