Skip to content

Commit 21d1873

Browse files
authored
feat: make agent stats' cardinality configurable (coder#12468)
Closes coder#12221
1 parent 0647ec1 commit 21d1873

18 files changed

+747
-85
lines changed

cli/server.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,13 +229,13 @@ func enablePrometheus(
229229
afterCtx(ctx, closeInsightsMetricsCollector)
230230

231231
if vals.Prometheus.CollectAgentStats {
232-
closeAgentStatsFunc, err := prometheusmetrics.AgentStats(ctx, logger, options.PrometheusRegistry, options.Database, time.Now(), 0)
232+
closeAgentStatsFunc, err := prometheusmetrics.AgentStats(ctx, logger, options.PrometheusRegistry, options.Database, time.Now(), 0, options.DeploymentValues.Prometheus.AggregateAgentStatsBy.Value())
233233
if err != nil {
234234
return nil, xerrors.Errorf("register agent stats prometheus metric: %w", err)
235235
}
236236
afterCtx(ctx, closeAgentStatsFunc)
237237

238-
metricsAggregator, err := prometheusmetrics.NewMetricsAggregator(logger, options.PrometheusRegistry, 0)
238+
metricsAggregator, err := prometheusmetrics.NewMetricsAggregator(logger, options.PrometheusRegistry, 0, options.DeploymentValues.Prometheus.AggregateAgentStatsBy.Value())
239239
if err != nil {
240240
return nil, xerrors.Errorf("can't initialize metrics aggregator: %w", err)
241241
}

cli/testdata/coder_server_--help.golden

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ INTROSPECTION / PROMETHEUS OPTIONS:
123123
--prometheus-address host:port, $CODER_PROMETHEUS_ADDRESS (default: 127.0.0.1:2112)
124124
The bind address to serve prometheus metrics.
125125

126+
--prometheus-aggregate-agent-stats-by string-array, $CODER_PROMETHEUS_AGGREGATE_AGENT_STATS_BY (default: agent_name,template_name,username,workspace_name)
127+
When collecting agent stats, aggregate metrics by a given set of
128+
comma-separated labels to reduce cardinality. Accepted values are
129+
agent_name, template_name, username, workspace_name.
130+
126131
--prometheus-collect-agent-stats bool, $CODER_PROMETHEUS_COLLECT_AGENT_STATS
127132
Collect agent stats (may increase charges for metrics storage).
128133

cli/testdata/server-config.yaml.golden

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,15 @@ introspection:
188188
# Collect agent stats (may increase charges for metrics storage).
189189
# (default: <unset>, type: bool)
190190
collect_agent_stats: false
191+
# When collecting agent stats, aggregate metrics by a given set of comma-separated
192+
# labels to reduce cardinality. Accepted values are agent_name, template_name,
193+
# username, workspace_name.
194+
# (default: agent_name,template_name,username,workspace_name, type: string-array)
195+
aggregate_agent_stats_by:
196+
- agent_name
197+
- template_name
198+
- username
199+
- workspace_name
191200
# Collect database metrics (may increase charges for metrics storage).
192201
# (default: false, type: bool)
193202
collect_db_metrics: false

coderd/agentmetrics/labels.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package agentmetrics
2+
3+
import (
4+
"strings"
5+
6+
"golang.org/x/xerrors"
7+
)
8+
9+
// Label names that ValidateAggregationLabels accepts as aggregation labels
// (via LabelAll below).
const (
10+
LabelAgentName = "agent_name"
11+
LabelTemplateName = "template_name"
12+
LabelUsername = "username"
13+
LabelWorkspaceName = "workspace_name"
14+
)
15+
16+
var (
17+
// LabelAll lists every label name; ValidateAggregationLabels treats exactly
// this set as acceptable.
LabelAll = []string{LabelAgentName, LabelTemplateName, LabelUsername, LabelWorkspaceName}
18+
// LabelAgentStats is LabelAll without LabelTemplateName.
// NOTE(review): presumably the label set attached to per-agent stats
// metrics — confirm against the callers, which are not visible here.
LabelAgentStats = []string{LabelAgentName, LabelUsername, LabelWorkspaceName}
19+
)
20+
21+
// ValidateAggregationLabels ensures a given set of labels are valid aggregation labels.
22+
func ValidateAggregationLabels(labels []string) error {
23+
acceptable := LabelAll
24+
25+
seen := make(map[string]any, len(acceptable))
26+
for _, label := range acceptable {
27+
seen[label] = nil
28+
}
29+
30+
for _, label := range labels {
31+
if _, found := seen[label]; !found {
32+
return xerrors.Errorf("%q is not a valid aggregation label; only one or more of %q are acceptable",
33+
label, strings.Join(acceptable, ", "))
34+
}
35+
}
36+
37+
return nil
38+
}

coderd/agentmetrics/labels_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package agentmetrics_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/require"
7+
8+
"github.com/coder/coder/v2/coderd/agentmetrics"
9+
)
10+
11+
func TestValidateAggregationLabels(t *testing.T) {
12+
t.Parallel()
13+
14+
tests := []struct {
15+
name string
16+
labels []string
17+
expectedErr bool
18+
}{
19+
{
20+
name: "empty list is valid",
21+
},
22+
{
23+
name: "single valid entry",
24+
labels: []string{agentmetrics.LabelTemplateName},
25+
},
26+
{
27+
name: "multiple valid entries",
28+
labels: []string{agentmetrics.LabelTemplateName, agentmetrics.LabelUsername},
29+
},
30+
{
31+
name: "repeated valid entries are not invalid",
32+
labels: []string{agentmetrics.LabelTemplateName, agentmetrics.LabelUsername, agentmetrics.LabelUsername, agentmetrics.LabelUsername},
33+
},
34+
{
35+
name: "empty entry is invalid",
36+
labels: []string{""},
37+
expectedErr: true,
38+
},
39+
}
40+
41+
for _, tc := range tests {
42+
tc := tc
43+
44+
t.Run(tc.name, func(t *testing.T) {
45+
t.Parallel()
46+
47+
err := agentmetrics.ValidateAggregationLabels(tc.labels)
48+
if tc.expectedErr {
49+
require.Error(t, err)
50+
}
51+
})
52+
}
53+
}

coderd/apidoc/docs.go

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/apidoc/swagger.json

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)