Skip to content

Commit df80e9b

Browse files
committed
Use MustNewConstMetric
1 parent 99fe1bf commit df80e9b

File tree

5 files changed

+90
-20
lines changed

5 files changed

+90
-20
lines changed

agent/metrics.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ func isIgnoredMetric(metricName string) bool {
3333
if strings.HasPrefix(metricName, "dns_") ||
3434
strings.HasPrefix(metricName, "controlclient_") ||
3535
strings.HasPrefix(metricName, "peerapi_") ||
36-
strings.HasPrefix(metricName, "profiles_") {
36+
strings.HasPrefix(metricName, "profiles_") ||
37+
strings.HasPrefix(metricName, "tstun_") {
3738
return true
3839
}
3940
return false

coderd/coderd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ type Options struct {
149149

150150
HTTPClient *http.Client
151151

152-
UpdateAgentMetrics func(ctx context.Context, workspaceID uuid.UUID, agentID uuid.UUID, metrics []agentsdk.AgentMetric)
152+
UpdateAgentMetrics func(ctx context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric)
153153
}
154154

155155
// @title Coder API

coderd/prometheusmetrics/aggregator.go

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,37 @@ package prometheusmetrics
22

33
import (
44
"context"
5-
"log"
5+
"sync"
66

7-
"github.com/google/uuid"
87
"github.com/prometheus/client_golang/prometheus"
8+
"golang.org/x/xerrors"
9+
10+
"cdr.dev/slog"
911

1012
"github.com/coder/coder/codersdk/agentsdk"
1113
)
1214

13-
type MetricsAggregator struct{}
15+
const (
16+
// metricHelpForAgent is a help string that replaces all agent metric help
17+
// messages. This is because a registry cannot have conflicting
18+
// help messages for the same metric in a "gather". If our coder agents are
19+
// on different versions, this is a possible scenario.
20+
metricHelpForAgent = "Metric is forwarded from workspace agent connected to this instance of coderd."
21+
)
22+
23+
// MetricsAggregator is a prometheus.Collector that queues the metric batches
// handed to Update and emits them as const metrics from Collect.
//
// Fields: m is locked by both Collect and Update before they touch queue;
// log receives an error entry when Collect cannot map a metric type; queue
// gains one annotatedMetrics entry per Update call, in arrival order.
type MetricsAggregator struct {
24+
m sync.Mutex
25+
log slog.Logger
26+
queue []annotatedMetrics
27+
}
28+
29+
// annotatedMetrics is one queued batch of agent metrics together with the
// username, workspace name and agent name that Collect attaches to every
// metric in the batch as label values.
type annotatedMetrics struct {
30+
username string
31+
workspaceName string
32+
agentName string
33+
34+
metrics []agentsdk.AgentMetric
35+
}
1436

1537
var _ prometheus.Collector = new(MetricsAggregator)
1638

@@ -19,11 +41,47 @@ var _ prometheus.Collector = new(MetricsAggregator)
1941
func (*MetricsAggregator) Describe(_ chan<- *prometheus.Desc) {
2042
}
2143

44+
var agentMetricsLabels = []string{usernameLabel, workspaceNameLabel, agentNameLabel}
45+
2246
func (ma *MetricsAggregator) Collect(ch chan<- prometheus.Metric) {
47+
ma.m.Lock()
48+
defer ma.m.Unlock()
49+
50+
for _, annotated := range ma.queue {
51+
for _, m := range annotated.metrics {
52+
desc := prometheus.NewDesc(m.Name, metricHelpForAgent, agentMetricsLabels, nil)
53+
valueType, err := asPrometheusValueType(m.Type)
54+
if err != nil {
55+
ma.log.Error(context.Background(), "can't convert Prometheus value type", slog.F("value_type", m.Type), slog.Error(err))
56+
}
57+
constMetric := prometheus.MustNewConstMetric(desc, valueType, m.Value, annotated.username, annotated.workspaceName, annotated.agentName)
58+
ch <- constMetric
59+
}
60+
}
2361
}
2462

2563
// TODO Run function with done channel
2664

27-
func (ma *MetricsAggregator) Update(ctx context.Context, workspaceID uuid.UUID, agentID uuid.UUID, metrics []agentsdk.AgentMetric) {
28-
log.Printf("Workspace: %s, Agent: %s, Metrics: %v", workspaceID, agentID, metrics) // FIXME
65+
func (ma *MetricsAggregator) Update(_ context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric) {
66+
ma.m.Lock()
67+
defer ma.m.Unlock()
68+
69+
ma.queue = append(ma.queue, annotatedMetrics{
70+
username: username,
71+
workspaceName: workspaceName,
72+
agentName: agentName,
73+
74+
metrics: metrics,
75+
})
76+
}
77+
78+
func asPrometheusValueType(metricType agentsdk.AgentMetricType) (prometheus.ValueType, error) {
79+
switch metricType {
80+
case agentsdk.AgentMetricTypeGauge:
81+
return prometheus.GaugeValue, nil
82+
case agentsdk.AgentMetricTypeCounter:
83+
return prometheus.CounterValue, nil
84+
default:
85+
return -1, xerrors.Errorf("unsupported value type: %s", metricType)
86+
}
2987
}

coderd/prometheusmetrics/prometheusmetrics.go

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ import (
2222
"github.com/coder/coder/tailnet"
2323
)
2424

25+
const (
26+
agentNameLabel = "agent_name"
27+
usernameLabel = "username"
28+
workspaceNameLabel = "workspace_name"
29+
)
30+
2531
// ActiveUsers tracks the number of users that have authenticated within the past hour.
2632
func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
2733
if duration == 0 {
@@ -140,7 +146,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
140146
Subsystem: "agents",
141147
Name: "up",
142148
Help: "The number of active agents per workspace.",
143-
}, []string{"username", "workspace_name"}))
149+
}, []string{usernameLabel, workspaceNameLabel}))
144150
err := registerer.Register(agentsGauge)
145151
if err != nil {
146152
return nil, err
@@ -151,7 +157,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
151157
Subsystem: "agents",
152158
Name: "connections",
153159
Help: "Agent connections with statuses.",
154-
}, []string{"agent_name", "username", "workspace_name", "status", "lifecycle_state", "tailnet_node"}))
160+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel, "status", "lifecycle_state", "tailnet_node"}))
155161
err = registerer.Register(agentsConnectionsGauge)
156162
if err != nil {
157163
return nil, err
@@ -162,7 +168,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
162168
Subsystem: "agents",
163169
Name: "connection_latencies_seconds",
164170
Help: "Agent connection latencies in seconds.",
165-
}, []string{"agent_name", "username", "workspace_name", "derp_region", "preferred"}))
171+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel, "derp_region", "preferred"}))
166172
err = registerer.Register(agentsConnectionLatenciesGauge)
167173
if err != nil {
168174
return nil, err
@@ -173,7 +179,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
173179
Subsystem: "agents",
174180
Name: "apps",
175181
Help: "Agent applications with statuses.",
176-
}, []string{"agent_name", "username", "workspace_name", "app_name", "health"}))
182+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel, "app_name", "health"}))
177183
err = registerer.Register(agentsAppsGauge)
178184
if err != nil {
179185
return nil, err
@@ -333,7 +339,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
333339
Subsystem: "agentstats",
334340
Name: "tx_bytes",
335341
Help: "Agent Tx bytes",
336-
}, []string{"agent_name", "username", "workspace_name"}))
342+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel}))
337343
err = registerer.Register(agentStatsTxBytesGauge)
338344
if err != nil {
339345
return nil, err
@@ -344,7 +350,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
344350
Subsystem: "agentstats",
345351
Name: "rx_bytes",
346352
Help: "Agent Rx bytes",
347-
}, []string{"agent_name", "username", "workspace_name"}))
353+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel}))
348354
err = registerer.Register(agentStatsRxBytesGauge)
349355
if err != nil {
350356
return nil, err
@@ -355,7 +361,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
355361
Subsystem: "agentstats",
356362
Name: "connection_count",
357363
Help: "The number of established connections by agent",
358-
}, []string{"agent_name", "username", "workspace_name"}))
364+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel}))
359365
err = registerer.Register(agentStatsConnectionCountGauge)
360366
if err != nil {
361367
return nil, err
@@ -366,7 +372,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
366372
Subsystem: "agentstats",
367373
Name: "connection_median_latency_seconds",
368374
Help: "The median agent connection latency in seconds",
369-
}, []string{"agent_name", "username", "workspace_name"}))
375+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel}))
370376
err = registerer.Register(agentStatsConnectionMedianLatencyGauge)
371377
if err != nil {
372378
return nil, err
@@ -377,7 +383,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
377383
Subsystem: "agentstats",
378384
Name: "session_count_jetbrains",
379385
Help: "The number of session established by JetBrains",
380-
}, []string{"agent_name", "username", "workspace_name"}))
386+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel}))
381387
err = registerer.Register(agentStatsSessionCountJetBrainsGauge)
382388
if err != nil {
383389
return nil, err
@@ -388,7 +394,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
388394
Subsystem: "agentstats",
389395
Name: "session_count_reconnecting_pty",
390396
Help: "The number of session established by reconnecting PTY",
391-
}, []string{"agent_name", "username", "workspace_name"}))
397+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel}))
392398
err = registerer.Register(agentStatsSessionCountReconnectingPTYGauge)
393399
if err != nil {
394400
return nil, err
@@ -399,7 +405,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
399405
Subsystem: "agentstats",
400406
Name: "session_count_ssh",
401407
Help: "The number of session established by SSH",
402-
}, []string{"agent_name", "username", "workspace_name"}))
408+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel}))
403409
err = registerer.Register(agentStatsSessionCountSSHGauge)
404410
if err != nil {
405411
return nil, err
@@ -410,7 +416,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
410416
Subsystem: "agentstats",
411417
Name: "session_count_vscode",
412418
Help: "The number of session established by VSCode",
413-
}, []string{"agent_name", "username", "workspace_name"}))
419+
}, []string{agentNameLabel, usernameLabel, workspaceNameLabel}))
414420
err = registerer.Register(agentStatsSessionCountVSCodeGauge)
415421
if err != nil {
416422
return nil, err

coderd/workspaceagents.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1246,7 +1246,12 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
12461246
})
12471247
if api.Options.UpdateAgentMetrics != nil {
12481248
errGroup.Go(func() error {
1249-
api.Options.UpdateAgentMetrics(ctx, workspace.ID, workspaceAgent.ID, req.Metrics)
1249+
user, err := api.Database.GetUserByID(ctx, workspace.OwnerID)
1250+
if err != nil {
1251+
return err
1252+
}
1253+
1254+
api.Options.UpdateAgentMetrics(ctx, user.Username, workspace.Name, workspaceAgent.Name, req.Metrics)
12501255
return nil
12511256
})
12521257
}

0 commit comments

Comments
 (0)