Skip to content

Commit f867dd7

Browse files
committed
feat: add template tags to agent up metric
This lets us track template and template-version usage across all running workspaces. Until now, that usage could only be tracked through workspace builds.
1 parent 16ebe10 commit f867dd7

File tree

3 files changed

+69
-7
lines changed

3 files changed

+69
-7
lines changed

coderd/database/queries.sql.go

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/workspaces.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ WHERE
7575

7676
-- name: GetWorkspaces :many
7777
SELECT
78-
workspaces.*, COUNT(*) OVER () as count
78+
workspaces.*, latest_build.template_version_id as template_version_id, COUNT(*) OVER () as count
7979
FROM
8080
workspaces
8181
JOIN
@@ -85,6 +85,7 @@ ON
8585
LEFT JOIN LATERAL (
8686
SELECT
8787
workspace_builds.transition,
88+
workspace_builds.template_version_id,
8889
provisioner_jobs.id AS provisioner_job_id,
8990
provisioner_jobs.started_at,
9091
provisioner_jobs.updated_at,

coderd/prometheusmetrics/prometheusmetrics.go

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
153153
Subsystem: "agents",
154154
Name: "up",
155155
Help: "The number of active agents per workspace.",
156-
}, []string{usernameLabel, workspaceNameLabel}))
156+
}, []string{usernameLabel, workspaceNameLabel, "template_name", "template_version"}))
157157
err := registerer.Register(agentsGauge)
158158
if err != nil {
159159
return nil, err
@@ -225,6 +225,10 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
225225
logger.Debug(ctx, "Agent metrics collection is starting")
226226
timer := prometheus.NewTimer(metricsCollectorAgents)
227227

228+
// Declare these before the first `goto done`: Go does not allow a goto to jump over a variable declaration in the same block.
229+
var templateNamesByID map[uuid.UUID]string
230+
var templateVersionNamesByID map[uuid.UUID]string
231+
228232
workspaceRows, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
229233
AgentInactiveDisconnectTimeoutSeconds: int64(agentInactiveDisconnectTimeout.Seconds()),
230234
})
@@ -233,30 +237,44 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
233237
goto done
234238
}
235239

240+
templateNamesByID, templateVersionNamesByID, err = getTemplatesAndVersionNamesFromWorkspaces(ctx, db, workspaceRows)
241+
if err != nil {
242+
logger.Error(ctx, "can't get template info", slog.Error(err))
243+
goto done
244+
}
245+
236246
for _, workspace := range workspaceRows {
247+
templateName, found := templateNamesByID[workspace.TemplateID]
248+
if !found {
249+
templateName = "unknown"
250+
}
251+
templateVersionName, found := templateVersionNamesByID[workspace.TemplateID]
252+
if !found {
253+
templateVersionName = "unknown"
254+
}
237255
user, err := db.GetUserByID(ctx, workspace.OwnerID)
238256
if err != nil {
239257
logger.Error(ctx, "can't get user", slog.F("user_id", workspace.OwnerID), slog.Error(err))
240-
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
258+
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
241259
continue
242260
}
243261

244262
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
245263
if err != nil {
246264
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
247-
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
265+
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
248266
continue
249267
}
250268

251269
if len(agents) == 0 {
252270
logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
253-
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
271+
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
254272
continue
255273
}
256274

257275
for _, agent := range agents {
258276
// Collect information about agents
259-
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name)
277+
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name, templateName, templateVersionName)
260278

261279
connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
262280
node := (*coordinator.Load()).Node(agent.ID)
@@ -325,6 +343,46 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
325343
}, nil
326344
}
327345

346+
func getTemplatesAndVersionNamesFromWorkspaces(ctx context.Context, db database.Store, workspaceRows []database.GetWorkspacesRow) (map[uuid.UUID]string, map[uuid.UUID]string, error) {
347+
// Aggregate the used template and version IDs to minimize DB calls
348+
usedTemplateIDs := map[uuid.UUID]struct{}{}
349+
usedTemplateVersionIDs := map[uuid.UUID]struct{}{}
350+
for _, workspace := range workspaceRows {
351+
usedTemplateIDs[workspace.TemplateID] = struct{}{}
352+
usedTemplateVersionIDs[workspace.TemplateVersionID] = struct{}{}
353+
}
354+
templatesToGet := make([]uuid.UUID, 0, len(usedTemplateIDs))
355+
for id := range usedTemplateIDs {
356+
templatesToGet = append(templatesToGet, id)
357+
}
358+
templateVersionsToGet := make([]uuid.UUID, 0, len(usedTemplateVersionIDs))
359+
for id := range usedTemplateVersionIDs {
360+
templateVersionsToGet = append(templateVersionsToGet, id)
361+
}
362+
363+
templates, err := db.GetTemplatesWithFilter(ctx, database.GetTemplatesWithFilterParams{
364+
IDs: templatesToGet,
365+
})
366+
if err != nil {
367+
return nil, nil, err
368+
}
369+
templateNamesByID := make(map[uuid.UUID]string, len(templates))
370+
for _, template := range templates {
371+
templateNamesByID[template.ID] = template.Name
372+
}
373+
374+
versions, err := db.GetTemplateVersionsByIDs(ctx, templateVersionsToGet)
375+
if err != nil {
376+
return nil, nil, err
377+
}
378+
templateVersionNamesByID := make(map[uuid.UUID]string, len(versions))
379+
for _, version := range versions {
380+
templateVersionNamesByID[version.ID] = version.Name
381+
}
382+
383+
return templateNamesByID, templateVersionNamesByID, nil
384+
}
385+
328386
func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration) (func(), error) {
329387
if duration == 0 {
330388
duration = 1 * time.Minute

0 commit comments

Comments
 (0)