@@ -153,7 +153,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
153
153
Subsystem : "agents" ,
154
154
Name : "up" ,
155
155
Help : "The number of active agents per workspace." ,
156
- }, []string {usernameLabel , workspaceNameLabel }))
156
+ }, []string {usernameLabel , workspaceNameLabel , "template_name" , "template_version" }))
157
157
err := registerer .Register (agentsGauge )
158
158
if err != nil {
159
159
return nil , err
@@ -225,6 +225,10 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
225
225
logger .Debug (ctx , "Agent metrics collection is starting" )
226
226
timer := prometheus .NewTimer (metricsCollectorAgents )
227
227
228
+ // Need to define these ahead of time bc of the use of gotos below
229
+ var templateNamesByID map [uuid.UUID ]string
230
+ var templateVersionNamesByID map [uuid.UUID ]string
231
+
228
232
workspaceRows , err := db .GetWorkspaces (ctx , database.GetWorkspacesParams {
229
233
AgentInactiveDisconnectTimeoutSeconds : int64 (agentInactiveDisconnectTimeout .Seconds ()),
230
234
})
@@ -233,30 +237,44 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
233
237
goto done
234
238
}
235
239
240
+ templateNamesByID , templateVersionNamesByID , err = getTemplatesAndVersionNamesFromWorkspaces (ctx , db , workspaceRows )
241
+ if err != nil {
242
+ logger .Error (ctx , "can't get template info" , slog .Error (err ))
243
+ goto done
244
+ }
245
+
236
246
for _ , workspace := range workspaceRows {
247
+ templateName , found := templateNamesByID [workspace .TemplateID ]
248
+ if ! found {
249
+ templateName = "unknown"
250
+ }
251
+ templateVersionName , found := templateVersionNamesByID [workspace .TemplateID ]
252
+ if ! found {
253
+ templateVersionName = "unknown"
254
+ }
237
255
user , err := db .GetUserByID (ctx , workspace .OwnerID )
238
256
if err != nil {
239
257
logger .Error (ctx , "can't get user" , slog .F ("user_id" , workspace .OwnerID ), slog .Error (err ))
240
- agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name )
258
+ agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name , templateName , templateVersionName )
241
259
continue
242
260
}
243
261
244
262
agents , err := db .GetWorkspaceAgentsInLatestBuildByWorkspaceID (ctx , workspace .ID )
245
263
if err != nil {
246
264
logger .Error (ctx , "can't get workspace agents" , slog .F ("workspace_id" , workspace .ID ), slog .Error (err ))
247
- agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name )
265
+ agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name , templateName , templateVersionName )
248
266
continue
249
267
}
250
268
251
269
if len (agents ) == 0 {
252
270
logger .Debug (ctx , "workspace agents are unavailable" , slog .F ("workspace_id" , workspace .ID ))
253
- agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name )
271
+ agentsGauge .WithLabelValues (VectorOperationAdd , 0 , user .Username , workspace .Name , templateName , templateVersionName )
254
272
continue
255
273
}
256
274
257
275
for _ , agent := range agents {
258
276
// Collect information about agents
259
- agentsGauge .WithLabelValues (VectorOperationAdd , 1 , user .Username , workspace .Name )
277
+ agentsGauge .WithLabelValues (VectorOperationAdd , 1 , user .Username , workspace .Name , templateName , templateVersionName )
260
278
261
279
connectionStatus := agent .Status (agentInactiveDisconnectTimeout )
262
280
node := (* coordinator .Load ()).Node (agent .ID )
@@ -325,6 +343,46 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
325
343
}, nil
326
344
}
327
345
346
+ func getTemplatesAndVersionNamesFromWorkspaces (ctx context.Context , db database.Store , workspaceRows []database.GetWorkspacesRow ) (map [uuid.UUID ]string , map [uuid.UUID ]string , error ) {
347
+ // Aggregate the used template and version IDs to minimize DB calls
348
+ usedTemplateIDs := map [uuid.UUID ]struct {}{}
349
+ usedTemplateVersionIDs := map [uuid.UUID ]struct {}{}
350
+ for _ , workspace := range workspaceRows {
351
+ usedTemplateIDs [workspace .TemplateID ] = struct {}{}
352
+ usedTemplateVersionIDs [workspace .TemplateVersionID ] = struct {}{}
353
+ }
354
+ templatesToGet := make ([]uuid.UUID , 0 , len (usedTemplateIDs ))
355
+ for id := range usedTemplateIDs {
356
+ templatesToGet = append (templatesToGet , id )
357
+ }
358
+ templateVersionsToGet := make ([]uuid.UUID , 0 , len (usedTemplateVersionIDs ))
359
+ for id := range usedTemplateVersionIDs {
360
+ templateVersionsToGet = append (templateVersionsToGet , id )
361
+ }
362
+
363
+ templates , err := db .GetTemplatesWithFilter (ctx , database.GetTemplatesWithFilterParams {
364
+ IDs : templatesToGet ,
365
+ })
366
+ if err != nil {
367
+ return nil , nil , err
368
+ }
369
+ templateNamesByID := make (map [uuid.UUID ]string , len (templates ))
370
+ for _ , template := range templates {
371
+ templateNamesByID [template .ID ] = template .Name
372
+ }
373
+
374
+ versions , err := db .GetTemplateVersionsByIDs (ctx , templateVersionsToGet )
375
+ if err != nil {
376
+ return nil , nil , err
377
+ }
378
+ templateVersionNamesByID := make (map [uuid.UUID ]string , len (versions ))
379
+ for _ , version := range versions {
380
+ templateVersionNamesByID [version .ID ] = version .Name
381
+ }
382
+
383
+ return templateNamesByID , templateVersionNamesByID , nil
384
+ }
385
+
328
386
func AgentStats (ctx context.Context , logger slog.Logger , registerer prometheus.Registerer , db database.Store , initialCreateAfter time.Time , duration time.Duration ) (func (), error ) {
329
387
if duration == 0 {
330
388
duration = 1 * time .Minute
0 commit comments