@@ -19,6 +19,7 @@ import (
19
19
"github.com/coder/coder/v2/coderd/database/dbauthz"
20
20
"github.com/coder/coder/v2/coderd/database/dbgen"
21
21
"github.com/coder/coder/v2/coderd/database/dbtestutil"
22
+ "github.com/coder/coder/v2/coderd/database/dbtime"
22
23
agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds"
23
24
"github.com/coder/coder/v2/codersdk"
24
25
"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
@@ -165,27 +166,12 @@ func TestMetricsCollector(t *testing.T) {
165
166
eligible : []bool {false },
166
167
},
167
168
{
168
- name : "deleted templates never desire prebuilds" ,
169
- transitions : allTransitions ,
170
- jobStatuses : allJobStatuses ,
171
- initiatorIDs : []uuid.UUID {agplprebuilds .SystemUserID },
172
- ownerIDs : []uuid.UUID {agplprebuilds .SystemUserID , uuid .New ()},
173
- metrics : []metricCheck {
174
- {prebuilds .MetricDesiredGauge , ptr .To (0.0 ), false },
175
- },
176
- templateDeleted : []bool {true },
177
- eligible : []bool {false },
178
- },
179
- {
180
- name : "running prebuilds for deleted templates are still counted, so that they can be deleted" ,
181
- transitions : []database.WorkspaceTransition {database .WorkspaceTransitionStart },
182
- jobStatuses : []database.ProvisionerJobStatus {database .ProvisionerJobStatusSucceeded },
183
- initiatorIDs : []uuid.UUID {agplprebuilds .SystemUserID },
184
- ownerIDs : []uuid.UUID {agplprebuilds .SystemUserID },
185
- metrics : []metricCheck {
186
- {prebuilds .MetricRunningGauge , ptr .To (1.0 ), false },
187
- {prebuilds .MetricEligibleGauge , ptr .To (0.0 ), false },
188
- },
169
+ name : "deleted templates should not be included in exported metrics" ,
170
+ transitions : allTransitions ,
171
+ jobStatuses : allJobStatuses ,
172
+ initiatorIDs : []uuid.UUID {agplprebuilds .SystemUserID },
173
+ ownerIDs : []uuid.UUID {agplprebuilds .SystemUserID , uuid .New ()},
174
+ metrics : nil ,
189
175
templateDeleted : []bool {true },
190
176
eligible : []bool {false },
191
177
},
@@ -281,6 +267,12 @@ func TestMetricsCollector(t *testing.T) {
281
267
"organization_name" : org .Name ,
282
268
}
283
269
270
+ // If no expected metrics have been defined, ensure we don't find any metric series (i.e. metrics with given labels).
271
+ if test .metrics == nil {
272
+ series := findAllMetricSeries (metricsFamilies , labels )
273
+ require .Empty (t , series )
274
+ }
275
+
284
276
for _ , check := range test .metrics {
285
277
metric := findMetric (metricsFamilies , check .name , labels )
286
278
if check .value == nil {
@@ -307,6 +299,131 @@ func TestMetricsCollector(t *testing.T) {
307
299
}
308
300
}
309
301
302
+ // TestMetricsCollector_DuplicateTemplateNames validates a bug that we saw previously which caused duplicate metric series
303
+ // registration when a template was deleted and a new one created with the same name (and preset name).
304
+ // We are now excluding deleted templates from our metric collection.
305
+ func TestMetricsCollector_DuplicateTemplateNames (t * testing.T ) {
306
+ t .Parallel ()
307
+
308
+ if ! dbtestutil .WillUsePostgres () {
309
+ t .Skip ("this test requires postgres" )
310
+ }
311
+
312
+ type metricCheck struct {
313
+ name string
314
+ value * float64
315
+ isCounter bool
316
+ }
317
+
318
+ type testCase struct {
319
+ transition database.WorkspaceTransition
320
+ jobStatus database.ProvisionerJobStatus
321
+ initiatorID uuid.UUID
322
+ ownerID uuid.UUID
323
+ metrics []metricCheck
324
+ eligible bool
325
+ }
326
+
327
+ test := testCase {
328
+ transition : database .WorkspaceTransitionStart ,
329
+ jobStatus : database .ProvisionerJobStatusSucceeded ,
330
+ initiatorID : agplprebuilds .SystemUserID ,
331
+ ownerID : agplprebuilds .SystemUserID ,
332
+ metrics : []metricCheck {
333
+ {prebuilds .MetricCreatedCount , ptr .To (1.0 ), true },
334
+ {prebuilds .MetricClaimedCount , ptr .To (0.0 ), true },
335
+ {prebuilds .MetricFailedCount , ptr .To (0.0 ), true },
336
+ {prebuilds .MetricDesiredGauge , ptr .To (1.0 ), false },
337
+ {prebuilds .MetricRunningGauge , ptr .To (1.0 ), false },
338
+ {prebuilds .MetricEligibleGauge , ptr .To (1.0 ), false },
339
+ },
340
+ eligible : true ,
341
+ }
342
+
343
+ logger := slogtest .Make (t , & slogtest.Options {IgnoreErrors : true })
344
+ clock := quartz .NewMock (t )
345
+ db , pubsub := dbtestutil .NewDB (t )
346
+ reconciler := prebuilds .NewStoreReconciler (db , pubsub , codersdk.PrebuildsConfig {}, logger , quartz .NewMock (t ), prometheus .NewRegistry (), newNoopEnqueuer ())
347
+ ctx := testutil .Context (t , testutil .WaitLong )
348
+
349
+ collector := prebuilds .NewMetricsCollector (db , logger , reconciler )
350
+ registry := prometheus .NewPedanticRegistry ()
351
+ registry .Register (collector )
352
+
353
+ presetName := "default-preset"
354
+ defaultOrg := dbgen .Organization (t , db , database.Organization {})
355
+ setupTemplateWithDeps := func () database.Template {
356
+ template := setupTestDBTemplateWithinOrg (t , db , test .ownerID , false , "default-template" , defaultOrg )
357
+ templateVersionID := setupTestDBTemplateVersion (ctx , t , clock , db , pubsub , defaultOrg .ID , test .ownerID , template .ID )
358
+ preset := setupTestDBPreset (t , db , templateVersionID , 1 , "default-preset" )
359
+ workspace , _ := setupTestDBWorkspace (
360
+ t , clock , db , pubsub ,
361
+ test .transition , test .jobStatus , defaultOrg .ID , preset , template .ID , templateVersionID , test .initiatorID , test .ownerID ,
362
+ )
363
+ setupTestDBWorkspaceAgent (t , db , workspace .ID , test .eligible )
364
+ return template
365
+ }
366
+
367
+ // When: starting with a regular template.
368
+ template := setupTemplateWithDeps ()
369
+ labels := map [string ]string {
370
+ "template_name" : template .Name ,
371
+ "preset_name" : presetName ,
372
+ "organization_name" : defaultOrg .Name ,
373
+ }
374
+
375
+ // nolint:gocritic // Authz context needed to retrieve state.
376
+ ctx = dbauthz .AsPrebuildsOrchestrator (ctx )
377
+
378
+ // Then: metrics collect successfully.
379
+ require .NoError (t , collector .UpdateState (ctx , testutil .WaitLong ))
380
+ metricsFamilies , err := registry .Gather ()
381
+ require .NoError (t , err )
382
+ require .NotEmpty (t , findAllMetricSeries (metricsFamilies , labels ))
383
+
384
+ // When: the template is deleted.
385
+ require .NoError (t , db .UpdateTemplateDeletedByID (ctx , database.UpdateTemplateDeletedByIDParams {
386
+ ID : template .ID ,
387
+ Deleted : true ,
388
+ UpdatedAt : dbtime .Now (),
389
+ }))
390
+
391
+ // Then: metrics collect successfully but are empty because the template is deleted.
392
+ require .NoError (t , collector .UpdateState (ctx , testutil .WaitLong ))
393
+ metricsFamilies , err = registry .Gather ()
394
+ require .NoError (t , err )
395
+ require .Empty (t , findAllMetricSeries (metricsFamilies , labels ))
396
+
397
+ // When: a new template is created with the same name as the deleted template.
398
+ newTemplate := setupTemplateWithDeps ()
399
+
400
+ // Ensure the database has both the new and old (delete) template.
401
+ {
402
+ deleted , err := db .GetTemplateByOrganizationAndName (ctx , database.GetTemplateByOrganizationAndNameParams {
403
+ OrganizationID : template .OrganizationID ,
404
+ Deleted : true ,
405
+ Name : template .Name ,
406
+ })
407
+ require .NoError (t , err )
408
+ require .Equal (t , template .ID , deleted .ID )
409
+
410
+ current , err := db .GetTemplateByOrganizationAndName (ctx , database.GetTemplateByOrganizationAndNameParams {
411
+ // Use details from deleted template to ensure they're aligned.
412
+ OrganizationID : template .OrganizationID ,
413
+ Deleted : false ,
414
+ Name : template .Name ,
415
+ })
416
+ require .NoError (t , err )
417
+ require .Equal (t , newTemplate .ID , current .ID )
418
+ }
419
+
420
+ // Then: metrics collect successfully.
421
+ require .NoError (t , collector .UpdateState (ctx , testutil .WaitLong ))
422
+ metricsFamilies , err = registry .Gather ()
423
+ require .NoError (t , err )
424
+ require .NotEmpty (t , findAllMetricSeries (metricsFamilies , labels ))
425
+ }
426
+
310
427
func findMetric (metricsFamilies []* prometheus_client.MetricFamily , name string , labels map [string ]string ) * prometheus_client.Metric {
311
428
for _ , metricFamily := range metricsFamilies {
312
429
if metricFamily .GetName () != name {
@@ -334,3 +451,33 @@ func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string,
334
451
}
335
452
return nil
336
453
}
454
+
455
+ // findAllMetricSeries finds all metrics with a given set of labels.
456
+ func findAllMetricSeries (metricsFamilies []* prometheus_client.MetricFamily , labels map [string ]string ) map [string ]* prometheus_client.Metric {
457
+ series := make (map [string ]* prometheus_client.Metric )
458
+ for _ , metricFamily := range metricsFamilies {
459
+ for _ , metric := range metricFamily .GetMetric () {
460
+ labelPairs := metric .GetLabel ()
461
+
462
+ if len (labelPairs ) != len (labels ) {
463
+ continue
464
+ }
465
+
466
+ // Convert label pairs to map for easier lookup
467
+ metricLabels := make (map [string ]string , len (labelPairs ))
468
+ for _ , label := range labelPairs {
469
+ metricLabels [label .GetName ()] = label .GetValue ()
470
+ }
471
+
472
+ // Check if all requested labels match
473
+ for wantName , wantValue := range labels {
474
+ if metricLabels [wantName ] != wantValue {
475
+ continue
476
+ }
477
+ }
478
+
479
+ series [metricFamily .GetName ()] = metric
480
+ }
481
+ }
482
+ return series
483
+ }
0 commit comments