From fa348d0b236dee8ae637688069402283126ebce7 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Wed, 14 May 2025 19:09:08 +0000 Subject: [PATCH 01/10] test: added duplicate-template-names test --- .../coderd/prebuilds/metricscollector_test.go | 87 +++++++++++++++++++ enterprise/coderd/prebuilds/reconcile_test.go | 26 ++++++ 2 files changed, 113 insertions(+) diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go index 07c3c3c6996bb..988efd2410727 100644 --- a/enterprise/coderd/prebuilds/metricscollector_test.go +++ b/enterprise/coderd/prebuilds/metricscollector_test.go @@ -307,6 +307,93 @@ func TestMetricsCollector(t *testing.T) { } } +func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) { + t.Parallel() + + if !dbtestutil.WillUsePostgres() { + t.Skip("this test requires postgres") + } + + type metricCheck struct { + name string + value *float64 + isCounter bool + } + + type testCase struct { + transition database.WorkspaceTransition + jobStatus database.ProvisionerJobStatus + initiatorID uuid.UUID + ownerID uuid.UUID + metrics []metricCheck + eligible bool + } + + test := testCase{ + transition: database.WorkspaceTransitionStart, + jobStatus: database.ProvisionerJobStatusSucceeded, + initiatorID: agplprebuilds.SystemUserID, + ownerID: agplprebuilds.SystemUserID, + metrics: []metricCheck{ + {prebuilds.MetricCreatedCount, ptr.To(1.0), true}, + {prebuilds.MetricClaimedCount, ptr.To(0.0), true}, + {prebuilds.MetricFailedCount, ptr.To(0.0), true}, + {prebuilds.MetricDesiredGauge, ptr.To(1.0), false}, + {prebuilds.MetricRunningGauge, ptr.To(1.0), false}, + {prebuilds.MetricEligibleGauge, ptr.To(1.0), false}, + }, + eligible: true, + } + + logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}) + clock := quartz.NewMock(t) + db, pubsub := dbtestutil.NewDB(t) + reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer()) + ctx := testutil.Context(t, testutil.WaitLong) + + createdUsers := []uuid.UUID{agplprebuilds.SystemUserID} + for _, user := range []uuid.UUID{test.ownerID, test.initiatorID} { + if !slices.Contains(createdUsers, user) { + dbgen.User(t, db, database.User{ + ID: user, + }) + createdUsers = append(createdUsers, user) + } + } + + collector := prebuilds.NewMetricsCollector(db, logger, reconciler) + registry := prometheus.NewPedanticRegistry() + registry.Register(collector) + + defaultOrgName := "default-org" + defaultTemplateName := "default-template" + defaultPresetName := "default-preset" + defaultOrg := dbgen.Organization(t, db, database.Organization{ + Name: defaultOrgName, + }) + setupTemplateWithDeps := func(templateDeleted bool) { + template := setupTestDBTemplateWithinOrg(t, db, test.ownerID, true, defaultTemplateName, defaultOrg) + templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, defaultOrg.ID, test.ownerID, template.ID) + preset := setupTestDBPreset(t, db, templateVersionID, 1, defaultPresetName) + workspace, _ := setupTestDBWorkspace( + t, clock, db, pubsub, + test.transition, test.jobStatus, defaultOrg.ID, preset, template.ID, templateVersionID, test.initiatorID, test.ownerID, + ) + setupTestDBWorkspaceAgent(t, db, workspace.ID, test.eligible) + } + // Simulates creating and then deleting a template. + setupTemplateWithDeps(true) + // Simulates creating a template with the same org, template name, and preset name. + setupTemplateWithDeps(false) + + // Force an update to the metrics state to allow the collector to collect fresh metrics. + // nolint:gocritic // Authz context needed to retrieve state. + require.NoError(t, collector.UpdateState(dbauthz.AsPrebuildsOrchestrator(ctx), testutil.WaitLong)) + + _, err := registry.Gather() + require.NoError(t, err) +} + func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, labels map[string]string) *prometheus_client.Metric { for _, metricFamily := range metricsFamilies { if metricFamily.GetName() != name { diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index bdf447dcfae22..3fa02f31ba899 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -1060,6 +1060,32 @@ func setupTestDBTemplate( return org, template } +func setupTestDBTemplateWithinOrg( + t *testing.T, + db database.Store, + userID uuid.UUID, + templateDeleted bool, + templateName string, + org database.Organization, +) database.Template { + t.Helper() + + template := dbgen.Template(t, db, database.Template{ + Name: templateName, + CreatedBy: userID, + OrganizationID: org.ID, + CreatedAt: time.Now().Add(muchEarlier), + }) + if templateDeleted { + ctx := testutil.Context(t, testutil.WaitShort) + require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{ + ID: template.ID, + Deleted: true, + })) + } + return template +} + const ( earlier = -time.Hour muchEarlier = -time.Hour * 2 From c062274823170f091c425575d58b5271e78508ad Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Wed, 14 May 2025 18:05:56 +0200 Subject: [PATCH 02/10] chore: exclude deleted templates from all queries deleted templates cannot have associated workspaces, so it is logically invalid to include them Signed-off-by: Danny Kopping --- coderd/database/queries.sql.go | 4 ++++ coderd/database/queries/prebuilds.sql | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index bd1d5cddd43ed..2b211ebc960fa 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -6149,6 +6149,7 @@ WHERE w.id IN ( AND b.template_version_id = t.active_version_id AND p.current_preset_id = $3::uuid AND p.ready + AND NOT t.deleted LIMIT 1 FOR UPDATE OF p SKIP LOCKED -- Ensure that a concurrent request will not select the same prebuild. ) RETURNING w.id, w.name @@ -6184,6 +6185,7 @@ FROM workspace_latest_builds wlb -- prebuilds that are still building. INNER JOIN templates t ON t.active_version_id = wlb.template_version_id WHERE wlb.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status) +AND NOT t.deleted GROUP BY t.id, wpb.template_version_id, wpb.transition, wlb.template_version_preset_id ` @@ -6298,6 +6300,7 @@ WITH filtered_builds AS ( WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. AND wlb.transition = 'start'::workspace_transition AND w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0' + AND NOT t.deleted ), time_sorted_builds AS ( -- Group builds by preset, then sort each group by created_at. @@ -6449,6 +6452,7 @@ FROM templates t INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id INNER JOIN organizations o ON o.id = t.organization_id WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. + AND NOT t.deleted AND (t.id = $1::uuid OR $1 IS NULL) ` diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index 1d3a827c98586..80e0e06f2945d 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -15,6 +15,7 @@ WHERE w.id IN ( AND b.template_version_id = t.active_version_id AND p.current_preset_id = @preset_id::uuid AND p.ready + AND NOT t.deleted LIMIT 1 FOR UPDATE OF p SKIP LOCKED -- Ensure that a concurrent request will not select the same prebuild. ) RETURNING w.id, w.name; @@ -40,6 +41,7 @@ FROM templates t INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id INNER JOIN organizations o ON o.id = t.organization_id WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. + AND NOT t.deleted AND (t.id = sqlc.narg('template_id')::uuid OR sqlc.narg('template_id') IS NULL); -- name: GetRunningPrebuiltWorkspaces :many @@ -70,6 +72,7 @@ FROM workspace_latest_builds wlb -- prebuilds that are still building. INNER JOIN templates t ON t.active_version_id = wlb.template_version_id WHERE wlb.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status) +AND NOT t.deleted GROUP BY t.id, wpb.template_version_id, wpb.transition, wlb.template_version_preset_id; -- GetPresetsBackoff groups workspace builds by preset ID. @@ -98,6 +101,7 @@ WITH filtered_builds AS ( WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. AND wlb.transition = 'start'::workspace_transition AND w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0' + AND NOT t.deleted ), time_sorted_builds AS ( -- Group builds by preset, then sort each group by created_at. From d28fb15cdc1201c00a314776ed406f7c68617b28 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Wed, 14 May 2025 22:26:01 +0000 Subject: [PATCH 03/10] test: improve duplicate-template-names test --- .../coderd/prebuilds/metricscollector_test.go | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go index 988efd2410727..3ad739ec5e453 100644 --- a/enterprise/coderd/prebuilds/metricscollector_test.go +++ b/enterprise/coderd/prebuilds/metricscollector_test.go @@ -371,8 +371,8 @@ func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) { defaultOrg := dbgen.Organization(t, db, database.Organization{ Name: defaultOrgName, }) - setupTemplateWithDeps := func(templateDeleted bool) { - template := setupTestDBTemplateWithinOrg(t, db, test.ownerID, true, defaultTemplateName, defaultOrg) + setupTemplateWithDeps := func(templateDeleted bool) database.Template { + template := setupTestDBTemplateWithinOrg(t, db, test.ownerID, templateDeleted, defaultTemplateName, defaultOrg) templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, defaultOrg.ID, test.ownerID, template.ID) preset := setupTestDBPreset(t, db, templateVersionID, 1, defaultPresetName) workspace, _ := setupTestDBWorkspace( @@ -380,18 +380,52 @@ func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) { test.transition, test.jobStatus, defaultOrg.ID, preset, template.ID, templateVersionID, test.initiatorID, test.ownerID, ) setupTestDBWorkspaceAgent(t, db, workspace.ID, test.eligible) + return template } // Simulates creating and then deleting a template. setupTemplateWithDeps(true) // Simulates creating a template with the same org, template name, and preset name. - setupTemplateWithDeps(false) + newTemplate := setupTemplateWithDeps(false) // Force an update to the metrics state to allow the collector to collect fresh metrics. // nolint:gocritic // Authz context needed to retrieve state. require.NoError(t, collector.UpdateState(dbauthz.AsPrebuildsOrchestrator(ctx), testutil.WaitLong)) - _, err := registry.Gather() + metricsFamilies, err := registry.Gather() require.NoError(t, err) + + templateVersions, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{ + TemplateID: newTemplate.ID, + }) + require.NoError(t, err) + require.Equal(t, 1, len(templateVersions)) + + presets, err := db.GetPresetsByTemplateVersionID(ctx, templateVersions[0].ID) + require.NoError(t, err) + require.Equal(t, 1, len(presets)) + + for _, preset := range presets { + labels := map[string]string{ + "template_name": newTemplate.Name, + "preset_name": preset.Name, + "organization_name": defaultOrg.Name, + } + + for _, check := range test.metrics { + metric := findMetric(metricsFamilies, check.name, labels) + if check.value == nil { + continue + } + + require.NotNil(t, metric, "metric %s should exist", check.name) + + if check.isCounter { + require.Equal(t, *check.value, metric.GetCounter().GetValue(), "counter %s value mismatch", check.name) + } else { + require.Equal(t, *check.value, metric.GetGauge().GetValue(), "gauge %s value mismatch", check.name) + } + } + } } func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, labels map[string]string) *prometheus_client.Metric { From 9ba369d2cf3413d9ef1cdf515156eabb16109da3 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Wed, 14 May 2025 23:14:59 +0000 Subject: [PATCH 04/10] fix: partially revert fix on db level --- coderd/database/queries.sql.go | 2 -- coderd/database/queries/prebuilds.sql | 2 -- 2 files changed, 4 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 2b211ebc960fa..24f1f0b74914a 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -6185,7 +6185,6 @@ FROM workspace_latest_builds wlb -- prebuilds that are still building. INNER JOIN templates t ON t.active_version_id = wlb.template_version_id WHERE wlb.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status) -AND NOT t.deleted GROUP BY t.id, wpb.template_version_id, wpb.transition, wlb.template_version_preset_id ` @@ -6452,7 +6451,6 @@ FROM templates t INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id INNER JOIN organizations o ON o.id = t.organization_id WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. - AND NOT t.deleted AND (t.id = $1::uuid OR $1 IS NULL) ` diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index 80e0e06f2945d..509a2a8aab226 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -41,7 +41,6 @@ FROM templates t INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id INNER JOIN organizations o ON o.id = t.organization_id WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. - AND NOT t.deleted AND (t.id = sqlc.narg('template_id')::uuid OR sqlc.narg('template_id') IS NULL); -- name: GetRunningPrebuiltWorkspaces :many @@ -72,7 +71,6 @@ FROM workspace_latest_builds wlb -- prebuilds that are still building. INNER JOIN templates t ON t.active_version_id = wlb.template_version_id WHERE wlb.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status) -AND NOT t.deleted GROUP BY t.id, wpb.template_version_id, wpb.transition, wlb.template_version_preset_id; -- GetPresetsBackoff groups workspace builds by preset ID. From e42a08312b48d906c6439c253713b456772e5ca1 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Wed, 14 May 2025 23:40:49 +0000 Subject: [PATCH 05/10] fix: skip deleted templates on metrics collector level --- .../coderd/prebuilds/metricscollector.go | 4 +++ .../coderd/prebuilds/metricscollector_test.go | 25 ------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index 9f1cc837d0474..7a7734b6f8093 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -157,6 +157,10 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { continue } + if preset.Deleted { + continue + } + presetSnapshot, err := currentState.snapshot.FilterByPreset(preset.ID) if err != nil { mc.logger.Error(context.Background(), "failed to filter by preset", slog.Error(err)) diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go index 3ad739ec5e453..51383b34e5a7f 100644 --- a/enterprise/coderd/prebuilds/metricscollector_test.go +++ b/enterprise/coderd/prebuilds/metricscollector_test.go @@ -164,31 +164,6 @@ func TestMetricsCollector(t *testing.T) { templateDeleted: []bool{false}, eligible: []bool{false}, }, - { - name: "deleted templates never desire prebuilds", - transitions: allTransitions, - jobStatuses: allJobStatuses, - initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, - ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()}, - metrics: []metricCheck{ - {prebuilds.MetricDesiredGauge, ptr.To(0.0), false}, - }, - templateDeleted: []bool{true}, - eligible: []bool{false}, - }, - { - name: "running prebuilds for deleted templates are still counted, so that they can be deleted", - transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart}, - jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded}, - initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, - ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID}, - metrics: []metricCheck{ - {prebuilds.MetricRunningGauge, ptr.To(1.0), false}, - {prebuilds.MetricEligibleGauge, ptr.To(0.0), false}, - }, - templateDeleted: []bool{true}, - eligible: []bool{false}, - }, } for _, test := range tests { test := test // capture for parallel From d320cbcd73339b5ecbca5235279993d569cdd539 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Wed, 14 May 2025 23:46:39 +0000 Subject: [PATCH 06/10] fix: fix linter --- enterprise/coderd/prebuilds/reconcile_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 3fa02f31ba899..74a8038dc5754 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -1060,6 +1060,7 @@ func setupTestDBTemplate( return org, template } +// nolint:revive // It's a control flag, but this is a test. func setupTestDBTemplateWithinOrg( t *testing.T, db database.Store, From 4c1c469c0685c61afea409b5e74a0026667aee92 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Wed, 14 May 2025 23:48:58 +0000 Subject: [PATCH 07/10] fix: fix indentation --- coderd/database/queries.sql.go | 2 +- coderd/database/queries/prebuilds.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 24f1f0b74914a..26a88b9d8321f 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -6149,7 +6149,7 @@ WHERE w.id IN ( AND b.template_version_id = t.active_version_id AND p.current_preset_id = $3::uuid AND p.ready - AND NOT t.deleted + AND NOT t.deleted LIMIT 1 FOR UPDATE OF p SKIP LOCKED -- Ensure that a concurrent request will not select the same prebuild. ) RETURNING w.id, w.name diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index 509a2a8aab226..cf1b8e9992e19 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -15,7 +15,7 @@ WHERE w.id IN ( AND b.template_version_id = t.active_version_id AND p.current_preset_id = @preset_id::uuid AND p.ready - AND NOT t.deleted + AND NOT t.deleted LIMIT 1 FOR UPDATE OF p SKIP LOCKED -- Ensure that a concurrent request will not select the same prebuild. ) RETURNING w.id, w.name; From 9e8bf7ed00c98f45e4b646add3f6c18f784eaede Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 15 May 2025 07:59:12 +0200 Subject: [PATCH 08/10] chore: restore deleted test cases Signed-off-by: Danny Kopping --- .../coderd/prebuilds/metricscollector_test.go | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go index 51383b34e5a7f..4c8ab32a20f4d 100644 --- a/enterprise/coderd/prebuilds/metricscollector_test.go +++ b/enterprise/coderd/prebuilds/metricscollector_test.go @@ -164,6 +164,26 @@ func TestMetricsCollector(t *testing.T) { templateDeleted: []bool{false}, eligible: []bool{false}, }, + { + name: "deleted templates never desire prebuilds", + transitions: allTransitions, + jobStatuses: allJobStatuses, + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()}, + metrics: nil, + templateDeleted: []bool{true}, + eligible: []bool{false}, + }, + { + name: "running prebuilds for deleted templates are still counted, so that they can be deleted", + transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart}, + jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded}, + initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID}, + metrics: nil, + templateDeleted: []bool{true}, + eligible: []bool{false}, + }, } for _, test := range tests { test := test // capture for parallel @@ -256,6 +276,12 @@ func TestMetricsCollector(t *testing.T) { "organization_name": org.Name, } + // If no expected metrics have been defined, ensure we don't find any metric series (i.e. metrics with given labels). + if test.metrics == nil { + series := findAllMetricSeries(metricsFamilies, labels) + require.Empty(t, series) + } + for _, check := range test.metrics { metric := findMetric(metricsFamilies, check.name, labels) if check.value == nil { @@ -430,3 +456,33 @@ func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, } return nil } + +// findAllMetricSeries finds all metrics with a given set of labels. +func findAllMetricSeries(metricsFamilies []*prometheus_client.MetricFamily, labels map[string]string) map[string]*prometheus_client.Metric { + series := make(map[string]*prometheus_client.Metric) + for _, metricFamily := range metricsFamilies { + for _, metric := range metricFamily.GetMetric() { + labelPairs := metric.GetLabel() + + if len(labelPairs) != len(labels) { + continue + } + + // Convert label pairs to map for easier lookup + metricLabels := make(map[string]string, len(labelPairs)) + for _, label := range labelPairs { + metricLabels[label.GetName()] = label.GetValue() + } + + // Check if all requested labels match + for wantName, wantValue := range labels { + if metricLabels[wantName] != wantValue { + continue + } + } + + series[metricFamily.GetName()] = metric + } + } + return series +} From b539ddca019af0e69df52071cedf112ed0f6a396 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 15 May 2025 10:13:25 +0200 Subject: [PATCH 09/10] chore: adding clarity around soft-deletion constraints Signed-off-by: Danny Kopping --- coderd/database/queries.sql.go | 2 ++ coderd/database/queries/prebuilds.sql | 2 ++ enterprise/coderd/prebuilds/reconcile_test.go | 7 ++++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 26a88b9d8321f..415c7521ba202 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -6185,6 +6185,7 @@ FROM workspace_latest_builds wlb -- prebuilds that are still building. INNER JOIN templates t ON t.active_version_id = wlb.template_version_id WHERE wlb.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status) + -- AND NOT t.deleted -- We don't exclude deleted templates because there's no constraint in the DB preventing a soft deletion on a template while workspaces are running. GROUP BY t.id, wpb.template_version_id, wpb.transition, wlb.template_version_preset_id ` @@ -6451,6 +6452,7 @@ FROM templates t INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id INNER JOIN organizations o ON o.id = t.organization_id WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. + -- AND NOT t.deleted -- We don't exclude deleted templates because there's no constraint in the DB preventing a soft deletion on a template while workspaces are running. AND (t.id = $1::uuid OR $1 IS NULL) ` diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index cf1b8e9992e19..8c27ddf62b7c3 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -41,6 +41,7 @@ FROM templates t INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id INNER JOIN organizations o ON o.id = t.organization_id WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. + -- AND NOT t.deleted -- We don't exclude deleted templates because there's no constraint in the DB preventing a soft deletion on a template while workspaces are running. AND (t.id = sqlc.narg('template_id')::uuid OR sqlc.narg('template_id') IS NULL); -- name: GetRunningPrebuiltWorkspaces :many @@ -71,6 +72,7 @@ FROM workspace_latest_builds wlb -- prebuilds that are still building. INNER JOIN templates t ON t.active_version_id = wlb.template_version_id WHERE wlb.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status) + -- AND NOT t.deleted -- We don't exclude deleted templates because there's no constraint in the DB preventing a soft deletion on a template while workspaces are running. GROUP BY t.id, wpb.template_version_id, wpb.transition, wlb.template_version_preset_id; -- GetPresetsBackoff groups workspace builds by preset ID. diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 74a8038dc5754..660b1733e6cc9 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -294,10 +294,15 @@ func TestPrebuildReconciliation(t *testing.T) { templateDeleted: []bool{false}, }, { - name: "delete prebuilds for deleted templates", + // Templates can be soft-deleted (`deleted=true`) or hard-deleted (row is removed). + // On the former there is *no* DB constraint to prevent soft deletion, so we have to ensure that if somehow + // the template was soft-deleted any running prebuilds will be removed. + // On the latter there is a DB constraint to prevent row deletion if any workspaces reference the deleting template. + name: "soft-deleted templates MAY have prebuilds", prebuildLatestTransitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart}, prebuildJobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded}, templateVersionActive: []bool{true, false}, + shouldCreateNewPrebuild: ptr.To(false), shouldDeleteOldPrebuild: ptr.To(true), templateDeleted: []bool{true}, }, From 9cc50491ae04f8caba3edd142b69975303f13678 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 15 May 2025 10:13:46 +0200 Subject: [PATCH 10/10] chore: clear up metrics tests Signed-off-by: Danny Kopping --- .../coderd/prebuilds/metricscollector_test.go | 127 +++++++++--------- 1 file changed, 61 insertions(+), 66 deletions(-) diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go index 4c8ab32a20f4d..dce9e07dd110f 100644 --- a/enterprise/coderd/prebuilds/metricscollector_test.go +++ b/enterprise/coderd/prebuilds/metricscollector_test.go @@ -19,6 +19,7 @@ import ( "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbtestutil" + "github.com/coder/coder/v2/coderd/database/dbtime" agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/enterprise/coderd/prebuilds" @@ -165,7 +166,7 @@ func TestMetricsCollector(t *testing.T) { eligible: []bool{false}, }, { - name: "deleted templates never desire prebuilds", + name: "deleted templates should not be included in exported metrics", transitions: allTransitions, jobStatuses: allJobStatuses, initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, @@ -174,16 +175,6 @@ func TestMetricsCollector(t *testing.T) { templateDeleted: []bool{true}, eligible: []bool{false}, }, - { - name: "running prebuilds for deleted templates are still counted, so that they can be deleted", - transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart}, - jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded}, - initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID}, - ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID}, - metrics: nil, - templateDeleted: []bool{true}, - eligible: []bool{false}, - }, } for _, test := range tests { test := test // capture for parallel @@ -308,6 +299,9 @@ func TestMetricsCollector(t *testing.T) { } } +// TestMetricsCollector_DuplicateTemplateNames validates a bug that we saw previously which caused duplicate metric series +// registration when a template was deleted and a new one created with the same name (and preset name). +// We are now excluding deleted templates from our metric collection. func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) { t.Parallel() @@ -352,30 +346,16 @@ func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) { reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer()) ctx := testutil.Context(t, testutil.WaitLong) - createdUsers := []uuid.UUID{agplprebuilds.SystemUserID} - for _, user := range []uuid.UUID{test.ownerID, test.initiatorID} { - if !slices.Contains(createdUsers, user) { - dbgen.User(t, db, database.User{ - ID: user, - }) - createdUsers = append(createdUsers, user) - } - } - collector := prebuilds.NewMetricsCollector(db, logger, reconciler) registry := prometheus.NewPedanticRegistry() registry.Register(collector) - defaultOrgName := "default-org" - defaultTemplateName := "default-template" - defaultPresetName := "default-preset" - defaultOrg := dbgen.Organization(t, db, database.Organization{ - Name: defaultOrgName, - }) - setupTemplateWithDeps := func(templateDeleted bool) database.Template { - template := setupTestDBTemplateWithinOrg(t, db, test.ownerID, templateDeleted, defaultTemplateName, defaultOrg) + presetName := "default-preset" + defaultOrg := dbgen.Organization(t, db, database.Organization{}) + setupTemplateWithDeps := func() database.Template { + template := setupTestDBTemplateWithinOrg(t, db, test.ownerID, false, "default-template", defaultOrg) templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, defaultOrg.ID, test.ownerID, template.ID) - preset := setupTestDBPreset(t, db, templateVersionID, 1, defaultPresetName) + preset := setupTestDBPreset(t, db, templateVersionID, 1, "default-preset") workspace, _ := setupTestDBWorkspace( t, clock, db, pubsub, test.transition, test.jobStatus, defaultOrg.ID, preset, template.ID, templateVersionID, test.initiatorID, test.ownerID, @@ -383,50 +363,65 @@ func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) { setupTestDBWorkspaceAgent(t, db, workspace.ID, test.eligible) return template } - // Simulates creating and then deleting a template. - setupTemplateWithDeps(true) - // Simulates creating a template with the same org, template name, and preset name. - newTemplate := setupTemplateWithDeps(false) - // Force an update to the metrics state to allow the collector to collect fresh metrics. + // When: starting with a regular template. + template := setupTemplateWithDeps() + labels := map[string]string{ + "template_name": template.Name, + "preset_name": presetName, + "organization_name": defaultOrg.Name, + } + // nolint:gocritic // Authz context needed to retrieve state. - require.NoError(t, collector.UpdateState(dbauthz.AsPrebuildsOrchestrator(ctx), testutil.WaitLong)) + ctx = dbauthz.AsPrebuildsOrchestrator(ctx) + // Then: metrics collect successfully. + require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong)) metricsFamilies, err := registry.Gather() require.NoError(t, err) - - templateVersions, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{ - TemplateID: newTemplate.ID, - }) + require.NotEmpty(t, findAllMetricSeries(metricsFamilies, labels)) + + // When: the template is deleted. + require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{ + ID: template.ID, + Deleted: true, + UpdatedAt: dbtime.Now(), + })) + + // Then: metrics collect successfully but are empty because the template is deleted. + require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong)) + metricsFamilies, err = registry.Gather() require.NoError(t, err) - require.Equal(t, 1, len(templateVersions)) + require.Empty(t, findAllMetricSeries(metricsFamilies, labels)) + + // When: a new template is created with the same name as the deleted template. + newTemplate := setupTemplateWithDeps() + + // Ensure the database has both the new and old (delete) template. + { + deleted, err := db.GetTemplateByOrganizationAndName(ctx, database.GetTemplateByOrganizationAndNameParams{ + OrganizationID: template.OrganizationID, + Deleted: true, + Name: template.Name, + }) + require.NoError(t, err) + require.Equal(t, template.ID, deleted.ID) + + current, err := db.GetTemplateByOrganizationAndName(ctx, database.GetTemplateByOrganizationAndNameParams{ + // Use details from deleted template to ensure they're aligned. + OrganizationID: template.OrganizationID, + Deleted: false, + Name: template.Name, + }) + require.NoError(t, err) + require.Equal(t, newTemplate.ID, current.ID) + } - presets, err := db.GetPresetsByTemplateVersionID(ctx, templateVersions[0].ID) + // Then: metrics collect successfully. + require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong)) + metricsFamilies, err = registry.Gather() require.NoError(t, err) - require.Equal(t, 1, len(presets)) - - for _, preset := range presets { - labels := map[string]string{ - "template_name": newTemplate.Name, - "preset_name": preset.Name, - "organization_name": defaultOrg.Name, - } - - for _, check := range test.metrics { - metric := findMetric(metricsFamilies, check.name, labels) - if check.value == nil { - continue - } - - require.NotNil(t, metric, "metric %s should exist", check.name) - - if check.isCounter { - require.Equal(t, *check.value, metric.GetCounter().GetValue(), "counter %s value mismatch", check.name) - } else { - require.Equal(t, *check.value, metric.GetGauge().GetValue(), "gauge %s value mismatch", check.name) - } - } - } + require.NotEmpty(t, findAllMetricSeries(metricsFamilies, labels)) } func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, labels map[string]string) *prometheus_client.Metric {