From 690d8e85c42bc40b5ac867144a497e6d5228bfc0 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Mon, 26 May 2025 20:56:34 +0000 Subject: [PATCH] fix: fix metric for hard-limited presets --- enterprise/coderd/prebuilds/reconcile.go | 24 ++++++++++++------- enterprise/coderd/prebuilds/reconcile_test.go | 7 ++++-- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index c07c15de2aeb8..90c97afa26d69 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -361,15 +361,23 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres slog.F("preset_name", ps.Preset.Name), ) - // Report a preset as hard-limited only if all the following conditions are met: - // - The preset is marked as hard-limited - // - The preset is using the active version of its template, and the template has not been deleted + // Report a metric only if the preset uses the latest version of the template and the template is not deleted. + // This avoids conflicts between metrics from old and new template versions. // - // The second condition is important because a hard-limited preset that has become outdated is no longer relevant. - // Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it - // as hard-limited to the admin. - reportAsHardLimited := ps.IsHardLimited && ps.Preset.UsingActiveVersion && !ps.Preset.Deleted - c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, reportAsHardLimited) + // NOTE: Multiple versions of a preset can exist with the same orgName, templateName, and presetName, + // because templates can have multiple versions — or deleted templates can share the same name. + // + // The safest approach is to report the metric only for the latest version of the preset. + // When a new template version is released, the metric for the new preset should overwrite + // the old value in Prometheus. + // + // However, there’s one edge case: if an admin creates a template, it becomes hard-limited, + // then deletes the template and never creates another with the same name, + // the old preset will continue to be reported as hard-limited — + // even though it’s deleted. This will persist until `coderd` is restarted. + if ps.Preset.UsingActiveVersion && !ps.Preset.Deleted { + c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited) + } // If the preset reached the hard failure limit for the first time during this iteration: // - Mark it as hard-limited in the database diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 8dafd55ae610e..a5de509e5e410 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -1034,7 +1034,8 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition) require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition) - // Metric is deleted after preset became outdated. + // The metric is still set to 1, even though the preset has become outdated. + // This happens because the old value hasn't been overwritten by a newer preset yet. mf, err = registry.Gather() require.NoError(t, err) metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ @@ -1042,7 +1043,9 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { "preset_name": preset.Name, "org_name": org.Name, }) - require.Nil(t, metric) + require.NotNil(t, metric) + require.NotNil(t, metric.GetGauge()) + require.EqualValues(t, 1, metric.GetGauge().GetValue()) }) } }