Skip to content

Commit 690d8e8

Browse files
fix: fix metric for hard-limited presets
1 parent f678f92 commit 690d8e8

File tree

2 files changed

+21
-10
lines changed

2 files changed

+21
-10
lines changed

enterprise/coderd/prebuilds/reconcile.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -361,15 +361,23 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
361361
slog.F("preset_name", ps.Preset.Name),
362362
)
363363

364-
// Report a preset as hard-limited only if all the following conditions are met:
365-
// - The preset is marked as hard-limited
366-
// - The preset is using the active version of its template, and the template has not been deleted
364+
// Report a metric only if the preset uses the active version of the template and the template is not deleted.
365+
// This avoids conflicts between metrics from old and new template versions.
367366
//
368-
// The second condition is important because a hard-limited preset that has become outdated is no longer relevant.
369-
// Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it
370-
// as hard-limited to the admin.
371-
reportAsHardLimited := ps.IsHardLimited && ps.Preset.UsingActiveVersion && !ps.Preset.Deleted
372-
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, reportAsHardLimited)
367+
// NOTE: Multiple versions of a preset can exist with the same orgName, templateName, and presetName,
368+
// because templates can have multiple versions — or deleted templates can share the same name.
369+
//
370+
// The safest approach is to report the metric only for the latest version of the preset.
371+
// When a new template version is released, the metric for the new preset should overwrite
372+
// the old value in Prometheus.
373+
//
374+
// However, there’s one edge case: if an admin creates a template whose preset becomes hard-limited,
375+
// then deletes that template and never creates another with the same name,
376+
// the old preset will continue to be reported as hard-limited —
377+
// even though it’s deleted. This will persist until `coderd` is restarted.
378+
if ps.Preset.UsingActiveVersion && !ps.Preset.Deleted {
379+
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited)
380+
}
373381

374382
// If the preset reached the hard failure limit for the first time during this iteration:
375383
// - Mark it as hard-limited in the database

enterprise/coderd/prebuilds/reconcile_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,15 +1034,18 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
10341034
require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition)
10351035
require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition)
10361036

1037-
// Metric is deleted after preset became outdated.
1037+
// The metric is still set to 1, even though the preset has become outdated.
1038+
// This happens because the old value hasn't been overwritten by a newer preset yet.
10381039
mf, err = registry.Gather()
10391040
require.NoError(t, err)
10401041
metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
10411042
"template_name": template.Name,
10421043
"preset_name": preset.Name,
10431044
"org_name": org.Name,
10441045
})
1045-
require.Nil(t, metric)
1046+
require.NotNil(t, metric)
1047+
require.NotNil(t, metric.GetGauge())
1048+
require.EqualValues(t, 1, metric.GetGauge().GetValue())
10461049
})
10471050
}
10481051
}

0 commit comments

Comments
 (0)