Skip to content

Commit f7d3634

Browse files
fix: reimplement reporting of preset-hard-limited metric
1 parent e8c75eb commit f7d3634

File tree

4 files changed

+67
-15
lines changed

4 files changed

+67
-15
lines changed

coderd/prebuilds/global_snapshot.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err
8080
}, nil
8181
}
8282

83+
func (s GlobalSnapshot) IsHardLimited(presetID uuid.UUID) bool {
84+
_, isHardLimited := slice.Find(s.HardLimitedPresets, func(row database.GetPresetsAtFailureLimitRow) bool {
85+
return row.PresetID == presetID
86+
})
87+
88+
return isHardLimited
89+
}
90+
8391
// filterExpiredWorkspaces splits running workspaces into expired and non-expired
8492
// based on the preset's TTL.
8593
// If TTL is missing or zero, all workspaces are considered non-expired.

enterprise/coderd/prebuilds/metricscollector.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -280,16 +280,9 @@ func (k hardLimitedPresetKey) String() string {
280280
return fmt.Sprintf("%s:%s:%s", k.orgName, k.templateName, k.presetName)
281281
}
282282

283-
// nolint:revive // isHardLimited determines if the preset should be reported as hard-limited in Prometheus.
284-
func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, presetName string, isHardLimited bool) {
283+
func (mc *MetricsCollector) registerHardLimitedPresets(isPresetHardLimited map[hardLimitedPresetKey]bool) {
285284
mc.isPresetHardLimitedMu.Lock()
286285
defer mc.isPresetHardLimitedMu.Unlock()
287286

288-
key := hardLimitedPresetKey{orgName: orgName, templateName: templateName, presetName: presetName}
289-
290-
if isHardLimited {
291-
mc.isPresetHardLimited[key] = true
292-
} else {
293-
delete(mc.isPresetHardLimited, key)
294-
}
287+
mc.isPresetHardLimited = isPresetHardLimited
295288
}

enterprise/coderd/prebuilds/reconcile.go

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
256256
if err != nil {
257257
return xerrors.Errorf("determine current snapshot: %w", err)
258258
}
259+
260+
c.reportHardLimitedPresets(snapshot)
261+
259262
if len(snapshot.Presets) == 0 {
260263
logger.Debug(ctx, "no templates found with prebuilds configured")
261264
return nil
@@ -296,6 +299,56 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error {
296299
return err
297300
}
298301

302+
// Report a metric only if the preset uses the latest version of the template and the template is not deleted.
303+
// This avoids conflicts between metrics from old and new template versions.
304+
//
305+
// NOTE: Multiple versions of a preset can exist with the same orgName, templateName, and presetName,
306+
// because templates can have multiple versions — or deleted templates can share the same name.
307+
//
308+
// The safest approach is to report the metric only for the latest version of the preset.
309+
// When a new template version is released, the metric for the new preset should overwrite
310+
// the old value in Prometheus.
311+
//
312+
// However, there’s one edge case: if an admin creates a template, it becomes hard-limited,
313+
// then deletes the template and never creates another with the same name,
314+
// the old preset will continue to be reported as hard-limited —
315+
// even though it’s deleted. This will persist until `coderd` is restarted.
316+
317+
func (c *StoreReconciler) reportHardLimitedPresets(snapshot *prebuilds.GlobalSnapshot) {
318+
// presetsMap is a map from key (orgName:templateName:presetName) to list of corresponding presets.
319+
// Multiple versions of a preset can exist with the same orgName, templateName, and presetName,
320+
// because templates can have multiple versions — or deleted templates can share the same name.
321+
presetsMap := make(map[hardLimitedPresetKey][]database.GetTemplatePresetsWithPrebuildsRow)
322+
for _, preset := range snapshot.Presets {
323+
key := hardLimitedPresetKey{
324+
orgName: preset.OrganizationName,
325+
templateName: preset.TemplateName,
326+
presetName: preset.Name,
327+
}
328+
329+
presetsMap[key] = append(presetsMap[key], preset)
330+
}
331+
332+
// Report a preset as hard-limited only if all the following conditions are met:
333+
// - The preset is marked as hard-limited
334+
// - The preset is using the active version of its template, and the template has not been deleted
335+
//
336+
// The second condition is important because a hard-limited preset that has become outdated is no longer relevant.
337+
// Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it
338+
// as hard-limited to the admin.
339+
isPresetHardLimited := make(map[hardLimitedPresetKey]bool)
340+
for key, presets := range presetsMap {
341+
for _, preset := range presets {
342+
if preset.UsingActiveVersion && !preset.Deleted && snapshot.IsHardLimited(preset.ID) {
343+
isPresetHardLimited[key] = true
344+
break
345+
}
346+
}
347+
}
348+
349+
c.metrics.registerHardLimitedPresets(isPresetHardLimited)
350+
}
351+
299352
// SnapshotState captures the current state of all prebuilds across templates.
300353
func (c *StoreReconciler) SnapshotState(ctx context.Context, store database.Store) (*prebuilds.GlobalSnapshot, error) {
301354
if err := ctx.Err(); err != nil {
@@ -375,9 +428,9 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
375428
// then deletes the template and never creates another with the same name,
376429
// the old preset will continue to be reported as hard-limited —
377430
// even though it’s deleted. This will persist until `coderd` is restarted.
378-
if ps.Preset.UsingActiveVersion && !ps.Preset.Deleted {
379-
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited)
380-
}
431+
//if ps.Preset.UsingActiveVersion && !ps.Preset.Deleted {
432+
// c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited)
433+
//}
381434

382435
// If the preset reached the hard failure limit for the first time during this iteration:
383436
// - Mark it as hard-limited in the database

enterprise/coderd/prebuilds/reconcile_test.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,9 +1043,7 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
10431043
"preset_name": preset.Name,
10441044
"org_name": org.Name,
10451045
})
1046-
require.NotNil(t, metric)
1047-
require.NotNil(t, metric.GetGauge())
1048-
require.EqualValues(t, 1, metric.GetGauge().GetValue())
1046+
require.Nil(t, metric)
10491047
})
10501048
}
10511049
}

0 commit comments

Comments
 (0)