Skip to content
Merged
46 changes: 46 additions & 0 deletions enterprise/coderd/prebuilds/metricscollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const (
MetricDesiredGauge = namespace + "desired"
MetricRunningGauge = namespace + "running"
MetricEligibleGauge = namespace + "eligible"
MetricPresetHardLimitedGauge = namespace + "preset_hard_limited"
MetricLastUpdatedGauge = namespace + "metrics_last_updated"
)

Expand Down Expand Up @@ -82,6 +83,12 @@ var (
labels,
nil,
)
presetHardLimitedDesc = prometheus.NewDesc(
MetricPresetHardLimitedGauge,
"Indicates whether a given preset has reached the hard failure limit (1 = hard-limited). Metric is omitted otherwise.",
labels,
nil,
)
lastUpdateDesc = prometheus.NewDesc(
MetricLastUpdatedGauge,
"The unix timestamp when the metrics related to prebuilt workspaces were last updated; these metrics are cached.",
Expand All @@ -104,17 +111,22 @@ type MetricsCollector struct {

replacementsCounter map[replacementKey]float64
replacementsCounterMu sync.Mutex

isPresetHardLimited map[hardLimitedPresetKey]bool
isPresetHardLimitedMu sync.Mutex
}

var _ prometheus.Collector = new(MetricsCollector)

func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
log := logger.Named("prebuilds_metrics_collector")

return &MetricsCollector{
database: db,
logger: log,
snapshotter: snapshotter,
replacementsCounter: make(map[replacementKey]float64),
isPresetHardLimited: make(map[hardLimitedPresetKey]bool),
}
}

Expand All @@ -126,6 +138,7 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
descCh <- desiredPrebuildsDesc
descCh <- runningPrebuildsDesc
descCh <- eligiblePrebuildsDesc
descCh <- presetHardLimitedDesc
descCh <- lastUpdateDesc
}

Expand Down Expand Up @@ -173,6 +186,17 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
}

mc.isPresetHardLimitedMu.Lock()
for key, isHardLimited := range mc.isPresetHardLimited {
var val float64
if isHardLimited {
val = 1
}

metricsCh <- prometheus.MustNewConstMetric(presetHardLimitedDesc, prometheus.GaugeValue, val, key.templateName, key.presetName, key.orgName)
}
mc.isPresetHardLimitedMu.Unlock()

metricsCh <- prometheus.MustNewConstMetric(lastUpdateDesc, prometheus.GaugeValue, float64(currentState.createdAt.Unix()))
}

Expand Down Expand Up @@ -247,3 +271,25 @@ func (mc *MetricsCollector) trackResourceReplacement(orgName, templateName, pres
// cause an issue (or indeed if either would), so we just track the replacement.
mc.replacementsCounter[key]++
}

type hardLimitedPresetKey struct {
orgName, templateName, presetName string
}

func (k hardLimitedPresetKey) String() string {
return fmt.Sprintf("%s:%s:%s", k.orgName, k.templateName, k.presetName)
}

// nolint:revive // isHardLimited determines if the preset should be reported as hard-limited in Prometheus.
func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, presetName string, isHardLimited bool) {
mc.isPresetHardLimitedMu.Lock()
defer mc.isPresetHardLimitedMu.Unlock()

key := hardLimitedPresetKey{orgName: orgName, templateName: templateName, presetName: presetName}

if isHardLimited {
mc.isPresetHardLimited[key] = true
} else {
delete(mc.isPresetHardLimited, key)
}
}
29 changes: 19 additions & 10 deletions enterprise/coderd/prebuilds/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,17 +361,22 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
slog.F("preset_name", ps.Preset.Name),
)

// If the preset was previously hard-limited, log it and exit early.
if ps.Preset.PrebuildStatus == database.PrebuildStatusHardLimited {
logger.Warn(ctx, "skipping hard limited preset")
return nil
}
// Report a preset as hard-limited only if all the following conditions are met:
// - The preset is marked as hard-limited
// - The preset is using the active version of its template, and the template has not been deleted
//
// The second condition is important because a hard-limited preset that has become outdated is no longer relevant.
// Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it
// as hard-limited to the admin.
reportAsHardLimited := ps.IsHardLimited && ps.Preset.UsingActiveVersion && !ps.Preset.Deleted
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, reportAsHardLimited)

// If the preset reached the hard failure limit for the first time during this iteration:
// - Mark it as hard-limited in the database
// - Send notifications to template admins
if ps.IsHardLimited {
logger.Warn(ctx, "skipping hard limited preset")
// - Continue execution, we disallow only creation operation for hard-limited presets. Deletion is allowed.
if ps.Preset.PrebuildStatus != database.PrebuildStatusHardLimited && ps.IsHardLimited {
logger.Warn(ctx, "preset is hard limited, notifying template admins")

err := c.store.UpdatePresetPrebuildStatus(ctx, database.UpdatePresetPrebuildStatusParams{
Status: database.PrebuildStatusHardLimited,
Expand All @@ -384,10 +389,7 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
err = c.notifyPrebuildFailureLimitReached(ctx, ps)
if err != nil {
logger.Error(ctx, "failed to notify that number of prebuild failures reached the limit", slog.Error(err))
return nil
}

return nil
}

state := ps.CalculateState()
Expand Down Expand Up @@ -452,6 +454,13 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
actions.Create = desired
}

// If preset is hard-limited, and it's a create operation, log it and exit early.
// Creation operation is disallowed for hard-limited preset.
if ps.IsHardLimited && actions.Create > 0 {
logger.Warn(ctx, "skipping hard limited preset for create operation")
return nil
}

var multiErr multierror.Error

for range actions.Create {
Expand Down
Loading
Loading