Skip to content
4 changes: 4 additions & 0 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions coderd/database/queries/prebuilds.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ WHERE w.id IN (
AND b.template_version_id = t.active_version_id
AND p.current_preset_id = @preset_id::uuid
AND p.ready
AND NOT t.deleted
LIMIT 1 FOR UPDATE OF p SKIP LOCKED -- Ensure that a concurrent request will not select the same prebuild.
)
RETURNING w.id, w.name;
Expand All @@ -40,6 +41,7 @@ FROM templates t
INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id
INNER JOIN organizations o ON o.id = t.organization_id
WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration.
-- AND NOT t.deleted -- We don't exclude deleted templates because there's no constraint in the DB preventing a soft deletion on a template while workspaces are running.
AND (t.id = sqlc.narg('template_id')::uuid OR sqlc.narg('template_id') IS NULL);

-- name: GetRunningPrebuiltWorkspaces :many
Expand Down Expand Up @@ -70,6 +72,7 @@ FROM workspace_latest_builds wlb
-- prebuilds that are still building.
INNER JOIN templates t ON t.active_version_id = wlb.template_version_id
WHERE wlb.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status)
-- AND NOT t.deleted -- We don't exclude deleted templates because there's no constraint in the DB preventing a soft deletion on a template while workspaces are running.
GROUP BY t.id, wpb.template_version_id, wpb.transition, wlb.template_version_preset_id;

-- GetPresetsBackoff groups workspace builds by preset ID.
Expand Down Expand Up @@ -98,6 +101,7 @@ WITH filtered_builds AS (
WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration.
AND wlb.transition = 'start'::workspace_transition
AND w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'
AND NOT t.deleted
),
time_sorted_builds AS (
-- Group builds by preset, then sort each group by created_at.
Expand Down
4 changes: 4 additions & 0 deletions enterprise/coderd/prebuilds/metricscollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,10 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
continue
}

if preset.Deleted {
continue
}

presetSnapshot, err := currentState.snapshot.FilterByPreset(preset.ID)
if err != nil {
mc.logger.Error(context.Background(), "failed to filter by preset", slog.Error(err))
Expand Down
189 changes: 168 additions & 21 deletions enterprise/coderd/prebuilds/metricscollector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/dbtime"
agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
Expand Down Expand Up @@ -165,27 +166,12 @@ func TestMetricsCollector(t *testing.T) {
eligible: []bool{false},
},
{
name: "deleted templates never desire prebuilds",
transitions: allTransitions,
jobStatuses: allJobStatuses,
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
metrics: []metricCheck{
{prebuilds.MetricDesiredGauge, ptr.To(0.0), false},
},
templateDeleted: []bool{true},
eligible: []bool{false},
},
{
name: "running prebuilds for deleted templates are still counted, so that they can be deleted",
transitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
jobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID},
metrics: []metricCheck{
{prebuilds.MetricRunningGauge, ptr.To(1.0), false},
{prebuilds.MetricEligibleGauge, ptr.To(0.0), false},
},
name: "deleted templates should not be included in exported metrics",
transitions: allTransitions,
jobStatuses: allJobStatuses,
initiatorIDs: []uuid.UUID{agplprebuilds.SystemUserID},
ownerIDs: []uuid.UUID{agplprebuilds.SystemUserID, uuid.New()},
metrics: nil,
templateDeleted: []bool{true},
eligible: []bool{false},
},
Expand Down Expand Up @@ -281,6 +267,12 @@ func TestMetricsCollector(t *testing.T) {
"organization_name": org.Name,
}

// If no expected metrics have been defined, ensure we don't find any metric series (i.e. metrics with given labels).
if test.metrics == nil {
series := findAllMetricSeries(metricsFamilies, labels)
require.Empty(t, series)
}

for _, check := range test.metrics {
metric := findMetric(metricsFamilies, check.name, labels)
if check.value == nil {
Expand All @@ -307,6 +299,131 @@ func TestMetricsCollector(t *testing.T) {
}
}

// TestMetricsCollector_DuplicateTemplateNames validates a bug that we saw previously which caused duplicate metric series
// registration when a template was deleted and a new one created with the same name (and preset name).
// We are now excluding deleted templates from our metric collection.
func TestMetricsCollector_DuplicateTemplateNames(t *testing.T) {
t.Parallel()

if !dbtestutil.WillUsePostgres() {
t.Skip("this test requires postgres")
}

type metricCheck struct {
name string
value *float64
isCounter bool
}

type testCase struct {
transition database.WorkspaceTransition
jobStatus database.ProvisionerJobStatus
initiatorID uuid.UUID
ownerID uuid.UUID
metrics []metricCheck
eligible bool
}

test := testCase{
transition: database.WorkspaceTransitionStart,
jobStatus: database.ProvisionerJobStatusSucceeded,
initiatorID: agplprebuilds.SystemUserID,
ownerID: agplprebuilds.SystemUserID,
metrics: []metricCheck{
{prebuilds.MetricCreatedCount, ptr.To(1.0), true},
{prebuilds.MetricClaimedCount, ptr.To(0.0), true},
{prebuilds.MetricFailedCount, ptr.To(0.0), true},
{prebuilds.MetricDesiredGauge, ptr.To(1.0), false},
{prebuilds.MetricRunningGauge, ptr.To(1.0), false},
{prebuilds.MetricEligibleGauge, ptr.To(1.0), false},
},
eligible: true,
}

logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
clock := quartz.NewMock(t)
db, pubsub := dbtestutil.NewDB(t)
reconciler := prebuilds.NewStoreReconciler(db, pubsub, codersdk.PrebuildsConfig{}, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())
ctx := testutil.Context(t, testutil.WaitLong)

collector := prebuilds.NewMetricsCollector(db, logger, reconciler)
registry := prometheus.NewPedanticRegistry()
registry.Register(collector)

presetName := "default-preset"
defaultOrg := dbgen.Organization(t, db, database.Organization{})
setupTemplateWithDeps := func() database.Template {
template := setupTestDBTemplateWithinOrg(t, db, test.ownerID, false, "default-template", defaultOrg)
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubsub, defaultOrg.ID, test.ownerID, template.ID)
preset := setupTestDBPreset(t, db, templateVersionID, 1, "default-preset")
workspace, _ := setupTestDBWorkspace(
t, clock, db, pubsub,
test.transition, test.jobStatus, defaultOrg.ID, preset, template.ID, templateVersionID, test.initiatorID, test.ownerID,
)
setupTestDBWorkspaceAgent(t, db, workspace.ID, test.eligible)
return template
}

// When: starting with a regular template.
template := setupTemplateWithDeps()
labels := map[string]string{
"template_name": template.Name,
"preset_name": presetName,
"organization_name": defaultOrg.Name,
}

// nolint:gocritic // Authz context needed to retrieve state.
ctx = dbauthz.AsPrebuildsOrchestrator(ctx)

// Then: metrics collect successfully.
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong))
metricsFamilies, err := registry.Gather()
require.NoError(t, err)
require.NotEmpty(t, findAllMetricSeries(metricsFamilies, labels))

// When: the template is deleted.
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
ID: template.ID,
Deleted: true,
UpdatedAt: dbtime.Now(),
}))

// Then: metrics collect successfully but are empty because the template is deleted.
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong))
metricsFamilies, err = registry.Gather()
require.NoError(t, err)
require.Empty(t, findAllMetricSeries(metricsFamilies, labels))

// When: a new template is created with the same name as the deleted template.
newTemplate := setupTemplateWithDeps()

// Ensure the database has both the new and old (delete) template.
{
deleted, err := db.GetTemplateByOrganizationAndName(ctx, database.GetTemplateByOrganizationAndNameParams{
OrganizationID: template.OrganizationID,
Deleted: true,
Name: template.Name,
})
require.NoError(t, err)
require.Equal(t, template.ID, deleted.ID)

current, err := db.GetTemplateByOrganizationAndName(ctx, database.GetTemplateByOrganizationAndNameParams{
// Use details from deleted template to ensure they're aligned.
OrganizationID: template.OrganizationID,
Deleted: false,
Name: template.Name,
})
require.NoError(t, err)
require.Equal(t, newTemplate.ID, current.ID)
}

// Then: metrics collect successfully.
require.NoError(t, collector.UpdateState(ctx, testutil.WaitLong))
metricsFamilies, err = registry.Gather()
require.NoError(t, err)
require.NotEmpty(t, findAllMetricSeries(metricsFamilies, labels))
}

func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string, labels map[string]string) *prometheus_client.Metric {
for _, metricFamily := range metricsFamilies {
if metricFamily.GetName() != name {
Expand Down Expand Up @@ -334,3 +451,33 @@ func findMetric(metricsFamilies []*prometheus_client.MetricFamily, name string,
}
return nil
}

// findAllMetricSeries finds all metrics with a given set of labels.
func findAllMetricSeries(metricsFamilies []*prometheus_client.MetricFamily, labels map[string]string) map[string]*prometheus_client.Metric {
series := make(map[string]*prometheus_client.Metric)
for _, metricFamily := range metricsFamilies {
for _, metric := range metricFamily.GetMetric() {
labelPairs := metric.GetLabel()

if len(labelPairs) != len(labels) {
continue
}

// Convert label pairs to map for easier lookup
metricLabels := make(map[string]string, len(labelPairs))
for _, label := range labelPairs {
metricLabels[label.GetName()] = label.GetValue()
}

// Check if all requested labels match
for wantName, wantValue := range labels {
if metricLabels[wantName] != wantValue {
continue
}
}

series[metricFamily.GetName()] = metric
}
}
return series
}
34 changes: 33 additions & 1 deletion enterprise/coderd/prebuilds/reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,15 @@ func TestPrebuildReconciliation(t *testing.T) {
templateDeleted: []bool{false},
},
{
name: "delete prebuilds for deleted templates",
// Templates can be soft-deleted (`deleted=true`) or hard-deleted (row is removed).
// On the former there is *no* DB constraint to prevent soft deletion, so we have to ensure that if somehow
// the template was soft-deleted any running prebuilds will be removed.
// On the latter there is a DB constraint to prevent row deletion if any workspaces reference the deleting template.
name: "soft-deleted templates MAY have prebuilds",
prebuildLatestTransitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
prebuildJobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
templateVersionActive: []bool{true, false},
shouldCreateNewPrebuild: ptr.To(false),
shouldDeleteOldPrebuild: ptr.To(true),
templateDeleted: []bool{true},
},
Expand Down Expand Up @@ -1060,6 +1065,33 @@ func setupTestDBTemplate(
return org, template
}

// nolint:revive // It's a control flag, but this is a test.
func setupTestDBTemplateWithinOrg(
t *testing.T,
db database.Store,
userID uuid.UUID,
templateDeleted bool,
templateName string,
org database.Organization,
) database.Template {
t.Helper()

template := dbgen.Template(t, db, database.Template{
Name: templateName,
CreatedBy: userID,
OrganizationID: org.ID,
CreatedAt: time.Now().Add(muchEarlier),
})
if templateDeleted {
ctx := testutil.Context(t, testutil.WaitShort)
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
ID: template.ID,
Deleted: true,
}))
}
return template
}

const (
earlier = -time.Hour
muchEarlier = -time.Hour * 2
Expand Down
Loading