diff --git a/cli/testdata/server-config.yaml.golden b/cli/testdata/server-config.yaml.golden index 9995a7f389130..7403819a2d10b 100644 --- a/cli/testdata/server-config.yaml.golden +++ b/cli/testdata/server-config.yaml.golden @@ -704,3 +704,7 @@ workspace_prebuilds: # backoff. # (default: 1h0m0s, type: duration) reconciliation_backoff_lookback_period: 1h0m0s + # Maximum number of consecutive failed prebuilds before a preset hits the hard + # limit; disabled when set to zero. + # (default: 3, type: int) + failure_hard_limit: 3 diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index e98197d3b5bb2..7cee63e183e7e 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -14326,6 +14326,10 @@ const docTemplate = `{ "codersdk.PrebuildsConfig": { "type": "object", "properties": { + "failure_hard_limit": { + "description": "FailureHardLimit defines the maximum number of consecutive failed prebuild attempts allowed\nbefore a preset is considered to be in a hard limit state. When a preset hits this limit,\nno new prebuilds will be created until the limit is reset.\nFailureHardLimit is disabled when set to zero.", + "type": "integer" + }, "reconciliation_backoff_interval": { "description": "ReconciliationBackoffInterval specifies the amount of time to increase the backoff interval\nwhen errors occur during reconciliation.", "type": "integer" diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index fa103f55fbe9f..89a582091496f 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -12968,6 +12968,10 @@ "codersdk.PrebuildsConfig": { "type": "object", "properties": { + "failure_hard_limit": { + "description": "FailureHardLimit defines the maximum number of consecutive failed prebuild attempts allowed\nbefore a preset is considered to be in a hard limit state. When a preset hits this limit,\nno new prebuilds will be created until the limit is reset.\nFailureHardLimit is disabled when set to zero.", + "type": "integer" + }, "reconciliation_backoff_interval": { "description": "ReconciliationBackoffInterval specifies the amount of time to increase the backoff interval\nwhen errors occur during reconciliation.", "type": "integer" diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index ab3781452dd2d..a210599d17cc4 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -2226,6 +2226,15 @@ func (q *querier) GetPresetParametersByTemplateVersionID(ctx context.Context, ar return q.db.GetPresetParametersByTemplateVersionID(ctx, args) } +func (q *querier) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]database.GetPresetsAtFailureLimitRow, error) { + // GetPresetsAtFailureLimit returns a list of template version presets that have reached the hard failure limit. + // Request the same authorization permissions as GetPresetsBackoff, since the methods are similar. + if err := q.authorizeContext(ctx, policy.ActionViewInsights, rbac.ResourceTemplate.All()); err != nil { + return nil, err + } + return q.db.GetPresetsAtFailureLimit(ctx, hardLimit) +} + func (q *querier) GetPresetsBackoff(ctx context.Context, lookback time.Time) ([]database.GetPresetsBackoffRow, error) { // GetPresetsBackoff returns a list of template version presets along with metadata such as the number of failed prebuilds. if err := q.authorizeContext(ctx, policy.ActionViewInsights, rbac.ResourceTemplate.All()); err != nil { @@ -4201,6 +4210,24 @@ func (q *querier) UpdateOrganizationDeletedByID(ctx context.Context, arg databas return deleteQ(q.log, q.auth, q.db.GetOrganizationByID, deleteF)(ctx, arg.ID) } +func (q *querier) UpdatePresetPrebuildStatus(ctx context.Context, arg database.UpdatePresetPrebuildStatusParams) error { + preset, err := q.db.GetPresetByID(ctx, arg.PresetID) + if err != nil { + return err + } + + object := rbac.ResourceTemplate. + WithID(preset.TemplateID.UUID). + InOrg(preset.OrganizationID) + + err = q.authorizeContext(ctx, policy.ActionUpdate, object) + if err != nil { + return err + } + + return q.db.UpdatePresetPrebuildStatus(ctx, arg) +} + func (q *querier) UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg database.UpdateProvisionerDaemonLastSeenAtParams) error { if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerDaemon); err != nil { return err diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index e8b90afbc396d..703e51d739c47 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -4924,6 +4924,11 @@ func (s *MethodTestSuite) TestPrebuilds() { Asserts(rbac.ResourceWorkspace.All(), policy.ActionRead). ErrorsWithInMemDB(dbmem.ErrUnimplemented) })) + s.Run("GetPresetsAtFailureLimit", s.Subtest(func(_ database.Store, check *expects) { + check.Args(int64(0)). + Asserts(rbac.ResourceTemplate.All(), policy.ActionViewInsights). + ErrorsWithInMemDB(dbmem.ErrUnimplemented) + })) s.Run("GetPresetsBackoff", s.Subtest(func(_ database.Store, check *expects) { check.Args(time.Time{}). Asserts(rbac.ResourceTemplate.All(), policy.ActionViewInsights). @@ -4971,8 +4976,34 @@ func (s *MethodTestSuite) TestPrebuilds() { }, InvalidateAfterSecs: preset.InvalidateAfterSecs, OrganizationID: org.ID, + PrebuildStatus: database.PrebuildStatusHealthy, }) })) + s.Run("UpdatePresetPrebuildStatus", s.Subtest(func(db database.Store, check *expects) { + org := dbgen.Organization(s.T(), db, database.Organization{}) + user := dbgen.User(s.T(), db, database.User{}) + template := dbgen.Template(s.T(), db, database.Template{ + OrganizationID: org.ID, + CreatedBy: user.ID, + }) + templateVersion := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{ + TemplateID: uuid.NullUUID{ + UUID: template.ID, + Valid: true, + }, + OrganizationID: org.ID, + CreatedBy: user.ID, + }) + preset := dbgen.Preset(s.T(), db, database.InsertPresetParams{ + TemplateVersionID: templateVersion.ID, + }) + req := database.UpdatePresetPrebuildStatusParams{ + PresetID: preset.ID, + Status: database.PrebuildStatusHealthy, + } + check.Args(req). + Asserts(rbac.ResourceTemplate.WithID(template.ID).InOrg(org.ID), policy.ActionUpdate) + })) } func (s *MethodTestSuite) TestOAuth2ProviderApps() { diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 75c56b9c2324d..1a1455d83045b 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -4287,6 +4287,7 @@ func (q *FakeQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (da CreatedAt: preset.CreatedAt, DesiredInstances: preset.DesiredInstances, InvalidateAfterSecs: preset.InvalidateAfterSecs, + PrebuildStatus: preset.PrebuildStatus, TemplateID: tv.TemplateID, OrganizationID: tv.OrganizationID, }, nil @@ -4352,6 +4353,10 @@ func (q *FakeQuerier) GetPresetParametersByTemplateVersionID(_ context.Context, return parameters, nil } +func (q *FakeQuerier) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]database.GetPresetsAtFailureLimitRow, error) { + return nil, ErrUnimplemented +} + func (*FakeQuerier) GetPresetsBackoff(_ context.Context, _ time.Time) ([]database.GetPresetsBackoffRow, error) { return nil, ErrUnimplemented } @@ -9089,6 +9094,7 @@ func (q *FakeQuerier) InsertPreset(_ context.Context, arg database.InsertPresetP Int32: 0, Valid: true, }, + PrebuildStatus: database.PrebuildStatusHealthy, } q.presets = append(q.presets, preset) return preset, nil @@ -10917,6 +10923,25 @@ func (q *FakeQuerier) UpdateOrganizationDeletedByID(_ context.Context, arg datab return sql.ErrNoRows } +func (q *FakeQuerier) UpdatePresetPrebuildStatus(ctx context.Context, arg database.UpdatePresetPrebuildStatusParams) error { + err := validateDatabaseType(arg) + if err != nil { + return err + } + + q.mutex.RLock() + defer q.mutex.RUnlock() + + for _, preset := range q.presets { + if preset.ID == arg.PresetID { + preset.PrebuildStatus = arg.Status + return nil + } + } + + return xerrors.Errorf("preset %v does not exist", arg.PresetID) +} + func (q *FakeQuerier) UpdateProvisionerDaemonLastSeenAt(_ context.Context, arg database.UpdateProvisionerDaemonLastSeenAtParams) error { err := validateDatabaseType(arg) if err != nil { diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 47ec185915660..e35ec11b02453 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -1138,6 +1138,13 @@ func (m queryMetricsStore) GetPresetParametersByTemplateVersionID(ctx context.Co return r0, r1 } +func (m queryMetricsStore) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]database.GetPresetsAtFailureLimitRow, error) { + start := time.Now() + r0, r1 := m.s.GetPresetsAtFailureLimit(ctx, hardLimit) + m.queryLatencies.WithLabelValues("GetPresetsAtFailureLimit").Observe(time.Since(start).Seconds()) + return r0, r1 +} + func (m queryMetricsStore) GetPresetsBackoff(ctx context.Context, lookback time.Time) ([]database.GetPresetsBackoffRow, error) { start := time.Now() r0, r1 := m.s.GetPresetsBackoff(ctx, lookback) @@ -2692,6 +2699,13 @@ func (m queryMetricsStore) UpdateOrganizationDeletedByID(ctx context.Context, ar return r0 } +func (m queryMetricsStore) UpdatePresetPrebuildStatus(ctx context.Context, arg database.UpdatePresetPrebuildStatusParams) error { + start := time.Now() + r0 := m.s.UpdatePresetPrebuildStatus(ctx, arg) + m.queryLatencies.WithLabelValues("UpdatePresetPrebuildStatus").Observe(time.Since(start).Seconds()) + return r0 +} + func (m queryMetricsStore) UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg database.UpdateProvisionerDaemonLastSeenAtParams) error { start := time.Now() r0 := m.s.UpdateProvisionerDaemonLastSeenAt(ctx, arg) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index e3a9a14698e42..7a1fc0c4b2a6f 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -2328,6 +2328,21 @@ func (mr *MockStoreMockRecorder) GetPresetParametersByTemplateVersionID(ctx, tem return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPresetParametersByTemplateVersionID", reflect.TypeOf((*MockStore)(nil).GetPresetParametersByTemplateVersionID), ctx, templateVersionID) } +// GetPresetsAtFailureLimit mocks base method. +func (m *MockStore) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]database.GetPresetsAtFailureLimitRow, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetPresetsAtFailureLimit", ctx, hardLimit) + ret0, _ := ret[0].([]database.GetPresetsAtFailureLimitRow) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetPresetsAtFailureLimit indicates an expected call of GetPresetsAtFailureLimit. +func (mr *MockStoreMockRecorder) GetPresetsAtFailureLimit(ctx, hardLimit any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPresetsAtFailureLimit", reflect.TypeOf((*MockStore)(nil).GetPresetsAtFailureLimit), ctx, hardLimit) +} + // GetPresetsBackoff mocks base method. func (m *MockStore) GetPresetsBackoff(ctx context.Context, lookback time.Time) ([]database.GetPresetsBackoffRow, error) { m.ctrl.T.Helper() @@ -5706,6 +5721,20 @@ func (mr *MockStoreMockRecorder) UpdateOrganizationDeletedByID(ctx, arg any) *go return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateOrganizationDeletedByID", reflect.TypeOf((*MockStore)(nil).UpdateOrganizationDeletedByID), ctx, arg) } +// UpdatePresetPrebuildStatus mocks base method. +func (m *MockStore) UpdatePresetPrebuildStatus(ctx context.Context, arg database.UpdatePresetPrebuildStatusParams) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "UpdatePresetPrebuildStatus", ctx, arg) + ret0, _ := ret[0].(error) + return ret0 +} + +// UpdatePresetPrebuildStatus indicates an expected call of UpdatePresetPrebuildStatus. +func (mr *MockStoreMockRecorder) UpdatePresetPrebuildStatus(ctx, arg any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdatePresetPrebuildStatus", reflect.TypeOf((*MockStore)(nil).UpdatePresetPrebuildStatus), ctx, arg) +} + // UpdateProvisionerDaemonLastSeenAt mocks base method. func (m *MockStore) UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg database.UpdateProvisionerDaemonLastSeenAtParams) error { m.ctrl.T.Helper() diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 2f23b3ad4ce78..ec196405df2d3 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -153,6 +153,12 @@ CREATE TYPE port_share_protocol AS ENUM ( 'https' ); +CREATE TYPE prebuild_status AS ENUM ( + 'healthy', + 'hard_limited', + 'validation_failed' +); + CREATE TYPE provisioner_daemon_status AS ENUM ( 'offline', 'idle', @@ -1439,7 +1445,8 @@ CREATE TABLE template_version_presets ( name text NOT NULL, created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, desired_instances integer, - invalidate_after_secs integer DEFAULT 0 + invalidate_after_secs integer DEFAULT 0, + prebuild_status prebuild_status DEFAULT 'healthy'::prebuild_status NOT NULL ); CREATE TABLE template_version_terraform_values ( diff --git a/coderd/database/migrations/000328_prebuild_failure_limit_notification.down.sql b/coderd/database/migrations/000328_prebuild_failure_limit_notification.down.sql new file mode 100644 index 0000000000000..40697c7bbc3d2 --- /dev/null +++ b/coderd/database/migrations/000328_prebuild_failure_limit_notification.down.sql @@ -0,0 +1 @@ +DELETE FROM notification_templates WHERE id = '414d9331-c1fc-4761-b40c-d1f4702279eb'; diff --git a/coderd/database/migrations/000328_prebuild_failure_limit_notification.up.sql b/coderd/database/migrations/000328_prebuild_failure_limit_notification.up.sql new file mode 100644 index 0000000000000..403bd667abd28 --- /dev/null +++ b/coderd/database/migrations/000328_prebuild_failure_limit_notification.up.sql @@ -0,0 +1,25 @@ +INSERT INTO notification_templates +(id, name, title_template, body_template, "group", actions) +VALUES ('414d9331-c1fc-4761-b40c-d1f4702279eb', + 'Prebuild Failure Limit Reached', + E'There is a problem creating prebuilt workspaces', + $$ +The number of failed prebuild attempts has reached the hard limit for template **{{ .Labels.template }}** and preset **{{ .Labels.preset }}**. + +To resume prebuilds, fix the underlying issue and upload a new template version. + +Refer to the documentation for more details: +- [Troubleshooting templates](https://coder.com/docs/admin/templates/troubleshooting) +- [Troubleshooting of prebuilt workspaces](https://coder.com/docs/admin/templates/extending-templates/prebuilt-workspaces#administration-and-troubleshooting) +$$, + 'Template Events', + '[ + { + "label": "View failed prebuilt workspaces", + "url": "{{base_url}}/workspaces?filter=owner:prebuilds+status:failed+template:{{.Labels.template}}" + }, + { + "label": "View template version", + "url": "{{base_url}}/templates/{{.Labels.org}}/{{.Labels.template}}/versions/{{.Labels.template_version}}" + } + ]'::jsonb); diff --git a/coderd/database/migrations/000329_add_status_to_template_presets.down.sql b/coderd/database/migrations/000329_add_status_to_template_presets.down.sql new file mode 100644 index 0000000000000..8fe04f99cae33 --- /dev/null +++ b/coderd/database/migrations/000329_add_status_to_template_presets.down.sql @@ -0,0 +1,5 @@ +-- Remove the column from the table first (must happen before dropping the enum type) +ALTER TABLE template_version_presets DROP COLUMN prebuild_status; + +-- Then drop the enum type +DROP TYPE prebuild_status; diff --git a/coderd/database/migrations/000329_add_status_to_template_presets.up.sql b/coderd/database/migrations/000329_add_status_to_template_presets.up.sql new file mode 100644 index 0000000000000..019a246f73a87 --- /dev/null +++ b/coderd/database/migrations/000329_add_status_to_template_presets.up.sql @@ -0,0 +1,7 @@ +CREATE TYPE prebuild_status AS ENUM ( + 'healthy', -- Prebuilds are working as expected; this is the default, healthy state. + 'hard_limited', -- Prebuilds have failed repeatedly and hit the configured hard failure limit; won't be retried anymore. + 'validation_failed' -- Prebuilds failed due to a non-retryable validation error (e.g. template misconfiguration); won't be retried. +); + +ALTER TABLE template_version_presets ADD COLUMN prebuild_status prebuild_status NOT NULL DEFAULT 'healthy'::prebuild_status; diff --git a/coderd/database/models.go b/coderd/database/models.go index ff49b8f471be0..d5047f6bbe65f 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -1343,6 +1343,67 @@ func AllPortShareProtocolValues() []PortShareProtocol { } } +type PrebuildStatus string + +const ( + PrebuildStatusHealthy PrebuildStatus = "healthy" + PrebuildStatusHardLimited PrebuildStatus = "hard_limited" + PrebuildStatusValidationFailed PrebuildStatus = "validation_failed" +) + +func (e *PrebuildStatus) Scan(src interface{}) error { + switch s := src.(type) { + case []byte: + *e = PrebuildStatus(s) + case string: + *e = PrebuildStatus(s) + default: + return fmt.Errorf("unsupported scan type for PrebuildStatus: %T", src) + } + return nil +} + +type NullPrebuildStatus struct { + PrebuildStatus PrebuildStatus `json:"prebuild_status"` + Valid bool `json:"valid"` // Valid is true if PrebuildStatus is not NULL +} + +// Scan implements the Scanner interface. +func (ns *NullPrebuildStatus) Scan(value interface{}) error { + if value == nil { + ns.PrebuildStatus, ns.Valid = "", false + return nil + } + ns.Valid = true + return ns.PrebuildStatus.Scan(value) +} + +// Value implements the driver Valuer interface. +func (ns NullPrebuildStatus) Value() (driver.Value, error) { + if !ns.Valid { + return nil, nil + } + return string(ns.PrebuildStatus), nil +} + +func (e PrebuildStatus) Valid() bool { + switch e { + case PrebuildStatusHealthy, + PrebuildStatusHardLimited, + PrebuildStatusValidationFailed: + return true + } + return false +} + +func AllPrebuildStatusValues() []PrebuildStatus { + return []PrebuildStatus{ + PrebuildStatusHealthy, + PrebuildStatusHardLimited, + PrebuildStatusValidationFailed, + } +} + // The status of a provisioner daemon. type ProvisionerDaemonStatus string @@ -3248,12 +3309,13 @@ type TemplateVersionParameter struct { } type TemplateVersionPreset struct { - ID uuid.UUID `db:"id" json:"id"` - TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` - Name string `db:"name" json:"name"` - CreatedAt time.Time `db:"created_at" json:"created_at"` - DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` - InvalidateAfterSecs sql.NullInt32 `db:"invalidate_after_secs" json:"invalidate_after_secs"` + ID uuid.UUID `db:"id" json:"id"` + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + Name string `db:"name" json:"name"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` + InvalidateAfterSecs sql.NullInt32 `db:"invalidate_after_secs" json:"invalidate_after_secs"` + PrebuildStatus PrebuildStatus `db:"prebuild_status" json:"prebuild_status"` } type TemplateVersionPresetParameter struct { diff --git a/coderd/database/querier.go b/coderd/database/querier.go index d248780397ead..ac7497b641a05 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -241,6 +241,15 @@ type sqlcQuerier interface { GetPresetByWorkspaceBuildID(ctx context.Context, workspaceBuildID uuid.UUID) (TemplateVersionPreset, error) GetPresetParametersByPresetID(ctx context.Context, presetID uuid.UUID) ([]TemplateVersionPresetParameter, error) GetPresetParametersByTemplateVersionID(ctx context.Context, templateVersionID uuid.UUID) ([]TemplateVersionPresetParameter, error) + // GetPresetsAtFailureLimit groups workspace builds by preset ID. + // Each preset is associated with exactly one template version ID. + // For each preset, the query checks the last hard_limit builds. + // If all of them failed, the preset is considered to have hit the hard failure limit. + // The query returns a list of preset IDs that have reached this failure threshold. + // Only active template versions with configured presets are considered. + // For each preset, check the last hard_limit builds. + // If all of them failed, the preset is considered to have hit the hard failure limit. + GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]GetPresetsAtFailureLimitRow, error) // GetPresetsBackoff groups workspace builds by preset ID. // Each preset is associated with exactly one template version ID. // For each group, the query checks up to N of the most recent jobs that occurred within the @@ -568,6 +577,7 @@ type sqlcQuerier interface { UpdateOAuth2ProviderAppSecretByID(ctx context.Context, arg UpdateOAuth2ProviderAppSecretByIDParams) (OAuth2ProviderAppSecret, error) UpdateOrganization(ctx context.Context, arg UpdateOrganizationParams) (Organization, error) UpdateOrganizationDeletedByID(ctx context.Context, arg UpdateOrganizationDeletedByIDParams) error + UpdatePresetPrebuildStatus(ctx context.Context, arg UpdatePresetPrebuildStatusParams) error UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg UpdateProvisionerDaemonLastSeenAtParams) error UpdateProvisionerJobByID(ctx context.Context, arg UpdateProvisionerJobByIDParams) error UpdateProvisionerJobWithCancelByID(ctx context.Context, arg UpdateProvisionerJobWithCancelByIDParams) error diff --git a/coderd/database/querier_test.go b/coderd/database/querier_test.go index b2cc20c4894d5..5bafa58796b7a 100644 --- a/coderd/database/querier_test.go +++ b/coderd/database/querier_test.go @@ -4123,8 +4123,7 @@ func TestGetPresetsBackoff(t *testing.T) { }) tmpl1 := createTemplate(t, db, orgID, userID) - tmpl1V1 := createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) - _ = tmpl1V1 + createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) backoffs, err := db.GetPresetsBackoff(ctx, now.Add(-time.Hour)) require.NoError(t, err) @@ -4401,6 +4400,311 @@ func TestGetPresetsBackoff(t *testing.T) { }) } +func TestGetPresetsAtFailureLimit(t *testing.T) { + t.Parallel() + if !dbtestutil.WillUsePostgres() { + t.SkipNow() + } + + now := dbtime.Now() + hourBefore := now.Add(-time.Hour) + orgID := uuid.New() + userID := uuid.New() + + findPresetByTmplVersionID := func(hardLimitedPresets []database.GetPresetsAtFailureLimitRow, tmplVersionID uuid.UUID) *database.GetPresetsAtFailureLimitRow { + for _, preset := range hardLimitedPresets { + if preset.TemplateVersionID == tmplVersionID { + return &preset + } + } + + return nil + } + + testCases := []struct { + name string + // true - build is successful + // false - build is unsuccessful + buildSuccesses []bool + hardLimit int64 + expHitHardLimit bool + }{ + { + name: "failed build", + buildSuccesses: []bool{false}, + hardLimit: 1, + expHitHardLimit: true, + }, + { + name: "2 failed builds", + buildSuccesses: []bool{false, false}, + hardLimit: 1, + expHitHardLimit: true, + }, + { + name: "successful build", + buildSuccesses: []bool{true}, + hardLimit: 1, + expHitHardLimit: false, + }, + { + name: "last build is failed", + buildSuccesses: []bool{true, true, false}, + hardLimit: 1, + expHitHardLimit: true, + }, + { + name: "last build is successful", + buildSuccesses: []bool{false, false, true}, + hardLimit: 1, + expHitHardLimit: false, + }, + { + name: "last 3 builds are failed - hard limit is reached", + buildSuccesses: []bool{true, true, false, false, false}, + hardLimit: 3, + expHitHardLimit: true, + }, + { + name: "1 out of 3 last build is successful - hard limit is NOT reached", + buildSuccesses: []bool{false, false, true, false, false}, + hardLimit: 3, + expHitHardLimit: false, + }, + // hardLimit set to zero, implicitly disables the hard limit. + { + name: "despite 5 failed builds, the hard limit is not reached because it's disabled.", + buildSuccesses: []bool{false, false, false, false, false}, + hardLimit: 0, + expHitHardLimit: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl := createTemplate(t, db, orgID, userID) + tmplV1 := createTmplVersionAndPreset(t, db, tmpl, tmpl.ActiveVersionID, now, nil) + for idx, buildSuccess := range tc.buildSuccesses { + createPrebuiltWorkspace(ctx, t, db, tmpl, tmplV1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: !buildSuccess, + createdAt: hourBefore.Add(time.Duration(idx) * time.Second), + }) + } + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, tc.hardLimit) + require.NoError(t, err) + + if !tc.expHitHardLimit { + require.Len(t, hardLimitedPresets, 0) + return + } + + require.Len(t, hardLimitedPresets, 1) + hardLimitedPreset := hardLimitedPresets[0] + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmplV1.preset.ID) + }) + } + + t.Run("Ignore Inactive Version", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl := createTemplate(t, db, orgID, userID) + tmplV1 := createTmplVersionAndPreset(t, db, tmpl, uuid.New(), now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl, tmplV1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + // Active Version + tmplV2 := createTmplVersionAndPreset(t, db, tmpl, tmpl.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl, tmplV2, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + createPrebuiltWorkspace(ctx, t, db, tmpl, tmplV2, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, 1) + require.NoError(t, err) + + require.Len(t, hardLimitedPresets, 1) + hardLimitedPreset := hardLimitedPresets[0] + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmplV2.preset.ID) + }) + + t.Run("Multiple Templates", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl1 := createTemplate(t, db, orgID, userID) + tmpl1V1 := createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + tmpl2 := createTemplate(t, db, orgID, userID) + tmpl2V1 := createTmplVersionAndPreset(t, db, tmpl2, tmpl2.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl2, tmpl2V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, 1) + + require.NoError(t, err) + + require.Len(t, hardLimitedPresets, 2) + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl1.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl1.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl1V1.preset.ID) + } + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl2.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl2.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl2V1.preset.ID) + } + }) + + t.Run("Multiple Templates, Versions and Workspace Builds", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl1 := createTemplate(t, db, orgID, userID) + tmpl1V1 := createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + tmpl2 := createTemplate(t, db, orgID, userID) + tmpl2V1 := createTmplVersionAndPreset(t, db, tmpl2, tmpl2.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl2, tmpl2V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + createPrebuiltWorkspace(ctx, t, db, tmpl2, tmpl2V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + tmpl3 := createTemplate(t, db, orgID, userID) + tmpl3V1 := createTmplVersionAndPreset(t, db, tmpl3, uuid.New(), now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl3, tmpl3V1, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + tmpl3V2 := createTmplVersionAndPreset(t, db, tmpl3, tmpl3.ActiveVersionID, now, nil) + createPrebuiltWorkspace(ctx, t, db, tmpl3, tmpl3V2, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + createPrebuiltWorkspace(ctx, t, db, tmpl3, tmpl3V2, orgID, now, &createPrebuiltWorkspaceOpts{ + failedJob: true, + }) + + hardLimit := int64(2) + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, hardLimit) + require.NoError(t, err) + + require.Len(t, hardLimitedPresets, 3) + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl1.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl1.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl1V1.preset.ID) + } + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl2.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl2.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl2V1.preset.ID) + } + { + hardLimitedPreset := findPresetByTmplVersionID(hardLimitedPresets, tmpl3.ActiveVersionID) + require.Equal(t, hardLimitedPreset.TemplateVersionID, tmpl3.ActiveVersionID) + require.Equal(t, hardLimitedPreset.PresetID, tmpl3V2.preset.ID) + } + }) + + t.Run("No Workspace Builds", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl1 := createTemplate(t, db, orgID, userID) + createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, 1) + require.NoError(t, err) + require.Nil(t, hardLimitedPresets) + }) + + t.Run("No Failed Workspace Builds", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + dbgen.Organization(t, db, database.Organization{ + ID: orgID, + }) + dbgen.User(t, db, database.User{ + ID: userID, + }) + + tmpl1 := createTemplate(t, db, orgID, userID) + tmpl1V1 := createTmplVersionAndPreset(t, db, tmpl1, tmpl1.ActiveVersionID, now, nil) + successfulJobOpts := createPrebuiltWorkspaceOpts{} + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &successfulJobOpts) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &successfulJobOpts) + createPrebuiltWorkspace(ctx, t, db, tmpl1, tmpl1V1, orgID, now, &successfulJobOpts) + + hardLimitedPresets, err := db.GetPresetsAtFailureLimit(ctx, 1) + require.NoError(t, err) + require.Nil(t, hardLimitedPresets) + }) +} + func requireUsersMatch(t testing.TB, expected []database.User, found []database.GetUsersRow, msg string) { t.Helper() require.ElementsMatch(t, expected, database.ConvertUserRows(found), msg) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 99a8bf4603b57..ffd8ccb035206 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -6288,6 +6288,71 @@ func (q *sqlQuerier) GetPrebuildMetrics(ctx context.Context) ([]GetPrebuildMetri return items, nil } +const getPresetsAtFailureLimit = `-- name: GetPresetsAtFailureLimit :many +WITH filtered_builds AS ( + -- Only select builds which are for prebuild creations + SELECT wlb.template_version_id, wlb.created_at, tvp.id AS preset_id, wlb.job_status, tvp.desired_instances + FROM template_version_presets tvp + INNER JOIN workspace_latest_builds wlb ON wlb.template_version_preset_id = tvp.id + INNER JOIN workspaces w ON wlb.workspace_id = w.id + INNER JOIN template_versions tv ON wlb.template_version_id = tv.id + INNER JOIN templates t ON tv.template_id = t.id AND t.active_version_id = tv.id + WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. + AND wlb.transition = 'start'::workspace_transition + AND w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0' +), +time_sorted_builds AS ( + -- Group builds by preset, then sort each group by created_at. + SELECT fb.template_version_id, fb.created_at, fb.preset_id, fb.job_status, fb.desired_instances, + ROW_NUMBER() OVER (PARTITION BY fb.preset_id ORDER BY fb.created_at DESC) as rn + FROM filtered_builds fb +) +SELECT + tsb.template_version_id, + tsb.preset_id +FROM time_sorted_builds tsb +WHERE tsb.rn <= $1::bigint + AND tsb.job_status = 'failed'::provisioner_job_status +GROUP BY tsb.template_version_id, tsb.preset_id +HAVING COUNT(*) = $1::bigint +` + +type GetPresetsAtFailureLimitRow struct { + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + PresetID uuid.UUID `db:"preset_id" json:"preset_id"` +} + +// GetPresetsAtFailureLimit groups workspace builds by preset ID. +// Each preset is associated with exactly one template version ID. +// For each preset, the query checks the last hard_limit builds. +// If all of them failed, the preset is considered to have hit the hard failure limit. +// The query returns a list of preset IDs that have reached this failure threshold. +// Only active template versions with configured presets are considered. +// For each preset, check the last hard_limit builds. +// If all of them failed, the preset is considered to have hit the hard failure limit. +func (q *sqlQuerier) GetPresetsAtFailureLimit(ctx context.Context, hardLimit int64) ([]GetPresetsAtFailureLimitRow, error) { + rows, err := q.db.QueryContext(ctx, getPresetsAtFailureLimit, hardLimit) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetPresetsAtFailureLimitRow + for rows.Next() { + var i GetPresetsAtFailureLimitRow + if err := rows.Scan(&i.TemplateVersionID, &i.PresetID); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const getPresetsBackoff = `-- name: GetPresetsBackoff :many WITH filtered_builds AS ( -- Only select builds which are for prebuild creations @@ -6438,6 +6503,7 @@ const getTemplatePresetsWithPrebuilds = `-- name: GetTemplatePresetsWithPrebuild SELECT t.id AS template_id, t.name AS template_name, + o.id AS organization_id, o.name AS organization_name, tv.id AS template_version_id, tv.name AS template_version_name, @@ -6445,6 +6511,7 @@ SELECT tvp.id, tvp.name, tvp.desired_instances AS desired_instances, + tvp.prebuild_status, t.deleted, t.deprecated != '' AS deprecated FROM templates t @@ -6457,17 +6524,19 @@ WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a pre ` type GetTemplatePresetsWithPrebuildsRow struct { - TemplateID uuid.UUID `db:"template_id" json:"template_id"` - TemplateName string `db:"template_name" json:"template_name"` - OrganizationName string `db:"organization_name" json:"organization_name"` - TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` - TemplateVersionName string `db:"template_version_name" json:"template_version_name"` - UsingActiveVersion bool `db:"using_active_version" json:"using_active_version"` - ID uuid.UUID `db:"id" json:"id"` - Name string `db:"name" json:"name"` - DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` - Deleted bool `db:"deleted" json:"deleted"` - Deprecated bool `db:"deprecated" json:"deprecated"` + TemplateID uuid.UUID `db:"template_id" json:"template_id"` + TemplateName string `db:"template_name" json:"template_name"` + OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` + OrganizationName string `db:"organization_name" json:"organization_name"` + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + TemplateVersionName string `db:"template_version_name" json:"template_version_name"` + UsingActiveVersion bool `db:"using_active_version" json:"using_active_version"` + ID uuid.UUID `db:"id" json:"id"` + Name string `db:"name" json:"name"` + DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` + PrebuildStatus PrebuildStatus `db:"prebuild_status" json:"prebuild_status"` + Deleted bool `db:"deleted" json:"deleted"` + Deprecated bool `db:"deprecated" json:"deprecated"` } // GetTemplatePresetsWithPrebuilds retrieves template versions with configured presets and prebuilds. @@ -6485,6 +6554,7 @@ func (q *sqlQuerier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templa if err := rows.Scan( &i.TemplateID, &i.TemplateName, + &i.OrganizationID, &i.OrganizationName, &i.TemplateVersionID, &i.TemplateVersionName, @@ -6492,6 +6562,7 @@ func (q *sqlQuerier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templa &i.ID, &i.Name, &i.DesiredInstances, + &i.PrebuildStatus, &i.Deleted, &i.Deprecated, ); err != nil { @@ -6509,21 +6580,22 @@ func (q *sqlQuerier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templa } const getPresetByID = `-- name: GetPresetByID :one -SELECT tvp.id, tvp.template_version_id, tvp.name, tvp.created_at, tvp.desired_instances, tvp.invalidate_after_secs, tv.template_id, tv.organization_id FROM +SELECT tvp.id, tvp.template_version_id, tvp.name, tvp.created_at, tvp.desired_instances, tvp.invalidate_after_secs, tvp.prebuild_status, tv.template_id, tv.organization_id FROM template_version_presets tvp INNER JOIN template_versions tv ON tvp.template_version_id = tv.id WHERE tvp.id = $1 ` type GetPresetByIDRow struct { - ID uuid.UUID `db:"id" json:"id"` - TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` - Name string `db:"name" json:"name"` - CreatedAt time.Time `db:"created_at" json:"created_at"` - DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` - InvalidateAfterSecs sql.NullInt32 `db:"invalidate_after_secs" json:"invalidate_after_secs"` - TemplateID uuid.NullUUID `db:"template_id" json:"template_id"` - OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` + ID uuid.UUID `db:"id" json:"id"` + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + Name string `db:"name" json:"name"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"` + InvalidateAfterSecs sql.NullInt32 `db:"invalidate_after_secs" json:"invalidate_after_secs"` + PrebuildStatus PrebuildStatus `db:"prebuild_status" json:"prebuild_status"` + TemplateID uuid.NullUUID `db:"template_id" json:"template_id"` + OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` } func (q *sqlQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (GetPresetByIDRow, error) { @@ -6536,6 +6608,7 @@ func (q *sqlQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (Get &i.CreatedAt, &i.DesiredInstances, &i.InvalidateAfterSecs, + &i.PrebuildStatus, &i.TemplateID, &i.OrganizationID, ) @@ -6544,7 +6617,7 @@ func (q *sqlQuerier) GetPresetByID(ctx context.Context, presetID uuid.UUID) (Get const getPresetByWorkspaceBuildID = `-- name: GetPresetByWorkspaceBuildID :one SELECT - template_version_presets.id, template_version_presets.template_version_id, template_version_presets.name, template_version_presets.created_at, template_version_presets.desired_instances, template_version_presets.invalidate_after_secs + template_version_presets.id, template_version_presets.template_version_id, template_version_presets.name, template_version_presets.created_at, template_version_presets.desired_instances, template_version_presets.invalidate_after_secs, template_version_presets.prebuild_status FROM template_version_presets INNER JOIN workspace_builds ON workspace_builds.template_version_preset_id = template_version_presets.id @@ -6562,6 +6635,7 @@ func (q *sqlQuerier) GetPresetByWorkspaceBuildID(ctx context.Context, workspaceB &i.CreatedAt, &i.DesiredInstances, &i.InvalidateAfterSecs, + &i.PrebuildStatus, ) return i, err } @@ -6643,7 +6717,7 @@ func (q *sqlQuerier) GetPresetParametersByTemplateVersionID(ctx context.Context, const getPresetsByTemplateVersionID = `-- name: GetPresetsByTemplateVersionID :many SELECT - id, template_version_id, name, created_at, desired_instances, invalidate_after_secs + id, template_version_id, name, created_at, desired_instances, invalidate_after_secs, prebuild_status FROM template_version_presets WHERE @@ -6666,6 +6740,7 @@ func (q *sqlQuerier) GetPresetsByTemplateVersionID(ctx context.Context, template &i.CreatedAt, &i.DesiredInstances, &i.InvalidateAfterSecs, + &i.PrebuildStatus, ); err != nil { return nil, err } @@ -6696,7 +6771,7 @@ VALUES ( $4, $5, $6 -) RETURNING id, template_version_id, name, created_at, desired_instances, invalidate_after_secs +) RETURNING id, template_version_id, name, created_at, desired_instances, invalidate_after_secs, prebuild_status ` type InsertPresetParams struct { @@ -6725,6 +6800,7 @@ func (q *sqlQuerier) InsertPreset(ctx context.Context, arg InsertPresetParams) ( &i.CreatedAt, &i.DesiredInstances, &i.InvalidateAfterSecs, + &i.PrebuildStatus, ) return i, err } @@ -6773,6 +6849,22 @@ func (q *sqlQuerier) InsertPresetParameters(ctx context.Context, arg InsertPrese return items, nil } +const updatePresetPrebuildStatus = `-- name: UpdatePresetPrebuildStatus :exec +UPDATE template_version_presets +SET prebuild_status = $1 +WHERE id = $2 +` + +type UpdatePresetPrebuildStatusParams struct { + Status PrebuildStatus `db:"status" json:"status"` + PresetID uuid.UUID `db:"preset_id" json:"preset_id"` +} + +func (q *sqlQuerier) UpdatePresetPrebuildStatus(ctx context.Context, arg UpdatePresetPrebuildStatusParams) error { + _, err := q.db.ExecContext(ctx, updatePresetPrebuildStatus, arg.Status, arg.PresetID) + return err +} + const deleteOldProvisionerDaemons = `-- name: DeleteOldProvisionerDaemons :exec DELETE FROM provisioner_daemons WHERE ( (created_at < (NOW() - INTERVAL '7 days') AND last_seen_at IS NULL) OR diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index 8c27ddf62b7c3..9cd4321afec23 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -27,6 +27,7 @@ RETURNING w.id, w.name; SELECT t.id AS template_id, t.name AS template_name, + o.id AS organization_id, o.name AS organization_name, tv.id AS template_version_id, tv.name AS template_version_name, @@ -34,6 +35,7 @@ SELECT tvp.id, tvp.name, tvp.desired_instances AS desired_instances, + tvp.prebuild_status, t.deleted, t.deprecated != '' AS deprecated FROM templates t @@ -129,6 +131,42 @@ WHERE tsb.rn <= tsb.desired_instances -- Fetch the last N builds, where N is the AND created_at >= @lookback::timestamptz GROUP BY tsb.template_version_id, tsb.preset_id, fc.num_failed; +-- GetPresetsAtFailureLimit groups workspace builds by preset ID. +-- Each preset is associated with exactly one template version ID. +-- For each preset, the query checks the last hard_limit builds. +-- If all of them failed, the preset is considered to have hit the hard failure limit. +-- The query returns a list of preset IDs that have reached this failure threshold. +-- Only active template versions with configured presets are considered. +-- name: GetPresetsAtFailureLimit :many +WITH filtered_builds AS ( + -- Only select builds which are for prebuild creations + SELECT wlb.template_version_id, wlb.created_at, tvp.id AS preset_id, wlb.job_status, tvp.desired_instances + FROM template_version_presets tvp + INNER JOIN workspace_latest_builds wlb ON wlb.template_version_preset_id = tvp.id + INNER JOIN workspaces w ON wlb.workspace_id = w.id + INNER JOIN template_versions tv ON wlb.template_version_id = tv.id + INNER JOIN templates t ON tv.template_id = t.id AND t.active_version_id = tv.id + WHERE tvp.desired_instances IS NOT NULL -- Consider only presets that have a prebuild configuration. + AND wlb.transition = 'start'::workspace_transition + AND w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0' +), +time_sorted_builds AS ( + -- Group builds by preset, then sort each group by created_at. + SELECT fb.template_version_id, fb.created_at, fb.preset_id, fb.job_status, fb.desired_instances, + ROW_NUMBER() OVER (PARTITION BY fb.preset_id ORDER BY fb.created_at DESC) as rn + FROM filtered_builds fb +) +SELECT + tsb.template_version_id, + tsb.preset_id +FROM time_sorted_builds tsb +-- For each preset, check the last hard_limit builds. +-- If all of them failed, the preset is considered to have hit the hard failure limit. +WHERE tsb.rn <= @hard_limit::bigint + AND tsb.job_status = 'failed'::provisioner_job_status +GROUP BY tsb.template_version_id, tsb.preset_id +HAVING COUNT(*) = @hard_limit::bigint; + -- name: GetPrebuildMetrics :many SELECT t.name as template_name, diff --git a/coderd/database/queries/presets.sql b/coderd/database/queries/presets.sql index 6d5646a285b4a..2fb6722bc2c33 100644 --- a/coderd/database/queries/presets.sql +++ b/coderd/database/queries/presets.sql @@ -25,6 +25,11 @@ SELECT unnest(@values :: TEXT[]) RETURNING *; +-- name: UpdatePresetPrebuildStatus :exec +UPDATE template_version_presets +SET prebuild_status = @status +WHERE id = @preset_id; + -- name: GetPresetsByTemplateVersionID :many SELECT * diff --git a/coderd/notifications/events.go b/coderd/notifications/events.go index 35d9925055da5..0e88361b56f68 100644 --- a/coderd/notifications/events.go +++ b/coderd/notifications/events.go @@ -42,6 +42,11 @@ var ( TemplateWorkspaceResourceReplaced = uuid.MustParse("89d9745a-816e-4695-a17f-3d0a229e2b8d") ) +// Prebuilds-related events +var ( + PrebuildFailureLimitReached = uuid.MustParse("414d9331-c1fc-4761-b40c-d1f4702279eb") +) + // Notification-related events. var ( TemplateTestNotification = uuid.MustParse("c425f63e-716a-4bf4-ae24-78348f706c3f") diff --git a/coderd/notifications/notifications_test.go b/coderd/notifications/notifications_test.go index 8f8a3c82441e0..fab87af41deb9 100644 --- a/coderd/notifications/notifications_test.go +++ b/coderd/notifications/notifications_test.go @@ -1250,6 +1250,22 @@ func TestNotificationTemplates_Golden(t *testing.T) { }, }, }, + { + name: "PrebuildFailureLimitReached", + id: notifications.PrebuildFailureLimitReached, + payload: types.MessagePayload{ + UserName: "Bobby", + UserEmail: "bobby@coder.com", + UserUsername: "bobby", + Labels: map[string]string{ + "org": "cern", + "template": "docker", + "template_version": "angry_torvalds", + "preset": "particle-accelerator", + }, + Data: map[string]any{}, + }, + }, } // We must have a test case for every notification_template. This is enforced below: diff --git a/coderd/notifications/testdata/rendered-templates/smtp/PrebuildFailureLimitReached.html.golden b/coderd/notifications/testdata/rendered-templates/smtp/PrebuildFailureLimitReached.html.golden new file mode 100644 index 0000000000000..69f13b86ca71c --- /dev/null +++ b/coderd/notifications/testdata/rendered-templates/smtp/PrebuildFailureLimitReached.html.golden @@ -0,0 +1,112 @@ +From: system@coder.com +To: bobby@coder.com +Subject: There is a problem creating prebuilt workspaces +Message-Id: 02ee4935-73be-4fa1-a290-ff9999026b13@blush-whale-48 +Date: Fri, 11 Oct 2024 09:03:06 +0000 +Content-Type: multipart/alternative; boundary=bbe61b741255b6098bb6b3c1f41b885773df633cb18d2a3002b68e4bc9c4 +MIME-Version: 1.0 + +--bbe61b741255b6098bb6b3c1f41b885773df633cb18d2a3002b68e4bc9c4 +Content-Transfer-Encoding: quoted-printable +Content-Type: text/plain; charset=UTF-8 + +Hi Bobby, + +The number of failed prebuild attempts has reached the hard limit for templ= +ate docker and preset particle-accelerator. + +To resume prebuilds, fix the underlying issue and upload a new template ver= +sion. + +Refer to the documentation for more details: + +Troubleshooting templates (https://coder.com/docs/admin/templates/troublesh= +ooting) +Troubleshooting of prebuilt workspaces (https://coder.com/docs/admin/templa= +tes/extending-templates/prebuilt-workspaces#administration-and-troubleshoot= +ing) + + +View failed prebuilt workspaces: http://test.com/workspaces?filter=3Downer:= +prebuilds+status:failed+template:docker + +View template version: http://test.com/templates/cern/docker/versions/angry= +_torvalds + +--bbe61b741255b6098bb6b3c1f41b885773df633cb18d2a3002b68e4bc9c4 +Content-Transfer-Encoding: quoted-printable +Content-Type: text/html; charset=UTF-8 + + + +
+ + +Hi Bobby,
+The number of failed prebuild attempts has reached the hard limi= +t for template docker and preset particle-accelera= +tor.
+ +To resume prebuilds, fix the underlying issue and upload a new template = +version.
+ +Refer to the documentation for more details:
+- Troubl=
+eshooting templates
+- Troubleshooting of pre=
+built workspaces
© 2024 Coder. All rights reserved - h= +ttp://test.com
+ + +