From 9d3ea6f77f7f9fcc7eaaaedea6d630f6c4ae41d1 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Thu, 19 Jun 2025 22:41:09 +0100 Subject: [PATCH 1/7] fix: wsbuilder: complete job and mark workspace deleted if no provisioners available --- cli/delete_test.go | 26 +++-- coderd/database/dbmem/dbmem.go | 2 +- coderd/workspacebuilds.go | 8 +- coderd/workspacebuilds_test.go | 157 ++++++++++++++++++++++----- coderd/wsbuilder/wsbuilder.go | 29 +++++ coderd/wsbuilder/wsbuilder_test.go | 167 +++++++++++++++++++++++++++++ 6 files changed, 346 insertions(+), 43 deletions(-) diff --git a/cli/delete_test.go b/cli/delete_test.go index 1d4dc8dfb40ad..bde50a13158cd 100644 --- a/cli/delete_test.go +++ b/cli/delete_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "net/http" "testing" "github.com/stretchr/testify/assert" @@ -51,28 +52,35 @@ func TestDelete(t *testing.T) { t.Parallel() client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) owner := coderdtest.CreateFirstUser(t, client) - version := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, nil) - coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID) - template := coderdtest.CreateTemplate(t, client, owner.OrganizationID, version.ID) - workspace := coderdtest.CreateWorkspace(t, client, template.ID) - coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) + templateAdmin, _ := coderdtest.CreateAnotherUser(t, client, owner.OrganizationID, rbac.RoleTemplateAdmin()) + version := coderdtest.CreateTemplateVersion(t, templateAdmin, owner.OrganizationID, nil) + coderdtest.AwaitTemplateVersionJobCompleted(t, templateAdmin, version.ID) + template := coderdtest.CreateTemplate(t, templateAdmin, owner.OrganizationID, version.ID) + workspace := coderdtest.CreateWorkspace(t, templateAdmin, template.ID) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, templateAdmin, workspace.LatestBuild.ID) + + ctx := testutil.Context(t, testutil.WaitShort) inv, root := clitest.New(t, "delete", workspace.Name, "-y", "--orphan") + clitest.SetupConfig(t, templateAdmin, root) - //nolint:gocritic // Deleting orphaned workspaces requires an admin. - clitest.SetupConfig(t, client, root) doneChan := make(chan struct{}) pty := ptytest.New(t).Attach(inv) inv.Stderr = pty.Output() go func() { defer close(doneChan) - err := inv.Run() + err := inv.WithContext(ctx).Run() // When running with the race detector on, we sometimes get an EOF. if err != nil { assert.ErrorIs(t, err, io.EOF) } }() pty.ExpectMatch("has been deleted") - <-doneChan + testutil.TryReceive(ctx, t, doneChan) + + _, err := client.Workspace(ctx, workspace.ID) + require.Error(t, err) + cerr := coderdtest.SDKError(t, err) + require.Equal(t, http.StatusGone, cerr.StatusCode()) }) // Super orphaned, as the workspace doesn't even have a user. diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index ee1c7471808d5..60957dab8a013 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -4464,7 +4464,7 @@ func (q *FakeQuerier) GetProvisionerDaemons(_ context.Context) ([]database.Provi defer q.mutex.RUnlock() if len(q.provisionerDaemons) == 0 { - return nil, sql.ErrNoRows + return []database.ProvisionerDaemon{}, nil } // copy the data so that the caller can't manipulate any data inside dbmem // after returning diff --git a/coderd/workspacebuilds.go b/coderd/workspacebuilds.go index 4d90948a8f9a1..fcc9ce07650cf 100644 --- a/coderd/workspacebuilds.go +++ b/coderd/workspacebuilds.go @@ -423,7 +423,10 @@ func (api *API) postWorkspaceBuilds(rw http.ResponseWriter, r *http.Request) { return } + var qpr database.GetProvisionerJobsByIDsWithQueuePositionRow if provisionerJob != nil { + qpr.ProvisionerJob = *provisionerJob + qpr.QueuePosition = 0 if err := provisionerjobs.PostJob(api.Pubsub, *provisionerJob); err != nil { // Client probably doesn't care about this error, so just log it. api.Logger.Error(ctx, "failed to post provisioner job to pubsub", slog.Error(err)) @@ -433,10 +436,7 @@ func (api *API) postWorkspaceBuilds(rw http.ResponseWriter, r *http.Request) { apiBuild, err := api.convertWorkspaceBuild( *workspaceBuild, workspace, - database.GetProvisionerJobsByIDsWithQueuePositionRow{ - ProvisionerJob: *provisionerJob, - QueuePosition: 0, - }, + qpr, []database.WorkspaceResource{}, []database.WorkspaceResourceMetadatum{}, []database.WorkspaceAgent{}, diff --git a/coderd/workspacebuilds_test.go b/coderd/workspacebuilds_test.go index ac33c9e92c4f7..c6cc2418c3c54 100644 --- a/coderd/workspacebuilds_test.go +++ b/coderd/workspacebuilds_test.go @@ -25,6 +25,7 @@ import ( "github.com/coder/coder/v2/coderd/coderdtest/oidctest" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbfake" "github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/database/dbtime" @@ -371,42 +372,140 @@ func TestWorkspaceBuildsProvisionerState(t *testing.T) { t.Run("Orphan", func(t *testing.T) { t.Parallel() - client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) - first := coderdtest.CreateFirstUser(t, client) - - ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) - defer cancel() - version := coderdtest.CreateTemplateVersion(t, client, first.OrganizationID, nil) - template := coderdtest.CreateTemplate(t, client, first.OrganizationID, version.ID) - coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID) + t.Run("WithoutDelete", func(t *testing.T) { + t.Parallel() + client, store := coderdtest.NewWithDatabase(t, nil) + first := coderdtest.CreateFirstUser(t, client) + templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin()) + + r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{ + OwnerID: templateAdminUser.ID, + OrganizationID: first.OrganizationID, + }).Do() + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + // Trying to orphan without delete transition fails. + _, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{ + TemplateVersionID: r.TemplateVersion.ID, + Transition: codersdk.WorkspaceTransitionStart, + Orphan: true, + }) + require.Error(t, err, "Orphan is only permitted when deleting a workspace.") + cerr := coderdtest.SDKError(t, err) + require.Equal(t, http.StatusBadRequest, cerr.StatusCode()) + }) - workspace := coderdtest.CreateWorkspace(t, client, template.ID) - coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) + t.Run("WithState", func(t *testing.T) { + t.Parallel() + client, store := coderdtest.NewWithDatabase(t, nil) + first := coderdtest.CreateFirstUser(t, client) + templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin()) + + r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{ + OwnerID: templateAdminUser.ID, + OrganizationID: first.OrganizationID, + }).Do() + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + // Providing both state and orphan fails. + _, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{ + TemplateVersionID: r.TemplateVersion.ID, + Transition: codersdk.WorkspaceTransitionDelete, + ProvisionerState: []byte(" "), + Orphan: true, + }) + require.Error(t, err) + cerr := coderdtest.SDKError(t, err) + require.Equal(t, http.StatusBadRequest, cerr.StatusCode()) + }) - // Providing both state and orphan fails. - _, err := client.CreateWorkspaceBuild(ctx, workspace.ID, codersdk.CreateWorkspaceBuildRequest{ - TemplateVersionID: workspace.LatestBuild.TemplateVersionID, - Transition: codersdk.WorkspaceTransitionDelete, - ProvisionerState: []byte(" "), - Orphan: true, + t.Run("NoPermission", func(t *testing.T) { + t.Parallel() + client, store := coderdtest.NewWithDatabase(t, nil) + first := coderdtest.CreateFirstUser(t, client) + member, memberUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID) + + r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{ + OwnerID: memberUser.ID, + OrganizationID: first.OrganizationID, + }).Do() + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + // Trying to orphan without being a template admin fails. + _, err := member.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{ + TemplateVersionID: r.TemplateVersion.ID, + Transition: codersdk.WorkspaceTransitionDelete, + Orphan: true, + }) + require.Error(t, err) + cerr := coderdtest.SDKError(t, err) + require.Equal(t, http.StatusForbidden, cerr.StatusCode()) }) - require.Error(t, err) - cerr := coderdtest.SDKError(t, err) - require.Equal(t, http.StatusBadRequest, cerr.StatusCode()) - // Regular orphan operation succeeds. - build, err := client.CreateWorkspaceBuild(ctx, workspace.ID, codersdk.CreateWorkspaceBuildRequest{ - TemplateVersionID: workspace.LatestBuild.TemplateVersionID, - Transition: codersdk.WorkspaceTransitionDelete, - Orphan: true, + t.Run("OK", func(t *testing.T) { + // Include a provisioner so that we can test that provisionerdserver + // performs deletion. + client, store := coderdtest.NewWithDatabase(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) + first := coderdtest.CreateFirstUser(t, client) + templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin()) + + r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{ + OwnerID: templateAdminUser.ID, + OrganizationID: first.OrganizationID, + }).Do() + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + // Regular orphan operation succeeds. + build, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{ + TemplateVersionID: r.TemplateVersion.ID, + Transition: codersdk.WorkspaceTransitionDelete, + Orphan: true, + }) + require.NoError(t, err) + require.Equal(t, codersdk.WorkspaceTransitionDelete, build.Transition) + require.Equal(t, codersdk.ProvisionerJobPending, build.Job.Status) }) - require.NoError(t, err) - coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, build.ID) - _, err = client.Workspace(ctx, workspace.ID) - require.Error(t, err) - require.Equal(t, http.StatusGone, coderdtest.SDKError(t, err).StatusCode()) + t.Run("NoProvisioners", func(t *testing.T) { + t.Parallel() + client, store := coderdtest.NewWithDatabase(t, nil) + first := coderdtest.CreateFirstUser(t, client) + templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin()) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{ + OwnerID: templateAdminUser.ID, + OrganizationID: first.OrganizationID, + }).Do() + + // nolint:gocritic // For testing + daemons, err := store.GetProvisionerDaemons(dbauthz.AsSystemReadProvisionerDaemons(ctx)) + require.NoError(t, err) + require.Empty(t, daemons, "Provisioner daemons should be empty for this test") + + // Orphan deletion still succeeds despite no provisioners being available. + build, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{ + TemplateVersionID: r.TemplateVersion.ID, + Transition: codersdk.WorkspaceTransitionDelete, + Orphan: true, + }) + require.NoError(t, err) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, templateAdmin, build.ID) + + ws, err := client.Workspace(ctx, r.Workspace.ID) + require.Empty(t, ws) + require.Equal(t, http.StatusGone, coderdtest.SDKError(t, err).StatusCode()) + }) }) } diff --git a/coderd/wsbuilder/wsbuilder.go b/coderd/wsbuilder/wsbuilder.go index 9605df58014de..f2f4493f9c71f 100644 --- a/coderd/wsbuilder/wsbuilder.go +++ b/coderd/wsbuilder/wsbuilder.go @@ -464,6 +464,35 @@ func (b *Builder) buildTx(authFunc func(action policy.Action, object rbac.Object return BuildError{http.StatusInternalServerError, "get workspace build", err} } + // If the requestor is trying to orphan-delete a workspace and there are no + // provisioners available, we should complete the build and mark the + // workspace as deleted ourselves. + // Orphan-deleting a workspace sends an empty state to Terraform, which means + // it won't actually delete anything. + // There are cases where tagged provisioner daemons have been decommissioned + // without deleting the relevant workspaces, and without any provisioners + // available these workspaces cannot be deleted. + if b.state.orphan && len(provisionerDaemons) == 0 { + // nolint: gocritic // At this moment, we are pretending to be provisionerd. + if err := store.UpdateProvisionerJobWithCompleteWithStartedAtByID(dbauthz.AsProvisionerd(b.ctx), database.UpdateProvisionerJobWithCompleteWithStartedAtByIDParams{ + CompletedAt: sql.NullTime{Valid: true, Time: now}, + Error: sql.NullString{Valid: true, String: "No provisioners were available to handle the orphan-delete request. The workspace was marked as deleted, but no resources were destroyed."}, + ErrorCode: sql.NullString{Valid: false}, + ID: provisionerJob.ID, + StartedAt: sql.NullTime{Valid: true, Time: now}, + UpdatedAt: now, + }); err != nil { + return BuildError{http.StatusInternalServerError, "mark orphan-delete provisioner job as completed", err} + } + + if err := store.UpdateWorkspaceDeletedByID(b.ctx, database.UpdateWorkspaceDeletedByIDParams{ + ID: b.workspace.ID, + Deleted: true, + }); err != nil { + return BuildError{http.StatusInternalServerError, "mark workspace as deleted", err} + } + } + return nil }, nil) if err != nil { diff --git a/coderd/wsbuilder/wsbuilder_test.go b/coderd/wsbuilder/wsbuilder_test.go index 58999a33e6e5e..420c440916211 100644 --- a/coderd/wsbuilder/wsbuilder_test.go +++ b/coderd/wsbuilder/wsbuilder_test.go @@ -839,6 +839,142 @@ func TestWorkspaceBuildWithPreset(t *testing.T) { req.NoError(err) } +func TestWorkspaceBuildDeleteOrphan(t *testing.T) { + t.Parallel() + + t.Run("Provisioners", func(t *testing.T) { + t.Parallel() + req := require.New(t) + asrt := assert.New(t) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var buildID uuid.UUID + + mDB := expectDB(t, + // Inputs + withTemplate, + withInactiveVersion(nil), + withLastBuildFound, + withTemplateVersionVariables(inactiveVersionID, nil), + withRichParameters(nil), + withWorkspaceTags(inactiveVersionID, nil), + withProvisionerDaemons([]database.GetEligibleProvisionerDaemonsByProvisionerJobIDsRow{{ + JobID: inactiveJobID, + ProvisionerDaemon: database.ProvisionerDaemon{}, + }}), + + // Outputs + expectProvisionerJob(func(job database.InsertProvisionerJobParams) { + asrt.Equal(userID, job.InitiatorID) + asrt.Equal(inactiveFileID, job.FileID) + input := provisionerdserver.WorkspaceProvisionJob{} + err := json.Unmarshal(job.Input, &input) + req.NoError(err) + // store build ID for later + buildID = input.WorkspaceBuildID + }), + + withInTx, + expectBuild(func(bld database.InsertWorkspaceBuildParams) { + asrt.Equal(inactiveVersionID, bld.TemplateVersionID) + asrt.Equal(workspaceID, bld.WorkspaceID) + asrt.Equal(int32(2), bld.BuildNumber) + asrt.Empty(string(bld.ProvisionerState)) + asrt.Equal(userID, bld.InitiatorID) + asrt.Equal(database.WorkspaceTransitionDelete, bld.Transition) + asrt.Equal(database.BuildReasonInitiator, bld.Reason) + asrt.Equal(buildID, bld.ID) + }), + withBuild, + expectBuildParameters(func(params database.InsertWorkspaceBuildParametersParams) { + asrt.Equal(buildID, params.WorkspaceBuildID) + asrt.Empty(params.Name) + asrt.Empty(params.Value) + }), + ) + + ws := database.Workspace{ID: workspaceID, TemplateID: templateID, OwnerID: userID} + uut := wsbuilder.New(ws, database.WorkspaceTransitionDelete).Orphan() + // nolint: dogsled + _, _, _, err := uut.Build(ctx, mDB, nil, audit.WorkspaceBuildBaggage{}) + req.NoError(err) + }) + + t.Run("NoProvisioners", func(t *testing.T) { + t.Parallel() + req := require.New(t) + asrt := assert.New(t) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var buildID uuid.UUID + var jobID uuid.UUID + + mDB := expectDB(t, + // Inputs + withTemplate, + withInactiveVersion(nil), + withLastBuildFound, + withTemplateVersionVariables(inactiveVersionID, nil), + withRichParameters(nil), + withWorkspaceTags(inactiveVersionID, nil), + withProvisionerDaemons([]database.GetEligibleProvisionerDaemonsByProvisionerJobIDsRow{}), + + // Outputs + expectProvisionerJob(func(job database.InsertProvisionerJobParams) { + asrt.Equal(userID, job.InitiatorID) + asrt.Equal(inactiveFileID, job.FileID) + input := provisionerdserver.WorkspaceProvisionJob{} + err := json.Unmarshal(job.Input, &input) + req.NoError(err) + // store build ID for later + buildID = input.WorkspaceBuildID + // store job ID for later + jobID = job.ID + }), + + withInTx, + expectBuild(func(bld database.InsertWorkspaceBuildParams) { + asrt.Equal(inactiveVersionID, bld.TemplateVersionID) + asrt.Equal(workspaceID, bld.WorkspaceID) + asrt.Equal(int32(2), bld.BuildNumber) + asrt.Empty(string(bld.ProvisionerState)) + asrt.Equal(userID, bld.InitiatorID) + asrt.Equal(database.WorkspaceTransitionDelete, bld.Transition) + asrt.Equal(database.BuildReasonInitiator, bld.Reason) + asrt.Equal(buildID, bld.ID) + }), + withBuild, + expectBuildParameters(func(params database.InsertWorkspaceBuildParametersParams) { + asrt.Equal(buildID, params.WorkspaceBuildID) + asrt.Empty(params.Name) + asrt.Empty(params.Value) + }), + + // Because no provisioners were available and the request was to delete --orphan + expectUpdateProvisionerJobWithCompleteWithStartedAtByID(func(params database.UpdateProvisionerJobWithCompleteWithStartedAtByIDParams) { + asrt.Equal(jobID, params.ID) + asrt.Contains(params.Error.String, "No provisioners were available") + asrt.True(params.CompletedAt.Valid) + asrt.True(params.StartedAt.Valid) + }), + expectUpdateWorkspaceDeletedByID(func(params database.UpdateWorkspaceDeletedByIDParams) { + asrt.Equal(workspaceID, params.ID) + asrt.True(params.Deleted) + }), + ) + + ws := database.Workspace{ID: workspaceID, TemplateID: templateID, OwnerID: userID} + uut := wsbuilder.New(ws, database.WorkspaceTransitionDelete).Orphan() + // nolint: dogsled + _, _, _, err := uut.Build(ctx, mDB, nil, audit.WorkspaceBuildBaggage{}) + req.NoError(err) + }) +} + func TestProvisionerVersionSupportsDynamicParameters(t *testing.T) { t.Parallel() @@ -1107,6 +1243,37 @@ func expectProvisionerJob( } } +// expectUpdateProvisionerJobWithCompleteWithStartedAtByID asserts a call to +// expectUpdateProvisionerJobWithCompleteWithStartedAtByID and runs the provided +// assertions against it. +func expectUpdateProvisionerJobWithCompleteWithStartedAtByID(assertions func(params database.UpdateProvisionerJobWithCompleteWithStartedAtByIDParams)) func(mTx *dbmock.MockStore) { + return func(mTx *dbmock.MockStore) { + mTx.EXPECT().UpdateProvisionerJobWithCompleteWithStartedAtByID(gomock.Any(), gomock.Any()). + Times(1). + DoAndReturn( + func(ctx context.Context, params database.UpdateProvisionerJobWithCompleteWithStartedAtByIDParams) error { + assertions(params) + return nil + }, + ) + } +} + +// expectUpdateWorkspaceDeletedByID asserts a call to UpdateWorkspaceDeletedByID +// and runs the provided assertions against it. +func expectUpdateWorkspaceDeletedByID(assertions func(params database.UpdateWorkspaceDeletedByIDParams)) func(mTx *dbmock.MockStore) { + return func(mTx *dbmock.MockStore) { + mTx.EXPECT().UpdateWorkspaceDeletedByID(gomock.Any(), gomock.Any()). + Times(1). + DoAndReturn( + func(ctx context.Context, params database.UpdateWorkspaceDeletedByIDParams) error { + assertions(params) + return nil + }, + ) + } +} + func withBuild(mTx *dbmock.MockStore) { mTx.EXPECT().GetWorkspaceBuildByID(gomock.Any(), gomock.Any()).Times(1). DoAndReturn(func(ctx context.Context, id uuid.UUID) (database.WorkspaceBuild, error) { From d46f1564eb8146659f5620b08213030ddc188956 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 20 Jun 2025 10:30:15 +0100 Subject: [PATCH 2/7] check daemon last seen at --- coderd/wsbuilder/wsbuilder.go | 13 +++++++++++-- coderd/wsbuilder/wsbuilder_test.go | 6 ++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/coderd/wsbuilder/wsbuilder.go b/coderd/wsbuilder/wsbuilder.go index f2f4493f9c71f..b67dc057ff5da 100644 --- a/coderd/wsbuilder/wsbuilder.go +++ b/coderd/wsbuilder/wsbuilder.go @@ -472,11 +472,19 @@ func (b *Builder) buildTx(authFunc func(action policy.Action, object rbac.Object // There are cases where tagged provisioner daemons have been decommissioned // without deleting the relevant workspaces, and without any provisioners // available these workspaces cannot be deleted. - if b.state.orphan && len(provisionerDaemons) == 0 { + hasActiveEligibleProvisioner := false + for _, pd := range provisionerDaemons { + age := now.Sub(pd.ProvisionerDaemon.LastSeenAt.Time) + if age <= provisionerdserver.StaleInterval { + hasActiveEligibleProvisioner = true + break + } + } + if b.state.orphan && !hasActiveEligibleProvisioner { // nolint: gocritic // At this moment, we are pretending to be provisionerd. if err := store.UpdateProvisionerJobWithCompleteWithStartedAtByID(dbauthz.AsProvisionerd(b.ctx), database.UpdateProvisionerJobWithCompleteWithStartedAtByIDParams{ CompletedAt: sql.NullTime{Valid: true, Time: now}, - Error: sql.NullString{Valid: true, String: "No provisioners were available to handle the orphan-delete request. The workspace was marked as deleted, but no resources were destroyed."}, + Error: sql.NullString{Valid: true, String: "No provisioners were available to handle the request. The workspace has been deleted. No resources were destroyed."}, ErrorCode: sql.NullString{Valid: false}, ID: provisionerJob.ID, StartedAt: sql.NullTime{Valid: true, Time: now}, @@ -484,6 +492,7 @@ func (b *Builder) buildTx(authFunc func(action policy.Action, object rbac.Object }); err != nil { return BuildError{http.StatusInternalServerError, "mark orphan-delete provisioner job as completed", err} } + // TODO: audit baggage? if err := store.UpdateWorkspaceDeletedByID(b.ctx, database.UpdateWorkspaceDeletedByIDParams{ ID: b.workspace.ID, diff --git a/coderd/wsbuilder/wsbuilder_test.go b/coderd/wsbuilder/wsbuilder_test.go index 420c440916211..0eb1cb79b144c 100644 --- a/coderd/wsbuilder/wsbuilder_test.go +++ b/coderd/wsbuilder/wsbuilder_test.go @@ -861,8 +861,10 @@ func TestWorkspaceBuildDeleteOrphan(t *testing.T) { withRichParameters(nil), withWorkspaceTags(inactiveVersionID, nil), withProvisionerDaemons([]database.GetEligibleProvisionerDaemonsByProvisionerJobIDsRow{{ - JobID: inactiveJobID, - ProvisionerDaemon: database.ProvisionerDaemon{}, + JobID: inactiveJobID, + ProvisionerDaemon: database.ProvisionerDaemon{ + LastSeenAt: sql.NullTime{Valid: true, Time: dbtime.Now()}, + }, }}), // Outputs From dba9fced3ddd88d22b210d7d9e0d4712c0636a37 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 20 Jun 2025 10:30:56 +0100 Subject: [PATCH 3/7] allow orphaning if last build was cancelled --- .../WorkspaceDeleteDialog.tsx | 89 ++++++++++--------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/site/src/modules/workspaces/WorkspaceMoreActions/WorkspaceDeleteDialog.tsx b/site/src/modules/workspaces/WorkspaceMoreActions/WorkspaceDeleteDialog.tsx index 8f5179b0b64da..4379ef84cc0cb 100644 --- a/site/src/modules/workspaces/WorkspaceMoreActions/WorkspaceDeleteDialog.tsx +++ b/site/src/modules/workspaces/WorkspaceMoreActions/WorkspaceDeleteDialog.tsx @@ -43,6 +43,17 @@ export const WorkspaceDeleteDialog: FC = ({ const hasError = !deletionConfirmed && userConfirmationText.length > 0; const displayErrorMessage = hasError && !isFocused; const inputColor = hasError ? "error" : "primary"; + // Orphaning is sort of a "last resort" that should really only + // be used under the following circumstances: + // a) Terraform is failing to apply while deleting, which + // usually means that builds are failing as well. + // b) No provisioner is available to delete the workspace, which will + // cause the job to remain in the "pending" state indefinitely. + // The assumption here is that and admin will cancel the job. + const canOrphan = + canDeleteFailedWorkspace && + (workspace.latest_build.status === "failed" || + workspace.latest_build.status === "canceled"); return ( = ({ "data-testid": "delete-dialog-name-confirmation", }} /> - { - // Orphaning is sort of a "last resort" that should really only - // be used if Terraform is failing to apply while deleting, which - // usually means that builds are failing as well. - canDeleteFailedWorkspace && - workspace.latest_build.status === "failed" && ( -
-
- { - setOrphanWorkspace(!orphanWorkspace); - }} - className="option" - name="orphan_resources" - checked={orphanWorkspace} - data-testid="orphan-checkbox" - /> -
-
-

Orphan Resources

- - As a Template Admin, you may skip resource cleanup to - delete a failed workspace. Resources such as volumes and - virtual machines will not be destroyed.  - - Learn more... - - -
-
- ) - } + {canOrphan && ( +
+
+ { + setOrphanWorkspace(!orphanWorkspace); + }} + className="option" + name="orphan_resources" + checked={orphanWorkspace} + data-testid="orphan-checkbox" + /> +
+
+

Orphan Resources

+ + As a Template Admin, you may skip resource cleanup to delete + a failed workspace. Resources such as volumes and virtual + machines will not be destroyed.  + + Learn more... + + +
+
+ )} } From f312292935acff66becc13da36042a07f39d40e8 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 20 Jun 2025 11:47:45 +0100 Subject: [PATCH 4/7] re-fetch provisioner job before returning --- coderd/workspacebuilds_test.go | 4 +++- coderd/wsbuilder/wsbuilder.go | 6 ++++++ coderd/wsbuilder/wsbuilder_test.go | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/coderd/workspacebuilds_test.go b/coderd/workspacebuilds_test.go index c6cc2418c3c54..6d6ac4cc91d9a 100644 --- a/coderd/workspacebuilds_test.go +++ b/coderd/workspacebuilds_test.go @@ -500,7 +500,9 @@ func TestWorkspaceBuildsProvisionerState(t *testing.T) { Orphan: true, }) require.NoError(t, err) - coderdtest.AwaitWorkspaceBuildJobCompleted(t, templateAdmin, build.ID) + require.Equal(t, codersdk.WorkspaceTransitionDelete, build.Transition) + require.Equal(t, codersdk.ProvisionerJobFailed, build.Job.Status) + require.Contains(t, build.Job.Error, "No provisioners were available to handle the request") ws, err := client.Workspace(ctx, r.Workspace.ID) require.Empty(t, ws) diff --git a/coderd/wsbuilder/wsbuilder.go b/coderd/wsbuilder/wsbuilder.go index b67dc057ff5da..6022f2412468b 100644 --- a/coderd/wsbuilder/wsbuilder.go +++ b/coderd/wsbuilder/wsbuilder.go @@ -492,8 +492,14 @@ func (b *Builder) buildTx(authFunc func(action policy.Action, object rbac.Object }); err != nil { return BuildError{http.StatusInternalServerError, "mark orphan-delete provisioner job as completed", err} } + // TODO: audit baggage? + // Re-fetch the completed provisioner job. + if pj, err := store.GetProvisionerJobByID(b.ctx, provisionerJob.ID); err == nil { + provisionerJob = pj + } + if err := store.UpdateWorkspaceDeletedByID(b.ctx, database.UpdateWorkspaceDeletedByIDParams{ ID: b.workspace.ID, Deleted: true, diff --git a/coderd/wsbuilder/wsbuilder_test.go b/coderd/wsbuilder/wsbuilder_test.go index 0eb1cb79b144c..97e45c3793c84 100644 --- a/coderd/wsbuilder/wsbuilder_test.go +++ b/coderd/wsbuilder/wsbuilder_test.go @@ -967,6 +967,9 @@ func TestWorkspaceBuildDeleteOrphan(t *testing.T) { asrt.Equal(workspaceID, params.ID) asrt.True(params.Deleted) }), + expectGetProvisionerJobByID(func(job database.ProvisionerJob) { + asrt.Equal(jobID, job.ID) + }), ) ws := database.Workspace{ID: workspaceID, TemplateID: templateID, OwnerID: userID} @@ -1276,6 +1279,22 @@ func expectUpdateWorkspaceDeletedByID(assertions func(params database.UpdateWork } } +// expectGetProvisionerJobByID asserts a call to GetProvisionerJobByID +// and runs the provided assertions against it. +func expectGetProvisionerJobByID(assertions func(job database.ProvisionerJob)) func(mTx *dbmock.MockStore) { + return func(mTx *dbmock.MockStore) { + mTx.EXPECT().GetProvisionerJobByID(gomock.Any(), gomock.Any()). + Times(1). + DoAndReturn( + func(ctx context.Context, id uuid.UUID) (database.ProvisionerJob, error) { + job := database.ProvisionerJob{ID: id} + assertions(job) + return job, nil + }, + ) + } +} + func withBuild(mTx *dbmock.MockStore) { mTx.EXPECT().GetWorkspaceBuildByID(gomock.Any(), gomock.Any()).Times(1). DoAndReturn(func(ctx context.Context, id uuid.UUID) (database.WorkspaceBuild, error) { From 2eea932236c39f716bdb840779429b81a0fd25d6 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 20 Jun 2025 12:35:43 +0100 Subject: [PATCH 5/7] ensure that audit logs are created --- coderd/workspacebuilds_test.go | 47 +++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/coderd/workspacebuilds_test.go b/coderd/workspacebuilds_test.go index 6d6ac4cc91d9a..af831f403b7e4 100644 --- a/coderd/workspacebuilds_test.go +++ b/coderd/workspacebuilds_test.go @@ -1,6 +1,7 @@ package coderd_test import ( + "bytes" "context" "database/sql" "errors" @@ -452,18 +453,38 @@ func TestWorkspaceBuildsProvisionerState(t *testing.T) { t.Run("OK", func(t *testing.T) { // Include a provisioner so that we can test that provisionerdserver // performs deletion. - client, store := coderdtest.NewWithDatabase(t, &coderdtest.Options{IncludeProvisionerDaemon: true}) + auditor := audit.NewMock() + client, store := coderdtest.NewWithDatabase(t, &coderdtest.Options{IncludeProvisionerDaemon: true, Auditor: auditor}) first := coderdtest.CreateFirstUser(t, client) templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin()) + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + // This is a valid zip file. Without this the job will fail to complete. + // TODO: add this to dbfake by default. + zipBytes := make([]byte, 22) + zipBytes[0] = 80 + zipBytes[1] = 75 + zipBytes[2] = 0o5 + zipBytes[3] = 0o6 + uploadRes, err := client.Upload(ctx, codersdk.ContentTypeZip, bytes.NewReader(zipBytes)) + require.NoError(t, err) + + tv := dbfake.TemplateVersion(t, store). + FileID(uploadRes.ID). + Seed(database.TemplateVersion{ + OrganizationID: first.OrganizationID, + CreatedBy: templateAdminUser.ID, + }). + Do() + r := dbfake.WorkspaceBuild(t, store, database.WorkspaceTable{ OwnerID: templateAdminUser.ID, OrganizationID: first.OrganizationID, + TemplateID: tv.Template.ID, }).Do() - ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) - defer cancel() - + auditor.ResetLogs() // Regular orphan operation succeeds. build, err := templateAdmin.CreateWorkspaceBuild(ctx, r.Workspace.ID, codersdk.CreateWorkspaceBuildRequest{ TemplateVersionID: r.TemplateVersion.ID, @@ -471,13 +492,19 @@ func TestWorkspaceBuildsProvisionerState(t *testing.T) { Orphan: true, }) require.NoError(t, err) - require.Equal(t, codersdk.WorkspaceTransitionDelete, build.Transition) - require.Equal(t, codersdk.ProvisionerJobPending, build.Job.Status) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, build.ID) + + // Validate that the deletion was audited. + require.True(t, auditor.Contains(t, database.AuditLog{ + ResourceID: build.ID, + Action: database.AuditActionDelete, + })) }) t.Run("NoProvisioners", func(t *testing.T) { t.Parallel() - client, store := coderdtest.NewWithDatabase(t, nil) + auditor := audit.NewMock() + client, store := coderdtest.NewWithDatabase(t, &coderdtest.Options{Auditor: auditor}) first := coderdtest.CreateFirstUser(t, client) templateAdmin, templateAdminUser := coderdtest.CreateAnotherUser(t, client, first.OrganizationID, rbac.RoleTemplateAdmin()) @@ -507,6 +534,12 @@ func TestWorkspaceBuildsProvisionerState(t *testing.T) { ws, err := client.Workspace(ctx, r.Workspace.ID) require.Empty(t, ws) require.Equal(t, http.StatusGone, coderdtest.SDKError(t, err).StatusCode()) + + // Validate that the deletion was audited. + require.True(t, auditor.Contains(t, database.AuditLog{ + ResourceID: build.ID, + Action: database.AuditActionDelete, + })) }) }) } From 459ce5cc2e9df7e850bc00d43fd398e782a5979b Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 20 Jun 2025 12:46:10 +0100 Subject: [PATCH 6/7] write audit log --- coderd/workspacebuilds.go | 32 ++++++++++++++++++++++++++++++++ coderd/wsbuilder/wsbuilder.go | 2 -- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/coderd/workspacebuilds.go b/coderd/workspacebuilds.go index fcc9ce07650cf..d15ceb919a3eb 100644 --- a/coderd/workspacebuilds.go +++ b/coderd/workspacebuilds.go @@ -3,6 +3,7 @@ package coderd import ( "context" "database/sql" + "encoding/json" "errors" "fmt" "math" @@ -431,6 +432,37 @@ func (api *API) postWorkspaceBuilds(rw http.ResponseWriter, r *http.Request) { // Client probably doesn't care about this error, so just log it. api.Logger.Error(ctx, "failed to post provisioner job to pubsub", slog.Error(err)) } + + // We may need to complete the audit if wsbuilder determined that + // no provisioner could handle an orphan-delete job and completed it. + if createBuild.Orphan && createBuild.Transition == codersdk.WorkspaceTransitionDelete && provisionerJob.CompletedAt.Valid { + buildResourceInfo := audit.AdditionalFields{ + WorkspaceName: workspace.Name, + BuildNumber: strconv.Itoa(int(workspaceBuild.BuildNumber)), + BuildReason: workspaceBuild.Reason, + WorkspaceID: workspace.ID, + WorkspaceOwner: workspace.OwnerName, + } + briBytes, err := json.Marshal(buildResourceInfo) + if err != nil { + api.Logger.Error(ctx, "failed to marshal build resource info for audit", slog.Error(err)) + } + auditor := api.Auditor.Load() + bag := audit.BaggageFromContext(ctx) + audit.BackgroundAudit(ctx, &audit.BackgroundAuditParams[database.WorkspaceBuild]{ + Audit: *auditor, + Log: api.Logger, + UserID: provisionerJob.InitiatorID, + OrganizationID: workspace.OrganizationID, + RequestID: provisionerJob.ID, + IP: bag.IP, + Action: database.AuditActionDelete, + Old: previousWorkspaceBuild, + New: *workspaceBuild, + Status: http.StatusOK, + AdditionalFields: briBytes, + }) + } } apiBuild, err := api.convertWorkspaceBuild( diff --git a/coderd/wsbuilder/wsbuilder.go b/coderd/wsbuilder/wsbuilder.go index 6022f2412468b..3a49a824f1e7c 100644 --- a/coderd/wsbuilder/wsbuilder.go +++ b/coderd/wsbuilder/wsbuilder.go @@ -493,8 +493,6 @@ func (b *Builder) buildTx(authFunc func(action policy.Action, object rbac.Object return BuildError{http.StatusInternalServerError, "mark orphan-delete provisioner job as completed", err} } - // TODO: audit baggage? - // Re-fetch the completed provisioner job. if pj, err := store.GetProvisionerJobByID(b.ctx, provisionerJob.ID); err == nil { provisionerJob = pj From 92a4b991c0da20d123a5b01c5b5db9263c4f68a6 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 20 Jun 2025 13:03:45 +0100 Subject: [PATCH 7/7] nits --- coderd/database/dbmem/dbmem.go | 1 + coderd/workspacebuilds.go | 8 ++++---- .../WorkspaceMoreActions/WorkspaceDeleteDialog.tsx | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 60957dab8a013..ce15b0b05264c 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -4464,6 +4464,7 @@ func (q *FakeQuerier) GetProvisionerDaemons(_ context.Context) ([]database.Provi defer q.mutex.RUnlock() if len(q.provisionerDaemons) == 0 { + // Returning err=nil here for consistency with real querier return []database.ProvisionerDaemon{}, nil } // copy the data so that the caller can't manipulate any data inside dbmem diff --git a/coderd/workspacebuilds.go b/coderd/workspacebuilds.go index d15ceb919a3eb..356b902d28a32 100644 --- a/coderd/workspacebuilds.go +++ b/coderd/workspacebuilds.go @@ -424,10 +424,10 @@ func (api *API) postWorkspaceBuilds(rw http.ResponseWriter, r *http.Request) { return } - var qpr database.GetProvisionerJobsByIDsWithQueuePositionRow + var queuePos database.GetProvisionerJobsByIDsWithQueuePositionRow if provisionerJob != nil { - qpr.ProvisionerJob = *provisionerJob - qpr.QueuePosition = 0 + queuePos.ProvisionerJob = *provisionerJob + queuePos.QueuePosition = 0 if err := provisionerjobs.PostJob(api.Pubsub, *provisionerJob); err != nil { // Client probably doesn't care about this error, so just log it. api.Logger.Error(ctx, "failed to post provisioner job to pubsub", slog.Error(err)) @@ -468,7 +468,7 @@ func (api *API) postWorkspaceBuilds(rw http.ResponseWriter, r *http.Request) { apiBuild, err := api.convertWorkspaceBuild( *workspaceBuild, workspace, - qpr, + queuePos, []database.WorkspaceResource{}, []database.WorkspaceResourceMetadatum{}, []database.WorkspaceAgent{}, diff --git a/site/src/modules/workspaces/WorkspaceMoreActions/WorkspaceDeleteDialog.tsx b/site/src/modules/workspaces/WorkspaceMoreActions/WorkspaceDeleteDialog.tsx index 4379ef84cc0cb..0d858d401787e 100644 --- a/site/src/modules/workspaces/WorkspaceMoreActions/WorkspaceDeleteDialog.tsx +++ b/site/src/modules/workspaces/WorkspaceMoreActions/WorkspaceDeleteDialog.tsx @@ -49,7 +49,7 @@ export const WorkspaceDeleteDialog: FC = ({ // usually means that builds are failing as well. // b) No provisioner is available to delete the workspace, which will // cause the job to remain in the "pending" state indefinitely. - // The assumption here is that and admin will cancel the job. + // The assumption here is that an admin will cancel the job. const canOrphan = canDeleteFailedWorkspace && (workspace.latest_build.status === "failed" ||