From 109cee1e198de581df7dd5feaa38cad75c54799b Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Tue, 26 Aug 2025 20:44:39 +0100 Subject: [PATCH 1/3] fix(coderd/provisionerdserver): workaround lack of coder_ai_task resource on stop transition --- .../provisionerdserver/provisionerdserver.go | 31 ++++++++ .../provisionerdserver_test.go | 79 ++++++++++++++++++- 2 files changed, 109 insertions(+), 1 deletion(-) diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index d7bc29aca3044..1feb50170bfdd 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -1995,6 +1995,37 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro sidebarAppID = uuid.NullUUID{UUID: id, Valid: true} } + // This is a hacky workaround for the issue with tasks 'disappearing' on stop: + // reuse has_ai_task and sidebar_app_id from the previous build. + // It should be removed as soon as possible. + if workspaceBuild.Transition == database.WorkspaceTransitionStop && workspaceBuild.BuildNumber > 1 { + if prevBuild, err := s.Database.GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ctx, database.GetWorkspaceBuildByWorkspaceIDAndBuildNumberParams{ + WorkspaceID: workspaceBuild.WorkspaceID, + BuildNumber: workspaceBuild.BuildNumber - 1, + }); err == nil { + hasAITask = prevBuild.HasAITask.Bool + sidebarAppID = prevBuild.AITaskSidebarAppID + warnUnknownSidebarAppID = false + s.Logger.Warn(ctx, "hacky task workaround: reused has_ai_task and sidebar_app_id from previous build", + slog.F("job_id", job.ID.String()), + slog.F("build_number", prevBuild.BuildNumber), + slog.F("workspace_id", workspace.ID), + slog.F("workspace_build_id", workspaceBuild.ID), + slog.F("transition", string(workspaceBuild.Transition)), + slog.F("sidebar_app_id", sidebarAppID.UUID), + slog.F("has_ai_task", hasAITask), + ) + } else { + s.Logger.Error(ctx, "hacky task workaround failed", + slog.Error(err), + slog.F("job_id", job.ID.String()), + slog.F("workspace_id", workspace.ID), + slog.F("workspace_build_id", workspaceBuild.ID), + slog.F("transition", string(workspaceBuild.Transition)), + ) + } + } + if warnUnknownSidebarAppID { // Ref: https://github.com/coder/coder/issues/18776 // This can happen for a number of reasons: diff --git a/coderd/provisionerdserver/provisionerdserver_test.go b/coderd/provisionerdserver/provisionerdserver_test.go index 8baa7c99c30b9..01787f8ceee82 100644 --- a/coderd/provisionerdserver/provisionerdserver_test.go +++ b/coderd/provisionerdserver/provisionerdserver_test.go @@ -2842,9 +2842,12 @@ func TestCompleteJob(t *testing.T) { // has_ai_task has a default value of nil, but once the workspace build completes it will have a value; // it is set to "true" if the related template has any coder_ai_task resources defined, and its sidebar app ID // will be set as well in that case. + // HACK: we also set it to "true" if any _previous_ workspace builds ever had it set to "true". + // This is to avoid tasks "disappearing" when you stop them. t.Run("WorkspaceBuild", func(t *testing.T) { type testcase struct { name string + seedFunc func(context.Context, database.Store) error // If you need to insert other resources transition database.WorkspaceTransition input *proto.CompletedJob_WorkspaceBuild expectHasAiTask bool @@ -2944,6 +2947,72 @@ func TestCompleteJob(t *testing.T) { expectHasAiTask: true, expectUsageEvent: false, }, + { + name: "current build does not have ai task but previous build did", + seedFunc: func(ctx context.Context, db database.Store) error { + // ws, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{}) + tpls, err := db.GetTemplates(ctx) + if err != nil { + return xerrors.Errorf("seedFunc: get template: %w", err) + } + if len(tpls) != 1 { + return xerrors.Errorf("seed: expected exactly one template, got %d", len(tpls)) + } + ws, err := db.GetWorkspacesByTemplateID(ctx, tpls[0].ID) + if err != nil { + return xerrors.Errorf("seedFunc: get workspaces: %w", err) + } + if len(ws) != 1 { + return xerrors.Errorf("seed: expected exactly one workspace, got %d", len(ws)) + } + w := ws[0] + prevJob := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{ + OrganizationID: w.OrganizationID, + InitiatorID: w.OwnerID, + Type: database.ProvisionerJobTypeWorkspaceBuild, + }) + tvs, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{ + TemplateID: tpls[0].ID, + }) + if err != nil { + return xerrors.Errorf("seedFunc: get template version: %w", err) + } + if len(tvs) != 1 { + return xerrors.Errorf("seed: expected exactly one template version, got %d", len(tvs)) + } + if tpls[0].ActiveVersionID == uuid.Nil { + return xerrors.Errorf("seed: active version id is nil") + } + res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{ + JobID: prevJob.ID, + }) + agt := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ + ResourceID: res.ID, + }) + wa := dbgen.WorkspaceApp(t, db, database.WorkspaceApp{ + AgentID: agt.ID, + }) + _ = dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ + BuildNumber: 1, + HasAITask: sql.NullBool{Valid: true, Bool: true}, + AITaskSidebarAppID: uuid.NullUUID{Valid: true, UUID: wa.ID}, + ID: w.ID, + InitiatorID: w.OwnerID, + JobID: prevJob.ID, + TemplateVersionID: tvs[0].ID, + Transition: database.WorkspaceTransitionStart, + WorkspaceID: w.ID, + }) + return nil + }, + transition: database.WorkspaceTransitionStop, + input: &proto.CompletedJob_WorkspaceBuild{ + AiTasks: []*sdkproto.AITask{}, + Resources: []*sdkproto.Resource{}, + }, + expectHasAiTask: true, + expectUsageEvent: false, + }, } { t.Run(tc.name, func(t *testing.T) { t.Parallel() @@ -2980,6 +3049,9 @@ func TestCompleteJob(t *testing.T) { }) ctx := testutil.Context(t, testutil.WaitShort) + if tc.seedFunc != nil { + require.NoError(t, tc.seedFunc(ctx, db)) + } buildJobID := uuid.New() wsBuildID := uuid.New() @@ -2999,8 +3071,13 @@ func TestCompleteJob(t *testing.T) { Tags: pd.Tags, }) require.NoError(t, err) + var buildNum int32 + if latestBuild, err := db.GetLatestWorkspaceBuildByWorkspaceID(ctx, workspaceTable.ID); err == nil { + buildNum = latestBuild.BuildNumber + } build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ ID: wsBuildID, + BuildNumber: buildNum + 1, JobID: buildJobID, WorkspaceID: workspaceTable.ID, TemplateVersionID: version.ID, @@ -3038,7 +3115,7 @@ func TestCompleteJob(t *testing.T) { require.True(t, build.HasAITask.Valid) // We ALWAYS expect a value to be set, therefore not nil, i.e. valid = true. require.Equal(t, tc.expectHasAiTask, build.HasAITask.Bool) - if tc.expectHasAiTask { + if tc.expectHasAiTask && build.Transition != database.WorkspaceTransitionStop { require.Equal(t, sidebarAppID, build.AITaskSidebarAppID.UUID.String()) } From 2d3faf27e807efb804d1972dc4b731040c336b11 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 27 Aug 2025 09:30:03 +0100 Subject: [PATCH 2/3] cleanup --- .../provisionerdserver_test.go | 123 +++++++++--------- 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/coderd/provisionerdserver/provisionerdserver_test.go b/coderd/provisionerdserver/provisionerdserver_test.go index 01787f8ceee82..12b1e138cf12d 100644 --- a/coderd/provisionerdserver/provisionerdserver_test.go +++ b/coderd/provisionerdserver/provisionerdserver_test.go @@ -2847,7 +2847,7 @@ func TestCompleteJob(t *testing.T) { t.Run("WorkspaceBuild", func(t *testing.T) { type testcase struct { name string - seedFunc func(context.Context, database.Store) error // If you need to insert other resources + seedFunc func(context.Context, testing.TB, database.Store) error // If you need to insert other resources transition database.WorkspaceTransition input *proto.CompletedJob_WorkspaceBuild expectHasAiTask bool @@ -2948,63 +2948,8 @@ func TestCompleteJob(t *testing.T) { expectUsageEvent: false, }, { - name: "current build does not have ai task but previous build did", - seedFunc: func(ctx context.Context, db database.Store) error { - // ws, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{}) - tpls, err := db.GetTemplates(ctx) - if err != nil { - return xerrors.Errorf("seedFunc: get template: %w", err) - } - if len(tpls) != 1 { - return xerrors.Errorf("seed: expected exactly one template, got %d", len(tpls)) - } - ws, err := db.GetWorkspacesByTemplateID(ctx, tpls[0].ID) - if err != nil { - return xerrors.Errorf("seedFunc: get workspaces: %w", err) - } - if len(ws) != 1 { - return xerrors.Errorf("seed: expected exactly one workspace, got %d", len(ws)) - } - w := ws[0] - prevJob := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{ - OrganizationID: w.OrganizationID, - InitiatorID: w.OwnerID, - Type: database.ProvisionerJobTypeWorkspaceBuild, - }) - tvs, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{ - TemplateID: tpls[0].ID, - }) - if err != nil { - return xerrors.Errorf("seedFunc: get template version: %w", err) - } - if len(tvs) != 1 { - return xerrors.Errorf("seed: expected exactly one template version, got %d", len(tvs)) - } - if tpls[0].ActiveVersionID == uuid.Nil { - return xerrors.Errorf("seed: active version id is nil") - } - res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{ - JobID: prevJob.ID, - }) - agt := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ - ResourceID: res.ID, - }) - wa := dbgen.WorkspaceApp(t, db, database.WorkspaceApp{ - AgentID: agt.ID, - }) - _ = dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ - BuildNumber: 1, - HasAITask: sql.NullBool{Valid: true, Bool: true}, - AITaskSidebarAppID: uuid.NullUUID{Valid: true, UUID: wa.ID}, - ID: w.ID, - InitiatorID: w.OwnerID, - JobID: prevJob.ID, - TemplateVersionID: tvs[0].ID, - Transition: database.WorkspaceTransitionStart, - WorkspaceID: w.ID, - }) - return nil - }, + name: "current build does not have ai task but previous build did", + seedFunc: seedPreviousWorkspaceStartWithAITask, transition: database.WorkspaceTransitionStop, input: &proto.CompletedJob_WorkspaceBuild{ AiTasks: []*sdkproto.AITask{}, @@ -3050,7 +2995,7 @@ func TestCompleteJob(t *testing.T) { ctx := testutil.Context(t, testutil.WaitShort) if tc.seedFunc != nil { - require.NoError(t, tc.seedFunc(ctx, db)) + require.NoError(t, tc.seedFunc(ctx, t, db)) } buildJobID := uuid.New() @@ -4321,3 +4266,63 @@ func (f *fakeUsageInserter) InsertDiscreteUsageEvent(_ context.Context, _ databa f.collectedEvents = append(f.collectedEvents, event) return nil } + +func seedPreviousWorkspaceStartWithAITask(ctx context.Context, t testing.TB, db database.Store) error { + t.Helper() + // If the below looks slightly convoluted, that's because it is. + // The workspace doesn't yet have a latest build, so querying all + // workspaces will fail. + tpls, err := db.GetTemplates(ctx) + if err != nil { + return xerrors.Errorf("seedFunc: get template: %w", err) + } + if len(tpls) != 1 { + return xerrors.Errorf("seedFunc: expected exactly one template, got %d", len(tpls)) + } + ws, err := db.GetWorkspacesByTemplateID(ctx, tpls[0].ID) + if err != nil { + return xerrors.Errorf("seedFunc: get workspaces: %w", err) + } + if len(ws) != 1 { + return xerrors.Errorf("seedFunc: expected exactly one workspace, got %d", len(ws)) + } + w := ws[0] + prevJob := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{ + OrganizationID: w.OrganizationID, + InitiatorID: w.OwnerID, + Type: database.ProvisionerJobTypeWorkspaceBuild, + }) + tvs, err := db.GetTemplateVersionsByTemplateID(ctx, database.GetTemplateVersionsByTemplateIDParams{ + TemplateID: tpls[0].ID, + }) + if err != nil { + return xerrors.Errorf("seedFunc: get template version: %w", err) + } + if len(tvs) != 1 { + return xerrors.Errorf("seedFunc: expected exactly one template version, got %d", len(tvs)) + } + if tpls[0].ActiveVersionID == uuid.Nil { + return xerrors.Errorf("seedFunc: active version id is nil") + } + res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{ + JobID: prevJob.ID, + }) + agt := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ + ResourceID: res.ID, + }) + wa := dbgen.WorkspaceApp(t, db, database.WorkspaceApp{ + AgentID: agt.ID, + }) + _ = dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ + BuildNumber: 1, + HasAITask: sql.NullBool{Valid: true, Bool: true}, + AITaskSidebarAppID: uuid.NullUUID{Valid: true, UUID: wa.ID}, + ID: w.ID, + InitiatorID: w.OwnerID, + JobID: prevJob.ID, + TemplateVersionID: tvs[0].ID, + Transition: database.WorkspaceTransitionStart, + WorkspaceID: w.ID, + }) + return nil +} From bcc545949e344b23fcc3cdd4f8ad0198d730459a Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 27 Aug 2025 10:01:46 +0100 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Mathias Fredriksson --- coderd/provisionerdserver/provisionerdserver.go | 6 +++--- coderd/provisionerdserver/provisionerdserver_test.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index 1feb50170bfdd..938fdf1774008 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -1997,7 +1997,7 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro // This is a hacky workaround for the issue with tasks 'disappearing' on stop: // reuse has_ai_task and sidebar_app_id from the previous build. - // It should be removed as soon as possible. + // This workaround should be removed as soon as possible. if workspaceBuild.Transition == database.WorkspaceTransitionStop && workspaceBuild.BuildNumber > 1 { if prevBuild, err := s.Database.GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ctx, database.GetWorkspaceBuildByWorkspaceIDAndBuildNumberParams{ WorkspaceID: workspaceBuild.WorkspaceID, @@ -2006,7 +2006,7 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro hasAITask = prevBuild.HasAITask.Bool sidebarAppID = prevBuild.AITaskSidebarAppID warnUnknownSidebarAppID = false - s.Logger.Warn(ctx, "hacky task workaround: reused has_ai_task and sidebar_app_id from previous build", + s.Logger.Debug(ctx, "task workaround: reused has_ai_task and sidebar_app_id from previous build to keep track of task", slog.F("job_id", job.ID.String()), slog.F("build_number", prevBuild.BuildNumber), slog.F("workspace_id", workspace.ID), @@ -2016,7 +2016,7 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro slog.F("has_ai_task", hasAITask), ) } else { - s.Logger.Error(ctx, "hacky task workaround failed", + s.Logger.Error(ctx, "task workaround: tracking via has_ai_task and sidebar_app from previous build failed", slog.Error(err), slog.F("job_id", job.ID.String()), slog.F("workspace_id", workspace.ID), diff --git a/coderd/provisionerdserver/provisionerdserver_test.go b/coderd/provisionerdserver/provisionerdserver_test.go index 12b1e138cf12d..98af0bb86a73f 100644 --- a/coderd/provisionerdserver/provisionerdserver_test.go +++ b/coderd/provisionerdserver/provisionerdserver_test.go @@ -2842,7 +2842,7 @@ func TestCompleteJob(t *testing.T) { // has_ai_task has a default value of nil, but once the workspace build completes it will have a value; // it is set to "true" if the related template has any coder_ai_task resources defined, and its sidebar app ID // will be set as well in that case. - // HACK: we also set it to "true" if any _previous_ workspace builds ever had it set to "true". + // HACK(johnstcn): we also set it to "true" if any _previous_ workspace builds ever had it set to "true". // This is to avoid tasks "disappearing" when you stop them. t.Run("WorkspaceBuild", func(t *testing.T) { type testcase struct {