Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
c09c9b9
WIP: agent reinitialization
SasSwart Apr 21, 2025
476fe71
fix assignment to nil map
SasSwart Apr 21, 2025
8c8bca6
fix: ensure prebuilt workspace agent tokens are reused when a prebuil…
SasSwart Apr 23, 2025
7ce4eea
test agent reinitialization
SasSwart Apr 24, 2025
52ac64e
remove defunct metric
SasSwart Apr 24, 2025
362db7c
Remove todo
SasSwart Apr 25, 2025
dcc7379
test that we trigger workspace agent reinitialization under the right…
SasSwart Apr 28, 2025
ff66b3f
slight improvements to a test
SasSwart Apr 28, 2025
efff5d9
review notes to improve legibility
SasSwart Apr 28, 2025
cebd5db
add an integration test for prebuilt workspace agent reinitialization
SasSwart Apr 29, 2025
2679138
Merge remote-tracking branch 'origin/main' into jjs/prebuilds-agent-r…
SasSwart Apr 29, 2025
9feebef
enable the premium license in a prebuilds integration test
SasSwart Apr 29, 2025
b117b5c
encapsulate WaitForReinitLoop for easier testing
SasSwart Apr 30, 2025
a22b414
introduce unit testable abstraction layers
SasSwart Apr 30, 2025
9bbd2c7
test workspace claim pubsub
SasSwart May 1, 2025
5804201
add tests for agent reinitialization
SasSwart May 1, 2025
7e8dcee
review notes
SasSwart May 1, 2025
725f97b
Merge remote-tracking branch 'origin/main' into jjs/prebuilds-agent-r…
SasSwart May 1, 2025
a9b1567
make fmt lint
SasSwart May 1, 2025
21ee970
remove go mod replace
SasSwart May 1, 2025
e54d7e7
remove defunct logging
SasSwart May 1, 2025
2799858
update dependency on terraform-provider-coder
SasSwart May 2, 2025
1d93003
update dependency on terraform-provider-coder
SasSwart May 2, 2025
763fc12
go mod tidy
SasSwart May 2, 2025
0f879c7
make -B gen
SasSwart May 2, 2025
61784c9
dont require ids to InsertPresetParameters
SasSwart May 2, 2025
604eb27
dont require ids to InsertPresetParameters
SasSwart May 2, 2025
bf4d2cf
fix: set the running agent token
dannykopping May 2, 2025
38b4f0d
fix: use http client without timeout like we do in connectRPCVersion
dannykopping May 5, 2025
20df538
review notes
SasSwart May 6, 2025
4bb3b68
Merge remote-tracking branch 'origin/main' into jjs/prebuilds-agent-r…
SasSwart May 7, 2025
83972db
bump provisionerd proto version
SasSwart May 7, 2025
146b158
fix: fetch the previous agent when we need its token for prebuilt wor…
SasSwart May 12, 2025
5eb16cd
Merge remote-tracking branch 'origin/main' into jjs/prebuilds-agent-r…
SasSwart May 12, 2025
730d803
make -B lint
SasSwart May 12, 2025
150adc0
Test GetWorkspaceAgentsByBuildID
SasSwart May 12, 2025
b4ecf10
Rename GetWorkspaceAgentsByWorkspaceAndBuildNumber
SasSwart May 12, 2025
3fa3edf
make gen
SasSwart May 12, 2025
7e45919
fix a race condition
SasSwart May 12, 2025
a632508
Merge remote-tracking branch 'origin/main' into jjs/prebuilds-agent-r…
SasSwart May 12, 2025
72125ec
Merge remote-tracking branch 'origin/main' into jjs/prebuilds-agent-r…
SasSwart May 13, 2025
b65eea7
fix provisionerdserver test for prebuild claims
SasSwart May 13, 2025
e1339f3
fix race conditions
SasSwart May 13, 2025
c1a8ba6
Merge remote-tracking branch 'origin/main' into jjs/prebuilds-agent-r…
SasSwart May 13, 2025
5363dcc
Make TestReinitializeAgent more robust
SasSwart May 13, 2025
7ad9b6d
fix tests
SasSwart May 14, 2025
394571d
make -B gen
SasSwart May 14, 2025
890747b
remove a potential race in reinitialization testing in TestCompleteJob
SasSwart May 14, 2025
b3870db
fix a potential race in TestReinit
SasSwart May 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: fetch the previous agent when we need its token for prebuilt workspaces
  • Loading branch information
SasSwart committed May 12, 2025
commit 146b15857c5f5637168897dddb94940d94fa6a66
9 changes: 9 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -3012,6 +3012,15 @@ func (q *querier) GetWorkspaceAgentUsageStatsAndLabels(ctx context.Context, crea
return q.db.GetWorkspaceAgentUsageStatsAndLabels(ctx, createdAt)
}

// GetWorkspaceAgentsByBuildID returns the workspace agents selected by arg.
//
// The workspace named by arg.WorkspaceID is first fetched through q itself;
// if that lookup fails, its error is returned and the underlying store is not
// queried. NOTE(review): q.GetWorkspaceByID presumably enforces read access to
// the workspace — confirm against its implementation.
func (q *querier) GetWorkspaceAgentsByBuildID(ctx context.Context, arg database.GetWorkspaceAgentsByBuildIDParams) ([]database.WorkspaceAgent, error) {
	// The workspace value is not needed; only the outcome of the lookup matters.
	if _, err := q.GetWorkspaceByID(ctx, arg.WorkspaceID); err != nil {
		return nil, err
	}
	return q.db.GetWorkspaceAgentsByBuildID(ctx, arg)
}

// GetWorkspaceAgentsByResourceIDs
// The workspace/job is already fetched.
func (q *querier) GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []uuid.UUID) ([]database.WorkspaceAgent, error) {
Expand Down
24 changes: 24 additions & 0 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -7641,6 +7641,30 @@ func (q *FakeQuerier) GetWorkspaceAgentUsageStatsAndLabels(_ context.Context, cr
return stats, nil
}

// GetWorkspaceAgentsByBuildID returns the agents attached to the resources of
// the workspace build identified by arg (workspace ID and build number).
//
// It resolves the build, then the resources recorded for the build's job, and
// finally the agents belonging to those resources. Any error from validation
// or from one of the lookups is returned unchanged with a nil slice.
func (q *FakeQuerier) GetWorkspaceAgentsByBuildID(ctx context.Context, arg database.GetWorkspaceAgentsByBuildIDParams) ([]database.WorkspaceAgent, error) {
	err := validateDatabaseType(arg)
	if err != nil {
		return nil, err
	}

	build, err := q.GetWorkspaceBuildByWorkspaceIDAndBuildNumber(ctx, database.GetWorkspaceBuildByWorkspaceIDAndBuildNumberParams(arg))
	if err != nil {
		return nil, err
	}

	// The NoLock helper relies on the caller holding q.mutex. Take the read
	// lock for this call only: the exported methods used before and after it
	// acquire the lock on their own (GetWorkspaceAgentsByResourceIDs does so
	// explicitly), and sync.RWMutex read locks must not be acquired
	// recursively.
	q.mutex.RLock()
	resources, err := q.getWorkspaceResourcesByJobIDNoLock(ctx, build.JobID)
	q.mutex.RUnlock()
	if err != nil {
		return nil, err
	}

	var resourceIDs []uuid.UUID
	for _, resource := range resources {
		resourceIDs = append(resourceIDs, resource.ID)
	}

	return q.GetWorkspaceAgentsByResourceIDs(ctx, resourceIDs)
}

func (q *FakeQuerier) GetWorkspaceAgentsByResourceIDs(ctx context.Context, resourceIDs []uuid.UUID) ([]database.WorkspaceAgent, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
Expand Down
7 changes: 7 additions & 0 deletions coderd/database/dbmetrics/querymetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions coderd/database/querier.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

74 changes: 74 additions & 0 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions coderd/database/queries/workspaceagents.sql
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,19 @@ WHERE
wb.workspace_id = @workspace_id :: uuid
);

-- name: GetWorkspaceAgentsByBuildID :many
-- Selects every agent attached to a resource whose job is the job of the
-- workspace build identified by (workspace_id, build_number).
SELECT
	wa.*
FROM
	workspace_agents AS wa
JOIN
	workspace_resources AS wr ON wa.resource_id = wr.id
JOIN
	workspace_builds AS wb ON wr.job_id = wb.job_id
WHERE
	wb.workspace_id = @workspace_id :: uuid AND
	wb.build_number = @build_number :: int;

-- name: GetWorkspaceAgentAndLatestBuildByAuthToken :one
SELECT
sqlc.embed(workspaces),
Expand Down
19 changes: 9 additions & 10 deletions coderd/prebuilds/claim_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,19 @@ import (

func TestPubsubWorkspaceClaimPublisher(t *testing.T) {
t.Parallel()
t.Run("publish claim", func(t *testing.T) {
t.Run("published claim is received by a listener for the same workspace", func(t *testing.T) {
t.Parallel()

ctx := testutil.Context(t, testutil.WaitShort)
logger := testutil.Logger(t)
ps := pubsub.NewInMemory()
publisher := prebuilds.NewPubsubWorkspaceClaimPublisher(ps)

workspaceID := uuid.New()
userID := uuid.New()
reinitEvents := make(chan agentsdk.ReinitializationEvent, 1)
publisher := prebuilds.NewPubsubWorkspaceClaimPublisher(ps)
listener := prebuilds.NewPubsubWorkspaceClaimListener(ps, logger)

userIDCh := make(chan uuid.UUID, 1)
channel := agentsdk.PrebuildClaimedChannel(workspaceID)
cancel, err := ps.Subscribe(channel, func(ctx context.Context, message []byte) {
userIDCh <- uuid.MustParse(string(message))
})
cancel, err := listener.ListenForWorkspaceClaims(ctx, workspaceID, reinitEvents)
require.NoError(t, err)
defer cancel()

Expand All @@ -43,8 +42,8 @@ func TestPubsubWorkspaceClaimPublisher(t *testing.T) {
err = publisher.PublishWorkspaceClaim(claim)
require.NoError(t, err)

gotUserID := testutil.TryReceive(testutil.Context(t, testutil.WaitShort), t, userIDCh)
require.Equal(t, userID, gotUserID)
gotUserID := testutil.RequireReceive(testutil.Context(t, testutil.WaitShort), t, reinitEvents)
require.Equal(t, userID, gotUserID.UserID)
})

t.Run("fail to publish claim", func(t *testing.T) {
Expand Down
6 changes: 5 additions & 1 deletion coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,11 @@ func (s *server) acquireProtoJob(ctx context.Context, job database.ProvisionerJo
// reused. Context: the agent token is often used in immutable attributes of a workspace resource (e.g. VM/container)
// to initialize the agent, so if that value changes it will necessitate a replacement of that resource, thus
// obviating the whole point of the prebuild.
agents, err := s.Database.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
agents, err := s.Database.GetWorkspaceAgentsByBuildID(ctx, database.GetWorkspaceAgentsByBuildIDParams{
WorkspaceID: workspace.ID,
BuildNumber: 1,
})

if err != nil {
s.Logger.Error(ctx, "failed to retrieve running agents of claimed prebuilt workspace",
slog.F("workspace_id", workspace.ID), slog.Error(err))
Expand Down
28 changes: 16 additions & 12 deletions coderd/provisionerdserver/provisionerdserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"storj.io/drpc"

"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/quartz"
Expand Down Expand Up @@ -1823,17 +1824,12 @@ func TestCompleteJob(t *testing.T) {
require.NoError(t, err)

// GIVEN something is listening to process workspace reinitialization:

eventName := agentsdk.PrebuildClaimedChannel(workspace.ID)
reinitChan := make(chan []byte, 1)
cancel, err := ps.Subscribe(eventName, func(inner context.Context, userIDMessage []byte) {
reinitChan <- userIDMessage
})
reinitChan := make(chan agentsdk.ReinitializationEvent, 1) // Buffered to simplify test structure
cancel, err := prebuilds.NewPubsubWorkspaceClaimListener(ps, testutil.Logger(t)).ListenForWorkspaceClaims(ctx, workspace.ID, reinitChan)
require.NoError(t, err)
defer cancel()

// WHEN the job is completed

completedJob := proto.CompletedJob{
JobId: job.ID.String(),
Type: &proto.CompletedJob_WorkspaceBuild_{
Expand All @@ -1844,13 +1840,11 @@ func TestCompleteJob(t *testing.T) {
require.NoError(t, err)

select {
case userIDMessage := <-reinitChan:
case reinitEvent := <-reinitChan:
// THEN workspace agent reinitialization instruction was received:
gotUserID, err := uuid.ParseBytes(userIDMessage)
require.NoError(t, err)
require.True(t, tc.shouldReinitializeAgent)
require.Equal(t, userID, gotUserID)
case <-ctx.Done():
require.Equal(t, userID, reinitEvent.UserID)
default:
// THEN workspace agent reinitialization instruction was not received.
require.False(t, tc.shouldReinitializeAgent)
}
Expand Down Expand Up @@ -2953,3 +2947,13 @@ func (s *fakeStream) cancel() {
s.canceled = true
s.c.Broadcast()
}

// pubsubReinitSpy wraps a pubsub.Pubsub and reports the name of every event
// passed to its Subscribe method on the subscriptions channel. All other
// methods come from the embedded Pubsub.
type pubsubReinitSpy struct {
// Embedded implementation that Subscribe delegates to.
pubsub.Pubsub
// Receives the event name of each Subscribe call before the call is forwarded.
subscriptions chan string
}

// Subscribe sends the event name on p.subscriptions and then forwards the
// subscription to the embedded Pubsub, returning its cancel func and error.
// The send happens first, so this call blocks until the event name has been
// accepted by the channel.
func (p pubsubReinitSpy) Subscribe(event string, listener pubsub.Listener) (cancel func(), err error) {
	p.subscriptions <- event

	cancel, err = p.Pubsub.Subscribe(event, listener)
	return cancel, err
}
2 changes: 2 additions & 0 deletions coderd/workspaceagents.go
Original file line number Diff line number Diff line change
Expand Up @@ -1227,6 +1227,8 @@ func (api *API) workspaceAgentReinit(rw http.ResponseWriter, r *http.Request) {
log.Info(ctx, "agent reinitialization subscription closed", slog.F("workspace_agent_id", workspaceAgent.ID))
case errors.Is(err, agentsdk.ErrTransmissionTargetClosed):
log.Info(ctx, "agent connection closed", slog.F("workspace_agent_id", workspaceAgent.ID))
case errors.Is(err, context.Canceled):
log.Info(ctx, "agent reinitialization", slog.Error(err))
case err != nil:
log.Error(ctx, "failed to stream agent reinit events", slog.Error(err))
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Expand Down
Loading