Skip to content

chore: cherry-pick remaining PRs into 2.22 #17851

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: reinitialize agents when a prebuilt workspace is claimed (#17475)
This pull request allows coder workspace agents to be reinitialized when
a prebuilt workspace is claimed by a user. This facilitates the transfer
of ownership between the anonymous prebuilds system user and the new
owner of the workspace.

Only a single agent per prebuilt workspace is supported for now, but
plumbing has already been done to facilitate the seamless transition to
multi-agent support.

---------

Signed-off-by: Danny Kopping <dannykopping@gmail.com>
Co-authored-by: Danny Kopping <dannykopping@gmail.com>
Signed-off-by: Danny Kopping <dannykopping@gmail.com>
  • Loading branch information
SasSwart and dannykopping committed May 15, 2025
commit 93ce73976b9b0e631f62ce4fd7a0b7846ade3b04
8 changes: 7 additions & 1 deletion agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,11 @@ func (a *agent) runLoop() {
if ctx.Err() != nil {
// Context canceled errors may come from websocket pings, so we
// don't want to use `errors.Is(err, context.Canceled)` here.
a.logger.Warn(ctx, "runLoop exited with error", slog.Error(ctx.Err()))
return
}
if a.isClosed() {
a.logger.Warn(ctx, "runLoop exited because agent is closed")
return
}
if errors.Is(err, io.EOF) {
Expand Down Expand Up @@ -1046,7 +1048,11 @@ func (a *agent) run() (retErr error) {
return a.statsReporter.reportLoop(ctx, aAPI)
})

return connMan.wait()
err = connMan.wait()
if err != nil {
a.logger.Info(context.Background(), "connection manager errored", slog.Error(err))
}
return err
}

// handleManifest returns a function that fetches and processes the manifest
Expand Down
93 changes: 60 additions & 33 deletions cli/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (
"cdr.dev/slog/sloggers/sloghuman"
"cdr.dev/slog/sloggers/slogjson"
"cdr.dev/slog/sloggers/slogstackdriver"
"github.com/coder/serpent"

"github.com/coder/coder/v2/agent"
"github.com/coder/coder/v2/agent/agentexec"
"github.com/coder/coder/v2/agent/agentssh"
Expand All @@ -33,7 +35,6 @@ import (
"github.com/coder/coder/v2/cli/clilog"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/serpent"
)

func (r *RootCmd) workspaceAgent() *serpent.Command {
Expand Down Expand Up @@ -62,8 +63,10 @@ func (r *RootCmd) workspaceAgent() *serpent.Command {
// This command isn't useful to manually execute.
Hidden: true,
Handler: func(inv *serpent.Invocation) error {
ctx, cancel := context.WithCancel(inv.Context())
defer cancel()
ctx, cancel := context.WithCancelCause(inv.Context())
defer func() {
cancel(xerrors.New("agent exited"))
}()

var (
ignorePorts = map[int]string{}
Expand Down Expand Up @@ -280,7 +283,6 @@ func (r *RootCmd) workspaceAgent() *serpent.Command {
return xerrors.Errorf("add executable to $PATH: %w", err)
}

prometheusRegistry := prometheus.NewRegistry()
subsystemsRaw := inv.Environ.Get(agent.EnvAgentSubsystem)
subsystems := []codersdk.AgentSubsystem{}
for _, s := range strings.Split(subsystemsRaw, ",") {
Expand Down Expand Up @@ -324,28 +326,37 @@ func (r *RootCmd) workspaceAgent() *serpent.Command {
logger.Info(ctx, "agent devcontainer detection not enabled")
}

agnt := agent.New(agent.Options{
Client: client,
Logger: logger,
LogDir: logDir,
ScriptDataDir: scriptDataDir,
// #nosec G115 - Safe conversion as tailnet listen port is within uint16 range (0-65535)
TailnetListenPort: uint16(tailnetListenPort),
ExchangeToken: func(ctx context.Context) (string, error) {
if exchangeToken == nil {
return client.SDK.SessionToken(), nil
}
resp, err := exchangeToken(ctx)
if err != nil {
return "", err
}
client.SetSessionToken(resp.SessionToken)
return resp.SessionToken, nil
},
EnvironmentVariables: environmentVariables,
IgnorePorts: ignorePorts,
SSHMaxTimeout: sshMaxTimeout,
Subsystems: subsystems,
reinitEvents := agentsdk.WaitForReinitLoop(ctx, logger, client)

var (
lastErr error
mustExit bool
)
for {
prometheusRegistry := prometheus.NewRegistry()

agnt := agent.New(agent.Options{
Client: client,
Logger: logger,
LogDir: logDir,
ScriptDataDir: scriptDataDir,
// #nosec G115 - Safe conversion as tailnet listen port is within uint16 range (0-65535)
TailnetListenPort: uint16(tailnetListenPort),
ExchangeToken: func(ctx context.Context) (string, error) {
if exchangeToken == nil {
return client.SDK.SessionToken(), nil
}
resp, err := exchangeToken(ctx)
if err != nil {
return "", err
}
client.SetSessionToken(resp.SessionToken)
return resp.SessionToken, nil
},
EnvironmentVariables: environmentVariables,
IgnorePorts: ignorePorts,
SSHMaxTimeout: sshMaxTimeout,
Subsystems: subsystems,

PrometheusRegistry: prometheusRegistry,
BlockFileTransfer: blockFileTransfer,
Expand All @@ -354,15 +365,31 @@ func (r *RootCmd) workspaceAgent() *serpent.Command {
ExperimentalDevcontainersEnabled: experimentalDevcontainersEnabled,
})

promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger)
prometheusSrvClose := ServeHandler(ctx, logger, promHandler, prometheusAddress, "prometheus")
defer prometheusSrvClose()
promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger)
prometheusSrvClose := ServeHandler(ctx, logger, promHandler, prometheusAddress, "prometheus")

debugSrvClose := ServeHandler(ctx, logger, agnt.HTTPDebug(), debugAddress, "debug")

debugSrvClose := ServeHandler(ctx, logger, agnt.HTTPDebug(), debugAddress, "debug")
defer debugSrvClose()
select {
case <-ctx.Done():
logger.Info(ctx, "agent shutting down", slog.Error(context.Cause(ctx)))
mustExit = true
case event := <-reinitEvents:
logger.Info(ctx, "agent received instruction to reinitialize",
slog.F("workspace_id", event.WorkspaceID), slog.F("reason", event.Reason))
}

lastErr = agnt.Close()
debugSrvClose()
prometheusSrvClose()

<-ctx.Done()
return agnt.Close()
if mustExit {
break
}

logger.Info(ctx, "agent reinitializing")
}
return lastErr
},
}

Expand Down
45 changes: 45 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"sync/atomic"
"time"

"github.com/coder/coder/v2/coderd/prebuilds"

"github.com/andybalholm/brotli"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
Expand All @@ -45,7 +47,6 @@ import (
"github.com/coder/coder/v2/coderd/entitlements"
"github.com/coder/coder/v2/coderd/files"
"github.com/coder/coder/v2/coderd/idpsync"
"github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/coderd/runtimeconfig"
"github.com/coder/coder/v2/coderd/webpush"

Expand Down Expand Up @@ -1278,6 +1279,7 @@ func New(options *Options) *API {
r.Get("/external-auth", api.workspaceAgentsExternalAuth)
r.Get("/gitsshkey", api.agentGitSSHKey)
r.Post("/log-source", api.workspaceAgentPostLogSource)
r.Get("/reinit", api.workspaceAgentReinit)
})
r.Route("/{workspaceagent}", func(r chi.Router) {
r.Use(
Expand Down
63 changes: 63 additions & 0 deletions coderd/coderdtest/coderdtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,69 @@ func (w WorkspaceAgentWaiter) MatchResources(m func([]codersdk.WorkspaceResource
return w
}

// WaitForAgentFn represents a boolean assertion to be made against each agent
// that a given WorkspaceAgentWaited knows about. Each WaitForAgentFn should apply
// the check to a single agent, but it should be named for plural, because `func (w WorkspaceAgentWaiter) WaitFor`
// applies the check to all agents that it is aware of. This ensures that the public API of the waiter
// reads correctly. For example:
//
// waiter := coderdtest.NewWorkspaceAgentWaiter(t, client, r.Workspace.ID)
// waiter.WaitFor(coderdtest.AgentsReady)
type WaitForAgentFn func(agent codersdk.WorkspaceAgent) bool

// AgentsReady checks that the latest lifecycle state of an agent is "Ready".
func AgentsReady(agent codersdk.WorkspaceAgent) bool {
return agent.LifecycleState == codersdk.WorkspaceAgentLifecycleReady
}

// AgentsNotReady checks that the latest lifecycle state of an agent is anything except "Ready".
func AgentsNotReady(agent codersdk.WorkspaceAgent) bool {
return !AgentsReady(agent)
}

func (w WorkspaceAgentWaiter) WaitFor(criteria ...WaitForAgentFn) {
w.t.Helper()

agentNamesMap := make(map[string]struct{}, len(w.agentNames))
for _, name := range w.agentNames {
agentNamesMap[name] = struct{}{}
}

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()

w.t.Logf("waiting for workspace agents (workspace %s)", w.workspaceID)
require.Eventually(w.t, func() bool {
var err error
workspace, err := w.client.Workspace(ctx, w.workspaceID)
if err != nil {
return false
}
if workspace.LatestBuild.Job.CompletedAt == nil {
return false
}
if workspace.LatestBuild.Job.CompletedAt.IsZero() {
return false
}

for _, resource := range workspace.LatestBuild.Resources {
for _, agent := range resource.Agents {
if len(w.agentNames) > 0 {
if _, ok := agentNamesMap[agent.Name]; !ok {
continue
}
}
for _, criterium := range criteria {
if !criterium(agent) {
return false
}
}
}
}
return true
}, testutil.WaitLong, testutil.IntervalMedium)
}

// Wait waits for the agent(s) to connect and fails the test if they do not within testutil.WaitLong
func (w WorkspaceAgentWaiter) Wait() []codersdk.WorkspaceResource {
w.t.Helper()
Expand Down
9 changes: 9 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -3001,6 +3001,15 @@ func (q *querier) GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []uui
return q.db.GetWorkspaceAgentsByResourceIDs(ctx, ids)
}

func (q *querier) GetWorkspaceAgentsByWorkspaceAndBuildNumber(ctx context.Context, arg database.GetWorkspaceAgentsByWorkspaceAndBuildNumberParams) ([]database.WorkspaceAgent, error) {
_, err := q.GetWorkspaceByID(ctx, arg.WorkspaceID)
if err != nil {
return nil, err
}

return q.db.GetWorkspaceAgentsByWorkspaceAndBuildNumber(ctx, arg)
}

func (q *querier) GetWorkspaceAgentsCreatedAfter(ctx context.Context, createdAt time.Time) ([]database.WorkspaceAgent, error) {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceSystem); err != nil {
return nil, err
Expand Down
Loading