-
Notifications
You must be signed in to change notification settings - Fork 874
feat: reinitialize agents when a prebuilt workspace is claimed #17475
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c09c9b9
476fe71
8c8bca6
7ce4eea
52ac64e
362db7c
dcc7379
ff66b3f
efff5d9
cebd5db
2679138
9feebef
b117b5c
a22b414
9bbd2c7
5804201
7e8dcee
725f97b
a9b1567
21ee970
e54d7e7
2799858
1d93003
763fc12
0f879c7
61784c9
604eb27
bf4d2cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,9 @@ import ( | |
"tailscale.com/util/clientmetric" | ||
|
||
"cdr.dev/slog" | ||
|
||
"github.com/coder/retry" | ||
|
||
"github.com/coder/clistat" | ||
"github.com/coder/coder/v2/agent/agentcontainers" | ||
"github.com/coder/coder/v2/agent/agentexec" | ||
|
@@ -53,7 +56,6 @@ import ( | |
"github.com/coder/coder/v2/tailnet" | ||
tailnetproto "github.com/coder/coder/v2/tailnet/proto" | ||
"github.com/coder/quartz" | ||
"github.com/coder/retry" | ||
) | ||
|
||
const ( | ||
|
@@ -363,9 +365,11 @@ func (a *agent) runLoop() { | |
if ctx.Err() != nil { | ||
// Context canceled errors may come from websocket pings, so we | ||
// don't want to use `errors.Is(err, context.Canceled)` here. | ||
a.logger.Warn(ctx, "runLoop exited with error", slog.Error(ctx.Err())) | ||
return | ||
} | ||
if a.isClosed() { | ||
a.logger.Warn(ctx, "runLoop exited because agent is closed") | ||
return | ||
} | ||
if errors.Is(err, io.EOF) { | ||
|
@@ -1046,7 +1050,11 @@ func (a *agent) run() (retErr error) { | |
return a.statsReporter.reportLoop(ctx, aAPI) | ||
}) | ||
|
||
return connMan.wait() | ||
err = connMan.wait() | ||
if err != nil { | ||
a.logger.Warn(context.Background(), "connection manager errored", slog.Error(err)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be info or debug, as it can throw benign error like being closed or context canceled. |
||
} | ||
return err | ||
} | ||
|
||
// handleManifest returns a function that fetches and processes the manifest | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,8 @@ import ( | |
"cdr.dev/slog/sloggers/sloghuman" | ||
"cdr.dev/slog/sloggers/slogjson" | ||
"cdr.dev/slog/sloggers/slogstackdriver" | ||
"github.com/coder/serpent" | ||
|
||
"github.com/coder/coder/v2/agent" | ||
"github.com/coder/coder/v2/agent/agentexec" | ||
"github.com/coder/coder/v2/agent/agentssh" | ||
|
@@ -33,7 +35,6 @@ import ( | |
"github.com/coder/coder/v2/cli/clilog" | ||
"github.com/coder/coder/v2/codersdk" | ||
"github.com/coder/coder/v2/codersdk/agentsdk" | ||
"github.com/coder/serpent" | ||
) | ||
|
||
func (r *RootCmd) workspaceAgent() *serpent.Command { | ||
|
@@ -62,8 +63,10 @@ func (r *RootCmd) workspaceAgent() *serpent.Command { | |
// This command isn't useful to manually execute. | ||
Hidden: true, | ||
Handler: func(inv *serpent.Invocation) error { | ||
ctx, cancel := context.WithCancel(inv.Context()) | ||
defer cancel() | ||
ctx, cancel := context.WithCancelCause(inv.Context()) | ||
defer func() { | ||
cancel(xerrors.New("agent exited")) | ||
}() | ||
|
||
var ( | ||
ignorePorts = map[int]string{} | ||
|
@@ -280,7 +283,6 @@ func (r *RootCmd) workspaceAgent() *serpent.Command { | |
return xerrors.Errorf("add executable to $PATH: %w", err) | ||
} | ||
|
||
prometheusRegistry := prometheus.NewRegistry() | ||
subsystemsRaw := inv.Environ.Get(agent.EnvAgentSubsystem) | ||
subsystems := []codersdk.AgentSubsystem{} | ||
for _, s := range strings.Split(subsystemsRaw, ",") { | ||
|
@@ -324,45 +326,69 @@ func (r *RootCmd) workspaceAgent() *serpent.Command { | |
logger.Info(ctx, "agent devcontainer detection not enabled") | ||
} | ||
|
||
agnt := agent.New(agent.Options{ | ||
Client: client, | ||
Logger: logger, | ||
LogDir: logDir, | ||
ScriptDataDir: scriptDataDir, | ||
// #nosec G115 - Safe conversion as tailnet listen port is within uint16 range (0-65535) | ||
TailnetListenPort: uint16(tailnetListenPort), | ||
ExchangeToken: func(ctx context.Context) (string, error) { | ||
if exchangeToken == nil { | ||
return client.SDK.SessionToken(), nil | ||
} | ||
resp, err := exchangeToken(ctx) | ||
if err != nil { | ||
return "", err | ||
} | ||
client.SetSessionToken(resp.SessionToken) | ||
return resp.SessionToken, nil | ||
}, | ||
EnvironmentVariables: environmentVariables, | ||
IgnorePorts: ignorePorts, | ||
SSHMaxTimeout: sshMaxTimeout, | ||
Subsystems: subsystems, | ||
|
||
PrometheusRegistry: prometheusRegistry, | ||
BlockFileTransfer: blockFileTransfer, | ||
Execer: execer, | ||
|
||
ExperimentalDevcontainersEnabled: experimentalDevcontainersEnabled, | ||
}) | ||
|
||
promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger) | ||
prometheusSrvClose := ServeHandler(ctx, logger, promHandler, prometheusAddress, "prometheus") | ||
defer prometheusSrvClose() | ||
|
||
debugSrvClose := ServeHandler(ctx, logger, agnt.HTTPDebug(), debugAddress, "debug") | ||
defer debugSrvClose() | ||
|
||
<-ctx.Done() | ||
return agnt.Close() | ||
reinitEvents := agentsdk.WaitForReinitLoop(ctx, logger, client) | ||
|
||
var ( | ||
lastErr error | ||
mustExit bool | ||
) | ||
for { | ||
prometheusRegistry := prometheus.NewRegistry() | ||
|
||
agnt := agent.New(agent.Options{ | ||
Client: client, | ||
Logger: logger, | ||
LogDir: logDir, | ||
ScriptDataDir: scriptDataDir, | ||
// #nosec G115 - Safe conversion as tailnet listen port is within uint16 range (0-65535) | ||
TailnetListenPort: uint16(tailnetListenPort), | ||
ExchangeToken: func(ctx context.Context) (string, error) { | ||
if exchangeToken == nil { | ||
return client.SDK.SessionToken(), nil | ||
} | ||
resp, err := exchangeToken(ctx) | ||
if err != nil { | ||
return "", err | ||
} | ||
client.SetSessionToken(resp.SessionToken) | ||
return resp.SessionToken, nil | ||
}, | ||
EnvironmentVariables: environmentVariables, | ||
IgnorePorts: ignorePorts, | ||
SSHMaxTimeout: sshMaxTimeout, | ||
Subsystems: subsystems, | ||
|
||
PrometheusRegistry: prometheusRegistry, | ||
BlockFileTransfer: blockFileTransfer, | ||
Execer: execer, | ||
ExperimentalDevcontainersEnabled: experimentalDevcontainersEnabled, | ||
}) | ||
|
||
promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger) | ||
prometheusSrvClose := ServeHandler(ctx, logger, promHandler, prometheusAddress, "prometheus") | ||
|
||
debugSrvClose := ServeHandler(ctx, logger, agnt.HTTPDebug(), debugAddress, "debug") | ||
|
||
select { | ||
case <-ctx.Done(): | ||
logger.Warn(ctx, "agent shutting down", slog.Error(ctx.Err()), slog.Error(context.Cause(ctx))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Warn seems alarmist; lots of legit reasons to shut down. Also, it's unexpected to include more than one call to |
||
mustExit = true | ||
case event := <-reinitEvents: | ||
logger.Warn(ctx, "agent received instruction to reinitialize", | ||
slog.F("user_id", event.UserID), slog.F("workspace_id", event.WorkspaceID), slog.F("reason", event.Reason)) | ||
} | ||
|
||
lastErr = agnt.Close() | ||
debugSrvClose() | ||
prometheusSrvClose() | ||
|
||
if mustExit { | ||
break | ||
} | ||
|
||
logger.Info(ctx, "agent reinitializing") | ||
} | ||
return lastErr | ||
}, | ||
} | ||
|
||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
inconsistent formatting of imports