Skip to content

Commit a84e467

Browse files
committed
feat: Add agent state reporting
1 parent 5abf555 commit a84e467

File tree

7 files changed

+156
-12
lines changed

7 files changed

+156
-12
lines changed

agent/agent.go

Lines changed: 70 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ type Client interface {
7171
WorkspaceAgentMetadata(ctx context.Context) (codersdk.WorkspaceAgentMetadata, error)
7272
ListenWorkspaceAgent(ctx context.Context) (net.Conn, error)
7373
AgentReportStats(ctx context.Context, log slog.Logger, stats func() *codersdk.AgentStats) (io.Closer, error)
74+
PostWorkspaceAgentState(ctx context.Context, state codersdk.PostWorkspaceAgentStateRequest) error
7475
PostWorkspaceAgentAppHealth(ctx context.Context, req codersdk.PostWorkspaceAppHealthsRequest) error
7576
PostWorkspaceAgentVersion(ctx context.Context, version string) error
7677
}
@@ -127,6 +128,9 @@ type agent struct {
127128
sessionToken atomic.Pointer[string]
128129
sshServer *ssh.Server
129130

131+
stateMu sync.Mutex // Protects following.
132+
state codersdk.WorkspaceAgentState
133+
130134
network *tailnet.Conn
131135
}
132136

@@ -156,6 +160,30 @@ func (a *agent) runLoop(ctx context.Context) {
156160
}
157161
}
158162

163+
func (a *agent) setState(ctx context.Context, state codersdk.WorkspaceAgentState) {
164+
a.stateMu.Lock()
165+
defer a.stateMu.Unlock()
166+
167+
a.state = state
168+
169+
var err error
170+
for r := retry.New(time.Second, 30*time.Second); r.Wait(ctx); {
171+
err = a.client.PostWorkspaceAgentState(ctx, codersdk.PostWorkspaceAgentStateRequest{
172+
State: state,
173+
})
174+
if err == nil {
175+
return
176+
}
177+
}
178+
if xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) || a.isClosed() {
179+
return
180+
}
181+
if err != nil {
182+
// If we fail to report the state we probably shouldn't exit, log only.
183+
a.logger.Error(ctx, "post state", slog.Error(err))
184+
}
185+
}
186+
159187
func (a *agent) run(ctx context.Context) error {
160188
// This allows the agent to refresh its token if necessary.
161189
// For instance identity this is required, since the instance
@@ -180,22 +208,55 @@ func (a *agent) run(ctx context.Context) error {
180208

181209
// The startup script should only execute on the first run!
182210
if oldMetadata == nil {
211+
scriptDone := make(chan error, 1)
212+
scriptStart := time.Now()
213+
go func() {
214+
defer close(scriptDone)
215+
scriptDone <- a.runStartupScript(ctx, metadata.StartupScript)
216+
}()
183217
go func() {
184-
err := a.runStartupScript(ctx, metadata.StartupScript)
218+
var timeout <-chan time.Time
219+
// If timeout is zero, an older version of the coder
220+
// provider was used. Otherwise a timeout is always > 0.
221+
if metadata.StartupScriptTimeout > 0 {
222+
t := time.NewTimer(metadata.StartupScriptTimeout)
223+
defer t.Stop()
224+
timeout = t.C
225+
}
226+
227+
a.setState(ctx, codersdk.WorkspaceAgentStateStarting)
228+
229+
var err error
230+
select {
231+
case err = <-scriptDone:
232+
case <-timeout:
233+
a.logger.Warn(ctx, "startup script timed out")
234+
a.setState(ctx, codersdk.WorkspaceAgentStateStartTimeout)
235+
err = <-scriptDone // The script can still complete after a timeout.
236+
}
185237
if errors.Is(err, context.Canceled) {
186238
return
187239
}
240+
execTime := time.Since(scriptStart)
188241
if err != nil {
189-
a.logger.Warn(ctx, "agent script failed", slog.Error(err))
242+
a.logger.Warn(ctx, "startup script failed", slog.F("execution_time", execTime), slog.Error(err))
243+
a.setState(ctx, codersdk.WorkspaceAgentStateStartError)
244+
return
190245
}
191-
}()
192-
}
246+
a.logger.Info(ctx, "startup script completed", slog.F("execution_time", execTime))
193247

194-
if metadata.GitAuthConfigs > 0 {
195-
err = gitauth.OverrideVSCodeConfigs(a.filesystem)
196-
if err != nil {
197-
return xerrors.Errorf("override vscode configuration for git auth: %w", err)
198-
}
248+
// Perform overrides after startup script has completed to ensure
249+
// there is no conflict with the user's scripts. We also want to
250+
// ensure this is done before the workspace is marked as ready.
251+
if metadata.GitAuthConfigs > 0 {
252+
err = gitauth.OverrideVSCodeConfigs(a.filesystem)
253+
if err != nil {
254+
a.logger.Warn(ctx, "failed to override vscode git auth configs", slog.Error(err))
255+
}
256+
}
257+
258+
a.setState(ctx, codersdk.WorkspaceAgentStateReady)
259+
}()
199260
}
200261

201262
// This automatically closes when the context ends!

agent/agent_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,6 +1130,10 @@ func (c *client) AgentReportStats(ctx context.Context, _ slog.Logger, stats func
11301130
}), nil
11311131
}
11321132

1133+
func (*client) PostWorkspaceAgentState(_ context.Context, _ codersdk.PostWorkspaceAgentStateRequest) error {
1134+
return nil
1135+
}
1136+
11331137
func (*client) PostWorkspaceAgentAppHealth(_ context.Context, _ codersdk.PostWorkspaceAppHealthsRequest) error {
11341138
return nil
11351139
}

coderd/coderd.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ func New(options *Options) *API {
541541
r.Get("/gitsshkey", api.agentGitSSHKey)
542542
r.Get("/coordinate", api.workspaceAgentCoordinate)
543543
r.Post("/report-stats", api.workspaceAgentReportStats)
544+
r.Post("/report-state", api.workspaceAgentReportState)
544545
})
545546
r.Route("/{workspaceagent}", func(r chi.Router) {
546547
r.Use(

coderd/database/databasefake/databasefake.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4295,12 +4295,12 @@ func (q *fakeQuerier) GetQuotaConsumedForUser(_ context.Context, userID uuid.UUI
42954295
return sum, nil
42964296
}
42974297

4298-
func (q *fakeQuerier) UpdateWorkspaceAgentStateByID(_ context.Context, id uuid.UUID, state database.AgentState) error {
4298+
func (q *fakeQuerier) UpdateWorkspaceAgentStateByID(_ context.Context, arg database.UpdateWorkspaceAgentStateByIDParams) error {
42994299
q.mutex.Lock()
43004300
defer q.mutex.Unlock()
43014301
for i, agent := range q.workspaceAgents {
4302-
if agent.ID == id {
4303-
agent.State = state
4302+
if agent.ID == arg.ID {
4303+
agent.State = arg.State
43044304
q.workspaceAgents[i] = agent
43054305
return nil
43064306
}

coderd/workspaceagents.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,50 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
900900
})
901901
}
902902

903+
// @Summary Submit workspace agent state
904+
// @ID submit-workspace-agent-state
905+
// @Security CoderSessionToken
906+
// @Accept json
907+
// @Tags Agents
908+
// @Param request body codersdk.PostWorkspaceAgentStateRequest true "Workspace agent state request"
909+
// @Success 204 "Success"
910+
// @Router /workspaceagents/me/report-state [post]
911+
func (api *API) workspaceAgentReportState(rw http.ResponseWriter, r *http.Request) {
912+
ctx := r.Context()
913+
914+
workspaceAgent := httpmw.WorkspaceAgent(r)
915+
workspace, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
916+
if err != nil {
917+
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
918+
Message: "Failed to get workspace.",
919+
Detail: err.Error(),
920+
})
921+
return
922+
}
923+
924+
var req codersdk.PostWorkspaceAgentStateRequest
925+
if !httpapi.Read(ctx, rw, r, &req) {
926+
return
927+
}
928+
929+
api.Logger.Debug(ctx, "workspace agent state report",
930+
slog.F("agent", workspaceAgent.ID),
931+
slog.F("workspace", workspace.ID),
932+
slog.F("payload", req),
933+
)
934+
935+
err = api.Database.UpdateWorkspaceAgentStateByID(ctx, database.UpdateWorkspaceAgentStateByIDParams{
936+
ID: workspaceAgent.ID,
937+
State: database.WorkspaceAgentState(req.State),
938+
})
939+
if err != nil {
940+
httpapi.InternalServerError(rw, err)
941+
return
942+
}
943+
944+
httpapi.Write(ctx, rw, http.StatusNoContent, nil)
945+
}
946+
903947
// @Summary Submit workspace agent application health
904948
// @ID submit-workspace-agent-application-health
905949
// @Security CoderSessionToken

coderd/wsconncache/wsconncache_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,10 @@ func (*client) AgentReportStats(_ context.Context, _ slog.Logger, _ func() *code
217217
return io.NopCloser(strings.NewReader("")), nil
218218
}
219219

220+
func (*client) PostWorkspaceAgentState(_ context.Context, _ codersdk.PostWorkspaceAgentStateRequest) error {
221+
return nil
222+
}
223+
220224
func (*client) PostWorkspaceAgentAppHealth(_ context.Context, _ codersdk.PostWorkspaceAppHealthsRequest) error {
221225
return nil
222226
}

codersdk/workspaceagents.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@ const (
3434
WorkspaceAgentTimeout WorkspaceAgentStatus = "timeout"
3535
)
3636

37+
// WorkspaceAgentState represents the lifecycle state of a workspace agent.
// The agent reports these values while it runs its startup script.
type WorkspaceAgentState string

const (
	// WorkspaceAgentStateStarting is reported before the agent begins
	// waiting for the startup script to finish.
	WorkspaceAgentStateStarting WorkspaceAgentState = "starting"
	// WorkspaceAgentStateStartTimeout is reported when the startup script
	// has not finished within the configured startup script timeout. The
	// script may still complete afterwards.
	WorkspaceAgentStateStartTimeout WorkspaceAgentState = "start_timeout"
	// WorkspaceAgentStateStartError is reported when the startup script
	// finishes with an error.
	WorkspaceAgentStateStartError WorkspaceAgentState = "start_error"
	// WorkspaceAgentStateReady is reported once the startup script has
	// completed without error.
	WorkspaceAgentStateReady WorkspaceAgentState = "ready"
)
46+
3747
type WorkspaceAgent struct {
3848
ID uuid.UUID `json:"id" format:"uuid"`
3949
CreatedAt time.Time `json:"created_at" format:"date-time"`
@@ -42,6 +52,7 @@ type WorkspaceAgent struct {
4252
LastConnectedAt *time.Time `json:"last_connected_at,omitempty" format:"date-time"`
4353
DisconnectedAt *time.Time `json:"disconnected_at,omitempty" format:"date-time"`
4454
Status WorkspaceAgentStatus `json:"status" enums:"connecting,connected,disconnected,timeout"`
55+
State WorkspaceAgentState `json:"state" enums:"starting,start_timeout,start_error,ready"`
4556
Name string `json:"name"`
4657
ResourceID uuid.UUID `json:"resource_id" format:"uuid"`
4758
InstanceID string `json:"instance_id,omitempty"`
@@ -131,6 +142,7 @@ type WorkspaceAgentMetadata struct {
131142
DERPMap *tailcfg.DERPMap `json:"derpmap"`
132143
EnvironmentVariables map[string]string `json:"environment_variables"`
133144
StartupScript string `json:"startup_script"`
145+
StartupScriptTimeout time.Duration `json:"startup_script_timeout" format:"duration"`
134146
Directory string `json:"directory"`
135147
MOTDFile string `json:"motd_file"`
136148
}
@@ -681,3 +693,21 @@ func (c *Client) WorkspaceAgentGitAuth(ctx context.Context, gitURL string, liste
681693
var authResp WorkspaceAgentGitAuthResponse
682694
return authResp, json.NewDecoder(res.Body).Decode(&authResp)
683695
}
696+
697+
// @typescript-ignore PostWorkspaceAgentStateRequest
698+
type PostWorkspaceAgentStateRequest struct {
699+
State WorkspaceAgentState `json:"state"`
700+
}
701+
702+
func (c *Client) PostWorkspaceAgentState(ctx context.Context, req PostWorkspaceAgentStateRequest) error {
703+
res, err := c.Request(ctx, http.MethodPost, "/api/v2/workspaceagents/me/report-state", req)
704+
if err != nil {
705+
return xerrors.Errorf("agent state post request: %w", err)
706+
}
707+
defer res.Body.Close()
708+
if res.StatusCode != http.StatusOK {
709+
return readBodyAsError(res)
710+
}
711+
712+
return nil
713+
}

0 commit comments

Comments
 (0)