Skip to content

Commit 2139382

Browse files
committed
fix: remove startup logs eof for streaming
We have external utilities like logstream-kube that may send logs after an agent shuts down unexpectedly to report additional information. In a recent change we stopped accepting these logs, which broke these utilities. In the future we'll rename startup logs to agent logs or something more generalized so this is less confusing.
1 parent 970a829 commit 2139382

File tree

3 files changed

+44
-197
lines changed

3 files changed

+44
-197
lines changed

coderd/workspaceagents.go

+44-72
Original file line numberDiff line numberDiff line change
@@ -280,81 +280,61 @@ func (api *API) patchWorkspaceAgentStartupLogs(rw http.ResponseWriter, r *http.R
280280
level = append(level, parsedLevel)
281281
}
282282

283-
var logs []database.WorkspaceAgentStartupLog
284-
// Ensure logs are not written after script ended.
285-
scriptEndedError := xerrors.New("startup script has ended")
286-
err := api.Database.InTx(func(db database.Store) error {
287-
state, err := db.GetWorkspaceAgentLifecycleStateByID(ctx, workspaceAgent.ID)
288-
if err != nil {
289-
return xerrors.Errorf("workspace agent startup script status: %w", err)
283+
logs, err := api.Database.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
284+
AgentID: workspaceAgent.ID,
285+
CreatedAt: createdAt,
286+
Output: output,
287+
Level: level,
288+
OutputLength: int32(outputLength),
289+
})
290+
if err != nil {
291+
if !database.IsStartupLogsLimitError(err) {
292+
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
293+
Message: "Failed to upload startup logs",
294+
Detail: err.Error(),
295+
})
296+
return
290297
}
291-
292-
if state.ReadyAt.Valid {
293-
// The agent startup script has already ended, so we don't want to
294-
// process any more logs.
295-
return scriptEndedError
298+
if workspaceAgent.StartupLogsOverflowed {
299+
httpapi.Write(ctx, rw, http.StatusRequestEntityTooLarge, codersdk.Response{
300+
Message: "Startup logs limit exceeded",
301+
Detail: err.Error(),
302+
})
303+
return
296304
}
297-
298-
logs, err = db.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
299-
AgentID: workspaceAgent.ID,
300-
CreatedAt: createdAt,
301-
Output: output,
302-
Level: level,
303-
OutputLength: int32(outputLength),
305+
err := api.Database.UpdateWorkspaceAgentStartupLogOverflowByID(ctx, database.UpdateWorkspaceAgentStartupLogOverflowByIDParams{
306+
ID: workspaceAgent.ID,
307+
StartupLogsOverflowed: true,
304308
})
305-
return err
306-
}, nil)
307-
if err != nil {
308-
if errors.Is(err, scriptEndedError) {
309-
httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{
310-
Message: "Failed to upload logs, startup script has already ended.",
309+
if err != nil {
310+
// We don't want to return here, because the agent will retry
311+
// on failure and this isn't a huge deal. The overflow state
312+
// is just a hint to the user that the logs are incomplete.
313+
api.Logger.Warn(ctx, "failed to update workspace agent startup log overflow", slog.Error(err))
314+
}
315+
316+
resource, err := api.Database.GetWorkspaceResourceByID(ctx, workspaceAgent.ResourceID)
317+
if err != nil {
318+
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
319+
Message: "Failed to get workspace resource.",
311320
Detail: err.Error(),
312321
})
313322
return
314323
}
315-
if database.IsStartupLogsLimitError(err) {
316-
if !workspaceAgent.StartupLogsOverflowed {
317-
err := api.Database.UpdateWorkspaceAgentStartupLogOverflowByID(ctx, database.UpdateWorkspaceAgentStartupLogOverflowByIDParams{
318-
ID: workspaceAgent.ID,
319-
StartupLogsOverflowed: true,
320-
})
321-
if err != nil {
322-
// We don't want to return here, because the agent will retry
323-
// on failure and this isn't a huge deal. The overflow state
324-
// is just a hint to the user that the logs are incomplete.
325-
api.Logger.Warn(ctx, "failed to update workspace agent startup log overflow", slog.Error(err))
326-
}
327-
328-
resource, err := api.Database.GetWorkspaceResourceByID(ctx, workspaceAgent.ResourceID)
329-
if err != nil {
330-
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
331-
Message: "Failed to get workspace resource.",
332-
Detail: err.Error(),
333-
})
334-
return
335-
}
336-
337-
build, err := api.Database.GetWorkspaceBuildByJobID(ctx, resource.JobID)
338-
if err != nil {
339-
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
340-
Message: "Internal error fetching workspace build job.",
341-
Detail: err.Error(),
342-
})
343-
return
344-
}
345-
346-
api.publishWorkspaceUpdate(ctx, build.WorkspaceID)
347-
}
348324

349-
httpapi.Write(ctx, rw, http.StatusRequestEntityTooLarge, codersdk.Response{
350-
Message: "Startup logs limit exceeded",
325+
build, err := api.Database.GetWorkspaceBuildByJobID(ctx, resource.JobID)
326+
if err != nil {
327+
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
328+
Message: "Internal error fetching workspace build job.",
351329
Detail: err.Error(),
352330
})
353331
return
354332
}
355-
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
356-
Message: "Failed to upload startup logs",
357-
Detail: err.Error(),
333+
334+
api.publishWorkspaceUpdate(ctx, build.WorkspaceID)
335+
336+
httpapi.Write(ctx, rw, http.StatusRequestEntityTooLarge, codersdk.Response{
337+
Message: "Startup logs limit exceeded",
358338
})
359339
return
360340
}
@@ -580,9 +560,7 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
580560
continue
581561
}
582562
if len(logs) == 0 {
583-
if state.ReadyAt.Valid {
584-
return
585-
}
563+
// Just keep listening - more logs might come in the future!
586564
continue
587565
}
588566

@@ -1689,12 +1667,6 @@ func (api *API) workspaceAgentReportLifecycle(rw http.ResponseWriter, r *http.Re
16891667
return
16901668
}
16911669

1692-
if readyAt.Valid {
1693-
api.publishWorkspaceAgentStartupLogsUpdate(ctx, workspaceAgent.ID, agentsdk.StartupLogsNotifyMessage{
1694-
EndOfLogs: true,
1695-
})
1696-
}
1697-
16981670
api.publishWorkspaceUpdate(ctx, workspace.ID)
16991671

17001672
httpapi.Write(ctx, rw, http.StatusNoContent, nil)

coderd/workspaceagents_test.go

-124
Original file line numberDiff line numberDiff line change
@@ -301,130 +301,6 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
301301
}
302302
}
303303
})
304-
t.Run("CloseAfterLifecycleStateIsNotRunning", func(t *testing.T) {
305-
t.Parallel()
306-
ctx := testutil.Context(t, testutil.WaitMedium)
307-
client := coderdtest.New(t, &coderdtest.Options{
308-
IncludeProvisionerDaemon: true,
309-
})
310-
user := coderdtest.CreateFirstUser(t, client)
311-
authToken := uuid.NewString()
312-
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
313-
Parse: echo.ParseComplete,
314-
ProvisionPlan: echo.ProvisionComplete,
315-
ProvisionApply: []*proto.Provision_Response{{
316-
Type: &proto.Provision_Response_Complete{
317-
Complete: &proto.Provision_Complete{
318-
Resources: []*proto.Resource{{
319-
Name: "example",
320-
Type: "aws_instance",
321-
Agents: []*proto.Agent{{
322-
Id: uuid.NewString(),
323-
Auth: &proto.Agent_Token{
324-
Token: authToken,
325-
},
326-
}},
327-
}},
328-
},
329-
},
330-
}},
331-
})
332-
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
333-
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
334-
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
335-
build := coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
336-
337-
agentClient := agentsdk.New(client.URL)
338-
agentClient.SetSessionToken(authToken)
339-
340-
logs, closer, err := client.WorkspaceAgentStartupLogsAfter(ctx, build.Resources[0].Agents[0].ID, 0, true)
341-
require.NoError(t, err)
342-
defer func() {
343-
_ = closer.Close()
344-
}()
345-
346-
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
347-
Logs: []agentsdk.StartupLog{
348-
{
349-
CreatedAt: database.Now(),
350-
Output: "testing",
351-
},
352-
},
353-
})
354-
require.NoError(t, err)
355-
356-
err = agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
357-
State: codersdk.WorkspaceAgentLifecycleReady,
358-
ChangedAt: time.Now(),
359-
})
360-
require.NoError(t, err)
361-
362-
var gotLogs []codersdk.WorkspaceAgentStartupLog
363-
for {
364-
select {
365-
case <-ctx.Done():
366-
require.Fail(t, "timed out waiting for logs to end")
367-
case l, ok := <-logs:
368-
gotLogs = append(gotLogs, l...)
369-
if !ok {
370-
require.Len(t, gotLogs, 1, "expected one log")
371-
return // Success.
372-
}
373-
}
374-
}
375-
})
376-
t.Run("NoLogAfterScriptEnded", func(t *testing.T) {
377-
t.Parallel()
378-
ctx := testutil.Context(t, testutil.WaitMedium)
379-
client := coderdtest.New(t, &coderdtest.Options{
380-
IncludeProvisionerDaemon: true,
381-
})
382-
user := coderdtest.CreateFirstUser(t, client)
383-
authToken := uuid.NewString()
384-
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
385-
Parse: echo.ParseComplete,
386-
ProvisionPlan: echo.ProvisionComplete,
387-
ProvisionApply: []*proto.Provision_Response{{
388-
Type: &proto.Provision_Response_Complete{
389-
Complete: &proto.Provision_Complete{
390-
Resources: []*proto.Resource{{
391-
Name: "example",
392-
Type: "aws_instance",
393-
Agents: []*proto.Agent{{
394-
Id: uuid.NewString(),
395-
Auth: &proto.Agent_Token{
396-
Token: authToken,
397-
},
398-
}},
399-
}},
400-
},
401-
},
402-
}},
403-
})
404-
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
405-
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
406-
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
407-
_ = coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
408-
409-
agentClient := agentsdk.New(client.URL)
410-
agentClient.SetSessionToken(authToken)
411-
412-
err := agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
413-
State: codersdk.WorkspaceAgentLifecycleReady,
414-
ChangedAt: time.Now(),
415-
})
416-
require.NoError(t, err)
417-
418-
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
419-
Logs: []agentsdk.StartupLog{
420-
{
421-
CreatedAt: database.Now(),
422-
Output: "testing",
423-
},
424-
},
425-
})
426-
require.Error(t, err, "insert after script ended should not succeed")
427-
})
428304
}
429305

430306
func TestWorkspaceAgentListen(t *testing.T) {

codersdk/agentsdk/agentsdk.go

-1
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,6 @@ func StartupLogsNotifyChannel(agentID uuid.UUID) string {
694694

695695
type StartupLogsNotifyMessage struct {
696696
CreatedAfter int64 `json:"created_after"`
697-
EndOfLogs bool `json:"end_of_logs"`
698697
}
699698

700699
type closeNetConn struct {

0 commit comments

Comments
 (0)