Skip to content

Commit c1e6aa0

Browse files
committed
fix: remove startup logs eof for streaming
We have external utilities like logstream-kube that may send logs after an agent shuts down unexpectedly to report additional information. A recent change stopped accepting these logs, which broke these utilities. In the future we'll rename startup logs to agent logs (or something more general) so this is less confusing.
1 parent 970a829 commit c1e6aa0

File tree

3 files changed

+44
-222
lines changed

3 files changed

+44
-222
lines changed

coderd/workspaceagents.go

+44-97
Original file line numberDiff line numberDiff line change
@@ -280,81 +280,61 @@ func (api *API) patchWorkspaceAgentStartupLogs(rw http.ResponseWriter, r *http.R
280280
level = append(level, parsedLevel)
281281
}
282282

283-
var logs []database.WorkspaceAgentStartupLog
284-
// Ensure logs are not written after script ended.
285-
scriptEndedError := xerrors.New("startup script has ended")
286-
err := api.Database.InTx(func(db database.Store) error {
287-
state, err := db.GetWorkspaceAgentLifecycleStateByID(ctx, workspaceAgent.ID)
288-
if err != nil {
289-
return xerrors.Errorf("workspace agent startup script status: %w", err)
283+
logs, err := api.Database.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
284+
AgentID: workspaceAgent.ID,
285+
CreatedAt: createdAt,
286+
Output: output,
287+
Level: level,
288+
OutputLength: int32(outputLength),
289+
})
290+
if err != nil {
291+
if !database.IsStartupLogsLimitError(err) {
292+
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
293+
Message: "Failed to upload startup logs",
294+
Detail: err.Error(),
295+
})
296+
return
290297
}
291-
292-
if state.ReadyAt.Valid {
293-
// The agent startup script has already ended, so we don't want to
294-
// process any more logs.
295-
return scriptEndedError
298+
if workspaceAgent.StartupLogsOverflowed {
299+
httpapi.Write(ctx, rw, http.StatusRequestEntityTooLarge, codersdk.Response{
300+
Message: "Startup logs limit exceeded",
301+
Detail: err.Error(),
302+
})
303+
return
296304
}
297-
298-
logs, err = db.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
299-
AgentID: workspaceAgent.ID,
300-
CreatedAt: createdAt,
301-
Output: output,
302-
Level: level,
303-
OutputLength: int32(outputLength),
305+
err := api.Database.UpdateWorkspaceAgentStartupLogOverflowByID(ctx, database.UpdateWorkspaceAgentStartupLogOverflowByIDParams{
306+
ID: workspaceAgent.ID,
307+
StartupLogsOverflowed: true,
304308
})
305-
return err
306-
}, nil)
307-
if err != nil {
308-
if errors.Is(err, scriptEndedError) {
309-
httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{
310-
Message: "Failed to upload logs, startup script has already ended.",
309+
if err != nil {
310+
// We don't want to return here, because the agent will retry
311+
// on failure and this isn't a huge deal. The overflow state
312+
// is just a hint to the user that the logs are incomplete.
313+
api.Logger.Warn(ctx, "failed to update workspace agent startup log overflow", slog.Error(err))
314+
}
315+
316+
resource, err := api.Database.GetWorkspaceResourceByID(ctx, workspaceAgent.ResourceID)
317+
if err != nil {
318+
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
319+
Message: "Failed to get workspace resource.",
311320
Detail: err.Error(),
312321
})
313322
return
314323
}
315-
if database.IsStartupLogsLimitError(err) {
316-
if !workspaceAgent.StartupLogsOverflowed {
317-
err := api.Database.UpdateWorkspaceAgentStartupLogOverflowByID(ctx, database.UpdateWorkspaceAgentStartupLogOverflowByIDParams{
318-
ID: workspaceAgent.ID,
319-
StartupLogsOverflowed: true,
320-
})
321-
if err != nil {
322-
// We don't want to return here, because the agent will retry
323-
// on failure and this isn't a huge deal. The overflow state
324-
// is just a hint to the user that the logs are incomplete.
325-
api.Logger.Warn(ctx, "failed to update workspace agent startup log overflow", slog.Error(err))
326-
}
327324

328-
resource, err := api.Database.GetWorkspaceResourceByID(ctx, workspaceAgent.ResourceID)
329-
if err != nil {
330-
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
331-
Message: "Failed to get workspace resource.",
332-
Detail: err.Error(),
333-
})
334-
return
335-
}
336-
337-
build, err := api.Database.GetWorkspaceBuildByJobID(ctx, resource.JobID)
338-
if err != nil {
339-
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
340-
Message: "Internal error fetching workspace build job.",
341-
Detail: err.Error(),
342-
})
343-
return
344-
}
345-
346-
api.publishWorkspaceUpdate(ctx, build.WorkspaceID)
347-
}
348-
349-
httpapi.Write(ctx, rw, http.StatusRequestEntityTooLarge, codersdk.Response{
350-
Message: "Startup logs limit exceeded",
325+
build, err := api.Database.GetWorkspaceBuildByJobID(ctx, resource.JobID)
326+
if err != nil {
327+
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
328+
Message: "Internal error fetching workspace build job.",
351329
Detail: err.Error(),
352330
})
353331
return
354332
}
355-
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
356-
Message: "Failed to upload startup logs",
357-
Detail: err.Error(),
333+
334+
api.publishWorkspaceUpdate(ctx, build.WorkspaceID)
335+
336+
httpapi.Write(ctx, rw, http.StatusRequestEntityTooLarge, codersdk.Response{
337+
Message: "Startup logs limit exceeded",
358338
})
359339
return
360340
}
@@ -497,18 +477,6 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
497477
return
498478
}
499479

500-
if workspaceAgent.ReadyAt.Valid {
501-
// Fast path, the startup script has finished running, so we can close
502-
// the connection.
503-
return
504-
}
505-
if !codersdk.WorkspaceAgentLifecycle(workspaceAgent.LifecycleState).Starting() {
506-
// Backwards compatibility: Avoid waiting forever in case this agent is
507-
// older than the current release and has already reported the ready
508-
// state.
509-
return
510-
}
511-
512480
lastSentLogID := after
513481
if len(logs) > 0 {
514482
lastSentLogID = logs[len(logs)-1].ID
@@ -543,11 +511,9 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
543511
t := time.NewTicker(recheckInterval)
544512
defer t.Stop()
545513

546-
var state database.GetWorkspaceAgentLifecycleStateByIDRow
547514
go func() {
548515
defer close(bufferedLogs)
549516

550-
var err error
551517
for {
552518
select {
553519
case <-ctx.Done():
@@ -557,17 +523,6 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
557523
t.Reset(recheckInterval)
558524
}
559525

560-
if !state.ReadyAt.Valid {
561-
state, err = api.Database.GetWorkspaceAgentLifecycleStateByID(ctx, workspaceAgent.ID)
562-
if err != nil {
563-
if xerrors.Is(err, context.Canceled) {
564-
return
565-
}
566-
logger.Warn(ctx, "failed to get workspace agent lifecycle state", slog.Error(err))
567-
continue
568-
}
569-
}
570-
571526
logs, err := api.Database.GetWorkspaceAgentStartupLogsAfter(ctx, database.GetWorkspaceAgentStartupLogsAfterParams{
572527
AgentID: workspaceAgent.ID,
573528
CreatedAfter: lastSentLogID,
@@ -580,9 +535,7 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
580535
continue
581536
}
582537
if len(logs) == 0 {
583-
if state.ReadyAt.Valid {
584-
return
585-
}
538+
// Just keep listening - more logs might come in the future!
586539
continue
587540
}
588541

@@ -1689,12 +1642,6 @@ func (api *API) workspaceAgentReportLifecycle(rw http.ResponseWriter, r *http.Re
16891642
return
16901643
}
16911644

1692-
if readyAt.Valid {
1693-
api.publishWorkspaceAgentStartupLogsUpdate(ctx, workspaceAgent.ID, agentsdk.StartupLogsNotifyMessage{
1694-
EndOfLogs: true,
1695-
})
1696-
}
1697-
16981645
api.publishWorkspaceUpdate(ctx, workspace.ID)
16991646

17001647
httpapi.Write(ctx, rw, http.StatusNoContent, nil)

coderd/workspaceagents_test.go

-124
Original file line numberDiff line numberDiff line change
@@ -301,130 +301,6 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
301301
}
302302
}
303303
})
304-
t.Run("CloseAfterLifecycleStateIsNotRunning", func(t *testing.T) {
305-
t.Parallel()
306-
ctx := testutil.Context(t, testutil.WaitMedium)
307-
client := coderdtest.New(t, &coderdtest.Options{
308-
IncludeProvisionerDaemon: true,
309-
})
310-
user := coderdtest.CreateFirstUser(t, client)
311-
authToken := uuid.NewString()
312-
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
313-
Parse: echo.ParseComplete,
314-
ProvisionPlan: echo.ProvisionComplete,
315-
ProvisionApply: []*proto.Provision_Response{{
316-
Type: &proto.Provision_Response_Complete{
317-
Complete: &proto.Provision_Complete{
318-
Resources: []*proto.Resource{{
319-
Name: "example",
320-
Type: "aws_instance",
321-
Agents: []*proto.Agent{{
322-
Id: uuid.NewString(),
323-
Auth: &proto.Agent_Token{
324-
Token: authToken,
325-
},
326-
}},
327-
}},
328-
},
329-
},
330-
}},
331-
})
332-
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
333-
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
334-
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
335-
build := coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
336-
337-
agentClient := agentsdk.New(client.URL)
338-
agentClient.SetSessionToken(authToken)
339-
340-
logs, closer, err := client.WorkspaceAgentStartupLogsAfter(ctx, build.Resources[0].Agents[0].ID, 0, true)
341-
require.NoError(t, err)
342-
defer func() {
343-
_ = closer.Close()
344-
}()
345-
346-
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
347-
Logs: []agentsdk.StartupLog{
348-
{
349-
CreatedAt: database.Now(),
350-
Output: "testing",
351-
},
352-
},
353-
})
354-
require.NoError(t, err)
355-
356-
err = agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
357-
State: codersdk.WorkspaceAgentLifecycleReady,
358-
ChangedAt: time.Now(),
359-
})
360-
require.NoError(t, err)
361-
362-
var gotLogs []codersdk.WorkspaceAgentStartupLog
363-
for {
364-
select {
365-
case <-ctx.Done():
366-
require.Fail(t, "timed out waiting for logs to end")
367-
case l, ok := <-logs:
368-
gotLogs = append(gotLogs, l...)
369-
if !ok {
370-
require.Len(t, gotLogs, 1, "expected one log")
371-
return // Success.
372-
}
373-
}
374-
}
375-
})
376-
t.Run("NoLogAfterScriptEnded", func(t *testing.T) {
377-
t.Parallel()
378-
ctx := testutil.Context(t, testutil.WaitMedium)
379-
client := coderdtest.New(t, &coderdtest.Options{
380-
IncludeProvisionerDaemon: true,
381-
})
382-
user := coderdtest.CreateFirstUser(t, client)
383-
authToken := uuid.NewString()
384-
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
385-
Parse: echo.ParseComplete,
386-
ProvisionPlan: echo.ProvisionComplete,
387-
ProvisionApply: []*proto.Provision_Response{{
388-
Type: &proto.Provision_Response_Complete{
389-
Complete: &proto.Provision_Complete{
390-
Resources: []*proto.Resource{{
391-
Name: "example",
392-
Type: "aws_instance",
393-
Agents: []*proto.Agent{{
394-
Id: uuid.NewString(),
395-
Auth: &proto.Agent_Token{
396-
Token: authToken,
397-
},
398-
}},
399-
}},
400-
},
401-
},
402-
}},
403-
})
404-
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
405-
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
406-
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
407-
_ = coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
408-
409-
agentClient := agentsdk.New(client.URL)
410-
agentClient.SetSessionToken(authToken)
411-
412-
err := agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
413-
State: codersdk.WorkspaceAgentLifecycleReady,
414-
ChangedAt: time.Now(),
415-
})
416-
require.NoError(t, err)
417-
418-
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
419-
Logs: []agentsdk.StartupLog{
420-
{
421-
CreatedAt: database.Now(),
422-
Output: "testing",
423-
},
424-
},
425-
})
426-
require.Error(t, err, "insert after script ended should not succeed")
427-
})
428304
}
429305

430306
func TestWorkspaceAgentListen(t *testing.T) {

codersdk/agentsdk/agentsdk.go

-1
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,6 @@ func StartupLogsNotifyChannel(agentID uuid.UUID) string {
694694

695695
type StartupLogsNotifyMessage struct {
696696
CreatedAfter int64 `json:"created_after"`
697-
EndOfLogs bool `json:"end_of_logs"`
698697
}
699698

700699
type closeNetConn struct {

0 commit comments

Comments
 (0)