Skip to content

Commit 0c50774

Browse files
authored
fix: avoid missed logs when streaming startup logs (#8029)
* feat(coderd,agent): send startup log eof at the end * fix(coderd): fix edge case in startup log pubsub * fix(coderd): ensure startup logs are closed on lifecycle state change (fallback) * fix(codersdk): fix startup log channel shared memory bug * fix(site): remove the EOF log line
1 parent 247f8a9 commit 0c50774

28 files changed

+658
-131
lines changed

agent/agent.go

+13
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,11 @@ func (a *agent) trackScriptLogs(ctx context.Context, reader io.Reader) (chan str
929929
if errors.As(err, &sdkErr) {
930930
if sdkErr.StatusCode() == http.StatusRequestEntityTooLarge {
931931
a.logger.Warn(ctx, "startup logs too large, dropping logs")
932+
// Always send the EOF even if logs overflow.
933+
if len(logsToSend) > 1 && logsToSend[len(logsToSend)-1].EOF {
934+
logsToSend = logsToSend[len(logsToSend)-1:]
935+
continue
936+
}
932937
break
933938
}
934939
}
@@ -978,6 +983,14 @@ func (a *agent) trackScriptLogs(ctx context.Context, reader io.Reader) (chan str
978983
Output: scanner.Text(),
979984
})
980985
}
986+
if err := scanner.Err(); err != nil {
987+
a.logger.Error(ctx, "scan startup logs", slog.Error(err))
988+
}
989+
queueLog(agentsdk.StartupLog{
990+
CreatedAt: database.Now(),
991+
Output: "",
992+
EOF: true,
993+
})
981994
defer close(logsFinished)
982995
logsFlushed.L.Lock()
983996
for {

agent/agent_test.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -905,8 +905,10 @@ func TestAgent_StartupScript(t *testing.T) {
905905
return len(got) > 0 && got[len(got)-1] == codersdk.WorkspaceAgentLifecycleReady
906906
}, testutil.WaitShort, testutil.IntervalMedium)
907907

908-
require.Len(t, client.getStartupLogs(), 1)
908+
require.Len(t, client.getStartupLogs(), 2)
909909
require.Equal(t, output, client.getStartupLogs()[0].Output)
910+
require.False(t, client.getStartupLogs()[0].EOF)
911+
require.True(t, client.getStartupLogs()[1].EOF)
910912
})
911913
// This ensures that even when coderd sends back that the startup
912914
// script has written too many lines it will still succeed!

coderd/apidoc/docs.go

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/apidoc/swagger.json

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbauthz/dbauthz.go

+8
Original file line numberDiff line numberDiff line change
@@ -1407,6 +1407,14 @@ func (q *querier) GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg dat
14071407
return q.db.GetWorkspaceAgentStartupLogsAfter(ctx, arg)
14081408
}
14091409

1410+
func (q *querier) GetWorkspaceAgentStartupLogsEOF(ctx context.Context, agentID uuid.UUID) (bool, error) {
1411+
_, err := q.GetWorkspaceAgentByID(ctx, agentID)
1412+
if err != nil {
1413+
return false, err
1414+
}
1415+
return q.db.GetWorkspaceAgentStartupLogsEOF(ctx, agentID)
1416+
}
1417+
14101418
func (q *querier) GetWorkspaceAgentStats(ctx context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
14111419
return q.db.GetWorkspaceAgentStats(ctx, createdAfter)
14121420
}

coderd/database/dbfake/dbfake.go

+19-2
Original file line numberDiff line numberDiff line change
@@ -2722,14 +2722,30 @@ func (q *fakeQuerier) GetWorkspaceAgentStartupLogsAfter(_ context.Context, arg d
27222722
if log.AgentID != arg.AgentID {
27232723
continue
27242724
}
2725-
if arg.CreatedAfter != 0 && log.ID < arg.CreatedAfter {
2725+
if arg.CreatedAfter != 0 && log.ID <= arg.CreatedAfter {
27262726
continue
27272727
}
27282728
logs = append(logs, log)
27292729
}
27302730
return logs, nil
27312731
}
27322732

2733+
func (q *fakeQuerier) GetWorkspaceAgentStartupLogsEOF(_ context.Context, agentID uuid.UUID) (bool, error) {
2734+
q.mutex.RLock()
2735+
defer q.mutex.RUnlock()
2736+
2737+
var lastLog database.WorkspaceAgentStartupLog
2738+
for _, log := range q.workspaceAgentLogs {
2739+
if log.AgentID != agentID {
2740+
continue
2741+
}
2742+
if log.ID > lastLog.ID {
2743+
lastLog = log
2744+
}
2745+
}
2746+
return lastLog.EOF, nil
2747+
}
2748+
27332749
func (q *fakeQuerier) GetWorkspaceAgentStats(_ context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
27342750
q.mutex.RLock()
27352751
defer q.mutex.RUnlock()
@@ -4013,7 +4029,7 @@ func (q *fakeQuerier) InsertWorkspaceAgentStartupLogs(_ context.Context, arg dat
40134029
defer q.mutex.Unlock()
40144030

40154031
logs := []database.WorkspaceAgentStartupLog{}
4016-
id := int64(1)
4032+
id := int64(0)
40174033
if len(q.workspaceAgentLogs) > 0 {
40184034
id = q.workspaceAgentLogs[len(q.workspaceAgentLogs)-1].ID
40194035
}
@@ -4026,6 +4042,7 @@ func (q *fakeQuerier) InsertWorkspaceAgentStartupLogs(_ context.Context, arg dat
40264042
CreatedAt: arg.CreatedAt[index],
40274043
Level: arg.Level[index],
40284044
Output: output,
4045+
EOF: arg.EOF[index],
40294046
})
40304047
outputLength += int32(len(output))
40314048
}

coderd/database/dbmetrics/dbmetrics.go

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbmock/dbmock.go

+15
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dump.sql

+4-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE workspace_agent_startup_logs DROP COLUMN eof;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ALTER TABLE workspace_agent_startup_logs ADD COLUMN eof boolean NOT NULL DEFAULT false;
2+
3+
COMMENT ON COLUMN workspace_agent_startup_logs.eof IS 'End of file reached';

coderd/database/models.go

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/querier.go

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/querier_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ func TestInsertWorkspaceAgentStartupLogs(t *testing.T) {
114114
CreatedAt: []time.Time{database.Now()},
115115
Output: []string{"first"},
116116
Level: []database.LogLevel{database.LogLevelInfo},
117+
EOF: []bool{false},
117118
// 1 MB is the max
118119
OutputLength: 1 << 20,
119120
})
@@ -125,6 +126,7 @@ func TestInsertWorkspaceAgentStartupLogs(t *testing.T) {
125126
CreatedAt: []time.Time{database.Now()},
126127
Output: []string{"second"},
127128
Level: []database.LogLevel{database.LogLevelInfo},
129+
EOF: []bool{false},
128130
OutputLength: 1,
129131
})
130132
require.True(t, database.IsStartupLogsLimitError(err))

coderd/database/queries.sql.go

+30-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/workspaceagents.sql

+15-2
Original file line numberDiff line numberDiff line change
@@ -146,18 +146,31 @@ WHERE
146146
id > @created_after
147147
) ORDER BY id ASC;
148148

149+
-- name: GetWorkspaceAgentStartupLogsEOF :one
150+
SELECT CASE WHEN EXISTS (
151+
SELECT
152+
*
153+
FROM
154+
workspace_agent_startup_logs
155+
WHERE
156+
agent_id = $1
157+
AND eof = true
158+
LIMIT 1
159+
) THEN TRUE ELSE FALSE END;
160+
149161
-- name: InsertWorkspaceAgentStartupLogs :many
150162
WITH new_length AS (
151163
UPDATE workspace_agents SET
152164
startup_logs_length = startup_logs_length + @output_length WHERE workspace_agents.id = @agent_id
153165
)
154166
INSERT INTO
155-
workspace_agent_startup_logs (agent_id, created_at, output, level)
167+
workspace_agent_startup_logs (agent_id, created_at, output, level, eof)
156168
SELECT
157169
@agent_id :: uuid AS agent_id,
158170
unnest(@created_at :: timestamptz [ ]) AS created_at,
159171
unnest(@output :: VARCHAR(1024) [ ]) AS output,
160-
unnest(@level :: log_level [ ]) AS level
172+
unnest(@level :: log_level [ ]) AS level,
173+
unnest(@eof :: boolean [ ]) AS eof
161174
RETURNING workspace_agent_startup_logs.*;
162175

163176
-- If an agent hasn't connected in the last 7 days, we purge it's logs.

coderd/database/sqlc.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ overrides:
5555
uuid: UUID
5656
failure_ttl: FailureTTL
5757
inactivity_ttl: InactivityTTL
58+
eof: EOF
5859

5960
sql:
6061
- schema: "./dump.sql"

0 commit comments

Comments
 (0)