From c148d0d26fbeeb30e297246abe050b790ee55d4a Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Tue, 27 May 2025 18:08:13 +0100 Subject: [PATCH 1/7] fix(coderd): ensure agent timings are present on insert --- coderd/agentapi/scripts.go | 26 ++++- coderd/agentapi/scripts_test.go | 95 ++++++++++++++++--- .../provisionerdserver/provisionerdserver.go | 5 + coderd/workspacebuilds.go | 21 ++++ 4 files changed, 135 insertions(+), 12 deletions(-) diff --git a/coderd/agentapi/scripts.go b/coderd/agentapi/scripts.go index 9f5e098e3c721..764d3cb1848ed 100644 --- a/coderd/agentapi/scripts.go +++ b/coderd/agentapi/scripts.go @@ -18,11 +18,35 @@ type ScriptsAPI struct { func (s *ScriptsAPI) ScriptCompleted(ctx context.Context, req *agentproto.WorkspaceAgentScriptCompletedRequest) (*agentproto.WorkspaceAgentScriptCompletedResponse, error) { res := &agentproto.WorkspaceAgentScriptCompletedResponse{} - scriptID, err := uuid.FromBytes(req.Timing.ScriptId) + if req.GetTiming() == nil { + return nil, xerrors.New("script timing is required") + } + + scriptID, err := uuid.FromBytes(req.GetTiming().GetScriptId()) if err != nil { return nil, xerrors.Errorf("script id from bytes: %w", err) } + if req.GetTiming().GetStart() == nil { + return nil, xerrors.New("script start time is required") + } + + if req.GetTiming().GetEnd() == nil { + return nil, xerrors.New("script end time is required") + } + + if req.GetTiming().GetStart().AsTime().IsZero() { + return nil, xerrors.New("script start time cannot be zero") + } + + if req.GetTiming().GetEnd().AsTime().IsZero() { + return nil, xerrors.New("script end time cannot be zero") + } + + if req.GetTiming().GetStart().AsTime().After(req.GetTiming().GetEnd().AsTime()) { + return nil, xerrors.New("script start time cannot be after end time") + } + var stage database.WorkspaceAgentScriptTimingStage switch req.Timing.Stage { case agentproto.Timing_START: diff --git a/coderd/agentapi/scripts_test.go b/coderd/agentapi/scripts_test.go index 44c1841be3396..ba2026f2f4ced 100644 --- a/coderd/agentapi/scripts_test.go +++ b/coderd/agentapi/scripts_test.go @@ -6,6 +6,7 @@ import ( "time" "github.com/google/uuid" + "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" "google.golang.org/protobuf/types/known/timestamppb" @@ -20,8 +21,10 @@ func TestScriptCompleted(t *testing.T) { t.Parallel() tests := []struct { - scriptID uuid.UUID - timing *agentproto.Timing + scriptID uuid.UUID + timing *agentproto.Timing + expectInsert bool + expectError string }{ { scriptID: uuid.New(), @@ -32,6 +35,7 @@ func TestScriptCompleted(t *testing.T) { Status: agentproto.Timing_OK, ExitCode: 0, }, + expectInsert: true, }, { scriptID: uuid.New(), @@ -42,6 +46,7 @@ func TestScriptCompleted(t *testing.T) { Status: agentproto.Timing_OK, ExitCode: 0, }, + expectInsert: true, }, { scriptID: uuid.New(), @@ -52,6 +57,7 @@ func TestScriptCompleted(t *testing.T) { Status: agentproto.Timing_OK, ExitCode: 0, }, + expectInsert: true, }, { scriptID: uuid.New(), @@ -62,6 +68,7 @@ func TestScriptCompleted(t *testing.T) { Status: agentproto.Timing_TIMED_OUT, ExitCode: 255, }, + expectInsert: true, }, { scriptID: uuid.New(), @@ -72,6 +79,67 @@ func TestScriptCompleted(t *testing.T) { Status: agentproto.Timing_EXIT_FAILURE, ExitCode: 1, }, + expectInsert: true, + }, + { + scriptID: uuid.New(), + timing: &agentproto.Timing{ + Stage: agentproto.Timing_START, + Start: nil, + End: timestamppb.New(dbtime.Now().Add(time.Second)), + Status: agentproto.Timing_OK, + ExitCode: 0, + }, + expectInsert: false, + expectError: "script start time is required", + }, + { + scriptID: uuid.New(), + timing: &agentproto.Timing{ + Stage: agentproto.Timing_START, + Start: timestamppb.New(dbtime.Now()), + End: nil, + Status: agentproto.Timing_OK, + ExitCode: 0, + }, + expectInsert: false, + expectError: "script end time is required", + }, + { + scriptID: uuid.New(), + timing: &agentproto.Timing{ + Stage: agentproto.Timing_START, + Start: timestamppb.New(time.Time{}), + End: timestamppb.New(dbtime.Now()), + Status: agentproto.Timing_OK, + ExitCode: 0, + }, + expectInsert: false, + expectError: "script start time cannot be zero", + }, + { + scriptID: uuid.New(), + timing: &agentproto.Timing{ + Stage: agentproto.Timing_START, + Start: timestamppb.New(dbtime.Now()), + End: timestamppb.New(time.Time{}), + Status: agentproto.Timing_OK, + ExitCode: 0, + }, + expectInsert: false, + expectError: "script end time cannot be zero", + }, + { + scriptID: uuid.New(), + timing: &agentproto.Timing{ + Stage: agentproto.Timing_START, + Start: timestamppb.New(dbtime.Now()), + End: timestamppb.New(dbtime.Now().Add(-time.Second)), + Status: agentproto.Timing_OK, + ExitCode: 0, + }, + expectInsert: false, + expectError: "script start time cannot be after end time", }, } @@ -80,19 +148,24 @@ func TestScriptCompleted(t *testing.T) { tt.timing.ScriptId = tt.scriptID[:] mDB := dbmock.NewMockStore(gomock.NewController(t)) - mDB.EXPECT().InsertWorkspaceAgentScriptTimings(gomock.Any(), database.InsertWorkspaceAgentScriptTimingsParams{ - ScriptID: tt.scriptID, - Stage: protoScriptTimingStageToDatabase(tt.timing.Stage), - Status: protoScriptTimingStatusToDatabase(tt.timing.Status), - StartedAt: tt.timing.Start.AsTime(), - EndedAt: tt.timing.End.AsTime(), - ExitCode: tt.timing.ExitCode, - }) + if tt.expectInsert { + mDB.EXPECT().InsertWorkspaceAgentScriptTimings(gomock.Any(), database.InsertWorkspaceAgentScriptTimingsParams{ + ScriptID: tt.scriptID, + Stage: protoScriptTimingStageToDatabase(tt.timing.Stage), + Status: protoScriptTimingStatusToDatabase(tt.timing.Status), + StartedAt: tt.timing.Start.AsTime(), + EndedAt: tt.timing.End.AsTime(), + ExitCode: tt.timing.ExitCode, + }) + } api := &agentapi.ScriptsAPI{Database: mDB} - api.ScriptCompleted(context.Background(), &agentproto.WorkspaceAgentScriptCompletedRequest{ + _, err := api.ScriptCompleted(context.Background(), &agentproto.WorkspaceAgentScriptCompletedRequest{ Timing: tt.timing, }) + if tt.expectError != "" { + require.Contains(t, err.Error(), tt.expectError, "expected error did not match") + } } } diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index f689c3681f6fc..3743f68c9e228 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -1746,6 +1746,11 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro continue } + if t.Start.AsTime().IsZero() || t.End.AsTime().IsZero() { + s.Logger.Warn(ctx, "timings entry has zero start or end time, skipping", slog.F("entry", t.String())) + continue + } + var stg database.ProvisionerJobTimingStage if err := stg.Scan(t.Stage); err != nil { s.Logger.Warn(ctx, "failed to parse timings stage, skipping", slog.F("value", t.Stage)) diff --git a/coderd/workspacebuilds.go b/coderd/workspacebuilds.go index 1fd0c95ff3a77..aeee71fe0b127 100644 --- a/coderd/workspacebuilds.go +++ b/coderd/workspacebuilds.go @@ -1176,6 +1176,16 @@ func (api *API) buildTimings(ctx context.Context, build database.WorkspaceBuild) } for _, t := range provisionerTimings { + // Ref: #15432: agent script timings must not have a zero start or end time. + if t.StartedAt.IsZero() || t.EndedAt.IsZero() { + api.Logger.Debug(ctx, "ignoring provisioner timing with zero start or end time", + slog.F("workspace_id", build.WorkspaceID), + slog.F("workspace_build_id", build.ID), + slog.F("provisioner_job_id", t.JobID), + ) + continue + } + res.ProvisionerTimings = append(res.ProvisionerTimings, codersdk.ProvisionerTiming{ JobID: t.JobID, Stage: codersdk.TimingStage(t.Stage), @@ -1187,6 +1197,17 @@ func (api *API) buildTimings(ctx context.Context, build database.WorkspaceBuild) }) } for _, t := range agentScriptTimings { + // Ref: #15432: agent script timings must not have a zero start or end time. + if t.StartedAt.IsZero() || t.EndedAt.IsZero() { + api.Logger.Debug(ctx, "ignoring agent script timing with zero start or end time", + slog.F("workspace_id", build.WorkspaceID), + slog.F("workspace_agent_id", t.WorkspaceAgentID), + slog.F("workspace_build_id", build.ID), + slog.F("workspace_agent_script_id", t.ScriptID), + ) + continue + } + res.AgentScriptTimings = append(res.AgentScriptTimings, codersdk.AgentScriptTiming{ StartedAt: t.StartedAt, EndedAt: t.EndedAt, From 6afbafe685c428e98286d4f27c4c3ae76e8ea632 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 28 May 2025 16:58:12 +0100 Subject: [PATCH 2/7] skip timings for agents that never connected --- coderd/workspacebuilds.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/coderd/workspacebuilds.go b/coderd/workspacebuilds.go index aeee71fe0b127..dbd99ef25d352 100644 --- a/coderd/workspacebuilds.go +++ b/coderd/workspacebuilds.go @@ -1220,6 +1220,14 @@ func (api *API) buildTimings(ctx context.Context, build database.WorkspaceBuild) }) } for _, agent := range agents { + if agent.FirstConnectedAt.Time.IsZero() { + api.Logger.Debug(ctx, "ignoring agent connection timing with zero first connected time", + slog.F("workspace_id", build.WorkspaceID), + slog.F("workspace_agent_id", agent.ID), + slog.F("workspace_build_id", build.ID), + ) + continue + } res.AgentConnectionTimings = append(res.AgentConnectionTimings, codersdk.AgentConnectionTiming{ WorkspaceAgentID: agent.ID.String(), WorkspaceAgentName: agent.Name, From db019ead3ccaf26c023164e38a778aee18fe546a Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 28 May 2025 16:59:14 +0100 Subject: [PATCH 3/7] add missing require.NoError --- coderd/agentapi/scripts_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/coderd/agentapi/scripts_test.go b/coderd/agentapi/scripts_test.go index ba2026f2f4ced..e7b1e3f48254d 100644 --- a/coderd/agentapi/scripts_test.go +++ b/coderd/agentapi/scripts_test.go @@ -165,6 +165,8 @@ func TestScriptCompleted(t *testing.T) { }) if tt.expectError != "" { require.Contains(t, err.Error(), tt.expectError, "expected error did not match") + } else { + require.NoError(t, err, "expected no error but got one") } } } From 1b83778460a1619fd69de3961db485220d0c31bf Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 28 May 2025 19:12:45 +0100 Subject: [PATCH 4/7] fix tests --- coderd/database/dbgen/dbgen.go | 11 +++++++++++ coderd/workspacebuilds_test.go | 7 +++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/coderd/database/dbgen/dbgen.go b/coderd/database/dbgen/dbgen.go index 286c80f1c2143..e941845726259 100644 --- a/coderd/database/dbgen/dbgen.go +++ b/coderd/database/dbgen/dbgen.go @@ -215,6 +215,17 @@ func WorkspaceAgent(t testing.TB, db database.Store, orig database.WorkspaceAgen APIKeyScope: takeFirst(orig.APIKeyScope, database.AgentKeyScopeEnumAll), }) require.NoError(t, err, "insert workspace agent") + if orig.FirstConnectedAt.Valid || orig.LastConnectedAt.Valid || orig.DisconnectedAt.Valid || orig.LastConnectedReplicaID.Valid { + err = db.UpdateWorkspaceAgentConnectionByID(genCtx, database.UpdateWorkspaceAgentConnectionByIDParams{ + ID: agt.ID, + FirstConnectedAt: takeFirst(orig.FirstConnectedAt, agt.FirstConnectedAt), + LastConnectedAt: takeFirst(orig.LastConnectedAt, agt.LastConnectedAt), + DisconnectedAt: takeFirst(orig.DisconnectedAt, agt.DisconnectedAt), + LastConnectedReplicaID: takeFirst(orig.LastConnectedReplicaID, agt.LastConnectedReplicaID), + UpdatedAt: takeFirst(orig.UpdatedAt, agt.UpdatedAt), + }) + require.NoError(t, err, "update workspace agent first connected at") + } return agt } diff --git a/coderd/workspacebuilds_test.go b/coderd/workspacebuilds_test.go index d9a6baee7b68a..c121523c4da4a 100644 --- a/coderd/workspacebuilds_test.go +++ b/coderd/workspacebuilds_test.go @@ -2,6 +2,7 @@ package coderd_test import ( "context" + "database/sql" "errors" "fmt" "net/http" @@ -1736,7 +1737,8 @@ func TestWorkspaceBuildTimings(t *testing.T) { JobID: build.JobID, }) agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ - ResourceID: resource.ID, + ResourceID: resource.ID, + FirstConnectedAt: sql.NullTime{Valid: true, Time: dbtime.Now().Add(-time.Hour)}, }) // When: fetching timings for the build @@ -1767,7 +1769,8 @@ func TestWorkspaceBuildTimings(t *testing.T) { agents := make([]database.WorkspaceAgent, 5) for i := range agents { agents[i] = dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{ - ResourceID: resource.ID, + ResourceID: resource.ID, + FirstConnectedAt: sql.NullTime{Valid: true, Time: dbtime.Now().Add(-time.Duration(i) * time.Hour)}, }) } From afe791f3d6c1e8a2011eb906a0467da876577f86 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 28 May 2025 19:35:01 +0100 Subject: [PATCH 5/7] address suggestion --- coderd/agentapi/scripts.go | 20 +++++++------------- coderd/agentapi/scripts_test.go | 8 ++++---- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/coderd/agentapi/scripts.go b/coderd/agentapi/scripts.go index 764d3cb1848ed..57dd071d23b17 100644 --- a/coderd/agentapi/scripts.go +++ b/coderd/agentapi/scripts.go @@ -27,23 +27,17 @@ func (s *ScriptsAPI) ScriptCompleted(ctx context.Context, req *agentproto.Worksp return nil, xerrors.Errorf("script id from bytes: %w", err) } - if req.GetTiming().GetStart() == nil { - return nil, xerrors.New("script start time is required") + scriptStart := req.GetTiming().GetStart() + if !scriptStart.IsValid() || scriptStart.AsTime().IsZero() { + return nil, xerrors.New("script start time is required and cannot be zero") } - if req.GetTiming().GetEnd() == nil { - return nil, xerrors.New("script end time is required") + scriptEnd := req.GetTiming().GetEnd() + if !scriptEnd.IsValid() || scriptEnd.AsTime().IsZero() { + return nil, xerrors.New("script end time is required and cannot be zero") } - if req.GetTiming().GetStart().AsTime().IsZero() { - return nil, xerrors.New("script start time cannot be zero") - } - - if req.GetTiming().GetEnd().AsTime().IsZero() { - return nil, xerrors.New("script end time cannot be zero") - } - - if req.GetTiming().GetStart().AsTime().After(req.GetTiming().GetEnd().AsTime()) { + if scriptStart.AsTime().After(scriptEnd.AsTime()) { return nil, xerrors.New("script start time cannot be after end time") } diff --git a/coderd/agentapi/scripts_test.go b/coderd/agentapi/scripts_test.go index e7b1e3f48254d..6185e643b9fac 100644 --- a/coderd/agentapi/scripts_test.go +++ b/coderd/agentapi/scripts_test.go @@ -91,7 +91,7 @@ func TestScriptCompleted(t *testing.T) { ExitCode: 0, }, expectInsert: false, - expectError: "script start time is required", + expectError: "script start time is required and cannot be zero", }, { scriptID: uuid.New(), @@ -103,7 +103,7 @@ func TestScriptCompleted(t *testing.T) { ExitCode: 0, }, expectInsert: false, - expectError: "script end time is required", + expectError: "script end time is required and cannot be zero", }, { scriptID: uuid.New(), @@ -115,7 +115,7 @@ func TestScriptCompleted(t *testing.T) { ExitCode: 0, }, expectInsert: false, - expectError: "script start time cannot be zero", + expectError: "script start time is required and cannot be zero", }, { scriptID: uuid.New(), @@ -127,7 +127,7 @@ func TestScriptCompleted(t *testing.T) { ExitCode: 0, }, expectInsert: false, - expectError: "script end time cannot be zero", + expectError: "script end time is required and cannot be zero", }, { scriptID: uuid.New(), From e2a6e5e9d2df91dff6b741e94183f9797af4f975 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Thu, 29 May 2025 13:01:50 +0100 Subject: [PATCH 6/7] another test update --- agent/agentscripts/agentscripts_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/agent/agentscripts/agentscripts_test.go b/agent/agentscripts/agentscripts_test.go index 3104bb805a40c..f50a0cc065138 100644 --- a/agent/agentscripts/agentscripts_test.go +++ b/agent/agentscripts/agentscripts_test.go @@ -144,6 +144,12 @@ func TestScriptReportsTiming(t *testing.T) { timing := timings[0] require.Equal(t, int32(0), timing.ExitCode) + if assert.True(t, timing.Start.IsValid(), "start time should be valid") { + require.NotZero(t, timing.Start.AsTime(), "start time should not be zero") + } + if assert.True(t, timing.End.IsValid(), "end time should be valid") { + require.NotZero(t, timing.End.AsTime(), "end time should not be zero") + } require.GreaterOrEqual(t, timing.End.AsTime(), timing.Start.AsTime()) } From 1f1ceb185b99fa6c9f92c440a117e85c694d6dc4 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Thu, 29 May 2025 13:22:33 +0100 Subject: [PATCH 7/7] more paranoia --- .../provisionerdserver/provisionerdserver.go | 12 ++-- .../provisionerdserver_test.go | 59 ++++++++++++++++--- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index 3743f68c9e228..eab8c8b907aaa 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -1741,13 +1741,15 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro JobID: jobID, } for _, t := range jobType.WorkspaceBuild.Timings { - if t.Start == nil || t.End == nil { - s.Logger.Warn(ctx, "timings entry has nil start or end time", slog.F("entry", t.String())) + start := t.GetStart() + if !start.IsValid() || start.AsTime().IsZero() { + s.Logger.Warn(ctx, "timings entry has nil or zero start time", slog.F("job_id", job.ID.String()), slog.F("workspace_id", workspace.ID), slog.F("workspace_build_id", workspaceBuild.ID), slog.F("user_id", workspace.OwnerID)) continue } - if t.Start.AsTime().IsZero() || t.End.AsTime().IsZero() { - s.Logger.Warn(ctx, "timings entry has zero start or end time, skipping", slog.F("entry", t.String())) + end := t.GetEnd() + if !end.IsValid() || end.AsTime().IsZero() { + s.Logger.Warn(ctx, "timings entry has nil or zero end time, skipping", slog.F("job_id", job.ID.String()), slog.F("workspace_id", workspace.ID), slog.F("workspace_build_id", workspaceBuild.ID), slog.F("user_id", workspace.OwnerID)) continue } @@ -1776,7 +1778,7 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro // after being started. // // Agent timeouts could be minutes apart, resulting in an unresponsive - // experience, so we'll notify after every unique timeout seconds. + // experience, so we'll notify after every unique timeout seconds if !input.DryRun && workspaceBuild.Transition == database.WorkspaceTransitionStart && len(agentTimeouts) > 0 { timeouts := maps.Keys(agentTimeouts) slices.Sort(timeouts) diff --git a/coderd/provisionerdserver/provisionerdserver_test.go b/coderd/provisionerdserver/provisionerdserver_test.go index eb63d84b1df1b..087f6aa21a5d7 100644 --- a/coderd/provisionerdserver/provisionerdserver_test.go +++ b/coderd/provisionerdserver/provisionerdserver_test.go @@ -1301,14 +1301,57 @@ func TestCompleteJob(t *testing.T) { Name: "test-workspace-resource", Type: "aws_instance", }}, - Timings: []*sdkproto.Timing{{ - Stage: "test", - Source: "test-source", - Resource: "test-resource", - Action: "test-action", - Start: timestamppb.Now(), - End: timestamppb.Now(), - }}, + Timings: []*sdkproto.Timing{ + { + Stage: "test", + Source: "test-source", + Resource: "test-resource", + Action: "test-action", + Start: timestamppb.Now(), + End: timestamppb.Now(), + }, + { + Stage: "test2", + Source: "test-source2", + Resource: "test-resource2", + Action: "test-action2", + // Start: omitted + // End: omitted + }, + { + Stage: "test3", + Source: "test-source3", + Resource: "test-resource3", + Action: "test-action3", + Start: timestamppb.Now(), + End: nil, + }, + { + Stage: "test3", + Source: "test-source3", + Resource: "test-resource3", + Action: "test-action3", + Start: nil, + End: timestamppb.Now(), + }, + { + Stage: "test4", + Source: "test-source4", + Resource: "test-resource4", + Action: "test-action4", + Start: timestamppb.New(time.Time{}), + End: timestamppb.Now(), + }, + { + Stage: "test5", + Source: "test-source5", + Resource: "test-resource5", + Action: "test-action5", + Start: timestamppb.Now(), + End: timestamppb.New(time.Time{}), + }, + nil, // nil timing should be ignored + }, }, }, })