Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
bcfacf7
feat: begin impl of agent script timings
DanielleMaywood Sep 17, 2024
d55b2e6
feat: add job_id and display_name to script timings
DanielleMaywood Sep 17, 2024
05d8a8b
fix: increment migration number
DanielleMaywood Sep 17, 2024
d54f6c3
fix: rename migrations from 251 to 254
DanielleMaywood Sep 18, 2024
d573c67
test: get tests compiling
DanielleMaywood Sep 18, 2024
09a1a42
fix: appease the linter
DanielleMaywood Sep 18, 2024
6d829f3
fix: get tests passing again
DanielleMaywood Sep 18, 2024
4df7831
fix: drop column from correct table
DanielleMaywood Sep 18, 2024
2565f0a
test: add fixture for agent script timings
DanielleMaywood Sep 18, 2024
669b837
fix: typo
DanielleMaywood Sep 18, 2024
df51673
fix: use job id used in provisioner job timings
DanielleMaywood Sep 18, 2024
c6d71d1
fix: increment migration number
DanielleMaywood Sep 18, 2024
a306e48
test: behaviour of script runner
DanielleMaywood Sep 18, 2024
4b0056c
test: rewrite test
DanielleMaywood Sep 18, 2024
14b68a1
test: does exit 1 script break things?
DanielleMaywood Sep 18, 2024
b22cb57
test: rewrite test again
DanielleMaywood Sep 18, 2024
ef5e2fe
fix: revert change
DanielleMaywood Sep 19, 2024
a5b412b
fix: let code breathe
DanielleMaywood Sep 19, 2024
57ebf30
fix: wrap errors
DanielleMaywood Sep 19, 2024
2a49f67
fix: justify nolint
DanielleMaywood Sep 19, 2024
0698584
fix: swap require.Equal argument order
DanielleMaywood Sep 19, 2024
3580069
fix: add mutex operations
DanielleMaywood Sep 19, 2024
9cb1252
feat: add 'ran_on_start' and 'blocked_login' fields
DanielleMaywood Sep 19, 2024
0b0d1ef
fix: update testdata fixture
DanielleMaywood Sep 19, 2024
eb857c1
fix: refer to agent_id instead of job_id in timings
DanielleMaywood Sep 19, 2024
586d88f
fix: JobID -> AgentID in dbauthz_test
DanielleMaywood Sep 19, 2024
b6289bb
fix: add 'id' to scripts, make timing refer to script id
DanielleMaywood Sep 19, 2024
863c3dc
fix: fix broken tests and convert bug
DanielleMaywood Sep 19, 2024
b8d5d1d
fix: update testdata fixtures
DanielleMaywood Sep 19, 2024
61c26ea
fix: update testdata fixtures again
DanielleMaywood Sep 19, 2024
7f8b6f9
feat: capture stage and if script timed out
DanielleMaywood Sep 20, 2024
c20ac32
fix: update migration number
DanielleMaywood Sep 20, 2024
c652133
test: add test for script api
DanielleMaywood Sep 20, 2024
1199b64
fix: fake db query
DanielleMaywood Sep 20, 2024
afa61eb
fix: use UTC time
DanielleMaywood Sep 20, 2024
8d325e2
fix: ensure r.scriptComplete is not nil
DanielleMaywood Sep 20, 2024
aeee582
fix: move err check to right after call
DanielleMaywood Sep 20, 2024
424069c
fix: uppercase sql
DanielleMaywood Sep 20, 2024
9b43a94
fix: use dbtime.Now()
DanielleMaywood Sep 20, 2024
ed31199
fix: debug log on r.scriptCompleted being nil
DanielleMaywood Sep 20, 2024
8814711
fix: ensure correct rbac permissions
DanielleMaywood Sep 20, 2024
0414623
chore: remove DisplayName
DanielleMaywood Sep 20, 2024
a8192a5
Merge branch 'main' into dm-add-agent-timings
DanielleMaywood Sep 20, 2024
08a466a
fix: get tests passing
DanielleMaywood Sep 20, 2024
8e7c757
fix: remove space in sql up
DanielleMaywood Sep 20, 2024
621071e
docs: document ExecuteOption
DanielleMaywood Sep 20, 2024
3b4df92
fix: drop 'RETURNING' from sql
DanielleMaywood Sep 20, 2024
44127b8
chore: remove 'display_name' from timing table
DanielleMaywood Sep 20, 2024
5846216
fix: testdata fixture
DanielleMaywood Sep 20, 2024
c43a143
fix: put r.scriptCompleted call in goroutine
DanielleMaywood Sep 23, 2024
13889bf
fix: track goroutine for test + use separate context for reporting
DanielleMaywood Sep 23, 2024
bf32b89
fix: appease linter, handle trackCommandGoroutine error
DanielleMaywood Sep 23, 2024
7fe6d8c
fix: resolve race condition
DanielleMaywood Sep 23, 2024
d7e86c6
feat: replace timed_out column with status column
DanielleMaywood Sep 23, 2024
6e338f2
test: update testdata fixture
DanielleMaywood Sep 23, 2024
79a620b
fix: apply suggestions from review
DanielleMaywood Sep 24, 2024
180307f
revert: linter changes
DanielleMaywood Sep 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: get tests passing again
  • Loading branch information
DanielleMaywood committed Sep 20, 2024
commit 6d829f3cd20083e75ddf1199366f98b258e303b8
7 changes: 1 addition & 6 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,11 +582,6 @@ func (a *agent) reportMetadata(ctx context.Context, conn drpc.Conn) error {
select {
case <-ctx.Done():
return ctx.Err()
case timing := <-*a.scriptRunner.ScriptTimings():
_, err := aAPI.ScriptCompleted(ctx, &proto.WorkspaceAgentScriptCompletedRequest{
Timing: timing.ToProto(),
})
return err
case mr := <-metadataResults:
// This can overwrite unsent values, but that's fine because
// we're only interested in up-to-date values.
Expand Down Expand Up @@ -946,7 +941,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
}
}

err = a.scriptRunner.Init(manifest.Scripts)
err = a.scriptRunner.Init(manifest.Scripts, aAPI.ScriptCompleted)
if err != nil {
return xerrors.Errorf("init script runner: %w", err)
}
Expand Down
46 changes: 24 additions & 22 deletions agent/agentscripts/agentscripts.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ import (
"github.com/spf13/afero"
"golang.org/x/sync/errgroup"
"golang.org/x/xerrors"
"google.golang.org/protobuf/types/known/timestamppb"

"cdr.dev/slog"

"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/proto"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
)
Expand Down Expand Up @@ -66,7 +68,6 @@ func New(opts Options) *Runner {
cronCtxCancel: cronCtxCancel,
cron: cron.New(cron.WithParser(parser)),
closed: make(chan struct{}),
scriptTimings: make(chan TimingSpan),
dataDir: filepath.Join(opts.DataDirBase, "coder-script-data"),
scriptsExecuted: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "agent",
Expand All @@ -76,30 +77,28 @@ func New(opts Options) *Runner {
}
}

// ScriptCompletedFunc is the signature of the callback a Runner is given in
// Init and invokes after a script finishes, passing a
// WorkspaceAgentScriptCompletedRequest that carries the script's timing.
// It returns the corresponding response, or an error.
type ScriptCompletedFunc func(context.Context, *proto.WorkspaceAgentScriptCompletedRequest) (*proto.WorkspaceAgentScriptCompletedResponse, error)

type Runner struct {
Options

cronCtx context.Context
cronCtxCancel context.CancelFunc
cmdCloseWait sync.WaitGroup
closed chan struct{}
closeMutex sync.Mutex
cron *cron.Cron
initialized atomic.Bool
scripts []codersdk.WorkspaceAgentScript
scriptTimings chan TimingSpan
dataDir string
cronCtx context.Context
cronCtxCancel context.CancelFunc
cmdCloseWait sync.WaitGroup
closed chan struct{}
closeMutex sync.Mutex
cron *cron.Cron
initialized atomic.Bool
scripts []codersdk.WorkspaceAgentScript
dataDir string
scriptCompleted ScriptCompletedFunc

// scriptsExecuted includes all scripts executed by the workspace agent. Agents
// execute startup scripts, and scripts on a cron schedule. Both will increment
// this counter.
scriptsExecuted *prometheus.CounterVec
}

func (r *Runner) ScriptTimings() *chan TimingSpan {
return &r.scriptTimings
}

// DataDir returns the directory where scripts data is stored.
func (r *Runner) DataDir() string {
return r.dataDir
Expand All @@ -122,12 +121,13 @@ func (r *Runner) RegisterMetrics(reg prometheus.Registerer) {
// Init initializes the runner with the provided scripts.
// It also schedules any scripts that have a schedule.
// This function must be called before Execute.
func (r *Runner) Init(scripts []codersdk.WorkspaceAgentScript) error {
func (r *Runner) Init(scripts []codersdk.WorkspaceAgentScript, scriptCompleted ScriptCompletedFunc) error {
if r.initialized.Load() {
return xerrors.New("init: already initialized")
}
r.initialized.Store(true)
r.scripts = scripts
r.scriptCompleted = scriptCompleted
r.Logger.Info(r.cronCtx, "initializing agent scripts", slog.F("script_count", len(scripts)), slog.F("log_dir", r.LogDir))

err := r.Filesystem.MkdirAll(r.ScriptBinDir(), 0o700)
Expand Down Expand Up @@ -321,12 +321,14 @@ func (r *Runner) run(ctx context.Context, script codersdk.WorkspaceAgentScript)
logger.Info(ctx, fmt.Sprintf("%s script completed", logPath), slog.F("execution_time", execTime), slog.F("exit_code", exitCode))
}

r.scriptTimings <- TimingSpan{
displayName: script.DisplayName,
start: start,
end: end,
exitCode: int32(exitCode),
}
_, err = r.scriptCompleted(ctx, &proto.WorkspaceAgentScriptCompletedRequest{
Timing: &proto.Timing{
DisplayName: script.DisplayName,
Start: timestamppb.New(start),
End: timestamppb.New(end),
ExitCode: int32(exitCode),
},
})
}()

err = cmd.Start()
Expand Down
10 changes: 7 additions & 3 deletions agent/agentscripts/agentscripts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/agent/agentscripts"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/testutil"
Expand All @@ -34,10 +35,11 @@ func TestExecuteBasic(t *testing.T) {
return fLogger
})
defer runner.Close()
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: uuid.New(),
Script: "echo hello",
}})
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
require.NoError(t, runner.Execute(context.Background(), func(script codersdk.WorkspaceAgentScript) bool {
return true
Expand All @@ -61,10 +63,11 @@ func TestEnv(t *testing.T) {
cmd.exe /c echo %CODER_SCRIPT_BIN_DIR%
`
}
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: id,
Script: script,
}})
}}, aAPI.ScriptCompleted)
require.NoError(t, err)

ctx := testutil.Context(t, testutil.WaitLong)
Expand Down Expand Up @@ -103,11 +106,12 @@ func TestTimeout(t *testing.T) {
t.Parallel()
runner := setup(t, nil)
defer runner.Close()
aAPI := agenttest.NewFakeAgentAPI(t, slogtest.Make(t, nil), nil, nil)
err := runner.Init([]codersdk.WorkspaceAgentScript{{
LogSourceID: uuid.New(),
Script: "sleep infinity",
Timeout: time.Millisecond,
}})
}}, aAPI.ScriptCompleted)
require.NoError(t, err)
require.ErrorIs(t, runner.Execute(context.Background(), nil), agentscripts.ErrTimeout)
}
Expand Down
24 changes: 0 additions & 24 deletions agent/agentscripts/timings.go

This file was deleted.