@@ -12,7 +12,6 @@ import (
12
12
"net/http"
13
13
"net/netip"
14
14
"os"
15
- "os/exec"
16
15
"os/user"
17
16
"path/filepath"
18
17
"runtime"
@@ -37,6 +36,7 @@ import (
37
36
38
37
"cdr.dev/slog"
39
38
"github.com/coder/coder/v2/agent/agentproc"
39
+ "github.com/coder/coder/v2/agent/agentscripts"
40
40
"github.com/coder/coder/v2/agent/agentssh"
41
41
"github.com/coder/coder/v2/agent/reconnectingpty"
42
42
"github.com/coder/coder/v2/buildinfo"
@@ -196,6 +196,7 @@ type agent struct {
196
196
197
197
manifest atomic.Pointer [agentsdk.Manifest ] // manifest is atomic because values can change after reconnection.
198
198
reportMetadataInterval time.Duration
199
+ scriptRunner * agentscripts.Runner
199
200
serviceBanner atomic.Pointer [codersdk.ServiceBannerConfig ] // serviceBanner is atomic because it is periodically updated.
200
201
serviceBannerRefreshInterval time.Duration
201
202
sessionToken atomic.Pointer [string ]
@@ -238,7 +239,13 @@ func (a *agent) init(ctx context.Context) {
238
239
sshSrv .Manifest = & a .manifest
239
240
sshSrv .ServiceBanner = & a .serviceBanner
240
241
a .sshServer = sshSrv
241
-
242
+ a .scriptRunner = agentscripts .New (agentscripts.Options {
243
+ LogDir : a .logDir ,
244
+ Logger : a .logger ,
245
+ SSHServer : sshSrv ,
246
+ Filesystem : a .filesystem ,
247
+ PatchLogs : a .client .PatchLogs ,
248
+ })
242
249
go a .runLoop (ctx )
243
250
}
244
251
@@ -657,41 +664,29 @@ func (a *agent) run(ctx context.Context) error {
657
664
}
658
665
}
659
666
660
- lifecycleState := codersdk .WorkspaceAgentLifecycleReady
661
- scriptDone := make (chan error , 1 )
662
- err = a .trackConnGoroutine (func () {
663
- defer close (scriptDone )
664
- scriptDone <- a .runStartupScript (ctx , manifest .StartupScript )
665
- })
667
+ err = a .scriptRunner .Init (manifest .Scripts )
666
668
if err != nil {
667
- return xerrors .Errorf ("track startup script: %w" , err )
669
+ return xerrors .Errorf ("init script runner : %w" , err )
668
670
}
669
- go func () {
670
- var timeout <- chan time.Time
671
- // If timeout is zero, an older version of the coder
672
- // provider was used. Otherwise a timeout is always > 0.
673
- if manifest .StartupScriptTimeout > 0 {
674
- t := time .NewTimer (manifest .StartupScriptTimeout )
675
- defer t .Stop ()
676
- timeout = t .C
677
- }
678
-
679
- var err error
680
- select {
681
- case err = <- scriptDone :
682
- case <- timeout :
683
- a .logger .Warn (ctx , "script timed out" , slog .F ("lifecycle" , "startup" ), slog .F ("timeout" , manifest .StartupScriptTimeout ))
684
- a .setLifecycle (ctx , codersdk .WorkspaceAgentLifecycleStartTimeout )
685
- err = <- scriptDone // The script can still complete after a timeout.
686
- }
671
+ err = a .trackConnGoroutine (func () {
672
+ err := a .scriptRunner .Execute (ctx , func (script codersdk.WorkspaceAgentScript ) bool {
673
+ return script .RunOnStart
674
+ })
687
675
if err != nil {
688
- if errors .Is (err , context .Canceled ) {
689
- return
676
+ a .logger .Warn (ctx , "startup script failed" , slog .Error (err ))
677
+ if errors .Is (err , agentscripts .ErrTimeout ) {
678
+ a .setLifecycle (ctx , codersdk .WorkspaceAgentLifecycleStartTimeout )
679
+ } else {
680
+ a .setLifecycle (ctx , codersdk .WorkspaceAgentLifecycleStartError )
690
681
}
691
- lifecycleState = codersdk .WorkspaceAgentLifecycleStartError
682
+ } else {
683
+ a .setLifecycle (ctx , codersdk .WorkspaceAgentLifecycleReady )
692
684
}
693
- a .setLifecycle (ctx , lifecycleState )
694
- }()
685
+ a .scriptRunner .StartCron ()
686
+ })
687
+ if err != nil {
688
+ return xerrors .Errorf ("track conn goroutine: %w" , err )
689
+ }
695
690
}
696
691
697
692
// This automatically closes when the context ends!
@@ -1006,93 +1001,6 @@ func (a *agent) runDERPMapSubscriber(ctx context.Context, network *tailnet.Conn)
1006
1001
}
1007
1002
}
1008
1003
1009
- func (a * agent ) runStartupScript (ctx context.Context , script string ) error {
1010
- return a .runScript (ctx , "startup" , script )
1011
- }
1012
-
1013
- func (a * agent ) runShutdownScript (ctx context.Context , script string ) error {
1014
- return a .runScript (ctx , "shutdown" , script )
1015
- }
1016
-
1017
- func (a * agent ) runScript (ctx context.Context , lifecycle , script string ) (err error ) {
1018
- if script == "" {
1019
- return nil
1020
- }
1021
-
1022
- logger := a .logger .With (slog .F ("lifecycle" , lifecycle ))
1023
-
1024
- logger .Info (ctx , fmt .Sprintf ("running %s script" , lifecycle ), slog .F ("script" , script ))
1025
- fileWriter , err := a .filesystem .OpenFile (filepath .Join (a .logDir , fmt .Sprintf ("coder-%s-script.log" , lifecycle )), os .O_CREATE | os .O_RDWR , 0o600 )
1026
- if err != nil {
1027
- return xerrors .Errorf ("open %s script log file: %w" , lifecycle , err )
1028
- }
1029
- defer func () {
1030
- err := fileWriter .Close ()
1031
- if err != nil {
1032
- logger .Warn (ctx , fmt .Sprintf ("close %s script log file" , lifecycle ), slog .Error (err ))
1033
- }
1034
- }()
1035
-
1036
- cmdPty , err := a .sshServer .CreateCommand (ctx , script , nil )
1037
- if err != nil {
1038
- return xerrors .Errorf ("%s script: create command: %w" , lifecycle , err )
1039
- }
1040
- cmd := cmdPty .AsExec ()
1041
-
1042
- var stdout , stderr io.Writer = fileWriter , fileWriter
1043
- if lifecycle == "startup" {
1044
- send , flushAndClose := agentsdk .LogsSender (a .client .PatchLogs , logger )
1045
- // If ctx is canceled here (or in a writer below), we may be
1046
- // discarding logs, but that's okay because we're shutting down
1047
- // anyway. We could consider creating a new context here if we
1048
- // want better control over flush during shutdown.
1049
- defer func () {
1050
- if err := flushAndClose (ctx ); err != nil {
1051
- logger .Warn (ctx , "flush startup logs failed" , slog .Error (err ))
1052
- }
1053
- }()
1054
-
1055
- infoW := agentsdk .StartupLogsWriter (ctx , send , codersdk .WorkspaceAgentLogSourceStartupScript , codersdk .LogLevelInfo )
1056
- defer infoW .Close ()
1057
- errW := agentsdk .StartupLogsWriter (ctx , send , codersdk .WorkspaceAgentLogSourceStartupScript , codersdk .LogLevelError )
1058
- defer errW .Close ()
1059
-
1060
- stdout = io .MultiWriter (fileWriter , infoW )
1061
- stderr = io .MultiWriter (fileWriter , errW )
1062
- }
1063
-
1064
- cmd .Stdout = stdout
1065
- cmd .Stderr = stderr
1066
-
1067
- start := time .Now ()
1068
- defer func () {
1069
- end := time .Now ()
1070
- execTime := end .Sub (start )
1071
- exitCode := 0
1072
- if err != nil {
1073
- exitCode = 255 // Unknown status.
1074
- var exitError * exec.ExitError
1075
- if xerrors .As (err , & exitError ) {
1076
- exitCode = exitError .ExitCode ()
1077
- }
1078
- logger .Warn (ctx , fmt .Sprintf ("%s script failed" , lifecycle ), slog .F ("execution_time" , execTime ), slog .F ("exit_code" , exitCode ), slog .Error (err ))
1079
- } else {
1080
- logger .Info (ctx , fmt .Sprintf ("%s script completed" , lifecycle ), slog .F ("execution_time" , execTime ), slog .F ("exit_code" , exitCode ))
1081
- }
1082
- }()
1083
-
1084
- err = cmd .Run ()
1085
- if err != nil {
1086
- // cmd.Run does not return a context canceled error, it returns "signal: killed".
1087
- if ctx .Err () != nil {
1088
- return ctx .Err ()
1089
- }
1090
-
1091
- return xerrors .Errorf ("%s script: run: %w" , lifecycle , err )
1092
- }
1093
- return nil
1094
- }
1095
-
1096
1004
func (a * agent ) handleReconnectingPTY (ctx context.Context , logger slog.Logger , msg codersdk.WorkspaceAgentReconnectingPTYInit , conn net.Conn ) (retErr error ) {
1097
1005
defer conn .Close ()
1098
1006
a .metrics .connectionsTotal .Add (1 )
@@ -1475,39 +1383,23 @@ func (a *agent) Close() error {
1475
1383
}
1476
1384
1477
1385
lifecycleState := codersdk .WorkspaceAgentLifecycleOff
1478
- if manifest := a .manifest .Load (); manifest != nil && manifest .ShutdownScript != "" {
1479
- scriptDone := make (chan error , 1 )
1480
- go func () {
1481
- defer close (scriptDone )
1482
- scriptDone <- a .runShutdownScript (ctx , manifest .ShutdownScript )
1483
- }()
1484
-
1485
- var timeout <- chan time.Time
1486
- // If timeout is zero, an older version of the coder
1487
- // provider was used. Otherwise a timeout is always > 0.
1488
- if manifest .ShutdownScriptTimeout > 0 {
1489
- t := time .NewTimer (manifest .ShutdownScriptTimeout )
1490
- defer t .Stop ()
1491
- timeout = t .C
1492
- }
1493
-
1494
- var err error
1495
- select {
1496
- case err = <- scriptDone :
1497
- case <- timeout :
1498
- a .logger .Warn (ctx , "script timed out" , slog .F ("lifecycle" , "shutdown" ), slog .F ("timeout" , manifest .ShutdownScriptTimeout ))
1499
- a .setLifecycle (ctx , codersdk .WorkspaceAgentLifecycleShutdownTimeout )
1500
- err = <- scriptDone // The script can still complete after a timeout.
1501
- }
1502
- if err != nil {
1386
+ err = a .scriptRunner .Execute (ctx , func (script codersdk.WorkspaceAgentScript ) bool {
1387
+ return script .RunOnStop
1388
+ })
1389
+ if err != nil {
1390
+ if errors .Is (err , agentscripts .ErrTimeout ) {
1391
+ lifecycleState = codersdk .WorkspaceAgentLifecycleShutdownTimeout
1392
+ } else {
1503
1393
lifecycleState = codersdk .WorkspaceAgentLifecycleShutdownError
1504
1394
}
1505
1395
}
1506
-
1507
- // Set final state and wait for it to be reported because context
1508
- // cancellation will stop the report loop.
1509
1396
a .setLifecycle (ctx , lifecycleState )
1510
1397
1398
+ err = a .scriptRunner .Close ()
1399
+ if err != nil {
1400
+ a .logger .Error (ctx , "script runner close" , slog .Error (err ))
1401
+ }
1402
+
1511
1403
// Wait for the lifecycle to be reported, but don't wait forever so
1512
1404
// that we don't break user expectations.
1513
1405
ctx , cancel := context .WithTimeout (ctx , 5 * time .Second )
0 commit comments