Skip to content

Commit d920c64

Browse files
committed
refactor(toolsdk): simplify workspace bash background execution with nohup
Change-Id: If67c30717158bdd84e9f733b56365af7c8d0b51a Signed-off-by: Thomas Kosiewski <tk@coder.com>
1 parent 3750aee commit d920c64

File tree

4 files changed

+85
-114
lines changed

4 files changed

+85
-114
lines changed

codersdk/toolsdk/bash.go

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ package toolsdk
33
import (
44
"bytes"
55
"context"
6-
_ "embed"
7-
"encoding/base64"
86
"errors"
97
"fmt"
108
"io"
@@ -20,7 +18,6 @@ import (
2018
"github.com/coder/coder/v2/cli/cliui"
2119
"github.com/coder/coder/v2/codersdk"
2220
"github.com/coder/coder/v2/codersdk/workspacesdk"
23-
"github.com/coder/coder/v2/cryptorand"
2421
)
2522

2623
type WorkspaceBashArgs struct {
@@ -35,9 +32,6 @@ type WorkspaceBashResult struct {
3532
ExitCode int `json:"exit_code"`
3633
}
3734

38-
//go:embed resources/background.sh
39-
var backgroundScript string
40-
4135
var WorkspaceBash = Tool[WorkspaceBashArgs, WorkspaceBashResult]{
4236
Tool: aisdk.Tool{
4337
Name: ToolNameWorkspaceBash,
@@ -57,10 +51,13 @@ The workspace parameter supports various formats:
5751
The timeout_ms parameter specifies the command timeout in milliseconds (defaults to 60000ms, maximum of 300000ms).
5852
If the command times out, all output captured up to that point is returned with a cancellation message.
5953
54+
For background commands (background: true), output is captured until the timeout is reached, then the command
55+
continues running in the background. The captured output is returned as the result.
56+
6057
Examples:
6158
- workspace: "my-workspace", command: "ls -la"
6259
- workspace: "john/dev-env", command: "git status", timeout_ms: 30000
63-
- workspace: "my-workspace", command: "npm run dev", background: true
60+
- workspace: "my-workspace", command: "npm run dev", background: true, timeout_ms: 10000
6461
- workspace: "my-workspace.main", command: "docker ps"`,
6562
Schema: aisdk.Schema{
6663
Properties: map[string]any{
@@ -80,7 +77,7 @@ Examples:
8077
},
8178
"background": map[string]any{
8279
"type": "boolean",
83-
"description": "Whether to run the command in the background. The command will not be affected by the timeout.",
80+
"description": "Whether to run the command in the background. Output is captured until timeout, then the command continues running in the background.",
8481
},
8582
},
8683
Required: []string{"workspace", "command"},
@@ -155,35 +152,29 @@ Examples:
155152
}
156153
command := args.Command
157154
if args.Background {
158-
// Background commands are not affected by the timeout
159-
timeoutMs = defaultTimeoutMs
160-
encodedCommand := base64.StdEncoding.EncodeToString([]byte(args.Command))
161-
encodedScript := base64.StdEncoding.EncodeToString([]byte(backgroundScript))
162-
commandID, err := cryptorand.StringCharset(cryptorand.Human, 8)
163-
if err != nil {
164-
return WorkspaceBashResult{}, xerrors.Errorf("failed to generate command ID: %w", err)
165-
}
166-
command = fmt.Sprintf(
167-
"ARG_COMMAND=\"$(echo -n %s | base64 -d)\" ARG_COMMAND_ID=%s bash -c \"$(echo -n %s | base64 -d)\"",
168-
encodedCommand,
169-
commandID,
170-
encodedScript,
171-
)
155+
// For background commands, use nohup directly to ensure they survive SSH session
156+
// termination. This captures output normally but allows the process to continue
157+
// running even after the SSH connection closes.
158+
command = fmt.Sprintf("nohup %s </dev/null 2>&1", args.Command)
172159
}
173160

174-
// Create context with timeout
175-
ctx, cancel = context.WithTimeout(ctx, time.Duration(timeoutMs)*time.Millisecond)
176-
defer cancel()
161+
// Create context with command timeout (replace the broader MCP timeout)
162+
commandCtx, commandCancel := context.WithTimeout(ctx, time.Duration(timeoutMs)*time.Millisecond)
163+
defer commandCancel()
177164

178165
// Execute command with timeout handling
179-
output, err := executeCommandWithTimeout(ctx, session, command)
166+
output, err := executeCommandWithTimeout(commandCtx, session, command)
180167
outputStr := strings.TrimSpace(string(output))
181168

182169
// Handle command execution results
183170
if err != nil {
184171
// Check if the command timed out
185-
if errors.Is(context.Cause(ctx), context.DeadlineExceeded) {
186-
outputStr += "\nCommand canceled due to timeout"
172+
if errors.Is(context.Cause(commandCtx), context.DeadlineExceeded) {
173+
if args.Background {
174+
outputStr += "\nCommand continues running in background"
175+
} else {
176+
outputStr += "\nCommand canceled due to timeout"
177+
}
187178
return WorkspaceBashResult{
188179
Output: outputStr,
189180
ExitCode: 124,
@@ -417,21 +408,27 @@ func executeCommandWithTimeout(ctx context.Context, session *gossh.Session, comm
417408
return safeWriter.Bytes(), err
418409
case <-ctx.Done():
419410
// Context was canceled (timeout or other cancellation)
420-
// Close the session to stop the command
421-
_ = session.Close()
411+
// Close the session to stop the command, but handle errors gracefully
412+
closeErr := session.Close()
422413

423-
// Give a brief moment to collect any remaining output
424-
timer := time.NewTimer(50 * time.Millisecond)
414+
// Give a brief moment to collect any remaining output and for goroutines to finish
415+
timer := time.NewTimer(100 * time.Millisecond)
425416
defer timer.Stop()
426417

427418
select {
428419
case <-timer.C:
429420
// Timer expired, return what we have
421+
break
430422
case err := <-done:
431423
// Command finished during grace period
432-
return safeWriter.Bytes(), err
424+
if closeErr == nil {
425+
return safeWriter.Bytes(), err
426+
}
427+
// If session close failed, prioritize the context error
428+
break
433429
}
434430

431+
// Return the collected output with the context error
435432
return safeWriter.Bytes(), context.Cause(ctx)
436433
}
437434
}
@@ -451,5 +448,9 @@ func (sw *syncWriter) Write(p []byte) (n int, err error) {
451448
func (sw *syncWriter) Bytes() []byte {
452449
sw.mu.Lock()
453450
defer sw.mu.Unlock()
454-
return sw.w.Bytes()
451+
// Return a copy to prevent race conditions with the underlying buffer
452+
b := sw.w.Bytes()
453+
result := make([]byte, len(b))
454+
copy(result, b)
455+
return result
455456
}

codersdk/toolsdk/bash_test.go

Lines changed: 38 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,6 @@ func TestWorkspaceBashTimeout(t *testing.T) {
175175

176176
// Test that the TimeoutMs field can be set and read correctly
177177
args := toolsdk.WorkspaceBashArgs{
178-
Workspace: "test-workspace",
179-
Command: "echo test",
180178
TimeoutMs: 0, // Should default to 60000 in handler
181179
}
182180

@@ -193,8 +191,6 @@ func TestWorkspaceBashTimeout(t *testing.T) {
193191

194192
// Test that negative values can be set and will be handled by the default logic
195193
args := toolsdk.WorkspaceBashArgs{
196-
Workspace: "test-workspace",
197-
Command: "echo test",
198194
TimeoutMs: -100,
199195
}
200196

@@ -280,7 +276,7 @@ func TestWorkspaceBashTimeoutIntegration(t *testing.T) {
280276
TimeoutMs: 2000, // 2 seconds timeout - should timeout after first echo
281277
}
282278

283-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
279+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
284280

285281
// Should not error (timeout is handled gracefully)
286282
require.NoError(t, err)
@@ -314,15 +310,15 @@ func TestWorkspaceBashTimeoutIntegration(t *testing.T) {
314310

315311
deps, err := toolsdk.NewDeps(client)
316312
require.NoError(t, err)
317-
ctx := context.Background()
318313

319314
args := toolsdk.WorkspaceBashArgs{
320315
Workspace: workspace.Name,
321316
Command: `echo "normal command"`, // Quick command that should complete normally
322317
TimeoutMs: 5000, // 5 second timeout - plenty of time
323318
}
324319

325-
result, err := toolsdk.WorkspaceBash.Handler(ctx, deps, args)
320+
// Use testTool to register the tool as tested and satisfy coverage validation
321+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
326322

327323
// Should not error
328324
require.NoError(t, err)
@@ -343,7 +339,7 @@ func TestWorkspaceBashTimeoutIntegration(t *testing.T) {
343339
func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
344340
t.Parallel()
345341

346-
t.Run("BackgroundCommandReturnsImmediately", func(t *testing.T) {
342+
t.Run("BackgroundCommandCapturesOutput", func(t *testing.T) {
347343
t.Parallel()
348344

349345
client, workspace, agentToken := setupWorkspaceForAgent(t)
@@ -359,29 +355,29 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
359355

360356
args := toolsdk.WorkspaceBashArgs{
361357
Workspace: workspace.Name,
362-
Command: `echo "started" && sleep 5 && echo "completed"`, // Command that would take 5+ seconds
363-
Background: true, // Run in background
358+
Command: `echo "started" && sleep 60 && echo "completed"`, // Command that would take 60+ seconds
359+
Background: true, // Run in background
360+
TimeoutMs: 2000, // 2 second timeout
364361
}
365362

366-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
363+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
367364

368365
// Should not error
369366
require.NoError(t, err)
370367

371368
t.Logf("Background result: exitCode=%d, output=%q", result.ExitCode, result.Output)
372369

373-
// Should have exit code 0 (background start successful)
374-
require.Equal(t, 0, result.ExitCode)
370+
// Should have exit code 124 (timeout) since command times out
371+
require.Equal(t, 124, result.ExitCode)
375372

376-
// Should contain PID and log path info, not the actual command output
377-
require.Contains(t, result.Output, "Command started with PID:")
378-
require.Contains(t, result.Output, "Log path: /tmp/mcp-bg/")
373+
// Should capture output up to timeout point
374+
require.Contains(t, result.Output, "started", "Should contain output captured before timeout")
379375

380-
// Should NOT contain the actual command output since it runs in background
381-
// The command was `echo "started" && sleep 5 && echo "completed"`
382-
// So we check that the quoted strings don't appear in the output
383-
require.NotContains(t, result.Output, `"started"`, "Should not contain command output in background mode")
384-
require.NotContains(t, result.Output, `"completed"`, "Should not contain command output in background mode")
376+
// Should NOT contain the second echo (it never executed due to timeout)
377+
require.NotContains(t, result.Output, "completed", "Should not contain output after timeout")
378+
379+
// Should contain background continuation message
380+
require.Contains(t, result.Output, "Command continues running in background")
385381
})
386382

387383
t.Run("BackgroundVsNormalExecution", func(t *testing.T) {
@@ -419,20 +415,18 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
419415
Background: true,
420416
}
421417

422-
backgroundResult, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, backgroundArgs)
418+
backgroundResult, err := testTool(t, toolsdk.WorkspaceBash, deps, backgroundArgs)
423419
require.NoError(t, err)
424420

425421
t.Logf("Normal result: %q", normalResult.Output)
426422
t.Logf("Background result: %q", backgroundResult.Output)
427423

428-
// Background mode should return PID/log info, not the actual output
424+
// Background mode should also return the actual output since command completes quickly
429425
require.Equal(t, 0, backgroundResult.ExitCode)
430-
require.Contains(t, backgroundResult.Output, "Command started with PID:")
431-
require.Contains(t, backgroundResult.Output, "Log path: /tmp/mcp-bg/")
432-
require.NotContains(t, backgroundResult.Output, "hello world")
426+
require.Equal(t, "hello world", backgroundResult.Output)
433427
})
434428

435-
t.Run("BackgroundIgnoresTimeout", func(t *testing.T) {
429+
t.Run("BackgroundCommandContinuesAfterTimeout", func(t *testing.T) {
436430
t.Parallel()
437431

438432
client, workspace, agentToken := setupWorkspaceForAgent(t)
@@ -448,36 +442,35 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
448442

449443
args := toolsdk.WorkspaceBashArgs{
450444
Workspace: workspace.Name,
451-
Command: `sleep 1 && echo "done" > /tmp/done`, // Command that would normally timeout
452-
TimeoutMs: 1, // 1 ms timeout (shorter than command duration)
453-
Background: true, // But running in background should ignore timeout
445+
Command: `echo "started" && sleep 4 && echo "done" > /tmp/bg-test-done`, // Command that will timeout but continue
446+
TimeoutMs: 2000, // 2000ms timeout (shorter than command duration)
447+
Background: true, // Run in background
454448
}
455449

456-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
450+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
457451

458-
// Should not error and should not timeout
452+
// Should not error but should timeout
459453
require.NoError(t, err)
460454

461455
t.Logf("Background with timeout result: exitCode=%d, output=%q", result.ExitCode, result.Output)
462456

463-
// Should have exit code 0 (background start successful)
464-
require.Equal(t, 0, result.ExitCode)
457+
// Should have timeout exit code
458+
require.Equal(t, 124, result.ExitCode)
465459

466-
// Should return PID/log info, indicating the background command started successfully
467-
require.Contains(t, result.Output, "Command started with PID:")
468-
require.Contains(t, result.Output, "Log path: /tmp/mcp-bg/")
460+
// Should capture output before timeout
461+
require.Contains(t, result.Output, "started", "Should contain output captured before timeout")
469462

470-
// Should NOT contain timeout message since background mode ignores timeout
471-
require.NotContains(t, result.Output, "Command canceled due to timeout")
463+
// Should contain background continuation message
464+
require.Contains(t, result.Output, "Command continues running in background")
472465

473-
// Wait for the background command to complete
466+
// Wait for the background command to complete (even though SSH session timed out)
474467
require.Eventually(t, func() bool {
475-
args := toolsdk.WorkspaceBashArgs{
468+
checkArgs := toolsdk.WorkspaceBashArgs{
476469
Workspace: workspace.Name,
477-
Command: `cat /tmp/done`,
470+
Command: `cat /tmp/bg-test-done 2>/dev/null || echo "not found"`,
478471
}
479-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
480-
return err == nil && result.Output == "done"
481-
}, testutil.WaitMedium, testutil.IntervalMedium)
472+
checkResult, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, checkArgs)
473+
return err == nil && checkResult.Output == "done"
474+
}, testutil.WaitMedium, testutil.IntervalMedium, "Background command should continue running and complete after timeout")
482475
})
483476
}

codersdk/toolsdk/resources/background.sh

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments
 (0)