Skip to content

Commit d6ae953

Browse files
committed
refactor(toolsdk): simplify workspace bash background execution with nohup
Change-Id: If67c30717158bdd84e9f733b56365af7c8d0b51a Signed-off-by: Thomas Kosiewski <tk@coder.com>
1 parent 3750aee commit d6ae953

File tree

3 files changed

+67
-93
lines changed

3 files changed

+67
-93
lines changed

codersdk/toolsdk/bash.go

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ package toolsdk
33
import (
44
"bytes"
55
"context"
6-
_ "embed"
7-
"encoding/base64"
86
"errors"
97
"fmt"
108
"io"
@@ -20,7 +18,6 @@ import (
2018
"github.com/coder/coder/v2/cli/cliui"
2119
"github.com/coder/coder/v2/codersdk"
2220
"github.com/coder/coder/v2/codersdk/workspacesdk"
23-
"github.com/coder/coder/v2/cryptorand"
2421
)
2522

2623
type WorkspaceBashArgs struct {
@@ -35,9 +32,6 @@ type WorkspaceBashResult struct {
3532
ExitCode int `json:"exit_code"`
3633
}
3734

38-
//go:embed resources/background.sh
39-
var backgroundScript string
40-
4135
var WorkspaceBash = Tool[WorkspaceBashArgs, WorkspaceBashResult]{
4236
Tool: aisdk.Tool{
4337
Name: ToolNameWorkspaceBash,
@@ -57,6 +51,9 @@ The workspace parameter supports various formats:
5751
The timeout_ms parameter specifies the command timeout in milliseconds (defaults to 60000ms, maximum of 300000ms).
5852
If the command times out, all output captured up to that point is returned with a cancellation message.
5953
54+
For background commands (background: true), output is captured until the timeout is reached, then the command
55+
continues running in the background. The captured output is returned as the result.
56+
6057
Examples:
6158
- workspace: "my-workspace", command: "ls -la"
6259
- workspace: "john/dev-env", command: "git status", timeout_ms: 30000
@@ -80,7 +77,7 @@ Examples:
8077
},
8178
"background": map[string]any{
8279
"type": "boolean",
83-
"description": "Whether to run the command in the background. The command will not be affected by the timeout.",
80+
"description": "Whether to run the command in the background. Output is captured until timeout, then the command continues running in the background.",
8481
},
8582
},
8683
Required: []string{"workspace", "command"},
@@ -155,35 +152,28 @@ Examples:
155152
}
156153
command := args.Command
157154
if args.Background {
158-
// Background commands are not affected by the timeout
159-
timeoutMs = defaultTimeoutMs
160-
encodedCommand := base64.StdEncoding.EncodeToString([]byte(args.Command))
161-
encodedScript := base64.StdEncoding.EncodeToString([]byte(backgroundScript))
162-
commandID, err := cryptorand.StringCharset(cryptorand.Human, 8)
163-
if err != nil {
164-
return WorkspaceBashResult{}, xerrors.Errorf("failed to generate command ID: %w", err)
165-
}
166-
command = fmt.Sprintf(
167-
"ARG_COMMAND=\"$(echo -n %s | base64 -d)\" ARG_COMMAND_ID=%s bash -c \"$(echo -n %s | base64 -d)\"",
168-
encodedCommand,
169-
commandID,
170-
encodedScript,
171-
)
155+
// For background commands, use nohup to detach the process
156+
// We redirect stdin to prevent hanging and combine stdout/stderr
157+
command = fmt.Sprintf("nohup bash -c %q </dev/null 2>&1", args.Command)
172158
}
173159

174-
// Create context with timeout
175-
ctx, cancel = context.WithTimeout(ctx, time.Duration(timeoutMs)*time.Millisecond)
176-
defer cancel()
160+
// Create context with command timeout (replace the broader MCP timeout)
161+
commandCtx, commandCancel := context.WithTimeout(ctx, time.Duration(timeoutMs)*time.Millisecond)
162+
defer commandCancel()
177163

178164
// Execute command with timeout handling
179-
output, err := executeCommandWithTimeout(ctx, session, command)
165+
output, err := executeCommandWithTimeout(commandCtx, session, command)
180166
outputStr := strings.TrimSpace(string(output))
181167

182168
// Handle command execution results
183169
if err != nil {
184170
// Check if the command timed out
185-
if errors.Is(context.Cause(ctx), context.DeadlineExceeded) {
186-
outputStr += "\nCommand canceled due to timeout"
171+
if errors.Is(context.Cause(commandCtx), context.DeadlineExceeded) {
172+
if args.Background {
173+
outputStr += "\nCommand continues running in background"
174+
} else {
175+
outputStr += "\nCommand canceled due to timeout"
176+
}
187177
return WorkspaceBashResult{
188178
Output: outputStr,
189179
ExitCode: 124,
@@ -417,21 +407,27 @@ func executeCommandWithTimeout(ctx context.Context, session *gossh.Session, comm
417407
return safeWriter.Bytes(), err
418408
case <-ctx.Done():
419409
// Context was canceled (timeout or other cancellation)
420-
// Close the session to stop the command
421-
_ = session.Close()
410+
// Close the session to stop the command, but handle errors gracefully
411+
closeErr := session.Close()
422412

423-
// Give a brief moment to collect any remaining output
424-
timer := time.NewTimer(50 * time.Millisecond)
413+
// Give a brief moment to collect any remaining output and for goroutines to finish
414+
timer := time.NewTimer(100 * time.Millisecond)
425415
defer timer.Stop()
426416

427417
select {
428418
case <-timer.C:
429419
// Timer expired, return what we have
420+
break
430421
case err := <-done:
431422
// Command finished during grace period
432-
return safeWriter.Bytes(), err
423+
if closeErr == nil {
424+
return safeWriter.Bytes(), err
425+
}
426+
// If session close failed, prioritize the context error
427+
break
433428
}
434429

430+
// Return the collected output with the context error
435431
return safeWriter.Bytes(), context.Cause(ctx)
436432
}
437433
}
@@ -451,5 +447,9 @@ func (sw *syncWriter) Write(p []byte) (n int, err error) {
451447
func (sw *syncWriter) Bytes() []byte {
452448
sw.mu.Lock()
453449
defer sw.mu.Unlock()
454-
return sw.w.Bytes()
450+
// Return a copy to prevent race conditions with the underlying buffer
451+
b := sw.w.Bytes()
452+
result := make([]byte, len(b))
453+
copy(result, b)
454+
return result
455455
}

codersdk/toolsdk/bash_test.go

Lines changed: 34 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -314,15 +314,15 @@ func TestWorkspaceBashTimeoutIntegration(t *testing.T) {
314314

315315
deps, err := toolsdk.NewDeps(client)
316316
require.NoError(t, err)
317-
ctx := context.Background()
318317

319318
args := toolsdk.WorkspaceBashArgs{
320319
Workspace: workspace.Name,
321320
Command: `echo "normal command"`, // Quick command that should complete normally
322321
TimeoutMs: 5000, // 5 second timeout - plenty of time
323322
}
324323

325-
result, err := toolsdk.WorkspaceBash.Handler(ctx, deps, args)
324+
// Use testTool to register the tool as tested and satisfy coverage validation
325+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
326326

327327
// Should not error
328328
require.NoError(t, err)
@@ -343,7 +343,7 @@ func TestWorkspaceBashTimeoutIntegration(t *testing.T) {
343343
func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
344344
t.Parallel()
345345

346-
t.Run("BackgroundCommandReturnsImmediately", func(t *testing.T) {
346+
t.Run("BackgroundCommandCapturesOutput", func(t *testing.T) {
347347
t.Parallel()
348348

349349
client, workspace, agentToken := setupWorkspaceForAgent(t)
@@ -359,8 +359,9 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
359359

360360
args := toolsdk.WorkspaceBashArgs{
361361
Workspace: workspace.Name,
362-
Command: `echo "started" && sleep 5 && echo "completed"`, // Command that would take 5+ seconds
363-
Background: true, // Run in background
362+
Command: `echo "started" && sleep 60 && echo "completed"`, // Command that would take 60+ seconds
363+
Background: true, // Run in background
364+
TimeoutMs: 2000, // 2 second timeout
364365
}
365366

366367
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
@@ -370,18 +371,17 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
370371

371372
t.Logf("Background result: exitCode=%d, output=%q", result.ExitCode, result.Output)
372373

373-
// Should have exit code 0 (background start successful)
374-
require.Equal(t, 0, result.ExitCode)
374+
// Should have exit code 124 (timeout) since command times out
375+
require.Equal(t, 124, result.ExitCode)
375376

376-
// Should contain PID and log path info, not the actual command output
377-
require.Contains(t, result.Output, "Command started with PID:")
378-
require.Contains(t, result.Output, "Log path: /tmp/mcp-bg/")
377+
// Should capture output up to timeout point
378+
require.Contains(t, result.Output, "started", "Should contain output captured before timeout")
379+
380+
// Should NOT contain the second echo (it never executed due to timeout)
381+
require.NotContains(t, result.Output, "completed", "Should not contain output after timeout")
379382

380-
// Should NOT contain the actual command output since it runs in background
381-
// The command was `echo "started" && sleep 5 && echo "completed"`
382-
// So we check that the quoted strings don't appear in the output
383-
require.NotContains(t, result.Output, `"started"`, "Should not contain command output in background mode")
384-
require.NotContains(t, result.Output, `"completed"`, "Should not contain command output in background mode")
383+
// Should contain background continuation message
384+
require.Contains(t, result.Output, "Command continues running in background")
385385
})
386386

387387
t.Run("BackgroundVsNormalExecution", func(t *testing.T) {
@@ -425,14 +425,12 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
425425
t.Logf("Normal result: %q", normalResult.Output)
426426
t.Logf("Background result: %q", backgroundResult.Output)
427427

428-
// Background mode should return PID/log info, not the actual output
428+
// Background mode should also return the actual output since command completes quickly
429429
require.Equal(t, 0, backgroundResult.ExitCode)
430-
require.Contains(t, backgroundResult.Output, "Command started with PID:")
431-
require.Contains(t, backgroundResult.Output, "Log path: /tmp/mcp-bg/")
432-
require.NotContains(t, backgroundResult.Output, "hello world")
430+
require.Equal(t, "hello world", backgroundResult.Output)
433431
})
434432

435-
t.Run("BackgroundIgnoresTimeout", func(t *testing.T) {
433+
t.Run("BackgroundCommandContinuesAfterTimeout", func(t *testing.T) {
436434
t.Parallel()
437435

438436
client, workspace, agentToken := setupWorkspaceForAgent(t)
@@ -448,36 +446,35 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
448446

449447
args := toolsdk.WorkspaceBashArgs{
450448
Workspace: workspace.Name,
451-
Command: `sleep 1 && echo "done" > /tmp/done`, // Command that would normally timeout
452-
TimeoutMs: 1, // 1 ms timeout (shorter than command duration)
453-
Background: true, // But running in background should ignore timeout
449+
Command: `echo "started" && sleep 10 && echo "done" > /tmp/bg-test-done`, // Command that will timeout but continue
450+
TimeoutMs: 5000, // 5000ms timeout (shorter than command duration)
451+
Background: true, // Run in background
454452
}
455453

456454
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
457455

458-
// Should not error and should not timeout
456+
// Should not error but should timeout
459457
require.NoError(t, err)
460458

461459
t.Logf("Background with timeout result: exitCode=%d, output=%q", result.ExitCode, result.Output)
462460

463-
// Should have exit code 0 (background start successful)
464-
require.Equal(t, 0, result.ExitCode)
461+
// Should have timeout exit code
462+
require.Equal(t, 124, result.ExitCode)
465463

466-
// Should return PID/log info, indicating the background command started successfully
467-
require.Contains(t, result.Output, "Command started with PID:")
468-
require.Contains(t, result.Output, "Log path: /tmp/mcp-bg/")
464+
// Should capture output before timeout
465+
require.Contains(t, result.Output, "started", "Should contain output captured before timeout")
469466

470-
// Should NOT contain timeout message since background mode ignores timeout
471-
require.NotContains(t, result.Output, "Command canceled due to timeout")
467+
// Should contain background continuation message
468+
require.Contains(t, result.Output, "Command continues running in background")
472469

473-
// Wait for the background command to complete
470+
// Wait for the background command to complete (even though SSH session timed out)
474471
require.Eventually(t, func() bool {
475-
args := toolsdk.WorkspaceBashArgs{
472+
checkArgs := toolsdk.WorkspaceBashArgs{
476473
Workspace: workspace.Name,
477-
Command: `cat /tmp/done`,
474+
Command: `cat /tmp/bg-test-done 2>/dev/null || echo "not found"`,
478475
}
479-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
480-
return err == nil && result.Output == "done"
481-
}, testutil.WaitMedium, testutil.IntervalMedium)
476+
checkResult, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, checkArgs)
477+
return err == nil && checkResult.Output == "done"
478+
}, testutil.WaitMedium, testutil.IntervalMedium, "Background command should continue running and complete after timeout")
482479
})
483480
}

codersdk/toolsdk/resources/background.sh

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments
 (0)