Skip to content

feat(agent): send devcontainer CLI logs during recreate #17845

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 175 additions & 11 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1935,8 +1935,6 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}

ctx := testutil.Context(t, testutil.WaitLong)

pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")
ct, err := pool.RunWithOptions(&dockertest.RunOptions{
Expand All @@ -1948,10 +1946,10 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
config.RestartPolicy = docker.RestartPolicy{Name: "no"}
})
require.NoError(t, err, "Could not start container")
t.Cleanup(func() {
defer func() {
err := pool.Purge(ct)
require.NoError(t, err, "Could not stop container")
})
}()
// Wait for container to start
require.Eventually(t, func() bool {
ct, ok := pool.ContainerByName(ct.Container.Name)
Expand All @@ -1962,6 +1960,7 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
})
ctx := testutil.Context(t, testutil.WaitLong)
ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "/bin/sh", func(arp *workspacesdk.AgentReconnectingPTYInit) {
arp.Container = ct.Container.ID
})
Expand Down Expand Up @@ -2005,9 +2004,6 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}

ctx := testutil.Context(t, testutil.WaitLong)

// Connect to Docker
pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")

Expand Down Expand Up @@ -2051,7 +2047,7 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
},
},
}
// nolint: dogsled
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
})
Expand Down Expand Up @@ -2079,8 +2075,7 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {

return false
}, testutil.WaitSuperLong, testutil.IntervalMedium, "no container with workspace folder label found")

t.Cleanup(func() {
defer func() {
// We can't rely on pool here because the container is not
// managed by it (it is managed by @devcontainer/cli).
err := pool.Client.RemoveContainer(docker.RemoveContainerOptions{
Expand All @@ -2089,13 +2084,15 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
Force: true,
})
assert.NoError(t, err, "remove container")
})
}()

containerInfo, err := pool.Client.InspectContainer(container.ID)
require.NoError(t, err, "inspect container")
t.Logf("Container state: status: %v", containerInfo.State.Status)
require.True(t, containerInfo.State.Running, "container should be running")

ctx := testutil.Context(t, testutil.WaitLong)

ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "", func(opts *workspacesdk.AgentReconnectingPTYInit) {
opts.Container = container.ID
})
Expand Down Expand Up @@ -2124,6 +2121,173 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
require.NoError(t, err, "file should exist outside devcontainer")
}

// TestAgent_DevcontainerRecreate tests that RecreateDevcontainer
// recreates a devcontainer and emits logs.
//
// This tests end-to-end functionality of auto-starting a devcontainer.
// It runs "devcontainer up" which creates a real Docker container. As
// such, it does not run by default in CI.
//
// You can run it manually as follows:
//
// CODER_TEST_USE_DOCKER=1 go test -count=1 ./agent -run TestAgent_DevcontainerRecreate
func TestAgent_DevcontainerRecreate(t *testing.T) {
if os.Getenv("CODER_TEST_USE_DOCKER") != "1" {
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
}
t.Parallel()

pool, err := dockertest.NewPool("")
require.NoError(t, err, "Could not connect to docker")

// Prepare temporary devcontainer for test (mywork).
devcontainerID := uuid.New()
devcontainerLogSourceID := uuid.New()
workspaceFolder := filepath.Join(t.TempDir(), "mywork")
t.Logf("Workspace folder: %s", workspaceFolder)
devcontainerPath := filepath.Join(workspaceFolder, ".devcontainer")
err = os.MkdirAll(devcontainerPath, 0o755)
require.NoError(t, err, "create devcontainer directory")
devcontainerFile := filepath.Join(devcontainerPath, "devcontainer.json")
err = os.WriteFile(devcontainerFile, []byte(`{
"name": "mywork",
"image": "busybox:latest",
"cmd": ["sleep", "infinity"]
}`), 0o600)
require.NoError(t, err, "write devcontainer.json")

manifest := agentsdk.Manifest{
// Set up pre-conditions for auto-starting a devcontainer, the
// script is used to extract the log source ID.
Devcontainers: []codersdk.WorkspaceAgentDevcontainer{
{
ID: devcontainerID,
Name: "test",
WorkspaceFolder: workspaceFolder,
},
},
Scripts: []codersdk.WorkspaceAgentScript{
{
ID: devcontainerID,
LogSourceID: devcontainerLogSourceID,
},
},
}

//nolint:dogsled
conn, client, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
o.ExperimentalDevcontainersEnabled = true
})

ctx := testutil.Context(t, testutil.WaitLong)

// We enabled autostart for the devcontainer, so ready is a good
// indication that the devcontainer is up and running. Importantly,
// this also means that the devcontainer startup is no longer
// producing logs that may interfere with the recreate logs.
testutil.Eventually(ctx, t, func(context.Context) bool {
states := client.GetLifecycleStates()
return slices.Contains(states, codersdk.WorkspaceAgentLifecycleReady)
}, testutil.IntervalMedium, "devcontainer not ready")

t.Logf("Looking for container with label: devcontainer.local_folder=%s", workspaceFolder)

var container docker.APIContainers
testutil.Eventually(ctx, t, func(context.Context) bool {
containers, err := pool.Client.ListContainers(docker.ListContainersOptions{All: true})
if err != nil {
t.Logf("Error listing containers: %v", err)
return false
}
for _, c := range containers {
t.Logf("Found container: %s with labels: %v", c.ID[:12], c.Labels)
if v, ok := c.Labels["devcontainer.local_folder"]; ok && v == workspaceFolder {
t.Logf("Found matching container: %s", c.ID[:12])
container = c
return true
}
}
return false
}, testutil.IntervalMedium, "no container with workspace folder label found")
defer func(container docker.APIContainers) {
// We can't rely on pool here because the container is not
// managed by it (it is managed by @devcontainer/cli).
err := pool.Client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
RemoveVolumes: true,
Force: true,
})
assert.Error(t, err, "container should be removed by recreate")
}(container)

ctx = testutil.Context(t, testutil.WaitLong) // Reset context.

// Capture logs via ScriptLogger.
logsCh := make(chan *proto.BatchCreateLogsRequest, 1)
client.SetLogsChannel(logsCh)

// Invoke recreate to trigger the destruction and recreation of the
// devcontainer, we do it in a goroutine so we can process logs
// concurrently.
go func(container docker.APIContainers) {
err := conn.RecreateDevcontainer(ctx, container.ID)
assert.NoError(t, err, "recreate devcontainer should succeed")
}(container)

t.Logf("Checking recreate logs for outcome...")

// Wait for the logs to be emitted, the @devcontainer/cli up command
// will emit a log with the outcome at the end suggesting we did
// receive all the logs.
waitForOutcomeLoop:
for {
batch := testutil.RequireReceive(ctx, t, logsCh)

if bytes.Equal(batch.LogSourceId, devcontainerLogSourceID[:]) {
for _, log := range batch.Logs {
t.Logf("Received log: %s", log.Output)
if strings.Contains(log.Output, "\"outcome\"") {
break waitForOutcomeLoop
}
}
}
}

t.Logf("Checking there's a new container with label: devcontainer.local_folder=%s", workspaceFolder)

// Make sure the container exists and isn't the same as the old one.
testutil.Eventually(ctx, t, func(context.Context) bool {
containers, err := pool.Client.ListContainers(docker.ListContainersOptions{All: true})
if err != nil {
t.Logf("Error listing containers: %v", err)
return false
}
for _, c := range containers {
t.Logf("Found container: %s with labels: %v", c.ID[:12], c.Labels)
if v, ok := c.Labels["devcontainer.local_folder"]; ok && v == workspaceFolder {
if c.ID == container.ID {
t.Logf("Found same container: %s", c.ID[:12])
return false
}
t.Logf("Found new container: %s", c.ID[:12])
container = c
return true
}
}
return false
}, testutil.IntervalMedium, "new devcontainer not found")
defer func(container docker.APIContainers) {
// We can't rely on pool here because the container is not
// managed by it (it is managed by @devcontainer/cli).
err := pool.Client.RemoveContainer(docker.RemoveContainerOptions{
ID: container.ID,
RemoveVolumes: true,
Force: true,
})
assert.NoError(t, err, "remove container")
}(container)
}

func TestAgent_Dial(t *testing.T) {
t.Parallel()

Expand Down
78 changes: 71 additions & 7 deletions agent/agentcontainers/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/coder/coder/v2/agent/agentexec"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/quartz"
)

Expand All @@ -43,6 +44,7 @@ type API struct {
cl Lister
dccli DevcontainerCLI
clock quartz.Clock
scriptLogger func(logSourceID uuid.UUID) ScriptLogger

// lockCh protects the below fields. We use a channel instead of a
// mutex so we can handle cancellation properly.
Expand All @@ -52,6 +54,8 @@ type API struct {
devcontainerNames map[string]struct{} // Track devcontainer names to avoid duplicates.
knownDevcontainers []codersdk.WorkspaceAgentDevcontainer // Track predefined and runtime-detected devcontainers.
configFileModifiedTimes map[string]time.Time // Track when config files were last modified.

devcontainerLogSourceIDs map[string]uuid.UUID // Track devcontainer log source IDs.
}

// Option is a functional option for API.
Expand Down Expand Up @@ -91,13 +95,30 @@ func WithDevcontainerCLI(dccli DevcontainerCLI) Option {
// WithDevcontainers sets the known devcontainers for the API. This
// allows the API to be aware of devcontainers defined in the workspace
// agent manifest.
func WithDevcontainers(devcontainers []codersdk.WorkspaceAgentDevcontainer) Option {
func WithDevcontainers(devcontainers []codersdk.WorkspaceAgentDevcontainer, scripts []codersdk.WorkspaceAgentScript) Option {
return func(api *API) {
if len(devcontainers) > 0 {
api.knownDevcontainers = slices.Clone(devcontainers)
api.devcontainerNames = make(map[string]struct{}, len(devcontainers))
for _, devcontainer := range devcontainers {
api.devcontainerNames[devcontainer.Name] = struct{}{}
if len(devcontainers) == 0 {
return
}
api.knownDevcontainers = slices.Clone(devcontainers)
api.devcontainerNames = make(map[string]struct{}, len(devcontainers))
api.devcontainerLogSourceIDs = make(map[string]uuid.UUID)
for _, devcontainer := range devcontainers {
api.devcontainerNames[devcontainer.Name] = struct{}{}
for _, script := range scripts {
// The devcontainer scripts match the devcontainer ID for
// identification.
if script.ID == devcontainer.ID {
api.devcontainerLogSourceIDs[devcontainer.WorkspaceFolder] = script.LogSourceID
break
Comment on lines +108 to +113
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we warn if there are scripts that do not match up with any devcontainer?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose it would be an error rather than warn, this is an expectation coming from the provisioner currently.

Added.

}
}
if api.devcontainerLogSourceIDs[devcontainer.WorkspaceFolder] == uuid.Nil {
api.logger.Error(api.ctx, "devcontainer log source ID not found for devcontainer",
slog.F("devcontainer", devcontainer.Name),
slog.F("workspace_folder", devcontainer.WorkspaceFolder),
slog.F("config_path", devcontainer.ConfigPath),
)
}
}
}
Expand All @@ -112,6 +133,27 @@ func WithWatcher(w watcher.Watcher) Option {
}
}

// ScriptLogger is an interface for sending devcontainer logs to the
// controlplane.
type ScriptLogger interface {
Send(ctx context.Context, log ...agentsdk.Log) error
Flush(ctx context.Context) error
}

// noopScriptLogger is a no-op implementation of the ScriptLogger
// interface.
type noopScriptLogger struct{}

func (noopScriptLogger) Send(context.Context, ...agentsdk.Log) error { return nil }
func (noopScriptLogger) Flush(context.Context) error { return nil }

// WithScriptLogger sets the script logger provider for devcontainer operations.
func WithScriptLogger(scriptLogger func(logSourceID uuid.UUID) ScriptLogger) Option {
return func(api *API) {
api.scriptLogger = scriptLogger
}
}

// NewAPI returns a new API with the given options applied.
func NewAPI(logger slog.Logger, options ...Option) *API {
ctx, cancel := context.WithCancel(context.Background())
Expand All @@ -127,7 +169,10 @@ func NewAPI(logger slog.Logger, options ...Option) *API {
devcontainerNames: make(map[string]struct{}),
knownDevcontainers: []codersdk.WorkspaceAgentDevcontainer{},
configFileModifiedTimes: make(map[string]time.Time),
scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} },
}
// The ctx and logger must be set before applying options to avoid
// nil pointer dereference.
for _, opt := range options {
opt(api)
}
Expand Down Expand Up @@ -426,7 +471,26 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques
return
}

_, err = api.dccli.Up(ctx, workspaceFolder, configPath, WithRemoveExistingContainer())
// Send logs via agent logging facilities.
logSourceID := api.devcontainerLogSourceIDs[workspaceFolder]
if logSourceID == uuid.Nil {
// Fallback to the external log source ID if not found.
logSourceID = agentsdk.ExternalLogSourceID
}
scriptLogger := api.scriptLogger(logSourceID)
defer func() {
flushCtx, cancel := context.WithTimeout(api.ctx, 5*time.Second)
defer cancel()
if err := scriptLogger.Flush(flushCtx); err != nil {
api.logger.Error(flushCtx, "flush devcontainer logs failed", slog.Error(err))
}
}()
infoW := agentsdk.LogsWriter(ctx, scriptLogger.Send, logSourceID, codersdk.LogLevelInfo)
defer infoW.Close()
errW := agentsdk.LogsWriter(ctx, scriptLogger.Send, logSourceID, codersdk.LogLevelError)
defer errW.Close()

_, err = api.dccli.Up(ctx, workspaceFolder, configPath, WithOutput(infoW, errW), WithRemoveExistingContainer())
if err != nil {
httpapi.Write(ctx, w, http.StatusInternalServerError, codersdk.Response{
Message: "Could not recreate devcontainer",
Expand Down
11 changes: 10 additions & 1 deletion agent/agentcontainers/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -563,8 +563,17 @@ func TestAPI(t *testing.T) {
agentcontainers.WithWatcher(watcher.NewNoop()),
}

// Generate matching scripts for the known devcontainers
// (required to extract log source ID).
var scripts []codersdk.WorkspaceAgentScript
for i := range tt.knownDevcontainers {
scripts = append(scripts, codersdk.WorkspaceAgentScript{
ID: tt.knownDevcontainers[i].ID,
LogSourceID: uuid.New(),
})
}
if len(tt.knownDevcontainers) > 0 {
apiOptions = append(apiOptions, agentcontainers.WithDevcontainers(tt.knownDevcontainers))
apiOptions = append(apiOptions, agentcontainers.WithDevcontainers(tt.knownDevcontainers, scripts))
}

api := agentcontainers.NewAPI(logger, apiOptions...)
Expand Down
Loading
Loading