diff --git a/agent/agent.go b/agent/agent.go index 74cf305c9434a..17298e7aa5772 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1188,7 +1188,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, // createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates // the tailnet using the information in the manifest func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient26) error { - return func(ctx context.Context, _ proto.DRPCAgentClient26) (retErr error) { + return func(ctx context.Context, aAPI proto.DRPCAgentClient26) (retErr error) { if err := manifestOK.wait(ctx); err != nil { return xerrors.Errorf("no manifest: %w", err) } @@ -1208,6 +1208,7 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co // agent API. network, err = a.createTailnet( a.gracefulCtx, + aAPI, manifest.AgentID, manifest.DERPMap, manifest.DERPForceWebSockets, @@ -1355,6 +1356,7 @@ func (a *agent) trackGoroutine(fn func()) error { func (a *agent) createTailnet( ctx context.Context, + aAPI proto.DRPCAgentClient26, agentID uuid.UUID, derpMap *tailcfg.DERPMap, derpForceWebSockets, disableDirectConnections bool, @@ -1487,7 +1489,7 @@ func (a *agent) createTailnet( }() if err = a.trackGoroutine(func() { defer apiListener.Close() - apiHandler, closeAPIHAndler := a.apiHandler() + apiHandler, closeAPIHAndler := a.apiHandler(aAPI) defer func() { _ = closeAPIHAndler() }() diff --git a/agent/agentcontainers/api.go b/agent/agentcontainers/api.go index d826cb23cbc1f..17886da53826a 100644 --- a/agent/agentcontainers/api.go +++ b/agent/agentcontainers/api.go @@ -5,7 +5,10 @@ import ( "errors" "fmt" "net/http" + "os" "path" + "path/filepath" + "runtime" "slices" "strings" "sync" @@ -26,8 +29,14 @@ import ( ) const ( - defaultUpdateInterval = 10 * time.Second - listContainersTimeout = 15 * time.Second + defaultUpdateInterval = 10 * time.Second + defaultOperationTimeout = 15 * time.Second + + // Destination path inside the container, we store it in a fixed location + // under /.coder-agent/coder to avoid conflicts and avoid being shadowed + // by tmpfs or other mounts. This assumes the container root filesystem is + // read-write, which seems sensible for dev containers. + coderPathInsideContainer = "/.coder-agent/coder" ) // API is responsible for container-related operations in the agent. @@ -47,6 +56,8 @@ type API struct { dccli DevcontainerCLI clock quartz.Clock scriptLogger func(logSourceID uuid.UUID) ScriptLogger + subAgentClient SubAgentClient + subAgentURL string mu sync.RWMutex closed bool @@ -57,11 +68,18 @@ type API struct { configFileModifiedTimes map[string]time.Time // By config file path. recreateSuccessTimes map[string]time.Time // By workspace folder. recreateErrorTimes map[string]time.Time // By workspace folder. - recreateWg sync.WaitGroup + injectedSubAgentProcs map[string]subAgentProcess // By container ID. + asyncWg sync.WaitGroup devcontainerLogSourceIDs map[string]uuid.UUID // By workspace folder. } +type subAgentProcess struct { + agent SubAgent + ctx context.Context + stop context.CancelFunc +} + // Option is a functional option for API. type Option func(*API) @@ -96,6 +114,22 @@ func WithDevcontainerCLI(dccli DevcontainerCLI) Option { } } +// WithSubAgentClient sets the SubAgentClient implementation to use. +// This is used to list, create and delete Dev Container agents. +func WithSubAgentClient(client SubAgentClient) Option { + return func(api *API) { + api.subAgentClient = client + } +} + +// WithSubAgentURL sets the agent URL for the sub-agent for +// communicating with the control plane. +func WithSubAgentURL(url string) Option { + return func(api *API) { + api.subAgentURL = url + } +} + // WithDevcontainers sets the known devcontainers for the API. This // allows the API to be aware of devcontainers defined in the workspace // agent manifest. @@ -174,12 +208,14 @@ func NewAPI(logger slog.Logger, options ...Option) *API { logger: logger, clock: quartz.NewReal(), execer: agentexec.DefaultExecer, + subAgentClient: noopSubAgentClient{}, devcontainerNames: make(map[string]bool), knownDevcontainers: make(map[string]codersdk.WorkspaceAgentDevcontainer), configFileModifiedTimes: make(map[string]time.Time), recreateSuccessTimes: make(map[string]time.Time), recreateErrorTimes: make(map[string]time.Time), scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} }, + injectedSubAgentProcs: make(map[string]subAgentProcess), } // The ctx and logger must be set before applying options to avoid // nil pointer dereference. @@ -254,6 +290,15 @@ func (api *API) updaterLoop() { defer api.logger.Debug(api.ctx, "updater loop stopped") api.logger.Debug(api.ctx, "updater loop started") + // Make sure we clean up any subagents not tracked by this process + // before starting the update loop and creating new ones. + api.logger.Debug(api.ctx, "cleaning up subagents") + if err := api.cleanupSubAgents(api.ctx); err != nil { + api.logger.Error(api.ctx, "cleanup subagents failed", slog.Error(err)) + } else { + api.logger.Debug(api.ctx, "subagent cleanup complete") + } + // Perform an initial update to populate the container list, this // gives us a guarantee that the API has loaded the initial state // before returning any responses. This is useful for both tests @@ -360,7 +405,7 @@ func (api *API) handleList(rw http.ResponseWriter, r *http.Request) { // updateContainers fetches the latest container list, processes it, and // updates the cache. It performs locking for updating shared API state. func (api *API) updateContainers(ctx context.Context) error { - listCtx, listCancel := context.WithTimeout(ctx, listContainersTimeout) + listCtx, listCancel := context.WithTimeout(ctx, defaultOperationTimeout) defer listCancel() updated, err := api.ccli.List(listCtx) @@ -432,20 +477,6 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code continue } - // NOTE(mafredri): This name impl. may change to accommodate devcontainer agents RFC. - // If not in our known list, add as a runtime detected entry. - name := path.Base(workspaceFolder) - if api.devcontainerNames[name] { - // Try to find a unique name by appending a number. - for i := 2; ; i++ { - newName := fmt.Sprintf("%s-%d", name, i) - if !api.devcontainerNames[newName] { - name = newName - break - } - } - } - api.devcontainerNames[name] = true if configFile != "" { if err := api.watcher.Add(configFile); err != nil { api.logger.Error(ctx, "watch devcontainer config file failed", slog.Error(err), slog.F("file", configFile)) @@ -454,7 +485,7 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code api.knownDevcontainers[workspaceFolder] = codersdk.WorkspaceAgentDevcontainer{ ID: uuid.New(), - Name: name, + Name: "", // Updated later based on container state. WorkspaceFolder: workspaceFolder, ConfigPath: configFile, Status: "", // Updated later based on container state. @@ -466,19 +497,23 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code // Iterate through all known devcontainers and update their status // based on the current state of the containers. for _, dc := range api.knownDevcontainers { + if dc.Container != nil { + if !api.devcontainerNames[dc.Name] { + // If the devcontainer name wasn't set via terraform, we + // use the containers friendly name as a fallback which + // will keep changing as the dev container is recreated. + // TODO(mafredri): Parse the container label (i.e. devcontainer.json) for customization. + dc.Name = safeFriendlyName(dc.Container.FriendlyName) + } + dc.Container.DevcontainerStatus = dc.Status + dc.Container.DevcontainerDirty = dc.Dirty + } + switch { case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusStarting: - if dc.Container != nil { - dc.Container.DevcontainerStatus = dc.Status - dc.Container.DevcontainerDirty = dc.Dirty - } continue // This state is handled by the recreation routine. case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusError && (dc.Container == nil || dc.Container.CreatedAt.Before(api.recreateErrorTimes[dc.WorkspaceFolder])): - if dc.Container != nil { - dc.Container.DevcontainerStatus = dc.Status - dc.Container.DevcontainerDirty = dc.Dirty - } continue // The devcontainer needs to be recreated. case dc.Container != nil: @@ -494,7 +529,17 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code } dc.Container.DevcontainerDirty = dc.Dirty + if _, injected := api.injectedSubAgentProcs[dc.Container.ID]; !injected && dc.Status == codersdk.WorkspaceAgentDevcontainerStatusRunning { + err := api.injectSubAgentIntoContainerLocked(ctx, dc) + if err != nil { + api.logger.Error(ctx, "inject subagent into container failed", slog.Error(err)) + } + } + case dc.Container == nil: + if !api.devcontainerNames[dc.Name] { + dc.Name = "" + } dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStopped dc.Dirty = false } @@ -507,6 +552,18 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code api.containersErr = nil } +// safeFriendlyName returns a API safe version of the container's +// friendly name. +// +// See provisioner/regexes.go for the regex used to validate +// the friendly name on the API side. +func safeFriendlyName(name string) string { + name = strings.ToLower(name) + name = strings.ReplaceAll(name, "_", "-") + + return name +} + // refreshContainers triggers an immediate update of the container list // and waits for it to complete. func (api *API) refreshContainers(ctx context.Context) (err error) { @@ -624,7 +681,7 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques dc.Container.DevcontainerStatus = dc.Status } api.knownDevcontainers[dc.WorkspaceFolder] = dc - api.recreateWg.Add(1) + api.asyncWg.Add(1) go api.recreateDevcontainer(dc, configPath) api.mu.Unlock() @@ -640,10 +697,10 @@ func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Reques // It updates the devcontainer status and logs the process. The configPath is // passed as a parameter for the odd chance that the container being recreated // has a different config file than the one stored in the devcontainer state. -// The devcontainer state must be set to starting and the recreateWg must be +// The devcontainer state must be set to starting and the asyncWg must be // incremented before calling this function. func (api *API) recreateDevcontainer(dc codersdk.WorkspaceAgentDevcontainer, configPath string) { - defer api.recreateWg.Done() + defer api.asyncWg.Done() var ( err error @@ -803,6 +860,250 @@ func (api *API) markDevcontainerDirty(configPath string, modifiedAt time.Time) { } } +// cleanupSubAgents removes subagents that are no longer managed by +// this agent. This is usually only run at startup to ensure a clean +// slate. This method has an internal timeout to prevent blocking +// indefinitely if something goes wrong with the subagent deletion. +func (api *API) cleanupSubAgents(ctx context.Context) error { + agents, err := api.subAgentClient.List(ctx) + if err != nil { + return xerrors.Errorf("list agents: %w", err) + } + if len(agents) == 0 { + return nil + } + + api.mu.Lock() + defer api.mu.Unlock() + + injected := make(map[uuid.UUID]bool, len(api.injectedSubAgentProcs)) + for _, proc := range api.injectedSubAgentProcs { + injected[proc.agent.ID] = true + } + + ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout) + defer cancel() + + for _, agent := range agents { + if injected[agent.ID] { + continue + } + err := api.subAgentClient.Delete(ctx, agent.ID) + if err != nil { + api.logger.Error(ctx, "failed to delete agent", + slog.Error(err), + slog.F("agent_id", agent.ID), + slog.F("agent_name", agent.Name), + ) + } + } + + return nil +} + +// injectSubAgentIntoContainerLocked injects a subagent into a dev +// container and starts the subagent process. This method assumes that +// api.mu is held. +// +// This method uses an internal timeout to prevent blocking indefinitely +// if something goes wrong with the injection. +func (api *API) injectSubAgentIntoContainerLocked(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer) (err error) { + ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout) + defer cancel() + + container := dc.Container + if container == nil { + return xerrors.New("container is nil, cannot inject subagent") + } + + // Skip if subagent already exists for this container. + if _, injected := api.injectedSubAgentProcs[container.ID]; injected || api.closed { + return nil + } + + // Mark subagent as being injected immediately with a placeholder. + subAgent := subAgentProcess{ + ctx: context.Background(), + stop: func() {}, + } + api.injectedSubAgentProcs[container.ID] = subAgent + + // This is used to track the goroutine that will run the subagent + // process inside the container. It will be decremented when the + // subagent process completes or if an error occurs before we can + // start the subagent. + api.asyncWg.Add(1) + ranSubAgent := false + + // Clean up if injection fails. + defer func() { + if !ranSubAgent { + api.asyncWg.Done() + } + if err != nil { + // Mutex is held (defer re-lock). + delete(api.injectedSubAgentProcs, container.ID) + } + }() + + // Unlock the mutex to allow other operations while we + // inject the subagent into the container. + api.mu.Unlock() + defer api.mu.Lock() // Re-lock. + + logger := api.logger.With( + slog.F("devcontainer_id", dc.ID), + slog.F("devcontainer_name", dc.Name), + slog.F("workspace_folder", dc.WorkspaceFolder), + slog.F("config_path", dc.ConfigPath), + ) + + arch, err := api.ccli.DetectArchitecture(ctx, container.ID) + if err != nil { + return xerrors.Errorf("detect architecture: %w", err) + } + + logger.Info(ctx, "detected container architecture", slog.F("architecture", arch)) + + // For now, only support injecting if the architecture matches the host. + hostArch := runtime.GOARCH + + // TODO(mafredri): Add support for downloading agents for supported architectures. + if arch != hostArch { + logger.Warn(ctx, "skipping subagent injection for unsupported architecture", + slog.F("container_arch", arch), + slog.F("host_arch", hostArch)) + return nil + } + agentBinaryPath, err := os.Executable() + if err != nil { + return xerrors.Errorf("get agent binary path: %w", err) + } + agentBinaryPath, err = filepath.EvalSymlinks(agentBinaryPath) + if err != nil { + return xerrors.Errorf("resolve agent binary path: %w", err) + } + + // If we scripted this as a `/bin/sh` script, we could reduce these + // steps to one instruction, speeding up the injection process. + // + // Note: We use `path` instead of `filepath` here because we are + // working with Unix-style paths inside the container. + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "mkdir", "-p", path.Dir(coderPathInsideContainer)); err != nil { + return xerrors.Errorf("create agent directory in container: %w", err) + } + + if err := api.ccli.Copy(ctx, container.ID, agentBinaryPath, coderPathInsideContainer); err != nil { + return xerrors.Errorf("copy agent binary: %w", err) + } + + logger.Info(ctx, "copied agent binary to container") + + // Make sure the agent binary is executable so we can run it. + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chmod", "+x", coderPathInsideContainer); err != nil { + return xerrors.Errorf("set agent binary executable: %w", err) + } + + // Attempt to add CAP_NET_ADMIN to the binary to improve network + // performance (optional, allow to fail). + if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "setcap", "cap_net_admin+ep", coderPathInsideContainer); err != nil { + logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err)) + } + + // The preparation of the subagent is done, now we can create the + // subagent record in the database to receive the auth token. + createdAgent, err := api.subAgentClient.Create(ctx, SubAgent{ + Name: dc.Name, + // The default workspaceFolder for devcontainers is /workspaces. + // However, it can be changed by setting {"workspaceFolder": "/src"} + // in the devcontainer.json. This information is not encoded into + // the container labels, so we must rely on the values parsed from + // the devcontainer.json file on disk. + // TODO(mafredri): Support custom workspace folders in the future. + Directory: DevcontainerDefaultContainerWorkspaceFolder, + OperatingSystem: "linux", // Assuming Linux for dev containers. + Architecture: arch, + }) + if err != nil { + return xerrors.Errorf("create agent: %w", err) + } + + logger.Info(ctx, "created subagent record", slog.F("agent_id", createdAgent.ID)) + + // Start the subagent in the container in a new goroutine to avoid + // blocking. Note that we pass the api.ctx to the subagent process + // so that it isn't affected by the timeout. + go api.runSubAgentInContainer(api.ctx, dc, createdAgent, coderPathInsideContainer) + ranSubAgent = true + + return nil +} + +// runSubAgentInContainer runs the subagent process inside a dev +// container. The api.asyncWg must be incremented before calling this +// function, and it will be decremented when the subagent process +// completes or if an error occurs. +func (api *API) runSubAgentInContainer(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer, agent SubAgent, agentPath string) { + container := dc.Container // Must not be nil. + logger := api.logger.With( + slog.F("container_name", container.FriendlyName), + slog.F("agent_id", agent.ID), + ) + + agentCtx, agentStop := context.WithCancel(ctx) + defer func() { + agentStop() + + // Best effort cleanup of the agent record after the process + // completes. Note that we use the background context here + // because the api.ctx will be canceled when the API is closed. + // This may delay shutdown of the agent by the given timeout. + deleteCtx, cancel := context.WithTimeout(context.Background(), defaultOperationTimeout) + defer cancel() + err := api.subAgentClient.Delete(deleteCtx, agent.ID) + if err != nil { + logger.Error(deleteCtx, "failed to delete agent record after process completion", slog.Error(err)) + } + + api.mu.Lock() + delete(api.injectedSubAgentProcs, container.ID) + api.mu.Unlock() + + logger.Debug(ctx, "agent process cleanup complete") + api.asyncWg.Done() + }() + + api.mu.Lock() + if api.closed { + api.mu.Unlock() + // If the API is closed, we should not run the agent. + logger.Debug(ctx, "the API is closed, not running subagent in container") + return + } + // Update the placeholder with a valid subagent, context and stop. + api.injectedSubAgentProcs[container.ID] = subAgentProcess{ + agent: agent, + ctx: agentCtx, + stop: agentStop, + } + api.mu.Unlock() + + logger.Info(ctx, "starting subagent in dev container") + + err := api.dccli.Exec(agentCtx, dc.WorkspaceFolder, dc.ConfigPath, agentPath, []string{"agent"}, + WithContainerID(container.ID), + WithRemoteEnv( + "CODER_AGENT_URL="+api.subAgentURL, + "CODER_AGENT_TOKEN="+agent.AuthToken.String(), + ), + ) + if err != nil && !errors.Is(err, context.Canceled) { + logger.Error(ctx, "subagent process failed", slog.Error(err)) + } + + logger.Info(ctx, "subagent process finished") +} + func (api *API) Close() error { api.mu.Lock() if api.closed { @@ -811,6 +1112,12 @@ func (api *API) Close() error { } api.logger.Debug(api.ctx, "closing API") api.closed = true + + for _, proc := range api.injectedSubAgentProcs { + api.logger.Debug(api.ctx, "canceling subagent process", slog.F("agent_name", proc.agent.Name), slog.F("agent_id", proc.agent.ID)) + proc.stop() + } + api.cancel() // Interrupt all routines. api.mu.Unlock() // Release lock before waiting for goroutines. @@ -821,8 +1128,8 @@ func (api *API) Close() error { <-api.watcherDone <-api.updaterDone - // Wait for all devcontainer recreation tasks to complete. - api.recreateWg.Wait() + // Wait for all async tasks to complete. + api.asyncWg.Wait() api.logger.Debug(api.ctx, "closed API") return err diff --git a/agent/agentcontainers/api_test.go b/agent/agentcontainers/api_test.go index 313da6f9f615f..fdbb5c694ff63 100644 --- a/agent/agentcontainers/api_test.go +++ b/agent/agentcontainers/api_test.go @@ -6,6 +6,8 @@ import ( "math/rand" "net/http" "net/http/httptest" + "os" + "runtime" "strings" "testing" "time" @@ -190,6 +192,80 @@ func (w *fakeWatcher) sendEventWaitNextCalled(ctx context.Context, event fsnotif w.waitNext(ctx) } +// fakeSubAgentClient implements SubAgentClient for testing purposes. +type fakeSubAgentClient struct { + agents map[uuid.UUID]agentcontainers.SubAgent + nextID int + + listErrC chan error // If set, send to return error, close to return nil. + created []agentcontainers.SubAgent + createErrC chan error // If set, send to return error, close to return nil. + deleted []uuid.UUID + deleteErrC chan error // If set, send to return error, close to return nil. +} + +func (m *fakeSubAgentClient) List(ctx context.Context) ([]agentcontainers.SubAgent, error) { + var listErr error + if m.listErrC != nil { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case err, ok := <-m.listErrC: + if ok { + listErr = err + } + } + } + var agents []agentcontainers.SubAgent + for _, agent := range m.agents { + agents = append(agents, agent) + } + return agents, listErr +} + +func (m *fakeSubAgentClient) Create(ctx context.Context, agent agentcontainers.SubAgent) (agentcontainers.SubAgent, error) { + var createErr error + if m.createErrC != nil { + select { + case <-ctx.Done(): + return agentcontainers.SubAgent{}, ctx.Err() + case err, ok := <-m.createErrC: + if ok { + createErr = err + } + } + } + m.nextID++ + agent.ID = uuid.New() + agent.AuthToken = uuid.New() + if m.agents == nil { + m.agents = make(map[uuid.UUID]agentcontainers.SubAgent) + } + m.agents[agent.ID] = agent + m.created = append(m.created, agent) + return agent, createErr +} + +func (m *fakeSubAgentClient) Delete(ctx context.Context, id uuid.UUID) error { + var deleteErr error + if m.deleteErrC != nil { + select { + case <-ctx.Done(): + return ctx.Err() + case err, ok := <-m.deleteErrC: + if ok { + deleteErr = err + } + } + } + if m.agents == nil { + m.agents = make(map[uuid.UUID]agentcontainers.SubAgent) + } + delete(m.agents, id) + m.deleted = append(m.deleted, id) + return deleteErr +} + func TestAPI(t *testing.T) { t.Parallel() @@ -226,6 +302,7 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(), nil}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt), nil).After(preReq).AnyTimes() + mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt), }, @@ -244,6 +321,7 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(), assert.AnError}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt), nil).After(preReq).AnyTimes() + mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt), }, @@ -260,6 +338,7 @@ func TestAPI(t *testing.T) { initialData: initialDataPayload{makeResponse(fakeCt), nil}, setupMock: func(mcl *acmock.MockContainerCLI, preReq *gomock.Call) { mcl.EXPECT().List(gomock.Any()).Return(makeResponse(fakeCt2), nil).After(preReq).AnyTimes() + mcl.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() }, expected: makeResponse(fakeCt2), }, @@ -347,7 +426,7 @@ func TestAPI(t *testing.T) { FriendlyName: "container-name", Running: true, Labels: map[string]string{ - agentcontainers.DevcontainerLocalFolderLabel: "/workspace", + agentcontainers.DevcontainerLocalFolderLabel: "/workspaces", agentcontainers.DevcontainerConfigFileLabel: "/workspace/.devcontainer/devcontainer.json", }, } @@ -415,6 +494,7 @@ func TestAPI(t *testing.T) { containers: codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{validContainer}, }, + arch: "", // Unsupported architecture, don't inject subagent. }, devcontainerCLI: &fakeDevcontainerCLI{ upErr: xerrors.New("devcontainer CLI error"), @@ -429,6 +509,7 @@ func TestAPI(t *testing.T) { containers: codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{validContainer}, }, + arch: "", // Unsupported architecture, don't inject subagent. }, devcontainerCLI: &fakeDevcontainerCLI{}, wantStatus: []int{http.StatusAccepted, http.StatusConflict}, @@ -1151,6 +1232,167 @@ func TestAPI(t *testing.T) { assert.False(t, response.Devcontainers[0].Container.DevcontainerDirty, "dirty flag should be cleared on the container after container recreation") }) + + t.Run("SubAgentLifecycle", func(t *testing.T) { + t.Parallel() + + var ( + ctx = testutil.Context(t, testutil.WaitMedium) + logger = slog.Make() + mClock = quartz.NewMock(t) + mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t)) + fakeSAC = &fakeSubAgentClient{ + createErrC: make(chan error, 1), + deleteErrC: make(chan error, 1), + } + fakeDCCLI = &fakeDevcontainerCLI{ + execErrC: make(chan error, 1), + } + + testContainer = codersdk.WorkspaceAgentContainer{ + ID: "test-container-id", + FriendlyName: "test-container", + Image: "test-image", + Running: true, + CreatedAt: time.Now(), + Labels: map[string]string{ + agentcontainers.DevcontainerLocalFolderLabel: "/workspaces", + agentcontainers.DevcontainerConfigFileLabel: "/workspace/.devcontainer/devcontainer.json", + }, + } + ) + + defer close(fakeDCCLI.execErrC) + + coderBin, err := os.Executable() + require.NoError(t, err) + + mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + Containers: []codersdk.WorkspaceAgentContainer{testContainer}, + }, nil).AnyTimes() + gomock.InOrder( + mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil), + mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil, nil), + ) + + mClock.Set(time.Now()).MustWait(ctx) + tickerTrap := mClock.Trap().TickerFunc("updaterLoop") + + api := agentcontainers.NewAPI(logger, + agentcontainers.WithClock(mClock), + agentcontainers.WithContainerCLI(mCCLI), + agentcontainers.WithSubAgentClient(fakeSAC), + agentcontainers.WithSubAgentURL("test-subagent-url"), + agentcontainers.WithDevcontainerCLI(fakeDCCLI), + ) + defer api.Close() + + // Allow initial agent creation to succeed. + testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) + + // Make sure the ticker function has been registered + // before advancing the clock. + tickerTrap.MustWait(ctx).MustRelease(ctx) + tickerTrap.Close() + + // Pre-populate for next iteration (also verify previous consumption). + testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil) + + // Ensure we only inject the agent once. + for i := range 3 { + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + + t.Logf("Iteration %d: agents created: %d", i+1, len(fakeSAC.created)) + + // Verify agent was created. + require.Len(t, fakeSAC.created, 1) + assert.Equal(t, "test-container", fakeSAC.created[0].Name) + assert.Equal(t, "/workspaces", fakeSAC.created[0].Directory) + assert.Len(t, fakeSAC.deleted, 0) + } + + // Expect the agent to be reinjected. + gomock.InOrder( + mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil), + mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "+x", "/.coder-agent/coder").Return(nil, nil), + mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "setcap", "cap_net_admin+ep", "/.coder-agent/coder").Return(nil, nil), + ) + + // Terminate the agent and verify it is deleted. + testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, nil) + + // Allow cleanup to proceed. + testutil.RequireSend(ctx, t, fakeSAC.deleteErrC, xerrors.New("test termination")) + + // Wait until the agent recreation is started. + for len(fakeSAC.createErrC) > 0 { + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + } + + // Verify agent was deleted. + require.Len(t, fakeSAC.deleted, 1) + assert.Equal(t, fakeSAC.created[0].ID, fakeSAC.deleted[0]) + + // Verify the agent recreated. + require.Len(t, fakeSAC.created, 2) + }) + + t.Run("SubAgentCleanup", func(t *testing.T) { + t.Parallel() + + var ( + existingAgentID = uuid.New() + existingAgentToken = uuid.New() + existingAgent = agentcontainers.SubAgent{ + ID: existingAgentID, + Name: "stopped-container", + Directory: "/tmp", + AuthToken: existingAgentToken, + } + + ctx = testutil.Context(t, testutil.WaitMedium) + logger = slog.Make() + mClock = quartz.NewMock(t) + mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t)) + fakeSAC = &fakeSubAgentClient{ + agents: map[uuid.UUID]agentcontainers.SubAgent{ + existingAgentID: existingAgent, + }, + } + ) + + mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + Containers: []codersdk.WorkspaceAgentContainer{}, + }, nil).AnyTimes() + + mClock.Set(time.Now()).MustWait(ctx) + tickerTrap := mClock.Trap().TickerFunc("updaterLoop") + + api := agentcontainers.NewAPI(logger, + agentcontainers.WithClock(mClock), + agentcontainers.WithContainerCLI(mCCLI), + agentcontainers.WithSubAgentClient(fakeSAC), + agentcontainers.WithDevcontainerCLI(&fakeDevcontainerCLI{}), + ) + defer api.Close() + + tickerTrap.MustWait(ctx).MustRelease(ctx) + tickerTrap.Close() + + _, aw := mClock.AdvanceNext() + aw.MustWait(ctx) + + // Verify agent was deleted. + assert.Contains(t, fakeSAC.deleted, existingAgentID) + assert.Empty(t, fakeSAC.agents) + }) } // mustFindDevcontainerByPath returns the devcontainer with the given workspace diff --git a/agent/agentcontainers/devcontainer.go b/agent/agentcontainers/devcontainer.go index 09d4837d4b27a..f13963d7b63d7 100644 --- a/agent/agentcontainers/devcontainer.go +++ b/agent/agentcontainers/devcontainer.go @@ -18,6 +18,8 @@ const ( // DevcontainerConfigFileLabel is the label that contains the path to // the devcontainer.json configuration file. DevcontainerConfigFileLabel = "devcontainer.config_file" + // The default workspace folder inside the devcontainer. + DevcontainerDefaultContainerWorkspaceFolder = "/workspaces" ) const devcontainerUpScriptTemplate = ` diff --git a/agent/agentcontainers/subagent.go b/agent/agentcontainers/subagent.go new file mode 100644 index 0000000000000..70899fb96f70d --- /dev/null +++ b/agent/agentcontainers/subagent.go @@ -0,0 +1,128 @@ +package agentcontainers + +import ( + "context" + + "github.com/google/uuid" + "golang.org/x/xerrors" + + "cdr.dev/slog" + + agentproto "github.com/coder/coder/v2/agent/proto" +) + +// SubAgent represents an agent running in a dev container. +type SubAgent struct { + ID uuid.UUID + Name string + AuthToken uuid.UUID + Directory string + Architecture string + OperatingSystem string +} + +// SubAgentClient is an interface for managing sub agents and allows +// changing the implementation without having to deal with the +// agentproto package directly. +type SubAgentClient interface { + // List returns a list of all agents. + List(ctx context.Context) ([]SubAgent, error) + // Create adds a new agent. + Create(ctx context.Context, agent SubAgent) (SubAgent, error) + // Delete removes an agent by its ID. + Delete(ctx context.Context, id uuid.UUID) error +} + +// NewSubAgentClient returns a SubAgentClient that uses the provided +// agent API client. +type subAgentAPIClient struct { + logger slog.Logger + api agentproto.DRPCAgentClient26 +} + +var _ SubAgentClient = (*subAgentAPIClient)(nil) + +func NewSubAgentClientFromAPI(logger slog.Logger, agentAPI agentproto.DRPCAgentClient26) SubAgentClient { + if agentAPI == nil { + panic("developer error: agentAPI cannot be nil") + } + return &subAgentAPIClient{ + logger: logger.Named("subagentclient"), + api: agentAPI, + } +} + +func (a *subAgentAPIClient) List(ctx context.Context) ([]SubAgent, error) { + a.logger.Debug(ctx, "listing sub agents") + resp, err := a.api.ListSubAgents(ctx, &agentproto.ListSubAgentsRequest{}) + if err != nil { + return nil, err + } + + agents := make([]SubAgent, len(resp.Agents)) + for i, agent := range resp.Agents { + id, err := uuid.FromBytes(agent.GetId()) + if err != nil { + return nil, err + } + authToken, err := uuid.FromBytes(agent.GetAuthToken()) + if err != nil { + return nil, err + } + agents[i] = SubAgent{ + ID: id, + Name: agent.GetName(), + AuthToken: authToken, + } + } + return agents, nil +} + +func (a *subAgentAPIClient) Create(ctx context.Context, agent SubAgent) (SubAgent, error) { + a.logger.Debug(ctx, "creating sub agent", slog.F("name", agent.Name), slog.F("directory", agent.Directory)) + resp, err := a.api.CreateSubAgent(ctx, &agentproto.CreateSubAgentRequest{ + Name: agent.Name, + Directory: agent.Directory, + Architecture: agent.Architecture, + OperatingSystem: agent.OperatingSystem, + }) + if err != nil { + return SubAgent{}, err + } + + agent.Name = resp.Agent.Name + agent.ID, err = uuid.FromBytes(resp.Agent.Id) + if err != nil { + return agent, err + } + agent.AuthToken, err = uuid.FromBytes(resp.Agent.AuthToken) + if err != nil { + return agent, err + } + return agent, nil +} + +func (a *subAgentAPIClient) Delete(ctx context.Context, id uuid.UUID) error { + a.logger.Debug(ctx, "deleting sub agent", slog.F("id", id.String())) + _, err := a.api.DeleteSubAgent(ctx, &agentproto.DeleteSubAgentRequest{ + Id: id[:], + }) + return err +} + +// noopSubAgentClient is a SubAgentClient that does nothing. +type noopSubAgentClient struct{} + +var _ SubAgentClient = noopSubAgentClient{} + +func (noopSubAgentClient) List(_ context.Context) ([]SubAgent, error) { + return nil, nil +} + +func (noopSubAgentClient) Create(_ context.Context, _ SubAgent) (SubAgent, error) { + return SubAgent{}, xerrors.New("noopSubAgentClient does not support creating sub agents") +} + +func (noopSubAgentClient) Delete(_ context.Context, _ uuid.UUID) error { + return xerrors.New("noopSubAgentClient does not support deleting sub agents") +} diff --git a/agent/api.go b/agent/api.go index 2e15530adc608..1c9a707fbb338 100644 --- a/agent/api.go +++ b/agent/api.go @@ -10,11 +10,12 @@ import ( "github.com/google/uuid" "github.com/coder/coder/v2/agent/agentcontainers" + "github.com/coder/coder/v2/agent/proto" "github.com/coder/coder/v2/coderd/httpapi" "github.com/coder/coder/v2/codersdk" ) -func (a *agent) apiHandler() (http.Handler, func() error) { +func (a *agent) apiHandler(aAPI proto.DRPCAgentClient26) (http.Handler, func() error) { r := chi.NewRouter() r.Get("/", func(rw http.ResponseWriter, r *http.Request) { httpapi.Write(r.Context(), rw, http.StatusOK, codersdk.Response{ @@ -45,6 +46,7 @@ func (a *agent) apiHandler() (http.Handler, func() error) { agentcontainers.WithScriptLogger(func(logSourceID uuid.UUID) agentcontainers.ScriptLogger { return a.logSender.GetScriptLogger(logSourceID) }), + agentcontainers.WithSubAgentClient(agentcontainers.NewSubAgentClientFromAPI(a.logger, aAPI)), } manifest := a.manifest.Load() if manifest != nil && len(manifest.Devcontainers) > 0 { diff --git a/cli/agent.go b/cli/agent.go index deca447664337..5d6037f9930ec 100644 --- a/cli/agent.go +++ b/cli/agent.go @@ -28,6 +28,7 @@ import ( "github.com/coder/serpent" "github.com/coder/coder/v2/agent" + "github.com/coder/coder/v2/agent/agentcontainers" "github.com/coder/coder/v2/agent/agentexec" "github.com/coder/coder/v2/agent/agentssh" "github.com/coder/coder/v2/agent/reaper" @@ -362,6 +363,9 @@ func (r *RootCmd) workspaceAgent() *serpent.Command { BlockFileTransfer: blockFileTransfer, Execer: execer, ExperimentalDevcontainersEnabled: experimentalDevcontainersEnabled, + ContainerAPIOptions: []agentcontainers.Option{ + agentcontainers.WithSubAgentURL(r.agentURL.String()), + }, }) promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger) diff --git a/cli/open_test.go b/cli/open_test.go index 4441e51e58c4b..0f757c99bfcf1 100644 --- a/cli/open_test.go +++ b/cli/open_test.go @@ -306,8 +306,8 @@ func TestOpenVSCodeDevContainer(t *testing.T) { containerFolder := "/workspace/coder" ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) - mcl.EXPECT().List(gomock.Any()).Return( + mccli := acmock.NewMockContainerCLI(ctrl) + mccli.EXPECT().List(gomock.Any()).Return( codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{ { @@ -327,6 +327,8 @@ func TestOpenVSCodeDevContainer(t *testing.T) { }, }, nil, ).AnyTimes() + // DetectArchitecture always returns "" for this test to disable agent injection. + mccli.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() client, workspace, agentToken := setupWorkspaceForAgent(t, func(agents []*proto.Agent) []*proto.Agent { agents[0].Directory = agentDir @@ -337,7 +339,7 @@ func TestOpenVSCodeDevContainer(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mcl)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mccli)) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() @@ -481,8 +483,8 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { containerFolder := "/workspace/coder" ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) - mcl.EXPECT().List(gomock.Any()).Return( + mccli := acmock.NewMockContainerCLI(ctrl) + mccli.EXPECT().List(gomock.Any()).Return( codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{ { @@ -502,6 +504,8 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { }, }, nil, ).AnyTimes() + // DetectArchitecture always returns "" for this test to disable agent injection. + mccli.EXPECT().DetectArchitecture(gomock.Any(), gomock.Any()).Return("", nil).AnyTimes() client, workspace, agentToken := setupWorkspaceForAgent(t, func(agents []*proto.Agent) []*proto.Agent { agents[0].Name = agentName @@ -511,7 +515,7 @@ func TestOpenVSCodeDevContainer_NoAgentDirectory(t *testing.T) { _ = agenttest.New(t, client.URL, agentToken, func(o *agent.Options) { o.ExperimentalDevcontainersEnabled = true - o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mcl)) + o.ContainerAPIOptions = append(o.ContainerAPIOptions, agentcontainers.WithContainerCLI(mccli)) }) _ = coderdtest.NewWorkspaceAgentWaiter(t, client, workspace.ID).Wait() diff --git a/coderd/workspaceagents_test.go b/coderd/workspaceagents_test.go index f32c7b1458ca2..6a18ec76cdde2 100644 --- a/coderd/workspaceagents_test.go +++ b/coderd/workspaceagents_test.go @@ -1397,14 +1397,15 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { agentcontainers.DevcontainerConfigFileLabel: configFile, } devContainer = codersdk.WorkspaceAgentContainer{ - ID: uuid.NewString(), - CreatedAt: dbtime.Now(), - FriendlyName: testutil.GetRandomName(t), - Image: "busybox:latest", - Labels: dcLabels, - Running: true, - Status: "running", - DevcontainerDirty: true, + ID: uuid.NewString(), + CreatedAt: dbtime.Now(), + FriendlyName: testutil.GetRandomName(t), + Image: "busybox:latest", + Labels: dcLabels, + Running: true, + Status: "running", + DevcontainerDirty: true, + DevcontainerStatus: codersdk.WorkspaceAgentDevcontainerStatusRunning, } plainContainer = codersdk.WorkspaceAgentContainer{ ID: uuid.NewString(), @@ -1419,29 +1420,31 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { for _, tc := range []struct { name string - setupMock func(*acmock.MockContainerCLI, *acmock.MockDevcontainerCLI) (status int) + setupMock func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) (status int) }{ { name: "Recreate", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{devContainer}, }, nil).AnyTimes() + // DetectArchitecture always returns "" for this test to disable agent injection. + mccli.EXPECT().DetectArchitecture(gomock.Any(), devContainer.ID).Return("", nil).AnyTimes() mdccli.EXPECT().Up(gomock.Any(), workspaceFolder, configFile, gomock.Any()).Return("someid", nil).Times(1) return 0 }, }, { name: "Container does not exist", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{}, nil).AnyTimes() + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{}, nil).AnyTimes() return http.StatusNotFound }, }, { name: "Not a devcontainer", - setupMock: func(mcl *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { - mcl.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ + setupMock: func(mccli *acmock.MockContainerCLI, mdccli *acmock.MockDevcontainerCLI) int { + mccli.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{ Containers: []codersdk.WorkspaceAgentContainer{plainContainer}, }, nil).AnyTimes() return http.StatusNotFound @@ -1452,9 +1455,9 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { t.Parallel() ctrl := gomock.NewController(t) - mcl := acmock.NewMockContainerCLI(ctrl) + mccli := acmock.NewMockContainerCLI(ctrl) mdccli := acmock.NewMockDevcontainerCLI(ctrl) - wantStatus := tc.setupMock(mcl, mdccli) + wantStatus := tc.setupMock(mccli, mdccli) logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug) client, db := coderdtest.NewWithDatabase(t, &coderdtest.Options{ Logger: &logger, @@ -1471,7 +1474,7 @@ func TestWorkspaceAgentRecreateDevcontainer(t *testing.T) { o.ExperimentalDevcontainersEnabled = true o.ContainerAPIOptions = append( o.ContainerAPIOptions, - agentcontainers.WithContainerCLI(mcl), + agentcontainers.WithContainerCLI(mccli), agentcontainers.WithDevcontainerCLI(mdccli), agentcontainers.WithWatcher(watcher.NewNoop()), )