Skip to content

Commit fca9917

Browse files
authored
feat(agent/agentcontainers): implement sub agent injection (#18245)
This change adds support for sub agent creation and injection into dev containers. Updates coder/internal#621
1 parent 44fff54 commit fca9917

File tree

15 files changed

+1218
-146
lines changed

15 files changed

+1218
-146
lines changed

agent/agent.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,7 +1188,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
11881188
// createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates
11891189
// the tailnet using the information in the manifest
11901190
func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient26) error {
1191-
return func(ctx context.Context, _ proto.DRPCAgentClient26) (retErr error) {
1191+
return func(ctx context.Context, aAPI proto.DRPCAgentClient26) (retErr error) {
11921192
if err := manifestOK.wait(ctx); err != nil {
11931193
return xerrors.Errorf("no manifest: %w", err)
11941194
}
@@ -1208,6 +1208,7 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co
12081208
// agent API.
12091209
network, err = a.createTailnet(
12101210
a.gracefulCtx,
1211+
aAPI,
12111212
manifest.AgentID,
12121213
manifest.DERPMap,
12131214
manifest.DERPForceWebSockets,
@@ -1355,6 +1356,7 @@ func (a *agent) trackGoroutine(fn func()) error {
13551356

13561357
func (a *agent) createTailnet(
13571358
ctx context.Context,
1359+
aAPI proto.DRPCAgentClient26,
13581360
agentID uuid.UUID,
13591361
derpMap *tailcfg.DERPMap,
13601362
derpForceWebSockets, disableDirectConnections bool,
@@ -1487,7 +1489,7 @@ func (a *agent) createTailnet(
14871489
}()
14881490
if err = a.trackGoroutine(func() {
14891491
defer apiListener.Close()
1490-
apiHandler, closeAPIHAndler := a.apiHandler()
1492+
apiHandler, closeAPIHAndler := a.apiHandler(aAPI)
14911493
defer func() {
14921494
_ = closeAPIHAndler()
14931495
}()

agent/agent_test.go

Lines changed: 163 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import (
4848
"cdr.dev/slog/sloggers/slogtest"
4949

5050
"github.com/coder/coder/v2/agent"
51+
"github.com/coder/coder/v2/agent/agentcontainers"
5152
"github.com/coder/coder/v2/agent/agentssh"
5253
"github.com/coder/coder/v2/agent/agenttest"
5354
"github.com/coder/coder/v2/agent/proto"
@@ -60,9 +61,16 @@ import (
6061
"github.com/coder/coder/v2/tailnet"
6162
"github.com/coder/coder/v2/tailnet/tailnettest"
6263
"github.com/coder/coder/v2/testutil"
64+
"github.com/coder/quartz"
6365
)
6466

6567
func TestMain(m *testing.M) {
68+
if os.Getenv("CODER_TEST_RUN_SUB_AGENT_MAIN") == "1" {
69+
// If we're running as a subagent, we don't want to run the main tests.
70+
// Instead, we just run the subagent tests.
71+
exit := runSubAgentMain()
72+
os.Exit(exit)
73+
}
6674
goleak.VerifyTestMain(m, testutil.GoleakOptions...)
6775
}
6876

@@ -1930,6 +1938,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
19301938
if os.Getenv("CODER_TEST_USE_DOCKER") != "1" {
19311939
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
19321940
}
1941+
if _, err := exec.LookPath("devcontainer"); err != nil {
1942+
t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli")
1943+
}
19331944

19341945
pool, err := dockertest.NewPool("")
19351946
require.NoError(t, err, "Could not connect to docker")
@@ -1955,6 +1966,9 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
19551966
// nolint: dogsled
19561967
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(_ *agenttest.Client, o *agent.Options) {
19571968
o.ExperimentalDevcontainersEnabled = true
1969+
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
1970+
agentcontainers.WithContainerLabelIncludeFilter("this.label.does.not.exist.ignore.devcontainers", "true"),
1971+
)
19581972
})
19591973
ctx := testutil.Context(t, testutil.WaitLong)
19601974
ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "/bin/sh", func(arp *workspacesdk.AgentReconnectingPTYInit) {
@@ -1986,6 +2000,60 @@ func TestAgent_ReconnectingPTYContainer(t *testing.T) {
19862000
require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF)
19872001
}
19882002

2003+
// subAgentRequestPayload is the JSON body that runSubAgentMain posts to the
// URL given in CODER_AGENT_URL.
type subAgentRequestPayload struct {
	// Token is the value of the CODER_AGENT_TOKEN environment variable.
	Token string `json:"token"`
	// Directory is the working directory of the sub-agent process.
	Directory string `json:"directory"`
}
2007+
2008+
// runSubAgentMain is the main function for the sub-agent that connects
2009+
// to the control plane. It reads the CODER_AGENT_URL and
2010+
// CODER_AGENT_TOKEN environment variables, sends the token, and exits
2011+
// with a status code based on the response.
2012+
func runSubAgentMain() int {
2013+
url := os.Getenv("CODER_AGENT_URL")
2014+
token := os.Getenv("CODER_AGENT_TOKEN")
2015+
if url == "" || token == "" {
2016+
_, _ = fmt.Fprintln(os.Stderr, "CODER_AGENT_URL and CODER_AGENT_TOKEN must be set")
2017+
return 10
2018+
}
2019+
2020+
dir, err := os.Getwd()
2021+
if err != nil {
2022+
_, _ = fmt.Fprintf(os.Stderr, "failed to get current working directory: %v\n", err)
2023+
return 1
2024+
}
2025+
payload := subAgentRequestPayload{
2026+
Token: token,
2027+
Directory: dir,
2028+
}
2029+
b, err := json.Marshal(payload)
2030+
if err != nil {
2031+
_, _ = fmt.Fprintf(os.Stderr, "failed to marshal payload: %v\n", err)
2032+
return 1
2033+
}
2034+
2035+
req, err := http.NewRequest("POST", url, bytes.NewReader(b))
2036+
if err != nil {
2037+
_, _ = fmt.Fprintf(os.Stderr, "failed to create request: %v\n", err)
2038+
return 1
2039+
}
2040+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
2041+
defer cancel()
2042+
req = req.WithContext(ctx)
2043+
resp, err := http.DefaultClient.Do(req)
2044+
if err != nil {
2045+
_, _ = fmt.Fprintf(os.Stderr, "agent connection failed: %v\n", err)
2046+
return 11
2047+
}
2048+
defer resp.Body.Close()
2049+
if resp.StatusCode != http.StatusOK {
2050+
_, _ = fmt.Fprintf(os.Stderr, "agent exiting with non-zero exit code %d\n", resp.StatusCode)
2051+
return 12
2052+
}
2053+
_, _ = fmt.Println("sub-agent connected successfully")
2054+
return 0
2055+
}
2056+
19892057
// This tests end-to-end functionality of auto-starting a devcontainer.
19902058
// It runs "devcontainer up" which creates a real Docker container. As
19912059
// such, it does not run by default in CI.
@@ -1999,6 +2067,56 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
19992067
if os.Getenv("CODER_TEST_USE_DOCKER") != "1" {
20002068
t.Skip("Set CODER_TEST_USE_DOCKER=1 to run this test")
20012069
}
2070+
if _, err := exec.LookPath("devcontainer"); err != nil {
2071+
t.Skip("This test requires the devcontainer CLI: npm install -g @devcontainers/cli")
2072+
}
2073+
2074+
// This HTTP handler handles requests from runSubAgentMain which
2075+
// acts as a fake sub-agent. We want to verify that the sub-agent
2076+
// connects and sends its token. We use a channel to signal
2077+
// that the sub-agent has connected successfully and then we wait
2078+
// until we receive another signal to return from the handler. This
2079+
// keeps the agent "alive" for as long as we want.
2080+
subAgentConnected := make(chan subAgentRequestPayload, 1)
2081+
subAgentReady := make(chan struct{}, 1)
2082+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
2083+
t.Logf("Sub-agent request received: %s %s", r.Method, r.URL.Path)
2084+
2085+
if r.Method != http.MethodPost {
2086+
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
2087+
return
2088+
}
2089+
2090+
// Read the token from the request body.
2091+
var payload subAgentRequestPayload
2092+
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
2093+
http.Error(w, "Failed to read token", http.StatusBadRequest)
2094+
t.Logf("Failed to read token: %v", err)
2095+
return
2096+
}
2097+
defer r.Body.Close()
2098+
2099+
t.Logf("Sub-agent request payload received: %+v", payload)
2100+
2101+
// Signal that the sub-agent has connected successfully.
2102+
select {
2103+
case <-t.Context().Done():
2104+
t.Logf("Test context done, not processing sub-agent request")
2105+
return
2106+
case subAgentConnected <- payload:
2107+
}
2108+
2109+
// Wait for the signal to return from the handler.
2110+
select {
2111+
case <-t.Context().Done():
2112+
t.Logf("Test context done, not waiting for sub-agent ready")
2113+
return
2114+
case <-subAgentReady:
2115+
}
2116+
2117+
w.WriteHeader(http.StatusOK)
2118+
}))
2119+
defer srv.Close()
20022120

20032121
pool, err := dockertest.NewPool("")
20042122
require.NoError(t, err, "Could not connect to docker")
@@ -2016,9 +2134,10 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
20162134
require.NoError(t, err, "create devcontainer directory")
20172135
devcontainerFile := filepath.Join(devcontainerPath, "devcontainer.json")
20182136
err = os.WriteFile(devcontainerFile, []byte(`{
2019-
"name": "mywork",
2020-
"image": "busybox:latest",
2021-
"cmd": ["sleep", "infinity"]
2137+
"name": "mywork",
2138+
"image": "ubuntu:latest",
2139+
"cmd": ["sleep", "infinity"],
2140+
"runArgs": ["--network=host"]
20222141
}`), 0o600)
20232142
require.NoError(t, err, "write devcontainer.json")
20242143

@@ -2043,9 +2162,24 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
20432162
},
20442163
},
20452164
}
2165+
mClock := quartz.NewMock(t)
2166+
mClock.Set(time.Now())
2167+
tickerFuncTrap := mClock.Trap().TickerFunc("agentcontainers")
2168+
20462169
//nolint:dogsled
2047-
conn, _, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
2170+
_, agentClient, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
20482171
o.ExperimentalDevcontainersEnabled = true
2172+
o.ContainerAPIOptions = append(
2173+
o.ContainerAPIOptions,
2174+
// Only match this specific dev container.
2175+
agentcontainers.WithClock(mClock),
2176+
agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", tempWorkspaceFolder),
2177+
agentcontainers.WithSubAgentURL(srv.URL),
2178+
// The agent will copy "itself", but in the case of this test, the
2179+
// agent is actually this test binary. So we'll tell the test binary
2180+
// to execute the sub-agent main function via this env.
2181+
agentcontainers.WithSubAgentEnv("CODER_TEST_RUN_SUB_AGENT_MAIN=1"),
2182+
)
20492183
})
20502184

20512185
t.Logf("Waiting for container with label: devcontainer.local_folder=%s", tempWorkspaceFolder)
@@ -2089,32 +2223,34 @@ func TestAgent_DevcontainerAutostart(t *testing.T) {
20892223

20902224
ctx := testutil.Context(t, testutil.WaitLong)
20912225

2092-
ac, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "", func(opts *workspacesdk.AgentReconnectingPTYInit) {
2093-
opts.Container = container.ID
2094-
})
2095-
require.NoError(t, err, "failed to create ReconnectingPTY")
2096-
defer ac.Close()
2226+
// Ensure the container update routine runs.
2227+
tickerFuncTrap.MustWait(ctx).MustRelease(ctx)
2228+
tickerFuncTrap.Close()
2229+
_, next := mClock.AdvanceNext()
2230+
next.MustWait(ctx)
20972231

2098-
// Use terminal reader so we can see output in case something goes wrong.
2099-
tr := testutil.NewTerminalReader(t, ac)
2232+
// Verify that a subagent was created.
2233+
subAgents := agentClient.GetSubAgents()
2234+
require.Len(t, subAgents, 1, "expected one sub agent")
21002235

2101-
require.NoError(t, tr.ReadUntil(ctx, func(line string) bool {
2102-
return strings.Contains(line, "#") || strings.Contains(line, "$")
2103-
}), "find prompt")
2236+
subAgent := subAgents[0]
2237+
subAgentID, err := uuid.FromBytes(subAgent.GetId())
2238+
require.NoError(t, err, "failed to parse sub-agent ID")
2239+
t.Logf("Connecting to sub-agent: %s (ID: %s)", subAgent.Name, subAgentID)
21042240

2105-
wantFileName := "file-from-devcontainer"
2106-
wantFile := filepath.Join(tempWorkspaceFolder, wantFileName)
2241+
gotDir, err := agentClient.GetSubAgentDirectory(subAgentID)
2242+
require.NoError(t, err, "failed to get sub-agent directory")
2243+
require.Equal(t, "/workspaces/mywork", gotDir, "sub-agent directory should match")
21072244

2108-
require.NoError(t, json.NewEncoder(ac).Encode(workspacesdk.ReconnectingPTYRequest{
2109-
// NOTE(mafredri): We must use absolute path here for some reason.
2110-
Data: fmt.Sprintf("touch /workspaces/mywork/%s; exit\r", wantFileName),
2111-
}), "create file inside devcontainer")
2245+
subAgentToken, err := uuid.FromBytes(subAgent.GetAuthToken())
2246+
require.NoError(t, err, "failed to parse sub-agent token")
21122247

2113-
// Wait for the connection to close to ensure the touch was executed.
2114-
require.ErrorIs(t, tr.ReadUntil(ctx, nil), io.EOF)
2248+
payload := testutil.RequireReceive(ctx, t, subAgentConnected)
2249+
require.Equal(t, subAgentToken.String(), payload.Token, "sub-agent token should match")
2250+
require.Equal(t, "/workspaces/mywork", payload.Directory, "sub-agent directory should match")
21152251

2116-
_, err = os.Stat(wantFile)
2117-
require.NoError(t, err, "file should exist outside devcontainer")
2252+
// Allow the subagent to exit.
2253+
close(subAgentReady)
21182254
}
21192255

21202256
// TestAgent_DevcontainerRecreate tests that RecreateDevcontainer
@@ -2173,6 +2309,9 @@ func TestAgent_DevcontainerRecreate(t *testing.T) {
21732309
//nolint:dogsled
21742310
conn, client, _, _, _ := setupAgent(t, manifest, 0, func(_ *agenttest.Client, o *agent.Options) {
21752311
o.ExperimentalDevcontainersEnabled = true
2312+
o.ContainerAPIOptions = append(o.ContainerAPIOptions,
2313+
agentcontainers.WithContainerLabelIncludeFilter("devcontainer.local_folder", workspaceFolder),
2314+
)
21762315
})
21772316

21782317
ctx := testutil.Context(t, testutil.WaitLong)

0 commit comments

Comments
 (0)