Skip to content

Commit bea0596

Browse files
committed
fix(agent/agentcontainers): reduce need to recreate sub agents
Updates #18332
1 parent c033618 commit bea0596

File tree

3 files changed

+127
-78
lines changed

3 files changed

+127
-78
lines changed

agent/agentcontainers/api.go

Lines changed: 69 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -977,7 +977,7 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
977977
)
978978

979979
// Check if subagent already exists for this devcontainer.
980-
recreateSubAgent := false
980+
maybeRecreateSubAgent := false
981981
proc, injected := api.injectedSubAgentProcs[dc.WorkspaceFolder]
982982
if injected {
983983
if proc.containerID == container.ID && proc.ctx.Err() == nil {
@@ -992,12 +992,15 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
992992
logger.Debug(ctx, "container ID changed, injecting subagent into new container",
993993
slog.F("old_container_id", proc.containerID),
994994
)
995-
recreateSubAgent = true
995+
maybeRecreateSubAgent = true
996996
}
997997

998998
// Container ID changed or the subagent process is not running,
999999
// stop the existing subagent context to replace it.
10001000
proc.stop()
1001+
} else {
1002+
// Set SubAgent defaults.
1003+
proc.agent.OperatingSystem = "linux" // Assuming Linux for devcontainers.
10011004
}
10021005

10031006
// Prepare the subAgentProcess to be used when running the subagent.
@@ -1090,36 +1093,31 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
10901093
// logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err))
10911094
// }
10921095

1093-
// Detect workspace folder by executing `pwd` in the container.
1094-
// NOTE(mafredri): This is a quick and dirty way to detect the
1095-
// workspace folder inside the container. In the future we will
1096-
// rely more on `devcontainer read-configuration`.
1097-
var pwdBuf bytes.Buffer
1098-
err = api.dccli.Exec(ctx, dc.WorkspaceFolder, dc.ConfigPath, "pwd", []string{},
1099-
WithExecOutput(&pwdBuf, io.Discard),
1100-
WithExecContainerID(container.ID),
1101-
)
1102-
if err != nil {
1103-
return xerrors.Errorf("check workspace folder in container: %w", err)
1104-
}
1105-
directory := strings.TrimSpace(pwdBuf.String())
1106-
if directory == "" {
1107-
logger.Warn(ctx, "detected workspace folder is empty, using default workspace folder",
1108-
slog.F("default_workspace_folder", DevcontainerDefaultContainerWorkspaceFolder),
1096+
updatedSubAgent := proc.agent
1097+
updatedSubAgent.Name = dc.Name
1098+
1099+
if proc.agent.ID == uuid.Nil || maybeRecreateSubAgent {
1100+
// Detect workspace folder by executing `pwd` in the container.
1101+
// NOTE(mafredri): This is a quick and dirty way to detect the
1102+
// workspace folder inside the container. In the future we will
1103+
// rely more on `devcontainer read-configuration`.
1104+
var pwdBuf bytes.Buffer
1105+
err = api.dccli.Exec(ctx, dc.WorkspaceFolder, dc.ConfigPath, "pwd", []string{},
1106+
WithExecOutput(&pwdBuf, io.Discard),
1107+
WithExecContainerID(container.ID),
11091108
)
1110-
directory = DevcontainerDefaultContainerWorkspaceFolder
1111-
}
1112-
1113-
if proc.agent.ID != uuid.Nil && recreateSubAgent {
1114-
logger.Debug(ctx, "deleting existing subagent for recreation", slog.F("agent_id", proc.agent.ID))
1115-
client := *api.subAgentClient.Load()
1116-
err = client.Delete(ctx, proc.agent.ID)
11171109
if err != nil {
1118-
return xerrors.Errorf("delete existing subagent failed: %w", err)
1110+
return xerrors.Errorf("check workspace folder in container: %w", err)
11191111
}
1120-
proc.agent = SubAgent{}
1121-
}
1122-
if proc.agent.ID == uuid.Nil {
1112+
directory := strings.TrimSpace(pwdBuf.String())
1113+
if directory == "" {
1114+
logger.Warn(ctx, "detected workspace folder is empty, using default workspace folder",
1115+
slog.F("default_workspace_folder", DevcontainerDefaultContainerWorkspaceFolder),
1116+
)
1117+
directory = DevcontainerDefaultContainerWorkspaceFolder
1118+
}
1119+
updatedSubAgent.Directory = directory
1120+
11231121
displayAppsMap := map[codersdk.DisplayApp]bool{
11241122
// NOTE(DanielleMaywood):
11251123
// We use the same defaults here as set in terraform-provider-coder.
@@ -1138,6 +1136,13 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
11381136

11391137
for _, customization := range coderCustomization {
11401138
for app, enabled := range customization.DisplayApps {
1139+
if _, ok := displayAppsMap[app]; !ok {
1140+
logger.Warn(ctx, "unknown display app in devcontainer customization, ignoring",
1141+
slog.F("app", app),
1142+
slog.F("enabled", enabled),
1143+
)
1144+
continue
1145+
}
11411146
displayAppsMap[app] = enabled
11421147
}
11431148
}
@@ -1149,26 +1154,52 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
11491154
displayApps = append(displayApps, app)
11501155
}
11511156
}
1157+
slices.Sort(displayApps)
1158+
1159+
updatedSubAgent.DisplayApps = displayApps
1160+
}
1161+
1162+
logger.Debug(ctx, "proc subagent configuration",
1163+
slog.F("agent", proc.agent),
1164+
)
1165+
logger.Debug(ctx, "subagent configuration",
1166+
slog.F("agent", updatedSubAgent),
1167+
)
1168+
1169+
deleteSubAgent := maybeRecreateSubAgent && !proc.agent.Equal(updatedSubAgent)
1170+
if deleteSubAgent {
1171+
logger.Debug(ctx, "deleting existing subagent for recreation", slog.F("agent_id", proc.agent.ID))
1172+
client := *api.subAgentClient.Load()
1173+
err = client.Delete(ctx, proc.agent.ID)
1174+
if err != nil {
1175+
return xerrors.Errorf("delete existing subagent failed: %w", err)
1176+
}
1177+
proc.agent = SubAgent{} // Clear agent to signal that we need to create a new one.
1178+
}
1179+
1180+
if proc.agent.ID == uuid.Nil {
1181+
// Reset fields that are not part of the request.
1182+
updatedSubAgent.ID = uuid.Nil
1183+
updatedSubAgent.AuthToken = uuid.Nil
11521184

11531185
logger.Debug(ctx, "creating new subagent",
1154-
slog.F("directory", directory),
1155-
slog.F("display_apps", displayApps),
1186+
slog.F("directory", updatedSubAgent.Directory),
1187+
slog.F("display_apps", updatedSubAgent.DisplayApps),
11561188
)
11571189

11581190
// Create new subagent record in the database to receive the auth token.
11591191
client := *api.subAgentClient.Load()
1160-
proc.agent, err = client.Create(ctx, SubAgent{
1161-
Name: dc.Name,
1162-
Directory: directory,
1163-
OperatingSystem: "linux", // Assuming Linux for devcontainers.
1164-
Architecture: arch,
1165-
DisplayApps: displayApps,
1166-
})
1192+
newSubAgent, err := client.Create(ctx, updatedSubAgent)
11671193
if err != nil {
11681194
return xerrors.Errorf("create subagent failed: %w", err)
11691195
}
1196+
proc.agent = newSubAgent
11701197

11711198
logger.Info(ctx, "created new subagent", slog.F("agent_id", proc.agent.ID))
1199+
} else {
1200+
logger.Debug(ctx, "subagent already exists, skipping recreation",
1201+
slog.F("agent_id", proc.agent.ID),
1202+
)
11721203
}
11731204

11741205
api.mu.Lock() // Re-lock to update the agent.

agent/agentcontainers/api_test.go

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ func (w *fakeWatcher) sendEventWaitNextCalled(ctx context.Context, event fsnotif
212212

213213
// fakeSubAgentClient implements SubAgentClient for testing purposes.
214214
type fakeSubAgentClient struct {
215+
logger slog.Logger
215216
agents map[uuid.UUID]agentcontainers.SubAgent
216217

217218
listErrC chan error // If set, send to return error, close to return nil.
@@ -240,6 +241,7 @@ func (m *fakeSubAgentClient) List(ctx context.Context) ([]agentcontainers.SubAge
240241
}
241242

242243
func (m *fakeSubAgentClient) Create(ctx context.Context, agent agentcontainers.SubAgent) (agentcontainers.SubAgent, error) {
244+
m.logger.Debug(ctx, "creating sub agent", slog.F("agent", agent))
243245
if m.createErrC != nil {
244246
select {
245247
case <-ctx.Done():
@@ -261,6 +263,7 @@ func (m *fakeSubAgentClient) Create(ctx context.Context, agent agentcontainers.S
261263
}
262264

263265
func (m *fakeSubAgentClient) Delete(ctx context.Context, id uuid.UUID) error {
266+
m.logger.Debug(ctx, "deleting sub agent", slog.F("id", id.String()))
264267
if m.deleteErrC != nil {
265268
select {
266269
case <-ctx.Done():
@@ -1245,6 +1248,7 @@ func TestAPI(t *testing.T) {
12451248
mClock = quartz.NewMock(t)
12461249
mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t))
12471250
fakeSAC = &fakeSubAgentClient{
1251+
logger: logger.Named("fakeSubAgentClient"),
12481252
createErrC: make(chan error, 1),
12491253
deleteErrC: make(chan error, 1),
12501254
}
@@ -1270,7 +1274,7 @@ func TestAPI(t *testing.T) {
12701274

12711275
mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{
12721276
Containers: []codersdk.WorkspaceAgentContainer{testContainer},
1273-
}, nil).Times(1 + 3) // 1 initial call + 3 updates.
1277+
}, nil).Times(3) // 1 initial call + 2 updates.
12741278
gomock.InOrder(
12751279
mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil),
12761280
mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil),
@@ -1315,19 +1319,20 @@ func TestAPI(t *testing.T) {
13151319
tickerTrap.MustWait(ctx).MustRelease(ctx)
13161320
tickerTrap.Close()
13171321

1318-
// Ensure we only inject the agent once.
1319-
for i := range 3 {
1320-
_, aw := mClock.AdvanceNext()
1321-
aw.MustWait(ctx)
1322+
// Refresh twice to ensure idempotency of agent creation.
1323+
err = api.RefreshContainers(ctx)
1324+
require.NoError(t, err, "refresh containers should not fail")
1325+
t.Logf("Agents created: %d, deleted: %d", len(fakeSAC.created), len(fakeSAC.deleted))
13221326

1323-
t.Logf("Iteration %d: agents created: %d", i+1, len(fakeSAC.created))
1327+
err = api.RefreshContainers(ctx)
1328+
require.NoError(t, err, "refresh containers should not fail")
1329+
t.Logf("Agents created: %d, deleted: %d", len(fakeSAC.created), len(fakeSAC.deleted))
13241330

1325-
// Verify agent was created.
1326-
require.Len(t, fakeSAC.created, 1)
1327-
assert.Equal(t, "test-container", fakeSAC.created[0].Name)
1328-
assert.Equal(t, "/workspaces", fakeSAC.created[0].Directory)
1329-
assert.Len(t, fakeSAC.deleted, 0)
1330-
}
1331+
// Verify agent was created.
1332+
require.Len(t, fakeSAC.created, 1)
1333+
assert.Equal(t, "test-container", fakeSAC.created[0].Name)
1334+
assert.Equal(t, "/workspaces", fakeSAC.created[0].Directory)
1335+
assert.Len(t, fakeSAC.deleted, 0)
13311336

13321337
t.Log("Agent injected successfully, now testing reinjection into the same container...")
13331338

@@ -1349,32 +1354,23 @@ func TestAPI(t *testing.T) {
13491354
// Expect the agent to be reinjected.
13501355
mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{
13511356
Containers: []codersdk.WorkspaceAgentContainer{testContainer},
1352-
}, nil).Times(3) // 3 updates.
1357+
}, nil).Times(1) // 1 update.
13531358
gomock.InOrder(
13541359
mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "test-container-id").Return(runtime.GOARCH, nil),
13551360
mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil),
13561361
mCCLI.EXPECT().Copy(gomock.Any(), "test-container-id", coderBin, "/.coder-agent/coder").Return(nil),
13571362
mCCLI.EXPECT().ExecAs(gomock.Any(), "test-container-id", "root", "chmod", "0755", "/.coder-agent", "/.coder-agent/coder").Return(nil, nil),
13581363
)
13591364

1360-
// Allow agent reinjection to succeed.
1361-
testutil.RequireSend(ctx, t, fakeDCCLI.execErrC, func(cmd string, args ...string) error {
1362-
assert.Equal(t, "pwd", cmd)
1363-
assert.Empty(t, args)
1364-
return nil
1365-
}) // Exec pwd.
1366-
1367-
// Ensure we only inject the agent once.
1368-
for i := range 3 {
1369-
_, aw := mClock.AdvanceNext()
1370-
aw.MustWait(ctx)
1365+
// Agent reinjection will succeed and we will not re-create the
1366+
// agent, nor re-probe pwd.
1367+
err = api.RefreshContainers(ctx)
1368+
require.NoError(t, err, "refresh containers should not fail")
1369+
t.Logf("Agents created: %d, deleted: %d", len(fakeSAC.created), len(fakeSAC.deleted))
13711370

1372-
t.Logf("Iteration %d: agents created: %d", i+1, len(fakeSAC.created))
1373-
1374-
// Verify that the agent was reused.
1375-
require.Len(t, fakeSAC.created, 1)
1376-
assert.Len(t, fakeSAC.deleted, 0)
1377-
}
1371+
// Verify that the agent was reused.
1372+
require.Len(t, fakeSAC.created, 1)
1373+
assert.Len(t, fakeSAC.deleted, 0)
13781374

13791375
t.Log("Agent reinjected successfully, now testing agent deletion and recreation...")
13801376

@@ -1383,7 +1379,7 @@ func TestAPI(t *testing.T) {
13831379
// Expect the agent to be injected.
13841380
mCCLI.EXPECT().List(gomock.Any()).Return(codersdk.WorkspaceAgentListContainersResponse{
13851381
Containers: []codersdk.WorkspaceAgentContainer{testContainer},
1386-
}, nil).Times(3) // 3 updates.
1382+
}, nil).Times(1) // 1 update.
13871383
gomock.InOrder(
13881384
mCCLI.EXPECT().DetectArchitecture(gomock.Any(), "new-test-container-id").Return(runtime.GOARCH, nil),
13891385
mCCLI.EXPECT().ExecAs(gomock.Any(), "new-test-container-id", "root", "mkdir", "-p", "/.coder-agent").Return(nil, nil),
@@ -1404,7 +1400,20 @@ func TestAPI(t *testing.T) {
14041400
})
14051401
<-terminated
14061402

1407-
// Simulate the agent deletion.
1403+
fakeDCCLI.readConfig.MergedConfiguration.Customizations.Coder = []agentcontainers.CoderCustomization{
1404+
{
1405+
DisplayApps: map[codersdk.DisplayApp]bool{
1406+
codersdk.DisplayAppSSH: true,
1407+
codersdk.DisplayAppWebTerminal: true,
1408+
codersdk.DisplayAppVSCodeDesktop: true,
1409+
codersdk.DisplayAppVSCodeInsiders: true,
1410+
codersdk.DisplayAppPortForward: true,
1411+
},
1412+
},
1413+
}
1414+
1415+
// Simulate the agent deletion (this happens because the
1416+
// devcontainer configuration changed).
14081417
testutil.RequireSend(ctx, t, fakeSAC.deleteErrC, nil)
14091418
// Expect the agent to be recreated.
14101419
testutil.RequireSend(ctx, t, fakeSAC.createErrC, nil)
@@ -1414,13 +1423,9 @@ func TestAPI(t *testing.T) {
14141423
return nil
14151424
}) // Exec pwd.
14161425

1417-
// Advance the clock to run updaterLoop.
1418-
for i := range 3 {
1419-
_, aw := mClock.AdvanceNext()
1420-
aw.MustWait(ctx)
1421-
1422-
t.Logf("Iteration %d: agents created: %d, deleted: %d", i+1, len(fakeSAC.created), len(fakeSAC.deleted))
1423-
}
1426+
err = api.RefreshContainers(ctx)
1427+
require.NoError(t, err, "refresh containers should not fail")
1428+
t.Logf("Agents created: %d, deleted: %d", len(fakeSAC.created), len(fakeSAC.deleted))
14241429

14251430
// Verify the agent was deleted and recreated.
14261431
require.Len(t, fakeSAC.deleted, 1, "there should be one deleted agent after recreation")
@@ -1453,6 +1458,7 @@ func TestAPI(t *testing.T) {
14531458
mClock = quartz.NewMock(t)
14541459
mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t))
14551460
fakeSAC = &fakeSubAgentClient{
1461+
logger: logger.Named("fakeSubAgentClient"),
14561462
agents: map[uuid.UUID]agentcontainers.SubAgent{
14571463
existingAgentID: existingAgent,
14581464
},
@@ -1577,7 +1583,10 @@ func TestAPI(t *testing.T) {
15771583
logger = testutil.Logger(t)
15781584
mClock = quartz.NewMock(t)
15791585
mCCLI = acmock.NewMockContainerCLI(gomock.NewController(t))
1580-
fSAC = &fakeSubAgentClient{createErrC: make(chan error, 1)}
1586+
fSAC = &fakeSubAgentClient{
1587+
logger: logger.Named("fakeSubAgentClient"),
1588+
createErrC: make(chan error, 1),
1589+
}
15811590
fDCCLI = &fakeDevcontainerCLI{
15821591
readConfig: agentcontainers.DevcontainerConfig{
15831592
MergedConfiguration: agentcontainers.DevcontainerConfiguration{

agent/agentcontainers/subagent.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package agentcontainers
22

33
import (
44
"context"
5+
"slices"
56

67
"github.com/google/uuid"
78
"golang.org/x/xerrors"
@@ -23,6 +24,14 @@ type SubAgent struct {
2324
DisplayApps []codersdk.DisplayApp
2425
}
2526

27+
func (s SubAgent) Equal(other SubAgent) bool {
28+
return s.Name == other.Name &&
29+
s.Directory == other.Directory &&
30+
s.Architecture == other.Architecture &&
31+
s.OperatingSystem == other.OperatingSystem &&
32+
slices.Equal(s.DisplayApps, other.DisplayApps)
33+
}
34+
2635
// SubAgentClient is an interface for managing sub agents and allows
2736
// changing the implementation without having to deal with the
2837
// agentproto package directly.

0 commit comments

Comments
 (0)