Skip to content

feat: improve coder connect tunnel handling on reconnect #17598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Prev Previous commit
Next Next commit
refactored FreshState to UpdateKind to be a better indicator of the f…
…ield's usage
  • Loading branch information
ibetitsmike committed May 5, 2025
commit 5dec731072d71659a3851a39cabe9d83702669fa
23 changes: 15 additions & 8 deletions tailnet/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,7 @@ func (t *tunnelUpdater) recvLoop() {
t.logger.Debug(context.Background(), "tunnel updater recvLoop started")
defer t.logger.Debug(context.Background(), "tunnel updater recvLoop done")
defer close(t.recvLoopDone)
freshState := true
updateKind := Snapshot
for {
update, err := t.client.Recv()
if err != nil {
Expand All @@ -1062,10 +1062,10 @@ func (t *tunnelUpdater) recvLoop() {
}
t.logger.Debug(context.Background(), "got workspace update",
slog.F("workspace_update", update),
slog.F("fresh_state", freshState),
slog.F("update_kind", updateKind),
)
err = t.handleUpdate(update, freshState)
freshState = false
err = t.handleUpdate(update, updateKind)
updateKind = Diff
if err != nil {
t.logger.Critical(context.Background(), "failed to handle workspace Update", slog.Error(err))
cErr := t.client.Close()
Expand All @@ -1086,16 +1086,23 @@ type WorkspaceUpdate struct {
UpsertedAgents []*Agent
DeletedWorkspaces []*Workspace
DeletedAgents []*Agent
FreshState bool
Kind UpdateKind
}

type UpdateKind int

const (
Diff UpdateKind = iota
Snapshot
)

func (w *WorkspaceUpdate) Clone() WorkspaceUpdate {
clone := WorkspaceUpdate{
UpsertedWorkspaces: make([]*Workspace, len(w.UpsertedWorkspaces)),
UpsertedAgents: make([]*Agent, len(w.UpsertedAgents)),
DeletedWorkspaces: make([]*Workspace, len(w.DeletedWorkspaces)),
DeletedAgents: make([]*Agent, len(w.DeletedAgents)),
FreshState: w.FreshState,
Kind: w.Kind,
}
for i, ws := range w.UpsertedWorkspaces {
clone.UpsertedWorkspaces[i] = &Workspace{
Expand All @@ -1120,7 +1127,7 @@ func (w *WorkspaceUpdate) Clone() WorkspaceUpdate {
return clone
}

func (t *tunnelUpdater) handleUpdate(update *proto.WorkspaceUpdate, freshState bool) error {
func (t *tunnelUpdater) handleUpdate(update *proto.WorkspaceUpdate, updateKind UpdateKind) error {
t.Lock()
defer t.Unlock()

Expand All @@ -1129,7 +1136,7 @@ func (t *tunnelUpdater) handleUpdate(update *proto.WorkspaceUpdate, freshState b
UpsertedAgents: []*Agent{},
DeletedWorkspaces: []*Workspace{},
DeletedAgents: []*Agent{},
FreshState: freshState,
Kind: updateKind,
}

for _, uw := range update.UpsertedWorkspaces {
Expand Down
15 changes: 8 additions & 7 deletions tailnet/controllers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1611,15 +1611,14 @@ func TestTunnelAllWorkspaceUpdatesController_Initial(t *testing.T) {
},
DeletedWorkspaces: []*tailnet.Workspace{},
DeletedAgents: []*tailnet.Agent{},
FreshState: true,
Kind: tailnet.Snapshot,
}

// And the callback
cbUpdate := testutil.TryReceive(ctx, t, fUH.ch)
require.Equal(t, currentState, cbUpdate)

// Current recvState should match but shouldn't be a fresh state
currentState.FreshState = false
recvState, err := updateCtrl.CurrentState()
require.NoError(t, err)
slices.SortFunc(recvState.UpsertedWorkspaces, func(a, b *tailnet.Workspace) int {
Expand All @@ -1628,6 +1627,9 @@ func TestTunnelAllWorkspaceUpdatesController_Initial(t *testing.T) {
slices.SortFunc(recvState.UpsertedAgents, func(a, b *tailnet.Agent) int {
return strings.Compare(a.Name, b.Name)
})
// tunnel is still open, so it's a diff
currentState.Kind = tailnet.Diff

require.Equal(t, currentState, recvState)
}

Expand Down Expand Up @@ -1694,16 +1696,17 @@ func TestTunnelAllWorkspaceUpdatesController_DeleteAgent(t *testing.T) {
},
DeletedWorkspaces: []*tailnet.Workspace{},
DeletedAgents: []*tailnet.Agent{},
FreshState: true,
Kind: tailnet.Snapshot,
}

cbUpdate := testutil.TryReceive(ctx, t, fUH.ch)
require.Equal(t, initRecvUp, cbUpdate)

// Current state should match initial but shouldn't be a fresh state
initRecvUp.FreshState = false
state, err := updateCtrl.CurrentState()
require.NoError(t, err)
// tunnel is still open, so it's a diff
initRecvUp.Kind = tailnet.Diff

require.Equal(t, initRecvUp, state)

// Send update that removes w1a1 and adds w1a2
Expand Down Expand Up @@ -1757,7 +1760,6 @@ func TestTunnelAllWorkspaceUpdatesController_DeleteAgent(t *testing.T) {
"w1.coder.": {ws1a1IP},
}},
},
FreshState: false,
}
require.Equal(t, sndRecvUpdate, cbUpdate)

Expand All @@ -1776,7 +1778,6 @@ func TestTunnelAllWorkspaceUpdatesController_DeleteAgent(t *testing.T) {
},
DeletedWorkspaces: []*tailnet.Workspace{},
DeletedAgents: []*tailnet.Agent{},
FreshState: false,
}, state)
}

Expand Down
14 changes: 9 additions & 5 deletions vpn/tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,9 @@ func (u *updater) sendUpdateResponse(req *request[*TunnelMessage, *ManagerMessag
// createPeerUpdateLocked creates a PeerUpdate message from a workspace update, populating
// the network status of the agents.
func (u *updater) createPeerUpdateLocked(update tailnet.WorkspaceUpdate) *PeerUpdate {
// this flag is true on the first update after a reconnect
if update.FreshState {
processFreshState(&update, u.agents)
// if the update is a snapshot, we need to process the full state
if update.Kind == tailnet.Snapshot {
processSnapshotUpdate(&update, u.agents)
}

out := &PeerUpdate{
Expand Down Expand Up @@ -554,8 +554,12 @@ func (u *updater) netStatusLoop() {
}
}

// processFreshState handles the logic for when a fresh state update is received.
func processFreshState(update *tailnet.WorkspaceUpdate, agents map[uuid.UUID]tailnet.Agent) {
// processSnapshotUpdate handles the logic when a full state update is received.
// When the tunnel is live, we only receive diffs, but the first packet on any given
// reconnect to the tailnet API is a full state.
// Without this logic we weren't processing deletes for any workspaces or agents deleted
// while the client was disconnected while the computer was asleep.
func processSnapshotUpdate(update *tailnet.WorkspaceUpdate, agents map[uuid.UUID]tailnet.Agent) {
// ignoredWorkspaces is initially populated with the workspaces that are
// in the current update. Later on we populate it with the deleted workspaces too
// so that we don't send duplicate updates. Same applies to ignoredAgents.
Expand Down
19 changes: 10 additions & 9 deletions vpn/tunnel_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ func TestTunnel_sendAgentUpdateReconnect(t *testing.T) {
},
},
},
FreshState: true,
Kind: tailnet.Snapshot,
})
require.NoError(t, err)

Expand All @@ -579,6 +579,7 @@ func TestTunnel_sendAgentUpdateReconnect(t *testing.T) {
require.NotNil(t, peerUpdate)
require.Len(t, peerUpdate.UpsertedAgents, 1)
require.Len(t, peerUpdate.DeletedAgents, 1)
require.Len(t, peerUpdate.DeletedWorkspaces, 0)

require.Equal(t, aID2[:], peerUpdate.UpsertedAgents[0].Id)
require.Equal(t, hsTime, peerUpdate.UpsertedAgents[0].LastHandshake.AsTime())
Expand Down Expand Up @@ -667,7 +668,7 @@ func TestTunnel_sendAgentUpdateWorkspaceReconnect(t *testing.T) {
},
},
},
FreshState: true,
Kind: tailnet.Snapshot,
})
require.NoError(t, err)

Expand Down Expand Up @@ -757,7 +758,7 @@ func TestProcessFreshState(t *testing.T) {
name: "NoChange",
initialAgents: initialAgents,
update: &tailnet.WorkspaceUpdate{
FreshState: true,
Kind: tailnet.Snapshot,
UpsertedWorkspaces: []*tailnet.Workspace{&ws1, &ws2},
UpsertedAgents: []*tailnet.Agent{&agent1, &agent2, &agent4},
DeletedWorkspaces: []*tailnet.Workspace{},
Expand All @@ -772,7 +773,7 @@ func TestProcessFreshState(t *testing.T) {
name: "AgentAdded", // Agent 3 added in update
initialAgents: initialAgents,
update: &tailnet.WorkspaceUpdate{
FreshState: true,
Kind: tailnet.Snapshot,
UpsertedWorkspaces: []*tailnet.Workspace{&ws1, &ws2, &ws3},
UpsertedAgents: []*tailnet.Agent{&agent1, &agent2, &agent3, &agent4},
DeletedWorkspaces: []*tailnet.Workspace{},
Expand All @@ -787,7 +788,7 @@ func TestProcessFreshState(t *testing.T) {
name: "AgentRemovedWorkspaceAlsoRemoved", // Agent 2 removed, ws2 also removed
initialAgents: initialAgents,
update: &tailnet.WorkspaceUpdate{
FreshState: true,
Kind: tailnet.Snapshot,
UpsertedWorkspaces: []*tailnet.Workspace{&ws1}, // ws2 not present
UpsertedAgents: []*tailnet.Agent{&agent1, &agent4}, // agent2 not present
DeletedWorkspaces: []*tailnet.Workspace{},
Expand All @@ -804,7 +805,7 @@ func TestProcessFreshState(t *testing.T) {
name: "AgentRemovedWorkspaceStays", // Agent 4 removed, but ws1 stays (due to agent1)
initialAgents: initialAgents,
update: &tailnet.WorkspaceUpdate{
FreshState: true,
Kind: tailnet.Snapshot,
UpsertedWorkspaces: []*tailnet.Workspace{&ws1, &ws2}, // ws1 still present
UpsertedAgents: []*tailnet.Agent{&agent1, &agent2}, // agent4 not present
DeletedWorkspaces: []*tailnet.Workspace{},
Expand All @@ -821,7 +822,7 @@ func TestProcessFreshState(t *testing.T) {
name: "InitialAgentsEmpty",
initialAgents: map[uuid.UUID]tailnet.Agent{}, // Start with no agents known
update: &tailnet.WorkspaceUpdate{
FreshState: true,
Kind: tailnet.Snapshot,
UpsertedWorkspaces: []*tailnet.Workspace{&ws1, &ws2},
UpsertedAgents: []*tailnet.Agent{&agent1, &agent2},
DeletedWorkspaces: []*tailnet.Workspace{},
Expand All @@ -836,7 +837,7 @@ func TestProcessFreshState(t *testing.T) {
name: "UpdateEmpty", // Fresh state says nothing exists
initialAgents: initialAgents,
update: &tailnet.WorkspaceUpdate{
FreshState: true,
Kind: tailnet.Snapshot,
UpsertedWorkspaces: []*tailnet.Workspace{},
UpsertedAgents: []*tailnet.Agent{},
DeletedWorkspaces: []*tailnet.Workspace{},
Expand All @@ -861,7 +862,7 @@ func TestProcessFreshState(t *testing.T) {
agentsCopy := make(map[uuid.UUID]tailnet.Agent)
maps.Copy(agentsCopy, tt.initialAgents)

processFreshState(tt.update, agentsCopy)
processSnapshotUpdate(tt.update, agentsCopy)

require.ElementsMatch(t, tt.expectedDelete.DeletedAgents, tt.update.DeletedAgents, "DeletedAgents mismatch")
require.ElementsMatch(t, tt.expectedDelete.DeletedWorkspaces, tt.update.DeletedWorkspaces, "DeletedWorkspaces mismatch")
Expand Down
Loading