-
Notifications
You must be signed in to change notification settings - Fork 874
feat: improve coder connect tunnel handling on reconnect #17598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
52f1c2b
288c33e
0de5df2
2b77b8e
3d39713
ca2e1bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1611,13 +1611,15 @@ func TestTunnelAllWorkspaceUpdatesController_Initial(t *testing.T) { | |
}, | ||
DeletedWorkspaces: []*tailnet.Workspace{}, | ||
DeletedAgents: []*tailnet.Agent{}, | ||
FreshState: true, | ||
} | ||
|
||
// And the callback | ||
cbUpdate := testutil.TryReceive(ctx, t, fUH.ch) | ||
require.Equal(t, currentState, cbUpdate) | ||
|
||
// Current recvState should match | ||
// Current recvState should match but shouldn't be a fresh state | ||
currentState.FreshState = false | ||
recvState, err := updateCtrl.CurrentState() | ||
require.NoError(t, err) | ||
slices.SortFunc(recvState.UpsertedWorkspaces, func(a, b *tailnet.Workspace) int { | ||
|
@@ -1692,12 +1694,14 @@ func TestTunnelAllWorkspaceUpdatesController_DeleteAgent(t *testing.T) { | |
}, | ||
DeletedWorkspaces: []*tailnet.Workspace{}, | ||
DeletedAgents: []*tailnet.Agent{}, | ||
FreshState: true, | ||
} | ||
|
||
cbUpdate := testutil.TryReceive(ctx, t, fUH.ch) | ||
require.Equal(t, initRecvUp, cbUpdate) | ||
|
||
// Current state should match initial | ||
// Current state should match initial but shouldn't be a fresh state | ||
initRecvUp.FreshState = false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems wrong to me. When we ask for the current state, we are getting a complete snapshot, not a diff, so it should be "fresh" in your terminology. |
||
state, err := updateCtrl.CurrentState() | ||
require.NoError(t, err) | ||
require.Equal(t, initRecvUp, state) | ||
|
@@ -1753,6 +1757,7 @@ func TestTunnelAllWorkspaceUpdatesController_DeleteAgent(t *testing.T) { | |
"w1.coder.": {ws1a1IP}, | ||
}}, | ||
}, | ||
FreshState: false, | ||
} | ||
require.Equal(t, sndRecvUpdate, cbUpdate) | ||
|
||
|
@@ -1771,6 +1776,7 @@ func TestTunnelAllWorkspaceUpdatesController_DeleteAgent(t *testing.T) { | |
}, | ||
DeletedWorkspaces: []*tailnet.Workspace{}, | ||
DeletedAgents: []*tailnet.Agent{}, | ||
FreshState: false, | ||
}, state) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -397,14 +397,26 @@ func (u *updater) sendUpdateResponse(req *request[*TunnelMessage, *ManagerMessag | |
// createPeerUpdateLocked creates a PeerUpdate message from a workspace update, populating | ||
// the network status of the agents. | ||
func (u *updater) createPeerUpdateLocked(update tailnet.WorkspaceUpdate) *PeerUpdate { | ||
// this flag is true on the first update after a reconnect | ||
if update.FreshState { | ||
processFreshState(&update, u.agents) | ||
} | ||
|
||
out := &PeerUpdate{ | ||
UpsertedWorkspaces: make([]*Workspace, len(update.UpsertedWorkspaces)), | ||
UpsertedAgents: make([]*Agent, len(update.UpsertedAgents)), | ||
DeletedWorkspaces: make([]*Workspace, len(update.DeletedWorkspaces)), | ||
DeletedAgents: make([]*Agent, len(update.DeletedAgents)), | ||
} | ||
|
||
u.saveUpdateLocked(update) | ||
// save the workspace update to the tunnel's state, such that it can | ||
// be used to populate automated peer updates. | ||
for _, agent := range update.UpsertedAgents { | ||
u.agents[agent.ID] = agent.Clone() | ||
} | ||
for _, agent := range update.DeletedAgents { | ||
delete(u.agents, agent.ID) | ||
} | ||
|
||
for i, ws := range update.UpsertedWorkspaces { | ||
out.UpsertedWorkspaces[i] = &Workspace{ | ||
|
@@ -413,6 +425,7 @@ func (u *updater) createPeerUpdateLocked(update tailnet.WorkspaceUpdate) *PeerUp | |
Status: Workspace_Status(ws.Status), | ||
} | ||
} | ||
|
||
upsertedAgents := u.convertAgentsLocked(update.UpsertedAgents) | ||
out.UpsertedAgents = upsertedAgents | ||
for i, ws := range update.DeletedWorkspaces { | ||
|
@@ -472,17 +485,6 @@ func (u *updater) convertAgentsLocked(agents []*tailnet.Agent) []*Agent { | |
return out | ||
} | ||
|
||
// saveUpdateLocked saves the workspace update to the tunnel's state, such that it can | ||
// be used to populate automated peer updates. | ||
func (u *updater) saveUpdateLocked(update tailnet.WorkspaceUpdate) { | ||
for _, agent := range update.UpsertedAgents { | ||
u.agents[agent.ID] = agent.Clone() | ||
} | ||
for _, agent := range update.DeletedAgents { | ||
delete(u.agents, agent.ID) | ||
} | ||
} | ||
|
||
// setConn sets the `conn` and returns false if there's already a connection set. | ||
func (u *updater) setConn(conn Conn) bool { | ||
u.mu.Lock() | ||
|
@@ -552,6 +554,42 @@ func (u *updater) netStatusLoop() { | |
} | ||
} | ||
|
||
// processFreshState handles the logic for when a fresh state update is received. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should probably expand this comment to explain why this is necessary. Should mention that we only receive diffs except for the first packet on any given reconnect to the tailnet API, which means that without this we weren't processing deletes for any workspaces or agents deleted while the client was disconnected (e.g. while the computer was asleep) |
||
func processFreshState(update *tailnet.WorkspaceUpdate, agents map[uuid.UUID]tailnet.Agent) { | ||
// ignoredWorkspaces is initially populated with the workspaces that are | ||
// in the current update. Later on we populate it with the deleted workspaces too | ||
// so that we don't send duplicate updates. Same applies to ignoredAgents. | ||
ignoredWorkspaces := make(map[uuid.UUID]struct{}, len(update.UpsertedWorkspaces)) | ||
ignoredAgents := make(map[uuid.UUID]struct{}, len(update.UpsertedAgents)) | ||
|
||
for _, workspace := range update.UpsertedWorkspaces { | ||
ignoredWorkspaces[workspace.ID] = struct{}{} | ||
} | ||
for _, agent := range update.UpsertedAgents { | ||
ignoredAgents[agent.ID] = struct{}{} | ||
} | ||
for _, agent := range agents { | ||
if _, ok := ignoredAgents[agent.ID]; !ok { | ||
// delete any current agents that are not in the new update | ||
update.DeletedAgents = append(update.DeletedAgents, &tailnet.Agent{ | ||
ID: agent.ID, | ||
Name: agent.Name, | ||
WorkspaceID: agent.WorkspaceID, | ||
}) | ||
// if the workspace connected to an agent we're deleting, | ||
// is not present in the fresh state, add it to the deleted workspaces | ||
if _, ok := ignoredWorkspaces[agent.WorkspaceID]; !ok { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The assumption here seems to be that every deleted workspace is going to be associated with a deleted agent, which, I think, assumes that every workspace has at least one agent. That's definitely not true of stopped workspaces. I think it also technically doesn't have to be true of started workspaces (although in practice it is). The consequence is that if a workspace that was stopped is deleted while we are disconnected, then I don't think we'll ever generate a Delete for it on the protocol and Coder Desktop will continue to think it exists. This is a good test case to have! Basically, right now we're only storing the agents, not the workspaces. So, there is no way for us to notice that a workspace without an agent needs to be deleted. I don't think there is any way around this fact and we need to store the workspaces too in order to do the right thing. |
||
update.DeletedWorkspaces = append(update.DeletedWorkspaces, &tailnet.Workspace{ | ||
// other fields cannot be populated because the tunnel | ||
// only stores agents and corresponding workspaceIDs | ||
ID: agent.WorkspaceID, | ||
}) | ||
ignoredWorkspaces[agent.WorkspaceID] = struct{}{} | ||
} | ||
} | ||
} | ||
} | ||
|
||
// hostsToIPStrings returns a slice of all unique IP addresses in the values | ||
// of the given map. | ||
func hostsToIPStrings(hosts map[dnsname.FQDN][]netip.Addr) []string { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure about others, but "fresh" doesn't really capture the meaning for me. An update that is not fresh sounds like it is outdated or of dubious validity, which isn't the case. I think the most clear would be an enum:
UpdateKind: [Snapshot, Diff]