Skip to content

Commit 269bc1e

Browse files
committed
fix(wsconncache): only allow one peer per connection
If an agent went away and reconnected, the wsconncache connection would be polluted for about 10m because there would be two peers with the same IP. The old peer always had priority, which caused the dashboard to try and always dial the old peer until it was removed. Fixes: #5292
1 parent b0a1615 commit 269bc1e

File tree

2 files changed

+47
-6
lines changed

2 files changed

+47
-6
lines changed

coderd/workspaceagents.go

+22-6
Original file line numberDiff line numberDiff line change
@@ -278,18 +278,21 @@ func (api *API) workspaceAgentPTY(rw http.ResponseWriter, r *http.Request) {
278278
_, wsNetConn := websocketNetConn(ctx, conn, websocket.MessageBinary)
279279
defer wsNetConn.Close() // Also closes conn.
280280

281+
fmt.Println("getting conn from cache")
281282
agentConn, release, err := api.workspaceAgentCache.Acquire(r, workspaceAgent.ID)
282283
if err != nil {
283284
_ = conn.Close(websocket.StatusInternalError, httpapi.WebsocketCloseSprintf("dial workspace agent: %s", err))
284285
return
285286
}
286287
defer release()
288+
fmt.Println("got conn from cache")
287289
ptNetConn, err := agentConn.ReconnectingPTY(ctx, reconnect, uint16(height), uint16(width), r.URL.Query().Get("command"))
288290
if err != nil {
289291
_ = conn.Close(websocket.StatusInternalError, httpapi.WebsocketCloseSprintf("dial: %s", err))
290292
return
291293
}
292294
defer ptNetConn.Close()
295+
fmt.Println("dialed reconnecting pty, doing bicopy")
293296
agent.Bicopy(ctx, wsNetConn, ptNetConn)
294297
}
295298

@@ -402,11 +405,11 @@ func (api *API) workspaceAgentListeningPorts(rw http.ResponseWriter, r *http.Req
402405

403406
func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (*codersdk.AgentConn, error) {
404407
clientConn, serverConn := net.Pipe()
405-
go func() {
406-
<-r.Context().Done()
407-
_ = clientConn.Close()
408-
_ = serverConn.Close()
409-
}()
408+
// go func() {
409+
// <-r.Context().Done()
410+
// _ = clientConn.Close()
411+
// _ = serverConn.Close()
412+
// }()
410413

411414
derpMap := api.DERPMap.Clone()
412415
for _, region := range derpMap.Regions {
@@ -453,7 +456,16 @@ func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (*
453456
}
454457

455458
sendNodes, _ := tailnet.ServeCoordinator(clientConn, func(node []*tailnet.Node) error {
456-
return conn.UpdateNodes(node)
459+
err := conn.RemoveAllPeers()
460+
if err != nil {
461+
return xerrors.Errorf("remove all peers: %w", err)
462+
}
463+
464+
err = conn.UpdateNodes(node)
465+
if err != nil {
466+
return xerrors.Errorf("update nodes: %w", err)
467+
}
468+
return nil
457469
})
458470
conn.SetNodeCallback(sendNodes)
459471
go func() {
@@ -465,6 +477,10 @@ func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (*
465477
}()
466478
return &codersdk.AgentConn{
467479
Conn: conn,
480+
CloseFunc: func() {
481+
_ = clientConn.Close()
482+
_ = serverConn.Close()
483+
},
468484
}, nil
469485
}
470486

tailnet/conn.go

+25
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,31 @@ func (c *Conn) SetDERPMap(derpMap *tailcfg.DERPMap) {
325325
c.wireguardEngine.SetDERPMap(derpMap)
326326
}
327327

328+
func (c *Conn) RemoveAllPeers() error {
329+
c.mutex.Lock()
330+
defer c.mutex.Unlock()
331+
332+
c.netMap.Peers = []*tailcfg.Node{}
333+
c.peerMap = map[tailcfg.NodeID]*tailcfg.Node{}
334+
netMapCopy := *c.netMap
335+
c.wireguardEngine.SetNetworkMap(&netMapCopy)
336+
cfg, err := nmcfg.WGCfg(c.netMap, Logger(c.logger.Named("wgconfig")), netmap.AllowSingleHosts, "")
337+
if err != nil {
338+
return xerrors.Errorf("update wireguard config: %w", err)
339+
}
340+
err = c.wireguardEngine.Reconfig(cfg, c.wireguardRouter, &dns.Config{}, &tailcfg.Debug{})
341+
if err != nil {
342+
if c.isClosed() {
343+
return nil
344+
}
345+
if errors.Is(err, wgengine.ErrNoChanges) {
346+
return nil
347+
}
348+
return xerrors.Errorf("reconfig: %w", err)
349+
}
350+
return nil
351+
}
352+
328353
// UpdateNodes connects with a set of peers. This can be constantly updated,
329354
// and peers will continually be reconnected as necessary.
330355
func (c *Conn) UpdateNodes(nodes []*Node) error {

0 commit comments

Comments
 (0)