From b87fd812f645ad299fbfb3b99ea67d05317c9926 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Thu, 26 Jan 2023 16:05:25 -0600 Subject: [PATCH] fix(wsconncache): only allow one peer per connection If an agent went away and reconnected, the wsconncache connection would be polluted for about 10m because there would be two peers with the same IP. The old peer always had priority, which caused the dashboard to try and always dial the old peer until it was removed. Fixes: https://github.com/coder/coder/issues/5292 --- coderd/workspaceagents.go | 20 ++++++++++++++------ tailnet/conn.go | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index ca1a7ab58b69d..32dc30b313781 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -402,11 +402,6 @@ func (api *API) workspaceAgentListeningPorts(rw http.ResponseWriter, r *http.Req func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (*codersdk.AgentConn, error) { clientConn, serverConn := net.Pipe() - go func() { - <-r.Context().Done() - _ = clientConn.Close() - _ = serverConn.Close() - }() derpMap := api.DERPMap.Clone() for _, region := range derpMap.Regions { @@ -453,7 +448,16 @@ func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (* } sendNodes, _ := tailnet.ServeCoordinator(clientConn, func(node []*tailnet.Node) error { - return conn.UpdateNodes(node) + err := conn.RemoveAllPeers() + if err != nil { + return xerrors.Errorf("remove all peers: %w", err) + } + + err = conn.UpdateNodes(node) + if err != nil { + return xerrors.Errorf("update nodes: %w", err) + } + return nil }) conn.SetNodeCallback(sendNodes) go func() { @@ -465,6 +469,10 @@ func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (* }() return &codersdk.AgentConn{ Conn: conn, + CloseFunc: func() { + _ = clientConn.Close() + _ = serverConn.Close() + }, }, nil } diff --git a/tailnet/conn.go b/tailnet/conn.go index a23ef3f5a184d..4f2315f4e6dfe 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -325,6 +325,31 @@ func (c *Conn) SetDERPMap(derpMap *tailcfg.DERPMap) { c.wireguardEngine.SetDERPMap(derpMap) } +func (c *Conn) RemoveAllPeers() error { + c.mutex.Lock() + defer c.mutex.Unlock() + + c.netMap.Peers = []*tailcfg.Node{} + c.peerMap = map[tailcfg.NodeID]*tailcfg.Node{} + netMapCopy := *c.netMap + c.wireguardEngine.SetNetworkMap(&netMapCopy) + cfg, err := nmcfg.WGCfg(c.netMap, Logger(c.logger.Named("wgconfig")), netmap.AllowSingleHosts, "") + if err != nil { + return xerrors.Errorf("update wireguard config: %w", err) + } + err = c.wireguardEngine.Reconfig(cfg, c.wireguardRouter, &dns.Config{}, &tailcfg.Debug{}) + if err != nil { + if c.isClosed() { + return nil + } + if errors.Is(err, wgengine.ErrNoChanges) { + return nil + } + return xerrors.Errorf("reconfig: %w", err) + } + return nil +} + // UpdateNodes connects with a set of peers. This can be constantly updated, // and peers will continually be reconnected as necessary. func (c *Conn) UpdateNodes(nodes []*Node) error {