Skip to content

Commit 6826b97

Browse files
authored
fix: Add latency-check for DERP over HTTP(s) (#3788)
* fix: Add latency-check for DERP over HTTP(s) This fixes scenarios where latency wasn't being reported if a connection had UDP entirely blocked. * Add inactivity ping * Improve coordinator error reporting consistency
1 parent f4c8bfd commit 6826b97

File tree

8 files changed

+49
-32
lines changed

8 files changed

+49
-32
lines changed

coderd/coderd.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,13 @@ func New(options *Options) *API {
165165
// other applications might not as well.
166166
r.Route("/%40{user}/{workspace_and_agent}/apps/{workspaceapp}", apps)
167167
r.Route("/@{user}/{workspace_and_agent}/apps/{workspaceapp}", apps)
168-
r.Get("/derp", derphttp.Handler(api.derpServer).ServeHTTP)
168+
r.Route("/derp", func(r chi.Router) {
169+
r.Get("/", derphttp.Handler(api.derpServer).ServeHTTP)
170+
// This is used when UDP is blocked, and latency must be checked via HTTP(s).
171+
r.Get("/latency-check", func(w http.ResponseWriter, r *http.Request) {
172+
w.WriteHeader(http.StatusOK)
173+
})
174+
})
169175

170176
r.Route("/api/v2", func(r chi.Router) {
171177
r.NotFound(func(rw http.ResponseWriter, r *http.Request) {

coderd/coderd_test.go

+10
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package coderd_test
22

33
import (
44
"context"
5+
"net/http"
56
"net/netip"
67
"strconv"
78
"testing"
@@ -114,3 +115,12 @@ func TestDERP(t *testing.T) {
114115
w1.Close()
115116
w2.Close()
116117
}
118+
119+
func TestDERPLatencyCheck(t *testing.T) {
120+
t.Parallel()
121+
client := coderdtest.New(t, nil)
122+
res, err := client.Request(context.Background(), http.MethodGet, "/derp/latency-check", nil)
123+
require.NoError(t, err)
124+
defer res.Body.Close()
125+
require.Equal(t, http.StatusOK, res.StatusCode)
126+
}

coderd/coderdtest/authtest.go

+1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ func AGPLRoutes(a *AuthTester) (map[string]string, map[string]RouteCheck) {
168168
skipRoutes := map[string]string{
169169
"POST:/api/v2/users/logout": "Logging out deletes the API Key for other routes",
170170
"GET:/derp": "This requires a WebSocket upgrade!",
171+
"GET:/derp/latency-check": "This always returns a 200!",
171172
}
172173

173174
assertRoute := map[string]RouteCheck{

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ replace github.com/tcnksm/go-httpstat => github.com/kylecarbs/go-httpstat v0.0.0
4949

5050
// There are a few minor changes we make to Tailscale that we're slowly upstreaming. Compare here:
5151
// https://github.com/tailscale/tailscale/compare/main...coder:tailscale:main
52-
replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20220831012541-a77bda274fd6
52+
replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20220901032724-2ed2977662a4
5353

5454
require (
5555
cdr.dev/slog v1.4.2-0.20220525200111-18dce5c2cd5f

go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,8 @@ github.com/coder/glog v1.0.1-0.20220322161911-7365fe7f2cd1 h1:UqBrPWSYvRI2s5RtOu
352352
github.com/coder/glog v1.0.1-0.20220322161911-7365fe7f2cd1/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4=
353353
github.com/coder/retry v1.3.0 h1:5lAAwt/2Cm6lVmnfBY7sOMXcBOwcwJhmV5QGSELIVWY=
354354
github.com/coder/retry v1.3.0/go.mod h1:tXuRgZgWjUnU5LZPT4lJh4ew2elUhexhlnXzrJWdyFY=
355-
github.com/coder/tailscale v1.1.1-0.20220831012541-a77bda274fd6 h1://ApBDDh58hFwMe0AzlgqJrGhzu6Rjk8fQXrR+mbhYE=
356-
github.com/coder/tailscale v1.1.1-0.20220831012541-a77bda274fd6/go.mod h1:MO+tWkQp2YIF3KBnnej/mQvgYccRS5Xk/IrEpZ4Z3BU=
355+
github.com/coder/tailscale v1.1.1-0.20220901032724-2ed2977662a4 h1:TXtMXPt4ds1pM9QrLsfiDEJv7KE8q0yRQGCkSepeKZ8=
356+
github.com/coder/tailscale v1.1.1-0.20220901032724-2ed2977662a4/go.mod h1:MO+tWkQp2YIF3KBnnej/mQvgYccRS5Xk/IrEpZ4Z3BU=
357357
github.com/coder/wireguard-go/tun/netstack v0.0.0-20220823170024-a78136eb0cab h1:9yEvRWXXfyKzXu8AqywCi+tFZAoqCy4wVcsXwuvZNMc=
358358
github.com/coder/wireguard-go/tun/netstack v0.0.0-20220823170024-a78136eb0cab/go.mod h1:TCJ66NtXh3urJotTdoYQOHHkyE899vOQl5TuF+WLSes=
359359
github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4/go.mod h1:nukgQABAEopAHvB6j7cnP5zJ+/3aVcE7hCYqvIwAHyE=

tailnet/conn.go

+13-21
Original file line numberDiff line numberDiff line change
@@ -161,21 +161,6 @@ func NewConn(options *Options) (*Conn, error) {
161161
return nil, xerrors.Errorf("start netstack: %w", err)
162162
}
163163
wireguardEngine = wgengine.NewWatchdog(wireguardEngine)
164-
165-
// Update the wireguard configuration to allow traffic to flow.
166-
wireguardConfig, err := nmcfg.WGCfg(netMap, Logger(options.Logger.Named("wgconfig")), netmap.AllowSingleHosts, "")
167-
if err != nil {
168-
return nil, xerrors.Errorf("create wgcfg: %w", err)
169-
}
170-
171-
wireguardRouter := &router.Config{
172-
LocalAddrs: wireguardConfig.Addresses,
173-
}
174-
err = wireguardEngine.Reconfig(wireguardConfig, wireguardRouter, &dns.Config{}, &tailcfg.Debug{})
175-
if err != nil {
176-
return nil, xerrors.Errorf("reconfig: %w", err)
177-
}
178-
179164
wireguardEngine.SetDERPMap(options.DERPMap)
180165
netMapCopy := *netMap
181166
wireguardEngine.SetNetworkMap(&netMapCopy)
@@ -198,8 +183,10 @@ func NewConn(options *Options) (*Conn, error) {
198183
netMap: netMap,
199184
netStack: netStack,
200185
wireguardMonitor: wireguardMonitor,
201-
wireguardRouter: wireguardRouter,
202-
wireguardEngine: wireguardEngine,
186+
wireguardRouter: &router.Config{
187+
LocalAddrs: netMap.Addresses,
188+
},
189+
wireguardEngine: wireguardEngine,
203190
}
204191
netStack.ForwardTCPIn = server.forwardTCP
205192
return server, nil
@@ -261,7 +248,7 @@ func (c *Conn) SetNodeCallback(callback func(node *Node)) {
261248
DERPLatency: c.lastDERPLatency,
262249
}
263250
}
264-
c.magicConn.SetNetInfoCallback(func(ni *tailcfg.NetInfo) {
251+
c.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) {
265252
c.lastMutex.Lock()
266253
c.lastPreferredDERP = ni.PreferredDERP
267254
c.lastDERPLatency = ni.DERPLatency
@@ -309,6 +296,7 @@ func (c *Conn) UpdateNodes(nodes []*Node) error {
309296
peerMap[peer.ID] = peer
310297
}
311298
for _, node := range nodes {
299+
peerStatus, ok := status.Peer[node.Key]
312300
peerMap[node.ID] = &tailcfg.Node{
313301
ID: node.ID,
314302
Key: node.Key,
@@ -318,12 +306,18 @@ func (c *Conn) UpdateNodes(nodes []*Node) error {
318306
Endpoints: node.Endpoints,
319307
DERP: fmt.Sprintf("%s:%d", tailcfg.DerpMagicIP, node.PreferredDERP),
320308
Hostinfo: hostinfo.New().View(),
309+
// Starting KeepAlive messages at the initialization
310+
// of a connection cause it to hang for an unknown
311+
// reason. TODO: @kylecarbs debug this!
312+
KeepAlive: ok && peerStatus.Active,
321313
}
322314
}
323315
c.netMap.Peers = make([]*tailcfg.Node, 0, len(peerMap))
324316
for _, peer := range peerMap {
325317
c.netMap.Peers = append(c.netMap.Peers, peer)
326318
}
319+
netMapCopy := *c.netMap
320+
c.wireguardEngine.SetNetworkMap(&netMapCopy)
327321
cfg, err := nmcfg.WGCfg(c.netMap, Logger(c.logger.Named("wgconfig")), netmap.AllowSingleHosts, "")
328322
if err != nil {
329323
return xerrors.Errorf("update wireguard config: %w", err)
@@ -332,15 +326,13 @@ func (c *Conn) UpdateNodes(nodes []*Node) error {
332326
if err != nil {
333327
return xerrors.Errorf("reconfig: %w", err)
334328
}
335-
netMapCopy := *c.netMap
336-
c.wireguardEngine.SetNetworkMap(&netMapCopy)
337329
return nil
338330
}
339331

340332
// Status returns the current ipnstate of a connection.
341333
func (c *Conn) Status() *ipnstate.Status {
342334
sb := &ipnstate.StatusBuilder{}
343-
c.magicConn.UpdateStatus(sb)
335+
c.wireguardEngine.UpdateStatus(sb)
344336
return sb.Status()
345337
}
346338

tailnet/conn_test.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,12 @@ func TestTailnet(t *testing.T) {
5555
_ = w2.Close()
5656
})
5757
w1.SetNodeCallback(func(node *tailnet.Node) {
58-
w2.UpdateNodes([]*tailnet.Node{node})
58+
err := w2.UpdateNodes([]*tailnet.Node{node})
59+
require.NoError(t, err)
5960
})
6061
w2.SetNodeCallback(func(node *tailnet.Node) {
61-
w1.UpdateNodes([]*tailnet.Node{node})
62+
err := w1.UpdateNodes([]*tailnet.Node{node})
63+
require.NoError(t, err)
6264
})
6365

6466
conn := make(chan struct{})

tailnet/coordinator.go

+11-5
Original file line numberDiff line numberDiff line change
@@ -28,32 +28,38 @@ type Node struct {
2828

2929
// ServeCoordinator matches the RW structure of a coordinator to exchange node messages.
3030
func ServeCoordinator(conn net.Conn, updateNodes func(node []*Node) error) (func(node *Node), <-chan error) {
31-
errChan := make(chan error, 3)
31+
errChan := make(chan error, 1)
32+
sendErr := func(err error) {
33+
select {
34+
case errChan <- err:
35+
default:
36+
}
37+
}
3238
go func() {
3339
decoder := json.NewDecoder(conn)
3440
for {
3541
var nodes []*Node
3642
err := decoder.Decode(&nodes)
3743
if err != nil {
38-
errChan <- xerrors.Errorf("read: %w", err)
44+
sendErr(xerrors.Errorf("read: %w", err))
3945
return
4046
}
4147
err = updateNodes(nodes)
4248
if err != nil {
43-
errChan <- xerrors.Errorf("update nodes: %w", err)
49+
sendErr(xerrors.Errorf("update nodes: %w", err))
4450
}
4551
}
4652
}()
4753

4854
return func(node *Node) {
4955
data, err := json.Marshal(node)
5056
if err != nil {
51-
errChan <- xerrors.Errorf("marshal node: %w", err)
57+
sendErr(xerrors.Errorf("marshal node: %w", err))
5258
return
5359
}
5460
_, err = conn.Write(data)
5561
if err != nil {
56-
errChan <- xerrors.Errorf("write: %w", err)
62+
sendErr(xerrors.Errorf("write: %w", err))
5763
}
5864
}, errChan
5965
}

0 commit comments

Comments
 (0)