Skip to content

Commit 5388a1b

Browse files
authored
fix: use TSMP ping for reachability, not latency (#11749)
Use TSMP ping for reachability, but leave Disco ping for when we call Ping() since we often use that to determine whether we have a direct connection. Also adds unit tests to make sure Ping() returns direct connection vs DERP correctly.
1 parent 66f119b commit 5388a1b

File tree

2 files changed

+101
-3
lines changed

2 files changed

+101
-3
lines changed

tailnet/conn.go

+12-2
Original file line numberDiff line numberDiff line change
@@ -374,9 +374,13 @@ func (c *Conn) Status() *ipnstate.Status {
374374
// Ping sends a ping to the Wireguard engine.
375375
// The bool returned is true if the ping was performed P2P.
376376
func (c *Conn) Ping(ctx context.Context, ip netip.Addr) (time.Duration, bool, *ipnstate.PingResult, error) {
377+
return c.pingWithType(ctx, ip, tailcfg.PingDisco)
378+
}
379+
380+
func (c *Conn) pingWithType(ctx context.Context, ip netip.Addr, pt tailcfg.PingType) (time.Duration, bool, *ipnstate.PingResult, error) {
377381
errCh := make(chan error, 1)
378382
prChan := make(chan *ipnstate.PingResult, 1)
379-
go c.wireguardEngine.Ping(ip, tailcfg.PingDisco, func(pr *ipnstate.PingResult) {
383+
go c.wireguardEngine.Ping(ip, pt, func(pr *ipnstate.PingResult) {
380384
if pr.Err != "" {
381385
errCh <- xerrors.New(pr.Err)
382386
return
@@ -418,7 +422,13 @@ func (c *Conn) AwaitReachable(ctx context.Context, ip netip.Addr) bool {
418422
ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
419423
defer cancel()
420424

421-
_, _, _, err := c.Ping(ctx, ip)
425+
// For reachability, we use TSMP ping, which pings at the IP layer, and
426+
// therefore requires that wireguard and the netstack are up. If we
427+
// don't wait for wireguard to be up, we could miss a handshake, and it
428+
// might take 5 seconds for the handshake to be retried. A 5s initial
429+
// round trip can set us up for poor TCP performance, since the initial
430+
// round-trip-time sets the initial retransmit timeout.
431+
_, _, _, err := c.pingWithType(ctx, ip, tailcfg.PingTSMP)
422432
if err == nil {
423433
completed()
424434
}

tailnet/conn_test.go

+89-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ func TestTailnet(t *testing.T) {
8888
}
8989
})
9090
node := testutil.RequireRecvCtx(ctx, t, nodes)
91-
// Ensure this connected over DERP!
91+
// Ensure this connected over raw (not websocket) DERP!
9292
require.Len(t, node.DERPForcedWebsocket, 0)
9393

9494
w1.Close()
@@ -157,6 +157,94 @@ func TestTailnet(t *testing.T) {
157157
w1.Close()
158158
w2.Close()
159159
})
160+
161+
t.Run("PingDirect", func(t *testing.T) {
162+
t.Parallel()
163+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
164+
ctx := testutil.Context(t, testutil.WaitLong)
165+
w1IP := tailnet.IP()
166+
w1, err := tailnet.NewConn(&tailnet.Options{
167+
Addresses: []netip.Prefix{netip.PrefixFrom(w1IP, 128)},
168+
Logger: logger.Named("w1"),
169+
DERPMap: derpMap,
170+
})
171+
require.NoError(t, err)
172+
173+
w2, err := tailnet.NewConn(&tailnet.Options{
174+
Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)},
175+
Logger: logger.Named("w2"),
176+
DERPMap: derpMap,
177+
})
178+
require.NoError(t, err)
179+
t.Cleanup(func() {
180+
_ = w1.Close()
181+
_ = w2.Close()
182+
})
183+
stitch(t, w2, w1)
184+
stitch(t, w1, w2)
185+
require.True(t, w2.AwaitReachable(context.Background(), w1IP))
186+
187+
require.Eventually(t, func() bool {
188+
_, direct, pong, err := w2.Ping(ctx, w1IP)
189+
if err != nil {
190+
t.Logf("ping error: %s", err.Error())
191+
return false
192+
}
193+
if !direct {
194+
t.Logf("got pong: %+v", pong)
195+
return false
196+
}
197+
return true
198+
}, testutil.WaitShort, testutil.IntervalFast)
199+
200+
w1.Close()
201+
w2.Close()
202+
})
203+
204+
t.Run("PingDERPOnly", func(t *testing.T) {
205+
t.Parallel()
206+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
207+
ctx := testutil.Context(t, testutil.WaitLong)
208+
w1IP := tailnet.IP()
209+
w1, err := tailnet.NewConn(&tailnet.Options{
210+
Addresses: []netip.Prefix{netip.PrefixFrom(w1IP, 128)},
211+
Logger: logger.Named("w1"),
212+
DERPMap: derpMap,
213+
BlockEndpoints: true,
214+
})
215+
require.NoError(t, err)
216+
217+
w2, err := tailnet.NewConn(&tailnet.Options{
218+
Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)},
219+
Logger: logger.Named("w2"),
220+
DERPMap: derpMap,
221+
BlockEndpoints: true,
222+
})
223+
require.NoError(t, err)
224+
t.Cleanup(func() {
225+
_ = w1.Close()
226+
_ = w2.Close()
227+
})
228+
stitch(t, w2, w1)
229+
stitch(t, w1, w2)
230+
require.True(t, w2.AwaitReachable(context.Background(), w1IP))
231+
232+
require.Eventually(t, func() bool {
233+
_, direct, pong, err := w2.Ping(ctx, w1IP)
234+
if err != nil {
235+
t.Logf("ping error: %s", err.Error())
236+
return false
237+
}
238+
if direct || pong.DERPRegionID != derpMap.RegionIDs()[0] {
239+
t.Logf("got pong: %+v", pong)
240+
return false
241+
}
242+
return true
243+
}, testutil.WaitShort, testutil.IntervalFast)
244+
245+
w1.Close()
246+
w2.Close()
247+
})
160248
}
161249

162250
// TestConn_PreferredDERP tests that we only trigger the NodeCallback when we have a preferred DERP server.

0 commit comments

Comments
 (0)