Skip to content

Commit 46052b4

Browse files
committed
test(tailnet): Add TestTransmitHang test with packet capture and logging
This commit adds a test that occasionally triggers the issue in #7262. When triggered, the test takes e.g. ~30s to complete instead of 2s. The test also logs both from both `tailnet.Conn`s into a log file and produces pcap files from each side.
1 parent b4d913e commit 46052b4

File tree

2 files changed

+134
-0
lines changed

2 files changed

+134
-0
lines changed

tailnet/conn.go

+29
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"tailscale.com/types/netlogtype"
3434
"tailscale.com/types/netmap"
3535
"tailscale.com/wgengine"
36+
"tailscale.com/wgengine/capture"
3637
"tailscale.com/wgengine/filter"
3738
"tailscale.com/wgengine/magicsock"
3839
"tailscale.com/wgengine/monitor"
@@ -308,6 +309,8 @@ type Conn struct {
308309
closed chan struct{}
309310
logger slog.Logger
310311
blockEndpoints bool
312+
capture *capture.Sink
313+
captureCount int
311314

312315
dialer *tsdial.Dialer
313316
tunDevice *tstun.Wrapper
@@ -582,6 +585,10 @@ func (c *Conn) Close() error {
582585
default:
583586
}
584587
close(c.closed)
588+
if c.capture != nil {
589+
c.capture.Close()
590+
c.capture = nil
591+
}
585592
c.mutex.Unlock()
586593

587594
var wg sync.WaitGroup
@@ -840,6 +847,28 @@ func (c *Conn) MagicsockServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
840847
c.magicConn.ServeHTTPDebug(w, r)
841848
}
842849

850+
func (c *Conn) Capture(w io.Writer) (unregister func()) {
851+
c.mutex.Lock()
852+
defer c.mutex.Unlock()
853+
854+
if c.capture == nil {
855+
c.capture = capture.New()
856+
c.wireguardEngine.InstallCaptureHook(c.capture.LogPacket)
857+
}
858+
unreg := c.capture.RegisterOutput(w)
859+
return func() {
860+
c.mutex.Lock()
861+
defer c.mutex.Unlock()
862+
unreg()
863+
c.captureCount--
864+
if c.captureCount == 0 {
865+
c.wireguardEngine.InstallCaptureHook(nil)
866+
_ = c.capture.Close()
867+
c.capture = nil
868+
}
869+
}
870+
}
871+
843872
type listenKey struct {
844873
network string
845874
host string

tailnet/conn_test.go

+105
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,19 @@ package tailnet_test
22

33
import (
44
"context"
5+
"io"
56
"net/netip"
7+
"os"
8+
"path/filepath"
9+
"strings"
610
"testing"
711

812
"github.com/stretchr/testify/assert"
913
"github.com/stretchr/testify/require"
1014
"go.uber.org/goleak"
1115

1216
"cdr.dev/slog"
17+
"cdr.dev/slog/sloggers/sloghuman"
1318
"cdr.dev/slog/sloggers/slogtest"
1419
"github.com/coder/coder/tailnet"
1520
"github.com/coder/coder/tailnet/tailnettest"
@@ -195,3 +200,103 @@ func TestConn_PreferredDERP(t *testing.T) {
195200
t.Fatal("timed out waiting for node")
196201
}
197202
}
203+
204+
func TestTransmitHang(t *testing.T) {
205+
t.Parallel()
206+
207+
// Not using t.TempDir() here so that we keep logs afterwards.
208+
captureDir, err := os.MkdirTemp("", "tailnet-test-")
209+
require.NoError(t, err)
210+
211+
testLog, err := os.Create(filepath.Join(captureDir, "test.log"))
212+
require.NoError(t, err)
213+
defer testLog.Close()
214+
recvCapture, err := os.Create(filepath.Join(captureDir, "recv.pcap"))
215+
require.NoError(t, err)
216+
defer recvCapture.Close()
217+
sendCapture, err := os.Create(filepath.Join(captureDir, "send.pcap"))
218+
require.NoError(t, err)
219+
defer sendCapture.Close()
220+
221+
logger := slogtest.Make(t, nil).
222+
Leveled(slog.LevelDebug).
223+
AppendSinks(sloghuman.Sink(testLog))
224+
225+
t.Logf("test log file: %v", testLog.Name())
226+
t.Logf("recv capture file: %v", recvCapture.Name())
227+
t.Logf("send capture file: %v", sendCapture.Name())
228+
229+
derpMap := tailnettest.RunDERPAndSTUN(t)
230+
updateNodes := func(c *tailnet.Conn) func(*tailnet.Node) {
231+
return func(node *tailnet.Node) {
232+
err := c.UpdateNodes([]*tailnet.Node{node}, false)
233+
assert.NoError(t, err)
234+
}
235+
}
236+
237+
recvIP := tailnet.IP()
238+
recv, err := tailnet.NewConn(&tailnet.Options{
239+
Addresses: []netip.Prefix{netip.PrefixFrom(recvIP, 128)},
240+
Logger: logger.Named("recv"),
241+
DERPMap: derpMap,
242+
})
243+
require.NoError(t, err)
244+
defer recv.Close()
245+
recvCaptureStop := recv.Capture(recvCapture)
246+
defer recvCaptureStop()
247+
248+
send, err := tailnet.NewConn(&tailnet.Options{
249+
Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)},
250+
Logger: logger.Named("send"),
251+
DERPMap: derpMap,
252+
})
253+
require.NoError(t, err)
254+
defer send.Close()
255+
sendCaptureStop := send.Capture(sendCapture)
256+
defer sendCaptureStop()
257+
258+
recv.SetNodeCallback(updateNodes(send))
259+
send.SetNodeCallback(updateNodes(recv))
260+
261+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
262+
defer cancel()
263+
264+
require.True(t, send.AwaitReachable(ctx, recvIP))
265+
266+
copyDone := make(chan struct{})
267+
go func() {
268+
defer close(copyDone)
269+
270+
ln, err := recv.Listen("tcp", ":35565")
271+
if !assert.NoError(t, err) {
272+
return
273+
}
274+
defer ln.Close()
275+
276+
r, err := ln.Accept()
277+
if !assert.NoError(t, err) {
278+
return
279+
}
280+
defer r.Close()
281+
282+
_, err = io.Copy(io.Discard, r)
283+
assert.NoError(t, err)
284+
}()
285+
286+
w, err := send.DialContextTCP(ctx, netip.AddrPortFrom(recvIP, 35565))
287+
require.NoError(t, err)
288+
289+
payload := []byte(strings.Repeat("hello world\n", 65536/12))
290+
size := 0
291+
for i := 0; i < 1024*2; i++ {
292+
logger.Debug(ctx, "write payload", slog.F("num", i), slog.F("transmitted_kb", size/1024))
293+
n, err := w.Write(payload)
294+
require.NoError(t, err)
295+
size += n
296+
}
297+
298+
err = w.Close()
299+
require.NoError(t, err)
300+
301+
<-copyDone
302+
}

0 commit comments

Comments
 (0)