Skip to content

chore: retry TestAgent_Dial subtests #19387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 68 additions & 57 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2668,19 +2668,19 @@ func TestAgent_Dial(t *testing.T) {

cases := []struct {
name string
setup func(t *testing.T) net.Listener
setup func(t testing.TB) net.Listener
}{
{
name: "TCP",
setup: func(t *testing.T) net.Listener {
setup: func(t testing.TB) net.Listener {
l, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err, "create TCP listener")
return l
},
},
{
name: "UDP",
setup: func(t *testing.T) net.Listener {
setup: func(t testing.TB) net.Listener {
addr := net.UDPAddr{
IP: net.ParseIP("127.0.0.1"),
Port: 0,
Expand All @@ -2698,57 +2698,68 @@ func TestAgent_Dial(t *testing.T) {

// The purpose of this test is to ensure that a client can dial a
// listener in the workspace over tailnet.
l := c.setup(t)
done := make(chan struct{})
defer func() {
l.Close()
<-done
}()

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()

go func() {
defer close(done)
for range 2 {
c, err := l.Accept()
if assert.NoError(t, err, "accept connection") {
testAccept(ctx, t, c)
_ = c.Close()
//
// The OS sometimes drops packets if the system can't keep up with
// them. For TCP packets, it's typically fine due to
// retransmissions, but for UDP packets, it can fail this test.
//
// The OS gets involved for the Wireguard traffic (either via DERP
// or direct UDP), and also for the traffic between the agent and
// the listener in the "workspace".
//
// To avoid this, we'll retry this test up to 3 times.
testutil.RunRetry(t, 3, func(t testing.TB) {
ctx := testutil.Context(t, testutil.WaitLong)

l := c.setup(t)
done := make(chan struct{})
defer func() {
l.Close()
<-done
}()

go func() {
defer close(done)
for range 2 {
c, err := l.Accept()
if assert.NoError(t, err, "accept connection") {
testAccept(ctx, t, c)
_ = c.Close()
}
}
}
}()
}()

agentID := uuid.UUID{0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8}
//nolint:dogsled
agentConn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{
AgentID: agentID,
}, 0)
require.True(t, agentConn.AwaitReachable(ctx))
conn, err := agentConn.DialContext(ctx, l.Addr().Network(), l.Addr().String())
require.NoError(t, err)
testDial(ctx, t, conn)
err = conn.Close()
require.NoError(t, err)
agentID := uuid.UUID{0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8}
//nolint:dogsled
agentConn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{
AgentID: agentID,
}, 0)
require.True(t, agentConn.AwaitReachable(ctx))
conn, err := agentConn.DialContext(ctx, l.Addr().Network(), l.Addr().String())
require.NoError(t, err)
testDial(ctx, t, conn)
err = conn.Close()
require.NoError(t, err)

// also connect via the CoderServicePrefix, to test that we can reach the agent on this
// IP. This will be required for CoderVPN.
_, rawPort, _ := net.SplitHostPort(l.Addr().String())
port, _ := strconv.ParseUint(rawPort, 10, 16)
ipp := netip.AddrPortFrom(tailnet.CoderServicePrefix.AddrFromUUID(agentID), uint16(port))

switch l.Addr().Network() {
case "tcp":
conn, err = agentConn.Conn.DialContextTCP(ctx, ipp)
case "udp":
conn, err = agentConn.Conn.DialContextUDP(ctx, ipp)
default:
t.Fatalf("unknown network: %s", l.Addr().Network())
}
require.NoError(t, err)
testDial(ctx, t, conn)
err = conn.Close()
require.NoError(t, err)
// also connect via the CoderServicePrefix, to test that we can reach the agent on this
// IP. This will be required for CoderVPN.
_, rawPort, _ := net.SplitHostPort(l.Addr().String())
port, _ := strconv.ParseUint(rawPort, 10, 16)
ipp := netip.AddrPortFrom(tailnet.CoderServicePrefix.AddrFromUUID(agentID), uint16(port))

switch l.Addr().Network() {
case "tcp":
conn, err = agentConn.Conn.DialContextTCP(ctx, ipp)
case "udp":
conn, err = agentConn.Conn.DialContextUDP(ctx, ipp)
default:
t.Fatalf("unknown network: %s", l.Addr().Network())
}
require.NoError(t, err)
testDial(ctx, t, conn)
err = conn.Close()
require.NoError(t, err)
})
})
}
}
Expand Down Expand Up @@ -3251,7 +3262,7 @@ func setupSSHSessionOnPort(
return session
}

func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Duration, opts ...func(*agenttest.Client, *agent.Options)) (
func setupAgent(t testing.TB, metadata agentsdk.Manifest, ptyTimeout time.Duration, opts ...func(*agenttest.Client, *agent.Options)) (
*workspacesdk.AgentConn,
*agenttest.Client,
<-chan *proto.Stats,
Expand Down Expand Up @@ -3349,7 +3360,7 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati

var dialTestPayload = []byte("dean-was-here123")

func testDial(ctx context.Context, t *testing.T, c net.Conn) {
func testDial(ctx context.Context, t testing.TB, c net.Conn) {
t.Helper()

if deadline, ok := ctx.Deadline(); ok {
Expand All @@ -3365,7 +3376,7 @@ func testDial(ctx context.Context, t *testing.T, c net.Conn) {
assertReadPayload(t, c, dialTestPayload)
}

func testAccept(ctx context.Context, t *testing.T, c net.Conn) {
func testAccept(ctx context.Context, t testing.TB, c net.Conn) {
t.Helper()
defer c.Close()

Expand All @@ -3382,7 +3393,7 @@ func testAccept(ctx context.Context, t *testing.T, c net.Conn) {
assertWritePayload(t, c, dialTestPayload)
}

func assertReadPayload(t *testing.T, r io.Reader, payload []byte) {
func assertReadPayload(t testing.TB, r io.Reader, payload []byte) {
t.Helper()
b := make([]byte, len(payload)+16)
n, err := r.Read(b)
Expand All @@ -3391,11 +3402,11 @@ func assertReadPayload(t *testing.T, r io.Reader, payload []byte) {
assert.Equal(t, payload, b[:n])
}

func assertWritePayload(t *testing.T, w io.Writer, payload []byte) {
func assertWritePayload(t testing.TB, w io.Writer, payload []byte) {
t.Helper()
n, err := w.Write(payload)
assert.NoError(t, err, "write payload")
assert.Equal(t, len(payload), n, "payload length does not match")
assert.Equal(t, len(payload), n, "written payload length does not match")
}

func testSessionOutput(t *testing.T, session *ssh.Session, expected, unexpected []string, expectedRe *regexp.Regexp) {
Expand Down
2 changes: 1 addition & 1 deletion tailnet/tailnettest/tailnettest.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func DERPIsEmbedded(cfg *derpAndSTUNCfg) {
}

// RunDERPAndSTUN creates a DERP mapping for tests.
func RunDERPAndSTUN(t *testing.T, opts ...DERPAndStunOption) (*tailcfg.DERPMap, *derp.Server) {
func RunDERPAndSTUN(t testing.TB, opts ...DERPAndStunOption) (*tailcfg.DERPMap, *derp.Server) {
cfg := new(derpAndSTUNCfg)
for _, o := range opts {
o(cfg)
Expand Down
2 changes: 1 addition & 1 deletion testutil/ctx.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"time"
)

func Context(t *testing.T, dur time.Duration) context.Context {
func Context(t testing.TB, dur time.Duration) context.Context {
ctx, cancel := context.WithTimeout(context.Background(), dur)
t.Cleanup(cancel)
return ctx
Expand Down
Loading
Loading