Skip to content

Commit 82f494c

Browse files
authored
fix: Improve tailnet connections by reducing timeouts (#5043)
* fix: Improve tailnet connections by reducing timeouts This awaits connection ping before running a dial. Before, we were hitting the TCP retransmission and handshake timeouts, which could intermittently add 1 or 5 seconds to a connection being initialized. * Update Tailscale
1 parent 4646f58 commit 82f494c

File tree

12 files changed

+112
-108
lines changed

12 files changed

+112
-108
lines changed

agent/agent_test.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -498,12 +498,7 @@ func TestAgent(t *testing.T) {
498498
}()
499499

500500
conn, _ := setupAgent(t, codersdk.WorkspaceAgentMetadata{}, 0)
501-
require.Eventually(t, func() bool {
502-
ctx, cancelFunc := context.WithTimeout(context.Background(), testutil.IntervalFast)
503-
defer cancelFunc()
504-
_, err := conn.Ping(ctx)
505-
return err == nil
506-
}, testutil.WaitMedium, testutil.IntervalFast)
501+
require.True(t, conn.AwaitReachable(context.Background()))
507502
conn1, err := conn.DialContext(context.Background(), l.Addr().Network(), l.Addr().String())
508503
require.NoError(t, err)
509504
defer conn1.Close()

cli/agent_test.go

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import (
1414
"github.com/coder/coder/coderd/coderdtest"
1515
"github.com/coder/coder/provisioner/echo"
1616
"github.com/coder/coder/provisionersdk/proto"
17-
"github.com/coder/coder/testutil"
1817
)
1918

2019
func TestWorkspaceAgent(t *testing.T) {
@@ -71,12 +70,7 @@ func TestWorkspaceAgent(t *testing.T) {
7170
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
7271
require.NoError(t, err)
7372
defer dialer.Close()
74-
require.Eventually(t, func() bool {
75-
ctx, cancelFunc := context.WithTimeout(ctx, testutil.IntervalFast)
76-
defer cancelFunc()
77-
_, err := dialer.Ping(ctx)
78-
return err == nil
79-
}, testutil.WaitMedium, testutil.IntervalFast)
73+
require.True(t, dialer.AwaitReachable(context.Background()))
8074
cancelFunc()
8175
err = <-errC
8276
require.NoError(t, err)
@@ -134,12 +128,7 @@ func TestWorkspaceAgent(t *testing.T) {
134128
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
135129
require.NoError(t, err)
136130
defer dialer.Close()
137-
require.Eventually(t, func() bool {
138-
ctx, cancelFunc := context.WithTimeout(ctx, testutil.IntervalFast)
139-
defer cancelFunc()
140-
_, err := dialer.Ping(ctx)
141-
return err == nil
142-
}, testutil.WaitMedium, testutil.IntervalFast)
131+
require.True(t, dialer.AwaitReachable(context.Background()))
143132
cancelFunc()
144133
err = <-errC
145134
require.NoError(t, err)
@@ -197,13 +186,7 @@ func TestWorkspaceAgent(t *testing.T) {
197186
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
198187
require.NoError(t, err)
199188
defer dialer.Close()
200-
require.Eventually(t, func() bool {
201-
ctx, cancelFunc := context.WithTimeout(ctx, testutil.IntervalFast)
202-
defer cancelFunc()
203-
_, err := dialer.Ping(ctx)
204-
return err == nil
205-
}, testutil.WaitMedium, testutil.IntervalFast)
206-
189+
require.True(t, dialer.AwaitReachable(context.Background()))
207190
sshClient, err := dialer.SSHClient(ctx)
208191
require.NoError(t, err)
209192
defer sshClient.Close()

cli/portforward.go

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"strings"
1111
"sync"
1212
"syscall"
13-
"time"
1413

1514
"github.com/pion/udp"
1615
"github.com/spf13/cobra"
@@ -145,22 +144,7 @@ func portForward() *cobra.Command {
145144
closeAllListeners()
146145
}()
147146

148-
ticker := time.NewTicker(250 * time.Millisecond)
149-
defer ticker.Stop()
150-
for {
151-
select {
152-
case <-ctx.Done():
153-
return ctx.Err()
154-
case <-ticker.C:
155-
}
156-
157-
_, err = conn.Ping(ctx)
158-
if err != nil {
159-
continue
160-
}
161-
break
162-
}
163-
ticker.Stop()
147+
conn.AwaitReachable(ctx)
164148
_, _ = fmt.Fprintln(cmd.OutOrStderr(), "Ready!")
165149
wg.Wait()
166150
return closeErr

cli/speedtest.go

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -62,33 +62,37 @@ func speedtest() *cobra.Command {
6262
return err
6363
}
6464
defer conn.Close()
65-
ticker := time.NewTicker(time.Second)
66-
defer ticker.Stop()
67-
for {
68-
select {
69-
case <-ctx.Done():
70-
return ctx.Err()
71-
case <-ticker.C:
65+
if direct {
66+
ticker := time.NewTicker(time.Second)
67+
defer ticker.Stop()
68+
for {
69+
select {
70+
case <-ctx.Done():
71+
return ctx.Err()
72+
case <-ticker.C:
73+
}
74+
dur, err := conn.Ping(ctx)
75+
if err != nil {
76+
continue
77+
}
78+
status := conn.Status()
79+
if len(status.Peers()) != 1 {
80+
continue
81+
}
82+
peer := status.Peer[status.Peers()[0]]
83+
if peer.CurAddr == "" && direct {
84+
cmd.Printf("Waiting for a direct connection... (%dms via %s)\n", dur.Milliseconds(), peer.Relay)
85+
continue
86+
}
87+
via := peer.Relay
88+
if via == "" {
89+
via = "direct"
90+
}
91+
cmd.Printf("%dms via %s\n", dur.Milliseconds(), via)
92+
break
7293
}
73-
dur, err := conn.Ping(ctx)
74-
if err != nil {
75-
continue
76-
}
77-
status := conn.Status()
78-
if len(status.Peers()) != 1 {
79-
continue
80-
}
81-
peer := status.Peer[status.Peers()[0]]
82-
if peer.CurAddr == "" && direct {
83-
cmd.Printf("Waiting for a direct connection... (%dms via %s)\n", dur.Milliseconds(), peer.Relay)
84-
continue
85-
}
86-
via := peer.Relay
87-
if via == "" {
88-
via = "direct"
89-
}
90-
cmd.Printf("%dms via %s\n", dur.Milliseconds(), via)
91-
break
94+
} else {
95+
conn.AwaitReachable(ctx)
9296
}
9397
dir := tsspeedtest.Download
9498
if reverse {

cli/ssh.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,12 @@ func ssh() *cobra.Command {
9090
return xerrors.Errorf("await agent: %w", err)
9191
}
9292

93-
conn, err := client.DialWorkspaceAgent(ctx, workspaceAgent.ID, nil)
93+
conn, err := client.DialWorkspaceAgent(ctx, workspaceAgent.ID, &codersdk.DialWorkspaceAgentOptions{})
9494
if err != nil {
9595
return err
9696
}
9797
defer conn.Close()
98-
98+
conn.AwaitReachable(ctx)
9999
stopPolling := tryPollWorkspaceAutostop(ctx, client, workspace)
100100
defer stopPolling()
101101

coderd/workspaceagents_test.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,12 +178,7 @@ func TestWorkspaceAgentListen(t *testing.T) {
178178
defer func() {
179179
_ = conn.Close()
180180
}()
181-
require.Eventually(t, func() bool {
182-
ctx, cancelFunc := context.WithTimeout(ctx, testutil.IntervalFast)
183-
defer cancelFunc()
184-
_, err := conn.Ping(ctx)
185-
return err == nil
186-
}, testutil.WaitLong, testutil.IntervalFast)
181+
conn.AwaitReachable(ctx)
187182
})
188183

189184
t.Run("FailNonLatestBuild", func(t *testing.T) {

codersdk/agentconn.go

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@ import (
1616

1717
"golang.org/x/crypto/ssh"
1818
"golang.org/x/xerrors"
19-
"tailscale.com/ipn/ipnstate"
2019
"tailscale.com/net/speedtest"
21-
"tailscale.com/tailcfg"
2220

2321
"github.com/coder/coder/coderd/tracing"
2422
"github.com/coder/coder/tailnet"
@@ -133,27 +131,18 @@ type AgentConn struct {
133131
CloseFunc func()
134132
}
135133

134+
func (c *AgentConn) AwaitReachable(ctx context.Context) bool {
135+
ctx, span := tracing.StartSpan(ctx)
136+
defer span.End()
137+
138+
return c.Conn.AwaitReachable(ctx, TailnetIP)
139+
}
140+
136141
func (c *AgentConn) Ping(ctx context.Context) (time.Duration, error) {
137142
ctx, span := tracing.StartSpan(ctx)
138143
defer span.End()
139144

140-
errCh := make(chan error, 1)
141-
durCh := make(chan time.Duration, 1)
142-
go c.Conn.Ping(TailnetIP, tailcfg.PingDisco, func(pr *ipnstate.PingResult) {
143-
if pr.Err != "" {
144-
errCh <- xerrors.New(pr.Err)
145-
return
146-
}
147-
durCh <- time.Duration(pr.LatencySeconds * float64(time.Second))
148-
})
149-
select {
150-
case err := <-errCh:
151-
return 0, err
152-
case <-ctx.Done():
153-
return 0, ctx.Err()
154-
case dur := <-durCh:
155-
return dur, nil
156-
}
145+
return c.Conn.Ping(ctx, TailnetIP)
157146
}
158147

159148
func (c *AgentConn) CloseWithError(_ error) error {

codersdk/workspaceagents.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,13 +447,14 @@ func (c *Client) DialWorkspaceAgent(ctx context.Context, agentID uuid.UUID, opti
447447
_ = conn.Close()
448448
return nil, err
449449
}
450+
450451
return &AgentConn{
451452
Conn: conn,
452453
CloseFunc: func() {
453454
cancelFunc()
454455
<-closed
455456
},
456-
}, err
457+
}, nil
457458
}
458459

459460
// WorkspaceAgent returns an agent by ID.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ replace github.com/tcnksm/go-httpstat => github.com/kylecarbs/go-httpstat v0.0.0
4040

4141
// There are a few minor changes we make to Tailscale that we're slowly upstreaming. Compare here:
4242
// https://github.com/tailscale/tailscale/compare/main...coder:tailscale:main
43-
replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20221104170440-ef53dca69a41
43+
replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20221113171243-7d90f070c5dc
4444

4545
// Switch to our fork that imports fixes from http://github.com/tailscale/ssh.
4646
// See: https://github.com/coder/coder/issues/3371

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,8 +355,8 @@ github.com/coder/retry v1.3.0 h1:5lAAwt/2Cm6lVmnfBY7sOMXcBOwcwJhmV5QGSELIVWY=
355355
github.com/coder/retry v1.3.0/go.mod h1:tXuRgZgWjUnU5LZPT4lJh4ew2elUhexhlnXzrJWdyFY=
356356
github.com/coder/ssh v0.0.0-20220811105153-fcea99919338 h1:tN5GKFT68YLVzJoA8AHuiMNJ0qlhoD3pGN3JY9gxSko=
357357
github.com/coder/ssh v0.0.0-20220811105153-fcea99919338/go.mod h1:ZSS+CUoKHDrqVakTfTWUlKSr9MtMFkC4UvtQKD7O914=
358-
github.com/coder/tailscale v1.1.1-0.20221104170440-ef53dca69a41 h1:/mjNjfUarvH8BdmvDVLvtIIENoe3PevqCyZQmAlILuw=
359-
github.com/coder/tailscale v1.1.1-0.20221104170440-ef53dca69a41/go.mod h1:lkCb74eSJwxeNq8YwyILoHD5vtHktiZnTOxBxo3tbNc=
358+
github.com/coder/tailscale v1.1.1-0.20221113171243-7d90f070c5dc h1:qozpteSLz0ifMasetJ+/Qac5Ud/NRNIlgTubGf6TAaQ=
359+
github.com/coder/tailscale v1.1.1-0.20221113171243-7d90f070c5dc/go.mod h1:lkCb74eSJwxeNq8YwyILoHD5vtHktiZnTOxBxo3tbNc=
360360
github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4/go.mod h1:nukgQABAEopAHvB6j7cnP5zJ+/3aVcE7hCYqvIwAHyE=
361361
github.com/containerd/aufs v0.0.0-20201003224125-76a6863f2989/go.mod h1:AkGGQs9NM2vtYHaUen+NljV0/baGCAPELGm2q9ZXpWU=
362362
github.com/containerd/aufs v0.0.0-20210316121734-20793ff83c97/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj3gNv2PU=

0 commit comments

Comments
 (0)