Skip to content

Commit fbf329f

Browse files
authored
fix(tailnet): set TCP keepalive idle to 72 hours for SSH conns (#7196)
1 parent 57c4de4 commit fbf329f

File tree

4 files changed

+41
-36
lines changed

4 files changed

+41
-36
lines changed

codersdk/workspaceagentconn.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ import (
3030
var WorkspaceAgentIP = netip.MustParseAddr("fd7a:115c:a1e0:49d6:b259:b7ac:b1b2:48f4")
3131

3232
const (
33-
WorkspaceAgentSSHPort = 1
34-
WorkspaceAgentReconnectingPTYPort = 2
35-
WorkspaceAgentSpeedtestPort = 3
33+
WorkspaceAgentSSHPort = tailnet.WorkspaceAgentSSHPort
34+
WorkspaceAgentReconnectingPTYPort = tailnet.WorkspaceAgentReconnectingPTYPort
35+
WorkspaceAgentSpeedtestPort = tailnet.WorkspaceAgentSpeedtestPort
3636
// WorkspaceAgentHTTPAPIServerPort serves an HTTP server with endpoints for e.g.
3737
// gathering agent statistics.
3838
WorkspaceAgentHTTPAPIServerPort = 4

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ replace github.com/dlclark/regexp2 => github.com/dlclark/regexp2 v1.7.0
3636

3737
// There are a few minor changes we make to Tailscale that we're slowly upstreaming. Compare here:
3838
// https://github.com/tailscale/tailscale/compare/main...coder:tailscale:main
39-
replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20230411160749-27a458a0ac0a
39+
replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20230418202606-ed9307cf1b22
4040

4141
// Switch to our fork that imports fixes from http://github.com/tailscale/ssh.
4242
// See: https://github.com/coder/coder/issues/3371

go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -380,8 +380,8 @@ github.com/coder/retry v1.3.1-0.20230210155434-e90a2e1e091d h1:09JG37IgTB6n3ouX9
380380
github.com/coder/retry v1.3.1-0.20230210155434-e90a2e1e091d/go.mod h1:r+1J5i/989wt6CUeNSuvFKKA9hHuKKPMxdzDbTuvwwk=
381381
github.com/coder/ssh v0.0.0-20220811105153-fcea99919338 h1:tN5GKFT68YLVzJoA8AHuiMNJ0qlhoD3pGN3JY9gxSko=
382382
github.com/coder/ssh v0.0.0-20220811105153-fcea99919338/go.mod h1:ZSS+CUoKHDrqVakTfTWUlKSr9MtMFkC4UvtQKD7O914=
383-
github.com/coder/tailscale v1.1.1-0.20230411160749-27a458a0ac0a h1:kgfkNHT0yiDAfs5AKwxICqsFWeiHD/pR+bd0w20LXYI=
384-
github.com/coder/tailscale v1.1.1-0.20230411160749-27a458a0ac0a/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
383+
github.com/coder/tailscale v1.1.1-0.20230418202606-ed9307cf1b22 h1:bvGOqnI0ITbwOZFQ0SZ4MBw/8LLUEjxmNu57XEujrfQ=
384+
github.com/coder/tailscale v1.1.1-0.20230418202606-ed9307cf1b22/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
385385
github.com/coder/terraform-provider-coder v0.6.23 h1:O2Rcj0umez4DfVdGnKZi63z1Xzxd0IQOn9VQDB8YU8g=
386386
github.com/coder/terraform-provider-coder v0.6.23/go.mod h1:UIfU3bYNeSzJJvHyJ30tEKjD6Z9utloI+HUM/7n94CY=
387387
github.com/coder/wgtunnel v0.1.5 h1:WP3sCj/3iJ34eKvpMQEp1oJHvm24RYh0NHbj1kfUKfs=

tailnet/conn.go

+35-30
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/google/uuid"
1818
"go4.org/netipx"
1919
"golang.org/x/xerrors"
20+
"gvisor.dev/gvisor/pkg/tcpip"
2021
"gvisor.dev/gvisor/pkg/tcpip/adapters/gonet"
2122
"tailscale.com/hostinfo"
2223
"tailscale.com/ipn/ipnstate"
@@ -44,6 +45,12 @@ import (
4445
"github.com/coder/coder/cryptorand"
4546
)
4647

48+
// Port numbers for workspace agent services. They are declared in the tailnet
// package so that forwardTCPSockOpts can compare an inbound port against
// WorkspaceAgentSSHPort; codersdk re-exports the same three names by aliasing
// these constants.
const (
49+
// WorkspaceAgentSSHPort is the port whose forwarded TCP connections get the
// extended keepalive idle option (see forwardTCPSockOpts).
WorkspaceAgentSSHPort = 1
50+
// WorkspaceAgentReconnectingPTYPort is presumably the reconnecting PTY
// service port (inferred from the name; confirm against the agent).
WorkspaceAgentReconnectingPTYPort = 2
51+
// WorkspaceAgentSpeedtestPort is presumably the speedtest service port
// (inferred from the name; confirm against the agent).
WorkspaceAgentSpeedtestPort = 3
52+
)
53+
4754
func init() {
4855
// Globally disable network namespacing. All networking happens in
4956
// userspace.
@@ -267,6 +274,7 @@ func NewConn(options *Options) (conn *Conn, err error) {
267274
server.sendNode()
268275
})
269276
netStack.ForwardTCPIn = server.forwardTCP
277+
netStack.ForwardTCPSockOpts = server.forwardTCPSockOpts
270278

271279
err = netStack.Start(nil)
272280
if err != nil {
@@ -301,17 +309,16 @@ type Conn struct {
301309
logger slog.Logger
302310
blockEndpoints bool
303311

304-
dialer *tsdial.Dialer
305-
tunDevice *tstun.Wrapper
306-
peerMap map[tailcfg.NodeID]*tailcfg.Node
307-
netMap *netmap.NetworkMap
308-
netStack *netstack.Impl
309-
magicConn *magicsock.Conn
310-
wireguardMonitor *monitor.Mon
311-
wireguardRouter *router.Config
312-
wireguardEngine wgengine.Engine
313-
listeners map[listenKey]*listener
314-
forwardTCPCallback func(conn net.Conn, listenerExists bool) net.Conn
312+
dialer *tsdial.Dialer
313+
tunDevice *tstun.Wrapper
314+
peerMap map[tailcfg.NodeID]*tailcfg.Node
315+
netMap *netmap.NetworkMap
316+
netStack *netstack.Impl
317+
magicConn *magicsock.Conn
318+
wireguardMonitor *monitor.Mon
319+
wireguardRouter *router.Config
320+
wireguardEngine wgengine.Engine
321+
listeners map[listenKey]*listener
315322

316323
lastMutex sync.Mutex
317324
nodeSending bool
@@ -327,17 +334,6 @@ type Conn struct {
327334
trafficStats *connstats.Statistics
328335
}
329336

330-
// SetForwardTCPCallback is called every time a TCP connection is initiated inbound.
331-
// listenerExists is true if a listener is registered for the target port. If there
332-
// isn't one, traffic is forwarded to the local listening port.
333-
//
334-
// This allows wrapping a Conn to track reads and writes.
335-
func (c *Conn) SetForwardTCPCallback(callback func(conn net.Conn, listenerExists bool) net.Conn) {
336-
c.mutex.Lock()
337-
defer c.mutex.Unlock()
338-
c.forwardTCPCallback = callback
339-
}
340-
341337
func (c *Conn) SetNodeCallback(callback func(node *Node)) {
342338
c.lastMutex.Lock()
343339
c.nodeCallback = callback
@@ -699,12 +695,11 @@ func (c *Conn) selfNode() *Node {
699695
// This and below is taken _mostly_ verbatim from Tailscale:
700696
// https://github.com/tailscale/tailscale/blob/c88bd53b1b7b2fcf7ba302f2e53dd1ce8c32dad4/tsnet/tsnet.go#L459-L494
701697

702-
// Listen announces only on the Tailscale network.
703-
// It will start the server if it has not been started yet.
698+
// Listen listens for connections only on the Tailscale network.
704699
func (c *Conn) Listen(network, addr string) (net.Listener, error) {
705700
host, port, err := net.SplitHostPort(addr)
706701
if err != nil {
707-
return nil, xerrors.Errorf("wgnet: %w", err)
702+
return nil, xerrors.Errorf("tailnet: split host port for listen: %w", err)
708703
}
709704
lk := listenKey{network, host, port}
710705
ln := &listener{
@@ -725,7 +720,7 @@ func (c *Conn) Listen(network, addr string) (net.Listener, error) {
725720
}
726721
if _, ok := c.listeners[lk]; ok {
727722
c.mutex.Unlock()
728-
return nil, xerrors.Errorf("wgnet: listener already open for %s, %s", network, addr)
723+
return nil, xerrors.Errorf("tailnet: listener already open for %s, %s", network, addr)
729724
}
730725
c.listeners[lk] = ln
731726
c.mutex.Unlock()
@@ -743,14 +738,12 @@ func (c *Conn) DialContextUDP(ctx context.Context, ipp netip.AddrPort) (*gonet.U
743738
func (c *Conn) forwardTCP(conn net.Conn, port uint16) {
744739
c.mutex.Lock()
745740
ln, ok := c.listeners[listenKey{"tcp", "", fmt.Sprint(port)}]
746-
if c.forwardTCPCallback != nil {
747-
conn = c.forwardTCPCallback(conn, ok)
748-
}
749741
c.mutex.Unlock()
750742
if !ok {
751743
c.forwardTCPToLocal(conn, port)
752744
return
753745
}
746+
754747
t := time.NewTimer(time.Second)
755748
defer t.Stop()
756749
select {
@@ -763,6 +756,18 @@ func (c *Conn) forwardTCP(conn net.Conn, port uint16) {
763756
_ = conn.Close()
764757
}
765758

759+
// forwardTCPSockOpts returns the socket options to apply to an inbound
// forwarded TCP connection destined for the given port. It is installed as
// netStack.ForwardTCPSockOpts in NewConn.
//
// For WorkspaceAgentSSHPort and port 22 it returns a single option that sets
// the TCP keepalive idle time to 72 hours. For every other port it returns an
// empty, non-nil slice. The receiver is unused.
func (*Conn) forwardTCPSockOpts(port uint16) []tcpip.SettableSocketOption {
760+
opts := []tcpip.SettableSocketOption{}
761+
762+
// See: https://github.com/tailscale/tailscale/blob/c7cea825aea39a00aca71ea02bab7266afc03e7c/wgengine/netstack/netstack.go#L888
763+
// NOTE(review): 22 is presumably the conventional SSH port, matched here
// in addition to the agent's own SSH port — confirm.
if port == WorkspaceAgentSSHPort || port == 22 {
764+
// KeepaliveIdleOption is a value type; the option is appended by pointer.
opt := tcpip.KeepaliveIdleOption(72 * time.Hour)
765+
opts = append(opts, &opt)
766+
}
767+
768+
return opts
769+
}
770+
766771
func (c *Conn) forwardTCPToLocal(conn net.Conn, port uint16) {
767772
defer conn.Close()
768773
dialAddrStr := net.JoinHostPort("127.0.0.1", strconv.Itoa(int(port)))
@@ -842,7 +847,7 @@ func (ln *listener) Accept() (net.Conn, error) {
842847
select {
843848
case c = <-ln.conn:
844849
case <-ln.closed:
845-
return nil, xerrors.Errorf("wgnet: %w", net.ErrClosed)
850+
return nil, xerrors.Errorf("tailnet: %w", net.ErrClosed)
846851
}
847852
return c, nil
848853
}

0 commit comments

Comments
 (0)