From 09dc7d7a00c8eb2070656066a51b04df9f95e8ff Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Tue, 15 Aug 2023 01:40:58 +0000 Subject: [PATCH 1/2] fix(tailnet): disable wireguard trimming Since Tailscale removed keep-alives, it seems like open but idle connections (SSH, port-forward, etc) can get trimmed fairly easily, causing hangs for a few seconds while the connection is set up again. This was taken from Spike's PR https://github.com/coder/coder/pull/8492 Co-authored-by: Spike Curtis --- tailnet/conn.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tailnet/conn.go b/tailnet/conn.go index 945402b43da89..ee47b5733b42c 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -19,6 +19,7 @@ import ( "golang.org/x/xerrors" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet" + "tailscale.com/envknob" "tailscale.com/hostinfo" "tailscale.com/ipn/ipnstate" "tailscale.com/net/connstats" @@ -64,6 +65,22 @@ func init() { // Globally disable network namespacing. All networking happens in // userspace. netns.SetEnabled(false) + // Tailscale, by default, "trims" the set of peers down to ones that we are "actively" communicating with in + // an effort to save memory. But, we want to make sure the Wireguard connection is up and handshaked before sending + // TCP traffic over it to avoid anomalously long round-trip time of the initial handshake + // c.f. https://github.com/coder/coder/issues/7388#issuecomment-1625463069 for more details. + // + // If Tailscale is waiting for traffic to bring up Wireguard, and we wait for Wireguard to send traffic, that's a + // deadlock. So, disable this feature. + // + // Note that Tailscale.com's use case is very different from ours: in their use case, users create one persistent + // tailnet per device, and it allows connections to every other thing in Tailscale that belongs to them. The + // tailnet stays up as long as your laptop or phone is turned on. 
+ // + // Our use case is different: for clients, it's a point-to-point connection to a single workspace, and lasts only as + // long as the connection. For agents, it's connections to a small number of clients (CLI or Coderd) that are being + // actively used by the end user. + envknob.Setenv("TS_DEBUG_TRIM_WIREGUARD", "false") } type Options struct { From 1baad7602ff096434d71b46c6aa219bdd2836bb2 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Tue, 15 Aug 2023 18:17:47 +0000 Subject: [PATCH 2/2] fixup! Merge branch 'main' into colin/wg-trim --- tailnet/conn.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tailnet/conn.go b/tailnet/conn.go index 11aa98067c8f4..2968feb35d72d 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -65,21 +65,21 @@ func init() { // Globally disable network namespacing. All networking happens in // userspace. netns.SetEnabled(false) - // Tailscale, by default, "trims" the set of peers down to ones that we are "actively" communicating with in - // an effort to save memory. But, we want to make sure the Wireguard connection is up and handshaked before sending - // TCP traffic over it to avoid anomalously long round-trip time of the initial handshake - // c.f. https://github.com/coder/coder/issues/7388#issuecomment-1625463069 for more details. + // Tailscale, by default, "trims" the set of peers down to ones that we are + // "actively" communicating with in an effort to save memory. Since + // Tailscale removed keep-alives, it seems like open but idle connections + // (SSH, port-forward, etc) can get trimmed fairly easily, causing hangs for + // a few seconds while the connection is set up again. // - // If Tailscale is waiting for traffic to bring up Wireguard, and we wait for Wireguard to send traffic, that's a - // deadlock. So, disable this feature. 
- // - // Note that Tailscale.com's use case is very different from ours: in their use case, users create one persistent - // tailnet per device, and it allows connections to every other thing in Tailscale that belongs to them. The + // Note that Tailscale.com's use case is very different from ours: in their + // use case, users create one persistent tailnet per device, and it allows + // connections to every other thing in Tailscale that belongs to them. The // tailnet stays up as long as your laptop or phone is turned on. // - // Our use case is different: for clients, it's a point-to-point connection to a single workspace, and lasts only as - // long as the connection. For agents, it's connections to a small number of clients (CLI or Coderd) that are being - // actively used by the end user. + // Our use case is different: for clients, it's a point-to-point connection + // to a single workspace, and lasts only as long as the connection. For + // agents, it's connections to a small number of clients (CLI or Coderd) + // that are being actively used by the end user. envknob.Setenv("TS_DEBUG_TRIM_WIREGUARD", "false") }