From 615450c3dcfb1fdee6aa504ccc45b2fbc9f5e8f6 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Fri, 16 Jun 2023 11:39:27 +0200 Subject: [PATCH 1/4] Bump up coder/ssh --- go.mod | 4 ++-- go.sum | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index cfccde07c06af..f565321aacb8e 100644 --- a/go.mod +++ b/go.mod @@ -45,7 +45,7 @@ replace tailscale.com => github.com/coder/tailscale v0.0.0-20230522123520-747122 // repo as tailscale.com/tempfork/gliderlabs/ssh, however, we can't replace the // subpath and it includes changes to golang.org/x/crypto/ssh as well which // makes importing it directly a bit messy. -replace github.com/gliderlabs/ssh => github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1 +replace github.com/gliderlabs/ssh => github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688 // Waiting on https://github.com/imulab/go-scim/pull/95 to merge. replace github.com/imulab/go-scim/pkg/v2 => github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136 @@ -140,7 +140,7 @@ require ( github.com/robfig/cron/v3 v3.0.1 github.com/spf13/afero v1.9.3 github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.8.3 + github.com/stretchr/testify v1.8.4 github.com/swaggo/http-swagger/v2 v2.0.1 github.com/swaggo/swag v1.8.6 github.com/tabbed/pqtype v0.1.1 diff --git a/go.sum b/go.sum index dd6f0facbfb65..38ad723ede24c 100644 --- a/go.sum +++ b/go.sum @@ -187,8 +187,8 @@ github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136 h1:0RgB61LcNs github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136/go.mod h1:VkD1P761nykiq75dz+4iFqIQIZka189tx1BQLOp0Skc= github.com/coder/retry v1.4.0 h1:g0fojHFxcdgM3sBULqgjFDxw1UIvaCqk4ngUDu0EWag= github.com/coder/retry v1.4.0/go.mod h1:blHMk9vs6LkoRT9ZHyuZo360cufXEhrxqvEzeMtRGoY= -github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1 h1:LBw76rEDuhNJyohve11mbvYv5CmCLmcuUQGiz7Guk50= -github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1/go.mod h1:ZSS+CUoKHDrqVakTfTWUlKSr9MtMFkC4UvtQKD7O914= +github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688 h1:udcMVKmo37Jv6Nq+Z2gCsDcF5F6zDvwArRGgUdVFD8s= +github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688/go.mod h1:aGQbuCLyhRLMzZF067xc84Lh7JDs1FKwCmF1Crl9dxQ= github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f h1:F0Xr1d8h8dAHn7tab1HXuzYFkcjmCydnEfdMbkOhlVk= github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA= github.com/coder/terraform-provider-coder v0.8.2 h1:EPhkdpsNd8fcg6eqpAQr+W1eRrEAMtugoqujoTK4O6o= @@ -786,8 +786,9 @@ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1F github.com/stretchr/testify v1.7.4/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/swaggest/assertjson v1.7.0 h1:SKw5Rn0LQs6UvmGrIdaKQbMR1R3ncXm5KNon+QJ7jtw= github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw= github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM= From 8334f26265f49b8e5b7ed1b8706eef171eb652e0 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Fri, 16 Jun 2023 12:03:45 +0200 Subject: [PATCH 2/4] feat: Set default agent timeout to ~72h --- agent/agentssh/agentssh.go | 14 ++++++++++++-- agent/agentssh/agentssh_internal_test.go | 4 ++++ cli/agent.go | 5 +++-- cli/testdata/coder_agent_--help.golden | 2 +- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/agent/agentssh/agentssh.go b/agent/agentssh/agentssh.go index 6f39d116acf00..9519deaa1a7e0 100644 --- a/agent/agentssh/agentssh.go +++ b/agent/agentssh/agentssh.go @@ -105,7 +105,7 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom metrics: metrics, } - s.srv = &ssh.Server{ + srv := &ssh.Server{ ChannelHandlers: map[string]ssh.ChannelHandler{ "direct-tcpip": ssh.DirectTCPIPHandler, "direct-streamlocal@openssh.com": directStreamLocalHandler, @@ -149,9 +149,19 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom SubsystemHandlers: map[string]ssh.SubsystemHandler{ "sftp": s.sessionHandler, }, - MaxTimeout: maxTimeout, } + // The MaxTimeout functionality has been substituted with the introduction of the KeepAlive feature. + // In cases where very short timeouts are set, the SSH server will automatically switch to the connection timeout for both read and write operations. + if maxTimeout >= 3*time.Second { + srv.ClientAliveInterval = maxTimeout / 3 + srv.ClientAliveCountMax = 3 + srv.MaxTimeout = 0 + } else { + srv.MaxTimeout = maxTimeout + } + + s.srv = srv return s, nil } diff --git a/agent/agentssh/agentssh_internal_test.go b/agent/agentssh/agentssh_internal_test.go index 8fe9c58a79091..ba4295bdbc149 100644 --- a/agent/agentssh/agentssh_internal_test.go +++ b/agent/agentssh/agentssh_internal_test.go @@ -191,3 +191,7 @@ func (testSSHContext) Permissions() *gliderssh.Permissions { func (testSSHContext) SetValue(_, _ interface{}) { panic("not implemented") } + +func (testSSHContext) KeepAlive() *gliderssh.SessionKeepAlive { + panic("not implemented") +} diff --git a/cli/agent.go b/cli/agent.go index a12f76decc7a2..235985184247a 100644 --- a/cli/agent.go +++ b/cli/agent.go @@ -323,8 +323,9 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd { Value: clibase.BoolOf(&noReap), }, { - Flag: "ssh-max-timeout", - Default: "0", + Flag: "ssh-max-timeout", + // tcpip.KeepaliveIdleOption = 72h (forwardTCPSockOpts() in tailnet/conn.go) + Default: fmt.Sprintf("%v", 72*time.Hour-time.Minute), Env: "CODER_AGENT_SSH_MAX_TIMEOUT", Description: "Specify the max timeout for a SSH connection.", Value: clibase.DurationOf(&sshMaxTimeout), diff --git a/cli/testdata/coder_agent_--help.golden b/cli/testdata/coder_agent_--help.golden index 90fff753cd365..be5914881b0c3 100644 --- a/cli/testdata/coder_agent_--help.golden +++ b/cli/testdata/coder_agent_--help.golden @@ -30,7 +30,7 @@ Starts the Coder workspace agent. --prometheus-address string, $CODER_AGENT_PROMETHEUS_ADDRESS (default: 127.0.0.1:2112) The bind address to serve Prometheus metrics. - --ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 0) + --ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 71h59m0s) Specify the max timeout for a SSH connection. --tailnet-listen-port int, $CODER_AGENT_TAILNET_LISTEN_PORT (default: 0) From ad007adb09cae285f87694380bebd93a44e0def2 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Fri, 16 Jun 2023 14:20:35 +0200 Subject: [PATCH 3/4] Address PR comments --- agent/agentssh/agentssh.go | 2 +- cli/agent.go | 6 +++--- cli/testdata/coder_agent_--help.golden | 5 +++-- tailnet/conn.go | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/agent/agentssh/agentssh.go b/agent/agentssh/agentssh.go index 9519deaa1a7e0..182bfe1883b8c 100644 --- a/agent/agentssh/agentssh.go +++ b/agent/agentssh/agentssh.go @@ -154,8 +154,8 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom // The MaxTimeout functionality has been substituted with the introduction of the KeepAlive feature. // In cases where very short timeouts are set, the SSH server will automatically switch to the connection timeout for both read and write operations. if maxTimeout >= 3*time.Second { - srv.ClientAliveInterval = maxTimeout / 3 srv.ClientAliveCountMax = 3 + srv.ClientAliveInterval = maxTimeout / time.Duration(srv.ClientAliveCountMax) srv.MaxTimeout = 0 } else { srv.MaxTimeout = maxTimeout diff --git a/cli/agent.go b/cli/agent.go index 235985184247a..c8b54facbcb6f 100644 --- a/cli/agent.go +++ b/cli/agent.go @@ -324,10 +324,10 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd { }, { Flag: "ssh-max-timeout", - // tcpip.KeepaliveIdleOption = 72h (forwardTCPSockOpts() in tailnet/conn.go) - Default: fmt.Sprintf("%v", 72*time.Hour-time.Minute), + // tcpip.KeepaliveIdleOption = 72h + 1min (forwardTCPSockOpts() in tailnet/conn.go) + Default: "72h", Env: "CODER_AGENT_SSH_MAX_TIMEOUT", - Description: "Specify the max timeout for a SSH connection.", + Description: "Specify the max timeout for a SSH connection, it is advisable to set it to a minimum of 60 seconds.", Value: clibase.DurationOf(&sshMaxTimeout), }, { diff --git a/cli/testdata/coder_agent_--help.golden b/cli/testdata/coder_agent_--help.golden index be5914881b0c3..fbb1ffedecba5 100644 --- a/cli/testdata/coder_agent_--help.golden +++ b/cli/testdata/coder_agent_--help.golden @@ -30,8 +30,9 @@ Starts the Coder workspace agent. --prometheus-address string, $CODER_AGENT_PROMETHEUS_ADDRESS (default: 127.0.0.1:2112) The bind address to serve Prometheus metrics. - --ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 71h59m0s) - Specify the max timeout for a SSH connection. + --ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 72h) + Specify the max timeout for a SSH connection, it is advisable to set + it to a minimum of 60 seconds. --tailnet-listen-port int, $CODER_AGENT_TAILNET_LISTEN_PORT (default: 0) Specify a static port for Tailscale to use for listening. diff --git a/tailnet/conn.go b/tailnet/conn.go index 20a00d94d6804..e36f40d8b2d95 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -769,7 +769,7 @@ func (*Conn) forwardTCPSockOpts(port uint16) []tcpip.SettableSocketOption { // See: https://github.com/tailscale/tailscale/blob/c7cea825aea39a00aca71ea02bab7266afc03e7c/wgengine/netstack/netstack.go#L888 if port == WorkspaceAgentSSHPort || port == 22 { - opt := tcpip.KeepaliveIdleOption(72 * time.Hour) + opt := tcpip.KeepaliveIdleOption(72*time.Hour + time.Minute) // Default ssh-max-timeout is 72h, so let's add some extra time. opts = append(opts, &opt) } From b26a825800c3f3c4f1fc6ffb830c350448e5329d Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Fri, 16 Jun 2023 14:23:36 +0200 Subject: [PATCH 4/4] Fix --- cli/agent.go | 2 +- cli/testdata/coder_agent_--help.golden | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/agent.go b/cli/agent.go index c8b54facbcb6f..f895da0927c4d 100644 --- a/cli/agent.go +++ b/cli/agent.go @@ -327,7 +327,7 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd { // tcpip.KeepaliveIdleOption = 72h + 1min (forwardTCPSockOpts() in tailnet/conn.go) Default: "72h", Env: "CODER_AGENT_SSH_MAX_TIMEOUT", - Description: "Specify the max timeout for a SSH connection, it is advisable to set it to a minimum of 60 seconds.", + Description: "Specify the max timeout for a SSH connection, it is advisable to set it to a minimum of 60s, but no more than 72h.", Value: clibase.DurationOf(&sshMaxTimeout), }, { diff --git a/cli/testdata/coder_agent_--help.golden b/cli/testdata/coder_agent_--help.golden index fbb1ffedecba5..723afce139810 100644 --- a/cli/testdata/coder_agent_--help.golden +++ b/cli/testdata/coder_agent_--help.golden @@ -32,7 +32,7 @@ Starts the Coder workspace agent. --ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 72h) Specify the max timeout for a SSH connection, it is advisable to set - it to a minimum of 60 seconds. + it to a minimum of 60s, but no more than 72h. --tailnet-listen-port int, $CODER_AGENT_TAILNET_LISTEN_PORT (default: 0) Specify a static port for Tailscale to use for listening.