Skip to content

feat: replace ssh maxTimeout with keep-alive mechanism #8062

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions agent/agentssh/agentssh.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom
metrics: metrics,
}

s.srv = &ssh.Server{
srv := &ssh.Server{
ChannelHandlers: map[string]ssh.ChannelHandler{
"direct-tcpip": ssh.DirectTCPIPHandler,
"direct-streamlocal@openssh.com": directStreamLocalHandler,
Expand Down Expand Up @@ -149,9 +149,19 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom
SubsystemHandlers: map[string]ssh.SubsystemHandler{
"sftp": s.sessionHandler,
},
MaxTimeout: maxTimeout,
}

// The MaxTimeout functionality has been substituted with the introduction of the KeepAlive feature.
// In cases where very short timeouts are set, the SSH server will automatically switch to the connection timeout for both read and write operations.
if maxTimeout >= 3*time.Second {
srv.ClientAliveCountMax = 3
srv.ClientAliveInterval = maxTimeout / time.Duration(srv.ClientAliveCountMax)
srv.MaxTimeout = 0
} else {
srv.MaxTimeout = maxTimeout
}

s.srv = srv
return s, nil
}

Expand Down
4 changes: 4 additions & 0 deletions agent/agentssh/agentssh_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,7 @@ func (testSSHContext) Permissions() *gliderssh.Permissions {
func (testSSHContext) SetValue(_, _ interface{}) {
panic("not implemented")
}

func (testSSHContext) KeepAlive() *gliderssh.SessionKeepAlive {
panic("not implemented")
}
7 changes: 4 additions & 3 deletions cli/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,10 +323,11 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
Value: clibase.BoolOf(&noReap),
},
{
Flag: "ssh-max-timeout",
Default: "0",
Flag: "ssh-max-timeout",
// tcpip.KeepaliveIdleOption = 72h + 1min (forwardTCPSockOpts() in tailnet/conn.go)
Default: "72h",
Env: "CODER_AGENT_SSH_MAX_TIMEOUT",
Description: "Specify the max timeout for a SSH connection.",
Description: "Specify the max timeout for a SSH connection, it is advisable to set it to a minimum of 60s, but no more than 72h.",
Value: clibase.DurationOf(&sshMaxTimeout),
},
{
Expand Down
5 changes: 3 additions & 2 deletions cli/testdata/coder_agent_--help.golden
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ Starts the Coder workspace agent.
--prometheus-address string, $CODER_AGENT_PROMETHEUS_ADDRESS (default: 127.0.0.1:2112)
The bind address to serve Prometheus metrics.

--ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 0)
Specify the max timeout for a SSH connection.
--ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 72h)
Specify the max timeout for a SSH connection, it is advisable to set
it to a minimum of 60s, but no more than 72h.

--tailnet-listen-port int, $CODER_AGENT_TAILNET_LISTEN_PORT (default: 0)
Specify a static port for Tailscale to use for listening.
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ replace tailscale.com => github.com/coder/tailscale v0.0.0-20230522123520-747122
// repo as tailscale.com/tempfork/gliderlabs/ssh, however, we can't replace the
// subpath and it includes changes to golang.org/x/crypto/ssh as well which
// makes importing it directly a bit messy.
replace github.com/gliderlabs/ssh => github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1
replace github.com/gliderlabs/ssh => github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688

// Waiting on https://github.com/imulab/go-scim/pull/95 to merge.
replace github.com/imulab/go-scim/pkg/v2 => github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136
Expand Down Expand Up @@ -140,7 +140,7 @@ require (
github.com/robfig/cron/v3 v3.0.1
github.com/spf13/afero v1.9.3
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.3
github.com/stretchr/testify v1.8.4
github.com/swaggo/http-swagger/v2 v2.0.1
github.com/swaggo/swag v1.8.6
github.com/tabbed/pqtype v0.1.1
Expand Down
7 changes: 4 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136 h1:0RgB61LcNs
github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136/go.mod h1:VkD1P761nykiq75dz+4iFqIQIZka189tx1BQLOp0Skc=
github.com/coder/retry v1.4.0 h1:g0fojHFxcdgM3sBULqgjFDxw1UIvaCqk4ngUDu0EWag=
github.com/coder/retry v1.4.0/go.mod h1:blHMk9vs6LkoRT9ZHyuZo360cufXEhrxqvEzeMtRGoY=
github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1 h1:LBw76rEDuhNJyohve11mbvYv5CmCLmcuUQGiz7Guk50=
github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1/go.mod h1:ZSS+CUoKHDrqVakTfTWUlKSr9MtMFkC4UvtQKD7O914=
github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688 h1:udcMVKmo37Jv6Nq+Z2gCsDcF5F6zDvwArRGgUdVFD8s=
github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688/go.mod h1:aGQbuCLyhRLMzZF067xc84Lh7JDs1FKwCmF1Crl9dxQ=
github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f h1:F0Xr1d8h8dAHn7tab1HXuzYFkcjmCydnEfdMbkOhlVk=
github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
github.com/coder/terraform-provider-coder v0.8.2 h1:EPhkdpsNd8fcg6eqpAQr+W1eRrEAMtugoqujoTK4O6o=
Expand Down Expand Up @@ -786,8 +786,9 @@ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1F
github.com/stretchr/testify v1.7.4/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/swaggest/assertjson v1.7.0 h1:SKw5Rn0LQs6UvmGrIdaKQbMR1R3ncXm5KNon+QJ7jtw=
github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw=
github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM=
Expand Down
2 changes: 1 addition & 1 deletion tailnet/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ func (*Conn) forwardTCPSockOpts(port uint16) []tcpip.SettableSocketOption {

// See: https://github.com/tailscale/tailscale/blob/c7cea825aea39a00aca71ea02bab7266afc03e7c/wgengine/netstack/netstack.go#L888
if port == WorkspaceAgentSSHPort || port == 22 {
opt := tcpip.KeepaliveIdleOption(72 * time.Hour)
opt := tcpip.KeepaliveIdleOption(72*time.Hour + time.Minute) // Default ssh-max-timeout is 72h, so let's add some extra time.
opts = append(opts, &opt)
}

Expand Down