@@ -37,6 +37,7 @@ import (
37
37
"github.com/coder/coder/v2/codersdk/workspacesdk"
38
38
"github.com/coder/coder/v2/cryptorand"
39
39
"github.com/coder/coder/v2/pty"
40
+ "github.com/coder/quartz"
40
41
"github.com/coder/retry"
41
42
"github.com/coder/serpent"
42
43
)
@@ -48,6 +49,7 @@ const (
48
49
// Timing knobs for the ssh command. They are variables rather than constants,
// presumably so that tests can shorten them — TODO confirm.
var (
	// workspacePollInterval is a one-minute polling period.
	// NOTE(review): its consumer is outside this hunk; the name suggests it
	// paces workspace status polling — confirm against the caller.
	workspacePollInterval = time.Minute
	// autostopNotifyCountdown lists lead times; currently a single entry of
	// 30 minutes.
	// NOTE(review): presumably how long before autostop the user is warned —
	// confirm against the caller.
	autostopNotifyCountdown = []time.Duration{30 * time.Minute}
	// gracefulShutdownTimeout is the timeout handed to newTimeoutCloser for
	// the agent connection: how long an orderly teardown may take after the
	// command context is done before the connection is closed forcibly.
	gracefulShutdownTimeout = 5 * time.Second
)
52
54
53
55
func (r * RootCmd ) ssh () * serpent.Command {
@@ -250,7 +252,16 @@ func (r *RootCmd) ssh() *serpent.Command {
250
252
if err != nil {
251
253
return xerrors .Errorf ("dial agent: %w" , err )
252
254
}
253
- if err = stack .push ("agent conn" , conn ); err != nil {
255
+ if err = stack .push (
256
+ "agent conn" ,
257
+ // We set a long TCP timeout on SSH connections, which means if the underlying
258
+ // network fails, the SSH layer can hang for a really long time trying to send a
259
+ // shutdown message for any remote forwards (https://github.com/golang/go/issues/69484)
260
+ // Normally, we want to tear stuff down top to bottom, but if we get stuck doing it
261
+ // that way, this timeoutCloser will trip and close the underlying connection,
262
+ // bottom-up.
263
+ newTimeoutCloser (ctx , logger , gracefulShutdownTimeout , conn , quartz .NewReal ()),
264
+ ); err != nil {
254
265
return err
255
266
}
256
267
conn .AwaitReachable (ctx )
@@ -1085,3 +1096,49 @@ func getUsageAppName(usageApp string) codersdk.UsageAppName {
1085
1096
1086
1097
return codersdk .UsageAppNameSSH
1087
1098
}
1099
+
1100
+ type timeoutCloser struct {
1101
+ target io.Closer
1102
+ closeCalled chan struct {}
1103
+
1104
+ // for testing
1105
+ clock quartz.Clock
1106
+ }
1107
+
1108
+ func newTimeoutCloser (
1109
+ ctx context.Context , logger slog.Logger , timeout time.Duration , target io.Closer , clock quartz.Clock ,
1110
+ ) * timeoutCloser {
1111
+ b := & timeoutCloser {
1112
+ target : target ,
1113
+ closeCalled : make (chan struct {}),
1114
+ clock : clock ,
1115
+ }
1116
+ go b .waitForCtxOrClose (ctx , logger , timeout )
1117
+ return b
1118
+ }
1119
+
1120
+ func (t * timeoutCloser ) waitForCtxOrClose (ctx context.Context , logger slog.Logger , timeout time.Duration ) {
1121
+ select {
1122
+ case <- t .closeCalled :
1123
+ return
1124
+ case <- ctx .Done ():
1125
+ }
1126
+ tmr := t .clock .NewTimer (timeout , "timeoutCloser" , "waitForCtxOrClose" )
1127
+ defer tmr .Stop ()
1128
+ select {
1129
+ case <- t .closeCalled :
1130
+ return
1131
+ case <- tmr .C :
1132
+ logger .Warn (ctx , "timed out waiting for graceful shutdown" )
1133
+ err := t .target .Close ()
1134
+ if err != nil {
1135
+ logger .Debug (ctx , "error closing target" , slog .Error (err ))
1136
+ }
1137
+ }
1138
+ }
1139
+
1140
+ // Close should only be called at most once, e.g. in the closerStack
1141
+ func (t * timeoutCloser ) Close () error {
1142
+ close (t .closeCalled )
1143
+ return t .target .Close ()
1144
+ }
0 commit comments