-
Notifications
You must be signed in to change notification settings - Fork 979
feat(cli/ssh): simplify log file flags #7863
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
4b3b2bd
82ce7a2
ad2c946
1ca5930
d7b26ad
48cb9b2
3c43cfc
a1b68e5
23a6833
f17e7ce
5a311b3
65cdbcc
d1cb325
8471555
6593dfe
c5a1968
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,9 +10,9 @@ import ( | |
"net/url" | ||
"os" | ||
"os/exec" | ||
"path" | ||
"path/filepath" | ||
"strings" | ||
"sync" | ||
"time" | ||
|
||
"github.com/gen2brain/beeep" | ||
|
@@ -50,8 +50,7 @@ func (r *RootCmd) ssh() *clibase.Cmd { | |
identityAgent string | ||
wsPollInterval time.Duration | ||
noWait bool | ||
logDir string | ||
logToFile bool | ||
logDirPath string | ||
) | ||
client := new(codersdk.Client) | ||
cmd := &clibase.Cmd{ | ||
|
@@ -74,24 +73,34 @@ func (r *RootCmd) ssh() *clibase.Cmd { | |
logger.Error(ctx, "command exit", slog.Error(retErr)) | ||
} | ||
}() | ||
if logToFile { | ||
// we need a way to ensure different ssh invocations don't clobber | ||
// each other's logs. Date-time strings will likely have collisions | ||
// in unit tests and/or scripts unless we extend precision out to | ||
// sub-millisecond, which seems unwieldy. A simple 5-character random | ||
// string will do it, since the operating system already tracks | ||
// dates and times for file IO. | ||
qual, err := cryptorand.String(5) | ||
if logDirPath != "" { | ||
nonce, err := cryptorand.StringCharset(cryptorand.Lower, 5) | ||
if err != nil { | ||
return xerrors.Errorf("generate random qualifier: %w", err) | ||
return xerrors.Errorf("generate nonce: %w", err) | ||
} | ||
logPth := path.Join(logDir, fmt.Sprintf("coder-ssh-%s.log", qual)) | ||
logFile, err := os.Create(logPth) | ||
logFilePath := filepath.Join( | ||
logDirPath, | ||
fmt.Sprintf( | ||
"coder-ssh-%s-%s.log", | ||
// The time portion makes it easier to find the right | ||
// log file. | ||
time.Now().Format("20060102-150405"), | ||
// The nonce prevents collisions, as SSH invocations | ||
// frequently happen in parallel. | ||
nonce, | ||
), | ||
) | ||
logFile, err := os.OpenFile( | ||
logFilePath, | ||
os.O_CREATE|os.O_APPEND|os.O_WRONLY|os.O_EXCL, | ||
0o600, | ||
) | ||
if err != nil { | ||
return xerrors.Errorf("error opening %s for logging: %w", logPth, err) | ||
return xerrors.Errorf("error opening %s for logging: %w", logDirPath, err) | ||
} | ||
logger = slog.Make(sloghuman.Sink(logFile)) | ||
defer logFile.Close() | ||
|
||
logger = slog.Make(sloghuman.Sink(logFile)) | ||
if r.verbose { | ||
logger = logger.Leveled(slog.LevelDebug) | ||
} | ||
|
@@ -100,6 +109,16 @@ func (r *RootCmd) ssh() *clibase.Cmd { | |
client.Logger = logger | ||
} | ||
|
||
// This WaitGroup solves for a race condition where we were logging | ||
// while closing the log file in in a defer. It probably solves | ||
// others too. | ||
// | ||
// Its position in this function is important. It must be after | ||
// the logger is created but before any goroutines or wind-down | ||
// defers (e.g. context cancels) are declared. | ||
var wg sync.WaitGroup | ||
defer wg.Wait() | ||
|
||
workspace, workspaceAgent, err := getWorkspaceAndAgent(ctx, inv, client, codersdk.Me, inv.Args[0]) | ||
if err != nil { | ||
return err | ||
|
@@ -166,9 +185,24 @@ func (r *RootCmd) ssh() *clibase.Cmd { | |
return xerrors.Errorf("connect SSH: %w", err) | ||
} | ||
defer rawSSH.Close() | ||
go watchAndClose(ctx, rawSSH.Close, logger, client, workspace) | ||
|
||
wg.Add(1) | ||
go func() { | ||
defer wg.Done() | ||
watchAndClose(ctx, func() error { | ||
rawSSH.Close() | ||
// If we don't close Stdin, the io.Copy below may | ||
// block indefinitely on Stdin Read. | ||
if rc, ok := inv.Stdin.(io.Closer); ok { | ||
rc.Close() | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This smells funny in that it shouldn't be the job of the cli command to close stdin. Also, in the case where rawSSH goes down unexpectedly, but the workspace stays up, I think that we could still be blocked on reading from stdin, creating a deadlock with the waitgroup. It should be the job of the clibase invoker to close stdin when the command is done -- but here that can create a deadlock with the waitgroup, since the main command won't return until the waitgroup is done. Instead of considering the waitgroup to be all the command goroutines except the main one, why don't we make the waitgroup all the command goroutines including the main one? Then we can have a separate goroutine that just waits for the group, and closes the log file. e.g.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree my solution is janky. The challenge with closing the logfile in a separate goroutine is it creates a leak — which was the main thing I was trying to fix when opening this PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Closing in a separate goroutine is not a leak (unless the waitgroup fails to complete). It just waits for the other things to finish, closes the file, then exits. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And, it's not just a question of jankiness. As written, it's still bugged in the case where the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Maybe leak isn't the best word. It can show up in our leak detector and cause test flakiness, even though in production there is no leak. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I don't understand this. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm worried about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Have you tried it and seen I can't imagine this would be the case for waiting for a waitgroup and closing a file, but if for some reason the goroutine takes longer to resolve than There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Didn't know about the retry. I'll implement your proposal. |
||
return nil | ||
}, logger, client, workspace) | ||
}() | ||
|
||
wg.Add(1) | ||
go func() { | ||
defer wg.Done() | ||
// Ensure stdout copy closes incase stdin is closed | ||
// unexpectedly. Typically we wouldn't worry about | ||
// this since OpenSSH should kill the proxy command. | ||
|
@@ -201,19 +235,24 @@ func (r *RootCmd) ssh() *clibase.Cmd { | |
return xerrors.Errorf("ssh session: %w", err) | ||
} | ||
defer sshSession.Close() | ||
go watchAndClose( | ||
ctx, | ||
func() error { | ||
err := sshSession.Close() | ||
logger.Debug(ctx, "session close", slog.Error(err)) | ||
err = sshClient.Close() | ||
logger.Debug(ctx, "client close", slog.Error(err)) | ||
return nil | ||
}, | ||
logger, | ||
client, | ||
workspace, | ||
) | ||
|
||
wg.Add(1) | ||
go func() { | ||
defer wg.Done() | ||
watchAndClose( | ||
ctx, | ||
func() error { | ||
err := sshSession.Close() | ||
logger.Debug(ctx, "session close", slog.Error(err)) | ||
err = sshClient.Close() | ||
logger.Debug(ctx, "client close", slog.Error(err)) | ||
return nil | ||
}, | ||
logger, | ||
client, | ||
workspace, | ||
) | ||
}() | ||
|
||
if identityAgent == "" { | ||
identityAgent = os.Getenv("SSH_AUTH_SOCK") | ||
|
@@ -354,18 +393,11 @@ func (r *RootCmd) ssh() *clibase.Cmd { | |
Value: clibase.BoolOf(&noWait), | ||
}, | ||
{ | ||
Flag: "log-dir", | ||
Default: os.TempDir(), | ||
Description: "Specify the location for the log files.", | ||
Env: "CODER_SSH_LOG_DIR", | ||
Value: clibase.StringOf(&logDir), | ||
}, | ||
{ | ||
Flag: "log-to-file", | ||
Flag: "log-dir", | ||
Description: "Specify the directory containing SSH diagnostic log files.", | ||
Env: "CODER_SSH_LOG_DIR", | ||
FlagShorthand: "l", | ||
Env: "CODER_SSH_LOG_TO_FILE", | ||
Description: "Enable diagnostic logging to file.", | ||
Value: clibase.BoolOf(&logToFile), | ||
Value: clibase.StringOf(&logDirPath), | ||
}, | ||
} | ||
return cmd | ||
|
Uh oh!
There was an error while loading. Please reload this page.