coder · ammario · Jun 12, 2023 · Jun 5, 2023 · Jun 6, 2023 · Jun 6, 2023
diff --git a/cli/ssh.go b/cli/ssh.go
@@ -10,9 +10,9 @@ import (
 	"net/url"
 	"os"
 	"os/exec"
-	"path"
 	"path/filepath"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/gen2brain/beeep"
@@ -50,8 +50,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
 		identityAgent  string
 		wsPollInterval time.Duration
 		noWait         bool
-		logDir         string
-		logToFile      bool
+		logDirPath     string
 	)
 	client := new(codersdk.Client)
 	cmd := &clibase.Cmd{
@@ -74,24 +73,34 @@ func (r *RootCmd) ssh() *clibase.Cmd {
 					logger.Error(ctx, "command exit", slog.Error(retErr))
 				}
 			}()
-			if logToFile {
-				// we need a way to ensure different ssh invocations don't clobber
-				// each other's logs. Date-time strings will likely have collisions
-				// in unit tests and/or scripts unless we extend precision out to
-				// sub-millisecond, which seems unwieldy.  A simple 5-character random
-				// string will do it, since the operating system already tracks
-				// dates and times for file IO.
-				qual, err := cryptorand.String(5)
+			if logDirPath != "" {
+				nonce, err := cryptorand.StringCharset(cryptorand.Lower, 5)
 				if err != nil {
-					return xerrors.Errorf("generate random qualifier: %w", err)
+					return xerrors.Errorf("generate nonce: %w", err)
 				}
-				logPth := path.Join(logDir, fmt.Sprintf("coder-ssh-%s.log", qual))
-				logFile, err := os.Create(logPth)
+				logFilePath := filepath.Join(
+					logDirPath,
+					fmt.Sprintf(
+						"coder-ssh-%s-%s.log",
+						// The time portion makes it easier to find the right
+						// log file.
+						time.Now().Format("20060102-150405"),
+						// The nonce prevents collisions, as SSH invocations
+						// frequently happen in parallel.
+						nonce,
+					),
+				)
+				logFile, err := os.OpenFile(
+					logFilePath,
+					os.O_CREATE|os.O_APPEND|os.O_WRONLY|os.O_EXCL,
+					0o600,
+				)
 				if err != nil {
-					return xerrors.Errorf("error opening %s for logging: %w", logPth, err)
+					return xerrors.Errorf("error opening %s for logging: %w", logDirPath, err)
 				}
-				logger = slog.Make(sloghuman.Sink(logFile))
 				defer logFile.Close()
+
+				logger = slog.Make(sloghuman.Sink(logFile))
 				if r.verbose {
 					logger = logger.Leveled(slog.LevelDebug)
 				}
@@ -100,6 +109,16 @@ func (r *RootCmd) ssh() *clibase.Cmd {
 				client.Logger = logger
 			}
 
+			// This WaitGroup fixes a race condition where we were logging
+			// while closing the log file in a defer. It probably fixes
+			// others too.
+			//
+			// Its position in this function is important. It must be after
+			// the logger is created but before any goroutines or wind-down
+			// defers (e.g. context cancels) are declared.
+			var wg sync.WaitGroup
+			defer wg.Wait()
+
 			workspace, workspaceAgent, err := getWorkspaceAndAgent(ctx, inv, client, codersdk.Me, inv.Args[0])
 			if err != nil {
 				return err
@@ -166,9 +185,24 @@ func (r *RootCmd) ssh() *clibase.Cmd {
 					return xerrors.Errorf("connect SSH: %w", err)
 				}
 				defer rawSSH.Close()
-				go watchAndClose(ctx, rawSSH.Close, logger, client, workspace)
 
+				wg.Add(1)
+				go func() {
+					defer wg.Done()
+					watchAndClose(ctx, func() error {
+						rawSSH.Close()
+						// If we don't close Stdin, the io.Copy below may
+						// block indefinitely on Stdin Read.
+						if rc, ok := inv.Stdin.(io.Closer); ok {
+							rc.Close()
+						}
+						return nil
+					}, logger, client, workspace)
+				}()
+
+				wg.Add(1)
 				go func() {
+					defer wg.Done()
 					// Ensure stdout copy closes incase stdin is closed
 					// unexpectedly. Typically we wouldn't worry about
 					// this since OpenSSH should kill the proxy command.
@@ -201,19 +235,24 @@ func (r *RootCmd) ssh() *clibase.Cmd {
 				return xerrors.Errorf("ssh session: %w", err)
 			}
 			defer sshSession.Close()
-			go watchAndClose(
-				ctx,
-				func() error {
-					err := sshSession.Close()
-					logger.Debug(ctx, "session close", slog.Error(err))
-					err = sshClient.Close()
-					logger.Debug(ctx, "client close", slog.Error(err))
-					return nil
-				},
-				logger,
-				client,
-				workspace,
-			)
+
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				watchAndClose(
+					ctx,
+					func() error {
+						err := sshSession.Close()
+						logger.Debug(ctx, "session close", slog.Error(err))
+						err = sshClient.Close()
+						logger.Debug(ctx, "client close", slog.Error(err))
+						return nil
+					},
+					logger,
+					client,
+					workspace,
+				)
+			}()
 
 			if identityAgent == "" {
 				identityAgent = os.Getenv("SSH_AUTH_SOCK")
@@ -354,18 +393,11 @@ func (r *RootCmd) ssh() *clibase.Cmd {
 			Value:       clibase.BoolOf(&noWait),
 		},
 		{
-			Flag:        "log-dir",
-			Default:     os.TempDir(),
-			Description: "Specify the location for the log files.",
-			Env:         "CODER_SSH_LOG_DIR",
-			Value:       clibase.StringOf(&logDir),
-		},
-		{
-			Flag:          "log-to-file",
+			Flag:          "log-dir",
+			Description:   "Specify the directory containing SSH diagnostic log files.",
+			Env:           "CODER_SSH_LOG_DIR",
 			FlagShorthand: "l",
-			Env:           "CODER_SSH_LOG_TO_FILE",
-			Description:   "Enable diagnostic logging to file.",
-			Value:         clibase.BoolOf(&logToFile),
+			Value:         clibase.StringOf(&logDirPath),
 		},
 	}
 	return cmd

diff --git a/cli/ssh_test.go b/cli/ssh_test.go
@@ -261,7 +261,7 @@ func TestSSH(t *testing.T) {
 		client, workspace, agentToken := setupWorkspaceForAgent(t, nil)
 		_, _ = tGoContext(t, func(ctx context.Context) {
 			// Run this async so the SSH command has to wait for
-			// the build and agent to connect!
+			// the build and agent to connect.
 			agentClient := agentsdk.New(client.URL)
 			agentClient.SetSessionToken(agentToken)
 			agentCloser := agent.New(agent.Options{
@@ -411,20 +411,14 @@ func TestSSH(t *testing.T) {
 	t.Run("FileLogging", func(t *testing.T) {
 		t.Parallel()
 
-		dir := t.TempDir()
+		logDir := t.TempDir()
 
 		client, workspace, agentToken := setupWorkspaceForAgent(t, nil)
-		inv, root := clitest.New(t, "ssh", workspace.Name, "-l", "--log-dir", dir)
+		inv, root := clitest.New(t, "ssh", "-l", logDir, workspace.Name)
 		clitest.SetupConfig(t, client, root)
 		pty := ptytest.New(t).Attach(inv)
+		w := clitest.StartWithWaiter(t, inv)
 
-		ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
-		defer cancel()
-
-		cmdDone := tGo(t, func() {
-			err := inv.WithContext(ctx).Run()
-			assert.NoError(t, err)
-		})
 		pty.ExpectMatch("Waiting")
 
 		agentClient := agentsdk.New(client.URL)
@@ -439,17 +433,11 @@ func TestSSH(t *testing.T) {
 
 		// Shells on Mac, Windows, and Linux all exit shells with the "exit" command.
 		pty.WriteLine("exit")
-		<-cmdDone
+		w.RequireSuccess()
 
-		entries, err := os.ReadDir(dir)
+		ents, err := os.ReadDir(logDir)
 		require.NoError(t, err)
-		for _, e := range entries {
-			t.Logf("logdir entry: %s", e.Name())
-			if strings.HasPrefix(e.Name(), "coder-ssh") {
-				return
-			}
-		}
-		t.Fatal("failed to find ssh logfile")
+		require.Len(t, ents, 1, "expected one file in logdir %s", logDir)
 	})
 }
 

diff --git a/cli/testdata/coder_ssh_--help.golden b/cli/testdata/coder_ssh_--help.golden
@@ -18,11 +18,8 @@ Start a shell into a workspace
           Specifies which identity agent to use (overrides $SSH_AUTH_SOCK),
           forward agent must also be enabled.
 
-      --log-dir string, $CODER_SSH_LOG_DIR (default: /tmp)
-          Specify the location for the log files.
-
-  -l, --log-to-file bool, $CODER_SSH_LOG_TO_FILE
-          Enable diagnostic logging to file.
+  -l, --log-dir string, $CODER_SSH_LOG_DIR
+          Specify the directory containing SSH diagnostic log files.
 
       --no-wait bool, $CODER_SSH_NO_WAIT
           Specifies whether to wait for a workspace to become ready before

diff --git a/docs/cli/ssh.md b/docs/cli/ssh.md
@@ -39,24 +39,14 @@ Specifies whether to forward the GPG agent. Unsupported on Windows workspaces, b
 
 Specifies which identity agent to use (overrides $SSH_AUTH_SOCK), forward agent must also be enabled.
 
-### --log-dir
+### -l, --log-dir
 
 |             |                                 |
 | ----------- | ------------------------------- |
 | Type        | <code>string</code>             |
 | Environment | <code>$CODER_SSH_LOG_DIR</code> |
-| Default     | <code>/tmp</code>               |
 
-Specify the location for the log files.
-
-### -l, --log-to-file
-
-|             |                                     |
-| ----------- | ----------------------------------- |
-| Type        | <code>bool</code>                   |
-| Environment | <code>$CODER_SSH_LOG_TO_FILE</code> |
-
-Enable diagnostic logging to file.
+Specify the directory containing SSH diagnostic log files.
 
 ### --no-wait