coder · spikecurtis · Apr 24, 2023 · Apr 11, 2023 · Apr 12, 2023 · Apr 12, 2023
diff --git a/agent/agent.go b/agent/agent.go
@@ -1045,7 +1045,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
 		if err = a.trackConnGoroutine(func() {
 			buffer := make([]byte, 1024)
 			for {
-				read, err := rpty.ptty.Output().Read(buffer)
+				read, err := rpty.ptty.OutputReader().Read(buffer)
 				if err != nil {
 					// When the PTY is closed, this is triggered.
 					break
@@ -1138,7 +1138,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
 			logger.Warn(ctx, "read conn", slog.Error(err))
 			return nil
 		}
-		_, err = rpty.ptty.Input().Write([]byte(req.Data))
+		_, err = rpty.ptty.InputWriter().Write([]byte(req.Data))
 		if err != nil {
 			logger.Warn(ctx, "write to pty", slog.Error(err))
 			return nil
@@ -1358,7 +1358,7 @@ type reconnectingPTY struct {
 	circularBuffer      *circbuf.Buffer
 	circularBufferMutex sync.RWMutex
 	timeout             *time.Timer
-	ptty                pty.PTY
+	ptty                pty.PTYCmd
 }
 
 // Close ends all connections to the reconnecting

diff --git a/agent/agent_test.go b/agent/agent_test.go
@@ -45,6 +45,7 @@ import (
 	"github.com/coder/coder/coderd/httpapi"
 	"github.com/coder/coder/codersdk"
 	"github.com/coder/coder/codersdk/agentsdk"
+	"github.com/coder/coder/pty"
 	"github.com/coder/coder/pty/ptytest"
 	"github.com/coder/coder/tailnet"
 	"github.com/coder/coder/tailnet/tailnettest"
@@ -481,17 +482,10 @@ func TestAgent_TCPLocalForwarding(t *testing.T) {
 		}
 	}()
 
-	pty := ptytest.New(t)
-
-	cmd := setupSSHCommand(t, []string{"-L", fmt.Sprintf("%d:127.0.0.1:%d", randomPort, remotePort)}, []string{"sleep", "5"})
-	cmd.Stdin = pty.Input()
-	cmd.Stdout = pty.Output()
-	cmd.Stderr = pty.Output()
-	err = cmd.Start()
-	require.NoError(t, err)
+	_, proc := setupSSHCommand(t, []string{"-L", fmt.Sprintf("%d:127.0.0.1:%d", randomPort, remotePort)}, []string{"sleep", "5"})
 
 	go func() {
-		err := cmd.Wait()
+		err := proc.Wait()
 		select {
 		case <-done:
 		default:
@@ -523,7 +517,7 @@ func TestAgent_TCPLocalForwarding(t *testing.T) {
 
 	<-done
 
-	_ = cmd.Process.Kill()
+	_ = proc.Kill()
 }
 
 //nolint:paralleltest // This test reserves a port.
@@ -562,17 +556,10 @@ func TestAgent_TCPRemoteForwarding(t *testing.T) {
 		}
 	}()
 
-	pty := ptytest.New(t)
-
-	cmd := setupSSHCommand(t, []string{"-R", fmt.Sprintf("127.0.0.1:%d:127.0.0.1:%d", randomPort, localPort)}, []string{"sleep", "5"})
-	cmd.Stdin = pty.Input()
-	cmd.Stdout = pty.Output()
-	cmd.Stderr = pty.Output()
-	err = cmd.Start()
-	require.NoError(t, err)
+	_, proc := setupSSHCommand(t, []string{"-R", fmt.Sprintf("127.0.0.1:%d:127.0.0.1:%d", randomPort, localPort)}, []string{"sleep", "5"})
 
 	go func() {
-		err := cmd.Wait()
+		err := proc.Wait()
 		select {
 		case <-done:
 		default:
@@ -604,7 +591,7 @@ func TestAgent_TCPRemoteForwarding(t *testing.T) {
 
 	<-done
 
-	_ = cmd.Process.Kill()
+	_ = proc.Kill()
 }
 
 func TestAgent_UnixLocalForwarding(t *testing.T) {
@@ -641,17 +628,10 @@ func TestAgent_UnixLocalForwarding(t *testing.T) {
 		}
 	}()
 
-	pty := ptytest.New(t)
-
-	cmd := setupSSHCommand(t, []string{"-L", fmt.Sprintf("%s:%s", localSocketPath, remoteSocketPath)}, []string{"sleep", "5"})
-	cmd.Stdin = pty.Input()
-	cmd.Stdout = pty.Output()
-	cmd.Stderr = pty.Output()
-	err = cmd.Start()
-	require.NoError(t, err)
+	_, proc := setupSSHCommand(t, []string{"-L", fmt.Sprintf("%s:%s", localSocketPath, remoteSocketPath)}, []string{"sleep", "5"})
 
 	go func() {
-		err := cmd.Wait()
+		err := proc.Wait()
 		select {
 		case <-done:
 		default:
@@ -676,7 +656,7 @@ func TestAgent_UnixLocalForwarding(t *testing.T) {
 	_ = conn.Close()
 	<-done
 
-	_ = cmd.Process.Kill()
+	_ = proc.Kill()
 }
 
 func TestAgent_UnixRemoteForwarding(t *testing.T) {
@@ -713,17 +693,10 @@ func TestAgent_UnixRemoteForwarding(t *testing.T) {
 		}
 	}()
 
-	pty := ptytest.New(t)
-
-	cmd := setupSSHCommand(t, []string{"-R", fmt.Sprintf("%s:%s", remoteSocketPath, localSocketPath)}, []string{"sleep", "5"})
-	cmd.Stdin = pty.Input()
-	cmd.Stdout = pty.Output()
-	cmd.Stderr = pty.Output()
-	err = cmd.Start()
-	require.NoError(t, err)
+	_, proc := setupSSHCommand(t, []string{"-R", fmt.Sprintf("%s:%s", remoteSocketPath, localSocketPath)}, []string{"sleep", "5"})
 
 	go func() {
-		err := cmd.Wait()
+		err := proc.Wait()
 		select {
 		case <-done:
 		default:
@@ -753,7 +726,7 @@ func TestAgent_UnixRemoteForwarding(t *testing.T) {
 
 	<-done
 
-	_ = cmd.Process.Kill()
+	_ = proc.Kill()
 }
 
 func TestAgent_SFTP(t *testing.T) {
@@ -1648,7 +1621,7 @@ func TestAgent_WriteVSCodeConfigs(t *testing.T) {
 	}, testutil.WaitShort, testutil.IntervalFast)
 }
 
-func setupSSHCommand(t *testing.T, beforeArgs []string, afterArgs []string) *exec.Cmd {
+func setupSSHCommand(t *testing.T, beforeArgs []string, afterArgs []string) (*ptytest.PTYCmd, pty.Process) {
 	//nolint:dogsled
 	agentConn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
 	listener, err := net.Listen("tcp", "127.0.0.1:0")
@@ -1690,7 +1663,8 @@ func setupSSHCommand(t *testing.T, beforeArgs []string, afterArgs []string) *exe
 		"host",
 	)
 	args = append(args, afterArgs...)
-	return exec.Command("ssh", args...)
+	cmd := exec.Command("ssh", args...)
+	return ptytest.Start(t, cmd)
 }
 
 func setupSSHSession(t *testing.T, options agentsdk.Manifest) *ssh.Session {

diff --git a/agent/agentssh/agentssh.go b/agent/agentssh/agentssh.go
@@ -253,102 +253,12 @@ func (s *Server) sessionStart(session ssh.Session, extraEnv []string) (retErr er
 
 	sshPty, windowSize, isPty := session.Pty()
 	if isPty {
-		// Disable minimal PTY emulation set by gliderlabs/ssh (NL-to-CRNL).
-		// See https://github.com/coder/coder/issues/3371.
-		session.DisablePTYEmulation()
-
-		if !isQuietLogin(session.RawCommand()) {
-			manifest := s.Manifest.Load()
-			if manifest != nil {
-				err = showMOTD(session, manifest.MOTDFile)
-				if err != nil {
-					s.logger.Error(ctx, "show MOTD", slog.Error(err))
-				}
-			} else {
-				s.logger.Warn(ctx, "metadata lookup failed, unable to show MOTD")
-			}
-		}
-
-		cmd.Env = append(cmd.Env, fmt.Sprintf("TERM=%s", sshPty.Term))
-
-		// The pty package sets `SSH_TTY` on supported platforms.
-		ptty, process, err := pty.Start(cmd, pty.WithPTYOption(
-			pty.WithSSHRequest(sshPty),
-			pty.WithLogger(slog.Stdlib(ctx, s.logger, slog.LevelInfo)),
-		))
-		if err != nil {
-			return xerrors.Errorf("start command: %w", err)
-		}
-		var wg sync.WaitGroup
-		defer func() {
-			defer wg.Wait()
-			closeErr := ptty.Close()
-			if closeErr != nil {
-				s.logger.Warn(ctx, "failed to close tty", slog.Error(closeErr))
-				if retErr == nil {
-					retErr = closeErr
-				}
-			}
-		}()
-		go func() {
-			for win := range windowSize {
-				resizeErr := ptty.Resize(uint16(win.Height), uint16(win.Width))
-				// If the pty is closed, then command has exited, no need to log.
-				if resizeErr != nil && !errors.Is(resizeErr, pty.ErrClosed) {
-					s.logger.Warn(ctx, "failed to resize tty", slog.Error(resizeErr))
-				}
-			}
-		}()
-		// We don't add input copy to wait group because
-		// it won't return until the session is closed.
-		go func() {
-			_, _ = io.Copy(ptty.Input(), session)
-		}()
-
-		// In low parallelism scenarios, the command may exit and we may close
-		// the pty before the output copy has started. This can result in the
-		// output being lost. To avoid this, we wait for the output copy to
-		// start before waiting for the command to exit. This ensures that the
-		// output copy goroutine will be scheduled before calling close on the
-		// pty. This shouldn't be needed because of `pty.Dup()` below, but it
-		// may not be supported on all platforms.
-		outputCopyStarted := make(chan struct{})
-		ptyOutput := func() io.ReadCloser {
-			defer close(outputCopyStarted)
-			// Try to dup so we can separate stdin and stdout closure.
-			// Once the original pty is closed, the dup will return
-			// input/output error once the buffered data has been read.
-			stdout, err := ptty.Dup()
-			if err == nil {
-				return stdout
-			}
-			// If we can't dup, we shouldn't close
-			// the fd since it's tied to stdin.
-			return readNopCloser{ptty.Output()}
-		}
-		wg.Add(1)
-		go func() {
-			// Ensure data is flushed to session on command exit, if we
-			// close the session too soon, we might lose data.
-			defer wg.Done()
-
-			stdout := ptyOutput()
-			defer stdout.Close()
-
-			_, _ = io.Copy(session, stdout)
-		}()
-		<-outputCopyStarted
-
-		err = process.Wait()
-		var exitErr *exec.ExitError
-		// ExitErrors just mean the command we run returned a non-zero exit code, which is normal
-		// and not something to be concerned about.  But, if it's something else, we should log it.
-		if err != nil && !xerrors.As(err, &exitErr) {
-			s.logger.Warn(ctx, "wait error", slog.Error(err))
-		}
-		return err
+		return s.startPTYSession(session, cmd, sshPty, windowSize)
 	}
+	return startNonPTYSession(session, cmd)
+}
 
+func startNonPTYSession(session ssh.Session, cmd *exec.Cmd) error {
 	cmd.Stdout = session
 	cmd.Stderr = session.Stderr()
 	// This blocks forever until stdin is received if we don't
@@ -368,10 +278,94 @@ func (s *Server) sessionStart(session ssh.Session, extraEnv []string) (retErr er
 	return cmd.Wait()
 }
 
-type readNopCloser struct{ io.Reader }
+// ptySession is the part of the ssh.Session interface that startPTYSession
+// uses. We use an interface here so that we can fake it in tests.
+type ptySession interface {
+	io.ReadWriter
+	Context() ssh.Context
+	DisablePTYEmulation()
+	RawCommand() string
+}
+
+func (s *Server) startPTYSession(session ptySession, cmd *exec.Cmd, sshPty ssh.Pty, windowSize <-chan ssh.Window) (retErr error) {
+	ctx := session.Context()
+	// Disable minimal PTY emulation set by gliderlabs/ssh (NL-to-CRNL).
+	// See https://github.com/coder/coder/issues/3371.
+	session.DisablePTYEmulation()
+
+	if !isQuietLogin(session.RawCommand()) {
+		manifest := s.Manifest.Load()
+		if manifest != nil {
+			err := showMOTD(session, manifest.MOTDFile)
+			if err != nil {
+				s.logger.Error(ctx, "show MOTD", slog.Error(err))
+			}
+		} else {
+			s.logger.Warn(ctx, "metadata lookup failed, unable to show MOTD")
+		}
+	}
+
+	cmd.Env = append(cmd.Env, fmt.Sprintf("TERM=%s", sshPty.Term))
+
+	// The pty package sets `SSH_TTY` on supported platforms.
+	ptty, process, err := pty.Start(cmd, pty.WithPTYOption(
+		pty.WithSSHRequest(sshPty),
+		pty.WithLogger(slog.Stdlib(ctx, s.logger, slog.LevelInfo)),
+	))
+	if err != nil {
+		return xerrors.Errorf("start command: %w", err)
+	}
+	defer func() {
+		closeErr := ptty.Close()
+		if closeErr != nil {
+			s.logger.Warn(ctx, "failed to close tty", slog.Error(closeErr))
+			if retErr == nil {
+				retErr = closeErr
+			}
+		}
+	}()
+	go func() {
+		for win := range windowSize {
+			resizeErr := ptty.Resize(uint16(win.Height), uint16(win.Width))
+			// If the pty is closed, then command has exited, no need to log.
+			if resizeErr != nil && !errors.Is(resizeErr, pty.ErrClosed) {
+				s.logger.Warn(ctx, "failed to resize tty", slog.Error(resizeErr))
+			}
+		}
+	}()
+
+	go func() {
+		_, _ = io.Copy(ptty.InputWriter(), session)
+	}()
 
-// Close implements io.Closer.
-func (readNopCloser) Close() error { return nil }
+	// We need to wait for the command output to finish copying.  It's safe to
+	// just do this copy on the main handler goroutine because one of two things
+	// will happen:
+	//
+	// 1. The command completes & closes the TTY, which then triggers an error
+	//    after we've Read() all the buffered data from the PTY.
+	// 2. The client hangs up, which cancels the command's Context, and Go will
+	//    kill the command's process.  This then has the same effect as (1).
+	n, err := io.Copy(session, ptty.OutputReader())
+	s.logger.Debug(ctx, "copy output done", slog.F("bytes", n), slog.Error(err))
+	if err != nil {
+		return xerrors.Errorf("copy error: %w", err)
+	}
+	// We've gotten all the output, but we need to wait for the process to
+	// complete so that we can get the exit code.  This returns
+	// immediately if the TTY was closed as part of the command exiting.
+	err = process.Wait()
+	var exitErr *exec.ExitError
+	// ExitErrors just mean the command we ran returned a non-zero exit code, which is normal
+	// and not something to be concerned about.  But, if it's something else, we should log it.
+	if err != nil && !xerrors.As(err, &exitErr) {
+		s.logger.Warn(ctx, "wait error", slog.Error(err))
+	}
+	if err != nil {
+		return xerrors.Errorf("process wait: %w", err)
+	}
+	return nil
+}
 
 func (s *Server) sftpHandler(session ssh.Session) {
 	ctx := session.Context()