From 197bedcab2927b28883b77a45490c825b7af4f83 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Wed, 29 Mar 2023 10:27:11 +0000 Subject: [PATCH 1/2] fix(agent): Close stdin and stdout separately to fix pty output loss Fixes #6656 Closes #6840 --- agent/agent.go | 28 +++++++++++++++++++++++----- agent/agent_test.go | 30 ++++++++---------------------- pty/pty.go | 5 +++++ pty/pty_other.go | 15 +++++++++++++++ pty/pty_windows.go | 4 ++++ 5 files changed, 55 insertions(+), 27 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 1efb3e88f3dbe..e55ca63146500 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1131,19 +1131,32 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) { // output being lost. To avoid this, we wait for the output copy to // start before waiting for the command to exit. This ensures that the // output copy goroutine will be scheduled before calling close on the - // pty. There is still a risk of data loss if a command produces a lot - // of output, see TestAgent_Session_TTY_HugeOutputIsNotLost (skipped). + // pty. This shouldn't be needed because of `pty.Dup()` below, but it + // may not be supported on all platforms. outputCopyStarted := make(chan struct{}) - ptyOutput := func() io.Reader { + ptyOutput := func() io.ReadCloser { defer close(outputCopyStarted) - return ptty.Output() + // Try to dup so we can separate stdin and stdout closure. + // Once the original pty is closed, the dup will return + // input/output error once the buffered data has been read. + stdout, err := ptty.Dup() + if err == nil { + return stdout + } + // If we can't dup, we shouldn't close + // the fd since it's tied to stdin. + return readNopCloser{ptty.Output()} } wg.Add(1) go func() { // Ensure data is flushed to session on command exit, if we // close the session too soon, we might lose data. defer wg.Done() - _, _ = io.Copy(session, ptyOutput()) + + stdout := ptyOutput() + defer stdout.Close() + + _, _ = io.Copy(session, stdout) }() <-outputCopyStarted @@ -1372,6 +1385,11 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m } } +type readNopCloser struct{ io.Reader } + +// Close implements io.Closer. +func (readNopCloser) Close() error { return nil } + // startReportingConnectionStats runs the connection stats reporting goroutine. func (a *agent) startReportingConnectionStats(ctx context.Context) { reportStats := func(networkStats map[netlogtype.Connection]netlogtype.Counts) { diff --git a/agent/agent_test.go b/agent/agent_test.go index 10ccbe51242a8..4b15b85e42cab 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -350,15 +350,8 @@ func TestAgent_Session_TTY_Hushlogin(t *testing.T) { func TestAgent_Session_TTY_FastCommandHasOutput(t *testing.T) { t.Parallel() - if runtime.GOOS == "windows" { - // This might be our implementation, or ConPTY itself. - // It's difficult to find extensive tests for it, so - // it seems like it could be either. - t.Skip("ConPTY appears to be inconsistent on Windows.") - } - // This test is here to prevent regressions where quickly executing - // commands (with TTY) don't flush their output to the SSH session. + // commands (with TTY) don't sync their output to the SSH session. // // See: https://github.com/coder/coder/issues/6656 ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) @@ -404,20 +397,13 @@ func TestAgent_Session_TTY_FastCommandHasOutput(t *testing.T) { func TestAgent_Session_TTY_HugeOutputIsNotLost(t *testing.T) { t.Parallel() - if runtime.GOOS == "windows" { - // This might be our implementation, or ConPTY itself. - // It's difficult to find extensive tests for it, so - // it seems like it could be either. - t.Skip("ConPTY appears to be inconsistent on Windows.") - } - t.Skip("This test proves we have a bug where parts of large output on a PTY can be lost after the command exits, skipped to avoid test failures.") - - // This test is here to prevent prove we have a bug where quickly executing - // commands (with TTY) don't flush their output to the SSH session. This is - // due to the pty being closed before all the output has been copied, but - // protecting against this requires a non-trivial rewrite of the output - // processing (or figuring out a way to put the pty in a mode where this - // does not happen). + + // This test is here to prevent regressions where a command (with or + // without) a large amount of output would not be fully copied to the + // SSH session. On unix systems, this was fixed by duplicating the file + // descriptor of the PTY master and using it for copying the output. + // + // See: https://github.com/coder/coder/issues/6656 ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) defer cancel() //nolint:dogsled diff --git a/pty/pty.go b/pty/pty.go index 5910509ba45c8..0f176fb95ae0b 100644 --- a/pty/pty.go +++ b/pty/pty.go @@ -31,6 +31,11 @@ type PTY interface { // The same stream would be used to provide user input: pty.Input().Write(...) Input() ReadWriter + // Dup returns a new file descriptor for the PTY. + // + // This is useful for closing stdin and stdout separately. + Dup() (*os.File, error) + // Resize sets the size of the PTY. Resize(height uint16, width uint16) error } diff --git a/pty/pty_other.go b/pty/pty_other.go index cfe9ccd47e4f1..a8f8438945d5e 100644 --- a/pty/pty_other.go +++ b/pty/pty_other.go @@ -7,6 +7,7 @@ import ( "os/exec" "runtime" "sync" + "syscall" "github.com/creack/pty" "github.com/u-root/u-root/pkg/termios" @@ -113,6 +114,20 @@ func (p *otherPty) Resize(height uint16, width uint16) error { }) } +func (p *otherPty) Dup() (*os.File, error) { + var newfd int + err := p.control(p.pty, func(fd uintptr) error { + var err error + newfd, err = syscall.Dup(int(fd)) + return err + }) + if err != nil { + return nil, err + } + + return os.NewFile(uintptr(newfd), p.pty.Name()), nil +} + func (p *otherPty) Close() error { p.mutex.Lock() defer p.mutex.Unlock() diff --git a/pty/pty_windows.go b/pty/pty_windows.go index be2de2159f63b..b1afec6778be3 100644 --- a/pty/pty_windows.go +++ b/pty/pty_windows.go @@ -123,6 +123,10 @@ func (p *ptyWindows) Resize(height uint16, width uint16) error { return nil } +func (p *ptyWindows) Dup() (*os.File, error) { + return nil, xerrors.Errorf("not implemented") +} + func (p *ptyWindows) Close() error { p.closeMutex.Lock() defer p.closeMutex.Unlock() From a3c01b9b8bb49f83e16c03cc02a8566dafdb6717 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Wed, 29 Mar 2023 10:47:33 +0000 Subject: [PATCH 2/2] Avoid resize errors on close --- agent/agent.go | 13 +++++++------ pty/pty.go | 4 ++++ pty/pty_other.go | 3 +-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index e55ca63146500..f73d004ed65b7 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1115,7 +1115,8 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) { go func() { for win := range windowSize { resizeErr := ptty.Resize(uint16(win.Height), uint16(win.Width)) - if resizeErr != nil { + // If the pty is closed, then command has exited, no need to log. + if resizeErr != nil && !errors.Is(resizeErr, pty.ErrClosed) { a.logger.Warn(ctx, "failed to resize tty", slog.Error(resizeErr)) } } @@ -1189,6 +1190,11 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) { return cmd.Wait() } +type readNopCloser struct{ io.Reader } + +// Close implements io.Closer. +func (readNopCloser) Close() error { return nil } + func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, msg codersdk.WorkspaceAgentReconnectingPTYInit, conn net.Conn) (retErr error) { defer conn.Close() @@ -1385,11 +1391,6 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m } } -type readNopCloser struct{ io.Reader } - -// Close implements io.Closer. -func (readNopCloser) Close() error { return nil } - // startReportingConnectionStats runs the connection stats reporting goroutine. func (a *agent) startReportingConnectionStats(ctx context.Context) { reportStats := func(networkStats map[netlogtype.Connection]netlogtype.Counts) { diff --git a/pty/pty.go b/pty/pty.go index 0f176fb95ae0b..4156e74caadee 100644 --- a/pty/pty.go +++ b/pty/pty.go @@ -6,8 +6,12 @@ import ( "os" "github.com/gliderlabs/ssh" + "golang.org/x/xerrors" ) +// ErrClosed is returned when a PTY is used after it has been closed. +var ErrClosed = xerrors.New("pty: closed") + // PTY is a minimal interface for interacting with a TTY. type PTY interface { io.Closer diff --git a/pty/pty_other.go b/pty/pty_other.go index a8f8438945d5e..f0a49184c80b9 100644 --- a/pty/pty_other.go +++ b/pty/pty_other.go @@ -12,7 +12,6 @@ import ( "github.com/creack/pty" "github.com/u-root/u-root/pkg/termios" "golang.org/x/sys/unix" - "golang.org/x/xerrors" ) func newPty(opt ...Option) (retPTY *otherPty, err error) { @@ -146,7 +145,7 @@ func (p *otherPty) Close() error { if err != nil { p.err = err } else { - p.err = xerrors.New("pty: closed") + p.err = ErrClosed } return err