Skip to content

fix(agent): Close stdin and stdout separately to fix pty output loss #6862

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,8 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) {
go func() {
for win := range windowSize {
resizeErr := ptty.Resize(uint16(win.Height), uint16(win.Width))
if resizeErr != nil {
// If the pty is closed, then command has exited, no need to log.
if resizeErr != nil && !errors.Is(resizeErr, pty.ErrClosed) {
a.logger.Warn(ctx, "failed to resize tty", slog.Error(resizeErr))
}
}
Expand All @@ -1131,19 +1132,32 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) {
// output being lost. To avoid this, we wait for the output copy to
// start before waiting for the command to exit. This ensures that the
// output copy goroutine will be scheduled before calling close on the
// pty. There is still a risk of data loss if a command produces a lot
// of output, see TestAgent_Session_TTY_HugeOutputIsNotLost (skipped).
// pty. This shouldn't be needed because of `pty.Dup()` below, but it
// may not be supported on all platforms.
outputCopyStarted := make(chan struct{})
ptyOutput := func() io.Reader {
ptyOutput := func() io.ReadCloser {
defer close(outputCopyStarted)
return ptty.Output()
// Try to dup so we can separate stdin and stdout closure.
// Once the original pty is closed, the dup will return
// input/output error once the buffered data has been read.
stdout, err := ptty.Dup()
if err == nil {
return stdout
}
// If we can't dup, we shouldn't close
// the fd since it's tied to stdin.
return readNopCloser{ptty.Output()}
}
wg.Add(1)
go func() {
// Ensure data is flushed to session on command exit, if we
// close the session too soon, we might lose data.
defer wg.Done()
_, _ = io.Copy(session, ptyOutput())

stdout := ptyOutput()
defer stdout.Close()

_, _ = io.Copy(session, stdout)
}()
<-outputCopyStarted

Expand Down Expand Up @@ -1176,6 +1190,11 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) {
return cmd.Wait()
}

type readNopCloser struct{ io.Reader }

// Close implements io.Closer.
func (readNopCloser) Close() error { return nil }

func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, msg codersdk.WorkspaceAgentReconnectingPTYInit, conn net.Conn) (retErr error) {
defer conn.Close()

Expand Down
30 changes: 8 additions & 22 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,15 +350,8 @@ func TestAgent_Session_TTY_Hushlogin(t *testing.T) {

func TestAgent_Session_TTY_FastCommandHasOutput(t *testing.T) {
t.Parallel()
if runtime.GOOS == "windows" {
// This might be our implementation, or ConPTY itself.
// It's difficult to find extensive tests for it, so
// it seems like it could be either.
t.Skip("ConPTY appears to be inconsistent on Windows.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be risky (flakey CI), but maybe we should see if it's a problem on Windows?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed!

}

// This test is here to prevent regressions where quickly executing
// commands (with TTY) don't flush their output to the SSH session.
// commands (with TTY) don't sync their output to the SSH session.
//
// See: https://github.com/coder/coder/issues/6656
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
Expand Down Expand Up @@ -404,20 +397,13 @@ func TestAgent_Session_TTY_FastCommandHasOutput(t *testing.T) {

func TestAgent_Session_TTY_HugeOutputIsNotLost(t *testing.T) {
t.Parallel()
if runtime.GOOS == "windows" {
// This might be our implementation, or ConPTY itself.
// It's difficult to find extensive tests for it, so
// it seems like it could be either.
t.Skip("ConPTY appears to be inconsistent on Windows.")
}
t.Skip("This test proves we have a bug where parts of large output on a PTY can be lost after the command exits, skipped to avoid test failures.")

// This test is here to prevent prove we have a bug where quickly executing
// commands (with TTY) don't flush their output to the SSH session. This is
// due to the pty being closed before all the output has been copied, but
// protecting against this requires a non-trivial rewrite of the output
// processing (or figuring out a way to put the pty in a mode where this
// does not happen).

// This test is here to prevent regressions where a command (with or
// without) a large amount of output would not be fully copied to the
// SSH session. On unix systems, this was fixed by duplicating the file
// descriptor of the PTY master and using it for copying the output.
//
// See: https://github.com/coder/coder/issues/6656
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
Expand Down
9 changes: 9 additions & 0 deletions pty/pty.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ import (
"os"

"github.com/gliderlabs/ssh"
"golang.org/x/xerrors"
)

// ErrClosed is returned when a PTY is used after it has been closed.
var ErrClosed = xerrors.New("pty: closed")

// PTY is a minimal interface for interacting with a TTY.
type PTY interface {
io.Closer
Expand All @@ -31,6 +35,11 @@ type PTY interface {
// The same stream would be used to provide user input: pty.Input().Write(...)
Input() ReadWriter

// Dup returns a new file descriptor for the PTY.
//
// This is useful for closing stdin and stdout separately.
Dup() (*os.File, error)

// Resize sets the size of the PTY.
Resize(height uint16, width uint16) error
}
Expand Down
18 changes: 16 additions & 2 deletions pty/pty_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import (
"os/exec"
"runtime"
"sync"
"syscall"

"github.com/creack/pty"
"github.com/u-root/u-root/pkg/termios"
"golang.org/x/sys/unix"
"golang.org/x/xerrors"
)

func newPty(opt ...Option) (retPTY *otherPty, err error) {
Expand Down Expand Up @@ -113,6 +113,20 @@ func (p *otherPty) Resize(height uint16, width uint16) error {
})
}

func (p *otherPty) Dup() (*os.File, error) {
var newfd int
err := p.control(p.pty, func(fd uintptr) error {
var err error
newfd, err = syscall.Dup(int(fd))
return err
})
if err != nil {
return nil, err
}

return os.NewFile(uintptr(newfd), p.pty.Name()), nil
}

func (p *otherPty) Close() error {
p.mutex.Lock()
defer p.mutex.Unlock()
Expand All @@ -131,7 +145,7 @@ func (p *otherPty) Close() error {
if err != nil {
p.err = err
} else {
p.err = xerrors.New("pty: closed")
p.err = ErrClosed
}

return err
Expand Down
4 changes: 4 additions & 0 deletions pty/pty_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ func (p *ptyWindows) Resize(height uint16, width uint16) error {
return nil
}

func (p *ptyWindows) Dup() (*os.File, error) {
return nil, xerrors.Errorf("not implemented")
}

func (p *ptyWindows) Close() error {
p.closeMutex.Lock()
defer p.closeMutex.Unlock()
Expand Down