Skip to content

Commit 092e1d0

Browse files
committed
feat: Collect agent SSH metrics
1 parent e951778 commit 092e1d0

File tree

3 files changed

+76
-3
lines changed

3 files changed

+76
-3
lines changed

agent/agentssh/agentssh.go

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ func NewServer(ctx context.Context, logger slog.Logger, fs afero.Fs, maxTimeout
106106
"session": ssh.DefaultSessionHandler,
107107
},
108108
ConnectionFailedCallback: func(_ net.Conn, err error) {
109-
s.logger.Info(ctx, "ssh connection ended", slog.Error(err))
109+
s.logger.Warn(ctx, "ssh connection failed", slog.Error(err))
110+
metricConnectionFailedCallback.Add(1)
110111
},
111112
Handler: s.sessionHandler,
112113
HostSigners: []ssh.Signer{randomSigner},
@@ -115,16 +116,19 @@ func NewServer(ctx context.Context, logger slog.Logger, fs afero.Fs, maxTimeout
115116
s.logger.Debug(ctx, "local port forward",
116117
slog.F("destination-host", destinationHost),
117118
slog.F("destination-port", destinationPort))
119+
metricLocalPortForwardingCallback.Add(1)
118120
return true
119121
},
120122
PtyCallback: func(ctx ssh.Context, pty ssh.Pty) bool {
123+
metricPtyCallback.Add(1)
121124
return true
122125
},
123126
ReversePortForwardingCallback: func(ctx ssh.Context, bindHost string, bindPort uint32) bool {
124127
// Allow reverse port forwarding all!
125128
s.logger.Debug(ctx, "local port forward",
126129
slog.F("bind-host", bindHost),
127130
slog.F("bind-port", bindPort))
131+
metricReversePortForwardingCallback.Add(1)
128132
return true
129133
},
130134
RequestHandlers: map[string]ssh.RequestHandler{
@@ -205,6 +209,7 @@ func (s *Server) sessionHandler(session ssh.Session) {
205209
s.logger.Warn(ctx, "ssh session failed", slog.Error(err))
206210
// This exit code is designed to be unlikely to be confused for a legit exit code
207211
// from the process.
212+
metricsSessionError.Add(1)
208213
_ = session.Exit(MagicSessionErrorCode)
209214
return
210215
}
@@ -238,12 +243,14 @@ func (s *Server) sessionStart(session ssh.Session, extraEnv []string) (retErr er
238243

239244
cmd, err := s.CreateCommand(ctx, session.RawCommand(), env)
240245
if err != nil {
246+
metricsAgentCreateCommandError.Add(1)
241247
return err
242248
}
243249

244250
if ssh.AgentRequested(session) {
245251
l, err := ssh.NewAgentListener()
246252
if err != nil {
253+
metricsAgentListenerError.Add(1)
247254
return xerrors.Errorf("new agent listener: %w", err)
248255
}
249256
defer l.Close()
@@ -259,20 +266,27 @@ func (s *Server) sessionStart(session ssh.Session, extraEnv []string) (retErr er
259266
}
260267

261268
func startNonPTYSession(session ssh.Session, cmd *exec.Cmd) error {
269+
metricsStartNonPTYSession.Add(1)
270+
262271
cmd.Stdout = session
263272
cmd.Stderr = session.Stderr()
264273
// This blocks forever until stdin is received if we don't
265274
// use StdinPipe. It's unknown what causes this.
266275
stdinPipe, err := cmd.StdinPipe()
267276
if err != nil {
277+
metricsNonPTYStdinPipeError.Add(1)
268278
return xerrors.Errorf("create stdin pipe: %w", err)
269279
}
270280
go func() {
271-
_, _ = io.Copy(stdinPipe, session)
281+
_, err := io.Copy(stdinPipe, session)
282+
if err != nil {
283+
metricsNonPTYStdinIoCopyError.Add(1)
284+
}
272285
_ = stdinPipe.Close()
273286
}()
274287
err = cmd.Start()
275288
if err != nil {
289+
metricsNonPTYCmdStartError.Add(1)
276290
return xerrors.Errorf("start: %w", err)
277291
}
278292
return cmd.Wait()
@@ -288,6 +302,8 @@ type ptySession interface {
288302
}
289303

290304
func (s *Server) startPTYSession(session ptySession, cmd *pty.Cmd, sshPty ssh.Pty, windowSize <-chan ssh.Window) (retErr error) {
305+
metricsStartPTYSession.Add(1)
306+
291307
ctx := session.Context()
292308
// Disable minimal PTY emulation set by gliderlabs/ssh (NL-to-CRNL).
293309
// See https://github.com/coder/coder/issues/3371.
@@ -299,6 +315,7 @@ func (s *Server) startPTYSession(session ptySession, cmd *pty.Cmd, sshPty ssh.Pt
299315
err := showMOTD(session, manifest.MOTDFile)
300316
if err != nil {
301317
s.logger.Error(ctx, "show MOTD", slog.Error(err))
318+
metricsPTYMotdError.Add(1)
302319
}
303320
} else {
304321
s.logger.Warn(ctx, "metadata lookup failed, unable to show MOTD")
@@ -313,12 +330,14 @@ func (s *Server) startPTYSession(session ptySession, cmd *pty.Cmd, sshPty ssh.Pt
313330
pty.WithLogger(slog.Stdlib(ctx, s.logger, slog.LevelInfo)),
314331
))
315332
if err != nil {
333+
metricsPTYCmdStartError.Add(1)
316334
return xerrors.Errorf("start command: %w", err)
317335
}
318336
defer func() {
319337
closeErr := ptty.Close()
320338
if closeErr != nil {
321339
s.logger.Warn(ctx, "failed to close tty", slog.Error(closeErr))
340+
metricsPTYCloseError.Add(1)
322341
if retErr == nil {
323342
retErr = closeErr
324343
}
@@ -330,12 +349,16 @@ func (s *Server) startPTYSession(session ptySession, cmd *pty.Cmd, sshPty ssh.Pt
330349
// If the pty is closed, then command has exited, no need to log.
331350
if resizeErr != nil && !errors.Is(resizeErr, pty.ErrClosed) {
332351
s.logger.Warn(ctx, "failed to resize tty", slog.Error(resizeErr))
352+
metricsPTYResizeError.Add(1)
333353
}
334354
}
335355
}()
336356

337357
go func() {
338-
_, _ = io.Copy(ptty.InputWriter(), session)
358+
_, err := io.Copy(ptty.InputWriter(), session)
359+
if err != nil {
360+
metricsPTYInputIoCopyError.Add(1)
361+
}
339362
}()
340363

341364
// We need to wait for the command output to finish copying. It's safe to
@@ -349,6 +372,7 @@ func (s *Server) startPTYSession(session ptySession, cmd *pty.Cmd, sshPty ssh.Pt
349372
n, err := io.Copy(session, ptty.OutputReader())
350373
s.logger.Debug(ctx, "copy output done", slog.F("bytes", n), slog.Error(err))
351374
if err != nil {
375+
metricsPTYOutputIoCopyError.Add(1)
352376
return xerrors.Errorf("copy error: %w", err)
353377
}
354378
// We've gotten all the output, but we need to wait for the process to
@@ -360,6 +384,7 @@ func (s *Server) startPTYSession(session ptySession, cmd *pty.Cmd, sshPty ssh.Pt
360384
// and not something to be concerned about. But, if it's something else, we should log it.
361385
if err != nil && !xerrors.As(err, &exitErr) {
362386
s.logger.Warn(ctx, "wait error", slog.Error(err))
387+
metricsPTYWaitError.Add(1)
363388
}
364389
if err != nil {
365390
return xerrors.Errorf("process wait: %w", err)
@@ -368,6 +393,8 @@ func (s *Server) startPTYSession(session ptySession, cmd *pty.Cmd, sshPty ssh.Pt
368393
}
369394

370395
func (s *Server) sftpHandler(session ssh.Session) {
396+
metricSftpHandler.Add(1)
397+
371398
ctx := session.Context()
372399

373400
// Typically sftp sessions don't request a TTY, but if they do,
@@ -407,6 +434,7 @@ func (s *Server) sftpHandler(session ssh.Session) {
407434
return
408435
}
409436
s.logger.Warn(ctx, "sftp server closed with error", slog.Error(err))
437+
metricSftpServerError.Add(1)
410438
_ = session.Exit(1)
411439
}
412440

agent/agentssh/metrics.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package agentssh
2+
3+
import "tailscale.com/util/clientmetric"
4+
5+
var (
6+
// SSH callbacks
7+
metricConnectionFailedCallback = clientmetric.NewCounter("ssh_connection_failed_callback")
8+
metricLocalPortForwardingCallback = clientmetric.NewCounter("ssh_local_port_forwarding_callback")
9+
metricPtyCallback = clientmetric.NewCounter("ssh_pty_callback")
10+
metricReversePortForwardingCallback = clientmetric.NewCounter("ssh_reverse_port_forwarding_callback")
11+
metricX11Callback = clientmetric.NewCounter("ssh_x11_callback")
12+
13+
// Agent sessions
14+
metricsAgentCreateCommandError = clientmetric.NewCounter("ssh_agent_create_command_error")
15+
metricsAgentListenerError = clientmetric.NewCounter("ssh_agent_listener_error")
16+
metricsStartPTYSession = clientmetric.NewCounter("ssh_agent_start_pty_session")
17+
metricsStartNonPTYSession = clientmetric.NewCounter("ssh_agent_start_non_pty_session")
18+
metricsSessionError = clientmetric.NewCounter("ssh_session_error")
19+
20+
// Non-PTY sessions
21+
metricsNonPTYStdinPipeError = clientmetric.NewCounter("ssh_non_pty_stdin_pipe_error")
22+
metricsNonPTYStdinIoCopyError = clientmetric.NewCounter("ssh_non_pty_stdin_io_copy_error")
23+
metricsNonPTYCmdStartError = clientmetric.NewCounter("ssh_non_pty_cmd_start_error")
24+
25+
// PTY sessions
26+
metricsPTYMotdError = clientmetric.NewCounter("ssh_pty_motd_error")
27+
metricsPTYCmdStartError = clientmetric.NewCounter("ssh_pty_cmd_start_error")
28+
metricsPTYCloseError = clientmetric.NewCounter("ssh_pty_close_error")
29+
metricsPTYResizeError = clientmetric.NewCounter("ssh_pty_resize_error")
30+
metricsPTYInputIoCopyError = clientmetric.NewCounter("ssh_pty_input_io_copy_error")
31+
metricsPTYOutputIoCopyError = clientmetric.NewCounter("ssh_pty_output_io_copy_error")
32+
metricsPTYWaitError = clientmetric.NewCounter("ssh_pty_wait_error")
33+
34+
// SFTP
35+
metricSftpHandler = clientmetric.NewCounter("ssh_sftp_handler")
36+
metricSftpServerError = clientmetric.NewCounter("ssh_sftp_server_error")
37+
38+
// X11
39+
metricX11SocketDirError = clientmetric.NewCounter("ssh_x11_socket_dir_error")
40+
metricX11XauthorityError = clientmetric.NewCounter("ssh_x11_xauthority_error")
41+
)

agent/agentssh/x11.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ import (
2424
// x11Callback is called when the client requests X11 forwarding.
2525
// It adds an Xauthority entry to the Xauthority file.
2626
func (s *Server) x11Callback(ctx ssh.Context, x11 ssh.X11) bool {
27+
metricX11Callback.Add(1)
28+
2729
hostname, err := os.Hostname()
2830
if err != nil {
2931
s.logger.Warn(ctx, "failed to get hostname", slog.Error(err))
@@ -33,12 +35,14 @@ func (s *Server) x11Callback(ctx ssh.Context, x11 ssh.X11) bool {
3335
err = s.fs.MkdirAll(s.x11SocketDir, 0o700)
3436
if err != nil {
3537
s.logger.Warn(ctx, "failed to make the x11 socket dir", slog.F("dir", s.x11SocketDir), slog.Error(err))
38+
metricX11SocketDirError.Add(1)
3639
return false
3740
}
3841

3942
err = addXauthEntry(ctx, s.fs, hostname, strconv.Itoa(int(x11.ScreenNumber)), x11.AuthProtocol, x11.AuthCookie)
4043
if err != nil {
4144
s.logger.Warn(ctx, "failed to add Xauthority entry", slog.Error(err))
45+
metricX11XauthorityError.Add(1)
4246
return false
4347
}
4448
return true

0 commit comments

Comments
 (0)