Skip to content

Commit 5887ee8

Browse files
committed
WIP
1 parent f05466c commit 5887ee8

File tree

5 files changed

+88
-236
lines changed

5 files changed

+88
-236
lines changed

agent/agent.go

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,7 +1019,6 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10191019
logger.Debug(ctx, "session error after agent close", slog.Error(err))
10201020
} else {
10211021
logger.Error(ctx, "session error", slog.Error(err))
1022-
a.metrics.handlerError.Add(1)
10231022
}
10241023
}
10251024
logger.Debug(ctx, "session closed")
@@ -1039,7 +1038,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10391038
// Empty command will default to the users shell!
10401039
cmd, err := a.sshServer.CreateCommand(ctx, msg.Command, nil)
10411040
if err != nil {
1042-
a.metrics.createCommandError.Add(1)
1041+
a.metrics.reconnectingPTYErrors.WithLabelValues("create_command").Add(1)
10431042
return xerrors.Errorf("create command: %w", err)
10441043
}
10451044
cmd.Env = append(cmd.Env, "TERM=xterm-256color")
@@ -1052,7 +1051,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10521051

10531052
ptty, process, err := pty.Start(cmd)
10541053
if err != nil {
1055-
a.metrics.cmdStartError.Add(1)
1054+
a.metrics.reconnectingPTYErrors.WithLabelValues("start_command").Add(1)
10561055
return xerrors.Errorf("start command: %w", err)
10571056
}
10581057

@@ -1083,7 +1082,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10831082
logger.Debug(ctx, "unable to read pty output, command exited?", slog.Error(err))
10841083
} else {
10851084
logger.Warn(ctx, "unable to read pty output, command exited?", slog.Error(err))
1086-
a.metrics.outputReaderError.Add(1)
1085+
a.metrics.reconnectingPTYErrors.WithLabelValues("output_reader").Add(1)
10871086
}
10881087
break
10891088
}
@@ -1104,7 +1103,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
11041103
slog.F("other_conn_id", cid),
11051104
slog.Error(err),
11061105
)
1107-
a.metrics.writeError.Add(1)
1106+
a.metrics.reconnectingPTYErrors.WithLabelValues("write").Add(1)
11081107
}
11091108
}
11101109
rpty.activeConnsMutex.Unlock()
@@ -1124,7 +1123,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
11241123
if err != nil {
11251124
// We can continue after this, it's not fatal!
11261125
logger.Error(ctx, "resize", slog.Error(err))
1127-
a.metrics.resizeError.Add(1)
1126+
a.metrics.reconnectingPTYErrors.WithLabelValues("resize").Add(1)
11281127
}
11291128
// Write any previously stored data for the TTY.
11301129
rpty.circularBufferMutex.RLock()
@@ -1137,7 +1136,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
11371136
// while also holding circularBufferMutex seems dangerous.
11381137
_, err = conn.Write(prevBuf)
11391138
if err != nil {
1140-
a.metrics.writeError.Add(1)
1139+
a.metrics.reconnectingPTYErrors.WithLabelValues("write").Add(1)
11411140
return xerrors.Errorf("write buffer to conn: %w", err)
11421141
}
11431142
// Multiple connections to the same TTY are permitted.
@@ -1188,7 +1187,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
11881187
_, err = rpty.ptty.InputWriter().Write([]byte(req.Data))
11891188
if err != nil {
11901189
logger.Warn(ctx, "write to pty", slog.Error(err))
1191-
a.metrics.inputWriterError.Add(1)
1190+
a.metrics.reconnectingPTYErrors.WithLabelValues("input_writer").Add(1)
11921191
return nil
11931192
}
11941193
// Check if a resize needs to happen!
@@ -1199,7 +1198,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
11991198
if err != nil {
12001199
// We can continue after this, it's not fatal!
12011200
logger.Error(ctx, "resize", slog.Error(err))
1202-
a.metrics.resizeError.Add(1)
1201+
a.metrics.reconnectingPTYErrors.WithLabelValues("resize").Add(1)
12031202
}
12041203
}
12051204
}

agent/agentssh/agentssh.go

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,7 @@ func (s *Server) sessionHandler(session ssh.Session) {
201201
return
202202
}
203203

204-
m := metricsForSession(s.metrics.sessions, magicType(session))
205-
err := s.sessionStart(session, m, extraEnv)
204+
err := s.sessionStart(session, extraEnv)
206205
var exitError *exec.ExitError
207206
if xerrors.As(err, &exitError) {
208207
s.logger.Warn(ctx, "ssh session returned", slog.Error(exitError))
@@ -229,7 +228,7 @@ func magicType(session ssh.Session) string {
229228
return ""
230229
}
231230

232-
func (s *Server) sessionStart(session ssh.Session, m sessionMetricsObject, extraEnv []string) (retErr error) {
231+
func (s *Server) sessionStart(session ssh.Session, extraEnv []string) (retErr error) {
233232
ctx := session.Context()
234233
env := append(session.Environ(), extraEnv...)
235234
var magicType string
@@ -254,16 +253,18 @@ func (s *Server) sessionStart(session ssh.Session, m sessionMetricsObject, extra
254253
s.logger.Warn(ctx, "invalid magic ssh session type specified", slog.F("type", magicType))
255254
}
256255

256+
magicTypeLabel := magicTypeMetricLabel(magicType)
257+
257258
cmd, err := s.CreateCommand(ctx, session.RawCommand(), env)
258259
if err != nil {
259-
m.agentCreateCommandError.Add(1)
260+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "create_command").Add(1)
260261
return err
261262
}
262263

263264
if ssh.AgentRequested(session) {
264265
l, err := ssh.NewAgentListener()
265266
if err != nil {
266-
m.agentListenerError.Add(1)
267+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "listener").Add(1)
267268
return xerrors.Errorf("new agent listener: %w", err)
268269
}
269270
defer l.Close()
@@ -273,33 +274,33 @@ func (s *Server) sessionStart(session ssh.Session, m sessionMetricsObject, extra
273274

274275
sshPty, windowSize, isPty := session.Pty()
275276
if isPty {
276-
return s.startPTYSession(session, m, cmd, sshPty, windowSize)
277+
return s.startPTYSession(session, magicTypeLabel, cmd, sshPty, windowSize)
277278
}
278-
return startNonPTYSession(session, m, cmd.AsExec())
279+
return s.startNonPTYSession(session, magicTypeLabel, cmd.AsExec())
279280
}
280281

281-
func startNonPTYSession(session ssh.Session, m sessionMetricsObject, cmd *exec.Cmd) error {
282-
m.startNonPTYSession.Add(1)
282+
func (s *Server) startNonPTYSession(session ssh.Session, magicTypeLabel string, cmd *exec.Cmd) error {
283+
s.metrics.sessionsTotal.WithLabelValues(magicTypeLabel, "no").Add(1)
283284

284285
cmd.Stdout = session
285286
cmd.Stderr = session.Stderr()
286287
// This blocks forever until stdin is received if we don't
287288
// use StdinPipe. It's unknown what causes this.
288289
stdinPipe, err := cmd.StdinPipe()
289290
if err != nil {
290-
m.nonPTYStdinPipeError.Add(1)
291+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "no", "stdin_pipe").Add(1)
291292
return xerrors.Errorf("create stdin pipe: %w", err)
292293
}
293294
go func() {
294295
_, err := io.Copy(stdinPipe, session)
295296
if err != nil {
296-
m.nonPTYStdinIoCopyError.Add(1)
297+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "no", "stdin_io_copy").Add(1)
297298
}
298299
_ = stdinPipe.Close()
299300
}()
300301
err = cmd.Start()
301302
if err != nil {
302-
m.nonPTYCmdStartError.Add(1)
303+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "no", "start_command").Add(1)
303304
return xerrors.Errorf("start: %w", err)
304305
}
305306
return cmd.Wait()
@@ -314,8 +315,8 @@ type ptySession interface {
314315
RawCommand() string
315316
}
316317

317-
func (s *Server) startPTYSession(session ptySession, m sessionMetricsObject, cmd *pty.Cmd, sshPty ssh.Pty, windowSize <-chan ssh.Window) (retErr error) {
318-
m.startPTYSession.Add(1)
318+
func (s *Server) startPTYSession(session ptySession, magicTypeLabel string, cmd *pty.Cmd, sshPty ssh.Pty, windowSize <-chan ssh.Window) (retErr error) {
319+
s.metrics.sessionsTotal.WithLabelValues(magicTypeLabel, "yes").Add(1)
319320

320321
ctx := session.Context()
321322
// Disable minimal PTY emulation set by gliderlabs/ssh (NL-to-CRNL).
@@ -328,7 +329,7 @@ func (s *Server) startPTYSession(session ptySession, m sessionMetricsObject, cmd
328329
err := showMOTD(session, manifest.MOTDFile)
329330
if err != nil {
330331
s.logger.Error(ctx, "show MOTD", slog.Error(err))
331-
m.ptyMotdError.Add(1)
332+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "yes", "motd").Add(1)
332333
}
333334
} else {
334335
s.logger.Warn(ctx, "metadata lookup failed, unable to show MOTD")
@@ -343,14 +344,14 @@ func (s *Server) startPTYSession(session ptySession, m sessionMetricsObject, cmd
343344
pty.WithLogger(slog.Stdlib(ctx, s.logger, slog.LevelInfo)),
344345
))
345346
if err != nil {
346-
m.ptyCmdStartError.Add(1)
347+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "yes", "start_command").Add(1)
347348
return xerrors.Errorf("start command: %w", err)
348349
}
349350
defer func() {
350351
closeErr := ptty.Close()
351352
if closeErr != nil {
352353
s.logger.Warn(ctx, "failed to close tty", slog.Error(closeErr))
353-
m.ptyCloseError.Add(1)
354+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "yes", "close").Add(1)
354355
if retErr == nil {
355356
retErr = closeErr
356357
}
@@ -362,15 +363,15 @@ func (s *Server) startPTYSession(session ptySession, m sessionMetricsObject, cmd
362363
// If the pty is closed, then command has exited, no need to log.
363364
if resizeErr != nil && !errors.Is(resizeErr, pty.ErrClosed) {
364365
s.logger.Warn(ctx, "failed to resize tty", slog.Error(resizeErr))
365-
m.ptyResizeError.Add(1)
366+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "yes", "resize").Add(1)
366367
}
367368
}
368369
}()
369370

370371
go func() {
371372
_, err := io.Copy(ptty.InputWriter(), session)
372373
if err != nil {
373-
m.ptyInputIoCopyError.Add(1)
374+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "yes", "input_io_copy").Add(1)
374375
}
375376
}()
376377

@@ -385,7 +386,7 @@ func (s *Server) startPTYSession(session ptySession, m sessionMetricsObject, cmd
385386
n, err := io.Copy(session, ptty.OutputReader())
386387
s.logger.Debug(ctx, "copy output done", slog.F("bytes", n), slog.Error(err))
387388
if err != nil {
388-
m.ptyOutputIoCopyError.Add(1)
389+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "yes", "output_io_copy").Add(1)
389390
return xerrors.Errorf("copy error: %w", err)
390391
}
391392
// We've gotten all the output, but we need to wait for the process to
@@ -397,7 +398,7 @@ func (s *Server) startPTYSession(session ptySession, m sessionMetricsObject, cmd
397398
// and not something to be concerned about. But, if it's something else, we should log it.
398399
if err != nil && !xerrors.As(err, &exitErr) {
399400
s.logger.Warn(ctx, "wait error", slog.Error(err))
400-
m.ptyWaitError.Add(1)
401+
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "yes", "wait").Add(1)
401402
}
402403
if err != nil {
403404
return xerrors.Errorf("process wait: %w", err)

0 commit comments

Comments
 (0)