From 99285218f64334a1e926f03e196e2f4a79f51850 Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Tue, 22 Apr 2025 05:52:14 +0000 Subject: [PATCH] feat: log long-lived connections acceptance (#17219) Closes #16904 --- Makefile | 8 +- coderd/httpmw/logger.go | 97 +++++++++---- coderd/httpmw/logger_internal_test.go | 174 ++++++++++++++++++++++++ coderd/httpmw/loggermock/loggermock.go | 70 ++++++++++ coderd/inboxnotifications.go | 3 + coderd/provisionerjobs.go | 3 + coderd/provisionerjobs_internal_test.go | 7 + coderd/workspaceagents.go | 9 ++ enterprise/coderd/provisionerdaemons.go | 4 + 9 files changed, 351 insertions(+), 24 deletions(-) create mode 100644 coderd/httpmw/logger_internal_test.go create mode 100644 coderd/httpmw/loggermock/loggermock.go diff --git a/Makefile b/Makefile index e8cdcd3a3a1ba..6486f5cbed5fa 100644 --- a/Makefile +++ b/Makefile @@ -581,7 +581,8 @@ GEN_FILES := \ $(TAILNETTEST_MOCKS) \ coderd/database/pubsub/psmock/psmock.go \ agent/agentcontainers/acmock/acmock.go \ - agent/agentcontainers/dcspec/dcspec_gen.go + agent/agentcontainers/dcspec/dcspec_gen.go \ + coderd/httpmw/loggermock/loggermock.go # all gen targets should be added here and to gen/mark-fresh gen: gen/db gen/golden-files $(GEN_FILES) @@ -630,6 +631,7 @@ gen/mark-fresh: coderd/database/pubsub/psmock/psmock.go \ agent/agentcontainers/acmock/acmock.go \ agent/agentcontainers/dcspec/dcspec_gen.go \ + coderd/httpmw/loggermock/loggermock.go \ " for file in $$files; do @@ -669,6 +671,10 @@ agent/agentcontainers/acmock/acmock.go: agent/agentcontainers/containers.go go generate ./agent/agentcontainers/acmock/ touch "$@" +coderd/httpmw/loggermock/loggermock.go: coderd/httpmw/logger.go + go generate ./coderd/httpmw/loggermock/ + touch "$@" + agent/agentcontainers/dcspec/dcspec_gen.go: \ node_modules/.installed \ agent/agentcontainers/dcspec/devContainer.base.schema.json \ diff --git a/coderd/httpmw/logger.go b/coderd/httpmw/logger.go index 79e95cf859d8e..0da964407b3e4 100644 --- a/coderd/httpmw/logger.go +++ b/coderd/httpmw/logger.go @@ -35,42 +35,93 @@ func Logger(log slog.Logger) func(next http.Handler) http.Handler { slog.F("start", start), ) - next.ServeHTTP(sw, r) + logContext := NewRequestLogger(httplog, r.Method, start) - end := time.Now() + ctx := WithRequestLogger(r.Context(), logContext) + + next.ServeHTTP(sw, r.WithContext(ctx)) // Don't log successful health check requests. if r.URL.Path == "/api/v2" && sw.Status == http.StatusOK { return } - httplog = httplog.With( - slog.F("took", end.Sub(start)), - slog.F("status_code", sw.Status), - slog.F("latency_ms", float64(end.Sub(start)/time.Millisecond)), - ) - - // For status codes 400 and higher we + // For status codes 500 and higher we // want to log the response body. if sw.Status >= http.StatusInternalServerError { - httplog = httplog.With( + logContext.WithFields( slog.F("response_body", string(sw.ResponseBody())), ) } - // We should not log at level ERROR for 5xx status codes because 5xx - // includes proxy errors etc. It also causes slogtest to fail - // instantly without an error message by default. - logLevelFn := httplog.Debug - if sw.Status >= http.StatusInternalServerError { - logLevelFn = httplog.Warn - } - - // We already capture most of this information in the span (minus - // the response body which we don't want to capture anyways). - tracing.RunWithoutSpan(r.Context(), func(ctx context.Context) { - logLevelFn(ctx, r.Method) - }) + logContext.WriteLog(r.Context(), sw.Status) }) } } + +type RequestLogger interface { + WithFields(fields ...slog.Field) + WriteLog(ctx context.Context, status int) +} + +type SlogRequestLogger struct { + log slog.Logger + written bool + message string + start time.Time +} + +var _ RequestLogger = &SlogRequestLogger{} + +func NewRequestLogger(log slog.Logger, message string, start time.Time) RequestLogger { + return &SlogRequestLogger{ + log: log, + written: false, + message: message, + start: start, + } +} + +func (c *SlogRequestLogger) WithFields(fields ...slog.Field) { + c.log = c.log.With(fields...) +} + +func (c *SlogRequestLogger) WriteLog(ctx context.Context, status int) { + if c.written { + return + } + c.written = true + end := time.Now() + + logger := c.log.With( + slog.F("took", end.Sub(c.start)), + slog.F("status_code", status), + slog.F("latency_ms", float64(end.Sub(c.start)/time.Millisecond)), + ) + // We already capture most of this information in the span (minus + // the response body which we don't want to capture anyways). + tracing.RunWithoutSpan(ctx, func(ctx context.Context) { + // We should not log at level ERROR for 5xx status codes because 5xx + // includes proxy errors etc. It also causes slogtest to fail + // instantly without an error message by default. + if status >= http.StatusInternalServerError { + logger.Warn(ctx, c.message) + } else { + logger.Debug(ctx, c.message) + } + }) +} + +type logContextKey struct{} + +func WithRequestLogger(ctx context.Context, rl RequestLogger) context.Context { + return context.WithValue(ctx, logContextKey{}, rl) +} + +func RequestLoggerFromContext(ctx context.Context) RequestLogger { + val := ctx.Value(logContextKey{}) + if logCtx, ok := val.(RequestLogger); ok { + return logCtx + } + return nil +} diff --git a/coderd/httpmw/logger_internal_test.go b/coderd/httpmw/logger_internal_test.go new file mode 100644 index 0000000000000..d3035e50d98c9 --- /dev/null +++ b/coderd/httpmw/logger_internal_test.go @@ -0,0 +1,174 @@ +package httpmw + +import ( + "context" + "net/http" + "net/http/httptest" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "cdr.dev/slog" + "github.com/coder/coder/v2/coderd/tracing" + "github.com/coder/coder/v2/testutil" + "github.com/coder/websocket" +) + +func TestRequestLogger_WriteLog(t *testing.T) { + t.Parallel() + ctx := context.Background() + + sink := &fakeSink{} + logger := slog.Make(sink) + logger = logger.Leveled(slog.LevelDebug) + logCtx := NewRequestLogger(logger, "GET", time.Now()) + + // Add custom fields + logCtx.WithFields( + slog.F("custom_field", "custom_value"), + ) + + // Write log for 200 status + logCtx.WriteLog(ctx, http.StatusOK) + + require.Len(t, sink.entries, 1, "log was written twice") + + require.Equal(t, sink.entries[0].Message, "GET") + + require.Equal(t, sink.entries[0].Fields[0].Value, "custom_value") + + // Attempt to write again (should be skipped). + logCtx.WriteLog(ctx, http.StatusInternalServerError) + + require.Len(t, sink.entries, 1, "log was written twice") +} + +func TestLoggerMiddleware_SingleRequest(t *testing.T) { + t.Parallel() + + sink := &fakeSink{} + logger := slog.Make(sink) + logger = logger.Leveled(slog.LevelDebug) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + + // Create a test handler to simulate an HTTP request + testHandler := http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + rw.WriteHeader(http.StatusOK) + _, _ = rw.Write([]byte("OK")) + }) + + // Wrap the test handler with the Logger middleware + loggerMiddleware := Logger(logger) + wrappedHandler := loggerMiddleware(testHandler) + + // Create a test HTTP request + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "/test-path", nil) + require.NoError(t, err, "failed to create request") + + sw := &tracing.StatusWriter{ResponseWriter: httptest.NewRecorder()} + + // Serve the request + wrappedHandler.ServeHTTP(sw, req) + + require.Len(t, sink.entries, 1, "log was written twice") + + require.Equal(t, sink.entries[0].Message, "GET") + + fieldsMap := make(map[string]interface{}) + for _, field := range sink.entries[0].Fields { + fieldsMap[field.Name] = field.Value + } + + // Check that the log contains the expected fields + requiredFields := []string{"host", "path", "proto", "remote_addr", "start", "took", "status_code", "latency_ms"} + for _, field := range requiredFields { + _, exists := fieldsMap[field] + require.True(t, exists, "field %q is missing in log fields", field) + } + + require.Len(t, sink.entries[0].Fields, len(requiredFields), "log should contain only the required fields") + + // Check value of the status code + require.Equal(t, fieldsMap["status_code"], http.StatusOK) +} + +func TestLoggerMiddleware_WebSocket(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + + sink := &fakeSink{ + newEntries: make(chan slog.SinkEntry, 2), + } + logger := slog.Make(sink) + logger = logger.Leveled(slog.LevelDebug) + done := make(chan struct{}) + wg := sync.WaitGroup{} + // Create a test handler to simulate a WebSocket connection + testHandler := http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + conn, err := websocket.Accept(rw, r, nil) + if !assert.NoError(t, err, "failed to accept websocket") { + return + } + defer conn.Close(websocket.StatusGoingAway, "") + + requestLgr := RequestLoggerFromContext(r.Context()) + requestLgr.WriteLog(r.Context(), http.StatusSwitchingProtocols) + // Block so we can be sure the end of the middleware isn't being called. + wg.Wait() + }) + + // Wrap the test handler with the Logger middleware + loggerMiddleware := Logger(logger) + wrappedHandler := loggerMiddleware(testHandler) + + // RequestLogger expects the ResponseWriter to be *tracing.StatusWriter + customHandler := http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + defer close(done) + sw := &tracing.StatusWriter{ResponseWriter: rw} + wrappedHandler.ServeHTTP(sw, r) + }) + + srv := httptest.NewServer(customHandler) + defer srv.Close() + wg.Add(1) + // nolint: bodyclose + conn, _, err := websocket.Dial(ctx, srv.URL, nil) + require.NoError(t, err, "failed to dial WebSocket") + defer conn.Close(websocket.StatusNormalClosure, "") + + // Wait for the log from within the handler + newEntry := testutil.RequireRecvCtx(ctx, t, sink.newEntries) + require.Equal(t, newEntry.Message, "GET") + + // Signal the websocket handler to return (and read to handle the close frame) + wg.Done() + _, _, err = conn.Read(ctx) + require.ErrorAs(t, err, &websocket.CloseError{}, "websocket read should fail with close error") + + // Wait for the request to finish completely and verify we only logged once + _ = testutil.RequireRecvCtx(ctx, t, done) + require.Len(t, sink.entries, 1, "log was written twice") +} + +type fakeSink struct { + entries []slog.SinkEntry + newEntries chan slog.SinkEntry +} + +func (s *fakeSink) LogEntry(_ context.Context, e slog.SinkEntry) { + s.entries = append(s.entries, e) + if s.newEntries != nil { + select { + case s.newEntries <- e: + default: + } + } +} + +func (*fakeSink) Sync() {} diff --git a/coderd/httpmw/loggermock/loggermock.go b/coderd/httpmw/loggermock/loggermock.go new file mode 100644 index 0000000000000..47818ca11d9e6 --- /dev/null +++ b/coderd/httpmw/loggermock/loggermock.go @@ -0,0 +1,70 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/coder/coder/v2/coderd/httpmw (interfaces: RequestLogger) +// +// Generated by this command: +// +// mockgen -destination=loggermock/loggermock.go -package=loggermock . RequestLogger +// + +// Package loggermock is a generated GoMock package. +package loggermock + +import ( + context "context" + reflect "reflect" + + slog "cdr.dev/slog" + gomock "go.uber.org/mock/gomock" +) + +// MockRequestLogger is a mock of RequestLogger interface. +type MockRequestLogger struct { + ctrl *gomock.Controller + recorder *MockRequestLoggerMockRecorder + isgomock struct{} +} + +// MockRequestLoggerMockRecorder is the mock recorder for MockRequestLogger. +type MockRequestLoggerMockRecorder struct { + mock *MockRequestLogger +} + +// NewMockRequestLogger creates a new mock instance. +func NewMockRequestLogger(ctrl *gomock.Controller) *MockRequestLogger { + mock := &MockRequestLogger{ctrl: ctrl} + mock.recorder = &MockRequestLoggerMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockRequestLogger) EXPECT() *MockRequestLoggerMockRecorder { + return m.recorder +} + +// WithFields mocks base method. +func (m *MockRequestLogger) WithFields(fields ...slog.Field) { + m.ctrl.T.Helper() + varargs := []any{} + for _, a := range fields { + varargs = append(varargs, a) + } + m.ctrl.Call(m, "WithFields", varargs...) +} + +// WithFields indicates an expected call of WithFields. +func (mr *MockRequestLoggerMockRecorder) WithFields(fields ...any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WithFields", reflect.TypeOf((*MockRequestLogger)(nil).WithFields), fields...) +} + +// WriteLog mocks base method. +func (m *MockRequestLogger) WriteLog(ctx context.Context, status int) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "WriteLog", ctx, status) +} + +// WriteLog indicates an expected call of WriteLog. +func (mr *MockRequestLoggerMockRecorder) WriteLog(ctx, status any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WriteLog", reflect.TypeOf((*MockRequestLogger)(nil).WriteLog), ctx, status) +} diff --git a/coderd/inboxnotifications.go b/coderd/inboxnotifications.go index 6da047241d790..ea20c60de3cce 100644 --- a/coderd/inboxnotifications.go +++ b/coderd/inboxnotifications.go @@ -219,6 +219,9 @@ func (api *API) watchInboxNotifications(rw http.ResponseWriter, r *http.Request) encoder := wsjson.NewEncoder[codersdk.GetInboxNotificationResponse](conn, websocket.MessageText) defer encoder.Close(websocket.StatusNormalClosure) + // Log the request immediately instead of after it completes. + httpmw.RequestLoggerFromContext(ctx).WriteLog(ctx, http.StatusAccepted) + for { select { case <-ctx.Done(): diff --git a/coderd/provisionerjobs.go b/coderd/provisionerjobs.go index 47963798f4d32..335643390796f 100644 --- a/coderd/provisionerjobs.go +++ b/coderd/provisionerjobs.go @@ -554,6 +554,9 @@ func (f *logFollower) follow() { return } + // Log the request immediately instead of after it completes. + httpmw.RequestLoggerFromContext(f.ctx).WriteLog(f.ctx, http.StatusAccepted) + // no need to wait if the job is done if f.complete { return diff --git a/coderd/provisionerjobs_internal_test.go b/coderd/provisionerjobs_internal_test.go index af5a7d66a6f4c..c2c0a60c75ba0 100644 --- a/coderd/provisionerjobs_internal_test.go +++ b/coderd/provisionerjobs_internal_test.go @@ -19,6 +19,8 @@ import ( "github.com/coder/coder/v2/coderd/database/dbmock" "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/coderd/database/pubsub" + "github.com/coder/coder/v2/coderd/httpmw" + "github.com/coder/coder/v2/coderd/httpmw/loggermock" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/provisionersdk" "github.com/coder/coder/v2/testutil" @@ -305,11 +307,16 @@ func Test_logFollower_EndOfLogs(t *testing.T) { JobStatus: database.ProvisionerJobStatusRunning, } + mockLogger := loggermock.NewMockRequestLogger(ctrl) + mockLogger.EXPECT().WriteLog(gomock.Any(), http.StatusAccepted).Times(1) + ctx = httpmw.WithRequestLogger(ctx, mockLogger) + // we need an HTTP server to get a websocket srv := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { uut := newLogFollower(ctx, logger, mDB, ps, rw, r, job, 0) uut.follow() })) + defer srv.Close() // job was incomplete when we create the logFollower, and still incomplete when it queries diff --git a/coderd/workspaceagents.go b/coderd/workspaceagents.go index 0438b13b09278..9cb891ee7a5c8 100644 --- a/coderd/workspaceagents.go +++ b/coderd/workspaceagents.go @@ -555,6 +555,9 @@ func (api *API) workspaceAgentLogs(rw http.ResponseWriter, r *http.Request) { t := time.NewTicker(recheckInterval) defer t.Stop() + // Log the request immediately instead of after it completes. + httpmw.RequestLoggerFromContext(ctx).WriteLog(ctx, http.StatusAccepted) + go func() { defer func() { logger.Debug(ctx, "end log streaming loop") @@ -928,6 +931,9 @@ func (api *API) derpMapUpdates(rw http.ResponseWriter, r *http.Request) { encoder := wsjson.NewEncoder[*tailcfg.DERPMap](ws, websocket.MessageBinary) defer encoder.Close(websocket.StatusGoingAway) + // Log the request immediately instead of after it completes. + httpmw.RequestLoggerFromContext(ctx).WriteLog(ctx, http.StatusAccepted) + go func(ctx context.Context) { // TODO(mafredri): Is this too frequent? Use separate ping disconnect timeout? t := time.NewTicker(api.AgentConnectionUpdateFrequency) @@ -1303,6 +1309,9 @@ func (api *API) watchWorkspaceAgentMetadata(rw http.ResponseWriter, r *http.Requ sendTicker := time.NewTicker(sendInterval) defer sendTicker.Stop() + // Log the request immediately instead of after it completes. + httpmw.RequestLoggerFromContext(ctx).WriteLog(ctx, http.StatusAccepted) + // Send initial metadata. sendMetadata() diff --git a/enterprise/coderd/provisionerdaemons.go b/enterprise/coderd/provisionerdaemons.go index 5b0f0ca197743..15e3c3901ade3 100644 --- a/enterprise/coderd/provisionerdaemons.go +++ b/enterprise/coderd/provisionerdaemons.go @@ -376,6 +376,10 @@ func (api *API) provisionerDaemonServe(rw http.ResponseWriter, r *http.Request) logger.Debug(ctx, "drpc server error", slog.Error(err)) }, }) + + // Log the request immediately instead of after it completes. + httpmw.RequestLoggerFromContext(ctx).WriteLog(ctx, http.StatusAccepted) + err = server.Serve(ctx, session) srvCancel() logger.Info(ctx, "provisioner daemon disconnected", slog.Error(err))