From e59fe014c6895802c199e524c029f24a921320ad Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Mon, 8 Aug 2022 14:09:44 +0000 Subject: [PATCH] feat: Add active users prometheus metric This allows deployments using our Prometheus export t determine the number of active users in the past hour. The interval is an hour to align with API key last used refresh times. SSH connections poll to check shutdown time, so this will be accurate even on long-running connections without dashboard requests. --- .vscode/settings.json | 3 + cli/server.go | 33 ++++--- cli/server_test.go | 55 +++++++++++ coderd/coderd.go | 7 +- coderd/httpmw/prometheus.go | 88 +++++++++--------- coderd/httpmw/prometheus_test.go | 31 +++++++ coderd/prometheusmetrics/prometheusmetrics.go | 52 +++++++++++ .../prometheusmetrics_test.go | 93 +++++++++++++++++++ 8 files changed, 307 insertions(+), 55 deletions(-) create mode 100644 coderd/httpmw/prometheus_test.go create mode 100644 coderd/prometheusmetrics/prometheusmetrics.go create mode 100644 coderd/prometheusmetrics/prometheusmetrics_test.go diff --git a/.vscode/settings.json b/.vscode/settings.json index 81a974f40a0a5..12ef68ca9e76a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ "coderdtest", "codersdk", "cronstrue", + "databasefake", "devel", "drpc", "drpcconn", @@ -52,6 +53,7 @@ "oneof", "parameterscopeid", "pqtype", + "prometheusmetrics", "promptui", "protobuf", "provisionerd", @@ -72,6 +74,7 @@ "templateversions", "testdata", "testid", + "testutil", "tfexec", "tfjson", "tfplan", diff --git a/cli/server.go b/cli/server.go index bf645863d5c23..170afea8f984a 100644 --- a/cli/server.go +++ b/cli/server.go @@ -30,6 +30,7 @@ import ( "github.com/google/uuid" "github.com/pion/turn/v2" "github.com/pion/webrtc/v3" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/afero" "github.com/spf13/cobra" @@ -53,6 +54,7 @@ import ( "github.com/coder/coder/coderd/database/databasefake" "github.com/coder/coder/coderd/devtunnel" "github.com/coder/coder/coderd/gitsshkey" + "github.com/coder/coder/coderd/prometheusmetrics" "github.com/coder/coder/coderd/telemetry" "github.com/coder/coder/coderd/tracing" "github.com/coder/coder/coderd/turnconn" @@ -392,6 +394,26 @@ func server() *cobra.Command { defer options.Telemetry.Close() } + // This prevents the pprof import from being accidentally deleted. + _ = pprof.Handler + if pprofEnabled { + //nolint:revive + defer serveHandler(ctx, logger, nil, pprofAddress, "pprof")() + } + if promEnabled { + options.PrometheusRegistry = prometheus.NewRegistry() + closeFunc, err := prometheusmetrics.ActiveUsers(ctx, options.PrometheusRegistry, options.Database, 0) + if err != nil { + return xerrors.Errorf("register active users prometheus metric: %w", err) + } + defer closeFunc() + + //nolint:revive + defer serveHandler(ctx, logger, promhttp.InstrumentMetricHandler( + options.PrometheusRegistry, promhttp.HandlerFor(options.PrometheusRegistry, promhttp.HandlerOpts{}), + ), promAddress, "prometheus")() + } + coderAPI := coderd.New(options) defer coderAPI.Close() @@ -406,17 +428,6 @@ func server() *cobra.Command { } } - // This prevents the pprof import from being accidentally deleted. - _ = pprof.Handler - if pprofEnabled { - //nolint:revive - defer serveHandler(ctx, logger, nil, pprofAddress, "pprof")() - } - if promEnabled { - //nolint:revive - defer serveHandler(ctx, logger, promhttp.Handler(), promAddress, "prometheus")() - } - // Since errCh only has one buffered slot, all routines // sending on it must be wrapped in a select/default to // avoid leaving dangling goroutines waiting for the diff --git a/cli/server_test.go b/cli/server_test.go index ed9676f98f391..226f4a9f2eabb 100644 --- a/cli/server_test.go +++ b/cli/server_test.go @@ -1,6 +1,7 @@ package cli_test import ( + "bufio" "context" "crypto/ecdsa" "crypto/elliptic" @@ -10,6 +11,7 @@ import ( "crypto/x509/pkix" "encoding/json" "encoding/pem" + "fmt" "math/big" "net" "net/http" @@ -17,10 +19,13 @@ import ( "net/url" "os" "runtime" + "strconv" + "strings" "testing" "time" "github.com/go-chi/chi" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/goleak" @@ -374,6 +379,56 @@ func TestServer(t *testing.T) { cancelFunc() <-errC }) + t.Run("Prometheus", func(t *testing.T) { + t.Parallel() + ctx, cancelFunc := context.WithCancel(context.Background()) + defer cancelFunc() + + random, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + _ = random.Close() + tcpAddr, valid := random.Addr().(*net.TCPAddr) + require.True(t, valid) + randomPort := tcpAddr.Port + + root, cfg := clitest.New(t, + "server", + "--in-memory", + "--address", ":0", + "--provisioner-daemons", "1", + "--prometheus-enable", + "--prometheus-address", ":"+strconv.Itoa(randomPort), + "--cache-dir", t.TempDir(), + ) + serverErr := make(chan error, 1) + go func() { + serverErr <- root.ExecuteContext(ctx) + }() + _ = waitAccessURL(t, cfg) + + var res *http.Response + require.Eventually(t, func() bool { + req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://127.0.0.1:%d", randomPort), nil) + assert.NoError(t, err) + res, err = http.DefaultClient.Do(req) + return err == nil + }, testutil.WaitShort, testutil.IntervalFast) + + scanner := bufio.NewScanner(res.Body) + hasActiveUsers := false + for scanner.Scan() { + // This metric is manually registered to be tracked in the server. That's + // why we test it's tracked here. + if strings.HasPrefix(scanner.Text(), "coderd_api_active_users_duration_hour") { + hasActiveUsers = true + continue + } + } + require.NoError(t, scanner.Err()) + require.True(t, hasActiveUsers) + cancelFunc() + <-serverErr + }) } func generateTLSCertificate(t testing.TB) (certPath, keyPath string) { diff --git a/coderd/coderd.go b/coderd/coderd.go index fdf0ee86a682d..9437a9219b045 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -16,6 +16,7 @@ import ( "github.com/go-chi/chi/v5/middleware" "github.com/klauspost/compress/zstd" "github.com/pion/webrtc/v3" + "github.com/prometheus/client_golang/prometheus" sdktrace "go.opentelemetry.io/otel/sdk/trace" "golang.org/x/xerrors" "google.golang.org/api/idtoken" @@ -58,6 +59,7 @@ type Options struct { GoogleTokenValidator *idtoken.Validator GithubOAuth2Config *GithubOAuth2Config OIDCConfig *OIDCConfig + PrometheusRegistry *prometheus.Registry ICEServers []webrtc.ICEServer SecureAuthCookie bool SSHKeygenAlgorithm gitsshkey.Algorithm @@ -87,6 +89,9 @@ func New(options *Options) *API { panic(xerrors.Errorf("rego authorize panic: %w", err)) } } + if options.PrometheusRegistry == nil { + options.PrometheusRegistry = prometheus.NewRegistry() + } siteCacheDir := options.CacheDir if siteCacheDir != "" { @@ -116,7 +121,7 @@ func New(options *Options) *API { next.ServeHTTP(middleware.NewWrapResponseWriter(w, r.ProtoMajor), r) }) }, - httpmw.Prometheus, + httpmw.Prometheus(options.PrometheusRegistry), tracing.HTTPMW(api.TracerProvider, "coderd.http"), ) diff --git a/coderd/httpmw/prometheus.go b/coderd/httpmw/prometheus.go index e03966ff9788f..acc57071f0ef0 100644 --- a/coderd/httpmw/prometheus.go +++ b/coderd/httpmw/prometheus.go @@ -12,26 +12,31 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" ) -var ( - requestsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{ +func durationToFloatMs(d time.Duration) float64 { + return float64(d.Milliseconds()) +} + +func Prometheus(register prometheus.Registerer) func(http.Handler) http.Handler { + factory := promauto.With(register) + requestsProcessed := factory.NewCounterVec(prometheus.CounterOpts{ Namespace: "coderd", Subsystem: "api", Name: "requests_processed_total", Help: "The total number of processed API requests", }, []string{"code", "method", "path"}) - requestsConcurrent = promauto.NewGauge(prometheus.GaugeOpts{ + requestsConcurrent := factory.NewGauge(prometheus.GaugeOpts{ Namespace: "coderd", Subsystem: "api", Name: "concurrent_requests", Help: "The number of concurrent API requests", }) - websocketsConcurrent = promauto.NewGauge(prometheus.GaugeOpts{ + websocketsConcurrent := factory.NewGauge(prometheus.GaugeOpts{ Namespace: "coderd", Subsystem: "api", Name: "concurrent_websockets", Help: "The total number of concurrent API websockets", }) - websocketsDist = promauto.NewHistogramVec(prometheus.HistogramOpts{ + websocketsDist := factory.NewHistogramVec(prometheus.HistogramOpts{ Namespace: "coderd", Subsystem: "api", Name: "websocket_durations_ms", @@ -45,58 +50,55 @@ var ( durationToFloatMs(30 * time.Hour), }, }, []string{"path"}) - requestsDist = promauto.NewHistogramVec(prometheus.HistogramOpts{ + requestsDist := factory.NewHistogramVec(prometheus.HistogramOpts{ Namespace: "coderd", Subsystem: "api", Name: "request_latencies_ms", Help: "Latency distribution of requests in milliseconds", Buckets: []float64{1, 5, 10, 25, 50, 100, 500, 1000, 5000, 10000, 30000}, }, []string{"method", "path"}) -) -func durationToFloatMs(d time.Duration) float64 { - return float64(d.Milliseconds()) -} + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var ( + start = time.Now() + method = r.Method + rctx = chi.RouteContext(r.Context()) + ) -func Prometheus(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - var ( - start = time.Now() - method = r.Method - rctx = chi.RouteContext(r.Context()) - ) - sw, ok := w.(chimw.WrapResponseWriter) - if !ok { - panic("dev error: http.ResponseWriter is not chimw.WrapResponseWriter") - } + sw, ok := w.(chimw.WrapResponseWriter) + if !ok { + panic("dev error: http.ResponseWriter is not chimw.WrapResponseWriter") + } - var ( - dist *prometheus.HistogramVec - distOpts []string - ) - // We want to count websockets separately. - if isWebsocketUpgrade(r) { - websocketsConcurrent.Inc() - defer websocketsConcurrent.Dec() + var ( + dist *prometheus.HistogramVec + distOpts []string + ) + // We want to count WebSockets separately. + if isWebsocketUpgrade(r) { + websocketsConcurrent.Inc() + defer websocketsConcurrent.Dec() - dist = websocketsDist - } else { - requestsConcurrent.Inc() - defer requestsConcurrent.Dec() + dist = websocketsDist + } else { + requestsConcurrent.Inc() + defer requestsConcurrent.Dec() - dist = requestsDist - distOpts = []string{method} - } + dist = requestsDist + distOpts = []string{method} + } - next.ServeHTTP(w, r) + next.ServeHTTP(w, r) - path := rctx.RoutePattern() - distOpts = append(distOpts, path) - statusStr := strconv.Itoa(sw.Status()) + path := rctx.RoutePattern() + distOpts = append(distOpts, path) + statusStr := strconv.Itoa(sw.Status()) - requestsProcessed.WithLabelValues(statusStr, method, path).Inc() - dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6) - }) + requestsProcessed.WithLabelValues(statusStr, method, path).Inc() + dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6) + }) + } } func isWebsocketUpgrade(r *http.Request) bool { diff --git a/coderd/httpmw/prometheus_test.go b/coderd/httpmw/prometheus_test.go new file mode 100644 index 0000000000000..1ab93cef06c71 --- /dev/null +++ b/coderd/httpmw/prometheus_test.go @@ -0,0 +1,31 @@ +package httpmw_test + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/go-chi/chi/v5" + chimw "github.com/go-chi/chi/v5/middleware" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + + "github.com/coder/coder/coderd/httpmw" +) + +func TestPrometheus(t *testing.T) { + t.Parallel() + t.Run("All", func(t *testing.T) { + req := httptest.NewRequest("GET", "/", nil) + req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, chi.NewRouteContext())) + res := chimw.NewWrapResponseWriter(httptest.NewRecorder(), 0) + reg := prometheus.NewRegistry() + httpmw.Prometheus(reg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })).ServeHTTP(res, req) + metrics, err := reg.Gather() + require.NoError(t, err) + require.Greater(t, len(metrics), 0) + }) +} diff --git a/coderd/prometheusmetrics/prometheusmetrics.go b/coderd/prometheusmetrics/prometheusmetrics.go new file mode 100644 index 0000000000000..0b22e5fdf12bf --- /dev/null +++ b/coderd/prometheusmetrics/prometheusmetrics.go @@ -0,0 +1,52 @@ +package prometheusmetrics + +import ( + "context" + "time" + + "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" + + "github.com/coder/coder/coderd/database" +) + +// ActiveUsers tracks the number of users that have authenticated within the past hour. +func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (context.CancelFunc, error) { + if duration == 0 { + duration = 5 * time.Minute + } + + gauge := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "coderd", + Subsystem: "api", + Name: "active_users_duration_hour", + Help: "The number of users that have been active within the last hour.", + }) + err := registerer.Register(gauge) + if err != nil { + return nil, err + } + + ctx, cancelFunc := context.WithCancel(ctx) + ticker := time.NewTicker(duration) + go func() { + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + apiKeys, err := db.GetAPIKeysLastUsedAfter(ctx, database.Now().Add(-1*time.Hour)) + if err != nil { + continue + } + distinctUsers := map[uuid.UUID]struct{}{} + for _, apiKey := range apiKeys { + distinctUsers[apiKey.UserID] = struct{}{} + } + gauge.Set(float64(len(distinctUsers))) + } + }() + return cancelFunc, nil +} diff --git a/coderd/prometheusmetrics/prometheusmetrics_test.go b/coderd/prometheusmetrics/prometheusmetrics_test.go new file mode 100644 index 0000000000000..f842a38984acc --- /dev/null +++ b/coderd/prometheusmetrics/prometheusmetrics_test.go @@ -0,0 +1,93 @@ +package prometheusmetrics_test + +import ( + "context" + "testing" + "time" + + "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/databasefake" + "github.com/coder/coder/coderd/prometheusmetrics" + "github.com/coder/coder/testutil" +) + +func TestActiveUsers(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + Name string + Database func() database.Store + Count int + }{{ + Name: "None", + Database: func() database.Store { + return databasefake.New() + }, + Count: 0, + }, { + Name: "One", + Database: func() database.Store { + db := databasefake.New() + _, _ = db.InsertAPIKey(context.Background(), database.InsertAPIKeyParams{ + UserID: uuid.New(), + LastUsed: database.Now(), + }) + return db + }, + Count: 1, + }, { + Name: "OneWithExpired", + Database: func() database.Store { + db := databasefake.New() + _, _ = db.InsertAPIKey(context.Background(), database.InsertAPIKeyParams{ + UserID: uuid.New(), + LastUsed: database.Now(), + }) + // Because this API key hasn't been used in the past hour, this shouldn't + // add to the user count. + _, _ = db.InsertAPIKey(context.Background(), database.InsertAPIKeyParams{ + UserID: uuid.New(), + LastUsed: database.Now().Add(-2 * time.Hour), + }) + return db + }, + Count: 1, + }, { + Name: "Multiple", + Database: func() database.Store { + db := databasefake.New() + _, _ = db.InsertAPIKey(context.Background(), database.InsertAPIKeyParams{ + UserID: uuid.New(), + LastUsed: database.Now(), + }) + _, _ = db.InsertAPIKey(context.Background(), database.InsertAPIKeyParams{ + UserID: uuid.New(), + LastUsed: database.Now(), + }) + return db + }, + Count: 2, + }} { + tc := tc + t.Run(tc.Name, func(t *testing.T) { + t.Parallel() + registry := prometheus.NewRegistry() + cancel, err := prometheusmetrics.ActiveUsers(context.Background(), registry, tc.Database(), time.Millisecond) + require.NoError(t, err) + t.Cleanup(cancel) + + var result int + require.Eventually(t, func() bool { + metrics, err := registry.Gather() + assert.NoError(t, err) + result = int(*metrics[0].Metric[0].Gauge.Value) + return result == tc.Count + }, testutil.WaitShort, testutil.IntervalFast) + }) + } +}