From 14eb20f128703761d7de50bff0bd26247a29743f Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Wed, 30 Oct 2024 01:58:50 +0000 Subject: [PATCH 1/5] feat: add prometheus metric for tracking user statuses --- cli/server.go | 8 +- coderd/prometheusmetrics/prometheusmetrics.go | 52 ++++++++++++- .../prometheusmetrics_test.go | 77 ++++++++++++++++++- 3 files changed, 134 insertions(+), 3 deletions(-) diff --git a/cli/server.go b/cli/server.go index b29b39b05fb4a..4a508f77e04cd 100644 --- a/cli/server.go +++ b/cli/server.go @@ -212,7 +212,13 @@ func enablePrometheus( options.PrometheusRegistry.MustRegister(collectors.NewGoCollector()) options.PrometheusRegistry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) - closeUsersFunc, err := prometheusmetrics.ActiveUsers(ctx, options.PrometheusRegistry, options.Database, 0) + closeActiveUsersFunc, err := prometheusmetrics.ActiveUsers(ctx, options.Logger.Named("active_user_metrics"), options.PrometheusRegistry, options.Database, 0) + if err != nil { + return nil, xerrors.Errorf("register active users prometheus metric: %w", err) + } + afterCtx(ctx, closeActiveUsersFunc) + + closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), options.PrometheusRegistry, options.Database, 0) if err != nil { return nil, xerrors.Errorf("register active users prometheus metric: %w", err) } diff --git a/coderd/prometheusmetrics/prometheusmetrics.go b/coderd/prometheusmetrics/prometheusmetrics.go index ebd50ff0f42ce..1c022a46d2474 100644 --- a/coderd/prometheusmetrics/prometheusmetrics.go +++ b/coderd/prometheusmetrics/prometheusmetrics.go @@ -27,7 +27,7 @@ import ( const defaultRefreshRate = time.Minute // ActiveUsers tracks the number of users that have authenticated within the past hour. -func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { +func ActiveUsers(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { if duration == 0 { duration = defaultRefreshRate } @@ -58,6 +58,7 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab apiKeys, err := db.GetAPIKeysLastUsedAfter(ctx, dbtime.Now().Add(-1*time.Hour)) if err != nil { + logger.Error(ctx, "get api keys", slog.Error(err)) continue } distinctUsers := map[uuid.UUID]struct{}{} @@ -73,6 +74,55 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab }, nil } +// Users tracks the total user count, broken out by status.. +func Users(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { + if duration == 0 { + // It's not super important this tracks real-time. + duration = defaultRefreshRate * 5 + } + + gauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "coderd", + Subsystem: "api", + Name: "total_user_count", + Help: "The total number of users, broken out by status.", + }, []string{"status"}) + err := registerer.Register(gauge) + if err != nil { + return nil, err + } + + ctx, cancelFunc := context.WithCancel(ctx) + done := make(chan struct{}) + ticker := time.NewTicker(duration) + go func() { + defer close(done) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + + gauge.Reset() + users, err := db.GetUsers(dbauthz.AsSystemRestricted(ctx), database.GetUsersParams{}) + if err != nil { + logger.Error(ctx, "get users", slog.Error(err)) + continue + } + + for _, user := range users { + gauge.WithLabelValues(string(user.Status)).Inc() + } + } + }() + return func() { + cancelFunc() + <-done + }, nil +} + // Workspaces tracks the total number of workspaces with labels on status. func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { if duration == 0 { diff --git a/coderd/prometheusmetrics/prometheusmetrics_test.go b/coderd/prometheusmetrics/prometheusmetrics_test.go index 1c904d9f342e2..383b723d0cd6e 100644 --- a/coderd/prometheusmetrics/prometheusmetrics_test.go +++ b/coderd/prometheusmetrics/prometheusmetrics_test.go @@ -98,7 +98,7 @@ func TestActiveUsers(t *testing.T) { t.Run(tc.Name, func(t *testing.T) { t.Parallel() registry := prometheus.NewRegistry() - closeFunc, err := prometheusmetrics.ActiveUsers(context.Background(), registry, tc.Database(t), time.Millisecond) + closeFunc, err := prometheusmetrics.ActiveUsers(context.Background(), slogtest.Make(t, nil), registry, tc.Database(t), time.Millisecond) require.NoError(t, err) t.Cleanup(closeFunc) @@ -112,6 +112,81 @@ func TestActiveUsers(t *testing.T) { } } +func TestUsers(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + Name string + Database func(t *testing.T) database.Store + Count map[string]int + }{{ + Name: "None", + Database: func(t *testing.T) database.Store { + return dbmem.New() + }, + Count: map[string]int{}, + }, { + Name: "One", + Database: func(t *testing.T) database.Store { + db := dbmem.New() + dbgen.User(t, db, database.User{Status: database.UserStatusActive}) + return db + }, + Count: map[string]int{"active": 1}, + }, { + Name: "MultipleStatuses", + Database: func(t *testing.T) database.Store { + db := dbmem.New() + + dbgen.User(t, db, database.User{Status: database.UserStatusActive}) + dbgen.User(t, db, database.User{Status: database.UserStatusDormant}) + + return db + }, + Count: map[string]int{"active": 1, "dormant": 1}, + }, { + Name: "MultipleActive", + Database: func(t *testing.T) database.Store { + db := dbmem.New() + dbgen.User(t, db, database.User{Status: database.UserStatusActive}) + dbgen.User(t, db, database.User{Status: database.UserStatusActive}) + dbgen.User(t, db, database.User{Status: database.UserStatusActive}) + return db + }, + Count: map[string]int{"active": 3}, + }} { + tc := tc + t.Run(tc.Name, func(t *testing.T) { + t.Parallel() + registry := prometheus.NewRegistry() + closeFunc, err := prometheusmetrics.Users(context.Background(), slogtest.Make(t, nil), registry, tc.Database(t), time.Millisecond) + require.NoError(t, err) + t.Cleanup(closeFunc) + + require.Eventually(t, func() bool { + metrics, err := registry.Gather() + if err != nil { + return false + } + + // If we get no metrics and we know none should exist, bail + // early. If we get no metrics but we expect some, retry. + if len(metrics) == 0 { + return len(tc.Count) == 0 + } + + for _, metric := range metrics[0].Metric { + if tc.Count[*metric.Label[0].Value] != int(metric.Gauge.GetValue()) { + return false + } + } + + return true + }, testutil.WaitShort, testutil.IntervalSlow) + }) + } +} + func TestWorkspaceLatestBuildTotals(t *testing.T) { t.Parallel() From 9b0d26dbaae89b61b98891a780b29b1f8c82494e Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Wed, 30 Oct 2024 05:10:37 +0000 Subject: [PATCH 2/5] fixup! feat: add prometheus metric for tracking user statuses --- coderd/prometheusmetrics/prometheusmetrics.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/coderd/prometheusmetrics/prometheusmetrics.go b/coderd/prometheusmetrics/prometheusmetrics.go index 1c022a46d2474..f5e4626d4e5e0 100644 --- a/coderd/prometheusmetrics/prometheusmetrics.go +++ b/coderd/prometheusmetrics/prometheusmetrics.go @@ -106,9 +106,11 @@ func Users(ctx context.Context, logger slog.Logger, registerer prometheus.Regist } gauge.Reset() + //nolint:gocritic // This is a system service that needs full access + //to the users table. users, err := db.GetUsers(dbauthz.AsSystemRestricted(ctx), database.GetUsersParams{}) if err != nil { - logger.Error(ctx, "get users", slog.Error(err)) + logger.Error(ctx, "get all users for prometheus metrics", slog.Error(err)) continue } From bf9e53471848e416d54608338936ee1320329114 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Wed, 30 Oct 2024 18:19:50 +0000 Subject: [PATCH 3/5] fixup! feat: add prometheus metric for tracking user statuses --- cli/server.go | 2 +- coderd/prometheusmetrics/prometheusmetrics.go | 14 ++++--- .../prometheusmetrics_test.go | 38 ++++++++++++++----- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/cli/server.go b/cli/server.go index 4a508f77e04cd..645095f8e39cf 100644 --- a/cli/server.go +++ b/cli/server.go @@ -218,7 +218,7 @@ func enablePrometheus( } afterCtx(ctx, closeActiveUsersFunc) - closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), options.PrometheusRegistry, options.Database, 0) + closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), options.Clock, options.PrometheusRegistry, options.Database, 0) if err != nil { return nil, xerrors.Errorf("register active users prometheus metric: %w", err) } diff --git a/coderd/prometheusmetrics/prometheusmetrics.go b/coderd/prometheusmetrics/prometheusmetrics.go index f5e4626d4e5e0..ccd88a9e3fc1d 100644 --- a/coderd/prometheusmetrics/prometheusmetrics.go +++ b/coderd/prometheusmetrics/prometheusmetrics.go @@ -12,6 +12,7 @@ import ( "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" + "golang.org/x/xerrors" "tailscale.com/tailcfg" "cdr.dev/slog" @@ -22,6 +23,7 @@ import ( "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/tailnet" + "github.com/coder/quartz" ) const defaultRefreshRate = time.Minute @@ -58,7 +60,7 @@ func ActiveUsers(ctx context.Context, logger slog.Logger, registerer prometheus. apiKeys, err := db.GetAPIKeysLastUsedAfter(ctx, dbtime.Now().Add(-1*time.Hour)) if err != nil { - logger.Error(ctx, "get api keys", slog.Error(err)) + logger.Error(ctx, "get api keys for active users prometheus metric", slog.Error(err)) continue } distinctUsers := map[uuid.UUID]struct{}{} @@ -74,8 +76,8 @@ func ActiveUsers(ctx context.Context, logger slog.Logger, registerer prometheus. }, nil } -// Users tracks the total user count, broken out by status.. -func Users(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { +// Users tracks the total number of registered users, partitioned by status. +func Users(ctx context.Context, logger slog.Logger, clk quartz.Clock, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { if duration == 0 { // It's not super important this tracks real-time. duration = defaultRefreshRate * 5 @@ -85,16 +87,16 @@ func Users(ctx context.Context, logger slog.Logger, registerer prometheus.Regist Namespace: "coderd", Subsystem: "api", Name: "total_user_count", - Help: "The total number of users, broken out by status.", + Help: "The total number of registered users, partitioned by status.", }, []string{"status"}) err := registerer.Register(gauge) if err != nil { - return nil, err + return nil, xerrors.Errorf("register total_user_count gauge: %w", err) } ctx, cancelFunc := context.WithCancel(ctx) done := make(chan struct{}) - ticker := time.NewTicker(duration) + ticker := clk.NewTicker(duration) go func() { defer close(done) defer ticker.Stop() diff --git a/coderd/prometheusmetrics/prometheusmetrics_test.go b/coderd/prometheusmetrics/prometheusmetrics_test.go index 383b723d0cd6e..84aeda148662e 100644 --- a/coderd/prometheusmetrics/prometheusmetrics_test.go +++ b/coderd/prometheusmetrics/prometheusmetrics_test.go @@ -38,6 +38,7 @@ import ( "github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/tailnet/tailnettest" "github.com/coder/coder/v2/testutil" + "github.com/coder/quartz" ) func TestActiveUsers(t *testing.T) { @@ -118,13 +119,13 @@ func TestUsers(t *testing.T) { for _, tc := range []struct { Name string Database func(t *testing.T) database.Store - Count map[string]int + Count map[database.UserStatus]int }{{ Name: "None", Database: func(t *testing.T) database.Store { return dbmem.New() }, - Count: map[string]int{}, + Count: map[database.UserStatus]int{}, }, { Name: "One", Database: func(t *testing.T) database.Store { @@ -132,7 +133,7 @@ func TestUsers(t *testing.T) { dbgen.User(t, db, database.User{Status: database.UserStatusActive}) return db }, - Count: map[string]int{"active": 1}, + Count: map[database.UserStatus]int{database.UserStatusActive: 1}, }, { Name: "MultipleStatuses", Database: func(t *testing.T) database.Store { @@ -143,7 +144,7 @@ func TestUsers(t *testing.T) { return db }, - Count: map[string]int{"active": 1, "dormant": 1}, + Count: map[database.UserStatus]int{database.UserStatusActive: 1, database.UserStatusDormant: 1}, }, { Name: "MultipleActive", Database: func(t *testing.T) database.Store { @@ -153,17 +154,25 @@ func TestUsers(t *testing.T) { dbgen.User(t, db, database.User{Status: database.UserStatusActive}) return db }, - Count: map[string]int{"active": 3}, + Count: map[database.UserStatus]int{database.UserStatusActive: 3}, }} { tc := tc t.Run(tc.Name, func(t *testing.T) { t.Parallel() + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + registry := prometheus.NewRegistry() - closeFunc, err := prometheusmetrics.Users(context.Background(), slogtest.Make(t, nil), registry, tc.Database(t), time.Millisecond) + mClock := quartz.NewMock(t) + db := tc.Database(t) + closeFunc, err := prometheusmetrics.Users(context.Background(), slogtest.Make(t, nil), mClock, registry, db, time.Millisecond) require.NoError(t, err) t.Cleanup(closeFunc) - require.Eventually(t, func() bool { + _, w := mClock.AdvanceNext() + w.MustWait(ctx) + + checkFn := func() bool { metrics, err := registry.Gather() if err != nil { return false @@ -176,13 +185,24 @@ func TestUsers(t *testing.T) { } for _, metric := range metrics[0].Metric { - if tc.Count[*metric.Label[0].Value] != int(metric.Gauge.GetValue()) { + if tc.Count[database.UserStatus(*metric.Label[0].Value)] != int(metric.Gauge.GetValue()) { return false } } return true - }, testutil.WaitShort, testutil.IntervalSlow) + } + + require.Eventually(t, checkFn, testutil.WaitShort, testutil.IntervalFast) + + // Add another dormant user and ensure it updates + dbgen.User(t, db, database.User{Status: database.UserStatusDormant}) + tc.Count[database.UserStatusDormant]++ + + _, w = mClock.AdvanceNext() + w.MustWait(ctx) + + require.Eventually(t, checkFn, testutil.WaitShort, testutil.IntervalFast) }) } } From 3cf998096f5c848ed7bc20ebd1afbdc8eca25c51 Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Wed, 30 Oct 2024 18:20:43 +0000 Subject: [PATCH 4/5] fixup! feat: add prometheus metric for tracking user statuses --- cli/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/server.go b/cli/server.go index 645095f8e39cf..f6d4260a05695 100644 --- a/cli/server.go +++ b/cli/server.go @@ -220,7 +220,7 @@ func enablePrometheus( closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), options.Clock, options.PrometheusRegistry, options.Database, 0) if err != nil { - return nil, xerrors.Errorf("register active users prometheus metric: %w", err) + return nil, xerrors.Errorf("register users prometheus metric: %w", err) } afterCtx(ctx, closeUsersFunc) From 1b4f50268ed879c1cb67e87466885fe2e6e7be1f Mon Sep 17 00:00:00 2001 From: Colin Adler Date: Wed, 30 Oct 2024 18:32:11 +0000 Subject: [PATCH 5/5] fixup! feat: add prometheus metric for tracking user statuses --- cli/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/server.go b/cli/server.go index f6d4260a05695..f19c0df86fd1b 100644 --- a/cli/server.go +++ b/cli/server.go @@ -218,7 +218,7 @@ func enablePrometheus( } afterCtx(ctx, closeActiveUsersFunc) - closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), options.Clock, options.PrometheusRegistry, options.Database, 0) + closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), quartz.NewReal(), options.PrometheusRegistry, options.Database, 0) if err != nil { return nil, xerrors.Errorf("register users prometheus metric: %w", err) }