From 8b695dc1d61a48b63719525cbfe5899fb8d642ff Mon Sep 17 00:00:00 2001 From: Asher Date: Fri, 13 May 2022 13:20:32 -0500 Subject: [PATCH 1/8] Add GetWorkspaces query This lets us retrieve all workspaces (for telemetry purposes). --- coderd/database/databasefake/databasefake.go | 16 +++++++++ coderd/database/querier.go | 1 + coderd/database/queries.sql.go | 38 ++++++++++++++++++++ coderd/database/queries/workspaces.sql | 3 ++ 4 files changed, 58 insertions(+) diff --git a/coderd/database/databasefake/databasefake.go b/coderd/database/databasefake/databasefake.go index e876a54fdad55..5a31c194e8b9b 100644 --- a/coderd/database/databasefake/databasefake.go +++ b/coderd/database/databasefake/databasefake.go @@ -1061,6 +1061,22 @@ func (q *fakeQuerier) GetWorkspaceResourcesByJobID(_ context.Context, jobID uuid return resources, nil } +func (q *fakeQuerier) GetWorkspaces(_ context.Context, deleted bool) ([]database.Workspace, error) { + q.mutex.RLock() + defer q.mutex.RUnlock() + + workspaces := make([]database.Workspace, 0) + for _, workspace := range q.workspaces { + if workspace.Deleted == deleted { + workspaces = append(workspaces, workspace) + } + } + if len(workspaces) == 0 { + return nil, sql.ErrNoRows + } + return workspaces, nil +} + func (q *fakeQuerier) GetProvisionerJobsByIDs(_ context.Context, ids []uuid.UUID) ([]database.ProvisionerJob, error) { q.mutex.RLock() defer q.mutex.RUnlock() diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 387a2c9a06698..f7ca36b3c70cd 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -70,6 +70,7 @@ type querier interface { GetWorkspaceOwnerCountsByTemplateIDs(ctx context.Context, ids []uuid.UUID) ([]GetWorkspaceOwnerCountsByTemplateIDsRow, error) GetWorkspaceResourceByID(ctx context.Context, id uuid.UUID) (WorkspaceResource, error) GetWorkspaceResourcesByJobID(ctx context.Context, jobID uuid.UUID) ([]WorkspaceResource, error) + GetWorkspaces(ctx context.Context, deleted bool) ([]Workspace, error) GetWorkspacesAutostartAutostop(ctx context.Context) ([]Workspace, error) GetWorkspacesByOrganizationIDs(ctx context.Context, arg GetWorkspacesByOrganizationIDsParams) ([]Workspace, error) GetWorkspacesByTemplateID(ctx context.Context, arg GetWorkspacesByTemplateIDParams) ([]Workspace, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 5985afbf33d65..079267f7d7eab 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -3295,6 +3295,44 @@ func (q *sqlQuerier) GetWorkspaceOwnerCountsByTemplateIDs(ctx context.Context, i return items, nil } +const getWorkspaces = `-- name: GetWorkspaces :many +SELECT id, created_at, updated_at, owner_id, organization_id, template_id, deleted, name, autostart_schedule, autostop_schedule FROM workspaces WHERE deleted = $1 +` + +func (q *sqlQuerier) GetWorkspaces(ctx context.Context, deleted bool) ([]Workspace, error) { + rows, err := q.db.QueryContext(ctx, getWorkspaces, deleted) + if err != nil { + return nil, err + } + defer rows.Close() + var items []Workspace + for rows.Next() { + var i Workspace + if err := rows.Scan( + &i.ID, + &i.CreatedAt, + &i.UpdatedAt, + &i.OwnerID, + &i.OrganizationID, + &i.TemplateID, + &i.Deleted, + &i.Name, + &i.AutostartSchedule, + &i.AutostopSchedule, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const getWorkspacesAutostartAutostop = `-- name: GetWorkspacesAutostartAutostop :many SELECT id, created_at, updated_at, owner_id, organization_id, template_id, deleted, name, autostart_schedule, autostop_schedule diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index eb87ad9a51d41..9cfc68e54dcf5 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -1,3 +1,6 @@ +-- name: GetWorkspaces :many +SELECT * FROM workspaces WHERE deleted = $1; + -- name: GetWorkspaceByID :one SELECT * From da4338f4decc3d8d783cd2a2af4162e10d215330 Mon Sep 17 00:00:00 2001 From: Asher Date: Fri, 13 May 2022 13:44:23 -0500 Subject: [PATCH 2/8] Add GetWorkspaceResources query This lets us get all the latest resources (for telemetry purposes). --- coderd/database/databasefake/databasefake.go | 20 +++++++++ coderd/database/querier.go | 1 + coderd/database/queries.sql.go | 41 +++++++++++++++++++ .../database/queries/workspaceresources.sql | 10 +++++ 4 files changed, 72 insertions(+) diff --git a/coderd/database/databasefake/databasefake.go b/coderd/database/databasefake/databasefake.go index 5a31c194e8b9b..5d5917af96e14 100644 --- a/coderd/database/databasefake/databasefake.go +++ b/coderd/database/databasefake/databasefake.go @@ -1044,6 +1044,26 @@ func (q *fakeQuerier) GetWorkspaceResourceByID(_ context.Context, id uuid.UUID) return database.WorkspaceResource{}, sql.ErrNoRows } +func (q *fakeQuerier) GetWorkspaceResources(ctx context.Context) ([]database.WorkspaceResource, error) { + q.mutex.RLock() + defer q.mutex.RUnlock() + + resources := make([]database.WorkspaceResource, 0) + for _, workspaceBuild := range q.workspaceBuilds { + if !workspaceBuild.AfterID.Valid { + rs, err := q.GetWorkspaceResourcesByJobID(ctx, workspaceBuild.JobID) + if err != nil { + return nil, err + } + resources = append(resources, rs...) + } + } + if len(resources) == 0 { + return nil, sql.ErrNoRows + } + return resources, nil +} + func (q *fakeQuerier) GetWorkspaceResourcesByJobID(_ context.Context, jobID uuid.UUID) ([]database.WorkspaceResource, error) { q.mutex.RLock() defer q.mutex.RUnlock() diff --git a/coderd/database/querier.go b/coderd/database/querier.go index f7ca36b3c70cd..e8cedea521004 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -69,6 +69,7 @@ type querier interface { GetWorkspaceByOwnerIDAndName(ctx context.Context, arg GetWorkspaceByOwnerIDAndNameParams) (Workspace, error) GetWorkspaceOwnerCountsByTemplateIDs(ctx context.Context, ids []uuid.UUID) ([]GetWorkspaceOwnerCountsByTemplateIDsRow, error) GetWorkspaceResourceByID(ctx context.Context, id uuid.UUID) (WorkspaceResource, error) + GetWorkspaceResources(ctx context.Context) ([]WorkspaceResource, error) GetWorkspaceResourcesByJobID(ctx context.Context, jobID uuid.UUID) ([]WorkspaceResource, error) GetWorkspaces(ctx context.Context, deleted bool) ([]Workspace, error) GetWorkspacesAutostartAutostop(ctx context.Context) ([]Workspace, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 079267f7d7eab..023774a6c9aee 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -3115,6 +3115,47 @@ func (q *sqlQuerier) GetWorkspaceResourceByID(ctx context.Context, id uuid.UUID) return i, err } +const getWorkspaceResources = `-- name: GetWorkspaceResources :many +SELECT + workspace_resources.id, workspace_resources.created_at, workspace_resources.job_id, workspace_resources.transition, workspace_resources.type, workspace_resources.name +FROM + workspace_resources +INNER JOIN workspace_builds + ON workspace_resources.job_id = workspace_builds.job_id +WHERE + workspace_builds.after_id IS NULL +` + +func (q *sqlQuerier) GetWorkspaceResources(ctx context.Context) ([]WorkspaceResource, error) { + rows, err := q.db.QueryContext(ctx, getWorkspaceResources) + if err != nil { + return nil, err + } + defer rows.Close() + var items []WorkspaceResource + for rows.Next() { + var i WorkspaceResource + if err := rows.Scan( + &i.ID, + &i.CreatedAt, + &i.JobID, + &i.Transition, + &i.Type, + &i.Name, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const getWorkspaceResourcesByJobID = `-- name: GetWorkspaceResourcesByJobID :many SELECT id, created_at, job_id, transition, type, name diff --git a/coderd/database/queries/workspaceresources.sql b/coderd/database/queries/workspaceresources.sql index c120cf41a8d57..28614e9be6ea6 100644 --- a/coderd/database/queries/workspaceresources.sql +++ b/coderd/database/queries/workspaceresources.sql @@ -19,3 +19,13 @@ INSERT INTO workspace_resources (id, created_at, job_id, transition, type, name) VALUES ($1, $2, $3, $4, $5, $6) RETURNING *; + +-- name: GetWorkspaceResources :many +SELECT + workspace_resources.* +FROM + workspace_resources +INNER JOIN workspace_builds + ON workspace_resources.job_id = workspace_builds.job_id +WHERE + workspace_builds.after_id IS NULL; From e614ce56bccd4672274b0aaddeca1990546b9538 Mon Sep 17 00:00:00 2001 From: Asher Date: Fri, 13 May 2022 14:16:59 -0500 Subject: [PATCH 3/8] Add monitoring package --- cli/server.go | 26 ++- coderd/coderd.go | 4 +- coderd/coderdtest/coderdtest.go | 15 +- coderd/database/databasefake/databasefake.go | 1 + coderd/httpmw/prometheus.go | 89 +++---- coderd/monitoring/monitoring.go | 231 +++++++++++++++++++ coderd/monitoring/monitoring_test.go | 170 ++++++++++++++ 7 files changed, 487 insertions(+), 49 deletions(-) create mode 100644 coderd/monitoring/monitoring.go create mode 100644 coderd/monitoring/monitoring_test.go diff --git a/cli/server.go b/cli/server.go index d00d24a8cb54d..f3910c9d77bf1 100644 --- a/cli/server.go +++ b/cli/server.go @@ -44,6 +44,7 @@ import ( "github.com/coder/coder/coderd/database/databasefake" "github.com/coder/coder/coderd/devtunnel" "github.com/coder/coder/coderd/gitsshkey" + "github.com/coder/coder/coderd/monitoring" "github.com/coder/coder/coderd/turnconn" "github.com/coder/coder/codersdk" "github.com/coder/coder/cryptorand" @@ -73,6 +74,7 @@ func server() *cobra.Command { oauth2GithubClientSecret string oauth2GithubAllowedOrganizations []string oauth2GithubAllowSignups bool + telemetryLevelRaw string tlsCertFile string tlsClientCAFile string tlsClientAuth string @@ -192,6 +194,11 @@ func server() *cobra.Command { return xerrors.Errorf("parse ssh keygen algorithm %s: %w", sshKeygenAlgorithmRaw, err) } + telemetryLevel, err := monitoring.ParseTelemetryLevel(telemetryLevelRaw) + if err != nil { + return xerrors.Errorf("parse telemetry level %s: %w", telemetryLevelRaw, err) + } + turnServer, err := turnconn.New(&turn.RelayAddressGeneratorStatic{ RelayAddress: net.ParseIP(turnRelayAddress), Address: turnRelayAddress, @@ -249,6 +256,13 @@ func server() *cobra.Command { } } + options.Monitor = monitoring.New(cmd.Context(), &monitoring.Options{ + Database: options.Database, + Logger: options.Logger, + RefreshInterval: time.Hour, + TelemetryLevel: telemetryLevel, + }) + handler, closeCoderd := coderd.New(options) client := codersdk.New(localURL) if tlsEnable { @@ -461,6 +475,8 @@ func server() *cobra.Command { "Specifies organizations the user must be a member of to authenticate with GitHub.") cliflag.BoolVarP(root.Flags(), &oauth2GithubAllowSignups, "oauth2-github-allow-signups", "", "CODER_OAUTH2_GITHUB_ALLOW_SIGNUPS", false, "Specifies whether new users can sign up with GitHub.") + cliflag.StringVarP(root.Flags(), &telemetryLevelRaw, "telemetry", "", "CODER_TELEMETRY", "all", "The level of telemetry to send. "+ + `Accepted values are "all", "core", or "none"`) cliflag.BoolVarP(root.Flags(), &tlsEnable, "tls-enable", "", "CODER_TLS_ENABLE", false, "Specifies if TLS will be enabled") cliflag.StringVarP(root.Flags(), &tlsCertFile, "tls-cert-file", "", "CODER_TLS_CERT_FILE", "", "Specifies the path to the certificate for TLS. It requires a PEM-encoded file. "+ @@ -569,16 +585,16 @@ func newProvisionerDaemon(ctx context.Context, client *codersdk.Client, logger s func printLogo(cmd *cobra.Command, spooky bool) { if spooky { _, _ = fmt.Fprintf(cmd.OutOrStdout(), ` - ▄████▄ ▒█████ ▓█████▄ ▓█████ ██▀███ + ▄████▄ ▒█████ ▓█████▄ ▓█████ ██▀███ ▒██▀ ▀█ ▒██▒ ██▒▒██▀ ██▌▓█ ▀ ▓██ ▒ ██▒ ▒▓█ ▄ ▒██░ ██▒░██ █▌▒███ ▓██ ░▄█ ▒ - ▒▓▓▄ ▄██▒▒██ ██░░▓█▄ ▌▒▓█ ▄ ▒██▀▀█▄ + ▒▓▓▄ ▄██▒▒██ ██░░▓█▄ ▌▒▓█ ▄ ▒██▀▀█▄ ▒ ▓███▀ ░░ ████▓▒░░▒████▓ ░▒████▒░██▓ ▒██▒ ░ ░▒ ▒ ░░ ▒░▒░▒░ ▒▒▓ ▒ ░░ ▒░ ░░ ▒▓ ░▒▓░ ░ ▒ ░ ▒ ▒░ ░ ▒ ▒ ░ ░ ░ ░▒ ░ ▒░ - ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░░ ░ - ░ ░ ░ ░ ░ ░ ░ ░ - ░ ░ + ░ ░ ░ ░ ▒ ░ ░ ░ ░ ░░ ░ + ░ ░ ░ ░ ░ ░ ░ ░ + ░ ░ `) return diff --git a/coderd/coderd.go b/coderd/coderd.go index 1f1a4ff18dfac..63239b61d1caf 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -24,6 +24,7 @@ import ( "github.com/coder/coder/coderd/gitsshkey" "github.com/coder/coder/coderd/httpapi" "github.com/coder/coder/coderd/httpmw" + "github.com/coder/coder/coderd/monitoring" "github.com/coder/coder/coderd/rbac" "github.com/coder/coder/coderd/turnconn" "github.com/coder/coder/codersdk" @@ -47,6 +48,7 @@ type Options struct { GoogleTokenValidator *idtoken.Validator GithubOAuth2Config *GithubOAuth2Config ICEServers []webrtc.ICEServer + Monitor *monitoring.Monitor SecureAuthCookie bool SSHKeygenAlgorithm gitsshkey.Algorithm TURNServer *turnconn.Server @@ -91,7 +93,7 @@ func New(options *Options) (http.Handler, func()) { next.ServeHTTP(middleware.NewWrapResponseWriter(w, r.ProtoMajor), r) }) }, - httpmw.Prometheus, + httpmw.Prometheus(options.Monitor), chitrace.Middleware(), ) diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 117e96e6ac04f..7dc78d99019b3 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -45,6 +45,7 @@ import ( "github.com/coder/coder/coderd/database/databasefake" "github.com/coder/coder/coderd/database/postgres" "github.com/coder/coder/coderd/gitsshkey" + "github.com/coder/coder/coderd/monitoring" "github.com/coder/coder/coderd/turnconn" "github.com/coder/coder/codersdk" "github.com/coder/coder/cryptorand" @@ -145,10 +146,16 @@ func NewMemoryCoderd(t *testing.T, options *Options) (*httptest.Server, *codersd AzureCertificates: options.AzureCertificates, GithubOAuth2Config: options.GithubOAuth2Config, GoogleTokenValidator: options.GoogleTokenValidator, - SSHKeygenAlgorithm: options.SSHKeygenAlgorithm, - TURNServer: turnServer, - APIRateLimit: options.APIRateLimit, - Authorizer: options.Authorizer, + Monitor: monitoring.New(ctx, &monitoring.Options{ + Database: db, + Logger: slogtest.Make(t, nil), + RefreshInterval: time.Minute, + TelemetryLevel: monitoring.TelemetryLevelNone, + }), + SSHKeygenAlgorithm: options.SSHKeygenAlgorithm, + TURNServer: turnServer, + APIRateLimit: options.APIRateLimit, + Authorizer: options.Authorizer, }) t.Cleanup(func() { cancelFunc() diff --git a/coderd/database/databasefake/databasefake.go b/coderd/database/databasefake/databasefake.go index 5d5917af96e14..f5e1f89cd621d 100644 --- a/coderd/database/databasefake/databasefake.go +++ b/coderd/database/databasefake/databasefake.go @@ -1081,6 +1081,7 @@ func (q *fakeQuerier) GetWorkspaceResourcesByJobID(_ context.Context, jobID uuid return resources, nil } +// revive:disable-next-line:flag-parameter func (q *fakeQuerier) GetWorkspaces(_ context.Context, deleted bool) ([]database.Workspace, error) { q.mutex.RLock() defer q.mutex.RUnlock() diff --git a/coderd/httpmw/prometheus.go b/coderd/httpmw/prometheus.go index e03966ff9788f..f2edeed4a96f8 100644 --- a/coderd/httpmw/prometheus.go +++ b/coderd/httpmw/prometheus.go @@ -9,29 +9,30 @@ import ( chimw "github.com/go-chi/chi/v5/middleware" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/coder/coder/coderd/monitoring" ) var ( - requestsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{ + requestsProcessed = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: "coderd", Subsystem: "api", Name: "requests_processed_total", Help: "The total number of processed API requests", }, []string{"code", "method", "path"}) - requestsConcurrent = promauto.NewGauge(prometheus.GaugeOpts{ + requestsConcurrent = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "coderd", Subsystem: "api", Name: "concurrent_requests", Help: "The number of concurrent API requests", }) - websocketsConcurrent = promauto.NewGauge(prometheus.GaugeOpts{ + websocketsConcurrent = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "coderd", Subsystem: "api", Name: "concurrent_websockets", Help: "The total number of concurrent API websockets", }) - websocketsDist = promauto.NewHistogramVec(prometheus.HistogramOpts{ + websocketsDist = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: "coderd", Subsystem: "api", Name: "websocket_durations_ms", @@ -45,7 +46,7 @@ var ( durationToFloatMs(30 * time.Hour), }, }, []string{"path"}) - requestsDist = promauto.NewHistogramVec(prometheus.HistogramOpts{ + requestsDist = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: "coderd", Subsystem: "api", Name: "request_latencies_ms", @@ -58,45 +59,55 @@ func durationToFloatMs(d time.Duration) float64 { return float64(d.Milliseconds()) } -func Prometheus(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - var ( - start = time.Now() - method = r.Method - rctx = chi.RouteContext(r.Context()) - ) - sw, ok := w.(chimw.WrapResponseWriter) - if !ok { - panic("dev error: http.ResponseWriter is not chimw.WrapResponseWriter") - } +func Prometheus(monitor *monitoring.Monitor) func(http.Handler) http.Handler { + monitor.MustRegister( + monitoring.TelemetryLevelNone, + requestsProcessed, + requestsConcurrent, + websocketsConcurrent, + requestsDist, + ) - var ( - dist *prometheus.HistogramVec - distOpts []string - ) - // We want to count websockets separately. - if isWebsocketUpgrade(r) { - websocketsConcurrent.Inc() - defer websocketsConcurrent.Dec() + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var ( + start = time.Now() + method = r.Method + rctx = chi.RouteContext(r.Context()) + ) + sw, ok := w.(chimw.WrapResponseWriter) + if !ok { + panic("dev error: http.ResponseWriter is not chimw.WrapResponseWriter") + } - dist = websocketsDist - } else { - requestsConcurrent.Inc() - defer requestsConcurrent.Dec() + var ( + dist *prometheus.HistogramVec + distOpts []string + ) + // We want to count websockets separately. + if isWebsocketUpgrade(r) { + websocketsConcurrent.Inc() + defer websocketsConcurrent.Dec() - dist = requestsDist - distOpts = []string{method} - } + dist = websocketsDist + } else { + requestsConcurrent.Inc() + defer requestsConcurrent.Dec() - next.ServeHTTP(w, r) + dist = requestsDist + distOpts = []string{method} + } - path := rctx.RoutePattern() - distOpts = append(distOpts, path) - statusStr := strconv.Itoa(sw.Status()) + next.ServeHTTP(w, r) - requestsProcessed.WithLabelValues(statusStr, method, path).Inc() - dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6) - }) + path := rctx.RoutePattern() + distOpts = append(distOpts, path) + statusStr := strconv.Itoa(sw.Status()) + + requestsProcessed.WithLabelValues(statusStr, method, path).Inc() + dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6) + }) + } } func isWebsocketUpgrade(r *http.Request) bool { diff --git a/coderd/monitoring/monitoring.go b/coderd/monitoring/monitoring.go new file mode 100644 index 0000000000000..c287993c11e61 --- /dev/null +++ b/coderd/monitoring/monitoring.go @@ -0,0 +1,231 @@ +package monitoring + +import ( + "context" + "database/sql" + "strings" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "golang.org/x/sync/errgroup" + "golang.org/x/xerrors" + + "cdr.dev/slog" + "github.com/coder/coder/coderd/database" +) + +type TelemetryLevel string + +const ( + TelemetryLevelAll TelemetryLevel = "all" + TelemetryLevelCore TelemetryLevel = "core" + TelemetryLevelNone TelemetryLevel = "none" +) + +// ParseTelemetryLevel returns a valid TelemetryLevel or error if input is not a valid. +func ParseTelemetryLevel(t string) (TelemetryLevel, error) { + ok := []string{ + string(TelemetryLevelAll), + string(TelemetryLevelCore), + string(TelemetryLevelNone), + } + + for _, a := range ok { + if strings.EqualFold(a, t) { + return TelemetryLevel(a), nil + } + } + + return "", xerrors.Errorf(`invalid telemetry level: %s, must be one of: %s`, t, strings.Join(ok, ",")) +} + +type Options struct { + Database database.Store + Logger slog.Logger + RefreshInterval time.Duration + TelemetryLevel TelemetryLevel +} + +type Monitor struct { + // allRegistry registers metrics that will be sent when the telemetry level is + // `all`. + allRegistry *prometheus.Registry + // db is the database from which to pull stats. + db database.Store + ctx context.Context + // coreRegistry registers metrics that will be sent when the telemetry level + // is `core` or `all`. + coreRegistry *prometheus.Registry + // internalRegisry registers metrics that will never be sent. + internalRegistry *prometheus.Registry + // refreshMutex is used to prevent multiple refreshes at a time. + refreshMutex *sync.Mutex + // stats are internally registered metrics that update via Refresh. + stats Stats + // TelemetryLevel determines which metrics are sent to Coder. + TelemetryLevel TelemetryLevel +} + +type Stats struct { + Users *prometheus.GaugeVec + Workspaces *prometheus.GaugeVec + WorkspaceResources *prometheus.GaugeVec +} + +func New(ctx context.Context, options *Options) *Monitor { + monitor := Monitor{ + allRegistry: prometheus.NewRegistry(), + db: options.Database, + ctx: ctx, + coreRegistry: prometheus.NewRegistry(), + internalRegistry: prometheus.NewRegistry(), + refreshMutex: &sync.Mutex{}, + stats: Stats{ + Users: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "coder", + Name: "users", + Help: "The users in a Coder deployment.", + }, []string{ + "user_id", + "user_name", + }), + Workspaces: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "coder", + Name: "workspaces", + Help: "The workspaces in a Coder deployment.", + }, []string{ + "workspace_id", + "workspace_name", + }), + WorkspaceResources: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "coder", + Name: "workspace_resources", + Help: "The workspace resources in a Coder deployment.", + }, []string{ + "workspace_resource_id", + "workspace_resource_name", + "workspace_resource_type", + }), + }, + TelemetryLevel: options.TelemetryLevel, + } + + monitor.MustRegister( + TelemetryLevelAll, + monitor.stats.Users, + monitor.stats.Workspaces, + monitor.stats.WorkspaceResources, + ) + + ticker := time.NewTicker(options.RefreshInterval) + go func() { + defer ticker.Stop() + select { + case <-ctx.Done(): + return + case <-ticker.C: + err := monitor.Refresh() + if err != nil { + options.Logger.Error(ctx, "failed to refresh stats", slog.Error(err)) + } + } + }() + + return &monitor +} + +// MustRegister registers collectors at the specified level. +func (t Monitor) MustRegister(level TelemetryLevel, cs ...prometheus.Collector) { + switch level { + case TelemetryLevelAll: + t.allRegistry.MustRegister(cs...) + case TelemetryLevelCore: + t.coreRegistry.MustRegister(cs...) + case TelemetryLevelNone: + t.internalRegistry.MustRegister(cs...) + } +} + +// Gather returns all gathered metrics. +func (t Monitor) Gather() ([]*dto.MetricFamily, error) { + allMetrics, err := t.allRegistry.Gather() + if err != nil { + return nil, err + } + + coreMetrics, err := t.coreRegistry.Gather() + if err != nil { + return nil, err + } + + internalMetrics, err := t.internalRegistry.Gather() + if err != nil { + return nil, err + } + + return append(append(allMetrics, coreMetrics...), internalMetrics...), nil +} + +// Refresh populates internal stats with the latest data. +func (t Monitor) Refresh() error { + t.refreshMutex.Lock() + defer t.refreshMutex.Unlock() + + errGroup, ctx := errgroup.WithContext(t.ctx) + + errGroup.Go(func() error { + dbUsers, err := t.db.GetUsers(ctx, database.GetUsersParams{}) + if err != nil && !xerrors.Is(err, sql.ErrNoRows) { + return err + } + + t.stats.Users.Reset() + for _, dbu := range dbUsers { + t.stats.Users.With(prometheus.Labels{ + "user_id": dbu.ID.String(), + "user_name": dbu.Username, + }).Add(1) + } + + return nil + }) + + errGroup.Go(func() error { + dbWorkspaces, err := t.db.GetWorkspaces(ctx, false) + if err != nil && !xerrors.Is(err, sql.ErrNoRows) { + return err + } + + t.stats.Workspaces.Reset() + for _, dbw := range dbWorkspaces { + t.stats.Workspaces.With(prometheus.Labels{ + "workspace_id": dbw.ID.String(), + "workspace_name": dbw.Name, + }).Add(1) + } + + return nil + }) + + errGroup.Go(func() error { + dbWorkspaceResources, err := t.db.GetWorkspaceResources(ctx) + if err != nil && !xerrors.Is(err, sql.ErrNoRows) { + return err + } + + t.stats.WorkspaceResources.Reset() + for _, dbwr := range dbWorkspaceResources { + t.stats.WorkspaceResources.With(prometheus.Labels{ + "workspace_resource_id": dbwr.ID.String(), + "workspace_resource_name": dbwr.Name, + "workspace_resource_type": dbwr.Type, + }).Add(1) + } + + return nil + }) + + return errGroup.Wait() +} diff --git a/coderd/monitoring/monitoring_test.go b/coderd/monitoring/monitoring_test.go new file mode 100644 index 0000000000000..e1efc045fcdc4 --- /dev/null +++ b/coderd/monitoring/monitoring_test.go @@ -0,0 +1,170 @@ +package monitoring_test + +import ( + "context" + "testing" + "time" + + "golang.org/x/xerrors" + + "cdr.dev/slog/sloggers/slogtest" + "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/databasefake" + "github.com/coder/coder/coderd/monitoring" + "github.com/google/uuid" + dto "github.com/prometheus/client_model/go" + "github.com/stretchr/testify/require" +) + +func TestMonitoring(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + db := databasefake.New() + + monitor := monitoring.New(ctx, &monitoring.Options{ + Database: db, + Logger: slogtest.Make(t, nil), + RefreshInterval: time.Minute, + TelemetryLevel: monitoring.TelemetryLevelNone, + }) + + user, _ := db.InsertUser(ctx, database.InsertUserParams{ + ID: uuid.New(), + Username: "kyle", + }) + org, _ := db.InsertOrganization(ctx, database.InsertOrganizationParams{ + ID: uuid.New(), + Name: "potato", + }) + template, _ := db.InsertTemplate(ctx, database.InsertTemplateParams{ + ID: uuid.New(), + Name: "something", + OrganizationID: org.ID, + }) + workspace, _ := db.InsertWorkspace(ctx, database.InsertWorkspaceParams{ + ID: uuid.New(), + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: template.ID, + Name: "banana1", + }) + job, _ := db.InsertProvisionerJob(ctx, database.InsertProvisionerJobParams{ + ID: uuid.New(), + OrganizationID: org.ID, + }) + version, _ := db.InsertTemplateVersion(ctx, database.InsertTemplateVersionParams{ + ID: uuid.New(), + TemplateID: uuid.NullUUID{ + UUID: template.ID, + Valid: true, + }, + CreatedAt: database.Now(), + OrganizationID: org.ID, + JobID: job.ID, + }) + db.InsertWorkspaceBuild(ctx, database.InsertWorkspaceBuildParams{ + ID: uuid.New(), + JobID: job.ID, + WorkspaceID: workspace.ID, + TemplateVersionID: version.ID, + Transition: database.WorkspaceTransitionStart, + }) + db.InsertWorkspaceResource(ctx, database.InsertWorkspaceResourceParams{ + ID: uuid.New(), + JobID: job.ID, + Type: "google_compute_instance", + Name: "banana2", + }) + db.InsertWorkspaceResource(ctx, database.InsertWorkspaceResourceParams{ + ID: uuid.New(), + JobID: job.ID, + Type: "google_compute_instance", + Name: "banana3", + }) + db.InsertWorkspace(ctx, database.InsertWorkspaceParams{ + ID: uuid.New(), + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: template.ID, + Name: "banana4", + }) + + err := monitor.Refresh() + require.NoError(t, err) + + metrics, err := monitor.Gather() + require.NoError(t, err) + + type labels struct { + name string + value string + } + + tests := []struct { + name string + total int + labels []labels + }{ + { + name: "coder_users", + total: 1, + labels: []labels{ + {name: "user_name", value: "kyle"}, + }, + }, + { + name: "coder_workspaces", + total: 2, + labels: []labels{ + {name: "workspace_name", value: "banana1"}, + {name: "workspace_name", value: "banana4"}, + }, + }, + { + name: "coder_workspace_resources", + total: 2, + labels: []labels{ + {name: "workspace_resource_name", value: "banana2"}, + {name: "workspace_resource_name", value: "banana3"}, + }, + }, + } + require.Len(t, metrics, len(tests)) + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + metricFamily, err := findMetric(t, tt.name, metrics) + require.NoError(t, err) + + require.Len(t, metricFamily.GetMetric(), tt.total) + + for _, l := range tt.labels { + require.NoError(t, findMetricLabel(t, l.name, l.value, metricFamily)) + } + }) + } +} + +func findMetric(_ *testing.T, name string, metrics []*dto.MetricFamily) (*dto.MetricFamily, error) { + for _, m := range metrics { + if m.GetName() == name { + return m, nil + } + } + return nil, xerrors.Errorf("no metric %s in %v", name, metrics) +} + +func findMetricLabel(_ *testing.T, name string, value string, metricFamily *dto.MetricFamily) error { + for _, m := range metricFamily.GetMetric() { + for _, l := range m.GetLabel() { + if l.GetName() == name && l.GetValue() == value { + return nil + } + } + } + return xerrors.Errorf("no metric label %s:%s in %v", name, value, metricFamily) +} From 117b94921d95573b3b02d79fef364e8577ca349d Mon Sep 17 00:00:00 2001 From: Asher Date: Mon, 16 May 2022 20:24:03 +0000 Subject: [PATCH 4/8] Rename TelemetryLevel to Telemetry --- cli/server.go | 10 +++---- coderd/coderdtest/coderdtest.go | 2 +- coderd/httpmw/prometheus.go | 2 +- coderd/monitoring/monitoring.go | 40 ++++++++++++++-------------- coderd/monitoring/monitoring_test.go | 2 +- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/cli/server.go b/cli/server.go index f3910c9d77bf1..6f34bcf5274ea 100644 --- a/cli/server.go +++ b/cli/server.go @@ -74,7 +74,7 @@ func server() *cobra.Command { oauth2GithubClientSecret string oauth2GithubAllowedOrganizations []string oauth2GithubAllowSignups bool - telemetryLevelRaw string + telemetryRaw string tlsCertFile string tlsClientCAFile string tlsClientAuth string @@ -194,9 +194,9 @@ func server() *cobra.Command { return xerrors.Errorf("parse ssh keygen algorithm %s: %w", sshKeygenAlgorithmRaw, err) } - telemetryLevel, err := monitoring.ParseTelemetryLevel(telemetryLevelRaw) + telemetry, err := monitoring.ParseTelemetry(telemetryRaw) if err != nil { - return xerrors.Errorf("parse telemetry level %s: %w", telemetryLevelRaw, err) + return xerrors.Errorf("parse telemetry %s: %w", telemetryRaw, err) } turnServer, err := turnconn.New(&turn.RelayAddressGeneratorStatic{ @@ -260,7 +260,7 @@ func server() *cobra.Command { Database: options.Database, Logger: options.Logger, RefreshInterval: time.Hour, - TelemetryLevel: telemetryLevel, + Telemetry: telemetry, }) handler, closeCoderd := coderd.New(options) @@ -475,7 +475,7 @@ func server() *cobra.Command { "Specifies organizations the user must be a member of to authenticate with GitHub.") cliflag.BoolVarP(root.Flags(), &oauth2GithubAllowSignups, "oauth2-github-allow-signups", "", "CODER_OAUTH2_GITHUB_ALLOW_SIGNUPS", false, "Specifies whether new users can sign up with GitHub.") - cliflag.StringVarP(root.Flags(), &telemetryLevelRaw, "telemetry", "", "CODER_TELEMETRY", "all", "The level of telemetry to send. "+ + cliflag.StringVarP(root.Flags(), &telemetryRaw, "telemetry", "", "CODER_TELEMETRY", "all", "The level of telemetry to send. "+ `Accepted values are "all", "core", or "none"`) cliflag.BoolVarP(root.Flags(), &tlsEnable, "tls-enable", "", "CODER_TLS_ENABLE", false, "Specifies if TLS will be enabled") cliflag.StringVarP(root.Flags(), &tlsCertFile, "tls-cert-file", "", "CODER_TLS_CERT_FILE", "", diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 7dc78d99019b3..462ebc30a95b1 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -150,7 +150,7 @@ func NewMemoryCoderd(t *testing.T, options *Options) (*httptest.Server, *codersd Database: db, Logger: slogtest.Make(t, nil), RefreshInterval: time.Minute, - TelemetryLevel: monitoring.TelemetryLevelNone, + Telemetry: monitoring.TelemetryNone, }), SSHKeygenAlgorithm: options.SSHKeygenAlgorithm, TURNServer: turnServer, diff --git a/coderd/httpmw/prometheus.go b/coderd/httpmw/prometheus.go index f2edeed4a96f8..213bbf5c37778 100644 --- a/coderd/httpmw/prometheus.go +++ b/coderd/httpmw/prometheus.go @@ -61,7 +61,7 @@ func durationToFloatMs(d time.Duration) float64 { func Prometheus(monitor *monitoring.Monitor) func(http.Handler) http.Handler { monitor.MustRegister( - monitoring.TelemetryLevelNone, + monitoring.TelemetryNone, requestsProcessed, requestsConcurrent, websocketsConcurrent, diff --git a/coderd/monitoring/monitoring.go b/coderd/monitoring/monitoring.go index c287993c11e61..292f597796745 100644 --- a/coderd/monitoring/monitoring.go +++ b/coderd/monitoring/monitoring.go @@ -16,25 +16,25 @@ import ( "github.com/coder/coder/coderd/database" ) -type TelemetryLevel string +type Telemetry string const ( - TelemetryLevelAll TelemetryLevel = "all" - TelemetryLevelCore TelemetryLevel = "core" - TelemetryLevelNone TelemetryLevel = "none" + TelemetryAll Telemetry = "all" + TelemetryCore Telemetry = "core" + TelemetryNone Telemetry = "none" ) -// ParseTelemetryLevel returns a valid TelemetryLevel or error if input is not a valid. -func ParseTelemetryLevel(t string) (TelemetryLevel, error) { +// ParseTelemetry returns a valid Telemetry or error if input is not a valid. +func ParseTelemetry(t string) (Telemetry, error) { ok := []string{ - string(TelemetryLevelAll), - string(TelemetryLevelCore), - string(TelemetryLevelNone), + string(TelemetryAll), + string(TelemetryCore), + string(TelemetryNone), } for _, a := range ok { if strings.EqualFold(a, t) { - return TelemetryLevel(a), nil + return Telemetry(a), nil } } @@ -45,7 +45,7 @@ type Options struct { Database database.Store Logger slog.Logger RefreshInterval time.Duration - TelemetryLevel TelemetryLevel + Telemetry Telemetry } type Monitor struct { @@ -58,14 +58,14 @@ type Monitor struct { // coreRegistry registers metrics that will be sent when the telemetry level // is `core` or `all`. coreRegistry *prometheus.Registry - // internalRegisry registers metrics that will never be sent. + // internalRegistry registers metrics that will never be sent. internalRegistry *prometheus.Registry // refreshMutex is used to prevent multiple refreshes at a time. refreshMutex *sync.Mutex // stats are internally registered metrics that update via Refresh. stats Stats - // TelemetryLevel determines which metrics are sent to Coder. - TelemetryLevel TelemetryLevel + // Telemetry determines which metrics are sent to Coder. + Telemetry Telemetry } type Stats struct { @@ -109,11 +109,11 @@ func New(ctx context.Context, options *Options) *Monitor { "workspace_resource_type", }), }, - TelemetryLevel: options.TelemetryLevel, + Telemetry: options.Telemetry, } monitor.MustRegister( - TelemetryLevelAll, + TelemetryAll, monitor.stats.Users, monitor.stats.Workspaces, monitor.stats.WorkspaceResources, @@ -137,13 +137,13 @@ func New(ctx context.Context, options *Options) *Monitor { } // MustRegister registers collectors at the specified level. -func (t Monitor) MustRegister(level TelemetryLevel, cs ...prometheus.Collector) { +func (t Monitor) MustRegister(level Telemetry, cs ...prometheus.Collector) { switch level { - case TelemetryLevelAll: + case TelemetryAll: t.allRegistry.MustRegister(cs...) - case TelemetryLevelCore: + case TelemetryCore: t.coreRegistry.MustRegister(cs...) - case TelemetryLevelNone: + case TelemetryNone: t.internalRegistry.MustRegister(cs...) } } diff --git a/coderd/monitoring/monitoring_test.go b/coderd/monitoring/monitoring_test.go index e1efc045fcdc4..9e61e41c17701 100644 --- a/coderd/monitoring/monitoring_test.go +++ b/coderd/monitoring/monitoring_test.go @@ -28,7 +28,7 @@ func TestMonitoring(t *testing.T) { Database: db, Logger: slogtest.Make(t, nil), RefreshInterval: time.Minute, - TelemetryLevel: monitoring.TelemetryLevelNone, + Telemetry: monitoring.TelemetryNone, }) user, _ := db.InsertUser(ctx, database.InsertUserParams{ From f7784bbb2aa0cbd2bc7499f57223f3dae5a0728e Mon Sep 17 00:00:00 2001 From: Asher Date: Mon, 16 May 2022 20:32:50 +0000 Subject: [PATCH 5/8] Remove resource type and name from monitoring --- coderd/monitoring/monitoring.go | 2 -- coderd/monitoring/monitoring_test.go | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/coderd/monitoring/monitoring.go b/coderd/monitoring/monitoring.go index 292f597796745..c2ffc88c96082 100644 --- a/coderd/monitoring/monitoring.go +++ b/coderd/monitoring/monitoring.go @@ -104,8 +104,6 @@ func New(ctx context.Context, options *Options) *Monitor { Name: "workspace_resources", Help: "The workspace resources in a Coder deployment.", }, []string{ - "workspace_resource_id", - "workspace_resource_name", "workspace_resource_type", }), }, diff --git a/coderd/monitoring/monitoring_test.go b/coderd/monitoring/monitoring_test.go index 9e61e41c17701..0feeb375a71c8 100644 --- a/coderd/monitoring/monitoring_test.go +++ b/coderd/monitoring/monitoring_test.go @@ -127,8 +127,7 @@ func TestMonitoring(t *testing.T) { name: "coder_workspace_resources", total: 2, labels: []labels{ - {name: "workspace_resource_name", value: "banana2"}, - {name: "workspace_resource_name", value: "banana3"}, + {name: "workspace_resource_type", value: "google_compute_instance"}, }, }, } From 63252b1c9d6d41d8736dca5bb918c0694754de45 Mon Sep 17 00:00:00 2001 From: Asher Date: Mon, 16 May 2022 22:45:13 +0000 Subject: [PATCH 6/8] Implement stats as a Prometheus collector --- coderd/monitoring/collector.go | 122 ++++++++++++++++++++ coderd/monitoring/collector_test.go | 100 +++++++++++++++++ coderd/monitoring/monitoring.go | 158 ++------------------------ coderd/monitoring/monitoring_test.go | 160 +++------------------------ 4 files changed, 245 insertions(+), 295 deletions(-) create mode 100644 coderd/monitoring/collector.go create mode 100644 coderd/monitoring/collector_test.go diff --git a/coderd/monitoring/collector.go b/coderd/monitoring/collector.go new file mode 100644 index 0000000000000..d709f06fcad96 --- /dev/null +++ b/coderd/monitoring/collector.go @@ -0,0 +1,122 @@ +package monitoring + +import ( + "context" + "database/sql" + "sync" + + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/xerrors" + + "github.com/coder/coder/coderd/database" +) + +// Collector implements prometheus.Collector and collects statistics from the +// provided database. +type Collector struct { + ctx context.Context + db database.Store + users *prometheus.Desc + workspaces *prometheus.Desc + workspaceResources *prometheus.Desc +} + +func NewCollector(ctx context.Context, db database.Store) *Collector { + return &Collector{ + ctx: ctx, + db: db, + users: prometheus.NewDesc( + "coder_users", + "The users in a Coder deployment.", + nil, + nil, + ), + workspaces: prometheus.NewDesc( + "coder_workspaces", + "The workspaces in a Coder deployment.", + nil, + nil, + ), + workspaceResources: prometheus.NewDesc( + "coder_workspace_resources", + "The workspace resources in a Coder deployment.", + []string{ + "workspace_resource_type", + }, + nil, + ), + } +} + +// Describe implements prometheus.Collector. +func (c *Collector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.users + ch <- c.workspaces + ch <- c.workspaceResources +} + +// Collect implements prometheus.Collector. +func (c *Collector) Collect(ch chan<- prometheus.Metric) { + var wg sync.WaitGroup + + wg.Add(1) + go func() { + defer wg.Done() + + dbUsers, err := c.db.GetUsers(c.ctx, database.GetUsersParams{}) + if err != nil && !xerrors.Is(err, sql.ErrNoRows) { + ch <- prometheus.NewInvalidMetric(c.users, err) + return + } + + ch <- prometheus.MustNewConstMetric( + c.users, + prometheus.GaugeValue, + float64(len(dbUsers)), + ) + }() + + wg.Add(1) + go func() { + defer wg.Done() + + dbWorkspaces, err := c.db.GetWorkspaces(c.ctx, false) + if err != nil && !xerrors.Is(err, sql.ErrNoRows) { + ch <- prometheus.NewInvalidMetric(c.workspaces, err) + return + } + + ch <- prometheus.MustNewConstMetric( + c.workspaces, + prometheus.GaugeValue, + float64(len(dbWorkspaces)), + ) + }() + + wg.Add(1) + go func() { + defer wg.Done() + + dbWorkspaceResources, err := c.db.GetWorkspaceResources(c.ctx) + if err != nil && !xerrors.Is(err, sql.ErrNoRows) { + ch <- prometheus.NewInvalidMetric(c.workspaceResources, err) + return + } + + resourcesByType := map[string][]database.WorkspaceResource{} + for _, dbwr := range dbWorkspaceResources { + resourcesByType[dbwr.Type] = append(resourcesByType[dbwr.Type], dbwr) + } + + for resourceType, resources := range resourcesByType { + ch <- prometheus.MustNewConstMetric( + c.workspaceResources, + prometheus.GaugeValue, + float64(len(resources)), + resourceType, + ) + } + }() + + wg.Wait() +} diff --git a/coderd/monitoring/collector_test.go b/coderd/monitoring/collector_test.go new file mode 100644 index 0000000000000..131da453c4c58 --- /dev/null +++ b/coderd/monitoring/collector_test.go @@ -0,0 +1,100 @@ +package monitoring_test + +import ( + "context" + "strings" + "testing" + + "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/databasefake" + "github.com/coder/coder/coderd/monitoring" + "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" +) + +func TestCollector(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + db := databasefake.New() + populateDB(ctx, db) + + collector := monitoring.NewCollector(ctx, db) + expected := ` + # HELP coder_users The users in a Coder deployment. + # TYPE coder_users gauge + coder_users 1 + # HELP coder_workspace_resources The workspace resources in a Coder deployment. + # TYPE coder_workspace_resources gauge + coder_workspace_resources{workspace_resource_type="google_compute_instance"} 2 + # HELP coder_workspaces The workspaces in a Coder deployment. + # TYPE coder_workspaces gauge + coder_workspaces 2 + ` + require.NoError(t, testutil.CollectAndCompare(collector, strings.NewReader(expected))) +} + +func populateDB(ctx context.Context, db database.Store) { + user, _ := db.InsertUser(ctx, database.InsertUserParams{ + ID: uuid.New(), + Username: "kyle", + }) + org, _ := db.InsertOrganization(ctx, database.InsertOrganizationParams{ + ID: uuid.New(), + Name: "potato", + }) + template, _ := db.InsertTemplate(ctx, database.InsertTemplateParams{ + ID: uuid.New(), + Name: "something", + OrganizationID: org.ID, + }) + workspace, _ := db.InsertWorkspace(ctx, database.InsertWorkspaceParams{ + ID: uuid.New(), + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: template.ID, + Name: "banana1", + }) + job, _ := db.InsertProvisionerJob(ctx, database.InsertProvisionerJobParams{ + ID: uuid.New(), + OrganizationID: org.ID, + }) + version, _ := db.InsertTemplateVersion(ctx, database.InsertTemplateVersionParams{ + ID: uuid.New(), + TemplateID: uuid.NullUUID{ + UUID: template.ID, + Valid: true, + }, + CreatedAt: database.Now(), + OrganizationID: org.ID, + JobID: job.ID, + }) + db.InsertWorkspaceBuild(ctx, database.InsertWorkspaceBuildParams{ + ID: uuid.New(), + JobID: job.ID, + WorkspaceID: workspace.ID, + TemplateVersionID: version.ID, + Transition: database.WorkspaceTransitionStart, + }) + db.InsertWorkspaceResource(ctx, database.InsertWorkspaceResourceParams{ + ID: uuid.New(), + JobID: job.ID, + Type: "google_compute_instance", + Name: "banana2", + }) + db.InsertWorkspaceResource(ctx, database.InsertWorkspaceResourceParams{ + ID: uuid.New(), + JobID: job.ID, + Type: "google_compute_instance", + Name: "banana3", + }) + db.InsertWorkspace(ctx, database.InsertWorkspaceParams{ + ID: uuid.New(), + OwnerID: user.ID, + OrganizationID: org.ID, + TemplateID: template.ID, + Name: "banana4", + }) +} diff --git a/coderd/monitoring/monitoring.go b/coderd/monitoring/monitoring.go index c2ffc88c96082..dd86bd1fc02b0 100644 --- a/coderd/monitoring/monitoring.go +++ b/coderd/monitoring/monitoring.go @@ -2,14 +2,10 @@ package monitoring import ( "context" - "database/sql" "strings" - "sync" "time" "github.com/prometheus/client_golang/prometheus" - dto "github.com/prometheus/client_model/go" - "golang.org/x/sync/errgroup" "golang.org/x/xerrors" "cdr.dev/slog" @@ -48,88 +44,32 @@ type Options struct { Telemetry Telemetry } +// Monitor provides Prometheus registries on which to register metric +// collectors. Depending on the level these metrics may also be sent to Coder. +// Monitor automatically registers a collector that collects statistics from the +// database. type Monitor struct { - // allRegistry registers metrics that will be sent when the telemetry level is - // `all`. + // allRegistry registers metrics that will be sent when the telemetry level + // is `all`. allRegistry *prometheus.Registry - // db is the database from which to pull stats. - db database.Store - ctx context.Context // coreRegistry registers metrics that will be sent when the telemetry level // is `core` or `all`. coreRegistry *prometheus.Registry // internalRegistry registers metrics that will never be sent. internalRegistry *prometheus.Registry - // refreshMutex is used to prevent multiple refreshes at a time. - refreshMutex *sync.Mutex - // stats are internally registered metrics that update via Refresh. - stats Stats // Telemetry determines which metrics are sent to Coder. Telemetry Telemetry } -type Stats struct { - Users *prometheus.GaugeVec - Workspaces *prometheus.GaugeVec - WorkspaceResources *prometheus.GaugeVec -} - func New(ctx context.Context, options *Options) *Monitor { monitor := Monitor{ allRegistry: prometheus.NewRegistry(), - db: options.Database, - ctx: ctx, coreRegistry: prometheus.NewRegistry(), internalRegistry: prometheus.NewRegistry(), - refreshMutex: &sync.Mutex{}, - stats: Stats{ - Users: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "coder", - Name: "users", - Help: "The users in a Coder deployment.", - }, []string{ - "user_id", - "user_name", - }), - Workspaces: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "coder", - Name: "workspaces", - Help: "The workspaces in a Coder deployment.", - }, []string{ - "workspace_id", - "workspace_name", - }), - WorkspaceResources: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "coder", - Name: "workspace_resources", - Help: "The workspace resources in a Coder deployment.", - }, []string{ - "workspace_resource_type", - }), - }, - Telemetry: options.Telemetry, + Telemetry: options.Telemetry, } - monitor.MustRegister( - TelemetryAll, - monitor.stats.Users, - monitor.stats.Workspaces, - monitor.stats.WorkspaceResources, - ) - - ticker := time.NewTicker(options.RefreshInterval) - go func() { - defer ticker.Stop() - select { - case <-ctx.Done(): - return - case <-ticker.C: - err := monitor.Refresh() - if err != nil { - options.Logger.Error(ctx, "failed to refresh stats", slog.Error(err)) - } - } - }() + monitor.MustRegister(TelemetryAll, NewCollector(ctx, options.Database)) return &monitor } @@ -145,85 +85,3 @@ func (t Monitor) MustRegister(level Telemetry, cs ...prometheus.Collector) { t.internalRegistry.MustRegister(cs...) } } - -// Gather returns all gathered metrics. -func (t Monitor) Gather() ([]*dto.MetricFamily, error) { - allMetrics, err := t.allRegistry.Gather() - if err != nil { - return nil, err - } - - coreMetrics, err := t.coreRegistry.Gather() - if err != nil { - return nil, err - } - - internalMetrics, err := t.internalRegistry.Gather() - if err != nil { - return nil, err - } - - return append(append(allMetrics, coreMetrics...), internalMetrics...), nil -} - -// Refresh populates internal stats with the latest data. -func (t Monitor) Refresh() error { - t.refreshMutex.Lock() - defer t.refreshMutex.Unlock() - - errGroup, ctx := errgroup.WithContext(t.ctx) - - errGroup.Go(func() error { - dbUsers, err := t.db.GetUsers(ctx, database.GetUsersParams{}) - if err != nil && !xerrors.Is(err, sql.ErrNoRows) { - return err - } - - t.stats.Users.Reset() - for _, dbu := range dbUsers { - t.stats.Users.With(prometheus.Labels{ - "user_id": dbu.ID.String(), - "user_name": dbu.Username, - }).Add(1) - } - - return nil - }) - - errGroup.Go(func() error { - dbWorkspaces, err := t.db.GetWorkspaces(ctx, false) - if err != nil && !xerrors.Is(err, sql.ErrNoRows) { - return err - } - - t.stats.Workspaces.Reset() - for _, dbw := range dbWorkspaces { - t.stats.Workspaces.With(prometheus.Labels{ - "workspace_id": dbw.ID.String(), - "workspace_name": dbw.Name, - }).Add(1) - } - - return nil - }) - - errGroup.Go(func() error { - dbWorkspaceResources, err := t.db.GetWorkspaceResources(ctx) - if err != nil && !xerrors.Is(err, sql.ErrNoRows) { - return err - } - - t.stats.WorkspaceResources.Reset() - for _, dbwr := range dbWorkspaceResources { - t.stats.WorkspaceResources.With(prometheus.Labels{ - "workspace_resource_id": dbwr.ID.String(), - "workspace_resource_name": dbwr.Name, - "workspace_resource_type": dbwr.Type, - }).Add(1) - } - - return nil - }) - - return errGroup.Wait() -} diff --git a/coderd/monitoring/monitoring_test.go b/coderd/monitoring/monitoring_test.go index 0feeb375a71c8..f734ffb9451f1 100644 --- a/coderd/monitoring/monitoring_test.go +++ b/coderd/monitoring/monitoring_test.go @@ -1,169 +1,39 @@ package monitoring_test import ( - "context" "testing" - "time" - "golang.org/x/xerrors" - - "cdr.dev/slog/sloggers/slogtest" - "github.com/coder/coder/coderd/database" - "github.com/coder/coder/coderd/database/databasefake" "github.com/coder/coder/coderd/monitoring" - "github.com/google/uuid" - dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" ) -func TestMonitoring(t *testing.T) { +func TestParseTelemetry(t *testing.T) { t.Parallel() - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - db := databasefake.New() - - monitor := monitoring.New(ctx, &monitoring.Options{ - Database: db, - Logger: slogtest.Make(t, nil), - RefreshInterval: time.Minute, - Telemetry: monitoring.TelemetryNone, - }) - - user, _ := db.InsertUser(ctx, database.InsertUserParams{ - ID: uuid.New(), - Username: "kyle", - }) - org, _ := db.InsertOrganization(ctx, database.InsertOrganizationParams{ - ID: uuid.New(), - Name: "potato", - }) - template, _ := db.InsertTemplate(ctx, database.InsertTemplateParams{ - ID: uuid.New(), - Name: "something", - OrganizationID: org.ID, - }) - workspace, _ := db.InsertWorkspace(ctx, database.InsertWorkspaceParams{ - ID: uuid.New(), - OwnerID: user.ID, - OrganizationID: org.ID, - TemplateID: template.ID, - Name: "banana1", - }) - job, _ := db.InsertProvisionerJob(ctx, database.InsertProvisionerJobParams{ - ID: uuid.New(), - OrganizationID: org.ID, - }) - version, _ := db.InsertTemplateVersion(ctx, database.InsertTemplateVersionParams{ - ID: uuid.New(), - TemplateID: uuid.NullUUID{ - UUID: template.ID, - Valid: true, - }, - CreatedAt: database.Now(), - OrganizationID: org.ID, - JobID: job.ID, - }) - db.InsertWorkspaceBuild(ctx, database.InsertWorkspaceBuildParams{ - ID: uuid.New(), - JobID: job.ID, - WorkspaceID: workspace.ID, - TemplateVersionID: version.ID, - Transition: database.WorkspaceTransitionStart, - }) - db.InsertWorkspaceResource(ctx, database.InsertWorkspaceResourceParams{ - ID: uuid.New(), - JobID: job.ID, - Type: "google_compute_instance", - Name: "banana2", - }) - db.InsertWorkspaceResource(ctx, database.InsertWorkspaceResourceParams{ - ID: uuid.New(), - JobID: job.ID, - Type: "google_compute_instance", - Name: "banana3", - }) - db.InsertWorkspace(ctx, database.InsertWorkspaceParams{ - ID: uuid.New(), - OwnerID: user.ID, - OrganizationID: org.ID, - TemplateID: template.ID, - Name: "banana4", - }) - - err := monitor.Refresh() - require.NoError(t, err) - - metrics, err := monitor.Gather() - require.NoError(t, err) - - type labels struct { - name string - value string - } - tests := []struct { - name string - total int - labels []labels + value string + telemetry monitoring.Telemetry }{ { - name: "coder_users", - total: 1, - labels: []labels{ - {name: "user_name", value: "kyle"}, - }, + value: "all", + telemetry: monitoring.TelemetryAll, }, { - name: "coder_workspaces", - total: 2, - labels: []labels{ - {name: "workspace_name", value: "banana1"}, - {name: "workspace_name", value: "banana4"}, - }, + value: "core", + telemetry: monitoring.TelemetryCore, }, { - name: "coder_workspace_resources", - total: 2, - labels: []labels{ - {name: "workspace_resource_type", value: "google_compute_instance"}, - }, + value: "none", + telemetry: monitoring.TelemetryNone, }, } - require.Len(t, metrics, len(tests)) - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - metricFamily, err := findMetric(t, tt.name, metrics) - require.NoError(t, err) - - require.Len(t, metricFamily.GetMetric(), tt.total) - for _, l := range tt.labels { - require.NoError(t, findMetricLabel(t, l.name, l.value, metricFamily)) - } - }) - } -} - -func findMetric(_ *testing.T, name string, metrics []*dto.MetricFamily) (*dto.MetricFamily, error) { - for _, m := range metrics { - if m.GetName() == name { - return m, nil - } + for _, tt := range tests { + telemetry, err := monitoring.ParseTelemetry(tt.value) + require.NoError(t, err) + require.Equal(t, tt.telemetry, telemetry) } - return nil, xerrors.Errorf("no metric %s in %v", name, metrics) -} -func findMetricLabel(_ *testing.T, name string, value string, metricFamily *dto.MetricFamily) error { - for _, m := range metricFamily.GetMetric() { - for _, l := range m.GetLabel() { - if l.GetName() == name && l.GetValue() == value { - return nil - } - } - } - return xerrors.Errorf("no metric label %s:%s in %v", name, value, metricFamily) + _, err := monitoring.ParseTelemetry("invalid") + require.Error(t, err) } From ec7ffbfe49b4f0a665f035527ddd61473904f619 Mon Sep 17 00:00:00 2001 From: Asher Date: Mon, 16 May 2022 23:21:43 +0000 Subject: [PATCH 7/8] Remove unused RefreshInterval --- cli/server.go | 1 - coderd/coderdtest/coderdtest.go | 1 - coderd/monitoring/monitoring.go | 8 +++----- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/cli/server.go b/cli/server.go index 6f34bcf5274ea..3ceb3c814c25a 100644 --- a/cli/server.go +++ b/cli/server.go @@ -259,7 +259,6 @@ func server() *cobra.Command { options.Monitor = monitoring.New(cmd.Context(), &monitoring.Options{ Database: options.Database, Logger: options.Logger, - RefreshInterval: time.Hour, Telemetry: telemetry, }) diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 462ebc30a95b1..2a288c81db221 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -149,7 +149,6 @@ func NewMemoryCoderd(t *testing.T, options *Options) (*httptest.Server, *codersd Monitor: monitoring.New(ctx, &monitoring.Options{ Database: db, Logger: slogtest.Make(t, nil), - RefreshInterval: time.Minute, Telemetry: monitoring.TelemetryNone, }), SSHKeygenAlgorithm: options.SSHKeygenAlgorithm, diff --git a/coderd/monitoring/monitoring.go b/coderd/monitoring/monitoring.go index dd86bd1fc02b0..86437b4113ce1 100644 --- a/coderd/monitoring/monitoring.go +++ b/coderd/monitoring/monitoring.go @@ -3,7 +3,6 @@ package monitoring import ( "context" "strings" - "time" "github.com/prometheus/client_golang/prometheus" "golang.org/x/xerrors" @@ -38,10 +37,9 @@ func ParseTelemetry(t string) (Telemetry, error) { } type Options struct { - Database database.Store - Logger slog.Logger - RefreshInterval time.Duration - Telemetry Telemetry + Database database.Store + Logger slog.Logger + Telemetry Telemetry } // Monitor provides Prometheus registries on which to register metric From e8e1e04f57d54f01b2ea46468da5155cc8115020 Mon Sep 17 00:00:00 2001 From: Asher Date: Wed, 18 May 2022 19:02:33 +0000 Subject: [PATCH 8/8] Update resource query with build number change --- coderd/database/databasefake/databasefake.go | 49 ++++++----- coderd/database/querier.go | 2 +- coderd/database/queries.sql.go | 82 +++++++++++-------- .../database/queries/workspaceresources.sql | 28 +++++-- coderd/monitoring/collector.go | 2 +- 5 files changed, 96 insertions(+), 67 deletions(-) diff --git a/coderd/database/databasefake/databasefake.go b/coderd/database/databasefake/databasefake.go index f5e1f89cd621d..d7897c78ddc4e 100644 --- a/coderd/database/databasefake/databasefake.go +++ b/coderd/database/databasefake/databasefake.go @@ -486,6 +486,35 @@ func (q *fakeQuerier) GetLatestWorkspaceBuildsByWorkspaceIDs(_ context.Context, return returnBuilds, nil } +func (q *fakeQuerier) GetLatestWorkspaceResources(ctx context.Context) ([]database.WorkspaceResource, error) { + q.mutex.RLock() + defer q.mutex.RUnlock() + + // Get latest workspace builds. + builds := make(map[uuid.UUID]database.WorkspaceBuild) + buildNumbers := make(map[uuid.UUID]int32) + for _, workspaceBuild := range q.workspaceBuilds { + if workspaceBuild.BuildNumber > buildNumbers[workspaceBuild.WorkspaceID] { + builds[workspaceBuild.WorkspaceID] = workspaceBuild + buildNumbers[workspaceBuild.WorkspaceID] = workspaceBuild.BuildNumber + } + } + + // Get resources for each latest build. + resources := make([]database.WorkspaceResource, 0) + for _, workspaceBuild := range q.workspaceBuilds { + rs, err := q.GetWorkspaceResourcesByJobID(ctx, workspaceBuild.JobID) + if err != nil { + return nil, err + } + resources = append(resources, rs...) + } + if len(resources) == 0 { + return nil, sql.ErrNoRows + } + return resources, nil +} + func (q *fakeQuerier) GetWorkspaceBuildByWorkspaceID(_ context.Context, params database.GetWorkspaceBuildByWorkspaceIDParams) ([]database.WorkspaceBuild, error) { q.mutex.RLock() @@ -1044,26 +1073,6 @@ func (q *fakeQuerier) GetWorkspaceResourceByID(_ context.Context, id uuid.UUID) return database.WorkspaceResource{}, sql.ErrNoRows } -func (q *fakeQuerier) GetWorkspaceResources(ctx context.Context) ([]database.WorkspaceResource, error) { - q.mutex.RLock() - defer q.mutex.RUnlock() - - resources := make([]database.WorkspaceResource, 0) - for _, workspaceBuild := range q.workspaceBuilds { - if !workspaceBuild.AfterID.Valid { - rs, err := q.GetWorkspaceResourcesByJobID(ctx, workspaceBuild.JobID) - if err != nil { - return nil, err - } - resources = append(resources, rs...) - } - } - if len(resources) == 0 { - return nil, sql.ErrNoRows - } - return resources, nil -} - func (q *fakeQuerier) GetWorkspaceResourcesByJobID(_ context.Context, jobID uuid.UUID) ([]database.WorkspaceResource, error) { q.mutex.RLock() defer q.mutex.RUnlock() diff --git a/coderd/database/querier.go b/coderd/database/querier.go index e8cedea521004..0df013efbafb7 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -29,6 +29,7 @@ type querier interface { GetGitSSHKey(ctx context.Context, userID uuid.UUID) (GitSSHKey, error) GetLatestWorkspaceBuildByWorkspaceID(ctx context.Context, workspaceID uuid.UUID) (WorkspaceBuild, error) GetLatestWorkspaceBuildsByWorkspaceIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceBuild, error) + GetLatestWorkspaceResources(ctx context.Context) ([]WorkspaceResource, error) GetOrganizationByID(ctx context.Context, id uuid.UUID) (Organization, error) GetOrganizationByName(ctx context.Context, name string) (Organization, error) GetOrganizationIDsByMemberIDs(ctx context.Context, ids []uuid.UUID) ([]GetOrganizationIDsByMemberIDsRow, error) @@ -69,7 +70,6 @@ type querier interface { GetWorkspaceByOwnerIDAndName(ctx context.Context, arg GetWorkspaceByOwnerIDAndNameParams) (Workspace, error) GetWorkspaceOwnerCountsByTemplateIDs(ctx context.Context, ids []uuid.UUID) ([]GetWorkspaceOwnerCountsByTemplateIDsRow, error) GetWorkspaceResourceByID(ctx context.Context, id uuid.UUID) (WorkspaceResource, error) - GetWorkspaceResources(ctx context.Context) ([]WorkspaceResource, error) GetWorkspaceResourcesByJobID(ctx context.Context, jobID uuid.UUID) ([]WorkspaceResource, error) GetWorkspaces(ctx context.Context, deleted bool) ([]Workspace, error) GetWorkspacesAutostartAutostop(ctx context.Context) ([]Workspace, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 023774a6c9aee..682bac0c44834 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -3092,42 +3092,29 @@ func (q *sqlQuerier) UpdateWorkspaceBuildByID(ctx context.Context, arg UpdateWor return err } -const getWorkspaceResourceByID = `-- name: GetWorkspaceResourceByID :one -SELECT - id, created_at, job_id, transition, type, name -FROM - workspace_resources -WHERE - id = $1 -` - -func (q *sqlQuerier) GetWorkspaceResourceByID(ctx context.Context, id uuid.UUID) (WorkspaceResource, error) { - row := q.db.QueryRowContext(ctx, getWorkspaceResourceByID, id) - var i WorkspaceResource - err := row.Scan( - &i.ID, - &i.CreatedAt, - &i.JobID, - &i.Transition, - &i.Type, - &i.Name, - ) - return i, err -} - -const getWorkspaceResources = `-- name: GetWorkspaceResources :many -SELECT - workspace_resources.id, workspace_resources.created_at, workspace_resources.job_id, workspace_resources.transition, workspace_resources.type, workspace_resources.name -FROM - workspace_resources -INNER JOIN workspace_builds - ON workspace_resources.job_id = workspace_builds.job_id -WHERE - workspace_builds.after_id IS NULL -` - -func (q *sqlQuerier) GetWorkspaceResources(ctx context.Context) ([]WorkspaceResource, error) { - rows, err := q.db.QueryContext(ctx, getWorkspaceResources) +const getLatestWorkspaceResources = `-- name: GetLatestWorkspaceResources :many +SELECT workspace_resources.id, workspace_resources.created_at, workspace_resources.job_id, workspace_resources.transition, workspace_resources.type, workspace_resources.name +FROM ( + SELECT + workspace_id, MAX(build_number) as max_build_number + FROM + workspace_builds + GROUP BY + workspace_id +) latest_workspace_builds +INNER JOIN + workspace_builds +ON + workspace_builds.workspace_id = latest_workspace_builds.workspace_id + AND workspace_builds.build_number = latest_workspace_builds.max_build_number +INNER JOIN + workspace_resources +ON + workspace_resources.job_id = workspace_builds.job_id +` + +func (q *sqlQuerier) GetLatestWorkspaceResources(ctx context.Context) ([]WorkspaceResource, error) { + rows, err := q.db.QueryContext(ctx, getLatestWorkspaceResources) if err != nil { return nil, err } @@ -3156,6 +3143,29 @@ func (q *sqlQuerier) GetWorkspaceResources(ctx context.Context) ([]WorkspaceReso return items, nil } +const getWorkspaceResourceByID = `-- name: GetWorkspaceResourceByID :one +SELECT + id, created_at, job_id, transition, type, name +FROM + workspace_resources +WHERE + id = $1 +` + +func (q *sqlQuerier) GetWorkspaceResourceByID(ctx context.Context, id uuid.UUID) (WorkspaceResource, error) { + row := q.db.QueryRowContext(ctx, getWorkspaceResourceByID, id) + var i WorkspaceResource + err := row.Scan( + &i.ID, + &i.CreatedAt, + &i.JobID, + &i.Transition, + &i.Type, + &i.Name, + ) + return i, err +} + const getWorkspaceResourcesByJobID = `-- name: GetWorkspaceResourcesByJobID :many SELECT id, created_at, job_id, transition, type, name diff --git a/coderd/database/queries/workspaceresources.sql b/coderd/database/queries/workspaceresources.sql index 28614e9be6ea6..3c95e27e54d08 100644 --- a/coderd/database/queries/workspaceresources.sql +++ b/coderd/database/queries/workspaceresources.sql @@ -20,12 +20,22 @@ INSERT INTO VALUES ($1, $2, $3, $4, $5, $6) RETURNING *; --- name: GetWorkspaceResources :many -SELECT - workspace_resources.* -FROM - workspace_resources -INNER JOIN workspace_builds - ON workspace_resources.job_id = workspace_builds.job_id -WHERE - workspace_builds.after_id IS NULL; +-- name: GetLatestWorkspaceResources :many +SELECT workspace_resources.* +FROM ( + SELECT + workspace_id, MAX(build_number) as max_build_number + FROM + workspace_builds + GROUP BY + workspace_id +) latest_workspace_builds +INNER JOIN + workspace_builds +ON + workspace_builds.workspace_id = latest_workspace_builds.workspace_id + AND workspace_builds.build_number = latest_workspace_builds.max_build_number +INNER JOIN + workspace_resources +ON + workspace_resources.job_id = workspace_builds.job_id; diff --git a/coderd/monitoring/collector.go b/coderd/monitoring/collector.go index d709f06fcad96..c0e8d5f25c6c1 100644 --- a/coderd/monitoring/collector.go +++ b/coderd/monitoring/collector.go @@ -97,7 +97,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { go func() { defer wg.Done() - dbWorkspaceResources, err := c.db.GetWorkspaceResources(c.ctx) + dbWorkspaceResources, err := c.db.GetLatestWorkspaceResources(c.ctx) if err != nil && !xerrors.Is(err, sql.ErrNoRows) { ch <- prometheus.NewInvalidMetric(c.workspaceResources, err) return