Skip to content

chore: add usage tracking package #19095

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ tailnet/proto/ @spikecurtis @johnstcn
vpn/vpn.proto @spikecurtis @johnstcn
vpn/version.go @spikecurtis @johnstcn


# This caching code is particularly tricky, and one must be very careful when
# altering it.
coderd/files/ @aslilac
Expand All @@ -29,3 +28,8 @@ site/src/api/countriesGenerated.ts
site/src/api/rbacresourcesGenerated.ts
site/src/api/typesGenerated.ts
site/CLAUDE.md

# Usage tracking code requires intimate knowledge of Tallyman and Metronome, as
# well as guidance from revenue.
coderd/usage/ @deansheather
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in general, I'd like to ensure we have 2 code owners for each thing, so that we have some ability to make progress if someone is out

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think codeowners are required to approve PRs for merge, they just get auto-requested for review. I just want to be requested on every PR so I have visibility into changes. If I'm out, anyone else can approve.

Most other codeowners in this file have only one person listed and function the same way.

enterprise/coderd/usage/ @deansheather
22 changes: 22 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -3913,6 +3913,13 @@ func (q *querier) InsertTemplateVersionWorkspaceTag(ctx context.Context, arg dat
return q.db.InsertTemplateVersionWorkspaceTag(ctx, arg)
}

func (q *querier) InsertUsageEvent(ctx context.Context, arg database.InsertUsageEventParams) error {
if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
return err
}
return q.db.InsertUsageEvent(ctx, arg)
}

func (q *querier) InsertUser(ctx context.Context, arg database.InsertUserParams) (database.User, error) {
// Always check if the assigned roles can actually be assigned by this actor.
impliedRoles := append([]rbac.RoleIdentifier{rbac.RoleMember()}, q.convertToDeploymentRoles(arg.RBACRoles)...)
Expand Down Expand Up @@ -4260,6 +4267,14 @@ func (q *querier) RevokeDBCryptKey(ctx context.Context, activeKeyDigest string)
return q.db.RevokeDBCryptKey(ctx, activeKeyDigest)
}

func (q *querier) SelectUsageEventsForPublishing(ctx context.Context, arg time.Time) ([]database.UsageEvent, error) {
// ActionUpdate because we're updating the publish_started_at column.
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
return nil, err
}
return q.db.SelectUsageEventsForPublishing(ctx, arg)
}

func (q *querier) TryAcquireLock(ctx context.Context, id int64) (bool, error) {
return q.db.TryAcquireLock(ctx, id)
}
Expand Down Expand Up @@ -4725,6 +4740,13 @@ func (q *querier) UpdateTemplateWorkspacesLastUsedAt(ctx context.Context, arg da
return fetchAndExec(q.log, q.auth, policy.ActionUpdate, fetch, q.db.UpdateTemplateWorkspacesLastUsedAt)(ctx, arg)
}

func (q *querier) UpdateUsageEventsPostPublish(ctx context.Context, arg database.UpdateUsageEventsPostPublishParams) error {
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
return err
}
return q.db.UpdateUsageEventsPostPublish(ctx, arg)
}

func (q *querier) UpdateUserDeletedByID(ctx context.Context, id uuid.UUID) error {
return deleteQ(q.log, q.auth, q.db.GetUserByID, q.db.UpdateUserDeletedByID)(ctx, id)
}
Expand Down
24 changes: 24 additions & 0 deletions coderd/database/dbauthz/dbauthz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5845,3 +5845,27 @@ func (s *MethodTestSuite) TestAuthorizePrebuiltWorkspace() {
}).Asserts(w, policy.ActionUpdate, w.AsPrebuild(), policy.ActionUpdate)
}))
}

func (s *MethodTestSuite) TestUsageEvents() {
s.Run("InsertUsageEvent", s.Subtest(func(db database.Store, check *expects) {
check.Args(database.InsertUsageEventParams{
ID: "1",
EventType: database.UsageEventTypeDcManagedAgentsV1,
EventData: []byte("{}"),
CreatedAt: dbtime.Now(),
}).Asserts(rbac.ResourceSystem, policy.ActionCreate)
}))

s.Run("SelectUsageEventsForPublishing", s.Subtest(func(db database.Store, check *expects) {
check.Args(dbtime.Now()).Asserts(rbac.ResourceSystem, policy.ActionUpdate)
}))

s.Run("UpdateUsageEventsPostPublish", s.Subtest(func(db database.Store, check *expects) {
check.Args(database.UpdateUsageEventsPostPublishParams{
Now: dbtime.Now(),
IDs: []string{"1", "2"},
FailureMessages: []string{"error", "error"},
SetPublishedAts: []bool{false, false},
}).Asserts(rbac.ResourceSystem, policy.ActionUpdate)
}))
}
21 changes: 21 additions & 0 deletions coderd/database/dbmetrics/querymetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions coderd/database/dump.sql

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
DROP TABLE usage_events;
DROP TYPE usage_event_type;
26 changes: 26 additions & 0 deletions coderd/database/migrations/000353_create_usage_events_table.up.sql
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Obligatory reminder to check migration number before merging!

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
CREATE TYPE usage_event_type AS ENUM (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd normally be in favor of an enum here, and I'm sorry I didn't think of it at RFC time, but...

We are restricted from using ALTER TYPE ... ADD VALUE in later migrations and instead have to convert everything to an intermediate text column, then back to the enum. Since the usage events table is likely to be very large, this could be a costly migration.

Instead we could make the event_type a text and for now just add a CHECK constraint that it equals 'dc_managed_agents_v1'. Later migrations can use NOT VALID when modifying this check constraint to avoid a costly scan. WDYT?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is true for adding new enum values, only if you want to remove them (which still requires recreating the entire the enum type). If we don't ever remove values, an enum is fine (and saves disk space).

https://boringsql.com/posts/postgresql-enums

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from: https://boringsql.com/posts/postgresql-enums

And new features coming (at the time of writing this article) with PostgreSQL 17 allow the use of newly added values within the same transaction block (previously not possible without an explicit commit).

This is the problem we hit, since we run all migrations within a single transaction. A migration adds a new value and then some other migration references that new value and fails. Since we don't require PostgreSQL 17 (minimum is PG 13 as of writing), we end up having to use table-rewriting means of introducing new values.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, you're right. I forgot about the single transaction thing. What a bummer. I'll switch to TEXT and a check constraint

'dc_managed_agents_v1'
);

COMMENT ON TYPE usage_event_type IS 'The usage event type with version. "dc" means "discrete" (e.g. a single event, for counters), "hb" means "heartbeat" (e.g. a recurring event that contains a total count of usage generated from the database, for gauges).';

CREATE TABLE usage_events (
id TEXT PRIMARY KEY,
event_type usage_event_type NOT NULL,
event_data JSONB NOT NULL,
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
publish_started_at TIMESTAMP WITH TIME ZONE DEFAULT NULL,
published_at TIMESTAMP WITH TIME ZONE DEFAULT NULL,
failure_message TEXT DEFAULT NULL
);

COMMENT ON TABLE usage_events IS 'usage_events contains usage data that is collected from the product and potentially shipped to the usage collector service.';
COMMENT ON COLUMN usage_events.id IS 'For "discrete" event types, this is a random UUID. For "heartbeat" event types, this is a combination of the event type and a truncated timestamp.';
COMMENT ON COLUMN usage_events.event_data IS 'Event payload. Determined by the matching usage struct for this event type.';
COMMENT ON COLUMN usage_events.publish_started_at IS 'Set to a timestamp while the event is being published by a Coder replica to the usage collector service. Used to avoid duplicate publishes by multiple replicas. Timestamps older than 1 hour are considered expired.';
COMMENT ON COLUMN usage_events.published_at IS 'Set to a timestamp when the event is successfully (or permanently unsuccessfully) published to the usage collector service. If set, the event should never be attempted to be published again.';
COMMENT ON COLUMN usage_events.failure_message IS 'Set to an error message when the event is temporarily or permanently unsuccessfully published to the usage collector service.';

CREATE INDEX idx_usage_events_created_at ON usage_events (created_at);
CREATE INDEX idx_usage_events_publish_started_at ON usage_events (publish_started_at);
CREATE INDEX idx_usage_events_published_at ON usage_events (published_at);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you want a single index over a tuple and the order matters. Having 3 indexes is much less useful for a query that uses all 3 fields.

It should be published_at first, to allow the query to ignore already published events, which will be the vast majority of the table. Next is publish_started_at, which we use to filter out in-progress events. Lastly created_at since we order by this and exclude anything older than 30 days.

Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
INSERT INTO usage_events (
id,
event_type,
event_data,
created_at,
publish_started_at,
published_at,
failure_message
)
VALUES
-- Unpublished dc_managed_agents_v1 event.
(
'event1',
'dc_managed_agents_v1',
'{"count":1}',
'2023-01-01 00:00:00+00',
NULL,
NULL,
NULL
),
-- Successfully published dc_managed_agents_v1 event.
(
'event2',
'dc_managed_agents_v1',
'{"count":2}',
'2023-01-01 00:00:00+00',
NULL,
'2023-01-01 00:00:02+00',
NULL
),
-- Publish in progress dc_managed_agents_v1 event.
(
'event3',
'dc_managed_agents_v1',
'{"count":3}',
'2023-01-01 00:00:00+00',
'2023-01-01 00:00:01+00',
NULL,
NULL
),
-- Temporarily failed to publish dc_managed_agents_v1 event.
(
'event4',
'dc_managed_agents_v1',
'{"count":4}',
'2023-01-01 00:00:00+00',
NULL,
NULL,
'publish failed temporarily'
),
-- Permanently failed to publish dc_managed_agents_v1 event.
(
'event5',
'dc_managed_agents_v1',
'{"count":5}',
'2023-01-01 00:00:00+00',
NULL,
'2023-01-01 00:00:02+00',
'publish failed permanently'
)
9 changes: 9 additions & 0 deletions coderd/database/modelmethods.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/hex"
"sort"
"strconv"
"strings"
"time"

"github.com/google/uuid"
Expand Down Expand Up @@ -628,3 +629,11 @@ func (m WorkspaceAgentVolumeResourceMonitor) Debounce(

return m.DebouncedUntil, false
}

func (e UsageEventType) IsDiscrete() bool {
return e.Valid() && strings.HasPrefix(string(e), "dc_")
}

func (e UsageEventType) IsHeartbeat() bool {
return e.Valid() && strings.HasPrefix(string(e), "hb_")
}
Loading
Loading