Skip to content

feat: implement observability of notifications subsystem #13799

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
7f60c0f
Implement observability of notification subsystem
dannykopping Jul 3, 2024
d62d704
make lint
dannykopping Jul 5, 2024
96dac65
make gen
dannykopping Jul 5, 2024
130de49
make fmt
dannykopping Jul 8, 2024
e868752
Small fixes
dannykopping Jul 8, 2024
cee93cb
Review comments
dannykopping Jul 8, 2024
387b557
Apply suggestions from code review
dannykopping Jul 8, 2024
114797d
Correcting query
dannykopping Jul 8, 2024
5ff29c0
Merge branch 'main' of github.com:/coder/coder into dk/system-notific…
dannykopping Jul 9, 2024
88451a1
Only return UUID from EnqueueNotificationMessage
dannykopping Jul 9, 2024
09f7305
Review feedback
dannykopping Jul 9, 2024
91e2a23
Minor fixups
dannykopping Jul 9, 2024
9f1d6b3
Revert hack, no output param needed
dannykopping Jul 10, 2024
15c4537
Small touch-ups
dannykopping Jul 10, 2024
53ecad4
Merge branch 'main' of https://github.com/coder/coder into dk/system-…
dannykopping Jul 10, 2024
bc2a4cb
Merge branch 'main' of https://github.com/coder/coder into dk/system-…
dannykopping Jul 10, 2024
716e591
Harden tests, fail early
dannykopping Jul 10, 2024
d408ed2
make fmt
dannykopping Jul 10, 2024
2b9eec3
Restoring deleted line
dannykopping Jul 10, 2024
4211c84
Comments
dannykopping Jul 10, 2024
24417c5
Lock before modification
dannykopping Jul 10, 2024
72bb1be
Remove TestNotifierPaused's unnecessarily fast fetch interval
dannykopping Jul 10, 2024
bfca2c1
Merge branch 'main' of https://github.com/coder/coder into dk/system-…
dannykopping Jul 10, 2024
6602682
Rename migration after numbering conflict
dannykopping Jul 10, 2024
00633a1
Small fixes
dannykopping Jul 11, 2024
f454184
Merge branch 'main' of https://github.com/coder/coder into dk/system-…
dannykopping Jul 11, 2024
84d07d4
Logging improvement
dannykopping Jul 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cli/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
)
if experiments.Enabled(codersdk.ExperimentNotifications) {
cfg := options.DeploymentValues.Notifications
metrics := notifications.NewMetrics(options.PrometheusRegistry)

// The enqueuer is responsible for enqueueing notifications to the given store.
enqueuer, err := notifications.NewStoreEnqueuer(cfg, options.Database, templateHelpers(options), logger.Named("notifications.enqueuer"))
Expand All @@ -994,7 +995,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
// The notification manager is responsible for:
// - creating notifiers and managing their lifecycles (notifiers are responsible for dequeueing/sending notifications)
// - keeping the store updated with status updates
notificationsManager, err = notifications.NewManager(cfg, options.Database, logger.Named("notifications.manager"))
notificationsManager, err = notifications.NewManager(cfg, options.Database, metrics, logger.Named("notifications.manager"))
if err != nil {
return xerrors.Errorf("failed to instantiate notification manager: %w", err)
}
Expand Down
4 changes: 2 additions & 2 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -1143,9 +1143,9 @@ func (q *querier) DeleteWorkspaceAgentPortSharesByTemplate(ctx context.Context,
return q.db.DeleteWorkspaceAgentPortSharesByTemplate(ctx, templateID)
}

func (q *querier) EnqueueNotificationMessage(ctx context.Context, arg database.EnqueueNotificationMessageParams) (database.NotificationMessage, error) {
func (q *querier) EnqueueNotificationMessage(ctx context.Context, arg database.EnqueueNotificationMessageParams) error {
if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
return database.NotificationMessage{}, err
return err
}
return q.db.EnqueueNotificationMessage(ctx, arg)
}
Expand Down
25 changes: 15 additions & 10 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -935,12 +935,17 @@ func (q *FakeQuerier) AcquireNotificationMessages(_ context.Context, arg databas
q.mutex.Lock()
defer q.mutex.Unlock()

var out []database.AcquireNotificationMessagesRow
for _, nm := range q.notificationMessages {
if len(out) >= int(arg.Count) {
break
}
// Shift the first "Count" notifications off the slice (FIFO).
sz := len(q.notificationMessages)
if sz > int(arg.Count) {
sz = int(arg.Count)
}

list := q.notificationMessages[:sz]
q.notificationMessages = q.notificationMessages[sz:]

var out []database.AcquireNotificationMessagesRow
for _, nm := range list {
acquirableStatuses := []database.NotificationMessageStatus{database.NotificationMessageStatusPending, database.NotificationMessageStatusTemporaryFailure}
if !slices.Contains(acquirableStatuses, nm.Status) {
continue
Expand All @@ -956,9 +961,9 @@ func (q *FakeQuerier) AcquireNotificationMessages(_ context.Context, arg databas
ID: nm.ID,
Payload: nm.Payload,
Method: nm.Method,
CreatedBy: nm.CreatedBy,
TitleTemplate: "This is a title with {{.Labels.variable}}",
BodyTemplate: "This is a body with {{.Labels.variable}}",
TemplateID: nm.NotificationTemplateID,
})
}

Expand Down Expand Up @@ -1815,10 +1820,10 @@ func (q *FakeQuerier) DeleteWorkspaceAgentPortSharesByTemplate(_ context.Context
return nil
}

func (q *FakeQuerier) EnqueueNotificationMessage(_ context.Context, arg database.EnqueueNotificationMessageParams) (database.NotificationMessage, error) {
func (q *FakeQuerier) EnqueueNotificationMessage(_ context.Context, arg database.EnqueueNotificationMessageParams) error {
err := validateDatabaseType(arg)
if err != nil {
return database.NotificationMessage{}, err
return err
}

q.mutex.Lock()
Expand All @@ -1827,7 +1832,7 @@ func (q *FakeQuerier) EnqueueNotificationMessage(_ context.Context, arg database
var payload types.MessagePayload
err = json.Unmarshal(arg.Payload, &payload)
if err != nil {
return database.NotificationMessage{}, err
return err
}

nm := database.NotificationMessage{
Expand All @@ -1845,7 +1850,7 @@ func (q *FakeQuerier) EnqueueNotificationMessage(_ context.Context, arg database

q.notificationMessages = append(q.notificationMessages, nm)

return nm, err
return err
}

func (q *FakeQuerier) FavoriteWorkspace(_ context.Context, arg uuid.UUID) error {
Expand Down
6 changes: 3 additions & 3 deletions coderd/database/dbmetrics/dbmetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion coderd/database/dump.sql

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ALTER TABLE notification_messages
DROP COLUMN IF EXISTS queued_seconds;
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ALTER TABLE notification_messages
ADD COLUMN queued_seconds FLOAT NULL;
1 change: 1 addition & 0 deletions coderd/database/models.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion coderd/database/querier.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 23 additions & 31 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 12 additions & 8 deletions coderd/database/queries/notifications.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,15 @@ FROM notification_templates nt,
WHERE nt.id = @notification_template_id
AND u.id = @user_id;

-- name: EnqueueNotificationMessage :one
-- name: EnqueueNotificationMessage :exec
INSERT INTO notification_messages (id, notification_template_id, user_id, method, payload, targets, created_by)
VALUES (@id,
@notification_template_id,
@user_id,
@method::notification_method,
@payload::jsonb,
@targets,
@created_by)
RETURNING *;
@created_by);

-- Acquires the lease for a given count of notification messages, to enable concurrent dequeuing and subsequent sending.
-- Only rows that aren't already leased (or ones which are leased but have exceeded their lease period) are returned.
Expand All @@ -36,7 +35,8 @@ RETURNING *;
WITH acquired AS (
UPDATE
notification_messages
SET updated_at = NOW(),
SET queued_seconds = GREATEST(0, EXTRACT(EPOCH FROM (NOW() - updated_at)))::FLOAT,
updated_at = NOW(),
status = 'leased'::notification_message_status,
status_reason = 'Leased by notifier ' || sqlc.arg('notifier_id')::uuid,
leased_until = NOW() + CONCAT(sqlc.arg('lease_seconds')::int, ' seconds')::interval
Expand Down Expand Up @@ -78,16 +78,19 @@ SELECT
nm.id,
nm.payload,
nm.method,
nm.created_by,
nm.attempt_count::int AS attempt_count,
nm.queued_seconds::float AS queued_seconds,
-- template
nt.id AS template_id,
nt.title_template,
nt.body_template
FROM acquired nm
JOIN notification_templates nt ON nm.notification_template_id = nt.id;

-- name: BulkMarkNotificationMessagesFailed :execrows
UPDATE notification_messages
SET updated_at = subquery.failed_at,
SET queued_seconds = 0,
updated_at = subquery.failed_at,
attempt_count = attempt_count + 1,
status = CASE
WHEN attempt_count + 1 < @max_attempts::int THEN subquery.status
Expand All @@ -105,13 +108,14 @@ WHERE notification_messages.id = subquery.id;

-- name: BulkMarkNotificationMessagesSent :execrows
UPDATE notification_messages
SET updated_at = new_values.sent_at,
SET queued_seconds = 0,
updated_at = new_values.sent_at,
attempt_count = attempt_count + 1,
status = 'sent'::notification_message_status,
status_reason = NULL,
leased_until = NULL,
next_retry_after = NULL
FROM (SELECT UNNEST(@ids::uuid[]) AS id,
FROM (SELECT UNNEST(@ids::uuid[]) AS id,
UNNEST(@sent_ats::timestamptz[]) AS sent_at)
AS new_values
WHERE notification_messages.id = new_values.id;
Expand Down
4 changes: 2 additions & 2 deletions coderd/notifications/enqueuer.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (s *StoreEnqueuer) Enqueue(ctx context.Context, userID, templateID uuid.UUI
}

id := uuid.New()
msg, err := s.store.EnqueueNotificationMessage(ctx, database.EnqueueNotificationMessageParams{
err = s.store.EnqueueNotificationMessage(ctx, database.EnqueueNotificationMessageParams{
ID: id,
UserID: userID,
NotificationTemplateID: templateID,
Expand All @@ -73,7 +73,7 @@ func (s *StoreEnqueuer) Enqueue(ctx context.Context, userID, templateID uuid.UUI
return nil, xerrors.Errorf("enqueue notification: %w", err)
}

s.log.Debug(ctx, "enqueued notification", slog.F("msg_id", msg.ID))
s.log.Debug(ctx, "enqueued notification", slog.F("msg_id", id))
return &id, nil
}

Expand Down
Loading
Loading