Skip to content

Commit bdd2caf

Browse files
authored
feat: implement thin vertical slice of system-generated notifications (#13537)
1 parent 10aa32c commit bdd2caf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+3686
-50
lines changed

.golangci.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,11 @@ linters-settings:
195195
- name: var-naming
196196
- name: waitgroup-by-value
197197

198+
# irrelevant as of Go v1.22: https://go.dev/blog/loopvar-preview
199+
govet:
200+
disable:
201+
- loopclosure
202+
198203
issues:
199204
# Rules listed here: https://github.com/securego/gosec#available-rules
200205
exclude-rules:

cli/server.go

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ import (
5555

5656
"cdr.dev/slog"
5757
"cdr.dev/slog/sloggers/sloghuman"
58+
"github.com/coder/pretty"
59+
"github.com/coder/retry"
60+
"github.com/coder/serpent"
61+
"github.com/coder/wgtunnel/tunnelsdk"
62+
5863
"github.com/coder/coder/v2/buildinfo"
5964
"github.com/coder/coder/v2/cli/clilog"
6065
"github.com/coder/coder/v2/cli/cliui"
@@ -64,6 +69,7 @@ import (
6469
"github.com/coder/coder/v2/coderd/autobuild"
6570
"github.com/coder/coder/v2/coderd/database"
6671
"github.com/coder/coder/v2/coderd/database/awsiamrds"
72+
"github.com/coder/coder/v2/coderd/database/dbauthz"
6773
"github.com/coder/coder/v2/coderd/database/dbmem"
6874
"github.com/coder/coder/v2/coderd/database/dbmetrics"
6975
"github.com/coder/coder/v2/coderd/database/dbpurge"
@@ -73,6 +79,7 @@ import (
7379
"github.com/coder/coder/v2/coderd/externalauth"
7480
"github.com/coder/coder/v2/coderd/gitsshkey"
7581
"github.com/coder/coder/v2/coderd/httpmw"
82+
"github.com/coder/coder/v2/coderd/notifications"
7683
"github.com/coder/coder/v2/coderd/oauthpki"
7784
"github.com/coder/coder/v2/coderd/prometheusmetrics"
7885
"github.com/coder/coder/v2/coderd/prometheusmetrics/insights"
@@ -97,10 +104,6 @@ import (
97104
"github.com/coder/coder/v2/provisionersdk"
98105
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
99106
"github.com/coder/coder/v2/tailnet"
100-
"github.com/coder/pretty"
101-
"github.com/coder/retry"
102-
"github.com/coder/serpent"
103-
"github.com/coder/wgtunnel/tunnelsdk"
104107
)
105108

106109
func createOIDCConfig(ctx context.Context, vals *codersdk.DeploymentValues) (*coderd.OIDCConfig, error) {
@@ -592,6 +595,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
592595
SSHConfigOptions: configSSHOptions,
593596
},
594597
AllowWorkspaceRenames: vals.AllowWorkspaceRenames.Value(),
598+
NotificationsEnqueuer: notifications.NewNoopEnqueuer(), // Changed further down if notifications enabled.
595599
}
596600
if httpServers.TLSConfig != nil {
597601
options.TLSCertificates = httpServers.TLSConfig.Certificates
@@ -660,6 +664,10 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
660664
options.OIDCConfig = oc
661665
}
662666

667+
experiments := coderd.ReadExperiments(
668+
options.Logger, options.DeploymentValues.Experiments.Value(),
669+
)
670+
663671
// We'll read from this channel in the select below that tracks shutdown. If it remains
664672
// nil, that case of the select will just never fire, but it's important not to have a
665673
// "bare" read on this channel.
@@ -969,6 +977,32 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
969977
options.WorkspaceUsageTracker = tracker
970978
defer tracker.Close()
971979

980+
// Manage notifications.
981+
var (
982+
notificationsManager *notifications.Manager
983+
)
984+
if experiments.Enabled(codersdk.ExperimentNotifications) {
985+
cfg := options.DeploymentValues.Notifications
986+
987+
// The enqueuer is responsible for enqueueing notifications to the given store.
988+
enqueuer, err := notifications.NewStoreEnqueuer(cfg, options.Database, templateHelpers(options), logger.Named("notifications.enqueuer"))
989+
if err != nil {
990+
return xerrors.Errorf("failed to instantiate notification store enqueuer: %w", err)
991+
}
992+
options.NotificationsEnqueuer = enqueuer
993+
994+
// The notification manager is responsible for:
995+
// - creating notifiers and managing their lifecycles (notifiers are responsible for dequeueing/sending notifications)
996+
// - keeping the store updated with status updates
997+
notificationsManager, err = notifications.NewManager(cfg, options.Database, logger.Named("notifications.manager"))
998+
if err != nil {
999+
return xerrors.Errorf("failed to instantiate notification manager: %w", err)
1000+
}
1001+
1002+
// nolint:gocritic // TODO: create own role.
1003+
notificationsManager.Run(dbauthz.AsSystemRestricted(ctx))
1004+
}
1005+
9721006
// Wrap the server in middleware that redirects to the access URL if
9731007
// the request is not to a local IP.
9741008
var handler http.Handler = coderAPI.RootHandler
@@ -1049,10 +1083,10 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
10491083
case <-stopCtx.Done():
10501084
exitErr = stopCtx.Err()
10511085
waitForProvisionerJobs = true
1052-
_, _ = io.WriteString(inv.Stdout, cliui.Bold("Stop caught, waiting for provisioner jobs to complete and gracefully exiting. Use ctrl+\\ to force quit"))
1086+
_, _ = io.WriteString(inv.Stdout, cliui.Bold("Stop caught, waiting for provisioner jobs to complete and gracefully exiting. Use ctrl+\\ to force quit\n"))
10531087
case <-interruptCtx.Done():
10541088
exitErr = interruptCtx.Err()
1055-
_, _ = io.WriteString(inv.Stdout, cliui.Bold("Interrupt caught, gracefully exiting. Use ctrl+\\ to force quit"))
1089+
_, _ = io.WriteString(inv.Stdout, cliui.Bold("Interrupt caught, gracefully exiting. Use ctrl+\\ to force quit\n"))
10561090
case <-tunnelDone:
10571091
exitErr = xerrors.New("dev tunnel closed unexpectedly")
10581092
case <-pubsubWatchdogTimeout:
@@ -1088,6 +1122,21 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
10881122
// Cancel any remaining in-flight requests.
10891123
shutdownConns()
10901124

1125+
if notificationsManager != nil {
1126+
// Stop the notification manager, which will cause any buffered updates to the store to be flushed.
1127+
// If the Stop() call times out, messages that were sent but not reflected as such in the store will have
1128+
// their leases expire after a period of time and will be re-queued for sending.
1129+
// See CODER_NOTIFICATIONS_LEASE_PERIOD.
1130+
cliui.Info(inv.Stdout, "Shutting down notifications manager..."+"\n")
1131+
err = shutdownWithTimeout(notificationsManager.Stop, 5*time.Second)
1132+
if err != nil {
1133+
cliui.Warnf(inv.Stderr, "Notifications manager shutdown took longer than 5s, "+
1134+
"this may result in duplicate notifications being sent: %s\n", err)
1135+
} else {
1136+
cliui.Info(inv.Stdout, "Gracefully shut down notifications manager\n")
1137+
}
1138+
}
1139+
10911140
// Shut down provisioners before waiting for WebSockets
10921141
// connections to close.
10931142
var wg sync.WaitGroup
@@ -1227,6 +1276,15 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
12271276
return serverCmd
12281277
}
12291278

1279+
// templateHelpers builds a set of functions which can be called in templates.
1280+
// We build them here to avoid an import cycle by using coderd.Options in notifications.Manager.
1281+
// We can later use this to inject whitelabel fields when app name / logo URL are overridden.
1282+
func templateHelpers(options *coderd.Options) map[string]any {
1283+
return map[string]any{
1284+
"base_url": func() string { return options.AccessURL.String() },
1285+
}
1286+
}
1287+
12301288
// printDeprecatedOptions loops through all command options, and prints
12311289
// a warning for usage of deprecated options.
12321290
func PrintDeprecatedOptions() serpent.MiddlewareFunc {

cli/testdata/coder_server_--help.golden

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,30 @@ can safely ignore these settings.
326326
Minimum supported version of TLS. Accepted values are "tls10",
327327
"tls11", "tls12" or "tls13".
328328

329+
NOTIFICATIONS OPTIONS:
330+
--notifications-dispatch-timeout duration, $CODER_NOTIFICATIONS_DISPATCH_TIMEOUT (default: 1m0s)
331+
How long to wait while a notification is being sent before giving up.
332+
333+
--notifications-max-send-attempts int, $CODER_NOTIFICATIONS_MAX_SEND_ATTEMPTS (default: 5)
334+
The upper limit of attempts to send a notification.
335+
336+
--notifications-method string, $CODER_NOTIFICATIONS_METHOD (default: smtp)
337+
Which delivery method to use (available options: 'smtp', 'webhook').
338+
339+
NOTIFICATIONS / EMAIL OPTIONS:
340+
--notifications-email-from string, $CODER_NOTIFICATIONS_EMAIL_FROM
341+
The sender's address to use.
342+
343+
--notifications-email-hello string, $CODER_NOTIFICATIONS_EMAIL_HELLO (default: localhost)
344+
The hostname identifying the SMTP server.
345+
346+
--notifications-email-smarthost host:port, $CODER_NOTIFICATIONS_EMAIL_SMARTHOST (default: localhost:587)
347+
The intermediary SMTP host through which emails are sent.
348+
349+
NOTIFICATIONS / WEBHOOK OPTIONS:
350+
--notifications-webhook-endpoint url, $CODER_NOTIFICATIONS_WEBHOOK_ENDPOINT
351+
The endpoint to which to send webhooks.
352+
329353
OAUTH2 / GITHUB OPTIONS:
330354
--oauth2-github-allow-everyone bool, $CODER_OAUTH2_GITHUB_ALLOW_EVERYONE
331355
Allow all logins, setting this option means allowed orgs and teams

cli/testdata/server-config.yaml.golden

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,3 +493,58 @@ userQuietHoursSchedule:
493493
# compatibility reasons, this will be removed in a future release.
494494
# (default: false, type: bool)
495495
allowWorkspaceRenames: false
496+
notifications:
497+
# Which delivery method to use (available options: 'smtp', 'webhook').
498+
# (default: smtp, type: string)
499+
method: smtp
500+
# How long to wait while a notification is being sent before giving up.
501+
# (default: 1m0s, type: duration)
502+
dispatch-timeout: 1m0s
503+
email:
504+
# The sender's address to use.
505+
# (default: <unset>, type: string)
506+
from: ""
507+
# The intermediary SMTP host through which emails are sent.
508+
# (default: localhost:587, type: host:port)
509+
smarthost: localhost:587
510+
# The hostname identifying the SMTP server.
511+
# (default: localhost, type: string)
512+
hello: localhost
513+
webhook:
514+
# The endpoint to which to send webhooks.
515+
# (default: <unset>, type: url)
516+
hello:
517+
# The upper limit of attempts to send a notification.
518+
# (default: 5, type: int)
519+
max-send-attempts: 5
520+
# The minimum time between retries.
521+
# (default: 5m0s, type: duration)
522+
retry-interval: 5m0s
523+
# The notifications system buffers message updates in memory to ease pressure on
524+
# the database. This option controls how often it synchronizes its state with the
525+
# database. The shorter this value the lower the change of state inconsistency in
526+
# a non-graceful shutdown - but it also increases load on the database. It is
527+
# recommended to keep this option at its default value.
528+
# (default: 2s, type: duration)
529+
store-sync-interval: 2s
530+
# The notifications system buffers message updates in memory to ease pressure on
531+
# the database. This option controls how many updates are kept in memory. The
532+
# lower this value the lower the change of state inconsistency in a non-graceful
533+
# shutdown - but it also increases load on the database. It is recommended to keep
534+
# this option at its default value.
535+
# (default: 50, type: int)
536+
store-sync-buffer-size: 50
537+
# How long a notifier should lease a message. This is effectively how long a
538+
# notification is 'owned' by a notifier, and once this period expires it will be
539+
# available for lease by another notifier. Leasing is important in order for
540+
# multiple running notifiers to not pick the same messages to deliver
541+
# concurrently. This lease period will only expire if a notifier shuts down
542+
# ungracefully; a dispatch of the notification releases the lease.
543+
# (default: 2m0s, type: duration)
544+
lease-period: 2m0s
545+
# How many notifications a notifier should lease per fetch interval.
546+
# (default: 20, type: int)
547+
lease-count: 20
548+
# How often to query the database for queued notifications.
549+
# (default: 15s, type: duration)
550+
fetch-interval: 15s

coderd/apidoc/docs.go

Lines changed: 99 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)