Skip to content

feat: notify template owner on manual build failures #14262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
INSERT INTO
notification_templates (
id,
name,
title_template,
body_template,
"group",
actions
)
VALUES (
'48a9d2b9-3655-430c-a31a-2442479e7519',
'Template Build Failure',
E'Build failed on workspace using template "{{.Labels.name}}"',
E'Hi {{.UserName}},\n\n'
'A workspace using the template **{{.Labels.name}}** failed to build.\n\n'
'- **Version**: {{.Labels.version}}\n'
'- **Workspace**: {{.Labels.workspaceName}}\n'
'- **Transition**: {{.Labels.transition}}\n'
'{{if .Labels.initiator}}- **Initiated by**: {{.Labels.initiator}}{{end}}'
'{{if .Labels.reason}}- **Reason**: {{.Labels.reason}}{{end}}'
'\n\nYou can debug this workspace using the build logs below or contact the deployment administrator.',
'Template Events',
'[
{
"label": "View build",
"url": "{{ base_url }}/@{{.Labels.workspaceOwnerName}}/{{.Labels.workspaceName}}/builds/{{.Labels.buildNumber}}"
},
{
"label": "View template",
"url": "{{ base_url }}/templates/{{.Labels.name}}"
}
]'::jsonb
);
3 changes: 2 additions & 1 deletion coderd/notifications/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ var (

// Template-related events.
var (
TemplateTemplateDeleted = uuid.MustParse("29a09665-2a4c-403f-9648-54301670e7be")
TemplateTemplateBuildFailed = uuid.MustParse("48a9d2b9-3655-430c-a31a-2442479e7519")
TemplateTemplateDeleted = uuid.MustParse("29a09665-2a4c-403f-9648-54301670e7be")
)
84 changes: 74 additions & 10 deletions coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,23 @@ func (s *server) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.
return nil, err
}

s.notifyWorkspaceBuildFailed(ctx, workspace, build)
// Only notify auto build failures
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather that this comment explain what classifies as an auto build (i.e. not initiated by a user)

autoBuild := build.Reason != database.BuildReasonInitiator
if autoBuild {
s.notifyWorkspaceBuildFailed(ctx, workspace, build, workspace.OwnerID)
}

// Notify template admins including owners
admins, err := findTemplateAdmins(ctx, s.Database)
if err != nil {
s.Logger.Warn(ctx, "failed to find template admins for template build failed notification", slog.Error(err))
} else {
for _, admin := range admins {
if admin.ID != build.InitiatorID && !autoBuild {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if admin.ID != build.InitiatorID && !autoBuild {
// Only notify users who did not initiate the action.
if admin.ID != build.InitiatorID && !autoBuild {

s.notifyTemplateBuildFailed(ctx, workspace, build, admin.ID)
}
}
}

err = s.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(build.WorkspaceID), []byte{})
if err != nil {
Expand Down Expand Up @@ -1095,17 +1111,11 @@ func (s *server) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.
return &proto.Empty{}, nil
}

func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild) {
var reason string
if build.Reason.Valid() && build.Reason == database.BuildReasonInitiator {
return // failed workspace build initiated by a user should not notify
}
reason = string(build.Reason)

if _, err := s.NotificationsEnqueuer.Enqueue(ctx, workspace.OwnerID, notifications.TemplateWorkspaceAutobuildFailed,
func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild, receiverID uuid.UUID) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why you're passing in receiverID here. Can you elaborate pls?

if _, err := s.NotificationsEnqueuer.Enqueue(ctx, receiverID, notifications.TemplateWorkspaceAutobuildFailed,
map[string]string{
"name": workspace.Name,
"reason": reason,
"reason": string(build.Reason),
}, "provisionerdserver",
// Associate this notification with all the related entities.
workspace.ID, workspace.OwnerID, workspace.TemplateID, workspace.OrganizationID,
Expand All @@ -1114,6 +1124,43 @@ func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace datab
}
}

func (s *server) notifyTemplateBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild, receiverID uuid.UUID) {
template, err := s.Database.GetTemplateByID(ctx, workspace.TemplateID)
if err != nil {
s.Logger.Warn(ctx, "failed to get template", slog.Error(err))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think about how an operator might debug with these logs; how would they know that this is related to sending notifications?

Try think of log lines as communication. You're communicating with an operator who is a) possibly not familiar with the product or less experienced, and b) trying to debug an issue, possibly under stress. Vague log lines are almost as bad as no log lines at all, sometimes worse because they can mislead or distract.

return
}

owner, err := s.Database.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
s.Logger.Warn(ctx, "failed to get workspace owner", slog.Error(err))
return
}

version, err := s.Database.GetTemplateVersionByID(ctx, build.TemplateVersionID)
if err != nil {
s.Logger.Warn(ctx, "failed to get template version", slog.Error(err))
return
}

if _, err := s.NotificationsEnqueuer.Enqueue(ctx, receiverID, notifications.TemplateTemplateBuildFailed,
map[string]string{
"name": template.Name,
"version": version.Name,
"workspaceName": workspace.Name,
"transition": string(build.Transition),
"reason": string(build.Reason),
"initiator": build.InitiatorByUsername,
"workspaceOwnerName": owner.Username,
"buildNumber": strconv.FormatInt(int64(build.BuildNumber), 10),
}, "provisionerdserver",
// Associate this notification with all the related entities.
workspace.ID, workspace.OwnerID, workspace.TemplateID, workspace.OrganizationID, template.CreatedBy,
); err != nil {
s.Logger.Warn(ctx, "failed to notify of failed template manual build", slog.F("template_id", template.ID), slog.F("workspace_id", workspace.ID), slog.Error(err))
}
}

// CompleteJob is triggered by a provision daemon to mark a provisioner job as completed.
func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) (*proto.Empty, error) {
ctx, span := s.startTrace(ctx, tracing.FuncName())
Expand Down Expand Up @@ -2100,3 +2147,20 @@ func convertDisplayApps(apps *sdkproto.DisplayApps) []database.DisplayApp {
}
return dapps
}

// findTemplateAdmins fetches all users with template admin permission including owners.
func findTemplateAdmins(ctx context.Context, store database.Store) ([]database.GetUsersRow, error) {
owners, err := store.GetUsers(ctx, database.GetUsersParams{
RbacRole: []string{codersdk.RoleOwner},
})
if err != nil {
return nil, xerrors.Errorf("get owners: %w", err)
}
templateAdmins, err := store.GetUsers(ctx, database.GetUsersParams{
RbacRole: []string{codersdk.RoleTemplateAdmin},
})
if err != nil {
return nil, xerrors.Errorf("get template admins: %w", err)
}
return append(owners, templateAdmins...), nil
}
161 changes: 155 additions & 6 deletions coderd/provisionerdserver/provisionerdserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/json"
"io"
"net/url"
"strconv"
"strings"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -1568,7 +1569,7 @@ func TestInsertWorkspaceResource(t *testing.T) {
func TestNotifications(t *testing.T) {
t.Parallel()

t.Run("Workspace deletion", func(t *testing.T) {
t.Run("WorkspaceDeletion", func(t *testing.T) {
t.Parallel()

tests := []struct {
Expand Down Expand Up @@ -1695,22 +1696,21 @@ func TestNotifications(t *testing.T) {
}
})

t.Run("Workspace build failed", func(t *testing.T) {
t.Run("WorkspaceAutoBuildFailed", func(t *testing.T) {
t.Parallel()

tests := []struct {
name string

name string
buildReason database.BuildReason
shouldNotify bool
}{
{
name: "initiated by owner",
name: "InitiatedByOwner",
buildReason: database.BuildReasonInitiator,
shouldNotify: false,
},
{
name: "initiated by autostart",
name: "InitiatedByAutoStart",
buildReason: database.BuildReasonAutostart,
shouldNotify: true,
},
Expand Down Expand Up @@ -1804,6 +1804,155 @@ func TestNotifications(t *testing.T) {
})
}
})

t.Run("TemplateBuildFailed", func(t *testing.T) {
t.Parallel()

var (
ctx = context.Background()
notifEnq = &testutil.FakeNotificationsEnqueuer{}
// To avoid spamming the output, ignore log errors. This test is
// designed to check a build failure, which is expected to log errors.
ignoreLogErrors = true
srv, db, ps, pd = setup(t, ignoreLogErrors, &overrides{
notificationEnqueuer: notifEnq,
})
// Create users with different roles to ensure that notifications are sent
// to the appropriate users
ownerA = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleOwner}})
ownerB = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleOwner}})
tplAdmin = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleTemplateAdmin}})
_ = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleUserAdmin}})
member = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleMember}})
)

tc := []struct {
name string
tplOwner database.User
buildInitiator database.User
reason database.BuildReason
transition database.WorkspaceTransition
receivers []uuid.UUID
}{
{
name: "ManualBuild",
tplOwner: ownerA,
buildInitiator: ownerB,
reason: database.BuildReasonInitiator,
transition: database.WorkspaceTransitionStart,
// Ensure that during manual builds, the initiator does not receive a
// notification. In this scenario, ownerB should not receive a
// notification.
receivers: []uuid.UUID{ownerA.ID, tplAdmin.ID},
},
{
name: "AutoBuild",
tplOwner: ownerA,
buildInitiator: ownerA,
reason: database.BuildReasonAutostart,
transition: database.WorkspaceTransitionStart,
// Ensure that during automated builds, all template admins and owners
// receive notifications.
receivers: []uuid.UUID{ownerA.ID, ownerB.ID, tplAdmin.ID},
},
}

for _, c := range tc {
t.Run(c.name, func(t *testing.T) {
// Given: a template and a workspace build
isManualBuild := c.reason == database.BuildReasonInitiator
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you have this logic in multiple places, it might be worth creating a helper method.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try and group your code into logical blocks, and use whitespace for visual assistance. Think of your code as having the same visual hierarchy that you'd design in your frontends.

For example, isManualBuild is only needed lower down: group them logically.

You've got a whole lot of code below which is doing several things; add a bit of spaces and/or comments to give the reader's "eye" some space to "breathe", and group related concepts/lines together to infer relationships.

template := dbgen.Template(t, db, database.Template{
Name: "template",
Provisioner: database.ProvisionerTypeEcho,
OrganizationID: pd.OrganizationID,
CreatedBy: c.tplOwner.ID,
})
template, err := db.GetTemplateByID(ctx, template.ID)
require.NoError(t, err)
version := dbgen.TemplateVersion(t, db, database.TemplateVersion{
OrganizationID: pd.OrganizationID,
TemplateID: uuid.NullUUID{
UUID: template.ID,
Valid: true,
},
JobID: uuid.New(),
})
workspaceOwner := member
workspace := dbgen.Workspace(t, db, database.Workspace{
TemplateID: template.ID,
OwnerID: workspaceOwner.ID,
OrganizationID: pd.OrganizationID,
})
b := database.WorkspaceBuild{
WorkspaceID: workspace.ID,
TemplateVersionID: version.ID,
Transition: c.transition,
Reason: c.reason,
}
// Set the build initiator if the test case specifies one for manual builds.
if isManualBuild {
b.InitiatorID = c.buildInitiator.ID
}
build := dbgen.WorkspaceBuild(t, db, b)
file := dbgen.File(t, db, database.File{CreatedBy: c.buildInitiator.ID})
job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
FileID: file.ID,
Type: database.ProvisionerJobTypeWorkspaceBuild,
Input: must(json.Marshal(provisionerdserver.WorkspaceProvisionJob{
WorkspaceBuildID: build.ID,
})),
OrganizationID: pd.OrganizationID,
})
_, err = db.AcquireProvisionerJob(ctx, database.AcquireProvisionerJobParams{
OrganizationID: pd.OrganizationID,
WorkerID: uuid.NullUUID{
UUID: pd.ID,
Valid: true,
},
Types: []database.ProvisionerType{database.ProvisionerTypeEcho},
})
require.NoError(t, err)

// When: the build fails
_, err = srv.FailJob(ctx, &proto.FailedJob{
JobId: job.ID.String(),
Type: &proto.FailedJob_WorkspaceBuild_{
WorkspaceBuild: &proto.FailedJob_WorkspaceBuild{
State: []byte{},
},
},
})
require.NoError(t, err)

// Then: send the template build failed notifications
var buildFailedNotifications []*testutil.Notification
for _, n := range notifEnq.Sent {
if n.TemplateID == notifications.TemplateTemplateBuildFailed {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if they're not the template you're expecting?

Your tests should not only prove that something works; it should also prove that it only works in precisely the way you're expecting, and any relevant deviation should fail the test.

buildFailedNotifications = append(buildFailedNotifications, n)
}
}
require.Len(t, buildFailedNotifications, len(c.receivers))

for _, n := range buildFailedNotifications {
require.Contains(t, n.Targets, template.ID)
require.Contains(t, n.Targets, workspace.ID)
require.Contains(t, n.Targets, workspace.OrganizationID)
require.Contains(t, n.Targets, c.tplOwner.ID)

require.Equal(t, n.Labels["name"], template.Name)
require.Equal(t, n.Labels["version"], version.Name)
require.Equal(t, n.Labels["workspaceName"], workspace.Name)
require.Equal(t, n.Labels["transition"], string(build.Transition))
require.Equal(t, n.Labels["reason"], string(build.Reason))
require.Equal(t, n.Labels["workspaceOwnerName"], workspaceOwner.Username)
require.Equal(t, n.Labels["buildNumber"], strconv.FormatInt(int64(build.BuildNumber), 10))
if isManualBuild {
require.Equal(t, n.Labels["initiator"], c.buildInitiator.Username)
}
}
})
}
})
}

type overrides struct {
Expand Down
Loading