Skip to content

feat: notify template owner on manual build failures #14262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix tests and notifications
  • Loading branch information
BrunoQuaresma committed Aug 16, 2024
commit f81611df82f6a4c4d34cf8f47ca8e22651f83c59
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
INSERT INTO
notification_templates (
id,
name,
title_template,
body_template,
"group",
actions
)
VALUES (
'48a9d2b9-3655-430c-a31a-2442479e7519',
'Template Build Failure',
E'Build failed on workspace using template "{{.Labels.name}}"',
E'Hi {{.UserName}},\n\n'
'A workspace using the template **{{.Labels.name}}** failed to build.\n\n'
'- **Version**: {{.Labels.version}}\n'
'- **Workspace**: {{.Labels.workspaceName}}\n'
'- **Transition**: {{.Labels.transition}}\n'
'{{if .Labels.initiator}}- **Initiated by**: {{.Labels.initiator}}{{end}}'
'{{if .Labels.reason}}- **Reason**: {{.Labels.reason}}{{end}}'
'\n\nYou can debug this workspace using the build logs below or contact the deployment administrator.',
'Template Events',
'[
{
"label": "View build",
"url": "{{ base_url }}/@{{.Labels.workspaceOwnerName}}/{{.Labels.workspaceName}}/builds/{{.Labels.buildNumber}}"
},
{
"label": "View template",
"url": "{{ base_url }}/templates/{{.Labels.name}}"
}
]'::jsonb
);

This file was deleted.

4 changes: 2 additions & 2 deletions coderd/notifications/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ var (

// Template-related events.
var (
TemplateTemplateManualBuildFailed = uuid.MustParse("48a9d2b9-3655-430c-a31a-2442479e7519")
TemplateTemplateDeleted = uuid.MustParse("29a09665-2a4c-403f-9648-54301670e7be")
TemplateTemplateBuildFailed = uuid.MustParse("48a9d2b9-3655-430c-a31a-2442479e7519")
TemplateTemplateDeleted = uuid.MustParse("29a09665-2a4c-403f-9648-54301670e7be")
)
90 changes: 80 additions & 10 deletions coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,23 @@ func (s *server) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.
return nil, err
}

s.notifyWorkspaceBuildFailed(ctx, workspace, build)
// Only notify auto build failures
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather that this comment explain what classifies as an auto build (i.e. not initiated by a user)

autoBuild := build.Reason.Valid() && build.Reason != database.BuildReasonInitiator
if autoBuild {
s.notifyWorkspaceBuildFailed(ctx, workspace, build, workspace.OwnerID)
}

// Notify template admins including owners
admins, err := findTemplateAdmins(ctx, s.Database)
if err != nil {
s.Logger.Warn(ctx, "failed to find template admins for template build failed notification", slog.Error(err))
} else {
for _, admin := range admins {
if admin.ID != build.InitiatorID {
s.notifyTemplateBuildFailed(ctx, workspace, build, admin.ID)
}
}
}

err = s.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(build.WorkspaceID), []byte{})
if err != nil {
Expand Down Expand Up @@ -1095,17 +1111,11 @@ func (s *server) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.
return &proto.Empty{}, nil
}

func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild) {
var reason string
if build.Reason.Valid() && build.Reason == database.BuildReasonInitiator {
return // failed workspace build initiated by a user should not notify
}
reason = string(build.Reason)

if _, err := s.NotificationsEnqueuer.Enqueue(ctx, workspace.OwnerID, notifications.TemplateWorkspaceAutobuildFailed,
func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild, receiverID uuid.UUID) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why you're passing in receiverID here. Can you elaborate pls?

if _, err := s.NotificationsEnqueuer.Enqueue(ctx, receiverID, notifications.TemplateWorkspaceAutobuildFailed,
map[string]string{
"name": workspace.Name,
"reason": reason,
"reason": string(build.Reason),
}, "provisionerdserver",
// Associate this notification with all the related entities.
workspace.ID, workspace.OwnerID, workspace.TemplateID, workspace.OrganizationID,
Expand All @@ -1114,6 +1124,49 @@ func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace datab
}
}

func (s *server) notifyTemplateBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild, receiverID uuid.UUID) {
template, err := s.Database.GetTemplateByID(ctx, workspace.TemplateID)
if err != nil {
s.Logger.Warn(ctx, "failed to get template", slog.Error(err))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think about how an operator might debug with these logs; how would they know that this is related to sending notifications?

Try think of log lines as communication. You're communicating with an operator who is a) possibly not familiar with the product or less experienced, and b) trying to debug an issue, possibly under stress. Vague log lines are almost as bad as no log lines at all, sometimes worse because they can mislead or distract.

return
}

owner, err := s.Database.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
s.Logger.Warn(ctx, "failed to get workspace owner", slog.Error(err))
return
}

version, err := s.Database.GetTemplateVersionByID(ctx, build.TemplateVersionID)
if err != nil {
s.Logger.Warn(ctx, "failed to get template version", slog.Error(err))
return
}

// We only need to know the reason when it is not initiated by the user.
var reason string
if build.Reason != database.BuildReasonInitiator {
reason = string(build.Reason)
}

if _, err := s.NotificationsEnqueuer.Enqueue(ctx, receiverID, notifications.TemplateTemplateBuildFailed,
map[string]string{
"name": template.Name,
"version": version.Name,
"workspaceName": workspace.Name,
"transition": string(build.Transition),
"reason": reason,
"initiator": build.InitiatorByUsername,
"workspaceOwnerName": owner.Username,
"buildNumber": strconv.FormatInt(int64(build.BuildNumber), 10),
}, "provisionerdserver",
// Associate this notification with all the related entities.
workspace.ID, workspace.OwnerID, workspace.TemplateID, workspace.OrganizationID, template.CreatedBy,
); err != nil {
s.Logger.Warn(ctx, "failed to notify of failed template manual build", slog.F("template_id", template.ID), slog.F("workspace_id", workspace.ID), slog.Error(err))
}
}

// CompleteJob is triggered by a provision daemon to mark a provisioner job as completed.
func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) (*proto.Empty, error) {
ctx, span := s.startTrace(ctx, tracing.FuncName())
Expand Down Expand Up @@ -2100,3 +2153,20 @@ func convertDisplayApps(apps *sdkproto.DisplayApps) []database.DisplayApp {
}
return dapps
}

// findTemplateAdmins fetches all users with template admin permission including owners.
func findTemplateAdmins(ctx context.Context, store database.Store) ([]database.GetUsersRow, error) {
owners, err := store.GetUsers(ctx, database.GetUsersParams{
RbacRole: []string{codersdk.RoleOwner},
})
if err != nil {
return nil, xerrors.Errorf("get owners: %w", err)
}
templateAdmins, err := store.GetUsers(ctx, database.GetUsersParams{
RbacRole: []string{codersdk.RoleTemplateAdmin},
})
if err != nil {
return nil, xerrors.Errorf("get template admins: %w", err)
}
return append(owners, templateAdmins...), nil
}
110 changes: 72 additions & 38 deletions coderd/provisionerdserver/provisionerdserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/json"
"io"
"net/url"
"strconv"
"strings"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -1804,7 +1805,7 @@ func TestNotifications(t *testing.T) {
}
})

t.Run("TemplateManualBuildFailed", func(t *testing.T) {
t.Run("TemplateBuildFailed", func(t *testing.T) {
t.Parallel()

var (
Expand All @@ -1816,38 +1817,55 @@ func TestNotifications(t *testing.T) {
srv, db, ps, pd = setup(t, ignoreLogErrors, &overrides{
notificationEnqueuer: notifEnq,
})
userA = dbgen.User(t, db, database.User{})
userB = dbgen.User(t, db, database.User{})
// Create users with different roles to ensure that notifications are sent
// to the appropriate users
ownerA = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleOwner}})
ownerB = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleOwner}})
tplAdmin = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleTemplateAdmin}})
_ = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleUserAdmin}})
member = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleMember}})
)

tc := []struct {
name string
owner database.User
initiator database.User
shouldNotify bool
name string
tplOwner database.User
buildInitiator *database.User
reason database.BuildReason
transition database.WorkspaceTransition
receivers []uuid.UUID
}{
{
name: "InitiatedByOwner",
owner: userA,
initiator: userA,
shouldNotify: false,
name: "ManualBuild",
tplOwner: ownerA,
buildInitiator: &ownerB,
reason: database.BuildReasonInitiator,
transition: database.WorkspaceTransitionStart,
// Ensure that during manual builds, the initiator does not receive a
// notification. In this scenario, ownerB should not receive a
// notification.
receivers: []uuid.UUID{ownerA.ID, tplAdmin.ID},
},
{
name: "InitiatedBySomeoneElse",
owner: userB,
initiator: userA,
shouldNotify: true,
name: "AutoBuild",
tplOwner: ownerA,
buildInitiator: nil,
reason: database.BuildReasonAutostart,
transition: database.WorkspaceTransitionStart,
// Ensure that during automated builds, all template admins and owners
// receive notifications.
receivers: []uuid.UUID{ownerA.ID, ownerB.ID, tplAdmin.ID},
},
}

for _, c := range tc {
t.Run(c.name, func(t *testing.T) {
// Given: a template created by the owner
// Given: a template and a workspace build
isManualBuild := c.buildInitiator != nil && c.reason == database.BuildReasonInitiator
template := dbgen.Template(t, db, database.Template{
Name: "template",
Provisioner: database.ProvisionerTypeEcho,
OrganizationID: pd.OrganizationID,
CreatedBy: c.owner.ID,
CreatedBy: c.tplOwner.ID,
})
template, err := db.GetTemplateByID(ctx, template.ID)
require.NoError(t, err)
Expand All @@ -1859,21 +1877,23 @@ func TestNotifications(t *testing.T) {
},
JobID: uuid.New(),
})

// And: a workspace build initiated manually by a user
workspaceOwner := member
workspace := dbgen.Workspace(t, db, database.Workspace{
TemplateID: template.ID,
OwnerID: c.initiator.ID,
OwnerID: workspaceOwner.ID,
OrganizationID: pd.OrganizationID,
})
build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
build := database.WorkspaceBuild{
WorkspaceID: workspace.ID,
TemplateVersionID: version.ID,
InitiatorID: c.initiator.ID,
Transition: database.WorkspaceTransitionDelete,
Reason: database.BuildReasonInitiator,
})
file := dbgen.File(t, db, database.File{CreatedBy: c.initiator.ID})
Transition: c.transition,
Reason: c.reason,
}
// Set the build initiator if the test case specifies one for manual builds.
if isManualBuild {
build.InitiatorID = c.buildInitiator.ID
}
file := dbgen.File(t, db, database.File{CreatedBy: c.buildInitiator.ID})
job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
FileID: file.ID,
Type: database.ProvisionerJobTypeWorkspaceBuild,
Expand All @@ -1892,7 +1912,7 @@ func TestNotifications(t *testing.T) {
})
require.NoError(t, err)

// When: the workspace build job fails
// When: the build fails
_, err = srv.FailJob(ctx, &proto.FailedJob{
JobId: job.ID.String(),
Type: &proto.FailedJob_WorkspaceBuild_{
Expand All @@ -1903,17 +1923,31 @@ func TestNotifications(t *testing.T) {
})
require.NoError(t, err)

// Then: send the appropriate notifications
if c.shouldNotify {
require.Len(t, notifEnq.Sent, 1)
require.Equal(t, notifEnq.Sent[0].UserID, c.owner.ID)
require.Contains(t, notifEnq.Sent[0].Targets, template.ID)
require.Contains(t, notifEnq.Sent[0].Targets, workspace.ID)
require.Contains(t, notifEnq.Sent[0].Targets, workspace.OrganizationID)
require.Contains(t, notifEnq.Sent[0].Targets, c.owner.ID)
require.Contains(t, notifEnq.Sent[0].Targets, c.initiator.ID)
} else {
require.Len(t, notifEnq.Sent, 0)
// Then: send the template build failed notifications
var buildFailedNotifications []*testutil.Notification
for _, n := range notifEnq.Sent {
if n.TemplateID == notifications.TemplateTemplateBuildFailed {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if they're not the template you're expecting?

Your tests should not only prove that something works; it should also prove that it only works in precisely the way you're expecting, and any relevant deviation should fail the test.

buildFailedNotifications = append(buildFailedNotifications, n)
}
}
require.Len(t, buildFailedNotifications, len(c.receivers))

for _, n := range buildFailedNotifications {
require.Contains(t, n.Targets, template.ID)
require.Contains(t, n.Targets, workspace.ID)
require.Contains(t, n.Targets, workspace.OrganizationID)
require.Contains(t, n.Targets, c.tplOwner.ID)

require.Equal(t, n.Labels["name"], template.Name)
require.Equal(t, n.Labels["version"], version.Name)
require.Equal(t, n.Labels["workspaceName"], workspace.Name)
require.Equal(t, n.Labels["transition"], string(build.Transition))
require.Equal(t, n.Labels["reason"], string(build.Reason))
require.Equal(t, n.Labels["workspaceOwnerName"], workspaceOwner.Name)
require.Equal(t, n.Labels["buildNumber"], strconv.FormatInt(int64(build.BuildNumber), 10))
if isManualBuild {
require.Equal(t, n.Labels["initiator"], c.buildInitiator.Username)
}
}
})
}
Expand Down
Loading