Skip to content

feat: notify about manual failed builds #14419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Aug 27, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DELETE FROM notification_templates WHERE id = '2faeee0f-26cb-4e96-821c-85ccb9f71513';
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
INSERT INTO notification_templates (id, name, title_template, body_template, "group", actions)
VALUES ('2faeee0f-26cb-4e96-821c-85ccb9f71513', 'Workspace Manual Build Failed', E'Workspace "{{.Labels.name}}" manual build failed',
E'Hi {{.UserName}},\n\nA manual build of the workspace **{{.Labels.name}}** using the template **{{.Labels.template_name}}** failed (version: **{{.Labels.template_version_name}}**).\nThe workspace build was initiated by **{{.Labels.initiator}}**.',
'Workspace Events', '[
{
"label": "View build",
"url": "{{ base_url }}/@{{.Labels.workspace_owner_username}}/{{.Labels.name}}/builds/{{.Labels.workspace_build_number}}"
}
]'::jsonb);
1 change: 1 addition & 0 deletions coderd/notifications/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ var (
TemplateWorkspaceDormant = uuid.MustParse("0ea69165-ec14-4314-91f1-69566ac3c5a0")
TemplateWorkspaceAutoUpdated = uuid.MustParse("c34a0c09-0704-4cac-bd1c-0c0146811c2b")
TemplateWorkspaceMarkedForDeletion = uuid.MustParse("51ce2fdf-c9ca-4be1-8d70-628674f9bc42")
TemplateWorkspaceManualBuildFailed = uuid.MustParse("2faeee0f-26cb-4e96-821c-85ccb9f71513")
)

// Account-related events.
Expand Down
15 changes: 15 additions & 0 deletions coderd/notifications/notifications_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,21 @@ func TestNotificationTemplatesCanRender(t *testing.T) {
},
},
},
{
name: "TemplateWorkspaceManualBuildFailed",
id: notifications.TemplateWorkspaceManualBuildFailed,
payload: types.MessagePayload{
UserName: "bobby",
Labels: map[string]string{
"name": "bobby-workspace",
"template_name": "bobby-template",
"template_version_name": "bobby-template-version",
"initiator": "joe",
"workspace_owner_username": "mrbobby",
"workspace_build_number": "3",
},
},
},
}

allTemplates, err := enumerateAllTemplates(t)
Expand Down
80 changes: 79 additions & 1 deletion coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"net/http"
"net/url"
"reflect"
"sort"
"strconv"
"strings"
"sync/atomic"
Expand Down Expand Up @@ -1098,7 +1099,8 @@ func (s *server) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.
func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild) {
var reason string
if build.Reason.Valid() && build.Reason == database.BuildReasonInitiator {
return // failed workspace build initiated by a user should not notify
s.notifyWorkspaceManualBuildFailed(ctx, workspace, build)
return
}
reason = string(build.Reason)

Expand All @@ -1114,6 +1116,82 @@ func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace datab
}
}

func (s *server) notifyWorkspaceManualBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild) {
templateAdmins, template, templateVersion, workspaceOwner, err := s.prepareForNotifyWorkspaceManualBuildFailed(ctx, workspace, build)
if err != nil {
s.Logger.Error(ctx, "unable to collect data for manual build failed notification", slog.Error(err))
return
}

for _, templateAdmin := range templateAdmins {
if _, err := s.NotificationsEnqueuer.Enqueue(ctx, templateAdmin.ID, notifications.TemplateWorkspaceManualBuildFailed,
map[string]string{
"name": workspace.Name,
"template_name": template.Name,
"template_version_name": templateVersion.Name,
"initiator": build.InitiatorByUsername,
"workspace_owner_username": workspaceOwner.Username,
"workspace_build_number": strconv.Itoa(int(build.BuildNumber)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Including this will mean that deduplication won't be possible, just so you're aware.

}, "provisionerdserver",
// Associate this notification with all the related entities.
workspace.ID, workspace.OwnerID, workspace.TemplateID, workspace.OrganizationID,
); err != nil {
s.Logger.Warn(ctx, "failed to notify of failed workspace manual build", slog.Error(err))
}
}
}

func (s *server) prepareForNotifyWorkspaceManualBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild) ([]database.GetUsersRow,
database.Template, database.TemplateVersion, database.User, error,
) {
users, err := s.Database.GetUsers(ctx, database.GetUsersParams{
RbacRole: []string{codersdk.RoleTemplateAdmin},
})
if err != nil {
return nil, database.Template{}, database.TemplateVersion{}, database.User{}, xerrors.Errorf("unable to fetch template admins: %w", err)
}

usersByIDs := map[uuid.UUID]database.GetUsersRow{}
var userIDs []uuid.UUID
for _, user := range users {
usersByIDs[user.ID] = user
userIDs = append(userIDs, user.ID)
}

var templateAdmins []database.GetUsersRow
if len(userIDs) > 0 {
orgIDsByMemberIDs, err := s.Database.GetOrganizationIDsByMemberIDs(ctx, userIDs)
if err != nil {
return nil, database.Template{}, database.TemplateVersion{}, database.User{}, xerrors.Errorf("unable to fetch organization IDs by member IDs: %w", err)
}

for _, entry := range orgIDsByMemberIDs {
if slices.Contains(entry.OrganizationIDs, workspace.OrganizationID) {
templateAdmins = append(templateAdmins, usersByIDs[entry.UserID])
}
}
}
sort.Slice(templateAdmins, func(i, j int) bool {
return templateAdmins[i].Username < templateAdmins[j].Username
})

template, err := s.Database.GetTemplateByID(ctx, workspace.TemplateID)
if err != nil {
return nil, database.Template{}, database.TemplateVersion{}, database.User{}, xerrors.Errorf("unable to fetch template: %w", err)
}

templateVersion, err := s.Database.GetTemplateVersionByID(ctx, build.TemplateVersionID)
if err != nil {
return nil, database.Template{}, database.TemplateVersion{}, database.User{}, xerrors.Errorf("unable to fetch template version: %w", err)
}

workspaceOwner, err := s.Database.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
return nil, database.Template{}, database.TemplateVersion{}, database.User{}, xerrors.Errorf("unable to fetch workspace owner: %w", err)
}
return templateAdmins, template, templateVersion, workspaceOwner, nil
}

// CompleteJob is triggered by a provision daemon to mark a provisioner job as completed.
func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) (*proto.Empty, error) {
ctx, span := s.startTrace(ctx, tracing.FuncName())
Expand Down
67 changes: 64 additions & 3 deletions coderd/provisionerdserver/provisionerdserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/json"
"io"
"net/url"
"strconv"
"strings"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -1738,8 +1739,6 @@ func TestNotifications(t *testing.T) {
Provisioner: database.ProvisionerTypeEcho,
OrganizationID: pd.OrganizationID,
})
template, err := db.GetTemplateByID(ctx, template.ID)
require.NoError(t, err)
file := dbgen.File(t, db, database.File{CreatedBy: user.ID})
workspace := dbgen.Workspace(t, db, database.Workspace{
TemplateID: template.ID,
Expand Down Expand Up @@ -1769,7 +1768,7 @@ func TestNotifications(t *testing.T) {
})),
OrganizationID: pd.OrganizationID,
})
_, err = db.AcquireProvisionerJob(ctx, database.AcquireProvisionerJobParams{
_, err := db.AcquireProvisionerJob(ctx, database.AcquireProvisionerJobParams{
OrganizationID: pd.OrganizationID,
WorkerID: uuid.NullUUID{
UUID: pd.ID,
Expand Down Expand Up @@ -1804,6 +1803,68 @@ func TestNotifications(t *testing.T) {
})
}
})

t.Run("Manual build failed, template admins notified", func(t *testing.T) {
t.Parallel()

ctx := context.Background()

// given
notifEnq := &testutil.FakeNotificationsEnqueuer{}
srv, db, ps, pd := setup(t, true /* ignoreLogErrors */, &overrides{notificationEnqueuer: notifEnq})

templateAdmin := dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleTemplateAdmin}})
_ /* other template admin, should not receive notification */ = dbgen.User(t, db, database.User{RBACRoles: []string{codersdk.RoleTemplateAdmin}})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: templateAdmin.ID, OrganizationID: pd.OrganizationID})
user := dbgen.User(t, db, database.User{})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{UserID: user.ID, OrganizationID: pd.OrganizationID})

template := dbgen.Template(t, db, database.Template{
Name: "template", Provisioner: database.ProvisionerTypeEcho, OrganizationID: pd.OrganizationID,
})
workspace := dbgen.Workspace(t, db, database.Workspace{
TemplateID: template.ID, OwnerID: user.ID, OrganizationID: pd.OrganizationID,
})
version := dbgen.TemplateVersion(t, db, database.TemplateVersion{
OrganizationID: pd.OrganizationID, TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true}, JobID: uuid.New(),
})
build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
WorkspaceID: workspace.ID, TemplateVersionID: version.ID, InitiatorID: user.ID, Transition: database.WorkspaceTransitionDelete, Reason: database.BuildReasonInitiator,
})
job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
FileID: dbgen.File(t, db, database.File{CreatedBy: user.ID}).ID,
Type: database.ProvisionerJobTypeWorkspaceBuild,
Input: must(json.Marshal(provisionerdserver.WorkspaceProvisionJob{WorkspaceBuildID: build.ID})),
OrganizationID: pd.OrganizationID,
})
_, err := db.AcquireProvisionerJob(ctx, database.AcquireProvisionerJobParams{
OrganizationID: pd.OrganizationID,
WorkerID: uuid.NullUUID{UUID: pd.ID, Valid: true},
Types: []database.ProvisionerType{database.ProvisionerTypeEcho},
})
require.NoError(t, err)

// when
_, err = srv.FailJob(ctx, &proto.FailedJob{
JobId: job.ID.String(), Type: &proto.FailedJob_WorkspaceBuild_{WorkspaceBuild: &proto.FailedJob_WorkspaceBuild{State: []byte{}}},
})
require.NoError(t, err)

// then
require.Len(t, notifEnq.Sent, 1)
assert.Equal(t, notifEnq.Sent[0].UserID, templateAdmin.ID)
assert.Equal(t, notifEnq.Sent[0].TemplateID, notifications.TemplateWorkspaceManualBuildFailed)
assert.Contains(t, notifEnq.Sent[0].Targets, template.ID)
assert.Contains(t, notifEnq.Sent[0].Targets, workspace.ID)
assert.Contains(t, notifEnq.Sent[0].Targets, workspace.OrganizationID)
assert.Contains(t, notifEnq.Sent[0].Targets, user.ID)
assert.Equal(t, workspace.Name, notifEnq.Sent[0].Labels["name"])
assert.Equal(t, template.Name, notifEnq.Sent[0].Labels["template_name"])
assert.Equal(t, version.Name, notifEnq.Sent[0].Labels["template_version_name"])
assert.Equal(t, user.Username, notifEnq.Sent[0].Labels["initiator"])
assert.Equal(t, user.Username, notifEnq.Sent[0].Labels["workspace_owner_username"])
Comment on lines +1855 to +1865
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like these assertions could be passed into the previous table-based test "Workspace build failed"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a significant difference as here we expect notifications sent only to template admins, we don't care about workspace owners. I tried to squeeze this unit test as much as possible.

assert.Equal(t, strconv.Itoa(int(build.BuildNumber)), notifEnq.Sent[0].Labels["workspace_build_number"])
})
}

type overrides struct {
Expand Down
Loading