Skip to content

feat: notify owner about failed autobuild #13891

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DELETE FROM notification_templates WHERE id = '381df2a9-c0c0-4749-420f-80a9280c66f9';
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
INSERT INTO notification_templates (id, name, title_template, body_template, "group", actions)
VALUES ('381df2a9-c0c0-4749-420f-80a9280c66f9', 'Workspace Autobuild Failed', E'Workspace "{{.Labels.name}}" autobuild failed',
E'Hi {{.UserName}}\n\Automatic build of your workspace **{{.Labels.name}}** failed.\nThe specified reason was "**{{.Labels.reason}}**".',
'Workspace Events', '[
{
"label": "View workspaces",
"url": "{{ base_url }}/workspaces"
}
]'::jsonb);
5 changes: 4 additions & 1 deletion coderd/notifications/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ import "github.com/google/uuid"
// TODO: autogenerate these.

// Workspace-related events.
var TemplateWorkspaceDeleted = uuid.MustParse("f517da0b-cdc9-410f-ab89-a86107c420ed")
var (
TemplateWorkspaceDeleted = uuid.MustParse("f517da0b-cdc9-410f-ab89-a86107c420ed")
WorkspaceAutobuildFailed = uuid.MustParse("381df2a9-c0c0-4749-420f-80a9280c66f9")
)
37 changes: 34 additions & 3 deletions coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -982,12 +982,18 @@ func (s *server) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.
}

var build database.WorkspaceBuild
var workspace database.Workspace
err = s.Database.InTx(func(db database.Store) error {
build, err = db.GetWorkspaceBuildByID(ctx, input.WorkspaceBuildID)
if err != nil {
return xerrors.Errorf("get workspace build: %w", err)
}

workspace, err = db.GetWorkspaceByID(ctx, build.WorkspaceID)
if err != nil {
return xerrors.Errorf("get workspace: %w", err)
}

if jobType.WorkspaceBuild.State != nil {
err = db.UpdateWorkspaceBuildProvisionerStateByID(ctx, database.UpdateWorkspaceBuildProvisionerStateByIDParams{
ID: input.WorkspaceBuildID,
Expand All @@ -1014,6 +1020,8 @@ func (s *server) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.
return nil, err
}

s.notifyWorkspaceBuildFailed(ctx, workspace, build)

err = s.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(build.WorkspaceID), []byte{})
if err != nil {
return nil, xerrors.Errorf("update workspace: %w", err)
Expand Down Expand Up @@ -1087,6 +1095,27 @@ func (s *server) FailJob(ctx context.Context, failJob *proto.FailedJob) (*proto.
return &proto.Empty{}, nil
}

func (s *server) notifyWorkspaceBuildFailed(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild) {
var reason string
if build.Reason.Valid() && build.Reason == database.BuildReasonInitiator {
return // failed workspace build initiated by a user should not notify
}
reason = string(build.Reason)
initiator := "autobuild"

if _, err := s.NotificationEnqueuer.Enqueue(ctx, workspace.OwnerID, notifications.WorkspaceAutobuildFailed,
map[string]string{
"name": workspace.Name,
"initiator": initiator,
"reason": reason,
}, "provisionerdserver",
// Associate this notification with all the related entities.
workspace.ID, workspace.OwnerID, workspace.TemplateID, workspace.OrganizationID,
); err != nil {
s.Logger.Warn(ctx, "failed to notify of failed workspace autobuild", slog.Error(err))
}
}

// CompleteJob is triggered by a provision daemon to mark a provisioner job as completed.
func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob) (*proto.Empty, error) {
ctx, span := s.startTrace(ctx, tracing.FuncName())
Expand Down Expand Up @@ -1523,6 +1552,7 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob)

func (s *server) notifyWorkspaceDeleted(ctx context.Context, workspace database.Workspace, build database.WorkspaceBuild) {
var reason string
initiator := build.InitiatorByUsername
if build.Reason.Valid() {
switch build.Reason {
case database.BuildReasonInitiator:
Expand All @@ -1534,6 +1564,7 @@ func (s *server) notifyWorkspaceDeleted(ctx context.Context, workspace database.
reason = "initiated by user"
case database.BuildReasonAutodelete:
reason = "autodeleted due to dormancy"
initiator = "autobuild"
default:
reason = string(build.Reason)
}
Expand All @@ -1545,9 +1576,9 @@ func (s *server) notifyWorkspaceDeleted(ctx context.Context, workspace database.

if _, err := s.NotificationEnqueuer.Enqueue(ctx, workspace.OwnerID, notifications.TemplateWorkspaceDeleted,
map[string]string{
"name": workspace.Name,
"initiatedBy": build.InitiatorByUsername,
"reason": reason,
"name": workspace.Name,
"reason": reason,
"initiator": initiator,
}, "provisionerdserver",
// Associate this notification with all the related entities.
workspace.ID, workspace.OwnerID, workspace.TemplateID, workspace.OrganizationID,
Expand Down
115 changes: 114 additions & 1 deletion coderd/provisionerdserver/provisionerdserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1687,14 +1687,127 @@ func TestNotifications(t *testing.T) {
require.Contains(t, notifEnq.sent[0].targets, workspace.OrganizationID)
require.Contains(t, notifEnq.sent[0].targets, user.ID)
if tc.deletionReason == database.BuildReasonInitiator {
require.Equal(t, notifEnq.sent[0].labels["initiatedBy"], initiator.Username)
require.Equal(t, notifEnq.sent[0].labels["initiator"], initiator.Username)
}
} else {
require.Len(t, notifEnq.sent, 0)
}
})
}
})

t.Run("Workspace build failed", func(t *testing.T) {
t.Parallel()

tests := []struct {
name string

buildReason database.BuildReason
shouldNotify bool
}{
{
name: "initiated by owner",
buildReason: database.BuildReasonInitiator,
shouldNotify: false,
},
{
name: "initiated by autostart",
buildReason: database.BuildReasonAutostart,
shouldNotify: true,
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()

ctx := context.Background()
notifEnq := &fakeNotificationEnqueuer{}

// Otherwise `(*Server).FailJob` fails with:
// audit log - get build {"error": "sql: no rows in result set"}
ignoreLogErrors := true
srv, db, ps, pd := setup(t, ignoreLogErrors, &overrides{
notificationEnqueuer: notifEnq,
})

user := dbgen.User(t, db, database.User{})
initiator := user

template := dbgen.Template(t, db, database.Template{
Name: "template",
Provisioner: database.ProvisionerTypeEcho,
OrganizationID: pd.OrganizationID,
})
template, err := db.GetTemplateByID(ctx, template.ID)
require.NoError(t, err)
file := dbgen.File(t, db, database.File{CreatedBy: user.ID})
workspace := dbgen.Workspace(t, db, database.Workspace{
TemplateID: template.ID,
OwnerID: user.ID,
OrganizationID: pd.OrganizationID,
})
version := dbgen.TemplateVersion(t, db, database.TemplateVersion{
OrganizationID: pd.OrganizationID,
TemplateID: uuid.NullUUID{
UUID: template.ID,
Valid: true,
},
JobID: uuid.New(),
})
build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
WorkspaceID: workspace.ID,
TemplateVersionID: version.ID,
InitiatorID: initiator.ID,
Transition: database.WorkspaceTransitionDelete,
Reason: tc.buildReason,
})
job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
FileID: file.ID,
Type: database.ProvisionerJobTypeWorkspaceBuild,
Input: must(json.Marshal(provisionerdserver.WorkspaceProvisionJob{
WorkspaceBuildID: build.ID,
})),
OrganizationID: pd.OrganizationID,
})
_, err = db.AcquireProvisionerJob(ctx, database.AcquireProvisionerJobParams{
OrganizationID: pd.OrganizationID,
WorkerID: uuid.NullUUID{
UUID: pd.ID,
Valid: true,
},
Types: []database.ProvisionerType{database.ProvisionerTypeEcho},
})
require.NoError(t, err)
Comment on lines +1729 to +1781
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: all the common bits here could be moved out to a helper func.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, my problem with this idea is that we already have setup() function, so we will add another, let's say prepare(), but prepare() would be used only by 2/10 tests in the unit.

I think I will pass on it.


_, err = srv.FailJob(ctx, &proto.FailedJob{
JobId: job.ID.String(),
Type: &proto.FailedJob_WorkspaceBuild_{
WorkspaceBuild: &proto.FailedJob_WorkspaceBuild{
State: []byte{},
},
},
})
require.NoError(t, err)

workspace, err = db.GetWorkspaceByID(ctx, workspace.ID)
require.NoError(t, err)

if tc.shouldNotify {
// Validate that the notification was sent and contained the expected values.
require.Len(t, notifEnq.sent, 1)
require.Equal(t, notifEnq.sent[0].userID, user.ID)
require.Contains(t, notifEnq.sent[0].targets, template.ID)
require.Contains(t, notifEnq.sent[0].targets, workspace.ID)
require.Contains(t, notifEnq.sent[0].targets, workspace.OrganizationID)
require.Contains(t, notifEnq.sent[0].targets, user.ID)
require.Equal(t, notifEnq.sent[0].labels["initiator"], "autobuild")
} else {
require.Len(t, notifEnq.sent, 0)
}
})
}
})
}

type overrides struct {
Expand Down
Loading