Skip to content

feat: improve resources_monitoring for OOM & OOD monitoring #16241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9c4071e
work on resources monitoring
defelmnq Jan 23, 2025
5609041
work on migrations and db
defelmnq Jan 23, 2025
fd2dbe7
work on database integration
defelmnq Jan 23, 2025
d2b42b7
add metadata logic to db
defelmnq Jan 27, 2025
893c54c
add metadata logic to db
defelmnq Jan 27, 2025
15a3837
revert formatting
defelmnq Jan 27, 2025
de9e73a
improve errors handling
defelmnq Jan 27, 2025
d8568a3
work on fake db
defelmnq Jan 27, 2025
87737eb
work on db
defelmnq Jan 27, 2025
5cd023f
update provisioner version
defelmnq Jan 27, 2025
25aedf0
work on testdata
defelmnq Jan 28, 2025
a5f44aa
merge
defelmnq Jan 29, 2025
6f0373f
work on resources monitor
defelmnq Jan 29, 2025
4bfb4cb
worked on dbmem and dbauthz
defelmnq Jan 29, 2025
68b022f
add dbauthz tests
defelmnq Jan 29, 2025
5bc849e
work on errors message improvement
defelmnq Jan 31, 2025
8a79421
improve database indexes
defelmnq Feb 3, 2025
7d9a05b
Merge remote-tracking branch 'origin/main' into agent_resource_monito…
defelmnq Feb 3, 2025
93609ca
merge main and rename migration
defelmnq Feb 3, 2025
1bf0866
fix resources volumes allocation
defelmnq Feb 3, 2025
ffb6f91
regenerate
defelmnq Feb 3, 2025
aacc294
rename fixture
defelmnq Feb 3, 2025
961ab25
use agent_id uuid
defelmnq Feb 3, 2025
d7f57d7
work on tests
defelmnq Feb 3, 2025
401b000
work on tests
defelmnq Feb 3, 2025
930bfbb
add tests on provisionerserver
defelmnq Feb 3, 2025
fae8dc9
remove unrequired test case
defelmnq Feb 3, 2025
d98ea6f
Merge remote-tracking branch 'origin/main' into agent_resource_monito…
defelmnq Feb 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/testdata/coder_provisioner_list_--output_json.golden
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"last_seen_at": "====[timestamp]=====",
"name": "test",
"version": "v0.0.0-devel",
"api_version": "1.2",
"api_version": "1.3",
"provisioners": [
"echo"
],
Expand Down
2 changes: 2 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -1391,13 +1391,29 @@ func (q *querier) FavoriteWorkspace(ctx context.Context, id uuid.UUID) error {
return update(q.log, q.auth, fetch, q.db.FavoriteWorkspace)(ctx, id)
}

func (q *querier) FetchMemoryResourceMonitorsByAgentID(ctx context.Context, agentID uuid.UUID) (database.WorkspaceAgentMemoryResourceMonitor, error) {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil {
return database.WorkspaceAgentMemoryResourceMonitor{}, err
}

return q.db.FetchMemoryResourceMonitorsByAgentID(ctx, agentID)
}

func (q *querier) FetchNewMessageMetadata(ctx context.Context, arg database.FetchNewMessageMetadataParams) (database.FetchNewMessageMetadataRow, error) {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceNotificationMessage); err != nil {
return database.FetchNewMessageMetadataRow{}, err
}
return q.db.FetchNewMessageMetadata(ctx, arg)
}

func (q *querier) FetchVolumesResourceMonitorsByAgentID(ctx context.Context, agentID uuid.UUID) ([]database.WorkspaceAgentVolumeResourceMonitor, error) {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil {
return nil, err
}

return q.db.FetchVolumesResourceMonitorsByAgentID(ctx, agentID)
}

func (q *querier) GetAPIKeyByID(ctx context.Context, id string) (database.APIKey, error) {
return fetch(q.log, q.auth, q.db.GetAPIKeyByID)(ctx, id)
}
Expand Down Expand Up @@ -3003,6 +3019,14 @@ func (q *querier) InsertLicense(ctx context.Context, arg database.InsertLicenseP
return q.db.InsertLicense(ctx, arg)
}

func (q *querier) InsertMemoryResourceMonitor(ctx context.Context, arg database.InsertMemoryResourceMonitorParams) (database.WorkspaceAgentMemoryResourceMonitor, error) {
if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil {
return database.WorkspaceAgentMemoryResourceMonitor{}, err
}

return q.db.InsertMemoryResourceMonitor(ctx, arg)
}

func (q *querier) InsertMissingGroups(ctx context.Context, arg database.InsertMissingGroupsParams) ([]database.Group, error) {
if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
return nil, err
Expand Down Expand Up @@ -3195,6 +3219,14 @@ func (q *querier) InsertUserLink(ctx context.Context, arg database.InsertUserLin
return q.db.InsertUserLink(ctx, arg)
}

func (q *querier) InsertVolumeResourceMonitor(ctx context.Context, arg database.InsertVolumeResourceMonitorParams) (database.WorkspaceAgentVolumeResourceMonitor, error) {
if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil {
return database.WorkspaceAgentVolumeResourceMonitor{}, err
}

return q.db.InsertVolumeResourceMonitor(ctx, arg)
}

func (q *querier) InsertWorkspace(ctx context.Context, arg database.InsertWorkspaceParams) (database.WorkspaceTable, error) {
obj := rbac.ResourceWorkspace.WithOwner(arg.OwnerID.String()).InOrg(arg.OrganizationID)
tpl, err := q.GetTemplateByID(ctx, arg.TemplateID)
Expand Down
93 changes: 93 additions & 0 deletions coderd/database/dbauthz/dbauthz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4562,3 +4562,96 @@ func (s *MethodTestSuite) TestOAuth2ProviderAppTokens() {
}).Asserts(rbac.ResourceOauth2AppCodeToken.WithOwner(user.ID.String()), policy.ActionDelete)
}))
}

func (s *MethodTestSuite) TestResourcesMonitor() {
s.Run("InsertMemoryResourceMonitor", s.Subtest(func(db database.Store, check *expects) {
dbtestutil.DisableForeignKeysAndTriggers(s.T(), db)
check.Args(database.InsertMemoryResourceMonitorParams{}).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate)
}))

s.Run("InsertVolumeResourceMonitor", s.Subtest(func(db database.Store, check *expects) {
dbtestutil.DisableForeignKeysAndTriggers(s.T(), db)
check.Args(database.InsertVolumeResourceMonitorParams{}).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionCreate)
}))

s.Run("FetchMemoryResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) {
u := dbgen.User(s.T(), db, database.User{})
o := dbgen.Organization(s.T(), db, database.Organization{})
tpl := dbgen.Template(s.T(), db, database.Template{
OrganizationID: o.ID,
CreatedBy: u.ID,
})
tv := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
OrganizationID: o.ID,
CreatedBy: u.ID,
})
w := dbgen.Workspace(s.T(), db, database.WorkspaceTable{
TemplateID: tpl.ID,
OrganizationID: o.ID,
OwnerID: u.ID,
})
j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{
Type: database.ProvisionerJobTypeWorkspaceBuild,
})
b := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{
JobID: j.ID,
WorkspaceID: w.ID,
TemplateVersionID: tv.ID,
})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: b.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
dbgen.WorkspaceAgentMemoryResourceMonitor(s.T(), db, database.WorkspaceAgentMemoryResourceMonitor{
AgentID: agt.ID,
Enabled: true,
Threshold: 80,
CreatedAt: dbtime.Now(),
})

monitor, err := db.FetchMemoryResourceMonitorsByAgentID(context.Background(), agt.ID)
require.NoError(s.T(), err)

check.Args(agt.ID).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionRead).Returns(monitor)
}))

s.Run("FetchVolumesResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) {
u := dbgen.User(s.T(), db, database.User{})
o := dbgen.Organization(s.T(), db, database.Organization{})
tpl := dbgen.Template(s.T(), db, database.Template{
OrganizationID: o.ID,
CreatedBy: u.ID,
})
tv := dbgen.TemplateVersion(s.T(), db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
OrganizationID: o.ID,
CreatedBy: u.ID,
})
w := dbgen.Workspace(s.T(), db, database.WorkspaceTable{
TemplateID: tpl.ID,
OrganizationID: o.ID,
OwnerID: u.ID,
})
j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{
Type: database.ProvisionerJobTypeWorkspaceBuild,
})
b := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{
JobID: j.ID,
WorkspaceID: w.ID,
TemplateVersionID: tv.ID,
})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: b.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
dbgen.WorkspaceAgentVolumeResourceMonitor(s.T(), db, database.WorkspaceAgentVolumeResourceMonitor{
AgentID: agt.ID,
Path: "/var/lib",
Enabled: true,
Threshold: 80,
CreatedAt: dbtime.Now(),
})

monitors, err := db.FetchVolumesResourceMonitorsByAgentID(context.Background(), agt.ID)
require.NoError(s.T(), err)

check.Args(agt.ID).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionRead).Returns(monitors)
}))
}
23 changes: 23 additions & 0 deletions coderd/database/dbgen/dbgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,29 @@ func OAuth2ProviderAppToken(t testing.TB, db database.Store, seed database.OAuth
return token
}

func WorkspaceAgentMemoryResourceMonitor(t testing.TB, db database.Store, seed database.WorkspaceAgentMemoryResourceMonitor) database.WorkspaceAgentMemoryResourceMonitor {
monitor, err := db.InsertMemoryResourceMonitor(genCtx, database.InsertMemoryResourceMonitorParams{
AgentID: takeFirst(seed.AgentID, uuid.New()),
Enabled: takeFirst(seed.Enabled, true),
Threshold: takeFirst(seed.Threshold, 100),
CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()),
})
require.NoError(t, err, "insert workspace agent memory resource monitor")
return monitor
}

func WorkspaceAgentVolumeResourceMonitor(t testing.TB, db database.Store, seed database.WorkspaceAgentVolumeResourceMonitor) database.WorkspaceAgentVolumeResourceMonitor {
monitor, err := db.InsertVolumeResourceMonitor(genCtx, database.InsertVolumeResourceMonitorParams{
AgentID: takeFirst(seed.AgentID, uuid.New()),
Path: takeFirst(seed.Path, "/"),
Enabled: takeFirst(seed.Enabled, true),
Threshold: takeFirst(seed.Threshold, 100),
CreatedAt: takeFirst(seed.CreatedAt, dbtime.Now()),
})
require.NoError(t, err, "insert workspace agent volume resource monitor")
return monitor
}

func CustomRole(t testing.TB, db database.Store, seed database.CustomRole) database.CustomRole {
role, err := db.InsertCustomRole(genCtx, database.InsertCustomRoleParams{
Name: takeFirst(seed.Name, strings.ToLower(testutil.GetRandomName(t))),
Expand Down
Loading
Loading