Skip to content

feat: add agent metadata #6614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 85 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
85 commits
Select commit Hold shift + click to select a range
ca8177f
Start writing docs
ammario Mar 14, 2023
a4bbb6e
regenerate testdata
ammario Mar 15, 2023
3cb3b74
Fixup provisioner
ammario Mar 15, 2023
826cca3
Rename Agent Metadata to Agent Manifest
ammario Mar 15, 2023
c30f1c6
WIP — agent report metadata loop
ammario Mar 15, 2023
41ce694
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 15, 2023
592b8a5
Finish godloop
ammario Mar 15, 2023
658f5b2
WIP agent tests
ammario Mar 15, 2023
465e0d8
Terraform tests pass!
ammario Mar 15, 2023
d0156b3
Add Post metadata endpoint to API
ammario Mar 16, 2023
361baf1
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 21, 2023
7a0541c
Test setting metadata
ammario Mar 21, 2023
16cd11e
Create watch endpoint
ammario Mar 21, 2023
c840962
Watch tests pass!!
ammario Mar 21, 2023
e4a5dd1
WIP DB refinement
ammario Mar 22, 2023
5ef5671
Upsert
ammario Mar 22, 2023
34935c5
Correctly insert metadata into db
ammario Mar 22, 2023
555ee66
Return complete manifest
ammario Mar 22, 2023
14f898f
Manually verified value in DB is getting updated
ammario Mar 22, 2023
0834cc6
It shows and it glows
ammario Mar 22, 2023
f625783
Don't show stale data
ammario Mar 23, 2023
00cca25
The frontend lays out nicely
ammario Mar 23, 2023
74eb373
Add provisioner/terraform
ammario Mar 23, 2023
1c6245d
Add fixture
ammario Mar 23, 2023
85d4738
Continue beautifying
ammario Mar 23, 2023
e8cd58e
Fix clock skew issues
ammario Mar 23, 2023
d681e24
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 23, 2023
cee332b
WIP mock eventsource
ammario Mar 23, 2023
95aeccb
Remove redundant "key" in MetadataResult
ammario Mar 23, 2023
390e3c9
Fix component render bug
ammario Mar 23, 2023
3582175
Fix it even better
ammario Mar 23, 2023
2dbd84c
WIP story build out
ammario Mar 23, 2023
967e347
WIP DONT PUSH
ammario Mar 23, 2023
a80541f
Popover
ammario Mar 23, 2023
206220e
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 24, 2023
e1e992d
Make it look.... ok again
ammario Mar 24, 2023
4f42d4b
It looks OK
ammario Mar 24, 2023
1c6d7b3
Start working on tooltip
ammario Mar 24, 2023
5e614f5
It's all passable
ammario Mar 24, 2023
510524d
It builds!
ammario Mar 24, 2023
93de24e
Harden interval conversion in agent
ammario Mar 24, 2023
baa157f
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 24, 2023
495a38e
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 27, 2023
d1ff3dc
fix compilation
ammario Mar 27, 2023
1213212
windows
ammario Mar 27, 2023
1423f26
Simplify windows
ammario Mar 27, 2023
ec429fb
improve formatting
ammario Mar 27, 2023
d599851
fix
ammario Mar 27, 2023
1300009
Increase timeout for windows
ammario Mar 27, 2023
942ec2f
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 27, 2023
95c8ada
make fmt
ammario Mar 27, 2023
b7ddb4a
code cleanup in agent/
ammario Mar 27, 2023
8d1ab16
Revert Upsert change
ammario Mar 27, 2023
12d8f71
Revert "Revert Upsert change"
ammario Mar 27, 2023
90687ce
Fix fixture name
ammario Mar 27, 2023
2ef0b55
Minor fixups
ammario Mar 27, 2023
4289a6a
Start working on docs
ammario Mar 28, 2023
9c6db22
Make more docs progress
ammario Mar 28, 2023
d4132ec
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 28, 2023
eefd631
Fix ErrNoRows
ammario Mar 28, 2023
873e5f0
Add a bunch of examples
ammario Mar 28, 2023
fc3d8cf
Explain dstat
ammario Mar 28, 2023
1986662
docs: improve formatting
ammario Mar 28, 2023
dc631f5
Address review comments
ammario Mar 28, 2023
55a3f63
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 28, 2023
d8d5c06
nit
ammario Mar 28, 2023
64be182
fixup! nit
ammario Mar 28, 2023
06d26b5
improve synchronization in metadata loop
ammario Mar 29, 2023
ed9257f
explain collected at skip
ammario Mar 29, 2023
6a7b5cb
typo
ammario Mar 29, 2023
6ef1e81
document collection loop
ammario Mar 29, 2023
c9aa5a4
make fmt
ammario Mar 29, 2023
f771a27
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 29, 2023
012a6a2
UNLOG table
ammario Mar 29, 2023
fdce29f
make gen
ammario Mar 30, 2023
9ae8650
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 30, 2023
66468d3
Go on a multi-route tangent
ammario Mar 30, 2023
f861771
Revert "Go on a multi-route tangent"
ammario Mar 30, 2023
2e82543
Pass swagger
ammario Mar 30, 2023
40cd260
Make concurrency more robust
ammario Mar 30, 2023
f8e1f34
Improve concurrency a bit more!
ammario Mar 31, 2023
c15d364
Versioning chores...
ammario Mar 31, 2023
7d852d1
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 31, 2023
29524b9
make fmt
ammario Mar 31, 2023
67b5a39
Fix lint
ammario Mar 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address review comments
  • Loading branch information
ammario committed Mar 28, 2023
commit dc631f5fa1a85a391a50d430ae08383580b7e1f3
109 changes: 55 additions & 54 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,8 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
if timeout == 0 {
timeout = md.Interval
}
ctx, cancel := context.WithDeadline(ctx, time.Now().Add(
ctx, cancel := context.WithTimeout(ctx,
time.Duration(timeout)*time.Second,
),
)
defer cancel()

Expand Down Expand Up @@ -295,63 +294,65 @@ func (a *agent) reportMetadataLoop(ctx context.Context) {
a.logger.Error(ctx, "report metadata", slog.Error(err))
}
case <-baseTicker.C:
if len(metadataResults) > cap(metadataResults)/2 {
// If we're backpressured on sending back results, we risk
// runaway goroutine growth and/or overloading coderd. So,
// we just skip the collection. Since we never update
// the collections map, we'll retry the collection
// on the next tick.
a.logger.Debug(
ctx, "metadata collection backpressured",
slog.F("queue_len", len(metadataResults)),
)
continue
}
break
}

if len(metadataResults) > cap(metadataResults)/2 {
// If we're backpressured on sending back results, we risk
// runaway goroutine growth and/or overloading coderd. So,
// we just skip the collection. Since we never update
// the collections map, we'll retry the collection
// on the next tick.
a.logger.Debug(
ctx, "metadata collection backpressured",
slog.F("queue_len", len(metadataResults)),
)
continue
}

manifest := a.manifest.Load()
if manifest == nil {
continue
manifest := a.manifest.Load()
if manifest == nil {
continue
}
// If the manifest changes (e.g. on agent reconnect) we need to
// purge old cache values to prevent lastCollectedAt from growing
// boundlessly.
for key := range lastCollectedAts {
if slices.IndexFunc(manifest.Metadata, func(md codersdk.WorkspaceAgentMetadataDescription) bool {
return md.Key == key
}) < 0 {
delete(lastCollectedAts, key)
}
// If the manifest changes (e.g. on agent reconnect) we need to
// purge old cache values to prevent lastCollectedAt from growing
// boundlessly.
for key := range lastCollectedAts {
if slices.IndexFunc(manifest.Metadata, func(md codersdk.WorkspaceAgentMetadataDescription) bool {
return md.Key == key
}) < 0 {
delete(lastCollectedAts, key)
}

// Spawn a goroutine for each metadata collection, and use a
// channel to synchronize the results and avoid both messy
// mutex logic and overloading the API.
for _, md := range manifest.Metadata {
collectedAt, ok := lastCollectedAts[md.Key]
if ok {
// If the interval is zero, we assume the user just wants
// a single collection at startup, not a spinning loop.
if md.Interval == 0 {
continue
}
if collectedAt.Add(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this possible?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See new comment

convertInterval(md.Interval),
).After(time.Now()) {
continue
}
}

// Spawn a goroutine for each metadata collection, and use a
// channel to synchronize the results and avoid both messy
// mutex logic and overloading the API.
for _, md := range manifest.Metadata {
collectedAt, ok := lastCollectedAts[md.Key]
if ok {
// If the interval is zero, we assume the user just wants
// a single collection at startup, not a spinning loop.
if md.Interval == 0 {
continue
}
if collectedAt.Add(
convertInterval(md.Interval),
).After(time.Now()) {
continue
}
go func(md codersdk.WorkspaceAgentMetadataDescription) {
select {
case <-ctx.Done():
return
case metadataResults <- metadataResultAndKey{
key: md.Key,
result: a.collectMetadata(ctx, md),
}:
}

go func(md codersdk.WorkspaceAgentMetadataDescription) {
select {
case <-ctx.Done():
return
case metadataResults <- metadataResultAndKey{
key: md.Key,
result: a.collectMetadata(ctx, md),
}:
}
}(md)
}
}(md)
}
}
}
Expand Down Expand Up @@ -1077,7 +1078,7 @@ func (a *agent) init(ctx context.Context) {
}

// createCommand processes raw command input with OpenSSH-like behavior.
// If the rawScript provided is empty, it will default to the users shell.
// If the createCommand provided is empty, it will default to the users shell.
// This injects environment variables specified by the user at launch too.
func (a *agent) createCommand(ctx context.Context, script string, env []string) (*exec.Cmd, error) {
currentUser, err := user.Current()
Expand Down
3 changes: 3 additions & 0 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,9 @@ func New(options *Options) *API {
r.Route("/me", func(r chi.Router) {
r.Use(httpmw.ExtractWorkspaceAgent(options.Database))
r.Get("/manifest", api.workspaceAgentManifest)
// This route is deprecated and will be removed in a future release.
// New agents will use /me/manifest instead.
r.Get("/metadata", api.workspaceAgentManifest)
r.Post("/startup", api.postWorkspaceAgentStartup)
r.Patch("/startup-logs", api.patchWorkspaceAgentStartupLogs)
r.Post("/app-health", api.postWorkspaceAppHealth)
Expand Down
10 changes: 3 additions & 7 deletions coderd/database/dbauthz/querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -1556,13 +1556,9 @@ func (q *querier) InsertWorkspaceAgentStat(ctx context.Context, arg database.Ins
}

func (q *querier) InsertWorkspaceAgentMetadata(ctx context.Context, arg database.InsertWorkspaceAgentMetadataParams) error {
workspace, err := q.db.GetWorkspaceByAgentID(ctx, arg.WorkspaceAgentID)
if err != nil {
return xerrors.Errorf("find workspace by agent %v: %v", arg.WorkspaceAgentID, err)
}

err = q.authorizeContext(ctx, rbac.ActionUpdate, workspace)
if err != nil {
// We don't check for workspace ownership here since the agent metadata may
// be associated with an orphaned agent used by a dry run build.
if err := q.authorizeContext(ctx, rbac.ActionCreate, rbac.ResourceSystem); err != nil {
return err
}

Expand Down
10 changes: 5 additions & 5 deletions coderd/database/dump.sql

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
-- key enough?
CREATE TABLE workspace_agent_metadata (
workspace_agent_id uuid NOT NULL,
display_name text NOT NULL,
key character varying(128) NOT NULL,
script text NOT NULL,
value text NOT NULL DEFAULT '',
error text NOT NULL DEFAULT '',
display_name varchar(127) NOT NULL,
key varchar(127) NOT NULL,
script varchar(65535) NOT NULL,
value varchar(65535) NOT NULL DEFAULT '',
error varchar(65535) NOT NULL DEFAULT '',
timeout bigint NOT NULL,
interval bigint NOT NULL,
collected_at timestamp with time zone NOT NULL DEFAULT '0001-01-01 00:00:00+00',
Expand Down
32 changes: 13 additions & 19 deletions coderd/provisionerdserver/provisionerdserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -1276,27 +1276,21 @@ func InsertWorkspaceResource(ctx context.Context, db database.Store, jobID uuid.
}
snapshot.WorkspaceAgents = append(snapshot.WorkspaceAgents, telemetry.ConvertWorkspaceAgent(dbAgent))

// We don't need to insert metadata if the agent is not for a workspace.
// This is probably an agent made during the provisioning process.
_, err = db.GetWorkspaceByAgentID(ctx, agentID)
if err == nil {
for _, md := range prAgent.Metadata {
p := database.InsertWorkspaceAgentMetadataParams{
WorkspaceAgentID: agentID,
DisplayName: md.DisplayName,
Script: md.Script,
Key: md.Key,
Timeout: md.Timeout,
Interval: md.Interval,
}
err := db.InsertWorkspaceAgentMetadata(ctx, p)
if err != nil {
return xerrors.Errorf("insert agent metadata: %w, params: %+v", err, p)
}
for _, md := range prAgent.Metadata {
p := database.InsertWorkspaceAgentMetadataParams{
WorkspaceAgentID: agentID,
DisplayName: md.DisplayName,
Script: md.Script,
Key: md.Key,
Timeout: md.Timeout,
Interval: md.Interval,
}
err := db.InsertWorkspaceAgentMetadata(ctx, p)
if err != nil {
return xerrors.Errorf("insert agent metadata: %w, params: %+v", err, p)
}
} else if !errors.Is(err, sql.ErrNoRows) {
return xerrors.Errorf("get workspace by agent ID: %w", err)
}

for _, app := range prAgent.Apps {
slug := app.Slug
if slug == "" {
Expand Down
46 changes: 29 additions & 17 deletions coderd/workspaceagents.go
Original file line number Diff line number Diff line change
Expand Up @@ -1373,13 +1373,6 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
})
}

func ellipse(s string, maxLength int) string {
if len(s) > maxLength {
return s[:maxLength] + "..."
}
return s
}

// @Summary Submit workspace agent metadata
// @ID submit-workspace-agent-metadata
// @Security CoderSessionToken
Expand Down Expand Up @@ -1411,12 +1404,30 @@ func (api *API) workspaceAgentPostMetadata(rw http.ResponseWriter, r *http.Reque

key := chi.URLParam(r, "key")

const (
maxValueLen = 64 << 10
maxErrorLen = maxValueLen
)

metadataError := req.Error

// We overwrite the error if the provided payload is too long.
if len(req.Value) > maxValueLen {
metadataError = fmt.Sprintf("value of %d bytes exceeded %d bytes", len(req.Value), maxValueLen)
req.Value = req.Value[:maxValueLen]
}

if len(req.Error) > maxErrorLen {
metadataError = fmt.Sprintf("error of %d bytes exceeded %d bytes", len(req.Error), maxErrorLen)
req.Error = req.Error[:maxErrorLen]
}

datum := database.UpdateWorkspaceAgentMetadataParams{
WorkspaceAgentID: workspaceAgent.ID,
// We don't want a misconfigured agent to fill the database.
Key: ellipse(key, 128),
Value: ellipse(req.Value, 10<<10),
Error: ellipse(req.Error, 10<<10),
Key: key,
Value: req.Value,
Error: metadataError,
// We ignore the CollectedAt from the agent to avoid bugs caused by
// clock skew.
CollectedAt: time.Now(),
Expand All @@ -1434,7 +1445,6 @@ func (api *API) workspaceAgentPostMetadata(rw http.ResponseWriter, r *http.Reque
slog.F("workspace", workspace.ID),
slog.F("collected_at", datum.CollectedAt),
slog.F("key", datum.Key),
slog.F("value", ellipse(datum.Value, 5)),
)

err = api.Pubsub.Publish(watchWorkspaceAgentMetadataChannel(workspaceAgent.ID), []byte(datum.Key))
Expand Down Expand Up @@ -1535,15 +1545,17 @@ func (api *API) watchWorkspaceAgentMetadata(rw http.ResponseWriter, r *http.Requ

for {
select {
case <-refreshTicker.C:
// Avoid spamming the DB with reads we know there are no updates. We want
// to continue sending updates to the frontend so that "Result.Age"
// is always accurate. This way, the frontend doesn't need to
// sync its own clock with the backend.
sendMetadata(false)
case <-senderClosed:
return
case <-refreshTicker.C:
break
}

// Avoid spamming the DB with reads we know there are no updates. We want
// to continue sending updates to the frontend so that "Result.Age"
// is always accurate. This way, the frontend doesn't need to
// sync its own clock with the backend.
sendMetadata(false)
}
}

Expand Down
27 changes: 22 additions & 5 deletions coderd/workspaceagents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,13 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
Interval: 10,
Timeout: 3,
},
{
DisplayName: "TooLong",
Key: "foo3",
Script: "echo howdy",
Interval: 10,
Timeout: 3,
},
},
Id: uuid.NewString(),
Auth: &proto.Agent_Token{
Expand Down Expand Up @@ -1370,25 +1377,35 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
}

update = recvUpdate()
require.Len(t, update, 2)
require.Len(t, update, 3)
check(wantMetadata1, update[0])
// The second metadata result is not yet posted.
require.Zero(t, update[1].Result.CollectedAt)

wantMetadata2 := wantMetadata1
post("foo2", wantMetadata2)
update = recvUpdate()
require.Len(t, update, 2)
require.Len(t, update, 3)
check(wantMetadata1, update[0])
check(wantMetadata2, update[1])

wantMetadata1.Error = "error"
post("foo1", wantMetadata1)
update = recvUpdate()
require.Len(t, update, 2)
require.Len(t, update, 3)
check(wantMetadata1, update[0])

badMetadata := wantMetadata1
err = agentClient.PostMetadata(ctx, "unknown", badMetadata)
const maxValueLen = 64 << 10
tooLongValueMetadata := wantMetadata1
tooLongValueMetadata.Value = strings.Repeat("a", maxValueLen*2)
tooLongValueMetadata.Error = ""
tooLongValueMetadata.CollectedAt = time.Now()
post("foo3", tooLongValueMetadata)
got := recvUpdate()[2]
require.Len(t, got.Result.Value, maxValueLen)
require.NotEmpty(t, got.Result.Error)

unknownKeyMetadata := wantMetadata1
err = agentClient.PostMetadata(ctx, "unknown", unknownKeyMetadata)
require.Error(t, err)
}
Loading