Skip to content

feat: add agent metadata #6614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 85 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
85 commits
Select commit Hold shift + click to select a range
ca8177f
Start writing docs
ammario Mar 14, 2023
a4bbb6e
regenerate testdata
ammario Mar 15, 2023
3cb3b74
Fixup provisioner
ammario Mar 15, 2023
826cca3
Rename Agent Metadata to Agent Manifest
ammario Mar 15, 2023
c30f1c6
WIP — agent report metadata loop
ammario Mar 15, 2023
41ce694
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 15, 2023
592b8a5
Finish godloop
ammario Mar 15, 2023
658f5b2
WIP agent tests
ammario Mar 15, 2023
465e0d8
Terraform tests pass!
ammario Mar 15, 2023
d0156b3
Add Post metadata endpoint to API
ammario Mar 16, 2023
361baf1
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 21, 2023
7a0541c
Test setting metadata
ammario Mar 21, 2023
16cd11e
Create watch endpoint
ammario Mar 21, 2023
c840962
Watch tests pass!!
ammario Mar 21, 2023
e4a5dd1
WIP DB refinement
ammario Mar 22, 2023
5ef5671
Upsert
ammario Mar 22, 2023
34935c5
Correctly insert metadata into db
ammario Mar 22, 2023
555ee66
Return complete manifest
ammario Mar 22, 2023
14f898f
Manually verified value in DB is getting updated
ammario Mar 22, 2023
0834cc6
It shows and it glows
ammario Mar 22, 2023
f625783
Don't show stale data
ammario Mar 23, 2023
00cca25
The frontend lays out nicely
ammario Mar 23, 2023
74eb373
Add provisioner/terraform
ammario Mar 23, 2023
1c6245d
Add fixture
ammario Mar 23, 2023
85d4738
Continue beautifying
ammario Mar 23, 2023
e8cd58e
Fix clock skew issues
ammario Mar 23, 2023
d681e24
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 23, 2023
cee332b
WIP mock eventsource
ammario Mar 23, 2023
95aeccb
Remove redundant "key" in MetadataResult
ammario Mar 23, 2023
390e3c9
Fix component render bug
ammario Mar 23, 2023
3582175
Fix it even better
ammario Mar 23, 2023
2dbd84c
WIP story build out
ammario Mar 23, 2023
967e347
WIP DONT PUSH
ammario Mar 23, 2023
a80541f
Popover
ammario Mar 23, 2023
206220e
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 24, 2023
e1e992d
Make it look.... ok again
ammario Mar 24, 2023
4f42d4b
It looks OK
ammario Mar 24, 2023
1c6d7b3
Start working on tooltip
ammario Mar 24, 2023
5e614f5
It's all passable
ammario Mar 24, 2023
510524d
It builds!
ammario Mar 24, 2023
93de24e
Harden interval conversion in agent
ammario Mar 24, 2023
baa157f
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 24, 2023
495a38e
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 27, 2023
d1ff3dc
fix compilation
ammario Mar 27, 2023
1213212
windows
ammario Mar 27, 2023
1423f26
Simplify windows
ammario Mar 27, 2023
ec429fb
improve formatting
ammario Mar 27, 2023
d599851
fix
ammario Mar 27, 2023
1300009
Increase timeout for windows
ammario Mar 27, 2023
942ec2f
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 27, 2023
95c8ada
make fmt
ammario Mar 27, 2023
b7ddb4a
code cleanup in agent/
ammario Mar 27, 2023
8d1ab16
Revert Upsert change
ammario Mar 27, 2023
12d8f71
Revert "Revert Upsert change"
ammario Mar 27, 2023
90687ce
Fix fixture name
ammario Mar 27, 2023
2ef0b55
Minor fixups
ammario Mar 27, 2023
4289a6a
Start working on docs
ammario Mar 28, 2023
9c6db22
Make more docs progress
ammario Mar 28, 2023
d4132ec
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 28, 2023
eefd631
Fix ErrNoRows
ammario Mar 28, 2023
873e5f0
Add a bunch of examples
ammario Mar 28, 2023
fc3d8cf
Explain dstat
ammario Mar 28, 2023
1986662
docs: improve formatting
ammario Mar 28, 2023
dc631f5
Address review comments
ammario Mar 28, 2023
55a3f63
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 28, 2023
d8d5c06
nit
ammario Mar 28, 2023
64be182
fixup! nit
ammario Mar 28, 2023
06d26b5
improve synchronization in metadata loop
ammario Mar 29, 2023
ed9257f
explain collected at skip
ammario Mar 29, 2023
6a7b5cb
typo
ammario Mar 29, 2023
6ef1e81
document collection loop
ammario Mar 29, 2023
c9aa5a4
make fmt
ammario Mar 29, 2023
f771a27
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 29, 2023
012a6a2
UNLOG table
ammario Mar 29, 2023
fdce29f
make gen
ammario Mar 30, 2023
9ae8650
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 30, 2023
66468d3
Go on a multi-route tangent
ammario Mar 30, 2023
f861771
Revert "Go on a multi-route tangent"
ammario Mar 30, 2023
2e82543
Pass swagger
ammario Mar 30, 2023
40cd260
Make concurrency more robust
ammario Mar 30, 2023
f8e1f34
Improve concurrency a bit more!
ammario Mar 31, 2023
c15d364
Versioning chores...
ammario Mar 31, 2023
7d852d1
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 31, 2023
29524b9
make fmt
ammario Mar 31, 2023
67b5a39
Fix lint
ammario Mar 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
WIP — agent report metadata loop
  • Loading branch information
ammario committed Mar 15, 2023
commit c30f1c68ea36ed4faf49a443cad7f9979f54d250
87 changes: 56 additions & 31 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"encoding/binary"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"net"
Expand Down Expand Up @@ -87,6 +88,7 @@ type Client interface {
PostLifecycle(ctx context.Context, state agentsdk.PostLifecycleRequest) error
PostAppHealth(ctx context.Context, req agentsdk.PostAppHealthsRequest) error
PostStartup(ctx context.Context, req agentsdk.PostStartupRequest) error
PostMetadata(ctx context.Context, req agentsdk.PostMetadataRequest) error
}

func New(options Options) io.Closer {
Expand Down Expand Up @@ -152,8 +154,8 @@ type agent struct {
closed chan struct{}

envVars map[string]string
// metadata is atomic because values can change after reconnection.
metadata atomic.Value
// manifest is atomic because values can change after reconnection.
manifest atomic.Pointer[agentsdk.Manifest]
sessionToken atomic.Pointer[string]
sshServer *ssh.Server

Expand All @@ -178,6 +180,7 @@ type agent struct {
// failure, you'll want the agent to reconnect.
func (a *agent) runLoop(ctx context.Context) {
go a.reportLifecycleLoop(ctx)
go a.reportMetadataLoop(ctx)

for retrier := retry.New(100*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
a.logger.Info(ctx, "connecting to coderd")
Expand All @@ -200,6 +203,32 @@ func (a *agent) runLoop(ctx context.Context) {
}
}

// reportMetadata performs a single metadata report pass for the agent.
//
// It returns nil without doing anything when no manifest has been stored
// yet: a.manifest is only populated once run has fetched it from coderd,
// while the reporting loop is started earlier, so a nil manifest is an
// expected state rather than an error.
//
// NOTE(review): this is still a stub. Collecting each manifest.Metadata entry
// and sending the results through the client's PostMetadata is not
// implemented here yet.
func (a *agent) reportMetadata(ctx context.Context) error {
	manifest := a.manifest.Load()
	if manifest == nil {
		// Nothing to report until the first manifest arrives.
		return nil
	}

	// TODO: run the command of every entry in manifest.Metadata, honoring
	// its Interval, and post the collected results. When per-entry tickers
	// are introduced, hold them as *time.Ticker; a Ticker must not be copied
	// by value.
	_ = manifest.Metadata

	return nil
}

// reportMetadataLoop calls reportMetadata on every tick of a base ticker
// until ctx is done.
//
// The base interval is one second, or 100ms when the binary was built by
// `go test`, so tests don't have to wait a full second per report. An error
// from reportMetadata is logged and does not stop the loop.
func (a *agent) reportMetadataLoop(ctx context.Context) {
	// In production, the minimum report interval is one second.
	interval := time.Second
	// The testing package registers the "test.v" flag, so finding it means
	// we are running inside a test binary.
	if flag.Lookup("test.v") != nil {
		interval = 100 * time.Millisecond
	}

	baseTicker := time.NewTicker(interval)
	// Release the ticker's resources once the loop exits.
	defer baseTicker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-baseTicker.C:
			if err := a.reportMetadata(ctx); err != nil {
				a.logger.Error(ctx, "report metadata", slog.Error(err))
			}
		}
	}
}

// reportLifecycleLoop reports the current lifecycle state once.
// Only the latest state is reported, intermediate states may be
// lost if the agent can't communicate with the API.
Expand Down Expand Up @@ -274,30 +303,30 @@ func (a *agent) run(ctx context.Context) error {
}
a.sessionToken.Store(&sessionToken)

metadata, err := a.client.Manifest(ctx)
manifest, err := a.client.Manifest(ctx)
if err != nil {
return xerrors.Errorf("fetch metadata: %w", err)
}
a.logger.Info(ctx, "fetched metadata", slog.F("metadata", metadata))
a.logger.Info(ctx, "fetched metadata", slog.F("metadata", manifest))

// Expand the directory and send it back to coderd so external
// applications that rely on the directory can use it.
//
// An example is VS Code Remote, which must know the directory
// before initializing a connection.
metadata.Directory, err = expandDirectory(metadata.Directory)
manifest.Directory, err = expandDirectory(manifest.Directory)
if err != nil {
return xerrors.Errorf("expand directory: %w", err)
}
err = a.client.PostStartup(ctx, agentsdk.PostStartupRequest{
Version: buildinfo.Version(),
ExpandedDirectory: metadata.Directory,
ExpandedDirectory: manifest.Directory,
})
if err != nil {
return xerrors.Errorf("update workspace agent version: %w", err)
}

oldMetadata := a.metadata.Swap(metadata)
oldMetadata := a.manifest.Swap(&manifest)

// The startup script should only execute on the first run!
if oldMetadata == nil {
Expand All @@ -307,7 +336,7 @@ func (a *agent) run(ctx context.Context) error {
// connect to a workspace that is not yet ready. We don't run this
// concurrently with the startup script to avoid conflicts between
// them.
if metadata.GitAuthConfigs > 0 {
if manifest.GitAuthConfigs > 0 {
// If this fails, we should consider surfacing the error in the
// startup log and setting the lifecycle state to be "start_error"
// (after startup script completion), but for now we'll just log it.
Expand All @@ -322,7 +351,7 @@ func (a *agent) run(ctx context.Context) error {
scriptStart := time.Now()
err = a.trackConnGoroutine(func() {
defer close(scriptDone)
scriptDone <- a.runStartupScript(ctx, metadata.StartupScript)
scriptDone <- a.runStartupScript(ctx, manifest.StartupScript)
})
if err != nil {
return xerrors.Errorf("track startup script: %w", err)
Expand All @@ -331,8 +360,8 @@ func (a *agent) run(ctx context.Context) error {
var timeout <-chan time.Time
// If timeout is zero, an older version of the coder
// provider was used. Otherwise a timeout is always > 0.
if metadata.StartupScriptTimeout > 0 {
t := time.NewTimer(metadata.StartupScriptTimeout)
if manifest.StartupScriptTimeout > 0 {
t := time.NewTimer(manifest.StartupScriptTimeout)
defer t.Stop()
timeout = t.C
}
Expand All @@ -349,7 +378,7 @@ func (a *agent) run(ctx context.Context) error {
return
}
// Only log if there was a startup script.
if metadata.StartupScript != "" {
if manifest.StartupScript != "" {
execTime := time.Since(scriptStart)
if err != nil {
a.logger.Warn(ctx, "startup script failed", slog.F("execution_time", execTime), slog.Error(err))
Expand All @@ -366,13 +395,13 @@ func (a *agent) run(ctx context.Context) error {
appReporterCtx, appReporterCtxCancel := context.WithCancel(ctx)
defer appReporterCtxCancel()
go NewWorkspaceAppHealthReporter(
a.logger, metadata.Apps, a.client.PostAppHealth)(appReporterCtx)
a.logger, manifest.Apps, a.client.PostAppHealth)(appReporterCtx)

a.closeMutex.Lock()
network := a.network
a.closeMutex.Unlock()
if network == nil {
network, err = a.createTailnet(ctx, metadata.DERPMap)
network, err = a.createTailnet(ctx, manifest.DERPMap)
if err != nil {
return xerrors.Errorf("create tailnet: %w", err)
}
Expand All @@ -391,7 +420,7 @@ func (a *agent) run(ctx context.Context) error {
a.startReportingConnectionStats(ctx)
} else {
// Update the DERP map!
network.SetDERPMap(metadata.DERPMap)
network.SetDERPMap(manifest.DERPMap)
}

a.logger.Debug(ctx, "running tailnet connection coordinator")
Expand Down Expand Up @@ -800,14 +829,10 @@ func (a *agent) createCommand(ctx context.Context, rawCommand string, env []stri
return nil, xerrors.Errorf("get user shell: %w", err)
}

rawMetadata := a.metadata.Load()
if rawMetadata == nil {
manifest := a.manifest.Load()
if manifest == nil {
return nil, xerrors.Errorf("no metadata was provided")
}
metadata, valid := rawMetadata.(agentsdk.Manifest)
if !valid {
return nil, xerrors.Errorf("metadata is the wrong type: %T", metadata)
}

// OpenSSH executes all commands with the users current shell.
// We replicate that behavior for IDE support.
Expand All @@ -829,7 +854,7 @@ func (a *agent) createCommand(ctx context.Context, rawCommand string, env []stri
}

cmd := exec.CommandContext(ctx, shell, args...)
cmd.Dir = metadata.Directory
cmd.Dir = manifest.Directory

// If the metadata directory doesn't exist, we run the command
// in the users home directory.
Expand Down Expand Up @@ -870,14 +895,14 @@ func (a *agent) createCommand(ctx context.Context, rawCommand string, env []stri

// This adds the ports dialog to code-server that enables
// proxying a port dynamically.
cmd.Env = append(cmd.Env, fmt.Sprintf("VSCODE_PROXY_URI=%s", metadata.VSCodePortProxyURI))
cmd.Env = append(cmd.Env, fmt.Sprintf("VSCODE_PROXY_URI=%s", manifest.VSCodePortProxyURI))

// Hide Coder message on code-server's "Getting Started" page
cmd.Env = append(cmd.Env, "CS_DISABLE_GETTING_STARTED_OVERRIDE=true")

// Load environment variables passed via the agent.
// These should override all variables we manually specify.
for envKey, value := range metadata.EnvironmentVariables {
for envKey, value := range manifest.EnvironmentVariables {
// Expanding environment variables allows for customization
// of the $PATH, among other variables. Customers can prepend
// or append to the $PATH, so allowing expand is required!
Expand Down Expand Up @@ -940,9 +965,9 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) {
session.DisablePTYEmulation()

if !isQuietLogin(session.RawCommand()) {
metadata, ok := a.metadata.Load().(agentsdk.Manifest)
if ok {
err = showMOTD(session, metadata.MOTDFile)
manifest := a.manifest.Load()
if manifest != nil {
err = showMOTD(session, manifest.MOTDFile)
if err != nil {
a.logger.Error(ctx, "show MOTD", slog.Error(err))
}
Expand Down Expand Up @@ -1330,19 +1355,19 @@ func (a *agent) Close() error {
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleShuttingDown)

lifecycleState := codersdk.WorkspaceAgentLifecycleOff
if metadata, ok := a.metadata.Load().(agentsdk.Manifest); ok && metadata.ShutdownScript != "" {
if manifest := a.manifest.Load(); manifest != nil && manifest.ShutdownScript != "" {
scriptDone := make(chan error, 1)
scriptStart := time.Now()
go func() {
defer close(scriptDone)
scriptDone <- a.runShutdownScript(ctx, metadata.ShutdownScript)
scriptDone <- a.runShutdownScript(ctx, manifest.ShutdownScript)
}()

var timeout <-chan time.Time
// If timeout is zero, an older version of the coder
// provider was used. Otherwise a timeout is always > 0.
if metadata.ShutdownScriptTimeout > 0 {
t := time.NewTimer(metadata.ShutdownScriptTimeout)
if manifest.ShutdownScriptTimeout > 0 {
t := time.NewTimer(manifest.ShutdownScriptTimeout)
defer t.Stop()
timeout = t.C
}
Expand Down
45 changes: 45 additions & 0 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"golang.org/x/crypto/ssh"
"golang.org/x/exp/maps"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
"golang.org/x/xerrors"
Expand Down Expand Up @@ -772,6 +773,36 @@ func TestAgent_StartupScript(t *testing.T) {
require.Equal(t, content, strings.TrimSpace(gotContent))
}

func TestAgent_Metadata(t *testing.T) {
t.Parallel()

//nolint:dogsled
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []agentsdk.Metadata{
{
Key: "greeting",
Interval: time.Millisecond * 100,
Cmd: []string{"echo", "hello"},
},
{
Key: "bad",
Interval: time.Millisecond * 100,
Cmd: []string{"sh", "-c", "exit 1"},
},
},
}, 0)

var gotMd agentsdk.PostMetadataRequest
require.Eventually(t, func() bool {
gotMd = client.getMetadata()
return len(gotMd) == 2
}, testutil.WaitShort, testutil.IntervalMedium)

require.Equal(t, "hello", gotMd["greeting"].Value)
require.Empty(t, gotMd["bad"].Value)
require.Equal(t, "exit status 1", gotMd["bad"].Error)
}

func TestAgent_Lifecycle(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -1492,6 +1523,7 @@ type client struct {
t *testing.T
agentID uuid.UUID
manifest agentsdk.Manifest
metadata agentsdk.PostMetadataRequest
statsChan chan *agentsdk.Stats
coordinator tailnet.Coordinator
lastWorkspaceAgent func()
Expand Down Expand Up @@ -1576,6 +1608,19 @@ func (c *client) getStartup() agentsdk.PostStartupRequest {
return c.startup
}

// getMetadata returns a copy of the most recently posted metadata request.
// The copy is taken while holding c.mu, so callers can inspect it without
// racing against later PostMetadata calls.
func (c *client) getMetadata() agentsdk.PostMetadataRequest {
	c.mu.Lock()
	snapshot := maps.Clone(c.metadata)
	c.mu.Unlock()

	return snapshot
}

// PostMetadata records req as the latest metadata report seen by this fake
// client, replacing whatever was stored before. It always succeeds.
func (c *client) PostMetadata(_ context.Context, req agentsdk.PostMetadataRequest) error {
	c.mu.Lock()
	c.metadata = req
	c.mu.Unlock()

	return nil
}

func (c *client) PostStartup(_ context.Context, startup agentsdk.PostStartupRequest) error {
c.mu.Lock()
defer c.mu.Unlock()
Expand Down
22 changes: 22 additions & 0 deletions codersdk/agentsdk/agentsdk.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,28 @@ type Metadata struct {
Interval time.Duration
}

// MetadataResult is the outcome of collecting a single metadata entry. It is
// the value type of PostMetadataRequest.
//
// NOTE(review): the fields carry no json tags, so they are encoded under
// their Go names ("Key", "Value", "Error") — confirm the server side expects
// that casing.
type MetadataResult struct {
// Key identifies the metadata entry this result belongs to.
// NOTE(review): presumably mirrors Metadata.Key and the map key in
// PostMetadataRequest — confirm.
Key string
// Value is the collected value. The agent test in this change expects
// "hello" for the command `echo hello` and an empty Value for a command
// that fails.
Value string
// Error describes a failed collection; the agent test in this change
// expects "exit status 1" for a command exiting with status 1.
// NOTE(review): presumably empty on success — confirm.
Error string
}

// PostMetadataRequest is the request body accepted by Client.PostMetadata: a
// set of metadata results indexed by string.
// NOTE(review): the index is presumably the metadata key (the agent test
// looks results up by Metadata.Key) — confirm.
type PostMetadataRequest map[string]MetadataResult

// PostMetadata sends req to coderd as the calling agent's metadata report.
//
// It returns a wrapped error when the request itself cannot be executed, and
// the error decoded from the response body when the server answers with
// anything other than 200 OK.
func (c *Client) PostMetadata(ctx context.Context, req PostMetadataRequest) error {
	const route = "/api/v2/workspaceagents/me/metadata"

	resp, err := c.SDK.Request(ctx, http.MethodPost, route, req)
	if err != nil {
		return xerrors.Errorf("execute request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		return nil
	}
	return codersdk.ReadBodyAsError(resp)
}

type Manifest struct {
// GitAuthConfigs stores the number of Git configurations
// the Coder deployment has. If this number is >0, we
Expand Down