Skip to content

feat: add agent metadata #6614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 85 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
85 commits
Select commit Hold shift + click to select a range
ca8177f
Start writing docs
ammario Mar 14, 2023
a4bbb6e
regenerate testdata
ammario Mar 15, 2023
3cb3b74
Fixup provisioner
ammario Mar 15, 2023
826cca3
Rename Agent Metadata to Agent Manifest
ammario Mar 15, 2023
c30f1c6
WIP — agent report metadata loop
ammario Mar 15, 2023
41ce694
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 15, 2023
592b8a5
Finish godloop
ammario Mar 15, 2023
658f5b2
WIP agent tests
ammario Mar 15, 2023
465e0d8
Terraform tests pass!
ammario Mar 15, 2023
d0156b3
Add Post metadata endpoint to API
ammario Mar 16, 2023
361baf1
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 21, 2023
7a0541c
Test setting metadata
ammario Mar 21, 2023
16cd11e
Create watch endpoint
ammario Mar 21, 2023
c840962
Watch tests pass!!
ammario Mar 21, 2023
e4a5dd1
WIP DB refinement
ammario Mar 22, 2023
5ef5671
Upsert
ammario Mar 22, 2023
34935c5
Correctly insert metadata into db
ammario Mar 22, 2023
555ee66
Return complete manifest
ammario Mar 22, 2023
14f898f
Manually verified value in DB is getting updated
ammario Mar 22, 2023
0834cc6
It shows and it glows
ammario Mar 22, 2023
f625783
Don't show stale data
ammario Mar 23, 2023
00cca25
The frontend lays out nicely
ammario Mar 23, 2023
74eb373
Add provisioner/terraform
ammario Mar 23, 2023
1c6245d
Add fixture
ammario Mar 23, 2023
85d4738
Continue beautifying
ammario Mar 23, 2023
e8cd58e
Fix clock skew issues
ammario Mar 23, 2023
d681e24
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 23, 2023
cee332b
WIP mock eventsource
ammario Mar 23, 2023
95aeccb
Remove redundant "key" in MetadataResult
ammario Mar 23, 2023
390e3c9
Fix component render bug
ammario Mar 23, 2023
3582175
Fix it even better
ammario Mar 23, 2023
2dbd84c
WIP story build out
ammario Mar 23, 2023
967e347
WIP DONT PUSH
ammario Mar 23, 2023
a80541f
Popover
ammario Mar 23, 2023
206220e
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 24, 2023
e1e992d
Make it look.... ok again
ammario Mar 24, 2023
4f42d4b
It looks OK
ammario Mar 24, 2023
1c6d7b3
Start working on tooltip
ammario Mar 24, 2023
5e614f5
It's all passable
ammario Mar 24, 2023
510524d
It builds!
ammario Mar 24, 2023
93de24e
Harden interval conversion in agent
ammario Mar 24, 2023
baa157f
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 24, 2023
495a38e
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 27, 2023
d1ff3dc
fix compilation
ammario Mar 27, 2023
1213212
windows
ammario Mar 27, 2023
1423f26
Simplify windows
ammario Mar 27, 2023
ec429fb
improve formatting
ammario Mar 27, 2023
d599851
fix
ammario Mar 27, 2023
1300009
Increase timeout for windows
ammario Mar 27, 2023
942ec2f
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 27, 2023
95c8ada
make fmt
ammario Mar 27, 2023
b7ddb4a
code cleanup in agent/
ammario Mar 27, 2023
8d1ab16
Revert Upsert change
ammario Mar 27, 2023
12d8f71
Revert "Revert Upsert change"
ammario Mar 27, 2023
90687ce
Fix fixture name
ammario Mar 27, 2023
2ef0b55
Minor fixups
ammario Mar 27, 2023
4289a6a
Start working on docs
ammario Mar 28, 2023
9c6db22
Make more docs progress
ammario Mar 28, 2023
d4132ec
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 28, 2023
eefd631
Fix ErrNoRows
ammario Mar 28, 2023
873e5f0
Add a bunch of examples
ammario Mar 28, 2023
fc3d8cf
Explain dstat
ammario Mar 28, 2023
1986662
docs: improve formatting
ammario Mar 28, 2023
dc631f5
Address review comments
ammario Mar 28, 2023
55a3f63
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 28, 2023
d8d5c06
nit
ammario Mar 28, 2023
64be182
fixup! nit
ammario Mar 28, 2023
06d26b5
improve synchronization in metadata loop
ammario Mar 29, 2023
ed9257f
explain collected at skip
ammario Mar 29, 2023
6a7b5cb
typo
ammario Mar 29, 2023
6ef1e81
document collection loop
ammario Mar 29, 2023
c9aa5a4
make fmt
ammario Mar 29, 2023
f771a27
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 29, 2023
012a6a2
UNLOG table
ammario Mar 29, 2023
fdce29f
make gen
ammario Mar 30, 2023
9ae8650
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 30, 2023
66468d3
Go on a multi-route tangent
ammario Mar 30, 2023
f861771
Revert "Go on a multi-route tangent"
ammario Mar 30, 2023
2e82543
Pass swagger
ammario Mar 30, 2023
40cd260
Make concurrency more robust
ammario Mar 30, 2023
f8e1f34
Improve concurrency a bit more!
ammario Mar 31, 2023
c15d364
Versioning chores...
ammario Mar 31, 2023
7d852d1
Merge remote-tracking branch 'origin/main' into agent-metadata
ammario Mar 31, 2023
29524b9
make fmt
ammario Mar 31, 2023
67b5a39
Fix lint
ammario Mar 31, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Prev Previous commit
Next Next commit
Merge remote-tracking branch 'origin/main' into agent-metadata
  • Loading branch information
ammario committed Mar 24, 2023
commit 206220e65d20363f505e70aa59d155f017cd9975
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ jobs:
echo "cover=false" >> $GITHUB_OUTPUT
fi

gotestsum --junitfile="gotests.xml" --packages="./..." -- -parallel=8 -timeout=5m -short -failfast $COVERAGE_FLAGS
gotestsum --junitfile="gotests.xml" --packages="./..." -- -parallel=8 -timeout=7m -short -failfast $COVERAGE_FLAGS

- uses: actions/upload-artifact@v3
if: success() || failure()
Expand Down
4 changes: 1 addition & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,6 @@ docs/admin/prometheus.md: scripts/metricsdocgen/main.go scripts/metricsdocgen/me
yarn run format:write:only ../docs/admin/prometheus.md

docs/cli.md: scripts/clidocgen/main.go $(GO_SRC_FILES) docs/manifest.json
# TODO(@ammario): re-enable server.md once we finish clibase migration.
ls ./docs/cli/*.md | grep -vP "\/coder_server" | xargs rm
BASE_PATH="." go run ./scripts/clidocgen
cd site
yarn run format:write:only ../docs/cli.md ../docs/cli/*.md ../docs/manifest.json
Expand All @@ -519,7 +517,7 @@ coderd/apidoc/swagger.json: $(shell find ./scripts/apidocgen $(FIND_EXCLUSIONS)
update-golden-files: cli/testdata/.gen-golden helm/tests/testdata/.gen-golden
.PHONY: update-golden-files

cli/testdata/.gen-golden: $(wildcard cli/testdata/*.golden) $(GO_SRC_FILES)
cli/testdata/.gen-golden: $(wildcard cli/testdata/*.golden) $(wildcard cli/*.tpl) $(GO_SRC_FILES)
go test ./cli -run=TestCommandHelp -update
touch "$@"

Expand Down
141 changes: 138 additions & 3 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"cdr.dev/slog"
"github.com/coder/coder/agent/usershell"
"github.com/coder/coder/buildinfo"
"github.com/coder/coder/coderd/database"
"github.com/coder/coder/coderd/gitauth"
"github.com/coder/coder/codersdk"
"github.com/coder/coder/codersdk/agentsdk"
Expand Down Expand Up @@ -91,6 +92,7 @@ type Client interface {
PostAppHealth(ctx context.Context, req agentsdk.PostAppHealthsRequest) error
PostStartup(ctx context.Context, req agentsdk.PostStartupRequest) error
PostMetadata(ctx context.Context, key string, req agentsdk.PostMetadataRequest) error
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If someone has a lot of metadata, I could see us wanting to debounce some of the requests... e.g. 5 things polling each second * 1000 workspaces = a lot of requests

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered this but didn't want to pursue optimizing it immediately because the feature is very optional and there's a clear path to adding a WebSocket endpoint to improve performance when the time comes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A WebSocket endpoint would add substantial complexity (making this harder to review...) and bug risk.

PatchStartupLogs(ctx context.Context, req agentsdk.PatchStartupLogs) error
}

func New(options Options) io.Closer {
Expand Down Expand Up @@ -793,13 +795,32 @@ func (a *agent) runScript(ctx context.Context, lifecycle, script string) error {
}

a.logger.Info(ctx, "running script", slog.F("lifecycle", lifecycle), slog.F("script", script))
writer, err := a.filesystem.OpenFile(filepath.Join(a.logDir, fmt.Sprintf("coder-%s-script.log", lifecycle)), os.O_CREATE|os.O_RDWR, 0o600)
fileWriter, err := a.filesystem.OpenFile(filepath.Join(a.logDir, fmt.Sprintf("coder-%s-script.log", lifecycle)), os.O_CREATE|os.O_RDWR, 0o600)
if err != nil {
return xerrors.Errorf("open %s script log file: %w", lifecycle, err)
}
defer func() {
_ = writer.Close()
_ = fileWriter.Close()
}()

var writer io.Writer = fileWriter
if lifecycle == "startup" {
// Create pipes for startup logs reader and writer
logsReader, logsWriter := io.Pipe()
defer func() {
_ = logsReader.Close()
}()
writer = io.MultiWriter(fileWriter, logsWriter)
flushedLogs, err := a.trackScriptLogs(ctx, logsReader)
if err != nil {
return xerrors.Errorf("track script logs: %w", err)
}
defer func() {
_ = logsWriter.Close()
<-flushedLogs
}()
}

cmd, err := a.createCommand(ctx, script, nil)
if err != nil {
return xerrors.Errorf("create command: %w", err)
Expand All @@ -815,10 +836,124 @@ func (a *agent) runScript(ctx context.Context, lifecycle, script string) error {

return xerrors.Errorf("run: %w", err)
}

return nil
}

func (a *agent) trackScriptLogs(ctx context.Context, reader io.Reader) (chan struct{}, error) {
// Initialize variables for log management
queuedLogs := make([]agentsdk.StartupLog, 0)
var flushLogsTimer *time.Timer
var logMutex sync.Mutex
logsFlushed := sync.NewCond(&sync.Mutex{})
var logsSending bool
defer func() {
logMutex.Lock()
if flushLogsTimer != nil {
flushLogsTimer.Stop()
}
logMutex.Unlock()
}()

// sendLogs function uploads the queued logs to the server
sendLogs := func() {
// Lock logMutex and check if logs are already being sent
logMutex.Lock()
if logsSending {
logMutex.Unlock()
return
}
if flushLogsTimer != nil {
flushLogsTimer.Stop()
}
if len(queuedLogs) == 0 {
logMutex.Unlock()
return
}
// Move the current queued logs to logsToSend and clear the queue
logsToSend := queuedLogs
logsSending = true
queuedLogs = make([]agentsdk.StartupLog, 0)
logMutex.Unlock()

// Retry uploading logs until successful or a specific error occurs
for r := retry.New(time.Second, 5*time.Second); r.Wait(ctx); {
err := a.client.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
Logs: logsToSend,
})
if err == nil {
break
}
var sdkErr *codersdk.Error
if errors.As(err, &sdkErr) {
if sdkErr.StatusCode() == http.StatusRequestEntityTooLarge {
a.logger.Warn(ctx, "startup logs too large, dropping logs")
break
}
}
a.logger.Error(ctx, "upload startup logs", slog.Error(err), slog.F("to_send", logsToSend))
}
// Reset logsSending flag
logMutex.Lock()
logsSending = false
flushLogsTimer.Reset(100 * time.Millisecond)
logMutex.Unlock()
logsFlushed.Broadcast()
}
// queueLog function appends a log to the queue and triggers sendLogs if necessary
queueLog := func(log agentsdk.StartupLog) {
logMutex.Lock()
defer logMutex.Unlock()

// Append log to the queue
queuedLogs = append(queuedLogs, log)

// If there are more than 100 logs, send them immediately
if len(queuedLogs) > 100 {
// Don't early return after this, because we still want
// to reset the timer just in case logs come in while
// we're sending.
go sendLogs()
}
// Reset or set the flushLogsTimer to trigger sendLogs after 100 milliseconds
if flushLogsTimer != nil {
flushLogsTimer.Reset(100 * time.Millisecond)
return
}
flushLogsTimer = time.AfterFunc(100*time.Millisecond, sendLogs)
}

// It's important that we either flush or drop all logs before returning
// because the startup state is reported after flush.
//
// It'd be weird for the startup state to be ready, but logs are still
// coming in.
logsFinished := make(chan struct{})
err := a.trackConnGoroutine(func() {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
queueLog(agentsdk.StartupLog{
CreatedAt: database.Now(),
Output: scanner.Text(),
})
}
defer close(logsFinished)
logsFlushed.L.Lock()
for {
logMutex.Lock()
if len(queuedLogs) == 0 {
logMutex.Unlock()
break
}
logMutex.Unlock()
logsFlushed.Wait()
}
})
if err != nil {
return nil, xerrors.Errorf("track conn goroutine: %w", err)
}
return logsFinished, nil
}

func (a *agent) init(ctx context.Context) {
// Clients' should ignore the host key when connecting.
// The agent needs to authenticate with coderd to SSH,
Expand Down
118 changes: 89 additions & 29 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"net/netip"
"os"
"os/exec"
Expand All @@ -32,15 +34,14 @@ import (
"go.uber.org/goleak"
"golang.org/x/crypto/ssh"
"golang.org/x/exp/maps"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
"golang.org/x/xerrors"
"tailscale.com/net/speedtest"
"tailscale.com/tailcfg"

"cdr.dev/slog"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/agent"
"github.com/coder/coder/coderd/httpapi"
"github.com/coder/coder/codersdk"
"github.com/coder/coder/codersdk/agentsdk"
"github.com/coder/coder/pty/ptytest"
Expand Down Expand Up @@ -740,37 +741,78 @@ func TestAgent_SSHConnectionEnvVars(t *testing.T) {

func TestAgent_StartupScript(t *testing.T) {
t.Parallel()
output := "something"
command := "sh -c 'echo " + output + "'"
if runtime.GOOS == "windows" {
t.Skip("This test doesn't work on Windows for some reason...")
command = "cmd.exe /c echo " + output
}
content := "output"
//nolint:dogsled
_, _, _, fs, _ := setupAgent(t, agentsdk.Manifest{
StartupScript: "echo " + content,
}, 0)
var gotContent string
require.Eventually(t, func() bool {
outputPath := filepath.Join(os.TempDir(), "coder-startup-script.log")
content, err := afero.ReadFile(fs, outputPath)
if err != nil {
t.Logf("read file %q: %s", outputPath, err)
return false
}
if len(content) == 0 {
t.Logf("no content in %q", outputPath)
return false
t.Run("Success", func(t *testing.T) {
t.Parallel()
client := &client{
t: t,
agentID: uuid.New(),
manifest: agentsdk.Manifest{
StartupScript: command,
DERPMap: &tailcfg.DERPMap{},
},
statsChan: make(chan *agentsdk.Stats),
coordinator: tailnet.NewCoordinator(),
}
if runtime.GOOS == "windows" {
// Windows uses UTF16! 🪟🪟🪟
content, _, err = transform.Bytes(unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder(), content)
if !assert.NoError(t, err) {
return false
}
closer := agent.New(agent.Options{
Client: client,
Filesystem: afero.NewMemMapFs(),
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
ReconnectingPTYTimeout: 0,
})
t.Cleanup(func() {
_ = closer.Close()
})
assert.Eventually(t, func() bool {
got := client.getLifecycleStates()
return len(got) > 0 && got[len(got)-1] == codersdk.WorkspaceAgentLifecycleReady
}, testutil.WaitShort, testutil.IntervalMedium)

require.Len(t, client.getStartupLogs(), 1)
require.Equal(t, output, client.getStartupLogs()[0].Output)
})
// This ensures that even when coderd sends back that the startup
// script has written too many lines it will still succeed!
t.Run("OverflowsAndSkips", func(t *testing.T) {
t.Parallel()
client := &client{
t: t,
agentID: uuid.New(),
manifest: agentsdk.Manifest{
StartupScript: command,
DERPMap: &tailcfg.DERPMap{},
},
patchWorkspaceLogs: func() error {
resp := httptest.NewRecorder()
httpapi.Write(context.Background(), resp, http.StatusRequestEntityTooLarge, codersdk.Response{
Message: "Too many lines!",
})
res := resp.Result()
defer res.Body.Close()
return codersdk.ReadBodyAsError(res)
},
statsChan: make(chan *agentsdk.Stats),
coordinator: tailnet.NewCoordinator(),
}
gotContent = string(content)
return true
}, testutil.WaitShort, testutil.IntervalMedium)
require.Equal(t, content, strings.TrimSpace(gotContent))
closer := agent.New(agent.Options{
Client: client,
Filesystem: afero.NewMemMapFs(),
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
ReconnectingPTYTimeout: 0,
})
t.Cleanup(func() {
_ = closer.Close()
})
assert.Eventually(t, func() bool {
got := client.getLifecycleStates()
return len(got) > 0 && got[len(got)-1] == codersdk.WorkspaceAgentLifecycleReady
}, testutil.WaitShort, testutil.IntervalMedium)
require.Len(t, client.getStartupLogs(), 0)
})
}

func TestAgent_Metadata(t *testing.T) {
Expand Down Expand Up @@ -1590,10 +1632,12 @@ type client struct {
statsChan chan *agentsdk.Stats
coordinator tailnet.Coordinator
lastWorkspaceAgent func()
patchWorkspaceLogs func() error

mu sync.Mutex // Protects following.
lifecycleStates []codersdk.WorkspaceAgentLifecycle
startup agentsdk.PostStartupRequest
logs []agentsdk.StartupLog
}

func (c *client) Manifest(_ context.Context) (agentsdk.Manifest, error) {
Expand Down Expand Up @@ -1694,6 +1738,22 @@ func (c *client) PostStartup(_ context.Context, startup agentsdk.PostStartupRequ
return nil
}

func (c *client) getStartupLogs() []agentsdk.StartupLog {
c.mu.Lock()
defer c.mu.Unlock()
return c.logs
}

func (c *client) PatchStartupLogs(_ context.Context, logs agentsdk.PatchStartupLogs) error {
c.mu.Lock()
defer c.mu.Unlock()
if c.patchWorkspaceLogs != nil {
return c.patchWorkspaceLogs()
}
c.logs = append(c.logs, logs.Logs...)
return nil
}

// tempDirUnixSocket returns a temporary directory that can safely hold unix
// sockets (probably).
//
Expand Down
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.