Skip to content

feat: add startup script logs to the ui #6558

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 34 commits into from
Mar 23, 2023
Merged
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
99d510c
Add startup script logs to the database
code-asher Feb 23, 2023
66c8ec3
Add coderd endpoints for startup script logs
code-asher Feb 23, 2023
1cc3e9d
Push startup script logs from agent
code-asher Feb 23, 2023
45d250f
Pull startup script logs on frontend
code-asher Feb 23, 2023
7fed360
Merge branch 'main' into startuplogs
kylecarbs Mar 10, 2023
7ce73aa
Rename queries
kylecarbs Mar 10, 2023
b86c400
Add constraint
kylecarbs Mar 11, 2023
0c4d2c3
Start creating log sending loop
kylecarbs Mar 13, 2023
1bb700f
Add log sending to the agent
kylecarbs Mar 13, 2023
736705f
Add tests for streaming logs
kylecarbs Mar 13, 2023
f741523
Shorten notify channel name
kylecarbs Mar 14, 2023
54c30be
Add FE
kylecarbs Mar 14, 2023
adb06ea
Improve bulk log performance
kylecarbs Mar 15, 2023
4061b13
Finish UI display
kylecarbs Mar 15, 2023
4c5b630
Fix startup log visibility
kylecarbs Mar 15, 2023
05d536c
Add warning for overflow
kylecarbs Mar 16, 2023
34fde1a
Fix agent queue logs overflow
kylecarbs Mar 17, 2023
379f1f4
Display staartup logs in a virtual DOM for performance
kylecarbs Mar 19, 2023
decde5c
Fix agent queue with loads of logs
kylecarbs Mar 20, 2023
d74457c
Merge branch 'main' into startuplogs
kylecarbs Mar 20, 2023
ac55f48
Fix authorize test
kylecarbs Mar 20, 2023
8d75963
Remove faulty test
kylecarbs Mar 20, 2023
cc715cd
Fix startup and shutdown reporting error
kylecarbs Mar 20, 2023
e3a4b2c
Fix gen
kylecarbs Mar 20, 2023
399dad7
Merge branch 'main' into startuplogs
kylecarbs Mar 22, 2023
45c0aca
Fix comments
kylecarbs Mar 22, 2023
5a0b15d
Periodically purge old database entries
kylecarbs Mar 23, 2023
b1b3fcb
Add test fixture for migration
kylecarbs Mar 23, 2023
6e1032c
Add Storybook
kylecarbs Mar 23, 2023
3762e8d
Check if there are logs when displaying features
kylecarbs Mar 23, 2023
f6b9fce
Fix startup component overflow gap
kylecarbs Mar 23, 2023
c48658c
Fix startup log wrapping
kylecarbs Mar 23, 2023
4ec1a0e
Merge branch 'main' into startuplogs
kylecarbs Mar 23, 2023
b55b7a1
Merge branch 'main' into startuplogs
kylecarbs Mar 23, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 138 additions & 3 deletions agent/agent.go
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@ import (
"cdr.dev/slog"
"github.com/coder/coder/agent/usershell"
"github.com/coder/coder/buildinfo"
"github.com/coder/coder/coderd/database"
"github.com/coder/coder/coderd/gitauth"
"github.com/coder/coder/codersdk"
"github.com/coder/coder/codersdk/agentsdk"
@@ -88,6 +89,7 @@ type Client interface {
PostLifecycle(ctx context.Context, state agentsdk.PostLifecycleRequest) error
PostAppHealth(ctx context.Context, req agentsdk.PostAppHealthsRequest) error
PostStartup(ctx context.Context, req agentsdk.PostStartupRequest) error
PatchStartupLogs(ctx context.Context, req agentsdk.PatchStartupLogs) error
}

func New(options Options) io.Closer {
@@ -642,13 +644,32 @@ func (a *agent) runScript(ctx context.Context, lifecycle, script string) error {
}

a.logger.Info(ctx, "running script", slog.F("lifecycle", lifecycle), slog.F("script", script))
writer, err := a.filesystem.OpenFile(filepath.Join(a.logDir, fmt.Sprintf("coder-%s-script.log", lifecycle)), os.O_CREATE|os.O_RDWR, 0o600)
fileWriter, err := a.filesystem.OpenFile(filepath.Join(a.logDir, fmt.Sprintf("coder-%s-script.log", lifecycle)), os.O_CREATE|os.O_RDWR, 0o600)
if err != nil {
return xerrors.Errorf("open %s script log file: %w", lifecycle, err)
}
defer func() {
_ = writer.Close()
_ = fileWriter.Close()
}()

var writer io.Writer = fileWriter
if lifecycle == "startup" {
// Create pipes for startup logs reader and writer
logsReader, logsWriter := io.Pipe()
defer func() {
_ = logsReader.Close()
}()
writer = io.MultiWriter(fileWriter, logsWriter)
flushedLogs, err := a.trackScriptLogs(ctx, logsReader)
if err != nil {
return xerrors.Errorf("track script logs: %w", err)
}
defer func() {
_ = logsWriter.Close()
<-flushedLogs
}()
}

cmd, err := a.createCommand(ctx, script, nil)
if err != nil {
return xerrors.Errorf("create command: %w", err)
@@ -664,10 +685,124 @@ func (a *agent) runScript(ctx context.Context, lifecycle, script string) error {

return xerrors.Errorf("run: %w", err)
}

return nil
}

func (a *agent) trackScriptLogs(ctx context.Context, reader io.Reader) (chan struct{}, error) {
// Initialize variables for log management
queuedLogs := make([]agentsdk.StartupLog, 0)
var flushLogsTimer *time.Timer
var logMutex sync.Mutex
logsFlushed := sync.NewCond(&sync.Mutex{})
var logsSending bool
defer func() {
logMutex.Lock()
if flushLogsTimer != nil {
flushLogsTimer.Stop()
}
logMutex.Unlock()
}()

// sendLogs function uploads the queued logs to the server
sendLogs := func() {
// Lock logMutex and check if logs are already being sent
logMutex.Lock()
if logsSending {
logMutex.Unlock()
return
}
if flushLogsTimer != nil {
flushLogsTimer.Stop()
}
if len(queuedLogs) == 0 {
logMutex.Unlock()
return
}
// Move the current queued logs to logsToSend and clear the queue
logsToSend := queuedLogs
logsSending = true
queuedLogs = make([]agentsdk.StartupLog, 0)
logMutex.Unlock()

// Retry uploading logs until successful or a specific error occurs
for r := retry.New(time.Second, 5*time.Second); r.Wait(ctx); {
err := a.client.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
Logs: logsToSend,
})
if err == nil {
break
}
var sdkErr *codersdk.Error
if errors.As(err, &sdkErr) {
if sdkErr.StatusCode() == http.StatusRequestEntityTooLarge {
a.logger.Warn(ctx, "startup logs too large, dropping logs")
break
}
}
a.logger.Error(ctx, "upload startup logs", slog.Error(err), slog.F("to_send", logsToSend))
}
// Reset logsSending flag
logMutex.Lock()
logsSending = false
flushLogsTimer.Reset(100 * time.Millisecond)
logMutex.Unlock()
logsFlushed.Broadcast()
}
// queueLog function appends a log to the queue and triggers sendLogs if necessary
queueLog := func(log agentsdk.StartupLog) {
logMutex.Lock()
defer logMutex.Unlock()

// Append log to the queue
queuedLogs = append(queuedLogs, log)

// If there are more than 100 logs, send them immediately
if len(queuedLogs) > 100 {
// Don't early return after this, because we still want
// to reset the timer just in case logs come in while
// we're sending.
go sendLogs()
}
// Reset or set the flushLogsTimer to trigger sendLogs after 100 milliseconds
if flushLogsTimer != nil {
flushLogsTimer.Reset(100 * time.Millisecond)
return
}
flushLogsTimer = time.AfterFunc(100*time.Millisecond, sendLogs)
}

// It's important that we either flush or drop all logs before returning
// because the startup state is reported after flush.
//
// It'd be weird for the startup state to be ready, but logs are still
// coming in.
logsFinished := make(chan struct{})
err := a.trackConnGoroutine(func() {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
queueLog(agentsdk.StartupLog{
CreatedAt: database.Now(),
Output: scanner.Text(),
})
}
defer close(logsFinished)
logsFlushed.L.Lock()
for {
logMutex.Lock()
if len(queuedLogs) == 0 {
logMutex.Unlock()
break
}
logMutex.Unlock()
logsFlushed.Wait()
}
})
if err != nil {
return nil, xerrors.Errorf("track conn goroutine: %w", err)
}
return logsFinished, nil
}

func (a *agent) init(ctx context.Context) {
// Clients' should ignore the host key when connecting.
// The agent needs to authenticate with coderd to SSH,
118 changes: 89 additions & 29 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
@@ -8,6 +8,8 @@ import (
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"net/netip"
"os"
"os/exec"
@@ -31,15 +33,14 @@ import (
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"golang.org/x/crypto/ssh"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
"golang.org/x/xerrors"
"tailscale.com/net/speedtest"
"tailscale.com/tailcfg"

"cdr.dev/slog"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/agent"
"github.com/coder/coder/coderd/httpapi"
"github.com/coder/coder/codersdk"
"github.com/coder/coder/codersdk/agentsdk"
"github.com/coder/coder/pty/ptytest"
@@ -739,37 +740,78 @@ func TestAgent_SSHConnectionEnvVars(t *testing.T) {

func TestAgent_StartupScript(t *testing.T) {
t.Parallel()
output := "something"
command := "sh -c 'echo " + output + "'"
if runtime.GOOS == "windows" {
t.Skip("This test doesn't work on Windows for some reason...")
command = "cmd.exe /c echo " + output
}
content := "output"
//nolint:dogsled
_, _, _, fs, _ := setupAgent(t, agentsdk.Metadata{
StartupScript: "echo " + content,
}, 0)
var gotContent string
require.Eventually(t, func() bool {
outputPath := filepath.Join(os.TempDir(), "coder-startup-script.log")
content, err := afero.ReadFile(fs, outputPath)
if err != nil {
t.Logf("read file %q: %s", outputPath, err)
return false
}
if len(content) == 0 {
t.Logf("no content in %q", outputPath)
return false
t.Run("Success", func(t *testing.T) {
t.Parallel()
client := &client{
t: t,
agentID: uuid.New(),
metadata: agentsdk.Metadata{
StartupScript: command,
DERPMap: &tailcfg.DERPMap{},
},
statsChan: make(chan *agentsdk.Stats),
coordinator: tailnet.NewCoordinator(),
}
if runtime.GOOS == "windows" {
// Windows uses UTF16! 🪟🪟🪟
content, _, err = transform.Bytes(unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder(), content)
if !assert.NoError(t, err) {
return false
}
closer := agent.New(agent.Options{
Client: client,
Filesystem: afero.NewMemMapFs(),
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
ReconnectingPTYTimeout: 0,
})
t.Cleanup(func() {
_ = closer.Close()
})
assert.Eventually(t, func() bool {
got := client.getLifecycleStates()
return len(got) > 0 && got[len(got)-1] == codersdk.WorkspaceAgentLifecycleReady
}, testutil.WaitShort, testutil.IntervalMedium)

require.Len(t, client.getStartupLogs(), 1)
require.Equal(t, output, client.getStartupLogs()[0].Output)
})
// This ensures that even when coderd sends back that the startup
// script has written too many lines it will still succeed!
t.Run("OverflowsAndSkips", func(t *testing.T) {
t.Parallel()
client := &client{
t: t,
agentID: uuid.New(),
metadata: agentsdk.Metadata{
StartupScript: command,
DERPMap: &tailcfg.DERPMap{},
},
patchWorkspaceLogs: func() error {
resp := httptest.NewRecorder()
httpapi.Write(context.Background(), resp, http.StatusRequestEntityTooLarge, codersdk.Response{
Message: "Too many lines!",
})
res := resp.Result()
defer res.Body.Close()
return codersdk.ReadBodyAsError(res)
},
statsChan: make(chan *agentsdk.Stats),
coordinator: tailnet.NewCoordinator(),
}
gotContent = string(content)
return true
}, testutil.WaitShort, testutil.IntervalMedium)
require.Equal(t, content, strings.TrimSpace(gotContent))
closer := agent.New(agent.Options{
Client: client,
Filesystem: afero.NewMemMapFs(),
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
ReconnectingPTYTimeout: 0,
})
t.Cleanup(func() {
_ = closer.Close()
})
assert.Eventually(t, func() bool {
got := client.getLifecycleStates()
return len(got) > 0 && got[len(got)-1] == codersdk.WorkspaceAgentLifecycleReady
}, testutil.WaitShort, testutil.IntervalMedium)
require.Len(t, client.getStartupLogs(), 0)
})
}

func TestAgent_Lifecycle(t *testing.T) {
@@ -1495,10 +1537,12 @@ type client struct {
statsChan chan *agentsdk.Stats
coordinator tailnet.Coordinator
lastWorkspaceAgent func()
patchWorkspaceLogs func() error

mu sync.Mutex // Protects following.
lifecycleStates []codersdk.WorkspaceAgentLifecycle
startup agentsdk.PostStartupRequest
logs []agentsdk.StartupLog
}

func (c *client) Metadata(_ context.Context) (agentsdk.Metadata, error) {
@@ -1583,6 +1627,22 @@ func (c *client) PostStartup(_ context.Context, startup agentsdk.PostStartupRequ
return nil
}

func (c *client) getStartupLogs() []agentsdk.StartupLog {
c.mu.Lock()
defer c.mu.Unlock()
return c.logs
}

func (c *client) PatchStartupLogs(_ context.Context, logs agentsdk.PatchStartupLogs) error {
c.mu.Lock()
defer c.mu.Unlock()
if c.patchWorkspaceLogs != nil {
return c.patchWorkspaceLogs()
}
c.logs = append(c.logs, logs.Logs...)
return nil
}

// tempDirUnixSocket returns a temporary directory that can safely hold unix
// sockets (probably).
//
5 changes: 4 additions & 1 deletion cli/agent.go
Original file line number Diff line number Diff line change
@@ -118,7 +118,10 @@ func workspaceAgent() *cobra.Command {
client := agentsdk.New(coderURL)
client.SDK.Logger = logger
// Set a reasonable timeout so requests can't hang forever!
client.SDK.HTTPClient.Timeout = 10 * time.Second
// The timeout needs to be reasonably long, because requests
// with large payloads can take a bit. e.g. startup scripts
// may take a while to insert.
client.SDK.HTTPClient.Timeout = 30 * time.Second

// Enable pprof handler
// This prevents the pprof import from being accidentally deleted.
5 changes: 5 additions & 0 deletions cli/server.go
Original file line number Diff line number Diff line change
@@ -65,6 +65,7 @@ import (
"github.com/coder/coder/coderd/autobuild/executor"
"github.com/coder/coder/coderd/database"
"github.com/coder/coder/coderd/database/dbfake"
"github.com/coder/coder/coderd/database/dbpurge"
"github.com/coder/coder/coderd/database/migrations"
"github.com/coder/coder/coderd/devtunnel"
"github.com/coder/coder/coderd/gitauth"
@@ -993,6 +994,10 @@ flags, and YAML configuration. The precedence is as follows:
shutdownConnsCtx, shutdownConns := context.WithCancel(ctx)
defer shutdownConns()

// Ensures that old database entries are cleaned up over time!
purger := dbpurge.New(ctx, logger, options.Database)
defer purger.Close()

// Wrap the server in middleware that redirects to the access URL if
// the request is not to a local IP.
var handler http.Handler = coderAPI.RootHandler
Loading