Skip to content

Commit 206220e

Browse files
committed
Merge remote-tracking branch 'origin/main' into agent-metadata
2 parents a80541f + 9822745 commit 206220e

File tree

394 files changed

+13856
-8818
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

394 files changed

+13856
-8818
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ jobs:
302302
echo "cover=false" >> $GITHUB_OUTPUT
303303
fi
304304
305-
gotestsum --junitfile="gotests.xml" --packages="./..." -- -parallel=8 -timeout=5m -short -failfast $COVERAGE_FLAGS
305+
gotestsum --junitfile="gotests.xml" --packages="./..." -- -parallel=8 -timeout=7m -short -failfast $COVERAGE_FLAGS
306306
307307
- uses: actions/upload-artifact@v3
308308
if: success() || failure()

Makefile

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -501,8 +501,6 @@ docs/admin/prometheus.md: scripts/metricsdocgen/main.go scripts/metricsdocgen/me
501501
yarn run format:write:only ../docs/admin/prometheus.md
502502

503503
docs/cli.md: scripts/clidocgen/main.go $(GO_SRC_FILES) docs/manifest.json
504-
# TODO(@ammario): re-enable server.md once we finish clibase migration.
505-
ls ./docs/cli/*.md | grep -vP "\/coder_server" | xargs rm
506504
BASE_PATH="." go run ./scripts/clidocgen
507505
cd site
508506
yarn run format:write:only ../docs/cli.md ../docs/cli/*.md ../docs/manifest.json
@@ -519,7 +517,7 @@ coderd/apidoc/swagger.json: $(shell find ./scripts/apidocgen $(FIND_EXCLUSIONS)
519517
update-golden-files: cli/testdata/.gen-golden helm/tests/testdata/.gen-golden
520518
.PHONY: update-golden-files
521519

522-
cli/testdata/.gen-golden: $(wildcard cli/testdata/*.golden) $(GO_SRC_FILES)
520+
cli/testdata/.gen-golden: $(wildcard cli/testdata/*.golden) $(wildcard cli/*.tpl) $(GO_SRC_FILES)
523521
go test ./cli -run=TestCommandHelp -update
524522
touch "$@"
525523

agent/agent.go

Lines changed: 138 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"cdr.dev/slog"
4444
"github.com/coder/coder/agent/usershell"
4545
"github.com/coder/coder/buildinfo"
46+
"github.com/coder/coder/coderd/database"
4647
"github.com/coder/coder/coderd/gitauth"
4748
"github.com/coder/coder/codersdk"
4849
"github.com/coder/coder/codersdk/agentsdk"
@@ -91,6 +92,7 @@ type Client interface {
9192
PostAppHealth(ctx context.Context, req agentsdk.PostAppHealthsRequest) error
9293
PostStartup(ctx context.Context, req agentsdk.PostStartupRequest) error
9394
PostMetadata(ctx context.Context, key string, req agentsdk.PostMetadataRequest) error
95+
PatchStartupLogs(ctx context.Context, req agentsdk.PatchStartupLogs) error
9496
}
9597

9698
func New(options Options) io.Closer {
@@ -793,13 +795,32 @@ func (a *agent) runScript(ctx context.Context, lifecycle, script string) error {
793795
}
794796

795797
a.logger.Info(ctx, "running script", slog.F("lifecycle", lifecycle), slog.F("script", script))
796-
writer, err := a.filesystem.OpenFile(filepath.Join(a.logDir, fmt.Sprintf("coder-%s-script.log", lifecycle)), os.O_CREATE|os.O_RDWR, 0o600)
798+
fileWriter, err := a.filesystem.OpenFile(filepath.Join(a.logDir, fmt.Sprintf("coder-%s-script.log", lifecycle)), os.O_CREATE|os.O_RDWR, 0o600)
797799
if err != nil {
798800
return xerrors.Errorf("open %s script log file: %w", lifecycle, err)
799801
}
800802
defer func() {
801-
_ = writer.Close()
803+
_ = fileWriter.Close()
802804
}()
805+
806+
var writer io.Writer = fileWriter
807+
if lifecycle == "startup" {
808+
// Create pipes for startup logs reader and writer
809+
logsReader, logsWriter := io.Pipe()
810+
defer func() {
811+
_ = logsReader.Close()
812+
}()
813+
writer = io.MultiWriter(fileWriter, logsWriter)
814+
flushedLogs, err := a.trackScriptLogs(ctx, logsReader)
815+
if err != nil {
816+
return xerrors.Errorf("track script logs: %w", err)
817+
}
818+
defer func() {
819+
_ = logsWriter.Close()
820+
<-flushedLogs
821+
}()
822+
}
823+
803824
cmd, err := a.createCommand(ctx, script, nil)
804825
if err != nil {
805826
return xerrors.Errorf("create command: %w", err)
@@ -815,10 +836,124 @@ func (a *agent) runScript(ctx context.Context, lifecycle, script string) error {
815836

816837
return xerrors.Errorf("run: %w", err)
817838
}
818-
819839
return nil
820840
}
821841

842+
func (a *agent) trackScriptLogs(ctx context.Context, reader io.Reader) (chan struct{}, error) {
843+
// Initialize variables for log management
844+
queuedLogs := make([]agentsdk.StartupLog, 0)
845+
var flushLogsTimer *time.Timer
846+
var logMutex sync.Mutex
847+
logsFlushed := sync.NewCond(&sync.Mutex{})
848+
var logsSending bool
849+
defer func() {
850+
logMutex.Lock()
851+
if flushLogsTimer != nil {
852+
flushLogsTimer.Stop()
853+
}
854+
logMutex.Unlock()
855+
}()
856+
857+
// sendLogs function uploads the queued logs to the server
858+
sendLogs := func() {
859+
// Lock logMutex and check if logs are already being sent
860+
logMutex.Lock()
861+
if logsSending {
862+
logMutex.Unlock()
863+
return
864+
}
865+
if flushLogsTimer != nil {
866+
flushLogsTimer.Stop()
867+
}
868+
if len(queuedLogs) == 0 {
869+
logMutex.Unlock()
870+
return
871+
}
872+
// Move the current queued logs to logsToSend and clear the queue
873+
logsToSend := queuedLogs
874+
logsSending = true
875+
queuedLogs = make([]agentsdk.StartupLog, 0)
876+
logMutex.Unlock()
877+
878+
// Retry uploading logs until successful or a specific error occurs
879+
for r := retry.New(time.Second, 5*time.Second); r.Wait(ctx); {
880+
err := a.client.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
881+
Logs: logsToSend,
882+
})
883+
if err == nil {
884+
break
885+
}
886+
var sdkErr *codersdk.Error
887+
if errors.As(err, &sdkErr) {
888+
if sdkErr.StatusCode() == http.StatusRequestEntityTooLarge {
889+
a.logger.Warn(ctx, "startup logs too large, dropping logs")
890+
break
891+
}
892+
}
893+
a.logger.Error(ctx, "upload startup logs", slog.Error(err), slog.F("to_send", logsToSend))
894+
}
895+
// Reset logsSending flag
896+
logMutex.Lock()
897+
logsSending = false
898+
flushLogsTimer.Reset(100 * time.Millisecond)
899+
logMutex.Unlock()
900+
logsFlushed.Broadcast()
901+
}
902+
// queueLog function appends a log to the queue and triggers sendLogs if necessary
903+
queueLog := func(log agentsdk.StartupLog) {
904+
logMutex.Lock()
905+
defer logMutex.Unlock()
906+
907+
// Append log to the queue
908+
queuedLogs = append(queuedLogs, log)
909+
910+
// If there are more than 100 logs, send them immediately
911+
if len(queuedLogs) > 100 {
912+
// Don't early return after this, because we still want
913+
// to reset the timer just in case logs come in while
914+
// we're sending.
915+
go sendLogs()
916+
}
917+
// Reset or set the flushLogsTimer to trigger sendLogs after 100 milliseconds
918+
if flushLogsTimer != nil {
919+
flushLogsTimer.Reset(100 * time.Millisecond)
920+
return
921+
}
922+
flushLogsTimer = time.AfterFunc(100*time.Millisecond, sendLogs)
923+
}
924+
925+
// It's important that we either flush or drop all logs before returning
926+
// because the startup state is reported after flush.
927+
//
928+
// It'd be weird for the startup state to be ready, but logs are still
929+
// coming in.
930+
logsFinished := make(chan struct{})
931+
err := a.trackConnGoroutine(func() {
932+
scanner := bufio.NewScanner(reader)
933+
for scanner.Scan() {
934+
queueLog(agentsdk.StartupLog{
935+
CreatedAt: database.Now(),
936+
Output: scanner.Text(),
937+
})
938+
}
939+
defer close(logsFinished)
940+
logsFlushed.L.Lock()
941+
for {
942+
logMutex.Lock()
943+
if len(queuedLogs) == 0 {
944+
logMutex.Unlock()
945+
break
946+
}
947+
logMutex.Unlock()
948+
logsFlushed.Wait()
949+
}
950+
})
951+
if err != nil {
952+
return nil, xerrors.Errorf("track conn goroutine: %w", err)
953+
}
954+
return logsFinished, nil
955+
}
956+
822957
func (a *agent) init(ctx context.Context) {
823958
// Clients' should ignore the host key when connecting.
824959
// The agent needs to authenticate with coderd to SSH,

agent/agent_test.go

Lines changed: 89 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"fmt"
99
"io"
1010
"net"
11+
"net/http"
12+
"net/http/httptest"
1113
"net/netip"
1214
"os"
1315
"os/exec"
@@ -32,15 +34,14 @@ import (
3234
"go.uber.org/goleak"
3335
"golang.org/x/crypto/ssh"
3436
"golang.org/x/exp/maps"
35-
"golang.org/x/text/encoding/unicode"
36-
"golang.org/x/text/transform"
3737
"golang.org/x/xerrors"
3838
"tailscale.com/net/speedtest"
3939
"tailscale.com/tailcfg"
4040

4141
"cdr.dev/slog"
4242
"cdr.dev/slog/sloggers/slogtest"
4343
"github.com/coder/coder/agent"
44+
"github.com/coder/coder/coderd/httpapi"
4445
"github.com/coder/coder/codersdk"
4546
"github.com/coder/coder/codersdk/agentsdk"
4647
"github.com/coder/coder/pty/ptytest"
@@ -740,37 +741,78 @@ func TestAgent_SSHConnectionEnvVars(t *testing.T) {
740741

741742
func TestAgent_StartupScript(t *testing.T) {
742743
t.Parallel()
744+
output := "something"
745+
command := "sh -c 'echo " + output + "'"
743746
if runtime.GOOS == "windows" {
744-
t.Skip("This test doesn't work on Windows for some reason...")
747+
command = "cmd.exe /c echo " + output
745748
}
746-
content := "output"
747-
//nolint:dogsled
748-
_, _, _, fs, _ := setupAgent(t, agentsdk.Manifest{
749-
StartupScript: "echo " + content,
750-
}, 0)
751-
var gotContent string
752-
require.Eventually(t, func() bool {
753-
outputPath := filepath.Join(os.TempDir(), "coder-startup-script.log")
754-
content, err := afero.ReadFile(fs, outputPath)
755-
if err != nil {
756-
t.Logf("read file %q: %s", outputPath, err)
757-
return false
758-
}
759-
if len(content) == 0 {
760-
t.Logf("no content in %q", outputPath)
761-
return false
749+
t.Run("Success", func(t *testing.T) {
750+
t.Parallel()
751+
client := &client{
752+
t: t,
753+
agentID: uuid.New(),
754+
manifest: agentsdk.Manifest{
755+
StartupScript: command,
756+
DERPMap: &tailcfg.DERPMap{},
757+
},
758+
statsChan: make(chan *agentsdk.Stats),
759+
coordinator: tailnet.NewCoordinator(),
762760
}
763-
if runtime.GOOS == "windows" {
764-
// Windows uses UTF16! 🪟🪟🪟
765-
content, _, err = transform.Bytes(unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder(), content)
766-
if !assert.NoError(t, err) {
767-
return false
768-
}
761+
closer := agent.New(agent.Options{
762+
Client: client,
763+
Filesystem: afero.NewMemMapFs(),
764+
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
765+
ReconnectingPTYTimeout: 0,
766+
})
767+
t.Cleanup(func() {
768+
_ = closer.Close()
769+
})
770+
assert.Eventually(t, func() bool {
771+
got := client.getLifecycleStates()
772+
return len(got) > 0 && got[len(got)-1] == codersdk.WorkspaceAgentLifecycleReady
773+
}, testutil.WaitShort, testutil.IntervalMedium)
774+
775+
require.Len(t, client.getStartupLogs(), 1)
776+
require.Equal(t, output, client.getStartupLogs()[0].Output)
777+
})
778+
// This ensures that even when coderd sends back that the startup
779+
// script has written too many lines it will still succeed!
780+
t.Run("OverflowsAndSkips", func(t *testing.T) {
781+
t.Parallel()
782+
client := &client{
783+
t: t,
784+
agentID: uuid.New(),
785+
manifest: agentsdk.Manifest{
786+
StartupScript: command,
787+
DERPMap: &tailcfg.DERPMap{},
788+
},
789+
patchWorkspaceLogs: func() error {
790+
resp := httptest.NewRecorder()
791+
httpapi.Write(context.Background(), resp, http.StatusRequestEntityTooLarge, codersdk.Response{
792+
Message: "Too many lines!",
793+
})
794+
res := resp.Result()
795+
defer res.Body.Close()
796+
return codersdk.ReadBodyAsError(res)
797+
},
798+
statsChan: make(chan *agentsdk.Stats),
799+
coordinator: tailnet.NewCoordinator(),
769800
}
770-
gotContent = string(content)
771-
return true
772-
}, testutil.WaitShort, testutil.IntervalMedium)
773-
require.Equal(t, content, strings.TrimSpace(gotContent))
801+
closer := agent.New(agent.Options{
802+
Client: client,
803+
Filesystem: afero.NewMemMapFs(),
804+
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
805+
ReconnectingPTYTimeout: 0,
806+
})
807+
t.Cleanup(func() {
808+
_ = closer.Close()
809+
})
810+
assert.Eventually(t, func() bool {
811+
got := client.getLifecycleStates()
812+
return len(got) > 0 && got[len(got)-1] == codersdk.WorkspaceAgentLifecycleReady
813+
}, testutil.WaitShort, testutil.IntervalMedium)
814+
require.Len(t, client.getStartupLogs(), 0)
815+
})
774816
}
775817

776818
func TestAgent_Metadata(t *testing.T) {
@@ -1590,10 +1632,12 @@ type client struct {
15901632
statsChan chan *agentsdk.Stats
15911633
coordinator tailnet.Coordinator
15921634
lastWorkspaceAgent func()
1635+
patchWorkspaceLogs func() error
15931636

15941637
mu sync.Mutex // Protects following.
15951638
lifecycleStates []codersdk.WorkspaceAgentLifecycle
15961639
startup agentsdk.PostStartupRequest
1640+
logs []agentsdk.StartupLog
15971641
}
15981642

15991643
func (c *client) Manifest(_ context.Context) (agentsdk.Manifest, error) {
@@ -1694,6 +1738,22 @@ func (c *client) PostStartup(_ context.Context, startup agentsdk.PostStartupRequ
16941738
return nil
16951739
}
16961740

1741+
func (c *client) getStartupLogs() []agentsdk.StartupLog {
1742+
c.mu.Lock()
1743+
defer c.mu.Unlock()
1744+
return c.logs
1745+
}
1746+
1747+
func (c *client) PatchStartupLogs(_ context.Context, logs agentsdk.PatchStartupLogs) error {
1748+
c.mu.Lock()
1749+
defer c.mu.Unlock()
1750+
if c.patchWorkspaceLogs != nil {
1751+
return c.patchWorkspaceLogs()
1752+
}
1753+
c.logs = append(c.logs, logs.Logs...)
1754+
return nil
1755+
}
1756+
16971757
// tempDirUnixSocket returns a temporary directory that can safely hold unix
16981758
// sockets (probably).
16991759
//

0 commit comments

Comments
 (0)