Skip to content

Commit b09d9eb

Browse files
committed
Merge branch 'main' into 7154-basic-test-scenario
2 parents d7179b2 + 59efa4a commit b09d9eb

File tree

176 files changed

+9406
-2369
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

176 files changed

+9406
-2369
lines changed

.github/workflows/ci.yaml

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,7 @@ jobs:
152152

153153
- uses: actions/setup-go@v4
154154
with:
155+
cache: false
155156
go-version: "~1.20"
156157

157158
- name: Echo Go Cache Paths
@@ -252,6 +253,7 @@ jobs:
252253

253254
- uses: actions/setup-go@v4
254255
with:
256+
cache: false
255257
go-version: "~1.20"
256258

257259
- name: Echo Go Cache Paths
@@ -299,6 +301,7 @@ jobs:
299301
echo "cover=false" >> $GITHUB_OUTPUT
300302
fi
301303
304+
export TS_DEBUG_DISCO=true
302305
gotestsum --junitfile="gotests.xml" --jsonfile="gotests.json" --packages="./..." -- -parallel=8 -timeout=7m -short -failfast $COVERAGE_FLAGS
303306
304307
- name: Print test stats
@@ -339,6 +342,7 @@ jobs:
339342

340343
- uses: actions/setup-go@v4
341344
with:
345+
cache: false
342346
go-version: "~1.20"
343347

344348
- name: Echo Go Cache Paths
@@ -374,6 +378,7 @@ jobs:
374378

375379
- name: Test with PostgreSQL Database
376380
run: |
381+
export TS_DEBUG_DISCO=true
377382
make test-postgres
378383
379384
- name: Print test stats
@@ -429,6 +434,7 @@ jobs:
429434

430435
- uses: actions/setup-go@v4
431436
with:
437+
cache: false
432438
go-version: "~1.20"
433439

434440
- name: Echo Go Cache Paths
@@ -558,6 +564,7 @@ jobs:
558564

559565
- uses: actions/setup-go@v4
560566
with:
567+
cache: false
561568
go-version: "~1.20"
562569

563570
- uses: hashicorp/setup-terraform@v2

agent/agent.go

Lines changed: 56 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@ import (
1616
"os"
1717
"os/user"
1818
"path/filepath"
19-
"reflect"
2019
"sort"
2120
"strconv"
2221
"strings"
@@ -60,7 +59,7 @@ type Options struct {
6059
ReconnectingPTYTimeout time.Duration
6160
EnvironmentVariables map[string]string
6261
Logger slog.Logger
63-
AgentPorts map[int]string
62+
IgnorePorts map[int]string
6463
SSHMaxTimeout time.Duration
6564
TailnetListenPort uint16
6665
}
@@ -76,7 +75,12 @@ type Client interface {
7675
PatchStartupLogs(ctx context.Context, req agentsdk.PatchStartupLogs) error
7776
}
7877

79-
func New(options Options) io.Closer {
78+
type Agent interface {
79+
HTTPDebug() http.Handler
80+
io.Closer
81+
}
82+
83+
func New(options Options) Agent {
8084
if options.ReconnectingPTYTimeout == 0 {
8185
options.ReconnectingPTYTimeout = 5 * time.Minute
8286
}
@@ -112,7 +116,7 @@ func New(options Options) io.Closer {
112116
tempDir: options.TempDir,
113117
lifecycleUpdate: make(chan struct{}, 1),
114118
lifecycleReported: make(chan codersdk.WorkspaceAgentLifecycle, 1),
115-
ignorePorts: options.AgentPorts,
119+
ignorePorts: options.IgnorePorts,
116120
connStatsChan: make(chan *agentsdk.Stats, 1),
117121
sshMaxTimeout: options.SSHMaxTimeout,
118122
}
@@ -161,7 +165,7 @@ type agent struct {
161165
}
162166

163167
func (a *agent) init(ctx context.Context) {
164-
sshSrv, err := agentssh.NewServer(ctx, a.logger.Named("ssh-server"), a.sshMaxTimeout)
168+
sshSrv, err := agentssh.NewServer(ctx, a.logger.Named("ssh-server"), a.filesystem, a.sshMaxTimeout, "")
165169
if err != nil {
166170
panic(err)
167171
}
@@ -648,6 +652,7 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
648652
}
649653
break
650654
}
655+
logger.Debug(ctx, "accepted conn", slog.F("remote", conn.RemoteAddr().String()))
651656
wg.Add(1)
652657
closed := make(chan struct{})
653658
go func() {
@@ -676,6 +681,7 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
676681
var msg codersdk.WorkspaceAgentReconnectingPTYInit
677682
err = json.Unmarshal(data, &msg)
678683
if err != nil {
684+
logger.Warn(ctx, "failed to unmarshal init", slog.F("raw", data))
679685
return
680686
}
681687
_ = a.handleReconnectingPTY(ctx, logger, msg, conn)
@@ -967,6 +973,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
967973

968974
connectionID := uuid.NewString()
969975
logger = logger.With(slog.F("id", msg.ID), slog.F("connection_id", connectionID))
976+
logger.Debug(ctx, "starting handler")
970977

971978
defer func() {
972979
if err := retErr; err != nil {
@@ -1034,20 +1041,20 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10341041
// 1. The timeout completed.
10351042
// 2. The parent context was canceled.
10361043
<-ctx.Done()
1044+
logger.Debug(ctx, "context done", slog.Error(ctx.Err()))
10371045
_ = process.Kill()
10381046
}()
1039-
go func() {
1040-
// If the process dies randomly, we should
1041-
// close the pty.
1042-
_ = process.Wait()
1043-
rpty.Close()
1044-
}()
1047+
// We don't need to separately monitor for the process exiting.
1048+
// When it exits, our ptty.OutputReader() will return EOF after
1049+
// reading all process output.
10451050
if err = a.trackConnGoroutine(func() {
10461051
buffer := make([]byte, 1024)
10471052
for {
1048-
read, err := rpty.ptty.Output().Read(buffer)
1053+
read, err := rpty.ptty.OutputReader().Read(buffer)
10491054
if err != nil {
10501055
// When the PTY is closed, this is triggered.
1056+
// Error is typically a benign EOF, so only log for debugging.
1057+
logger.Debug(ctx, "unable to read pty output, command exited?", slog.Error(err))
10511058
break
10521059
}
10531060
part := buffer[:read]
@@ -1059,8 +1066,15 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10591066
break
10601067
}
10611068
rpty.activeConnsMutex.Lock()
1062-
for _, conn := range rpty.activeConns {
1063-
_, _ = conn.Write(part)
1069+
for cid, conn := range rpty.activeConns {
1070+
_, err = conn.Write(part)
1071+
if err != nil {
1072+
logger.Debug(ctx,
1073+
"error writing to active conn",
1074+
slog.F("other_conn_id", cid),
1075+
slog.Error(err),
1076+
)
1077+
}
10641078
}
10651079
rpty.activeConnsMutex.Unlock()
10661080
}
@@ -1138,7 +1152,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
11381152
logger.Warn(ctx, "read conn", slog.Error(err))
11391153
return nil
11401154
}
1141-
_, err = rpty.ptty.Input().Write([]byte(req.Data))
1155+
_, err = rpty.ptty.InputWriter().Write([]byte(req.Data))
11421156
if err != nil {
11431157
logger.Warn(ctx, "write to pty", slog.Error(err))
11441158
return nil
@@ -1221,11 +1235,11 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
12211235
// Convert from microseconds to milliseconds.
12221236
stats.ConnectionMedianLatencyMS /= 1000
12231237

1224-
lastStat := a.latestStat.Load()
1225-
if lastStat != nil && reflect.DeepEqual(lastStat, stats) {
1226-
a.logger.Info(ctx, "skipping stat because nothing changed")
1227-
return
1228-
}
1238+
// Collect agent metrics.
1239+
// Agent metrics are changing all the time, so there is no need to perform
1240+
// reflect.DeepEqual to see if stats should be transferred.
1241+
stats.Metrics = collectMetrics()
1242+
12291243
a.latestStat.Store(stats)
12301244

12311245
select {
@@ -1267,6 +1281,27 @@ func (a *agent) isClosed() bool {
12671281
}
12681282
}
12691283

1284+
func (a *agent) HTTPDebug() http.Handler {
1285+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
1286+
a.closeMutex.Lock()
1287+
network := a.network
1288+
a.closeMutex.Unlock()
1289+
1290+
if network == nil {
1291+
w.WriteHeader(http.StatusOK)
1292+
_, _ = w.Write([]byte("network is not ready yet"))
1293+
return
1294+
}
1295+
1296+
if r.URL.Path == "/debug/magicsock" {
1297+
network.MagicsockServeHTTPDebug(w, r)
1298+
} else {
1299+
w.WriteHeader(http.StatusNotFound)
1300+
_, _ = w.Write([]byte("404 not found"))
1301+
}
1302+
})
1303+
}
1304+
12701305
func (a *agent) Close() error {
12711306
a.closeMutex.Lock()
12721307
defer a.closeMutex.Unlock()
@@ -1358,7 +1393,7 @@ type reconnectingPTY struct {
13581393
circularBuffer *circbuf.Buffer
13591394
circularBufferMutex sync.RWMutex
13601395
timeout *time.Timer
1361-
ptty pty.PTY
1396+
ptty pty.PTYCmd
13621397
}
13631398

13641399
// Close ends all connections to the reconnecting

0 commit comments

Comments
 (0)