Skip to content

Commit 909801c

Browse files
committed
Merge remote-tracking branch 'origin/main' into stevenmasley/regions
2 parents 51bdaa2 + 77d9937 commit 909801c

File tree

29 files changed

+1339
-164
lines changed

29 files changed

+1339
-164
lines changed

.github/workflows/ci.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ jobs:
301301
echo "cover=false" >> $GITHUB_OUTPUT
302302
fi
303303
304+
export TS_DEBUG_DISCO=true
304305
gotestsum --junitfile="gotests.xml" --jsonfile="gotests.json" --packages="./..." -- -parallel=8 -timeout=7m -short -failfast $COVERAGE_FLAGS
305306
306307
- name: Print test stats
@@ -377,6 +378,7 @@ jobs:
377378

378379
- name: Test with PostgreSQL Database
379380
run: |
381+
export TS_DEBUG_DISCO=true
380382
make test-postgres
381383
382384
- name: Print test stats

agent/agent.go

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import (
1616
"os"
1717
"os/user"
1818
"path/filepath"
19-
"reflect"
2019
"sort"
2120
"strconv"
2221
"strings"
@@ -653,6 +652,7 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
653652
}
654653
break
655654
}
655+
logger.Debug(ctx, "accepted conn", slog.F("remote", conn.RemoteAddr().String()))
656656
wg.Add(1)
657657
closed := make(chan struct{})
658658
go func() {
@@ -681,6 +681,7 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
681681
var msg codersdk.WorkspaceAgentReconnectingPTYInit
682682
err = json.Unmarshal(data, &msg)
683683
if err != nil {
684+
logger.Warn(ctx, "failed to unmarshal init", slog.F("raw", data))
684685
return
685686
}
686687
_ = a.handleReconnectingPTY(ctx, logger, msg, conn)
@@ -972,6 +973,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
972973

973974
connectionID := uuid.NewString()
974975
logger = logger.With(slog.F("id", msg.ID), slog.F("connection_id", connectionID))
976+
logger.Debug(ctx, "starting handler")
975977

976978
defer func() {
977979
if err := retErr; err != nil {
@@ -1039,6 +1041,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10391041
// 1. The timeout completed.
10401042
// 2. The parent context was canceled.
10411043
<-ctx.Done()
1044+
logger.Debug(ctx, "context done", slog.Error(ctx.Err()))
10421045
_ = process.Kill()
10431046
}()
10441047
// We don't need to separately monitor for the process exiting.
@@ -1050,6 +1053,8 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10501053
read, err := rpty.ptty.OutputReader().Read(buffer)
10511054
if err != nil {
10521055
// When the PTY is closed, this is triggered.
1056+
// Error is typically a benign EOF, so only log for debugging.
1057+
logger.Debug(ctx, "unable to read pty output, command exited?", slog.Error(err))
10531058
break
10541059
}
10551060
part := buffer[:read]
@@ -1061,8 +1066,15 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
10611066
break
10621067
}
10631068
rpty.activeConnsMutex.Lock()
1064-
for _, conn := range rpty.activeConns {
1065-
_, _ = conn.Write(part)
1069+
for cid, conn := range rpty.activeConns {
1070+
_, err = conn.Write(part)
1071+
if err != nil {
1072+
logger.Debug(ctx,
1073+
"error writing to active conn",
1074+
slog.F("other_conn_id", cid),
1075+
slog.Error(err),
1076+
)
1077+
}
10661078
}
10671079
rpty.activeConnsMutex.Unlock()
10681080
}
@@ -1223,11 +1235,11 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
12231235
// Convert from microseconds to milliseconds.
12241236
stats.ConnectionMedianLatencyMS /= 1000
12251237

1226-
lastStat := a.latestStat.Load()
1227-
if lastStat != nil && reflect.DeepEqual(lastStat, stats) {
1228-
a.logger.Info(ctx, "skipping stat because nothing changed")
1229-
return
1230-
}
1238+
// Collect agent metrics.
1239+
// Agent metrics are changing all the time, so there is no need to perform
1240+
// reflect.DeepEqual to see if stats should be transferred.
1241+
stats.Metrics = collectMetrics()
1242+
12311243
a.latestStat.Store(stats)
12321244

12331245
select {

agent/agent_test.go

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,7 @@ func TestAgent_StartupScript(t *testing.T) {
879879
}
880880
t.Run("Success", func(t *testing.T) {
881881
t.Parallel()
882+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
882883
client := &client{
883884
t: t,
884885
agentID: uuid.New(),
@@ -887,12 +888,12 @@ func TestAgent_StartupScript(t *testing.T) {
887888
DERPMap: &tailcfg.DERPMap{},
888889
},
889890
statsChan: make(chan *agentsdk.Stats),
890-
coordinator: tailnet.NewCoordinator(),
891+
coordinator: tailnet.NewCoordinator(logger),
891892
}
892893
closer := agent.New(agent.Options{
893894
Client: client,
894895
Filesystem: afero.NewMemMapFs(),
895-
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
896+
Logger: logger.Named("agent"),
896897
ReconnectingPTYTimeout: 0,
897898
})
898899
t.Cleanup(func() {
@@ -910,6 +911,7 @@ func TestAgent_StartupScript(t *testing.T) {
910911
// script has written too many lines it will still succeed!
911912
t.Run("OverflowsAndSkips", func(t *testing.T) {
912913
t.Parallel()
914+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
913915
client := &client{
914916
t: t,
915917
agentID: uuid.New(),
@@ -927,12 +929,12 @@ func TestAgent_StartupScript(t *testing.T) {
927929
return codersdk.ReadBodyAsError(res)
928930
},
929931
statsChan: make(chan *agentsdk.Stats),
930-
coordinator: tailnet.NewCoordinator(),
932+
coordinator: tailnet.NewCoordinator(logger),
931933
}
932934
closer := agent.New(agent.Options{
933935
Client: client,
934936
Filesystem: afero.NewMemMapFs(),
935-
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
937+
Logger: logger.Named("agent"),
936938
ReconnectingPTYTimeout: 0,
937939
})
938940
t.Cleanup(func() {
@@ -1282,7 +1284,7 @@ func TestAgent_Lifecycle(t *testing.T) {
12821284

12831285
t.Run("ShutdownScriptOnce", func(t *testing.T) {
12841286
t.Parallel()
1285-
1287+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
12861288
expected := "this-is-shutdown"
12871289
client := &client{
12881290
t: t,
@@ -1293,13 +1295,13 @@ func TestAgent_Lifecycle(t *testing.T) {
12931295
ShutdownScript: "echo " + expected,
12941296
},
12951297
statsChan: make(chan *agentsdk.Stats),
1296-
coordinator: tailnet.NewCoordinator(),
1298+
coordinator: tailnet.NewCoordinator(logger),
12971299
}
12981300

12991301
fs := afero.NewMemMapFs()
13001302
agent := agent.New(agent.Options{
13011303
Client: client,
1302-
Logger: slogtest.Make(t, nil).Leveled(slog.LevelInfo),
1304+
Logger: logger.Named("agent"),
13031305
Filesystem: fs,
13041306
})
13051307

@@ -1548,9 +1550,10 @@ func TestAgent_Speedtest(t *testing.T) {
15481550

15491551
func TestAgent_Reconnect(t *testing.T) {
15501552
t.Parallel()
1553+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
15511554
// After the agent is disconnected from a coordinator, it's supposed
15521555
// to reconnect!
1553-
coordinator := tailnet.NewCoordinator()
1556+
coordinator := tailnet.NewCoordinator(logger)
15541557
defer coordinator.Close()
15551558

15561559
agentID := uuid.New()
@@ -1572,7 +1575,7 @@ func TestAgent_Reconnect(t *testing.T) {
15721575
return "", nil
15731576
},
15741577
Client: client,
1575-
Logger: slogtest.Make(t, nil).Leveled(slog.LevelInfo),
1578+
Logger: logger.Named("agent"),
15761579
})
15771580
defer closer.Close()
15781581

@@ -1587,8 +1590,8 @@ func TestAgent_Reconnect(t *testing.T) {
15871590

15881591
func TestAgent_WriteVSCodeConfigs(t *testing.T) {
15891592
t.Parallel()
1590-
1591-
coordinator := tailnet.NewCoordinator()
1593+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
1594+
coordinator := tailnet.NewCoordinator(logger)
15921595
defer coordinator.Close()
15931596

15941597
client := &client{
@@ -1607,7 +1610,7 @@ func TestAgent_WriteVSCodeConfigs(t *testing.T) {
16071610
return "", nil
16081611
},
16091612
Client: client,
1610-
Logger: slogtest.Make(t, nil).Leveled(slog.LevelInfo),
1613+
Logger: logger.Named("agent"),
16111614
Filesystem: filesystem,
16121615
})
16131616
defer closer.Close()
@@ -1698,10 +1701,11 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati
16981701
afero.Fs,
16991702
io.Closer,
17001703
) {
1704+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
17011705
if metadata.DERPMap == nil {
17021706
metadata.DERPMap = tailnettest.RunDERPAndSTUN(t)
17031707
}
1704-
coordinator := tailnet.NewCoordinator()
1708+
coordinator := tailnet.NewCoordinator(logger)
17051709
t.Cleanup(func() {
17061710
_ = coordinator.Close()
17071711
})
@@ -1718,7 +1722,7 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati
17181722
closer := agent.New(agent.Options{
17191723
Client: c,
17201724
Filesystem: fs,
1721-
Logger: slogtest.Make(t, nil).Named("agent").Leveled(slog.LevelDebug),
1725+
Logger: logger.Named("agent"),
17221726
ReconnectingPTYTimeout: ptyTimeout,
17231727
})
17241728
t.Cleanup(func() {
@@ -1727,7 +1731,7 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati
17271731
conn, err := tailnet.NewConn(&tailnet.Options{
17281732
Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)},
17291733
DERPMap: metadata.DERPMap,
1730-
Logger: slogtest.Make(t, nil).Named("client").Leveled(slog.LevelDebug),
1734+
Logger: logger.Named("client"),
17311735
})
17321736
require.NoError(t, err)
17331737
clientConn, serverConn := net.Pipe()

agent/metrics.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package agent
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
"tailscale.com/util/clientmetric"
8+
9+
"github.com/coder/coder/codersdk/agentsdk"
10+
)
11+
12+
func collectMetrics() []agentsdk.AgentMetric {
13+
// Tailscale metrics
14+
metrics := clientmetric.Metrics()
15+
collected := make([]agentsdk.AgentMetric, 0, len(metrics))
16+
for _, m := range metrics {
17+
if isIgnoredMetric(m.Name()) {
18+
continue
19+
}
20+
21+
collected = append(collected, agentsdk.AgentMetric{
22+
Name: m.Name(),
23+
Type: asMetricType(m.Type()),
24+
Value: float64(m.Value()),
25+
})
26+
}
27+
return collected
28+
}
29+
30+
// isIgnoredMetric reports whether the metric with the given name should be
// dropped instead of collected, because the Coder agent doesn't use the
// related feature.
//
// A metric is ignored when its name starts with one of these prefixes:
// dns_, controlclient_, peerapi_, profiles_, tstun_.
// Every other family (for example magicsock_*, derp_*, netcheck_*, portmap_*)
// is kept.
func isIgnoredMetric(metricName string) bool {
	// Prefix matching is case-sensitive and anchored at the start of the name,
	// so "dns" (no underscore) or "xdns_foo" are not ignored.
	ignoredPrefixes := [...]string{
		"dns_",
		"controlclient_",
		"peerapi_",
		"profiles_",
		"tstun_",
	}
	for _, prefix := range ignoredPrefixes {
		if strings.HasPrefix(metricName, prefix) {
			return true
		}
	}
	return false
}
42+
43+
func asMetricType(typ clientmetric.Type) agentsdk.AgentMetricType {
44+
switch typ {
45+
case clientmetric.TypeGauge:
46+
return agentsdk.AgentMetricTypeGauge
47+
case clientmetric.TypeCounter:
48+
return agentsdk.AgentMetricTypeCounter
49+
default:
50+
panic(fmt.Sprintf("unknown metric type: %d", typ))
51+
}
52+
}

cli/server.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,20 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
723723
return xerrors.Errorf("register agent stats prometheus metric: %w", err)
724724
}
725725
defer closeAgentStatsFunc()
726+
727+
metricsAggregator, err := prometheusmetrics.NewMetricsAggregator(logger, options.PrometheusRegistry, 0)
728+
if err != nil {
729+
return xerrors.Errorf("can't initialize metrics aggregator: %w", err)
730+
}
731+
732+
cancelMetricsAggregator := metricsAggregator.Run(ctx)
733+
defer cancelMetricsAggregator()
734+
735+
options.UpdateAgentMetrics = metricsAggregator.Update
736+
err = options.PrometheusRegistry.Register(metricsAggregator)
737+
if err != nil {
738+
return xerrors.Errorf("can't register metrics aggregator as collector: %w", err)
739+
}
726740
}
727741

728742
//nolint:revive

coderd/apidoc/docs.go

Lines changed: 45 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)