Skip to content

Commit 17a9e8c

Browse files
committed
Merge branch 'main' into mes/filter-work-1
2 parents 7c1e859 + 33e896d commit 17a9e8c

File tree

202 files changed

+8483
-3931
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

202 files changed

+8483
-3931
lines changed

.github/workflows/stale.yaml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,10 @@ jobs:
1717
with:
1818
stale-issue-label: "stale"
1919
stale-pr-label: "stale"
20-
days-before-stale: 180
20+
# days-before-stale: 180
21+
# essentially disabled for now while we work through polish issues
22+
days-before-stale: 3650
23+
2124
# Pull Requests become stale more quickly due to merge conflicts.
2225
# Also, we promote minimizing WIP.
2326
days-before-pr-stale: 7

Makefile

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -451,6 +451,7 @@ lint/ts:
451451

452452
lint/go:
453453
./scripts/check_enterprise_imports.sh
454+
./scripts/check_codersdk_imports.sh
454455
linter_ver=$(shell egrep -o 'GOLANGCI_LINT_VERSION=\S+' dogfood/contents/Dockerfile | cut -d '=' -f 2)
455456
go run github.com/golangci/golangci-lint/cmd/golangci-lint@v$$linter_ver run
456457
.PHONY: lint/go

agent/agent.go

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1119,9 +1119,6 @@ func (a *agent) wireguardAddresses(agentID uuid.UUID) []netip.Prefix {
11191119
return []netip.Prefix{
11201120
// This is the IP that should be used primarily.
11211121
netip.PrefixFrom(tailnet.IPFromUUID(agentID), 128),
1122-
// We also listen on the legacy codersdk.WorkspaceAgentIP. This
1123-
// allows for a transition away from wsconncache.
1124-
netip.PrefixFrom(workspacesdk.AgentIP, 128),
11251122
}
11261123
}
11271124

@@ -1510,6 +1507,8 @@ func (a *agent) Collect(ctx context.Context, networkStats map[netlogtype.Connect
15101507
var mu sync.Mutex
15111508
status := a.network.Status()
15121509
durations := []float64{}
1510+
p2pConns := 0
1511+
derpConns := 0
15131512
pingCtx, cancelFunc := context.WithTimeout(ctx, 5*time.Second)
15141513
defer cancelFunc()
15151514
for nodeID, peer := range status.Peer {
@@ -1526,13 +1525,18 @@ func (a *agent) Collect(ctx context.Context, networkStats map[netlogtype.Connect
15261525
wg.Add(1)
15271526
go func() {
15281527
defer wg.Done()
1529-
duration, _, _, err := a.network.Ping(pingCtx, addresses[0].Addr())
1528+
duration, p2p, _, err := a.network.Ping(pingCtx, addresses[0].Addr())
15301529
if err != nil {
15311530
return
15321531
}
15331532
mu.Lock()
15341533
defer mu.Unlock()
15351534
durations = append(durations, float64(duration.Microseconds()))
1535+
if p2p {
1536+
p2pConns++
1537+
} else {
1538+
derpConns++
1539+
}
15361540
}()
15371541
}
15381542
wg.Wait()
@@ -1552,6 +1556,9 @@ func (a *agent) Collect(ctx context.Context, networkStats map[netlogtype.Connect
15521556
// Agent metrics are changing all the time, so there is no need to perform
15531557
// reflect.DeepEqual to see if stats should be transferred.
15541558

1559+
// currentConnections behaves like a hypothetical `GaugeFuncVec` and is only set at collection time.
1560+
a.metrics.currentConnections.WithLabelValues("p2p").Set(float64(p2pConns))
1561+
a.metrics.currentConnections.WithLabelValues("derp").Set(float64(derpConns))
15551562
metricsCtx, cancelFunc := context.WithTimeout(ctx, 5*time.Second)
15561563
defer cancelFunc()
15571564
a.logger.Debug(ctx, "collecting agent metrics for stats")

agent/agent_test.go

Lines changed: 52 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -2531,17 +2531,17 @@ func TestAgent_Metrics_SSH(t *testing.T) {
25312531
err = session.Shell()
25322532
require.NoError(t, err)
25332533

2534-
expected := []agentsdk.AgentMetric{
2534+
expected := []*proto.Stats_Metric{
25352535
{
25362536
Name: "agent_reconnecting_pty_connections_total",
2537-
Type: agentsdk.AgentMetricTypeCounter,
2537+
Type: proto.Stats_Metric_COUNTER,
25382538
Value: 0,
25392539
},
25402540
{
25412541
Name: "agent_sessions_total",
2542-
Type: agentsdk.AgentMetricTypeCounter,
2542+
Type: proto.Stats_Metric_COUNTER,
25432543
Value: 1,
2544-
Labels: []agentsdk.AgentMetricLabel{
2544+
Labels: []*proto.Stats_Metric_Label{
25452545
{
25462546
Name: "magic_type",
25472547
Value: "ssh",
@@ -2554,30 +2554,46 @@ func TestAgent_Metrics_SSH(t *testing.T) {
25542554
},
25552555
{
25562556
Name: "agent_ssh_server_failed_connections_total",
2557-
Type: agentsdk.AgentMetricTypeCounter,
2557+
Type: proto.Stats_Metric_COUNTER,
25582558
Value: 0,
25592559
},
25602560
{
25612561
Name: "agent_ssh_server_sftp_connections_total",
2562-
Type: agentsdk.AgentMetricTypeCounter,
2562+
Type: proto.Stats_Metric_COUNTER,
25632563
Value: 0,
25642564
},
25652565
{
25662566
Name: "agent_ssh_server_sftp_server_errors_total",
2567-
Type: agentsdk.AgentMetricTypeCounter,
2567+
Type: proto.Stats_Metric_COUNTER,
25682568
Value: 0,
25692569
},
25702570
{
2571-
Name: "coderd_agentstats_startup_script_seconds",
2572-
Type: agentsdk.AgentMetricTypeGauge,
2571+
Name: "coderd_agentstats_currently_reachable_peers",
2572+
Type: proto.Stats_Metric_GAUGE,
25732573
Value: 0,
2574-
Labels: []agentsdk.AgentMetricLabel{
2574+
Labels: []*proto.Stats_Metric_Label{
2575+
{
2576+
Name: "connection_type",
2577+
Value: "derp",
2578+
},
2579+
},
2580+
},
2581+
{
2582+
Name: "coderd_agentstats_currently_reachable_peers",
2583+
Type: proto.Stats_Metric_GAUGE,
2584+
Value: 1,
2585+
Labels: []*proto.Stats_Metric_Label{
25752586
{
2576-
Name: "success",
2577-
Value: "true",
2587+
Name: "connection_type",
2588+
Value: "p2p",
25782589
},
25792590
},
25802591
},
2592+
{
2593+
Name: "coderd_agentstats_startup_script_seconds",
2594+
Type: proto.Stats_Metric_GAUGE,
2595+
Value: 1,
2596+
},
25812597
}
25822598

25832599
var actual []*promgo.MetricFamily
@@ -2586,17 +2602,33 @@ func TestAgent_Metrics_SSH(t *testing.T) {
25862602
if err != nil {
25872603
return false
25882604
}
2589-
2590-
if len(expected) != len(actual) {
2591-
return false
2605+
count := 0
2606+
for _, m := range actual {
2607+
count += len(m.GetMetric())
25922608
}
2593-
2594-
return verifyCollectedMetrics(t, expected, actual)
2609+
return count == len(expected)
25952610
}, testutil.WaitLong, testutil.IntervalFast)
25962611

2597-
require.Len(t, actual, len(expected))
2598-
collected := verifyCollectedMetrics(t, expected, actual)
2599-
require.True(t, collected, "expected metrics were not collected")
2612+
i := 0
2613+
for _, mf := range actual {
2614+
for _, m := range mf.GetMetric() {
2615+
assert.Equal(t, expected[i].Name, mf.GetName())
2616+
assert.Equal(t, expected[i].Type.String(), mf.GetType().String())
2617+
// Value is max expected
2618+
if expected[i].Type == proto.Stats_Metric_GAUGE {
2619+
assert.GreaterOrEqualf(t, expected[i].Value, m.GetGauge().GetValue(), "expected %s to be greater than or equal to %f, got %f", expected[i].Name, expected[i].Value, m.GetGauge().GetValue())
2620+
} else if expected[i].Type == proto.Stats_Metric_COUNTER {
2621+
assert.GreaterOrEqualf(t, expected[i].Value, m.GetCounter().GetValue(), "expected %s to be greater than or equal to %f, got %f", expected[i].Name, expected[i].Value, m.GetCounter().GetValue())
2622+
}
2623+
for j, lbl := range expected[i].Labels {
2624+
assert.Equal(t, m.GetLabel()[j], &promgo.LabelPair{
2625+
Name: &lbl.Name,
2626+
Value: &lbl.Value,
2627+
})
2628+
}
2629+
i++
2630+
}
2631+
}
26002632

26012633
_ = stdin.Close()
26022634
err = session.Wait()
@@ -2828,28 +2860,6 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
28282860
})
28292861
}
28302862

2831-
func verifyCollectedMetrics(t *testing.T, expected []agentsdk.AgentMetric, actual []*promgo.MetricFamily) bool {
2832-
t.Helper()
2833-
2834-
for i, e := range expected {
2835-
assert.Equal(t, e.Name, actual[i].GetName())
2836-
assert.Equal(t, string(e.Type), strings.ToLower(actual[i].GetType().String()))
2837-
2838-
for _, m := range actual[i].GetMetric() {
2839-
assert.Equal(t, e.Value, m.Counter.GetValue())
2840-
2841-
if len(m.GetLabel()) > 0 {
2842-
for j, lbl := range m.GetLabel() {
2843-
assert.Equal(t, e.Labels[j].Name, lbl.GetName())
2844-
assert.Equal(t, e.Labels[j].Value, lbl.GetValue())
2845-
}
2846-
}
2847-
m.GetLabel()
2848-
}
2849-
}
2850-
return true
2851-
}
2852-
28532863
type syncWriter struct {
28542864
mu sync.Mutex
28552865
w io.Writer

agent/metrics.go

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ type agentMetrics struct {
1919
// startupScriptSeconds is the time in seconds that the start script(s)
2020
// took to run. This is reported once per agent.
2121
startupScriptSeconds *prometheus.GaugeVec
22+
currentConnections *prometheus.GaugeVec
2223
}
2324

2425
func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
@@ -45,10 +46,19 @@ func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
4546
}, []string{"success"})
4647
registerer.MustRegister(startupScriptSeconds)
4748

49+
currentConnections := prometheus.NewGaugeVec(prometheus.GaugeOpts{
50+
Namespace: "coderd",
51+
Subsystem: "agentstats",
52+
Name: "currently_reachable_peers",
53+
Help: "The number of peers (e.g. clients) that are currently reachable over the encrypted network.",
54+
}, []string{"connection_type"})
55+
registerer.MustRegister(currentConnections)
56+
4857
return &agentMetrics{
4958
connectionsTotal: connectionsTotal,
5059
reconnectingPTYErrors: reconnectingPTYErrors,
5160
startupScriptSeconds: startupScriptSeconds,
61+
currentConnections: currentConnections,
5262
}
5363
}
5464

0 commit comments

Comments
 (0)