Skip to content

Commit 2c6fd6d

Browse files
committed
Merge branch 'provisioner-version-popups' into provisioner-types
2 parents 55360f7 + 37e3933 commit 2c6fd6d

File tree

345 files changed

+15064
-5131
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

345 files changed

+15064
-5131
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ jobs:
186186
187187
# Check for any typos
188188
- name: Check for typos
189-
uses: crate-ci/typos@v1.24.3
189+
uses: crate-ci/typos@v1.24.5
190190
with:
191191
config: .github/workflows/typos.toml
192192

.github/workflows/contrib.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ on:
1313
- opened
1414
- reopened
1515
- edited
16+
# For jobs that don't run on draft PRs.
17+
- ready_for_review
1618

1719
# Only run one instance per PR to ensure in-order execution.
1820
concurrency: pr-${{ github.ref }}
@@ -52,7 +54,7 @@ jobs:
5254
release-labels:
5355
runs-on: ubuntu-latest
5456
# Skip tagging for draft PRs.
55-
if: ${{ github.event_name == 'pull_request_target' && success() && !github.event.pull_request.draft }}
57+
if: ${{ github.event_name == 'pull_request_target' && !github.event.pull_request.draft }}
5658
steps:
5759
- name: release-labels
5860
uses: actions/github-script@v7

.github/workflows/stale.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ jobs:
1717
with:
1818
stale-issue-label: "stale"
1919
stale-pr-label: "stale"
20-
days-before-stale: 180
20+
# days-before-stale: 180
21+
# essentially disabled for now while we work through polish issues
22+
days-before-stale: 3650
23+
2124
# Pull Requests become stale more quickly due to merge conflicts.
2225
# Also, we promote minimizing WIP.
2326
days-before-pr-stale: 7

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,7 @@ lint/ts:
451451

452452
lint/go:
453453
./scripts/check_enterprise_imports.sh
454+
./scripts/check_codersdk_imports.sh
454455
linter_ver=$(shell egrep -o 'GOLANGCI_LINT_VERSION=\S+' dogfood/contents/Dockerfile | cut -d '=' -f 2)
455456
go run github.com/golangci/golangci-lint/cmd/golangci-lint@v$$linter_ver run
456457
.PHONY: lint/go

agent/agent.go

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,9 +1119,6 @@ func (a *agent) wireguardAddresses(agentID uuid.UUID) []netip.Prefix {
11191119
return []netip.Prefix{
11201120
// This is the IP that should be used primarily.
11211121
netip.PrefixFrom(tailnet.IPFromUUID(agentID), 128),
1122-
// We also listen on the legacy codersdk.WorkspaceAgentIP. This
1123-
// allows for a transition away from wsconncache.
1124-
netip.PrefixFrom(workspacesdk.AgentIP, 128),
11251122
}
11261123
}
11271124

@@ -1360,7 +1357,7 @@ func (a *agent) runCoordinator(ctx context.Context, conn drpc.Conn, network *tai
13601357
defer close(errCh)
13611358
select {
13621359
case <-ctx.Done():
1363-
err := coordination.Close()
1360+
err := coordination.Close(a.hardCtx)
13641361
if err != nil {
13651362
a.logger.Warn(ctx, "failed to close remote coordination", slog.Error(err))
13661363
}
@@ -1510,6 +1507,8 @@ func (a *agent) Collect(ctx context.Context, networkStats map[netlogtype.Connect
15101507
var mu sync.Mutex
15111508
status := a.network.Status()
15121509
durations := []float64{}
1510+
p2pConns := 0
1511+
derpConns := 0
15131512
pingCtx, cancelFunc := context.WithTimeout(ctx, 5*time.Second)
15141513
defer cancelFunc()
15151514
for nodeID, peer := range status.Peer {
@@ -1526,13 +1525,18 @@ func (a *agent) Collect(ctx context.Context, networkStats map[netlogtype.Connect
15261525
wg.Add(1)
15271526
go func() {
15281527
defer wg.Done()
1529-
duration, _, _, err := a.network.Ping(pingCtx, addresses[0].Addr())
1528+
duration, p2p, _, err := a.network.Ping(pingCtx, addresses[0].Addr())
15301529
if err != nil {
15311530
return
15321531
}
15331532
mu.Lock()
15341533
defer mu.Unlock()
15351534
durations = append(durations, float64(duration.Microseconds()))
1535+
if p2p {
1536+
p2pConns++
1537+
} else {
1538+
derpConns++
1539+
}
15361540
}()
15371541
}
15381542
wg.Wait()
@@ -1552,6 +1556,9 @@ func (a *agent) Collect(ctx context.Context, networkStats map[netlogtype.Connect
15521556
// Agent metrics are changing all the time, so there is no need to perform
15531557
// reflect.DeepEqual to see if stats should be transferred.
15541558

1559+
// currentConnections behaves like a hypothetical `GaugeFuncVec` and is only set at collection time.
1560+
a.metrics.currentConnections.WithLabelValues("p2p").Set(float64(p2pConns))
1561+
a.metrics.currentConnections.WithLabelValues("derp").Set(float64(derpConns))
15551562
metricsCtx, cancelFunc := context.WithTimeout(ctx, 5*time.Second)
15561563
defer cancelFunc()
15571564
a.logger.Debug(ctx, "collecting agent metrics for stats")
@@ -1669,13 +1676,12 @@ func (a *agent) manageProcessPriority(ctx context.Context, debouncer *logDebounc
16691676
}
16701677

16711678
score, niceErr := proc.Niceness(a.syscaller)
1672-
if niceErr != nil && !xerrors.Is(niceErr, os.ErrPermission) {
1679+
if !isBenignProcessErr(niceErr) {
16731680
debouncer.Warn(ctx, "unable to get proc niceness",
16741681
slog.F("cmd", proc.Cmd()),
16751682
slog.F("pid", proc.PID),
16761683
slog.Error(niceErr),
16771684
)
1678-
continue
16791685
}
16801686

16811687
// We only want processes that don't have a nice value set
@@ -1689,7 +1695,7 @@ func (a *agent) manageProcessPriority(ctx context.Context, debouncer *logDebounc
16891695

16901696
if niceErr == nil {
16911697
err := proc.SetNiceness(a.syscaller, niceness)
1692-
if err != nil && !xerrors.Is(err, os.ErrPermission) {
1698+
if !isBenignProcessErr(err) {
16931699
debouncer.Warn(ctx, "unable to set proc niceness",
16941700
slog.F("cmd", proc.Cmd()),
16951701
slog.F("pid", proc.PID),
@@ -1703,7 +1709,7 @@ func (a *agent) manageProcessPriority(ctx context.Context, debouncer *logDebounc
17031709
if oomScore != unsetOOMScore && oomScore != proc.OOMScoreAdj && !isCustomOOMScore(agentScore, proc) {
17041710
oomScoreStr := strconv.Itoa(oomScore)
17051711
err := afero.WriteFile(a.filesystem, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), []byte(oomScoreStr), 0o644)
1706-
if err != nil && !xerrors.Is(err, os.ErrPermission) {
1712+
if !isBenignProcessErr(err) {
17071713
debouncer.Warn(ctx, "unable to set oom_score_adj",
17081714
slog.F("cmd", proc.Cmd()),
17091715
slog.F("pid", proc.PID),
@@ -2139,3 +2145,14 @@ func (l *logDebouncer) log(ctx context.Context, level slog.Level, msg string, fi
21392145
}
21402146
l.messages[msg] = time.Now()
21412147
}
2148+
2149+
func isBenignProcessErr(err error) bool {
2150+
return err != nil &&
2151+
(xerrors.Is(err, os.ErrNotExist) ||
2152+
xerrors.Is(err, os.ErrPermission) ||
2153+
isNoSuchProcessErr(err))
2154+
}
2155+
2156+
// isNoSuchProcessErr reports whether err is non-nil and its message contains
// the exact, case-sensitive substring "no such process".
func isNoSuchProcessErr(err error) bool {
	if err == nil {
		return false
	}
	return strings.Contains(err.Error(), "no such process")
}

agent/agent_test.go

Lines changed: 58 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1896,7 +1896,9 @@ func TestAgent_UpdatedDERP(t *testing.T) {
18961896
coordinator, conn)
18971897
t.Cleanup(func() {
18981898
t.Logf("closing coordination %s", name)
1899-
err := coordination.Close()
1899+
cctx, ccancel := context.WithTimeout(testCtx, testutil.WaitShort)
1900+
defer ccancel()
1901+
err := coordination.Close(cctx)
19001902
if err != nil {
19011903
t.Logf("error closing in-memory coordination: %s", err.Error())
19021904
}
@@ -2384,7 +2386,9 @@ func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Durati
23842386
clientID, metadata.AgentID,
23852387
coordinator, conn)
23862388
t.Cleanup(func() {
2387-
err := coordination.Close()
2389+
cctx, ccancel := context.WithTimeout(testCtx, testutil.WaitShort)
2390+
defer ccancel()
2391+
err := coordination.Close(cctx)
23882392
if err != nil {
23892393
t.Logf("error closing in-mem coordination: %s", err.Error())
23902394
}
@@ -2531,17 +2535,17 @@ func TestAgent_Metrics_SSH(t *testing.T) {
25312535
err = session.Shell()
25322536
require.NoError(t, err)
25332537

2534-
expected := []agentsdk.AgentMetric{
2538+
expected := []*proto.Stats_Metric{
25352539
{
25362540
Name: "agent_reconnecting_pty_connections_total",
2537-
Type: agentsdk.AgentMetricTypeCounter,
2541+
Type: proto.Stats_Metric_COUNTER,
25382542
Value: 0,
25392543
},
25402544
{
25412545
Name: "agent_sessions_total",
2542-
Type: agentsdk.AgentMetricTypeCounter,
2546+
Type: proto.Stats_Metric_COUNTER,
25432547
Value: 1,
2544-
Labels: []agentsdk.AgentMetricLabel{
2548+
Labels: []*proto.Stats_Metric_Label{
25452549
{
25462550
Name: "magic_type",
25472551
Value: "ssh",
@@ -2554,30 +2558,46 @@ func TestAgent_Metrics_SSH(t *testing.T) {
25542558
},
25552559
{
25562560
Name: "agent_ssh_server_failed_connections_total",
2557-
Type: agentsdk.AgentMetricTypeCounter,
2561+
Type: proto.Stats_Metric_COUNTER,
25582562
Value: 0,
25592563
},
25602564
{
25612565
Name: "agent_ssh_server_sftp_connections_total",
2562-
Type: agentsdk.AgentMetricTypeCounter,
2566+
Type: proto.Stats_Metric_COUNTER,
25632567
Value: 0,
25642568
},
25652569
{
25662570
Name: "agent_ssh_server_sftp_server_errors_total",
2567-
Type: agentsdk.AgentMetricTypeCounter,
2571+
Type: proto.Stats_Metric_COUNTER,
25682572
Value: 0,
25692573
},
25702574
{
2571-
Name: "coderd_agentstats_startup_script_seconds",
2572-
Type: agentsdk.AgentMetricTypeGauge,
2575+
Name: "coderd_agentstats_currently_reachable_peers",
2576+
Type: proto.Stats_Metric_GAUGE,
25732577
Value: 0,
2574-
Labels: []agentsdk.AgentMetricLabel{
2578+
Labels: []*proto.Stats_Metric_Label{
2579+
{
2580+
Name: "connection_type",
2581+
Value: "derp",
2582+
},
2583+
},
2584+
},
2585+
{
2586+
Name: "coderd_agentstats_currently_reachable_peers",
2587+
Type: proto.Stats_Metric_GAUGE,
2588+
Value: 1,
2589+
Labels: []*proto.Stats_Metric_Label{
25752590
{
2576-
Name: "success",
2577-
Value: "true",
2591+
Name: "connection_type",
2592+
Value: "p2p",
25782593
},
25792594
},
25802595
},
2596+
{
2597+
Name: "coderd_agentstats_startup_script_seconds",
2598+
Type: proto.Stats_Metric_GAUGE,
2599+
Value: 1,
2600+
},
25812601
}
25822602

25832603
var actual []*promgo.MetricFamily
@@ -2586,17 +2606,33 @@ func TestAgent_Metrics_SSH(t *testing.T) {
25862606
if err != nil {
25872607
return false
25882608
}
2589-
2590-
if len(expected) != len(actual) {
2591-
return false
2609+
count := 0
2610+
for _, m := range actual {
2611+
count += len(m.GetMetric())
25922612
}
2593-
2594-
return verifyCollectedMetrics(t, expected, actual)
2613+
return count == len(expected)
25952614
}, testutil.WaitLong, testutil.IntervalFast)
25962615

2597-
require.Len(t, actual, len(expected))
2598-
collected := verifyCollectedMetrics(t, expected, actual)
2599-
require.True(t, collected, "expected metrics were not collected")
2616+
i := 0
2617+
for _, mf := range actual {
2618+
for _, m := range mf.GetMetric() {
2619+
assert.Equal(t, expected[i].Name, mf.GetName())
2620+
assert.Equal(t, expected[i].Type.String(), mf.GetType().String())
2621+
// Value is max expected
2622+
if expected[i].Type == proto.Stats_Metric_GAUGE {
2623+
assert.GreaterOrEqualf(t, expected[i].Value, m.GetGauge().GetValue(), "expected %s to be greater than or equal to %f, got %f", expected[i].Name, expected[i].Value, m.GetGauge().GetValue())
2624+
} else if expected[i].Type == proto.Stats_Metric_COUNTER {
2625+
assert.GreaterOrEqualf(t, expected[i].Value, m.GetCounter().GetValue(), "expected %s to be greater than or equal to %f, got %f", expected[i].Name, expected[i].Value, m.GetCounter().GetValue())
2626+
}
2627+
for j, lbl := range expected[i].Labels {
2628+
assert.Equal(t, m.GetLabel()[j], &promgo.LabelPair{
2629+
Name: &lbl.Name,
2630+
Value: &lbl.Value,
2631+
})
2632+
}
2633+
i++
2634+
}
2635+
}
26002636

26012637
_ = stdin.Close()
26022638
err = session.Wait()
@@ -2828,28 +2864,6 @@ func TestAgent_ManageProcessPriority(t *testing.T) {
28282864
})
28292865
}
28302866

2831-
func verifyCollectedMetrics(t *testing.T, expected []agentsdk.AgentMetric, actual []*promgo.MetricFamily) bool {
2832-
t.Helper()
2833-
2834-
for i, e := range expected {
2835-
assert.Equal(t, e.Name, actual[i].GetName())
2836-
assert.Equal(t, string(e.Type), strings.ToLower(actual[i].GetType().String()))
2837-
2838-
for _, m := range actual[i].GetMetric() {
2839-
assert.Equal(t, e.Value, m.Counter.GetValue())
2840-
2841-
if len(m.GetLabel()) > 0 {
2842-
for j, lbl := range m.GetLabel() {
2843-
assert.Equal(t, e.Labels[j].Name, lbl.GetName())
2844-
assert.Equal(t, e.Labels[j].Value, lbl.GetValue())
2845-
}
2846-
}
2847-
m.GetLabel()
2848-
}
2849-
}
2850-
return true
2851-
}
2852-
28532867
type syncWriter struct {
28542868
mu sync.Mutex
28552869
w io.Writer

agent/agentproc/proc_unix.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,15 @@ func List(fs afero.Fs, syscaller Syscaller) ([]*Process, error) {
4545

4646
cmdline, err := afero.ReadFile(fs, filepath.Join(defaultProcDir, entry, "cmdline"))
4747
if err != nil {
48-
var errNo syscall.Errno
49-
if xerrors.As(err, &errNo) && errNo == syscall.EPERM {
48+
if isBenignError(err) {
5049
continue
5150
}
5251
return nil, xerrors.Errorf("read cmdline: %w", err)
5352
}
5453

5554
oomScore, err := afero.ReadFile(fs, filepath.Join(defaultProcDir, entry, "oom_score_adj"))
5655
if err != nil {
57-
if xerrors.Is(err, os.ErrPermission) {
56+
if isBenignError(err) {
5857
continue
5958
}
6059

@@ -124,3 +123,12 @@ func (p *Process) Cmd() string {
124123
func (p *Process) cmdLine() []string {
125124
return strings.Split(p.CmdLine, "\x00")
126125
}
126+
127+
func isBenignError(err error) bool {
128+
var errno syscall.Errno
129+
if !xerrors.As(err, &errno) {
130+
return false
131+
}
132+
133+
return errno == syscall.ESRCH || errno == syscall.EPERM || xerrors.Is(err, os.ErrNotExist)
134+
}

agent/metrics.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ type agentMetrics struct {
1919
// startupScriptSeconds is the time in seconds that the start script(s)
2020
// took to run. This is reported once per agent.
2121
startupScriptSeconds *prometheus.GaugeVec
22+
currentConnections *prometheus.GaugeVec
2223
}
2324

2425
func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
@@ -45,10 +46,19 @@ func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
4546
}, []string{"success"})
4647
registerer.MustRegister(startupScriptSeconds)
4748

49+
currentConnections := prometheus.NewGaugeVec(prometheus.GaugeOpts{
50+
Namespace: "coderd",
51+
Subsystem: "agentstats",
52+
Name: "currently_reachable_peers",
53+
Help: "The number of peers (e.g. clients) that are currently reachable over the encrypted network.",
54+
}, []string{"connection_type"})
55+
registerer.MustRegister(currentConnections)
56+
4857
return &agentMetrics{
4958
connectionsTotal: connectionsTotal,
5059
reconnectingPTYErrors: reconnectingPTYErrors,
5160
startupScriptSeconds: startupScriptSeconds,
61+
currentConnections: currentConnections,
5262
}
5363
}
5464

0 commit comments

Comments
 (0)