Skip to content

Commit 07fe74a

Browse files
committed
Export the metric as a normal Prometheus metric rather than as a
first-class stat
1 parent 5d39495 commit 07fe74a

File tree

12 files changed

+82
-215
lines changed

12 files changed

+82
-215
lines changed

agent/agent.go

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ type agent struct {
226226
// metrics are prometheus registered metrics that will be collected and
227227
// labeled in Coder with the agent + workspace.
228228
metrics *agentMetrics
229-
stats agentStats
230229
syscaller agentproc.Syscaller
231230

232231
// modifiedProcs is used for testing process priority management.
@@ -767,15 +766,18 @@ func (a *agent) run(ctx context.Context) error {
767766
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleReady)
768767
}
769768

770-
dur := time.Since(start).Nanoseconds()
769+
dur := time.Since(start).Seconds()
771770
// If something really took 0 ns, just set it to 1 to indicate that it ran.
772771
// Otherwise, 0 looks like the startup script has not run yet. I don't think
773772
// this will ever be 1ns
774773
if dur == 0 {
775774
dur = 1
776775
}
777-
a.stats.startScriptNs.Store(dur)
778-
a.stats.startScriptSuccess.Store(err == nil)
776+
label := "false"
777+
if err == nil {
778+
label = "true"
779+
}
780+
a.metrics.startScriptNs.WithLabelValues(label).Set(float64(dur))
779781
a.scriptRunner.StartCron()
780782
})
781783
if err != nil {
@@ -1203,11 +1205,6 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
12031205
stats.TxPackets += int64(counts.TxPackets)
12041206
}
12051207

1206-
// Load the latest startup script stats. These stats are static
1207-
// once the agent has started.
1208-
stats.StartupScriptNs = a.stats.startScriptNs.Load()
1209-
stats.StartupScriptSuccess = a.stats.startScriptSuccess.Load()
1210-
12111208
// The count of active sessions.
12121209
sshStats := a.sshServer.ConnStats()
12131210
stats.SessionCountSSH = sshStats.Sessions

agent/metrics.go

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,30 +7,22 @@ import (
77

88
"github.com/prometheus/client_golang/prometheus"
99
prompb "github.com/prometheus/client_model/go"
10-
"go.uber.org/atomic"
1110
"tailscale.com/util/clientmetric"
1211

1312
"cdr.dev/slog"
1413

1514
"github.com/coder/coder/v2/codersdk/agentsdk"
1615
)
1716

18-
// agentStats unlike agentMetrics, are not prometheus metrics. Prometheus' metrics
19-
// are sent to Coder as generic "metrics" that get labeled and reported for each
20-
// workspace. agentStats are sent to Coder as first-class metrics that Coder decides
21-
// how to aggregate and report.
22-
type agentStats struct {
23-
// startScriptNs is the time in nanoseconds that the start script(s)
24-
// took to run. This is reported once per agent, and is collected into a
25-
// histogram by Coder.
26-
startScriptNs atomic.Int64
27-
// startScriptSuccess should be ignored if startScriptReadyMs is 0.
28-
startScriptSuccess atomic.Bool
29-
}
17+
3018

3119
type agentMetrics struct {
3220
connectionsTotal prometheus.Counter
3321
reconnectingPTYErrors *prometheus.CounterVec
22+
// startScriptNs is the time in seconds that the start script(s)
23+
// took to run. This is reported once per agent, and is collected into a
24+
// histogram by Coder.
25+
startScriptNs *prometheus.GaugeVec
3426
}
3527

3628
func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
@@ -49,9 +41,18 @@ func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
4941
)
5042
registerer.MustRegister(reconnectingPTYErrors)
5143

44+
startScriptNs := prometheus.NewGaugeVec(prometheus.GaugeOpts{
45+
Namespace: "coderd",
46+
Subsystem: "agentstats",
47+
Name: "startup_script_s",
48+
Help: "Amount of time taken to run the startup script in seconds.",
49+
}, []string{"success"})
50+
registerer.MustRegister(startScriptNs)
51+
5252
return &agentMetrics{
5353
connectionsTotal: connectionsTotal,
5454
reconnectingPTYErrors: reconnectingPTYErrors,
55+
startScriptNs: startScriptNs,
5556
}
5657
}
5758

coderd/batchstats/batcher.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,6 @@ func (b *Batcher) Add(
162162
b.buf.SessionCountReconnectingPTY = append(b.buf.SessionCountReconnectingPTY, st.SessionCountReconnectingPTY)
163163
b.buf.SessionCountSSH = append(b.buf.SessionCountSSH, st.SessionCountSSH)
164164
b.buf.ConnectionMedianLatencyMS = append(b.buf.ConnectionMedianLatencyMS, st.ConnectionMedianLatencyMS)
165-
b.buf.StartupScriptNs = append(b.buf.StartupScriptNs, st.StartupScriptNs)
166-
b.buf.StartupScriptSuccess = append(b.buf.StartupScriptSuccess, st.StartupScriptSuccess)
167165

168166
// If the buffer is over 80% full, signal the flusher to flush immediately.
169167
// We want to trigger flushes early to reduce the likelihood of

coderd/database/dump.sql

Lines changed: 1 addition & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/migrations/000176_agent_startup_script_stats.down.sql

Lines changed: 0 additions & 2 deletions
This file was deleted.

coderd/database/migrations/000176_agent_startup_script_stats.up.sql

Lines changed: 0 additions & 5 deletions
This file was deleted.

coderd/database/models.go

Lines changed: 0 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries.sql.go

Lines changed: 20 additions & 46 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/workspaceagentstats.sql

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,7 @@ INSERT INTO
4141
session_count_jetbrains,
4242
session_count_reconnecting_pty,
4343
session_count_ssh,
44-
connection_median_latency_ms,
45-
startup_script_ns,
46-
startup_script_success
44+
connection_median_latency_ms
4745
)
4846
SELECT
4947
unnest(@id :: uuid[]) AS id,
@@ -62,10 +60,7 @@ SELECT
6260
unnest(@session_count_jetbrains :: bigint[]) AS session_count_jetbrains,
6361
unnest(@session_count_reconnecting_pty :: bigint[]) AS session_count_reconnecting_pty,
6462
unnest(@session_count_ssh :: bigint[]) AS session_count_ssh,
65-
unnest(@connection_median_latency_ms :: double precision[]) AS connection_median_latency_ms,
66-
unnest(@startup_script_ns :: bigint[]) AS startup_script_ns,
67-
unnest(@startup_script_success :: bool[]) AS startup_script_success
68-
;
63+
unnest(@connection_median_latency_ms :: double precision[]) AS connection_median_latency_ms;
6964

7065
-- name: GetTemplateDAUs :many
7166
SELECT
@@ -168,10 +163,7 @@ WITH agent_stats AS (
168163
coalesce(SUM(session_count_jetbrains), 0)::bigint AS session_count_jetbrains,
169164
coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty,
170165
coalesce(SUM(connection_count), 0)::bigint AS connection_count,
171-
coalesce(MAX(connection_median_latency_ms), 0)::float AS connection_median_latency_ms,
172-
-- TODO: Figure this out
173-
coalesce(MAX(startup_script_ns), 0)::float AS startup_script_ns,
174-
coalesce(MAX(startup_script_success), false)::float AS startup_script_success
166+
coalesce(MAX(connection_median_latency_ms), 0)::float AS connection_median_latency_ms
175167
FROM (
176168
SELECT *, ROW_NUMBER() OVER(PARTITION BY agent_id ORDER BY created_at DESC) AS rn
177169
FROM workspace_agent_stats
@@ -182,10 +174,9 @@ WITH agent_stats AS (
182174
GROUP BY a.user_id, a.agent_id, a.workspace_id
183175
)
184176
SELECT
185-
users.username, workspace_agents.name AS agent_name, workspaces.name AS workspace_name,
186-
workspaces.template_id AS template_id, rx_bytes, tx_bytes,
177+
users.username, workspace_agents.name AS agent_name, workspaces.name AS workspace_name, rx_bytes, tx_bytes,
187178
session_count_vscode, session_count_ssh, session_count_jetbrains, session_count_reconnecting_pty,
188-
connection_count, connection_median_latency_ms, startup_script_ns, startup_script_success, templates.name AS template_name
179+
connection_count, connection_median_latency_ms
189180
FROM
190181
agent_stats
191182
JOIN
@@ -203,8 +194,4 @@ ON
203194
JOIN
204195
workspaces
205196
ON
206-
workspaces.id = agent_stats.workspace_id
207-
JOIN
208-
templates
209-
ON
210-
templates.id = workspaces.template_id;
197+
workspaces.id = agent_stats.workspace_id;

0 commit comments

Comments
 (0)