Skip to content

Commit 98783e3

Browse files
committed
call alt queries from prometheus, telemetry
1 parent a5f0272 commit 98783e3

File tree

11 files changed

+97
-51
lines changed

11 files changed

+97
-51
lines changed

cli/server.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,8 @@ func enablePrometheus(
245245
afterCtx(ctx, closeInsightsMetricsCollector)
246246

247247
if vals.Prometheus.CollectAgentStats {
248-
closeAgentStatsFunc, err := prometheusmetrics.AgentStats(ctx, logger, options.PrometheusRegistry, options.Database, time.Now(), 0, options.DeploymentValues.Prometheus.AggregateAgentStatsBy.Value())
248+
experiments := coderd.ReadExperiments(options.Logger, options.DeploymentValues.Experiments.Value())
249+
closeAgentStatsFunc, err := prometheusmetrics.AgentStats(ctx, logger, options.PrometheusRegistry, options.Database, time.Now(), 0, options.DeploymentValues.Prometheus.AggregateAgentStatsBy.Value(), experiments.Enabled(codersdk.ExperimentWorkspaceUsage))
249250
if err != nil {
250251
return nil, xerrors.Errorf("register agent stats prometheus metric: %w", err)
251252
}

coderd/coderd.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,6 @@ func New(options *Options) *API {
421421
panic(xerrors.Errorf("read site bin failed: %w", err))
422422
}
423423

424-
experiments.Enabled(codersdk.ExperimentWorkspaceUsage)
425-
426424
metricsCache := metricscache.New(
427425
options.Database,
428426
options.Logger.Named("metrics_cache"),

coderd/database/dbauthz/dbauthz_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2574,6 +2574,9 @@ func (s *MethodTestSuite) TestSystemFunctions() {
25742574
s.Run("GetDeploymentWorkspaceAgentStats", s.Subtest(func(db database.Store, check *expects) {
25752575
check.Args(time.Time{}).Asserts()
25762576
}))
2577+
s.Run("GetDeploymentWorkspaceAgentUsageStats", s.Subtest(func(db database.Store, check *expects) {
2578+
check.Args(time.Time{}).Asserts()
2579+
}))
25772580
s.Run("GetDeploymentWorkspaceStats", s.Subtest(func(db database.Store, check *expects) {
25782581
check.Args().Asserts()
25792582
}))
@@ -2610,9 +2613,15 @@ func (s *MethodTestSuite) TestSystemFunctions() {
26102613
s.Run("GetWorkspaceAgentStatsAndLabels", s.Subtest(func(db database.Store, check *expects) {
26112614
check.Args(time.Time{}).Asserts()
26122615
}))
2616+
s.Run("GetWorkspaceAgentUsageStatsAndLabels", s.Subtest(func(db database.Store, check *expects) {
2617+
check.Args(time.Time{}).Asserts()
2618+
}))
26132619
s.Run("GetWorkspaceAgentStats", s.Subtest(func(db database.Store, check *expects) {
26142620
check.Args(time.Time{}).Asserts()
26152621
}))
2622+
s.Run("GetWorkspaceAgentUsageStats", s.Subtest(func(db database.Store, check *expects) {
2623+
check.Args(time.Time{}).Asserts()
2624+
}))
26162625
s.Run("GetWorkspaceProxyByHostname", s.Subtest(func(db database.Store, check *expects) {
26172626
p, _ := dbgen.WorkspaceProxy(s.T(), db, database.WorkspaceProxy{
26182627
WildcardHostname: "*.example.com",

coderd/database/dbmem/dbmem.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5822,9 +5822,9 @@ func (q *FakeQuerier) GetWorkspaceAgentUsageStatsAndLabels(_ context.Context, cr
58225822
latestAgentStats[key] = val
58235823
}
58245824
}
5825-
// WHERE usage = true
5825+
// WHERE usage = true AND created_at > now() - '1 minute'::interval
58265826
// GROUP BY user_id, agent_id, workspace_id
5827-
if agentStat.Usage {
5827+
if agentStat.Usage && agentStat.CreatedAt.After(time.Now().Add(-time.Minute)) {
58285828
val, ok := latestAgentStats[key]
58295829
if !ok {
58305830
latestAgentStats[key] = agentStat

coderd/database/querier_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,7 @@ func TestGetWorkspaceAgentUsageStatsAndLabels(t *testing.T) {
255255

256256
db, _ := dbtestutil.NewDB(t)
257257
ctx := context.Background()
258-
// Since the queries exclude the current minute
259-
insertTime := dbtime.Now().Add(-time.Minute)
258+
insertTime := dbtime.Now()
260259

261260
// Insert user, agent, template, workspace
262261
user1 := dbgen.User(t, db, database.User{})
@@ -372,7 +371,7 @@ func TestGetWorkspaceAgentUsageStatsAndLabels(t *testing.T) {
372371
WorkspaceName: workspace1.Name,
373372
RxBytes: 3,
374373
TxBytes: 3,
375-
SessionCountVSCode: 3,
374+
SessionCountVSCode: 2,
376375
ConnectionMedianLatencyMS: 1,
377376
})
378377

coderd/database/queries.sql.go

Lines changed: 22 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/workspaceagentstats.sql

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,10 @@ minute_buckets AS (
136136
SELECT
137137
agent_id,
138138
date_trunc('minute', created_at) AS minute_bucket,
139-
SUM(session_count_vscode) AS session_count_vscode,
140-
SUM(session_count_ssh) AS session_count_ssh,
141-
SUM(session_count_jetbrains) AS session_count_jetbrains,
142-
SUM(session_count_reconnecting_pty) AS session_count_reconnecting_pty
139+
coalesce(SUM(session_count_vscode), 0)::bigint AS session_count_vscode,
140+
coalesce(SUM(session_count_ssh), 0)::bigint AS session_count_ssh,
141+
coalesce(SUM(session_count_jetbrains), 0)::bigint AS session_count_jetbrains,
142+
coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty
143143
FROM
144144
workspace_agent_stats
145145
WHERE
@@ -166,10 +166,10 @@ latest_buckets AS (
166166
),
167167
latest_agent_stats AS (
168168
SELECT
169-
SUM(session_count_vscode) AS session_count_vscode,
170-
SUM(session_count_ssh) AS session_count_ssh,
171-
SUM(session_count_jetbrains) AS session_count_jetbrains,
172-
SUM(session_count_reconnecting_pty) AS session_count_reconnecting_pty
169+
coalesce(SUM(session_count_vscode), 0)::bigint AS session_count_vscode,
170+
coalesce(SUM(session_count_ssh), 0)::bigint AS session_count_ssh,
171+
coalesce(SUM(session_count_jetbrains), 0)::bigint AS session_count_jetbrains,
172+
coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty
173173
FROM
174174
latest_buckets
175175
)
@@ -225,10 +225,10 @@ minute_buckets AS (
225225
SELECT
226226
agent_id,
227227
date_trunc('minute', created_at) AS minute_bucket,
228-
SUM(session_count_vscode) AS session_count_vscode,
229-
SUM(session_count_ssh) AS session_count_ssh,
230-
SUM(session_count_jetbrains) AS session_count_jetbrains,
231-
SUM(session_count_reconnecting_pty) AS session_count_reconnecting_pty
228+
coalesce(SUM(session_count_vscode), 0)::bigint AS session_count_vscode,
229+
coalesce(SUM(session_count_ssh), 0)::bigint AS session_count_ssh,
230+
coalesce(SUM(session_count_jetbrains), 0)::bigint AS session_count_jetbrains,
231+
coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty
232232
FROM
233233
workspace_agent_stats
234234
WHERE
@@ -330,7 +330,9 @@ WITH agent_stats AS (
330330
coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty,
331331
coalesce(SUM(connection_count), 0)::bigint AS connection_count
332332
FROM workspace_agent_stats
333-
WHERE usage = true
333+
-- We only want the latest stats, but those stats might be
334+
-- spread across multiple rows.
335+
WHERE usage = true AND created_at > now() - '1 minute'::interval
334336
GROUP BY user_id, agent_id, workspace_id
335337
), latest_agent_latencies AS (
336338
SELECT
@@ -341,11 +343,15 @@ WITH agent_stats AS (
341343
)
342344
SELECT
343345
users.username, workspace_agents.name AS agent_name, workspaces.name AS workspace_name, rx_bytes, tx_bytes,
344-
session_count_vscode, session_count_ssh, session_count_jetbrains, session_count_reconnecting_pty,
345-
connection_count, connection_median_latency_ms
346+
coalesce(session_count_vscode, 0)::bigint AS session_count_vscode,
347+
coalesce(session_count_ssh, 0)::bigint AS session_count_ssh,
348+
coalesce(session_count_jetbrains, 0)::bigint AS session_count_jetbrains,
349+
coalesce(session_count_reconnecting_pty, 0)::bigint AS session_count_reconnecting_pty,
350+
coalesce(connection_count, 0)::bigint AS connection_count,
351+
connection_median_latency_ms
346352
FROM
347353
agent_stats
348-
JOIN
354+
LEFT JOIN
349355
latest_agent_stats
350356
ON
351357
agent_stats.agent_id = latest_agent_stats.agent_id

coderd/metricscache/metricscache_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ func TestCache_BuildTime(t *testing.T) {
150150
},
151151
},
152152
transition: database.WorkspaceTransitionStop,
153-
}, want{50 * 1000, true},
153+
}, want{30 * 1000, true},
154154
},
155155
{
156156
"three/delete", args{
@@ -300,6 +300,7 @@ func TestCache_DeploymentStats(t *testing.T) {
300300
SessionCountReconnectingPTY: []int64{0},
301301
SessionCountSSH: []int64{0},
302302
ConnectionMedianLatencyMS: []float64{10},
303+
Usage: []bool{false},
303304
})
304305
require.NoError(t, err)
305306

coderd/prometheusmetrics/prometheusmetrics.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,8 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
388388
}, nil
389389
}
390390

391-
func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration, aggregateByLabels []string) (func(), error) {
391+
// nolint:revive // This will be removed alongside the workspaceusage experiment
392+
func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration, aggregateByLabels []string, usage bool) (func(), error) {
392393
if duration == 0 {
393394
duration = defaultRefreshRate
394395
}
@@ -520,7 +521,20 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
520521
timer := prometheus.NewTimer(metricsCollectorAgentStats)
521522

522523
checkpoint := time.Now()
523-
stats, err := db.GetWorkspaceAgentStatsAndLabels(ctx, createdAfter)
524+
var (
525+
stats []database.GetWorkspaceAgentStatsAndLabelsRow
526+
err error
527+
)
528+
if usage {
529+
var agentUsageStats []database.GetWorkspaceAgentUsageStatsAndLabelsRow
530+
agentUsageStats, err = db.GetWorkspaceAgentUsageStatsAndLabels(ctx, createdAfter)
531+
stats = make([]database.GetWorkspaceAgentStatsAndLabelsRow, 0, len(agentUsageStats))
532+
for _, agentUsageStat := range agentUsageStats {
533+
stats = append(stats, database.GetWorkspaceAgentStatsAndLabelsRow(agentUsageStat))
534+
}
535+
} else {
536+
stats, err = db.GetWorkspaceAgentStatsAndLabels(ctx, createdAfter)
537+
}
524538
if err != nil {
525539
logger.Error(ctx, "can't get agent stats", slog.Error(err))
526540
} else {

coderd/prometheusmetrics/prometheusmetrics_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ func TestAgentStats(t *testing.T) {
470470
// and it doesn't depend on the real time.
471471
closeFunc, err := prometheusmetrics.AgentStats(ctx, slogtest.Make(t, &slogtest.Options{
472472
IgnoreErrors: true,
473-
}), registry, db, time.Now().Add(-time.Minute), time.Millisecond, agentmetrics.LabelAll)
473+
}), registry, db, time.Now().Add(-time.Minute), time.Millisecond, agentmetrics.LabelAll, false)
474474
require.NoError(t, err)
475475
t.Cleanup(closeFunc)
476476

coderd/telemetry/telemetry.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"net/url"
1313
"os"
1414
"runtime"
15+
"slices"
1516
"strings"
1617
"sync"
1718
"time"
@@ -473,13 +474,24 @@ func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
473474
return nil
474475
})
475476
eg.Go(func() error {
476-
stats, err := r.options.Database.GetWorkspaceAgentStats(ctx, createdAfter)
477-
if err != nil {
478-
return xerrors.Errorf("get workspace agent stats: %w", err)
479-
}
480-
snapshot.WorkspaceAgentStats = make([]WorkspaceAgentStat, 0, len(stats))
481-
for _, stat := range stats {
482-
snapshot.WorkspaceAgentStats = append(snapshot.WorkspaceAgentStats, ConvertWorkspaceAgentStat(stat))
477+
if r.options.DeploymentConfig != nil && slices.Contains(r.options.DeploymentConfig.Experiments, string(codersdk.ExperimentWorkspaceUsage)) {
478+
agentStats, err := r.options.Database.GetWorkspaceAgentUsageStats(ctx, createdAfter)
479+
if err != nil {
480+
return xerrors.Errorf("get workspace agent stats: %w", err)
481+
}
482+
snapshot.WorkspaceAgentStats = make([]WorkspaceAgentStat, 0, len(agentStats))
483+
for _, stat := range agentStats {
484+
snapshot.WorkspaceAgentStats = append(snapshot.WorkspaceAgentStats, ConvertWorkspaceAgentStat(database.GetWorkspaceAgentStatsRow(stat)))
485+
}
486+
} else {
487+
agentStats, err := r.options.Database.GetWorkspaceAgentStats(ctx, createdAfter)
488+
if err != nil {
489+
return xerrors.Errorf("get workspace agent stats: %w", err)
490+
}
491+
snapshot.WorkspaceAgentStats = make([]WorkspaceAgentStat, 0, len(agentStats))
492+
for _, stat := range agentStats {
493+
snapshot.WorkspaceAgentStats = append(snapshot.WorkspaceAgentStats, ConvertWorkspaceAgentStat(stat))
494+
}
483495
}
484496
return nil
485497
})

0 commit comments

Comments
 (0)