Skip to content

Commit 5171f71

Browse files
committed
fix: stop incrementing on empty agent stats
1 parent 3e2717d commit 5171f71

File tree

6 files changed

+52
-69
lines changed

6 files changed

+52
-69
lines changed

coderd/insights_test.go

+2-4
Original file line numberDiff line numberDiff line change
@@ -700,14 +700,13 @@ func TestTemplateInsights_Golden(t *testing.T) {
700700
connectionCount = 0
701701
}
702702
for createdAt.Before(stat.endedAt) {
703-
err = batcher.Add(createdAt, workspace.agentID, workspace.template.id, workspace.user.(*testUser).sdk.ID, workspace.id, &agentproto.Stats{
703+
batcher.Add(createdAt, workspace.agentID, workspace.template.id, workspace.user.(*testUser).sdk.ID, workspace.id, &agentproto.Stats{
704704
ConnectionCount: connectionCount,
705705
SessionCountVscode: stat.sessionCountVSCode,
706706
SessionCountJetbrains: stat.sessionCountJetBrains,
707707
SessionCountReconnectingPty: stat.sessionCountReconnectingPTY,
708708
SessionCountSsh: stat.sessionCountSSH,
709709
}, false)
710-
require.NoError(t, err, "want no error inserting agent stats")
711710
createdAt = createdAt.Add(30 * time.Second)
712711
}
713712
}
@@ -1599,14 +1598,13 @@ func TestUserActivityInsights_Golden(t *testing.T) {
15991598
connectionCount = 0
16001599
}
16011600
for createdAt.Before(stat.endedAt) {
1602-
err = batcher.Add(createdAt, workspace.agentID, workspace.template.id, workspace.user.(*testUser).sdk.ID, workspace.id, &agentproto.Stats{
1601+
batcher.Add(createdAt, workspace.agentID, workspace.template.id, workspace.user.(*testUser).sdk.ID, workspace.id, &agentproto.Stats{
16031602
ConnectionCount: connectionCount,
16041603
SessionCountVscode: stat.sessionCountVSCode,
16051604
SessionCountJetbrains: stat.sessionCountJetBrains,
16061605
SessionCountReconnectingPty: stat.sessionCountReconnectingPTY,
16071606
SessionCountSsh: stat.sessionCountSSH,
16081607
}, false)
1609-
require.NoError(t, err, "want no error inserting agent stats")
16101608
createdAt = createdAt.Add(30 * time.Second)
16111609
}
16121610
}

coderd/workspacestats/batcher.go

+2-3
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ const (
2525
)
2626

2727
type Batcher interface {
28-
Add(now time.Time, agentID uuid.UUID, templateID uuid.UUID, userID uuid.UUID, workspaceID uuid.UUID, st *agentproto.Stats, usage bool) error
28+
Add(now time.Time, agentID uuid.UUID, templateID uuid.UUID, userID uuid.UUID, workspaceID uuid.UUID, st *agentproto.Stats, usage bool)
2929
}
3030

3131
// DBBatcher holds a buffer of agent stats and periodically flushes them to
@@ -139,7 +139,7 @@ func (b *DBBatcher) Add(
139139
workspaceID uuid.UUID,
140140
st *agentproto.Stats,
141141
usage bool,
142-
) error {
142+
) {
143143
b.mu.Lock()
144144
defer b.mu.Unlock()
145145

@@ -176,7 +176,6 @@ func (b *DBBatcher) Add(
176176
b.flushLever <- struct{}{}
177177
b.flushForced.Store(true)
178178
}
179-
return nil
180179
}
181180

182181
// Run runs the batcher.

coderd/workspacestats/batcher_internal_test.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func TestBatchStats(t *testing.T) {
6363
// Given: a single data point is added for workspace
6464
t2 := t1.Add(time.Second)
6565
t.Logf("inserting 1 stat")
66-
require.NoError(t, b.Add(t2.Add(time.Millisecond), deps1.Agent.ID, deps1.User.ID, deps1.Template.ID, deps1.Workspace.ID, randStats(t), false))
66+
b.Add(t2.Add(time.Millisecond), deps1.Agent.ID, deps1.User.ID, deps1.Template.ID, deps1.Workspace.ID, randStats(t), false)
6767

6868
// When: it becomes time to report stats
6969
// Signal a tick and wait for a flush to complete.
@@ -87,9 +87,9 @@ func TestBatchStats(t *testing.T) {
8787
t.Logf("inserting %d stats", defaultBufferSize)
8888
for i := 0; i < defaultBufferSize; i++ {
8989
if i%2 == 0 {
90-
require.NoError(t, b.Add(t3.Add(time.Millisecond), deps1.Agent.ID, deps1.User.ID, deps1.Template.ID, deps1.Workspace.ID, randStats(t), false))
90+
b.Add(t3.Add(time.Millisecond), deps1.Agent.ID, deps1.User.ID, deps1.Template.ID, deps1.Workspace.ID, randStats(t), false)
9191
} else {
92-
require.NoError(t, b.Add(t3.Add(time.Millisecond), deps2.Agent.ID, deps2.User.ID, deps2.Template.ID, deps2.Workspace.ID, randStats(t), false))
92+
b.Add(t3.Add(time.Millisecond), deps2.Agent.ID, deps2.User.ID, deps2.Template.ID, deps2.Workspace.ID, randStats(t), false)
9393
}
9494
}
9595
}()

coderd/workspacestats/reporter.go

+44-56
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"time"
77

88
"github.com/google/uuid"
9-
"golang.org/x/sync/errgroup"
109
"golang.org/x/xerrors"
1110

1211
"cdr.dev/slog"
@@ -119,69 +118,58 @@ func (r *Reporter) ReportAppStats(ctx context.Context, stats []workspaceapps.Sta
119118
}
120119

121120
func (r *Reporter) ReportAgentStats(ctx context.Context, now time.Time, workspace database.Workspace, workspaceAgent database.WorkspaceAgent, templateName string, stats *agentproto.Stats, usage bool) error {
122-
if stats.ConnectionCount > 0 {
123-
var nextAutostart time.Time
124-
if workspace.AutostartSchedule.String != "" {
125-
templateSchedule, err := (*(r.opts.TemplateScheduleStore.Load())).Get(ctx, r.opts.Database, workspace.TemplateID)
126-
// If the template schedule fails to load, just default to bumping
127-
// without the next transition and log it.
128-
if err != nil {
129-
r.opts.Logger.Error(ctx, "failed to load template schedule bumping activity, defaulting to bumping by 60min",
130-
slog.F("workspace_id", workspace.ID),
131-
slog.F("template_id", workspace.TemplateID),
132-
slog.Error(err),
133-
)
134-
} else {
135-
next, allowed := schedule.NextAutostart(now, workspace.AutostartSchedule.String, templateSchedule)
136-
if allowed {
137-
nextAutostart = next
138-
}
139-
}
140-
}
141-
ActivityBumpWorkspace(ctx, r.opts.Logger.Named("activity_bump"), r.opts.Database, workspace.ID, nextAutostart)
142-
}
121+
// update agent stats
122+
r.opts.StatsBatcher.Add(now, workspaceAgent.ID, workspace.TemplateID, workspace.OwnerID, workspace.ID, stats, usage)
143123

144-
var errGroup errgroup.Group
145-
errGroup.Go(func() error {
146-
err := r.opts.StatsBatcher.Add(now, workspaceAgent.ID, workspace.TemplateID, workspace.OwnerID, workspace.ID, stats, usage)
147-
if err != nil {
148-
r.opts.Logger.Error(ctx, "add agent stats to batcher", slog.Error(err))
149-
return xerrors.Errorf("insert workspace agent stats batch: %w", err)
150-
}
151-
return nil
152-
})
153-
errGroup.Go(func() error {
154-
err := r.opts.Database.UpdateWorkspaceLastUsedAt(ctx, database.UpdateWorkspaceLastUsedAtParams{
155-
ID: workspace.ID,
156-
LastUsedAt: now,
157-
})
124+
// update prometheus metrics
125+
if r.opts.UpdateAgentMetricsFn != nil {
126+
user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID)
158127
if err != nil {
159-
return xerrors.Errorf("update workspace LastUsedAt: %w", err)
128+
return xerrors.Errorf("get user: %w", err)
160129
}
130+
131+
r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{
132+
Username: user.Username,
133+
WorkspaceName: workspace.Name,
134+
AgentName: workspaceAgent.Name,
135+
TemplateName: templateName,
136+
}, stats.Metrics)
161137
return nil
162-
})
163-
if r.opts.UpdateAgentMetricsFn != nil {
164-
errGroup.Go(func() error {
165-
user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID)
166-
if err != nil {
167-
return xerrors.Errorf("get user: %w", err)
168-
}
138+
}
169139

170-
r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{
171-
Username: user.Username,
172-
WorkspaceName: workspace.Name,
173-
AgentName: workspaceAgent.Name,
174-
TemplateName: templateName,
175-
}, stats.Metrics)
176-
return nil
177-
})
140+
// if no active sessions we do not bump activity
141+
if stats.SessionCountJetbrains == 0 && stats.SessionCountReconnectingPty == 0 && stats.SessionCountSsh == 0 && stats.SessionCountVscode == 0 {
142+
return nil
178143
}
179-
err := errGroup.Wait()
180-
if err != nil {
181-
return xerrors.Errorf("update stats in database: %w", err)
144+
145+
// check next autostart
146+
var nextAutostart time.Time
147+
if workspace.AutostartSchedule.String != "" {
148+
templateSchedule, err := (*(r.opts.TemplateScheduleStore.Load())).Get(ctx, r.opts.Database, workspace.TemplateID)
149+
// If the template schedule fails to load, just default to bumping
150+
// without the next transition and log it.
151+
if err != nil {
152+
r.opts.Logger.Error(ctx, "failed to load template schedule bumping activity, defaulting to bumping by 60min",
153+
slog.F("workspace_id", workspace.ID),
154+
slog.F("template_id", workspace.TemplateID),
155+
slog.Error(err),
156+
)
157+
} else {
158+
next, allowed := schedule.NextAutostart(now, workspace.AutostartSchedule.String, templateSchedule)
159+
if allowed {
160+
nextAutostart = next
161+
}
162+
}
182163
}
183164

184-
err = r.opts.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspace.ID), []byte{})
165+
// bump workspace activity
166+
ActivityBumpWorkspace(ctx, r.opts.Logger.Named("activity_bump"), r.opts.Database, workspace.ID, nextAutostart)
167+
168+
// bump workspace last_used_at
169+
r.opts.UsageTracker.Add(workspace.ID)
170+
171+
// notify workspace update
172+
err := r.opts.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspace.ID), []byte{})
185173
if err != nil {
186174
r.opts.Logger.Warn(ctx, "failed to publish workspace agent stats",
187175
slog.F("workspace_id", workspace.ID), slog.Error(err))

coderd/workspacestats/tracker.go

-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,6 @@ func (tr *UsageTracker) flush(now time.Time) {
130130
authCtx := dbauthz.AsSystemRestricted(ctx)
131131
tr.flushLock.Lock()
132132
defer tr.flushLock.Unlock()
133-
// nolint:gocritic // (#13146) Will be moved soon as part of refactor.
134133
if err := tr.s.BatchUpdateWorkspaceLastUsedAt(authCtx, database.BatchUpdateWorkspaceLastUsedAtParams{
135134
LastUsedAt: now,
136135
IDs: ids,

coderd/workspacestats/workspacestatstest/batcher.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ type StatsBatcher struct {
2525

2626
var _ workspacestats.Batcher = &StatsBatcher{}
2727

28-
func (b *StatsBatcher) Add(now time.Time, agentID uuid.UUID, templateID uuid.UUID, userID uuid.UUID, workspaceID uuid.UUID, st *agentproto.Stats, usage bool) error {
28+
func (b *StatsBatcher) Add(now time.Time, agentID uuid.UUID, templateID uuid.UUID, userID uuid.UUID, workspaceID uuid.UUID, st *agentproto.Stats, usage bool) {
2929
b.Mu.Lock()
3030
defer b.Mu.Unlock()
3131
b.Called++
@@ -36,5 +36,4 @@ func (b *StatsBatcher) Add(now time.Time, agentID uuid.UUID, templateID uuid.UUI
3636
b.LastWorkspaceID = workspaceID
3737
b.LastStats = st
3838
b.LastUsage = usage
39-
return nil
4039
}

0 commit comments

Comments
 (0)