-
Notifications
You must be signed in to change notification settings - Fork 894
fix: stop incrementing activity on empty agent stats #15204
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
e1a4f0b
3e2717d
5171f71
61f2b7e
1b9be00
c8bc07e
082a599
12b1ccf
7412e77
0b8e06e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,6 @@ import ( | |
"time" | ||
|
||
"github.com/google/uuid" | ||
"golang.org/x/sync/errgroup" | ||
"golang.org/x/xerrors" | ||
|
||
"cdr.dev/slog" | ||
|
@@ -119,69 +118,57 @@ func (r *Reporter) ReportAppStats(ctx context.Context, stats []workspaceapps.Sta | |
} | ||
|
||
func (r *Reporter) ReportAgentStats(ctx context.Context, now time.Time, workspace database.Workspace, workspaceAgent database.WorkspaceAgent, templateName string, stats *agentproto.Stats, usage bool) error { | ||
if stats.ConnectionCount > 0 { | ||
var nextAutostart time.Time | ||
if workspace.AutostartSchedule.String != "" { | ||
templateSchedule, err := (*(r.opts.TemplateScheduleStore.Load())).Get(ctx, r.opts.Database, workspace.TemplateID) | ||
// If the template schedule fails to load, just default to bumping | ||
// without the next transition and log it. | ||
if err != nil { | ||
r.opts.Logger.Error(ctx, "failed to load template schedule bumping activity, defaulting to bumping by 60min", | ||
slog.F("workspace_id", workspace.ID), | ||
slog.F("template_id", workspace.TemplateID), | ||
slog.Error(err), | ||
) | ||
} else { | ||
next, allowed := schedule.NextAutostart(now, workspace.AutostartSchedule.String, templateSchedule) | ||
if allowed { | ||
nextAutostart = next | ||
} | ||
} | ||
} | ||
ActivityBumpWorkspace(ctx, r.opts.Logger.Named("activity_bump"), r.opts.Database, workspace.ID, nextAutostart) | ||
} | ||
// update agent stats | ||
r.opts.StatsBatcher.Add(now, workspaceAgent.ID, workspace.TemplateID, workspace.OwnerID, workspace.ID, stats, usage) | ||
|
||
var errGroup errgroup.Group | ||
errGroup.Go(func() error { | ||
err := r.opts.StatsBatcher.Add(now, workspaceAgent.ID, workspace.TemplateID, workspace.OwnerID, workspace.ID, stats, usage) | ||
// update prometheus metrics | ||
if r.opts.UpdateAgentMetricsFn != nil { | ||
user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thought (non-blocking): It's unfortunate to have to do this just to get the username related to the workspace. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah we just have generally avoided database types that embed other types as a default strategy which leads to cleaner types but unfortunately sometimes more fetching. I think it's OK for now and can always be optimized if found to be a bottleneck later. It's a good goal to get this func as low IO as possible. |
||
if err != nil { | ||
r.opts.Logger.Error(ctx, "add agent stats to batcher", slog.Error(err)) | ||
return xerrors.Errorf("insert workspace agent stats batch: %w", err) | ||
return xerrors.Errorf("get user: %w", err) | ||
} | ||
|
||
r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{ | ||
Username: user.Username, | ||
WorkspaceName: workspace.Name, | ||
AgentName: workspaceAgent.Name, | ||
TemplateName: templateName, | ||
}, stats.Metrics) | ||
} | ||
|
||
// if no active connections we do not bump activity | ||
if stats.ConnectionCount == 0 { | ||
return nil | ||
}) | ||
errGroup.Go(func() error { | ||
err := r.opts.Database.UpdateWorkspaceLastUsedAt(ctx, database.UpdateWorkspaceLastUsedAtParams{ | ||
ID: workspace.ID, | ||
LastUsedAt: now, | ||
}) | ||
} | ||
|
||
// check next autostart | ||
var nextAutostart time.Time | ||
if workspace.AutostartSchedule.String != "" { | ||
templateSchedule, err := (*(r.opts.TemplateScheduleStore.Load())).Get(ctx, r.opts.Database, workspace.TemplateID) | ||
// If the template schedule fails to load, just default to bumping | ||
// without the next transition and log it. | ||
if err != nil { | ||
return xerrors.Errorf("update workspace LastUsedAt: %w", err) | ||
} | ||
return nil | ||
}) | ||
if r.opts.UpdateAgentMetricsFn != nil { | ||
errGroup.Go(func() error { | ||
user, err := r.opts.Database.GetUserByID(ctx, workspace.OwnerID) | ||
if err != nil { | ||
return xerrors.Errorf("get user: %w", err) | ||
r.opts.Logger.Error(ctx, "failed to load template schedule bumping activity, defaulting to bumping by 60min", | ||
slog.F("workspace_id", workspace.ID), | ||
slog.F("template_id", workspace.TemplateID), | ||
slog.Error(err), | ||
) | ||
} else { | ||
next, allowed := schedule.NextAutostart(now, workspace.AutostartSchedule.String, templateSchedule) | ||
if allowed { | ||
nextAutostart = next | ||
} | ||
|
||
r.opts.UpdateAgentMetricsFn(ctx, prometheusmetrics.AgentMetricLabels{ | ||
Username: user.Username, | ||
WorkspaceName: workspace.Name, | ||
AgentName: workspaceAgent.Name, | ||
TemplateName: templateName, | ||
}, stats.Metrics) | ||
return nil | ||
}) | ||
} | ||
err := errGroup.Wait() | ||
if err != nil { | ||
return xerrors.Errorf("update stats in database: %w", err) | ||
} | ||
} | ||
|
||
err = r.opts.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspace.ID), []byte{}) | ||
// bump workspace activity | ||
ActivityBumpWorkspace(ctx, r.opts.Logger.Named("activity_bump"), r.opts.Database, workspace.ID, nextAutostart) | ||
|
||
// bump workspace last_used_at | ||
r.opts.UsageTracker.Add(workspace.ID) | ||
|
||
// notify workspace update | ||
err := r.opts.Pubsub.Publish(codersdk.WorkspaceNotifyChannel(workspace.ID), []byte{}) | ||
if err != nil { | ||
r.opts.Logger.Warn(ctx, "failed to publish workspace agent stats", | ||
slog.F("workspace_id", workspace.ID), slog.Error(err)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
praise: This does make the whole interface more convenient to use 👍