-
Notifications
You must be signed in to change notification settings - Fork 975
fix: accumulate agentstats until reported and fix insights DAU offset #15832
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
e97f3a9
bd4ae11
53f3275
57fde09
7f7df65
ff236f0
08dea24
3fc95da
02bafec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ package agent | |
|
||
import ( | ||
"context" | ||
"maps" | ||
"sync" | ||
"time" | ||
|
||
|
@@ -32,7 +33,7 @@ type statsDest interface { | |
// statsDest (agent API in prod) | ||
type statsReporter struct { | ||
*sync.Cond | ||
networkStats *map[netlogtype.Connection]netlogtype.Counts | ||
networkStats map[netlogtype.Connection]netlogtype.Counts | ||
unreported bool | ||
lastInterval time.Duration | ||
|
||
|
@@ -54,8 +55,18 @@ func (s *statsReporter) callback(_, _ time.Time, virtual, _ map[netlogtype.Conne | |
s.L.Lock() | ||
defer s.L.Unlock() | ||
s.logger.Debug(context.Background(), "got stats callback") | ||
s.networkStats = &virtual | ||
s.unreported = true | ||
// Accumulate stats until they've been reported. | ||
if s.unreported { | ||
if s.networkStats == nil && virtual != nil { | ||
s.networkStats = make(map[netlogtype.Connection]netlogtype.Counts) | ||
} | ||
for k, v := range virtual { | ||
s.networkStats[k] = s.networkStats[k].Add(v) | ||
} | ||
} else { | ||
s.networkStats = maps.Clone(virtual) | ||
s.unreported = true | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Review: If the callback was called multiple times before reporting, we lost data as each update is a snapshot since the last. This can happen if:
I believe the assumption is that the "ConnStatsCallback" reports a realistic count for "now", however, what it actually returns is closer to an additive diff between this and the previous report. Thus, if two callbacks happen in quick succession we're effectively zeroing the actual data. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great catch! |
||
s.Broadcast() | ||
} | ||
|
||
|
@@ -96,9 +107,8 @@ func (s *statsReporter) reportLoop(ctx context.Context, dest statsDest) error { | |
if ctxDone { | ||
return nil | ||
} | ||
networkStats := *s.networkStats | ||
s.unreported = false | ||
if err = s.reportLocked(ctx, dest, networkStats); err != nil { | ||
if err = s.reportLocked(ctx, dest, s.networkStats); err != nil { | ||
return xerrors.Errorf("report stats: %w", err) | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,7 +89,7 @@ func (api *API) returnDAUsInternal(rw http.ResponseWriter, r *http.Request, temp | |
} | ||
for _, row := range rows { | ||
resp.Entries = append(resp.Entries, codersdk.DAUEntry{ | ||
Date: row.StartTime.Format(time.DateOnly), | ||
Date: row.StartTime.In(loc).Format(time.DateOnly), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Review: Drive-by fix, the date was off-by-one depending on timezone. |
||
Amount: int(row.ActiveUsers), | ||
}) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,16 +48,26 @@ func TestDeploymentInsights(t *testing.T) { | |
db, ps := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure()) | ||
logger := testutil.Logger(t) | ||
rollupEvents := make(chan dbrollup.Event) | ||
statsInterval := 500 * time.Millisecond | ||
batcher, closeBatcher, err := workspacestats.NewBatcher(context.Background(), | ||
workspacestats.BatcherWithLogger(logger.Named("batcher").Leveled(slog.LevelDebug)), | ||
workspacestats.BatcherWithStore(db), | ||
workspacestats.BatcherWithBatchSize(1), | ||
workspacestats.BatcherWithInterval(statsInterval), | ||
dannykopping marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
require.NoError(t, err) | ||
defer closeBatcher() | ||
client := coderdtest.New(t, &coderdtest.Options{ | ||
Database: db, | ||
Pubsub: ps, | ||
Logger: &logger, | ||
IncludeProvisionerDaemon: true, | ||
AgentStatsRefreshInterval: time.Millisecond * 100, | ||
AgentStatsRefreshInterval: statsInterval, | ||
StatsBatcher: batcher, | ||
DatabaseRolluper: dbrollup.New( | ||
logger.Named("dbrollup").Leveled(slog.LevelDebug), | ||
db, | ||
dbrollup.WithInterval(time.Millisecond*100), | ||
dbrollup.WithInterval(statsInterval/2), | ||
dbrollup.WithEventChannel(rollupEvents), | ||
), | ||
}) | ||
|
@@ -76,7 +86,7 @@ func TestDeploymentInsights(t *testing.T) { | |
workspace := coderdtest.CreateWorkspace(t, client, template.ID) | ||
coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID) | ||
|
||
ctx := testutil.Context(t, testutil.WaitLong) | ||
ctx := testutil.Context(t, testutil.WaitSuperLong) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Review: In race mode, propagating the agent connection stats can take a while. |
||
|
||
// Pre-check, no permission issues. | ||
daus, err := client.DeploymentDAUs(ctx, codersdk.TimezoneOffsetHour(clientTz)) | ||
|
@@ -108,6 +118,13 @@ func TestDeploymentInsights(t *testing.T) { | |
err = sess.Start("cat") | ||
require.NoError(t, err) | ||
|
||
select { | ||
case <-ctx.Done(): | ||
require.Fail(t, "timed out waiting for initial rollup event") | ||
mafredri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
case ev := <-rollupEvents: | ||
require.True(t, ev.Init, "want init event") | ||
} | ||
|
||
for { | ||
select { | ||
case <-ctx.Done(): | ||
|
@@ -120,6 +137,7 @@ func TestDeploymentInsights(t *testing.T) { | |
if len(daus.Entries) > 0 && daus.Entries[len(daus.Entries)-1].Amount > 0 { | ||
break | ||
} | ||
t.Logf("waiting for deployment daus to update: %+v", daus) | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: let's save some allocations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've never actually benchmarked how much a difference a size hint gives for maps, especially ones that don't have a lot of data. Is there a significant difference?
Your suggestion made me realize this had a better fix 😄.