Skip to content

Commit 3e356f2

Browse files
committed
feat(coderd/database): keep only 1 day of workspace_agent_stats after rollup
1 parent eb01ba5 commit 3e356f2

File tree

6 files changed

+148
-16
lines changed

6 files changed

+148
-16
lines changed

cli/server.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
962962
defer shutdownConns()
963963

964964
// Ensures that old database entries are cleaned up over time!
965-
purger := dbpurge.New(ctx, logger, options.Database)
965+
purger := dbpurge.New(ctx, logger.Named("dbpurge"), options.Database)
966966
defer purger.Close()
967967

968968
// Updates workspace usage

coderd/database/dbmem/dbmem.go

+37-3
Original file line numberDiff line numberDiff line change
@@ -1496,13 +1496,47 @@ func (q *FakeQuerier) DeleteOldWorkspaceAgentStats(_ context.Context) error {
14961496
q.mutex.Lock()
14971497
defer q.mutex.Unlock()
14981498

1499+
/*
1500+
DELETE FROM
1501+
workspace_agent_stats
1502+
WHERE
1503+
created_at < (
1504+
SELECT
1505+
COALESCE(
1506+
-- When generating initial template usage stats, all the
1507+
-- raw agent stats are needed, after that only ~30 mins
1508+
-- from last rollup is needed. Deployment stats seem to
1509+
-- use between 15 mins and 1 hour of data. We keep a
1510+
-- little bit more (1 day) just in case.
1511+
MAX(start_time) - '1 days'::interval,
1512+
-- Fall back to 6 months ago if there are no template
1513+
-- usage stats so that we don't delete the data before
1514+
-- it's rolled up.
1515+
NOW() - '6 months'::interval
1516+
)
1517+
FROM
1518+
template_usage_stats
1519+
);
1520+
*/
1521+
14991522
now := dbtime.Now()
1500-
sixMonthInterval := 6 * 30 * 24 * time.Hour
1501-
sixMonthsAgo := now.Add(-sixMonthInterval)
1523+
var limit time.Time
1524+
// MAX
1525+
for _, stat := range q.templateUsageStats {
1526+
if stat.StartTime.After(limit) {
1527+
limit = stat.StartTime.AddDate(0, 0, -1)
1528+
}
1529+
}
1530+
// COALESCE
1531+
if limit.IsZero() {
1532+
limit = now.AddDate(0, -6, 0)
1533+
}
15021534

15031535
var validStats []database.WorkspaceAgentStat
15041536
for _, stat := range q.workspaceAgentStats {
1505-
if stat.CreatedAt.Before(sixMonthsAgo) {
1537+
fmt.Println(stat.CreatedAt, limit)
1538+
if stat.CreatedAt.Before(limit) {
1539+
fmt.Println("delete")
15061540
continue
15071541
}
15081542
validStats = append(validStats, stat)

coderd/database/dbpurge/dbpurge.go

-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ const (
2424
// This is for cleaning up old, unused resources from the database that take up space.
2525
func New(ctx context.Context, logger slog.Logger, db database.Store) io.Closer {
2626
closed := make(chan struct{})
27-
logger = logger.Named("dbpurge")
2827

2928
ctx, cancelFunc := context.WithCancel(ctx)
3029
//nolint:gocritic // The system purges old db records without user input.

coderd/database/dbpurge/dbpurge_test.go

+70-9
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@ import (
1111
"go.uber.org/goleak"
1212
"golang.org/x/exp/slices"
1313

14+
"cdr.dev/slog"
1415
"cdr.dev/slog/sloggers/slogtest"
1516

1617
"github.com/coder/coder/v2/coderd/database"
1718
"github.com/coder/coder/v2/coderd/database/dbgen"
1819
"github.com/coder/coder/v2/coderd/database/dbmem"
1920
"github.com/coder/coder/v2/coderd/database/dbpurge"
21+
"github.com/coder/coder/v2/coderd/database/dbrollup"
2022
"github.com/coder/coder/v2/coderd/database/dbtestutil"
2123
"github.com/coder/coder/v2/coderd/database/dbtime"
2224
"github.com/coder/coder/v2/provisionerd/proto"
@@ -40,27 +42,62 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) {
4042
t.Parallel()
4143

4244
db, _ := dbtestutil.NewDB(t)
43-
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
45+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
46+
47+
now := dbtime.Now()
48+
49+
defer func() {
50+
if t.Failed() {
51+
t.Logf("Test failed, printing rows...")
52+
ctx := testutil.Context(t, testutil.WaitShort)
53+
wasRows, err := db.GetWorkspaceAgentStats(ctx, now.AddDate(0, -7, 0))
54+
if err == nil {
55+
for _, row := range wasRows {
56+
t.Logf("workspace agent stat: %v", row)
57+
}
58+
}
59+
tusRows, err := db.GetTemplateUsageStats(context.Background(), database.GetTemplateUsageStatsParams{
60+
StartTime: now.AddDate(0, -7, 0),
61+
EndTime: now,
62+
})
63+
if err == nil {
64+
for _, row := range tusRows {
65+
t.Logf("template usage stat: %v", row)
66+
}
67+
}
68+
}
69+
}()
4470

4571
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
4672
defer cancel()
4773

48-
now := dbtime.Now()
49-
5074
// given
5175
// Let's use RxBytes to identify stat entries.
5276
// Stat inserted 6 months + 1 hour ago, should be deleted.
5377
first := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{
54-
CreatedAt: now.Add(-6*30*24*time.Hour - time.Hour),
78+
CreatedAt: now.AddDate(0, -6, 0).Add(-time.Hour),
79+
ConnectionCount: 1,
5580
ConnectionMedianLatencyMS: 1,
5681
RxBytes: 1111,
82+
SessionCountSSH: 1,
5783
})
5884

59-
// Stat inserted 6 months - 1 hour ago, should not be deleted.
85+
// Stat inserted 6 months - 1 hour ago, should not be deleted before rollup.
6086
second := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{
61-
CreatedAt: now.Add(-5*30*24*time.Hour + time.Hour),
87+
CreatedAt: now.AddDate(0, -6, 0).Add(time.Hour),
88+
ConnectionCount: 1,
6289
ConnectionMedianLatencyMS: 1,
6390
RxBytes: 2222,
91+
SessionCountSSH: 1,
92+
})
93+
94+
// Stat inserted 6 months - 1 day - 2 hours ago, should not be deleted at all.
95+
third := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{
96+
CreatedAt: now.AddDate(0, -6, 0).AddDate(0, 0, 1).Add(2 * time.Hour),
97+
ConnectionCount: 1,
98+
ConnectionMedianLatencyMS: 1,
99+
RxBytes: 3333,
100+
SessionCountSSH: 1,
64101
})
65102

66103
// when
@@ -70,15 +107,39 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) {
70107
// then
71108
var stats []database.GetWorkspaceAgentStatsRow
72109
var err error
73-
require.Eventually(t, func() bool {
110+
require.Eventuallyf(t, func() bool {
74111
// Query all stats created not earlier than 7 months ago
75-
stats, err = db.GetWorkspaceAgentStats(ctx, now.Add(-7*30*24*time.Hour))
112+
stats, err = db.GetWorkspaceAgentStats(ctx, now.AddDate(0, -7, 0))
76113
if err != nil {
77114
return false
78115
}
79116
return !containsWorkspaceAgentStat(stats, first) &&
80117
containsWorkspaceAgentStat(stats, second)
81-
}, testutil.WaitShort, testutil.IntervalFast, stats)
118+
}, testutil.WaitShort, testutil.IntervalFast, "it should delete old stats: %v", stats)
119+
120+
// when
121+
events := make(chan dbrollup.Event)
122+
rolluper := dbrollup.New(logger, db, dbrollup.WithEventChannel(events))
123+
defer rolluper.Close()
124+
125+
_, _ = <-events, <-events
126+
127+
// Start a new purger to immediately trigger delete after rollup.
128+
_ = closer.Close()
129+
closer = dbpurge.New(ctx, logger, db)
130+
defer closer.Close()
131+
132+
// then
133+
require.Eventuallyf(t, func() bool {
134+
// Query all stats created not earlier than 7 months ago
135+
stats, err = db.GetWorkspaceAgentStats(ctx, now.AddDate(0, -7, 0))
136+
if err != nil {
137+
return false
138+
}
139+
return !containsWorkspaceAgentStat(stats, first) &&
140+
!containsWorkspaceAgentStat(stats, second) &&
141+
containsWorkspaceAgentStat(stats, third)
142+
}, testutil.WaitShort, testutil.IntervalFast, "it should delete old stats after rollup: %v", stats)
82143
}
83144

84145
func containsWorkspaceAgentStat(stats []database.GetWorkspaceAgentStatsRow, needle database.WorkspaceAgentStat) bool {

coderd/database/queries.sql.go

+20-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/workspaceagentstats.sql

+20-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,26 @@ ORDER BY
9090
date ASC;
9191

9292
-- name: DeleteOldWorkspaceAgentStats :exec
93-
DELETE FROM workspace_agent_stats WHERE created_at < NOW() - INTERVAL '180 days';
93+
DELETE FROM
94+
workspace_agent_stats
95+
WHERE
96+
created_at < (
97+
SELECT
98+
COALESCE(
99+
-- When generating initial template usage stats, all the
100+
-- raw agent stats are needed, after that only ~30 mins
101+
-- from last rollup is needed. Deployment stats seem to
102+
-- use between 15 mins and 1 hour of data. We keep a
103+
-- little bit more (1 day) just in case.
104+
MAX(start_time) - '1 days'::interval,
105+
-- Fall back to 6 months ago if there are no template
106+
-- usage stats so that we don't delete the data before
107+
-- it's rolled up.
108+
NOW() - '6 months'::interval
109+
)
110+
FROM
111+
template_usage_stats
112+
);
94113

95114
-- name: GetDeploymentWorkspaceAgentStats :one
96115
WITH agent_stats AS (

0 commit comments

Comments
 (0)