Skip to content

Commit 0f8251b

Browse files
authored
feat(coderd/database/dbpurge): retain most recent agent build logs (coder#14460)
Updates the `DeleteOldWorkspaceAgentLogs` query to: - Retain logs for the most recent build regardless of age, - Delete logs for agents that never connected and were created before the log-deletion cutoff, while still retaining the logs of the most recent build.
1 parent 10c958b commit 0f8251b

File tree

6 files changed

+349
-126
lines changed

6 files changed

+349
-126
lines changed

coderd/database/dbmem/dbmem.go

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,19 +1710,90 @@ func (q *FakeQuerier) DeleteOldWorkspaceAgentLogs(_ context.Context, threshold t
17101710
q.mutex.Lock()
17111711
defer q.mutex.Unlock()
17121712

1713-
var validLogs []database.WorkspaceAgentLog
1714-
for _, log := range q.workspaceAgentLogs {
1715-
var toBeDeleted bool
1716-
for _, agent := range q.workspaceAgents {
1717-
if agent.ID == log.AgentID && agent.LastConnectedAt.Valid && agent.LastConnectedAt.Time.Before(threshold) {
1718-
toBeDeleted = true
1719-
break
1720-
}
1713+
/*
1714+
WITH
1715+
latest_builds AS (
1716+
SELECT
1717+
workspace_id, max(build_number) AS max_build_number
1718+
FROM
1719+
workspace_builds
1720+
GROUP BY
1721+
workspace_id
1722+
),
1723+
*/
1724+
latestBuilds := make(map[uuid.UUID]int32)
1725+
for _, wb := range q.workspaceBuilds {
1726+
if lastBuildNumber, found := latestBuilds[wb.WorkspaceID]; found && lastBuildNumber > wb.BuildNumber {
1727+
continue
17211728
}
1729+
// not found or newer build number
1730+
latestBuilds[wb.WorkspaceID] = wb.BuildNumber
1731+
}
17221732

1723-
if !toBeDeleted {
1724-
validLogs = append(validLogs, log)
1733+
/*
1734+
old_agents AS (
1735+
SELECT
1736+
wa.id
1737+
FROM
1738+
workspace_agents AS wa
1739+
JOIN
1740+
workspace_resources AS wr
1741+
ON
1742+
wa.resource_id = wr.id
1743+
JOIN
1744+
workspace_builds AS wb
1745+
ON
1746+
wb.job_id = wr.job_id
1747+
LEFT JOIN
1748+
latest_builds
1749+
ON
1750+
latest_builds.workspace_id = wb.workspace_id
1751+
AND
1752+
latest_builds.max_build_number = wb.build_number
1753+
WHERE
1754+
-- Filter out the latest builds for each workspace.
1755+
latest_builds.workspace_id IS NULL
1756+
AND CASE
1757+
-- If the last time the agent connected was before @threshold
1758+
WHEN wa.last_connected_at IS NOT NULL THEN
1759+
wa.last_connected_at < @threshold :: timestamptz
1760+
-- The agent never connected, and was created before @threshold
1761+
ELSE wa.created_at < @threshold :: timestamptz
1762+
END
1763+
)
1764+
*/
1765+
oldAgents := make(map[uuid.UUID]struct{})
1766+
for _, wa := range q.workspaceAgents {
1767+
for _, wr := range q.workspaceResources {
1768+
if wr.ID != wa.ResourceID {
1769+
continue
1770+
}
1771+
for _, wb := range q.workspaceBuilds {
1772+
if wb.JobID != wr.JobID {
1773+
continue
1774+
}
1775+
latestBuildNumber, found := latestBuilds[wb.WorkspaceID]
1776+
if !found {
1777+
panic("workspaceBuilds got modified somehow while q was locked! This is a bug in dbmem!")
1778+
}
1779+
if latestBuildNumber == wb.BuildNumber {
1780+
continue
1781+
}
1782+
if wa.LastConnectedAt.Valid && wa.LastConnectedAt.Time.Before(threshold) || wa.CreatedAt.Before(threshold) {
1783+
oldAgents[wa.ID] = struct{}{}
1784+
}
1785+
}
1786+
}
1787+
}
1788+
/*
1789+
DELETE FROM workspace_agent_logs WHERE agent_id IN (SELECT id FROM old_agents);
1790+
*/
1791+
var validLogs []database.WorkspaceAgentLog
1792+
for _, log := range q.workspaceAgentLogs {
1793+
if _, found := oldAgents[log.AgentID]; found {
1794+
continue
17251795
}
1796+
validLogs = append(validLogs, log)
17261797
}
17271798
q.workspaceAgentLogs = validLogs
17281799
return nil

coderd/database/dbpurge/dbpurge.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/coder/coder/v2/coderd/database"
1313
"github.com/coder/coder/v2/coderd/database/dbauthz"
14+
"github.com/coder/coder/v2/coderd/database/dbtime"
1415
"github.com/coder/quartz"
1516
)
1617

@@ -30,7 +31,8 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz.
3031
//nolint:gocritic // The system purges old db records without user input.
3132
ctx = dbauthz.AsSystemRestricted(ctx)
3233

33-
ticker := clk.NewTicker(time.Nanosecond)
34+
// Start the ticker with the initial delay.
35+
ticker := clk.NewTicker(delay)
3436
doTick := func(start time.Time) {
3537
defer ticker.Reset(delay)
3638
// Start a transaction to grab advisory lock, we don't want to run
@@ -47,7 +49,8 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz.
4749
return nil
4850
}
4951

50-
if err := tx.DeleteOldWorkspaceAgentLogs(ctx, start.Add(-maxAgentLogAge)); err != nil {
52+
deleteOldWorkspaceAgentLogsBefore := start.Add(-maxAgentLogAge)
53+
if err := tx.DeleteOldWorkspaceAgentLogs(ctx, deleteOldWorkspaceAgentLogsBefore); err != nil {
5154
return xerrors.Errorf("failed to delete old workspace agent logs: %w", err)
5255
}
5356
if err := tx.DeleteOldWorkspaceAgentStats(ctx); err != nil {
@@ -72,13 +75,15 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz.
7275
go func() {
7376
defer close(closed)
7477
defer ticker.Stop()
78+
// Force an initial tick.
79+
doTick(dbtime.Time(clk.Now()).UTC())
7580
for {
7681
select {
7782
case <-ctx.Done():
7883
return
7984
case tick := <-ticker.C:
8085
ticker.Stop()
81-
doTick(tick)
86+
doTick(dbtime.Time(tick).UTC())
8287
}
8388
}
8489
}()

0 commit comments

Comments
 (0)