Skip to content

feat(coderd): add coder_app usage stats #9001

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
befb4dc
feat(coderd): track `coder_app` usage
mafredri Aug 9, 2023
0bb672a
amend pr comments
mafredri Aug 10, 2023
1171a9d
refactor stats with rollups, add tests
mafredri Aug 10, 2023
858264a
add close
mafredri Aug 14, 2023
e8969d3
fix wsproxy endpoint auth todo
mafredri Aug 14, 2023
7dc4ff2
add backlog for re-reporting in case of failure
mafredri Aug 14, 2023
0561f8d
update dbauthz
mafredri Aug 14, 2023
59d69ac
use dbauthz.AsSystemRestricted for collector flush
mafredri Aug 14, 2023
97ef37b
add stats collection test to apptest
mafredri Aug 14, 2023
fda79c6
Merge branch 'main' into mafredri/feat-add-app-usage-to-template-insi…
mafredri Aug 14, 2023
8926dd0
fix migrations
mafredri Aug 14, 2023
1705138
update plumbing to fix wsproxy tests
mafredri Aug 14, 2023
d709e68
test the stat output in apptest
mafredri Aug 14, 2023
6ec178b
fix issues
mafredri Aug 14, 2023
7db34ba
fix id in dbfake
mafredri Aug 14, 2023
ca83430
fix gen
mafredri Aug 14, 2023
80b7ff3
Merge branch 'main' into mafredri/feat-add-app-usage-to-template-insi…
mafredri Aug 15, 2023
8039a4d
add fixture
mafredri Aug 15, 2023
7e2ec9f
use 15s timeout and timer reset
mafredri Aug 16, 2023
70f6249
pass now to rollup
mafredri Aug 16, 2023
5615e27
track last insert id in fakedb
mafredri Aug 16, 2023
9418abd
fix migration indentation
mafredri Aug 16, 2023
a032f8a
remove sqlc type alias
mafredri Aug 16, 2023
94563ef
defer collect stats
mafredri Aug 16, 2023
e27e905
remove stale comment
mafredri Aug 16, 2023
be67aef
preallocate batch slices
mafredri Aug 16, 2023
37d06e3
Merge branch 'main' into mafredri/feat-add-app-usage-to-template-insi…
mafredri Aug 16, 2023
c9272c5
lower log level to debug for start/stop
mafredri Aug 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add backlog for re-reporting in case of failure
  • Loading branch information
mafredri committed Aug 14, 2023
commit 7dc4ff226baffcd2f5bbfc89a98013df7db88a18
4 changes: 4 additions & 0 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion coderd/database/queries/workspaceappstats.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,8 @@ DO
WHERE
workspace_app_stats.user_id = EXCLUDED.user_id
AND workspace_app_stats.agent_id = EXCLUDED.agent_id
AND workspace_app_stats.session_id = EXCLUDED.session_id;
AND workspace_app_stats.session_id = EXCLUDED.session_id
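-- Since stats are updated in place as time progresses, only apply
-- the update when the incoming values are at least as recent as the
-- stored ones, so a late or retried report never overwrites newer data.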
AND workspace_app_stats.session_ended_at <= EXCLUDED.session_ended_at
AND workspace_app_stats.requests <= EXCLUDED.requests;
29 changes: 26 additions & 3 deletions coderd/workspaceapps/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ type StatsCollector struct {
mu sync.Mutex // Protects following.
statsBySessionID map[uuid.UUID]*StatsReport // Track unique sessions.
groupedStats map[statsGroupKey][]*StatsReport // Rolled up stats for sessions in close proximity.
backlog []StatsReport // Stats that have not been reported yet (due to error).
}

type StatsCollectorOptions struct {
Expand Down Expand Up @@ -242,7 +243,7 @@ func (sc *StatsCollector) rollup() []StatsReport {
rolledUp: true,
}
}

rollupChanged := false
newGroup := []*StatsReport{rolledUp} // Must be first in slice for future iterations (see group[0] above).
for _, stat := range group {
if !stat.SessionEndedAt.IsZero() && stat.SessionEndedAt.Sub(stat.SessionStartedAt) <= sc.opts.RollupWindow {
Expand All @@ -251,6 +252,7 @@ func (sc *StatsCollector) rollup() []StatsReport {
rolledUp.SessionID = stat.SessionID // Borrow the first session ID, useful in tests.
}
rolledUp.Requests += stat.Requests
rollupChanged = true
continue
}
if stat.SessionEndedAt.IsZero() && sc.opts.Now().Sub(stat.SessionStartedAt) <= sc.opts.RollupWindow {
Expand All @@ -273,7 +275,7 @@ func (sc *StatsCollector) rollup() []StatsReport {
newGroup = append(newGroup, stat) // Keep it for future updates.
}
}
if rolledUp.Requests > 0 {
if rollupChanged {
report = append(report, *rolledUp)
}

Expand All @@ -300,12 +302,33 @@ func (sc *StatsCollector) flush(ctx context.Context) (err error) {
}
}()

// We keep the backlog as a simple slice so that we don't need to
// attempt to merge it with the stats we're about to report. This
// is because the rollup is a one-way operation and the backlog may
// contain stats that are still in the statsBySessionID map and will
// be reported again in the future. It is possible to merge the
// backlog and the stats we're about to report, but it's not worth
// the complexity.
if len(sc.backlog) > 0 {
err = sc.opts.Reporter.Report(ctx, sc.backlog)
if err != nil {
return xerrors.Errorf("report workspace app stats from backlog failed: %w", err)
}
sc.backlog = nil
}

stats := sc.rollup()
if len(stats) == 0 {
return nil
}

return sc.opts.Reporter.Report(ctx, stats)
err = sc.opts.Reporter.Report(ctx, stats)
if err != nil {
sc.backlog = stats
return xerrors.Errorf("report workspace app stats failed: %w", err)
}

return nil
}

func (sc *StatsCollector) Close() error {
Expand Down
82 changes: 79 additions & 3 deletions coderd/workspaceapps/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,18 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
"golang.org/x/xerrors"

"github.com/coder/coder/coderd/database"
"github.com/coder/coder/coderd/workspaceapps"
"github.com/coder/coder/testutil"
)

// fakeReporter is an in-memory reporter used by the stats collector tests.
// It records every batch handed to Report and can be switched into a
// failing mode with setError to exercise the collector's error paths.
type fakeReporter struct {
	mu   sync.Mutex                  // Protects following.
	s    []workspaceapps.StatsReport // Stats appended by Report calls that did not fail.
	err  error                       // When non-nil, Report returns this error instead of recording.
	errN int                         // Number of Report calls that returned err.
}

func (r *fakeReporter) stats() []workspaceapps.StatsReport {
Expand All @@ -28,8 +31,25 @@ func (r *fakeReporter) stats() []workspaceapps.StatsReport {
return r.s
}

// errors returns the current value of the failure counter (errN). The
// counter is read while holding r.mu so the read is ordered with writers
// that take the same lock.
func (r *fakeReporter) errors() int {
	r.mu.Lock()
	failed := r.errN
	r.mu.Unlock()
	return failed
}

// setError replaces the error stored on the reporter. The assignment is
// made while holding r.mu. Passing nil clears a previously stored error.
func (r *fakeReporter) setError(err error) {
	r.mu.Lock()
	r.err = err
	r.mu.Unlock()
}

func (r *fakeReporter) Report(_ context.Context, stats []workspaceapps.StatsReport) error {
r.mu.Lock()
if r.err != nil {
r.errN++
r.mu.Unlock()
return r.err
}
r.s = append(r.s, stats...)
r.mu.Unlock()
return nil
Expand Down Expand Up @@ -296,7 +316,6 @@ func TestStatsCollector(t *testing.T) {
var gotStats []workspaceapps.StatsReport
require.Eventually(t, func() bool {
gotStats = reporter.stats()
t.Logf("len(reporter.stats()) = %d, len(tt.want) = %d", len(gotStats), len(tt.want))
return len(gotStats) == len(tt.want)
}, testutil.WaitMedium, testutil.IntervalFast)

Expand Down Expand Up @@ -329,6 +348,63 @@ func TestStatsCollector(t *testing.T) {
}
}

// TestStatsCollector_backlog checks the collector's behavior while the
// reporter is failing: two flushes against a failing reporter must produce
// two failed reports and no recorded stats, and one flush after the
// reporter recovers must deliver both collected stats.
func TestStatsCollector_backlog(t *testing.T) {
	t.Parallel()

	rollupWindow := time.Minute
	flush := make(chan chan<- struct{}, 1)

	start := database.Now().Truncate(time.Minute).UTC()
	var now atomic.Pointer[time.Time]
	// Publish a private copy rather than &start: start is advanced in the
	// loop below, and the clock must only ever change through now.Store so
	// that readers of *now.Load() never see a plain, unsynchronized write.
	initial := start
	now.Store(&initial)

	reporter := &fakeReporter{}
	collector := workspaceapps.NewStatsCollector(workspaceapps.StatsCollectorOptions{
		Reporter:       reporter,
		ReportInterval: time.Hour,
		RollupWindow:   rollupWindow,

		Flush: flush,
		Now:   func() time.Time { return *now.Load() },
	})

	// triggerFlush hands the collector a done channel over the Flush
	// channel and waits until something is received on it.
	triggerFlush := func() {
		done := make(chan struct{}, 1)
		flush <- done
		<-done
	}

	reporter.setError(xerrors.New("some error"))

	// The first collected stat is "rolled up" and moved into the
	// backlog during the first flush. On the second flush nothing is
	// rolled up due to being unable to report the backlog.
	for i := 0; i < 2; i++ {
		collector.Collect(workspaceapps.StatsReport{
			SessionID:        uuid.New(),
			SessionStartedAt: start,
			SessionEndedAt:   start.Add(10 * time.Second),
			Requests:         1,
		})

		// Advance the clock by one minute using a fresh variable per
		// iteration so the previously published pointer is never mutated.
		next := start.Add(time.Minute)
		start = next
		now.Store(&next)

		triggerFlush()
	}

	// Flush was performed 2 times, 2 reports should have failed.
	wantErrors := 2
	assert.Equal(t, wantErrors, reporter.errors())
	assert.Empty(t, reporter.stats())

	reporter.setError(nil)

	// Flush again, this time the backlog should be reported in addition
	// to the second collected stat being rolled up and reported.
	triggerFlush()

	// No further failures, and both collected stats have been delivered.
	assert.Equal(t, wantErrors, reporter.errors())
	assert.Len(t, reporter.stats(), 2)
}

func TestStatsCollector_Close(t *testing.T) {
t.Parallel()

Expand Down