Skip to content

feat(coderd): add times_used to coder_apps in insights API #13292

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
feat(coderd): add times_used to coder_apps in insights API
For now, only applied to `coder_app`s, same logic can be implemented for
VS Code, SSH, etc.

Part of #13099
  • Loading branch information
mafredri committed May 16, 2024
commit 7347b818e8a614de994e960ab98ada38fc77467d
4 changes: 4 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

74 changes: 72 additions & 2 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -3149,6 +3149,30 @@ func (q *FakeQuerier) GetTemplateAppInsights(ctx context.Context, arg database.G
GROUP BY
start_time, user_id, slug, display_name, icon
),
-- Analyze the users unique app usage across all templates. Count
-- usage across consecutive intervals as continuous usage.
times_used AS (
SELECT DISTINCT ON (user_id, slug, display_name, icon, uniq)
slug,
display_name,
icon,
-- Turn start_time into a unique identifier that identifies a users
-- continuous app usage. The value of uniq is otherwise garbage.
--
-- Since we're aggregating per user app usage across templates,
-- there can be duplicate start_times. To handle this, we use the
-- dense_rank() function, otherwise row_number() would suffice.
start_time - (
dense_rank() OVER (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL dense_rank()

PARTITION BY
user_id, slug, display_name, icon
ORDER BY
start_time
) * '30 minutes'::interval
) AS uniq
FROM
template_usage_stats_with_apps
),
*/

// Due to query optimizations, this logic is somewhat inverted from
Expand All @@ -3160,12 +3184,19 @@ func (q *FakeQuerier) GetTemplateAppInsights(ctx context.Context, arg database.G
DisplayName string
Icon string
}
type appTimesUsedGroupBy struct {
UserID uuid.UUID
Slug string
DisplayName string
Icon string
}
type appInsightsRow struct {
appInsightsGroupBy
TemplateIDs []uuid.UUID
AppUsageMins int64
}
appInsightRows := make(map[appInsightsGroupBy]appInsightsRow)
appTimesUsedRows := make(map[appTimesUsedGroupBy]map[time.Time]struct{})
// FROM
for _, stat := range q.templateUsageStats {
// WHERE
Expand Down Expand Up @@ -3201,9 +3232,42 @@ func (q *FakeQuerier) GetTemplateAppInsights(ctx context.Context, arg database.G
row.TemplateIDs = append(row.TemplateIDs, stat.TemplateID)
row.AppUsageMins = least(row.AppUsageMins+appUsage, 30)
appInsightRows[key] = row

// Prepare to do times_used calculation, distinct start times.
timesUsedKey := appTimesUsedGroupBy{
UserID: stat.UserID,
Slug: slug,
DisplayName: app.DisplayName,
Icon: app.Icon,
}
if appTimesUsedRows[timesUsedKey] == nil {
appTimesUsedRows[timesUsedKey] = make(map[time.Time]struct{})
}
// This assigns a distinct time, so we don't need to
// dense_rank() later on, we can simply do row_number().
appTimesUsedRows[timesUsedKey][stat.StartTime] = struct{}{}
}
}

appTimesUsedTempRows := make(map[appTimesUsedGroupBy][]time.Time)
for key, times := range appTimesUsedRows {
for t := range times {
appTimesUsedTempRows[key] = append(appTimesUsedTempRows[key], t)
}
}
for _, times := range appTimesUsedTempRows {
slices.SortFunc(times, func(a, b time.Time) int {
return int(a.Sub(b))
})
}
for key, times := range appTimesUsedTempRows {
uniq := make(map[time.Time]struct{})
for i, t := range times {
uniq[t.Add(-(30 * time.Minute * time.Duration(i)))] = struct{}{}
}
appTimesUsedRows[key] = uniq
}

/*
-- Even though we allow identical apps to be aggregated across
-- templates, we still want to be able to report which templates
Expand Down Expand Up @@ -3288,14 +3352,20 @@ func (q *FakeQuerier) GetTemplateAppInsights(ctx context.Context, arg database.G

var rows []database.GetTemplateAppInsightsRow
for key, gr := range groupedRows {
rows = append(rows, database.GetTemplateAppInsightsRow{
row := database.GetTemplateAppInsightsRow{
TemplateIDs: templateRows[key].TemplateIDs,
ActiveUsers: int64(len(uniqueSortedUUIDs(gr.ActiveUserIDs))),
Slug: key.Slug,
DisplayName: key.DisplayName,
Icon: key.Icon,
UsageSeconds: gr.UsageSeconds,
})
}
for tuk, uniq := range appTimesUsedRows {
if key.Slug == tuk.Slug && key.DisplayName == tuk.DisplayName && key.Icon == tuk.Icon {
row.TimesUsed += int64(len(uniq))
}
}
rows = append(rows, row)
}

// NOTE(mafredri): Add sorting if we decide on how to handle PostgreSQL collations.
Expand Down
46 changes: 40 additions & 6 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 38 additions & 6 deletions coderd/database/queries/insights.sql
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ WITH
apps.slug,
apps.display_name,
apps.icon,
tus.app_usage_mins
(tus.app_usage_mins -> apps.slug)::smallint AS usage_mins
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drive-by optimization to avoid jsonb_each in the following CTE.

FROM
apps
JOIN
Expand All @@ -273,14 +273,36 @@ WITH
display_name,
icon,
-- See motivation in GetTemplateInsights for LEAST(SUM(n), 30).
LEAST(SUM(app_usage.value::smallint), 30) AS usage_mins
LEAST(SUM(usage_mins), 30) AS usage_mins
FROM
template_usage_stats_with_apps, jsonb_each(app_usage_mins) AS app_usage
WHERE
app_usage.key = slug
template_usage_stats_with_apps
GROUP BY
start_time, user_id, slug, display_name, icon
),
-- Analyze the users unique app usage across all templates. Count
-- usage across consecutive intervals as continuous usage.
times_used AS (
SELECT DISTINCT ON (user_id, slug, display_name, icon, uniq)
slug,
display_name,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

QQ: If somebody changes the display_name for the app, will it break the calculation? Probably a question related with other subqueries too.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'll handle it according to current behavior -- a change in display_name will be treated as a new app.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it should nit as technically the app is the same app.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It depends, in aggregation across the whole deployment (which we don't show, but is possible), the apps might not be the same at all. For now I would not want to introduce one feature that behaves differently from the rest, we already show multiple entries for apps and parameters if something (like the display name) has changed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I am aware if the existing behavior and suggesting that we should change it sometimes in the future to be more robust to cosmetic changes to a coder_app.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@matifali actually, have you recently confirmed this is still a problem? I think this is mainly a problem with parameters. If the app name changes it should only use the most recent name, at least since the rollup refactor.

icon,
-- Turn start_time into a unique identifier that identifies a users
-- continuous app usage. The value of uniq is otherwise garbage.
--
-- Since we're aggregating per user app usage across templates,
-- there can be duplicate start_times. To handle this, we use the
-- dense_rank() function, otherwise row_number() would suffice.
start_time - (
dense_rank() OVER (
PARTITION BY
user_id, slug, display_name, icon
ORDER BY
start_time
) * '30 minutes'::interval
) AS uniq
FROM
template_usage_stats_with_apps
Comment on lines +284 to +304
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What sort of additional load does this place on the DB?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't measure but I don't expect it to be a lot, this is similar in weight (or lighter) as the app_insights CTE and could theoretically be run in parallel (don't know if PG will, though).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I posted the explains, it seems to be parallel and very cheap.

),
-- Even though we allow identical apps to be aggregated across
-- templates, we still want to be able to report which templates
-- the data comes from.
Expand All @@ -302,7 +324,17 @@ SELECT
ai.slug,
ai.display_name,
ai.icon,
(SUM(ai.usage_mins) * 60)::bigint AS usage_seconds
(SUM(ai.usage_mins) * 60)::bigint AS usage_seconds,
COALESCE((
SELECT
COUNT(*)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is just about cardinality of app usage, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we are only interested in rows of this table, not of it's content.

FROM
times_used
WHERE
times_used.slug = ai.slug
AND times_used.display_name = ai.display_name
AND times_used.icon = ai.icon
), 0)::bigint AS times_used
FROM
app_insights AS ai
JOIN
Expand Down
1 change: 1 addition & 0 deletions coderd/insights.go
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,7 @@ func convertTemplateInsightsApps(usage database.GetTemplateInsightsRow, appUsage
Slug: app.Slug,
Icon: app.Icon,
Seconds: app.UsageSeconds,
TimesUsed: app.TimesUsed,
})
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@
"display_name": "Visual Studio Code",
"slug": "vscode",
"icon": "/icon/code.svg",
"seconds": 3600
"seconds": 3600,
"times_used": 0
},
{
"template_ids": [],
"type": "builtin",
"display_name": "JetBrains",
"slug": "jetbrains",
"icon": "/icon/intellij.svg",
"seconds": 0
"seconds": 0,
"times_used": 0
},
{
"template_ids": [
Expand All @@ -33,7 +35,8 @@
"display_name": "Web Terminal",
"slug": "reconnecting-pty",
"icon": "/icon/terminal.svg",
"seconds": 7200
"seconds": 7200,
"times_used": 0
},
{
"template_ids": [
Expand All @@ -43,15 +46,17 @@
"display_name": "SSH",
"slug": "ssh",
"icon": "/icon/terminal.svg",
"seconds": 10800
"seconds": 10800,
"times_used": 0
},
{
"template_ids": [],
"type": "builtin",
"display_name": "SFTP",
"slug": "sftp",
"icon": "/icon/terminal.svg",
"seconds": 0
"seconds": 0,
"times_used": 0
},
{
"template_ids": [
Expand All @@ -61,7 +66,8 @@
"display_name": "app1",
"slug": "app1",
"icon": "/icon1.png",
"seconds": 25200
"seconds": 25200,
"times_used": 2
}
],
"parameters_usage": []
Expand Down
Loading
Loading