Skip to content

feat: expose app insights as Prometheus metrics #10346

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Nov 7, 2023
Merged
7 changes: 7 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,13 @@ func (q *querier) GetTemplateAppInsights(ctx context.Context, arg database.GetTe
return q.db.GetTemplateAppInsights(ctx, arg)
}

func (q *querier) GetTemplateAppInsightsByTemplate(ctx context.Context, arg database.GetTemplateAppInsightsByTemplateParams) ([]database.GetTemplateAppInsightsByTemplateRow, error) {
if err := q.authorizeContext(ctx, rbac.ActionUpdate, rbac.ResourceTemplate.All()); err != nil {
return nil, err
}
return q.db.GetTemplateAppInsightsByTemplate(ctx, arg)
}

// Only used by metrics cache.
func (q *querier) GetTemplateAverageBuildTime(ctx context.Context, arg database.GetTemplateAverageBuildTimeParams) (database.GetTemplateAverageBuildTimeRow, error) {
if err := q.authorizeContext(ctx, rbac.ActionRead, rbac.ResourceSystem); err != nil {
Expand Down
100 changes: 100 additions & 0 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -2365,6 +2365,106 @@ func (q *FakeQuerier) GetTemplateAppInsights(ctx context.Context, arg database.G
return rows, nil
}

func (q *FakeQuerier) GetTemplateAppInsightsByTemplate(ctx context.Context, arg database.GetTemplateAppInsightsByTemplateParams) ([]database.GetTemplateAppInsightsByTemplateRow, error) {
err := validateDatabaseType(arg)
if err != nil {
return nil, err
}

q.mutex.RLock()
defer q.mutex.RUnlock()

type uniqueKey struct {
TemplateID uuid.UUID
DisplayName string
Slug string
}

// map (TemplateID + DisplayName + Slug) x time.Time x UserID x <usage>
usageByTemplateAppUser := map[uniqueKey]map[time.Time]map[uuid.UUID]int64{}

// Review agent stats in terms of usage
for _, s := range q.workspaceAppStats {
// (was.session_started_at >= ts.from_ AND was.session_started_at < ts.to_)
// OR (was.session_ended_at > ts.from_ AND was.session_ended_at < ts.to_)
// OR (was.session_started_at < ts.from_ AND was.session_ended_at >= ts.to_)
if !(((s.SessionStartedAt.After(arg.StartTime) || s.SessionStartedAt.Equal(arg.StartTime)) && s.SessionStartedAt.Before(arg.EndTime)) ||
(s.SessionEndedAt.After(arg.StartTime) && s.SessionEndedAt.Before(arg.EndTime)) ||
(s.SessionStartedAt.Before(arg.StartTime) && (s.SessionEndedAt.After(arg.EndTime) || s.SessionEndedAt.Equal(arg.EndTime)))) {
continue
}

w, err := q.getWorkspaceByIDNoLock(ctx, s.WorkspaceID)
if err != nil {
return nil, err
}

app, _ := q.getWorkspaceAppByAgentIDAndSlugNoLock(ctx, database.GetWorkspaceAppByAgentIDAndSlugParams{
AgentID: s.AgentID,
Slug: s.SlugOrPort,
})

key := uniqueKey{
TemplateID: w.TemplateID,
DisplayName: app.DisplayName,
Slug: app.Slug,
}

t := s.SessionStartedAt.Truncate(time.Minute)
if t.Before(arg.StartTime) {
t = arg.StartTime
}
for t.Before(s.SessionEndedAt) && t.Before(arg.EndTime) {
if _, ok := usageByTemplateAppUser[key]; !ok {
usageByTemplateAppUser[key] = map[time.Time]map[uuid.UUID]int64{}
}
if _, ok := usageByTemplateAppUser[key][t]; !ok {
usageByTemplateAppUser[key][t] = map[uuid.UUID]int64{}
}
if _, ok := usageByTemplateAppUser[key][t][s.UserID]; !ok {
usageByTemplateAppUser[key][t][s.UserID] = 60 // 1 minute
}
t = t.Add(1 * time.Minute)
}
}

// Sort usage data
usageKeys := make([]uniqueKey, len(usageByTemplateAppUser))
var i int
for key := range usageByTemplateAppUser {
usageKeys[i] = key
i++
}

slices.SortFunc(usageKeys, func(a, b uniqueKey) int {
if a.TemplateID != b.TemplateID {
return slice.Ascending(a.TemplateID.String(), b.TemplateID.String())
}
if a.DisplayName != b.DisplayName {
return slice.Ascending(a.DisplayName, b.DisplayName)
}
return slice.Ascending(a.Slug, b.Slug)
})

// Build result
var result []database.GetTemplateAppInsightsByTemplateRow
for _, usageKey := range usageKeys {
r := database.GetTemplateAppInsightsByTemplateRow{
TemplateID: usageKey.TemplateID,
DisplayName: sql.NullString{String: usageKey.DisplayName, Valid: true},
SlugOrPort: usageKey.Slug,
}
for _, mUserUsage := range usageByTemplateAppUser[usageKey] {
r.ActiveUsers += int64(len(mUserUsage))
for _, usage := range mUserUsage {
r.UsageSeconds += usage
}
}
result = append(result, r)
}
return result, nil
}

func (q *FakeQuerier) GetTemplateAverageBuildTime(ctx context.Context, arg database.GetTemplateAverageBuildTimeParams) (database.GetTemplateAverageBuildTimeRow, error) {
if err := validateDatabaseType(arg); err != nil {
return database.GetTemplateAverageBuildTimeRow{}, err
Expand Down
7 changes: 7 additions & 0 deletions coderd/database/dbmetrics/dbmetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions coderd/database/querier.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 90 additions & 0 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions coderd/database/queries/insights.sql
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,53 @@ SELECT
FROM app_stats_by_user_and_agent
GROUP BY access_method, slug_or_port, display_name, icon, is_app;

-- name: GetTemplateAppInsightsByTemplate :many
WITH app_stats_by_user_and_agent AS (
SELECT
s.start_time,
60 as seconds,
w.template_id,
was.user_id,
was.agent_id,
was.slug_or_port,
wa.display_name,
(wa.slug IS NOT NULL)::boolean AS is_app
FROM workspace_app_stats was
JOIN workspaces w ON (
w.id = was.workspace_id
)
-- We do a left join here because we want to include user IDs that have used
-- e.g. ports when counting active users.
LEFT JOIN workspace_apps wa ON (
wa.agent_id = was.agent_id
AND wa.slug = was.slug_or_port
)
-- This table contains both 1 minute entries and >1 minute entries,
-- to calculate this with our uniqueness constraints, we generate series
-- for the longer intervals.
CROSS JOIN LATERAL generate_series(
date_trunc('minute', was.session_started_at),
-- Subtract 1 microsecond to avoid creating an extra series.
date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
'1 minute'::interval
) s(start_time)
WHERE
s.start_time >= @start_time::timestamptz
-- Subtract one minute because the series only contains the start time.
AND s.start_time < (@end_time::timestamptz) - '1 minute'::interval
GROUP BY s.start_time, w.template_id, was.user_id, was.agent_id, was.slug_or_port, wa.display_name, wa.slug
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to keep this part as similar as GetTemplateAppInsights as possible, to reduce the maintenance burden/overhead of figuring out if a change should affect both? AFAICT the only change is removal of access_method, but if we drop that there's also other simplifications we could do. Similarly, I don't think we gain much by dropping it.

Copy link
Member Author

@mtojek mtojek Nov 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reasons why I decided to "fork" the query are:

  1. Pulling unused columns from the database - I guess we can pull more as these fields do not carry heavy data.
  2. AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN w.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END in GetTemplateAppInsights will be always false, so I just dropped it.
  3. No need to perform array_agg in the last SELECT operation.

I would rather keep the forked query, but I could add a comment to justify above differences? Unless you are aware of a smart trick which isn't a FUNCTION.

EDIT:

I guess we can improve this in a follow-up 👍


SELECT
template_id,
display_name,
slug_or_port,
COALESCE(COUNT(DISTINCT user_id))::bigint AS active_users,
SUM(seconds) AS usage_seconds
FROM app_stats_by_user_and_agent
WHERE is_app IS TRUE
GROUP BY template_id, display_name, slug_or_port;

-- name: GetTemplateInsightsByInterval :many
-- GetTemplateInsightsByInterval returns all intervals between start and end
-- time, if end time is a partial interval, it will be included in the results and
Expand Down
8 changes: 4 additions & 4 deletions coderd/insights.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,15 +452,15 @@ func convertTemplateInsightsApps(usage database.GetTemplateInsightsRow, appUsage
{
TemplateIDs: usage.TemplateIDs,
Type: codersdk.TemplateAppsTypeBuiltin,
DisplayName: "Visual Studio Code",
DisplayName: codersdk.TemplateBuiltinAppDisplayNameVSCode,
Slug: "vscode",
Icon: "/icon/code.svg",
Seconds: usage.UsageVscodeSeconds,
},
{
TemplateIDs: usage.TemplateIDs,
Type: codersdk.TemplateAppsTypeBuiltin,
DisplayName: "JetBrains",
DisplayName: codersdk.TemplateBuiltinAppDisplayNameJetBrains,
Slug: "jetbrains",
Icon: "/icon/intellij.svg",
Seconds: usage.UsageJetbrainsSeconds,
Expand All @@ -474,15 +474,15 @@ func convertTemplateInsightsApps(usage database.GetTemplateInsightsRow, appUsage
{
TemplateIDs: usage.TemplateIDs,
Type: codersdk.TemplateAppsTypeBuiltin,
DisplayName: "Web Terminal",
DisplayName: codersdk.TemplateBuiltinAppDisplayNameWebTerminal,
Slug: "reconnecting-pty",
Icon: "/icon/terminal.svg",
Seconds: usage.UsageReconnectingPtySeconds,
},
{
TemplateIDs: usage.TemplateIDs,
Type: codersdk.TemplateAppsTypeBuiltin,
DisplayName: "SSH",
DisplayName: codersdk.TemplateBuiltinAppDisplayNameSSH,
Slug: "ssh",
Icon: "/icon/terminal.svg",
Seconds: usage.UsageSshSeconds,
Expand Down
Loading