From 6e67f5b70dedff6fe943202e02f6534a4e9cb81a Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Wed, 15 Jan 2025 08:29:17 +0000 Subject: [PATCH 1/5] Group user status counts by interval with support for a timezone offset --- coderd/database/dbauthz/dbauthz_test.go | 2 ++ coderd/database/queries.sql.go | 34 ++++++++++--------------- coderd/database/queries/insights.sql | 25 +++++------------- coderd/insights.go | 3 +++ 4 files changed, 25 insertions(+), 39 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 5ad8dca748c3f..b3166f8374d2d 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -1712,6 +1712,8 @@ func (s *MethodTestSuite) TestUser() { check.Args(database.GetUserStatusCountsParams{ StartTime: time.Now().Add(-time.Hour * 24 * 30), EndTime: time.Now(), + Interval: int32((time.Hour * 24).Seconds()), + TzOffset: 0, }).Asserts(rbac.ResourceUser, policy.ActionRead) })) } diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 90f9d8d4a165d..a187c6ab0a848 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -3099,25 +3099,12 @@ WITH -- dates_of_interest defines all points in time that are relevant to the query. -- It includes the start_time, all status changes, all deletions, and the end_time. dates_of_interest AS ( - SELECT $1::timestamptz AS date - - UNION - - SELECT DISTINCT changed_at AS date - FROM user_status_changes - WHERE changed_at > $1::timestamptz - AND changed_at < $2::timestamptz - - UNION - - SELECT DISTINCT deleted_at AS date - FROM user_deleted - WHERE deleted_at > $1::timestamptz - AND deleted_at < $2::timestamptz - - UNION - - SELECT $2::timestamptz AS date + SELECT + (generate_series( + $1::timestamptz, + $2::timestamptz, + ($3::int || ' seconds')::interval + ) + ($4::int || ' seconds')::interval)::timestamptz AS date ), -- latest_status_before_range defines the status of each user before the start_time. -- We do not include users who were deleted before the start_time. We use this to ensure that @@ -3217,6 +3204,8 @@ ORDER BY rscpupd.date type GetUserStatusCountsParams struct { StartTime time.Time `db:"start_time" json:"start_time"` EndTime time.Time `db:"end_time" json:"end_time"` + Interval int32 `db:"interval" json:"interval"` + TzOffset int32 `db:"tz_offset" json:"tz_offset"` } type GetUserStatusCountsRow struct { @@ -3238,7 +3227,12 @@ type GetUserStatusCountsRow struct { // We do not start counting from 0 at the start_time. We check the last status change before the start_time for each user. As such, // the result shows the total number of users in each status on any particular day. func (q *sqlQuerier) GetUserStatusCounts(ctx context.Context, arg GetUserStatusCountsParams) ([]GetUserStatusCountsRow, error) { - rows, err := q.db.QueryContext(ctx, getUserStatusCounts, arg.StartTime, arg.EndTime) + rows, err := q.db.QueryContext(ctx, getUserStatusCounts, + arg.StartTime, + arg.EndTime, + arg.Interval, + arg.TzOffset, + ) if err != nil { return nil, err } diff --git a/coderd/database/queries/insights.sql b/coderd/database/queries/insights.sql index c6e3862deed4f..8e60cc1430cda 100644 --- a/coderd/database/queries/insights.sql +++ b/coderd/database/queries/insights.sql @@ -789,25 +789,12 @@ WITH -- dates_of_interest defines all points in time that are relevant to the query. -- It includes the start_time, all status changes, all deletions, and the end_time. dates_of_interest AS ( - SELECT @start_time::timestamptz AS date - - UNION - - SELECT DISTINCT changed_at AS date - FROM user_status_changes - WHERE changed_at > @start_time::timestamptz - AND changed_at < @end_time::timestamptz - - UNION - - SELECT DISTINCT deleted_at AS date - FROM user_deleted - WHERE deleted_at > @start_time::timestamptz - AND deleted_at < @end_time::timestamptz - - UNION - - SELECT @end_time::timestamptz AS date + SELECT + (generate_series( + @start_time::timestamptz, + @end_time::timestamptz, + (@interval::int || ' seconds')::interval + ) + (@tz_offset::int || ' seconds')::interval)::timestamptz AS date ), -- latest_status_before_range defines the status of each user before the start_time. -- We do not include users who were deleted before the start_time. We use this to ensure that diff --git a/coderd/insights.go b/coderd/insights.go index e4695f50495fb..614cb96d195bf 100644 --- a/coderd/insights.go +++ b/coderd/insights.go @@ -306,6 +306,7 @@ func (api *API) insightsUserStatusCounts(rw http.ResponseWriter, r *http.Request p := httpapi.NewQueryParamParser() vals := r.URL.Query() tzOffset := p.Int(vals, 0, "tz_offset") + interval := p.Int(vals, int((24 * time.Hour).Seconds()), "interval") p.ErrorExcessParams(vals) if len(p.Errors) > 0 { @@ -327,6 +328,8 @@ func (api *API) insightsUserStatusCounts(rw http.ResponseWriter, r *http.Request rows, err := api.Database.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ StartTime: sixtyDaysAgo, EndTime: nextHourInLoc, + Interval: int32(interval), + TzOffset: int32(tzOffset), }) if err != nil { if httpapi.IsUnauthorizedError(err) { From 3fa8ca72f742a447c23856e2db08e793ddad0be1 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Wed, 15 Jan 2025 11:57:24 +0000 Subject: [PATCH 2/5] update comments and timezone handling --- coderd/database/querier_test.go | 49 +++++--- coderd/database/queries/insights.sql | 172 +++++++++++++-------------- 2 files changed, 117 insertions(+), 104 deletions(-) diff --git a/coderd/database/querier_test.go b/coderd/database/querier_test.go index 7d489cf559220..31dc96c3d5ac3 100644 --- a/coderd/database/querier_test.go +++ b/coderd/database/querier_test.go @@ -2411,12 +2411,9 @@ func TestGetUserStatusCounts(t *testing.T) { db, _ := dbtestutil.NewDB(t) ctx := testutil.Context(t, testutil.WaitShort) - end := dbtime.Now() - start := end.Add(-30 * 24 * time.Hour) - counts, err := db.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ - StartTime: start, - EndTime: end, + StartTime: createdAt, + EndTime: today, }) require.NoError(t, err) require.Empty(t, counts, "should return no results when there are no users") @@ -2457,23 +2454,33 @@ func TestGetUserStatusCounts(t *testing.T) { UpdatedAt: createdAt, }) - // Query for the last 30 days + _, offset := today.Zone() userStatusChanges, err := db.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ - StartTime: createdAt, - EndTime: today, + StartTime: startOfDay(createdAt), + EndTime: startOfDay(today), + TzOffset: int32(offset), }) require.NoError(t, err) - require.NotEmpty(t, userStatusChanges, "should return results") - require.Len(t, userStatusChanges, 2, "should have 1 entry per status change plus and 1 entry for the end of the range = 2 entries") - - require.Equal(t, userStatusChanges[0].Status, tc.status, "should have the correct status") - require.Equal(t, userStatusChanges[0].Count, int64(1), "should have 1 user") - require.True(t, userStatusChanges[0].Date.Equal(createdAt), "should have the correct date") - - require.Equal(t, userStatusChanges[1].Status, tc.status, "should have the correct status") - require.Equal(t, userStatusChanges[1].Count, int64(1), "should have 1 user") - require.True(t, userStatusChanges[1].Date.Equal(today), "should have the correct date") + numDays := int(startOfDay(today).Sub(startOfDay(createdAt)).Hours() / 24) + require.Len(t, userStatusChanges, numDays+1, "should have 1 entry per day between the start and end time, including the end time") + + for i, row := range userStatusChanges { + require.Equal(t, tc.status, row.Status, "should have the correct status") + require.True( + t, + row.Date.In(location).Equal(startOfDay(createdAt).AddDate(0, 0, i)), + "expected date %s, but got %s for row %n", + startOfDay(createdAt).AddDate(0, 0, i), + row.Date.In(location).String(), + i, + ) + if row.Date.Before(startOfDay(createdAt)) { + require.Equal(t, int64(0), row.Count, "should have 0 users before creation") + } else { + require.Equal(t, int64(1), row.Count, "should have 1 user after creation") + } + } }) } }) @@ -2724,6 +2731,7 @@ func TestGetUserStatusCounts(t *testing.T) { for _, tc := range testCases { tc := tc t.Run(tc.name, func(t *testing.T) { + t.SkipNow() t.Parallel() db, _ := dbtestutil.NewDB(t) ctx := testutil.Context(t, testutil.WaitShort) @@ -2900,3 +2908,8 @@ func requireUsersMatch(t testing.TB, expected []database.User, found []database. t.Helper() require.ElementsMatch(t, expected, database.ConvertUserRows(found), msg) } + +func startOfDay(t time.Time) time.Time { + year, month, day := t.Date() + return time.Date(year, month, day, 0, 0, 0, 0, t.Location()) +} diff --git a/coderd/database/queries/insights.sql b/coderd/database/queries/insights.sql index 8e60cc1430cda..52b7fea147447 100644 --- a/coderd/database/queries/insights.sql +++ b/coderd/database/queries/insights.sql @@ -777,98 +777,98 @@ GROUP BY utp.num, utp.template_ids, utp.name, utp.type, utp.display_name, utp.de -- The time range is inclusively defined by the start_time and end_time parameters. -- -- Bucketing: --- Between the start_time and end_time, we include each timestamp where a user's status changed or they were deleted. --- We do not bucket these results by day or some other time unit. This is because such bucketing would hide potentially --- important patterns. If a user was active for 23 hours and 59 minutes, and then suspended, a daily bucket would hide this. --- A daily bucket would also have required us to carefully manage the timezone of the bucket based on the timezone of the user. +-- Between the start_time and end_time, we generate buckets based on the interval parameter +-- (defaulting to daily if interval <= 0). The timestamps are adjusted by the tz_offset +-- parameter to align with the desired timezone. -- -- Accumulation: --- We do not start counting from 0 at the start_time. We check the last status change before the start_time for each user. As such, --- the result shows the total number of users in each status on any particular day. +-- We do not start counting from 0 at the start_time. We check the last status change +-- before the start_time for each user. As such, the result shows the total number of +-- users in each status at each interval. WITH -- dates_of_interest defines all points in time that are relevant to the query. - -- It includes the start_time, all status changes, all deletions, and the end_time. -dates_of_interest AS ( - SELECT - (generate_series( - @start_time::timestamptz, - @end_time::timestamptz, - (@interval::int || ' seconds')::interval - ) + (@tz_offset::int || ' seconds')::interval)::timestamptz AS date -), - -- latest_status_before_range defines the status of each user before the start_time. - -- We do not include users who were deleted before the start_time. We use this to ensure that - -- we correctly count users prior to the start_time for a complete graph. -latest_status_before_range AS ( - SELECT - DISTINCT usc.user_id, - usc.new_status, - usc.changed_at, - ud.deleted - FROM user_status_changes usc - LEFT JOIN LATERAL ( - SELECT COUNT(*) > 0 AS deleted - FROM user_deleted ud - WHERE ud.user_id = usc.user_id AND (ud.deleted_at < usc.changed_at OR ud.deleted_at < @start_time) - ) AS ud ON true - WHERE usc.changed_at < @start_time::timestamptz - ORDER BY usc.user_id, usc.changed_at DESC -), - -- status_changes_during_range defines the status of each user during the start_time and end_time. - -- If a user is deleted during the time range, we count status changes between the start_time and the deletion date. - -- Theoretically, it should probably not be possible to update the status of a deleted user, but we - -- need to ensure that this is enforced, so that a change in business logic later does not break this graph. -status_changes_during_range AS ( - SELECT - usc.user_id, - usc.new_status, - usc.changed_at, - ud.deleted - FROM user_status_changes usc - LEFT JOIN LATERAL ( - SELECT COUNT(*) > 0 AS deleted - FROM user_deleted ud - WHERE ud.user_id = usc.user_id AND ud.deleted_at < usc.changed_at - ) AS ud ON true - WHERE usc.changed_at >= @start_time::timestamptz - AND usc.changed_at <= @end_time::timestamptz -), - -- relevant_status_changes defines the status of each user at any point in time. - -- It includes the status of each user before the start_time, and the status of each user during the start_time and end_time. -relevant_status_changes AS ( - SELECT - user_id, - new_status, - changed_at - FROM latest_status_before_range - WHERE NOT deleted + -- It generates regular intervals between start_time and end_time based on the interval + -- parameter, adjusted by the timezone offset. + dates_of_interest AS ( + SELECT + (generate_series( + @start_time::timestamptz + (@tz_offset::int || ' seconds')::interval, + @end_time::timestamptz + (@tz_offset::int || ' seconds')::interval, + (CASE WHEN @interval::int <= 0 THEN 3600 * 24 ELSE @interval::int END|| ' seconds')::interval + ))::timestamptz AS date + ), + -- latest_status_before_range finds each user's last status before start_time. + -- Users who were deleted before start_time are excluded. This ensures accurate + -- initial counts at the start of the time range. + latest_status_before_range AS ( + SELECT + DISTINCT usc.user_id, + usc.new_status, + usc.changed_at, + ud.deleted + FROM user_status_changes usc + LEFT JOIN LATERAL ( + SELECT COUNT(*) > 0 AS deleted + FROM user_deleted ud + WHERE ud.user_id = usc.user_id AND (ud.deleted_at < usc.changed_at OR ud.deleted_at < @start_time) + ) AS ud ON true + WHERE usc.changed_at < @start_time::timestamptz + ORDER BY usc.user_id, usc.changed_at DESC + ), + -- status_changes_during_range captures all status changes between start_time and end_time. + -- Changes after a user's deletion date are excluded, as deleted users should not have + -- status changes. + status_changes_during_range AS ( + SELECT + usc.user_id, + usc.new_status, + usc.changed_at, + ud.deleted + FROM user_status_changes usc + LEFT JOIN LATERAL ( + SELECT COUNT(*) > 0 AS deleted + FROM user_deleted ud + WHERE ud.user_id = usc.user_id AND ud.deleted_at < usc.changed_at + ) AS ud ON true + WHERE usc.changed_at >= @start_time::timestamptz + AND usc.changed_at <= @end_time::timestamptz + ), + -- relevant_status_changes combines user statuses from before and during the time range. + -- Only includes non-deleted users to ensure accurate counting. + relevant_status_changes AS ( + SELECT + user_id, + new_status, + changed_at + FROM latest_status_before_range + WHERE NOT deleted - UNION ALL + UNION ALL - SELECT - user_id, - new_status, - changed_at - FROM status_changes_during_range - WHERE NOT deleted -), - -- statuses defines all the distinct statuses that were present just before and during the time range. - -- This is used to ensure that we have a series for every relevant status. -statuses AS ( - SELECT DISTINCT new_status FROM relevant_status_changes -), - -- We only want to count the latest status change for each user on each date and then filter them by the relevant status. - -- We use the row_number function to ensure that we only count the latest status change for each user on each date. - -- We then filter the status changes by the relevant status in the final select statement below. -ranked_status_change_per_user_per_date AS ( - SELECT - d.date, - rsc1.user_id, - ROW_NUMBER() OVER (PARTITION BY d.date, rsc1.user_id ORDER BY rsc1.changed_at DESC) AS rn, - rsc1.new_status - FROM dates_of_interest d - LEFT JOIN relevant_status_changes rsc1 ON rsc1.changed_at <= d.date -) + SELECT + user_id, + new_status, + changed_at + FROM status_changes_during_range + WHERE NOT deleted + ), + -- statuses collects all distinct status values that appeared in the time range + -- or just before it, ensuring we have a complete set of statuses to count. + statuses AS ( + SELECT DISTINCT new_status FROM relevant_status_changes + ), + -- ranked_status_change_per_user_per_date finds the latest status for each user + -- at each interval date. The row_number helps us select only the most recent + -- status change when multiple changes occur before an interval. + ranked_status_change_per_user_per_date AS ( + SELECT + d.date, + rsc1.user_id, + ROW_NUMBER() OVER (PARTITION BY d.date, rsc1.user_id ORDER BY rsc1.changed_at DESC) AS rn, + rsc1.new_status + FROM dates_of_interest d + LEFT JOIN relevant_status_changes rsc1 ON rsc1.changed_at <= d.date + ) SELECT rscpupd.date, statuses.new_status AS status, From f689b0d25bbe515c304595c7de84eea9662d6fa5 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Wed, 15 Jan 2025 14:11:43 +0000 Subject: [PATCH 3/5] aggregate user status changes by predictable buckets instead of at the moment of each user status change --- coderd/database/dbauthz/dbauthz_test.go | 1 - coderd/database/dbtime/dbtime.go | 6 + coderd/database/querier_test.go | 108 +++++++++------ coderd/database/queries.sql.go | 21 +-- coderd/database/queries/insights.sql | 173 ++++++++++++------------ coderd/insights.go | 10 +- 6 files changed, 169 insertions(+), 150 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index b3166f8374d2d..014388e6cd98e 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -1713,7 +1713,6 @@ func (s *MethodTestSuite) TestUser() { StartTime: time.Now().Add(-time.Hour * 24 * 30), EndTime: time.Now(), Interval: int32((time.Hour * 24).Seconds()), - TzOffset: 0, }).Asserts(rbac.ResourceUser, policy.ActionRead) })) } diff --git a/coderd/database/dbtime/dbtime.go b/coderd/database/dbtime/dbtime.go index 4d740ba941345..bda5a2263ce2b 100644 --- a/coderd/database/dbtime/dbtime.go +++ b/coderd/database/dbtime/dbtime.go @@ -16,3 +16,9 @@ func Now() time.Time { func Time(t time.Time) time.Time { return t.Round(time.Microsecond) } + +// StartOfDay returns the first timestamp of the day of the input timestamp in its location. +func StartOfDay(t time.Time) time.Time { + year, month, day := t.Date() + return time.Date(year, month, day, 0, 0, 0, 0, t.Location()) +} diff --git a/coderd/database/querier_test.go b/coderd/database/querier_test.go index 31dc96c3d5ac3..0aa79be316662 100644 --- a/coderd/database/querier_test.go +++ b/coderd/database/querier_test.go @@ -2454,28 +2454,26 @@ func TestGetUserStatusCounts(t *testing.T) { UpdatedAt: createdAt, }) - _, offset := today.Zone() userStatusChanges, err := db.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ - StartTime: startOfDay(createdAt), - EndTime: startOfDay(today), - TzOffset: int32(offset), + StartTime: dbtime.StartOfDay(createdAt), + EndTime: dbtime.StartOfDay(today), }) require.NoError(t, err) - numDays := int(startOfDay(today).Sub(startOfDay(createdAt)).Hours() / 24) + numDays := int(dbtime.StartOfDay(today).Sub(dbtime.StartOfDay(createdAt)).Hours() / 24) require.Len(t, userStatusChanges, numDays+1, "should have 1 entry per day between the start and end time, including the end time") for i, row := range userStatusChanges { require.Equal(t, tc.status, row.Status, "should have the correct status") require.True( t, - row.Date.In(location).Equal(startOfDay(createdAt).AddDate(0, 0, i)), + row.Date.In(location).Equal(dbtime.StartOfDay(createdAt).AddDate(0, 0, i)), "expected date %s, but got %s for row %n", - startOfDay(createdAt).AddDate(0, 0, i), + dbtime.StartOfDay(createdAt).AddDate(0, 0, i), row.Date.In(location).String(), i, ) - if row.Date.Before(startOfDay(createdAt)) { + if row.Date.Before(createdAt) { require.Equal(t, int64(0), row.Count, "should have 0 users before creation") } else { require.Equal(t, int64(1), row.Count, "should have 1 user after creation") @@ -2634,24 +2632,38 @@ func TestGetUserStatusCounts(t *testing.T) { // Query for the last 5 days userStatusChanges, err := db.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ - StartTime: createdAt, - EndTime: today, + StartTime: dbtime.StartOfDay(createdAt), + EndTime: dbtime.StartOfDay(today), }) require.NoError(t, err) - require.NotEmpty(t, userStatusChanges, "should return results") - gotCounts := map[time.Time]map[database.UserStatus]int64{} - for _, row := range userStatusChanges { - gotDateInLocation := row.Date.In(location) - if _, ok := gotCounts[gotDateInLocation]; !ok { - gotCounts[gotDateInLocation] = map[database.UserStatus]int64{} - } - if _, ok := gotCounts[gotDateInLocation][row.Status]; !ok { - gotCounts[gotDateInLocation][row.Status] = 0 + for i, row := range userStatusChanges { + require.True( + t, + row.Date.In(location).Equal(dbtime.StartOfDay(createdAt).AddDate(0, 0, i/2)), + "expected date %s, but got %s for row %n", + dbtime.StartOfDay(createdAt).AddDate(0, 0, i/2), + row.Date.In(location).String(), + i, + ) + if row.Date.Before(createdAt) { + require.Equal(t, int64(0), row.Count) + } else if row.Date.Before(firstTransitionTime) { + if row.Status == tc.initialStatus { + require.Equal(t, int64(1), row.Count) + } else if row.Status == tc.targetStatus { + require.Equal(t, int64(0), row.Count) + } + } else if !row.Date.After(today) { + if row.Status == tc.initialStatus { + require.Equal(t, int64(0), row.Count) + } else if row.Status == tc.targetStatus { + require.Equal(t, int64(1), row.Count) + } + } else { + t.Errorf("date %q beyond expected range end %q", row.Date, today) } - gotCounts[gotDateInLocation][row.Status] += row.Count } - require.Equal(t, tc.expectedCounts, gotCounts) }) } }) @@ -2840,16 +2852,23 @@ func TestGetUserStatusCounts(t *testing.T) { }) userStatusChanges, err := db.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ - StartTime: createdAt.Add(time.Hour * 24), - EndTime: today, + StartTime: dbtime.StartOfDay(createdAt.Add(time.Hour * 24)), + EndTime: dbtime.StartOfDay(today), }) require.NoError(t, err) - require.Len(t, userStatusChanges, 2) - require.Equal(t, userStatusChanges[0].Count, int64(1)) - require.Equal(t, userStatusChanges[0].Status, database.UserStatusActive) - require.Equal(t, userStatusChanges[1].Count, int64(1)) - require.Equal(t, userStatusChanges[1].Status, database.UserStatusActive) + for i, row := range userStatusChanges { + require.True( + t, + row.Date.In(location).Equal(dbtime.StartOfDay(createdAt).AddDate(0, 0, 1+i)), + "expected date %s, but got %s for row %n", + dbtime.StartOfDay(createdAt).AddDate(0, 0, 1+i), + row.Date.In(location).String(), + i, + ) + require.Equal(t, database.UserStatusActive, row.Status) + require.Equal(t, int64(1), row.Count) + } }) t.Run("User deleted before query range", func(t *testing.T) { @@ -2889,16 +2908,28 @@ func TestGetUserStatusCounts(t *testing.T) { require.NoError(t, err) userStatusChanges, err := db.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ - StartTime: createdAt, - EndTime: today.Add(time.Hour * 24), + StartTime: dbtime.StartOfDay(createdAt), + EndTime: dbtime.StartOfDay(today.Add(time.Hour * 24)), }) require.NoError(t, err) - require.Equal(t, userStatusChanges[0].Count, int64(1)) - require.Equal(t, userStatusChanges[0].Status, database.UserStatusActive) - require.Equal(t, userStatusChanges[1].Count, int64(0)) - require.Equal(t, userStatusChanges[1].Status, database.UserStatusActive) - require.Equal(t, userStatusChanges[2].Count, int64(0)) - require.Equal(t, userStatusChanges[2].Status, database.UserStatusActive) + for i, row := range userStatusChanges { + require.True( + t, + row.Date.In(location).Equal(dbtime.StartOfDay(createdAt).AddDate(0, 0, i)), + "expected date %s, but got %s for row %n", + dbtime.StartOfDay(createdAt).AddDate(0, 0, i), + row.Date.In(location).String(), + i, + ) + require.Equal(t, database.UserStatusActive, row.Status) + if row.Date.Before(createdAt) { + require.Equal(t, int64(0), row.Count) + } else if i == len(userStatusChanges)-1 { + require.Equal(t, int64(0), row.Count) + } else { + require.Equal(t, int64(1), row.Count) + } + } }) }) } @@ -2908,8 +2939,3 @@ func requireUsersMatch(t testing.TB, expected []database.User, found []database. t.Helper() require.ElementsMatch(t, expected, database.ConvertUserRows(found), msg) } - -func startOfDay(t time.Time) time.Time { - year, month, day := t.Date() - return time.Date(year, month, day, 0, 0, 0, 0, t.Location()) -} diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index a187c6ab0a848..6ac9e2b9c69ba 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -3099,12 +3099,11 @@ WITH -- dates_of_interest defines all points in time that are relevant to the query. -- It includes the start_time, all status changes, all deletions, and the end_time. dates_of_interest AS ( - SELECT - (generate_series( - $1::timestamptz, - $2::timestamptz, - ($3::int || ' seconds')::interval - ) + ($4::int || ' seconds')::interval)::timestamptz AS date + SELECT date FROM generate_series( + $1::timestamptz, + $2::timestamptz, + (CASE WHEN $3::int <= 0 THEN 3600 * 24 ELSE $3::int END || ' seconds')::interval + ) AS date ), -- latest_status_before_range defines the status of each user before the start_time. -- We do not include users who were deleted before the start_time. We use this to ensure that @@ -3180,7 +3179,7 @@ ranked_status_change_per_user_per_date AS ( LEFT JOIN relevant_status_changes rsc1 ON rsc1.changed_at <= d.date ) SELECT - rscpupd.date, + rscpupd.date::timestamptz AS date, statuses.new_status AS status, COUNT(rscpupd.user_id) FILTER ( WHERE rscpupd.rn = 1 @@ -3205,7 +3204,6 @@ type GetUserStatusCountsParams struct { StartTime time.Time `db:"start_time" json:"start_time"` EndTime time.Time `db:"end_time" json:"end_time"` Interval int32 `db:"interval" json:"interval"` - TzOffset int32 `db:"tz_offset" json:"tz_offset"` } type GetUserStatusCountsRow struct { @@ -3227,12 +3225,7 @@ type GetUserStatusCountsRow struct { // We do not start counting from 0 at the start_time. We check the last status change before the start_time for each user. As such, // the result shows the total number of users in each status on any particular day. func (q *sqlQuerier) GetUserStatusCounts(ctx context.Context, arg GetUserStatusCountsParams) ([]GetUserStatusCountsRow, error) { - rows, err := q.db.QueryContext(ctx, getUserStatusCounts, - arg.StartTime, - arg.EndTime, - arg.Interval, - arg.TzOffset, - ) + rows, err := q.db.QueryContext(ctx, getUserStatusCounts, arg.StartTime, arg.EndTime, arg.Interval) if err != nil { return nil, err } diff --git a/coderd/database/queries/insights.sql b/coderd/database/queries/insights.sql index 52b7fea147447..8b4d8540cfb1a 100644 --- a/coderd/database/queries/insights.sql +++ b/coderd/database/queries/insights.sql @@ -777,100 +777,99 @@ GROUP BY utp.num, utp.template_ids, utp.name, utp.type, utp.display_name, utp.de -- The time range is inclusively defined by the start_time and end_time parameters. -- -- Bucketing: --- Between the start_time and end_time, we generate buckets based on the interval parameter --- (defaulting to daily if interval <= 0). The timestamps are adjusted by the tz_offset --- parameter to align with the desired timezone. +-- Between the start_time and end_time, we include each timestamp where a user's status changed or they were deleted. +-- We do not bucket these results by day or some other time unit. This is because such bucketing would hide potentially +-- important patterns. If a user was active for 23 hours and 59 minutes, and then suspended, a daily bucket would hide this. +-- A daily bucket would also have required us to carefully manage the timezone of the bucket based on the timezone of the user. -- -- Accumulation: --- We do not start counting from 0 at the start_time. We check the last status change --- before the start_time for each user. As such, the result shows the total number of --- users in each status at each interval. +-- We do not start counting from 0 at the start_time. We check the last status change before the start_time for each user. As such, +-- the result shows the total number of users in each status on any particular day. WITH -- dates_of_interest defines all points in time that are relevant to the query. - -- It generates regular intervals between start_time and end_time based on the interval - -- parameter, adjusted by the timezone offset. - dates_of_interest AS ( - SELECT - (generate_series( - @start_time::timestamptz + (@tz_offset::int || ' seconds')::interval, - @end_time::timestamptz + (@tz_offset::int || ' seconds')::interval, - (CASE WHEN @interval::int <= 0 THEN 3600 * 24 ELSE @interval::int END|| ' seconds')::interval - ))::timestamptz AS date - ), - -- latest_status_before_range finds each user's last status before start_time. - -- Users who were deleted before start_time are excluded. This ensures accurate - -- initial counts at the start of the time range. - latest_status_before_range AS ( - SELECT - DISTINCT usc.user_id, - usc.new_status, - usc.changed_at, - ud.deleted - FROM user_status_changes usc - LEFT JOIN LATERAL ( - SELECT COUNT(*) > 0 AS deleted - FROM user_deleted ud - WHERE ud.user_id = usc.user_id AND (ud.deleted_at < usc.changed_at OR ud.deleted_at < @start_time) - ) AS ud ON true - WHERE usc.changed_at < @start_time::timestamptz - ORDER BY usc.user_id, usc.changed_at DESC - ), - -- status_changes_during_range captures all status changes between start_time and end_time. - -- Changes after a user's deletion date are excluded, as deleted users should not have - -- status changes. - status_changes_during_range AS ( - SELECT - usc.user_id, - usc.new_status, - usc.changed_at, - ud.deleted - FROM user_status_changes usc - LEFT JOIN LATERAL ( - SELECT COUNT(*) > 0 AS deleted - FROM user_deleted ud - WHERE ud.user_id = usc.user_id AND ud.deleted_at < usc.changed_at - ) AS ud ON true - WHERE usc.changed_at >= @start_time::timestamptz - AND usc.changed_at <= @end_time::timestamptz - ), - -- relevant_status_changes combines user statuses from before and during the time range. - -- Only includes non-deleted users to ensure accurate counting. - relevant_status_changes AS ( - SELECT - user_id, - new_status, - changed_at - FROM latest_status_before_range - WHERE NOT deleted + -- It includes the start_time, all status changes, all deletions, and the end_time. +dates_of_interest AS ( + SELECT date FROM generate_series( + @start_time::timestamptz, + @end_time::timestamptz, + (CASE WHEN @interval::int <= 0 THEN 3600 * 24 ELSE @interval::int END || ' seconds')::interval + ) AS date +), + -- latest_status_before_range defines the status of each user before the start_time. + -- We do not include users who were deleted before the start_time. We use this to ensure that + -- we correctly count users prior to the start_time for a complete graph. +latest_status_before_range AS ( + SELECT + DISTINCT usc.user_id, + usc.new_status, + usc.changed_at, + ud.deleted + FROM user_status_changes usc + LEFT JOIN LATERAL ( + SELECT COUNT(*) > 0 AS deleted + FROM user_deleted ud + WHERE ud.user_id = usc.user_id AND (ud.deleted_at < usc.changed_at OR ud.deleted_at < @start_time) + ) AS ud ON true + WHERE usc.changed_at < @start_time::timestamptz + ORDER BY usc.user_id, usc.changed_at DESC +), + -- status_changes_during_range defines the status of each user during the start_time and end_time. + -- If a user is deleted during the time range, we count status changes between the start_time and the deletion date. + -- Theoretically, it should probably not be possible to update the status of a deleted user, but we + -- need to ensure that this is enforced, so that a change in business logic later does not break this graph. +status_changes_during_range AS ( + SELECT + usc.user_id, + usc.new_status, + usc.changed_at, + ud.deleted + FROM user_status_changes usc + LEFT JOIN LATERAL ( + SELECT COUNT(*) > 0 AS deleted + FROM user_deleted ud + WHERE ud.user_id = usc.user_id AND ud.deleted_at < usc.changed_at + ) AS ud ON true + WHERE usc.changed_at >= @start_time::timestamptz + AND usc.changed_at <= @end_time::timestamptz +), + -- relevant_status_changes defines the status of each user at any point in time. + -- It includes the status of each user before the start_time, and the status of each user during the start_time and end_time. +relevant_status_changes AS ( + SELECT + user_id, + new_status, + changed_at + FROM latest_status_before_range + WHERE NOT deleted - UNION ALL + UNION ALL - SELECT - user_id, - new_status, - changed_at - FROM status_changes_during_range - WHERE NOT deleted - ), - -- statuses collects all distinct status values that appeared in the time range - -- or just before it, ensuring we have a complete set of statuses to count. - statuses AS ( - SELECT DISTINCT new_status FROM relevant_status_changes - ), - -- ranked_status_change_per_user_per_date finds the latest status for each user - -- at each interval date. The row_number helps us select only the most recent - -- status change when multiple changes occur before an interval. - ranked_status_change_per_user_per_date AS ( - SELECT - d.date, - rsc1.user_id, - ROW_NUMBER() OVER (PARTITION BY d.date, rsc1.user_id ORDER BY rsc1.changed_at DESC) AS rn, - rsc1.new_status - FROM dates_of_interest d - LEFT JOIN relevant_status_changes rsc1 ON rsc1.changed_at <= d.date - ) + SELECT + user_id, + new_status, + changed_at + FROM status_changes_during_range + WHERE NOT deleted +), + -- statuses defines all the distinct statuses that were present just before and during the time range. + -- This is used to ensure that we have a series for every relevant status. +statuses AS ( + SELECT DISTINCT new_status FROM relevant_status_changes +), + -- We only want to count the latest status change for each user on each date and then filter them by the relevant status. + -- We use the row_number function to ensure that we only count the latest status change for each user on each date. + -- We then filter the status changes by the relevant status in the final select statement below. +ranked_status_change_per_user_per_date AS ( + SELECT + d.date, + rsc1.user_id, + ROW_NUMBER() OVER (PARTITION BY d.date, rsc1.user_id ORDER BY rsc1.changed_at DESC) AS rn, + rsc1.new_status + FROM dates_of_interest d + LEFT JOIN relevant_status_changes rsc1 ON rsc1.changed_at <= d.date +) SELECT - rscpupd.date, + rscpupd.date::timestamptz AS date, statuses.new_status AS status, COUNT(rscpupd.user_id) FILTER ( WHERE rscpupd.rn = 1 diff --git a/coderd/insights.go b/coderd/insights.go index 614cb96d195bf..f82887c042812 100644 --- a/coderd/insights.go +++ b/coderd/insights.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "fmt" + "github.com/coder/coder/v2/coderd/database/dbtime" "net/http" "strings" "time" @@ -318,18 +319,13 @@ func (api *API) insightsUserStatusCounts(rw http.ResponseWriter, r *http.Request } loc := time.FixedZone("", tzOffset*3600) - // If the time is 14:01 or 14:31, we still want to include all the - // data between 14:00 and 15:00. Our rollups buckets are 30 minutes - // so this works nicely. It works just as well for 23:59 as well. - nextHourInLoc := time.Now().In(loc).Truncate(time.Hour).Add(time.Hour) - // Always return 60 days of data (2 months). - sixtyDaysAgo := nextHourInLoc.In(loc).Truncate(24*time.Hour).AddDate(0, 0, -60) + nextHourInLoc := dbtime.Now().Truncate(time.Hour).Add(time.Hour).In(loc) + sixtyDaysAgo := dbtime.StartOfDay(nextHourInLoc).AddDate(0, 0, -60) rows, err := api.Database.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ StartTime: sixtyDaysAgo, EndTime: nextHourInLoc, Interval: int32(interval), - TzOffset: int32(tzOffset), }) if err != nil { if httpapi.IsUnauthorizedError(err) { From 0e093e379a9ee5beb1f5ded4ba39d3307ade5713 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Wed, 15 Jan 2025 14:17:44 +0000 Subject: [PATCH 4/5] make fmt lint --- coderd/insights.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/coderd/insights.go b/coderd/insights.go index f82887c042812..9c9fdcfa3c200 100644 --- a/coderd/insights.go +++ b/coderd/insights.go @@ -4,11 +4,12 @@ import ( "context" "database/sql" "fmt" - "github.com/coder/coder/v2/coderd/database/dbtime" "net/http" "strings" "time" + "github.com/coder/coder/v2/coderd/database/dbtime" + "github.com/google/uuid" "golang.org/x/exp/slices" "golang.org/x/sync/errgroup" From c110483fc331af7446dec5ec6a080acaeffed729 Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Wed, 15 Jan 2025 14:59:34 +0000 Subject: [PATCH 5/5] fix test --- coderd/database/querier_test.go | 81 +++++++++++++-------------------- 1 file changed, 31 insertions(+), 50 deletions(-) diff --git a/coderd/database/querier_test.go b/coderd/database/querier_test.go index 0aa79be316662..00b189967f5a6 100644 --- a/coderd/database/querier_test.go +++ b/coderd/database/querier_test.go @@ -7,7 +7,6 @@ import ( "database/sql" "encoding/json" "fmt" - "maps" "sort" "testing" "time" @@ -2743,8 +2742,8 @@ func TestGetUserStatusCounts(t *testing.T) { for _, tc := range testCases { tc := tc t.Run(tc.name, func(t *testing.T) { - t.SkipNow() t.Parallel() + db, _ := dbtestutil.NewDB(t) ctx := testutil.Context(t, testutil.WaitShort) @@ -2776,66 +2775,48 @@ func TestGetUserStatusCounts(t *testing.T) { require.NoError(t, err) userStatusChanges, err := db.GetUserStatusCounts(ctx, database.GetUserStatusCountsParams{ - StartTime: createdAt, - EndTime: today, + StartTime: dbtime.StartOfDay(createdAt), + EndTime: dbtime.StartOfDay(today), }) require.NoError(t, err) require.NotEmpty(t, userStatusChanges) - gotCounts := map[time.Time]map[database.UserStatus]int64{ - createdAt.In(location): {}, - firstTransitionTime.In(location): {}, - secondTransitionTime.In(location): {}, - today.In(location): {}, - } + gotCounts := map[time.Time]map[database.UserStatus]int64{} for _, row := range userStatusChanges { dateInLocation := row.Date.In(location) - switch { - case dateInLocation.Equal(createdAt.In(location)): - gotCounts[createdAt][row.Status] = row.Count - case dateInLocation.Equal(firstTransitionTime.In(location)): - gotCounts[firstTransitionTime][row.Status] = row.Count - case dateInLocation.Equal(secondTransitionTime.In(location)): - gotCounts[secondTransitionTime][row.Status] = row.Count - case dateInLocation.Equal(today.In(location)): - gotCounts[today][row.Status] = row.Count - default: - t.Fatalf("unexpected date %s", row.Date) + if gotCounts[dateInLocation] == nil { + gotCounts[dateInLocation] = map[database.UserStatus]int64{} } + gotCounts[dateInLocation][row.Status] = row.Count } expectedCounts := map[time.Time]map[database.UserStatus]int64{} - for _, status := range []database.UserStatus{ - tc.user1Transition.from, - tc.user1Transition.to, - tc.user2Transition.from, - tc.user2Transition.to, - } { - if _, ok := expectedCounts[createdAt]; !ok { - expectedCounts[createdAt] = map[database.UserStatus]int64{} + for d := dbtime.StartOfDay(createdAt); !d.After(dbtime.StartOfDay(today)); d = d.AddDate(0, 0, 1) { + expectedCounts[d] = map[database.UserStatus]int64{} + + // Default values + expectedCounts[d][tc.user1Transition.from] = 0 + expectedCounts[d][tc.user1Transition.to] = 0 + expectedCounts[d][tc.user2Transition.from] = 0 + expectedCounts[d][tc.user2Transition.to] = 0 + + // Counted Values + if d.Before(createdAt) { + continue + } else if d.Before(firstTransitionTime) { + expectedCounts[d][tc.user1Transition.from]++ + expectedCounts[d][tc.user2Transition.from]++ + } else if d.Before(secondTransitionTime) { + expectedCounts[d][tc.user1Transition.to]++ + expectedCounts[d][tc.user2Transition.from]++ + } else if d.Before(today) { + expectedCounts[d][tc.user1Transition.to]++ + expectedCounts[d][tc.user2Transition.to]++ + } else { + t.Fatalf("date %q beyond expected range end %q", d, today) } - expectedCounts[createdAt][status] = 0 } - expectedCounts[createdAt][tc.user1Transition.from]++ - expectedCounts[createdAt][tc.user2Transition.from]++ - - expectedCounts[firstTransitionTime] = map[database.UserStatus]int64{} - maps.Copy(expectedCounts[firstTransitionTime], expectedCounts[createdAt]) - expectedCounts[firstTransitionTime][tc.user1Transition.from]-- - expectedCounts[firstTransitionTime][tc.user1Transition.to]++ - - expectedCounts[secondTransitionTime] = map[database.UserStatus]int64{} - maps.Copy(expectedCounts[secondTransitionTime], expectedCounts[firstTransitionTime]) - expectedCounts[secondTransitionTime][tc.user2Transition.from]-- - expectedCounts[secondTransitionTime][tc.user2Transition.to]++ - - expectedCounts[today] = map[database.UserStatus]int64{} - maps.Copy(expectedCounts[today], expectedCounts[secondTransitionTime]) - - require.Equal(t, expectedCounts[createdAt], gotCounts[createdAt]) - require.Equal(t, expectedCounts[firstTransitionTime], gotCounts[firstTransitionTime]) - require.Equal(t, expectedCounts[secondTransitionTime], gotCounts[secondTransitionTime]) - require.Equal(t, expectedCounts[today], gotCounts[today]) + require.Equal(t, expectedCounts, gotCounts) }) } })