@@ -777,98 +777,98 @@ GROUP BY utp.num, utp.template_ids, utp.name, utp.type, utp.display_name, utp.de
777
777
-- The time range is inclusively defined by the start_time and end_time parameters.
778
778
--
779
779
-- Bucketing:
780
- -- Between the start_time and end_time, we include each timestamp where a user's status changed or they were deleted.
781
- -- We do not bucket these results by day or some other time unit. This is because such bucketing would hide potentially
782
- -- important patterns. If a user was active for 23 hours and 59 minutes, and then suspended, a daily bucket would hide this.
783
- -- A daily bucket would also have required us to carefully manage the timezone of the bucket based on the timezone of the user.
780
+ -- Between the start_time and end_time, we generate buckets based on the interval parameter
781
+ -- (defaulting to daily if interval <= 0). The timestamps are adjusted by the tz_offset
782
+ -- parameter to align with the desired timezone.
784
783
--
785
784
-- Accumulation:
786
- -- We do not start counting from 0 at the start_time. We check the last status change before the start_time for each user. As such,
787
- -- the result shows the total number of users in each status on any particular day.
785
+ -- We do not start counting from 0 at the start_time. We check the last status change
786
+ -- before the start_time for each user. As such, the result shows the total number of
787
+ -- users in each status at each interval.
788
788
WITH
789
789
-- dates_of_interest defines all points in time that are relevant to the query.
790
- -- It includes the start_time, all status changes, all deletions, and the end_time.
791
- dates_of_interest AS (
792
- SELECT
793
- (generate_series(
794
- @start_time:: timestamptz ,
795
- @end_time ::timestamptz ,
796
- (@interval ::int || ' seconds' )::interval
797
- ) + (@tz_offset ::int || ' seconds' )::interval):: timestamptz AS date
798
- ),
799
- -- latest_status_before_range defines the status of each user before the start_time.
800
- -- We do not include users who were deleted before the start_time. We use this to ensure that
801
- -- we correctly count users prior to the start_time for a complete graph.
802
- latest_status_before_range AS (
803
- SELECT
804
- DISTINCT usc . user_id ,
805
- usc .new_status ,
806
- usc .changed_at ,
807
- ud . deleted
808
- FROM user_status_changes usc
809
- LEFT JOIN LATERAL (
810
- SELECT COUNT ( * ) > 0 AS deleted
811
- FROM user_deleted ud
812
- WHERE ud . user_id = usc . user_id AND ( ud . deleted_at < usc . changed_at OR ud . deleted_at < @start_time)
813
- ) AS ud ON true
814
- WHERE usc . changed_at < @start_time:: timestamptz
815
- ORDER BY usc .user_id , usc . changed_at DESC
816
- ),
817
- -- status_changes_during_range defines the status of each user during the start_time and end_time.
818
- -- If a user is deleted during the time range, we count status changes between the start_time and the deletion date .
819
- -- Theoretically, it should probably not be possible to update the status of a deleted user, but we
820
- -- need to ensure that this is enforced, so that a change in business logic later does not break this graph .
821
- status_changes_during_range AS (
822
- SELECT
823
- usc .user_id ,
824
- usc .new_status ,
825
- usc .changed_at ,
826
- ud .deleted
827
- FROM user_status_changes usc
828
- LEFT JOIN LATERAL (
829
- SELECT COUNT (* ) > 0 AS deleted
830
- FROM user_deleted ud
831
- WHERE ud .user_id = usc .user_id AND ud .deleted_at < usc .changed_at
832
- ) AS ud ON true
833
- WHERE usc .changed_at >= @start_time::timestamptz
834
- AND usc .changed_at <= @end_time::timestamptz
835
- ),
836
- -- relevant_status_changes defines the status of each user at any point in time.
837
- -- It includes the status of each user before the start_time, and the status of each user during the start_time and end_time .
838
- relevant_status_changes AS (
839
- SELECT
840
- user_id,
841
- new_status,
842
- changed_at
843
- FROM latest_status_before_range
844
- WHERE NOT deleted
790
+ -- It generates regular intervals between start_time and end_time based on the interval
791
+ -- parameter, adjusted by the timezone offset.
792
+ dates_of_interest AS (
793
+ SELECT
794
+ (generate_series(
795
+ @start_time ::timestamptz + (@tz_offset:: int || ' seconds ' )::interval ,
796
+ @end_time:: timestamptz + (@tz_offset ::int || ' seconds' )::interval,
797
+ (CASE WHEN @interval ::int <= 0 THEN 3600 * 24 ELSE @interval:: int END || ' seconds' )::interval
798
+ )):: timestamptz AS date
799
+ ),
800
+ -- latest_status_before_range finds each user's last status before start_time.
801
+ -- Users who were deleted before start_time are excluded. This ensures accurate
802
+ -- initial counts at the start of the time range.
803
+ latest_status_before_range AS (
804
+ SELECT
805
+ DISTINCT usc .user_id ,
806
+ usc .new_status ,
807
+ usc . changed_at ,
808
+ ud . deleted
809
+ FROM user_status_changes usc
810
+ LEFT JOIN LATERAL (
811
+ SELECT COUNT ( * ) > 0 AS deleted
812
+ FROM user_deleted ud
813
+ WHERE ud . user_id = usc . user_id AND ( ud . deleted_at < usc . changed_at OR ud . deleted_at < @start_time)
814
+ ) AS ud ON true
815
+ WHERE usc .changed_at < @start_time:: timestamptz
816
+ ORDER BY usc . user_id , usc . changed_at DESC
817
+ ),
818
+ -- status_changes_during_range captures all status changes between start_time and end_time .
819
+ -- Changes after a user's deletion date are excluded, as deleted users should not have
820
+ -- status changes .
821
+ status_changes_during_range AS (
822
+ SELECT
823
+ usc .user_id ,
824
+ usc .new_status ,
825
+ usc .changed_at ,
826
+ ud .deleted
827
+ FROM user_status_changes usc
828
+ LEFT JOIN LATERAL (
829
+ SELECT COUNT (* ) > 0 AS deleted
830
+ FROM user_deleted ud
831
+ WHERE ud .user_id = usc .user_id AND ud .deleted_at < usc .changed_at
832
+ ) AS ud ON true
833
+ WHERE usc .changed_at >= @start_time::timestamptz
834
+ AND usc .changed_at <= @end_time::timestamptz
835
+ ),
836
+ -- relevant_status_changes combines user statuses from before and during the time range .
837
+ -- Only includes non-deleted users to ensure accurate counting .
838
+ relevant_status_changes AS (
839
+ SELECT
840
+ user_id,
841
+ new_status,
842
+ changed_at
843
+ FROM latest_status_before_range
844
+ WHERE NOT deleted
845
845
846
- UNION ALL
846
+ UNION ALL
847
847
848
- SELECT
849
- user_id,
850
- new_status,
851
- changed_at
852
- FROM status_changes_during_range
853
- WHERE NOT deleted
854
- ),
855
- -- statuses defines all the distinct statuses that were present just before and during the time range.
856
- -- This is used to ensure that we have a series for every relevant status .
857
- statuses AS (
858
- SELECT DISTINCT new_status FROM relevant_status_changes
859
- ),
860
- -- We only want to count the latest status change for each user on each date and then filter them by the relevant status.
861
- -- We use the row_number function to ensure that we only count the latest status change for each user on each date.
862
- -- We then filter the status changes by the relevant status in the final select statement below .
863
- ranked_status_change_per_user_per_date AS (
864
- SELECT
865
- d .date ,
866
- rsc1 .user_id ,
867
- ROW_NUMBER() OVER (PARTITION BY d .date , rsc1 .user_id ORDER BY rsc1 .changed_at DESC ) AS rn,
868
- rsc1 .new_status
869
- FROM dates_of_interest d
870
- LEFT JOIN relevant_status_changes rsc1 ON rsc1 .changed_at <= d .date
871
- )
848
+ SELECT
849
+ user_id,
850
+ new_status,
851
+ changed_at
852
+ FROM status_changes_during_range
853
+ WHERE NOT deleted
854
+ ),
855
+ -- statuses collects all distinct status values that appeared in the time range
856
+ -- or just before it, ensuring we have a complete set of statuses to count .
857
+ statuses AS (
858
+ SELECT DISTINCT new_status FROM relevant_status_changes
859
+ ),
860
+ -- ranked_status_change_per_user_per_date finds the latest status for each user
861
+ -- at each interval date. The row_number helps us select only the most recent
862
+ -- status change when multiple changes occur before an interval .
863
+ ranked_status_change_per_user_per_date AS (
864
+ SELECT
865
+ d .date ,
866
+ rsc1 .user_id ,
867
+ ROW_NUMBER() OVER (PARTITION BY d .date , rsc1 .user_id ORDER BY rsc1 .changed_at DESC ) AS rn,
868
+ rsc1 .new_status
869
+ FROM dates_of_interest d
870
+ LEFT JOIN relevant_status_changes rsc1 ON rsc1 .changed_at <= d .date
871
+ )
872
872
SELECT
873
873
rscpupd .date ,
874
874
statuses .new_status AS status,
0 commit comments