Skip to content

Commit 874d817

Browse files
committed
Multiple revisions to the GROUP BY reordering tests
Discussion: https://postgr.es/m/CAMbWs4-NKLa%2BSs%2BX%3DWR6h0x%3DT07YBJoAz70ZGHzc-2zcHUHb0A%40mail.gmail.com Author: Richard Guo Reviewed-by: Andrei Lepikhov, Alexander Korotkov
1 parent 466979e commit 874d817

File tree

2 files changed

+154
-189
lines changed

2 files changed

+154
-189
lines changed

src/test/regress/expected/aggregates.out

Lines changed: 99 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -2728,29 +2728,20 @@ SELECT balk(hundred) FROM tenk1;
27282728
(1 row)
27292729

27302730
ROLLBACK;
2731-
-- GROUP BY optimization by reorder columns
2731+
-- GROUP BY optimization by reordering GROUP BY clauses
27322732
CREATE TABLE btg AS SELECT
2733-
i % 100 AS x,
2734-
i % 100 AS y,
2733+
i % 10 AS x,
2734+
i % 10 AS y,
27352735
'abc' || i % 10 AS z,
27362736
i AS w
2737-
FROM generate_series(1,10000) AS i;
2738-
CREATE INDEX btg_x_y_idx ON btg(x,y);
2737+
FROM generate_series(1, 100) AS i;
2738+
CREATE INDEX btg_x_y_idx ON btg(x, y);
27392739
ANALYZE btg;
2740-
-- GROUP BY optimization by reorder columns by frequency
2741-
SET enable_hashagg=off;
2742-
SET max_parallel_workers= 0;
2743-
SET max_parallel_workers_per_gather = 0;
2744-
-- Utilize index scan ordering to avoid a Sort operation
2745-
EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
2746-
QUERY PLAN
2747-
------------------------------------------------
2748-
GroupAggregate
2749-
Group Key: x, y
2750-
-> Index Only Scan using btg_x_y_idx on btg
2751-
(3 rows)
2752-
2753-
EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
2740+
SET enable_hashagg = off;
2741+
SET enable_seqscan = off;
2742+
-- Utilize the ordering of index scan to avoid a Sort operation
2743+
EXPLAIN (COSTS OFF)
2744+
SELECT count(*) FROM btg GROUP BY y, x;
27542745
QUERY PLAN
27552746
------------------------------------------------
27562747
GroupAggregate
@@ -2759,121 +2750,103 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
27592750
(3 rows)
27602751

27612752
-- Engage incremental sort
2762-
explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
2753+
EXPLAIN (COSTS OFF)
2754+
SELECT count(*) FROM btg GROUP BY z, y, w, x;
27632755
QUERY PLAN
27642756
-------------------------------------------------
2765-
Group
2766-
Group Key: x, y, z, w
2767-
-> Incremental Sort
2768-
Sort Key: x, y, z, w
2769-
Presorted Key: x, y
2770-
-> Index Scan using btg_x_y_idx on btg
2771-
(6 rows)
2772-
2773-
explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
2774-
QUERY PLAN
2775-
-------------------------------------------------
2776-
Group
2757+
GroupAggregate
27772758
Group Key: x, y, z, w
27782759
-> Incremental Sort
27792760
Sort Key: x, y, z, w
27802761
Presorted Key: x, y
27812762
-> Index Scan using btg_x_y_idx on btg
27822763
(6 rows)
27832764

2784-
explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
2785-
QUERY PLAN
2786-
-------------------------------------------------
2787-
Group
2788-
Group Key: x, y, w, z
2789-
-> Incremental Sort
2790-
Sort Key: x, y, w, z
2791-
Presorted Key: x, y
2792-
-> Index Scan using btg_x_y_idx on btg
2793-
(6 rows)
2794-
2795-
explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
2765+
-- Utilize the ordering of subquery scan to avoid a Sort operation
2766+
EXPLAIN (COSTS OFF) SELECT count(*)
2767+
FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1
2768+
GROUP BY w, x, z, y;
27962769
QUERY PLAN
27972770
-------------------------------------------------
2798-
Group
2799-
Group Key: x, y, w, z
2800-
-> Incremental Sort
2801-
Sort Key: x, y, w, z
2802-
Presorted Key: x, y
2803-
-> Index Scan using btg_x_y_idx on btg
2804-
(6 rows)
2805-
2806-
-- Subqueries
2807-
explain (COSTS OFF) SELECT x,y
2808-
FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
2809-
GROUP BY (w,x,z,y);
2810-
QUERY PLAN
2811-
-------------------------------------------------
2812-
Group
2771+
GroupAggregate
28132772
Group Key: btg.x, btg.y, btg.w, btg.z
28142773
-> Incremental Sort
28152774
Sort Key: btg.x, btg.y, btg.w, btg.z
28162775
Presorted Key: btg.x, btg.y
28172776
-> Index Scan using btg_x_y_idx on btg
28182777
(6 rows)
28192778

2820-
explain (COSTS OFF) SELECT x,y
2821-
FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
2822-
GROUP BY (w,x,z,y);
2823-
QUERY PLAN
2824-
-------------------------------------------------------
2825-
Group
2826-
Group Key: btg.x, btg.y, btg.w, btg.z
2827-
-> Limit
2828-
-> Incremental Sort
2829-
Sort Key: btg.x, btg.y, btg.w, btg.z
2830-
Presorted Key: btg.x, btg.y
2831-
-> Index Scan using btg_x_y_idx on btg
2832-
(7 rows)
2779+
-- Utilize the ordering of merge join to avoid a full Sort operation
2780+
SET enable_hashjoin = off;
2781+
SET enable_nestloop = off;
2782+
EXPLAIN (COSTS OFF)
2783+
SELECT count(*)
2784+
FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
2785+
GROUP BY t1.x, t1.y, t1.z, t1.w;
2786+
QUERY PLAN
2787+
-------------------------------------------------------------------------------
2788+
GroupAggregate
2789+
Group Key: t1.z, t1.w, t1.x, t1.y
2790+
-> Incremental Sort
2791+
Sort Key: t1.z, t1.w, t1.x, t1.y
2792+
Presorted Key: t1.z, t1.w, t1.x
2793+
-> Merge Join
2794+
Merge Cond: ((t1.z = t2.z) AND (t1.w = t2.w) AND (t1.x = t2.x))
2795+
-> Sort
2796+
Sort Key: t1.z, t1.w, t1.x
2797+
-> Index Scan using btg_x_y_idx on btg t1
2798+
-> Sort
2799+
Sort Key: t2.z, t2.w, t2.x
2800+
-> Index Scan using btg_x_y_idx on btg t2
2801+
(13 rows)
28332802

2803+
RESET enable_nestloop;
2804+
RESET enable_hashjoin;
28342805
-- Should work with and without GROUP-BY optimization
2835-
explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
2836-
QUERY PLAN
2837-
------------------------------
2838-
Group
2806+
EXPLAIN (COSTS OFF)
2807+
SELECT count(*) FROM btg GROUP BY w, x, z, y ORDER BY y, x, z, w;
2808+
QUERY PLAN
2809+
-------------------------------------------------
2810+
GroupAggregate
28392811
Group Key: y, x, z, w
28402812
-> Sort
28412813
Sort Key: y, x, z, w
2842-
-> Seq Scan on btg
2814+
-> Index Scan using btg_x_y_idx on btg
28432815
(5 rows)
28442816

28452817
-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
2846-
explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
2818+
EXPLAIN (COSTS OFF)
2819+
SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
28472820
QUERY PLAN
28482821
-------------------------------------------------------
28492822
Sort
28502823
Sort Key: ((x * x)), z
2851-
-> Group
2824+
-> GroupAggregate
28522825
Group Key: x, y, w, z
28532826
-> Incremental Sort
28542827
Sort Key: x, y, w, z
28552828
Presorted Key: x, y
28562829
-> Index Scan using btg_x_y_idx on btg
28572830
(8 rows)
28582831

2859-
SET enable_incremental_sort = off;
2860-
-- The case when the number of incoming subtree path keys is more than
2832+
-- Test the case where the number of incoming subtree path keys is more than
28612833
-- the number of grouping keys.
2862-
CREATE INDEX idx_y_x_z ON btg(y,x,w);
2834+
CREATE INDEX btg_y_x_w_idx ON btg(y, x, w);
28632835
EXPLAIN (VERBOSE, COSTS OFF)
2864-
SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
2865-
QUERY PLAN
2866-
-----------------------------------------------------
2836+
SELECT y, x, array_agg(distinct w)
2837+
FROM btg WHERE y < 0 GROUP BY x, y;
2838+
QUERY PLAN
2839+
---------------------------------------------------------
28672840
GroupAggregate
28682841
Output: y, x, array_agg(DISTINCT w)
28692842
Group Key: btg.y, btg.x
2870-
-> Index Only Scan using idx_y_x_z on public.btg
2843+
-> Index Only Scan using btg_y_x_w_idx on public.btg
28712844
Output: y, x, w
28722845
Index Cond: (btg.y < 0)
28732846
(6 rows)
28742847

2875-
RESET enable_incremental_sort;
2876-
-- Check we don't pick aggregate path key instead of grouping path key
2848+
-- Ensure that we do not select the aggregate pathkeys instead of the grouping
2849+
-- pathkeys
28772850
CREATE TABLE group_agg_pk AS SELECT
28782851
i % 10 AS x,
28792852
i % 2 AS y,
@@ -2884,74 +2857,63 @@ FROM generate_series(1,100) AS i;
28842857
ANALYZE group_agg_pk;
28852858
SET enable_nestloop = off;
28862859
SET enable_hashjoin = off;
2887-
SELECT
2888-
c1.z, c1.w, string_agg(''::text, repeat(''::text, c1.f) ORDER BY c1.x,c1.y)
2889-
FROM group_agg_pk c1 JOIN group_agg_pk c2 ON (c1.x = c2.f)
2860+
EXPLAIN (COSTS OFF)
2861+
SELECT avg(c1.f ORDER BY c1.x, c1.y)
2862+
FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
28902863
GROUP BY c1.w, c1.z;
2891-
z | w | string_agg
2892-
---+---+------------
2893-
0 | 2 |
2894-
1 | 2 |
2864+
QUERY PLAN
2865+
-----------------------------------------------------
2866+
GroupAggregate
2867+
Group Key: c1.w, c1.z
2868+
-> Sort
2869+
Sort Key: c1.w, c1.z, c1.x, c1.y
2870+
-> Merge Join
2871+
Merge Cond: (c1.x = c2.x)
2872+
-> Sort
2873+
Sort Key: c1.x
2874+
-> Seq Scan on group_agg_pk c1
2875+
-> Sort
2876+
Sort Key: c2.x
2877+
-> Seq Scan on group_agg_pk c2
2878+
(12 rows)
2879+
2880+
SELECT avg(c1.f ORDER BY c1.x, c1.y)
2881+
FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
2882+
GROUP BY c1.w, c1.z;
2883+
avg
2884+
--------------------
2885+
4.0000000000000000
2886+
5.0000000000000000
28952887
(2 rows)
28962888

28972889
RESET enable_nestloop;
28982890
RESET enable_hashjoin;
28992891
DROP TABLE group_agg_pk;
2900-
-- The case, when scanning sort order correspond to aggregate sort order but
2901-
-- can not be found in the group-by list
2892+
-- Test the case where the the ordering of scan matches the ordering within the
2893+
-- aggregate but cannot be found in the group-by list
29022894
CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
2903-
CREATE UNIQUE INDEX ON agg_sort_order(c2);
2904-
explain (costs off)
2895+
CREATE UNIQUE INDEX agg_sort_order_c2_idx ON agg_sort_order(c2);
2896+
INSERT INTO agg_sort_order SELECT i, i FROM generate_series(1,100)i;
2897+
ANALYZE agg_sort_order;
2898+
EXPLAIN (COSTS OFF)
29052899
SELECT array_agg(c1 ORDER BY c2),c2
29062900
FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
2907-
QUERY PLAN
2908-
--------------------------------------------------------------------
2901+
QUERY PLAN
2902+
----------------------------------------------------------------------------
29092903
Sort
29102904
Sort Key: c2
29112905
-> GroupAggregate
29122906
Group Key: c1
29132907
-> Sort
29142908
Sort Key: c1, c2
2915-
-> Bitmap Heap Scan on agg_sort_order
2916-
Recheck Cond: (c2 < 100)
2917-
-> Bitmap Index Scan on agg_sort_order_c2_idx
2918-
Index Cond: (c2 < 100)
2919-
(10 rows)
2909+
-> Index Scan using agg_sort_order_c2_idx on agg_sort_order
2910+
Index Cond: (c2 < 100)
2911+
(8 rows)
29202912

29212913
DROP TABLE agg_sort_order CASCADE;
2922-
-- Check, that GROUP-BY reordering optimization can operate with pathkeys, built
2923-
-- by planner itself. For example, by MergeJoin.
2924-
SET enable_hashjoin = off;
2925-
SET enable_nestloop = off;
2926-
explain (COSTS OFF)
2927-
SELECT b1.x,b1.w FROM btg b1 JOIN btg b2 ON (b1.z=b2.z AND b1.w=b2.w)
2928-
GROUP BY b1.x,b1.z,b1.w ORDER BY b1.z, b1.w, b1.x*b1.x;
2929-
QUERY PLAN
2930-
-------------------------------------------------------------------
2931-
Incremental Sort
2932-
Sort Key: b1.z, b1.w, ((b1.x * b1.x))
2933-
Presorted Key: b1.z, b1.w
2934-
-> Group
2935-
Group Key: b1.z, b1.w, b1.x
2936-
-> Incremental Sort
2937-
Sort Key: b1.z, b1.w, b1.x
2938-
Presorted Key: b1.z, b1.w
2939-
-> Merge Join
2940-
Merge Cond: ((b1.z = b2.z) AND (b1.w = b2.w))
2941-
-> Sort
2942-
Sort Key: b1.z, b1.w
2943-
-> Seq Scan on btg b1
2944-
-> Sort
2945-
Sort Key: b2.z, b2.w
2946-
-> Seq Scan on btg b2
2947-
(16 rows)
2948-
2949-
RESET enable_hashjoin;
2950-
RESET enable_nestloop;
29512914
DROP TABLE btg;
29522915
RESET enable_hashagg;
2953-
RESET max_parallel_workers;
2954-
RESET max_parallel_workers_per_gather;
2916+
RESET enable_seqscan;
29552917
-- Secondly test the case of a parallel aggregate combiner function
29562918
-- returning NULL. For that use normal transition function, but a
29572919
-- combiner function returning NULL.

0 commit comments

Comments
 (0)