Skip to content

Commit c01f6ef

Browse files
committed
Fix usage of aggregate pathkeys in group_keys_reorder_by_pathkeys()
group_keys_reorder_by_pathkeys() function searched for matching pathkeys within root->group_pathkeys. That could lead to picking an aggregate pathkey and using its pathkey->pk_eclass->ec_sortref as an argument of get_sortgroupref_clause_noerr(). Given that ec_sortref of an aggregate pathkey references aggregate targetlist not query targetlist, this leads to incorrect query optimization. Fix this by looking for matching pathkeys only within the first num_groupby_pathkeys pathkeys. Reported-by: David G. Johnston Discussion: https://postgr.es/m/CAKFQuwY3Ek%3DcLThgd8FdaSc5JRDVt0FaV00gMcWra%2BTAR4gGUw%40mail.gmail.com Author: Andrei Lepikhov, Alexander Korotkov
1 parent 6743c5a commit c01f6ef

File tree

3 files changed

+64
-4
lines changed

3 files changed

+64
-4
lines changed

src/backend/optimizer/path/pathkeys.c

+21-4
Original file line numberDiff line numberDiff line change
@@ -355,10 +355,14 @@ pathkeys_contained_in(List *keys1, List *keys2)
355355

356356
/*
357357
* group_keys_reorder_by_pathkeys
358-
* Reorder GROUP BY keys to match the input pathkeys.
358+
* Reorder GROUP BY pathkeys and clauses to match the input pathkeys.
359359
*
360-
* Function returns new lists (pathkeys and clauses), original GROUP BY lists
361-
* stay untouched.
360+
* 'pathkeys' is an input list of pathkeys
361+
* '*group_pathkeys' and '*group_clauses' are pathkeys and clauses lists to
362+
* reorder. The pointers are redirected to new lists, original lists
363+
* stay untouched.
364+
* 'num_groupby_pathkeys' is the number of first '*group_pathkeys' items to
365+
* search matching pathkeys.
362366
*
363367
* Returns the number of GROUP BY keys with a matching pathkey.
364368
*/
@@ -369,12 +373,24 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
369373
{
370374
List *new_group_pathkeys = NIL,
371375
*new_group_clauses = NIL;
376+
List *grouping_pathkeys;
372377
ListCell *lc;
373378
int n;
374379

375380
if (pathkeys == NIL || *group_pathkeys == NIL)
376381
return 0;
377382

383+
/*
384+
* We're going to search within just the first num_groupby_pathkeys of
385+
* *group_pathkeys. The thing is that root->group_pathkeys is passed as
386+
* *group_pathkeys containing grouping pathkeys altogether with aggregate
387+
* pathkeys. If we process aggregate pathkeys we could get an invalid
388+
* result of get_sortgroupref_clause_noerr(), because their
389+
* pathkey->pk_eclass->ec_sortref doesn't referece query targetlist. So,
390+
* we allocate a separate list of pathkeys for lookups.
391+
*/
392+
grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys);
393+
378394
/*
379395
* Walk the pathkeys (determining ordering of the input path) and see if
380396
* there's a matching GROUP BY key. If we find one, we append it to the
@@ -395,7 +411,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
395411
* there is no sortclause reference for some reason.
396412
*/
397413
if (foreach_current_index(lc) >= num_groupby_pathkeys ||
398-
!list_member_ptr(*group_pathkeys, pathkey) ||
414+
!list_member_ptr(grouping_pathkeys, pathkey) ||
399415
pathkey->pk_eclass->ec_sortref == 0)
400416
break;
401417

@@ -429,6 +445,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
429445
*group_clauses = list_concat_unique_ptr(new_group_clauses,
430446
*group_clauses);
431447

448+
list_free(grouping_pathkeys);
432449
return n;
433450
}
434451

src/test/regress/expected/aggregates.out

+24
Original file line numberDiff line numberDiff line change
@@ -2874,6 +2874,30 @@ SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
28742874

28752875
RESET enable_incremental_sort;
28762876
DROP TABLE btg;
2877+
-- Check we don't pick aggregate path key instead of grouping path key
2878+
CREATE TABLE group_agg_pk AS SELECT
2879+
i % 10 AS x,
2880+
i % 2 AS y,
2881+
i % 2 AS z,
2882+
2 AS w,
2883+
i % 10 AS f
2884+
FROM generate_series(1,100) AS i;
2885+
ANALYZE group_agg_pk;
2886+
SET enable_nestloop = off;
2887+
SET enable_hashjoin = off;
2888+
SELECT
2889+
c1.z, c1.w, string_agg(''::text, repeat(''::text, c1.f) ORDER BY c1.x,c1.y)
2890+
FROM group_agg_pk c1 JOIN group_agg_pk c2 ON (c1.x = c2.f)
2891+
GROUP BY c1.w, c1.z;
2892+
z | w | string_agg
2893+
---+---+------------
2894+
0 | 2 |
2895+
1 | 2 |
2896+
(2 rows)
2897+
2898+
RESET enable_nestloop;
2899+
RESET enable_hashjoin;
2900+
DROP TABLE group_agg_pk;
28772901
-- The case, when scanning sort order correspond to aggregate sort order but
28782902
-- can not be found in the group-by list
28792903
CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);

src/test/regress/sql/aggregates.sql

+19
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,25 @@ RESET enable_incremental_sort;
12311231

12321232
DROP TABLE btg;
12331233

1234+
-- Check we don't pick aggregate path key instead of grouping path key
1235+
CREATE TABLE group_agg_pk AS SELECT
1236+
i % 10 AS x,
1237+
i % 2 AS y,
1238+
i % 2 AS z,
1239+
2 AS w,
1240+
i % 10 AS f
1241+
FROM generate_series(1,100) AS i;
1242+
ANALYZE group_agg_pk;
1243+
SET enable_nestloop = off;
1244+
SET enable_hashjoin = off;
1245+
SELECT
1246+
c1.z, c1.w, string_agg(''::text, repeat(''::text, c1.f) ORDER BY c1.x,c1.y)
1247+
FROM group_agg_pk c1 JOIN group_agg_pk c2 ON (c1.x = c2.f)
1248+
GROUP BY c1.w, c1.z;
1249+
RESET enable_nestloop;
1250+
RESET enable_hashjoin;
1251+
DROP TABLE group_agg_pk;
1252+
12341253
-- The case, when scanning sort order correspond to aggregate sort order but
12351254
-- can not be found in the group-by list
12361255
CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);

0 commit comments

Comments
 (0)