Skip to content

Commit 8d83a5d

Browse files
committed
Remove redundant grouping and DISTINCT columns.
Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
1 parent d540a02 commit 8d83a5d

File tree

16 files changed

+303
-171
lines changed

16 files changed

+303
-171
lines changed

contrib/pg_trgm/expected/pg_trgm.out

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5366,10 +5366,10 @@ SELECT similarity('Szczecin', 'Warsaw');
53665366
EXPLAIN (COSTS OFF)
53675367
SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
53685368
FROM restaurants WHERE city % 'Warsaw';
5369-
QUERY PLAN
5370-
-------------------------------------------------------------------
5369+
QUERY PLAN
5370+
-------------------------------------------------------
53715371
HashAggregate
5372-
Group Key: city, similarity(city, 'Warsaw'::text), show_limit()
5372+
Group Key: city, similarity(city, 'Warsaw'::text)
53735373
-> Bitmap Heap Scan on restaurants
53745374
Recheck Cond: (city % 'Warsaw'::text)
53755375
-> Bitmap Index Scan on restaurants_city_idx

contrib/postgres_fdw/deparse.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3736,6 +3736,13 @@ appendGroupByClause(List *tlist, deparse_expr_cxt *context)
37363736
*/
37373737
Assert(!query->groupingSets);
37383738

3739+
/*
3740+
* We intentionally print query->groupClause not processed_groupClause,
3741+
* leaving it to the remote planner to get rid of any redundant GROUP BY
3742+
* items again. This is necessary in case processed_groupClause reduced
3743+
* to empty, and in any case the redundancy situation on the remote might
3744+
* be different than what we think here.
3745+
*/
37393746
foreach(lc, query->groupClause)
37403747
{
37413748
SortGroupClause *grp = (SortGroupClause *) lfirst(lc);

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2293,7 +2293,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
22932293
-> Subquery Scan on q
22942294
-> HashAggregate
22952295
Output: t2.c1, t3.c1
2296-
Group Key: t2.c1, t3.c1
2296+
Group Key: t2.c1
22972297
-> Foreign Scan
22982298
Output: t2.c1, t3.c1
22992299
Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
@@ -2864,16 +2864,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
28642864
-- GROUP BY clause in various forms, cardinal, alias and constant expression
28652865
explain (verbose, costs off)
28662866
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
2867-
QUERY PLAN
2868-
---------------------------------------------------------------------------------------
2869-
Sort
2867+
QUERY PLAN
2868+
------------------------------------------------------------------------------------------------------------
2869+
Foreign Scan
28702870
Output: (count(c2)), c2, 5, 7.0, 9
2871-
Sort Key: ft1.c2
2872-
-> Foreign Scan
2873-
Output: (count(c2)), c2, 5, 7.0, 9
2874-
Relations: Aggregate on (public.ft1)
2875-
Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
2876-
(7 rows)
2871+
Relations: Aggregate on (public.ft1)
2872+
Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
2873+
(4 rows)
28772874

28782875
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
28792876
w | x | y | z
@@ -2990,14 +2987,13 @@ select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
29902987
QUERY PLAN
29912988
---------------------------------------------------
29922989
GroupAggregate
2993-
Output: ($0), sum(ft1.c1)
2994-
Group Key: $0
2990+
Output: $0, sum(ft1.c1)
29952991
InitPlan 1 (returns $0)
29962992
-> Seq Scan on pg_catalog.pg_enum
29972993
-> Foreign Scan on public.ft1
2998-
Output: $0, ft1.c1
2994+
Output: ft1.c1
29992995
Remote SQL: SELECT "C 1" FROM "S 1"."T 1"
3000-
(8 rows)
2996+
(7 rows)
30012997

30022998
select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
30032999
exists | sum
@@ -3382,14 +3378,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
33823378
--------------------------------------------------------------------------------------------------
33833379
GroupAggregate
33843380
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
3385-
Group Key: ft2.c2
33863381
-> Sort
3387-
Output: c2, c1
3382+
Output: c1, c2
33883383
Sort Key: ft2.c1 USING <^
33893384
-> Foreign Scan on public.ft2
3390-
Output: c2, c1
3385+
Output: c1, c2
33913386
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
3392-
(9 rows)
3387+
(8 rows)
33933388

33943389
-- This should not be pushed either.
33953390
explain (verbose, costs off)
@@ -3458,14 +3453,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
34583453
--------------------------------------------------------------------------------------------------
34593454
GroupAggregate
34603455
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
3461-
Group Key: ft2.c2
34623456
-> Sort
3463-
Output: c2, c1
3457+
Output: c1, c2
34643458
Sort Key: ft2.c1 USING <^
34653459
-> Foreign Scan on public.ft2
3466-
Output: c2, c1
3460+
Output: c1, c2
34673461
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
3468-
(9 rows)
3462+
(8 rows)
34693463

34703464
-- Cleanup
34713465
drop operator class my_op_class using btree;

contrib/postgres_fdw/postgres_fdw.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3345,9 +3345,9 @@ estimate_path_cost_size(PlannerInfo *root,
33453345
}
33463346

33473347
/* Get number of grouping columns and possible number of groups */
3348-
numGroupCols = list_length(root->parse->groupClause);
3348+
numGroupCols = list_length(root->processed_groupClause);
33493349
numGroups = estimate_num_groups(root,
3350-
get_sortgrouplist_exprs(root->parse->groupClause,
3350+
get_sortgrouplist_exprs(root->processed_groupClause,
33513351
fpinfo->grouped_tlist),
33523352
input_rows, NULL, NULL);
33533353

@@ -3636,7 +3636,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root,
36363636
* pathkeys, adjust the costs with that function. Otherwise, adjust the
36373637
* costs by applying the same heuristic as for the scan or join case.
36383638
*/
3639-
if (!grouping_is_sortable(root->parse->groupClause) ||
3639+
if (!grouping_is_sortable(root->processed_groupClause) ||
36403640
!pathkeys_contained_in(pathkeys, root->group_pathkeys))
36413641
{
36423642
Path sort_path; /* dummy for result of cost_sort */
@@ -6415,7 +6415,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
64156415
Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
64166416
ListCell *l;
64176417

6418-
/* Check whether this expression is part of GROUP BY clause */
6418+
/*
6419+
* Check whether this expression is part of GROUP BY clause. Note we
6420+
* check the whole GROUP BY clause not just processed_groupClause,
6421+
* because we will ship all of it, cf. appendGroupByClause.
6422+
*/
64196423
if (sgref && get_sortgroupref_clause_noerr(sgref, query->groupClause))
64206424
{
64216425
TargetEntry *tle;

src/backend/executor/nodeAgg.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2476,7 +2476,7 @@ agg_retrieve_direct(AggState *aggstate)
24762476
* If we are grouping, check whether we've crossed a group
24772477
* boundary.
24782478
*/
2479-
if (node->aggstrategy != AGG_PLAIN)
2479+
if (node->aggstrategy != AGG_PLAIN && node->numCols > 0)
24802480
{
24812481
tmpcontext->ecxt_innertuple = firstSlot;
24822482
if (!ExecQual(aggstate->phase->eqfunctions[node->numCols - 1],
@@ -3480,8 +3480,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
34803480
*/
34813481
if (aggnode->aggstrategy == AGG_SORTED)
34823482
{
3483-
Assert(aggnode->numCols > 0);
3484-
34853483
/*
34863484
* Build a separate function for each subset of columns that
34873485
* need to be compared.
@@ -3512,7 +3510,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
35123510
}
35133511

35143512
/* and for all grouped columns, unless already computed */
3515-
if (phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
3513+
if (aggnode->numCols > 0 &&
3514+
phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
35163515
{
35173516
phasedata->eqfunctions[aggnode->numCols - 1] =
35183517
execTuplesMatchPrepare(scanDesc,

src/backend/optimizer/path/pathkeys.c

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,18 +1152,62 @@ List *
11521152
make_pathkeys_for_sortclauses(PlannerInfo *root,
11531153
List *sortclauses,
11541154
List *tlist)
1155+
{
1156+
List *result;
1157+
bool sortable;
1158+
1159+
result = make_pathkeys_for_sortclauses_extended(root,
1160+
&sortclauses,
1161+
tlist,
1162+
false,
1163+
&sortable);
1164+
/* It's caller error if not all clauses were sortable */
1165+
Assert(sortable);
1166+
return result;
1167+
}
1168+
1169+
/*
1170+
* make_pathkeys_for_sortclauses_extended
1171+
* Generate a pathkeys list that represents the sort order specified
1172+
* by a list of SortGroupClauses
1173+
*
1174+
* The comments for make_pathkeys_for_sortclauses apply here too. In addition:
1175+
*
1176+
* If remove_redundant is true, then any sort clauses that are found to
1177+
* give rise to redundant pathkeys are removed from the sortclauses list
1178+
* (which therefore must be pass-by-reference in this version).
1179+
*
1180+
* *sortable is set to true if all the sort clauses are in fact sortable.
1181+
* If any are not, they are ignored except for setting *sortable false.
1182+
* (In that case, the output pathkey list isn't really useful. However,
1183+
* we process the whole sortclauses list anyway, because it's still valid
1184+
* to remove any clauses that can be proven redundant via the eclass logic.
1185+
* Even though we'll have to hash in that case, we might as well not hash
1186+
* redundant columns.)
1187+
*/
1188+
List *
1189+
make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
1190+
List **sortclauses,
1191+
List *tlist,
1192+
bool remove_redundant,
1193+
bool *sortable)
11551194
{
11561195
List *pathkeys = NIL;
11571196
ListCell *l;
11581197

1159-
foreach(l, sortclauses)
1198+
*sortable = true;
1199+
foreach(l, *sortclauses)
11601200
{
11611201
SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
11621202
Expr *sortkey;
11631203
PathKey *pathkey;
11641204

11651205
sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
1166-
Assert(OidIsValid(sortcl->sortop));
1206+
if (!OidIsValid(sortcl->sortop))
1207+
{
1208+
*sortable = false;
1209+
continue;
1210+
}
11671211
pathkey = make_pathkey_from_sortop(root,
11681212
sortkey,
11691213
root->nullable_baserels,
@@ -1175,6 +1219,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
11751219
/* Canonical form eliminates redundant ordering keys */
11761220
if (!pathkey_is_redundant(pathkey, pathkeys))
11771221
pathkeys = lappend(pathkeys, pathkey);
1222+
else if (remove_redundant)
1223+
*sortclauses = foreach_delete_current(*sortclauses, l);
11781224
}
11791225
return pathkeys;
11801226
}

src/backend/optimizer/plan/createplan.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2404,7 +2404,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
24042404
* sizing.
24052405
*/
24062406
maxref = 0;
2407-
foreach(lc, root->parse->groupClause)
2407+
foreach(lc, root->processed_groupClause)
24082408
{
24092409
SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
24102410

@@ -2415,7 +2415,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
24152415
grouping_map = (AttrNumber *) palloc0((maxref + 1) * sizeof(AttrNumber));
24162416

24172417
/* Now look up the column numbers in the child's tlist */
2418-
foreach(lc, root->parse->groupClause)
2418+
foreach(lc, root->processed_groupClause)
24192419
{
24202420
SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
24212421
TargetEntry *tle = get_sortgroupclause_tle(gc, subplan->targetlist);

0 commit comments

Comments
 (0)