Skip to content

Commit da5800d

Browse files
committed
Don't presort ORDER BY/DISTINCT Aggrefs with volatile functions
In 1349d27, we gave the planner the ability to provide ORDER BY/DISTINCT Aggrefs with presorted input so that nodeAgg would not have to perform sorts during execution. That commit failed to properly consider the implications of if the Aggref had a volatile function in its ORDER BY/DISTINCT clause. As it happened, this resulted in an ERROR about the volatile function being missing from the targetlist. Here, instead of adding the volatile function to the targetlist, we just never consider an Aggref with a volatile function in its ORDER BY/DISTINCT clause when choosing which Aggrefs we should sort by. We do this as if we were to choose a plan which provided these aggregates with presorted input, then if there were many such aggregates which could all share the same sort order, then it may be surprising if they all shared the same sort sometimes and didn't at other times when some other set of aggregates were given presorted results. We can avoid this inconsistency by just never providing these volatile function aggregates with presorted input. Reported-by: Dean Rasheed Discussion: https://postgr.es/m/CAEZATCWETioXs5kY8vT6BVguY41_wD962VDk=u_Nvd7S1UXzuQ@mail.gmail.com
1 parent 52585f8 commit da5800d

File tree

3 files changed

+79
-6
lines changed

3 files changed

+79
-6
lines changed

src/backend/optimizer/plan/planner.c

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3147,6 +3147,27 @@ reorder_grouping_sets(List *groupingSets, List *sortclause)
31473147
return result;
31483148
}
31493149

3150+
/*
3151+
* has_volatile_pathkey
3152+
* Returns true if any PathKey in 'keys' has an EquivalenceClass
3153+
* containing a volatile function. Otherwise returns false.
3154+
*/
3155+
static bool
3156+
has_volatile_pathkey(List *keys)
3157+
{
3158+
ListCell *lc;
3159+
3160+
foreach(lc, keys)
3161+
{
3162+
PathKey *pathkey = lfirst_node(PathKey, lc);
3163+
3164+
if (pathkey->pk_eclass->ec_has_volatile)
3165+
return true;
3166+
}
3167+
3168+
return false;
3169+
}
3170+
31503171
/*
31513172
* make_pathkeys_for_groupagg
31523173
* Determine the pathkeys for the GROUP BY clause and/or any ordered
@@ -3168,6 +3189,19 @@ reorder_grouping_sets(List *groupingSets, List *sortclause)
31683189
*
31693190
* When the best pathkeys are found we also mark each Aggref that can use
31703191
* those pathkeys as aggpresorted = true.
3192+
*
3193+
* Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3194+
* volatile functions, we never make use of these pathkeys. We want to ensure
3195+
* that sorts using volatile functions are done independently in each Aggref
3196+
* rather than once at the query level. If we were to allow this then Aggrefs
3197+
* with compatible sort orders would all transition their rows in the same
3198+
* order if those pathkeys were deemed to be the best pathkeys to sort on.
3199+
* Whereas, if some other set of Aggref's pathkeys happened to be deemed
3200+
* better pathkeys to sort on, then the volatile function Aggrefs would be
3201+
* left to perform their sorts individually. To avoid this inconsistent
3202+
* behavior which could make Aggref results depend on what other Aggrefs the
3203+
* query contains, we always force Aggrefs with volatile functions to perform
3204+
* their own sorts.
31713205
*/
31723206
static List *
31733207
make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
@@ -3254,20 +3288,33 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
32543288
AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
32553289
Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
32563290
List *sortlist;
3291+
List *pathkeys;
32573292

32583293
if (aggref->aggdistinct != NIL)
32593294
sortlist = aggref->aggdistinct;
32603295
else
32613296
sortlist = aggref->aggorder;
32623297

3298+
pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3299+
aggref->args);
3300+
3301+
/*
3302+
* Ignore Aggrefs which have volatile functions in their ORDER BY
3303+
* or DISTINCT clause.
3304+
*/
3305+
if (has_volatile_pathkey(pathkeys))
3306+
{
3307+
unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3308+
continue;
3309+
}
3310+
32633311
/*
32643312
* When not set yet, take the pathkeys from the first unprocessed
32653313
* aggregate.
32663314
*/
32673315
if (currpathkeys == NIL)
32683316
{
3269-
currpathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3270-
aggref->args);
3317+
currpathkeys = pathkeys;
32713318

32723319
/* include the GROUP BY pathkeys, if they exist */
32733320
if (grouppathkeys != NIL)
@@ -3279,11 +3326,7 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
32793326
}
32803327
else
32813328
{
3282-
List *pathkeys;
3283-
32843329
/* now look for a stronger set of matching pathkeys */
3285-
pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3286-
aggref->args);
32873330

32883331
/* include the GROUP BY pathkeys, if they exist */
32893332
if (grouppathkeys != NIL)

src/test/regress/expected/aggregates.out

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,6 +1471,25 @@ group by ten;
14711471
-> Seq Scan on tenk1
14721472
(5 rows)
14731473

1474+
-- Ensure that we never choose to provide presorted input to an Aggref with
1475+
-- a volatile function in the ORDER BY / DISTINCT clause. We want to ensure
1476+
-- these sorts are performed individually rather than at the query level.
1477+
explain (costs off)
1478+
select
1479+
sum(unique1 order by two), sum(unique1 order by four),
1480+
sum(unique1 order by four, two), sum(unique1 order by two, random()),
1481+
sum(unique1 order by two, random(), random() + 1)
1482+
from tenk1
1483+
group by ten;
1484+
QUERY PLAN
1485+
----------------------------------
1486+
GroupAggregate
1487+
Group Key: ten
1488+
-> Sort
1489+
Sort Key: ten, four, two
1490+
-> Seq Scan on tenk1
1491+
(5 rows)
1492+
14741493
-- Ensure no ordering is requested when enable_presorted_aggregate is off
14751494
set enable_presorted_aggregate to off;
14761495
explain (costs off)

src/test/regress/sql/aggregates.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,17 @@ select
546546
from tenk1
547547
group by ten;
548548

549+
-- Ensure that we never choose to provide presorted input to an Aggref with
550+
-- a volatile function in the ORDER BY / DISTINCT clause. We want to ensure
551+
-- these sorts are performed individually rather than at the query level.
552+
explain (costs off)
553+
select
554+
sum(unique1 order by two), sum(unique1 order by four),
555+
sum(unique1 order by four, two), sum(unique1 order by two, random()),
556+
sum(unique1 order by two, random(), random() + 1)
557+
from tenk1
558+
group by ten;
559+
549560
-- Ensure no ordering is requested when enable_presorted_aggregate is off
550561
set enable_presorted_aggregate to off;
551562
explain (costs off)

0 commit comments

Comments
 (0)