Skip to content

Commit aaad96e

Browse files
committed
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
1 parent e8aed97 commit aaad96e

File tree

8 files changed

+155
-7
lines changed

8 files changed

+155
-7
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1914,13 +1914,14 @@ void
19141914
ExecReScanAgg(AggState *node)
19151915
{
19161916
ExprContext *econtext = node->ss.ps.ps_ExprContext;
1917+
Agg *aggnode = (Agg *) node->ss.ps.plan;
19171918
int aggno;
19181919

19191920
node->agg_done = false;
19201921

19211922
node->ss.ps.ps_TupFromTlist = false;
19221923

1923-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1924+
if (aggnode->aggstrategy == AGG_HASHED)
19241925
{
19251926
/*
19261927
* In the hashed case, if we haven't yet built the hash table then we
@@ -1932,11 +1933,13 @@ ExecReScanAgg(AggState *node)
19321933
return;
19331934

19341935
/*
1935-
* If we do have the hash table and the subplan does not have any
1936-
* parameter changes, then we can just rescan the existing hash table;
1937-
* no need to build it again.
1936+
* If we do have the hash table, and the subplan does not have any
1937+
* parameter changes, and none of our own parameter changes affect
1938+
* input expressions of the aggregated functions, then we can just
1939+
* rescan the existing hash table; no need to build it again.
19381940
*/
1939-
if (node->ss.ps.lefttree->chgParam == NULL)
1941+
if (node->ss.ps.lefttree->chgParam == NULL &&
1942+
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
19401943
{
19411944
ResetTupleHashIterator(node->hashtable, &node->hashiter);
19421945
return;
@@ -1973,7 +1976,7 @@ ExecReScanAgg(AggState *node)
19731976
*/
19741977
MemoryContextResetAndDeleteChildren(node->aggcontext);
19751978

1976-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1979+
if (aggnode->aggstrategy == AGG_HASHED)
19771980
{
19781981
/* Rebuild an empty hash table */
19791982
build_hash_table(node);

src/backend/nodes/copyfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,7 @@ _copyAgg(const Agg *from)
781781
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
782782
}
783783
COPY_SCALAR_FIELD(numGroups);
784+
COPY_BITMAPSET_FIELD(aggParams);
784785

785786
return newnode;
786787
}

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ _outAgg(StringInfo str, const Agg *node)
645645
appendStringInfo(str, " %u", node->grpOperators[i]);
646646

647647
WRITE_LONG_FIELD(numGroups);
648+
WRITE_BITMAPSET_FIELD(aggParams);
648649
}
649650

650651
static void

src/backend/optimizer/plan/createplan.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4311,6 +4311,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
43114311
node->grpColIdx = grpColIdx;
43124312
node->grpOperators = grpOperators;
43134313
node->numGroups = numGroups;
4314+
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
43144315

43154316
copy_plan_costsize(plan, lefttree); /* only care about copying size */
43164317
cost_agg(&agg_path, root,

src/backend/optimizer/plan/subselect.c

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
8080
Bitmapset *valid_params,
8181
Bitmapset *scan_params);
8282
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
83+
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
8384

8485

8586
/*
@@ -2356,6 +2357,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23562357
locally_added_param);
23572358
break;
23582359

2360+
case T_Agg:
2361+
{
2362+
Agg *agg = (Agg *) plan;
2363+
2364+
/*
2365+
* AGG_HASHED plans need to know which Params are referenced
2366+
* in aggregate calls. Do a separate scan to identify them.
2367+
*/
2368+
if (agg->aggstrategy == AGG_HASHED)
2369+
{
2370+
finalize_primnode_context aggcontext;
2371+
2372+
aggcontext.root = root;
2373+
aggcontext.paramids = NULL;
2374+
finalize_agg_primnode((Node *) agg->plan.targetlist,
2375+
&aggcontext);
2376+
finalize_agg_primnode((Node *) agg->plan.qual,
2377+
&aggcontext);
2378+
agg->aggParams = aggcontext.paramids;
2379+
}
2380+
}
2381+
break;
2382+
23592383
case T_WindowAgg:
23602384
finalize_primnode(((WindowAgg *) plan)->startOffset,
23612385
&context);
@@ -2364,7 +2388,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23642388
break;
23652389

23662390
case T_Hash:
2367-
case T_Agg:
23682391
case T_Material:
23692392
case T_Sort:
23702393
case T_Unique:
@@ -2508,6 +2531,28 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
25082531
(void *) context);
25092532
}
25102533

2534+
/*
2535+
* finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2536+
* and add IDs of all PARAM_EXEC params appearing within their aggregated
2537+
* arguments to the result set.
2538+
*/
2539+
static bool
2540+
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2541+
{
2542+
if (node == NULL)
2543+
return false;
2544+
if (IsA(node, Aggref))
2545+
{
2546+
Aggref *agg = (Aggref *) node;
2547+
2548+
/* we should not consider the direct arguments, if any */
2549+
finalize_primnode((Node *) agg->args, context);
2550+
return false; /* there can't be any Aggrefs below here */
2551+
}
2552+
return expression_tree_walker(node, finalize_agg_primnode,
2553+
(void *) context);
2554+
}
2555+
25112556
/*
25122557
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
25132558
*

src/include/nodes/plannodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,8 @@ typedef struct Agg
633633
AttrNumber *grpColIdx; /* their indexes in the target list */
634634
Oid *grpOperators; /* equality operators to compare with */
635635
long numGroups; /* estimated number of groups in input */
636+
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
637+
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
636638
} Agg;
637639

638640
/* ----------------

src/test/regress/expected/aggregates.out

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,79 @@ from tenk1 o;
305305
9999
306306
(1 row)
307307

308+
-- Test handling of Params within aggregate arguments in hashed aggregation.
309+
-- Per bug report from Jeevan Chalke.
310+
explain (verbose, costs off)
311+
select s1, s2, sm
312+
from generate_series(1, 3) s1,
313+
lateral (select s2, sum(s1 + s2) sm
314+
from generate_series(1, 3) s2 group by s2) ss
315+
order by 1, 2;
316+
QUERY PLAN
317+
------------------------------------------------------------------
318+
Sort
319+
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
320+
Sort Key: s1.s1, s2.s2
321+
-> Nested Loop
322+
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
323+
-> Function Scan on pg_catalog.generate_series s1
324+
Output: s1.s1
325+
Function Call: generate_series(1, 3)
326+
-> HashAggregate
327+
Output: s2.s2, sum((s1.s1 + s2.s2))
328+
-> Function Scan on pg_catalog.generate_series s2
329+
Output: s2.s2
330+
Function Call: generate_series(1, 3)
331+
(13 rows)
332+
333+
select s1, s2, sm
334+
from generate_series(1, 3) s1,
335+
lateral (select s2, sum(s1 + s2) sm
336+
from generate_series(1, 3) s2 group by s2) ss
337+
order by 1, 2;
338+
s1 | s2 | sm
339+
----+----+----
340+
1 | 1 | 2
341+
1 | 2 | 3
342+
1 | 3 | 4
343+
2 | 1 | 3
344+
2 | 2 | 4
345+
2 | 3 | 5
346+
3 | 1 | 4
347+
3 | 2 | 5
348+
3 | 3 | 6
349+
(9 rows)
350+
351+
explain (verbose, costs off)
352+
select array(select sum(x+y) s
353+
from generate_series(1,3) y group by y order by s)
354+
from generate_series(1,3) x;
355+
QUERY PLAN
356+
-------------------------------------------------------------------
357+
Function Scan on pg_catalog.generate_series x
358+
Output: (SubPlan 1)
359+
Function Call: generate_series(1, 3)
360+
SubPlan 1
361+
-> Sort
362+
Output: (sum((x.x + y.y))), y.y
363+
Sort Key: (sum((x.x + y.y)))
364+
-> HashAggregate
365+
Output: sum((x.x + y.y)), y.y
366+
-> Function Scan on pg_catalog.generate_series y
367+
Output: y.y
368+
Function Call: generate_series(1, 3)
369+
(12 rows)
370+
371+
select array(select sum(x+y) s
372+
from generate_series(1,3) y group by y order by s)
373+
from generate_series(1,3) x;
374+
array
375+
---------
376+
{2,3,4}
377+
{3,4,5}
378+
{4,5,6}
379+
(3 rows)
380+
308381
--
309382
-- test for bitwise integer aggregates
310383
--

src/test/regress/sql/aggregates.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,28 @@ select
8686
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
8787
from tenk1 o;
8888

89+
-- Test handling of Params within aggregate arguments in hashed aggregation.
90+
-- Per bug report from Jeevan Chalke.
91+
explain (verbose, costs off)
92+
select s1, s2, sm
93+
from generate_series(1, 3) s1,
94+
lateral (select s2, sum(s1 + s2) sm
95+
from generate_series(1, 3) s2 group by s2) ss
96+
order by 1, 2;
97+
select s1, s2, sm
98+
from generate_series(1, 3) s1,
99+
lateral (select s2, sum(s1 + s2) sm
100+
from generate_series(1, 3) s2 group by s2) ss
101+
order by 1, 2;
102+
103+
explain (verbose, costs off)
104+
select array(select sum(x+y) s
105+
from generate_series(1,3) y group by y order by s)
106+
from generate_series(1,3) x;
107+
select array(select sum(x+y) s
108+
from generate_series(1,3) y group by y order by s)
109+
from generate_series(1,3) x;
110+
89111
--
90112
-- test for bitwise integer aggregates
91113
--

0 commit comments

Comments
 (0)