Skip to content

Commit 1b54e91

Browse files
committed
Fix run-time partition pruning for appends with multiple source rels.
The previous coding here supposed that if run-time partition pruning applied to a particular Append/MergeAppend plan, then all child plans of that node must be members of a single partitioning hierarchy. This is totally wrong, since an Append could be formed from a UNION ALL: we could have multiple hierarchies sharing the same Append, or child plans that aren't part of any hierarchy.

To fix, restructure the related plan-time and execution-time data structures so that we can have a separate list or array for each partitioning hierarchy. Also track subplans that are not part of any hierarchy, and make sure they don't get pruned.

Per reports from Phil Florent and others. Back-patch to v11, since the bug originated there.

David Rowley, with a lot of cosmetic adjustments by me; thanks also to Amit Langote for review.

Discussion: https://postgr.es/m/HE1PR03MB17068BB27404C90B5B788BCABA7B0@HE1PR03MB1706.eurprd03.prod.outlook.com
1 parent 7326a7d commit 1b54e91

File tree

15 files changed

+780
-270
lines changed

15 files changed

+780
-270
lines changed

src/backend/executor/execPartition.c

Lines changed: 239 additions & 160 deletions
Large diffs are not rendered by default.

src/backend/executor/nodeAppend.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
129129
appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;
130130

131131
/* If run-time partition pruning is enabled, then set that up now */
132-
if (node->part_prune_infos != NIL)
132+
if (node->part_prune_info != NULL)
133133
{
134134
PartitionPruneState *prunestate;
135135

@@ -138,7 +138,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
138138

139139
/* Create the working data structure for pruning. */
140140
prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
141-
node->part_prune_infos);
141+
node->part_prune_info);
142142
appendstate->as_prune_state = prunestate;
143143

144144
/* Perform an initial partition prune, if required. */

src/backend/nodes/copyfuncs.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ _copyAppend(const Append *from)
245245
COPY_NODE_FIELD(appendplans);
246246
COPY_SCALAR_FIELD(first_partial_plan);
247247
COPY_NODE_FIELD(partitioned_rels);
248-
COPY_NODE_FIELD(part_prune_infos);
248+
COPY_NODE_FIELD(part_prune_info);
249249

250250
return newnode;
251251
}
@@ -1181,6 +1181,17 @@ _copyPartitionPruneInfo(const PartitionPruneInfo *from)
11811181
{
11821182
PartitionPruneInfo *newnode = makeNode(PartitionPruneInfo);
11831183

1184+
COPY_NODE_FIELD(prune_infos);
1185+
COPY_BITMAPSET_FIELD(other_subplans);
1186+
1187+
return newnode;
1188+
}
1189+
1190+
static PartitionedRelPruneInfo *
1191+
_copyPartitionedRelPruneInfo(const PartitionedRelPruneInfo *from)
1192+
{
1193+
PartitionedRelPruneInfo *newnode = makeNode(PartitionedRelPruneInfo);
1194+
11841195
COPY_SCALAR_FIELD(reloid);
11851196
COPY_NODE_FIELD(pruning_steps);
11861197
COPY_BITMAPSET_FIELD(present_parts);
@@ -4907,6 +4918,9 @@ copyObjectImpl(const void *from)
49074918
case T_PartitionPruneInfo:
49084919
retval = _copyPartitionPruneInfo(from);
49094920
break;
4921+
case T_PartitionedRelPruneInfo:
4922+
retval = _copyPartitionedRelPruneInfo(from);
4923+
break;
49104924
case T_PartitionPruneStepOp:
49114925
retval = _copyPartitionPruneStepOp(from);
49124926
break;

src/backend/nodes/outfuncs.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ _outAppend(StringInfo str, const Append *node)
402402
WRITE_NODE_FIELD(appendplans);
403403
WRITE_INT_FIELD(first_partial_plan);
404404
WRITE_NODE_FIELD(partitioned_rels);
405-
WRITE_NODE_FIELD(part_prune_infos);
405+
WRITE_NODE_FIELD(part_prune_info);
406406
}
407407

408408
static void
@@ -1012,10 +1012,19 @@ _outPlanRowMark(StringInfo str, const PlanRowMark *node)
10121012

10131013
static void
10141014
_outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node)
1015+
{
1016+
WRITE_NODE_TYPE("PARTITIONPRUNEINFO");
1017+
1018+
WRITE_NODE_FIELD(prune_infos);
1019+
WRITE_BITMAPSET_FIELD(other_subplans);
1020+
}
1021+
1022+
static void
1023+
_outPartitionedRelPruneInfo(StringInfo str, const PartitionedRelPruneInfo *node)
10151024
{
10161025
int i;
10171026

1018-
WRITE_NODE_TYPE("PARTITIONPRUNEINFO");
1027+
WRITE_NODE_TYPE("PARTITIONEDRELPRUNEINFO");
10191028

10201029
WRITE_OID_FIELD(reloid);
10211030
WRITE_NODE_FIELD(pruning_steps);
@@ -3829,6 +3838,9 @@ outNode(StringInfo str, const void *obj)
38293838
case T_PartitionPruneInfo:
38303839
_outPartitionPruneInfo(str, obj);
38313840
break;
3841+
case T_PartitionedRelPruneInfo:
3842+
_outPartitionedRelPruneInfo(str, obj);
3843+
break;
38323844
case T_PartitionPruneStepOp:
38333845
_outPartitionPruneStepOp(str, obj);
38343846
break;

src/backend/nodes/readfuncs.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1612,7 +1612,7 @@ _readAppend(void)
16121612
READ_NODE_FIELD(appendplans);
16131613
READ_INT_FIELD(first_partial_plan);
16141614
READ_NODE_FIELD(partitioned_rels);
1615-
READ_NODE_FIELD(part_prune_infos);
1615+
READ_NODE_FIELD(part_prune_info);
16161616

16171617
READ_DONE();
16181618
}
@@ -2328,6 +2328,17 @@ _readPartitionPruneInfo(void)
23282328
{
23292329
READ_LOCALS(PartitionPruneInfo);
23302330

2331+
READ_NODE_FIELD(prune_infos);
2332+
READ_BITMAPSET_FIELD(other_subplans);
2333+
2334+
READ_DONE();
2335+
}
2336+
2337+
static PartitionedRelPruneInfo *
2338+
_readPartitionedRelPruneInfo(void)
2339+
{
2340+
READ_LOCALS(PartitionedRelPruneInfo);
2341+
23312342
READ_OID_FIELD(reloid);
23322343
READ_NODE_FIELD(pruning_steps);
23332344
READ_BITMAPSET_FIELD(present_parts);
@@ -2725,6 +2736,8 @@ parseNodeString(void)
27252736
return_value = _readPlanRowMark();
27262737
else if (MATCH("PARTITIONPRUNEINFO", 18))
27272738
return_value = _readPartitionPruneInfo();
2739+
else if (MATCH("PARTITIONEDRELPRUNEINFO", 23))
2740+
return_value = _readPartitionedRelPruneInfo();
27282741
else if (MATCH("PARTITIONPRUNESTEPOP", 20))
27292742
return_value = _readPartitionPruneStepOp();
27302743
else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25))

src/backend/optimizer/path/allpaths.c

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,7 +1388,6 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
13881388
List *all_child_outers = NIL;
13891389
ListCell *l;
13901390
List *partitioned_rels = NIL;
1391-
bool build_partitioned_rels = false;
13921391
double partial_rows = -1;
13931392

13941393
/* If appropriate, consider parallel append */
@@ -1413,10 +1412,11 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
14131412
if (rel->part_scheme != NULL)
14141413
{
14151414
if (IS_SIMPLE_REL(rel))
1416-
partitioned_rels = rel->partitioned_child_rels;
1415+
partitioned_rels = list_make1(rel->partitioned_child_rels);
14171416
else if (IS_JOIN_REL(rel))
14181417
{
14191418
int relid = -1;
1419+
List *partrels = NIL;
14201420

14211421
/*
14221422
* For a partitioned joinrel, concatenate the component rels'
@@ -1430,16 +1430,16 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
14301430
component = root->simple_rel_array[relid];
14311431
Assert(component->part_scheme != NULL);
14321432
Assert(list_length(component->partitioned_child_rels) >= 1);
1433-
partitioned_rels =
1434-
list_concat(partitioned_rels,
1433+
partrels =
1434+
list_concat(partrels,
14351435
list_copy(component->partitioned_child_rels));
14361436
}
1437+
1438+
partitioned_rels = list_make1(partrels);
14371439
}
14381440

14391441
Assert(list_length(partitioned_rels) >= 1);
14401442
}
1441-
else if (rel->rtekind == RTE_SUBQUERY)
1442-
build_partitioned_rels = true;
14431443

14441444
/*
14451445
* For every non-dummy child, remember the cheapest path. Also, identify
@@ -1453,17 +1453,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
14531453
Path *cheapest_partial_path = NULL;
14541454

14551455
/*
1456-
* If we need to build partitioned_rels, accumulate the partitioned
1457-
* rels for this child. We must ensure that parents are always listed
1458-
* before their child partitioned tables.
1456+
* For UNION ALLs with non-empty partitioned_child_rels, accumulate
1457+
* the Lists of child relations.
14591458
*/
1460-
if (build_partitioned_rels)
1461-
{
1462-
List *cprels = childrel->partitioned_child_rels;
1463-
1464-
partitioned_rels = list_concat(partitioned_rels,
1465-
list_copy(cprels));
1466-
}
1459+
if (rel->rtekind == RTE_SUBQUERY && childrel->partitioned_child_rels != NIL)
1460+
partitioned_rels = lappend(partitioned_rels,
1461+
childrel->partitioned_child_rels);
14671462

14681463
/*
14691464
* If child has an unparameterized cheapest-total path, add that to

src/backend/optimizer/plan/createplan.c

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root,
124124
static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual,
125125
List **qual, List **indexqual, List **indexECs);
126126
static void bitmap_subplan_mark_shared(Plan *plan);
127+
static List *flatten_partitioned_rels(List *partitioned_rels);
127128
static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
128129
List *tlist, List *scan_clauses);
129130
static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root,
@@ -202,7 +203,8 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual
202203
static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
203204
Index scanrelid, int wtParam);
204205
static Append *make_append(List *appendplans, int first_partial_plan,
205-
List *tlist, List *partitioned_rels, List *partpruneinfos);
206+
List *tlist, List *partitioned_rels,
207+
PartitionPruneInfo *partpruneinfo);
206208
static RecursiveUnion *make_recursive_union(List *tlist,
207209
Plan *lefttree,
208210
Plan *righttree,
@@ -1030,7 +1032,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
10301032
List *subplans = NIL;
10311033
ListCell *subpaths;
10321034
RelOptInfo *rel = best_path->path.parent;
1033-
List *partpruneinfos = NIL;
1035+
PartitionPruneInfo *partpruneinfo = NULL;
10341036

10351037
/*
10361038
* The subpaths list could be empty, if every child was proven empty by
@@ -1068,6 +1070,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
10681070
subplans = lappend(subplans, subplan);
10691071
}
10701072

1073+
/*
1074+
* If any quals exist, they may be useful to perform further partition
1075+
* pruning during execution. Gather information needed by the executor to
1076+
* do partition pruning.
1077+
*/
10711078
if (enable_partition_pruning &&
10721079
rel->reloptkind == RELOPT_BASEREL &&
10731080
best_path->partitioned_rels != NIL)
@@ -1095,10 +1102,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
10951102
* partition indexes into subpath indexes.
10961103
*/
10971104
if (prunequal != NIL)
1098-
partpruneinfos =
1099-
make_partition_pruneinfo(root,
1105+
partpruneinfo =
1106+
make_partition_pruneinfo(root, rel,
1107+
best_path->subpaths,
11001108
best_path->partitioned_rels,
1101-
best_path->subpaths, prunequal);
1109+
prunequal);
11021110
}
11031111

11041112
/*
@@ -1110,7 +1118,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
11101118

11111119
plan = make_append(subplans, best_path->first_partial_path,
11121120
tlist, best_path->partitioned_rels,
1113-
partpruneinfos);
1121+
partpruneinfo);
11141122

11151123
copy_generic_path_info(&plan->plan, (Path *) best_path);
11161124

@@ -1218,7 +1226,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
12181226
subplans = lappend(subplans, subplan);
12191227
}
12201228

1221-
node->partitioned_rels = best_path->partitioned_rels;
1229+
node->partitioned_rels =
1230+
flatten_partitioned_rels(best_path->partitioned_rels);
12221231
node->mergeplans = subplans;
12231232

12241233
return (Plan *) node;
@@ -4968,6 +4977,27 @@ bitmap_subplan_mark_shared(Plan *plan)
49684977
elog(ERROR, "unrecognized node type: %d", nodeTag(plan));
49694978
}
49704979

4980+
/*
4981+
* flatten_partitioned_rels
4982+
* Convert List of Lists into a single List with all elements from the
4983+
* sub-lists.
4984+
*/
4985+
static List *
4986+
flatten_partitioned_rels(List *partitioned_rels)
4987+
{
4988+
List *newlist = NIL;
4989+
ListCell *lc;
4990+
4991+
foreach(lc, partitioned_rels)
4992+
{
4993+
List *sublist = lfirst(lc);
4994+
4995+
newlist = list_concat(newlist, list_copy(sublist));
4996+
}
4997+
4998+
return newlist;
4999+
}
5000+
49715001
/*****************************************************************************
49725002
*
49735003
* PLAN NODE BUILDING ROUTINES
@@ -5311,7 +5341,7 @@ make_foreignscan(List *qptlist,
53115341
static Append *
53125342
make_append(List *appendplans, int first_partial_plan,
53135343
List *tlist, List *partitioned_rels,
5314-
List *partpruneinfos)
5344+
PartitionPruneInfo *partpruneinfo)
53155345
{
53165346
Append *node = makeNode(Append);
53175347
Plan *plan = &node->plan;
@@ -5322,8 +5352,8 @@ make_append(List *appendplans, int first_partial_plan,
53225352
plan->righttree = NULL;
53235353
node->appendplans = appendplans;
53245354
node->first_partial_plan = first_partial_plan;
5325-
node->partitioned_rels = partitioned_rels;
5326-
node->part_prune_infos = partpruneinfos;
5355+
node->partitioned_rels = flatten_partitioned_rels(partitioned_rels);
5356+
node->part_prune_info = partpruneinfo;
53275357
return node;
53285358
}
53295359

@@ -6480,7 +6510,7 @@ make_modifytable(PlannerInfo *root,
64806510
node->operation = operation;
64816511
node->canSetTag = canSetTag;
64826512
node->nominalRelation = nominalRelation;
6483-
node->partitioned_rels = partitioned_rels;
6513+
node->partitioned_rels = flatten_partitioned_rels(partitioned_rels);
64846514
node->partColsUpdated = partColsUpdated;
64856515
node->resultRelations = resultRelations;
64866516
node->resultRelIndex = -1; /* will be set correctly in setrefs.c */

src/backend/optimizer/plan/planner.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,7 @@ inheritance_planner(PlannerInfo *root)
16171617
* contain at least one member, that is, the root parent's index.
16181618
*/
16191619
Assert(list_length(partitioned_rels) >= 1);
1620+
partitioned_rels = list_make1(partitioned_rels);
16201621
}
16211622

16221623
/* Create Path representing a ModifyTable to do the UPDATE/DELETE work */

0 commit comments

Comments
 (0)