Skip to content

Commit a9db37a

Browse files
committed
Generate EquivalenceClass members for partitionwise child join rels.
Commit d25ea01 got rid of what I thought were entirely unnecessary derived child expressions in EquivalenceClasses for EC members that mention multiple baserels. But it turns out that some of the child expressions that code created are necessary for partitionwise joins, else we fail to find matching pathkeys for Sort nodes. (This happens only for certain shapes of the resulting plan; it may be that partitionwise aggregation is also necessary to show the failure, though I'm not sure of that.) Reverting that commit entirely would be quite painful performance-wise for large partition sets. So instead, add code that explicitly generates child expressions that match only partitionwise child join rels we have actually generated. Per report from Justin Pryzby. (Amit Langote noticed the problem earlier, though it's not clear if he recognized then that it could result in a planner error, not merely failure to exploit partitionwise join, in the code as-committed.) Back-patch to v12 where commit d25ea01 came in. Amit Langote, with lots of kibitzing from me Discussion: https://postgr.es/m/CA+HiwqG2WVUGmLJqtR0tPFhniO=H=9qQ+Z3L_ZC+Y3-EVQHFGg@mail.gmail.com Discussion: https://postgr.es/m/20191011143703.GN10470@telsasoft.com
1 parent 4d04031 commit a9db37a

File tree

5 files changed

+253
-13
lines changed

5 files changed

+253
-13
lines changed

src/backend/optimizer/path/equivclass.c

Lines changed: 142 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2103,23 +2103,33 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
21032103

21042104
/*
21052105
* add_child_rel_equivalences
2106-
* Search for EC members that reference the parent_rel, and
2106+
* Search for EC members that reference the root parent of child_rel, and
21072107
* add transformed members referencing the child_rel.
21082108
*
21092109
* Note that this function won't be called at all unless we have at least some
21102110
* reason to believe that the EC members it generates will be useful.
21112111
*
21122112
* parent_rel and child_rel could be derived from appinfo, but since the
21132113
* caller has already computed them, we might as well just pass them in.
2114+
*
2115+
* The passed-in AppendRelInfo is not used when the parent_rel is not a
2116+
* top-level baserel, since it shows the mapping from the parent_rel but
2117+
* we need to translate EC expressions that refer to the top-level parent.
2118+
* Using it is faster than using adjust_appendrel_attrs_multilevel(), though,
2119+
* so we prefer it when we can.
21142120
*/
21152121
void
21162122
add_child_rel_equivalences(PlannerInfo *root,
21172123
AppendRelInfo *appinfo,
21182124
RelOptInfo *parent_rel,
21192125
RelOptInfo *child_rel)
21202126
{
2127+
Relids top_parent_relids = child_rel->top_parent_relids;
2128+
Relids child_relids = child_rel->relids;
21212129
ListCell *lc1;
21222130

2131+
Assert(IS_SIMPLE_REL(parent_rel));
2132+
21232133
foreach(lc1, root->eq_classes)
21242134
{
21252135
EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
@@ -2137,7 +2147,7 @@ add_child_rel_equivalences(PlannerInfo *root,
21372147
* No point in searching if child's topmost parent rel is not
21382148
* mentioned in eclass.
21392149
*/
2140-
if (!bms_is_subset(child_rel->top_parent_relids, cur_ec->ec_relids))
2150+
if (!bms_is_subset(top_parent_relids, cur_ec->ec_relids))
21412151
continue;
21422152

21432153
foreach(lc2, cur_ec->ec_members)
@@ -2152,13 +2162,14 @@ add_child_rel_equivalences(PlannerInfo *root,
21522162
* already-transformed child members. Otherwise, if some original
21532163
* member expression references more than one appendrel, we'd get
21542164
* an O(N^2) explosion of useless derived expressions for
2155-
* combinations of children.
2165+
* combinations of children. (But add_child_join_rel_equivalences
2166+
* may add targeted combinations for partitionwise-join purposes.)
21562167
*/
21572168
if (cur_em->em_is_child)
21582169
continue; /* ignore children here */
21592170

21602171
/* Does this member reference child's topmost parent rel? */
2161-
if (bms_overlap(cur_em->em_relids, child_rel->top_parent_relids))
2172+
if (bms_overlap(cur_em->em_relids, top_parent_relids))
21622173
{
21632174
/* Yes, generate transformed child version */
21642175
Expr *child_expr;
@@ -2179,8 +2190,8 @@ add_child_rel_equivalences(PlannerInfo *root,
21792190
child_expr = (Expr *)
21802191
adjust_appendrel_attrs_multilevel(root,
21812192
(Node *) cur_em->em_expr,
2182-
child_rel->relids,
2183-
child_rel->top_parent_relids);
2193+
child_relids,
2194+
top_parent_relids);
21842195
}
21852196

21862197
/*
@@ -2190,22 +2201,141 @@ add_child_rel_equivalences(PlannerInfo *root,
21902201
* don't want the child member to be marked as constant.
21912202
*/
21922203
new_relids = bms_difference(cur_em->em_relids,
2193-
child_rel->top_parent_relids);
2194-
new_relids = bms_add_members(new_relids, child_rel->relids);
2204+
top_parent_relids);
2205+
new_relids = bms_add_members(new_relids, child_relids);
21952206

21962207
/*
21972208
* And likewise for nullable_relids. Note this code assumes
21982209
* parent and child relids are singletons.
21992210
*/
22002211
new_nullable_relids = cur_em->em_nullable_relids;
2201-
if (bms_overlap(new_nullable_relids,
2202-
child_rel->top_parent_relids))
2212+
if (bms_overlap(new_nullable_relids, top_parent_relids))
22032213
{
22042214
new_nullable_relids = bms_difference(new_nullable_relids,
2205-
child_rel->top_parent_relids);
2215+
top_parent_relids);
22062216
new_nullable_relids = bms_add_members(new_nullable_relids,
2207-
child_rel->relids);
2217+
child_relids);
2218+
}
2219+
2220+
(void) add_eq_member(cur_ec, child_expr,
2221+
new_relids, new_nullable_relids,
2222+
true, cur_em->em_datatype);
2223+
}
2224+
}
2225+
}
2226+
}
2227+
2228+
/*
2229+
* add_child_join_rel_equivalences
2230+
* Like add_child_rel_equivalences(), but for joinrels
2231+
*
2232+
* Here we find the ECs relevant to the top parent joinrel and add transformed
2233+
* member expressions that refer to this child joinrel.
2234+
*
2235+
* Note that this function won't be called at all unless we have at least some
2236+
* reason to believe that the EC members it generates will be useful.
2237+
*/
2238+
void
2239+
add_child_join_rel_equivalences(PlannerInfo *root,
2240+
int nappinfos, AppendRelInfo **appinfos,
2241+
RelOptInfo *parent_joinrel,
2242+
RelOptInfo *child_joinrel)
2243+
{
2244+
Relids top_parent_relids = child_joinrel->top_parent_relids;
2245+
Relids child_relids = child_joinrel->relids;
2246+
ListCell *lc1;
2247+
2248+
Assert(IS_JOIN_REL(child_joinrel) && IS_JOIN_REL(parent_joinrel));
2249+
2250+
foreach(lc1, root->eq_classes)
2251+
{
2252+
EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
2253+
ListCell *lc2;
2254+
2255+
/*
2256+
* If this EC contains a volatile expression, then generating child
2257+
* EMs would be downright dangerous, so skip it. We rely on a
2258+
* volatile EC having only one EM.
2259+
*/
2260+
if (cur_ec->ec_has_volatile)
2261+
continue;
2262+
2263+
/*
2264+
* No point in searching if child's topmost parent rel is not
2265+
* mentioned in eclass.
2266+
*/
2267+
if (!bms_overlap(top_parent_relids, cur_ec->ec_relids))
2268+
continue;
2269+
2270+
foreach(lc2, cur_ec->ec_members)
2271+
{
2272+
EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
2273+
2274+
if (cur_em->em_is_const)
2275+
continue; /* ignore consts here */
2276+
2277+
/*
2278+
* We consider only original EC members here, not
2279+
* already-transformed child members.
2280+
*/
2281+
if (cur_em->em_is_child)
2282+
continue; /* ignore children here */
2283+
2284+
/*
2285+
* We may ignore expressions that reference a single baserel,
2286+
* because add_child_rel_equivalences should have handled them.
2287+
*/
2288+
if (bms_membership(cur_em->em_relids) != BMS_MULTIPLE)
2289+
continue;
2290+
2291+
/* Does this member reference child's topmost parent rel? */
2292+
if (bms_overlap(cur_em->em_relids, top_parent_relids))
2293+
{
2294+
/* Yes, generate transformed child version */
2295+
Expr *child_expr;
2296+
Relids new_relids;
2297+
Relids new_nullable_relids;
2298+
2299+
if (parent_joinrel->reloptkind == RELOPT_JOINREL)
2300+
{
2301+
/* Simple single-level transformation */
2302+
child_expr = (Expr *)
2303+
adjust_appendrel_attrs(root,
2304+
(Node *) cur_em->em_expr,
2305+
nappinfos, appinfos);
22082306
}
2307+
else
2308+
{
2309+
/* Must do multi-level transformation */
2310+
Assert(parent_joinrel->reloptkind == RELOPT_OTHER_JOINREL);
2311+
child_expr = (Expr *)
2312+
adjust_appendrel_attrs_multilevel(root,
2313+
(Node *) cur_em->em_expr,
2314+
child_relids,
2315+
top_parent_relids);
2316+
}
2317+
2318+
/*
2319+
* Transform em_relids to match. Note we do *not* do
2320+
* pull_varnos(child_expr) here, as for example the
2321+
* transformation might have substituted a constant, but we
2322+
* don't want the child member to be marked as constant.
2323+
*/
2324+
new_relids = bms_difference(cur_em->em_relids,
2325+
top_parent_relids);
2326+
new_relids = bms_add_members(new_relids, child_relids);
2327+
2328+
/*
2329+
* For nullable_relids, we must selectively replace parent
2330+
* nullable relids with child ones.
2331+
*/
2332+
new_nullable_relids = cur_em->em_nullable_relids;
2333+
if (bms_overlap(new_nullable_relids, top_parent_relids))
2334+
new_nullable_relids =
2335+
adjust_child_relids_multilevel(root,
2336+
new_nullable_relids,
2337+
child_relids,
2338+
top_parent_relids);
22092339

22102340
(void) add_eq_member(cur_ec, child_expr,
22112341
new_relids, new_nullable_relids,

src/backend/optimizer/util/relnode.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
837837
/* Compute information relevant to foreign relations. */
838838
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
839839

840+
/* Compute information needed for mapping Vars to the child rel */
840841
appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
841842

842843
/* Set up reltarget struct */
@@ -848,7 +849,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
848849
(Node *) parent_joinrel->joininfo,
849850
nappinfos,
850851
appinfos);
851-
pfree(appinfos);
852852

853853
/*
854854
* Lateral relids referred in child join will be same as that referred in
@@ -883,6 +883,19 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
883883
/* Add the relation to the PlannerInfo. */
884884
add_join_rel(root, joinrel);
885885

886+
/*
887+
* We might need EquivalenceClass members corresponding to the child join,
888+
* so that we can represent sort pathkeys for it. As with children of
889+
* baserels, we shouldn't need this unless there are relevant eclass joins
890+
* (implying that a merge join might be possible) or pathkeys to sort by.
891+
*/
892+
if (joinrel->has_eclass_joins || has_useful_pathkeys(root, parent_joinrel))
893+
add_child_join_rel_equivalences(root,
894+
nappinfos, appinfos,
895+
parent_joinrel, joinrel);
896+
897+
pfree(appinfos);
898+
886899
return joinrel;
887900
}
888901

src/include/optimizer/paths.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ extern void add_child_rel_equivalences(PlannerInfo *root,
153153
AppendRelInfo *appinfo,
154154
RelOptInfo *parent_rel,
155155
RelOptInfo *child_rel);
156+
extern void add_child_join_rel_equivalences(PlannerInfo *root,
157+
int nappinfos,
158+
AppendRelInfo **appinfos,
159+
RelOptInfo *parent_rel,
160+
RelOptInfo *child_rel);
156161
extern List *generate_implied_equalities_for_column(PlannerInfo *root,
157162
RelOptInfo *rel,
158163
ec_matches_callback_type callback,

src/test/regress/expected/partition_join.out

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,83 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
459459
550 | |
460460
(12 rows)
461461

462+
-- bug with inadequate sort key representation
463+
SET enable_partitionwise_aggregate TO true;
464+
SET enable_hashjoin TO false;
465+
EXPLAIN (COSTS OFF)
466+
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
467+
WHERE a BETWEEN 490 AND 510
468+
GROUP BY 1, 2 ORDER BY 1, 2;
469+
QUERY PLAN
470+
-------------------------------------------------------------------------------------------------------------------
471+
Group
472+
Group Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
473+
-> Merge Append
474+
Sort Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
475+
-> Group
476+
Group Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
477+
-> Sort
478+
Sort Key: (COALESCE(prt1_p1.a, p2.a)), (COALESCE(prt1_p1.b, p2.b))
479+
-> Merge Full Join
480+
Merge Cond: ((prt1_p1.a = p2.a) AND (prt1_p1.b = p2.b))
481+
Filter: ((COALESCE(prt1_p1.a, p2.a) >= 490) AND (COALESCE(prt1_p1.a, p2.a) <= 510))
482+
-> Sort
483+
Sort Key: prt1_p1.a, prt1_p1.b
484+
-> Seq Scan on prt1_p1
485+
-> Sort
486+
Sort Key: p2.a, p2.b
487+
-> Seq Scan on prt2_p1 p2
488+
-> Group
489+
Group Key: (COALESCE(prt1_p2.a, p2_1.a)), (COALESCE(prt1_p2.b, p2_1.b))
490+
-> Sort
491+
Sort Key: (COALESCE(prt1_p2.a, p2_1.a)), (COALESCE(prt1_p2.b, p2_1.b))
492+
-> Merge Full Join
493+
Merge Cond: ((prt1_p2.a = p2_1.a) AND (prt1_p2.b = p2_1.b))
494+
Filter: ((COALESCE(prt1_p2.a, p2_1.a) >= 490) AND (COALESCE(prt1_p2.a, p2_1.a) <= 510))
495+
-> Sort
496+
Sort Key: prt1_p2.a, prt1_p2.b
497+
-> Seq Scan on prt1_p2
498+
-> Sort
499+
Sort Key: p2_1.a, p2_1.b
500+
-> Seq Scan on prt2_p2 p2_1
501+
-> Group
502+
Group Key: (COALESCE(prt1_p3.a, p2_2.a)), (COALESCE(prt1_p3.b, p2_2.b))
503+
-> Sort
504+
Sort Key: (COALESCE(prt1_p3.a, p2_2.a)), (COALESCE(prt1_p3.b, p2_2.b))
505+
-> Merge Full Join
506+
Merge Cond: ((prt1_p3.a = p2_2.a) AND (prt1_p3.b = p2_2.b))
507+
Filter: ((COALESCE(prt1_p3.a, p2_2.a) >= 490) AND (COALESCE(prt1_p3.a, p2_2.a) <= 510))
508+
-> Sort
509+
Sort Key: prt1_p3.a, prt1_p3.b
510+
-> Seq Scan on prt1_p3
511+
-> Sort
512+
Sort Key: p2_2.a, p2_2.b
513+
-> Seq Scan on prt2_p3 p2_2
514+
(43 rows)
515+
516+
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
517+
WHERE a BETWEEN 490 AND 510
518+
GROUP BY 1, 2 ORDER BY 1, 2;
519+
a | b
520+
-----+----
521+
490 | 15
522+
492 | 17
523+
494 | 19
524+
495 | 20
525+
496 | 21
526+
498 | 23
527+
500 | 0
528+
501 | 1
529+
502 | 2
530+
504 | 4
531+
506 | 6
532+
507 | 7
533+
508 | 8
534+
510 | 10
535+
(14 rows)
536+
537+
RESET enable_partitionwise_aggregate;
538+
RESET enable_hashjoin;
462539
--
463540
-- partitioned by expression
464541
--

src/test/regress/sql/partition_join.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,21 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
9191
(SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
9292
ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
9393

94+
-- bug with inadequate sort key representation
95+
SET enable_partitionwise_aggregate TO true;
96+
SET enable_hashjoin TO false;
97+
98+
EXPLAIN (COSTS OFF)
99+
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
100+
WHERE a BETWEEN 490 AND 510
101+
GROUP BY 1, 2 ORDER BY 1, 2;
102+
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
103+
WHERE a BETWEEN 490 AND 510
104+
GROUP BY 1, 2 ORDER BY 1, 2;
105+
106+
RESET enable_partitionwise_aggregate;
107+
RESET enable_hashjoin;
108+
94109
--
95110
-- partitioned by expression
96111
--

0 commit comments

Comments
 (0)