Skip to content

Commit f69b4b9

Browse files
committed
Fix some planner issues with degenerate outer join clauses.
An outer join clause that didn't actually reference the RHS (perhaps only after constant-folding) could confuse the join order enforcement logic, leading to wrong query results. Also, nested occurrences of such things could trigger an Assertion that on reflection seems incorrect. Per fuzz testing by Andreas Seltenreich. The practical use of such cases seems thin enough that it's not too surprising we've not heard field reports about it. This has been broken for a long time, so back-patch to all active branches.
1 parent dea1491 commit f69b4b9

File tree

4 files changed

+211
-12
lines changed

4 files changed

+211
-12
lines changed

src/backend/optimizer/path/joinrels.c

+17-9
Original file line numberDiff line numberDiff line change
@@ -467,20 +467,26 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
467467
}
468468
else
469469
{
470+
/*
471+
* Otherwise, the proposed join overlaps the RHS but isn't a valid
472+
* implementation of this SJ. It might still be a legal join,
473+
* however, if it does not overlap the LHS.
474+
*/
475+
if (bms_overlap(joinrelids, sjinfo->min_lefthand))
476+
return false;
477+
470478
/*----------
471-
* Otherwise, the proposed join overlaps the RHS but isn't
472-
* a valid implementation of this SJ. It might still be
473-
* a legal join, however. If both inputs overlap the RHS,
474-
* assume that it's OK. Since the inputs presumably got past
475-
* this function's checks previously, they can't overlap the
476-
* LHS and their violations of the RHS boundary must represent
477-
* SJs that have been determined to commute with this one.
479+
* If both inputs overlap the RHS, assume that it's OK. Since the
480+
* inputs presumably got past this function's checks previously,
481+
* their violations of the RHS boundary must represent SJs that
482+
* have been determined to commute with this one.
478483
* We have to allow this to work correctly in cases like
479484
* (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
480485
* when the c/d join has been determined to commute with the join
481486
* to a, and hence d is not part of min_righthand for the upper
482487
* join. It should be legal to join b to c/d but this will appear
483488
* as a violation of the upper join's RHS.
489+
*
484490
* Furthermore, if one input overlaps the RHS and the other does
485491
* not, we should still allow the join if it is a valid
486492
* implementation of some other SJ. We have to allow this to
@@ -496,11 +502,13 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
496502
bms_overlap(rel1->relids, sjinfo->min_righthand) &&
497503
bms_overlap(rel2->relids, sjinfo->min_righthand))
498504
{
499-
/* seems OK */
500-
Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
505+
/* both overlap; assume OK */
501506
}
502507
else
508+
{
509+
/* one overlaps, the other doesn't (or it's a semijoin) */
503510
is_valid_inner = false;
511+
}
504512
}
505513
}
506514

src/backend/optimizer/plan/initsplan.c

+15-3
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,20 @@ make_outerjoininfo(PlannerInfo *root,
11281128
min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
11291129
right_rels);
11301130

1131+
/*
1132+
* If we have a degenerate join clause that doesn't mention any RHS rels,
1133+
* force the min RHS to be the syntactic RHS; otherwise we can end up
1134+
* making serious errors, like putting the LHS on the wrong side of an
1135+
* outer join. It seems to be safe to not do this when we have a
1136+
* contribution from inner_join_rels, though; that's enough to pin the SJ
1137+
* to occur at a reasonable place in the tree.
1138+
*/
1139+
if (bms_is_empty(min_righthand))
1140+
min_righthand = bms_copy(right_rels);
1141+
1142+
/*
1143+
* Now check previous outer joins for ordering restrictions.
1144+
*/
11311145
foreach(l, root->join_info_list)
11321146
{
11331147
SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l);
@@ -1224,12 +1238,10 @@ make_outerjoininfo(PlannerInfo *root,
12241238
* If we found nothing to put in min_lefthand, punt and make it the full
12251239
* LHS, to avoid having an empty min_lefthand which will confuse later
12261240
* processing. (We don't try to be smart about such cases, just correct.)
1227-
* Likewise for min_righthand.
1241+
* We already forced min_righthand nonempty, so nothing to do for that.
12281242
*/
12291243
if (bms_is_empty(min_lefthand))
12301244
min_lefthand = bms_copy(left_rels);
1231-
if (bms_is_empty(min_righthand))
1232-
min_righthand = bms_copy(right_rels);
12331245

12341246
/* Now they'd better be nonempty */
12351247
Assert(!bms_is_empty(min_lefthand));

src/test/regress/expected/join.out

+129
Original file line numberDiff line numberDiff line change
@@ -3293,6 +3293,135 @@ using (join_key);
32933293
1 | |
32943294
(2 rows)
32953295

3296+
--
3297+
-- test successful handling of nested outer joins with degenerate join quals
3298+
--
3299+
explain (verbose, costs off)
3300+
select t1.* from
3301+
text_tbl t1
3302+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3303+
left join int8_tbl i8
3304+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
3305+
on (i8.q1 = b2.q1)
3306+
on (b2.d2 = b1.q2)
3307+
on (t1.f1 = b1.d1)
3308+
left join int4_tbl i4
3309+
on (i8.q2 = i4.f1);
3310+
QUERY PLAN
3311+
----------------------------------------------------------------------
3312+
Hash Left Join
3313+
Output: t1.f1
3314+
Hash Cond: (i8.q2 = i4.f1)
3315+
-> Nested Loop Left Join
3316+
Output: t1.f1, i8.q2
3317+
Join Filter: (t1.f1 = '***'::text)
3318+
-> Seq Scan on public.text_tbl t1
3319+
Output: t1.f1
3320+
-> Materialize
3321+
Output: i8.q2
3322+
-> Hash Right Join
3323+
Output: i8.q2
3324+
Hash Cond: ((NULL::integer) = i8b1.q2)
3325+
-> Hash Left Join
3326+
Output: i8.q2, (NULL::integer)
3327+
Hash Cond: (i8.q1 = i8b2.q1)
3328+
-> Seq Scan on public.int8_tbl i8
3329+
Output: i8.q1, i8.q2
3330+
-> Hash
3331+
Output: i8b2.q1, (NULL::integer)
3332+
-> Seq Scan on public.int8_tbl i8b2
3333+
Output: i8b2.q1, NULL::integer
3334+
-> Hash
3335+
Output: i8b1.q2
3336+
-> Seq Scan on public.int8_tbl i8b1
3337+
Output: i8b1.q2
3338+
-> Hash
3339+
Output: i4.f1
3340+
-> Seq Scan on public.int4_tbl i4
3341+
Output: i4.f1
3342+
(30 rows)
3343+
3344+
select t1.* from
3345+
text_tbl t1
3346+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3347+
left join int8_tbl i8
3348+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
3349+
on (i8.q1 = b2.q1)
3350+
on (b2.d2 = b1.q2)
3351+
on (t1.f1 = b1.d1)
3352+
left join int4_tbl i4
3353+
on (i8.q2 = i4.f1);
3354+
f1
3355+
-------------------
3356+
doh!
3357+
hi de ho neighbor
3358+
(2 rows)
3359+
3360+
explain (verbose, costs off)
3361+
select t1.* from
3362+
text_tbl t1
3363+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3364+
left join int8_tbl i8
3365+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
3366+
on (i8.q1 = b2.q1)
3367+
on (b2.d2 = b1.q2)
3368+
on (t1.f1 = b1.d1)
3369+
left join int4_tbl i4
3370+
on (i8.q2 = i4.f1);
3371+
QUERY PLAN
3372+
----------------------------------------------------------------------------
3373+
Hash Left Join
3374+
Output: t1.f1
3375+
Hash Cond: (i8.q2 = i4.f1)
3376+
-> Nested Loop Left Join
3377+
Output: i8.q2, t1.f1
3378+
Join Filter: (t1.f1 = '***'::text)
3379+
-> Seq Scan on public.text_tbl t1
3380+
Output: t1.f1
3381+
-> Materialize
3382+
Output: i8.q2
3383+
-> Hash Right Join
3384+
Output: i8.q2
3385+
Hash Cond: ((NULL::integer) = i8b1.q2)
3386+
-> Hash Right Join
3387+
Output: i8.q2, (NULL::integer)
3388+
Hash Cond: (i8b2.q1 = i8.q1)
3389+
-> Nested Loop
3390+
Output: i8b2.q1, NULL::integer
3391+
-> Seq Scan on public.int8_tbl i8b2
3392+
Output: i8b2.q1, i8b2.q2
3393+
-> Materialize
3394+
-> Seq Scan on public.int4_tbl i4b2
3395+
-> Hash
3396+
Output: i8.q1, i8.q2
3397+
-> Seq Scan on public.int8_tbl i8
3398+
Output: i8.q1, i8.q2
3399+
-> Hash
3400+
Output: i8b1.q2
3401+
-> Seq Scan on public.int8_tbl i8b1
3402+
Output: i8b1.q2
3403+
-> Hash
3404+
Output: i4.f1
3405+
-> Seq Scan on public.int4_tbl i4
3406+
Output: i4.f1
3407+
(34 rows)
3408+
3409+
select t1.* from
3410+
text_tbl t1
3411+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3412+
left join int8_tbl i8
3413+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
3414+
on (i8.q1 = b2.q1)
3415+
on (b2.d2 = b1.q2)
3416+
on (t1.f1 = b1.d1)
3417+
left join int4_tbl i4
3418+
on (i8.q2 = i4.f1);
3419+
f1
3420+
-------------------
3421+
doh!
3422+
hi de ho neighbor
3423+
(2 rows)
3424+
32963425
--
32973426
-- test ability to push constants through outer join clauses
32983427
--

src/test/regress/sql/join.sql

+50
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,56 @@ left join
984984
) foo3
985985
using (join_key);
986986

987+
--
988+
-- test successful handling of nested outer joins with degenerate join quals
989+
--
990+
991+
explain (verbose, costs off)
992+
select t1.* from
993+
text_tbl t1
994+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
995+
left join int8_tbl i8
996+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
997+
on (i8.q1 = b2.q1)
998+
on (b2.d2 = b1.q2)
999+
on (t1.f1 = b1.d1)
1000+
left join int4_tbl i4
1001+
on (i8.q2 = i4.f1);
1002+
1003+
select t1.* from
1004+
text_tbl t1
1005+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
1006+
left join int8_tbl i8
1007+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
1008+
on (i8.q1 = b2.q1)
1009+
on (b2.d2 = b1.q2)
1010+
on (t1.f1 = b1.d1)
1011+
left join int4_tbl i4
1012+
on (i8.q2 = i4.f1);
1013+
1014+
explain (verbose, costs off)
1015+
select t1.* from
1016+
text_tbl t1
1017+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
1018+
left join int8_tbl i8
1019+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
1020+
on (i8.q1 = b2.q1)
1021+
on (b2.d2 = b1.q2)
1022+
on (t1.f1 = b1.d1)
1023+
left join int4_tbl i4
1024+
on (i8.q2 = i4.f1);
1025+
1026+
select t1.* from
1027+
text_tbl t1
1028+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
1029+
left join int8_tbl i8
1030+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
1031+
on (i8.q1 = b2.q1)
1032+
on (b2.d2 = b1.q2)
1033+
on (t1.f1 = b1.d1)
1034+
left join int4_tbl i4
1035+
on (i8.q2 = i4.f1);
1036+
9871037
--
9881038
-- test ability to push constants through outer join clauses
9891039
--

0 commit comments

Comments
 (0)