Skip to content

Commit e39a3b2

Browse files
committed
Fix some planner issues with degenerate outer join clauses.
An outer join clause that didn't actually reference the RHS (perhaps only after constant-folding) could confuse the join order enforcement logic, leading to wrong query results. Also, nested occurrences of such things could trigger an Assertion that on reflection seems incorrect. Per fuzz testing by Andreas Seltenreich. The practical use of such cases seems thin enough that it's not too surprising we've not heard field reports about it. This has been broken for a long time, so back-patch to all active branches.
1 parent 216977a commit e39a3b2

File tree

4 files changed

+211
-12
lines changed

4 files changed

+211
-12
lines changed

src/backend/optimizer/path/joinrels.c

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -467,20 +467,26 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
467467
}
468468
else
469469
{
470+
/*
471+
* Otherwise, the proposed join overlaps the RHS but isn't a valid
472+
* implementation of this SJ. It might still be a legal join,
473+
* however, if it does not overlap the LHS.
474+
*/
475+
if (bms_overlap(joinrelids, sjinfo->min_lefthand))
476+
return false;
477+
470478
/*----------
471-
* Otherwise, the proposed join overlaps the RHS but isn't
472-
* a valid implementation of this SJ. It might still be
473-
* a legal join, however. If both inputs overlap the RHS,
474-
* assume that it's OK. Since the inputs presumably got past
475-
* this function's checks previously, they can't overlap the
476-
* LHS and their violations of the RHS boundary must represent
477-
* SJs that have been determined to commute with this one.
479+
* If both inputs overlap the RHS, assume that it's OK. Since the
480+
* inputs presumably got past this function's checks previously,
481+
* their violations of the RHS boundary must represent SJs that
482+
* have been determined to commute with this one.
478483
* We have to allow this to work correctly in cases like
479484
* (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
480485
* when the c/d join has been determined to commute with the join
481486
* to a, and hence d is not part of min_righthand for the upper
482487
* join. It should be legal to join b to c/d but this will appear
483488
* as a violation of the upper join's RHS.
489+
*
484490
* Furthermore, if one input overlaps the RHS and the other does
485491
* not, we should still allow the join if it is a valid
486492
* implementation of some other SJ. We have to allow this to
@@ -496,11 +502,13 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
496502
bms_overlap(rel1->relids, sjinfo->min_righthand) &&
497503
bms_overlap(rel2->relids, sjinfo->min_righthand))
498504
{
499-
/* seems OK */
500-
Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
505+
/* both overlap; assume OK */
501506
}
502507
else
508+
{
509+
/* one overlaps, the other doesn't (or it's a semijoin) */
503510
is_valid_inner = false;
511+
}
504512
}
505513
}
506514

src/backend/optimizer/plan/initsplan.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,20 @@ make_outerjoininfo(PlannerInfo *root,
11231123
min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
11241124
right_rels);
11251125

1126+
/*
1127+
* If we have a degenerate join clause that doesn't mention any RHS rels,
1128+
* force the min RHS to be the syntactic RHS; otherwise we can end up
1129+
* making serious errors, like putting the LHS on the wrong side of an
1130+
* outer join. It seems to be safe to not do this when we have a
1131+
* contribution from inner_join_rels, though; that's enough to pin the SJ
1132+
* to occur at a reasonable place in the tree.
1133+
*/
1134+
if (bms_is_empty(min_righthand))
1135+
min_righthand = bms_copy(right_rels);
1136+
1137+
/*
1138+
* Now check previous outer joins for ordering restrictions.
1139+
*/
11261140
foreach(l, root->join_info_list)
11271141
{
11281142
SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l);
@@ -1219,12 +1233,10 @@ make_outerjoininfo(PlannerInfo *root,
12191233
* If we found nothing to put in min_lefthand, punt and make it the full
12201234
* LHS, to avoid having an empty min_lefthand which will confuse later
12211235
* processing. (We don't try to be smart about such cases, just correct.)
1222-
* Likewise for min_righthand.
1236+
* We already forced min_righthand nonempty, so nothing to do for that.
12231237
*/
12241238
if (bms_is_empty(min_lefthand))
12251239
min_lefthand = bms_copy(left_rels);
1226-
if (bms_is_empty(min_righthand))
1227-
min_righthand = bms_copy(right_rels);
12281240

12291241
/* Now they'd better be nonempty */
12301242
Assert(!bms_is_empty(min_lefthand));

src/test/regress/expected/join.out

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3253,6 +3253,135 @@ using (join_key);
32533253
1 | |
32543254
(2 rows)
32553255

3256+
--
3257+
-- test successful handling of nested outer joins with degenerate join quals
3258+
--
3259+
explain (verbose, costs off)
3260+
select t1.* from
3261+
text_tbl t1
3262+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3263+
left join int8_tbl i8
3264+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
3265+
on (i8.q1 = b2.q1)
3266+
on (b2.d2 = b1.q2)
3267+
on (t1.f1 = b1.d1)
3268+
left join int4_tbl i4
3269+
on (i8.q2 = i4.f1);
3270+
QUERY PLAN
3271+
----------------------------------------------------------------------
3272+
Hash Left Join
3273+
Output: t1.f1
3274+
Hash Cond: (i8.q2 = i4.f1)
3275+
-> Nested Loop Left Join
3276+
Output: t1.f1, i8.q2
3277+
Join Filter: (t1.f1 = '***'::text)
3278+
-> Seq Scan on public.text_tbl t1
3279+
Output: t1.f1
3280+
-> Materialize
3281+
Output: i8.q2
3282+
-> Hash Right Join
3283+
Output: i8.q2
3284+
Hash Cond: ((NULL::integer) = i8b1.q2)
3285+
-> Hash Left Join
3286+
Output: i8.q2, (NULL::integer)
3287+
Hash Cond: (i8.q1 = i8b2.q1)
3288+
-> Seq Scan on public.int8_tbl i8
3289+
Output: i8.q1, i8.q2
3290+
-> Hash
3291+
Output: i8b2.q1, (NULL::integer)
3292+
-> Seq Scan on public.int8_tbl i8b2
3293+
Output: i8b2.q1, NULL::integer
3294+
-> Hash
3295+
Output: i8b1.q2
3296+
-> Seq Scan on public.int8_tbl i8b1
3297+
Output: i8b1.q2
3298+
-> Hash
3299+
Output: i4.f1
3300+
-> Seq Scan on public.int4_tbl i4
3301+
Output: i4.f1
3302+
(30 rows)
3303+
3304+
select t1.* from
3305+
text_tbl t1
3306+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3307+
left join int8_tbl i8
3308+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
3309+
on (i8.q1 = b2.q1)
3310+
on (b2.d2 = b1.q2)
3311+
on (t1.f1 = b1.d1)
3312+
left join int4_tbl i4
3313+
on (i8.q2 = i4.f1);
3314+
f1
3315+
-------------------
3316+
doh!
3317+
hi de ho neighbor
3318+
(2 rows)
3319+
3320+
explain (verbose, costs off)
3321+
select t1.* from
3322+
text_tbl t1
3323+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3324+
left join int8_tbl i8
3325+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
3326+
on (i8.q1 = b2.q1)
3327+
on (b2.d2 = b1.q2)
3328+
on (t1.f1 = b1.d1)
3329+
left join int4_tbl i4
3330+
on (i8.q2 = i4.f1);
3331+
QUERY PLAN
3332+
----------------------------------------------------------------------------
3333+
Hash Left Join
3334+
Output: t1.f1
3335+
Hash Cond: (i8.q2 = i4.f1)
3336+
-> Nested Loop Left Join
3337+
Output: i8.q2, t1.f1
3338+
Join Filter: (t1.f1 = '***'::text)
3339+
-> Seq Scan on public.text_tbl t1
3340+
Output: t1.f1
3341+
-> Materialize
3342+
Output: i8.q2
3343+
-> Hash Right Join
3344+
Output: i8.q2
3345+
Hash Cond: ((NULL::integer) = i8b1.q2)
3346+
-> Hash Right Join
3347+
Output: i8.q2, (NULL::integer)
3348+
Hash Cond: (i8b2.q1 = i8.q1)
3349+
-> Nested Loop
3350+
Output: i8b2.q1, NULL::integer
3351+
-> Seq Scan on public.int8_tbl i8b2
3352+
Output: i8b2.q1, i8b2.q2
3353+
-> Materialize
3354+
-> Seq Scan on public.int4_tbl i4b2
3355+
-> Hash
3356+
Output: i8.q1, i8.q2
3357+
-> Seq Scan on public.int8_tbl i8
3358+
Output: i8.q1, i8.q2
3359+
-> Hash
3360+
Output: i8b1.q2
3361+
-> Seq Scan on public.int8_tbl i8b1
3362+
Output: i8b1.q2
3363+
-> Hash
3364+
Output: i4.f1
3365+
-> Seq Scan on public.int4_tbl i4
3366+
Output: i4.f1
3367+
(34 rows)
3368+
3369+
select t1.* from
3370+
text_tbl t1
3371+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3372+
left join int8_tbl i8
3373+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
3374+
on (i8.q1 = b2.q1)
3375+
on (b2.d2 = b1.q2)
3376+
on (t1.f1 = b1.d1)
3377+
left join int4_tbl i4
3378+
on (i8.q2 = i4.f1);
3379+
f1
3380+
-------------------
3381+
doh!
3382+
hi de ho neighbor
3383+
(2 rows)
3384+
32563385
--
32573386
-- test ability to push constants through outer join clauses
32583387
--

src/test/regress/sql/join.sql

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,56 @@ left join
968968
) foo3
969969
using (join_key);
970970

971+
--
972+
-- test successful handling of nested outer joins with degenerate join quals
973+
--
974+
975+
explain (verbose, costs off)
976+
select t1.* from
977+
text_tbl t1
978+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
979+
left join int8_tbl i8
980+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
981+
on (i8.q1 = b2.q1)
982+
on (b2.d2 = b1.q2)
983+
on (t1.f1 = b1.d1)
984+
left join int4_tbl i4
985+
on (i8.q2 = i4.f1);
986+
987+
select t1.* from
988+
text_tbl t1
989+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
990+
left join int8_tbl i8
991+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
992+
on (i8.q1 = b2.q1)
993+
on (b2.d2 = b1.q2)
994+
on (t1.f1 = b1.d1)
995+
left join int4_tbl i4
996+
on (i8.q2 = i4.f1);
997+
998+
explain (verbose, costs off)
999+
select t1.* from
1000+
text_tbl t1
1001+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
1002+
left join int8_tbl i8
1003+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
1004+
on (i8.q1 = b2.q1)
1005+
on (b2.d2 = b1.q2)
1006+
on (t1.f1 = b1.d1)
1007+
left join int4_tbl i4
1008+
on (i8.q2 = i4.f1);
1009+
1010+
select t1.* from
1011+
text_tbl t1
1012+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
1013+
left join int8_tbl i8
1014+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
1015+
on (i8.q1 = b2.q1)
1016+
on (b2.d2 = b1.q2)
1017+
on (t1.f1 = b1.d1)
1018+
left join int4_tbl i4
1019+
on (i8.q2 = i4.f1);
1020+
9711021
--
9721022
-- test ability to push constants through outer join clauses
9731023
--

0 commit comments

Comments
 (0)