Skip to content

Commit 1044541

Browse files
committed
Fix some planner issues with degenerate outer join clauses.
An outer join clause that didn't actually reference the RHS (perhaps only after constant-folding) could confuse the join order enforcement logic, leading to wrong query results. Also, nested occurrences of such things could trigger an Assertion that on reflection seems incorrect. Per fuzz testing by Andreas Seltenreich. The practical use of such cases seems thin enough that it's not too surprising we've not heard field reports about it. This has been broken for a long time, so back-patch to all active branches.
1 parent a4df781 commit 1044541

File tree

4 files changed

+211
-12
lines changed

4 files changed

+211
-12
lines changed

src/backend/optimizer/path/joinrels.c

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -467,20 +467,26 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
467467
}
468468
else
469469
{
470+
/*
471+
* Otherwise, the proposed join overlaps the RHS but isn't a valid
472+
* implementation of this SJ. It might still be a legal join,
473+
* however, if it does not overlap the LHS.
474+
*/
475+
if (bms_overlap(joinrelids, sjinfo->min_lefthand))
476+
return false;
477+
470478
/*----------
471-
* Otherwise, the proposed join overlaps the RHS but isn't
472-
* a valid implementation of this SJ. It might still be
473-
* a legal join, however. If both inputs overlap the RHS,
474-
* assume that it's OK. Since the inputs presumably got past
475-
* this function's checks previously, they can't overlap the
476-
* LHS and their violations of the RHS boundary must represent
477-
* SJs that have been determined to commute with this one.
479+
* If both inputs overlap the RHS, assume that it's OK. Since the
480+
* inputs presumably got past this function's checks previously,
481+
* their violations of the RHS boundary must represent SJs that
482+
* have been determined to commute with this one.
478483
* We have to allow this to work correctly in cases like
479484
* (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
480485
* when the c/d join has been determined to commute with the join
481486
* to a, and hence d is not part of min_righthand for the upper
482487
* join. It should be legal to join b to c/d but this will appear
483488
* as a violation of the upper join's RHS.
489+
*
484490
* Furthermore, if one input overlaps the RHS and the other does
485491
* not, we should still allow the join if it is a valid
486492
* implementation of some other SJ. We have to allow this to
@@ -496,11 +502,13 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
496502
bms_overlap(rel1->relids, sjinfo->min_righthand) &&
497503
bms_overlap(rel2->relids, sjinfo->min_righthand))
498504
{
499-
/* seems OK */
500-
Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
505+
/* both overlap; assume OK */
501506
}
502507
else
508+
{
509+
/* one overlaps, the other doesn't (or it's a semijoin) */
503510
is_valid_inner = false;
511+
}
504512
}
505513
}
506514

src/backend/optimizer/plan/initsplan.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,20 @@ make_outerjoininfo(PlannerInfo *root,
11231123
min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
11241124
right_rels);
11251125

1126+
/*
1127+
* If we have a degenerate join clause that doesn't mention any RHS rels,
1128+
* force the min RHS to be the syntactic RHS; otherwise we can end up
1129+
* making serious errors, like putting the LHS on the wrong side of an
1130+
* outer join. It seems to be safe to not do this when we have a
1131+
* contribution from inner_join_rels, though; that's enough to pin the SJ
1132+
* to occur at a reasonable place in the tree.
1133+
*/
1134+
if (bms_is_empty(min_righthand))
1135+
min_righthand = bms_copy(right_rels);
1136+
1137+
/*
1138+
* Now check previous outer joins for ordering restrictions.
1139+
*/
11261140
foreach(l, root->join_info_list)
11271141
{
11281142
SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l);
@@ -1219,12 +1233,10 @@ make_outerjoininfo(PlannerInfo *root,
12191233
* If we found nothing to put in min_lefthand, punt and make it the full
12201234
* LHS, to avoid having an empty min_lefthand which will confuse later
12211235
* processing. (We don't try to be smart about such cases, just correct.)
1222-
* Likewise for min_righthand.
1236+
* We already forced min_righthand nonempty, so nothing to do for that.
12231237
*/
12241238
if (bms_is_empty(min_lefthand))
12251239
min_lefthand = bms_copy(left_rels);
1226-
if (bms_is_empty(min_righthand))
1227-
min_righthand = bms_copy(right_rels);
12281240

12291241
/* Now they'd better be nonempty */
12301242
Assert(!bms_is_empty(min_lefthand));

src/test/regress/expected/join.out

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3179,6 +3179,135 @@ using (join_key);
31793179
1 | |
31803180
(2 rows)
31813181

3182+
--
3183+
-- test successful handling of nested outer joins with degenerate join quals
3184+
--
3185+
explain (verbose, costs off)
3186+
select t1.* from
3187+
text_tbl t1
3188+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3189+
left join int8_tbl i8
3190+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
3191+
on (i8.q1 = b2.q1)
3192+
on (b2.d2 = b1.q2)
3193+
on (t1.f1 = b1.d1)
3194+
left join int4_tbl i4
3195+
on (i8.q2 = i4.f1);
3196+
QUERY PLAN
3197+
----------------------------------------------------------------------
3198+
Hash Left Join
3199+
Output: t1.f1
3200+
Hash Cond: (i8.q2 = i4.f1)
3201+
-> Nested Loop Left Join
3202+
Output: t1.f1, i8.q2
3203+
Join Filter: (t1.f1 = '***'::text)
3204+
-> Seq Scan on public.text_tbl t1
3205+
Output: t1.f1
3206+
-> Materialize
3207+
Output: i8.q2
3208+
-> Hash Right Join
3209+
Output: i8.q2
3210+
Hash Cond: ((NULL::integer) = i8b1.q2)
3211+
-> Hash Left Join
3212+
Output: i8.q2, (NULL::integer)
3213+
Hash Cond: (i8.q1 = i8b2.q1)
3214+
-> Seq Scan on public.int8_tbl i8
3215+
Output: i8.q1, i8.q2
3216+
-> Hash
3217+
Output: i8b2.q1, (NULL::integer)
3218+
-> Seq Scan on public.int8_tbl i8b2
3219+
Output: i8b2.q1, NULL::integer
3220+
-> Hash
3221+
Output: i8b1.q2
3222+
-> Seq Scan on public.int8_tbl i8b1
3223+
Output: i8b1.q2
3224+
-> Hash
3225+
Output: i4.f1
3226+
-> Seq Scan on public.int4_tbl i4
3227+
Output: i4.f1
3228+
(30 rows)
3229+
3230+
select t1.* from
3231+
text_tbl t1
3232+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3233+
left join int8_tbl i8
3234+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
3235+
on (i8.q1 = b2.q1)
3236+
on (b2.d2 = b1.q2)
3237+
on (t1.f1 = b1.d1)
3238+
left join int4_tbl i4
3239+
on (i8.q2 = i4.f1);
3240+
f1
3241+
-------------------
3242+
doh!
3243+
hi de ho neighbor
3244+
(2 rows)
3245+
3246+
explain (verbose, costs off)
3247+
select t1.* from
3248+
text_tbl t1
3249+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3250+
left join int8_tbl i8
3251+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
3252+
on (i8.q1 = b2.q1)
3253+
on (b2.d2 = b1.q2)
3254+
on (t1.f1 = b1.d1)
3255+
left join int4_tbl i4
3256+
on (i8.q2 = i4.f1);
3257+
QUERY PLAN
3258+
----------------------------------------------------------------------------
3259+
Hash Left Join
3260+
Output: t1.f1
3261+
Hash Cond: (i8.q2 = i4.f1)
3262+
-> Nested Loop Left Join
3263+
Output: i8.q2, t1.f1
3264+
Join Filter: (t1.f1 = '***'::text)
3265+
-> Seq Scan on public.text_tbl t1
3266+
Output: t1.f1
3267+
-> Materialize
3268+
Output: i8.q2
3269+
-> Hash Right Join
3270+
Output: i8.q2
3271+
Hash Cond: ((NULL::integer) = i8b1.q2)
3272+
-> Hash Right Join
3273+
Output: i8.q2, (NULL::integer)
3274+
Hash Cond: (i8b2.q1 = i8.q1)
3275+
-> Nested Loop
3276+
Output: i8b2.q1, NULL::integer
3277+
-> Seq Scan on public.int8_tbl i8b2
3278+
Output: i8b2.q1, i8b2.q2
3279+
-> Materialize
3280+
-> Seq Scan on public.int4_tbl i4b2
3281+
-> Hash
3282+
Output: i8.q1, i8.q2
3283+
-> Seq Scan on public.int8_tbl i8
3284+
Output: i8.q1, i8.q2
3285+
-> Hash
3286+
Output: i8b1.q2
3287+
-> Seq Scan on public.int8_tbl i8b1
3288+
Output: i8b1.q2
3289+
-> Hash
3290+
Output: i4.f1
3291+
-> Seq Scan on public.int4_tbl i4
3292+
Output: i4.f1
3293+
(34 rows)
3294+
3295+
select t1.* from
3296+
text_tbl t1
3297+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
3298+
left join int8_tbl i8
3299+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
3300+
on (i8.q1 = b2.q1)
3301+
on (b2.d2 = b1.q2)
3302+
on (t1.f1 = b1.d1)
3303+
left join int4_tbl i4
3304+
on (i8.q2 = i4.f1);
3305+
f1
3306+
-------------------
3307+
doh!
3308+
hi de ho neighbor
3309+
(2 rows)
3310+
31823311
--
31833312
-- test ability to push constants through outer join clauses
31843313
--

src/test/regress/sql/join.sql

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,56 @@ left join
952952
) foo3
953953
using (join_key);
954954

955+
--
956+
-- test successful handling of nested outer joins with degenerate join quals
957+
--
958+
959+
explain (verbose, costs off)
960+
select t1.* from
961+
text_tbl t1
962+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
963+
left join int8_tbl i8
964+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
965+
on (i8.q1 = b2.q1)
966+
on (b2.d2 = b1.q2)
967+
on (t1.f1 = b1.d1)
968+
left join int4_tbl i4
969+
on (i8.q2 = i4.f1);
970+
971+
select t1.* from
972+
text_tbl t1
973+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
974+
left join int8_tbl i8
975+
left join (select *, null::int as d2 from int8_tbl i8b2) b2
976+
on (i8.q1 = b2.q1)
977+
on (b2.d2 = b1.q2)
978+
on (t1.f1 = b1.d1)
979+
left join int4_tbl i4
980+
on (i8.q2 = i4.f1);
981+
982+
explain (verbose, costs off)
983+
select t1.* from
984+
text_tbl t1
985+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
986+
left join int8_tbl i8
987+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
988+
on (i8.q1 = b2.q1)
989+
on (b2.d2 = b1.q2)
990+
on (t1.f1 = b1.d1)
991+
left join int4_tbl i4
992+
on (i8.q2 = i4.f1);
993+
994+
select t1.* from
995+
text_tbl t1
996+
left join (select *, '***'::text as d1 from int8_tbl i8b1) b1
997+
left join int8_tbl i8
998+
left join (select *, null::int as d2 from int8_tbl i8b2, int4_tbl i4b2) b2
999+
on (i8.q1 = b2.q1)
1000+
on (b2.d2 = b1.q2)
1001+
on (t1.f1 = b1.d1)
1002+
left join int4_tbl i4
1003+
on (i8.q2 = i4.f1);
1004+
9551005
--
9561006
-- test ability to push constants through outer join clauses
9571007
--

0 commit comments

Comments
 (0)