Skip to content

Commit 95f4e59

Browse files
committed
Remove an unsafe Assert, and explain join_clause_is_movable_into() better.
join_clause_is_movable_into() is approximate, in the sense that it might sometimes return "false" when actually it would be valid to push the given join clause down to the specified level. This is okay ... but there was an Assert in get_joinrel_parampathinfo() that's only safe if the answers are always exact. Comment out the Assert, and add a bunch of commentary to clarify what's going on. Per fuzz testing by Andreas Seltenreich. The added regression test is a pretty silly query, but it's based on his crasher example. Back-patch to 9.2 where the faulty logic was introduced.
1 parent b2ed8ed commit 95f4e59

File tree

4 files changed

+115
-7
lines changed

4 files changed

+115
-7
lines changed

src/backend/optimizer/util/relnode.c

+9
Original file line numberDiff line numberDiff line change
@@ -982,9 +982,18 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel,
982982
{
983983
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
984984

985+
/*
986+
* In principle, join_clause_is_movable_into() should accept anything
987+
* returned by generate_join_implied_equalities(); but because its
988+
* analysis is only approximate, sometimes it doesn't. So we
989+
* currently cannot use this Assert; instead just assume it's okay to
990+
* apply the joinclause at this level.
991+
*/
992+
#ifdef NOT_USED
985993
Assert(join_clause_is_movable_into(rinfo,
986994
joinrel->relids,
987995
join_and_req));
996+
#endif
988997
if (!join_clause_is_movable_into(rinfo,
989998
outer_path->parent->relids,
990999
outer_and_req) &&

src/backend/optimizer/util/restrictinfo.c

+32-7
Original file line numberDiff line numberDiff line change
@@ -464,10 +464,9 @@ extract_actual_join_clauses(List *restrictinfo_list,
464464
* outer join, as that would change the results (rows would be suppressed
465465
* rather than being null-extended).
466466
*
467-
* Also the target relation must not be in the clause's nullable_relids, i.e.,
468-
* there must not be an outer join below the clause that would null the Vars
469-
* coming from the target relation. Otherwise the clause might give results
470-
* different from what it would give at its normal semantic level.
467+
* Also there must not be an outer join below the clause that would null the
468+
* Vars coming from the target relation. Otherwise the clause might give
469+
* results different from what it would give at its normal semantic level.
471470
*
472471
* Also, the join clause must not use any relations that have LATERAL
473472
* references to the target relation, since we could not put such rels on
@@ -516,10 +515,31 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
516515
* not pushing the clause into its outer-join outer side, nor down into
517516
* a lower outer join's inner side.
518517
*
518+
* The check about pushing a clause down into a lower outer join's inner side
519+
* is only approximate; it sometimes returns "false" when actually it would
520+
* be safe to use the clause here because we're still above the outer join
521+
* in question. This is okay as long as the answers at different join levels
522+
* are consistent: it just means we might sometimes fail to push a clause as
523+
* far down as it could safely be pushed. It's unclear whether it would be
524+
* worthwhile to do this more precisely. (But if it's ever fixed to be
525+
* exactly accurate, there's an Assert in get_joinrel_parampathinfo() that
526+
* should be re-enabled.)
527+
*
519528
* There's no check here equivalent to join_clause_is_movable_to's test on
520529
* lateral_referencers. We assume the caller wouldn't be inquiring unless
521530
* it'd verified that the proposed outer rels don't have lateral references
522-
* to the current rel(s).
531+
* to the current rel(s). (If we are considering join paths with the outer
532+
* rels on the outside and the current rels on the inside, then this should
533+
* have been checked at the outset of such consideration; see join_is_legal
534+
* and the path parameterization checks in joinpath.c.) On the other hand,
535+
* in join_clause_is_movable_to we are asking whether the clause could be
536+
* moved for some valid set of outer rels, so we don't have the benefit of
537+
* relying on prior checks for lateral-reference validity.
538+
*
539+
* Note: if this returns true, it means that the clause could be moved to
540+
* this join relation, but that doesn't mean that this is the lowest join
541+
* it could be moved to. Caller may need to make additional calls to verify
542+
* that this doesn't succeed on either of the inputs of a proposed join.
523543
*
524544
* Note: get_joinrel_parampathinfo depends on the fact that if
525545
* current_and_outer is NULL, this function will always return false
@@ -534,15 +554,20 @@ join_clause_is_movable_into(RestrictInfo *rinfo,
534554
if (!bms_is_subset(rinfo->clause_relids, current_and_outer))
535555
return false;
536556

537-
/* Clause must physically reference target rel(s) */
557+
/* Clause must physically reference at least one target rel */
538558
if (!bms_overlap(currentrelids, rinfo->clause_relids))
539559
return false;
540560

541561
/* Cannot move an outer-join clause into the join's outer side */
542562
if (bms_overlap(currentrelids, rinfo->outer_relids))
543563
return false;
544564

545-
/* Target rel(s) must not be nullable below the clause */
565+
/*
566+
* Target rel(s) must not be nullable below the clause. This is
567+
* approximate, in the safe direction, because the current join might be
568+
* above the join where the nulling would happen, in which case the clause
569+
* would work correctly here. But we don't have enough info to be sure.
570+
*/
546571
if (bms_overlap(currentrelids, rinfo->nullable_relids))
547572
return false;
548573

src/test/regress/expected/join.out

+48
Original file line numberDiff line numberDiff line change
@@ -2218,6 +2218,54 @@ order by 1, 2;
22182218
4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 123
22192219
(5 rows)
22202220

2221+
--
2222+
-- regression test: check a case where join_clause_is_movable_into() gives
2223+
-- an imprecise result
2224+
--
2225+
analyze pg_enum;
2226+
explain (costs off)
2227+
select anname, outname, enumtypid
2228+
from
2229+
(select pa.proname as anname, coalesce(po.proname, typname) as outname
2230+
from pg_type t
2231+
left join pg_proc po on po.oid = t.typoutput
2232+
join pg_proc pa on pa.oid = t.typanalyze) ss,
2233+
pg_enum,
2234+
pg_type t2
2235+
where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid;
2236+
QUERY PLAN
2237+
-----------------------------------------------------------------------
2238+
Nested Loop
2239+
Join Filter: (pg_enum.enumtypid = t2.oid)
2240+
-> Nested Loop Left Join
2241+
-> Hash Join
2242+
Hash Cond: ((t.typanalyze)::oid = pa.oid)
2243+
-> Seq Scan on pg_type t
2244+
-> Hash
2245+
-> Hash Join
2246+
Hash Cond: (pa.proname = pg_enum.enumlabel)
2247+
-> Seq Scan on pg_proc pa
2248+
-> Hash
2249+
-> Seq Scan on pg_enum
2250+
-> Index Scan using pg_proc_oid_index on pg_proc po
2251+
Index Cond: (oid = (t.typoutput)::oid)
2252+
-> Index Scan using pg_type_typname_nsp_index on pg_type t2
2253+
Index Cond: (typname = COALESCE(po.proname, t.typname))
2254+
(16 rows)
2255+
2256+
select anname, outname, enumtypid
2257+
from
2258+
(select pa.proname as anname, coalesce(po.proname, typname) as outname
2259+
from pg_type t
2260+
left join pg_proc po on po.oid = t.typoutput
2261+
join pg_proc pa on pa.oid = t.typanalyze) ss,
2262+
pg_enum,
2263+
pg_type t2
2264+
where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid;
2265+
anname | outname | enumtypid
2266+
--------+---------+-----------
2267+
(0 rows)
2268+
22212269
--
22222270
-- Clean up
22232271
--

src/test/regress/sql/join.sql

+26
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,32 @@ select * from int8_tbl i1 left join (int8_tbl i2 join
377377
(select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2
378378
order by 1, 2;
379379

380+
--
381+
-- regression test: check a case where join_clause_is_movable_into() gives
382+
-- an imprecise result
383+
--
384+
analyze pg_enum;
385+
explain (costs off)
386+
select anname, outname, enumtypid
387+
from
388+
(select pa.proname as anname, coalesce(po.proname, typname) as outname
389+
from pg_type t
390+
left join pg_proc po on po.oid = t.typoutput
391+
join pg_proc pa on pa.oid = t.typanalyze) ss,
392+
pg_enum,
393+
pg_type t2
394+
where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid;
395+
396+
select anname, outname, enumtypid
397+
from
398+
(select pa.proname as anname, coalesce(po.proname, typname) as outname
399+
from pg_type t
400+
left join pg_proc po on po.oid = t.typoutput
401+
join pg_proc pa on pa.oid = t.typanalyze) ss,
402+
pg_enum,
403+
pg_type t2
404+
where anname = enumlabel and outname = t2.typname and enumtypid = t2.oid;
405+
380406

381407
--
382408
-- Clean up

0 commit comments

Comments
 (0)