Skip to content

Commit fee7b77

Browse files
committed
Rethink nullingrel marking rules in build_joinrel_tlist().
The logic for when to add the current outer join's own relid to the nullingrels sets of output Vars and PHVs was overly complicated and underly correct. Not sure why I didn't think of this before, but since what we want is marking per the syntactic structure, we can just consult our records about the syntactic structure, ie syn_righthand/syn_lefthand. Also, tighten the rule about when to add the commute_above_r bits, in hopes of eliminating some squishy reasoning. I do not know of a reason to think that that's broken as-is, but this way seems better. Per bug #17781 from Robins Tharakan. Discussion: https://postgr.es/m/17781-c0405c8b3cd5e072@postgresql.org
1 parent e2c78e7 commit fee7b77

File tree

3 files changed

+96
-19
lines changed

3 files changed

+96
-19
lines changed

src/backend/optimizer/util/relnode.c

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,23 +1043,23 @@ min_join_parameterization(PlannerInfo *root,
10431043
* two joins had been done in syntactic order; else they won't match Vars
10441044
* appearing higher in the query tree. We need to do two things:
10451045
*
1046-
* First, sjinfo->commute_above_r is added to the nulling bitmaps of RHS Vars.
1047-
* This takes care of the case where we implement
1046+
* First, we add the outer join's relid to the nulling bitmap only if the Var
1047+
* or PHV actually comes from within the syntactically nullable side(s) of the
1048+
* outer join. This takes care of the possibility that we have transformed
1049+
* (A leftjoin B on (Pab)) leftjoin C on (Pbc)
1050+
* to
1051+
* A leftjoin (B leftjoin C on (Pbc)) on (Pab)
1052+
* Here the now-upper A/B join must not mark C columns as nulled by itself.
1053+
*
1054+
* Second, if sjinfo->commute_above_r is already part of the joinrel then
1055+
* it is added to the nulling bitmaps of nullable Vars. This takes care of
1056+
* the reverse case where we implement
10481057
* A leftjoin (B leftjoin C on (Pbc)) on (Pab)
10491058
* as
10501059
* (A leftjoin B on (Pab)) leftjoin C on (Pbc)
10511060
* The C columns emitted by the B/C join need to be shown as nulled by both
1052-
* the B/C and A/B joins, even though they've not traversed the A/B join.
1053-
* (If the joins haven't been commuted, we are adding the nullingrel bits
1054-
* prematurely; but that's okay because the C columns can't be referenced
1055-
* between here and the upper join.)
1056-
*
1057-
* Second, if a RHS Var has any of the relids in sjinfo->commute_above_l
1058-
* already set in its nulling bitmap, then we *don't* add sjinfo->ojrelid
1059-
* to its nulling bitmap (but we do still add commute_above_r). This takes
1060-
* care of the reverse transformation: if the original syntax was
1061-
* (A leftjoin B on (Pab)) leftjoin C on (Pbc)
1062-
* then the now-upper A/B join must not mark C columns as nulled by itself.
1061+
* the B/C and A/B joins, even though they've not physically traversed the
1062+
* A/B join.
10631063
*/
10641064
static void
10651065
build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
@@ -1095,10 +1095,12 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
10951095
phv = copyObject(phv);
10961096
/* See comments above to understand this logic */
10971097
if (sjinfo->ojrelid != 0 &&
1098-
!bms_overlap(phv->phnullingrels, sjinfo->commute_above_l))
1098+
(bms_is_subset(phv->phrels, sjinfo->syn_righthand) ||
1099+
(sjinfo->jointype == JOIN_FULL &&
1100+
bms_is_subset(phv->phrels, sjinfo->syn_lefthand))))
10991101
phv->phnullingrels = bms_add_member(phv->phnullingrels,
11001102
sjinfo->ojrelid);
1101-
if (sjinfo->commute_above_r)
1103+
if (bms_overlap(sjinfo->commute_above_r, joinrel->relids))
11021104
phv->phnullingrels = bms_add_members(phv->phnullingrels,
11031105
sjinfo->commute_above_r);
11041106
}
@@ -1149,17 +1151,20 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
11491151
/*
11501152
* Add the Var to the output. If this join potentially nulls this
11511153
* input, we have to update the Var's varnullingrels, which means
1152-
* making a copy.
1154+
* making a copy. But note that we don't ever add nullingrel bits to
1155+
* row identity Vars (cf. comments in setrefs.c).
11531156
*/
1154-
if (can_null)
1157+
if (can_null && var->varno != ROWID_VAR)
11551158
{
11561159
var = copyObject(var);
11571160
/* See comments above to understand this logic */
11581161
if (sjinfo->ojrelid != 0 &&
1159-
!bms_overlap(var->varnullingrels, sjinfo->commute_above_l))
1162+
(bms_is_member(var->varno, sjinfo->syn_righthand) ||
1163+
(sjinfo->jointype == JOIN_FULL &&
1164+
bms_is_member(var->varno, sjinfo->syn_lefthand))))
11601165
var->varnullingrels = bms_add_member(var->varnullingrels,
11611166
sjinfo->ojrelid);
1162-
if (sjinfo->commute_above_r)
1167+
if (bms_overlap(sjinfo->commute_above_r, joinrel->relids))
11631168
var->varnullingrels = bms_add_members(var->varnullingrels,
11641169
sjinfo->commute_above_r);
11651170
}

src/test/regress/expected/join.out

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5008,6 +5008,50 @@ select a1.id from
50085008
Seq Scan on a a1
50095009
(1 row)
50105010

5011+
-- another example (bug #17781)
5012+
explain (costs off)
5013+
select ss1.f1
5014+
from int4_tbl as t1
5015+
left join (int4_tbl as t2
5016+
right join int4_tbl as t3 on null
5017+
left join (int4_tbl as t4
5018+
right join int8_tbl as t5 on null)
5019+
on t2.f1 = t4.f1
5020+
left join ((select null as f1 from int4_tbl as t6) as ss1
5021+
inner join int8_tbl as t7 on null)
5022+
on t5.q1 = t7.q2)
5023+
on false;
5024+
QUERY PLAN
5025+
--------------------------------
5026+
Nested Loop Left Join
5027+
Join Filter: false
5028+
-> Seq Scan on int4_tbl t1
5029+
-> Result
5030+
One-Time Filter: false
5031+
(5 rows)
5032+
5033+
-- variant with Var rather than PHV coming from t6
5034+
explain (costs off)
5035+
select ss1.f1
5036+
from int4_tbl as t1
5037+
left join (int4_tbl as t2
5038+
right join int4_tbl as t3 on null
5039+
left join (int4_tbl as t4
5040+
right join int8_tbl as t5 on null)
5041+
on t2.f1 = t4.f1
5042+
left join ((select f1 from int4_tbl as t6) as ss1
5043+
inner join int8_tbl as t7 on null)
5044+
on t5.q1 = t7.q2)
5045+
on false;
5046+
QUERY PLAN
5047+
--------------------------------
5048+
Nested Loop Left Join
5049+
Join Filter: false
5050+
-> Seq Scan on int4_tbl t1
5051+
-> Result
5052+
One-Time Filter: false
5053+
(5 rows)
5054+
50115055
-- check that join removal works for a left join when joining a subquery
50125056
-- that is guaranteed to be unique by its GROUP BY clause
50135057
explain (costs off)

src/test/regress/sql/join.sql

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1785,6 +1785,34 @@ select a1.id from
17851785
(a a3 left join a a4 on a3.id = a4.id)
17861786
on a2.id = a3.id;
17871787

1788+
-- another example (bug #17781)
1789+
explain (costs off)
1790+
select ss1.f1
1791+
from int4_tbl as t1
1792+
left join (int4_tbl as t2
1793+
right join int4_tbl as t3 on null
1794+
left join (int4_tbl as t4
1795+
right join int8_tbl as t5 on null)
1796+
on t2.f1 = t4.f1
1797+
left join ((select null as f1 from int4_tbl as t6) as ss1
1798+
inner join int8_tbl as t7 on null)
1799+
on t5.q1 = t7.q2)
1800+
on false;
1801+
1802+
-- variant with Var rather than PHV coming from t6
1803+
explain (costs off)
1804+
select ss1.f1
1805+
from int4_tbl as t1
1806+
left join (int4_tbl as t2
1807+
right join int4_tbl as t3 on null
1808+
left join (int4_tbl as t4
1809+
right join int8_tbl as t5 on null)
1810+
on t2.f1 = t4.f1
1811+
left join ((select f1 from int4_tbl as t6) as ss1
1812+
inner join int8_tbl as t7 on null)
1813+
on t5.q1 = t7.q2)
1814+
on false;
1815+
17881816
-- check that join removal works for a left join when joining a subquery
17891817
-- that is guaranteed to be unique by its GROUP BY clause
17901818
explain (costs off)

0 commit comments

Comments
 (0)