Skip to content

Commit f64ec81

Browse files
author
Richard Guo
committed
Avoid unnecessary wrapping for Vars and PHVs
When pulling up a lateral subquery that is under an outer join, the current code always wraps a Var or PHV in the subquery's targetlist into a new PlaceHolderVar if it is a lateral reference to something outside the subquery. This is necessary when the Var/PHV references the non-nullable side of the outer join from the nullable side: we need to ensure that it is evaluated at the right place and hence is forced to null when the outer join should do so. However, if the referenced rel is under the same lowest nulling outer join, we can actually omit the wrapping. That's safe because if the subquery variable is forced to NULL by the outer join, the lateral reference variable will come out as NULL too. It could be beneficial to get rid of such PHVs because they imply lateral dependencies, which force us to resort to nestloop joins. This patch leverages the newly introduced nullingrel_info to check if the nullingrels of the subquery RTE are a subset of those of the laterally referenced rel, in order to determine if the referenced rel is under the same lowest nulling outer join. No backpatch as this could result in plan changes. Author: Richard Guo Reviewed-by: James Coleman, Dmitry Dolgov, Andrei Lepikhov Discussion: https://postgr.es/m/CAMbWs48uk6C7Z9m_FNT8_21CMCk68hrgAsz=z6zpP1PNZMkeoQ@mail.gmail.com
1 parent 5668a85 commit f64ec81

File tree

3 files changed

+375
-9
lines changed

3 files changed

+375
-9
lines changed

src/backend/optimizer/prep/prepjointree.c

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2598,26 +2598,47 @@ pullup_replace_vars_callback(Var *var,
25982598
/*
25992599
* Simple Vars always escape being wrapped, unless they are
26002600
* lateral references to something outside the subquery being
2601-
* pulled up. (Even then, we could omit the PlaceHolderVar if
2602-
* the referenced rel is under the same lowest outer join, but
2603-
* it doesn't seem worth the trouble to check that.)
2601+
* pulled up and the referenced rel is not under the same
2602+
* lowest nulling outer join.
26042603
*/
2604+
wrap = false;
26052605
if (rcon->target_rte->lateral &&
26062606
!bms_is_member(((Var *) newnode)->varno, rcon->relids))
2607-
wrap = true;
2608-
else
2609-
wrap = false;
2607+
{
2608+
nullingrel_info *nullinfo = rcon->nullinfo;
2609+
int lvarno = ((Var *) newnode)->varno;
2610+
2611+
Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
2612+
if (!bms_is_subset(nullinfo->nullingrels[rcon->varno],
2613+
nullinfo->nullingrels[lvarno]))
2614+
wrap = true;
2615+
}
26102616
}
26112617
else if (newnode && IsA(newnode, PlaceHolderVar) &&
26122618
((PlaceHolderVar *) newnode)->phlevelsup == 0)
26132619
{
26142620
/* The same rules apply for a PlaceHolderVar */
2621+
wrap = false;
26152622
if (rcon->target_rte->lateral &&
26162623
!bms_is_subset(((PlaceHolderVar *) newnode)->phrels,
26172624
rcon->relids))
2618-
wrap = true;
2619-
else
2620-
wrap = false;
2625+
{
2626+
nullingrel_info *nullinfo = rcon->nullinfo;
2627+
Relids lvarnos = ((PlaceHolderVar *) newnode)->phrels;
2628+
int lvarno;
2629+
2630+
lvarno = -1;
2631+
while ((lvarno = bms_next_member(lvarnos, lvarno)) >= 0)
2632+
{
2633+
Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
2634+
if (!bms_is_subset(nullinfo->nullingrels[rcon->varno],
2635+
nullinfo->nullingrels[lvarno]))
2636+
{
2637+
wrap = true;
2638+
break;
2639+
}
2640+
}
2641+
}
26212642
}
26222643
else if (rcon->wrap_non_vars)
26232644
{

src/test/regress/expected/subselect.out

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,6 +1848,281 @@ order by 1, 2;
18481848
4567890123456789 | 9135780246913578
18491849
(11 rows)
18501850

1851+
-- lateral references for simple Vars can escape being wrapped if the
1852+
-- referenced rel is under the same lowest nulling outer join
1853+
explain (verbose, costs off)
1854+
select t1.q1, x from
1855+
int8_tbl t1 left join
1856+
(int8_tbl t2 inner join
1857+
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
1858+
on t1.q1 = t2.q1
1859+
order by 1, 2;
1860+
QUERY PLAN
1861+
--------------------------------------------------------
1862+
Sort
1863+
Output: t1.q1, t2.q2
1864+
Sort Key: t1.q1, t2.q2
1865+
-> Hash Right Join
1866+
Output: t1.q1, t2.q2
1867+
Hash Cond: (t2.q1 = t1.q1)
1868+
-> Hash Join
1869+
Output: t2.q2, t2.q1
1870+
Hash Cond: (t2.q2 = t3.q1)
1871+
-> Seq Scan on public.int8_tbl t2
1872+
Output: t2.q1, t2.q2
1873+
-> Hash
1874+
Output: t3.q1
1875+
-> Seq Scan on public.int8_tbl t3
1876+
Output: t3.q1
1877+
-> Hash
1878+
Output: t1.q1
1879+
-> Seq Scan on public.int8_tbl t1
1880+
Output: t1.q1
1881+
(19 rows)
1882+
1883+
select t1.q1, x from
1884+
int8_tbl t1 left join
1885+
(int8_tbl t2 inner join
1886+
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
1887+
on t1.q1 = t2.q1
1888+
order by 1, 2;
1889+
q1 | x
1890+
------------------+------------------
1891+
123 | 4567890123456789
1892+
123 | 4567890123456789
1893+
123 | 4567890123456789
1894+
123 | 4567890123456789
1895+
123 | 4567890123456789
1896+
123 | 4567890123456789
1897+
4567890123456789 | 123
1898+
4567890123456789 | 123
1899+
4567890123456789 | 123
1900+
4567890123456789 | 123
1901+
4567890123456789 | 123
1902+
4567890123456789 | 123
1903+
4567890123456789 | 4567890123456789
1904+
4567890123456789 | 4567890123456789
1905+
4567890123456789 | 4567890123456789
1906+
4567890123456789 | 4567890123456789
1907+
4567890123456789 | 4567890123456789
1908+
4567890123456789 | 4567890123456789
1909+
4567890123456789 | 4567890123456789
1910+
4567890123456789 | 4567890123456789
1911+
4567890123456789 | 4567890123456789
1912+
(21 rows)
1913+
1914+
-- otherwise we need to wrap the Vars
1915+
explain (verbose, costs off)
1916+
select t1.q1, x from
1917+
int8_tbl t1 left join
1918+
(int8_tbl t2 left join
1919+
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
1920+
on t1.q1 = t2.q1
1921+
order by 1, 2;
1922+
QUERY PLAN
1923+
--------------------------------------------------
1924+
Sort
1925+
Output: t1.q1, (t2.q2)
1926+
Sort Key: t1.q1, (t2.q2)
1927+
-> Hash Right Join
1928+
Output: t1.q1, (t2.q2)
1929+
Hash Cond: (t2.q1 = t1.q1)
1930+
-> Nested Loop Left Join
1931+
Output: t2.q1, (t2.q2)
1932+
-> Seq Scan on public.int8_tbl t2
1933+
Output: t2.q1, t2.q2
1934+
-> Seq Scan on public.int8_tbl t3
1935+
Output: t3.q1, t2.q2
1936+
Filter: (t2.q2 = t3.q1)
1937+
-> Hash
1938+
Output: t1.q1
1939+
-> Seq Scan on public.int8_tbl t1
1940+
Output: t1.q1
1941+
(17 rows)
1942+
1943+
select t1.q1, x from
1944+
int8_tbl t1 left join
1945+
(int8_tbl t2 left join
1946+
lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1)
1947+
on t1.q1 = t2.q1
1948+
order by 1, 2;
1949+
q1 | x
1950+
------------------+------------------
1951+
123 | 4567890123456789
1952+
123 | 4567890123456789
1953+
123 | 4567890123456789
1954+
123 | 4567890123456789
1955+
123 | 4567890123456789
1956+
123 | 4567890123456789
1957+
123 |
1958+
123 |
1959+
4567890123456789 | 123
1960+
4567890123456789 | 123
1961+
4567890123456789 | 123
1962+
4567890123456789 | 123
1963+
4567890123456789 | 123
1964+
4567890123456789 | 123
1965+
4567890123456789 | 4567890123456789
1966+
4567890123456789 | 4567890123456789
1967+
4567890123456789 | 4567890123456789
1968+
4567890123456789 | 4567890123456789
1969+
4567890123456789 | 4567890123456789
1970+
4567890123456789 | 4567890123456789
1971+
4567890123456789 | 4567890123456789
1972+
4567890123456789 | 4567890123456789
1973+
4567890123456789 | 4567890123456789
1974+
4567890123456789 |
1975+
4567890123456789 |
1976+
4567890123456789 |
1977+
(26 rows)
1978+
1979+
-- lateral references for PHVs can also escape being wrapped if the
1980+
-- referenced rel is under the same lowest nulling outer join
1981+
explain (verbose, costs off)
1982+
select ss2.* from
1983+
int8_tbl t1 left join
1984+
(int8_tbl t2 left join
1985+
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
1986+
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
1987+
on t1.q2 = ss2.q1
1988+
order by 1, 2, 3;
1989+
QUERY PLAN
1990+
----------------------------------------------------------------
1991+
Sort
1992+
Output: (COALESCE(t3.q1)), t4.q1, t4.q2
1993+
Sort Key: (COALESCE(t3.q1)), t4.q1, t4.q2
1994+
-> Hash Right Join
1995+
Output: (COALESCE(t3.q1)), t4.q1, t4.q2
1996+
Hash Cond: (t4.q1 = t1.q2)
1997+
-> Hash Join
1998+
Output: (COALESCE(t3.q1)), t4.q1, t4.q2
1999+
Hash Cond: (t2.q2 = t4.q1)
2000+
-> Hash Left Join
2001+
Output: t2.q2, (COALESCE(t3.q1))
2002+
Hash Cond: (t2.q1 = t3.q2)
2003+
-> Seq Scan on public.int8_tbl t2
2004+
Output: t2.q1, t2.q2
2005+
-> Hash
2006+
Output: t3.q2, (COALESCE(t3.q1))
2007+
-> Seq Scan on public.int8_tbl t3
2008+
Output: t3.q2, COALESCE(t3.q1)
2009+
-> Hash
2010+
Output: t4.q1, t4.q2
2011+
-> Seq Scan on public.int8_tbl t4
2012+
Output: t4.q1, t4.q2
2013+
-> Hash
2014+
Output: t1.q2
2015+
-> Seq Scan on public.int8_tbl t1
2016+
Output: t1.q2
2017+
(26 rows)
2018+
2019+
select ss2.* from
2020+
int8_tbl t1 left join
2021+
(int8_tbl t2 left join
2022+
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join
2023+
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
2024+
on t1.q2 = ss2.q1
2025+
order by 1, 2, 3;
2026+
y | q1 | q2
2027+
------------------+------------------+-------------------
2028+
123 | 123 | 456
2029+
123 | 123 | 4567890123456789
2030+
123 | 4567890123456789 | -4567890123456789
2031+
123 | 4567890123456789 | -4567890123456789
2032+
123 | 4567890123456789 | 123
2033+
123 | 4567890123456789 | 123
2034+
123 | 4567890123456789 | 4567890123456789
2035+
123 | 4567890123456789 | 4567890123456789
2036+
4567890123456789 | 123 | 456
2037+
4567890123456789 | 123 | 4567890123456789
2038+
4567890123456789 | 4567890123456789 | -4567890123456789
2039+
4567890123456789 | 4567890123456789 | -4567890123456789
2040+
4567890123456789 | 4567890123456789 | -4567890123456789
2041+
4567890123456789 | 4567890123456789 | -4567890123456789
2042+
4567890123456789 | 4567890123456789 | 123
2043+
4567890123456789 | 4567890123456789 | 123
2044+
4567890123456789 | 4567890123456789 | 123
2045+
4567890123456789 | 4567890123456789 | 123
2046+
4567890123456789 | 4567890123456789 | 4567890123456789
2047+
4567890123456789 | 4567890123456789 | 4567890123456789
2048+
4567890123456789 | 4567890123456789 | 4567890123456789
2049+
4567890123456789 | 4567890123456789 | 4567890123456789
2050+
| |
2051+
| |
2052+
(24 rows)
2053+
2054+
-- otherwise we need to wrap the PHVs
2055+
explain (verbose, costs off)
2056+
select ss2.* from
2057+
int8_tbl t1 left join
2058+
(int8_tbl t2 left join
2059+
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
2060+
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
2061+
on t1.q2 = ss2.q1
2062+
order by 1, 2, 3;
2063+
QUERY PLAN
2064+
----------------------------------------------------------------
2065+
Sort
2066+
Output: ((COALESCE(t3.q1))), t4.q1, t4.q2
2067+
Sort Key: ((COALESCE(t3.q1))), t4.q1, t4.q2
2068+
-> Hash Right Join
2069+
Output: ((COALESCE(t3.q1))), t4.q1, t4.q2
2070+
Hash Cond: (t4.q1 = t1.q2)
2071+
-> Nested Loop
2072+
Output: t4.q1, t4.q2, ((COALESCE(t3.q1)))
2073+
Join Filter: (t2.q2 = t4.q1)
2074+
-> Hash Left Join
2075+
Output: t2.q2, (COALESCE(t3.q1))
2076+
Hash Cond: (t2.q1 = t3.q2)
2077+
-> Seq Scan on public.int8_tbl t2
2078+
Output: t2.q1, t2.q2
2079+
-> Hash
2080+
Output: t3.q2, (COALESCE(t3.q1))
2081+
-> Seq Scan on public.int8_tbl t3
2082+
Output: t3.q2, COALESCE(t3.q1)
2083+
-> Seq Scan on public.int8_tbl t4
2084+
Output: t4.q1, t4.q2, (COALESCE(t3.q1))
2085+
-> Hash
2086+
Output: t1.q2
2087+
-> Seq Scan on public.int8_tbl t1
2088+
Output: t1.q2
2089+
(24 rows)
2090+
2091+
select ss2.* from
2092+
int8_tbl t1 left join
2093+
(int8_tbl t2 left join
2094+
(select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join
2095+
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
2096+
on t1.q2 = ss2.q1
2097+
order by 1, 2, 3;
2098+
y | q1 | q2
2099+
------------------+------------------+-------------------
2100+
123 | 123 | 456
2101+
123 | 123 | 4567890123456789
2102+
123 | 4567890123456789 | -4567890123456789
2103+
123 | 4567890123456789 | -4567890123456789
2104+
123 | 4567890123456789 | 123
2105+
123 | 4567890123456789 | 123
2106+
123 | 4567890123456789 | 4567890123456789
2107+
123 | 4567890123456789 | 4567890123456789
2108+
4567890123456789 | 123 | 456
2109+
4567890123456789 | 123 | 4567890123456789
2110+
4567890123456789 | 4567890123456789 | -4567890123456789
2111+
4567890123456789 | 4567890123456789 | -4567890123456789
2112+
4567890123456789 | 4567890123456789 | -4567890123456789
2113+
4567890123456789 | 4567890123456789 | -4567890123456789
2114+
4567890123456789 | 4567890123456789 | 123
2115+
4567890123456789 | 4567890123456789 | 123
2116+
4567890123456789 | 4567890123456789 | 123
2117+
4567890123456789 | 4567890123456789 | 123
2118+
4567890123456789 | 4567890123456789 | 4567890123456789
2119+
4567890123456789 | 4567890123456789 | 4567890123456789
2120+
4567890123456789 | 4567890123456789 | 4567890123456789
2121+
4567890123456789 | 4567890123456789 | 4567890123456789
2122+
| |
2123+
| |
2124+
(24 rows)
2125+
18512126
--
18522127
-- Tests for CTE inlining behavior
18532128
--

0 commit comments

Comments
 (0)