Skip to content

Commit 29e0b4c

Browse files
committed
Fix planner's handling of outer PlaceHolderVars within subqueries.
For some reason, in the original coding of the PlaceHolderVar mechanism I had supposed that PlaceHolderVars couldn't propagate into subqueries. That is of course entirely possible. When it happens, we need to treat an outer-level PlaceHolderVar much like an outer Var or Aggref, that is SS_replace_correlation_vars() needs to replace the PlaceHolderVar with a Param, and then when building the finished SubPlan we have to provide the PlaceHolderVar expression as an actual parameter for the SubPlan. The handling of the contained expression is a bit delicate but it can be treated exactly like an Aggref's expression. In addition to the missing logic in subselect.c, prepjointree.c was failing to search subqueries for PlaceHolderVars that need their relids adjusted during subquery pullup. It looks like everyplace else that touches PlaceHolderVars got it right, though. Per report from Mark Murawski. In 9.1 and HEAD, queries affected by this oversight would fail with "ERROR: Upper-level PlaceHolderVar found where not expected". But in 9.0 and 8.4, you'd silently get possibly-wrong answers, since the value transmitted into the subquery wouldn't go to null when it should.
1 parent 7bdf9b8 commit 29e0b4c

File tree

5 files changed

+191
-32
lines changed

5 files changed

+191
-32
lines changed

src/backend/optimizer/plan/subselect.c

Lines changed: 98 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,65 @@ replace_outer_var(PlannerInfo *root, Var *var)
148148
return retval;
149149
}
150150

151+
/*
152+
* Generate a Param node to replace the given PlaceHolderVar,
153+
* which is expected to have phlevelsup > 0 (ie, it is not local).
154+
*
155+
* This is just like replace_outer_var, except for PlaceHolderVars.
156+
*/
157+
static Param *
158+
replace_outer_placeholdervar(PlannerInfo *root, PlaceHolderVar *phv)
159+
{
160+
Param *retval;
161+
ListCell *ppl;
162+
PlannerParamItem *pitem;
163+
Index abslevel;
164+
int i;
165+
166+
Assert(phv->phlevelsup > 0 && phv->phlevelsup < root->query_level);
167+
abslevel = root->query_level - phv->phlevelsup;
168+
169+
/* If there's already a paramlist entry for this same PHV, just use it */
170+
i = 0;
171+
foreach(ppl, root->glob->paramlist)
172+
{
173+
pitem = (PlannerParamItem *) lfirst(ppl);
174+
if (pitem->abslevel == abslevel && IsA(pitem->item, PlaceHolderVar))
175+
{
176+
PlaceHolderVar *pphv = (PlaceHolderVar *) pitem->item;
177+
178+
/* We assume comparing the PHIDs is sufficient */
179+
if (pphv->phid == phv->phid)
180+
break;
181+
}
182+
i++;
183+
}
184+
185+
if (!ppl)
186+
{
187+
/* Nope, so make a new one */
188+
phv = (PlaceHolderVar *) copyObject(phv);
189+
IncrementVarSublevelsUp((Node *) phv, -((int) phv->phlevelsup), 0);
190+
Assert(phv->phlevelsup == 0);
191+
192+
pitem = makeNode(PlannerParamItem);
193+
pitem->item = (Node *) phv;
194+
pitem->abslevel = abslevel;
195+
196+
root->glob->paramlist = lappend(root->glob->paramlist, pitem);
197+
/* i is already the correct index for the new item */
198+
}
199+
200+
retval = makeNode(Param);
201+
retval->paramkind = PARAM_EXEC;
202+
retval->paramid = i;
203+
retval->paramtype = exprType((Node *) phv->phexpr);
204+
retval->paramtypmod = exprTypmod((Node *) phv->phexpr);
205+
retval->location = -1;
206+
207+
return retval;
208+
}
209+
151210
/*
152211
* Generate a Param node to replace the given Aggref
153212
* which is expected to have agglevelsup > 0 (ie, it is not local).
@@ -449,17 +508,19 @@ build_subplan(PlannerInfo *root, Plan *plan, List *rtable, List *rowmarks,
449508
Node *arg;
450509

451510
/*
452-
* The Var or Aggref has already been adjusted to have the correct
453-
* varlevelsup or agglevelsup. We probably don't even need to
454-
* copy it again, but be safe.
511+
* The Var, PlaceHolderVar, or Aggref has already been adjusted to
512+
* have the correct varlevelsup, phlevelsup, or agglevelsup. We
513+
* probably don't even need to copy it again, but be safe.
455514
*/
456515
arg = copyObject(pitem->item);
457516

458517
/*
459-
* If it's an Aggref, its arguments might contain SubLinks, which
460-
* have not yet been processed. Do that now.
518+
* If it's a PlaceHolderVar or Aggref, its arguments might contain
519+
* SubLinks, which have not yet been processed (see the comments
520+
* for SS_replace_correlation_vars). Do that now.
461521
*/
462-
if (IsA(arg, Aggref))
522+
if (IsA(arg, PlaceHolderVar) ||
523+
IsA(arg, Aggref))
463524
arg = SS_process_sublinks(root, arg, false);
464525

465526
splan->parParam = lappend_int(splan->parParam, paramid);
@@ -1539,24 +1600,25 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
15391600
/*
15401601
* Replace correlation vars (uplevel vars) with Params.
15411602
*
1542-
* Uplevel aggregates are replaced, too.
1603+
* Uplevel PlaceHolderVars and aggregates are replaced, too.
15431604
*
15441605
* Note: it is critical that this runs immediately after SS_process_sublinks.
1545-
* Since we do not recurse into the arguments of uplevel aggregates, they will
1546-
* get copied to the appropriate subplan args list in the parent query with
1547-
* uplevel vars not replaced by Params, but only adjusted in level (see
1548-
* replace_outer_agg). That's exactly what we want for the vars of the parent
1549-
* level --- but if an aggregate's argument contains any further-up variables,
1550-
* they have to be replaced with Params in their turn. That will happen when
1551-
* the parent level runs SS_replace_correlation_vars. Therefore it must do
1552-
* so after expanding its sublinks to subplans. And we don't want any steps
1553-
* in between, else those steps would never get applied to the aggregate
1554-
* argument expressions, either in the parent or the child level.
1606+
* Since we do not recurse into the arguments of uplevel PHVs and aggregates,
1607+
* they will get copied to the appropriate subplan args list in the parent
1608+
* query with uplevel vars not replaced by Params, but only adjusted in level
1609+
* (see replace_outer_placeholdervar and replace_outer_agg). That's exactly
1610+
* what we want for the vars of the parent level --- but if a PHV's or
1611+
* aggregate's argument contains any further-up variables, they have to be
1612+
* replaced with Params in their turn. That will happen when the parent level
1613+
* runs SS_replace_correlation_vars. Therefore it must do so after expanding
1614+
* its sublinks to subplans. And we don't want any steps in between, else
1615+
* those steps would never get applied to the argument expressions, either in
1616+
* the parent or the child level.
15551617
*
15561618
* Another fairly tricky thing going on here is the handling of SubLinks in
1557-
* the arguments of uplevel aggregates. Those are not touched inside the
1558-
* intermediate query level, either. Instead, SS_process_sublinks recurses
1559-
* on them after copying the Aggref expression into the parent plan level
1619+
* the arguments of uplevel PHVs/aggregates. Those are not touched inside the
1620+
* intermediate query level, either. Instead, SS_process_sublinks recurses on
1621+
* them after copying the PHV or Aggref expression into the parent plan level
15601622
* (this is actually taken care of in build_subplan).
15611623
*/
15621624
Node *
@@ -1576,6 +1638,12 @@ replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
15761638
if (((Var *) node)->varlevelsup > 0)
15771639
return (Node *) replace_outer_var(root, (Var *) node);
15781640
}
1641+
if (IsA(node, PlaceHolderVar))
1642+
{
1643+
if (((PlaceHolderVar *) node)->phlevelsup > 0)
1644+
return (Node *) replace_outer_placeholdervar(root,
1645+
(PlaceHolderVar *) node);
1646+
}
15791647
if (IsA(node, Aggref))
15801648
{
15811649
if (((Aggref *) node)->agglevelsup > 0)
@@ -1635,12 +1703,17 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
16351703
}
16361704

16371705
/*
1638-
* Don't recurse into the arguments of an outer aggregate here. Any
1639-
* SubLinks in the arguments have to be dealt with at the outer query
1640-
* level; they'll be handled when build_subplan collects the Aggref into
1641-
* the arguments to be passed down to the current subplan.
1706+
* Don't recurse into the arguments of an outer PHV or aggregate here.
1707+
* Any SubLinks in the arguments have to be dealt with at the outer query
1708+
* level; they'll be handled when build_subplan collects the PHV or Aggref
1709+
* into the arguments to be passed down to the current subplan.
16421710
*/
1643-
if (IsA(node, Aggref))
1711+
if (IsA(node, PlaceHolderVar))
1712+
{
1713+
if (((PlaceHolderVar *) node)->phlevelsup > 0)
1714+
return node;
1715+
}
1716+
else if (IsA(node, Aggref))
16441717
{
16451718
if (((Aggref *) node)->agglevelsup > 0)
16461719
return node;

src/backend/optimizer/prep/prepjointree.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,8 +1882,6 @@ reduce_outer_joins_pass2(Node *jtnode,
18821882
*
18831883
* Find any PlaceHolderVar nodes in the given tree that reference the
18841884
* pulled-up relid, and change them to reference the replacement relid(s).
1885-
* We do not need to recurse into subqueries, since no subquery of the current
1886-
* top query could (yet) contain such a reference.
18871885
*
18881886
* NOTE: although this has the form of a walker, we cheat and modify the
18891887
* nodes in-place. This should be OK since the tree was copied by
@@ -1894,6 +1892,7 @@ reduce_outer_joins_pass2(Node *jtnode,
18941892
typedef struct
18951893
{
18961894
int varno;
1895+
int sublevels_up;
18971896
Relids subrelids;
18981897
} substitute_multiple_relids_context;
18991898

@@ -1907,7 +1906,8 @@ substitute_multiple_relids_walker(Node *node,
19071906
{
19081907
PlaceHolderVar *phv = (PlaceHolderVar *) node;
19091908

1910-
if (bms_is_member(context->varno, phv->phrels))
1909+
if (phv->phlevelsup == context->sublevels_up &&
1910+
bms_is_member(context->varno, phv->phrels))
19111911
{
19121912
phv->phrels = bms_union(phv->phrels,
19131913
context->subrelids);
@@ -1916,6 +1916,18 @@ substitute_multiple_relids_walker(Node *node,
19161916
}
19171917
/* fall through to examine children */
19181918
}
1919+
if (IsA(node, Query))
1920+
{
1921+
/* Recurse into subselects */
1922+
bool result;
1923+
1924+
context->sublevels_up++;
1925+
result = query_tree_walker((Query *) node,
1926+
substitute_multiple_relids_walker,
1927+
(void *) context, 0);
1928+
context->sublevels_up--;
1929+
return result;
1930+
}
19191931
/* Shouldn't need to handle planner auxiliary nodes here */
19201932
Assert(!IsA(node, SpecialJoinInfo));
19211933
Assert(!IsA(node, AppendRelInfo));
@@ -1931,6 +1943,7 @@ substitute_multiple_relids(Node *node, int varno, Relids subrelids)
19311943
substitute_multiple_relids_context context;
19321944

19331945
context.varno = varno;
1946+
context.sublevels_up = 0;
19341947
context.subrelids = subrelids;
19351948

19361949
/*

src/include/nodes/relation.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,12 +1357,17 @@ typedef struct PlaceHolderInfo
13571357
*
13581358
* Each paramlist item shows the absolute query level it is associated with,
13591359
* where the outermost query is level 1 and nested subqueries have higher
1360-
* numbers. The item the parameter slot represents can be one of three kinds:
1360+
* numbers. The item the parameter slot represents can be one of four kinds:
13611361
*
13621362
* A Var: the slot represents a variable of that level that must be passed
13631363
* down because subqueries have outer references to it. The varlevelsup
13641364
* value in the Var will always be zero.
13651365
*
1366+
* A PlaceHolderVar: this works much like the Var case, except that the
1367+
* entry is a PlaceHolderVar node with a contained expression. The PHV
1368+
* will have phlevelsup = 0, and the contained expression is adjusted
1369+
* to match in level.
1370+
*
13661371
* An Aggref (with an expression tree representing its argument): the slot
13671372
* represents an aggregate expression that is an outer reference for some
13681373
* subquery. The Aggref itself has agglevelsup = 0, and its argument tree
@@ -1372,14 +1377,14 @@ typedef struct PlaceHolderInfo
13721377
* for that subplan). The absolute level shown for such items corresponds
13731378
* to the parent query of the subplan.
13741379
*
1375-
* Note: we detect duplicate Var parameters and coalesce them into one slot,
1376-
* but we do not do this for Aggref or Param slots.
1380+
* Note: we detect duplicate Var and PlaceHolderVar parameters and coalesce
1381+
* them into one slot, but we do not do this for Aggref or Param slots.
13771382
*/
13781383
typedef struct PlannerParamItem
13791384
{
13801385
NodeTag type;
13811386

1382-
Node *item; /* the Var, Aggref, or Param */
1387+
Node *item; /* the Var, PlaceHolderVar, Aggref, or Param */
13831388
Index abslevel; /* its absolute query level */
13841389
} PlannerParamItem;
13851390

src/test/regress/expected/join.out

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2533,6 +2533,53 @@ ON sub1.key1 = sub2.key3;
25332533
1 | 1 | 1 | 1
25342534
(1 row)
25352535

2536+
--
2537+
-- test case where a PlaceHolderVar is propagated into a subquery
2538+
--
2539+
explain (costs off)
2540+
select * from
2541+
int8_tbl t1 left join
2542+
(select q1 as x, 42 as y from int8_tbl t2) ss
2543+
on t1.q2 = ss.x
2544+
where
2545+
1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
2546+
order by 1,2;
2547+
QUERY PLAN
2548+
---------------------------------------------------------
2549+
Sort
2550+
Sort Key: t1.q1, t1.q2
2551+
-> Hash Left Join
2552+
Hash Cond: (t1.q2 = t2.q1)
2553+
Filter: (1 = (SubPlan 1))
2554+
-> Seq Scan on int8_tbl t1
2555+
-> Hash
2556+
-> Seq Scan on int8_tbl t2
2557+
SubPlan 1
2558+
-> Limit
2559+
-> Result
2560+
One-Time Filter: ($0 IS NOT NULL)
2561+
-> Seq Scan on int8_tbl t3
2562+
(13 rows)
2563+
2564+
select * from
2565+
int8_tbl t1 left join
2566+
(select q1 as x, 42 as y from int8_tbl t2) ss
2567+
on t1.q2 = ss.x
2568+
where
2569+
1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
2570+
order by 1,2;
2571+
q1 | q2 | x | y
2572+
------------------+------------------+------------------+----
2573+
123 | 4567890123456789 | 4567890123456789 | 42
2574+
123 | 4567890123456789 | 4567890123456789 | 42
2575+
123 | 4567890123456789 | 4567890123456789 | 42
2576+
4567890123456789 | 123 | 123 | 42
2577+
4567890123456789 | 123 | 123 | 42
2578+
4567890123456789 | 4567890123456789 | 4567890123456789 | 42
2579+
4567890123456789 | 4567890123456789 | 4567890123456789 | 42
2580+
4567890123456789 | 4567890123456789 | 4567890123456789 | 42
2581+
(8 rows)
2582+
25362583
--
25372584
-- test the corner cases FULL JOIN ON TRUE and FULL JOIN ON FALSE
25382585
--

src/test/regress/sql/join.sql

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,27 @@ LEFT JOIN
639639
) sub2
640640
ON sub1.key1 = sub2.key3;
641641

642+
--
643+
-- test case where a PlaceHolderVar is propagated into a subquery
644+
--
645+
646+
explain (costs off)
647+
select * from
648+
int8_tbl t1 left join
649+
(select q1 as x, 42 as y from int8_tbl t2) ss
650+
on t1.q2 = ss.x
651+
where
652+
1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
653+
order by 1,2;
654+
655+
select * from
656+
int8_tbl t1 left join
657+
(select q1 as x, 42 as y from int8_tbl t2) ss
658+
on t1.q2 = ss.x
659+
where
660+
1 = (select 1 from int8_tbl t3 where ss.y is not null limit 1)
661+
order by 1,2;
662+
642663
--
643664
-- test the corner cases FULL JOIN ON TRUE and FULL JOIN ON FALSE
644665
--

0 commit comments

Comments
 (0)