Skip to content

Commit 341757b

Browse files
committed
Prevent pushing down WHERE clauses into unsafe UNION/INTERSECT nests.
The planner is aware that it mustn't push down upper-level quals into subqueries if the quals reference subquery output columns that contain set-returning functions or volatile functions, or are non-DISTINCT outputs of a DISTINCT ON subquery. However, it missed making this check when there were one or more levels of UNION or INTERSECT above the dangerous expression. This could lead to "set-valued function called in context that cannot accept a set" errors, as seen in bug #8213 from Eric Soroos, or to silently wrong answers in the other cases. To fix, refactor the checks so that we make the column-is-unsafe checks during subquery_is_pushdown_safe(), which already has to recursively inspect all arms of a set-operation tree. This makes qual_is_pushdown_safe() considerably simpler, at the cost that we will spend some cycles checking output columns that possibly aren't referenced in any upper qual. But the cases where this code gets executed at all are already nontrivial queries, so it's unlikely anybody will notice any slowdown of planning. This has been broken since commit 05f916e, which makes the bug over ten years old. A bit surprising nobody noticed it before now.
1 parent 48b5120 commit 341757b

File tree

3 files changed

+255
-91
lines changed

3 files changed

+255
-91
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 125 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,14 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
8282
RangeTblEntry *rte);
8383
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
8484
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
85-
bool *differentTypes);
85+
bool *unsafeColumns);
8686
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
87-
bool *differentTypes);
87+
bool *unsafeColumns);
88+
static void check_output_expressions(Query *subquery, bool *unsafeColumns);
8889
static void compare_tlist_datatypes(List *tlist, List *colTypes,
89-
bool *differentTypes);
90+
bool *unsafeColumns);
9091
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
91-
bool *differentTypes);
92+
bool *unsafeColumns);
9293
static void subquery_push_qual(Query *subquery,
9394
RangeTblEntry *rte, Index rti, Node *qual);
9495
static void recurse_push_qual(Node *setOp, Query *topquery,
@@ -1021,7 +1022,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
10211022
{
10221023
Query *parse = root->parse;
10231024
Query *subquery = rte->subquery;
1024-
bool *differentTypes;
1025+
bool *unsafeColumns;
10251026
double tuple_fraction;
10261027
PlannerInfo *subroot;
10271028
List *pathkeys;
@@ -1033,8 +1034,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
10331034
*/
10341035
subquery = copyObject(subquery);
10351036

1036-
/* We need a workspace for keeping track of set-op type coercions */
1037-
differentTypes = (bool *)
1037+
/*
1038+
* We need a workspace for keeping track of unsafe-to-reference columns.
1039+
* unsafeColumns[i] is set TRUE if we've found that output column i of the
1040+
* subquery is unsafe to use in a pushed-down qual.
1041+
*/
1042+
unsafeColumns = (bool *)
10381043
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
10391044

10401045
/*
@@ -1063,7 +1068,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
10631068
* push down a pushable qual, because it'd result in a worse plan?
10641069
*/
10651070
if (rel->baserestrictinfo != NIL &&
1066-
subquery_is_pushdown_safe(subquery, subquery, differentTypes))
1071+
subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
10671072
{
10681073
/* OK to consider pushing down individual quals */
10691074
List *upperrestrictlist = NIL;
@@ -1077,7 +1082,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
10771082
if (!rinfo->pseudoconstant &&
10781083
(!rte->security_barrier ||
10791084
!contain_leaky_functions(clause)) &&
1080-
qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
1085+
qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
10811086
{
10821087
/* Push it down */
10831088
subquery_push_qual(subquery, rte, rti, clause);
@@ -1091,7 +1096,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
10911096
rel->baserestrictinfo = upperrestrictlist;
10921097
}
10931098

1094-
pfree(differentTypes);
1099+
pfree(unsafeColumns);
10951100

10961101
/*
10971102
* We can safely pass the outer tuple_fraction down to the subquery if the
@@ -1485,17 +1490,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
14851490
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
14861491
* quals into it, because that could change the results.
14871492
*
1488-
* 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
1489-
* push quals into each component query, but the quals can only reference
1490-
* subquery columns that suffer no type coercions in the set operation.
1491-
* Otherwise there are possible semantic gotchas. So, we check the
1492-
* component queries to see if any of them have different output types;
1493-
* differentTypes[k] is set true if column k has different type in any
1494-
* component.
1493+
* In addition, we make several checks on the subquery's output columns
1494+
* to see if it is safe to reference them in pushed-down quals. If output
1495+
* column k is found to be unsafe to reference, we set unsafeColumns[k] to
1496+
* TRUE, but we don't reject the subquery overall since column k might
1497+
* not be referenced by some/all quals. The unsafeColumns[] array will be
1498+
* consulted later by qual_is_pushdown_safe(). It's better to do it this
1499+
* way than to make the checks directly in qual_is_pushdown_safe(), because
1500+
* when the subquery involves set operations we have to check the output
1501+
* expressions in each arm of the set op.
14951502
*/
14961503
static bool
14971504
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1498-
bool *differentTypes)
1505+
bool *unsafeColumns)
14991506
{
15001507
SetOperationStmt *topop;
15011508

@@ -1507,13 +1514,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
15071514
if (subquery->hasWindowFuncs)
15081515
return false;
15091516

1517+
/*
1518+
* If we're at a leaf query, check for unsafe expressions in its target
1519+
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
1520+
* setop trees have only simple Vars in their tlists, so no need to check
1521+
* them.)
1522+
*/
1523+
if (subquery->setOperations == NULL)
1524+
check_output_expressions(subquery, unsafeColumns);
1525+
15101526
/* Are we at top level, or looking at a setop component? */
15111527
if (subquery == topquery)
15121528
{
15131529
/* Top level, so check any component queries */
15141530
if (subquery->setOperations != NULL)
15151531
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
1516-
differentTypes))
1532+
unsafeColumns))
15171533
return false;
15181534
}
15191535
else
@@ -1526,7 +1542,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
15261542
Assert(topop && IsA(topop, SetOperationStmt));
15271543
compare_tlist_datatypes(subquery->targetList,
15281544
topop->colTypes,
1529-
differentTypes);
1545+
unsafeColumns);
15301546
}
15311547
return true;
15321548
}
@@ -1536,7 +1552,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
15361552
*/
15371553
static bool
15381554
recurse_pushdown_safe(Node *setOp, Query *topquery,
1539-
bool *differentTypes)
1555+
bool *unsafeColumns)
15401556
{
15411557
if (IsA(setOp, RangeTblRef))
15421558
{
@@ -1545,19 +1561,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
15451561
Query *subquery = rte->subquery;
15461562

15471563
Assert(subquery != NULL);
1548-
return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
1564+
return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
15491565
}
15501566
else if (IsA(setOp, SetOperationStmt))
15511567
{
15521568
SetOperationStmt *op = (SetOperationStmt *) setOp;
15531569

1554-
/* EXCEPT is no good */
1570+
/* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */
15551571
if (op->op == SETOP_EXCEPT)
15561572
return false;
15571573
/* Else recurse */
1558-
if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
1574+
if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
15591575
return false;
1560-
if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
1576+
if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
15611577
return false;
15621578
}
15631579
else
@@ -1569,17 +1585,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
15691585
}
15701586

15711587
/*
1572-
* Compare tlist's datatypes against the list of set-operation result types.
1573-
* For any items that are different, mark the appropriate element of
1574-
* differentTypes[] to show that this column will have type conversions.
1588+
* check_output_expressions - check subquery's output expressions for safety
1589+
*
1590+
* There are several cases in which it's unsafe to push down an upper-level
1591+
* qual if it references a particular output column of a subquery. We check
1592+
* each output column of the subquery and set unsafeColumns[k] to TRUE if
1593+
* that column is unsafe for a pushed-down qual to reference. The conditions
1594+
* checked here are:
1595+
*
1596+
* 1. We must not push down any quals that refer to subselect outputs that
1597+
* return sets, else we'd introduce functions-returning-sets into the
1598+
* subquery's WHERE/HAVING quals.
1599+
*
1600+
* 2. We must not push down any quals that refer to subselect outputs that
1601+
* contain volatile functions, for fear of introducing strange results due
1602+
* to multiple evaluation of a volatile function.
1603+
*
1604+
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
1605+
* refer to non-DISTINCT output columns, because that could change the set
1606+
* of rows returned. (This condition is vacuous for DISTINCT, because then
1607+
* there are no non-DISTINCT output columns, so we needn't check. But note
1608+
* we are assuming that the qual can't distinguish values that the DISTINCT
1609+
* operator sees as equal. This is a bit shaky but we have no way to test
1610+
* for the case, and it's unlikely enough that we shouldn't refuse the
1611+
* optimization just because it could theoretically happen.)
1612+
*/
1613+
static void
1614+
check_output_expressions(Query *subquery, bool *unsafeColumns)
1615+
{
1616+
ListCell *lc;
1617+
1618+
foreach(lc, subquery->targetList)
1619+
{
1620+
TargetEntry *tle = (TargetEntry *) lfirst(lc);
1621+
1622+
if (tle->resjunk)
1623+
continue; /* ignore resjunk columns */
1624+
1625+
/* We need not check further if output col is already known unsafe */
1626+
if (unsafeColumns[tle->resno])
1627+
continue;
1628+
1629+
/* Functions returning sets are unsafe (point 1) */
1630+
if (expression_returns_set((Node *) tle->expr))
1631+
{
1632+
unsafeColumns[tle->resno] = true;
1633+
continue;
1634+
}
1635+
1636+
/* Volatile functions are unsafe (point 2) */
1637+
if (contain_volatile_functions((Node *) tle->expr))
1638+
{
1639+
unsafeColumns[tle->resno] = true;
1640+
continue;
1641+
}
1642+
1643+
/* If subquery uses DISTINCT ON, check point 3 */
1644+
if (subquery->hasDistinctOn &&
1645+
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1646+
{
1647+
/* non-DISTINCT column, so mark it unsafe */
1648+
unsafeColumns[tle->resno] = true;
1649+
continue;
1650+
}
1651+
}
1652+
}
1653+
1654+
/*
1655+
* For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
1656+
* push quals into each component query, but the quals can only reference
1657+
* subquery columns that suffer no type coercions in the set operation.
1658+
* Otherwise there are possible semantic gotchas. So, we check the
1659+
* component queries to see if any of them have output types different from
1660+
* the top-level setop outputs. unsafeColumns[k] is set true if column k
1661+
* has different type in any component.
15751662
*
15761663
* We don't have to care about typmods here: the only allowed difference
15771664
* between set-op input and output typmods is input is a specific typmod
15781665
* and output is -1, and that does not require a coercion.
1666+
*
1667+
* tlist is a subquery tlist.
1668+
* colTypes is an OID list of the top-level setop's output column types.
1669+
* unsafeColumns[] is the result array.
15791670
*/
15801671
static void
15811672
compare_tlist_datatypes(List *tlist, List *colTypes,
1582-
bool *differentTypes)
1673+
bool *unsafeColumns)
15831674
{
15841675
ListCell *l;
15851676
ListCell *colType = list_head(colTypes);
@@ -1593,7 +1684,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
15931684
if (colType == NULL)
15941685
elog(ERROR, "wrong number of tlist entries");
15951686
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
1596-
differentTypes[tle->resno] = true;
1687+
unsafeColumns[tle->resno] = true;
15971688
colType = lnext(colType);
15981689
}
15991690
if (colType != NULL)
@@ -1616,34 +1707,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
16161707
* (since there is no easy way to name that within the subquery itself).
16171708
*
16181709
* 3. The qual must not refer to any subquery output columns that were
1619-
* found to have inconsistent types across a set operation tree by
1620-
* subquery_is_pushdown_safe().
1621-
*
1622-
* 4. If the subquery uses DISTINCT ON, we must not push down any quals that
1623-
* refer to non-DISTINCT output columns, because that could change the set
1624-
* of rows returned. (This condition is vacuous for DISTINCT, because then
1625-
* there are no non-DISTINCT output columns, so we needn't check. But note
1626-
* we are assuming that the qual can't distinguish values that the DISTINCT
1627-
* operator sees as equal. This is a bit shaky but we have no way to test
1628-
* for the case, and it's unlikely enough that we shouldn't refuse the
1629-
* optimization just because it could theoretically happen.)
1630-
*
1631-
* 5. We must not push down any quals that refer to subselect outputs that
1632-
* return sets, else we'd introduce functions-returning-sets into the
1633-
* subquery's WHERE/HAVING quals.
1634-
*
1635-
* 6. We must not push down any quals that refer to subselect outputs that
1636-
* contain volatile functions, for fear of introducing strange results due
1637-
* to multiple evaluation of a volatile function.
1710+
* found to be unsafe to reference by subquery_is_pushdown_safe().
16381711
*/
16391712
static bool
16401713
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1641-
bool *differentTypes)
1714+
bool *unsafeColumns)
16421715
{
16431716
bool safe = true;
16441717
List *vars;
16451718
ListCell *vl;
1646-
Bitmapset *tested = NULL;
16471719

16481720
/* Refuse subselects (point 1) */
16491721
if (contain_subplans(qual))
@@ -1666,7 +1738,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
16661738
foreach(vl, vars)
16671739
{
16681740
Var *var = (Var *) lfirst(vl);
1669-
TargetEntry *tle;
16701741

16711742
/*
16721743
* XXX Punt if we find any PlaceHolderVars in the restriction clause.
@@ -1682,6 +1753,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
16821753
}
16831754

16841755
Assert(var->varno == rti);
1756+
Assert(var->varattno >= 0);
16851757

16861758
/* Check point 2 */
16871759
if (var->varattno == 0)
@@ -1690,53 +1762,15 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
16901762
break;
16911763
}
16921764

1693-
/*
1694-
* We use a bitmapset to avoid testing the same attno more than once.
1695-
* (NB: this only works because subquery outputs can't have negative
1696-
* attnos.)
1697-
*/
1698-
if (bms_is_member(var->varattno, tested))
1699-
continue;
1700-
tested = bms_add_member(tested, var->varattno);
1701-
17021765
/* Check point 3 */
1703-
if (differentTypes[var->varattno])
1704-
{
1705-
safe = false;
1706-
break;
1707-
}
1708-
1709-
/* Must find the tlist element referenced by the Var */
1710-
tle = get_tle_by_resno(subquery->targetList, var->varattno);
1711-
Assert(tle != NULL);
1712-
Assert(!tle->resjunk);
1713-
1714-
/* If subquery uses DISTINCT ON, check point 4 */
1715-
if (subquery->hasDistinctOn &&
1716-
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1717-
{
1718-
/* non-DISTINCT column, so fail */
1719-
safe = false;
1720-
break;
1721-
}
1722-
1723-
/* Refuse functions returning sets (point 5) */
1724-
if (expression_returns_set((Node *) tle->expr))
1725-
{
1726-
safe = false;
1727-
break;
1728-
}
1729-
1730-
/* Refuse volatile functions (point 6) */
1731-
if (contain_volatile_functions((Node *) tle->expr))
1766+
if (unsafeColumns[var->varattno])
17321767
{
17331768
safe = false;
17341769
break;
17351770
}
17361771
}
17371772

17381773
list_free(vars);
1739-
bms_free(tested);
17401774

17411775
return safe;
17421776
}

0 commit comments

Comments
 (0)