Skip to content

Commit b62f94c

Browse files
committed
Allow simplification of EXISTS() subqueries containing LIMIT.
The locution "EXISTS(SELECT ... LIMIT 1)" seems to be rather common among people who don't realize that the database already performs optimizations equivalent to putting LIMIT 1 in the sub-select. Unfortunately, this was actually making things worse, because it prevented us from optimizing such EXISTS clauses into semi or anti joins. Teach simplify_EXISTS_query() to suppress constant-positive LIMIT clauses. That fixes the semi/anti-join case, and may help marginally even for cases that have to be left as sub-SELECTs. Marti Raudsepp, reviewed by David Rowley
1 parent 9c58101 commit b62f94c

File tree

3 files changed

+96
-9
lines changed

3 files changed

+96
-9
lines changed

src/backend/optimizer/plan/subselect.c

+43-9
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node,
7070
static bool subplan_is_hashable(Plan *plan);
7171
static bool testexpr_is_hashable(Node *testexpr);
7272
static bool hash_ok_operator(OpExpr *expr);
73-
static bool simplify_EXISTS_query(Query *query);
73+
static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
7474
static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
7575
Node **testexpr, List **paramIds);
7676
static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
@@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
452452
* If it's an EXISTS subplan, we might be able to simplify it.
453453
*/
454454
if (subLinkType == EXISTS_SUBLINK)
455-
simple_exists = simplify_EXISTS_query(subquery);
455+
simple_exists = simplify_EXISTS_query(root, subquery);
456456

457457
/*
458458
* For an EXISTS subplan, tell lower-level planner to expect that only the
@@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
518518
/* Make a second copy of the original subquery */
519519
subquery = (Query *) copyObject(orig_subquery);
520520
/* and re-simplify */
521-
simple_exists = simplify_EXISTS_query(subquery);
521+
simple_exists = simplify_EXISTS_query(root, subquery);
522522
Assert(simple_exists);
523523
/* See if it can be converted to an ANY query */
524524
subquery = convert_EXISTS_to_ANY(root, subquery,
@@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
13591359
* targetlist, we have to fail, because the pullup operation leaves us
13601360
* with noplace to evaluate the targetlist.
13611361
*/
1362-
if (!simplify_EXISTS_query(subselect))
1362+
if (!simplify_EXISTS_query(root, subselect))
13631363
return NULL;
13641364

13651365
/*
@@ -1486,13 +1486,14 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
14861486
* Returns TRUE if was able to discard the targetlist, else FALSE.
14871487
*/
14881488
static bool
1489-
simplify_EXISTS_query(Query *query)
1489+
simplify_EXISTS_query(PlannerInfo *root, Query *query)
14901490
{
14911491
/*
14921492
* We don't try to simplify at all if the query uses set operations,
1493-
* aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE;
1494-
* none of these seem likely in normal usage and their possible effects
1495-
* are complex.
1493+
* aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE; none
1494+
* of these seem likely in normal usage and their possible effects are
1495+
* complex. (Note: we could ignore an "OFFSET 0" clause, but that
1496+
* traditionally is used as an optimization fence, so we don't.)
14961497
*/
14971498
if (query->commandType != CMD_SELECT ||
14981499
query->setOperations ||
@@ -1501,10 +1502,43 @@ simplify_EXISTS_query(Query *query)
15011502
query->hasModifyingCTE ||
15021503
query->havingQual ||
15031504
query->limitOffset ||
1504-
query->limitCount ||
15051505
query->rowMarks)
15061506
return false;
15071507

1508+
/*
1509+
* LIMIT with a constant positive (or NULL) value doesn't affect the
1510+
* semantics of EXISTS, so let's ignore such clauses. This is worth doing
1511+
* because people accustomed to certain other DBMSes may be in the habit
1512+
* of writing EXISTS(SELECT ... LIMIT 1) as an optimization. If there's a
1513+
* LIMIT with anything else as argument, though, we can't simplify.
1514+
*/
1515+
if (query->limitCount)
1516+
{
1517+
/*
1518+
* The LIMIT clause has not yet been through eval_const_expressions,
1519+
* so we have to apply that here. It might seem like this is a waste
1520+
* of cycles, since the only case plausibly worth worrying about is
1521+
* "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)",
1522+
* so we have to fold constants or we're not going to recognize it.
1523+
*/
1524+
Node *node = eval_const_expressions(root, query->limitCount);
1525+
Const *limit;
1526+
1527+
/* Might as well update the query if we simplified the clause. */
1528+
query->limitCount = node;
1529+
1530+
if (!IsA(node, Const))
1531+
return false;
1532+
1533+
limit = (Const *) node;
1534+
Assert(limit->consttype == INT8OID);
1535+
if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
1536+
return false;
1537+
1538+
/* Whether or not the targetlist is safe, we can drop the LIMIT. */
1539+
query->limitCount = NULL;
1540+
}
1541+
15081542
/*
15091543
* Mustn't throw away the targetlist if it contains set-returning
15101544
* functions; those could affect whether zero rows are returned!

src/test/regress/expected/subselect.out

+40
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,46 @@ from int8_tbl group by q1 order by q1;
221221
4567890123456789 | 0.6
222222
(2 rows)
223223

224+
--
225+
-- Check EXISTS simplification with LIMIT
226+
--
227+
explain (costs off)
228+
select * from int4_tbl o where exists
229+
(select 1 from int4_tbl i where i.f1=o.f1 limit null);
230+
QUERY PLAN
231+
------------------------------------
232+
Hash Semi Join
233+
Hash Cond: (o.f1 = i.f1)
234+
-> Seq Scan on int4_tbl o
235+
-> Hash
236+
-> Seq Scan on int4_tbl i
237+
(5 rows)
238+
239+
explain (costs off)
240+
select * from int4_tbl o where not exists
241+
(select 1 from int4_tbl i where i.f1=o.f1 limit 1);
242+
QUERY PLAN
243+
------------------------------------
244+
Hash Anti Join
245+
Hash Cond: (o.f1 = i.f1)
246+
-> Seq Scan on int4_tbl o
247+
-> Hash
248+
-> Seq Scan on int4_tbl i
249+
(5 rows)
250+
251+
explain (costs off)
252+
select * from int4_tbl o where exists
253+
(select 1 from int4_tbl i where i.f1=o.f1 limit 0);
254+
QUERY PLAN
255+
--------------------------------------
256+
Seq Scan on int4_tbl o
257+
Filter: (SubPlan 1)
258+
SubPlan 1
259+
-> Limit
260+
-> Seq Scan on int4_tbl i
261+
Filter: (f1 = o.f1)
262+
(6 rows)
263+
224264
--
225265
-- Test cases to catch unpleasant interactions between IN-join processing
226266
-- and subquery pullup.

src/test/regress/sql/subselect.sql

+13
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,19 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
9292
select q1, float8(count(*)) / (select count(*) from int8_tbl)
9393
from int8_tbl group by q1 order by q1;
9494

95+
--
96+
-- Check EXISTS simplification with LIMIT
97+
--
98+
explain (costs off)
99+
select * from int4_tbl o where exists
100+
(select 1 from int4_tbl i where i.f1=o.f1 limit null);
101+
explain (costs off)
102+
select * from int4_tbl o where not exists
103+
(select 1 from int4_tbl i where i.f1=o.f1 limit 1);
104+
explain (costs off)
105+
select * from int4_tbl o where exists
106+
(select 1 from int4_tbl i where i.f1=o.f1 limit 0);
107+
95108
--
96109
-- Test cases to catch unpleasant interactions between IN-join processing
97110
-- and subquery pullup.

0 commit comments

Comments
 (0)