Skip to content

Commit c0962a1

Browse files
committed
Convert 'x IN (VALUES ...)' to 'x = ANY ...' then appropriate
This commit implements the automatic conversion of 'x IN (VALUES ...)' into ScalarArrayOpExpr. That simplifies the query tree, eliminating the appearance of an unnecessary join. Since VALUES describes a relational table, and the value of such a list is a table row, the optimizer will likely face an underestimation problem due to the inability to estimate cardinality through MCV statistics. The cardinality evaluation mechanism can work with the array inclusion check operation. If the array is small enough (< 100 elements), it will perform a statistical evaluation element by element. We perform the transformation in the convert_ANY_sublink_to_join() if VALUES RTE is proper and the transformation is convertible. The conversion is only possible for operations on scalar values, not rows. Also, we currently support the transformation only when it ends up with a constant array. Otherwise, the evaluation of non-hashed SAOP might be slower than the corresponding Hash Join with VALUES. Discussion: https://postgr.es/m/0184212d-1248-4f1f-a42d-f5cb1c1976d2%40tantorlabs.com Author: Alena Rybakina <a.rybakina@postgrespro.ru> Author: Andrei Lepikhov <lepihov@gmail.com> Reviewed-by: Ivan Kush <ivan.kush@tantorlabs.com> Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
1 parent d48d2e2 commit c0962a1

File tree

6 files changed

+512
-5
lines changed

6 files changed

+512
-5
lines changed

src/backend/optimizer/plan/subselect.c

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,86 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context)
12141214
return expression_tree_walker(node, inline_cte_walker, context);
12151215
}
12161216

1217+
/*
1218+
* Attempt to transform 'testexpr' over the VALUES subquery into
1219+
* a ScalarArrayOpExpr. We currently support the transformation only when
1220+
* it ends up with a constant array. Otherwise, the evaluation of non-hashed
1221+
* SAOP might be slower than the corresponding Hash Join with VALUES.
1222+
*
1223+
* Return transformed ScalarArrayOpExpr or NULL if transformation isn't
1224+
* allowed.
1225+
*/
1226+
ScalarArrayOpExpr *
1227+
convert_VALUES_to_ANY(PlannerInfo *root, Node *testexpr, Query *values)
1228+
{
1229+
RangeTblEntry *rte;
1230+
Node *leftop;
1231+
Node *rightop;
1232+
Oid opno;
1233+
ListCell *lc;
1234+
Oid inputcollid;
1235+
List *exprs = NIL;
1236+
1237+
/*
1238+
* Check we have a binary operator over a single-column subquery with no
1239+
* joins and no LIMIT/OFFSET/ORDER BY clauses.
1240+
*/
1241+
if (!IsA(testexpr, OpExpr) ||
1242+
list_length(((OpExpr *) testexpr)->args) != 2 ||
1243+
list_length(values->targetList) > 1 ||
1244+
values->limitCount != NULL ||
1245+
values->limitOffset != NULL ||
1246+
values->sortClause != NIL ||
1247+
list_length(values->rtable) != 1)
1248+
return NULL;
1249+
1250+
rte = linitial_node(RangeTblEntry, values->rtable);
1251+
leftop = linitial(((OpExpr *) testexpr)->args);
1252+
rightop = lsecond(((OpExpr *) testexpr)->args);
1253+
opno = ((OpExpr *) testexpr)->opno;
1254+
inputcollid = ((OpExpr *) testexpr)->inputcollid;
1255+
1256+
/*
1257+
* Also, check that only RTE corresponds to VALUES; the list of values has
1258+
* at least two items and no volatile functions.
1259+
*/
1260+
if (rte->rtekind != RTE_VALUES ||
1261+
list_length(rte->values_lists) < 2 ||
1262+
contain_volatile_functions((Node *) rte->values_lists))
1263+
return NULL;
1264+
1265+
foreach(lc, rte->values_lists)
1266+
{
1267+
List *elem = lfirst(lc);
1268+
Node *value = linitial(elem);
1269+
1270+
/*
1271+
* Prepare an evaluation of the right side of the operator with
1272+
* substitution of the given value.
1273+
*/
1274+
value = convert_testexpr(root, rightop, list_make1(value));
1275+
1276+
/*
1277+
* Try to evaluate constant expressions. We could get Const as a
1278+
* result.
1279+
*/
1280+
value = eval_const_expressions(root, value);
1281+
1282+
/*
1283+
* As we only support constant output arrays, all the items must also
1284+
* be constant.
1285+
*/
1286+
if (!IsA(value, Const))
1287+
return NULL;
1288+
1289+
exprs = lappend(exprs, value);
1290+
}
1291+
1292+
/* Finally, build ScalarArrayOpExpr at the top of the 'exprs' list. */
1293+
return make_SAOP_expr(opno, leftop, exprType(rightop),
1294+
linitial_oid(rte->colcollations), inputcollid,
1295+
exprs, false);
1296+
}
12171297

12181298
/*
12191299
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join

src/backend/optimizer/prep/prepjointree.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,18 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
664664
/* Is it a convertible ANY or EXISTS clause? */
665665
if (sublink->subLinkType == ANY_SUBLINK)
666666
{
667+
ScalarArrayOpExpr *saop;
668+
669+
if ((saop = convert_VALUES_to_ANY(root,
670+
sublink->testexpr,
671+
(Query *) sublink->subselect)) != NULL)
672+
673+
/*
674+
* The VALUES sequence was simplified. Nothing more to do
675+
* here.
676+
*/
677+
return (Node *) saop;
678+
667679
if ((j = convert_ANY_sublink_to_join(root, sublink,
668680
available_rels1)) != NULL)
669681
{

src/backend/optimizer/util/clauses.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5484,26 +5484,30 @@ make_SAOP_expr(Oid oper, Node *leftexpr, Oid coltype, Oid arraycollid,
54845484
bool typbyval;
54855485
char typalign;
54865486
Datum *elems;
5487+
bool *nulls;
54875488
int i = 0;
54885489
ArrayType *arrayConst;
5490+
int dims[1] = {list_length(exprs)};
5491+
int lbs[1] = {1};
54895492

54905493
get_typlenbyvalalign(coltype, &typlen, &typbyval, &typalign);
54915494

54925495
elems = (Datum *) palloc(sizeof(Datum) * list_length(exprs));
5496+
nulls = (bool *) palloc(sizeof(bool) * list_length(exprs));
54935497
foreach_node(Const, value, exprs)
54945498
{
5495-
Assert(!value->constisnull);
5496-
5497-
elems[i++] = value->constvalue;
5499+
elems[i] = value->constvalue;
5500+
nulls[i++] = value->constisnull;
54985501
}
54995502

5500-
arrayConst = construct_array(elems, i, coltype,
5501-
typlen, typbyval, typalign);
5503+
arrayConst = construct_md_array(elems, nulls, 1, dims, lbs,
5504+
coltype, typlen, typbyval, typalign);
55025505
arrayNode = (Node *) makeConst(arraytype, -1, arraycollid,
55035506
-1, PointerGetDatum(arrayConst),
55045507
false, false);
55055508

55065509
pfree(elems);
5510+
pfree(nulls);
55075511
list_free(exprs);
55085512
}
55095513

src/include/optimizer/subselect.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#include "nodes/plannodes.h"
1818

1919
extern void SS_process_ctes(PlannerInfo *root);
20+
extern ScalarArrayOpExpr *convert_VALUES_to_ANY(PlannerInfo *root,
21+
Node *testexpr,
22+
Query *values);
2023
extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root,
2124
SubLink *sublink,
2225
Relids available_rels);

0 commit comments

Comments
 (0)