Skip to content

Commit e5dcbb8

Browse files
committed
Rework code to determine partition pruning procedure
Amit Langote reported that partition prune was unable to work with arrays, enums, etc, which led him to research the appropriate way to match query clauses to partition keys: instead of searching for an exact match of the expression's type, it is better to rely on the fact that the expression qual has already been resolved to a specific operator, and that the partition key is linked to a specific operator family. With that info, it's possible to figure out the strategy and comparison function to use for the pruning clause in a manner that works reliably for pseudo-types also. Include new test cases that demonstrate pruning where pseudotypes are involved. Author: Amit Langote, Álvaro Herrera Discussion: https://postgr.es/m/2b02f1e9-9812-9c41-972d-517bdc0f815d@lab.ntt.co.jp
1 parent cea5f9a commit e5dcbb8

File tree

3 files changed

+251
-50
lines changed

3 files changed

+251
-50
lines changed

src/backend/partitioning/partprune.c

Lines changed: 60 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -946,13 +946,7 @@ gen_prune_step_op(GeneratePruningStepsContext *context,
946946
* InvalidStrategy to signal get_matching_list_bounds to do the right
947947
* thing.
948948
*/
949-
if (op_is_ne)
950-
{
951-
Assert(opstrategy == BTEqualStrategyNumber);
952-
opstep->opstrategy = InvalidStrategy;
953-
}
954-
else
955-
opstep->opstrategy = opstrategy;
949+
opstep->opstrategy = op_is_ne ? InvalidStrategy : opstrategy;
956950
Assert(list_length(exprs) == list_length(cmpfns));
957951
opstep->exprs = exprs;
958952
opstep->cmpfns = cmpfns;
@@ -1426,10 +1420,12 @@ match_clause_to_partition_key(RelOptInfo *rel,
14261420
OpExpr *opclause = (OpExpr *) clause;
14271421
Expr *leftop,
14281422
*rightop;
1429-
Oid commutator = InvalidOid,
1423+
Oid op_lefttype,
1424+
op_righttype,
1425+
commutator = InvalidOid,
14301426
negator = InvalidOid;
14311427
Oid cmpfn;
1432-
Oid exprtype;
1428+
int op_strategy;
14331429
bool is_opne_listp = false;
14341430
PartClauseInfo *partclause;
14351431

@@ -1483,58 +1479,80 @@ match_clause_to_partition_key(RelOptInfo *rel,
14831479
return PARTCLAUSE_UNSUPPORTED;
14841480

14851481
/*
1486-
* Normally we only bother with operators that are listed as being
1487-
* part of the partitioning operator family. But we make an exception
1488-
* in one case -- operators named '<>' are not listed in any operator
1489-
* family whatsoever, in which case, we try to perform partition
1490-
* pruning with it only if list partitioning is in use.
1482+
* Determine the input types of the operator we're considering.
1483+
*
1484+
* Normally we only care about operators that are listed as being part
1485+
* of the partitioning operator family. But there is one exception:
1486+
* the not-equals operators are not listed in any operator family
1487+
* whatsoever, but their negators (equality) are. We can use one of
1488+
* those if we find it, but only for list partitioning.
14911489
*/
1492-
if (!op_in_opfamily(opclause->opno, partopfamily))
1490+
if (op_in_opfamily(opclause->opno, partopfamily))
14931491
{
1492+
Oid oper;
1493+
1494+
oper = OidIsValid(commutator) ? commutator : opclause->opno;
1495+
get_op_opfamily_properties(oper, partopfamily, false,
1496+
&op_strategy, &op_lefttype,
1497+
&op_righttype);
1498+
}
1499+
else
1500+
{
1501+
/* Not good unless list partitioning */
14941502
if (part_scheme->strategy != PARTITION_STRATEGY_LIST)
14951503
return PARTCLAUSE_UNSUPPORTED;
14961504

1497-
/*
1498-
* To confirm if the operator is really '<>', check if its negator
1499-
* is a btree equality operator.
1500-
*/
1505+
/* See if the negator is equality */
15011506
negator = get_negator(opclause->opno);
15021507
if (OidIsValid(negator) && op_in_opfamily(negator, partopfamily))
15031508
{
1504-
Oid lefttype;
1505-
Oid righttype;
1506-
int strategy;
1507-
15081509
get_op_opfamily_properties(negator, partopfamily, false,
1509-
&strategy, &lefttype, &righttype);
1510-
1511-
if (strategy == BTEqualStrategyNumber)
1512-
is_opne_listp = true;
1510+
&op_strategy, &op_lefttype,
1511+
&op_righttype);
1512+
if (op_strategy == BTEqualStrategyNumber)
1513+
is_opne_listp = true; /* bingo */
15131514
}
15141515

15151516
/* Operator isn't really what we were hoping it'd be. */
15161517
if (!is_opne_listp)
15171518
return PARTCLAUSE_UNSUPPORTED;
15181519
}
15191520

1520-
/* Check if we're going to need a cross-type comparison function. */
1521-
exprtype = exprType((Node *) expr);
1522-
if (exprtype != part_scheme->partopcintype[partkeyidx])
1521+
/*
1522+
* Now find the procedure to use, based on the types. If the clause's
1523+
* other argument is of the same type as the partitioning opclass's
1524+
* declared input type, we can use the procedure cached in
1525+
* PartitionKey. If not, search for a cross-type one in the same
1526+
* opfamily; if one doesn't exist, give up on pruning for this clause.
1527+
*/
1528+
if (op_righttype == part_scheme->partopcintype[partkeyidx])
1529+
cmpfn = part_scheme->partsupfunc[partkeyidx].fn_oid;
1530+
else
15231531
{
15241532
switch (part_scheme->strategy)
15251533
{
1534+
/*
1535+
* For range and list partitioning, we need the ordering
1536+
* procedure with lefttype being the partition key's type, and
1537+
* righttype the clause's operator's right type.
1538+
*/
15261539
case PARTITION_STRATEGY_LIST:
15271540
case PARTITION_STRATEGY_RANGE:
15281541
cmpfn =
15291542
get_opfamily_proc(part_scheme->partopfamily[partkeyidx],
15301543
part_scheme->partopcintype[partkeyidx],
1531-
exprtype, BTORDER_PROC);
1544+
op_righttype, BTORDER_PROC);
15321545
break;
15331546

1547+
/*
1548+
* For hash partitioning, we need the hashing procedure for
1549+
* the clause's type.
1550+
*/
15341551
case PARTITION_STRATEGY_HASH:
15351552
cmpfn =
15361553
get_opfamily_proc(part_scheme->partopfamily[partkeyidx],
1537-
exprtype, exprtype, HASHEXTENDED_PROC);
1554+
op_righttype, op_righttype,
1555+
HASHEXTENDED_PROC);
15381556
break;
15391557

15401558
default:
@@ -1547,34 +1565,26 @@ match_clause_to_partition_key(RelOptInfo *rel,
15471565
if (!OidIsValid(cmpfn))
15481566
return PARTCLAUSE_UNSUPPORTED;
15491567
}
1550-
else
1551-
cmpfn = part_scheme->partsupfunc[partkeyidx].fn_oid;
15521568

1569+
/*
1570+
* Build the clause, passing the negator or commutator if applicable.
1571+
*/
15531572
partclause = (PartClauseInfo *) palloc(sizeof(PartClauseInfo));
15541573
partclause->keyno = partkeyidx;
1555-
1556-
/* For <> operator clauses, pass on the negator. */
1557-
partclause->op_is_ne = false;
1558-
partclause->op_strategy = InvalidStrategy;
1559-
15601574
if (is_opne_listp)
15611575
{
15621576
Assert(OidIsValid(negator));
15631577
partclause->opno = negator;
15641578
partclause->op_is_ne = true;
1565-
1566-
/*
1567-
* We already know the strategy in this case, so may as well set
1568-
* it rather than having to look it up later.
1569-
*/
1570-
partclause->op_strategy = BTEqualStrategyNumber;
1579+
partclause->op_strategy = InvalidStrategy;
15711580
}
1572-
/* And if commuted before matching, pass on the commutator */
1573-
else if (OidIsValid(commutator))
1574-
partclause->opno = commutator;
15751581
else
1576-
partclause->opno = opclause->opno;
1577-
1582+
{
1583+
partclause->opno = OidIsValid(commutator) ?
1584+
commutator : opclause->opno;
1585+
partclause->op_is_ne = false;
1586+
partclause->op_strategy = op_strategy;
1587+
}
15781588
partclause->expr = expr;
15791589
partclause->cmpfn = cmpfn;
15801590

src/test/regress/expected/partition_prune.out

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2599,3 +2599,141 @@ select * from boolp where a = (select value from boolvalues where not value);
25992599

26002600
drop table boolp;
26012601
reset enable_indexonlyscan;
2602+
--
2603+
-- check that pruning works properly when the partition key is of a
2604+
-- pseudotype
2605+
--
2606+
-- array type list partition key
2607+
create table pp_arrpart (a int[]) partition by list (a);
2608+
create table pp_arrpart1 partition of pp_arrpart for values in ('{1}');
2609+
create table pp_arrpart2 partition of pp_arrpart for values in ('{2, 3}', '{4, 5}');
2610+
explain (costs off) select * from pp_arrpart where a = '{1}';
2611+
QUERY PLAN
2612+
----------------------------------------
2613+
Append
2614+
-> Seq Scan on pp_arrpart1
2615+
Filter: (a = '{1}'::integer[])
2616+
(3 rows)
2617+
2618+
explain (costs off) select * from pp_arrpart where a = '{1, 2}';
2619+
QUERY PLAN
2620+
--------------------------
2621+
Result
2622+
One-Time Filter: false
2623+
(2 rows)
2624+
2625+
explain (costs off) select * from pp_arrpart where a in ('{4, 5}', '{1}');
2626+
QUERY PLAN
2627+
----------------------------------------------------------------------
2628+
Append
2629+
-> Seq Scan on pp_arrpart1
2630+
Filter: ((a = '{4,5}'::integer[]) OR (a = '{1}'::integer[]))
2631+
-> Seq Scan on pp_arrpart2
2632+
Filter: ((a = '{4,5}'::integer[]) OR (a = '{1}'::integer[]))
2633+
(5 rows)
2634+
2635+
drop table pp_arrpart;
2636+
-- array type hash partition key
2637+
create table pph_arrpart (a int[]) partition by hash (a);
2638+
create table pph_arrpart1 partition of pph_arrpart for values with (modulus 2, remainder 0);
2639+
create table pph_arrpart2 partition of pph_arrpart for values with (modulus 2, remainder 1);
2640+
insert into pph_arrpart values ('{1}'), ('{1, 2}'), ('{4, 5}');
2641+
select tableoid::regclass, * from pph_arrpart order by 1;
2642+
tableoid | a
2643+
--------------+-------
2644+
pph_arrpart1 | {1,2}
2645+
pph_arrpart1 | {4,5}
2646+
pph_arrpart2 | {1}
2647+
(3 rows)
2648+
2649+
explain (costs off) select * from pph_arrpart where a = '{1}';
2650+
QUERY PLAN
2651+
----------------------------------------
2652+
Append
2653+
-> Seq Scan on pph_arrpart2
2654+
Filter: (a = '{1}'::integer[])
2655+
(3 rows)
2656+
2657+
explain (costs off) select * from pph_arrpart where a = '{1, 2}';
2658+
QUERY PLAN
2659+
------------------------------------------
2660+
Append
2661+
-> Seq Scan on pph_arrpart1
2662+
Filter: (a = '{1,2}'::integer[])
2663+
(3 rows)
2664+
2665+
explain (costs off) select * from pph_arrpart where a in ('{4, 5}', '{1}');
2666+
QUERY PLAN
2667+
----------------------------------------------------------------------
2668+
Append
2669+
-> Seq Scan on pph_arrpart1
2670+
Filter: ((a = '{4,5}'::integer[]) OR (a = '{1}'::integer[]))
2671+
-> Seq Scan on pph_arrpart2
2672+
Filter: ((a = '{4,5}'::integer[]) OR (a = '{1}'::integer[]))
2673+
(5 rows)
2674+
2675+
drop table pph_arrpart;
2676+
-- enum type list partition key
2677+
create type pp_colors as enum ('green', 'blue', 'black');
2678+
create table pp_enumpart (a pp_colors) partition by list (a);
2679+
create table pp_enumpart_green partition of pp_enumpart for values in ('green');
2680+
create table pp_enumpart_blue partition of pp_enumpart for values in ('blue');
2681+
explain (costs off) select * from pp_enumpart where a = 'blue';
2682+
QUERY PLAN
2683+
-----------------------------------------
2684+
Append
2685+
-> Seq Scan on pp_enumpart_blue
2686+
Filter: (a = 'blue'::pp_colors)
2687+
(3 rows)
2688+
2689+
explain (costs off) select * from pp_enumpart where a = 'black';
2690+
QUERY PLAN
2691+
--------------------------
2692+
Result
2693+
One-Time Filter: false
2694+
(2 rows)
2695+
2696+
drop table pp_enumpart;
2697+
drop type pp_colors;
2698+
-- record type as partition key
2699+
create type pp_rectype as (a int, b int);
2700+
create table pp_recpart (a pp_rectype) partition by list (a);
2701+
create table pp_recpart_11 partition of pp_recpart for values in ('(1,1)');
2702+
create table pp_recpart_23 partition of pp_recpart for values in ('(2,3)');
2703+
explain (costs off) select * from pp_recpart where a = '(1,1)'::pp_rectype;
2704+
QUERY PLAN
2705+
-------------------------------------------
2706+
Append
2707+
-> Seq Scan on pp_recpart_11
2708+
Filter: (a = '(1,1)'::pp_rectype)
2709+
(3 rows)
2710+
2711+
explain (costs off) select * from pp_recpart where a = '(1,2)'::pp_rectype;
2712+
QUERY PLAN
2713+
--------------------------
2714+
Result
2715+
One-Time Filter: false
2716+
(2 rows)
2717+
2718+
drop table pp_recpart;
2719+
drop type pp_rectype;
2720+
-- range type partition key
2721+
create table pp_intrangepart (a int4range) partition by list (a);
2722+
create table pp_intrangepart12 partition of pp_intrangepart for values in ('[1,2]');
2723+
create table pp_intrangepart2inf partition of pp_intrangepart for values in ('[2,)');
2724+
explain (costs off) select * from pp_intrangepart where a = '[1,2]'::int4range;
2725+
QUERY PLAN
2726+
------------------------------------------
2727+
Append
2728+
-> Seq Scan on pp_intrangepart12
2729+
Filter: (a = '[1,3)'::int4range)
2730+
(3 rows)
2731+
2732+
explain (costs off) select * from pp_intrangepart where a = '(1,2)'::int4range;
2733+
QUERY PLAN
2734+
--------------------------
2735+
Result
2736+
One-Time Filter: false
2737+
(2 rows)
2738+
2739+
drop table pp_intrangepart;

src/test/regress/sql/partition_prune.sql

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,3 +645,56 @@ select * from boolp where a = (select value from boolvalues where not value);
645645
drop table boolp;
646646

647647
reset enable_indexonlyscan;
648+
649+
--
650+
-- check that pruning works properly when the partition key is of a
651+
-- pseudotype
652+
--
653+
654+
-- array type list partition key
655+
create table pp_arrpart (a int[]) partition by list (a);
656+
create table pp_arrpart1 partition of pp_arrpart for values in ('{1}');
657+
create table pp_arrpart2 partition of pp_arrpart for values in ('{2, 3}', '{4, 5}');
658+
explain (costs off) select * from pp_arrpart where a = '{1}';
659+
explain (costs off) select * from pp_arrpart where a = '{1, 2}';
660+
explain (costs off) select * from pp_arrpart where a in ('{4, 5}', '{1}');
661+
drop table pp_arrpart;
662+
663+
-- array type hash partition key
664+
create table pph_arrpart (a int[]) partition by hash (a);
665+
create table pph_arrpart1 partition of pph_arrpart for values with (modulus 2, remainder 0);
666+
create table pph_arrpart2 partition of pph_arrpart for values with (modulus 2, remainder 1);
667+
insert into pph_arrpart values ('{1}'), ('{1, 2}'), ('{4, 5}');
668+
select tableoid::regclass, * from pph_arrpart order by 1;
669+
explain (costs off) select * from pph_arrpart where a = '{1}';
670+
explain (costs off) select * from pph_arrpart where a = '{1, 2}';
671+
explain (costs off) select * from pph_arrpart where a in ('{4, 5}', '{1}');
672+
drop table pph_arrpart;
673+
674+
-- enum type list partition key
675+
create type pp_colors as enum ('green', 'blue', 'black');
676+
create table pp_enumpart (a pp_colors) partition by list (a);
677+
create table pp_enumpart_green partition of pp_enumpart for values in ('green');
678+
create table pp_enumpart_blue partition of pp_enumpart for values in ('blue');
679+
explain (costs off) select * from pp_enumpart where a = 'blue';
680+
explain (costs off) select * from pp_enumpart where a = 'black';
681+
drop table pp_enumpart;
682+
drop type pp_colors;
683+
684+
-- record type as partition key
685+
create type pp_rectype as (a int, b int);
686+
create table pp_recpart (a pp_rectype) partition by list (a);
687+
create table pp_recpart_11 partition of pp_recpart for values in ('(1,1)');
688+
create table pp_recpart_23 partition of pp_recpart for values in ('(2,3)');
689+
explain (costs off) select * from pp_recpart where a = '(1,1)'::pp_rectype;
690+
explain (costs off) select * from pp_recpart where a = '(1,2)'::pp_rectype;
691+
drop table pp_recpart;
692+
drop type pp_rectype;
693+
694+
-- range type partition key
695+
create table pp_intrangepart (a int4range) partition by list (a);
696+
create table pp_intrangepart12 partition of pp_intrangepart for values in ('[1,2]');
697+
create table pp_intrangepart2inf partition of pp_intrangepart for values in ('[2,)');
698+
explain (costs off) select * from pp_intrangepart where a = '[1,2]'::int4range;
699+
explain (costs off) select * from pp_intrangepart where a = '(1,2)'::int4range;
700+
drop table pp_intrangepart;

0 commit comments

Comments
 (0)