Skip to content

Commit 1383874

Browse files
author
Etsuro Fujita
committed
Fix some issues with step generation in partition pruning.
In the case of range partitioning, get_steps_using_prefix() assumes that the passed-in prefix list contains at least one clause for each of the partition keys earlier than one specified in the passed-in step_lastkeyno, but the caller (ie, gen_prune_steps_from_opexps()) didn't take it into account, which led to a server crash or incorrect results when the list contained no clauses for such partition keys, as reported in bug #16500 and #16501 from Kobayashi Hisanori. Update the caller to call that function only when the list created there contains at least one clause for each of the earlier partition keys in the case of range partitioning. While at it, fix some other issues: * The list to pass to get_steps_using_prefix() is allowed to contain multiple clauses for the same partition key, as described in the comment for that function, but that function actually assumed that the list contained just a single clause for each of middle partition keys, which led to an assertion failure when the list contained multiple clauses for such partition keys. Update that function to match the comment. * In the case of hash partitioning, partition keys are allowed to be NULL, in which case the list to pass to get_steps_using_prefix() contains no clauses for NULL partition keys, but that function treats that case as like the case of range partitioning, which led to the assertion failure. Update the assertion test to take into account NULL partition keys in the case of hash partitioning. * Fix a typo in a comment in get_steps_using_prefix_recurse(). * gen_partprune_steps() failed to detect self-contradiction from strict-qual clauses and an IS NULL clause for the same partition key in some cases, producing incorrect partition-pruning steps, which led to incorrect results of partition pruning, but didn't cause any user-visible problems fortunately, as the self-contradiction is detected later in the query planning. Update that function to detect the self-contradiction. Per bug #16500 and #16501 from Kobayashi Hisanori. Patch by me, initial diagnosis for the reported issue and review by Dmitry Dolgov. Back-patch to v11, where partition pruning was introduced. Discussion: https://postgr.es/m/16500-d1613f2a78e1e090%40postgresql.org Discussion: https://postgr.es/m/16501-5234a9a0394f6754%40postgresql.org
1 parent bcbf944 commit 1383874

File tree

3 files changed

+294
-56
lines changed

3 files changed

+294
-56
lines changed

src/backend/partitioning/partprune.c

+131-56
Original file line numberDiff line numberDiff line change
@@ -1058,8 +1058,12 @@ gen_partprune_steps_internal(GeneratePruningStepsContext *context,
10581058
case PARTCLAUSE_MATCH_NULLNESS:
10591059
if (!clause_is_not_null)
10601060
{
1061-
/* check for conflicting IS NOT NULL */
1062-
if (bms_is_member(i, notnullkeys))
1061+
/*
1062+
* check for conflicting IS NOT NULL as well as
1063+
* contradicting strict clauses
1064+
*/
1065+
if (bms_is_member(i, notnullkeys) ||
1066+
keyclauses[i] != NIL)
10631067
{
10641068
context->contradictory = true;
10651069
return NIL;
@@ -1308,34 +1312,18 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
13081312
{
13091313
case PARTITION_STRATEGY_LIST:
13101314
case PARTITION_STRATEGY_RANGE:
1311-
{
1312-
PartClauseInfo *last = NULL;
1313-
1314-
/*
1315-
* Add this clause to the list of clauses to be used
1316-
* for pruning if this is the first such key for this
1317-
* operator strategy or if it is consecutively next to
1318-
* the last column for which a clause with this
1319-
* operator strategy was matched.
1320-
*/
1321-
if (btree_clauses[pc->op_strategy] != NIL)
1322-
last = llast(btree_clauses[pc->op_strategy]);
1323-
1324-
if (last == NULL ||
1325-
i == last->keyno || i == last->keyno + 1)
1326-
btree_clauses[pc->op_strategy] =
1327-
lappend(btree_clauses[pc->op_strategy], pc);
1315+
btree_clauses[pc->op_strategy] =
1316+
lappend(btree_clauses[pc->op_strategy], pc);
13281317

1329-
/*
1330-
* We can't consider subsequent partition keys if the
1331-
* clause for the current key contains a non-inclusive
1332-
* operator.
1333-
*/
1334-
if (pc->op_strategy == BTLessStrategyNumber ||
1335-
pc->op_strategy == BTGreaterStrategyNumber)
1336-
consider_next_key = false;
1337-
break;
1338-
}
1318+
/*
1319+
* We can't consider subsequent partition keys if the
1320+
* clause for the current key contains a non-inclusive
1321+
* operator.
1322+
*/
1323+
if (pc->op_strategy == BTLessStrategyNumber ||
1324+
pc->op_strategy == BTGreaterStrategyNumber)
1325+
consider_next_key = false;
1326+
break;
13391327

13401328
case PARTITION_STRATEGY_HASH:
13411329
if (pc->op_strategy != HTEqualStrategyNumber)
@@ -1374,6 +1362,7 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
13741362
List *eq_clauses = btree_clauses[BTEqualStrategyNumber];
13751363
List *le_clauses = btree_clauses[BTLessEqualStrategyNumber];
13761364
List *ge_clauses = btree_clauses[BTGreaterEqualStrategyNumber];
1365+
bool pk_has_clauses[PARTITION_MAX_KEYS];
13771366
int strat;
13781367

13791368
/*
@@ -1396,6 +1385,35 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
13961385
ListCell *lc1;
13971386
List *prefix = NIL;
13981387
List *pc_steps;
1388+
bool prefix_valid = true;
1389+
1390+
/*
1391+
* If this is a clause for the first partition key,
1392+
* there are no preceding expressions; generate a
1393+
* pruning step without a prefix.
1394+
*
1395+
* Note that we pass NULL for step_nullkeys, because
1396+
* we don't search list/range partition bounds where
1397+
* some keys are NULL.
1398+
*/
1399+
if (pc->keyno == 0)
1400+
{
1401+
Assert(pc->op_strategy == strat);
1402+
pc_steps = get_steps_using_prefix(context, strat,
1403+
pc->op_is_ne,
1404+
pc->expr,
1405+
pc->cmpfn,
1406+
0,
1407+
NULL,
1408+
NIL);
1409+
opsteps = list_concat(opsteps, pc_steps);
1410+
continue;
1411+
}
1412+
1413+
/* (Re-)initialize the pk_has_clauses array */
1414+
Assert(pc->keyno > 0);
1415+
for (i = 0; i < pc->keyno; i++)
1416+
pk_has_clauses[i] = false;
13991417

14001418
/*
14011419
* Expressions from = clauses can always be in the
@@ -1408,7 +1426,10 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14081426
if (eqpc->keyno == pc->keyno)
14091427
break;
14101428
if (eqpc->keyno < pc->keyno)
1429+
{
14111430
prefix = lappend(prefix, eqpc);
1431+
pk_has_clauses[eqpc->keyno] = true;
1432+
}
14121433
}
14131434

14141435
/*
@@ -1426,7 +1447,10 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14261447
if (lepc->keyno == pc->keyno)
14271448
break;
14281449
if (lepc->keyno < pc->keyno)
1450+
{
14291451
prefix = lappend(prefix, lepc);
1452+
pk_has_clauses[lepc->keyno] = true;
1453+
}
14301454
}
14311455
}
14321456

@@ -1445,11 +1469,33 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14451469
if (gepc->keyno == pc->keyno)
14461470
break;
14471471
if (gepc->keyno < pc->keyno)
1472+
{
14481473
prefix = lappend(prefix, gepc);
1474+
pk_has_clauses[gepc->keyno] = true;
1475+
}
14491476
}
14501477
}
14511478

14521479
/*
1480+
* Check whether every earlier partition key has at
1481+
* least one clause.
1482+
*/
1483+
for (i = 0; i < pc->keyno; i++)
1484+
{
1485+
if (!pk_has_clauses[i])
1486+
{
1487+
prefix_valid = false;
1488+
break;
1489+
}
1490+
}
1491+
1492+
/*
1493+
* If prefix_valid, generate PartitionPruneStepOps.
1494+
* Otherwise, we would not find clauses for a valid
1495+
* subset of the partition keys anymore for the
1496+
* strategy; give up on generating partition pruning
1497+
* steps further for the strategy.
1498+
*
14531499
* As mentioned above, if 'prefix' contains multiple
14541500
* expressions for the same key, the following will
14551501
* generate multiple steps, one for each combination
@@ -1459,15 +1505,20 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14591505
* we don't search list/range partition bounds where
14601506
* some keys are NULL.
14611507
*/
1462-
Assert(pc->op_strategy == strat);
1463-
pc_steps = get_steps_using_prefix(context, strat,
1464-
pc->op_is_ne,
1465-
pc->expr,
1466-
pc->cmpfn,
1467-
pc->keyno,
1468-
NULL,
1469-
prefix);
1470-
opsteps = list_concat(opsteps, pc_steps);
1508+
if (prefix_valid)
1509+
{
1510+
Assert(pc->op_strategy == strat);
1511+
pc_steps = get_steps_using_prefix(context, strat,
1512+
pc->op_is_ne,
1513+
pc->expr,
1514+
pc->cmpfn,
1515+
pc->keyno,
1516+
NULL,
1517+
prefix);
1518+
opsteps = list_concat(opsteps, pc_steps);
1519+
}
1520+
else
1521+
break;
14711522
}
14721523
}
14731524
break;
@@ -2182,6 +2233,14 @@ match_clause_to_partition_key(GeneratePruningStepsContext *context,
21822233
* 'prefix'. Actually, since 'prefix' may contain multiple clauses for the
21832234
* same partition key column, we must generate steps for various combinations
21842235
* of the clauses of different keys.
2236+
*
2237+
* For list/range partitioning, callers must ensure that step_nullkeys is
2238+
* NULL, and that prefix contains at least one clause for each of the
2239+
* partition keys earlier than one specified in step_lastkeyno if it's
2240+
* greater than zero. For hash partitioning, step_nullkeys is allowed to be
2241+
* non-NULL, but they must ensure that prefix contains at least one clause
2242+
* for each of the partition keys other than those specified in step_nullkeys
2243+
* and step_lastkeyno.
21852244
*/
21862245
static List *
21872246
get_steps_using_prefix(GeneratePruningStepsContext *context,
@@ -2193,6 +2252,9 @@ get_steps_using_prefix(GeneratePruningStepsContext *context,
21932252
Bitmapset *step_nullkeys,
21942253
List *prefix)
21952254
{
2255+
Assert(step_nullkeys == NULL ||
2256+
context->rel->part_scheme->strategy == PARTITION_STRATEGY_HASH);
2257+
21962258
/* Quick exit if there are no values to prefix with. */
21972259
if (list_length(prefix) == 0)
21982260
{
@@ -2261,7 +2323,7 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
22612323
ListCell *next_start;
22622324

22632325
/*
2264-
* For each clause with cur_keyno, adds its expr and cmpfn to
2326+
* For each clause with cur_keyno, add its expr and cmpfn to
22652327
* step_exprs and step_cmpfns, respectively, and recurse after setting
22662328
* next_start to the ListCell of the first clause for the next
22672329
* partition key.
@@ -2278,23 +2340,19 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
22782340
for_each_cell(lc, prefix, start)
22792341
{
22802342
List *moresteps;
2343+
List *step_exprs1,
2344+
*step_cmpfns1;
22812345

22822346
pc = lfirst(lc);
22832347
if (pc->keyno == cur_keyno)
22842348
{
2285-
/* clean up before starting a new recursion cycle. */
2286-
if (cur_keyno == 0)
2287-
{
2288-
list_free(step_exprs);
2289-
list_free(step_cmpfns);
2290-
step_exprs = list_make1(pc->expr);
2291-
step_cmpfns = list_make1_oid(pc->cmpfn);
2292-
}
2293-
else
2294-
{
2295-
step_exprs = lappend(step_exprs, pc->expr);
2296-
step_cmpfns = lappend_oid(step_cmpfns, pc->cmpfn);
2297-
}
2349+
/* Leave the original step_exprs unmodified. */
2350+
step_exprs1 = list_copy(step_exprs);
2351+
step_exprs1 = lappend(step_exprs1, pc->expr);
2352+
2353+
/* Leave the original step_cmpfns unmodified. */
2354+
step_cmpfns1 = list_copy(step_cmpfns);
2355+
step_cmpfns1 = lappend_oid(step_cmpfns1, pc->cmpfn);
22982356
}
22992357
else
23002358
{
@@ -2311,19 +2369,36 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
23112369
step_nullkeys,
23122370
prefix,
23132371
next_start,
2314-
step_exprs,
2315-
step_cmpfns);
2372+
step_exprs1,
2373+
step_cmpfns1);
23162374
result = list_concat(result, moresteps);
2375+
2376+
list_free(step_exprs1);
2377+
list_free(step_cmpfns1);
23172378
}
23182379
}
23192380
else
23202381
{
23212382
/*
23222383
* End the current recursion cycle and start generating steps, one for
23232384
* each clause with cur_keyno, which is all clauses from here onward
2324-
* till the end of the list.
2385+
* till the end of the list. Note that for hash partitioning,
2386+
* step_nullkeys is allowed to be non-empty, in which case step_exprs
2387+
* would only contain expressions for the earlier partition keys that
2388+
* are not specified in step_nullkeys.
2389+
*/
2390+
Assert(list_length(step_exprs) == cur_keyno ||
2391+
!bms_is_empty(step_nullkeys));
2392+
/*
2393+
* Note also that for hash partitioning, each partition key should
2394+
* have either equality clauses or an IS NULL clause, so if a
2395+
* partition key doesn't have an expression, it would be specified
2396+
* in step_nullkeys.
23252397
*/
2326-
Assert(list_length(step_exprs) == cur_keyno);
2398+
Assert(context->rel->part_scheme->strategy
2399+
!= PARTITION_STRATEGY_HASH ||
2400+
list_length(step_exprs) + 2 + bms_num_members(step_nullkeys) ==
2401+
context->rel->part_scheme->partnatts);
23272402
for_each_cell(lc, prefix, start)
23282403
{
23292404
PartClauseInfo *pc = lfirst(lc);

src/test/regress/expected/partition_prune.out

+92
Original file line numberDiff line numberDiff line change
@@ -3671,3 +3671,95 @@ explain (costs off) update listp1 set a = 1 where a = 2;
36713671
reset constraint_exclusion;
36723672
reset enable_partition_pruning;
36733673
drop table listp;
3674+
--
3675+
-- Check that gen_prune_steps_from_opexps() works well for various cases of
3676+
-- clauses for different partition keys
3677+
--
3678+
create table rp_prefix_test1 (a int, b varchar) partition by range(a, b);
3679+
create table rp_prefix_test1_p1 partition of rp_prefix_test1 for values from (1, 'a') to (1, 'b');
3680+
create table rp_prefix_test1_p2 partition of rp_prefix_test1 for values from (2, 'a') to (2, 'b');
3681+
-- Don't call get_steps_using_prefix() with the last partition key b plus
3682+
-- an empty prefix
3683+
explain (costs off) select * from rp_prefix_test1 where a <= 1 and b = 'a';
3684+
QUERY PLAN
3685+
--------------------------------------------------
3686+
Seq Scan on rp_prefix_test1_p1 rp_prefix_test1
3687+
Filter: ((a <= 1) AND ((b)::text = 'a'::text))
3688+
(2 rows)
3689+
3690+
create table rp_prefix_test2 (a int, b int, c int) partition by range(a, b, c);
3691+
create table rp_prefix_test2_p1 partition of rp_prefix_test2 for values from (1, 1, 0) to (1, 1, 10);
3692+
create table rp_prefix_test2_p2 partition of rp_prefix_test2 for values from (2, 2, 0) to (2, 2, 10);
3693+
-- Don't call get_steps_using_prefix() with the last partition key c plus
3694+
-- an invalid prefix (ie, b = 1)
3695+
explain (costs off) select * from rp_prefix_test2 where a <= 1 and b = 1 and c >= 0;
3696+
QUERY PLAN
3697+
------------------------------------------------
3698+
Seq Scan on rp_prefix_test2_p1 rp_prefix_test2
3699+
Filter: ((a <= 1) AND (c >= 0) AND (b = 1))
3700+
(2 rows)
3701+
3702+
create table rp_prefix_test3 (a int, b int, c int, d int) partition by range(a, b, c, d);
3703+
create table rp_prefix_test3_p1 partition of rp_prefix_test3 for values from (1, 1, 1, 0) to (1, 1, 1, 10);
3704+
create table rp_prefix_test3_p2 partition of rp_prefix_test3 for values from (2, 2, 2, 0) to (2, 2, 2, 10);
3705+
-- Test that get_steps_using_prefix() handles a prefix that contains multiple
3706+
-- clauses for the partition key b (ie, b >= 1 and b >= 2)
3707+
explain (costs off) select * from rp_prefix_test3 where a >= 1 and b >= 1 and b >= 2 and c >= 2 and d >= 0;
3708+
QUERY PLAN
3709+
--------------------------------------------------------------------------
3710+
Seq Scan on rp_prefix_test3_p2 rp_prefix_test3
3711+
Filter: ((a >= 1) AND (b >= 1) AND (b >= 2) AND (c >= 2) AND (d >= 0))
3712+
(2 rows)
3713+
3714+
create table hp_prefix_test (a int, b int, c int, d int) partition by hash (a part_test_int4_ops, b part_test_int4_ops, c part_test_int4_ops, d part_test_int4_ops);
3715+
create table hp_prefix_test_p1 partition of hp_prefix_test for values with (modulus 2, remainder 0);
3716+
create table hp_prefix_test_p2 partition of hp_prefix_test for values with (modulus 2, remainder 1);
3717+
-- Test that get_steps_using_prefix() handles non-NULL step_nullkeys
3718+
explain (costs off) select * from hp_prefix_test where a = 1 and b is null and c = 1 and d = 1;
3719+
QUERY PLAN
3720+
-------------------------------------------------------------
3721+
Seq Scan on hp_prefix_test_p1 hp_prefix_test
3722+
Filter: ((b IS NULL) AND (a = 1) AND (c = 1) AND (d = 1))
3723+
(2 rows)
3724+
3725+
drop table rp_prefix_test1;
3726+
drop table rp_prefix_test2;
3727+
drop table rp_prefix_test3;
3728+
drop table hp_prefix_test;
3729+
--
3730+
-- Check that gen_partprune_steps() detects self-contradiction from clauses
3731+
-- regardless of the order of the clauses (Here we use a custom operator to
3732+
-- prevent the equivclass.c machinery from reordering the clauses)
3733+
--
3734+
create operator === (
3735+
leftarg = int4,
3736+
rightarg = int4,
3737+
procedure = int4eq,
3738+
commutator = ===,
3739+
hashes
3740+
);
3741+
create operator class part_test_int4_ops2
3742+
for type int4
3743+
using hash as
3744+
operator 1 ===,
3745+
function 2 part_hashint4_noop(int4, int8);
3746+
create table hp_contradict_test (a int, b int) partition by hash (a part_test_int4_ops2, b part_test_int4_ops2);
3747+
create table hp_contradict_test_p1 partition of hp_contradict_test for values with (modulus 2, remainder 0);
3748+
create table hp_contradict_test_p2 partition of hp_contradict_test for values with (modulus 2, remainder 1);
3749+
explain (costs off) select * from hp_contradict_test where a is null and a === 1 and b === 1;
3750+
QUERY PLAN
3751+
--------------------------
3752+
Result
3753+
One-Time Filter: false
3754+
(2 rows)
3755+
3756+
explain (costs off) select * from hp_contradict_test where a === 1 and b === 1 and a is null;
3757+
QUERY PLAN
3758+
--------------------------
3759+
Result
3760+
One-Time Filter: false
3761+
(2 rows)
3762+
3763+
drop table hp_contradict_test;
3764+
drop operator class part_test_int4_ops2 using hash;
3765+
drop operator ===(int4, int4);

0 commit comments

Comments
 (0)