Skip to content

Commit 62c4a77

Browse files
author
Etsuro Fujita
committed
Fix some issues with step generation in partition pruning.
In the case of range partitioning, get_steps_using_prefix() assumes that the passed-in prefix list contains at least one clause for each of the partition keys earlier than one specified in the passed-in step_lastkeyno, but the caller (ie, gen_prune_steps_from_opexps()) didn't take it into account, which led to a server crash or incorrect results when the list contained no clauses for such partition keys, as reported in bug #16500 and #16501 from Kobayashi Hisanori. Update the caller to call that function only when the list created there contains at least one clause for each of the earlier partition keys in the case of range partitioning. While at it, fix some other issues: * The list to pass to get_steps_using_prefix() is allowed to contain multiple clauses for the same partition key, as described in the comment for that function, but that function actually assumed that the list contained just a single clause for each of middle partition keys, which led to an assertion failure when the list contained multiple clauses for such partition keys. Update that function to match the comment. * In the case of hash partitioning, partition keys are allowed to be NULL, in which case the list to pass to get_steps_using_prefix() contains no clauses for NULL partition keys, but that function treats that case as like the case of range partitioning, which led to the assertion failure. Update the assertion test to take into account NULL partition keys in the case of hash partitioning. * Fix a typo in a comment in get_steps_using_prefix_recurse(). * gen_partprune_steps() failed to detect self-contradiction from strict-qual clauses and an IS NULL clause for the same partition key in some cases, producing incorrect partition-pruning steps, which led to incorrect results of partition pruning, but didn't cause any user-visible problems fortunately, as the self-contradiction is detected later in the query planning. Update that function to detect the self-contradiction. Per bug #16500 and #16501 from Kobayashi Hisanori. Patch by me, initial diagnosis for the reported issue and review by Dmitry Dolgov. Back-patch to v11, where partition pruning was introduced. Discussion: https://postgr.es/m/16500-d1613f2a78e1e090%40postgresql.org Discussion: https://postgr.es/m/16501-5234a9a0394f6754%40postgresql.org
1 parent 5bd087e commit 62c4a77

File tree

3 files changed

+294
-56
lines changed

3 files changed

+294
-56
lines changed

src/backend/partitioning/partprune.c

Lines changed: 131 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,8 +1071,12 @@ gen_partprune_steps_internal(GeneratePruningStepsContext *context,
10711071
case PARTCLAUSE_MATCH_NULLNESS:
10721072
if (!clause_is_not_null)
10731073
{
1074-
/* check for conflicting IS NOT NULL */
1075-
if (bms_is_member(i, notnullkeys))
1074+
/*
1075+
* check for conflicting IS NOT NULL as well as
1076+
* contradicting strict clauses
1077+
*/
1078+
if (bms_is_member(i, notnullkeys) ||
1079+
keyclauses[i] != NIL)
10761080
{
10771081
context->contradictory = true;
10781082
return NIL;
@@ -1321,34 +1325,18 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
13211325
{
13221326
case PARTITION_STRATEGY_LIST:
13231327
case PARTITION_STRATEGY_RANGE:
1324-
{
1325-
PartClauseInfo *last = NULL;
1326-
1327-
/*
1328-
* Add this clause to the list of clauses to be used
1329-
* for pruning if this is the first such key for this
1330-
* operator strategy or if it is consecutively next to
1331-
* the last column for which a clause with this
1332-
* operator strategy was matched.
1333-
*/
1334-
if (btree_clauses[pc->op_strategy] != NIL)
1335-
last = llast(btree_clauses[pc->op_strategy]);
1336-
1337-
if (last == NULL ||
1338-
i == last->keyno || i == last->keyno + 1)
1339-
btree_clauses[pc->op_strategy] =
1340-
lappend(btree_clauses[pc->op_strategy], pc);
1328+
btree_clauses[pc->op_strategy] =
1329+
lappend(btree_clauses[pc->op_strategy], pc);
13411330

1342-
/*
1343-
* We can't consider subsequent partition keys if the
1344-
* clause for the current key contains a non-inclusive
1345-
* operator.
1346-
*/
1347-
if (pc->op_strategy == BTLessStrategyNumber ||
1348-
pc->op_strategy == BTGreaterStrategyNumber)
1349-
consider_next_key = false;
1350-
break;
1351-
}
1331+
/*
1332+
* We can't consider subsequent partition keys if the
1333+
* clause for the current key contains a non-inclusive
1334+
* operator.
1335+
*/
1336+
if (pc->op_strategy == BTLessStrategyNumber ||
1337+
pc->op_strategy == BTGreaterStrategyNumber)
1338+
consider_next_key = false;
1339+
break;
13521340

13531341
case PARTITION_STRATEGY_HASH:
13541342
if (pc->op_strategy != HTEqualStrategyNumber)
@@ -1387,6 +1375,7 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
13871375
List *eq_clauses = btree_clauses[BTEqualStrategyNumber];
13881376
List *le_clauses = btree_clauses[BTLessEqualStrategyNumber];
13891377
List *ge_clauses = btree_clauses[BTGreaterEqualStrategyNumber];
1378+
bool pk_has_clauses[PARTITION_MAX_KEYS];
13901379
int strat;
13911380

13921381
/*
@@ -1409,6 +1398,35 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14091398
ListCell *lc1;
14101399
List *prefix = NIL;
14111400
List *pc_steps;
1401+
bool prefix_valid = true;
1402+
1403+
/*
1404+
* If this is a clause for the first partition key,
1405+
* there are no preceding expressions; generate a
1406+
* pruning step without a prefix.
1407+
*
1408+
* Note that we pass NULL for step_nullkeys, because
1409+
* we don't search list/range partition bounds where
1410+
* some keys are NULL.
1411+
*/
1412+
if (pc->keyno == 0)
1413+
{
1414+
Assert(pc->op_strategy == strat);
1415+
pc_steps = get_steps_using_prefix(context, strat,
1416+
pc->op_is_ne,
1417+
pc->expr,
1418+
pc->cmpfn,
1419+
0,
1420+
NULL,
1421+
NIL);
1422+
opsteps = list_concat(opsteps, pc_steps);
1423+
continue;
1424+
}
1425+
1426+
/* (Re-)initialize the pk_has_clauses array */
1427+
Assert(pc->keyno > 0);
1428+
for (i = 0; i < pc->keyno; i++)
1429+
pk_has_clauses[i] = false;
14121430

14131431
/*
14141432
* Expressions from = clauses can always be in the
@@ -1421,7 +1439,10 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14211439
if (eqpc->keyno == pc->keyno)
14221440
break;
14231441
if (eqpc->keyno < pc->keyno)
1442+
{
14241443
prefix = lappend(prefix, eqpc);
1444+
pk_has_clauses[eqpc->keyno] = true;
1445+
}
14251446
}
14261447

14271448
/*
@@ -1439,7 +1460,10 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14391460
if (lepc->keyno == pc->keyno)
14401461
break;
14411462
if (lepc->keyno < pc->keyno)
1463+
{
14421464
prefix = lappend(prefix, lepc);
1465+
pk_has_clauses[lepc->keyno] = true;
1466+
}
14431467
}
14441468
}
14451469

@@ -1458,11 +1482,33 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14581482
if (gepc->keyno == pc->keyno)
14591483
break;
14601484
if (gepc->keyno < pc->keyno)
1485+
{
14611486
prefix = lappend(prefix, gepc);
1487+
pk_has_clauses[gepc->keyno] = true;
1488+
}
14621489
}
14631490
}
14641491

14651492
/*
1493+
* Check whether every earlier partition key has at
1494+
* least one clause.
1495+
*/
1496+
for (i = 0; i < pc->keyno; i++)
1497+
{
1498+
if (!pk_has_clauses[i])
1499+
{
1500+
prefix_valid = false;
1501+
break;
1502+
}
1503+
}
1504+
1505+
/*
1506+
* If prefix_valid, generate PartitionPruneStepOps.
1507+
* Otherwise, we would not find clauses for a valid
1508+
* subset of the partition keys anymore for the
1509+
* strategy; give up on generating partition pruning
1510+
* steps further for the strategy.
1511+
*
14661512
* As mentioned above, if 'prefix' contains multiple
14671513
* expressions for the same key, the following will
14681514
* generate multiple steps, one for each combination
@@ -1472,15 +1518,20 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14721518
* we don't search list/range partition bounds where
14731519
* some keys are NULL.
14741520
*/
1475-
Assert(pc->op_strategy == strat);
1476-
pc_steps = get_steps_using_prefix(context, strat,
1477-
pc->op_is_ne,
1478-
pc->expr,
1479-
pc->cmpfn,
1480-
pc->keyno,
1481-
NULL,
1482-
prefix);
1483-
opsteps = list_concat(opsteps, list_copy(pc_steps));
1521+
if (prefix_valid)
1522+
{
1523+
Assert(pc->op_strategy == strat);
1524+
pc_steps = get_steps_using_prefix(context, strat,
1525+
pc->op_is_ne,
1526+
pc->expr,
1527+
pc->cmpfn,
1528+
pc->keyno,
1529+
NULL,
1530+
prefix);
1531+
opsteps = list_concat(opsteps, pc_steps);
1532+
}
1533+
else
1534+
break;
14841535
}
14851536
}
14861537
break;
@@ -2195,6 +2246,14 @@ match_clause_to_partition_key(GeneratePruningStepsContext *context,
21952246
* 'prefix'. Actually, since 'prefix' may contain multiple clauses for the
21962247
* same partition key column, we must generate steps for various combinations
21972248
* of the clauses of different keys.
2249+
*
2250+
* For list/range partitioning, callers must ensure that step_nullkeys is
2251+
* NULL, and that prefix contains at least one clause for each of the
2252+
* partition keys earlier than one specified in step_lastkeyno if it's
2253+
* greater than zero. For hash partitioning, step_nullkeys is allowed to be
2254+
* non-NULL, but they must ensure that prefix contains at least one clause
2255+
* for each of the partition keys other than those specified in step_nullkeys
2256+
* and step_lastkeyno.
21982257
*/
21992258
static List *
22002259
get_steps_using_prefix(GeneratePruningStepsContext *context,
@@ -2206,6 +2265,9 @@ get_steps_using_prefix(GeneratePruningStepsContext *context,
22062265
Bitmapset *step_nullkeys,
22072266
List *prefix)
22082267
{
2268+
Assert(step_nullkeys == NULL ||
2269+
context->rel->part_scheme->strategy == PARTITION_STRATEGY_HASH);
2270+
22092271
/* Quick exit if there are no values to prefix with. */
22102272
if (list_length(prefix) == 0)
22112273
{
@@ -2271,7 +2333,7 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
22712333
ListCell *next_start;
22722334

22732335
/*
2274-
* For each clause with cur_keyno, adds its expr and cmpfn to
2336+
* For each clause with cur_keyno, add its expr and cmpfn to
22752337
* step_exprs and step_cmpfns, respectively, and recurse after setting
22762338
* next_start to the ListCell of the first clause for the next
22772339
* partition key.
@@ -2288,23 +2350,19 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
22882350
for_each_cell(lc, start)
22892351
{
22902352
List *moresteps;
2353+
List *step_exprs1,
2354+
*step_cmpfns1;
22912355

22922356
pc = lfirst(lc);
22932357
if (pc->keyno == cur_keyno)
22942358
{
2295-
/* clean up before starting a new recursion cycle. */
2296-
if (cur_keyno == 0)
2297-
{
2298-
list_free(step_exprs);
2299-
list_free(step_cmpfns);
2300-
step_exprs = list_make1(pc->expr);
2301-
step_cmpfns = list_make1_oid(pc->cmpfn);
2302-
}
2303-
else
2304-
{
2305-
step_exprs = lappend(step_exprs, pc->expr);
2306-
step_cmpfns = lappend_oid(step_cmpfns, pc->cmpfn);
2307-
}
2359+
/* Leave the original step_exprs unmodified. */
2360+
step_exprs1 = list_copy(step_exprs);
2361+
step_exprs1 = lappend(step_exprs1, pc->expr);
2362+
2363+
/* Leave the original step_cmpfns unmodified. */
2364+
step_cmpfns1 = list_copy(step_cmpfns);
2365+
step_cmpfns1 = lappend_oid(step_cmpfns1, pc->cmpfn);
23082366
}
23092367
else
23102368
{
@@ -2320,19 +2378,36 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
23202378
step_lastkeyno,
23212379
step_nullkeys,
23222380
next_start,
2323-
step_exprs,
2324-
step_cmpfns);
2381+
step_exprs1,
2382+
step_cmpfns1);
23252383
result = list_concat(result, moresteps);
2384+
2385+
list_free(step_exprs1);
2386+
list_free(step_cmpfns1);
23262387
}
23272388
}
23282389
else
23292390
{
23302391
/*
23312392
* End the current recursion cycle and start generating steps, one for
23322393
* each clause with cur_keyno, which is all clauses from here onward
2333-
* till the end of the list.
2394+
* till the end of the list. Note that for hash partitioning,
2395+
* step_nullkeys is allowed to be non-empty, in which case step_exprs
2396+
* would only contain expressions for the earlier partition keys that
2397+
* are not specified in step_nullkeys.
23342398
*/
2335-
Assert(list_length(step_exprs) == cur_keyno);
2399+
Assert(list_length(step_exprs) == cur_keyno ||
2400+
!bms_is_empty(step_nullkeys));
2401+
/*
2402+
* Note also that for hash partitioning, each partition key should
2403+
* have either equality clauses or an IS NULL clause, so if a
2404+
* partition key doesn't have an expression, it would be specified
2405+
* in step_nullkeys.
2406+
*/
2407+
Assert(context->rel->part_scheme->strategy
2408+
!= PARTITION_STRATEGY_HASH ||
2409+
list_length(step_exprs) + 2 + bms_num_members(step_nullkeys) ==
2410+
context->rel->part_scheme->partnatts);
23362411
for_each_cell(lc, start)
23372412
{
23382413
PartClauseInfo *pc = lfirst(lc);

src/test/regress/expected/partition_prune.out

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3923,3 +3923,95 @@ explain (costs off) update listp1 set a = 1 where a = 2;
39233923
reset constraint_exclusion;
39243924
reset enable_partition_pruning;
39253925
drop table listp;
3926+
--
3927+
-- Check that gen_prune_steps_from_opexps() works well for various cases of
3928+
-- clauses for different partition keys
3929+
--
3930+
create table rp_prefix_test1 (a int, b varchar) partition by range(a, b);
3931+
create table rp_prefix_test1_p1 partition of rp_prefix_test1 for values from (1, 'a') to (1, 'b');
3932+
create table rp_prefix_test1_p2 partition of rp_prefix_test1 for values from (2, 'a') to (2, 'b');
3933+
-- Don't call get_steps_using_prefix() with the last partition key b plus
3934+
-- an empty prefix
3935+
explain (costs off) select * from rp_prefix_test1 where a <= 1 and b = 'a';
3936+
QUERY PLAN
3937+
--------------------------------------------------
3938+
Seq Scan on rp_prefix_test1_p1
3939+
Filter: ((a <= 1) AND ((b)::text = 'a'::text))
3940+
(2 rows)
3941+
3942+
create table rp_prefix_test2 (a int, b int, c int) partition by range(a, b, c);
3943+
create table rp_prefix_test2_p1 partition of rp_prefix_test2 for values from (1, 1, 0) to (1, 1, 10);
3944+
create table rp_prefix_test2_p2 partition of rp_prefix_test2 for values from (2, 2, 0) to (2, 2, 10);
3945+
-- Don't call get_steps_using_prefix() with the last partition key c plus
3946+
-- an invalid prefix (ie, b = 1)
3947+
explain (costs off) select * from rp_prefix_test2 where a <= 1 and b = 1 and c >= 0;
3948+
QUERY PLAN
3949+
-----------------------------------------------
3950+
Seq Scan on rp_prefix_test2_p1
3951+
Filter: ((a <= 1) AND (c >= 0) AND (b = 1))
3952+
(2 rows)
3953+
3954+
create table rp_prefix_test3 (a int, b int, c int, d int) partition by range(a, b, c, d);
3955+
create table rp_prefix_test3_p1 partition of rp_prefix_test3 for values from (1, 1, 1, 0) to (1, 1, 1, 10);
3956+
create table rp_prefix_test3_p2 partition of rp_prefix_test3 for values from (2, 2, 2, 0) to (2, 2, 2, 10);
3957+
-- Test that get_steps_using_prefix() handles a prefix that contains multiple
3958+
-- clauses for the partition key b (ie, b >= 1 and b >= 2)
3959+
explain (costs off) select * from rp_prefix_test3 where a >= 1 and b >= 1 and b >= 2 and c >= 2 and d >= 0;
3960+
QUERY PLAN
3961+
--------------------------------------------------------------------------
3962+
Seq Scan on rp_prefix_test3_p2
3963+
Filter: ((a >= 1) AND (b >= 1) AND (b >= 2) AND (c >= 2) AND (d >= 0))
3964+
(2 rows)
3965+
3966+
create table hp_prefix_test (a int, b int, c int, d int) partition by hash (a part_test_int4_ops, b part_test_int4_ops, c part_test_int4_ops, d part_test_int4_ops);
3967+
create table hp_prefix_test_p1 partition of hp_prefix_test for values with (modulus 2, remainder 0);
3968+
create table hp_prefix_test_p2 partition of hp_prefix_test for values with (modulus 2, remainder 1);
3969+
-- Test that get_steps_using_prefix() handles non-NULL step_nullkeys
3970+
explain (costs off) select * from hp_prefix_test where a = 1 and b is null and c = 1 and d = 1;
3971+
QUERY PLAN
3972+
-------------------------------------------------------------
3973+
Seq Scan on hp_prefix_test_p1
3974+
Filter: ((b IS NULL) AND (a = 1) AND (c = 1) AND (d = 1))
3975+
(2 rows)
3976+
3977+
drop table rp_prefix_test1;
3978+
drop table rp_prefix_test2;
3979+
drop table rp_prefix_test3;
3980+
drop table hp_prefix_test;
3981+
--
3982+
-- Check that gen_partprune_steps() detects self-contradiction from clauses
3983+
-- regardless of the order of the clauses (Here we use a custom operator to
3984+
-- prevent the equivclass.c machinery from reordering the clauses)
3985+
--
3986+
create operator === (
3987+
leftarg = int4,
3988+
rightarg = int4,
3989+
procedure = int4eq,
3990+
commutator = ===,
3991+
hashes
3992+
);
3993+
create operator class part_test_int4_ops2
3994+
for type int4
3995+
using hash as
3996+
operator 1 ===,
3997+
function 2 part_hashint4_noop(int4, int8);
3998+
create table hp_contradict_test (a int, b int) partition by hash (a part_test_int4_ops2, b part_test_int4_ops2);
3999+
create table hp_contradict_test_p1 partition of hp_contradict_test for values with (modulus 2, remainder 0);
4000+
create table hp_contradict_test_p2 partition of hp_contradict_test for values with (modulus 2, remainder 1);
4001+
explain (costs off) select * from hp_contradict_test where a is null and a === 1 and b === 1;
4002+
QUERY PLAN
4003+
--------------------------
4004+
Result
4005+
One-Time Filter: false
4006+
(2 rows)
4007+
4008+
explain (costs off) select * from hp_contradict_test where a === 1 and b === 1 and a is null;
4009+
QUERY PLAN
4010+
--------------------------
4011+
Result
4012+
One-Time Filter: false
4013+
(2 rows)
4014+
4015+
drop table hp_contradict_test;
4016+
drop operator class part_test_int4_ops2 using hash;
4017+
drop operator ===(int4, int4);

0 commit comments

Comments
 (0)