Skip to content

Commit 07f261b

Browse files
committed
Fix incorrect step generation in HASH partition pruning
get_steps_using_prefix_recurse() incorrectly assumed that it could stop recursive processing of the 'prefix' list when cur_keyno was one before the step_lastkeyno. Since hash partition pruning can prune using IS NULL quals, and these IS NULL quals are not present in the 'prefix' list, then that logic could cause more levels of recursion than what is needed and lead to there being no more items in the 'prefix' list to process. This would manifest itself as a crash in some code that expected the 'start' ListCell not to be NULL. Here we adjust the logic so that instead of stopping recursion at 1 key before the step_lastkeyno, we just look at the llast(prefix) item and ensure we only recursively process up until just before whichever the last key is. This effectively allows keys to be missing in the 'prefix' list. This change does mean that step_lastkeyno is no longer needed, so we remove that from the static functions. I also spent quite some time reading this code and testing it to try to convince myself that there are no other issues. That resulted in the irresistible temptation of rewriting some comments, many of which were just not true or inconcise. Reported-by: Sergei Glukhov Reviewed-by: Sergei Glukhov, tender wang Discussion: https://postgr.es/m/2f09ce72-315e-2a33-589a-8519ada8df61@postgrespro.ru Backpatch-through: 11, where partition pruning was introduced.
1 parent 2b729cf commit 07f261b

File tree

3 files changed

+335
-62
lines changed

3 files changed

+335
-62
lines changed

src/backend/partitioning/partprune.c

Lines changed: 63 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -165,16 +165,15 @@ static List *get_steps_using_prefix(GeneratePruningStepsContext *context,
165165
bool step_op_is_ne,
166166
Expr *step_lastexpr,
167167
Oid step_lastcmpfn,
168-
int step_lastkeyno,
169168
Bitmapset *step_nullkeys,
170169
List *prefix);
171170
static List *get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
172171
StrategyNumber step_opstrategy,
173172
bool step_op_is_ne,
174173
Expr *step_lastexpr,
175174
Oid step_lastcmpfn,
176-
int step_lastkeyno,
177175
Bitmapset *step_nullkeys,
176+
List *prefix,
178177
ListCell *start,
179178
List *step_exprs,
180179
List *step_cmpfns);
@@ -1404,7 +1403,6 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
14041403
pc->op_is_ne,
14051404
pc->expr,
14061405
pc->cmpfn,
1407-
0,
14081406
NULL,
14091407
NIL);
14101408
opsteps = list_concat(opsteps, pc_steps);
@@ -1530,7 +1528,6 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
15301528
pc->op_is_ne,
15311529
pc->expr,
15321530
pc->cmpfn,
1533-
pc->keyno,
15341531
NULL,
15351532
prefix);
15361533
opsteps = list_concat(opsteps, pc_steps);
@@ -1604,7 +1601,6 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context,
16041601
false,
16051602
pc->expr,
16061603
pc->cmpfn,
1607-
pc->keyno,
16081604
nullkeys,
16091605
prefix);
16101606
opsteps = list_concat(opsteps, list_copy(pc_steps));
@@ -2244,40 +2240,49 @@ match_clause_to_partition_key(GeneratePruningStepsContext *context,
22442240

22452241
/*
22462242
* get_steps_using_prefix
2247-
* Generate list of PartitionPruneStepOp steps each consisting of given
2248-
* opstrategy
2249-
*
2250-
* To generate steps, step_lastexpr and step_lastcmpfn are appended to
2251-
* expressions and cmpfns, respectively, extracted from the clauses in
2252-
* 'prefix'. Actually, since 'prefix' may contain multiple clauses for the
2253-
* same partition key column, we must generate steps for various combinations
2254-
* of the clauses of different keys.
2255-
*
2256-
* For list/range partitioning, callers must ensure that step_nullkeys is
2257-
* NULL, and that prefix contains at least one clause for each of the
2258-
* partition keys earlier than one specified in step_lastkeyno if it's
2259-
* greater than zero. For hash partitioning, step_nullkeys is allowed to be
2260-
* non-NULL, but they must ensure that prefix contains at least one clause
2261-
* for each of the partition keys other than those specified in step_nullkeys
2262-
* and step_lastkeyno.
2263-
*
2264-
* For both cases, callers must also ensure that clauses in prefix are sorted
2265-
* in ascending order of their partition key numbers.
2243+
* Generate a list of PartitionPruneStepOps based on the given input.
2244+
*
2245+
* 'step_lastexpr' and 'step_lastcmpfn' are the Expr and comparison function
2246+
* belonging to the final partition key that we have a clause for. 'prefix'
2247+
* is a list of PartClauseInfos for partition key numbers prior to the given
2248+
* 'step_lastexpr' and 'step_lastcmpfn'. 'prefix' may contain multiple
2249+
* PartClauseInfos belonging to a single partition key. We will generate a
2250+
* PartitionPruneStepOp for each combination of the given PartClauseInfos
2251+
* using, at most, one PartClauseInfo per partition key.
2252+
*
2253+
* For LIST and RANGE partitioned tables, callers must ensure that
2254+
* step_nullkeys is NULL, and that prefix contains at least one clause for
2255+
* each of the partition keys prior to the key that 'step_lastexpr' and
2256+
* 'step_lastcmpfn'belong to.
2257+
*
2258+
* For HASH partitioned tables, callers must ensure that 'prefix' contains at
2259+
* least one clause for each of the partition keys apart from the final key
2260+
* (the expr and comparison function for the final key are in 'step_lastexpr'
2261+
* and 'step_lastcmpfn'). A bit set in step_nullkeys can substitute clauses
2262+
* in the 'prefix' list for any given key. If a bit is set in 'step_nullkeys'
2263+
* for a given key, then there must be no PartClauseInfo for that key in the
2264+
* 'prefix' list.
2265+
*
2266+
* For each of the above cases, callers must ensure that PartClauseInfos in
2267+
* 'prefix' are sorted in ascending order of keyno.
22662268
*/
22672269
static List *
22682270
get_steps_using_prefix(GeneratePruningStepsContext *context,
22692271
StrategyNumber step_opstrategy,
22702272
bool step_op_is_ne,
22712273
Expr *step_lastexpr,
22722274
Oid step_lastcmpfn,
2273-
int step_lastkeyno,
22742275
Bitmapset *step_nullkeys,
22752276
List *prefix)
22762277
{
2278+
/* step_nullkeys must be empty for RANGE and LIST partitioned tables */
22772279
Assert(step_nullkeys == NULL ||
22782280
context->rel->part_scheme->strategy == PARTITION_STRATEGY_HASH);
22792281

2280-
/* Quick exit if there are no values to prefix with. */
2282+
/*
2283+
* No recursive processing is required when 'prefix' is an empty list. This
2284+
* occurs when there is only 1 partition key column.
2285+
*/
22812286
if (list_length(prefix) == 0)
22822287
{
22832288
PartitionPruneStep *step;
@@ -2291,26 +2296,31 @@ get_steps_using_prefix(GeneratePruningStepsContext *context,
22912296
return list_make1(step);
22922297
}
22932298

2294-
/* Recurse to generate steps for various combinations. */
2299+
/* Recurse to generate steps for every combination of clauses. */
22952300
return get_steps_using_prefix_recurse(context,
22962301
step_opstrategy,
22972302
step_op_is_ne,
22982303
step_lastexpr,
22992304
step_lastcmpfn,
2300-
step_lastkeyno,
23012305
step_nullkeys,
2306+
prefix,
23022307
list_head(prefix),
23032308
NIL, NIL);
23042309
}
23052310

23062311
/*
23072312
* get_steps_using_prefix_recurse
2308-
* Recursively generate combinations of clauses for different partition
2309-
* keys and start generating steps upon reaching clauses for the greatest
2310-
* column that is less than the one for which we're currently generating
2311-
* steps (that is, step_lastkeyno)
2313+
* Generate and return a list of PartitionPruneStepOps using the 'prefix'
2314+
* list of PartClauseInfos starting at the 'start' cell.
2315+
*
2316+
* When 'prefix' contains multiple PartClauseInfos for a single partition key
2317+
* we create a PartitionPruneStepOp for each combination of duplicated
2318+
* PartClauseInfos. The returned list will contain a PartitionPruneStepOp
2319+
* for each unique combination of input PartClauseInfos containing at most one
2320+
* PartClauseInfo per partition key.
23122321
*
2313-
* 'start' is where we should start iterating for the current invocation.
2322+
* 'prefix' is the input list of PartClauseInfos sorted by keyno.
2323+
* 'start' marks the cell that searching the 'prefix' list should start from.
23142324
* 'step_exprs' and 'step_cmpfns' each contains the expressions and cmpfns
23152325
* we've generated so far from the clauses for the previous part keys.
23162326
*/
@@ -2320,32 +2330,34 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
23202330
bool step_op_is_ne,
23212331
Expr *step_lastexpr,
23222332
Oid step_lastcmpfn,
2323-
int step_lastkeyno,
23242333
Bitmapset *step_nullkeys,
2334+
List *prefix,
23252335
ListCell *start,
23262336
List *step_exprs,
23272337
List *step_cmpfns)
23282338
{
23292339
List *result = NIL;
23302340
ListCell *lc;
23312341
int cur_keyno;
2342+
int final_keyno;
23322343

23332344
/* Actually, recursion would be limited by PARTITION_MAX_KEYS. */
23342345
check_stack_depth();
23352346

2336-
/* Check if we need to recurse. */
23372347
Assert(start != NULL);
23382348
cur_keyno = ((PartClauseInfo *) lfirst(start))->keyno;
2339-
if (cur_keyno < step_lastkeyno - 1)
2349+
final_keyno = ((PartClauseInfo *) llast(prefix))->keyno;
2350+
2351+
/* Check if we need to recurse. */
2352+
if (cur_keyno < final_keyno)
23402353
{
23412354
PartClauseInfo *pc;
23422355
ListCell *next_start;
23432356

23442357
/*
2345-
* For each clause with cur_keyno, add its expr and cmpfn to
2346-
* step_exprs and step_cmpfns, respectively, and recurse after setting
2347-
* next_start to the ListCell of the first clause for the next
2348-
* partition key.
2358+
* Find the first PartClauseInfo belonging to the next partition key, the
2359+
* next recursive call must start iteration of the prefix list from that
2360+
* point.
23492361
*/
23502362
for_each_cell(lc, start)
23512363
{
@@ -2354,8 +2366,15 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
23542366
if (pc->keyno > cur_keyno)
23552367
break;
23562368
}
2369+
2370+
/* record where to start iterating in the next recursive call */
23572371
next_start = lc;
23582372

2373+
/*
2374+
* For each PartClauseInfo with keyno set to cur_keyno, add its expr and
2375+
* cmpfn to step_exprs and step_cmpfns, respectively, and recurse using
2376+
* 'next_start' as the starting point in the 'prefix' list.
2377+
*/
23592378
for_each_cell(lc, start)
23602379
{
23612380
List *moresteps;
@@ -2375,6 +2394,7 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
23752394
}
23762395
else
23772396
{
2397+
/* check the 'prefix' list is sorted correctly */
23782398
Assert(pc->keyno > cur_keyno);
23792399
break;
23802400
}
@@ -2384,8 +2404,8 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
23842404
step_op_is_ne,
23852405
step_lastexpr,
23862406
step_lastcmpfn,
2387-
step_lastkeyno,
23882407
step_nullkeys,
2408+
prefix,
23892409
next_start,
23902410
step_exprs1,
23912411
step_cmpfns1);
@@ -2402,8 +2422,8 @@ get_steps_using_prefix_recurse(GeneratePruningStepsContext *context,
24022422
* each clause with cur_keyno, which is all clauses from here onward
24032423
* till the end of the list. Note that for hash partitioning,
24042424
* step_nullkeys is allowed to be non-empty, in which case step_exprs
2405-
* would only contain expressions for the earlier partition keys that
2406-
* are not specified in step_nullkeys.
2425+
* would only contain expressions for the partition keys that are not
2426+
* specified in step_nullkeys.
24072427
*/
24082428
Assert(list_length(step_exprs) == cur_keyno ||
24092429
!bms_is_empty(step_nullkeys));

0 commit comments

Comments
 (0)