Skip to content

Commit 8654407

Browse files
committed
Improve pruning of a default partition
When querying a partitioned table containing a default partition, we were wrongly deciding to include it in the scan too early in the process, failing to exclude it in some cases. If we reinterpret the PruneStepResult.scan_default flag slightly, we can do a better job at detecting that it can be excluded. The change is that we avoid setting the flag for that pruning step unless the step absolutely requires the default partition to be scanned (in contrast with the previous arrangement, which was to set it unless the step was able to prune it). So get_matching_partitions() must explicitly check the partition that each returned bound value corresponds to in order to determine whether the default one needs to be included, rather than relying on the flag from the final step result. Author: Yuzuko Hosoya <hosoya.yuzuko@lab.ntt.co.jp> Reviewed-by: Amit Langote <Langote_Amit_f8@lab.ntt.co.jp> Discussion: https://postgr.es/m/00e601d4ca86$932b8bc0$b982a340$@lab.ntt.co.jp
1 parent d58c9c0 commit 8654407

File tree

4 files changed

+111
-130
lines changed

4 files changed

+111
-130
lines changed

src/backend/partitioning/partprune.c

Lines changed: 98 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,7 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
734734
PruneStepResult **results,
735735
*final_result;
736736
ListCell *lc;
737+
bool scan_default;
737738

738739
/* If there are no pruning steps then all partitions match. */
739740
if (num_steps == 0)
@@ -785,30 +786,39 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
785786
Assert(final_result != NULL);
786787
i = -1;
787788
result = NULL;
789+
scan_default = final_result->scan_default;
788790
while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0)
789791
{
790792
int partindex = context->boundinfo->indexes[i];
791793

792-
/*
793-
* In range and hash partitioning cases, some slots may contain -1,
794-
* indicating that no partition has been defined to accept a given
795-
* range of data or for a given remainder, respectively. The default
796-
* partition, if any, in case of range partitioning, will be added to
797-
* the result, because the specified range still satisfies the query's
798-
* conditions.
799-
*/
800-
if (partindex >= 0)
801-
result = bms_add_member(result, partindex);
794+
if (partindex < 0)
795+
{
796+
/*
797+
* In range partitioning cases, if a partition index is -1 it
798+
* means that the bound at the offset is the upper bound for a
799+
* range not covered by any partition (other than a possible
800+
* default partition). In hash partitioning, the same means no
801+
* partition has been defined for the corresponding remainder
802+
* value.
803+
*
804+
* In either case, the value is still part of the queried range of
805+
* values, so mark to scan the default partition if one exists.
806+
*/
807+
scan_default |= partition_bound_has_default(context->boundinfo);
808+
continue;
809+
}
810+
811+
result = bms_add_member(result, partindex);
802812
}
803813

804-
/* Add the null and/or default partition if needed and if present. */
814+
/* Add the null and/or default partition if needed and present. */
805815
if (final_result->scan_null)
806816
{
807817
Assert(context->strategy == PARTITION_STRATEGY_LIST);
808818
Assert(partition_bound_accepts_nulls(context->boundinfo));
809819
result = bms_add_member(result, context->boundinfo->null_index);
810820
}
811-
if (final_result->scan_default)
821+
if (scan_default)
812822
{
813823
Assert(context->strategy == PARTITION_STRATEGY_LIST ||
814824
context->strategy == PARTITION_STRATEGY_RANGE);
@@ -2433,6 +2443,11 @@ get_matching_hash_bounds(PartitionPruneContext *context,
24332443
* get_matching_list_bounds
24342444
* Determine the offsets of list bounds matching the specified value,
24352445
* according to the semantics of the given operator strategy
2446+
*
2447+
* scan_default will be set in the returned struct, if the default partition
2448+
* needs to be scanned, provided one exists at all. scan_null will be set if
2449+
* the special null-accepting partition needs to be scanned.
2450+
*
24362451
* 'opstrategy' if non-zero must be a btree strategy number.
24372452
*
24382453
* 'value' contains the value to use for pruning.
@@ -2635,8 +2650,13 @@ get_matching_list_bounds(PartitionPruneContext *context,
26352650
* Each datum whose offset is in result is to be treated as the upper bound of
26362651
* the partition that will contain the desired values.
26372652
*
2638-
* If default partition needs to be scanned for given values, set scan_default
2639-
* in result if present.
2653+
* scan_default is set in the returned struct if a default partition exists
2654+
* and we're absolutely certain that it needs to be scanned. We do *not* set
2655+
* it just because values match portions of the key space uncovered by
2656+
* partitions other than default (space which we normally assume to belong to
2657+
* the default partition): the final set of bounds obtained after combining
2658+
* multiple pruning steps might exclude it, so we infer its inclusion
2659+
* elsewhere.
26402660
*
26412661
* 'opstrategy' if non-zero must be a btree strategy number.
26422662
*
@@ -2662,8 +2682,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
26622682
int *partindices = boundinfo->indexes;
26632683
int off,
26642684
minoff,
2665-
maxoff,
2666-
i;
2685+
maxoff;
26672686
bool is_equal;
26682687
bool inclusive = false;
26692688

@@ -2693,13 +2712,15 @@ get_matching_range_bounds(PartitionPruneContext *context,
26932712
*/
26942713
if (nvalues == 0)
26952714
{
2715+
/* ignore key space not covered by any partitions */
26962716
if (partindices[minoff] < 0)
26972717
minoff++;
26982718
if (partindices[maxoff] < 0)
26992719
maxoff--;
27002720

27012721
result->scan_default = partition_bound_has_default(boundinfo);
2702-
Assert(minoff >= 0 && maxoff >= 0);
2722+
Assert(partindices[minoff] >= 0 &&
2723+
partindices[maxoff] >= 0);
27032724
result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
27042725

27052726
return result;
@@ -2727,11 +2748,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
27272748
if (nvalues == partnatts)
27282749
{
27292750
/* There can only be zero or one matching partition. */
2730-
if (partindices[off + 1] >= 0)
2731-
result->bound_offsets = bms_make_singleton(off + 1);
2732-
else
2733-
result->scan_default =
2734-
partition_bound_has_default(boundinfo);
2751+
result->bound_offsets = bms_make_singleton(off + 1);
27352752
return result;
27362753
}
27372754
else
@@ -2819,57 +2836,21 @@ get_matching_range_bounds(PartitionPruneContext *context,
28192836
maxoff = off + 1;
28202837
}
28212838

2822-
/*
2823-
* Skip if minoff/maxoff are actually the upper bound of a
2824-
* un-assigned portion of values.
2825-
*/
2826-
if (partindices[minoff] < 0 && minoff < boundinfo->ndatums)
2827-
minoff++;
2828-
if (partindices[maxoff] < 0 && maxoff >= 1)
2829-
maxoff--;
2830-
2831-
/*
2832-
* There may exist a range of values unassigned to any
2833-
* non-default partition between the datums at minoff and
2834-
* maxoff. Add the default partition in that case.
2835-
*/
2836-
if (partition_bound_has_default(boundinfo))
2837-
{
2838-
for (i = minoff; i <= maxoff; i++)
2839-
{
2840-
if (partindices[i] < 0)
2841-
{
2842-
result->scan_default = true;
2843-
break;
2844-
}
2845-
}
2846-
}
2847-
28482839
Assert(minoff >= 0 && maxoff >= 0);
28492840
result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
28502841
}
2851-
else if (off >= 0) /* !is_equal */
2842+
else
28522843
{
28532844
/*
28542845
* The lookup value falls in the range between some bounds in
28552846
* boundinfo. 'off' would be the offset of the greatest bound
28562847
* that is <= lookup value, so add off + 1 to the result
28572848
* instead as the offset of the upper bound of the only
2858-
* partition that may contain the lookup value.
2859-
*/
2860-
if (partindices[off + 1] >= 0)
2861-
result->bound_offsets = bms_make_singleton(off + 1);
2862-
else
2863-
result->scan_default =
2864-
partition_bound_has_default(boundinfo);
2865-
}
2866-
else
2867-
{
2868-
/*
2869-
* off < 0: the lookup value is smaller than all bounds, so
2870-
* only the default partition qualifies, if there is one.
2849+
* partition that may contain the lookup value. If 'off' is
2850+
* -1 indicating that all bounds are greater, then we simply
2851+
* end up adding the first bound's offset, that is, 0.
28712852
*/
2872-
result->scan_default = partition_bound_has_default(boundinfo);
2853+
result->bound_offsets = bms_make_singleton(off + 1);
28732854
}
28742855

28752856
return result;
@@ -2940,16 +2921,18 @@ get_matching_range_bounds(PartitionPruneContext *context,
29402921

29412922
minoff = inclusive ? off : off + 1;
29422923
}
2943-
2944-
/*
2945-
* lookup value falls in the range between some bounds in
2946-
* boundinfo. off would be the offset of the greatest bound
2947-
* that is <= lookup value, so add off + 1 to the result
2948-
* instead as the offset of the upper bound of the smallest
2949-
* partition that may contain the lookup value.
2950-
*/
29512924
else
2925+
{
2926+
2927+
/*
2928+
* lookup value falls in the range between some bounds in
2929+
* boundinfo. off would be the offset of the greatest
2930+
* bound that is <= lookup value, so add off + 1 to the
2931+
* result instead as the offset of the upper bound of the
2932+
* smallest partition that may contain the lookup value.
2933+
*/
29522934
minoff = off + 1;
2935+
}
29532936
}
29542937
break;
29552938

@@ -2967,16 +2950,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
29672950
boundinfo,
29682951
nvalues, values,
29692952
&is_equal);
2970-
if (off < 0)
2971-
{
2972-
/*
2973-
* All bounds are greater than the key, so we could only
2974-
* expect to find the lookup key in the default partition.
2975-
*/
2976-
result->scan_default = partition_bound_has_default(boundinfo);
2977-
return result;
2978-
}
2979-
else
2953+
if (off >= 0)
29802954
{
29812955
/*
29822956
* See the comment above.
@@ -3024,65 +2998,58 @@ get_matching_range_bounds(PartitionPruneContext *context,
30242998
else
30252999
maxoff = off;
30263000
}
3001+
else
3002+
{
3003+
/*
3004+
* 'off' is -1 indicating that all bounds are greater, so just
3005+
* set the first bound's offset as maxoff.
3006+
*/
3007+
maxoff = off + 1;
3008+
}
30273009
break;
30283010

30293011
default:
30303012
elog(ERROR, "invalid strategy number %d", opstrategy);
30313013
break;
30323014
}
30333015

3016+
Assert(minoff >= 0 && minoff <= boundinfo->ndatums);
3017+
Assert(maxoff >= 0 && maxoff <= boundinfo->ndatums);
3018+
30343019
/*
3035-
* Skip a gap and when doing so, check if the bound contains a finite
3036-
* value to decide if we need to add the default partition. If it's an
3037-
* infinite bound, we need not add the default partition, as having an
3038-
* infinite bound means the partition in question catches any values that
3039-
* would otherwise be in the default partition.
3020+
* If the smallest partition to return has MINVALUE (negative infinity) as
3021+
* its lower bound, increment it to point to the next finite bound
3022+
* (supposedly its upper bound), so that we don't advertently end up
3023+
* scanning the default partition.
30403024
*/
3041-
if (partindices[minoff] < 0)
3025+
if (minoff < boundinfo->ndatums && partindices[minoff] < 0)
30423026
{
30433027
int lastkey = nvalues - 1;
30443028

3045-
if (minoff >= 0 &&
3046-
minoff < boundinfo->ndatums &&
3047-
boundinfo->kind[minoff][lastkey] ==
3048-
PARTITION_RANGE_DATUM_VALUE)
3049-
result->scan_default = partition_bound_has_default(boundinfo);
3050-
3051-
minoff++;
3029+
if (boundinfo->kind[minoff][lastkey] ==
3030+
PARTITION_RANGE_DATUM_MINVALUE)
3031+
{
3032+
minoff++;
3033+
Assert(boundinfo->indexes[minoff] >= 0);
3034+
}
30523035
}
30533036

30543037
/*
3055-
* Skip a gap. See the above comment about how we decide whether or not
3056-
* to scan the default partition based whether the datum that will become
3057-
* the maximum datum is finite or not.
3038+
* If the previous greatest partition has MAXVALUE (positive infinity) as
3039+
* its upper bound (something only possible to do with multi-column range
3040+
* partitioning), we scan switch to it as the greatest partition to
3041+
* return. Again, so that we don't advertently end up scanning the
3042+
* default partition.
30583043
*/
30593044
if (maxoff >= 1 && partindices[maxoff] < 0)
30603045
{
30613046
int lastkey = nvalues - 1;
30623047

3063-
if (maxoff >= 0 &&
3064-
maxoff <= boundinfo->ndatums &&
3065-
boundinfo->kind[maxoff - 1][lastkey] ==
3066-
PARTITION_RANGE_DATUM_VALUE)
3067-
result->scan_default = partition_bound_has_default(boundinfo);
3068-
3069-
maxoff--;
3070-
}
3071-
3072-
if (partition_bound_has_default(boundinfo))
3073-
{
3074-
/*
3075-
* There may exist a range of values unassigned to any non-default
3076-
* partition between the datums at minoff and maxoff. Add the default
3077-
* partition in that case.
3078-
*/
3079-
for (i = minoff; i <= maxoff; i++)
3048+
if (boundinfo->kind[maxoff - 1][lastkey] ==
3049+
PARTITION_RANGE_DATUM_MAXVALUE)
30803050
{
3081-
if (partindices[i] < 0)
3082-
{
3083-
result->scan_default = true;
3084-
break;
3085-
}
3051+
maxoff--;
3052+
Assert(boundinfo->indexes[maxoff] >= 0);
30863053
}
30873054
}
30883055

@@ -3327,14 +3294,24 @@ perform_pruning_combine_step(PartitionPruneContext *context,
33273294

33283295
/*
33293296
* A combine step without any source steps is an indication to not perform
3330-
* any partition pruning, we just return all partitions.
3297+
* any partition pruning. Return all datum indexes in that case.
33313298
*/
33323299
result = (PruneStepResult *) palloc0(sizeof(PruneStepResult));
33333300
if (list_length(cstep->source_stepids) == 0)
33343301
{
33353302
PartitionBoundInfo boundinfo = context->boundinfo;
3303+
int rangemax;
3304+
3305+
/*
3306+
* Add all valid offsets into the boundinfo->indexes array. For range
3307+
* partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1)
3308+
* valid entries; otherwise there are boundinfo->ndatums.
3309+
*/
3310+
rangemax = context->strategy == PARTITION_STRATEGY_RANGE ?
3311+
boundinfo->ndatums : boundinfo->ndatums - 1;
33363312

3337-
result->bound_offsets = bms_add_range(NULL, 0, boundinfo->ndatums - 1);
3313+
result->bound_offsets =
3314+
bms_add_range(result->bound_offsets, 0, rangemax);
33383315
result->scan_default = partition_bound_has_default(boundinfo);
33393316
result->scan_null = partition_bound_accepts_nulls(boundinfo);
33403317
return result;

src/include/partitioning/partbounds.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
* pointed by remainder produced when hash value of the datum-tuple is divided
5757
* by the greatest modulus.
5858
*/
59-
6059
typedef struct PartitionBoundInfoData
6160
{
6261
char strategy; /* hash, list or range? */

0 commit comments

Comments
 (0)