Improve pruning of a default partition

When querying a partitioned table containing a default partition, we
were wrongly deciding to include it in the scan too early in the
process, failing to exclude it in some cases.  If we reinterpret the
PruneStepResult.scan_default flag slightly, we can do a better job at
detecting that it can be excluded.  The change is that we avoid setting
the flag for that pruning step unless the step absolutely requires the
default partition to be scanned (in contrast with the previous
arrangement, which was to set it unless the step was able to prune it).
So get_matching_partitions() must explicitly check the partition that
each returned bound value corresponds to in order to determine whether
the default one needs to be included, rather than relying on the flag
from the final step result.

Author: Yuzuko Hosoya <hosoya.yuzuko@lab.ntt.co.jp>
Reviewed-by: Amit Langote <Langote_Amit_f8@lab.ntt.co.jp>
Discussion: https://postgr.es/m/00e601d4ca86$932b8bc0$b982a340$@lab.ntt.co.jp
This commit is contained in:
Alvaro Herrera 2019-08-04 11:18:45 -04:00
parent 69edf4f880
commit 489247b0e6
4 changed files with 111 additions and 130 deletions

View File

@ -735,6 +735,7 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
PruneStepResult **results, PruneStepResult **results,
*final_result; *final_result;
ListCell *lc; ListCell *lc;
bool scan_default;
/* If there are no pruning steps then all partitions match. */ /* If there are no pruning steps then all partitions match. */
if (num_steps == 0) if (num_steps == 0)
@ -786,30 +787,39 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
Assert(final_result != NULL); Assert(final_result != NULL);
i = -1; i = -1;
result = NULL; result = NULL;
scan_default = final_result->scan_default;
while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0) while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0)
{ {
int partindex = context->boundinfo->indexes[i]; int partindex = context->boundinfo->indexes[i];
/* if (partindex < 0)
* In range and hash partitioning cases, some slots may contain -1, {
* indicating that no partition has been defined to accept a given /*
* range of data or for a given remainder, respectively. The default * In range partitioning cases, if a partition index is -1 it
* partition, if any, in case of range partitioning, will be added to * means that the bound at the offset is the upper bound for a
* the result, because the specified range still satisfies the query's * range not covered by any partition (other than a possible
* conditions. * default partition). In hash partitioning, the same means no
*/ * partition has been defined for the corresponding remainder
if (partindex >= 0) * value.
result = bms_add_member(result, partindex); *
* In either case, the value is still part of the queried range of
* values, so mark to scan the default partition if one exists.
*/
scan_default |= partition_bound_has_default(context->boundinfo);
continue;
}
result = bms_add_member(result, partindex);
} }
/* Add the null and/or default partition if needed and if present. */ /* Add the null and/or default partition if needed and present. */
if (final_result->scan_null) if (final_result->scan_null)
{ {
Assert(context->strategy == PARTITION_STRATEGY_LIST); Assert(context->strategy == PARTITION_STRATEGY_LIST);
Assert(partition_bound_accepts_nulls(context->boundinfo)); Assert(partition_bound_accepts_nulls(context->boundinfo));
result = bms_add_member(result, context->boundinfo->null_index); result = bms_add_member(result, context->boundinfo->null_index);
} }
if (final_result->scan_default) if (scan_default)
{ {
Assert(context->strategy == PARTITION_STRATEGY_LIST || Assert(context->strategy == PARTITION_STRATEGY_LIST ||
context->strategy == PARTITION_STRATEGY_RANGE); context->strategy == PARTITION_STRATEGY_RANGE);
@ -2438,6 +2448,11 @@ get_matching_hash_bounds(PartitionPruneContext *context,
* get_matching_list_bounds * get_matching_list_bounds
* Determine the offsets of list bounds matching the specified value, * Determine the offsets of list bounds matching the specified value,
* according to the semantics of the given operator strategy * according to the semantics of the given operator strategy
*
* scan_default will be set in the returned struct, if the default partition
* needs to be scanned, provided one exists at all. scan_null will be set if
* the special null-accepting partition needs to be scanned.
*
* 'opstrategy' if non-zero must be a btree strategy number. * 'opstrategy' if non-zero must be a btree strategy number.
* *
* 'value' contains the value to use for pruning. * 'value' contains the value to use for pruning.
@ -2640,8 +2655,13 @@ get_matching_list_bounds(PartitionPruneContext *context,
* Each datum whose offset is in result is to be treated as the upper bound of * Each datum whose offset is in result is to be treated as the upper bound of
* the partition that will contain the desired values. * the partition that will contain the desired values.
* *
* If default partition needs to be scanned for given values, set scan_default * scan_default is set in the returned struct if a default partition exists
* in result if present. * and we're absolutely certain that it needs to be scanned. We do *not* set
* it just because values match portions of the key space uncovered by
* partitions other than default (space which we normally assume to belong to
* the default partition): the final set of bounds obtained after combining
* multiple pruning steps might exclude it, so we infer its inclusion
* elsewhere.
* *
* 'opstrategy' if non-zero must be a btree strategy number. * 'opstrategy' if non-zero must be a btree strategy number.
* *
@ -2667,8 +2687,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
int *partindices = boundinfo->indexes; int *partindices = boundinfo->indexes;
int off, int off,
minoff, minoff,
maxoff, maxoff;
i;
bool is_equal; bool is_equal;
bool inclusive = false; bool inclusive = false;
@ -2698,13 +2717,15 @@ get_matching_range_bounds(PartitionPruneContext *context,
*/ */
if (nvalues == 0) if (nvalues == 0)
{ {
/* ignore key space not covered by any partitions */
if (partindices[minoff] < 0) if (partindices[minoff] < 0)
minoff++; minoff++;
if (partindices[maxoff] < 0) if (partindices[maxoff] < 0)
maxoff--; maxoff--;
result->scan_default = partition_bound_has_default(boundinfo); result->scan_default = partition_bound_has_default(boundinfo);
Assert(minoff >= 0 && maxoff >= 0); Assert(partindices[minoff] >= 0 &&
partindices[maxoff] >= 0);
result->bound_offsets = bms_add_range(NULL, minoff, maxoff); result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
return result; return result;
@ -2732,11 +2753,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
if (nvalues == partnatts) if (nvalues == partnatts)
{ {
/* There can only be zero or one matching partition. */ /* There can only be zero or one matching partition. */
if (partindices[off + 1] >= 0) result->bound_offsets = bms_make_singleton(off + 1);
result->bound_offsets = bms_make_singleton(off + 1);
else
result->scan_default =
partition_bound_has_default(boundinfo);
return result; return result;
} }
else else
@ -2824,57 +2841,21 @@ get_matching_range_bounds(PartitionPruneContext *context,
maxoff = off + 1; maxoff = off + 1;
} }
/*
* Skip if minoff/maxoff are actually the upper bound of a
* un-assigned portion of values.
*/
if (partindices[minoff] < 0 && minoff < boundinfo->ndatums)
minoff++;
if (partindices[maxoff] < 0 && maxoff >= 1)
maxoff--;
/*
* There may exist a range of values unassigned to any
* non-default partition between the datums at minoff and
* maxoff. Add the default partition in that case.
*/
if (partition_bound_has_default(boundinfo))
{
for (i = minoff; i <= maxoff; i++)
{
if (partindices[i] < 0)
{
result->scan_default = true;
break;
}
}
}
Assert(minoff >= 0 && maxoff >= 0); Assert(minoff >= 0 && maxoff >= 0);
result->bound_offsets = bms_add_range(NULL, minoff, maxoff); result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
} }
else if (off >= 0) /* !is_equal */ else
{ {
/* /*
* The lookup value falls in the range between some bounds in * The lookup value falls in the range between some bounds in
* boundinfo. 'off' would be the offset of the greatest bound * boundinfo. 'off' would be the offset of the greatest bound
* that is <= lookup value, so add off + 1 to the result * that is <= lookup value, so add off + 1 to the result
* instead as the offset of the upper bound of the only * instead as the offset of the upper bound of the only
* partition that may contain the lookup value. * partition that may contain the lookup value. If 'off' is
* -1 indicating that all bounds are greater, then we simply
* end up adding the first bound's offset, that is, 0.
*/ */
if (partindices[off + 1] >= 0) result->bound_offsets = bms_make_singleton(off + 1);
result->bound_offsets = bms_make_singleton(off + 1);
else
result->scan_default =
partition_bound_has_default(boundinfo);
}
else
{
/*
* off < 0: the lookup value is smaller than all bounds, so
* only the default partition qualifies, if there is one.
*/
result->scan_default = partition_bound_has_default(boundinfo);
} }
return result; return result;
@ -2945,16 +2926,18 @@ get_matching_range_bounds(PartitionPruneContext *context,
minoff = inclusive ? off : off + 1; minoff = inclusive ? off : off + 1;
} }
/*
* lookup value falls in the range between some bounds in
* boundinfo. off would be the offset of the greatest bound
* that is <= lookup value, so add off + 1 to the result
* instead as the offset of the upper bound of the smallest
* partition that may contain the lookup value.
*/
else else
{
/*
* lookup value falls in the range between some bounds in
* boundinfo. off would be the offset of the greatest
* bound that is <= lookup value, so add off + 1 to the
* result instead as the offset of the upper bound of the
* smallest partition that may contain the lookup value.
*/
minoff = off + 1; minoff = off + 1;
}
} }
break; break;
@ -2972,16 +2955,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
boundinfo, boundinfo,
nvalues, values, nvalues, values,
&is_equal); &is_equal);
if (off < 0) if (off >= 0)
{
/*
* All bounds are greater than the key, so we could only
* expect to find the lookup key in the default partition.
*/
result->scan_default = partition_bound_has_default(boundinfo);
return result;
}
else
{ {
/* /*
* See the comment above. * See the comment above.
@ -3029,6 +3003,14 @@ get_matching_range_bounds(PartitionPruneContext *context,
else else
maxoff = off; maxoff = off;
} }
else
{
/*
* 'off' is -1 indicating that all bounds are greater, so just
* set the first bound's offset as maxoff.
*/
maxoff = off + 1;
}
break; break;
default: default:
@ -3036,58 +3018,43 @@ get_matching_range_bounds(PartitionPruneContext *context,
break; break;
} }
Assert(minoff >= 0 && minoff <= boundinfo->ndatums);
Assert(maxoff >= 0 && maxoff <= boundinfo->ndatums);
/* /*
* Skip a gap and when doing so, check if the bound contains a finite * If the smallest partition to return has MINVALUE (negative infinity) as
* value to decide if we need to add the default partition. If it's an * its lower bound, increment it to point to the next finite bound
* infinite bound, we need not add the default partition, as having an * (supposedly its upper bound), so that we don't advertently end up
* infinite bound means the partition in question catches any values that * scanning the default partition.
* would otherwise be in the default partition.
*/ */
if (partindices[minoff] < 0) if (minoff < boundinfo->ndatums && partindices[minoff] < 0)
{ {
int lastkey = nvalues - 1; int lastkey = nvalues - 1;
if (minoff >= 0 && if (boundinfo->kind[minoff][lastkey] ==
minoff < boundinfo->ndatums && PARTITION_RANGE_DATUM_MINVALUE)
boundinfo->kind[minoff][lastkey] == {
PARTITION_RANGE_DATUM_VALUE) minoff++;
result->scan_default = partition_bound_has_default(boundinfo); Assert(boundinfo->indexes[minoff] >= 0);
}
minoff++;
} }
/* /*
* Skip a gap. See the above comment about how we decide whether or not * If the previous greatest partition has MAXVALUE (positive infinity) as
* to scan the default partition based whether the datum that will become * its upper bound (something only possible to do with multi-column range
* the maximum datum is finite or not. * partitioning), we scan switch to it as the greatest partition to
* return. Again, so that we don't advertently end up scanning the
* default partition.
*/ */
if (maxoff >= 1 && partindices[maxoff] < 0) if (maxoff >= 1 && partindices[maxoff] < 0)
{ {
int lastkey = nvalues - 1; int lastkey = nvalues - 1;
if (maxoff >= 0 && if (boundinfo->kind[maxoff - 1][lastkey] ==
maxoff <= boundinfo->ndatums && PARTITION_RANGE_DATUM_MAXVALUE)
boundinfo->kind[maxoff - 1][lastkey] ==
PARTITION_RANGE_DATUM_VALUE)
result->scan_default = partition_bound_has_default(boundinfo);
maxoff--;
}
if (partition_bound_has_default(boundinfo))
{
/*
* There may exist a range of values unassigned to any non-default
* partition between the datums at minoff and maxoff. Add the default
* partition in that case.
*/
for (i = minoff; i <= maxoff; i++)
{ {
if (partindices[i] < 0) maxoff--;
{ Assert(boundinfo->indexes[maxoff] >= 0);
result->scan_default = true;
break;
}
} }
} }
@ -3332,14 +3299,24 @@ perform_pruning_combine_step(PartitionPruneContext *context,
/* /*
* A combine step without any source steps is an indication to not perform * A combine step without any source steps is an indication to not perform
* any partition pruning, we just return all partitions. * any partition pruning. Return all datum indexes in that case.
*/ */
result = (PruneStepResult *) palloc0(sizeof(PruneStepResult)); result = (PruneStepResult *) palloc0(sizeof(PruneStepResult));
if (list_length(cstep->source_stepids) == 0) if (list_length(cstep->source_stepids) == 0)
{ {
PartitionBoundInfo boundinfo = context->boundinfo; PartitionBoundInfo boundinfo = context->boundinfo;
int rangemax;
result->bound_offsets = bms_add_range(NULL, 0, boundinfo->ndatums - 1); /*
* Add all valid offsets into the boundinfo->indexes array. For range
* partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1)
* valid entries; otherwise there are boundinfo->ndatums.
*/
rangemax = context->strategy == PARTITION_STRATEGY_RANGE ?
boundinfo->ndatums : boundinfo->ndatums - 1;
result->bound_offsets =
bms_add_range(result->bound_offsets, 0, rangemax);
result->scan_default = partition_bound_has_default(boundinfo); result->scan_default = partition_bound_has_default(boundinfo);
result->scan_null = partition_bound_accepts_nulls(boundinfo); result->scan_null = partition_bound_accepts_nulls(boundinfo);
return result; return result;

View File

@ -56,7 +56,6 @@
* pointed by remainder produced when hash value of the datum-tuple is divided * pointed by remainder produced when hash value of the datum-tuple is divided
* by the greatest modulus. * by the greatest modulus.
*/ */
typedef struct PartitionBoundInfoData typedef struct PartitionBoundInfoData
{ {
char strategy; /* hash, list or range? */ char strategy; /* hash, list or range? */

View File

@ -504,15 +504,13 @@ explain (costs off) select * from rlp where a <= 31;
Filter: (a <= 31) Filter: (a <= 31)
-> Seq Scan on rlp5_1 -> Seq Scan on rlp5_1
Filter: (a <= 31) Filter: (a <= 31)
-> Seq Scan on rlp5_default
Filter: (a <= 31)
-> Seq Scan on rlp_default_10 -> Seq Scan on rlp_default_10
Filter: (a <= 31) Filter: (a <= 31)
-> Seq Scan on rlp_default_30 -> Seq Scan on rlp_default_30
Filter: (a <= 31) Filter: (a <= 31)
-> Seq Scan on rlp_default_default -> Seq Scan on rlp_default_default
Filter: (a <= 31) Filter: (a <= 31)
(29 rows) (27 rows)
explain (costs off) select * from rlp where a = 1 or a = 7; explain (costs off) select * from rlp where a = 1 or a = 7;
QUERY PLAN QUERY PLAN
@ -559,11 +557,7 @@ explain (costs off) select * from rlp where a > 20 and a < 27;
Filter: ((a > 20) AND (a < 27)) Filter: ((a > 20) AND (a < 27))
-> Seq Scan on rlp4_2 -> Seq Scan on rlp4_2
Filter: ((a > 20) AND (a < 27)) Filter: ((a > 20) AND (a < 27))
-> Seq Scan on rlp4_default (5 rows)
Filter: ((a > 20) AND (a < 27))
-> Seq Scan on rlp_default_default
Filter: ((a > 20) AND (a < 27))
(9 rows)
explain (costs off) select * from rlp where a = 29; explain (costs off) select * from rlp where a = 29;
QUERY PLAN QUERY PLAN
@ -588,6 +582,16 @@ explain (costs off) select * from rlp where a >= 29;
Filter: (a >= 29) Filter: (a >= 29)
(11 rows) (11 rows)
explain (costs off) select * from rlp where a < 1 or (a > 20 and a < 25);
QUERY PLAN
------------------------------------------------------
Append
-> Seq Scan on rlp1
Filter: ((a < 1) OR ((a > 20) AND (a < 25)))
-> Seq Scan on rlp4_1
Filter: ((a < 1) OR ((a > 20) AND (a < 25)))
(5 rows)
-- redundant clauses are eliminated -- redundant clauses are eliminated
explain (costs off) select * from rlp where a > 1 and a = 10; /* only default */ explain (costs off) select * from rlp where a > 1 and a = 10; /* only default */
QUERY PLAN QUERY PLAN

View File

@ -83,6 +83,7 @@ explain (costs off) select * from rlp where a = 1 or b = 'ab';
explain (costs off) select * from rlp where a > 20 and a < 27; explain (costs off) select * from rlp where a > 20 and a < 27;
explain (costs off) select * from rlp where a = 29; explain (costs off) select * from rlp where a = 29;
explain (costs off) select * from rlp where a >= 29; explain (costs off) select * from rlp where a >= 29;
explain (costs off) select * from rlp where a < 1 or (a > 20 and a < 25);
-- redundant clauses are eliminated -- redundant clauses are eliminated
explain (costs off) select * from rlp where a > 1 and a = 10; /* only default */ explain (costs off) select * from rlp where a > 1 and a = 10; /* only default */