diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 1746cb8793..746cd1e9d7 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -1323,16 +1323,14 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull) { case PARTITION_STRATEGY_HASH: { - int greatest_modulus; uint64 rowHash; - greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(key->partnatts, key->partsupfunc, key->partcollation, values, isnull); - part_index = boundinfo->indexes[rowHash % greatest_modulus]; + part_index = boundinfo->indexes[rowHash % boundinfo->nindexes]; } break; diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index b9aeb77bc2..0c3f212ff2 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -224,7 +224,6 @@ static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, PartitionRangeBound *probe, int32 *cmpval); -static int get_partition_bound_num_indexes(PartitionBoundInfo b); static Expr *make_partition_op_expr(PartitionKey key, int keynum, uint16 strategy, Expr *arg1, Expr *arg2); static Oid get_partition_operator(PartitionKey key, int col, @@ -398,6 +397,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->ndatums = ndatums; boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); + boundinfo->nindexes = greatest_modulus; boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int)); for (i = 0; i < greatest_modulus; i++) boundinfo->indexes[i] = -1; @@ -530,6 +530,7 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->ndatums = ndatums; boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); + boundinfo->nindexes = ndatums; boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); /* @@ -725,8 +726,9 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts, /* * For range partitioning, an additional value of -1 is stored as the last - * element. + * element of the indexes[] array. */ + boundinfo->nindexes = ndatums + 1; boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int)); for (i = 0; i < ndatums; i++) @@ -807,45 +809,41 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, if (b1->ndatums != b2->ndatums) return false; + if (b1->nindexes != b2->nindexes) + return false; + if (b1->null_index != b2->null_index) return false; if (b1->default_index != b2->default_index) return false; + /* For all partition strategies, the indexes[] arrays have to match */ + for (i = 0; i < b1->nindexes; i++) + { + if (b1->indexes[i] != b2->indexes[i]) + return false; + } + + /* Finally, compare the datums[] arrays */ if (b1->strategy == PARTITION_STRATEGY_HASH) { - int greatest_modulus = get_hash_partition_greatest_modulus(b1); - - /* - * If two hash partitioned tables have different greatest moduli, - * their partition schemes don't match. - */ - if (greatest_modulus != get_hash_partition_greatest_modulus(b2)) - return false; - /* * We arrange the partitions in the ascending order of their moduli * and remainders. Also every modulus is factor of next larger * modulus. Therefore we can safely store index of a given partition * in indexes array at remainder of that partition. Also entries at * (remainder + N * modulus) positions in indexes array are all same - * for (modulus, remainder) specification for any partition. Thus - * datums array from both the given bounds are same, if and only if - * their indexes array will be same. So, it suffices to compare - * indexes array. - */ - for (i = 0; i < greatest_modulus; i++) - if (b1->indexes[i] != b2->indexes[i]) - return false; - -#ifdef USE_ASSERT_CHECKING - - /* - * Nonetheless make sure that the bounds are indeed same when the + * for (modulus, remainder) specification for any partition. Thus the + * datums arrays from the given bounds are the same, if and only if + * their indexes arrays are the same. So, it suffices to compare the + * indexes arrays. + * + * Nonetheless make sure that the bounds are indeed the same when the * indexes match. Hash partition bound stores modulus and remainder * at b1->datums[i][0] and b1->datums[i][1] position respectively. */ +#ifdef USE_ASSERT_CHECKING for (i = 0; i < b1->ndatums; i++) Assert((b1->datums[i][0] == b2->datums[i][0] && b1->datums[i][1] == b2->datums[i][1])); @@ -891,15 +889,7 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, parttypbyval[j], parttyplen[j])) return false; } - - if (b1->indexes[i] != b2->indexes[i]) - return false; } - - /* There are ndatums+1 indexes in case of range partitions */ - if (b1->strategy == PARTITION_STRATEGY_RANGE && - b1->indexes[i] != b2->indexes[i]) - return false; } return true; } @@ -920,8 +910,8 @@ partition_bounds_copy(PartitionBoundInfo src, PartitionBoundInfo dest; int i; int ndatums; + int nindexes; int partnatts; - int num_indexes; bool hash_part; int natts; @@ -929,10 +919,9 @@ partition_bounds_copy(PartitionBoundInfo src, dest->strategy = src->strategy; ndatums = dest->ndatums = src->ndatums; + nindexes = dest->nindexes = src->nindexes; partnatts = key->partnatts; - num_indexes = get_partition_bound_num_indexes(src); - /* List partitioned tables have only a single partition key. */ Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1); @@ -990,8 +979,8 @@ partition_bounds_copy(PartitionBoundInfo src, } } - dest->indexes = (int *) palloc(sizeof(int) * num_indexes); - memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes); + dest->indexes = (int *) palloc(sizeof(int) * nindexes); + memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes); dest->null_index = src->null_index; dest->default_index = src->default_index; @@ -2456,6 +2445,7 @@ build_merged_partition_bounds(char strategy, List *merged_datums, } Assert(list_length(merged_indexes) == ndatums); + merged_bounds->nindexes = ndatums; merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums); pos = 0; foreach(lc, merged_indexes) @@ -2889,7 +2879,7 @@ check_new_partition_bound(char *relname, Relation parent, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("every hash partition modulus must be a factor of the next larger modulus"))); - greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); + greatest_modulus = boundinfo->nindexes; remainder = spec->remainder; /* @@ -3282,18 +3272,15 @@ check_default_partition_contents(Relation parent, Relation default_rel, /* * get_hash_partition_greatest_modulus * - * Returns the greatest modulus of the hash partition bound. The greatest - * modulus will be at the end of the datums array because hash partitions are - * arranged in the ascending order of their moduli and remainders. + * Returns the greatest modulus of the hash partition bound. + * This is no longer used in the core code, but we keep it around + * in case external modules are using it. */ int get_hash_partition_greatest_modulus(PartitionBoundInfo bound) { Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH); - Assert(bound->datums && bound->ndatums > 0); - Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0); - - return DatumGetInt32(bound->datums[bound->ndatums - 1][0]); + return bound->nindexes; } /* @@ -3697,46 +3684,6 @@ qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) b1, b2); } -/* - * get_partition_bound_num_indexes - * - * Returns the number of the entries in the partition bound indexes array. - */ -static int -get_partition_bound_num_indexes(PartitionBoundInfo bound) -{ - int num_indexes; - - Assert(bound); - - switch (bound->strategy) - { - case PARTITION_STRATEGY_HASH: - - /* - * The number of the entries in the indexes array is same as the - * greatest modulus. - */ - num_indexes = get_hash_partition_greatest_modulus(bound); - break; - - case PARTITION_STRATEGY_LIST: - num_indexes = bound->ndatums; - break; - - case PARTITION_STRATEGY_RANGE: - /* Range partitioned table has an extra index. */ - num_indexes = bound->ndatums + 1; - break; - - default: - elog(ERROR, "unexpected partition strategy: %d", - (int) bound->strategy); - } - - return num_indexes; -} - /* * get_partition_operator * diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index fac921eea5..42c7c5f554 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -781,7 +781,10 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps) scan_default = final_result->scan_default; while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0) { - int partindex = context->boundinfo->indexes[i]; + int partindex; + + Assert(i < context->boundinfo->nindexes); + partindex = context->boundinfo->indexes[i]; if (partindex < 0) { @@ -2514,20 +2517,19 @@ get_matching_hash_bounds(PartitionPruneContext *context, for (i = 0; i < partnatts; i++) isnull[i] = bms_is_member(i, nullkeys); - greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation, values, isnull); + greatest_modulus = boundinfo->nindexes; if (partindices[rowHash % greatest_modulus] >= 0) result->bound_offsets = bms_make_singleton(rowHash % greatest_modulus); } else { - /* Getting here means at least one hash partition exists. */ - Assert(boundinfo->ndatums > 0); + /* Report all valid offsets into the boundinfo->indexes array. */ result->bound_offsets = bms_add_range(NULL, 0, - boundinfo->ndatums - 1); + boundinfo->nindexes - 1); } /* @@ -3388,30 +3390,20 @@ perform_pruning_combine_step(PartitionPruneContext *context, PartitionPruneStepCombine *cstep, PruneStepResult **step_results) { - ListCell *lc1; - PruneStepResult *result = NULL; + PruneStepResult *result = (PruneStepResult *) palloc0(sizeof(PruneStepResult)); bool firststep; + ListCell *lc1; /* * A combine step without any source steps is an indication to not perform * any partition pruning. Return all datum indexes in that case. */ - result = (PruneStepResult *) palloc0(sizeof(PruneStepResult)); - if (list_length(cstep->source_stepids) == 0) + if (cstep->source_stepids == NIL) { PartitionBoundInfo boundinfo = context->boundinfo; - int rangemax; - - /* - * Add all valid offsets into the boundinfo->indexes array. For range - * partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1) - * valid entries; otherwise there are boundinfo->ndatums. - */ - rangemax = context->strategy == PARTITION_STRATEGY_RANGE ? - boundinfo->ndatums : boundinfo->ndatums - 1; result->bound_offsets = - bms_add_range(result->bound_offsets, 0, rangemax); + bms_add_range(NULL, 0, boundinfo->nindexes - 1); result->scan_default = partition_bound_has_default(boundinfo); result->scan_null = partition_bound_accepts_nulls(boundinfo); return result; diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index 6bd330dd93..ebf3ff1f49 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -30,7 +30,7 @@ struct RelOptInfo; /* avoid including pathnodes.h here */ * In the case of range partitioning, ndatums will typically be far less than * 2 * nparts, because a partition's upper bound and the next partition's lower * bound are the same in most common cases, and we only store one of them (the - * upper bound). In case of hash partitioning, ndatums will be same as the + * upper bound). In case of hash partitioning, ndatums will be the same as the * number of partitions. * * For range and list partitioned tables, datums is an array of datum-tuples @@ -46,24 +46,31 @@ struct RelOptInfo; /* avoid including pathnodes.h here */ * the partition key's operator classes and collations. * * In the case of list partitioning, the indexes array stores one entry for - * every datum, which is the index of the partition that accepts a given datum. - * In case of range partitioning, it stores one entry per distinct range - * datum, which is the index of the partition for which a given datum - * is an upper bound. In the case of hash partitioning, the number of the - * entries in the indexes array is same as the greatest modulus amongst all - * partitions. For a given partition key datum-tuple, the index of the - * partition which would accept that datum-tuple would be given by the entry - * pointed by remainder produced when hash value of the datum-tuple is divided - * by the greatest modulus. + * each datum-array entry, which is the index of the partition that accepts + * rows matching that datum. So nindexes == ndatums. + * + * In the case of range partitioning, the indexes array stores one entry per + * distinct range datum, which is the index of the partition for which that + * datum is an upper bound (or -1 for a "gap" that has no partition). It is + * convenient to have an extra -1 entry representing values above the last + * range datum, so nindexes == ndatums + 1. + * + * In the case of hash partitioning, the number of entries in the indexes + * array is the same as the greatest modulus amongst all partitions (which + * is a multiple of all partition moduli), so nindexes == greatest modulus. + * The indexes array is indexed according to the hash key's remainder modulo + * the greatest modulus, and it contains either the partition index accepting + * that remainder, or -1 if there is no partition for that remainder. */ typedef struct PartitionBoundInfoData { char strategy; /* hash, list or range? */ - int ndatums; /* Length of the datums following array */ + int ndatums; /* Length of the datums[] array */ Datum **datums; PartitionRangeDatumKind **kind; /* The kind of each range bound datum; * NULL for hash and list partitioned * tables */ + int nindexes; /* Length of the indexes[] array */ int *indexes; /* Partition indexes */ int null_index; /* Index of the null-accepting partition; -1 * if there isn't one */ diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index c72a6d051f..bde29e38a9 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -1538,26 +1538,27 @@ drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, boolrangep, rp, coll_pru -- result on different machines. See the definitions of -- part_part_test_int4_ops and part_test_text_ops in insert.sql. -- -create table hp (a int, b text) partition by hash (a part_test_int4_ops, b part_test_text_ops); +create table hp (a int, b text, c int) + partition by hash (a part_test_int4_ops, b part_test_text_ops); create table hp0 partition of hp for values with (modulus 4, remainder 0); create table hp3 partition of hp for values with (modulus 4, remainder 3); create table hp1 partition of hp for values with (modulus 4, remainder 1); create table hp2 partition of hp for values with (modulus 4, remainder 2); -insert into hp values (null, null); -insert into hp values (1, null); -insert into hp values (1, 'xxx'); -insert into hp values (null, 'xxx'); -insert into hp values (2, 'xxx'); -insert into hp values (1, 'abcde'); -select tableoid::regclass, * from hp order by 1; - tableoid | a | b -----------+---+------- - hp0 | | - hp0 | 1 | xxx - hp3 | 2 | xxx - hp1 | 1 | - hp2 | | xxx - hp2 | 1 | abcde +insert into hp values (null, null, 0); +insert into hp values (1, null, 1); +insert into hp values (1, 'xxx', 2); +insert into hp values (null, 'xxx', 3); +insert into hp values (2, 'xxx', 4); +insert into hp values (1, 'abcde', 5); +select tableoid::regclass, * from hp order by c; + tableoid | a | b | c +----------+---+-------+--- + hp0 | | | 0 + hp1 | 1 | | 1 + hp0 | 1 | xxx | 2 + hp2 | | xxx | 3 + hp3 | 2 | xxx | 4 + hp2 | 1 | abcde | 5 (6 rows) -- partial keys won't prune, nor would non-equality conditions @@ -1715,6 +1716,33 @@ explain (costs off) select * from hp where (a = 1 and b = 'abcde') or (a = 2 and Filter: (((a = 1) AND (b = 'abcde'::text)) OR ((a = 2) AND (b = 'xxx'::text)) OR ((a IS NULL) AND (b IS NULL))) (7 rows) +-- test pruning when not all the partitions exist +drop table hp1; +drop table hp3; +explain (costs off) select * from hp where a = 1 and b = 'abcde'; + QUERY PLAN +--------------------------------------------- + Seq Scan on hp2 hp + Filter: ((a = 1) AND (b = 'abcde'::text)) +(2 rows) + +explain (costs off) select * from hp where a = 1 and b = 'abcde' and + (c = 2 or c = 3); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on hp2 hp + Filter: ((a = 1) AND (b = 'abcde'::text) AND ((c = 2) OR (c = 3))) +(2 rows) + +drop table hp2; +explain (costs off) select * from hp where a = 1 and b = 'abcde' and + (c = 2 or c = 3); + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + drop table hp; -- -- Test runtime partition pruning diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index ffd5fe8b0d..6ccb52ad1d 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -304,19 +304,20 @@ drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, boolrangep, rp, coll_pru -- part_part_test_int4_ops and part_test_text_ops in insert.sql. -- -create table hp (a int, b text) partition by hash (a part_test_int4_ops, b part_test_text_ops); +create table hp (a int, b text, c int) + partition by hash (a part_test_int4_ops, b part_test_text_ops); create table hp0 partition of hp for values with (modulus 4, remainder 0); create table hp3 partition of hp for values with (modulus 4, remainder 3); create table hp1 partition of hp for values with (modulus 4, remainder 1); create table hp2 partition of hp for values with (modulus 4, remainder 2); -insert into hp values (null, null); -insert into hp values (1, null); -insert into hp values (1, 'xxx'); -insert into hp values (null, 'xxx'); -insert into hp values (2, 'xxx'); -insert into hp values (1, 'abcde'); -select tableoid::regclass, * from hp order by 1; +insert into hp values (null, null, 0); +insert into hp values (1, null, 1); +insert into hp values (1, 'xxx', 2); +insert into hp values (null, 'xxx', 3); +insert into hp values (2, 'xxx', 4); +insert into hp values (1, 'abcde', 5); +select tableoid::regclass, * from hp order by c; -- partial keys won't prune, nor would non-equality conditions explain (costs off) select * from hp where a = 1; @@ -337,6 +338,16 @@ explain (costs off) select * from hp where a = 2 and b = 'xxx'; explain (costs off) select * from hp where a = 1 and b = 'abcde'; explain (costs off) select * from hp where (a = 1 and b = 'abcde') or (a = 2 and b = 'xxx') or (a is null and b is null); +-- test pruning when not all the partitions exist +drop table hp1; +drop table hp3; +explain (costs off) select * from hp where a = 1 and b = 'abcde'; +explain (costs off) select * from hp where a = 1 and b = 'abcde' and + (c = 2 or c = 3); +drop table hp2; +explain (costs off) select * from hp where a = 1 and b = 'abcde' and + (c = 2 or c = 3); + drop table hp; --