diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 3f41939bea..daba66c187 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -2875,6 +2875,19 @@ VALUES ('Albany', NULL, NULL, 'NY'); + + + Hash Partitioning + + + + The table is partitioned by specifying a modulus and a remainder for + each partition. Each partition will hold the rows for which the hash + value of the partition key divided by the specified modulus will + produce the specified remainder. + + + If your application needs to use other forms of partitioning not listed @@ -2901,9 +2914,8 @@ VALUES ('Albany', NULL, NULL, 'NY'); All rows inserted into a partitioned table will be routed to one of the partitions based on the value of the partition key. Each partition has a subset of the data defined by its - partition bounds. Currently supported - partitioning methods include range and list, where each partition is - assigned a range of keys and a list of keys, respectively. + partition bounds. The currently supported + partitioning methods are range, list, and hash. @@ -3328,11 +3340,11 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02 - Declarative partitioning only supports list and range partitioning, - whereas table inheritance allows data to be divided in a manner of - the user's choosing. (Note, however, that if constraint exclusion is - unable to prune partitions effectively, query performance will be very - poor.) + Declarative partitioning only supports range, list and hash + partitioning, whereas table inheritance allows data to be divided in a + manner of the user's choosing. (Note, however, that if constraint + exclusion is unable to prune partitions effectively, query performance + will be very poor.) diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 41acda003f..3b19ea7131 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -1431,6 +1431,13 @@ ALTER TABLE cities ATTACH PARTITION cities_partdef DEFAULT; + + Attach a partition to hash partitioned table: + +ALTER TABLE orders + ATTACH PARTITION orders_p4 FOR VALUES WITH (MODULUS 4, REMAINDER 3); + + Detach a partition from partitioned table: diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 4f7b741526..bbb3a51def 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -28,7 +28,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI [, ... ] ] ) [ INHERITS ( parent_table [, ... ] ) ] -[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -39,7 +39,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI | table_constraint } [, ... ] ) ] -[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -50,7 +50,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI | table_constraint } [, ... ] ) ] { FOR VALUES partition_bound_spec | DEFAULT } -[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -88,7 +88,8 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI IN ( { numeric_literal | string_literal | NULL } [, ...] ) | FROM ( { numeric_literal | string_literal | MINVALUE | MAXVALUE } [, ...] ) - TO ( { numeric_literal | string_literal | MINVALUE | MAXVALUE } [, ...] ) + TO ( { numeric_literal | string_literal | MINVALUE | MAXVALUE } [, ...] ) | +WITH ( MODULUS numeric_literal, REMAINDER numeric_literal ) index_parameters in UNIQUE, PRIMARY KEY, and EXCLUDE constraints are: @@ -256,7 +257,8 @@ FROM ( { numeric_literal | partition of the specified parent table. The table can be created either as a partition for specific values using FOR VALUES or as a default partition - using DEFAULT. + using DEFAULT. This option is not available for + hash-partitioned tables. @@ -264,8 +266,9 @@ FROM ( { numeric_literal | IN is used for list partitioning, - while the form with FROM and TO is used for - range partitioning. + the form with FROM and TO is used + for range partitioning, and the form with WITH is used + for hash partitioning. @@ -363,6 +366,29 @@ FROM ( { numeric_literal | + + When creating a hash partition, a modulus and remainder must be specified. + The modulus must be a positive integer, and the remainder must be a + non-negative integer less than the modulus. Typically, when initially + setting up a hash-partitioned table, you should choose a modulus equal to + the number of partitions and assign every table the same modulus and a + different remainder (see examples, below). However, it is not required + that every partition have the same modulus, only that every modulus which + occurs among the partitions of a hash-partitioned table is a factor of the + next larger modulus. This allows the number of partitions to be increased + incrementally without needing to move all the data at once. For example, + suppose you have a hash-partitioned table with 8 partitions, each of which + has modulus 8, but find it necessary to increase the number of partitions + to 16. You can detach one of the modulus-8 partitions, create two new + modulus-16 partitions covering the same portion of the key space (one with + a remainder equal to the remainder of the detached partition, and the + other with a remainder equal to that value plus 8), and repopulate them + with data. You can then repeat this -- perhaps at a later time -- for + each modulus-8 partition until none remain. While this may still involve + a large amount of data movement at each step, it is still better than + having to create a whole new table and move all the data at once. + + A partition must have the same column names and types as the partitioned table to which it belongs. If the parent is specified WITH @@ -486,20 +512,28 @@ FROM ( { numeric_literal | - PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ opclass ] [, ...] ) + PARTITION BY { RANGE | LIST | HASH } ( { column_name | ( expression ) } [ opclass ] [, ...] ) The optional PARTITION BY clause specifies a strategy of partitioning the table. The table thus created is called a partitioned table. The parenthesized list of columns or expressions forms the partition key - for the table. When using range partitioning, the partition key can - include multiple columns or expressions (up to 32, but this limit can be - altered when building PostgreSQL), but for + for the table. When using range or hash partitioning, the partition key + can include multiple columns or expressions (up to 32, but this limit can + be altered when building PostgreSQL), but for list partitioning, the partition key must consist of a single column or - expression. If no B-tree operator class is specified when creating a - partitioned table, the default B-tree operator class for the datatype will - be used. If there is none, an error will be reported. + expression. + + + + Range and list partitioning require a btree operator class, while hash + partitioning requires a hash operator class. If no operator class is + specified explicitly, the default operator class of the appropriate + type will be used; if no default operator class exists, an error will + be raised. When hash partitioning is used, the operator class used + must implement support function 2 (see + for details). @@ -1647,6 +1681,16 @@ CREATE TABLE cities ( name text not null, population bigint ) PARTITION BY LIST (left(lower(name), 1)); + + + + Create a hash partitioned table: + +CREATE TABLE orders ( + order_id bigint not null, + cust_id bigint not null, + status text +) PARTITION BY HASH (order_id); @@ -1701,6 +1745,19 @@ CREATE TABLE cities_ab_10000_to_100000 PARTITION OF cities_ab FOR VALUES FROM (10000) TO (100000); + + Create partitions of a hash partitioned table: + +CREATE TABLE orders_p1 PARTITION OF orders + FOR VALUES WITH (MODULUS 4, REMAINDER 0); +CREATE TABLE orders_p2 PARTITION OF orders + FOR VALUES WITH (MODULUS 4, REMAINDER 1); +CREATE TABLE orders_p3 PARTITION OF orders + FOR VALUES WITH (MODULUS 4, REMAINDER 2); +CREATE TABLE orders_p4 PARTITION OF orders + FOR VALUES WITH (MODULUS 4, REMAINDER 3); + + Create a default partition: diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 5daa8a1c19..cff59ed055 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -15,6 +15,7 @@ #include "postgres.h" +#include "access/hash.h" #include "access/heapam.h" #include "access/htup_details.h" #include "access/nbtree.h" @@ -46,6 +47,7 @@ #include "utils/datum.h" #include "utils/memutils.h" #include "utils/fmgroids.h" +#include "utils/hashutils.h" #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -61,26 +63,35 @@ * In the case of range partitioning, ndatums will typically be far less than * 2 * nparts, because a partition's upper bound and the next partition's lower * bound are the same in most common cases, and we only store one of them (the - * upper bound). + * upper bound). In case of hash partitioning, ndatums will be same as the + * number of partitions. + * + * For range and list partitioned tables, datums is an array of datum-tuples + * with key->partnatts datums each. For hash partitioned tables, it is an array + * of datum-tuples with 2 datums, modulus and remainder, corresponding to a + * given partition. * * In the case of list partitioning, the indexes array stores one entry for * every datum, which is the index of the partition that accepts a given datum. * In case of range partitioning, it stores one entry per distinct range * datum, which is the index of the partition for which a given datum - * is an upper bound. + * is an upper bound. In the case of hash partitioning, the number of the + * entries in the indexes array is same as the greatest modulus amongst all + * partitions. For a given partition key datum-tuple, the index of the + * partition which would accept that datum-tuple would be given by the entry + * pointed by remainder produced when hash value of the datum-tuple is divided + * by the greatest modulus. */ typedef struct PartitionBoundInfoData { - char strategy; /* list or range bounds? */ + char strategy; /* hash, list or range? */ int ndatums; /* Length of the datums following array */ - Datum **datums; /* Array of datum-tuples with key->partnatts - * datums each */ + Datum **datums; PartitionRangeDatumKind **kind; /* The kind of each range bound datum; - * NULL for list partitioned tables */ - int *indexes; /* Partition indexes; one entry per member of - * the datums array (plus one if range - * partitioned table) */ + * NULL for hash and list partitioned + * tables */ + int *indexes; /* Partition indexes */ int null_index; /* Index of the null-accepting partition; -1 * if there isn't one */ int default_index; /* Index of the default partition; -1 if there @@ -95,6 +106,14 @@ typedef struct PartitionBoundInfoData * is represented with one of the following structs. */ +/* One bound of a hash partition */ +typedef struct PartitionHashBound +{ + int modulus; + int remainder; + int index; +} PartitionHashBound; + /* One value coming from some (index'th) list partition */ typedef struct PartitionListValue { @@ -111,6 +130,7 @@ typedef struct PartitionRangeBound bool lower; /* this is the lower (vs upper) bound */ } PartitionRangeBound; +static int32 qsort_partition_hbound_cmp(const void *a, const void *b); static int32 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg); static int32 qsort_partition_rbound_cmp(const void *a, const void *b, @@ -126,6 +146,7 @@ static void get_range_key_properties(PartitionKey key, int keynum, ListCell **partexprs_item, Expr **keyCol, Const **lower_val, Const **upper_val); +static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec); static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec); static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec, bool for_default); @@ -134,6 +155,8 @@ static List *generate_partition_qual(Relation rel); static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index, List *datums, bool lower); +static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, + int remainder2); static int32 partition_rbound_cmp(PartitionKey key, Datum *datums1, PartitionRangeDatumKind *kind1, bool lower1, PartitionRangeBound *b2); @@ -149,6 +172,12 @@ static int partition_bound_bsearch(PartitionKey key, void *probe, bool probe_is_bound, bool *is_equal); static void get_partition_dispatch_recurse(Relation rel, Relation parent, List **pds, List **leaf_part_oids); +static int get_partition_bound_num_indexes(PartitionBoundInfo b); +static int get_greatest_modulus(PartitionBoundInfo b); +static uint64 compute_hash_value(PartitionKey key, Datum *values, bool *isnull); + +/* SQL-callable function for use in hash partition CHECK constraints */ +PG_FUNCTION_INFO_V1(satisfies_hash_partition); /* * RelationBuildPartitionDesc @@ -174,6 +203,9 @@ RelationBuildPartitionDesc(Relation rel) int ndatums = 0; int default_index = -1; + /* Hash partitioning specific */ + PartitionHashBound **hbounds = NULL; + /* List partitioning specific */ PartitionListValue **all_values = NULL; int null_index = -1; @@ -255,7 +287,35 @@ RelationBuildPartitionDesc(Relation rel) oids[i++] = lfirst_oid(cell); /* Convert from node to the internal representation */ - if (key->strategy == PARTITION_STRATEGY_LIST) + if (key->strategy == PARTITION_STRATEGY_HASH) + { + ndatums = nparts; + hbounds = (PartitionHashBound **) + palloc(nparts * sizeof(PartitionHashBound *)); + + i = 0; + foreach(cell, boundspecs) + { + PartitionBoundSpec *spec = castNode(PartitionBoundSpec, + lfirst(cell)); + + if (spec->strategy != PARTITION_STRATEGY_HASH) + elog(ERROR, "invalid strategy in partition bound spec"); + + hbounds[i] = (PartitionHashBound *) + palloc(sizeof(PartitionHashBound)); + + hbounds[i]->modulus = spec->modulus; + hbounds[i]->remainder = spec->remainder; + hbounds[i]->index = i; + i++; + } + + /* Sort all the bounds in ascending order */ + qsort(hbounds, nparts, sizeof(PartitionHashBound *), + qsort_partition_hbound_cmp); + } + else if (key->strategy == PARTITION_STRATEGY_LIST) { List *non_null_values = NIL; @@ -484,6 +544,42 @@ RelationBuildPartitionDesc(Relation rel) switch (key->strategy) { + case PARTITION_STRATEGY_HASH: + { + /* Modulus are stored in ascending order */ + int greatest_modulus = hbounds[ndatums - 1]->modulus; + + boundinfo->indexes = (int *) palloc(greatest_modulus * + sizeof(int)); + + for (i = 0; i < greatest_modulus; i++) + boundinfo->indexes[i] = -1; + + for (i = 0; i < nparts; i++) + { + int modulus = hbounds[i]->modulus; + int remainder = hbounds[i]->remainder; + + boundinfo->datums[i] = (Datum *) palloc(2 * + sizeof(Datum)); + boundinfo->datums[i][0] = Int32GetDatum(modulus); + boundinfo->datums[i][1] = Int32GetDatum(remainder); + + while (remainder < greatest_modulus) + { + /* overlap? */ + Assert(boundinfo->indexes[remainder] == -1); + boundinfo->indexes[remainder] = i; + remainder += modulus; + } + + mapping[hbounds[i]->index] = i; + pfree(hbounds[i]); + } + pfree(hbounds); + break; + } + case PARTITION_STRATEGY_LIST: { boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); @@ -617,8 +713,7 @@ RelationBuildPartitionDesc(Relation rel) * Now assign OIDs from the original array into mapped indexes of the * result array. Order of OIDs in the former is defined by the * catalog scan that retrieved them, whereas that in the latter is - * defined by canonicalized representation of the list values or the - * range bounds. + * defined by canonicalized representation of the partition bounds. */ for (i = 0; i < nparts; i++) result->oids[mapping[i]] = oids[i]; @@ -655,49 +750,97 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, if (b1->default_index != b2->default_index) return false; - for (i = 0; i < b1->ndatums; i++) + if (b1->strategy == PARTITION_STRATEGY_HASH) { - int j; + int greatest_modulus; - for (j = 0; j < partnatts; j++) + /* + * If two hash partitioned tables have different greatest moduli, + * their partition schemes don't match. For hash partitioned table, + * the greatest modulus is given by the last datum and number of + * partitions is given by ndatums. + */ + if (b1->datums[b1->ndatums - 1][0] != b2->datums[b2->ndatums - 1][0]) + return false; + + /* + * We arrange the partitions in the ascending order of their modulus + * and remainders. Also every modulus is factor of next larger + * modulus. Therefore we can safely store index of a given partition + * in indexes array at remainder of that partition. Also entries at + * (remainder + N * modulus) positions in indexes array are all same + * for (modulus, remainder) specification for any partition. Thus + * datums array from both the given bounds are same, if and only if + * their indexes array will be same. So, it suffices to compare + * indexes array. + */ + greatest_modulus = get_greatest_modulus(b1); + for (i = 0; i < greatest_modulus; i++) + if (b1->indexes[i] != b2->indexes[i]) + return false; + +#ifdef USE_ASSERT_CHECKING + + /* + * Nonetheless make sure that the bounds are indeed same when the + * indexes match. Hash partition bound stores modulus and remainder + * at b1->datums[i][0] and b1->datums[i][1] position respectively. + */ + for (i = 0; i < b1->ndatums; i++) + Assert((b1->datums[i][0] == b2->datums[i][0] && + b1->datums[i][1] == b2->datums[i][1])); +#endif + } + else + { + for (i = 0; i < b1->ndatums; i++) { - /* For range partitions, the bounds might not be finite. */ - if (b1->kind != NULL) - { - /* The different kinds of bound all differ from each other */ - if (b1->kind[i][j] != b2->kind[i][j]) - return false; + int j; - /* Non-finite bounds are equal without further examination. */ - if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE) - continue; + for (j = 0; j < partnatts; j++) + { + /* For range partitions, the bounds might not be finite. */ + if (b1->kind != NULL) + { + /* The different kinds of bound all differ from each other */ + if (b1->kind[i][j] != b2->kind[i][j]) + return false; + + /* + * Non-finite bounds are equal without further + * examination. + */ + if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE) + continue; + } + + /* + * Compare the actual values. Note that it would be both + * incorrect and unsafe to invoke the comparison operator + * derived from the partitioning specification here. It would + * be incorrect because we want the relcache entry to be + * updated for ANY change to the partition bounds, not just + * those that the partitioning operator thinks are + * significant. It would be unsafe because we might reach + * this code in the context of an aborted transaction, and an + * arbitrary partitioning operator might not be safe in that + * context. datumIsEqual() should be simple enough to be + * safe. + */ + if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j], + parttypbyval[j], parttyplen[j])) + return false; } - /* - * Compare the actual values. Note that it would be both incorrect - * and unsafe to invoke the comparison operator derived from the - * partitioning specification here. It would be incorrect because - * we want the relcache entry to be updated for ANY change to the - * partition bounds, not just those that the partitioning operator - * thinks are significant. It would be unsafe because we might - * reach this code in the context of an aborted transaction, and - * an arbitrary partitioning operator might not be safe in that - * context. datumIsEqual() should be simple enough to be safe. - */ - if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j], - parttypbyval[j], parttyplen[j])) + if (b1->indexes[i] != b2->indexes[i]) return false; } - if (b1->indexes[i] != b2->indexes[i]) + /* There are ndatums+1 indexes in case of range partitions */ + if (b1->strategy == PARTITION_STRATEGY_RANGE && + b1->indexes[i] != b2->indexes[i]) return false; } - - /* There are ndatums+1 indexes in case of range partitions */ - if (b1->strategy == PARTITION_STRATEGY_RANGE && - b1->indexes[i] != b2->indexes[i]) - return false; - return true; } @@ -709,11 +852,11 @@ extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src, PartitionKey key) { - PartitionBoundInfo dest; - int i; - int ndatums; - int partnatts; - int num_indexes; + PartitionBoundInfo dest; + int i; + int ndatums; + int partnatts; + int num_indexes; dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData)); @@ -721,8 +864,7 @@ partition_bounds_copy(PartitionBoundInfo src, ndatums = dest->ndatums = src->ndatums; partnatts = key->partnatts; - /* Range partitioned table has an extra index. */ - num_indexes = key->strategy == PARTITION_STRATEGY_RANGE ? ndatums + 1 : ndatums; + num_indexes = get_partition_bound_num_indexes(src); /* List partitioned tables have only a single partition key. */ Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1); @@ -732,11 +874,11 @@ partition_bounds_copy(PartitionBoundInfo src, if (src->kind != NULL) { dest->kind = (PartitionRangeDatumKind **) palloc(ndatums * - sizeof(PartitionRangeDatumKind *)); + sizeof(PartitionRangeDatumKind *)); for (i = 0; i < ndatums; i++) { dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts * - sizeof(PartitionRangeDatumKind)); + sizeof(PartitionRangeDatumKind)); memcpy(dest->kind[i], src->kind[i], sizeof(PartitionRangeDatumKind) * key->partnatts); @@ -747,16 +889,37 @@ partition_bounds_copy(PartitionBoundInfo src, for (i = 0; i < ndatums; i++) { - int j; - dest->datums[i] = (Datum *) palloc(sizeof(Datum) * partnatts); + int j; - for (j = 0; j < partnatts; j++) + /* + * For a corresponding to hash partition, datums array will have two + * elements - modulus and remainder. + */ + bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH); + int natts = hash_part ? 2 : partnatts; + + dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts); + + for (j = 0; j < natts; j++) { + bool byval; + int typlen; + + if (hash_part) + { + typlen = sizeof(int32); /* Always int4 */ + byval = true; /* int4 is pass-by-value */ + } + else + { + byval = key->parttypbyval[j]; + typlen = key->parttyplen[j]; + } + if (dest->kind == NULL || dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE) dest->datums[i][j] = datumCopy(src->datums[i][j], - key->parttypbyval[j], - key->parttyplen[j]); + byval, typlen); } } @@ -801,6 +964,89 @@ check_new_partition_bound(char *relname, Relation parent, switch (key->strategy) { + case PARTITION_STRATEGY_HASH: + { + Assert(spec->strategy == PARTITION_STRATEGY_HASH); + Assert(spec->remainder >= 0 && spec->remainder < spec->modulus); + + if (partdesc->nparts > 0) + { + PartitionBoundInfo boundinfo = partdesc->boundinfo; + Datum **datums = boundinfo->datums; + int ndatums = boundinfo->ndatums; + int greatest_modulus; + int remainder; + int offset; + bool equal, + valid_modulus = true; + int prev_modulus, /* Previous largest modulus */ + next_modulus; /* Next largest modulus */ + + /* + * Check rule that every modulus must be a factor of the + * next larger modulus. For example, if you have a bunch + * of partitions that all have modulus 5, you can add a + * new partition with modulus 10 or a new partition with + * modulus 15, but you cannot add both a partition with + * modulus 10 and a partition with modulus 15, because 10 + * is not a factor of 15. + * + * Get greatest bound in array boundinfo->datums which is + * less than or equal to spec->modulus and + * spec->remainder. + */ + offset = partition_bound_bsearch(key, boundinfo, spec, + true, &equal); + if (offset < 0) + { + next_modulus = DatumGetInt32(datums[0][0]); + valid_modulus = (next_modulus % spec->modulus) == 0; + } + else + { + prev_modulus = DatumGetInt32(datums[offset][0]); + valid_modulus = (spec->modulus % prev_modulus) == 0; + + if (valid_modulus && (offset + 1) < ndatums) + { + next_modulus = DatumGetInt32(datums[offset + 1][0]); + valid_modulus = (next_modulus % spec->modulus) == 0; + } + } + + if (!valid_modulus) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("every hash partition modulus must be a factor of the next larger modulus"))); + + greatest_modulus = get_greatest_modulus(boundinfo); + remainder = spec->remainder; + + /* + * Normally, the lowest remainder that could conflict with + * the new partition is equal to the remainder specified + * for the new partition, but when the new partition has a + * modulus higher than any used so far, we need to adjust. + */ + if (remainder >= greatest_modulus) + remainder = remainder % greatest_modulus; + + /* Check every potentially-conflicting remainder. */ + do + { + if (boundinfo->indexes[remainder] != -1) + { + overlap = true; + with = boundinfo->indexes[remainder]; + break; + } + remainder += spec->modulus; + } while (remainder < greatest_modulus); + } + + break; + } + case PARTITION_STRATEGY_LIST: { Assert(spec->strategy == PARTITION_STRATEGY_LIST); @@ -1171,6 +1417,11 @@ get_qual_from_partbound(Relation rel, Relation parent, switch (key->strategy) { + case PARTITION_STRATEGY_HASH: + Assert(spec->strategy == PARTITION_STRATEGY_HASH); + my_qual = get_qual_for_hash(parent, spec); + break; + case PARTITION_STRATEGY_LIST: Assert(spec->strategy == PARTITION_STRATEGY_LIST); my_qual = get_qual_for_list(parent, spec); @@ -1541,6 +1792,92 @@ make_partition_op_expr(PartitionKey key, int keynum, return result; } +/* + * get_qual_for_hash + * + * Given a list of partition columns, modulus and remainder corresponding to a + * partition, this function returns CHECK constraint expression Node for that + * partition. + * + * The partition constraint for a hash partition is always a call to the + * built-in function satisfies_hash_partition(). The first two arguments are + * the modulus and remainder for the partition; the remaining arguments are the + * values to be hashed. + */ +static List * +get_qual_for_hash(Relation parent, PartitionBoundSpec *spec) +{ + PartitionKey key = RelationGetPartitionKey(parent); + FuncExpr *fexpr; + Node *relidConst; + Node *modulusConst; + Node *remainderConst; + List *args; + ListCell *partexprs_item; + int i; + + /* Fixed arguments. */ + relidConst = (Node *) makeConst(OIDOID, + -1, + InvalidOid, + sizeof(Oid), + ObjectIdGetDatum(RelationGetRelid(parent)), + false, + true); + + modulusConst = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(spec->modulus), + false, + true); + + remainderConst = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(spec->remainder), + false, + true); + + args = list_make3(relidConst, modulusConst, remainderConst); + partexprs_item = list_head(key->partexprs); + + /* Add an argument for each key column. */ + for (i = 0; i < key->partnatts; i++) + { + Node *keyCol; + + /* Left operand */ + if (key->partattrs[i] != 0) + { + keyCol = (Node *) makeVar(1, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + keyCol = (Node *) copyObject(lfirst(partexprs_item)); + partexprs_item = lnext(partexprs_item); + } + + args = lappend(args, keyCol); + } + + fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION, + BOOLOID, + args, + InvalidOid, + InvalidOid, + COERCE_EXPLICIT_CALL); + + return list_make1(fexpr); +} + /* * get_qual_for_list * @@ -2412,6 +2749,17 @@ get_partition_for_tuple(PartitionDispatch *pd, /* Route as appropriate based on partitioning strategy. */ switch (key->strategy) { + case PARTITION_STRATEGY_HASH: + { + PartitionBoundInfo boundinfo = partdesc->boundinfo; + int greatest_modulus = get_greatest_modulus(boundinfo); + uint64 rowHash = compute_hash_value(key, values, + isnull); + + cur_index = boundinfo->indexes[rowHash % greatest_modulus]; + } + break; + case PARTITION_STRATEGY_LIST: if (isnull[0]) @@ -2524,6 +2872,38 @@ error_exit: return result; } +/* + * qsort_partition_hbound_cmp + * + * We sort hash bounds by modulus, then by remainder. + */ +static int32 +qsort_partition_hbound_cmp(const void *a, const void *b) +{ + PartitionHashBound *h1 = (*(PartitionHashBound *const *) a); + PartitionHashBound *h2 = (*(PartitionHashBound *const *) b); + + return partition_hbound_cmp(h1->modulus, h1->remainder, + h2->modulus, h2->remainder); +} + +/* + * partition_hbound_cmp + * + * Compares modulus first, then remainder if modulus are equal. + */ +static int32 +partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2) +{ + if (modulus1 < modulus2) + return -1; + if (modulus1 > modulus2) + return 1; + if (modulus1 == modulus2 && remainder1 != remainder2) + return (remainder1 > remainder2) ? 1 : -1; + return 0; +} + /* * qsort_partition_list_value_cmp * @@ -2710,6 +3090,15 @@ partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, switch (key->strategy) { + case PARTITION_STRATEGY_HASH: + { + PartitionBoundSpec *spec = (PartitionBoundSpec *) probe; + + cmpval = partition_hbound_cmp(DatumGetInt32(bound_datums[0]), + DatumGetInt32(bound_datums[1]), + spec->modulus, spec->remainder); + break; + } case PARTITION_STRATEGY_LIST: cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], key->partcollation[0], @@ -2894,3 +3283,182 @@ get_proposed_default_constraint(List *new_part_constraints) return list_make1(defPartConstraint); } + +/* + * get_partition_bound_num_indexes + * + * Returns the number of the entries in the partition bound indexes array. + */ +static int +get_partition_bound_num_indexes(PartitionBoundInfo bound) +{ + int num_indexes; + + Assert(bound); + + switch (bound->strategy) + { + case PARTITION_STRATEGY_HASH: + + /* + * The number of the entries in the indexes array is same as the + * greatest modulus. + */ + num_indexes = get_greatest_modulus(bound); + break; + + case PARTITION_STRATEGY_LIST: + num_indexes = bound->ndatums; + break; + + case PARTITION_STRATEGY_RANGE: + /* Range partitioned table has an extra index. */ + num_indexes = bound->ndatums + 1; + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) bound->strategy); + } + + return num_indexes; +} + +/* + * get_greatest_modulus + * + * Returns the greatest modulus of the hash partition bound. The greatest + * modulus will be at the end of the datums array because hash partitions are + * arranged in the ascending order of their modulus and remainders. + */ +static int +get_greatest_modulus(PartitionBoundInfo bound) +{ + Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH); + Assert(bound->datums && bound->ndatums > 0); + Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0); + + return DatumGetInt32(bound->datums[bound->ndatums - 1][0]); +} + +/* + * compute_hash_value + * + * Compute the hash value for given not null partition key values. + */ +static uint64 +compute_hash_value(PartitionKey key, Datum *values, bool *isnull) +{ + int i; + int nkeys = key->partnatts; + uint64 rowHash = 0; + Datum seed = UInt64GetDatum(HASH_PARTITION_SEED); + + for (i = 0; i < nkeys; i++) + { + if (!isnull[i]) + { + Datum hash; + + Assert(OidIsValid(key->partsupfunc[i].fn_oid)); + + /* + * Compute hash for each datum value by calling respective + * datatype-specific hash functions of each partition key + * attribute. + */ + hash = FunctionCall2(&key->partsupfunc[i], values[i], seed); + + /* Form a single 64-bit hash value */ + rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); + } + } + + return rowHash; +} + +/* + * satisfies_hash_partition + * + * This is a SQL-callable function for use in hash partition constraints takes + * an already computed hash values of each partition key attribute, and combine + * them into a single hash value by calling hash_combine64. + * + * Returns true if remainder produced when this computed single hash value is + * divided by the given modulus is equal to given remainder, otherwise false. + * + * See get_qual_for_hash() for usage. + */ +Datum +satisfies_hash_partition(PG_FUNCTION_ARGS) +{ + typedef struct ColumnsHashData + { + Oid relid; + int16 nkeys; + FmgrInfo partsupfunc[PARTITION_MAX_KEYS]; + } ColumnsHashData; + Oid parentId = PG_GETARG_OID(0); + int modulus = PG_GETARG_INT32(1); + int remainder = PG_GETARG_INT32(2); + short nkeys = PG_NARGS() - 3; + int i; + Datum seed = UInt64GetDatum(HASH_PARTITION_SEED); + ColumnsHashData *my_extra; + uint64 rowHash = 0; + + /* + * Cache hash function information. + */ + my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL || my_extra->nkeys != nkeys || + my_extra->relid != parentId) + { + Relation parent; + PartitionKey key; + int j; + + fcinfo->flinfo->fn_extra = + MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, + offsetof(ColumnsHashData, partsupfunc) + + sizeof(FmgrInfo) * nkeys); + my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; + my_extra->nkeys = nkeys; + my_extra->relid = parentId; + + /* Open parent relation and fetch partition keyinfo */ + parent = heap_open(parentId, AccessShareLock); + key = RelationGetPartitionKey(parent); + + Assert(key->partnatts == nkeys); + for (j = 0; j < nkeys; ++j) + fmgr_info_copy(&my_extra->partsupfunc[j], + key->partsupfunc, + fcinfo->flinfo->fn_mcxt); + + /* Hold lock until commit */ + heap_close(parent, NoLock); + } + + for (i = 0; i < nkeys; i++) + { + /* keys start from fourth argument of function. */ + int argno = i + 3; + + if (!PG_ARGISNULL(argno)) + { + Datum hash; + + Assert(OidIsValid(my_extra->partsupfunc[i].fn_oid)); + + hash = FunctionCall2(&my_extra->partsupfunc[i], + PG_GETARG_DATUM(argno), + seed); + + /* Form a single 64-bit hash value */ + rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); + } + } + + PG_RETURN_BOOL(rowHash % modulus == remainder); +} diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index b7ddb335d2..165b165d55 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -471,7 +471,7 @@ static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, static bool is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr); static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy); static void ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs, - List **partexprs, Oid *partopclass, Oid *partcollation); + List **partexprs, Oid *partopclass, Oid *partcollation, char strategy); static void CreateInheritance(Relation child_rel, Relation parent_rel); static void RemoveInheritance(Relation child_rel, Relation parent_rel); static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel, @@ -894,7 +894,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, ComputePartitionAttrs(rel, stmt->partspec->partParams, partattrs, &partexprs, partopclass, - partcollation); + partcollation, strategy); StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs, partopclass, partcollation); @@ -13337,7 +13337,9 @@ transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy) newspec->location = partspec->location; /* Parse partitioning strategy name */ - if (pg_strcasecmp(partspec->strategy, "list") == 0) + if (pg_strcasecmp(partspec->strategy, "hash") == 0) + *strategy = PARTITION_STRATEGY_HASH; + else if (pg_strcasecmp(partspec->strategy, "list") == 0) *strategy = PARTITION_STRATEGY_LIST; else if (pg_strcasecmp(partspec->strategy, "range") == 0) *strategy = PARTITION_STRATEGY_RANGE; @@ -13407,10 +13409,12 @@ transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy) */ static void ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs, - List **partexprs, Oid *partopclass, Oid *partcollation) + List **partexprs, Oid *partopclass, Oid *partcollation, + char strategy) { int attn; ListCell *lc; + Oid am_oid; attn = 0; foreach(lc, partParams) @@ -13570,25 +13574,41 @@ ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs, partcollation[attn] = attcollation; /* - * Identify a btree opclass to use. Currently, we use only btree - * operators, which seems enough for list and range partitioning. + * Identify the appropriate operator class. For list and range + * partitioning, we use a btree operator class; hash partitioning uses + * a hash operator class. */ + if (strategy == PARTITION_STRATEGY_HASH) + am_oid = HASH_AM_OID; + else + am_oid = BTREE_AM_OID; + if (!pelem->opclass) { - partopclass[attn] = GetDefaultOpClass(atttype, BTREE_AM_OID); + partopclass[attn] = GetDefaultOpClass(atttype, am_oid); if (!OidIsValid(partopclass[attn])) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_OBJECT), - errmsg("data type %s has no default btree operator class", - format_type_be(atttype)), - errhint("You must specify a btree operator class or define a default btree operator class for the data type."))); + { + if (strategy == PARTITION_STRATEGY_HASH) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("data type %s has no default hash operator class", + format_type_be(atttype)), + errhint("You must specify a hash operator class or define a default hash operator class for the data type."))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("data type %s has no default btree operator class", + format_type_be(atttype)), + errhint("You must specify a btree operator class or define a default btree operator class for the data type."))); + + } } else partopclass[attn] = ResolveOpClass(pelem->opclass, atttype, - "btree", - BTREE_AM_OID); + am_oid == HASH_AM_OID ? "hash" : "btree", + am_oid); attn++; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index c1a83ca909..cadd253ef1 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4461,6 +4461,8 @@ _copyPartitionBoundSpec(const PartitionBoundSpec *from) COPY_SCALAR_FIELD(strategy); COPY_SCALAR_FIELD(is_default); + COPY_SCALAR_FIELD(modulus); + COPY_SCALAR_FIELD(remainder); COPY_NODE_FIELD(listdatums); COPY_NODE_FIELD(lowerdatums); COPY_NODE_FIELD(upperdatums); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 7a700018e7..2866fd7b4a 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2848,6 +2848,8 @@ _equalPartitionBoundSpec(const PartitionBoundSpec *a, const PartitionBoundSpec * { COMPARE_SCALAR_FIELD(strategy); COMPARE_SCALAR_FIELD(is_default); + COMPARE_SCALAR_FIELD(modulus); + COMPARE_SCALAR_FIELD(remainder); COMPARE_NODE_FIELD(listdatums); COMPARE_NODE_FIELD(lowerdatums); COMPARE_NODE_FIELD(upperdatums); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 43d62062bc..291d1eeb46 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -3578,6 +3578,8 @@ _outPartitionBoundSpec(StringInfo str, const PartitionBoundSpec *node) WRITE_CHAR_FIELD(strategy); WRITE_BOOL_FIELD(is_default); + WRITE_INT_FIELD(modulus); + WRITE_INT_FIELD(remainder); WRITE_NODE_FIELD(listdatums); WRITE_NODE_FIELD(lowerdatums); WRITE_NODE_FIELD(upperdatums); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index ccb6a1f4ac..42c595dc03 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2397,6 +2397,8 @@ _readPartitionBoundSpec(void) READ_CHAR_FIELD(strategy); READ_BOOL_FIELD(is_default); + READ_INT_FIELD(modulus); + READ_INT_FIELD(remainder); READ_NODE_FIELD(listdatums); READ_NODE_FIELD(lowerdatums); READ_NODE_FIELD(upperdatums); diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 244708ad5a..453f25964a 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1463,7 +1463,7 @@ have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, continue; /* Skip clauses which are not equality conditions. */ - if (!rinfo->mergeopfamilies) + if (!rinfo->mergeopfamilies && !OidIsValid(rinfo->hashjoinoperator)) continue; opexpr = (OpExpr *) rinfo->clause; @@ -1515,8 +1515,14 @@ have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, * The clause allows partition-wise join if only it uses the same * operator family as that specified by the partition key. */ - if (!list_member_oid(rinfo->mergeopfamilies, - part_scheme->partopfamily[ipk1])) + if (rel1->part_scheme->strategy == PARTITION_STRATEGY_HASH) + { + if (!op_in_opfamily(rinfo->hashjoinoperator, + part_scheme->partopfamily[ipk1])) + continue; + } + else if (!list_member_oid(rinfo->mergeopfamilies, + part_scheme->partopfamily[ipk1])) continue; /* Mark the partition key as having an equi-join clause. */ diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 09b9a899e4..c301ca465d 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -579,7 +579,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type part_params %type PartitionBoundSpec %type partbound_datum PartitionRangeDatum -%type partbound_datum_list range_datum_list +%type hash_partbound partbound_datum_list range_datum_list +%type hash_partbound_elem /* * Non-keyword token types. These are hard-wired into the "flex" lexer. @@ -2638,8 +2639,61 @@ alter_identity_column_option: ; PartitionBoundSpec: + /* a HASH partition*/ + FOR VALUES WITH '(' hash_partbound ')' + { + ListCell *lc; + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + + n->strategy = PARTITION_STRATEGY_HASH; + n->modulus = n->remainder = -1; + + foreach (lc, $5) + { + DefElem *opt = lfirst_node(DefElem, lc); + + if (strcmp(opt->defname, "modulus") == 0) + { + if (n->modulus != -1) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("modulus for hash partition provided more than once"), + parser_errposition(opt->location))); + n->modulus = defGetInt32(opt); + } + else if (strcmp(opt->defname, "remainder") == 0) + { + if (n->remainder != -1) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("remainder for hash partition provided more than once"), + parser_errposition(opt->location))); + n->remainder = defGetInt32(opt); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized hash partition bound specification \"%s\"", + opt->defname), + parser_errposition(opt->location))); + } + + if (n->modulus == -1) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("modulus for hash partition must be specified"))); + if (n->remainder == -1) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("remainder for hash partition must be specified"))); + + n->location = @3; + + $$ = n; + } + /* a LIST partition */ - FOR VALUES IN_P '(' partbound_datum_list ')' + | FOR VALUES IN_P '(' partbound_datum_list ')' { PartitionBoundSpec *n = makeNode(PartitionBoundSpec); @@ -2677,6 +2731,24 @@ PartitionBoundSpec: } ; +hash_partbound_elem: + NonReservedWord Iconst + { + $$ = makeDefElem($1, (Node *)makeInteger($2), @1); + } + ; + +hash_partbound: + hash_partbound_elem + { + $$ = list_make1($1); + } + | hash_partbound ',' hash_partbound_elem + { + $$ = lappend($1, $3); + } + ; + partbound_datum: Sconst { $$ = makeStringConst($1, @1); } | NumericOnly { $$ = makeAConst($1, @1); } diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 30fc2d9ff8..8461da490a 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -3310,6 +3310,11 @@ transformPartitionBound(ParseState *pstate, Relation parent, if (spec->is_default) { + if (strategy == PARTITION_STRATEGY_HASH) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("a hash-partitioned table may not have a default partition"))); + /* * In case of the default partition, parser had no way to identify the * partition strategy. Assign the parent's strategy to the default @@ -3320,7 +3325,27 @@ transformPartitionBound(ParseState *pstate, Relation parent, return result_spec; } - if (strategy == PARTITION_STRATEGY_LIST) + if (strategy == PARTITION_STRATEGY_HASH) + { + if (spec->strategy != PARTITION_STRATEGY_HASH) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("invalid bound specification for a hash partition"), + parser_errposition(pstate, exprLocation((Node *) spec)))); + + if (spec->modulus <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("modulus for hash partition must be a positive integer"))); + + Assert(spec->remainder >= 0); + + if (spec->remainder >= spec->modulus) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("remainder for hash partition must be less than modulus"))); + } + else if (strategy == PARTITION_STRATEGY_LIST) { ListCell *cell; char *colname; @@ -3485,7 +3510,7 @@ transformPartitionBound(ParseState *pstate, Relation parent, static void validateInfiniteBounds(ParseState *pstate, List *blist) { - ListCell *lc; + ListCell *lc; PartitionRangeDatumKind kind = PARTITION_RANGE_DATUM_VALUE; foreach(lc, blist) diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 752cef09e6..b543b7046c 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1551,7 +1551,7 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok) * * Returns the partition key specification, ie, the following: * - * PARTITION BY { RANGE | LIST } (column opt_collation opt_opclass [, ...]) + * PARTITION BY { RANGE | LIST | HASH } (column opt_collation opt_opclass [, ...]) */ Datum pg_get_partkeydef(PG_FUNCTION_ARGS) @@ -1655,6 +1655,10 @@ pg_get_partkeydef_worker(Oid relid, int prettyFlags, switch (form->partstrat) { + case PARTITION_STRATEGY_HASH: + if (!attrsOnly) + appendStringInfo(&buf, "HASH"); + break; case PARTITION_STRATEGY_LIST: if (!attrsOnly) appendStringInfoString(&buf, "LIST"); @@ -8711,6 +8715,15 @@ get_rule_expr(Node *node, deparse_context *context, switch (spec->strategy) { + case PARTITION_STRATEGY_HASH: + Assert(spec->modulus > 0 && spec->remainder >= 0); + Assert(spec->modulus > spec->remainder); + + appendStringInfoString(buf, "FOR VALUES"); + appendStringInfo(buf, " WITH (modulus %d, remainder %d)", + spec->modulus, spec->remainder); + break; + case PARTITION_STRATEGY_LIST: Assert(spec->listdatums != NIL); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index a31b68a8d5..1908420d82 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -30,6 +30,7 @@ #include #include +#include "access/hash.h" #include "access/htup_details.h" #include "access/multixact.h" #include "access/nbtree.h" @@ -833,6 +834,7 @@ RelationBuildPartitionKey(Relation relation) Datum datum; MemoryContext partkeycxt, oldcxt; + int16 procnum; tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(RelationGetRelid(relation))); @@ -912,6 +914,10 @@ RelationBuildPartitionKey(Relation relation) key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char)); key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + /* For the hash partitioning, an extended hash function will be used. */ + procnum = (key->strategy == PARTITION_STRATEGY_HASH) ? + HASHEXTENDED_PROC : BTORDER_PROC; + /* Copy partattrs and fill other per-attribute info */ memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16)); partexprs_item = list_head(key->partexprs); @@ -932,18 +938,20 @@ RelationBuildPartitionKey(Relation relation) key->partopfamily[i] = opclassform->opcfamily; key->partopcintype[i] = opclassform->opcintype; - /* - * A btree support function covers the cases of list and range methods - * currently supported. - */ + /* Get a support function for the specified opfamily and datatypes */ funcid = get_opfamily_proc(opclassform->opcfamily, opclassform->opcintype, opclassform->opcintype, - BTORDER_PROC); - if (!OidIsValid(funcid)) /* should not happen */ - elog(ERROR, "missing support function %d(%u,%u) in opfamily %u", - BTORDER_PROC, opclassform->opcintype, opclassform->opcintype, - opclassform->opcfamily); + procnum); + if (!OidIsValid(funcid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator class \"%s\" of access method %s is missing support function %d for data type \"%s\"", + NameStr(opclassform->opcname), + (key->strategy == PARTITION_STRATEGY_HASH) ? + "hash" : "btree", + procnum, + format_type_be(opclassform->opcintype)))); fmgr_info(funcid, &key->partsupfunc[i]); diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index a09c49d6cf..b3e3799c13 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2055,7 +2055,7 @@ psql_completion(const char *text, int start, int end) else if (TailMatches3("ATTACH", "PARTITION", MatchAny)) COMPLETE_WITH_LIST2("FOR VALUES", "DEFAULT"); else if (TailMatches2("FOR", "VALUES")) - COMPLETE_WITH_LIST2("FROM (", "IN ("); + COMPLETE_WITH_LIST3("FROM (", "IN (", "WITH ("); /* * If we have ALTER TABLE DETACH PARTITION, provide a list of diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 39c70b415a..bd4014a69d 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201711091 +#define CATALOG_VERSION_NO 201711092 #endif diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 945ac0239d..8acc01a876 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -19,6 +19,9 @@ #include "parser/parse_node.h" #include "utils/rel.h" +/* Seed for the extended hash function */ +#define HASH_PARTITION_SEED UINT64CONST(0x7A5B22367996DCFD) + /* * PartitionBoundInfo encapsulates a set of partition bounds. It is usually * associated with partitioned tables as part of its partition descriptor. diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 5e3e7228d6..0330c04f16 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5522,6 +5522,10 @@ DESCR("list files in the log directory"); DATA(insert OID = 3354 ( pg_ls_waldir PGNSP PGUID 12 10 20 0 0 f f f f t t v s 0 0 2249 "" "{25,20,1184}" "{o,o,o}" "{name,size,modification}" _null_ _null_ pg_ls_waldir _null_ _null_ _null_ )); DESCR("list of files in the WAL directory"); +/* hash partitioning constraint function */ +DATA(insert OID = 5028 ( satisfies_hash_partition PGNSP PGUID 12 1 0 2276 0 f f f f f f i s 4 0 16 "26 23 23 2276" _null_ _null_ _null_ _null_ _null_ satisfies_hash_partition _null_ _null_ _null_ )); +DESCR("hash partition CHECK constraint"); + /* * Symbolic values for provolatile column: these indicate whether the result * of a function is dependent *only* on the values of its explicit arguments, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index a240c271db..34d6afc80f 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -777,12 +777,14 @@ typedef struct PartitionElem typedef struct PartitionSpec { NodeTag type; - char *strategy; /* partitioning strategy ('list' or 'range') */ + char *strategy; /* partitioning strategy ('hash', 'list' or + * 'range') */ List *partParams; /* List of PartitionElems */ int location; /* token location, or -1 if unknown */ } PartitionSpec; /* Internal codes for partitioning strategies */ +#define PARTITION_STRATEGY_HASH 'h' #define PARTITION_STRATEGY_LIST 'l' #define PARTITION_STRATEGY_RANGE 'r' @@ -799,6 +801,10 @@ typedef struct PartitionBoundSpec char strategy; /* see PARTITION_STRATEGY codes above */ bool is_default; /* is it a default partition bound? */ + /* Partitioning info for HASH strategy: */ + int modulus; + int remainder; + /* Partitioning info for LIST strategy: */ List *listdatums; /* List of Consts (or A_Consts in raw tree) */ diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index ee1f10c8e0..11f0baa11b 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -3399,6 +3399,7 @@ SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::reg CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS); ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); ERROR: partition "fail_part" would overlap partition "part_1" +DROP TABLE fail_part; -- check that an existing table can be attached as a default partition CREATE TABLE def_part (LIKE list_parted INCLUDING CONSTRAINTS); ALTER TABLE list_parted ATTACH PARTITION def_part DEFAULT; @@ -3596,6 +3597,59 @@ CREATE TABLE quuux1 PARTITION OF quuux FOR VALUES IN (1); CREATE TABLE quuux2 PARTITION OF quuux FOR VALUES IN (2); INFO: updated partition constraint for default partition "quuux_default1" is implied by existing constraints DROP TABLE quuux; +-- check validation when attaching hash partitions +-- The default hash functions as they exist today aren't portable; they can +-- return different results on different machines. Depending upon how the +-- values are hashed, the row may map to different partitions, which result in +-- regression failure. To avoid this, let's create a non-default hash function +-- that just returns the input value unchanged. +CREATE OR REPLACE FUNCTION dummy_hashint4(a int4, seed int8) RETURNS int8 AS +$$ BEGIN RETURN (a + 1 + seed); END; $$ LANGUAGE 'plpgsql' IMMUTABLE; +CREATE OPERATOR CLASS custom_opclass FOR TYPE int4 USING HASH AS +OPERATOR 1 = , FUNCTION 2 dummy_hashint4(int4, int8); +-- check that the new partition won't overlap with an existing partition +CREATE TABLE hash_parted ( + a int, + b int +) PARTITION BY HASH (a custom_opclass); +CREATE TABLE hpart_1 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 4, REMAINDER 0); +CREATE TABLE fail_part (LIKE hpart_1); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 4); +ERROR: partition "fail_part" would overlap partition "hpart_1" +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 0); +ERROR: partition "fail_part" would overlap partition "hpart_1" +DROP TABLE fail_part; +-- check validation when attaching hash partitions +-- check that violating rows are correctly reported +CREATE TABLE hpart_2 (LIKE hash_parted); +INSERT INTO hpart_2 VALUES (3, 0); +ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1); +ERROR: partition constraint is violated by some row +-- should be ok after deleting the bad row +DELETE FROM hpart_2; +ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1); +-- check that leaf partitions are scanned when attaching a partitioned +-- table +CREATE TABLE hpart_5 ( + LIKE hash_parted +) PARTITION BY LIST (b); +-- check that violating rows are correctly reported +CREATE TABLE hpart_5_a PARTITION OF hpart_5 FOR VALUES IN ('1', '2', '3'); +INSERT INTO hpart_5_a (a, b) VALUES (7, 1); +ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2); +ERROR: partition constraint is violated by some row +-- should be ok after deleting the bad row +DELETE FROM hpart_5_a; +ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2); +-- check that the table being attach is with valid modulus and remainder value +CREATE TABLE fail_part(LIKE hash_parted); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 0, REMAINDER 1); +ERROR: modulus for hash partition must be a positive integer +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 8); +ERROR: remainder for hash partition must be less than modulus +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 3, REMAINDER 2); +ERROR: every hash partition modulus must be a factor of the next larger modulus +DROP TABLE fail_part; -- -- DETACH PARTITION -- @@ -3607,12 +3661,17 @@ DROP TABLE regular_table; -- check that the partition being detached exists at all ALTER TABLE list_parted2 DETACH PARTITION part_4; ERROR: relation "part_4" does not exist +ALTER TABLE hash_parted DETACH PARTITION hpart_4; +ERROR: relation "hpart_4" does not exist -- check that the partition being detached is actually a partition of the parent CREATE TABLE not_a_part (a int); ALTER TABLE list_parted2 DETACH PARTITION not_a_part; ERROR: relation "not_a_part" is not a partition of relation "list_parted2" ALTER TABLE list_parted2 DETACH PARTITION part_1; ERROR: relation "part_1" is not a partition of relation "list_parted2" +ALTER TABLE hash_parted DETACH PARTITION not_a_part; +ERROR: relation "not_a_part" is not a partition of relation "hash_parted" +DROP TABLE not_a_part; -- check that, after being detached, attinhcount/coninhcount is dropped to 0 and -- attislocal/conislocal is set to true ALTER TABLE list_parted2 DETACH PARTITION part_3_4; @@ -3716,6 +3775,9 @@ SELECT * FROM list_parted; -- cleanup DROP TABLE list_parted, list_parted2, range_parted; DROP TABLE fail_def_part; +DROP TABLE hash_parted; +DROP OPERATOR CLASS custom_opclass USING HASH; +DROP FUNCTION dummy_hashint4(a int4, seed int8); -- more tests for certain multi-level partitioning scenarios create table p (a int, b int) partition by range (a, b); create table p1 (b int, a int not null) partition by range (b); diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out index 60ab28a96a..335cd37e18 100644 --- a/src/test/regress/expected/create_table.out +++ b/src/test/regress/expected/create_table.out @@ -340,11 +340,11 @@ CREATE TABLE partitioned ( ) PARTITION BY RANGE (const_func()); ERROR: cannot use constant expression as partition key DROP FUNCTION const_func(); --- only accept "list" and "range" as partitioning strategy +-- only accept valid partitioning strategy CREATE TABLE partitioned ( - a int -) PARTITION BY HASH (a); -ERROR: unrecognized partitioning strategy "hash" + a int +) PARTITION BY MAGIC (a); +ERROR: unrecognized partitioning strategy "magic" -- specified column must be present in the table CREATE TABLE partitioned ( a int @@ -467,6 +467,11 @@ CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) TO (2); ERROR: invalid bound specification for a list partition LINE 1: ...BLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) T... ^ +-- trying to specify modulus and remainder for list partitioned table +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); +ERROR: invalid bound specification for a list partition +LINE 1: ...BLE fail_part PARTITION OF list_parted FOR VALUES WITH (MODU... + ^ -- check default partition cannot be created more than once CREATE TABLE part_default PARTITION OF list_parted DEFAULT; CREATE TABLE fail_default_part PARTITION OF list_parted DEFAULT; @@ -509,6 +514,11 @@ CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES IN ('a'); ERROR: invalid bound specification for a range partition LINE 1: ...BLE fail_part PARTITION OF range_parted FOR VALUES IN ('a'); ^ +-- trying to specify modulus and remainder for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); +ERROR: invalid bound specification for a range partition +LINE 1: ...LE fail_part PARTITION OF range_parted FOR VALUES WITH (MODU... + ^ -- each of start and end bounds must have same number of values as the -- length of the partition key CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('z'); @@ -518,6 +528,37 @@ ERROR: TO must specify exactly one value per partitioning column -- cannot specify null values in range bounds CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM (null) TO (maxvalue); ERROR: cannot specify NULL in range bound +-- trying to specify modulus and remainder for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); +ERROR: invalid bound specification for a range partition +LINE 1: ...LE fail_part PARTITION OF range_parted FOR VALUES WITH (MODU... + ^ +-- check partition bound syntax for the hash partition +CREATE TABLE hash_parted ( + a int +) PARTITION BY HASH (a); +CREATE TABLE hpart_1 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 10, REMAINDER 0); +CREATE TABLE hpart_2 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 50, REMAINDER 1); +CREATE TABLE hpart_3 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 200, REMAINDER 2); +-- modulus 25 is factor of modulus of 50 but 10 is not factor of 25. +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES WITH (MODULUS 25, REMAINDER 3); +ERROR: every hash partition modulus must be a factor of the next larger modulus +-- previous modulus 50 is factor of 150 but this modulus is not factor of next modulus 200. +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES WITH (MODULUS 150, REMAINDER 3); +ERROR: every hash partition modulus must be a factor of the next larger modulus +-- trying to specify range for the hash partitioned table +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES FROM ('a', 1) TO ('z'); +ERROR: invalid bound specification for a hash partition +LINE 1: ...BLE fail_part PARTITION OF hash_parted FOR VALUES FROM ('a',... + ^ +-- trying to specify list value for the hash partitioned table +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES IN (1000); +ERROR: invalid bound specification for a hash partition +LINE 1: ...BLE fail_part PARTITION OF hash_parted FOR VALUES IN (1000); + ^ +-- trying to create default partition for the hash partitioned table +CREATE TABLE fail_default_part PARTITION OF hash_parted DEFAULT; +ERROR: a hash-partitioned table may not have a default partition -- check if compatible with the specified parent -- cannot create as partition of a non-partitioned table CREATE TABLE unparted ( @@ -525,6 +566,8 @@ CREATE TABLE unparted ( ); CREATE TABLE fail_part PARTITION OF unparted FOR VALUES IN ('a'); ERROR: "unparted" is not partitioned +CREATE TABLE fail_part PARTITION OF unparted FOR VALUES WITH (MODULUS 2, REMAINDER 1); +ERROR: "unparted" is not partitioned DROP TABLE unparted; -- cannot create a permanent rel as partition of a temp rel CREATE TEMP TABLE temp_parted ( @@ -623,6 +666,23 @@ CREATE TABLE range3_default PARTITION OF range_parted3 DEFAULT; -- more specific ranges CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, minvalue) TO (1, maxvalue); ERROR: partition "fail_part" would overlap partition "part10" +-- check for partition bound overlap and other invalid specifications for the hash partition +CREATE TABLE hash_parted2 ( + a varchar +) PARTITION BY HASH (a); +CREATE TABLE h2part_1 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 4, REMAINDER 2); +CREATE TABLE h2part_2 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 0); +CREATE TABLE h2part_3 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 4); +CREATE TABLE h2part_4 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 5); +-- overlap with part_4 +CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +ERROR: partition "fail_part" would overlap partition "h2part_4" +-- modulus must be greater than zero +CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 0, REMAINDER 1); +ERROR: modulus for hash partition must be a positive integer +-- remainder must be greater than or equal to zero and less than modulus +CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 8); +ERROR: remainder for hash partition must be less than modulus -- check schema propagation from parent CREATE TABLE parted ( a text, @@ -721,6 +781,14 @@ Check constraints: "check_a" CHECK (length(a) > 0) Number of partitions: 3 (Use \d+ to list them.) +\d hash_parted + Table "public.hash_parted" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: HASH (a) +Number of partitions: 3 (Use \d+ to list them.) + -- check that we get the expected partition constraints CREATE TABLE range_parted4 (a int, b int, c int) PARTITION BY RANGE (abs(a), abs(b), c); CREATE TABLE unbounded_range_part PARTITION OF range_parted4 FOR VALUES FROM (MINVALUE, MINVALUE, MINVALUE) TO (MAXVALUE, MAXVALUE, MAXVALUE); @@ -771,6 +839,8 @@ Partition constraint: ((abs(a) IS NOT NULL) AND (abs(b) IS NOT NULL) AND (c IS N DROP TABLE range_parted4; -- cleanup DROP TABLE parted, list_parted, range_parted, list_parted2, range_parted2, range_parted3; +DROP TABLE hash_parted; +DROP TABLE hash_parted2; -- comments on partitioned tables columns CREATE TABLE parted_col_comment (a int, b text) PARTITION BY LIST (a); COMMENT ON TABLE parted_col_comment IS 'Am partitioned table'; diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out index b715619313..9d84ba4658 100644 --- a/src/test/regress/expected/insert.out +++ b/src/test/regress/expected/insert.out @@ -382,8 +382,54 @@ select tableoid::regclass::text, a, min(b) as min_b, max(b) as max_b from list_p part_null | | 1 | 1 (9 rows) +-- direct partition inserts should check hash partition bound constraint +-- create custom operator class and hash function, for the same reason +-- explained in alter_table.sql +create or replace function dummy_hashint4(a int4, seed int8) returns int8 as +$$ begin return (a + seed); end; $$ language 'plpgsql' immutable; +create operator class custom_opclass for type int4 using hash as +operator 1 = , function 2 dummy_hashint4(int4, int8); +create table hash_parted ( + a int +) partition by hash (a custom_opclass); +create table hpart0 partition of hash_parted for values with (modulus 4, remainder 0); +create table hpart1 partition of hash_parted for values with (modulus 4, remainder 1); +create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2); +create table hpart3 partition of hash_parted for values with (modulus 4, remainder 3); +insert into hash_parted values(generate_series(1,10)); +-- direct insert of values divisible by 4 - ok; +insert into hpart0 values(12),(16); +-- fail; +insert into hpart0 values(11); +ERROR: new row for relation "hpart0" violates partition constraint +DETAIL: Failing row contains (11). +-- 11 % 4 -> 3 remainder i.e. valid data for hpart3 partition +insert into hpart3 values(11); +-- view data +select tableoid::regclass as part, a, a%4 as "remainder = a % 4" +from hash_parted order by part; + part | a | remainder = a % 4 +--------+----+------------------- + hpart0 | 4 | 0 + hpart0 | 8 | 0 + hpart0 | 12 | 0 + hpart0 | 16 | 0 + hpart1 | 1 | 1 + hpart1 | 5 | 1 + hpart1 | 9 | 1 + hpart2 | 2 | 2 + hpart2 | 6 | 2 + hpart2 | 10 | 2 + hpart3 | 3 | 3 + hpart3 | 7 | 3 + hpart3 | 11 | 3 +(13 rows) + -- cleanup drop table range_parted, list_parted; +drop table hash_parted; +drop operator class custom_opclass using hash; +drop function dummy_hashint4(a int4, seed int8); -- test that a default partition added as the first partition accepts any value -- including null create table list_parted (a int) partition by list (a); diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index adf6aedfa6..27ab8521f8 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -1256,6 +1256,87 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 One-Time Filter: false (14 rows) +-- +-- tests for hash partitioned tables. +-- +CREATE TABLE pht1 (a int, b int, c text) PARTITION BY HASH(c); +CREATE TABLE pht1_p1 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht1_p2 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht1_p3 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; +ANALYZE pht1; +CREATE TABLE pht2 (a int, b int, c text) PARTITION BY HASH(c); +CREATE TABLE pht2_p1 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht2_p2 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht2_p3 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i; +ANALYZE pht2; +-- +-- hash partitioned by expression +-- +CREATE TABLE pht1_e (a int, b int, c text) PARTITION BY HASH(ltrim(c, 'A')); +CREATE TABLE pht1_e_p1 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht1_e_p2 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht1_e_p3 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; +ANALYZE pht1_e; +-- test partition matching with N-way join +EXPLAIN (COSTS OFF) +SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: t1.c, t3.c + -> HashAggregate + Group Key: t1.c, t2.c, t3.c + -> Result + -> Append + -> Hash Join + Hash Cond: (t1.c = t2.c) + -> Seq Scan on pht1_p1 t1 + -> Hash + -> Hash Join + Hash Cond: (t2.c = ltrim(t3.c, 'A'::text)) + -> Seq Scan on pht2_p1 t2 + -> Hash + -> Seq Scan on pht1_e_p1 t3 + -> Hash Join + Hash Cond: (t1_1.c = t2_1.c) + -> Seq Scan on pht1_p2 t1_1 + -> Hash + -> Hash Join + Hash Cond: (t2_1.c = ltrim(t3_1.c, 'A'::text)) + -> Seq Scan on pht2_p2 t2_1 + -> Hash + -> Seq Scan on pht1_e_p2 t3_1 + -> Hash Join + Hash Cond: (t1_2.c = t2_2.c) + -> Seq Scan on pht1_p3 t1_2 + -> Hash + -> Hash Join + Hash Cond: (t2_2.c = ltrim(t3_2.c, 'A'::text)) + -> Seq Scan on pht2_p3 t2_2 + -> Hash + -> Seq Scan on pht1_e_p3 t3_2 +(33 rows) + +SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c; + avg | avg | avg | c | c | c +----------------------+----------------------+-----------------------+------+------+------- + 24.0000000000000000 | 24.0000000000000000 | 48.0000000000000000 | 0000 | 0000 | A0000 + 74.0000000000000000 | 75.0000000000000000 | 148.0000000000000000 | 0001 | 0001 | A0001 + 124.0000000000000000 | 124.5000000000000000 | 248.0000000000000000 | 0002 | 0002 | A0002 + 174.0000000000000000 | 174.0000000000000000 | 348.0000000000000000 | 0003 | 0003 | A0003 + 224.0000000000000000 | 225.0000000000000000 | 448.0000000000000000 | 0004 | 0004 | A0004 + 274.0000000000000000 | 274.5000000000000000 | 548.0000000000000000 | 0005 | 0005 | A0005 + 324.0000000000000000 | 324.0000000000000000 | 648.0000000000000000 | 0006 | 0006 | A0006 + 374.0000000000000000 | 375.0000000000000000 | 748.0000000000000000 | 0007 | 0007 | A0007 + 424.0000000000000000 | 424.5000000000000000 | 848.0000000000000000 | 0008 | 0008 | A0008 + 474.0000000000000000 | 474.0000000000000000 | 948.0000000000000000 | 0009 | 0009 | A0009 + 524.0000000000000000 | 525.0000000000000000 | 1048.0000000000000000 | 0010 | 0010 | A0010 + 574.0000000000000000 | 574.5000000000000000 | 1148.0000000000000000 | 0011 | 0011 | A0011 +(12 rows) + -- -- multiple levels of partitioning -- diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out index cef70b1a1e..a4fe96112e 100644 --- a/src/test/regress/expected/update.out +++ b/src/test/regress/expected/update.out @@ -250,6 +250,35 @@ ERROR: new row for relation "list_default" violates partition constraint DETAIL: Failing row contains (a, 10). -- ok update list_default set a = 'x' where a = 'd'; +-- create custom operator class and hash function, for the same reason +-- explained in alter_table.sql +create or replace function dummy_hashint4(a int4, seed int8) returns int8 as +$$ begin return (a + seed); end; $$ language 'plpgsql' immutable; +create operator class custom_opclass for type int4 using hash as +operator 1 = , function 2 dummy_hashint4(int4, int8); +create table hash_parted ( + a int, + b int +) partition by hash (a custom_opclass, b custom_opclass); +create table hpart1 partition of hash_parted for values with (modulus 2, remainder 1); +create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2); +create table hpart3 partition of hash_parted for values with (modulus 8, remainder 0); +create table hpart4 partition of hash_parted for values with (modulus 8, remainder 4); +insert into hpart1 values (1, 1); +insert into hpart2 values (2, 5); +insert into hpart4 values (3, 4); +-- fail +update hpart1 set a = 3, b=4 where a = 1; +ERROR: new row for relation "hpart1" violates partition constraint +DETAIL: Failing row contains (3, 4). +update hash_parted set b = b - 1 where b = 1; +ERROR: new row for relation "hpart1" violates partition constraint +DETAIL: Failing row contains (1, 0). +-- ok +update hash_parted set b = b + 8 where b = 1; -- cleanup drop table range_parted; drop table list_parted; +drop table hash_parted; +drop operator class custom_opclass using hash; +drop function dummy_hashint4(a int4, seed int8); diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index 4ae4c2ecee..02a33ca7c4 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -2139,6 +2139,7 @@ SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::reg -- check that the new partition won't overlap with an existing partition CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS); ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +DROP TABLE fail_part; -- check that an existing table can be attached as a default partition CREATE TABLE def_part (LIKE list_parted INCLUDING CONSTRAINTS); ALTER TABLE list_parted ATTACH PARTITION def_part DEFAULT; @@ -2332,6 +2333,62 @@ CREATE TABLE quuux1 PARTITION OF quuux FOR VALUES IN (1); CREATE TABLE quuux2 PARTITION OF quuux FOR VALUES IN (2); DROP TABLE quuux; +-- check validation when attaching hash partitions + +-- The default hash functions as they exist today aren't portable; they can +-- return different results on different machines. Depending upon how the +-- values are hashed, the row may map to different partitions, which result in +-- regression failure. To avoid this, let's create a non-default hash function +-- that just returns the input value unchanged. +CREATE OR REPLACE FUNCTION dummy_hashint4(a int4, seed int8) RETURNS int8 AS +$$ BEGIN RETURN (a + 1 + seed); END; $$ LANGUAGE 'plpgsql' IMMUTABLE; +CREATE OPERATOR CLASS custom_opclass FOR TYPE int4 USING HASH AS +OPERATOR 1 = , FUNCTION 2 dummy_hashint4(int4, int8); + +-- check that the new partition won't overlap with an existing partition +CREATE TABLE hash_parted ( + a int, + b int +) PARTITION BY HASH (a custom_opclass); +CREATE TABLE hpart_1 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 4, REMAINDER 0); +CREATE TABLE fail_part (LIKE hpart_1); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 4); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 0); +DROP TABLE fail_part; + +-- check validation when attaching hash partitions + +-- check that violating rows are correctly reported +CREATE TABLE hpart_2 (LIKE hash_parted); +INSERT INTO hpart_2 VALUES (3, 0); +ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1); + +-- should be ok after deleting the bad row +DELETE FROM hpart_2; +ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1); + +-- check that leaf partitions are scanned when attaching a partitioned +-- table +CREATE TABLE hpart_5 ( + LIKE hash_parted +) PARTITION BY LIST (b); + +-- check that violating rows are correctly reported +CREATE TABLE hpart_5_a PARTITION OF hpart_5 FOR VALUES IN ('1', '2', '3'); +INSERT INTO hpart_5_a (a, b) VALUES (7, 1); +ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2); + +-- should be ok after deleting the bad row +DELETE FROM hpart_5_a; +ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2); + +-- check that the table being attach is with valid modulus and remainder value +CREATE TABLE fail_part(LIKE hash_parted); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 0, REMAINDER 1); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 8); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 3, REMAINDER 2); +DROP TABLE fail_part; + -- -- DETACH PARTITION -- @@ -2343,12 +2400,16 @@ DROP TABLE regular_table; -- check that the partition being detached exists at all ALTER TABLE list_parted2 DETACH PARTITION part_4; +ALTER TABLE hash_parted DETACH PARTITION hpart_4; -- check that the partition being detached is actually a partition of the parent CREATE TABLE not_a_part (a int); ALTER TABLE list_parted2 DETACH PARTITION not_a_part; ALTER TABLE list_parted2 DETACH PARTITION part_1; +ALTER TABLE hash_parted DETACH PARTITION not_a_part; +DROP TABLE not_a_part; + -- check that, after being detached, attinhcount/coninhcount is dropped to 0 and -- attislocal/conislocal is set to true ALTER TABLE list_parted2 DETACH PARTITION part_3_4; @@ -2425,6 +2486,9 @@ SELECT * FROM list_parted; -- cleanup DROP TABLE list_parted, list_parted2, range_parted; DROP TABLE fail_def_part; +DROP TABLE hash_parted; +DROP OPERATOR CLASS custom_opclass USING HASH; +DROP FUNCTION dummy_hashint4(a int4, seed int8); -- more tests for certain multi-level partitioning scenarios create table p (a int, b int) partition by range (a, b); diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql index df6a6d7326..b77b476436 100644 --- a/src/test/regress/sql/create_table.sql +++ b/src/test/regress/sql/create_table.sql @@ -350,10 +350,10 @@ CREATE TABLE partitioned ( ) PARTITION BY RANGE (const_func()); DROP FUNCTION const_func(); --- only accept "list" and "range" as partitioning strategy +-- only accept valid partitioning strategy CREATE TABLE partitioned ( - a int -) PARTITION BY HASH (a); + a int +) PARTITION BY MAGIC (a); -- specified column must be present in the table CREATE TABLE partitioned ( @@ -446,6 +446,8 @@ CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int); CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (); -- trying to specify range for list partitioned table CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) TO (2); +-- trying to specify modulus and remainder for list partitioned table +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); -- check default partition cannot be created more than once CREATE TABLE part_default PARTITION OF list_parted DEFAULT; @@ -481,6 +483,8 @@ CREATE TABLE range_parted ( -- trying to specify list for range partitioned table CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES IN ('a'); +-- trying to specify modulus and remainder for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); -- each of start and end bounds must have same number of values as the -- length of the partition key CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('z'); @@ -489,6 +493,28 @@ CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a') TO ('z', -- cannot specify null values in range bounds CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM (null) TO (maxvalue); +-- trying to specify modulus and remainder for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES WITH (MODULUS 10, REMAINDER 1); + +-- check partition bound syntax for the hash partition +CREATE TABLE hash_parted ( + a int +) PARTITION BY HASH (a); +CREATE TABLE hpart_1 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 10, REMAINDER 0); +CREATE TABLE hpart_2 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 50, REMAINDER 1); +CREATE TABLE hpart_3 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 200, REMAINDER 2); +-- modulus 25 is factor of modulus of 50 but 10 is not factor of 25. +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES WITH (MODULUS 25, REMAINDER 3); +-- previous modulus 50 is factor of 150 but this modulus is not factor of next modulus 200. +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES WITH (MODULUS 150, REMAINDER 3); +-- trying to specify range for the hash partitioned table +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES FROM ('a', 1) TO ('z'); +-- trying to specify list value for the hash partitioned table +CREATE TABLE fail_part PARTITION OF hash_parted FOR VALUES IN (1000); + +-- trying to create default partition for the hash partitioned table +CREATE TABLE fail_default_part PARTITION OF hash_parted DEFAULT; + -- check if compatible with the specified parent -- cannot create as partition of a non-partitioned table @@ -496,6 +522,7 @@ CREATE TABLE unparted ( a int ); CREATE TABLE fail_part PARTITION OF unparted FOR VALUES IN ('a'); +CREATE TABLE fail_part PARTITION OF unparted FOR VALUES WITH (MODULUS 2, REMAINDER 1); DROP TABLE unparted; -- cannot create a permanent rel as partition of a temp rel @@ -585,6 +612,21 @@ CREATE TABLE range3_default PARTITION OF range_parted3 DEFAULT; -- more specific ranges CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, minvalue) TO (1, maxvalue); +-- check for partition bound overlap and other invalid specifications for the hash partition +CREATE TABLE hash_parted2 ( + a varchar +) PARTITION BY HASH (a); +CREATE TABLE h2part_1 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 4, REMAINDER 2); +CREATE TABLE h2part_2 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 0); +CREATE TABLE h2part_3 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 4); +CREATE TABLE h2part_4 PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 5); +-- overlap with part_4 +CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +-- modulus must be greater than zero +CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 0, REMAINDER 1); +-- remainder must be greater than or equal to zero and less than modulus +CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 8); + -- check schema propagation from parent CREATE TABLE parted ( @@ -638,6 +680,7 @@ CREATE TABLE part_c_1_10 PARTITION OF part_c FOR VALUES FROM (1) TO (10); -- output could vary depending on the order in which partition oids are -- returned. \d parted +\d hash_parted -- check that we get the expected partition constraints CREATE TABLE range_parted4 (a int, b int, c int) PARTITION BY RANGE (abs(a), abs(b), c); @@ -654,6 +697,8 @@ DROP TABLE range_parted4; -- cleanup DROP TABLE parted, list_parted, range_parted, list_parted2, range_parted2, range_parted3; +DROP TABLE hash_parted; +DROP TABLE hash_parted2; -- comments on partitioned tables columns CREATE TABLE parted_col_comment (a int, b text) PARTITION BY LIST (a); diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql index d741514414..791817ba50 100644 --- a/src/test/regress/sql/insert.sql +++ b/src/test/regress/sql/insert.sql @@ -222,8 +222,41 @@ insert into list_parted select 'gg', s.a from generate_series(1, 9) s(a); insert into list_parted (b) values (1); select tableoid::regclass::text, a, min(b) as min_b, max(b) as max_b from list_parted group by 1, 2 order by 1; +-- direct partition inserts should check hash partition bound constraint + +-- create custom operator class and hash function, for the same reason +-- explained in alter_table.sql +create or replace function dummy_hashint4(a int4, seed int8) returns int8 as +$$ begin return (a + seed); end; $$ language 'plpgsql' immutable; +create operator class custom_opclass for type int4 using hash as +operator 1 = , function 2 dummy_hashint4(int4, int8); + +create table hash_parted ( + a int +) partition by hash (a custom_opclass); +create table hpart0 partition of hash_parted for values with (modulus 4, remainder 0); +create table hpart1 partition of hash_parted for values with (modulus 4, remainder 1); +create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2); +create table hpart3 partition of hash_parted for values with (modulus 4, remainder 3); + +insert into hash_parted values(generate_series(1,10)); + +-- direct insert of values divisible by 4 - ok; +insert into hpart0 values(12),(16); +-- fail; +insert into hpart0 values(11); +-- 11 % 4 -> 3 remainder i.e. valid data for hpart3 partition +insert into hpart3 values(11); + +-- view data +select tableoid::regclass as part, a, a%4 as "remainder = a % 4" +from hash_parted order by part; + -- cleanup drop table range_parted, list_parted; +drop table hash_parted; +drop operator class custom_opclass using hash; +drop function dummy_hashint4(a int4, seed int8); -- test that a default partition added as the first partition accepts any value -- including null diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 25abf2dc13..6efdf3c517 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -229,6 +229,38 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 FULL JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b; +-- +-- tests for hash partitioned tables. +-- +CREATE TABLE pht1 (a int, b int, c text) PARTITION BY HASH(c); +CREATE TABLE pht1_p1 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht1_p2 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht1_p3 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; +ANALYZE pht1; + +CREATE TABLE pht2 (a int, b int, c text) PARTITION BY HASH(c); +CREATE TABLE pht2_p1 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht2_p2 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht2_p3 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i; +ANALYZE pht2; + +-- +-- hash partitioned by expression +-- +CREATE TABLE pht1_e (a int, b int, c text) PARTITION BY HASH(ltrim(c, 'A')); +CREATE TABLE pht1_e_p1 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht1_e_p2 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht1_e_p3 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; +ANALYZE pht1_e; + +-- test partition matching with N-way join +EXPLAIN (COSTS OFF) +SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c; +SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c; + -- -- multiple levels of partitioning -- diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql index 66d1feca10..0c70d64a89 100644 --- a/src/test/regress/sql/update.sql +++ b/src/test/regress/sql/update.sql @@ -148,6 +148,34 @@ update list_default set a = 'a' where a = 'd'; -- ok update list_default set a = 'x' where a = 'd'; +-- create custom operator class and hash function, for the same reason +-- explained in alter_table.sql +create or replace function dummy_hashint4(a int4, seed int8) returns int8 as +$$ begin return (a + seed); end; $$ language 'plpgsql' immutable; +create operator class custom_opclass for type int4 using hash as +operator 1 = , function 2 dummy_hashint4(int4, int8); + +create table hash_parted ( + a int, + b int +) partition by hash (a custom_opclass, b custom_opclass); +create table hpart1 partition of hash_parted for values with (modulus 2, remainder 1); +create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2); +create table hpart3 partition of hash_parted for values with (modulus 8, remainder 0); +create table hpart4 partition of hash_parted for values with (modulus 8, remainder 4); +insert into hpart1 values (1, 1); +insert into hpart2 values (2, 5); +insert into hpart4 values (3, 4); + +-- fail +update hpart1 set a = 3, b=4 where a = 1; +update hash_parted set b = b - 1 where b = 1; +-- ok +update hash_parted set b = b + 8 where b = 1; + -- cleanup drop table range_parted; drop table list_parted; +drop table hash_parted; +drop operator class custom_opclass using hash; +drop function dummy_hashint4(a int4, seed int8); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 7f0ae978c1..61aeb51c29 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1565,6 +1565,7 @@ PartitionDispatch PartitionDispatchData PartitionElem PartitionKey +PartitionHashBound PartitionListValue PartitionRangeBound PartitionRangeDatum