3399 lines
95 KiB
C
3399 lines
95 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* partition.c
|
|
* Partitioning related data structures and functions.
|
|
*
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/catalog/partition.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/hash.h"
|
|
#include "access/heapam.h"
|
|
#include "access/htup_details.h"
|
|
#include "access/nbtree.h"
|
|
#include "access/sysattr.h"
|
|
#include "catalog/dependency.h"
|
|
#include "catalog/indexing.h"
|
|
#include "catalog/objectaddress.h"
|
|
#include "catalog/partition.h"
|
|
#include "catalog/pg_collation.h"
|
|
#include "catalog/pg_inherits.h"
|
|
#include "catalog/pg_inherits_fn.h"
|
|
#include "catalog/pg_opclass.h"
|
|
#include "catalog/pg_partitioned_table.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "commands/tablecmds.h"
|
|
#include "executor/executor.h"
|
|
#include "miscadmin.h"
|
|
#include "nodes/makefuncs.h"
|
|
#include "nodes/nodeFuncs.h"
|
|
#include "nodes/parsenodes.h"
|
|
#include "optimizer/clauses.h"
|
|
#include "optimizer/planmain.h"
|
|
#include "optimizer/prep.h"
|
|
#include "optimizer/var.h"
|
|
#include "parser/parse_coerce.h"
|
|
#include "rewrite/rewriteManip.h"
|
|
#include "storage/lmgr.h"
|
|
#include "utils/array.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/datum.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/fmgroids.h"
|
|
#include "utils/hashutils.h"
|
|
#include "utils/inval.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/rel.h"
|
|
#include "utils/ruleutils.h"
|
|
#include "utils/syscache.h"
|
|
|
|
/*
 * Information about bounds of a partitioned relation
 *
 * A list partition datum that is known to be NULL is never put into the
 * datums array.  Instead, it is tracked using the null_index field.
 *
 * In the case of range partitioning, ndatums will typically be far less than
 * 2 * nparts, because a partition's upper bound and the next partition's lower
 * bound are the same in most common cases, and we only store one of them (the
 * upper bound).  In the case of hash partitioning, ndatums will be the same as
 * the number of partitions.
 *
 * For range and list partitioned tables, datums is an array of datum-tuples
 * with key->partnatts datums each.  For hash partitioned tables, it is an array
 * of datum-tuples with 2 datums, modulus and remainder, corresponding to a
 * given partition.
 *
 * The datums in the datums array are arranged in increasing order as defined
 * by functions qsort_partition_rbound_cmp(), qsort_partition_list_value_cmp()
 * and qsort_partition_hbound_cmp() for range, list and hash partitioned tables
 * respectively.  For range and list partitions this simply means that the
 * datums in the datums array are arranged in increasing order as defined by
 * the partition key's operator classes and collations.
 *
 * In the case of list partitioning, the indexes array stores one entry for
 * every datum, which is the index of the partition that accepts a given datum.
 * In the case of range partitioning, it stores one entry per distinct range
 * datum, which is the index of the partition for which a given datum
 * is an upper bound.  In the case of hash partitioning, the number of
 * entries in the indexes array is the same as the greatest modulus amongst all
 * partitions.  For a given partition key datum-tuple, the index of the
 * partition which would accept that datum-tuple is given by the entry
 * pointed to by the remainder produced when the hash value of the datum-tuple
 * is divided by the greatest modulus.
 */
|
|
|
|
typedef struct PartitionBoundInfoData
|
|
{
|
|
char strategy; /* hash, list or range? */
|
|
int ndatums; /* Length of the datums following array */
|
|
Datum **datums;
|
|
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
|
|
* NULL for hash and list partitioned
|
|
* tables */
|
|
int *indexes; /* Partition indexes */
|
|
int null_index; /* Index of the null-accepting partition; -1
|
|
* if there isn't one */
|
|
int default_index; /* Index of the default partition; -1 if there
|
|
* isn't one */
|
|
} PartitionBoundInfoData;
|
|
|
|
/* True when some partition accepts NULL, i.e. null_index is not -1 */
#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)

/* True when a default partition exists, i.e. default_index is not -1 */
#define partition_bound_has_default(bi) ((bi)->default_index != -1)
|
|
|
|
/*
|
|
* When qsort'ing partition bounds after reading from the catalog, each bound
|
|
* is represented with one of the following structs.
|
|
*/
|
|
|
|
/*
 * One bound of a hash partition, in the form used while sorting the bounds
 * read from the catalog.
 */
typedef struct PartitionHashBound
{
	int			modulus;		/* modulus from the partition's bound spec */
	int			remainder;		/* remainder from the partition's bound spec */
	int			index;			/* position of the partition before sorting */
} PartitionHashBound;
|
|
|
|
/* One value coming from some (index'th) list partition */
|
|
typedef struct PartitionListValue
|
|
{
|
|
int index;
|
|
Datum value;
|
|
} PartitionListValue;
|
|
|
|
/* One bound of a range partition */
|
|
typedef struct PartitionRangeBound
|
|
{
|
|
int index;
|
|
Datum *datums; /* range bound datums */
|
|
PartitionRangeDatumKind *kind; /* the kind of each datum */
|
|
bool lower; /* this is the lower (vs upper) bound */
|
|
} PartitionRangeBound;
|
|
|
|
static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
|
|
static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
|
|
void *arg);
|
|
static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
|
|
void *arg);
|
|
|
|
static Oid get_partition_operator(PartitionKey key, int col,
|
|
StrategyNumber strategy, bool *need_relabel);
|
|
static Expr *make_partition_op_expr(PartitionKey key, int keynum,
|
|
uint16 strategy, Expr *arg1, Expr *arg2);
|
|
static void get_range_key_properties(PartitionKey key, int keynum,
|
|
PartitionRangeDatum *ldatum,
|
|
PartitionRangeDatum *udatum,
|
|
ListCell **partexprs_item,
|
|
Expr **keyCol,
|
|
Const **lower_val, Const **upper_val);
|
|
static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
|
|
static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
|
|
static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
|
|
bool for_default);
|
|
static List *get_range_nulltest(PartitionKey key);
|
|
static List *generate_partition_qual(Relation rel);
|
|
|
|
static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
|
|
List *datums, bool lower);
|
|
static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
|
|
int remainder2);
|
|
static int32 partition_rbound_cmp(PartitionKey key,
|
|
Datum *datums1, PartitionRangeDatumKind *kind1,
|
|
bool lower1, PartitionRangeBound *b2);
|
|
static int32 partition_rbound_datum_cmp(PartitionKey key,
|
|
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
|
|
Datum *tuple_datums);
|
|
|
|
static int32 partition_bound_cmp(PartitionKey key,
|
|
PartitionBoundInfo boundinfo,
|
|
int offset, void *probe, bool probe_is_bound);
|
|
static int partition_bound_bsearch(PartitionKey key,
|
|
PartitionBoundInfo boundinfo,
|
|
void *probe, bool probe_is_bound, bool *is_equal);
|
|
static int get_partition_bound_num_indexes(PartitionBoundInfo b);
|
|
static int get_greatest_modulus(PartitionBoundInfo b);
|
|
static uint64 compute_hash_value(PartitionKey key, Datum *values, bool *isnull);
|
|
|
|
/* SQL-callable function for use in hash partition CHECK constraints */
|
|
PG_FUNCTION_INFO_V1(satisfies_hash_partition);
|
|
|
|
/*
|
|
* RelationBuildPartitionDesc
|
|
* Form rel's partition descriptor
|
|
*
|
|
* Not flushed from the cache by RelationClearRelation() unless changed because
|
|
* of addition or removal of partition.
|
|
*/
|
|
void
|
|
RelationBuildPartitionDesc(Relation rel)
|
|
{
|
|
List *inhoids,
|
|
*partoids;
|
|
Oid *oids = NULL;
|
|
List *boundspecs = NIL;
|
|
ListCell *cell;
|
|
int i,
|
|
nparts;
|
|
PartitionKey key = RelationGetPartitionKey(rel);
|
|
PartitionDesc result;
|
|
MemoryContext oldcxt;
|
|
|
|
int ndatums = 0;
|
|
int default_index = -1;
|
|
|
|
/* Hash partitioning specific */
|
|
PartitionHashBound **hbounds = NULL;
|
|
|
|
/* List partitioning specific */
|
|
PartitionListValue **all_values = NULL;
|
|
int null_index = -1;
|
|
|
|
/* Range partitioning specific */
|
|
PartitionRangeBound **rbounds = NULL;
|
|
|
|
/*
|
|
* The following could happen in situations where rel has a pg_class entry
|
|
* but not the pg_partitioned_table entry yet.
|
|
*/
|
|
if (key == NULL)
|
|
return;
|
|
|
|
/* Get partition oids from pg_inherits */
|
|
inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
|
|
|
|
/* Collect bound spec nodes in a list */
|
|
i = 0;
|
|
partoids = NIL;
|
|
foreach(cell, inhoids)
|
|
{
|
|
Oid inhrelid = lfirst_oid(cell);
|
|
HeapTuple tuple;
|
|
Datum datum;
|
|
bool isnull;
|
|
Node *boundspec;
|
|
|
|
tuple = SearchSysCache1(RELOID, inhrelid);
|
|
if (!HeapTupleIsValid(tuple))
|
|
elog(ERROR, "cache lookup failed for relation %u", inhrelid);
|
|
|
|
/*
|
|
* It is possible that the pg_class tuple of a partition has not been
|
|
* updated yet to set its relpartbound field. The only case where
|
|
* this happens is when we open the parent relation to check using its
|
|
* partition descriptor that a new partition's bound does not overlap
|
|
* some existing partition.
|
|
*/
|
|
if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
|
|
{
|
|
ReleaseSysCache(tuple);
|
|
continue;
|
|
}
|
|
|
|
datum = SysCacheGetAttr(RELOID, tuple,
|
|
Anum_pg_class_relpartbound,
|
|
&isnull);
|
|
Assert(!isnull);
|
|
boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
|
|
|
|
/*
|
|
* Sanity check: If the PartitionBoundSpec says this is the default
|
|
* partition, its OID should correspond to whatever's stored in
|
|
* pg_partitioned_table.partdefid; if not, the catalog is corrupt.
|
|
*/
|
|
if (castNode(PartitionBoundSpec, boundspec)->is_default)
|
|
{
|
|
Oid partdefid;
|
|
|
|
partdefid = get_default_partition_oid(RelationGetRelid(rel));
|
|
if (partdefid != inhrelid)
|
|
elog(ERROR, "expected partdefid %u, but got %u",
|
|
inhrelid, partdefid);
|
|
}
|
|
|
|
boundspecs = lappend(boundspecs, boundspec);
|
|
partoids = lappend_oid(partoids, inhrelid);
|
|
ReleaseSysCache(tuple);
|
|
}
|
|
|
|
nparts = list_length(partoids);
|
|
|
|
if (nparts > 0)
|
|
{
|
|
oids = (Oid *) palloc(nparts * sizeof(Oid));
|
|
i = 0;
|
|
foreach(cell, partoids)
|
|
oids[i++] = lfirst_oid(cell);
|
|
|
|
/* Convert from node to the internal representation */
|
|
if (key->strategy == PARTITION_STRATEGY_HASH)
|
|
{
|
|
ndatums = nparts;
|
|
hbounds = (PartitionHashBound **)
|
|
palloc(nparts * sizeof(PartitionHashBound *));
|
|
|
|
i = 0;
|
|
foreach(cell, boundspecs)
|
|
{
|
|
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
|
|
lfirst(cell));
|
|
|
|
if (spec->strategy != PARTITION_STRATEGY_HASH)
|
|
elog(ERROR, "invalid strategy in partition bound spec");
|
|
|
|
hbounds[i] = (PartitionHashBound *)
|
|
palloc(sizeof(PartitionHashBound));
|
|
|
|
hbounds[i]->modulus = spec->modulus;
|
|
hbounds[i]->remainder = spec->remainder;
|
|
hbounds[i]->index = i;
|
|
i++;
|
|
}
|
|
|
|
/* Sort all the bounds in ascending order */
|
|
qsort(hbounds, nparts, sizeof(PartitionHashBound *),
|
|
qsort_partition_hbound_cmp);
|
|
}
|
|
else if (key->strategy == PARTITION_STRATEGY_LIST)
|
|
{
|
|
List *non_null_values = NIL;
|
|
|
|
/*
|
|
* Create a unified list of non-null values across all partitions.
|
|
*/
|
|
i = 0;
|
|
null_index = -1;
|
|
foreach(cell, boundspecs)
|
|
{
|
|
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
|
|
lfirst(cell));
|
|
ListCell *c;
|
|
|
|
if (spec->strategy != PARTITION_STRATEGY_LIST)
|
|
elog(ERROR, "invalid strategy in partition bound spec");
|
|
|
|
/*
|
|
* Note the index of the partition bound spec for the default
|
|
* partition. There's no datum to add to the list of non-null
|
|
* datums for this partition.
|
|
*/
|
|
if (spec->is_default)
|
|
{
|
|
default_index = i;
|
|
i++;
|
|
continue;
|
|
}
|
|
|
|
foreach(c, spec->listdatums)
|
|
{
|
|
Const *val = castNode(Const, lfirst(c));
|
|
PartitionListValue *list_value = NULL;
|
|
|
|
if (!val->constisnull)
|
|
{
|
|
list_value = (PartitionListValue *)
|
|
palloc0(sizeof(PartitionListValue));
|
|
list_value->index = i;
|
|
list_value->value = val->constvalue;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Never put a null into the values array, flag
|
|
* instead for the code further down below where we
|
|
* construct the actual relcache struct.
|
|
*/
|
|
if (null_index != -1)
|
|
elog(ERROR, "found null more than once");
|
|
null_index = i;
|
|
}
|
|
|
|
if (list_value)
|
|
non_null_values = lappend(non_null_values,
|
|
list_value);
|
|
}
|
|
|
|
i++;
|
|
}
|
|
|
|
ndatums = list_length(non_null_values);
|
|
|
|
/*
|
|
* Collect all list values in one array. Alongside the value, we
|
|
* also save the index of partition the value comes from.
|
|
*/
|
|
all_values = (PartitionListValue **) palloc(ndatums *
|
|
sizeof(PartitionListValue *));
|
|
i = 0;
|
|
foreach(cell, non_null_values)
|
|
{
|
|
PartitionListValue *src = lfirst(cell);
|
|
|
|
all_values[i] = (PartitionListValue *)
|
|
palloc(sizeof(PartitionListValue));
|
|
all_values[i]->value = src->value;
|
|
all_values[i]->index = src->index;
|
|
i++;
|
|
}
|
|
|
|
qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
|
|
qsort_partition_list_value_cmp, (void *) key);
|
|
}
|
|
else if (key->strategy == PARTITION_STRATEGY_RANGE)
|
|
{
|
|
int k;
|
|
PartitionRangeBound **all_bounds,
|
|
*prev;
|
|
|
|
all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
|
|
sizeof(PartitionRangeBound *));
|
|
|
|
/*
|
|
* Create a unified list of range bounds across all the
|
|
* partitions.
|
|
*/
|
|
i = ndatums = 0;
|
|
foreach(cell, boundspecs)
|
|
{
|
|
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
|
|
lfirst(cell));
|
|
PartitionRangeBound *lower,
|
|
*upper;
|
|
|
|
if (spec->strategy != PARTITION_STRATEGY_RANGE)
|
|
elog(ERROR, "invalid strategy in partition bound spec");
|
|
|
|
/*
|
|
* Note the index of the partition bound spec for the default
|
|
* partition. There's no datum to add to the allbounds array
|
|
* for this partition.
|
|
*/
|
|
if (spec->is_default)
|
|
{
|
|
default_index = i++;
|
|
continue;
|
|
}
|
|
|
|
lower = make_one_range_bound(key, i, spec->lowerdatums,
|
|
true);
|
|
upper = make_one_range_bound(key, i, spec->upperdatums,
|
|
false);
|
|
all_bounds[ndatums++] = lower;
|
|
all_bounds[ndatums++] = upper;
|
|
i++;
|
|
}
|
|
|
|
Assert(ndatums == nparts * 2 ||
|
|
(default_index != -1 && ndatums == (nparts - 1) * 2));
|
|
|
|
/* Sort all the bounds in ascending order */
|
|
qsort_arg(all_bounds, ndatums,
|
|
sizeof(PartitionRangeBound *),
|
|
qsort_partition_rbound_cmp,
|
|
(void *) key);
|
|
|
|
/* Save distinct bounds from all_bounds into rbounds. */
|
|
rbounds = (PartitionRangeBound **)
|
|
palloc(ndatums * sizeof(PartitionRangeBound *));
|
|
k = 0;
|
|
prev = NULL;
|
|
for (i = 0; i < ndatums; i++)
|
|
{
|
|
PartitionRangeBound *cur = all_bounds[i];
|
|
bool is_distinct = false;
|
|
int j;
|
|
|
|
/* Is the current bound distinct from the previous one? */
|
|
for (j = 0; j < key->partnatts; j++)
|
|
{
|
|
Datum cmpval;
|
|
|
|
if (prev == NULL || cur->kind[j] != prev->kind[j])
|
|
{
|
|
is_distinct = true;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* If the bounds are both MINVALUE or MAXVALUE, stop now
|
|
* and treat them as equal, since any values after this
|
|
* point must be ignored.
|
|
*/
|
|
if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
|
|
break;
|
|
|
|
cmpval = FunctionCall2Coll(&key->partsupfunc[j],
|
|
key->partcollation[j],
|
|
cur->datums[j],
|
|
prev->datums[j]);
|
|
if (DatumGetInt32(cmpval) != 0)
|
|
{
|
|
is_distinct = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Only if the bound is distinct save it into a temporary
|
|
* array i.e. rbounds which is later copied into boundinfo
|
|
* datums array.
|
|
*/
|
|
if (is_distinct)
|
|
rbounds[k++] = all_bounds[i];
|
|
|
|
prev = cur;
|
|
}
|
|
|
|
/* Update ndatums to hold the count of distinct datums. */
|
|
ndatums = k;
|
|
}
|
|
else
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
(int) key->strategy);
|
|
}
|
|
|
|
/* Now build the actual relcache partition descriptor */
|
|
rel->rd_pdcxt = AllocSetContextCreateExtended(CacheMemoryContext,
|
|
RelationGetRelationName(rel),
|
|
MEMCONTEXT_COPY_NAME,
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
|
|
|
|
result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
|
|
result->nparts = nparts;
|
|
if (nparts > 0)
|
|
{
|
|
PartitionBoundInfo boundinfo;
|
|
int *mapping;
|
|
int next_index = 0;
|
|
|
|
result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
|
|
|
|
boundinfo = (PartitionBoundInfoData *)
|
|
palloc0(sizeof(PartitionBoundInfoData));
|
|
boundinfo->strategy = key->strategy;
|
|
boundinfo->default_index = -1;
|
|
boundinfo->ndatums = ndatums;
|
|
boundinfo->null_index = -1;
|
|
boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
|
|
|
|
/* Initialize mapping array with invalid values */
|
|
mapping = (int *) palloc(sizeof(int) * nparts);
|
|
for (i = 0; i < nparts; i++)
|
|
mapping[i] = -1;
|
|
|
|
switch (key->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_HASH:
|
|
{
|
|
/* Modulus are stored in ascending order */
|
|
int greatest_modulus = hbounds[ndatums - 1]->modulus;
|
|
|
|
boundinfo->indexes = (int *) palloc(greatest_modulus *
|
|
sizeof(int));
|
|
|
|
for (i = 0; i < greatest_modulus; i++)
|
|
boundinfo->indexes[i] = -1;
|
|
|
|
for (i = 0; i < nparts; i++)
|
|
{
|
|
int modulus = hbounds[i]->modulus;
|
|
int remainder = hbounds[i]->remainder;
|
|
|
|
boundinfo->datums[i] = (Datum *) palloc(2 *
|
|
sizeof(Datum));
|
|
boundinfo->datums[i][0] = Int32GetDatum(modulus);
|
|
boundinfo->datums[i][1] = Int32GetDatum(remainder);
|
|
|
|
while (remainder < greatest_modulus)
|
|
{
|
|
/* overlap? */
|
|
Assert(boundinfo->indexes[remainder] == -1);
|
|
boundinfo->indexes[remainder] = i;
|
|
remainder += modulus;
|
|
}
|
|
|
|
mapping[hbounds[i]->index] = i;
|
|
pfree(hbounds[i]);
|
|
}
|
|
pfree(hbounds);
|
|
break;
|
|
}
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
{
|
|
boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
|
|
|
|
/*
|
|
* Copy values. Indexes of individual values are mapped
|
|
* to canonical values so that they match for any two list
|
|
* partitioned tables with same number of partitions and
|
|
* same lists per partition. One way to canonicalize is
|
|
* to assign the index in all_values[] of the smallest
|
|
* value of each partition, as the index of all of the
|
|
* partition's values.
|
|
*/
|
|
for (i = 0; i < ndatums; i++)
|
|
{
|
|
boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
|
|
boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
|
|
key->parttypbyval[0],
|
|
key->parttyplen[0]);
|
|
|
|
/* If the old index has no mapping, assign one */
|
|
if (mapping[all_values[i]->index] == -1)
|
|
mapping[all_values[i]->index] = next_index++;
|
|
|
|
boundinfo->indexes[i] = mapping[all_values[i]->index];
|
|
}
|
|
|
|
/*
|
|
* If null-accepting partition has no mapped index yet,
|
|
* assign one. This could happen if such partition
|
|
* accepts only null and hence not covered in the above
|
|
* loop which only handled non-null values.
|
|
*/
|
|
if (null_index != -1)
|
|
{
|
|
Assert(null_index >= 0);
|
|
if (mapping[null_index] == -1)
|
|
mapping[null_index] = next_index++;
|
|
boundinfo->null_index = mapping[null_index];
|
|
}
|
|
|
|
/* Assign mapped index for the default partition. */
|
|
if (default_index != -1)
|
|
{
|
|
/*
|
|
* The default partition accepts any value not
|
|
* specified in the lists of other partitions, hence
|
|
* it should not get mapped index while assigning
|
|
* those for non-null datums.
|
|
*/
|
|
Assert(default_index >= 0 &&
|
|
mapping[default_index] == -1);
|
|
mapping[default_index] = next_index++;
|
|
boundinfo->default_index = mapping[default_index];
|
|
}
|
|
|
|
/* All partition must now have a valid mapping */
|
|
Assert(next_index == nparts);
|
|
break;
|
|
}
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
{
|
|
boundinfo->kind = (PartitionRangeDatumKind **)
|
|
palloc(ndatums *
|
|
sizeof(PartitionRangeDatumKind *));
|
|
boundinfo->indexes = (int *) palloc((ndatums + 1) *
|
|
sizeof(int));
|
|
|
|
for (i = 0; i < ndatums; i++)
|
|
{
|
|
int j;
|
|
|
|
boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
|
|
sizeof(Datum));
|
|
boundinfo->kind[i] = (PartitionRangeDatumKind *)
|
|
palloc(key->partnatts *
|
|
sizeof(PartitionRangeDatumKind));
|
|
for (j = 0; j < key->partnatts; j++)
|
|
{
|
|
if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
|
|
boundinfo->datums[i][j] =
|
|
datumCopy(rbounds[i]->datums[j],
|
|
key->parttypbyval[j],
|
|
key->parttyplen[j]);
|
|
boundinfo->kind[i][j] = rbounds[i]->kind[j];
|
|
}
|
|
|
|
/*
|
|
* There is no mapping for invalid indexes.
|
|
*
|
|
* Any lower bounds in the rbounds array have invalid
|
|
* indexes assigned, because the values between the
|
|
* previous bound (if there is one) and this (lower)
|
|
* bound are not part of the range of any existing
|
|
* partition.
|
|
*/
|
|
if (rbounds[i]->lower)
|
|
boundinfo->indexes[i] = -1;
|
|
else
|
|
{
|
|
int orig_index = rbounds[i]->index;
|
|
|
|
/* If the old index has no mapping, assign one */
|
|
if (mapping[orig_index] == -1)
|
|
mapping[orig_index] = next_index++;
|
|
|
|
boundinfo->indexes[i] = mapping[orig_index];
|
|
}
|
|
}
|
|
|
|
/* Assign mapped index for the default partition. */
|
|
if (default_index != -1)
|
|
{
|
|
Assert(default_index >= 0 && mapping[default_index] == -1);
|
|
mapping[default_index] = next_index++;
|
|
boundinfo->default_index = mapping[default_index];
|
|
}
|
|
boundinfo->indexes[i] = -1;
|
|
break;
|
|
}
|
|
|
|
default:
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
(int) key->strategy);
|
|
}
|
|
|
|
result->boundinfo = boundinfo;
|
|
|
|
/*
|
|
* Now assign OIDs from the original array into mapped indexes of the
|
|
* result array. Order of OIDs in the former is defined by the
|
|
* catalog scan that retrieved them, whereas that in the latter is
|
|
* defined by canonicalized representation of the partition bounds.
|
|
*/
|
|
for (i = 0; i < nparts; i++)
|
|
result->oids[mapping[i]] = oids[i];
|
|
pfree(mapping);
|
|
}
|
|
|
|
MemoryContextSwitchTo(oldcxt);
|
|
rel->rd_partdesc = result;
|
|
}
|
|
|
|
/*
|
|
* Are two partition bound collections logically equal?
|
|
*
|
|
* Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
|
|
* This is also useful when b1 and b2 are bound collections of two separate
|
|
* relations, respectively, because PartitionBoundInfo is a canonical
|
|
* representation of partition bounds.
|
|
*/
|
|
bool
|
|
partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
|
|
PartitionBoundInfo b1, PartitionBoundInfo b2)
|
|
{
|
|
int i;
|
|
|
|
if (b1->strategy != b2->strategy)
|
|
return false;
|
|
|
|
if (b1->ndatums != b2->ndatums)
|
|
return false;
|
|
|
|
if (b1->null_index != b2->null_index)
|
|
return false;
|
|
|
|
if (b1->default_index != b2->default_index)
|
|
return false;
|
|
|
|
if (b1->strategy == PARTITION_STRATEGY_HASH)
|
|
{
|
|
int greatest_modulus = get_greatest_modulus(b1);
|
|
|
|
/*
|
|
* If two hash partitioned tables have different greatest moduli,
|
|
* their partition schemes don't match.
|
|
*/
|
|
if (greatest_modulus != get_greatest_modulus(b2))
|
|
return false;
|
|
|
|
/*
|
|
* We arrange the partitions in the ascending order of their modulus
|
|
* and remainders. Also every modulus is factor of next larger
|
|
* modulus. Therefore we can safely store index of a given partition
|
|
* in indexes array at remainder of that partition. Also entries at
|
|
* (remainder + N * modulus) positions in indexes array are all same
|
|
* for (modulus, remainder) specification for any partition. Thus
|
|
* datums array from both the given bounds are same, if and only if
|
|
* their indexes array will be same. So, it suffices to compare
|
|
* indexes array.
|
|
*/
|
|
for (i = 0; i < greatest_modulus; i++)
|
|
if (b1->indexes[i] != b2->indexes[i])
|
|
return false;
|
|
|
|
#ifdef USE_ASSERT_CHECKING
|
|
|
|
/*
|
|
* Nonetheless make sure that the bounds are indeed same when the
|
|
* indexes match. Hash partition bound stores modulus and remainder
|
|
* at b1->datums[i][0] and b1->datums[i][1] position respectively.
|
|
*/
|
|
for (i = 0; i < b1->ndatums; i++)
|
|
Assert((b1->datums[i][0] == b2->datums[i][0] &&
|
|
b1->datums[i][1] == b2->datums[i][1]));
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
for (i = 0; i < b1->ndatums; i++)
|
|
{
|
|
int j;
|
|
|
|
for (j = 0; j < partnatts; j++)
|
|
{
|
|
/* For range partitions, the bounds might not be finite. */
|
|
if (b1->kind != NULL)
|
|
{
|
|
/* The different kinds of bound all differ from each other */
|
|
if (b1->kind[i][j] != b2->kind[i][j])
|
|
return false;
|
|
|
|
/*
|
|
* Non-finite bounds are equal without further
|
|
* examination.
|
|
*/
|
|
if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Compare the actual values. Note that it would be both
|
|
* incorrect and unsafe to invoke the comparison operator
|
|
* derived from the partitioning specification here. It would
|
|
* be incorrect because we want the relcache entry to be
|
|
* updated for ANY change to the partition bounds, not just
|
|
* those that the partitioning operator thinks are
|
|
* significant. It would be unsafe because we might reach
|
|
* this code in the context of an aborted transaction, and an
|
|
* arbitrary partitioning operator might not be safe in that
|
|
* context. datumIsEqual() should be simple enough to be
|
|
* safe.
|
|
*/
|
|
if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
|
|
parttypbyval[j], parttyplen[j]))
|
|
return false;
|
|
}
|
|
|
|
if (b1->indexes[i] != b2->indexes[i])
|
|
return false;
|
|
}
|
|
|
|
/* There are ndatums+1 indexes in case of range partitions */
|
|
if (b1->strategy == PARTITION_STRATEGY_RANGE &&
|
|
b1->indexes[i] != b2->indexes[i])
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Return a copy of given PartitionBoundInfo structure. The data types of bounds
|
|
* are described by given partition key specification.
|
|
*/
|
|
extern PartitionBoundInfo
|
|
partition_bounds_copy(PartitionBoundInfo src,
|
|
PartitionKey key)
|
|
{
|
|
PartitionBoundInfo dest;
|
|
int i;
|
|
int ndatums;
|
|
int partnatts;
|
|
int num_indexes;
|
|
|
|
dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
|
|
|
|
dest->strategy = src->strategy;
|
|
ndatums = dest->ndatums = src->ndatums;
|
|
partnatts = key->partnatts;
|
|
|
|
num_indexes = get_partition_bound_num_indexes(src);
|
|
|
|
/* List partitioned tables have only a single partition key. */
|
|
Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
|
|
|
|
dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
|
|
|
|
if (src->kind != NULL)
|
|
{
|
|
dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
|
|
sizeof(PartitionRangeDatumKind *));
|
|
for (i = 0; i < ndatums; i++)
|
|
{
|
|
dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts *
|
|
sizeof(PartitionRangeDatumKind));
|
|
|
|
memcpy(dest->kind[i], src->kind[i],
|
|
sizeof(PartitionRangeDatumKind) * key->partnatts);
|
|
}
|
|
}
|
|
else
|
|
dest->kind = NULL;
|
|
|
|
for (i = 0; i < ndatums; i++)
|
|
{
|
|
int j;
|
|
|
|
/*
|
|
* For a corresponding to hash partition, datums array will have two
|
|
* elements - modulus and remainder.
|
|
*/
|
|
bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
|
|
int natts = hash_part ? 2 : partnatts;
|
|
|
|
dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts);
|
|
|
|
for (j = 0; j < natts; j++)
|
|
{
|
|
bool byval;
|
|
int typlen;
|
|
|
|
if (hash_part)
|
|
{
|
|
typlen = sizeof(int32); /* Always int4 */
|
|
byval = true; /* int4 is pass-by-value */
|
|
}
|
|
else
|
|
{
|
|
byval = key->parttypbyval[j];
|
|
typlen = key->parttyplen[j];
|
|
}
|
|
|
|
if (dest->kind == NULL ||
|
|
dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
|
|
dest->datums[i][j] = datumCopy(src->datums[i][j],
|
|
byval, typlen);
|
|
}
|
|
}
|
|
|
|
dest->indexes = (int *) palloc(sizeof(int) * num_indexes);
|
|
memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes);
|
|
|
|
dest->null_index = src->null_index;
|
|
dest->default_index = src->default_index;
|
|
|
|
return dest;
|
|
}
|
|
|
|
/*
|
|
* check_new_partition_bound
|
|
*
|
|
* Checks if the new partition's bound overlaps any of the existing partitions
|
|
* of parent. Also performs additional checks as necessary per strategy.
|
|
*/
|
|
void
|
|
check_new_partition_bound(char *relname, Relation parent,
|
|
PartitionBoundSpec *spec)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
PartitionDesc partdesc = RelationGetPartitionDesc(parent);
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
|
ParseState *pstate = make_parsestate(NULL);
|
|
int with = -1;
|
|
bool overlap = false;
|
|
|
|
if (spec->is_default)
|
|
{
|
|
if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
|
|
return;
|
|
|
|
/* Default partition already exists, error out. */
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
|
|
relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
|
|
parser_errposition(pstate, spec->location)));
|
|
}
|
|
|
|
switch (key->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_HASH:
|
|
{
|
|
Assert(spec->strategy == PARTITION_STRATEGY_HASH);
|
|
Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
|
|
|
|
if (partdesc->nparts > 0)
|
|
{
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
|
Datum **datums = boundinfo->datums;
|
|
int ndatums = boundinfo->ndatums;
|
|
int greatest_modulus;
|
|
int remainder;
|
|
int offset;
|
|
bool equal,
|
|
valid_modulus = true;
|
|
int prev_modulus, /* Previous largest modulus */
|
|
next_modulus; /* Next largest modulus */
|
|
|
|
/*
|
|
* Check rule that every modulus must be a factor of the
|
|
* next larger modulus. For example, if you have a bunch
|
|
* of partitions that all have modulus 5, you can add a
|
|
* new partition with modulus 10 or a new partition with
|
|
* modulus 15, but you cannot add both a partition with
|
|
* modulus 10 and a partition with modulus 15, because 10
|
|
* is not a factor of 15.
|
|
*
|
|
* Get greatest bound in array boundinfo->datums which is
|
|
* less than or equal to spec->modulus and
|
|
* spec->remainder.
|
|
*/
|
|
offset = partition_bound_bsearch(key, boundinfo, spec,
|
|
true, &equal);
|
|
if (offset < 0)
|
|
{
|
|
next_modulus = DatumGetInt32(datums[0][0]);
|
|
valid_modulus = (next_modulus % spec->modulus) == 0;
|
|
}
|
|
else
|
|
{
|
|
prev_modulus = DatumGetInt32(datums[offset][0]);
|
|
valid_modulus = (spec->modulus % prev_modulus) == 0;
|
|
|
|
if (valid_modulus && (offset + 1) < ndatums)
|
|
{
|
|
next_modulus = DatumGetInt32(datums[offset + 1][0]);
|
|
valid_modulus = (next_modulus % spec->modulus) == 0;
|
|
}
|
|
}
|
|
|
|
if (!valid_modulus)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
|
|
|
|
greatest_modulus = get_greatest_modulus(boundinfo);
|
|
remainder = spec->remainder;
|
|
|
|
/*
|
|
* Normally, the lowest remainder that could conflict with
|
|
* the new partition is equal to the remainder specified
|
|
* for the new partition, but when the new partition has a
|
|
* modulus higher than any used so far, we need to adjust.
|
|
*/
|
|
if (remainder >= greatest_modulus)
|
|
remainder = remainder % greatest_modulus;
|
|
|
|
/* Check every potentially-conflicting remainder. */
|
|
do
|
|
{
|
|
if (boundinfo->indexes[remainder] != -1)
|
|
{
|
|
overlap = true;
|
|
with = boundinfo->indexes[remainder];
|
|
break;
|
|
}
|
|
remainder += spec->modulus;
|
|
} while (remainder < greatest_modulus);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
{
|
|
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
|
|
|
|
if (partdesc->nparts > 0)
|
|
{
|
|
ListCell *cell;
|
|
|
|
Assert(boundinfo &&
|
|
boundinfo->strategy == PARTITION_STRATEGY_LIST &&
|
|
(boundinfo->ndatums > 0 ||
|
|
partition_bound_accepts_nulls(boundinfo) ||
|
|
partition_bound_has_default(boundinfo)));
|
|
|
|
foreach(cell, spec->listdatums)
|
|
{
|
|
Const *val = castNode(Const, lfirst(cell));
|
|
|
|
if (!val->constisnull)
|
|
{
|
|
int offset;
|
|
bool equal;
|
|
|
|
offset = partition_bound_bsearch(key, boundinfo,
|
|
&val->constvalue,
|
|
true, &equal);
|
|
if (offset >= 0 && equal)
|
|
{
|
|
overlap = true;
|
|
with = boundinfo->indexes[offset];
|
|
break;
|
|
}
|
|
}
|
|
else if (partition_bound_accepts_nulls(boundinfo))
|
|
{
|
|
overlap = true;
|
|
with = boundinfo->null_index;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
{
|
|
PartitionRangeBound *lower,
|
|
*upper;
|
|
|
|
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
|
|
lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
|
|
upper = make_one_range_bound(key, -1, spec->upperdatums, false);
|
|
|
|
/*
|
|
* First check if the resulting range would be empty with
|
|
* specified lower and upper bounds
|
|
*/
|
|
if (partition_rbound_cmp(key, lower->datums, lower->kind, true,
|
|
upper) >= 0)
|
|
{
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("empty range bound specified for partition \"%s\"",
|
|
relname),
|
|
errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
|
|
get_range_partbound_string(spec->lowerdatums),
|
|
get_range_partbound_string(spec->upperdatums)),
|
|
parser_errposition(pstate, spec->location)));
|
|
}
|
|
|
|
if (partdesc->nparts > 0)
|
|
{
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
|
int offset;
|
|
bool equal;
|
|
|
|
Assert(boundinfo &&
|
|
boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
|
|
(boundinfo->ndatums > 0 ||
|
|
partition_bound_has_default(boundinfo)));
|
|
|
|
/*
|
|
* Test whether the new lower bound (which is treated
|
|
* inclusively as part of the new partition) lies inside
|
|
* an existing partition, or in a gap.
|
|
*
|
|
* If it's inside an existing partition, the bound at
|
|
* offset + 1 will be the upper bound of that partition,
|
|
* and its index will be >= 0.
|
|
*
|
|
* If it's in a gap, the bound at offset + 1 will be the
|
|
* lower bound of the next partition, and its index will
|
|
* be -1. This is also true if there is no next partition,
|
|
* since the index array is initialised with an extra -1
|
|
* at the end.
|
|
*/
|
|
offset = partition_bound_bsearch(key, boundinfo, lower,
|
|
true, &equal);
|
|
|
|
if (boundinfo->indexes[offset + 1] < 0)
|
|
{
|
|
/*
|
|
* Check that the new partition will fit in the gap.
|
|
* For it to fit, the new upper bound must be less
|
|
* than or equal to the lower bound of the next
|
|
* partition, if there is one.
|
|
*/
|
|
if (offset + 1 < boundinfo->ndatums)
|
|
{
|
|
int32 cmpval;
|
|
|
|
cmpval = partition_bound_cmp(key, boundinfo,
|
|
offset + 1, upper,
|
|
true);
|
|
if (cmpval < 0)
|
|
{
|
|
/*
|
|
* The new partition overlaps with the
|
|
* existing partition between offset + 1 and
|
|
* offset + 2.
|
|
*/
|
|
overlap = true;
|
|
with = boundinfo->indexes[offset + 2];
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The new partition overlaps with the existing
|
|
* partition between offset and offset + 1.
|
|
*/
|
|
overlap = true;
|
|
with = boundinfo->indexes[offset + 1];
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
(int) key->strategy);
|
|
}
|
|
|
|
if (overlap)
|
|
{
|
|
Assert(with >= 0);
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
|
errmsg("partition \"%s\" would overlap partition \"%s\"",
|
|
relname, get_rel_name(partdesc->oids[with])),
|
|
parser_errposition(pstate, spec->location)));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* check_default_allows_bound
|
|
*
|
|
* This function checks if there exists a row in the default partition that
|
|
* would properly belong to the new partition being added. If it finds one,
|
|
* it throws an error.
|
|
*/
|
|
void
|
|
check_default_allows_bound(Relation parent, Relation default_rel,
|
|
PartitionBoundSpec *new_spec)
|
|
{
|
|
List *new_part_constraints;
|
|
List *def_part_constraints;
|
|
List *all_parts;
|
|
ListCell *lc;
|
|
|
|
new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
|
|
? get_qual_for_list(parent, new_spec)
|
|
: get_qual_for_range(parent, new_spec, false);
|
|
def_part_constraints =
|
|
get_proposed_default_constraint(new_part_constraints);
|
|
|
|
/*
|
|
* If the existing constraints on the default partition imply that it will
|
|
* not contain any row that would belong to the new partition, we can
|
|
* avoid scanning the default partition.
|
|
*/
|
|
if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
|
|
{
|
|
ereport(INFO,
|
|
(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
|
|
RelationGetRelationName(default_rel))));
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Scan the default partition and its subpartitions, and check for rows
|
|
* that do not satisfy the revised partition constraints.
|
|
*/
|
|
if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
|
|
all_parts = find_all_inheritors(RelationGetRelid(default_rel),
|
|
AccessExclusiveLock, NULL);
|
|
else
|
|
all_parts = list_make1_oid(RelationGetRelid(default_rel));
|
|
|
|
foreach(lc, all_parts)
|
|
{
|
|
Oid part_relid = lfirst_oid(lc);
|
|
Relation part_rel;
|
|
Expr *constr;
|
|
Expr *partition_constraint;
|
|
EState *estate;
|
|
HeapTuple tuple;
|
|
ExprState *partqualstate = NULL;
|
|
Snapshot snapshot;
|
|
TupleDesc tupdesc;
|
|
ExprContext *econtext;
|
|
HeapScanDesc scan;
|
|
MemoryContext oldCxt;
|
|
TupleTableSlot *tupslot;
|
|
|
|
/* Lock already taken above. */
|
|
if (part_relid != RelationGetRelid(default_rel))
|
|
{
|
|
part_rel = heap_open(part_relid, NoLock);
|
|
|
|
/*
|
|
* If the partition constraints on default partition child imply
|
|
* that it will not contain any row that would belong to the new
|
|
* partition, we can avoid scanning the child table.
|
|
*/
|
|
if (PartConstraintImpliedByRelConstraint(part_rel,
|
|
def_part_constraints))
|
|
{
|
|
ereport(INFO,
|
|
(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
|
|
RelationGetRelationName(part_rel))));
|
|
|
|
heap_close(part_rel, NoLock);
|
|
continue;
|
|
}
|
|
}
|
|
else
|
|
part_rel = default_rel;
|
|
|
|
/*
|
|
* Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
|
|
* scanned.
|
|
*/
|
|
if (part_rel->rd_rel->relkind != RELKIND_RELATION)
|
|
{
|
|
if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
|
|
ereport(WARNING,
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
|
errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
|
|
RelationGetRelationName(part_rel),
|
|
RelationGetRelationName(default_rel))));
|
|
|
|
if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
|
|
heap_close(part_rel, NoLock);
|
|
|
|
continue;
|
|
}
|
|
|
|
tupdesc = CreateTupleDescCopy(RelationGetDescr(part_rel));
|
|
constr = linitial(def_part_constraints);
|
|
partition_constraint = (Expr *)
|
|
map_partition_varattnos((List *) constr,
|
|
1, part_rel, parent, NULL);
|
|
estate = CreateExecutorState();
|
|
|
|
/* Build expression execution states for partition check quals */
|
|
partqualstate = ExecPrepareExpr(partition_constraint, estate);
|
|
|
|
econtext = GetPerTupleExprContext(estate);
|
|
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
|
scan = heap_beginscan(part_rel, snapshot, 0, NULL);
|
|
tupslot = MakeSingleTupleTableSlot(tupdesc);
|
|
|
|
/*
|
|
* Switch to per-tuple memory context and reset it for each tuple
|
|
* produced, so we don't leak memory.
|
|
*/
|
|
oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
|
|
|
|
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
|
{
|
|
ExecStoreTuple(tuple, tupslot, InvalidBuffer, false);
|
|
econtext->ecxt_scantuple = tupslot;
|
|
|
|
if (!ExecCheck(partqualstate, econtext))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_CHECK_VIOLATION),
|
|
errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
|
|
RelationGetRelationName(default_rel))));
|
|
|
|
ResetExprContext(econtext);
|
|
CHECK_FOR_INTERRUPTS();
|
|
}
|
|
|
|
MemoryContextSwitchTo(oldCxt);
|
|
heap_endscan(scan);
|
|
UnregisterSnapshot(snapshot);
|
|
ExecDropSingleTupleTableSlot(tupslot);
|
|
FreeExecutorState(estate);
|
|
|
|
if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
|
|
heap_close(part_rel, NoLock); /* keep the lock until commit */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* get_partition_parent
|
|
*
|
|
* Returns inheritance parent of a partition by scanning pg_inherits
|
|
*
|
|
* Note: Because this function assumes that the relation whose OID is passed
|
|
* as an argument will have precisely one parent, it should only be called
|
|
* when it is known that the relation is a partition.
|
|
*/
|
|
Oid
|
|
get_partition_parent(Oid relid)
|
|
{
|
|
Form_pg_inherits form;
|
|
Relation catalogRelation;
|
|
SysScanDesc scan;
|
|
ScanKeyData key[2];
|
|
HeapTuple tuple;
|
|
Oid result;
|
|
|
|
catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
|
|
|
|
ScanKeyInit(&key[0],
|
|
Anum_pg_inherits_inhrelid,
|
|
BTEqualStrategyNumber, F_OIDEQ,
|
|
ObjectIdGetDatum(relid));
|
|
ScanKeyInit(&key[1],
|
|
Anum_pg_inherits_inhseqno,
|
|
BTEqualStrategyNumber, F_INT4EQ,
|
|
Int32GetDatum(1));
|
|
|
|
scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
|
|
NULL, 2, key);
|
|
|
|
tuple = systable_getnext(scan);
|
|
if (!HeapTupleIsValid(tuple))
|
|
elog(ERROR, "could not find tuple for parent of relation %u", relid);
|
|
|
|
form = (Form_pg_inherits) GETSTRUCT(tuple);
|
|
result = form->inhparent;
|
|
|
|
systable_endscan(scan);
|
|
heap_close(catalogRelation, AccessShareLock);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* get_qual_from_partbound
|
|
* Given a parser node for partition bound, return the list of executable
|
|
* expressions as partition constraint
|
|
*/
|
|
List *
|
|
get_qual_from_partbound(Relation rel, Relation parent,
|
|
PartitionBoundSpec *spec)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
List *my_qual = NIL;
|
|
|
|
Assert(key != NULL);
|
|
|
|
switch (key->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_HASH:
|
|
Assert(spec->strategy == PARTITION_STRATEGY_HASH);
|
|
my_qual = get_qual_for_hash(parent, spec);
|
|
break;
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
|
|
my_qual = get_qual_for_list(parent, spec);
|
|
break;
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
|
|
my_qual = get_qual_for_range(parent, spec, false);
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
(int) key->strategy);
|
|
}
|
|
|
|
return my_qual;
|
|
}
|
|
|
|
/*
|
|
* map_partition_varattnos - maps varattno of any Vars in expr from the
|
|
* attno's of 'from_rel' to the attno's of 'to_rel' partition, each of which
|
|
* may be either a leaf partition or a partitioned table, but both of which
|
|
* must be from the same partitioning hierarchy.
|
|
*
|
|
* Even though all of the same column names must be present in all relations
|
|
* in the hierarchy, and they must also have the same types, the attnos may
|
|
* be different.
|
|
*
|
|
* If found_whole_row is not NULL, *found_whole_row returns whether a
|
|
* whole-row variable was found in the input expression.
|
|
*
|
|
* Note: this will work on any node tree, so really the argument and result
|
|
* should be declared "Node *". But a substantial majority of the callers
|
|
* are working on Lists, so it's less messy to do the casts internally.
|
|
*/
|
|
List *
|
|
map_partition_varattnos(List *expr, int fromrel_varno,
|
|
Relation to_rel, Relation from_rel,
|
|
bool *found_whole_row)
|
|
{
|
|
bool my_found_whole_row = false;
|
|
|
|
if (expr != NIL)
|
|
{
|
|
AttrNumber *part_attnos;
|
|
|
|
part_attnos = convert_tuples_by_name_map(RelationGetDescr(to_rel),
|
|
RelationGetDescr(from_rel),
|
|
gettext_noop("could not convert row type"));
|
|
expr = (List *) map_variable_attnos((Node *) expr,
|
|
fromrel_varno, 0,
|
|
part_attnos,
|
|
RelationGetDescr(from_rel)->natts,
|
|
RelationGetForm(to_rel)->reltype,
|
|
&my_found_whole_row);
|
|
}
|
|
|
|
if (found_whole_row)
|
|
*found_whole_row = my_found_whole_row;
|
|
|
|
return expr;
|
|
}
|
|
|
|
/*
|
|
* RelationGetPartitionQual
|
|
*
|
|
* Returns a list of partition quals
|
|
*/
|
|
List *
|
|
RelationGetPartitionQual(Relation rel)
|
|
{
|
|
/* Quick exit */
|
|
if (!rel->rd_rel->relispartition)
|
|
return NIL;
|
|
|
|
return generate_partition_qual(rel);
|
|
}
|
|
|
|
/*
|
|
* get_partition_qual_relid
|
|
*
|
|
* Returns an expression tree describing the passed-in relation's partition
|
|
* constraint. If there is no partition constraint returns NULL; this can
|
|
* happen if the default partition is the only partition.
|
|
*/
|
|
Expr *
|
|
get_partition_qual_relid(Oid relid)
|
|
{
|
|
Relation rel = heap_open(relid, AccessShareLock);
|
|
Expr *result = NULL;
|
|
List *and_args;
|
|
|
|
/* Do the work only if this relation is a partition. */
|
|
if (rel->rd_rel->relispartition)
|
|
{
|
|
and_args = generate_partition_qual(rel);
|
|
|
|
if (and_args == NIL)
|
|
result = NULL;
|
|
else if (list_length(and_args) > 1)
|
|
result = makeBoolExpr(AND_EXPR, and_args, -1);
|
|
else
|
|
result = linitial(and_args);
|
|
}
|
|
|
|
/* Keep the lock. */
|
|
heap_close(rel, NoLock);
|
|
|
|
return result;
|
|
}
|
|
|
|
/* Module-local functions */
|
|
|
|
/*
|
|
* get_partition_operator
|
|
*
|
|
* Return oid of the operator of given strategy for a given partition key
|
|
* column.
|
|
*/
|
|
static Oid
|
|
get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
|
|
bool *need_relabel)
|
|
{
|
|
Oid operoid;
|
|
|
|
/*
|
|
* First check if there exists an operator of the given strategy, with
|
|
* this column's type as both its lefttype and righttype, in the
|
|
* partitioning operator family specified for the column.
|
|
*/
|
|
operoid = get_opfamily_member(key->partopfamily[col],
|
|
key->parttypid[col],
|
|
key->parttypid[col],
|
|
strategy);
|
|
|
|
/*
|
|
* If one doesn't exist, we must resort to using an operator in the same
|
|
* operator family but with the operator class declared input type. It is
|
|
* OK to do so, because the column's type is known to be binary-coercible
|
|
* with the operator class input type (otherwise, the operator class in
|
|
* question would not have been accepted as the partitioning operator
|
|
* class). We must however inform the caller to wrap the non-Const
|
|
* expression with a RelabelType node to denote the implicit coercion. It
|
|
* ensures that the resulting expression structurally matches similarly
|
|
* processed expressions within the optimizer.
|
|
*/
|
|
if (!OidIsValid(operoid))
|
|
{
|
|
operoid = get_opfamily_member(key->partopfamily[col],
|
|
key->partopcintype[col],
|
|
key->partopcintype[col],
|
|
strategy);
|
|
if (!OidIsValid(operoid))
|
|
elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
|
|
strategy, key->partopcintype[col], key->partopcintype[col],
|
|
key->partopfamily[col]);
|
|
*need_relabel = true;
|
|
}
|
|
else
|
|
*need_relabel = false;
|
|
|
|
return operoid;
|
|
}
|
|
|
|
/*
|
|
* make_partition_op_expr
|
|
* Returns an Expr for the given partition key column with arg1 and
|
|
* arg2 as its leftop and rightop, respectively
|
|
*/
|
|
static Expr *
|
|
make_partition_op_expr(PartitionKey key, int keynum,
|
|
uint16 strategy, Expr *arg1, Expr *arg2)
|
|
{
|
|
Oid operoid;
|
|
bool need_relabel = false;
|
|
Expr *result = NULL;
|
|
|
|
/* Get the correct btree operator for this partitioning column */
|
|
operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
|
|
|
|
/*
|
|
* Chosen operator may be such that the non-Const operand needs to be
|
|
* coerced, so apply the same; see the comment in
|
|
* get_partition_operator().
|
|
*/
|
|
if (!IsA(arg1, Const) &&
|
|
(need_relabel ||
|
|
key->partcollation[keynum] != key->parttypcoll[keynum]))
|
|
arg1 = (Expr *) makeRelabelType(arg1,
|
|
key->partopcintype[keynum],
|
|
-1,
|
|
key->partcollation[keynum],
|
|
COERCE_EXPLICIT_CAST);
|
|
|
|
/* Generate the actual expression */
|
|
switch (key->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_LIST:
|
|
{
|
|
ScalarArrayOpExpr *saopexpr;
|
|
|
|
/* Build leftop = ANY (rightop) */
|
|
saopexpr = makeNode(ScalarArrayOpExpr);
|
|
saopexpr->opno = operoid;
|
|
saopexpr->opfuncid = get_opcode(operoid);
|
|
saopexpr->useOr = true;
|
|
saopexpr->inputcollid = key->partcollation[keynum];
|
|
saopexpr->args = list_make2(arg1, arg2);
|
|
saopexpr->location = -1;
|
|
|
|
result = (Expr *) saopexpr;
|
|
break;
|
|
}
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
result = make_opclause(operoid,
|
|
BOOLOID,
|
|
false,
|
|
arg1, arg2,
|
|
InvalidOid,
|
|
key->partcollation[keynum]);
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "invalid partitioning strategy");
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* get_qual_for_hash
|
|
*
|
|
* Given a list of partition columns, modulus and remainder corresponding to a
|
|
* partition, this function returns CHECK constraint expression Node for that
|
|
* partition.
|
|
*
|
|
* The partition constraint for a hash partition is always a call to the
|
|
* built-in function satisfies_hash_partition(). The first two arguments are
|
|
* the modulus and remainder for the partition; the remaining arguments are the
|
|
* values to be hashed.
|
|
*/
|
|
static List *
|
|
get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
FuncExpr *fexpr;
|
|
Node *relidConst;
|
|
Node *modulusConst;
|
|
Node *remainderConst;
|
|
List *args;
|
|
ListCell *partexprs_item;
|
|
int i;
|
|
|
|
/* Fixed arguments. */
|
|
relidConst = (Node *) makeConst(OIDOID,
|
|
-1,
|
|
InvalidOid,
|
|
sizeof(Oid),
|
|
ObjectIdGetDatum(RelationGetRelid(parent)),
|
|
false,
|
|
true);
|
|
|
|
modulusConst = (Node *) makeConst(INT4OID,
|
|
-1,
|
|
InvalidOid,
|
|
sizeof(int32),
|
|
Int32GetDatum(spec->modulus),
|
|
false,
|
|
true);
|
|
|
|
remainderConst = (Node *) makeConst(INT4OID,
|
|
-1,
|
|
InvalidOid,
|
|
sizeof(int32),
|
|
Int32GetDatum(spec->remainder),
|
|
false,
|
|
true);
|
|
|
|
args = list_make3(relidConst, modulusConst, remainderConst);
|
|
partexprs_item = list_head(key->partexprs);
|
|
|
|
/* Add an argument for each key column. */
|
|
for (i = 0; i < key->partnatts; i++)
|
|
{
|
|
Node *keyCol;
|
|
|
|
/* Left operand */
|
|
if (key->partattrs[i] != 0)
|
|
{
|
|
keyCol = (Node *) makeVar(1,
|
|
key->partattrs[i],
|
|
key->parttypid[i],
|
|
key->parttypmod[i],
|
|
key->parttypcoll[i],
|
|
0);
|
|
}
|
|
else
|
|
{
|
|
keyCol = (Node *) copyObject(lfirst(partexprs_item));
|
|
partexprs_item = lnext(partexprs_item);
|
|
}
|
|
|
|
args = lappend(args, keyCol);
|
|
}
|
|
|
|
fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
|
|
BOOLOID,
|
|
args,
|
|
InvalidOid,
|
|
InvalidOid,
|
|
COERCE_EXPLICIT_CALL);
|
|
|
|
return list_make1(fexpr);
|
|
}
|
|
|
|
/*
|
|
* get_qual_for_list
|
|
*
|
|
* Returns an implicit-AND list of expressions to use as a list partition's
|
|
* constraint, given the partition key and bound structures.
|
|
*
|
|
* The function returns NIL for a default partition when it's the only
|
|
* partition since in that case there is no constraint.
|
|
*/
|
|
static List *
|
|
get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
|
|
{
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
List *result;
|
|
Expr *keyCol;
|
|
ArrayExpr *arr;
|
|
Expr *opexpr;
|
|
NullTest *nulltest;
|
|
ListCell *cell;
|
|
List *arrelems = NIL;
|
|
bool list_has_null = false;
|
|
|
|
/*
|
|
* Only single-column list partitioning is supported, so we are worried
|
|
* only about the partition key with index 0.
|
|
*/
|
|
Assert(key->partnatts == 1);
|
|
|
|
/* Construct Var or expression representing the partition column */
|
|
if (key->partattrs[0] != 0)
|
|
keyCol = (Expr *) makeVar(1,
|
|
key->partattrs[0],
|
|
key->parttypid[0],
|
|
key->parttypmod[0],
|
|
key->parttypcoll[0],
|
|
0);
|
|
else
|
|
keyCol = (Expr *) copyObject(linitial(key->partexprs));
|
|
|
|
/*
|
|
* For default list partition, collect datums for all the partitions. The
|
|
* default partition constraint should check that the partition key is
|
|
* equal to none of those.
|
|
*/
|
|
if (spec->is_default)
|
|
{
|
|
int i;
|
|
int ndatums = 0;
|
|
PartitionDesc pdesc = RelationGetPartitionDesc(parent);
|
|
PartitionBoundInfo boundinfo = pdesc->boundinfo;
|
|
|
|
if (boundinfo)
|
|
{
|
|
ndatums = boundinfo->ndatums;
|
|
|
|
if (partition_bound_accepts_nulls(boundinfo))
|
|
list_has_null = true;
|
|
}
|
|
|
|
/*
|
|
* If default is the only partition, there need not be any partition
|
|
* constraint on it.
|
|
*/
|
|
if (ndatums == 0 && !list_has_null)
|
|
return NIL;
|
|
|
|
for (i = 0; i < ndatums; i++)
|
|
{
|
|
Const *val;
|
|
|
|
/*
|
|
* Construct Const from known-not-null datum. We must be careful
|
|
* to copy the value, because our result has to be able to outlive
|
|
* the relcache entry we're copying from.
|
|
*/
|
|
val = makeConst(key->parttypid[0],
|
|
key->parttypmod[0],
|
|
key->parttypcoll[0],
|
|
key->parttyplen[0],
|
|
datumCopy(*boundinfo->datums[i],
|
|
key->parttypbyval[0],
|
|
key->parttyplen[0]),
|
|
false, /* isnull */
|
|
key->parttypbyval[0]);
|
|
|
|
arrelems = lappend(arrelems, val);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Create list of Consts for the allowed values, excluding any nulls.
|
|
*/
|
|
foreach(cell, spec->listdatums)
|
|
{
|
|
Const *val = castNode(Const, lfirst(cell));
|
|
|
|
if (val->constisnull)
|
|
list_has_null = true;
|
|
else
|
|
arrelems = lappend(arrelems, copyObject(val));
|
|
}
|
|
}
|
|
|
|
if (arrelems)
|
|
{
|
|
/* Construct an ArrayExpr for the non-null partition values */
|
|
arr = makeNode(ArrayExpr);
|
|
arr->array_typeid = !type_is_array(key->parttypid[0])
|
|
? get_array_type(key->parttypid[0])
|
|
: key->parttypid[0];
|
|
arr->array_collid = key->parttypcoll[0];
|
|
arr->element_typeid = key->parttypid[0];
|
|
arr->elements = arrelems;
|
|
arr->multidims = false;
|
|
arr->location = -1;
|
|
|
|
/* Generate the main expression, i.e., keyCol = ANY (arr) */
|
|
opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
|
|
keyCol, (Expr *) arr);
|
|
}
|
|
else
|
|
{
|
|
/* If there are no partition values, we don't need an = ANY expr */
|
|
opexpr = NULL;
|
|
}
|
|
|
|
if (!list_has_null)
|
|
{
|
|
/*
|
|
* Gin up a "col IS NOT NULL" test that will be AND'd with the main
|
|
* expression. This might seem redundant, but the partition routing
|
|
* machinery needs it.
|
|
*/
|
|
nulltest = makeNode(NullTest);
|
|
nulltest->arg = keyCol;
|
|
nulltest->nulltesttype = IS_NOT_NULL;
|
|
nulltest->argisrow = false;
|
|
nulltest->location = -1;
|
|
|
|
result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Gin up a "col IS NULL" test that will be OR'd with the main
|
|
* expression.
|
|
*/
|
|
nulltest = makeNode(NullTest);
|
|
nulltest->arg = keyCol;
|
|
nulltest->nulltesttype = IS_NULL;
|
|
nulltest->argisrow = false;
|
|
nulltest->location = -1;
|
|
|
|
if (opexpr)
|
|
{
|
|
Expr *or;
|
|
|
|
or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
|
|
result = list_make1(or);
|
|
}
|
|
else
|
|
result = list_make1(nulltest);
|
|
}
|
|
|
|
/*
|
|
* Note that, in general, applying NOT to a constraint expression doesn't
|
|
* necessarily invert the set of rows it accepts, because NOT (NULL) is
|
|
* NULL. However, the partition constraints we construct here never
|
|
* evaluate to NULL, so applying NOT works as intended.
|
|
*/
|
|
if (spec->is_default)
|
|
{
|
|
result = list_make1(make_ands_explicit(result));
|
|
result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* get_range_key_properties
|
|
* Returns range partition key information for a given column
|
|
*
|
|
* This is a subroutine for get_qual_for_range, and its API is pretty
|
|
* specialized to that caller.
|
|
*
|
|
* Constructs an Expr for the key column (returned in *keyCol) and Consts
|
|
* for the lower and upper range limits (returned in *lower_val and
|
|
* *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
|
|
* a Const. All of these structures are freshly palloc'd.
|
|
*
|
|
* *partexprs_item points to the cell containing the next expression in
|
|
* the key->partexprs list, or NULL. It may be advanced upon return.
|
|
*/
|
|
static void
|
|
get_range_key_properties(PartitionKey key, int keynum,
|
|
PartitionRangeDatum *ldatum,
|
|
PartitionRangeDatum *udatum,
|
|
ListCell **partexprs_item,
|
|
Expr **keyCol,
|
|
Const **lower_val, Const **upper_val)
|
|
{
|
|
/* Get partition key expression for this column */
|
|
if (key->partattrs[keynum] != 0)
|
|
{
|
|
*keyCol = (Expr *) makeVar(1,
|
|
key->partattrs[keynum],
|
|
key->parttypid[keynum],
|
|
key->parttypmod[keynum],
|
|
key->parttypcoll[keynum],
|
|
0);
|
|
}
|
|
else
|
|
{
|
|
if (*partexprs_item == NULL)
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
*keyCol = copyObject(lfirst(*partexprs_item));
|
|
*partexprs_item = lnext(*partexprs_item);
|
|
}
|
|
|
|
/* Get appropriate Const nodes for the bounds */
|
|
if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
|
|
*lower_val = castNode(Const, copyObject(ldatum->value));
|
|
else
|
|
*lower_val = NULL;
|
|
|
|
if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
|
|
*upper_val = castNode(Const, copyObject(udatum->value));
|
|
else
|
|
*upper_val = NULL;
|
|
}
|
|
|
|
/*
|
|
* get_range_nulltest
|
|
*
|
|
* A non-default range partition table does not currently allow partition
|
|
* keys to be null, so emit an IS NOT NULL expression for each key column.
|
|
*/
|
|
static List *
|
|
get_range_nulltest(PartitionKey key)
|
|
{
|
|
List *result = NIL;
|
|
NullTest *nulltest;
|
|
ListCell *partexprs_item;
|
|
int i;
|
|
|
|
partexprs_item = list_head(key->partexprs);
|
|
for (i = 0; i < key->partnatts; i++)
|
|
{
|
|
Expr *keyCol;
|
|
|
|
if (key->partattrs[i] != 0)
|
|
{
|
|
keyCol = (Expr *) makeVar(1,
|
|
key->partattrs[i],
|
|
key->parttypid[i],
|
|
key->parttypmod[i],
|
|
key->parttypcoll[i],
|
|
0);
|
|
}
|
|
else
|
|
{
|
|
if (partexprs_item == NULL)
|
|
elog(ERROR, "wrong number of partition key expressions");
|
|
keyCol = copyObject(lfirst(partexprs_item));
|
|
partexprs_item = lnext(partexprs_item);
|
|
}
|
|
|
|
nulltest = makeNode(NullTest);
|
|
nulltest->arg = keyCol;
|
|
nulltest->nulltesttype = IS_NOT_NULL;
|
|
nulltest->argisrow = false;
|
|
nulltest->location = -1;
|
|
result = lappend(result, nulltest);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* get_qual_for_range
|
|
*
|
|
* Returns an implicit-AND list of expressions to use as a range partition's
|
|
* constraint, given the partition key and bound structures.
|
|
*
|
|
* For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
|
|
* as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
|
|
* generate an expression tree of the following form:
|
|
*
|
|
* (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
|
|
* AND
|
|
* (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
|
|
* AND
|
|
* (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
|
|
*
|
|
* It is often the case that a prefix of lower and upper bound tuples contains
|
|
* the same values, for example, (al = au), in which case, we will emit an
|
|
* expression tree of the following form:
|
|
*
|
|
* (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
|
|
* AND
|
|
* (a = al)
|
|
* AND
|
|
* (b > bl OR (b = bl AND c >= cl))
|
|
* AND
|
|
* (b < bu OR (b = bu AND c < cu))
|
|
*
|
|
* If a bound datum is either MINVALUE or MAXVALUE, these expressions are
|
|
* simplified using the fact that any value is greater than MINVALUE and less
|
|
* than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
|
|
* true, and we need not emit any expression for it, and the last line becomes
|
|
*
|
|
* (b < bu) OR (b = bu), which is simplified to (b <= bu)
|
|
*
|
|
* In most common cases with only one partition column, say a, the following
|
|
* expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
|
|
*
|
|
* For default partition, it returns the negation of the constraints of all
|
|
* the other partitions.
|
|
*
|
|
* External callers should pass for_default as false; we set it to true only
|
|
* when recursing.
|
|
*/
|
|
static List *
|
|
get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
|
|
bool for_default)
|
|
{
|
|
List *result = NIL;
|
|
ListCell *cell1,
|
|
*cell2,
|
|
*partexprs_item,
|
|
*partexprs_item_saved;
|
|
int i,
|
|
j;
|
|
PartitionRangeDatum *ldatum,
|
|
*udatum;
|
|
PartitionKey key = RelationGetPartitionKey(parent);
|
|
Expr *keyCol;
|
|
Const *lower_val,
|
|
*upper_val;
|
|
List *lower_or_arms,
|
|
*upper_or_arms;
|
|
int num_or_arms,
|
|
current_or_arm;
|
|
ListCell *lower_or_start_datum,
|
|
*upper_or_start_datum;
|
|
bool need_next_lower_arm,
|
|
need_next_upper_arm;
|
|
|
|
if (spec->is_default)
|
|
{
|
|
List *or_expr_args = NIL;
|
|
PartitionDesc pdesc = RelationGetPartitionDesc(parent);
|
|
Oid *inhoids = pdesc->oids;
|
|
int nparts = pdesc->nparts,
|
|
i;
|
|
|
|
for (i = 0; i < nparts; i++)
|
|
{
|
|
Oid inhrelid = inhoids[i];
|
|
HeapTuple tuple;
|
|
Datum datum;
|
|
bool isnull;
|
|
PartitionBoundSpec *bspec;
|
|
|
|
tuple = SearchSysCache1(RELOID, inhrelid);
|
|
if (!HeapTupleIsValid(tuple))
|
|
elog(ERROR, "cache lookup failed for relation %u", inhrelid);
|
|
|
|
datum = SysCacheGetAttr(RELOID, tuple,
|
|
Anum_pg_class_relpartbound,
|
|
&isnull);
|
|
|
|
Assert(!isnull);
|
|
bspec = (PartitionBoundSpec *)
|
|
stringToNode(TextDatumGetCString(datum));
|
|
if (!IsA(bspec, PartitionBoundSpec))
|
|
elog(ERROR, "expected PartitionBoundSpec");
|
|
|
|
if (!bspec->is_default)
|
|
{
|
|
List *part_qual;
|
|
|
|
part_qual = get_qual_for_range(parent, bspec, true);
|
|
|
|
/*
|
|
* AND the constraints of the partition and add to
|
|
* or_expr_args
|
|
*/
|
|
or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
|
|
? makeBoolExpr(AND_EXPR, part_qual, -1)
|
|
: linitial(part_qual));
|
|
}
|
|
ReleaseSysCache(tuple);
|
|
}
|
|
|
|
if (or_expr_args != NIL)
|
|
{
|
|
Expr *other_parts_constr;
|
|
|
|
/*
|
|
* Combine the constraints obtained for non-default partitions
|
|
* using OR. As requested, each of the OR's args doesn't include
|
|
* the NOT NULL test for partition keys (which is to avoid its
|
|
* useless repetition). Add the same now.
|
|
*/
|
|
other_parts_constr =
|
|
makeBoolExpr(AND_EXPR,
|
|
lappend(get_range_nulltest(key),
|
|
list_length(or_expr_args) > 1
|
|
? makeBoolExpr(OR_EXPR, or_expr_args,
|
|
-1)
|
|
: linitial(or_expr_args)),
|
|
-1);
|
|
|
|
/*
|
|
* Finally, the default partition contains everything *NOT*
|
|
* contained in the non-default partitions.
|
|
*/
|
|
result = list_make1(makeBoolExpr(NOT_EXPR,
|
|
list_make1(other_parts_constr), -1));
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
lower_or_start_datum = list_head(spec->lowerdatums);
|
|
upper_or_start_datum = list_head(spec->upperdatums);
|
|
num_or_arms = key->partnatts;
|
|
|
|
/*
|
|
* If it is the recursive call for default, we skip the get_range_nulltest
|
|
* to avoid accumulating the NullTest on the same keys for each partition.
|
|
*/
|
|
if (!for_default)
|
|
result = get_range_nulltest(key);
|
|
|
|
/*
|
|
* Iterate over the key columns and check if the corresponding lower and
|
|
* upper datums are equal using the btree equality operator for the
|
|
* column's type. If equal, we emit single keyCol = common_value
|
|
* expression. Starting from the first column for which the corresponding
|
|
* lower and upper bound datums are not equal, we generate OR expressions
|
|
* as shown in the function's header comment.
|
|
*/
|
|
i = 0;
|
|
partexprs_item = list_head(key->partexprs);
|
|
partexprs_item_saved = partexprs_item; /* placate compiler */
|
|
forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
|
|
{
|
|
EState *estate;
|
|
MemoryContext oldcxt;
|
|
Expr *test_expr;
|
|
ExprState *test_exprstate;
|
|
Datum test_result;
|
|
bool isNull;
|
|
|
|
ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
|
|
udatum = castNode(PartitionRangeDatum, lfirst(cell2));
|
|
|
|
/*
|
|
* Since get_range_key_properties() modifies partexprs_item, and we
|
|
* might need to start over from the previous expression in the later
|
|
* part of this function, save away the current value.
|
|
*/
|
|
partexprs_item_saved = partexprs_item;
|
|
|
|
get_range_key_properties(key, i, ldatum, udatum,
|
|
&partexprs_item,
|
|
&keyCol,
|
|
&lower_val, &upper_val);
|
|
|
|
/*
|
|
* If either value is NULL, the corresponding partition bound is
|
|
* either MINVALUE or MAXVALUE, and we treat them as unequal, because
|
|
* even if they're the same, there is no common value to equate the
|
|
* key column with.
|
|
*/
|
|
if (!lower_val || !upper_val)
|
|
break;
|
|
|
|
/* Create the test expression */
|
|
estate = CreateExecutorState();
|
|
oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
|
|
test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
|
|
(Expr *) lower_val,
|
|
(Expr *) upper_val);
|
|
fix_opfuncids((Node *) test_expr);
|
|
test_exprstate = ExecInitExpr(test_expr, NULL);
|
|
test_result = ExecEvalExprSwitchContext(test_exprstate,
|
|
GetPerTupleExprContext(estate),
|
|
&isNull);
|
|
MemoryContextSwitchTo(oldcxt);
|
|
FreeExecutorState(estate);
|
|
|
|
/* If not equal, go generate the OR expressions */
|
|
if (!DatumGetBool(test_result))
|
|
break;
|
|
|
|
/*
|
|
* The bounds for the last key column can't be equal, because such a
|
|
* range partition would never be allowed to be defined (it would have
|
|
* an empty range otherwise).
|
|
*/
|
|
if (i == key->partnatts - 1)
|
|
elog(ERROR, "invalid range bound specification");
|
|
|
|
/* Equal, so generate keyCol = lower_val expression */
|
|
result = lappend(result,
|
|
make_partition_op_expr(key, i, BTEqualStrategyNumber,
|
|
keyCol, (Expr *) lower_val));
|
|
|
|
i++;
|
|
}
|
|
|
|
/* First pair of lower_val and upper_val that are not equal. */
|
|
lower_or_start_datum = cell1;
|
|
upper_or_start_datum = cell2;
|
|
|
|
/* OR will have as many arms as there are key columns left. */
|
|
num_or_arms = key->partnatts - i;
|
|
current_or_arm = 0;
|
|
lower_or_arms = upper_or_arms = NIL;
|
|
need_next_lower_arm = need_next_upper_arm = true;
|
|
while (current_or_arm < num_or_arms)
|
|
{
|
|
List *lower_or_arm_args = NIL,
|
|
*upper_or_arm_args = NIL;
|
|
|
|
/* Restart scan of columns from the i'th one */
|
|
j = i;
|
|
partexprs_item = partexprs_item_saved;
|
|
|
|
for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
|
|
{
|
|
PartitionRangeDatum *ldatum_next = NULL,
|
|
*udatum_next = NULL;
|
|
|
|
ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
|
|
if (lnext(cell1))
|
|
ldatum_next = castNode(PartitionRangeDatum,
|
|
lfirst(lnext(cell1)));
|
|
udatum = castNode(PartitionRangeDatum, lfirst(cell2));
|
|
if (lnext(cell2))
|
|
udatum_next = castNode(PartitionRangeDatum,
|
|
lfirst(lnext(cell2)));
|
|
get_range_key_properties(key, j, ldatum, udatum,
|
|
&partexprs_item,
|
|
&keyCol,
|
|
&lower_val, &upper_val);
|
|
|
|
if (need_next_lower_arm && lower_val)
|
|
{
|
|
uint16 strategy;
|
|
|
|
/*
|
|
* For the non-last columns of this arm, use the EQ operator.
|
|
* For the last column of this arm, use GT, unless this is the
|
|
* last column of the whole bound check, or the next bound
|
|
* datum is MINVALUE, in which case use GE.
|
|
*/
|
|
if (j - i < current_or_arm)
|
|
strategy = BTEqualStrategyNumber;
|
|
else if (j == key->partnatts - 1 ||
|
|
(ldatum_next &&
|
|
ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
|
|
strategy = BTGreaterEqualStrategyNumber;
|
|
else
|
|
strategy = BTGreaterStrategyNumber;
|
|
|
|
lower_or_arm_args = lappend(lower_or_arm_args,
|
|
make_partition_op_expr(key, j,
|
|
strategy,
|
|
keyCol,
|
|
(Expr *) lower_val));
|
|
}
|
|
|
|
if (need_next_upper_arm && upper_val)
|
|
{
|
|
uint16 strategy;
|
|
|
|
/*
|
|
* For the non-last columns of this arm, use the EQ operator.
|
|
* For the last column of this arm, use LT, unless the next
|
|
* bound datum is MAXVALUE, in which case use LE.
|
|
*/
|
|
if (j - i < current_or_arm)
|
|
strategy = BTEqualStrategyNumber;
|
|
else if (udatum_next &&
|
|
udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
|
|
strategy = BTLessEqualStrategyNumber;
|
|
else
|
|
strategy = BTLessStrategyNumber;
|
|
|
|
upper_or_arm_args = lappend(upper_or_arm_args,
|
|
make_partition_op_expr(key, j,
|
|
strategy,
|
|
keyCol,
|
|
(Expr *) upper_val));
|
|
}
|
|
|
|
/*
|
|
* Did we generate enough of OR's arguments? First arm considers
|
|
* the first of the remaining columns, second arm considers first
|
|
* two of the remaining columns, and so on.
|
|
*/
|
|
++j;
|
|
if (j - i > current_or_arm)
|
|
{
|
|
/*
|
|
* We must not emit any more arms if the new column that will
|
|
* be considered is unbounded, or this one was.
|
|
*/
|
|
if (!lower_val || !ldatum_next ||
|
|
ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
|
|
need_next_lower_arm = false;
|
|
if (!upper_val || !udatum_next ||
|
|
udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
|
|
need_next_upper_arm = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (lower_or_arm_args != NIL)
|
|
lower_or_arms = lappend(lower_or_arms,
|
|
list_length(lower_or_arm_args) > 1
|
|
? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
|
|
: linitial(lower_or_arm_args));
|
|
|
|
if (upper_or_arm_args != NIL)
|
|
upper_or_arms = lappend(upper_or_arms,
|
|
list_length(upper_or_arm_args) > 1
|
|
? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
|
|
: linitial(upper_or_arm_args));
|
|
|
|
/* If no work to do in the next iteration, break away. */
|
|
if (!need_next_lower_arm && !need_next_upper_arm)
|
|
break;
|
|
|
|
++current_or_arm;
|
|
}
|
|
|
|
/*
|
|
* Generate the OR expressions for each of lower and upper bounds (if
|
|
* required), and append to the list of implicitly ANDed list of
|
|
* expressions.
|
|
*/
|
|
if (lower_or_arms != NIL)
|
|
result = lappend(result,
|
|
list_length(lower_or_arms) > 1
|
|
? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
|
|
: linitial(lower_or_arms));
|
|
if (upper_or_arms != NIL)
|
|
result = lappend(result,
|
|
list_length(upper_or_arms) > 1
|
|
? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
|
|
: linitial(upper_or_arms));
|
|
|
|
/*
|
|
* As noted above, for non-default, we return list with constant TRUE. If
|
|
* the result is NIL during the recursive call for default, it implies
|
|
* this is the only other partition which can hold every value of the key
|
|
* except NULL. Hence we return the NullTest result skipped earlier.
|
|
*/
|
|
if (result == NIL)
|
|
result = for_default
|
|
? get_range_nulltest(key)
|
|
: list_make1(makeBoolConst(true, false));
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* generate_partition_qual
|
|
*
|
|
* Generate partition predicate from rel's partition bound expression. The
|
|
* function returns a NIL list if there is no predicate.
|
|
*
|
|
* Result expression tree is stored CacheMemoryContext to ensure it survives
|
|
* as long as the relcache entry. But we should be running in a less long-lived
|
|
* working context. To avoid leaking cache memory if this routine fails partway
|
|
* through, we build in working memory and then copy the completed structure
|
|
* into cache memory.
|
|
*/
|
|
static List *
|
|
generate_partition_qual(Relation rel)
|
|
{
|
|
HeapTuple tuple;
|
|
MemoryContext oldcxt;
|
|
Datum boundDatum;
|
|
bool isnull;
|
|
PartitionBoundSpec *bound;
|
|
List *my_qual = NIL,
|
|
*result = NIL;
|
|
Relation parent;
|
|
bool found_whole_row;
|
|
|
|
/* Guard against stack overflow due to overly deep partition tree */
|
|
check_stack_depth();
|
|
|
|
/* Quick copy */
|
|
if (rel->rd_partcheck != NIL)
|
|
return copyObject(rel->rd_partcheck);
|
|
|
|
/* Grab at least an AccessShareLock on the parent table */
|
|
parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
|
|
AccessShareLock);
|
|
|
|
/* Get pg_class.relpartbound */
|
|
tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
|
|
if (!HeapTupleIsValid(tuple))
|
|
elog(ERROR, "cache lookup failed for relation %u",
|
|
RelationGetRelid(rel));
|
|
|
|
boundDatum = SysCacheGetAttr(RELOID, tuple,
|
|
Anum_pg_class_relpartbound,
|
|
&isnull);
|
|
if (isnull) /* should not happen */
|
|
elog(ERROR, "relation \"%s\" has relpartbound = null",
|
|
RelationGetRelationName(rel));
|
|
bound = castNode(PartitionBoundSpec,
|
|
stringToNode(TextDatumGetCString(boundDatum)));
|
|
ReleaseSysCache(tuple);
|
|
|
|
my_qual = get_qual_from_partbound(rel, parent, bound);
|
|
|
|
/* Add the parent's quals to the list (if any) */
|
|
if (parent->rd_rel->relispartition)
|
|
result = list_concat(generate_partition_qual(parent), my_qual);
|
|
else
|
|
result = my_qual;
|
|
|
|
/*
|
|
* Change Vars to have partition's attnos instead of the parent's. We do
|
|
* this after we concatenate the parent's quals, because we want every Var
|
|
* in it to bear this relation's attnos. It's safe to assume varno = 1
|
|
* here.
|
|
*/
|
|
result = map_partition_varattnos(result, 1, rel, parent,
|
|
&found_whole_row);
|
|
/* There can never be a whole-row reference here */
|
|
if (found_whole_row)
|
|
elog(ERROR, "unexpected whole-row reference found in partition key");
|
|
|
|
/* Save a copy in the relcache */
|
|
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
|
|
rel->rd_partcheck = copyObject(result);
|
|
MemoryContextSwitchTo(oldcxt);
|
|
|
|
/* Keep the parent locked until commit */
|
|
heap_close(parent, NoLock);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* get_partition_for_tuple
|
|
* Finds partition of relation which accepts the partition key specified
|
|
* in values and isnull
|
|
*
|
|
* Return value is index of the partition (>= 0 and < partdesc->nparts) if one
|
|
* found or -1 if none found.
|
|
*/
|
|
int
|
|
get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
|
|
{
|
|
int bound_offset;
|
|
int part_index = -1;
|
|
PartitionKey key = RelationGetPartitionKey(relation);
|
|
PartitionDesc partdesc = RelationGetPartitionDesc(relation);
|
|
|
|
/* Route as appropriate based on partitioning strategy. */
|
|
switch (key->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_HASH:
|
|
{
|
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
|
int greatest_modulus = get_greatest_modulus(boundinfo);
|
|
uint64 rowHash = compute_hash_value(key, values, isnull);
|
|
|
|
part_index = boundinfo->indexes[rowHash % greatest_modulus];
|
|
}
|
|
break;
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
if (isnull[0])
|
|
{
|
|
if (partition_bound_accepts_nulls(partdesc->boundinfo))
|
|
part_index = partdesc->boundinfo->null_index;
|
|
}
|
|
else
|
|
{
|
|
bool equal = false;
|
|
|
|
bound_offset = partition_bound_bsearch(key,
|
|
partdesc->boundinfo,
|
|
values,
|
|
false,
|
|
&equal);
|
|
if (bound_offset >= 0 && equal)
|
|
part_index = partdesc->boundinfo->indexes[bound_offset];
|
|
}
|
|
break;
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
{
|
|
bool equal = false,
|
|
range_partkey_has_null = false;
|
|
int i;
|
|
|
|
/*
|
|
* No range includes NULL, so this will be accepted by the
|
|
* default partition if there is one, and otherwise rejected.
|
|
*/
|
|
for (i = 0; i < key->partnatts; i++)
|
|
{
|
|
if (isnull[i])
|
|
{
|
|
range_partkey_has_null = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!range_partkey_has_null)
|
|
{
|
|
bound_offset = partition_bound_bsearch(key,
|
|
partdesc->boundinfo,
|
|
values,
|
|
false,
|
|
&equal);
|
|
|
|
/*
|
|
* The bound at bound_offset is less than or equal to the
|
|
* tuple value, so the bound at offset+1 is the upper
|
|
* bound of the partition we're looking for, if there
|
|
* actually exists one.
|
|
*/
|
|
part_index = partdesc->boundinfo->indexes[bound_offset + 1];
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
(int) key->strategy);
|
|
}
|
|
|
|
/*
|
|
* part_index < 0 means we failed to find a partition of this parent. Use
|
|
* the default partition, if there is one.
|
|
*/
|
|
if (part_index < 0)
|
|
part_index = partdesc->boundinfo->default_index;
|
|
|
|
return part_index;
|
|
}
|
|
|
|
/*
|
|
* Checks if any of the 'attnums' is a partition key attribute for rel
|
|
*
|
|
* Sets *used_in_expr if any of the 'attnums' is found to be referenced in some
|
|
* partition key expression. It's possible for a column to be both used
|
|
* directly and as part of an expression; if that happens, *used_in_expr may
|
|
* end up as either true or false. That's OK for current uses of this
|
|
* function, because *used_in_expr is only used to tailor the error message
|
|
* text.
|
|
*/
|
|
bool
|
|
has_partition_attrs(Relation rel, Bitmapset *attnums,
|
|
bool *used_in_expr)
|
|
{
|
|
PartitionKey key;
|
|
int partnatts;
|
|
List *partexprs;
|
|
ListCell *partexprs_item;
|
|
int i;
|
|
|
|
if (attnums == NULL || rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
|
|
return false;
|
|
|
|
key = RelationGetPartitionKey(rel);
|
|
partnatts = get_partition_natts(key);
|
|
partexprs = get_partition_exprs(key);
|
|
|
|
partexprs_item = list_head(partexprs);
|
|
for (i = 0; i < partnatts; i++)
|
|
{
|
|
AttrNumber partattno = get_partition_col_attnum(key, i);
|
|
|
|
if (partattno != 0)
|
|
{
|
|
if (bms_is_member(partattno - FirstLowInvalidHeapAttributeNumber,
|
|
attnums))
|
|
{
|
|
if (used_in_expr)
|
|
*used_in_expr = false;
|
|
return true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Arbitrary expression */
|
|
Node *expr = (Node *) lfirst(partexprs_item);
|
|
Bitmapset *expr_attrs = NULL;
|
|
|
|
/* Find all attributes referenced */
|
|
pull_varattnos(expr, 1, &expr_attrs);
|
|
partexprs_item = lnext(partexprs_item);
|
|
|
|
if (bms_overlap(attnums, expr_attrs))
|
|
{
|
|
if (used_in_expr)
|
|
*used_in_expr = true;
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* qsort_partition_hbound_cmp
|
|
*
|
|
* We sort hash bounds by modulus, then by remainder.
|
|
*/
|
|
static int32
|
|
qsort_partition_hbound_cmp(const void *a, const void *b)
|
|
{
|
|
PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
|
|
PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
|
|
|
|
return partition_hbound_cmp(h1->modulus, h1->remainder,
|
|
h2->modulus, h2->remainder);
|
|
}
|
|
|
|
/*
 * partition_hbound_cmp
 *
 * Three-way comparison of two hash bounds given as (modulus, remainder)
 * pairs.  The modulus is the major sort key; the remainder only matters
 * when both moduli are equal.  Returns -1, 0 or 1.
 */
static int32
partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
{
	if (modulus1 != modulus2)
		return (modulus1 < modulus2) ? -1 : 1;

	/* Same modulus: the remainder breaks the tie. */
	if (remainder1 != remainder2)
		return (remainder1 < remainder2) ? -1 : 1;

	return 0;
}
|
|
|
|
/*
|
|
* qsort_partition_list_value_cmp
|
|
*
|
|
* Compare two list partition bound datums
|
|
*/
|
|
static int32
|
|
qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
|
|
{
|
|
Datum val1 = (*(const PartitionListValue **) a)->value,
|
|
val2 = (*(const PartitionListValue **) b)->value;
|
|
PartitionKey key = (PartitionKey) arg;
|
|
|
|
return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
|
|
key->partcollation[0],
|
|
val1, val2));
|
|
}
|
|
|
|
/*
|
|
* make_one_range_bound
|
|
*
|
|
* Return a PartitionRangeBound given a list of PartitionRangeDatum elements
|
|
* and a flag telling whether the bound is lower or not. Made into a function
|
|
* because there are multiple sites that want to use this facility.
|
|
*/
|
|
static PartitionRangeBound *
|
|
make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
|
|
{
|
|
PartitionRangeBound *bound;
|
|
ListCell *lc;
|
|
int i;
|
|
|
|
Assert(datums != NIL);
|
|
|
|
bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
|
|
bound->index = index;
|
|
bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
|
|
bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
|
|
sizeof(PartitionRangeDatumKind));
|
|
bound->lower = lower;
|
|
|
|
i = 0;
|
|
foreach(lc, datums)
|
|
{
|
|
PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc));
|
|
|
|
/* What's contained in this range datum? */
|
|
bound->kind[i] = datum->kind;
|
|
|
|
if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
|
|
{
|
|
Const *val = castNode(Const, datum->value);
|
|
|
|
if (val->constisnull)
|
|
elog(ERROR, "invalid range bound datum");
|
|
bound->datums[i] = val->constvalue;
|
|
}
|
|
|
|
i++;
|
|
}
|
|
|
|
return bound;
|
|
}
|
|
|
|
/* Used when sorting range bounds across all range partitions */
|
|
static int32
|
|
qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
|
|
{
|
|
PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
|
|
PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
|
|
PartitionKey key = (PartitionKey) arg;
|
|
|
|
return partition_rbound_cmp(key, b1->datums, b1->kind, b1->lower, b2);
|
|
}
|
|
|
|
/*
|
|
* partition_rbound_cmp
|
|
*
|
|
* Return for two range bounds whether the 1st one (specified in datums1,
|
|
* kind1, and lower1) is <, =, or > the bound specified in *b2.
|
|
*
|
|
* Note that if the values of the two range bounds compare equal, then we take
|
|
* into account whether they are upper or lower bounds, and an upper bound is
|
|
* considered to be smaller than a lower bound. This is important to the way
|
|
* that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
|
|
* structure, which only stores the upper bound of a common boundary between
|
|
* two contiguous partitions.
|
|
*/
|
|
static int32
|
|
partition_rbound_cmp(PartitionKey key,
|
|
Datum *datums1, PartitionRangeDatumKind *kind1,
|
|
bool lower1, PartitionRangeBound *b2)
|
|
{
|
|
int32 cmpval = 0; /* placate compiler */
|
|
int i;
|
|
Datum *datums2 = b2->datums;
|
|
PartitionRangeDatumKind *kind2 = b2->kind;
|
|
bool lower2 = b2->lower;
|
|
|
|
for (i = 0; i < key->partnatts; i++)
|
|
{
|
|
/*
|
|
* First, handle cases where the column is unbounded, which should not
|
|
* invoke the comparison procedure, and should not consider any later
|
|
* columns. Note that the PartitionRangeDatumKind enum elements
|
|
* compare the same way as the values they represent.
|
|
*/
|
|
if (kind1[i] < kind2[i])
|
|
return -1;
|
|
else if (kind1[i] > kind2[i])
|
|
return 1;
|
|
else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
|
|
|
|
/*
|
|
* The column bounds are both MINVALUE or both MAXVALUE. No later
|
|
* columns should be considered, but we still need to compare
|
|
* whether they are upper or lower bounds.
|
|
*/
|
|
break;
|
|
|
|
cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
|
|
key->partcollation[i],
|
|
datums1[i],
|
|
datums2[i]));
|
|
if (cmpval != 0)
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* If the comparison is anything other than equal, we're done. If they
|
|
* compare equal though, we still have to consider whether the boundaries
|
|
* are inclusive or exclusive. Exclusive one is considered smaller of the
|
|
* two.
|
|
*/
|
|
if (cmpval == 0 && lower1 != lower2)
|
|
cmpval = lower1 ? 1 : -1;
|
|
|
|
return cmpval;
|
|
}
|
|
|
|
/*
|
|
* partition_rbound_datum_cmp
|
|
*
|
|
* Return whether range bound (specified in rb_datums, rb_kind, and rb_lower)
|
|
* is <, =, or > partition key of tuple (tuple_datums)
|
|
*/
|
|
static int32
|
|
partition_rbound_datum_cmp(PartitionKey key,
|
|
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
|
|
Datum *tuple_datums)
|
|
{
|
|
int i;
|
|
int32 cmpval = -1;
|
|
|
|
for (i = 0; i < key->partnatts; i++)
|
|
{
|
|
if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
|
|
return -1;
|
|
else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
|
|
return 1;
|
|
|
|
cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
|
|
key->partcollation[i],
|
|
rb_datums[i],
|
|
tuple_datums[i]));
|
|
if (cmpval != 0)
|
|
break;
|
|
}
|
|
|
|
return cmpval;
|
|
}
|
|
|
|
/*
|
|
* partition_bound_cmp
|
|
*
|
|
* Return whether the bound at offset in boundinfo is <, =, or > the argument
|
|
* specified in *probe.
|
|
*/
|
|
static int32
|
|
partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
|
|
int offset, void *probe, bool probe_is_bound)
|
|
{
|
|
Datum *bound_datums = boundinfo->datums[offset];
|
|
int32 cmpval = -1;
|
|
|
|
switch (key->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_HASH:
|
|
{
|
|
PartitionBoundSpec *spec = (PartitionBoundSpec *) probe;
|
|
|
|
cmpval = partition_hbound_cmp(DatumGetInt32(bound_datums[0]),
|
|
DatumGetInt32(bound_datums[1]),
|
|
spec->modulus, spec->remainder);
|
|
break;
|
|
}
|
|
case PARTITION_STRATEGY_LIST:
|
|
cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
|
|
key->partcollation[0],
|
|
bound_datums[0],
|
|
*(Datum *) probe));
|
|
break;
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
{
|
|
PartitionRangeDatumKind *kind = boundinfo->kind[offset];
|
|
|
|
if (probe_is_bound)
|
|
{
|
|
/*
|
|
* We need to pass whether the existing bound is a lower
|
|
* bound, so that two equal-valued lower and upper bounds
|
|
* are not regarded equal.
|
|
*/
|
|
bool lower = boundinfo->indexes[offset] < 0;
|
|
|
|
cmpval = partition_rbound_cmp(key,
|
|
bound_datums, kind, lower,
|
|
(PartitionRangeBound *) probe);
|
|
}
|
|
else
|
|
cmpval = partition_rbound_datum_cmp(key,
|
|
bound_datums, kind,
|
|
(Datum *) probe);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
(int) key->strategy);
|
|
}
|
|
|
|
return cmpval;
|
|
}
|
|
|
|
/*
|
|
* Binary search on a collection of partition bounds. Returns greatest
|
|
* bound in array boundinfo->datums which is less than or equal to *probe.
|
|
* If all bounds in the array are greater than *probe, -1 is returned.
|
|
*
|
|
* *probe could either be a partition bound or a Datum array representing
|
|
* the partition key of a tuple being routed; probe_is_bound tells which.
|
|
* We pass that down to the comparison function so that it can interpret the
|
|
* contents of *probe accordingly.
|
|
*
|
|
* *is_equal is set to whether the bound at the returned index is equal with
|
|
* *probe.
|
|
*/
|
|
static int
|
|
partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
|
|
void *probe, bool probe_is_bound, bool *is_equal)
|
|
{
|
|
int lo,
|
|
hi,
|
|
mid;
|
|
|
|
lo = -1;
|
|
hi = boundinfo->ndatums - 1;
|
|
while (lo < hi)
|
|
{
|
|
int32 cmpval;
|
|
|
|
mid = (lo + hi + 1) / 2;
|
|
cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
|
|
probe_is_bound);
|
|
if (cmpval <= 0)
|
|
{
|
|
lo = mid;
|
|
*is_equal = (cmpval == 0);
|
|
|
|
if (*is_equal)
|
|
break;
|
|
}
|
|
else
|
|
hi = mid - 1;
|
|
}
|
|
|
|
return lo;
|
|
}
|
|
|
|
/*
|
|
* get_default_oid_from_partdesc
|
|
*
|
|
* Given a partition descriptor, return the OID of the default partition, if
|
|
* one exists; else, return InvalidOid.
|
|
*/
|
|
Oid
|
|
get_default_oid_from_partdesc(PartitionDesc partdesc)
|
|
{
|
|
if (partdesc && partdesc->boundinfo &&
|
|
partition_bound_has_default(partdesc->boundinfo))
|
|
return partdesc->oids[partdesc->boundinfo->default_index];
|
|
|
|
return InvalidOid;
|
|
}
|
|
|
|
/*
|
|
* get_default_partition_oid
|
|
*
|
|
* Given a relation OID, return the OID of the default partition, if one
|
|
* exists. Use get_default_oid_from_partdesc where possible, for
|
|
* efficiency.
|
|
*/
|
|
Oid
|
|
get_default_partition_oid(Oid parentId)
|
|
{
|
|
HeapTuple tuple;
|
|
Oid defaultPartId = InvalidOid;
|
|
|
|
tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(parentId));
|
|
|
|
if (HeapTupleIsValid(tuple))
|
|
{
|
|
Form_pg_partitioned_table part_table_form;
|
|
|
|
part_table_form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
|
|
defaultPartId = part_table_form->partdefid;
|
|
ReleaseSysCache(tuple);
|
|
}
|
|
|
|
return defaultPartId;
|
|
}
|
|
|
|
/*
|
|
* update_default_partition_oid
|
|
*
|
|
* Update pg_partition_table.partdefid with a new default partition OID.
|
|
*/
|
|
void
|
|
update_default_partition_oid(Oid parentId, Oid defaultPartId)
|
|
{
|
|
HeapTuple tuple;
|
|
Relation pg_partitioned_table;
|
|
Form_pg_partitioned_table part_table_form;
|
|
|
|
pg_partitioned_table = heap_open(PartitionedRelationId, RowExclusiveLock);
|
|
|
|
tuple = SearchSysCacheCopy1(PARTRELID, ObjectIdGetDatum(parentId));
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
elog(ERROR, "cache lookup failed for partition key of relation %u",
|
|
parentId);
|
|
|
|
part_table_form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
|
|
part_table_form->partdefid = defaultPartId;
|
|
CatalogTupleUpdate(pg_partitioned_table, &tuple->t_self, tuple);
|
|
|
|
heap_freetuple(tuple);
|
|
heap_close(pg_partitioned_table, RowExclusiveLock);
|
|
}
|
|
|
|
/*
|
|
* get_proposed_default_constraint
|
|
*
|
|
* This function returns the negation of new_part_constraints, which
|
|
* would be an integral part of the default partition constraints after
|
|
* addition of the partition to which the new_part_constraints belongs.
|
|
*/
|
|
List *
|
|
get_proposed_default_constraint(List *new_part_constraints)
|
|
{
|
|
Expr *defPartConstraint;
|
|
|
|
defPartConstraint = make_ands_explicit(new_part_constraints);
|
|
|
|
/*
|
|
* Derive the partition constraints of default partition by negating the
|
|
* given partition constraints. The partition constraint never evaluates
|
|
* to NULL, so negating it like this is safe.
|
|
*/
|
|
defPartConstraint = makeBoolExpr(NOT_EXPR,
|
|
list_make1(defPartConstraint),
|
|
-1);
|
|
defPartConstraint =
|
|
(Expr *) eval_const_expressions(NULL,
|
|
(Node *) defPartConstraint);
|
|
defPartConstraint = canonicalize_qual(defPartConstraint);
|
|
|
|
return list_make1(defPartConstraint);
|
|
}
|
|
|
|
/*
|
|
* get_partition_bound_num_indexes
|
|
*
|
|
* Returns the number of the entries in the partition bound indexes array.
|
|
*/
|
|
static int
|
|
get_partition_bound_num_indexes(PartitionBoundInfo bound)
|
|
{
|
|
int num_indexes;
|
|
|
|
Assert(bound);
|
|
|
|
switch (bound->strategy)
|
|
{
|
|
case PARTITION_STRATEGY_HASH:
|
|
|
|
/*
|
|
* The number of the entries in the indexes array is same as the
|
|
* greatest modulus.
|
|
*/
|
|
num_indexes = get_greatest_modulus(bound);
|
|
break;
|
|
|
|
case PARTITION_STRATEGY_LIST:
|
|
num_indexes = bound->ndatums;
|
|
break;
|
|
|
|
case PARTITION_STRATEGY_RANGE:
|
|
/* Range partitioned table has an extra index. */
|
|
num_indexes = bound->ndatums + 1;
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "unexpected partition strategy: %d",
|
|
(int) bound->strategy);
|
|
}
|
|
|
|
return num_indexes;
|
|
}
|
|
|
|
/*
|
|
* get_greatest_modulus
|
|
*
|
|
* Returns the greatest modulus of the hash partition bound. The greatest
|
|
* modulus will be at the end of the datums array because hash partitions are
|
|
* arranged in the ascending order of their modulus and remainders.
|
|
*/
|
|
static int
|
|
get_greatest_modulus(PartitionBoundInfo bound)
|
|
{
|
|
Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
|
|
Assert(bound->datums && bound->ndatums > 0);
|
|
Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0);
|
|
|
|
return DatumGetInt32(bound->datums[bound->ndatums - 1][0]);
|
|
}
|
|
|
|
/*
|
|
* compute_hash_value
|
|
*
|
|
* Compute the hash value for given not null partition key values.
|
|
*/
|
|
static uint64
|
|
compute_hash_value(PartitionKey key, Datum *values, bool *isnull)
|
|
{
|
|
int i;
|
|
int nkeys = key->partnatts;
|
|
uint64 rowHash = 0;
|
|
Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
{
|
|
if (!isnull[i])
|
|
{
|
|
Datum hash;
|
|
|
|
Assert(OidIsValid(key->partsupfunc[i].fn_oid));
|
|
|
|
/*
|
|
* Compute hash for each datum value by calling respective
|
|
* datatype-specific hash functions of each partition key
|
|
* attribute.
|
|
*/
|
|
hash = FunctionCall2(&key->partsupfunc[i], values[i], seed);
|
|
|
|
/* Form a single 64-bit hash value */
|
|
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
|
|
}
|
|
}
|
|
|
|
return rowHash;
|
|
}
|
|
|
|
/*
|
|
* satisfies_hash_partition
|
|
*
|
|
* This is an SQL-callable function for use in hash partition constraints.
|
|
* The first three arguments are the parent table OID, modulus, and remainder.
|
|
* The remaining arguments are the value of the partitioning columns (or
|
|
* expressions); these are hashed and the results are combined into a single
|
|
* hash value by calling hash_combine64.
|
|
*
|
|
* Returns true if remainder produced when this computed single hash value is
|
|
* divided by the given modulus is equal to given remainder, otherwise false.
|
|
*
|
|
* See get_qual_for_hash() for usage.
|
|
*/
|
|
Datum
|
|
satisfies_hash_partition(PG_FUNCTION_ARGS)
|
|
{
|
|
typedef struct ColumnsHashData
|
|
{
|
|
Oid relid;
|
|
int nkeys;
|
|
Oid variadic_type;
|
|
int16 variadic_typlen;
|
|
bool variadic_typbyval;
|
|
char variadic_typalign;
|
|
FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
|
|
} ColumnsHashData;
|
|
Oid parentId;
|
|
int modulus;
|
|
int remainder;
|
|
Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
|
|
ColumnsHashData *my_extra;
|
|
uint64 rowHash = 0;
|
|
|
|
/* Return null if the parent OID, modulus, or remainder is NULL. */
|
|
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
|
|
PG_RETURN_NULL();
|
|
parentId = PG_GETARG_OID(0);
|
|
modulus = PG_GETARG_INT32(1);
|
|
remainder = PG_GETARG_INT32(2);
|
|
|
|
/* Sanity check modulus and remainder. */
|
|
if (modulus <= 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("modulus for hash partition must be a positive integer")));
|
|
if (remainder < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("remainder for hash partition must be a non-negative integer")));
|
|
if (remainder >= modulus)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("remainder for hash partition must be less than modulus")));
|
|
|
|
/*
|
|
* Cache hash function information.
|
|
*/
|
|
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
|
|
if (my_extra == NULL || my_extra->relid != parentId)
|
|
{
|
|
Relation parent;
|
|
PartitionKey key;
|
|
int j;
|
|
|
|
/* Open parent relation and fetch partition keyinfo */
|
|
parent = try_relation_open(parentId, AccessShareLock);
|
|
if (parent == NULL)
|
|
PG_RETURN_NULL();
|
|
key = RelationGetPartitionKey(parent);
|
|
|
|
/* Reject parent table that is not hash-partitioned. */
|
|
if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE ||
|
|
key->strategy != PARTITION_STRATEGY_HASH)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("\"%s\" is not a hash partitioned table",
|
|
get_rel_name(parentId))));
|
|
|
|
if (!get_fn_expr_variadic(fcinfo->flinfo))
|
|
{
|
|
int nargs = PG_NARGS() - 3;
|
|
|
|
/* complain if wrong number of column values */
|
|
if (key->partnatts != nargs)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
|
|
key->partnatts, nargs)));
|
|
|
|
/* allocate space for our cache */
|
|
fcinfo->flinfo->fn_extra =
|
|
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
|
|
offsetof(ColumnsHashData, partsupfunc) +
|
|
sizeof(FmgrInfo) * nargs);
|
|
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
|
|
my_extra->relid = parentId;
|
|
my_extra->nkeys = key->partnatts;
|
|
|
|
/* check argument types and save fmgr_infos */
|
|
for (j = 0; j < key->partnatts; ++j)
|
|
{
|
|
Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
|
|
|
|
if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
|
|
j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
|
|
|
|
fmgr_info_copy(&my_extra->partsupfunc[j],
|
|
&key->partsupfunc[j],
|
|
fcinfo->flinfo->fn_mcxt);
|
|
}
|
|
|
|
}
|
|
else
|
|
{
|
|
ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
|
|
|
|
/* allocate space for our cache -- just one FmgrInfo in this case */
|
|
fcinfo->flinfo->fn_extra =
|
|
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
|
|
offsetof(ColumnsHashData, partsupfunc) +
|
|
sizeof(FmgrInfo));
|
|
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
|
|
my_extra->relid = parentId;
|
|
my_extra->nkeys = key->partnatts;
|
|
my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
|
|
get_typlenbyvalalign(my_extra->variadic_type,
|
|
&my_extra->variadic_typlen,
|
|
&my_extra->variadic_typbyval,
|
|
&my_extra->variadic_typalign);
|
|
|
|
/* check argument types */
|
|
for (j = 0; j < key->partnatts; ++j)
|
|
if (key->parttypid[j] != my_extra->variadic_type)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
|
|
j + 1,
|
|
format_type_be(key->parttypid[j]),
|
|
format_type_be(my_extra->variadic_type))));
|
|
|
|
fmgr_info_copy(&my_extra->partsupfunc[0],
|
|
&key->partsupfunc[0],
|
|
fcinfo->flinfo->fn_mcxt);
|
|
}
|
|
|
|
/* Hold lock until commit */
|
|
relation_close(parent, NoLock);
|
|
}
|
|
|
|
if (!OidIsValid(my_extra->variadic_type))
|
|
{
|
|
int nkeys = my_extra->nkeys;
|
|
int i;
|
|
|
|
/*
|
|
* For a non-variadic call, neither the number of arguments nor their
|
|
* types can change across calls, so avoid the expense of rechecking
|
|
* here.
|
|
*/
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
{
|
|
Datum hash;
|
|
|
|
/* keys start from fourth argument of function. */
|
|
int argno = i + 3;
|
|
|
|
if (PG_ARGISNULL(argno))
|
|
continue;
|
|
|
|
Assert(OidIsValid(my_extra->partsupfunc[i].fn_oid));
|
|
|
|
hash = FunctionCall2(&my_extra->partsupfunc[i],
|
|
PG_GETARG_DATUM(argno),
|
|
seed);
|
|
|
|
/* Form a single 64-bit hash value */
|
|
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
|
|
int i;
|
|
int nelems;
|
|
Datum *datum;
|
|
bool *isnull;
|
|
|
|
deconstruct_array(variadic_array,
|
|
my_extra->variadic_type,
|
|
my_extra->variadic_typlen,
|
|
my_extra->variadic_typbyval,
|
|
my_extra->variadic_typalign,
|
|
&datum, &isnull, &nelems);
|
|
|
|
/* complain if wrong number of column values */
|
|
if (nelems != my_extra->nkeys)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
|
|
my_extra->nkeys, nelems)));
|
|
|
|
for (i = 0; i < nelems; i++)
|
|
{
|
|
Datum hash;
|
|
|
|
if (isnull[i])
|
|
continue;
|
|
|
|
Assert(OidIsValid(my_extra->partsupfunc[0].fn_oid));
|
|
|
|
hash = FunctionCall2(&my_extra->partsupfunc[0],
|
|
datum[i],
|
|
seed);
|
|
|
|
/* Form a single 64-bit hash value */
|
|
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
|
|
}
|
|
}
|
|
|
|
PG_RETURN_BOOL(rowHash % modulus == remainder);
|
|
}
|