mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-10-03 23:46:53 +02:00
1918 lines
52 KiB
C
1918 lines
52 KiB
C
|
/*-------------------------------------------------------------------------
|
||
|
*
|
||
|
* partition.c
|
||
|
* Partitioning related data structures and functions.
|
||
|
*
|
||
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||
|
*
|
||
|
*
|
||
|
* IDENTIFICATION
|
||
|
* src/backend/catalog/partition.c
|
||
|
*
|
||
|
*-------------------------------------------------------------------------
|
||
|
*/
|
||
|
|
||
|
#include "postgres.h"
|
||
|
|
||
|
#include "access/heapam.h"
|
||
|
#include "access/htup_details.h"
|
||
|
#include "access/nbtree.h"
|
||
|
#include "access/sysattr.h"
|
||
|
#include "catalog/dependency.h"
|
||
|
#include "catalog/indexing.h"
|
||
|
#include "catalog/objectaddress.h"
|
||
|
#include "catalog/partition.h"
|
||
|
#include "catalog/pg_collation.h"
|
||
|
#include "catalog/pg_inherits.h"
|
||
|
#include "catalog/pg_inherits_fn.h"
|
||
|
#include "catalog/pg_opclass.h"
|
||
|
#include "catalog/pg_type.h"
|
||
|
#include "executor/executor.h"
|
||
|
#include "miscadmin.h"
|
||
|
#include "nodes/makefuncs.h"
|
||
|
#include "nodes/nodeFuncs.h"
|
||
|
#include "nodes/parsenodes.h"
|
||
|
#include "optimizer/clauses.h"
|
||
|
#include "optimizer/planmain.h"
|
||
|
#include "optimizer/var.h"
|
||
|
#include "rewrite/rewriteManip.h"
|
||
|
#include "storage/lmgr.h"
|
||
|
#include "utils/array.h"
|
||
|
#include "utils/builtins.h"
|
||
|
#include "utils/datum.h"
|
||
|
#include "utils/memutils.h"
|
||
|
#include "utils/fmgroids.h"
|
||
|
#include "utils/inval.h"
|
||
|
#include "utils/lsyscache.h"
|
||
|
#include "utils/rel.h"
|
||
|
#include "utils/ruleutils.h"
|
||
|
#include "utils/syscache.h"
|
||
|
|
||
|
/*
|
||
|
* Information about bounds of a partitioned relation
|
||
|
*
|
||
|
* A list partition datum that is known to be NULL is never put into the
|
||
|
* datums array. Instead, it is tracked using has_null and null_index fields.
|
||
|
*
|
||
|
* In the case of range partitioning, ndatums will typically be far less than
|
||
|
* 2 * nparts, because a partition's upper bound and the next partition's lower
|
||
|
* bound are the same in most common cases, and we only store one of them.
|
||
|
*
|
||
|
* In the case of list partitioning, the indexes array stores one entry for
|
||
|
* every datum, which is the index of the partition that accepts a given datum.
|
||
|
* In case of range partitioning, it stores one entry per distinct range
|
||
|
* datum, which is the index of the partition for which a given datum
|
||
|
* is an upper bound.
|
||
|
*/
|
||
|
|
||
|
/*
 * Tri-state tag saying what a single range bound datum holds: a real
 * (finite) value, or one of the two infinities.
 */
typedef enum RangeDatumContent
{
	RANGE_DATUM_FINITE = 0,		/* actual datum stored elsewhere */
	RANGE_DATUM_NEG_INF = 1,	/* negative infinity */
	RANGE_DATUM_POS_INF = 2		/* positive infinity */
} RangeDatumContent;
|
||
|
|
||
|
typedef struct PartitionBoundInfoData
|
||
|
{
|
||
|
char strategy; /* list or range bounds? */
|
||
|
int ndatums; /* Length of the datums following array */
|
||
|
Datum **datums; /* Array of datum-tuples with key->partnatts
|
||
|
* datums each */
|
||
|
RangeDatumContent **content;/* what's contained in each range bound datum?
|
||
|
* (see the above enum); NULL for list
|
||
|
* partitioned tables */
|
||
|
int *indexes; /* Partition indexes; one entry per member of
|
||
|
* the datums array (plus one if range
|
||
|
* partitioned table) */
|
||
|
bool has_null; /* Is there a null-accepting partition? false
|
||
|
* for range partitioned tables */
|
||
|
int null_index; /* Index of the null-accepting partition; -1
|
||
|
* for range partitioned tables */
|
||
|
} PartitionBoundInfoData;
|
||
|
|
||
|
/*
|
||
|
* When qsort'ing partition bounds after reading from the catalog, each bound
|
||
|
* is represented with one of the following structs.
|
||
|
*/
|
||
|
|
||
|
/* One value coming from some (index'th) list partition */
|
||
|
typedef struct PartitionListValue
|
||
|
{
|
||
|
int index;
|
||
|
Datum value;
|
||
|
} PartitionListValue;
|
||
|
|
||
|
/* One bound of a range partition */
|
||
|
typedef struct PartitionRangeBound
|
||
|
{
|
||
|
int index;
|
||
|
Datum *datums; /* range bound datums */
|
||
|
RangeDatumContent *content; /* what's contained in each datum? */
|
||
|
bool lower; /* this is the lower (vs upper) bound */
|
||
|
} PartitionRangeBound;
|
||
|
|
||
|
static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
|
||
|
void *arg);
|
||
|
static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
|
||
|
void *arg);
|
||
|
|
||
|
static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
|
||
|
static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
|
||
|
static Oid get_partition_operator(PartitionKey key, int col,
|
||
|
StrategyNumber strategy, bool *need_relabel);
|
||
|
static List *generate_partition_qual(Relation rel, bool recurse);
|
||
|
|
||
|
static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
|
||
|
List *datums, bool lower);
|
||
|
static int32 partition_rbound_cmp(PartitionKey key,
|
||
|
Datum *datums1, RangeDatumContent *content1, bool lower1,
|
||
|
PartitionRangeBound *b2);
|
||
|
static int32 partition_rbound_datum_cmp(PartitionKey key,
|
||
|
Datum *rb_datums, RangeDatumContent *rb_content,
|
||
|
Datum *tuple_datums);
|
||
|
|
||
|
static int32 partition_bound_cmp(PartitionKey key,
|
||
|
PartitionBoundInfo boundinfo,
|
||
|
int offset, void *probe, bool probe_is_bound);
|
||
|
static int partition_bound_bsearch(PartitionKey key,
|
||
|
PartitionBoundInfo boundinfo,
|
||
|
void *probe, bool probe_is_bound, bool *is_equal);
|
||
|
|
||
|
/* Support get_partition_for_tuple() */
|
||
|
static void FormPartitionKeyDatum(PartitionDispatch pd,
|
||
|
TupleTableSlot *slot,
|
||
|
EState *estate,
|
||
|
Datum *values,
|
||
|
bool *isnull);
|
||
|
|
||
|
/*
|
||
|
* RelationBuildPartitionDesc
|
||
|
* Form rel's partition descriptor
|
||
|
*
|
||
|
* Not flushed from the cache by RelationClearRelation() unless changed because
|
||
|
* of addition or removal of partition.
|
||
|
*/
|
||
|
void
|
||
|
RelationBuildPartitionDesc(Relation rel)
|
||
|
{
|
||
|
List *inhoids,
|
||
|
*partoids;
|
||
|
Oid *oids = NULL;
|
||
|
List *boundspecs = NIL;
|
||
|
ListCell *cell;
|
||
|
int i,
|
||
|
nparts;
|
||
|
PartitionKey key = RelationGetPartitionKey(rel);
|
||
|
PartitionDesc result;
|
||
|
MemoryContext oldcxt;
|
||
|
|
||
|
int ndatums = 0;
|
||
|
|
||
|
/* List partitioning specific */
|
||
|
PartitionListValue **all_values = NULL;
|
||
|
bool found_null = false;
|
||
|
int null_index = -1;
|
||
|
|
||
|
/* Range partitioning specific */
|
||
|
PartitionRangeBound **rbounds = NULL;
|
||
|
|
||
|
/*
|
||
|
* The following could happen in situations where rel has a pg_class entry
|
||
|
* but not the pg_partitioned_table entry yet.
|
||
|
*/
|
||
|
if (key == NULL)
|
||
|
return;
|
||
|
|
||
|
/* Get partition oids from pg_inherits */
|
||
|
inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
|
||
|
|
||
|
/* Collect bound spec nodes in a list */
|
||
|
i = 0;
|
||
|
partoids = NIL;
|
||
|
foreach(cell, inhoids)
|
||
|
{
|
||
|
Oid inhrelid = lfirst_oid(cell);
|
||
|
HeapTuple tuple;
|
||
|
Datum datum;
|
||
|
bool isnull;
|
||
|
Node *boundspec;
|
||
|
|
||
|
tuple = SearchSysCache1(RELOID, inhrelid);
|
||
|
|
||
|
/*
|
||
|
* It is possible that the pg_class tuple of a partition has not been
|
||
|
* updated yet to set its relpartbound field. The only case where
|
||
|
* this happens is when we open the parent relation to check using its
|
||
|
* partition descriptor that a new partition's bound does not overlap
|
||
|
* some existing partition.
|
||
|
*/
|
||
|
if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
|
||
|
{
|
||
|
ReleaseSysCache(tuple);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
datum = SysCacheGetAttr(RELOID, tuple,
|
||
|
Anum_pg_class_relpartbound,
|
||
|
&isnull);
|
||
|
Assert(!isnull);
|
||
|
boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
|
||
|
boundspecs = lappend(boundspecs, boundspec);
|
||
|
partoids = lappend_oid(partoids, inhrelid);
|
||
|
ReleaseSysCache(tuple);
|
||
|
}
|
||
|
|
||
|
nparts = list_length(partoids);
|
||
|
|
||
|
if (nparts > 0)
|
||
|
{
|
||
|
oids = (Oid *) palloc(nparts * sizeof(Oid));
|
||
|
i = 0;
|
||
|
foreach(cell, partoids)
|
||
|
oids[i++] = lfirst_oid(cell);
|
||
|
|
||
|
/* Convert from node to the internal representation */
|
||
|
if (key->strategy == PARTITION_STRATEGY_LIST)
|
||
|
{
|
||
|
List *non_null_values = NIL;
|
||
|
|
||
|
/*
|
||
|
* Create a unified list of non-null values across all partitions.
|
||
|
*/
|
||
|
i = 0;
|
||
|
found_null = false;
|
||
|
null_index = -1;
|
||
|
foreach(cell, boundspecs)
|
||
|
{
|
||
|
ListCell *c;
|
||
|
PartitionBoundSpec *spec = lfirst(cell);
|
||
|
|
||
|
if (spec->strategy != PARTITION_STRATEGY_LIST)
|
||
|
elog(ERROR, "invalid strategy in partition bound spec");
|
||
|
|
||
|
foreach(c, spec->listdatums)
|
||
|
{
|
||
|
Const *val = lfirst(c);
|
||
|
PartitionListValue *list_value = NULL;
|
||
|
|
||
|
if (!val->constisnull)
|
||
|
{
|
||
|
list_value = (PartitionListValue *)
|
||
|
palloc0(sizeof(PartitionListValue));
|
||
|
list_value->index = i;
|
||
|
list_value->value = val->constvalue;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* Never put a null into the values array, flag
|
||
|
* instead for the code further down below where we
|
||
|
* construct the actual relcache struct.
|
||
|
*/
|
||
|
if (found_null)
|
||
|
elog(ERROR, "found null more than once");
|
||
|
found_null = true;
|
||
|
null_index = i;
|
||
|
}
|
||
|
|
||
|
if (list_value)
|
||
|
non_null_values = lappend(non_null_values,
|
||
|
list_value);
|
||
|
}
|
||
|
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
ndatums = list_length(non_null_values);
|
||
|
|
||
|
/*
|
||
|
* Collect all list values in one array. Alongside the value, we
|
||
|
* also save the index of partition the value comes from.
|
||
|
*/
|
||
|
all_values = (PartitionListValue **) palloc(ndatums *
|
||
|
sizeof(PartitionListValue *));
|
||
|
i = 0;
|
||
|
foreach(cell, non_null_values)
|
||
|
{
|
||
|
PartitionListValue *src = lfirst(cell);
|
||
|
|
||
|
all_values[i] = (PartitionListValue *)
|
||
|
palloc(sizeof(PartitionListValue));
|
||
|
all_values[i]->value = src->value;
|
||
|
all_values[i]->index = src->index;
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
|
||
|
qsort_partition_list_value_cmp, (void *) key);
|
||
|
}
|
||
|
else if (key->strategy == PARTITION_STRATEGY_RANGE)
|
||
|
{
|
||
|
int j,
|
||
|
k;
|
||
|
PartitionRangeBound **all_bounds,
|
||
|
*prev;
|
||
|
bool *distinct_indexes;
|
||
|
|
||
|
all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
|
||
|
sizeof(PartitionRangeBound *));
|
||
|
distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool));
|
||
|
|
||
|
/*
|
||
|
* Create a unified list of range bounds across all the
|
||
|
* partitions.
|
||
|
*/
|
||
|
i = j = 0;
|
||
|
foreach(cell, boundspecs)
|
||
|
{
|
||
|
PartitionBoundSpec *spec = lfirst(cell);
|
||
|
PartitionRangeBound *lower,
|
||
|
*upper;
|
||
|
|
||
|
if (spec->strategy != PARTITION_STRATEGY_RANGE)
|
||
|
elog(ERROR, "invalid strategy in partition bound spec");
|
||
|
|
||
|
lower = make_one_range_bound(key, i, spec->lowerdatums,
|
||
|
true);
|
||
|
upper = make_one_range_bound(key, i, spec->upperdatums,
|
||
|
false);
|
||
|
all_bounds[j] = lower;
|
||
|
all_bounds[j + 1] = upper;
|
||
|
j += 2;
|
||
|
i++;
|
||
|
}
|
||
|
Assert(j == 2 * nparts);
|
||
|
|
||
|
/* Sort all the bounds in ascending order */
|
||
|
qsort_arg(all_bounds, 2 * nparts,
|
||
|
sizeof(PartitionRangeBound *),
|
||
|
qsort_partition_rbound_cmp,
|
||
|
(void *) key);
|
||
|
|
||
|
/*
|
||
|
* Count the number of distinct bounds to allocate an array of
|
||
|
* that size.
|
||
|
*/
|
||
|
ndatums = 0;
|
||
|
prev = NULL;
|
||
|
for (i = 0; i < 2 * nparts; i++)
|
||
|
{
|
||
|
PartitionRangeBound *cur = all_bounds[i];
|
||
|
bool is_distinct = false;
|
||
|
int j;
|
||
|
|
||
|
/* Is current bound is distinct from the previous? */
|
||
|
for (j = 0; j < key->partnatts; j++)
|
||
|
{
|
||
|
Datum cmpval;
|
||
|
|
||
|
if (prev == NULL)
|
||
|
{
|
||
|
is_distinct = true;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If either of them has infinite element, we can't equate
|
||
|
* them. Even when both are infinite, they'd have
|
||
|
* opposite signs, because only one of cur and prev is a
|
||
|
* lower bound).
|
||
|
*/
|
||
|
if (cur->content[j] != RANGE_DATUM_FINITE ||
|
||
|
prev->content[j] != RANGE_DATUM_FINITE)
|
||
|
{
|
||
|
is_distinct = true;
|
||
|
break;
|
||
|
}
|
||
|
cmpval = FunctionCall2Coll(&key->partsupfunc[j],
|
||
|
key->partcollation[j],
|
||
|
cur->datums[j],
|
||
|
prev->datums[j]);
|
||
|
if (DatumGetInt32(cmpval) != 0)
|
||
|
{
|
||
|
is_distinct = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Count the current bound if it is distinct from the previous
|
||
|
* one. Also, store if the index i contains a distinct bound
|
||
|
* that we'd like put in the relcache array.
|
||
|
*/
|
||
|
if (is_distinct)
|
||
|
{
|
||
|
distinct_indexes[i] = true;
|
||
|
ndatums++;
|
||
|
}
|
||
|
else
|
||
|
distinct_indexes[i] = false;
|
||
|
|
||
|
prev = cur;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Finally save them in an array from where they will be copied
|
||
|
* into the relcache.
|
||
|
*/
|
||
|
rbounds = (PartitionRangeBound **) palloc(ndatums *
|
||
|
sizeof(PartitionRangeBound *));
|
||
|
k = 0;
|
||
|
for (i = 0; i < 2 * nparts; i++)
|
||
|
{
|
||
|
if (distinct_indexes[i])
|
||
|
rbounds[k++] = all_bounds[i];
|
||
|
}
|
||
|
Assert(k == ndatums);
|
||
|
}
|
||
|
else
|
||
|
elog(ERROR, "unexpected partition strategy: %d",
|
||
|
(int) key->strategy);
|
||
|
}
|
||
|
|
||
|
/* Now build the actual relcache partition descriptor */
|
||
|
rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
|
||
|
RelationGetRelationName(rel),
|
||
|
ALLOCSET_DEFAULT_SIZES);
|
||
|
oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
|
||
|
|
||
|
result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
|
||
|
result->nparts = nparts;
|
||
|
if (nparts > 0)
|
||
|
{
|
||
|
PartitionBoundInfo boundinfo;
|
||
|
int *mapping;
|
||
|
int next_index = 0;
|
||
|
|
||
|
result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
|
||
|
|
||
|
boundinfo = (PartitionBoundInfoData *)
|
||
|
palloc0(sizeof(PartitionBoundInfoData));
|
||
|
boundinfo->strategy = key->strategy;
|
||
|
boundinfo->ndatums = ndatums;
|
||
|
boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
|
||
|
|
||
|
/* Initialize mapping array with invalid values */
|
||
|
mapping = (int *) palloc(sizeof(int) * nparts);
|
||
|
for (i = 0; i < nparts; i++)
|
||
|
mapping[i] = -1;
|
||
|
|
||
|
switch (key->strategy)
|
||
|
{
|
||
|
case PARTITION_STRATEGY_LIST:
|
||
|
{
|
||
|
boundinfo->has_null = found_null;
|
||
|
boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
|
||
|
|
||
|
/*
|
||
|
* Copy values. Indexes of individual values are mapped
|
||
|
* to canonical values so that they match for any two list
|
||
|
* partitioned tables with same number of partitions and
|
||
|
* same lists per partition. One way to canonicalize is
|
||
|
* to assign the index in all_values[] of the smallest
|
||
|
* value of each partition, as the index of all of the
|
||
|
* partition's values.
|
||
|
*/
|
||
|
for (i = 0; i < ndatums; i++)
|
||
|
{
|
||
|
boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
|
||
|
boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
|
||
|
key->parttypbyval[0],
|
||
|
key->parttyplen[0]);
|
||
|
|
||
|
/* If the old index has no mapping, assign one */
|
||
|
if (mapping[all_values[i]->index] == -1)
|
||
|
mapping[all_values[i]->index] = next_index++;
|
||
|
|
||
|
boundinfo->indexes[i] = mapping[all_values[i]->index];
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If null-accepting partition has no mapped index yet,
|
||
|
* assign one. This could happen if such partition
|
||
|
* accepts only null and hence not covered in the above
|
||
|
* loop which only handled non-null values.
|
||
|
*/
|
||
|
if (found_null)
|
||
|
{
|
||
|
Assert(null_index >= 0);
|
||
|
if (mapping[null_index] == -1)
|
||
|
mapping[null_index] = next_index++;
|
||
|
}
|
||
|
|
||
|
/* All partition must now have a valid mapping */
|
||
|
Assert(next_index == nparts);
|
||
|
|
||
|
if (found_null)
|
||
|
boundinfo->null_index = mapping[null_index];
|
||
|
else
|
||
|
boundinfo->null_index = -1;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PARTITION_STRATEGY_RANGE:
|
||
|
{
|
||
|
boundinfo->content = (RangeDatumContent **) palloc(ndatums *
|
||
|
sizeof(RangeDatumContent *));
|
||
|
boundinfo->indexes = (int *) palloc((ndatums + 1) *
|
||
|
sizeof(int));
|
||
|
|
||
|
for (i = 0; i < ndatums; i++)
|
||
|
{
|
||
|
int j;
|
||
|
|
||
|
boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
|
||
|
sizeof(Datum));
|
||
|
boundinfo->content[i] = (RangeDatumContent *)
|
||
|
palloc(key->partnatts *
|
||
|
sizeof(RangeDatumContent));
|
||
|
for (j = 0; j < key->partnatts; j++)
|
||
|
{
|
||
|
if (rbounds[i]->content[j] == RANGE_DATUM_FINITE)
|
||
|
boundinfo->datums[i][j] =
|
||
|
datumCopy(rbounds[i]->datums[j],
|
||
|
key->parttypbyval[j],
|
||
|
key->parttyplen[j]);
|
||
|
/* Remember, we are storing the tri-state value. */
|
||
|
boundinfo->content[i][j] = rbounds[i]->content[j];
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* There is no mapping for invalid indexes.
|
||
|
*
|
||
|
* Any lower bounds in the rbounds array have invalid
|
||
|
* indexes assigned, because the values between the
|
||
|
* previous bound (if there is one) and this (lower)
|
||
|
* bound are not part of the range of any existing
|
||
|
* partition.
|
||
|
*/
|
||
|
if (rbounds[i]->lower)
|
||
|
boundinfo->indexes[i] = -1;
|
||
|
else
|
||
|
{
|
||
|
int orig_index = rbounds[i]->index;
|
||
|
|
||
|
/* If the old index is has no mapping, assign one */
|
||
|
if (mapping[orig_index] == -1)
|
||
|
mapping[orig_index] = next_index++;
|
||
|
|
||
|
boundinfo->indexes[i] = mapping[orig_index];
|
||
|
}
|
||
|
}
|
||
|
boundinfo->indexes[i] = -1;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
default:
|
||
|
elog(ERROR, "unexpected partition strategy: %d",
|
||
|
(int) key->strategy);
|
||
|
}
|
||
|
|
||
|
result->boundinfo = boundinfo;
|
||
|
|
||
|
/*
|
||
|
* Now assign OIDs from the original array into mapped indexes of the
|
||
|
* result array. Order of OIDs in the former is defined by the
|
||
|
* catalog scan that retrived them, whereas that in the latter is
|
||
|
* defined by canonicalized representation of the list values or the
|
||
|
* range bounds.
|
||
|
*/
|
||
|
for (i = 0; i < nparts; i++)
|
||
|
result->oids[mapping[i]] = oids[i];
|
||
|
pfree(mapping);
|
||
|
}
|
||
|
|
||
|
MemoryContextSwitchTo(oldcxt);
|
||
|
rel->rd_partdesc = result;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Are two partition bound collections logically equal?
|
||
|
*
|
||
|
* Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
|
||
|
* This is also useful when b1 and b2 are bound collections of two separate
|
||
|
* relations, respectively, because PartitionBoundInfo is a canonical
|
||
|
* representation of partition bounds.
|
||
|
*/
|
||
|
bool
|
||
|
partition_bounds_equal(PartitionKey key,
|
||
|
PartitionBoundInfo b1, PartitionBoundInfo b2)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
if (b1->strategy != b2->strategy)
|
||
|
return false;
|
||
|
|
||
|
if (b1->ndatums != b2->ndatums)
|
||
|
return false;
|
||
|
|
||
|
if (b1->has_null != b2->has_null)
|
||
|
return false;
|
||
|
|
||
|
if (b1->null_index != b2->null_index)
|
||
|
return false;
|
||
|
|
||
|
for (i = 0; i < b1->ndatums; i++)
|
||
|
{
|
||
|
int j;
|
||
|
|
||
|
for (j = 0; j < key->partnatts; j++)
|
||
|
{
|
||
|
int32 cmpval;
|
||
|
|
||
|
cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[j],
|
||
|
key->partcollation[j],
|
||
|
b1->datums[i][j],
|
||
|
b2->datums[i][j]));
|
||
|
if (cmpval != 0)
|
||
|
return false;
|
||
|
|
||
|
/* Range partitions can have infinite datums */
|
||
|
if (b1->content != NULL && b1->content[i][j] != b2->content[i][j])
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (b1->indexes[i] != b2->indexes[i])
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/* There are ndatums+1 indexes in case of range partitions */
|
||
|
if (key->strategy == PARTITION_STRATEGY_RANGE &&
|
||
|
b1->indexes[i] != b2->indexes[i])
|
||
|
return false;
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* check_new_partition_bound
|
||
|
*
|
||
|
* Checks if the new partition's bound overlaps any of the existing partitions
|
||
|
* of parent. Also performs additional checks as necessary per strategy.
|
||
|
*/
|
||
|
void
|
||
|
check_new_partition_bound(char *relname, Relation parent, Node *bound)
|
||
|
{
|
||
|
PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
|
||
|
PartitionKey key = RelationGetPartitionKey(parent);
|
||
|
PartitionDesc partdesc = RelationGetPartitionDesc(parent);
|
||
|
ParseState *pstate = make_parsestate(NULL);
|
||
|
int with = -1;
|
||
|
bool overlap = false;
|
||
|
|
||
|
switch (key->strategy)
|
||
|
{
|
||
|
case PARTITION_STRATEGY_LIST:
|
||
|
{
|
||
|
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
|
||
|
|
||
|
if (partdesc->nparts > 0)
|
||
|
{
|
||
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
||
|
ListCell *cell;
|
||
|
|
||
|
Assert(boundinfo &&
|
||
|
boundinfo->strategy == PARTITION_STRATEGY_LIST &&
|
||
|
(boundinfo->ndatums > 0 || boundinfo->has_null));
|
||
|
|
||
|
foreach(cell, spec->listdatums)
|
||
|
{
|
||
|
Const *val = lfirst(cell);
|
||
|
|
||
|
if (!val->constisnull)
|
||
|
{
|
||
|
int offset;
|
||
|
bool equal;
|
||
|
|
||
|
offset = partition_bound_bsearch(key, boundinfo,
|
||
|
&val->constvalue,
|
||
|
true, &equal);
|
||
|
if (offset >= 0 && equal)
|
||
|
{
|
||
|
overlap = true;
|
||
|
with = boundinfo->indexes[offset];
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
else if (boundinfo->has_null)
|
||
|
{
|
||
|
overlap = true;
|
||
|
with = boundinfo->null_index;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case PARTITION_STRATEGY_RANGE:
|
||
|
{
|
||
|
PartitionRangeBound *lower,
|
||
|
*upper;
|
||
|
|
||
|
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
|
||
|
lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
|
||
|
upper = make_one_range_bound(key, -1, spec->upperdatums, false);
|
||
|
|
||
|
/*
|
||
|
* First check if the resulting range would be empty with
|
||
|
* specified lower and upper bounds
|
||
|
*/
|
||
|
if (partition_rbound_cmp(key, lower->datums, lower->content, true,
|
||
|
upper) >= 0)
|
||
|
ereport(ERROR,
|
||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||
|
errmsg("cannot create range partition with empty range"),
|
||
|
parser_errposition(pstate, spec->location)));
|
||
|
|
||
|
if (partdesc->nparts > 0)
|
||
|
{
|
||
|
PartitionBoundInfo boundinfo = partdesc->boundinfo;
|
||
|
int off1,
|
||
|
off2;
|
||
|
bool equal = false;
|
||
|
|
||
|
Assert(boundinfo && boundinfo->ndatums > 0 &&
|
||
|
boundinfo->strategy == PARTITION_STRATEGY_RANGE);
|
||
|
|
||
|
/*
|
||
|
* Find the greatest index of a range bound that is less
|
||
|
* than or equal with the new lower bound.
|
||
|
*/
|
||
|
off1 = partition_bound_bsearch(key, boundinfo, lower, true,
|
||
|
&equal);
|
||
|
|
||
|
/*
|
||
|
* If equal has been set to true, that means the new lower
|
||
|
* bound is found to be equal with the bound at off1,
|
||
|
* which clearly means an overlap with the partition at
|
||
|
* index off1+1).
|
||
|
*
|
||
|
* Otherwise, check if there is a "gap" that could be
|
||
|
* occupied by the new partition. In case of a gap, the
|
||
|
* new upper bound should not cross past the upper
|
||
|
* boundary of the gap, that is, off2 == off1 should be
|
||
|
* true.
|
||
|
*/
|
||
|
if (!equal && boundinfo->indexes[off1 + 1] < 0)
|
||
|
{
|
||
|
off2 = partition_bound_bsearch(key, boundinfo, upper,
|
||
|
true, &equal);
|
||
|
|
||
|
if (equal || off1 != off2)
|
||
|
{
|
||
|
overlap = true;
|
||
|
with = boundinfo->indexes[off2 + 1];
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
overlap = true;
|
||
|
with = boundinfo->indexes[off1 + 1];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
default:
|
||
|
elog(ERROR, "unexpected partition strategy: %d",
|
||
|
(int) key->strategy);
|
||
|
}
|
||
|
|
||
|
if (overlap)
|
||
|
{
|
||
|
Assert(with >= 0);
|
||
|
ereport(ERROR,
|
||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||
|
errmsg("partition \"%s\" would overlap partition \"%s\"",
|
||
|
relname, get_rel_name(partdesc->oids[with])),
|
||
|
parser_errposition(pstate, spec->location)));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* get_partition_parent
|
||
|
*
|
||
|
* Returns inheritance parent of a partition by scanning pg_inherits
|
||
|
*
|
||
|
* Note: Because this function assumes that the relation whose OID is passed
|
||
|
* as an argument will have precisely one parent, it should only be called
|
||
|
* when it is known that the relation is a partition.
|
||
|
*/
|
||
|
Oid
|
||
|
get_partition_parent(Oid relid)
|
||
|
{
|
||
|
Form_pg_inherits form;
|
||
|
Relation catalogRelation;
|
||
|
SysScanDesc scan;
|
||
|
ScanKeyData key[2];
|
||
|
HeapTuple tuple;
|
||
|
Oid result;
|
||
|
|
||
|
catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
|
||
|
|
||
|
ScanKeyInit(&key[0],
|
||
|
Anum_pg_inherits_inhrelid,
|
||
|
BTEqualStrategyNumber, F_OIDEQ,
|
||
|
ObjectIdGetDatum(relid));
|
||
|
ScanKeyInit(&key[1],
|
||
|
Anum_pg_inherits_inhseqno,
|
||
|
BTEqualStrategyNumber, F_INT4EQ,
|
||
|
Int32GetDatum(1));
|
||
|
|
||
|
scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
|
||
|
NULL, 2, key);
|
||
|
|
||
|
tuple = systable_getnext(scan);
|
||
|
Assert(HeapTupleIsValid(tuple));
|
||
|
|
||
|
form = (Form_pg_inherits) GETSTRUCT(tuple);
|
||
|
result = form->inhparent;
|
||
|
|
||
|
systable_endscan(scan);
|
||
|
heap_close(catalogRelation, AccessShareLock);
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* get_qual_from_partbound
|
||
|
* Given a parser node for partition bound, return the list of executable
|
||
|
* expressions as partition constraint
|
||
|
*/
|
||
|
List *
|
||
|
get_qual_from_partbound(Relation rel, Relation parent, Node *bound)
|
||
|
{
|
||
|
PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
|
||
|
PartitionKey key = RelationGetPartitionKey(parent);
|
||
|
List *my_qual = NIL;
|
||
|
TupleDesc parent_tupdesc = RelationGetDescr(parent);
|
||
|
AttrNumber parent_attno;
|
||
|
AttrNumber *partition_attnos;
|
||
|
bool found_whole_row;
|
||
|
|
||
|
Assert(key != NULL);
|
||
|
|
||
|
switch (key->strategy)
|
||
|
{
|
||
|
case PARTITION_STRATEGY_LIST:
|
||
|
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
|
||
|
my_qual = get_qual_for_list(key, spec);
|
||
|
break;
|
||
|
|
||
|
case PARTITION_STRATEGY_RANGE:
|
||
|
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
|
||
|
my_qual = get_qual_for_range(key, spec);
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
elog(ERROR, "unexpected partition strategy: %d",
|
||
|
(int) key->strategy);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Translate vars in the generated expression to have correct attnos. Note
|
||
|
* that the vars in my_qual bear attnos dictated by key which carries
|
||
|
* physical attnos of the parent. We must allow for a case where physical
|
||
|
* attnos of a partition can be different from the parent.
|
||
|
*/
|
||
|
partition_attnos = (AttrNumber *)
|
||
|
palloc0(parent_tupdesc->natts * sizeof(AttrNumber));
|
||
|
for (parent_attno = 1; parent_attno <= parent_tupdesc->natts;
|
||
|
parent_attno++)
|
||
|
{
|
||
|
Form_pg_attribute attribute = parent_tupdesc->attrs[parent_attno - 1];
|
||
|
char *attname = NameStr(attribute->attname);
|
||
|
AttrNumber partition_attno;
|
||
|
|
||
|
if (attribute->attisdropped)
|
||
|
continue;
|
||
|
|
||
|
partition_attno = get_attnum(RelationGetRelid(rel), attname);
|
||
|
partition_attnos[parent_attno - 1] = partition_attno;
|
||
|
}
|
||
|
|
||
|
my_qual = (List *) map_variable_attnos((Node *) my_qual,
|
||
|
1, 0,
|
||
|
partition_attnos,
|
||
|
parent_tupdesc->natts,
|
||
|
&found_whole_row);
|
||
|
/* there can never be a whole-row reference here */
|
||
|
if (found_whole_row)
|
||
|
elog(ERROR, "unexpected whole-row reference found in partition key");
|
||
|
|
||
|
return my_qual;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* RelationGetPartitionQual
|
||
|
*
|
||
|
* Returns a list of partition quals
|
||
|
*/
|
||
|
List *
|
||
|
RelationGetPartitionQual(Relation rel, bool recurse)
|
||
|
{
|
||
|
/* Quick exit */
|
||
|
if (!rel->rd_rel->relispartition)
|
||
|
return NIL;
|
||
|
|
||
|
return generate_partition_qual(rel, recurse);
|
||
|
}
|
||
|
|
||
|
/*
 * Turn an array of OIDs with N elements into a list, by appending each
 * element of arr, in order, to list.
 *
 * The loop counter has a deliberately unusual name so that it neither
 * shadows a caller's variable nor captures an identifier appearing in the
 * macro arguments.
 */
#define OID_ARRAY_TO_LIST(arr, N, list) \
	do\
	{\
		int		oid_array_to_list_idx_;\
		for (oid_array_to_list_idx_ = 0;\
			 oid_array_to_list_idx_ < (N);\
			 oid_array_to_list_idx_++)\
			(list) = lappend_oid((list), (arr)[oid_array_to_list_idx_]);\
	} while(0)
|
||
|
|
||
|
/*
|
||
|
* RelationGetPartitionDispatchInfo
|
||
|
* Returns information necessary to route tuples down a partition tree
|
||
|
*
|
||
|
* All the partitions will be locked with lockmode, unless it is NoLock.
|
||
|
* A list of the OIDs of all the leaf partition of rel is returned in
|
||
|
* *leaf_part_oids.
|
||
|
*/
|
||
|
PartitionDispatch *
|
||
|
RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
|
||
|
int *num_parted, List **leaf_part_oids)
|
||
|
{
|
||
|
PartitionDesc rootpartdesc = RelationGetPartitionDesc(rel);
|
||
|
PartitionDispatchData **pd;
|
||
|
List *all_parts = NIL,
|
||
|
*parted_rels;
|
||
|
ListCell *lc;
|
||
|
int i,
|
||
|
k;
|
||
|
|
||
|
/*
|
||
|
* Lock partitions and make a list of the partitioned ones to prepare
|
||
|
* their PartitionDispatch objects below.
|
||
|
*
|
||
|
* Cannot use find_all_inheritors() here, because then the order of OIDs
|
||
|
* in parted_rels list would be unknown, which does not help, because we
|
||
|
* we assign indexes within individual PartitionDispatch in an order that
|
||
|
* is predetermined (determined by the order of OIDs in individual
|
||
|
* partition descriptors).
|
||
|
*/
|
||
|
*num_parted = 1;
|
||
|
parted_rels = list_make1(rel);
|
||
|
OID_ARRAY_TO_LIST(rootpartdesc->oids, rootpartdesc->nparts, all_parts);
|
||
|
foreach(lc, all_parts)
|
||
|
{
|
||
|
Relation partrel = heap_open(lfirst_oid(lc), lockmode);
|
||
|
PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
|
||
|
|
||
|
/*
|
||
|
* If this partition is a partitioned table, add its children to the
|
||
|
* end of the list, so that they are processed as well.
|
||
|
*/
|
||
|
if (partdesc)
|
||
|
{
|
||
|
(*num_parted)++;
|
||
|
parted_rels = lappend(parted_rels, partrel);
|
||
|
OID_ARRAY_TO_LIST(partdesc->oids, partdesc->nparts, all_parts);
|
||
|
}
|
||
|
else
|
||
|
heap_close(partrel, NoLock);
|
||
|
|
||
|
/*
|
||
|
* We keep the partitioned ones open until we're done using the
|
||
|
* information being collected here (for example, see
|
||
|
* ExecEndModifyTable).
|
||
|
*/
|
||
|
}
|
||
|
|
||
|
/* Generate PartitionDispatch objects for all partitioned tables */
|
||
|
pd = (PartitionDispatchData **) palloc(*num_parted *
|
||
|
sizeof(PartitionDispatchData *));
|
||
|
*leaf_part_oids = NIL;
|
||
|
i = k = 0;
|
||
|
foreach(lc, parted_rels)
|
||
|
{
|
||
|
Relation partrel = lfirst(lc);
|
||
|
PartitionKey partkey = RelationGetPartitionKey(partrel);
|
||
|
PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
|
||
|
int j,
|
||
|
m;
|
||
|
|
||
|
pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
|
||
|
pd[i]->reldesc = partrel;
|
||
|
pd[i]->key = partkey;
|
||
|
pd[i]->keystate = NIL;
|
||
|
pd[i]->partdesc = partdesc;
|
||
|
pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
|
||
|
|
||
|
m = 0;
|
||
|
for (j = 0; j < partdesc->nparts; j++)
|
||
|
{
|
||
|
Oid partrelid = partdesc->oids[j];
|
||
|
|
||
|
if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
|
||
|
{
|
||
|
*leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
|
||
|
pd[i]->indexes[j] = k++;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* We can assign indexes this way because of the way
|
||
|
* parted_rels has been generated.
|
||
|
*/
|
||
|
pd[i]->indexes[j] = -(i + 1 + m);
|
||
|
m++;
|
||
|
}
|
||
|
}
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
return pd;
|
||
|
}
|
||
|
|
||
|
/* Module-local functions */
|
||
|
|
||
|
/*
|
||
|
* get_qual_for_list
|
||
|
*
|
||
|
* Returns a list of expressions to use as a list partition's constraint.
|
||
|
*/
|
||
|
static List *
|
||
|
get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
|
||
|
{
|
||
|
List *result;
|
||
|
ArrayExpr *arr;
|
||
|
ScalarArrayOpExpr *opexpr;
|
||
|
ListCell *cell,
|
||
|
*prev,
|
||
|
*next;
|
||
|
Node *keyCol;
|
||
|
Oid operoid;
|
||
|
bool need_relabel,
|
||
|
list_has_null = false;
|
||
|
NullTest *nulltest1 = NULL,
|
||
|
*nulltest2 = NULL;
|
||
|
|
||
|
/* Left operand is either a simple Var or arbitrary expression */
|
||
|
if (key->partattrs[0] != 0)
|
||
|
keyCol = (Node *) makeVar(1,
|
||
|
key->partattrs[0],
|
||
|
key->parttypid[0],
|
||
|
key->parttypmod[0],
|
||
|
key->parttypcoll[0],
|
||
|
0);
|
||
|
else
|
||
|
keyCol = (Node *) copyObject(linitial(key->partexprs));
|
||
|
|
||
|
/*
|
||
|
* We must remove any NULL value in the list; we handle it separately
|
||
|
* below.
|
||
|
*/
|
||
|
prev = NULL;
|
||
|
for (cell = list_head(spec->listdatums); cell; cell = next)
|
||
|
{
|
||
|
Const *val = (Const *) lfirst(cell);
|
||
|
|
||
|
next = lnext(cell);
|
||
|
|
||
|
if (val->constisnull)
|
||
|
{
|
||
|
list_has_null = true;
|
||
|
spec->listdatums = list_delete_cell(spec->listdatums,
|
||
|
cell, prev);
|
||
|
}
|
||
|
else
|
||
|
prev = cell;
|
||
|
}
|
||
|
|
||
|
if (!list_has_null)
|
||
|
{
|
||
|
/*
|
||
|
* Gin up a col IS NOT NULL test that will be AND'd with other
|
||
|
* expressions
|
||
|
*/
|
||
|
nulltest1 = makeNode(NullTest);
|
||
|
nulltest1->arg = (Expr *) keyCol;
|
||
|
nulltest1->nulltesttype = IS_NOT_NULL;
|
||
|
nulltest1->argisrow = false;
|
||
|
nulltest1->location = -1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/*
|
||
|
* Gin up a col IS NULL test that will be OR'd with other expressions
|
||
|
*/
|
||
|
nulltest2 = makeNode(NullTest);
|
||
|
nulltest2->arg = (Expr *) keyCol;
|
||
|
nulltest2->nulltesttype = IS_NULL;
|
||
|
nulltest2->argisrow = false;
|
||
|
nulltest2->location = -1;
|
||
|
}
|
||
|
|
||
|
/* Right operand is an ArrayExpr containing this partition's values */
|
||
|
arr = makeNode(ArrayExpr);
|
||
|
arr->array_typeid = !type_is_array(key->parttypid[0])
|
||
|
? get_array_type(key->parttypid[0])
|
||
|
: key->parttypid[0];
|
||
|
arr->array_collid = key->parttypcoll[0];
|
||
|
arr->element_typeid = key->parttypid[0];
|
||
|
arr->elements = spec->listdatums;
|
||
|
arr->multidims = false;
|
||
|
arr->location = -1;
|
||
|
|
||
|
/* Get the correct btree equality operator */
|
||
|
operoid = get_partition_operator(key, 0, BTEqualStrategyNumber,
|
||
|
&need_relabel);
|
||
|
if (need_relabel || key->partcollation[0] != key->parttypcoll[0])
|
||
|
keyCol = (Node *) makeRelabelType((Expr *) keyCol,
|
||
|
key->partopcintype[0],
|
||
|
-1,
|
||
|
key->partcollation[0],
|
||
|
COERCE_EXPLICIT_CAST);
|
||
|
|
||
|
/* Build leftop = ANY (rightop) */
|
||
|
opexpr = makeNode(ScalarArrayOpExpr);
|
||
|
opexpr->opno = operoid;
|
||
|
opexpr->opfuncid = get_opcode(operoid);
|
||
|
opexpr->useOr = true;
|
||
|
opexpr->inputcollid = key->partcollation[0];
|
||
|
opexpr->args = list_make2(keyCol, arr);
|
||
|
opexpr->location = -1;
|
||
|
|
||
|
if (nulltest1)
|
||
|
result = list_make2(nulltest1, opexpr);
|
||
|
else if (nulltest2)
|
||
|
{
|
||
|
Expr *or;
|
||
|
|
||
|
or = makeBoolExpr(OR_EXPR, list_make2(nulltest2, opexpr), -1);
|
||
|
result = list_make1(or);
|
||
|
}
|
||
|
else
|
||
|
result = list_make1(opexpr);
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* get_qual_for_range
|
||
|
*
|
||
|
* Get a list of OpExpr's to use as a range partition's constraint.
|
||
|
*/
|
||
|
static List *
|
||
|
get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
|
||
|
{
|
||
|
List *result = NIL;
|
||
|
ListCell *cell1,
|
||
|
*cell2,
|
||
|
*partexprs_item;
|
||
|
int i;
|
||
|
|
||
|
/*
|
||
|
* Iterate over columns of the key, emitting an OpExpr for each using the
|
||
|
* corresponding lower and upper datums as constant operands.
|
||
|
*/
|
||
|
i = 0;
|
||
|
partexprs_item = list_head(key->partexprs);
|
||
|
forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
|
||
|
{
|
||
|
PartitionRangeDatum *ldatum = lfirst(cell1),
|
||
|
*udatum = lfirst(cell2);
|
||
|
Node *keyCol;
|
||
|
Const *lower_val = NULL,
|
||
|
*upper_val = NULL;
|
||
|
EState *estate;
|
||
|
MemoryContext oldcxt;
|
||
|
Expr *test_expr;
|
||
|
ExprState *test_exprstate;
|
||
|
Datum test_result;
|
||
|
bool isNull;
|
||
|
bool need_relabel = false;
|
||
|
Oid operoid;
|
||
|
NullTest *nulltest;
|
||
|
|
||
|
/* Left operand */
|
||
|
if (key->partattrs[i] != 0)
|
||
|
{
|
||
|
keyCol = (Node *) makeVar(1,
|
||
|
key->partattrs[i],
|
||
|
key->parttypid[i],
|
||
|
key->parttypmod[i],
|
||
|
key->parttypcoll[i],
|
||
|
0);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
keyCol = (Node *) copyObject(lfirst(partexprs_item));
|
||
|
partexprs_item = lnext(partexprs_item);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Emit a IS NOT NULL expression for non-Var keys, because whereas
|
||
|
* simple attributes are covered by NOT NULL constraints, expression
|
||
|
* keys are still nullable which is not acceptable in case of range
|
||
|
* partitioning.
|
||
|
*/
|
||
|
if (!IsA(keyCol, Var))
|
||
|
{
|
||
|
nulltest = makeNode(NullTest);
|
||
|
nulltest->arg = (Expr *) keyCol;
|
||
|
nulltest->nulltesttype = IS_NOT_NULL;
|
||
|
nulltest->argisrow = false;
|
||
|
nulltest->location = -1;
|
||
|
result = lappend(result, nulltest);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Stop at this column if either of lower or upper datum is infinite,
|
||
|
* but do emit an OpExpr for the non-infinite datum.
|
||
|
*/
|
||
|
if (!ldatum->infinite)
|
||
|
lower_val = (Const *) ldatum->value;
|
||
|
if (!udatum->infinite)
|
||
|
upper_val = (Const *) udatum->value;
|
||
|
|
||
|
/*
|
||
|
* If lower_val and upper_val are both finite and happen to be equal,
|
||
|
* emit only (keyCol = lower_val) for this column, because all rows in
|
||
|
* this partition could only ever contain this value (ie, lower_val)
|
||
|
* in the current partitioning column. We must consider further
|
||
|
* columns because the above condition does not fully constrain the
|
||
|
* rows of this partition.
|
||
|
*/
|
||
|
if (lower_val && upper_val)
|
||
|
{
|
||
|
/* Get the correct btree equality operator for the test */
|
||
|
operoid = get_partition_operator(key, i, BTEqualStrategyNumber,
|
||
|
&need_relabel);
|
||
|
|
||
|
/* Create the test expression */
|
||
|
estate = CreateExecutorState();
|
||
|
oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
|
||
|
test_expr = make_opclause(operoid,
|
||
|
BOOLOID,
|
||
|
false,
|
||
|
(Expr *) lower_val,
|
||
|
(Expr *) upper_val,
|
||
|
InvalidOid,
|
||
|
key->partcollation[i]);
|
||
|
fix_opfuncids((Node *) test_expr);
|
||
|
test_exprstate = ExecInitExpr(test_expr, NULL);
|
||
|
test_result = ExecEvalExprSwitchContext(test_exprstate,
|
||
|
GetPerTupleExprContext(estate),
|
||
|
&isNull, NULL);
|
||
|
MemoryContextSwitchTo(oldcxt);
|
||
|
FreeExecutorState(estate);
|
||
|
|
||
|
if (DatumGetBool(test_result))
|
||
|
{
|
||
|
/* This can never be, but it's better to make sure */
|
||
|
if (i == key->partnatts - 1)
|
||
|
elog(ERROR, "invalid range bound specification");
|
||
|
|
||
|
if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
|
||
|
keyCol = (Node *) makeRelabelType((Expr *) keyCol,
|
||
|
key->partopcintype[i],
|
||
|
-1,
|
||
|
key->partcollation[i],
|
||
|
COERCE_EXPLICIT_CAST);
|
||
|
result = lappend(result,
|
||
|
make_opclause(operoid,
|
||
|
BOOLOID,
|
||
|
false,
|
||
|
(Expr *) keyCol,
|
||
|
(Expr *) lower_val,
|
||
|
InvalidOid,
|
||
|
key->partcollation[i]));
|
||
|
|
||
|
/* Go over to consider the next column. */
|
||
|
i++;
|
||
|
continue;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* We can say here that lower_val != upper_val. Emit expressions
|
||
|
* (keyCol >= lower_val) and (keyCol < upper_val), then stop.
|
||
|
*/
|
||
|
if (lower_val)
|
||
|
{
|
||
|
operoid = get_partition_operator(key, i,
|
||
|
BTGreaterEqualStrategyNumber,
|
||
|
&need_relabel);
|
||
|
|
||
|
if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
|
||
|
keyCol = (Node *) makeRelabelType((Expr *) keyCol,
|
||
|
key->partopcintype[i],
|
||
|
-1,
|
||
|
key->partcollation[i],
|
||
|
COERCE_EXPLICIT_CAST);
|
||
|
result = lappend(result,
|
||
|
make_opclause(operoid,
|
||
|
BOOLOID,
|
||
|
false,
|
||
|
(Expr *) keyCol,
|
||
|
(Expr *) lower_val,
|
||
|
InvalidOid,
|
||
|
key->partcollation[i]));
|
||
|
}
|
||
|
|
||
|
if (upper_val)
|
||
|
{
|
||
|
operoid = get_partition_operator(key, i,
|
||
|
BTLessStrategyNumber,
|
||
|
&need_relabel);
|
||
|
|
||
|
if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
|
||
|
keyCol = (Node *) makeRelabelType((Expr *) keyCol,
|
||
|
key->partopcintype[i],
|
||
|
-1,
|
||
|
key->partcollation[i],
|
||
|
COERCE_EXPLICIT_CAST);
|
||
|
|
||
|
result = lappend(result,
|
||
|
make_opclause(operoid,
|
||
|
BOOLOID,
|
||
|
false,
|
||
|
(Expr *) keyCol,
|
||
|
(Expr *) upper_val,
|
||
|
InvalidOid,
|
||
|
key->partcollation[i]));
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* We can stop at this column, because we would not have checked the
|
||
|
* next column when routing a given row into this partition.
|
||
|
*/
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* get_partition_operator
|
||
|
*
|
||
|
* Return oid of the operator of given strategy for a given partition key
|
||
|
* column.
|
||
|
*/
|
||
|
static Oid
|
||
|
get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
|
||
|
bool *need_relabel)
|
||
|
{
|
||
|
Oid operoid;
|
||
|
|
||
|
/*
|
||
|
* First check if there exists an operator of the given strategy, with
|
||
|
* this column's type as both its lefttype and righttype, in the
|
||
|
* partitioning operator family specified for the column.
|
||
|
*/
|
||
|
operoid = get_opfamily_member(key->partopfamily[col],
|
||
|
key->parttypid[col],
|
||
|
key->parttypid[col],
|
||
|
strategy);
|
||
|
|
||
|
/*
|
||
|
* If one doesn't exist, we must resort to using an operator in the same
|
||
|
* opreator family but with the operator class declared input type. It is
|
||
|
* OK to do so, because the column's type is known to be binary-coercible
|
||
|
* with the operator class input type (otherwise, the operator class in
|
||
|
* question would not have been accepted as the partitioning operator
|
||
|
* class). We must however inform the caller to wrap the non-Const
|
||
|
* expression with a RelabelType node to denote the implicit coercion. It
|
||
|
* ensures that the resulting expression structurally matches similarly
|
||
|
* processed expressions within the optimizer.
|
||
|
*/
|
||
|
if (!OidIsValid(operoid))
|
||
|
{
|
||
|
operoid = get_opfamily_member(key->partopfamily[col],
|
||
|
key->partopcintype[col],
|
||
|
key->partopcintype[col],
|
||
|
strategy);
|
||
|
*need_relabel = true;
|
||
|
}
|
||
|
else
|
||
|
*need_relabel = false;
|
||
|
|
||
|
if (!OidIsValid(operoid))
|
||
|
elog(ERROR, "could not find operator for partitioning");
|
||
|
|
||
|
return operoid;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* generate_partition_qual
|
||
|
*
|
||
|
* Generate partition predicate from rel's partition bound expression
|
||
|
*
|
||
|
* Result expression tree is stored CacheMemoryContext to ensure it survives
|
||
|
* as long as the relcache entry. But we should be running in a less long-lived
|
||
|
* working context. To avoid leaking cache memory if this routine fails partway
|
||
|
* through, we build in working memory and then copy the completed structure
|
||
|
* into cache memory.
|
||
|
*/
|
||
|
static List *
|
||
|
generate_partition_qual(Relation rel, bool recurse)
|
||
|
{
|
||
|
HeapTuple tuple;
|
||
|
MemoryContext oldcxt;
|
||
|
Datum boundDatum;
|
||
|
bool isnull;
|
||
|
Node *bound;
|
||
|
List *my_qual = NIL,
|
||
|
*result = NIL;
|
||
|
Relation parent;
|
||
|
|
||
|
/* Guard against stack overflow due to overly deep partition tree */
|
||
|
check_stack_depth();
|
||
|
|
||
|
/* Grab at least an AccessShareLock on the parent table */
|
||
|
parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
|
||
|
AccessShareLock);
|
||
|
|
||
|
/* Quick copy */
|
||
|
if (rel->rd_partcheck)
|
||
|
{
|
||
|
if (parent->rd_rel->relispartition && recurse)
|
||
|
result = list_concat(generate_partition_qual(parent, true),
|
||
|
copyObject(rel->rd_partcheck));
|
||
|
else
|
||
|
result = copyObject(rel->rd_partcheck);
|
||
|
|
||
|
heap_close(parent, AccessShareLock);
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/* Get pg_class.relpartbound */
|
||
|
if (!rel->rd_rel->relispartition) /* should not happen */
|
||
|
elog(ERROR, "relation \"%s\" has relispartition = false",
|
||
|
RelationGetRelationName(rel));
|
||
|
tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
|
||
|
boundDatum = SysCacheGetAttr(RELOID, tuple,
|
||
|
Anum_pg_class_relpartbound,
|
||
|
&isnull);
|
||
|
if (isnull) /* should not happen */
|
||
|
elog(ERROR, "relation \"%s\" has relpartbound = null",
|
||
|
RelationGetRelationName(rel));
|
||
|
bound = stringToNode(TextDatumGetCString(boundDatum));
|
||
|
ReleaseSysCache(tuple);
|
||
|
|
||
|
my_qual = get_qual_from_partbound(rel, parent, bound);
|
||
|
|
||
|
/* If requested, add parent's quals to the list (if any) */
|
||
|
if (parent->rd_rel->relispartition && recurse)
|
||
|
{
|
||
|
List *parent_check;
|
||
|
|
||
|
parent_check = generate_partition_qual(parent, true);
|
||
|
result = list_concat(parent_check, my_qual);
|
||
|
}
|
||
|
else
|
||
|
result = my_qual;
|
||
|
|
||
|
/* Save a copy of my_qual in the relcache */
|
||
|
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
|
||
|
rel->rd_partcheck = copyObject(my_qual);
|
||
|
MemoryContextSwitchTo(oldcxt);
|
||
|
|
||
|
/* Keep the parent locked until commit */
|
||
|
heap_close(parent, NoLock);
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/* ----------------
|
||
|
* FormPartitionKeyDatum
|
||
|
* Construct values[] and isnull[] arrays for the partition key
|
||
|
* of a tuple.
|
||
|
*
|
||
|
* pkinfo partition key execution info
|
||
|
* slot Heap tuple from which to extract partition key
|
||
|
* estate executor state for evaluating any partition key
|
||
|
* expressions (must be non-NULL)
|
||
|
* values Array of partition key Datums (output area)
|
||
|
* isnull Array of is-null indicators (output area)
|
||
|
*
|
||
|
* the ecxt_scantuple slot of estate's per-tuple expr context must point to
|
||
|
* the heap tuple passed in.
|
||
|
* ----------------
|
||
|
*/
|
||
|
static void
|
||
|
FormPartitionKeyDatum(PartitionDispatch pd,
|
||
|
TupleTableSlot *slot,
|
||
|
EState *estate,
|
||
|
Datum *values,
|
||
|
bool *isnull)
|
||
|
{
|
||
|
ListCell *partexpr_item;
|
||
|
int i;
|
||
|
|
||
|
if (pd->key->partexprs != NIL && pd->keystate == NIL)
|
||
|
{
|
||
|
/* Check caller has set up context correctly */
|
||
|
Assert(estate != NULL &&
|
||
|
GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
|
||
|
|
||
|
/* First time through, set up expression evaluation state */
|
||
|
pd->keystate = (List *) ExecPrepareExpr((Expr *) pd->key->partexprs,
|
||
|
estate);
|
||
|
}
|
||
|
|
||
|
partexpr_item = list_head(pd->keystate);
|
||
|
for (i = 0; i < pd->key->partnatts; i++)
|
||
|
{
|
||
|
AttrNumber keycol = pd->key->partattrs[i];
|
||
|
Datum datum;
|
||
|
bool isNull;
|
||
|
|
||
|
if (keycol != 0)
|
||
|
{
|
||
|
/* Plain column; get the value directly from the heap tuple */
|
||
|
datum = slot_getattr(slot, keycol, &isNull);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/* Expression; need to evaluate it */
|
||
|
if (partexpr_item == NULL)
|
||
|
elog(ERROR, "wrong number of partition key expressions");
|
||
|
datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
|
||
|
GetPerTupleExprContext(estate),
|
||
|
&isNull,
|
||
|
NULL);
|
||
|
partexpr_item = lnext(partexpr_item);
|
||
|
}
|
||
|
values[i] = datum;
|
||
|
isnull[i] = isNull;
|
||
|
}
|
||
|
|
||
|
if (partexpr_item != NULL)
|
||
|
elog(ERROR, "wrong number of partition key expressions");
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* get_partition_for_tuple
|
||
|
* Finds a leaf partition for tuple contained in *slot
|
||
|
*
|
||
|
* Returned value is the sequence number of the leaf partition thus found,
|
||
|
* or -1 if no leaf partition is found for the tuple. *failed_at is set
|
||
|
* to the OID of the partitioned table whose partition was not found in
|
||
|
* the latter case.
|
||
|
*/
|
||
|
int
|
||
|
get_partition_for_tuple(PartitionDispatch * pd,
|
||
|
TupleTableSlot *slot,
|
||
|
EState *estate,
|
||
|
Oid *failed_at)
|
||
|
{
|
||
|
PartitionDispatch parent;
|
||
|
Datum values[PARTITION_MAX_KEYS];
|
||
|
bool isnull[PARTITION_MAX_KEYS];
|
||
|
int cur_offset,
|
||
|
cur_index;
|
||
|
int i;
|
||
|
|
||
|
/* start with the root partitioned table */
|
||
|
parent = pd[0];
|
||
|
while (true)
|
||
|
{
|
||
|
PartitionKey key = parent->key;
|
||
|
PartitionDesc partdesc = parent->partdesc;
|
||
|
|
||
|
/* Quick exit */
|
||
|
if (partdesc->nparts == 0)
|
||
|
{
|
||
|
*failed_at = RelationGetRelid(parent->reldesc);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
/* Extract partition key from tuple */
|
||
|
FormPartitionKeyDatum(parent, slot, estate, values, isnull);
|
||
|
|
||
|
if (key->strategy == PARTITION_STRATEGY_RANGE)
|
||
|
{
|
||
|
/* Disallow nulls in the range partition key of the tuple */
|
||
|
for (i = 0; i < key->partnatts; i++)
|
||
|
if (isnull[i])
|
||
|
ereport(ERROR,
|
||
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
||
|
errmsg("range partition key of row contains null")));
|
||
|
}
|
||
|
|
||
|
if (partdesc->boundinfo->has_null && isnull[0])
|
||
|
/* Tuple maps to the null-accepting list partition */
|
||
|
cur_index = partdesc->boundinfo->null_index;
|
||
|
else
|
||
|
{
|
||
|
/* Else bsearch in partdesc->boundinfo */
|
||
|
bool equal = false;
|
||
|
|
||
|
cur_offset = partition_bound_bsearch(key, partdesc->boundinfo,
|
||
|
values, false, &equal);
|
||
|
switch (key->strategy)
|
||
|
{
|
||
|
case PARTITION_STRATEGY_LIST:
|
||
|
if (cur_offset >= 0 && equal)
|
||
|
cur_index = partdesc->boundinfo->indexes[cur_offset];
|
||
|
else
|
||
|
cur_index = -1;
|
||
|
break;
|
||
|
|
||
|
case PARTITION_STRATEGY_RANGE:
|
||
|
|
||
|
/*
|
||
|
* Offset returned is such that the bound at offset is
|
||
|
* found to be less or equal with the tuple. So, the bound
|
||
|
* at offset+1 would be the upper bound.
|
||
|
*/
|
||
|
cur_index = partdesc->boundinfo->indexes[cur_offset + 1];
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
elog(ERROR, "unexpected partition strategy: %d",
|
||
|
(int) key->strategy);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* cur_index < 0 means we failed to find a partition of this parent.
|
||
|
* cur_index >= 0 means we either found the leaf partition, or the
|
||
|
* next parent to find a partition of.
|
||
|
*/
|
||
|
if (cur_index < 0)
|
||
|
{
|
||
|
*failed_at = RelationGetRelid(parent->reldesc);
|
||
|
return -1;
|
||
|
}
|
||
|
else if (parent->indexes[cur_index] < 0)
|
||
|
parent = pd[-parent->indexes[cur_index]];
|
||
|
else
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
return parent->indexes[cur_index];
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* qsort_partition_list_value_cmp
|
||
|
*
|
||
|
* Compare two list partition bound datums
|
||
|
*/
|
||
|
static int32
|
||
|
qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
|
||
|
{
|
||
|
Datum val1 = (*(const PartitionListValue **) a)->value,
|
||
|
val2 = (*(const PartitionListValue **) b)->value;
|
||
|
PartitionKey key = (PartitionKey) arg;
|
||
|
|
||
|
return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
|
||
|
key->partcollation[0],
|
||
|
val1, val2));
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* make_one_range_bound
|
||
|
*
|
||
|
* Return a PartitionRangeBound given a list of PartitionRangeDatum elements
|
||
|
* and a flag telling whether the bound is lower or not. Made into a function
|
||
|
* because there are multiple sites that want to use this facility.
|
||
|
*/
|
||
|
static PartitionRangeBound *
|
||
|
make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
|
||
|
{
|
||
|
PartitionRangeBound *bound;
|
||
|
ListCell *cell;
|
||
|
int i;
|
||
|
|
||
|
bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
|
||
|
bound->index = index;
|
||
|
bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
|
||
|
bound->content = (RangeDatumContent *) palloc0(key->partnatts *
|
||
|
sizeof(RangeDatumContent));
|
||
|
bound->lower = lower;
|
||
|
|
||
|
i = 0;
|
||
|
foreach(cell, datums)
|
||
|
{
|
||
|
PartitionRangeDatum *datum = lfirst(cell);
|
||
|
|
||
|
/* What's contained in this range datum? */
|
||
|
bound->content[i] = !datum->infinite
|
||
|
? RANGE_DATUM_FINITE
|
||
|
: (lower ? RANGE_DATUM_NEG_INF
|
||
|
: RANGE_DATUM_POS_INF);
|
||
|
|
||
|
if (bound->content[i] == RANGE_DATUM_FINITE)
|
||
|
{
|
||
|
Const *val = (Const *) datum->value;
|
||
|
|
||
|
if (val->constisnull)
|
||
|
elog(ERROR, "invalid range bound datum");
|
||
|
bound->datums[i] = val->constvalue;
|
||
|
}
|
||
|
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
return bound;
|
||
|
}
|
||
|
|
||
|
/* Used when sorting range bounds across all range partitions */
|
||
|
static int32
|
||
|
qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
|
||
|
{
|
||
|
PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
|
||
|
PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
|
||
|
PartitionKey key = (PartitionKey) arg;
|
||
|
|
||
|
return partition_rbound_cmp(key, b1->datums, b1->content, b1->lower, b2);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* partition_rbound_cmp
|
||
|
*
|
||
|
* Return for two range bounds whether the 1st one (specified in datum1,
|
||
|
* content1, and lower1) is <=, =, >= the bound specified in *b2
|
||
|
*/
|
||
|
static int32
|
||
|
partition_rbound_cmp(PartitionKey key,
|
||
|
Datum *datums1, RangeDatumContent *content1, bool lower1,
|
||
|
PartitionRangeBound *b2)
|
||
|
{
|
||
|
int32 cmpval;
|
||
|
int i;
|
||
|
Datum *datums2 = b2->datums;
|
||
|
RangeDatumContent *content2 = b2->content;
|
||
|
bool lower2 = b2->lower;
|
||
|
|
||
|
for (i = 0; i < key->partnatts; i++)
|
||
|
{
|
||
|
/*
|
||
|
* First, handle cases involving infinity, which don't require
|
||
|
* invoking the comparison proc.
|
||
|
*/
|
||
|
if (content1[i] != RANGE_DATUM_FINITE &&
|
||
|
content2[i] != RANGE_DATUM_FINITE)
|
||
|
|
||
|
/*
|
||
|
* Both are infinity, so they are equal unless one is negative
|
||
|
* infinity and other positive (or vice versa)
|
||
|
*/
|
||
|
return content1[i] == content2[i] ? 0
|
||
|
: (content1[i] < content2[i] ? -1 : 1);
|
||
|
else if (content1[i] != RANGE_DATUM_FINITE)
|
||
|
return content1[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
|
||
|
else if (content2[i] != RANGE_DATUM_FINITE)
|
||
|
return content2[i] == RANGE_DATUM_NEG_INF ? 1 : -1;
|
||
|
|
||
|
cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
|
||
|
key->partcollation[i],
|
||
|
datums1[i],
|
||
|
datums2[i]));
|
||
|
if (cmpval != 0)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If the comparison is anything other than equal, we're done. If they
|
||
|
* compare equal though, we still have to consider whether the boundaries
|
||
|
* are inclusive or exclusive. Exclusive one is considered smaller of the
|
||
|
* two.
|
||
|
*/
|
||
|
if (cmpval == 0 && lower1 != lower2)
|
||
|
cmpval = lower1 ? 1 : -1;
|
||
|
|
||
|
return cmpval;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* partition_rbound_datum_cmp
|
||
|
*
|
||
|
* Return whether range bound (specified in rb_datums, rb_content, and
|
||
|
* rb_lower) <=, =, >= partition key of tuple (tuple_datums)
|
||
|
*/
|
||
|
static int32
|
||
|
partition_rbound_datum_cmp(PartitionKey key,
|
||
|
Datum *rb_datums, RangeDatumContent *rb_content,
|
||
|
Datum *tuple_datums)
|
||
|
{
|
||
|
int i;
|
||
|
int32 cmpval = -1;
|
||
|
|
||
|
for (i = 0; i < key->partnatts; i++)
|
||
|
{
|
||
|
if (rb_content[i] != RANGE_DATUM_FINITE)
|
||
|
return rb_content[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
|
||
|
|
||
|
cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
|
||
|
key->partcollation[i],
|
||
|
rb_datums[i],
|
||
|
tuple_datums[i]));
|
||
|
if (cmpval != 0)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
return cmpval;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* partition_bound_cmp
|
||
|
*
|
||
|
* Return whether the bound at offset in boundinfo is <=, =, >= the argument
|
||
|
* specified in *probe.
|
||
|
*/
|
||
|
static int32
|
||
|
partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
|
||
|
int offset, void *probe, bool probe_is_bound)
|
||
|
{
|
||
|
Datum *bound_datums = boundinfo->datums[offset];
|
||
|
int32 cmpval = -1;
|
||
|
|
||
|
switch (key->strategy)
|
||
|
{
|
||
|
case PARTITION_STRATEGY_LIST:
|
||
|
cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
|
||
|
key->partcollation[0],
|
||
|
bound_datums[0],
|
||
|
*(Datum *) probe));
|
||
|
break;
|
||
|
|
||
|
case PARTITION_STRATEGY_RANGE:
|
||
|
{
|
||
|
RangeDatumContent *content = boundinfo->content[offset];
|
||
|
|
||
|
if (probe_is_bound)
|
||
|
{
|
||
|
/*
|
||
|
* We need to pass whether the existing bound is a lower
|
||
|
* bound, so that two equal-valued lower and upper bounds
|
||
|
* are not regarded equal.
|
||
|
*/
|
||
|
bool lower = boundinfo->indexes[offset] < 0;
|
||
|
|
||
|
cmpval = partition_rbound_cmp(key,
|
||
|
bound_datums, content, lower,
|
||
|
(PartitionRangeBound *) probe);
|
||
|
}
|
||
|
else
|
||
|
cmpval = partition_rbound_datum_cmp(key,
|
||
|
bound_datums, content,
|
||
|
(Datum *) probe);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
default:
|
||
|
elog(ERROR, "unexpected partition strategy: %d",
|
||
|
(int) key->strategy);
|
||
|
}
|
||
|
|
||
|
return cmpval;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Binary search on a collection of partition bounds. Returns greatest index
|
||
|
* of bound in array boundinfo->datums which is less or equal with *probe.
|
||
|
* If all bounds in the array are greater than *probe, -1 is returned.
|
||
|
*
|
||
|
* *probe could either be a partition bound or a Datum array representing
|
||
|
* the partition key of a tuple being routed; probe_is_bound tells which.
|
||
|
* We pass that down to the comparison function so that it can interpret the
|
||
|
* contents of *probe accordingly.
|
||
|
*
|
||
|
* *is_equal is set to whether the bound at the returned index is equal with
|
||
|
* *probe.
|
||
|
*/
|
||
|
static int
|
||
|
partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
|
||
|
void *probe, bool probe_is_bound, bool *is_equal)
|
||
|
{
|
||
|
int lo,
|
||
|
hi,
|
||
|
mid;
|
||
|
|
||
|
lo = -1;
|
||
|
hi = boundinfo->ndatums - 1;
|
||
|
while (lo < hi)
|
||
|
{
|
||
|
int32 cmpval;
|
||
|
|
||
|
mid = (lo + hi + 1) / 2;
|
||
|
cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
|
||
|
probe_is_bound);
|
||
|
if (cmpval <= 0)
|
||
|
{
|
||
|
lo = mid;
|
||
|
*is_equal = (cmpval == 0);
|
||
|
}
|
||
|
else
|
||
|
hi = mid - 1;
|
||
|
}
|
||
|
|
||
|
return lo;
|
||
|
}
|