3147 lines
86 KiB
C
3147 lines
86 KiB
C
/*
|
|
* brin_minmax_multi.c
|
|
* Implementation of Multi Min/Max opclass for BRIN
|
|
*
|
|
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* Implements a variant of minmax opclass, where the summary is composed of
|
|
* multiple smaller intervals. This allows us to handle outliers, which
|
|
* usually make the simple minmax opclass inefficient.
|
|
*
|
|
* Consider for example page range with simple minmax interval [1000,2000],
|
|
* and assume a new row gets inserted into the range with value 1000000.
|
|
* Due to that the interval gets [1000,1000000]. I.e. the minmax interval
|
|
* got 1000x wider and won't be useful to eliminate scan keys between 2001
|
|
* and 1000000.
|
|
*
|
|
* With minmax-multi opclass, we may have [1000,2000] interval initially,
|
|
* but after adding the new row we start tracking it as two interval:
|
|
*
|
|
* [1000,2000] and [1000000,1000000]
|
|
*
|
|
* This allows us to still eliminate the page range when the scan keys hit
|
|
* the gap between 2000 and 1000000, making it useful in cases when the
|
|
* simple minmax opclass gets inefficient.
|
|
*
|
|
* The number of intervals tracked per page range is somewhat flexible.
|
|
* What is restricted is the number of values per page range, and the limit
|
|
* is currently 32 (see values_per_range reloption). Collapsed intervals
|
|
* (with equal minimum and maximum value) are stored as a single value,
|
|
* while regular intervals require two values.
|
|
*
|
|
* When the number of values gets too high (by adding new values to the
|
|
* summary), we merge some of the intervals to free space for more values.
|
|
* This is done in a greedy way - we simply pick the two closest intervals,
|
|
* merge them, and repeat this until the number of values to store gets
|
|
* sufficiently low (below 50% of maximum values), but that is mostly
|
|
* arbitrary threshold and may be changed easily).
|
|
*
|
|
* To pick the closest intervals we use the "distance" support procedure,
|
|
* which measures space between two ranges (i.e. the length of an interval).
|
|
* The computed value may be an approximation - in the worst case we will
|
|
* merge two ranges that are slightly less optimal at that step, but the
|
|
* index should still produce correct results.
|
|
*
|
|
* The compactions (reducing the number of values) is fairly expensive, as
|
|
* it requires calling the distance functions, sorting etc. So when building
|
|
* the summary, we use a significantly larger buffer, and only enforce the
|
|
* exact limit at the very end. This improves performance, and it also helps
|
|
* with building better ranges (due to the greedy approach).
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/access/brin/brin_minmax_multi.c
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
/* needed for PGSQL_AF_INET */
|
|
#include <sys/socket.h>
|
|
|
|
#include "access/brin.h"
|
|
#include "access/brin_internal.h"
|
|
#include "access/brin_tuple.h"
|
|
#include "access/genam.h"
|
|
#include "access/htup_details.h"
|
|
#include "access/reloptions.h"
|
|
#include "access/stratnum.h"
|
|
#include "catalog/pg_am.h"
|
|
#include "catalog/pg_amop.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "utils/array.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/date.h"
|
|
#include "utils/datum.h"
|
|
#include "utils/float.h"
|
|
#include "utils/inet.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/pg_lsn.h"
|
|
#include "utils/rel.h"
|
|
#include "utils/syscache.h"
|
|
#include "utils/timestamp.h"
|
|
#include "utils/uuid.h"
|
|
|
|
/*
|
|
* Additional SQL level support functions
|
|
*
|
|
* Procedure numbers must not use values reserved for BRIN itself; see
|
|
* brin_internal.h.
|
|
*/
|
|
#define MINMAX_MAX_PROCNUMS 1 /* maximum support procs we need */
|
|
#define PROCNUM_DISTANCE 11 /* required, distance between values */
|
|
|
|
/*
|
|
* Subtract this from procnum to obtain index in MinmaxMultiOpaque arrays
|
|
* (Must be equal to minimum of private procnums).
|
|
*/
|
|
#define PROCNUM_BASE 11
|
|
|
|
/*
|
|
* Sizing the insert buffer - we use 10x the number of values specified
|
|
* in the reloption, but we cap it to 8192 not to get too large. When
|
|
* the buffer gets full, we reduce the number of values by half.
|
|
*/
|
|
#define MINMAX_BUFFER_FACTOR 10
|
|
#define MINMAX_BUFFER_MIN 256
|
|
#define MINMAX_BUFFER_MAX 8192
|
|
#define MINMAX_BUFFER_LOAD_FACTOR 0.5
|
|
|
|
typedef struct MinmaxMultiOpaque
|
|
{
|
|
FmgrInfo extra_procinfos[MINMAX_MAX_PROCNUMS];
|
|
bool extra_proc_missing[MINMAX_MAX_PROCNUMS];
|
|
Oid cached_subtype;
|
|
FmgrInfo strategy_procinfos[BTMaxStrategyNumber];
|
|
} MinmaxMultiOpaque;
|
|
|
|
/*
|
|
* Storage type for BRIN's minmax reloptions
|
|
*/
|
|
typedef struct MinMaxMultiOptions
|
|
{
|
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
|
int valuesPerRange; /* number of values per range */
|
|
} MinMaxMultiOptions;
|
|
|
|
#define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE 32
|
|
|
|
#define MinMaxMultiGetValuesPerRange(opts) \
|
|
((opts) && (((MinMaxMultiOptions *) (opts))->valuesPerRange != 0) ? \
|
|
((MinMaxMultiOptions *) (opts))->valuesPerRange : \
|
|
MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE)
|
|
|
|
#define SAMESIGN(a,b) (((a) < 0) == ((b) < 0))
|
|
|
|
/*
|
|
* The summary of minmax-multi indexes has two representations - Ranges for
|
|
* convenient processing, and SerializedRanges for storage in bytea value.
|
|
*
|
|
* The Ranges struct stores the boundary values in a single array, but we
|
|
* treat regular and single-point ranges differently to save space. For
|
|
* regular ranges (with different boundary values) we have to store both
|
|
* the lower and upper bound of the range, while for "single-point ranges"
|
|
* we only need to store a single value.
|
|
*
|
|
* The 'values' array stores boundary values for regular ranges first (there
|
|
* are 2*nranges values to store), and then the nvalues boundary values for
|
|
* single-point ranges. That is, we have (2*nranges + nvalues) boundary
|
|
* values in the array.
|
|
*
|
|
* +-------------------------+----------------------------------+
|
|
* | ranges (2 * nranges of) | single point values (nvalues of) |
|
|
* +-------------------------+----------------------------------+
|
|
*
|
|
* This allows us to quickly add new values, and store outliers without
|
|
* having to widen any of the existing range values.
|
|
*
|
|
* 'nsorted' denotes how many of 'nvalues' in the values[] array are sorted.
|
|
* When nsorted == nvalues, all single point values are sorted.
|
|
*
|
|
* We never store more than maxvalues values (as set by values_per_range
|
|
* reloption). If needed we merge some of the ranges.
|
|
*
|
|
* To minimize palloc overhead, we always allocate the full array with
|
|
* space for maxvalues elements. This should be fine as long as the
|
|
* maxvalues is reasonably small (64 seems fine), which is the case
|
|
* thanks to values_per_range reloption being limited to 256.
|
|
*/
|
|
typedef struct Ranges
|
|
{
|
|
/* Cache information that we need quite often. */
|
|
Oid typid;
|
|
Oid colloid;
|
|
AttrNumber attno;
|
|
FmgrInfo *cmp;
|
|
|
|
/* (2*nranges + nvalues) <= maxvalues */
|
|
int nranges; /* number of ranges in the values[] array */
|
|
int nsorted; /* number of nvalues which are sorted */
|
|
int nvalues; /* number of point values in values[] array */
|
|
int maxvalues; /* number of elements in the values[] array */
|
|
|
|
/*
|
|
* We simply add the values into a large buffer, without any expensive
|
|
* steps (sorting, deduplication, ...). The buffer is a multiple of the
|
|
* target number of values, so the compaction happens less often,
|
|
* amortizing the costs. We keep the actual target and compact to the
|
|
* requested number of values at the very end, before serializing to
|
|
* on-disk representation.
|
|
*/
|
|
/* requested number of values */
|
|
int target_maxvalues;
|
|
|
|
/* values stored for this range - either raw values, or ranges */
|
|
Datum values[FLEXIBLE_ARRAY_MEMBER];
|
|
} Ranges;
|
|
|
|
/*
|
|
* On-disk the summary is stored as a bytea value, with a simple header
|
|
* with basic metadata, followed by the boundary values. It has a varlena
|
|
* header, so can be treated as varlena directly.
|
|
*
|
|
* See brin_range_serialize/brin_range_deserialize for serialization details.
|
|
*/
|
|
typedef struct SerializedRanges
|
|
{
|
|
/* varlena header (do not touch directly!) */
|
|
int32 vl_len_;
|
|
|
|
/* type of values stored in the data array */
|
|
Oid typid;
|
|
|
|
/* (2*nranges + nvalues) <= maxvalues */
|
|
int nranges; /* number of ranges in the array (stored) */
|
|
int nvalues; /* number of values in the data array (all) */
|
|
int maxvalues; /* maximum number of values (reloption) */
|
|
|
|
/* contains the actual data */
|
|
char data[FLEXIBLE_ARRAY_MEMBER];
|
|
} SerializedRanges;
|
|
|
|
static SerializedRanges *brin_range_serialize(Ranges *range);
|
|
|
|
static Ranges *brin_range_deserialize(int maxvalues,
|
|
SerializedRanges *serialized);
|
|
|
|
|
|
/*
|
|
* Used to represent ranges expanded to make merging and combining easier.
|
|
*
|
|
* Each expanded range is essentially an interval, represented by min/max
|
|
* values, along with a flag whether it's a collapsed range (in which case
|
|
* the min and max values are equal). We have the flag to handle by-ref
|
|
* data types - we can't simply compare the datums, and this saves some
|
|
* calls to the type-specific comparator function.
|
|
*/
|
|
typedef struct ExpandedRange
|
|
{
|
|
Datum minval; /* lower boundary */
|
|
Datum maxval; /* upper boundary */
|
|
bool collapsed; /* true if minval==maxval */
|
|
} ExpandedRange;
|
|
|
|
/*
|
|
* Represents a distance between two ranges (identified by index into
|
|
* an array of extended ranges).
|
|
*/
|
|
typedef struct DistanceValue
|
|
{
|
|
int index;
|
|
double value;
|
|
} DistanceValue;
|
|
|
|
|
|
/* Cache for support and strategy procedures. */
|
|
|
|
static FmgrInfo *minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno,
|
|
uint16 procnum);
|
|
|
|
static FmgrInfo *minmax_multi_get_strategy_procinfo(BrinDesc *bdesc,
|
|
uint16 attno, Oid subtype,
|
|
uint16 strategynum);
|
|
|
|
typedef struct compare_context
|
|
{
|
|
FmgrInfo *cmpFn;
|
|
Oid colloid;
|
|
} compare_context;
|
|
|
|
static int compare_values(const void *a, const void *b, void *arg);
|
|
|
|
|
|
#ifdef USE_ASSERT_CHECKING
|
|
/*
|
|
* Check that the order of the array values is correct, using the cmp
|
|
* function (which should be BTLessStrategyNumber).
|
|
*/
|
|
static void
|
|
AssertArrayOrder(FmgrInfo *cmp, Oid colloid, Datum *values, int nvalues)
|
|
{
|
|
int i;
|
|
Datum lt;
|
|
|
|
for (i = 0; i < (nvalues - 1); i++)
|
|
{
|
|
lt = FunctionCall2Coll(cmp, colloid, values[i], values[i + 1]);
|
|
Assert(DatumGetBool(lt));
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Comprehensive check of the Ranges structure.
|
|
*/
|
|
static void
|
|
AssertCheckRanges(Ranges *ranges, FmgrInfo *cmpFn, Oid colloid)
|
|
{
|
|
#ifdef USE_ASSERT_CHECKING
|
|
int i;
|
|
|
|
/* some basic sanity checks */
|
|
Assert(ranges->nranges >= 0);
|
|
Assert(ranges->nsorted >= 0);
|
|
Assert(ranges->nvalues >= ranges->nsorted);
|
|
Assert(ranges->maxvalues >= 2 * ranges->nranges + ranges->nvalues);
|
|
Assert(ranges->typid != InvalidOid);
|
|
|
|
/*
|
|
* First the ranges - there are 2*nranges boundary values, and the values
|
|
* have to be strictly ordered (equal values would mean the range is
|
|
* collapsed, and should be stored as a point). This also guarantees that
|
|
* the ranges do not overlap.
|
|
*/
|
|
AssertArrayOrder(cmpFn, colloid, ranges->values, 2 * ranges->nranges);
|
|
|
|
/* then the single-point ranges (with nvalues boundary values ) */
|
|
AssertArrayOrder(cmpFn, colloid, &ranges->values[2 * ranges->nranges],
|
|
ranges->nsorted);
|
|
|
|
/*
|
|
* Check that none of the values are not covered by ranges (both sorted
|
|
* and unsorted)
|
|
*/
|
|
if (ranges->nranges > 0)
|
|
{
|
|
for (i = 0; i < ranges->nvalues; i++)
|
|
{
|
|
Datum compar;
|
|
int start,
|
|
end;
|
|
Datum minvalue = ranges->values[0];
|
|
Datum maxvalue = ranges->values[2 * ranges->nranges - 1];
|
|
Datum value = ranges->values[2 * ranges->nranges + i];
|
|
|
|
compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
|
|
|
|
/*
|
|
* If the value is smaller than the lower bound in the first range
|
|
* then it cannot possibly be in any of the ranges.
|
|
*/
|
|
if (DatumGetBool(compar))
|
|
continue;
|
|
|
|
compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
|
|
|
|
/*
|
|
* Likewise, if the value is larger than the upper bound of the
|
|
* final range, then it cannot possibly be inside any of the
|
|
* ranges.
|
|
*/
|
|
if (DatumGetBool(compar))
|
|
continue;
|
|
|
|
/* bsearch the ranges to see if 'value' fits within any of them */
|
|
start = 0; /* first range */
|
|
end = ranges->nranges - 1; /* last range */
|
|
while (true)
|
|
{
|
|
int midpoint = (start + end) / 2;
|
|
|
|
/* this means we ran out of ranges in the last step */
|
|
if (start > end)
|
|
break;
|
|
|
|
/* copy the min/max values from the ranges */
|
|
minvalue = ranges->values[2 * midpoint];
|
|
maxvalue = ranges->values[2 * midpoint + 1];
|
|
|
|
/*
|
|
* Is the value smaller than the minval? If yes, we'll recurse
|
|
* to the left side of range array.
|
|
*/
|
|
compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
|
|
|
|
/* smaller than the smallest value in this range */
|
|
if (DatumGetBool(compar))
|
|
{
|
|
end = (midpoint - 1);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Is the value greater than the minval? If yes, we'll recurse
|
|
* to the right side of range array.
|
|
*/
|
|
compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
|
|
|
|
/* larger than the largest value in this range */
|
|
if (DatumGetBool(compar))
|
|
{
|
|
start = (midpoint + 1);
|
|
continue;
|
|
}
|
|
|
|
/* hey, we found a matching range */
|
|
Assert(false);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* and values in the unsorted part must not be in the sorted part */
|
|
if (ranges->nsorted > 0)
|
|
{
|
|
compare_context cxt;
|
|
|
|
cxt.colloid = ranges->colloid;
|
|
cxt.cmpFn = ranges->cmp;
|
|
|
|
for (i = ranges->nsorted; i < ranges->nvalues; i++)
|
|
{
|
|
Datum value = ranges->values[2 * ranges->nranges + i];
|
|
|
|
Assert(bsearch_arg(&value, &ranges->values[2 * ranges->nranges],
|
|
ranges->nsorted, sizeof(Datum),
|
|
compare_values, (void *) &cxt) == NULL);
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Check that the expanded ranges (built when reducing the number of ranges
|
|
* by combining some of them) are correctly sorted and do not overlap.
|
|
*/
|
|
static void
|
|
AssertCheckExpandedRanges(BrinDesc *bdesc, Oid colloid, AttrNumber attno,
|
|
Form_pg_attribute attr, ExpandedRange *ranges,
|
|
int nranges)
|
|
{
|
|
#ifdef USE_ASSERT_CHECKING
|
|
int i;
|
|
FmgrInfo *eq;
|
|
FmgrInfo *lt;
|
|
|
|
eq = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
|
BTEqualStrategyNumber);
|
|
|
|
lt = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
|
BTLessStrategyNumber);
|
|
|
|
/*
|
|
* Each range independently should be valid, i.e. that for the boundary
|
|
* values (lower <= upper).
|
|
*/
|
|
for (i = 0; i < nranges; i++)
|
|
{
|
|
Datum r;
|
|
Datum minval = ranges[i].minval;
|
|
Datum maxval = ranges[i].maxval;
|
|
|
|
if (ranges[i].collapsed) /* collapsed: minval == maxval */
|
|
r = FunctionCall2Coll(eq, colloid, minval, maxval);
|
|
else /* non-collapsed: minval < maxval */
|
|
r = FunctionCall2Coll(lt, colloid, minval, maxval);
|
|
|
|
Assert(DatumGetBool(r));
|
|
}
|
|
|
|
/*
|
|
* And the ranges should be ordered and must not overlap, i.e. upper <
|
|
* lower for boundaries of consecutive ranges.
|
|
*/
|
|
for (i = 0; i < nranges - 1; i++)
|
|
{
|
|
Datum r;
|
|
Datum maxval = ranges[i].maxval;
|
|
Datum minval = ranges[i + 1].minval;
|
|
|
|
r = FunctionCall2Coll(lt, colloid, maxval, minval);
|
|
|
|
Assert(DatumGetBool(r));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* minmax_multi_init
|
|
* Initialize the deserialized range list, allocate all the memory.
|
|
*
|
|
* This is only in-memory representation of the ranges, so we allocate
|
|
* enough space for the maximum number of values (so as not to have to do
|
|
* repallocs as the ranges grow).
|
|
*/
|
|
static Ranges *
|
|
minmax_multi_init(int maxvalues)
|
|
{
|
|
Size len;
|
|
Ranges *ranges;
|
|
|
|
Assert(maxvalues > 0);
|
|
|
|
len = offsetof(Ranges, values); /* fixed header */
|
|
len += maxvalues * sizeof(Datum); /* Datum values */
|
|
|
|
ranges = (Ranges *) palloc0(len);
|
|
|
|
ranges->maxvalues = maxvalues;
|
|
|
|
return ranges;
|
|
}
|
|
|
|
|
|
/*
|
|
* range_deduplicate_values
|
|
* Deduplicate the part with values in the simple points.
|
|
*
|
|
* This is meant to be a cheaper way of reducing the size of the ranges. It
|
|
* does not touch the ranges, and only sorts the other values - it does not
|
|
* call the distance functions, which may be quite expensive, etc.
|
|
*
|
|
* We do know the values are not duplicate with the ranges, because we check
|
|
* that before adding a new value. Same for the sorted part of values.
|
|
*/
|
|
static void
|
|
range_deduplicate_values(Ranges *range)
|
|
{
|
|
int i,
|
|
n;
|
|
int start;
|
|
compare_context cxt;
|
|
|
|
/*
|
|
* If there are no unsorted values, we're done (this probably can't
|
|
* happen, as we're adding values to unsorted part).
|
|
*/
|
|
if (range->nsorted == range->nvalues)
|
|
return;
|
|
|
|
/* sort the values */
|
|
cxt.colloid = range->colloid;
|
|
cxt.cmpFn = range->cmp;
|
|
|
|
/* the values start right after the ranges (which are always sorted) */
|
|
start = 2 * range->nranges;
|
|
|
|
/*
|
|
* XXX This might do a merge sort, to leverage that the first part of the
|
|
* array is already sorted. If the sorted part is large, it might be quite
|
|
* a bit faster.
|
|
*/
|
|
qsort_arg(&range->values[start],
|
|
range->nvalues, sizeof(Datum),
|
|
compare_values, &cxt);
|
|
|
|
n = 1;
|
|
for (i = 1; i < range->nvalues; i++)
|
|
{
|
|
/* same as preceding value, so store it */
|
|
if (compare_values(&range->values[start + i - 1],
|
|
&range->values[start + i],
|
|
(void *) &cxt) == 0)
|
|
continue;
|
|
|
|
range->values[start + n] = range->values[start + i];
|
|
|
|
n++;
|
|
}
|
|
|
|
/* now all the values are sorted */
|
|
range->nvalues = n;
|
|
range->nsorted = n;
|
|
|
|
AssertCheckRanges(range, range->cmp, range->colloid);
|
|
}
|
|
|
|
|
|
/*
|
|
* brin_range_serialize
|
|
* Serialize the in-memory representation into a compact varlena value.
|
|
*
|
|
* Simply copy the header and then also the individual values, as stored
|
|
* in the in-memory value array.
|
|
*/
|
|
static SerializedRanges *
|
|
brin_range_serialize(Ranges *range)
|
|
{
|
|
Size len;
|
|
int nvalues;
|
|
SerializedRanges *serialized;
|
|
Oid typid;
|
|
int typlen;
|
|
bool typbyval;
|
|
|
|
char *ptr;
|
|
|
|
/* simple sanity checks */
|
|
Assert(range->nranges >= 0);
|
|
Assert(range->nsorted >= 0);
|
|
Assert(range->nvalues >= 0);
|
|
Assert(range->maxvalues > 0);
|
|
Assert(range->target_maxvalues > 0);
|
|
|
|
/* at this point the range should be compacted to the target size */
|
|
Assert(2 * range->nranges + range->nvalues <= range->target_maxvalues);
|
|
|
|
Assert(range->target_maxvalues <= range->maxvalues);
|
|
|
|
/* range boundaries are always sorted */
|
|
Assert(range->nvalues >= range->nsorted);
|
|
|
|
/* deduplicate values, if there's unsorted part */
|
|
range_deduplicate_values(range);
|
|
|
|
/* see how many Datum values we actually have */
|
|
nvalues = 2 * range->nranges + range->nvalues;
|
|
|
|
typid = range->typid;
|
|
typbyval = get_typbyval(typid);
|
|
typlen = get_typlen(typid);
|
|
|
|
/* header is always needed */
|
|
len = offsetof(SerializedRanges, data);
|
|
|
|
/*
|
|
* The space needed depends on data type - for fixed-length data types
|
|
* (by-value and some by-reference) it's pretty simple, just multiply
|
|
* (attlen * nvalues) and we're done. For variable-length by-reference
|
|
* types we need to actually walk all the values and sum the lengths.
|
|
*/
|
|
if (typlen == -1) /* varlena */
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < nvalues; i++)
|
|
{
|
|
len += VARSIZE_ANY(range->values[i]);
|
|
}
|
|
}
|
|
else if (typlen == -2) /* cstring */
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < nvalues; i++)
|
|
{
|
|
/* don't forget to include the null terminator ;-) */
|
|
len += strlen(DatumGetCString(range->values[i])) + 1;
|
|
}
|
|
}
|
|
else /* fixed-length types (even by-reference) */
|
|
{
|
|
Assert(typlen > 0);
|
|
len += nvalues * typlen;
|
|
}
|
|
|
|
/*
|
|
* Allocate the serialized object, copy the basic information. The
|
|
* serialized object is a varlena, so update the header.
|
|
*/
|
|
serialized = (SerializedRanges *) palloc0(len);
|
|
SET_VARSIZE(serialized, len);
|
|
|
|
serialized->typid = typid;
|
|
serialized->nranges = range->nranges;
|
|
serialized->nvalues = range->nvalues;
|
|
serialized->maxvalues = range->target_maxvalues;
|
|
|
|
/*
|
|
* And now copy also the boundary values (like the length calculation this
|
|
* depends on the particular data type).
|
|
*/
|
|
ptr = serialized->data; /* start of the serialized data */
|
|
|
|
for (int i = 0; i < nvalues; i++)
|
|
{
|
|
if (typbyval) /* simple by-value data types */
|
|
{
|
|
Datum tmp;
|
|
|
|
/*
|
|
* For byval types, we need to copy just the significant bytes -
|
|
* we can't use memcpy directly, as that assumes little-endian
|
|
* behavior. store_att_byval does almost what we need, but it
|
|
* requires a properly aligned buffer - the output buffer does not
|
|
* guarantee that. So we simply use a local Datum variable (which
|
|
* guarantees proper alignment), and then copy the value from it.
|
|
*/
|
|
store_att_byval(&tmp, range->values[i], typlen);
|
|
|
|
memcpy(ptr, &tmp, typlen);
|
|
ptr += typlen;
|
|
}
|
|
else if (typlen > 0) /* fixed-length by-ref types */
|
|
{
|
|
memcpy(ptr, DatumGetPointer(range->values[i]), typlen);
|
|
ptr += typlen;
|
|
}
|
|
else if (typlen == -1) /* varlena */
|
|
{
|
|
int tmp = VARSIZE_ANY(DatumGetPointer(range->values[i]));
|
|
|
|
memcpy(ptr, DatumGetPointer(range->values[i]), tmp);
|
|
ptr += tmp;
|
|
}
|
|
else if (typlen == -2) /* cstring */
|
|
{
|
|
int tmp = strlen(DatumGetCString(range->values[i])) + 1;
|
|
|
|
memcpy(ptr, DatumGetCString(range->values[i]), tmp);
|
|
ptr += tmp;
|
|
}
|
|
|
|
/* make sure we haven't overflown the buffer end */
|
|
Assert(ptr <= ((char *) serialized + len));
|
|
}
|
|
|
|
/* exact size */
|
|
Assert(ptr == ((char *) serialized + len));
|
|
|
|
return serialized;
|
|
}
|
|
|
|
/*
|
|
* brin_range_deserialize
|
|
* Serialize the in-memory representation into a compact varlena value.
|
|
*
|
|
* Simply copy the header and then also the individual values, as stored
|
|
* in the in-memory value array.
|
|
*/
|
|
static Ranges *
|
|
brin_range_deserialize(int maxvalues, SerializedRanges *serialized)
|
|
{
|
|
int i,
|
|
nvalues;
|
|
char *ptr,
|
|
*dataptr;
|
|
bool typbyval;
|
|
int typlen;
|
|
Size datalen;
|
|
|
|
Ranges *range;
|
|
|
|
Assert(serialized->nranges >= 0);
|
|
Assert(serialized->nvalues >= 0);
|
|
Assert(serialized->maxvalues > 0);
|
|
|
|
nvalues = 2 * serialized->nranges + serialized->nvalues;
|
|
|
|
Assert(nvalues <= serialized->maxvalues);
|
|
Assert(serialized->maxvalues <= maxvalues);
|
|
|
|
range = minmax_multi_init(maxvalues);
|
|
|
|
/* copy the header info */
|
|
range->nranges = serialized->nranges;
|
|
range->nvalues = serialized->nvalues;
|
|
range->nsorted = serialized->nvalues;
|
|
range->maxvalues = maxvalues;
|
|
range->target_maxvalues = serialized->maxvalues;
|
|
|
|
range->typid = serialized->typid;
|
|
|
|
typbyval = get_typbyval(serialized->typid);
|
|
typlen = get_typlen(serialized->typid);
|
|
|
|
/*
|
|
* And now deconstruct the values into Datum array. We have to copy the
|
|
* data because the serialized representation ignores alignment, and we
|
|
* don't want to rely on it being kept around anyway.
|
|
*/
|
|
ptr = serialized->data;
|
|
|
|
/*
|
|
* We don't want to allocate many pieces, so we just allocate everything
|
|
* in one chunk. How much space will we need?
|
|
*
|
|
* XXX We don't need to copy simple by-value data types.
|
|
*/
|
|
datalen = 0;
|
|
dataptr = NULL;
|
|
for (i = 0; (i < nvalues) && (!typbyval); i++)
|
|
{
|
|
if (typlen > 0) /* fixed-length by-ref types */
|
|
datalen += MAXALIGN(typlen);
|
|
else if (typlen == -1) /* varlena */
|
|
{
|
|
datalen += MAXALIGN(VARSIZE_ANY(ptr));
|
|
ptr += VARSIZE_ANY(ptr);
|
|
}
|
|
else if (typlen == -2) /* cstring */
|
|
{
|
|
Size slen = strlen(ptr) + 1;
|
|
|
|
datalen += MAXALIGN(slen);
|
|
ptr += slen;
|
|
}
|
|
}
|
|
|
|
if (datalen > 0)
|
|
dataptr = palloc(datalen);
|
|
|
|
/*
|
|
* Restore the source pointer (might have been modified when calculating
|
|
* the space we need to allocate).
|
|
*/
|
|
ptr = serialized->data;
|
|
|
|
for (i = 0; i < nvalues; i++)
|
|
{
|
|
if (typbyval) /* simple by-value data types */
|
|
{
|
|
Datum v = 0;
|
|
|
|
memcpy(&v, ptr, typlen);
|
|
|
|
range->values[i] = fetch_att(&v, true, typlen);
|
|
ptr += typlen;
|
|
}
|
|
else if (typlen > 0) /* fixed-length by-ref types */
|
|
{
|
|
range->values[i] = PointerGetDatum(dataptr);
|
|
|
|
memcpy(dataptr, ptr, typlen);
|
|
dataptr += MAXALIGN(typlen);
|
|
|
|
ptr += typlen;
|
|
}
|
|
else if (typlen == -1) /* varlena */
|
|
{
|
|
range->values[i] = PointerGetDatum(dataptr);
|
|
|
|
memcpy(dataptr, ptr, VARSIZE_ANY(ptr));
|
|
dataptr += MAXALIGN(VARSIZE_ANY(ptr));
|
|
ptr += VARSIZE_ANY(ptr);
|
|
}
|
|
else if (typlen == -2) /* cstring */
|
|
{
|
|
Size slen = strlen(ptr) + 1;
|
|
|
|
range->values[i] = PointerGetDatum(dataptr);
|
|
|
|
memcpy(dataptr, ptr, slen);
|
|
dataptr += MAXALIGN(slen);
|
|
ptr += slen;
|
|
}
|
|
|
|
/* make sure we haven't overflown the buffer end */
|
|
Assert(ptr <= ((char *) serialized + VARSIZE_ANY(serialized)));
|
|
}
|
|
|
|
/* should have consumed the whole input value exactly */
|
|
Assert(ptr == ((char *) serialized + VARSIZE_ANY(serialized)));
|
|
|
|
/* return the deserialized value */
|
|
return range;
|
|
}
|
|
|
|
/*
|
|
* compare_expanded_ranges
|
|
* Compare the expanded ranges - first by minimum, then by maximum.
|
|
*
|
|
* We do guarantee that ranges in a single Ranges object do not overlap, so it
|
|
* may seem strange that we don't order just by minimum. But when merging two
|
|
* Ranges (which happens in the union function), the ranges may in fact
|
|
* overlap. So we do compare both.
|
|
*/
|
|
static int
|
|
compare_expanded_ranges(const void *a, const void *b, void *arg)
|
|
{
|
|
ExpandedRange *ra = (ExpandedRange *) a;
|
|
ExpandedRange *rb = (ExpandedRange *) b;
|
|
Datum r;
|
|
|
|
compare_context *cxt = (compare_context *) arg;
|
|
|
|
/* first compare minvals */
|
|
r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->minval, rb->minval);
|
|
|
|
if (DatumGetBool(r))
|
|
return -1;
|
|
|
|
r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->minval, ra->minval);
|
|
|
|
if (DatumGetBool(r))
|
|
return 1;
|
|
|
|
/* then compare maxvals */
|
|
r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->maxval, rb->maxval);
|
|
|
|
if (DatumGetBool(r))
|
|
return -1;
|
|
|
|
r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->maxval, ra->maxval);
|
|
|
|
if (DatumGetBool(r))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* compare_values
|
|
* Compare the values.
|
|
*/
|
|
static int
|
|
compare_values(const void *a, const void *b, void *arg)
|
|
{
|
|
Datum *da = (Datum *) a;
|
|
Datum *db = (Datum *) b;
|
|
Datum r;
|
|
|
|
compare_context *cxt = (compare_context *) arg;
|
|
|
|
r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *da, *db);
|
|
|
|
if (DatumGetBool(r))
|
|
return -1;
|
|
|
|
r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *db, *da);
|
|
|
|
if (DatumGetBool(r))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Check if the new value matches one of the existing ranges.
|
|
*/
|
|
static bool
|
|
has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges,
|
|
Datum newval, AttrNumber attno, Oid typid)
|
|
{
|
|
Datum compar;
|
|
|
|
Datum minvalue;
|
|
Datum maxvalue;
|
|
|
|
FmgrInfo *cmpLessFn;
|
|
FmgrInfo *cmpGreaterFn;
|
|
|
|
/* binary search on ranges */
|
|
int start,
|
|
end;
|
|
|
|
if (ranges->nranges == 0)
|
|
return false;
|
|
|
|
minvalue = ranges->values[0];
|
|
maxvalue = ranges->values[2 * ranges->nranges - 1];
|
|
|
|
/*
|
|
* Otherwise, need to compare the new value with boundaries of all the
|
|
* ranges. First check if it's less than the absolute minimum, which is
|
|
* the first value in the array.
|
|
*/
|
|
cmpLessFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
|
|
BTLessStrategyNumber);
|
|
compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
|
|
|
|
/* smaller than the smallest value in the range list */
|
|
if (DatumGetBool(compar))
|
|
return false;
|
|
|
|
/*
|
|
* And now compare it to the existing maximum (last value in the data
|
|
* array). But only if we haven't already ruled out a possible match in
|
|
* the minvalue check.
|
|
*/
|
|
cmpGreaterFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
|
|
BTGreaterStrategyNumber);
|
|
compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
|
|
|
|
if (DatumGetBool(compar))
|
|
return false;
|
|
|
|
/*
|
|
* So we know it's in the general min/max, the question is whether it
|
|
* falls in one of the ranges or gaps. We'll do a binary search on
|
|
* individual ranges - for each range we check equality (value falls into
|
|
* the range), and then check ranges either above or below the current
|
|
* range.
|
|
*/
|
|
start = 0; /* first range */
|
|
end = (ranges->nranges - 1); /* last range */
|
|
while (true)
|
|
{
|
|
int midpoint = (start + end) / 2;
|
|
|
|
/* this means we ran out of ranges in the last step */
|
|
if (start > end)
|
|
return false;
|
|
|
|
/* copy the min/max values from the ranges */
|
|
minvalue = ranges->values[2 * midpoint];
|
|
maxvalue = ranges->values[2 * midpoint + 1];
|
|
|
|
/*
|
|
* Is the value smaller than the minval? If yes, we'll recurse to the
|
|
* left side of range array.
|
|
*/
|
|
compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
|
|
|
|
/* smaller than the smallest value in this range */
|
|
if (DatumGetBool(compar))
|
|
{
|
|
end = (midpoint - 1);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Is the value greater than the minval? If yes, we'll recurse to the
|
|
* right side of range array.
|
|
*/
|
|
compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
|
|
|
|
/* larger than the largest value in this range */
|
|
if (DatumGetBool(compar))
|
|
{
|
|
start = (midpoint + 1);
|
|
continue;
|
|
}
|
|
|
|
/* hey, we found a matching range */
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
* range_contains_value
|
|
* See if the new value is already contained in the range list.
|
|
*
|
|
* We first inspect the list of intervals. We use a small trick - we check
|
|
* the value against min/max of the whole range (min of the first interval,
|
|
* max of the last one) first, and only inspect the individual intervals if
|
|
* this passes.
|
|
*
|
|
* If the value matches none of the intervals, we check the exact values.
|
|
* We simply loop through them and invoke equality operator on them.
|
|
*
|
|
* The last parameter (full) determines whether we need to search all the
|
|
* values, including the unsorted part. With full=false, the unsorted part
|
|
* is not searched, which may produce false negatives and duplicate values
|
|
* (in the unsorted part only), but when we're building the range that's
|
|
* fine - we'll deduplicate before serialization, and it can only happen
|
|
* if there already are unsorted values (so it was already modified).
|
|
*
|
|
* Serialized ranges don't have any unsorted values, so this can't cause
|
|
* false negatives during querying.
|
|
*/
|
|
static bool
|
|
range_contains_value(BrinDesc *bdesc, Oid colloid,
|
|
AttrNumber attno, Form_pg_attribute attr,
|
|
Ranges *ranges, Datum newval, bool full)
|
|
{
|
|
int i;
|
|
FmgrInfo *cmpEqualFn;
|
|
Oid typid = attr->atttypid;
|
|
|
|
/*
|
|
* First inspect the ranges, if there are any. We first check the whole
|
|
* range, and only when there's still a chance of getting a match we
|
|
* inspect the individual ranges.
|
|
*/
|
|
if (has_matching_range(bdesc, colloid, ranges, newval, attno, typid))
|
|
return true;
|
|
|
|
cmpEqualFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
|
|
BTEqualStrategyNumber);
|
|
|
|
/*
|
|
* There is no matching range, so let's inspect the sorted values.
|
|
*
|
|
* We do a sequential search for small numbers of values, and binary
|
|
* search once we have more than 16 values. This threshold is somewhat
|
|
* arbitrary, as it depends on how expensive the comparison function is.
|
|
*
|
|
* XXX If we use the threshold here, maybe we should do the same thing in
|
|
* has_matching_range? Or maybe we should do the bin search all the time?
|
|
*
|
|
* XXX We could use the same optimization as for ranges, to check if the
|
|
* value is between min/max, to maybe rule out all sorted values without
|
|
* having to inspect all of them.
|
|
*/
|
|
if (ranges->nsorted >= 16)
|
|
{
|
|
compare_context cxt;
|
|
|
|
cxt.colloid = ranges->colloid;
|
|
cxt.cmpFn = ranges->cmp;
|
|
|
|
if (bsearch_arg(&newval, &ranges->values[2 * ranges->nranges],
|
|
ranges->nsorted, sizeof(Datum),
|
|
compare_values, (void *) &cxt) != NULL)
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
for (i = 2 * ranges->nranges; i < 2 * ranges->nranges + ranges->nsorted; i++)
|
|
{
|
|
Datum compar;
|
|
|
|
compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
|
|
|
|
/* found an exact match */
|
|
if (DatumGetBool(compar))
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/* If not asked to inspect the unsorted part, we're done. */
|
|
if (!full)
|
|
return false;
|
|
|
|
/* Inspect the unsorted part. */
|
|
for (i = 2 * ranges->nranges + ranges->nsorted; i < 2 * ranges->nranges + ranges->nvalues; i++)
|
|
{
|
|
Datum compar;
|
|
|
|
compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
|
|
|
|
/* found an exact match */
|
|
if (DatumGetBool(compar))
|
|
return true;
|
|
}
|
|
|
|
/* the value is not covered by this BRIN tuple */
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Expand ranges from Ranges into ExpandedRange array. This expects the
|
|
* eranges to be pre-allocated and with the correct size - there needs to be
|
|
* (nranges + nvalues) elements.
|
|
*
|
|
* The order of expanded ranges is arbitrary. We do expand the ranges first,
|
|
* and this part is sorted. But then we expand the values, and this part may
|
|
* be unsorted.
|
|
*/
|
|
static void
|
|
fill_expanded_ranges(ExpandedRange *eranges, int neranges, Ranges *ranges)
|
|
{
|
|
int idx;
|
|
int i;
|
|
|
|
/* Check that the output array has the right size. */
|
|
Assert(neranges == (ranges->nranges + ranges->nvalues));
|
|
|
|
idx = 0;
|
|
for (i = 0; i < ranges->nranges; i++)
|
|
{
|
|
eranges[idx].minval = ranges->values[2 * i];
|
|
eranges[idx].maxval = ranges->values[2 * i + 1];
|
|
eranges[idx].collapsed = false;
|
|
idx++;
|
|
|
|
Assert(idx <= neranges);
|
|
}
|
|
|
|
for (i = 0; i < ranges->nvalues; i++)
|
|
{
|
|
eranges[idx].minval = ranges->values[2 * ranges->nranges + i];
|
|
eranges[idx].maxval = ranges->values[2 * ranges->nranges + i];
|
|
eranges[idx].collapsed = true;
|
|
idx++;
|
|
|
|
Assert(idx <= neranges);
|
|
}
|
|
|
|
/* Did we produce the expected number of elements? */
|
|
Assert(idx == neranges);
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Sort and deduplicate expanded ranges.
|
|
*
|
|
* The ranges may be deduplicated - we're simply appending values, without
|
|
* checking for duplicates etc. So maybe the deduplication will reduce the
|
|
* number of ranges enough, and we won't have to compute the distances etc.
|
|
*
|
|
* Returns the number of expanded ranges.
|
|
*/
|
|
static int
|
|
sort_expanded_ranges(FmgrInfo *cmp, Oid colloid,
|
|
ExpandedRange *eranges, int neranges)
|
|
{
|
|
int n;
|
|
int i;
|
|
compare_context cxt;
|
|
|
|
Assert(neranges > 0);
|
|
|
|
/* sort the values */
|
|
cxt.colloid = colloid;
|
|
cxt.cmpFn = cmp;
|
|
|
|
/*
|
|
* XXX We do qsort on all the values, but we could also leverage the fact
|
|
* that some of the input data is already sorted (all the ranges and maybe
|
|
* some of the points) and do merge sort.
|
|
*/
|
|
qsort_arg(eranges, neranges, sizeof(ExpandedRange),
|
|
compare_expanded_ranges, &cxt);
|
|
|
|
/*
|
|
* Deduplicate the ranges - simply compare each range to the preceding
|
|
* one, and skip the duplicate ones.
|
|
*/
|
|
n = 1;
|
|
for (i = 1; i < neranges; i++)
|
|
{
|
|
/* if the current range is equal to the preceding one, do nothing */
|
|
if (!compare_expanded_ranges(&eranges[i - 1], &eranges[i], (void *) &cxt))
|
|
continue;
|
|
|
|
/* otherwise, copy it to n-th place (if not already there) */
|
|
if (i != n)
|
|
memcpy(&eranges[n], &eranges[i], sizeof(ExpandedRange));
|
|
|
|
n++;
|
|
}
|
|
|
|
Assert((n > 0) && (n <= neranges));
|
|
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
* When combining multiple Range values (in union function), some of the
|
|
* ranges may overlap. We simply merge the overlapping ranges to fix that.
|
|
*
|
|
* XXX This assumes the expanded ranges were previously sorted (by minval
|
|
* and then maxval). We leverage this when detecting overlap.
|
|
*/
|
|
static int
|
|
merge_overlapping_ranges(FmgrInfo *cmp, Oid colloid,
|
|
ExpandedRange *eranges, int neranges)
|
|
{
|
|
int idx;
|
|
|
|
/* Merge ranges (idx) and (idx+1) if they overlap. */
|
|
idx = 0;
|
|
while (idx < (neranges - 1))
|
|
{
|
|
Datum r;
|
|
|
|
/*
|
|
* comparing [?,maxval] vs. [minval,?] - the ranges overlap if (minval
|
|
* < maxval)
|
|
*/
|
|
r = FunctionCall2Coll(cmp, colloid,
|
|
eranges[idx].maxval,
|
|
eranges[idx + 1].minval);
|
|
|
|
/*
|
|
* Nope, maxval < minval, so no overlap. And we know the ranges are
|
|
* ordered, so there are no more overlaps, because all the remaining
|
|
* ranges have greater or equal minval.
|
|
*/
|
|
if (DatumGetBool(r))
|
|
{
|
|
/* proceed to the next range */
|
|
idx += 1;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* So ranges 'idx' and 'idx+1' do overlap, but we don't know if
|
|
* 'idx+1' is contained in 'idx', or if they overlap only partially.
|
|
* So compare the upper bounds and keep the larger one.
|
|
*/
|
|
r = FunctionCall2Coll(cmp, colloid,
|
|
eranges[idx].maxval,
|
|
eranges[idx + 1].maxval);
|
|
|
|
if (DatumGetBool(r))
|
|
eranges[idx].maxval = eranges[idx + 1].maxval;
|
|
|
|
/*
|
|
* The range certainly is no longer collapsed (irrespectively of the
|
|
* previous state).
|
|
*/
|
|
eranges[idx].collapsed = false;
|
|
|
|
/*
|
|
* Now get rid of the (idx+1) range entirely by shifting the remaining
|
|
* ranges by 1. There are neranges elements, and we need to move
|
|
* elements from (idx+2). That means the number of elements to move is
|
|
* [ncranges - (idx+2)].
|
|
*/
|
|
memmove(&eranges[idx + 1], &eranges[idx + 2],
|
|
(neranges - (idx + 2)) * sizeof(ExpandedRange));
|
|
|
|
/*
|
|
* Decrease the number of ranges, and repeat (with the same range, as
|
|
* it might overlap with additional ranges thanks to the merge).
|
|
*/
|
|
neranges--;
|
|
}
|
|
|
|
return neranges;
|
|
}
|
|
|
|
/*
|
|
* Simple comparator for distance values, comparing the double value.
|
|
* This is intentionally sorting the distances in descending order, i.e.
|
|
* the longer gaps will be at the front.
|
|
*/
|
|
static int
|
|
compare_distances(const void *a, const void *b)
|
|
{
|
|
DistanceValue *da = (DistanceValue *) a;
|
|
DistanceValue *db = (DistanceValue *) b;
|
|
|
|
if (da->value < db->value)
|
|
return 1;
|
|
else if (da->value > db->value)
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Given an array of expanded ranges, compute size of the gaps between each
|
|
* range. For neranges there are (neranges-1) gaps.
|
|
*
|
|
* We simply call the "distance" function to compute the (max-min) for pairs
|
|
* of consecutive ranges. The function may be fairly expensive, so we do that
|
|
* just once (and then use it to pick as many ranges to merge as possible).
|
|
*
|
|
* See reduce_expanded_ranges for details.
|
|
*/
|
|
static DistanceValue *
|
|
build_distances(FmgrInfo *distanceFn, Oid colloid,
|
|
ExpandedRange *eranges, int neranges)
|
|
{
|
|
int i;
|
|
int ndistances;
|
|
DistanceValue *distances;
|
|
|
|
Assert(neranges > 0);
|
|
|
|
/* If there's only a single range, there's no distance to calculate. */
|
|
if (neranges == 1)
|
|
return NULL;
|
|
|
|
ndistances = (neranges - 1);
|
|
distances = (DistanceValue *) palloc0(sizeof(DistanceValue) * ndistances);
|
|
|
|
/*
|
|
* Walk through the ranges once and compute the distance between the
|
|
* ranges so that we can sort them once.
|
|
*/
|
|
for (i = 0; i < ndistances; i++)
|
|
{
|
|
Datum a1,
|
|
a2,
|
|
r;
|
|
|
|
a1 = eranges[i].maxval;
|
|
a2 = eranges[i + 1].minval;
|
|
|
|
/* compute length of the gap (between max/min) */
|
|
r = FunctionCall2Coll(distanceFn, colloid, a1, a2);
|
|
|
|
/* remember the index of the gap the distance is for */
|
|
distances[i].index = i;
|
|
distances[i].value = DatumGetFloat8(r);
|
|
}
|
|
|
|
/*
|
|
* Sort the distances in descending order, so that the longest gaps are at
|
|
* the front.
|
|
*/
|
|
qsort(distances, ndistances, sizeof(DistanceValue), compare_distances);
|
|
|
|
return distances;
|
|
}
|
|
|
|
/*
|
|
* Builds expanded ranges for the existing ranges (and single-point ranges),
|
|
* and also the new value (which did not fit into the array). This expanded
|
|
* representation makes the processing a bit easier, as it allows handling
|
|
* ranges and points the same way.
|
|
*
|
|
* We sort and deduplicate the expanded ranges - this is necessary, because
|
|
* the points may be unsorted. And moreover the two parts (ranges and
|
|
* points) are sorted on their own.
|
|
*/
|
|
static ExpandedRange *
|
|
build_expanded_ranges(FmgrInfo *cmp, Oid colloid, Ranges *ranges,
|
|
int *nranges)
|
|
{
|
|
int neranges;
|
|
ExpandedRange *eranges;
|
|
|
|
/* both ranges and points are expanded into a separate element */
|
|
neranges = ranges->nranges + ranges->nvalues;
|
|
|
|
eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
|
|
|
|
/* fill the expanded ranges */
|
|
fill_expanded_ranges(eranges, neranges, ranges);
|
|
|
|
/* sort and deduplicate the expanded ranges */
|
|
neranges = sort_expanded_ranges(cmp, colloid, eranges, neranges);
|
|
|
|
/* remember how many ranges we built */
|
|
*nranges = neranges;
|
|
|
|
return eranges;
|
|
}
|
|
|
|
#ifdef USE_ASSERT_CHECKING
|
|
/*
|
|
* Counts boundary values needed to store the ranges. Each single-point
|
|
* range is stored using a single value, each regular range needs two.
|
|
*/
|
|
static int
|
|
count_values(ExpandedRange *cranges, int ncranges)
|
|
{
|
|
int i;
|
|
int count;
|
|
|
|
count = 0;
|
|
for (i = 0; i < ncranges; i++)
|
|
{
|
|
if (cranges[i].collapsed)
|
|
count += 1;
|
|
else
|
|
count += 2;
|
|
}
|
|
|
|
return count;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* reduce_expanded_ranges
|
|
* reduce the ranges until the number of values is low enough
|
|
*
|
|
* Combines ranges until the number of boundary values drops below the
|
|
* threshold specified by max_values. This happens by merging enough
|
|
* ranges by the distance between them.
|
|
*
|
|
* Returns the number of result ranges.
|
|
*
|
|
* We simply use the global min/max and then add boundaries for enough
|
|
* largest gaps. Each gap adds 2 values, so we simply use (target/2-1)
|
|
* distances. Then we simply sort all the values - each two values are
|
|
* a boundary of a range (possibly collapsed).
|
|
*
|
|
* XXX Some of the ranges may be collapsed (i.e. the min/max values are
|
|
* equal), but we ignore that for now. We could repeat the process,
|
|
* adding a couple more gaps recursively.
|
|
*
|
|
* XXX The ranges to merge are selected solely using the distance. But
|
|
* that may not be the best strategy, for example when multiple gaps
|
|
* are of equal (or very similar) length.
|
|
*
|
|
* Consider for example points 1, 2, 3, .., 64, which have gaps of the
|
|
* same length 1 of course. In that case, we tend to pick the first
|
|
* gap of that length, which leads to this:
|
|
*
|
|
* step 1: [1, 2], 3, 4, 5, .., 64
|
|
* step 2: [1, 3], 4, 5, .., 64
|
|
* step 3: [1, 4], 5, .., 64
|
|
* ...
|
|
*
|
|
* So in the end we'll have one "large" range and multiple small points.
|
|
* That may be fine, but it seems a bit strange and non-optimal. Maybe
|
|
* we should consider other things when picking ranges to merge - e.g.
|
|
* length of the ranges? Or perhaps randomize the choice of ranges, with
|
|
* probability inversely proportional to the distance (the gap lengths
|
|
* may be very close, but not exactly the same).
|
|
*
|
|
* XXX Or maybe we could just handle this by using random value as a
|
|
* tie-break, or by adding random noise to the actual distance.
|
|
*/
|
|
static int
|
|
reduce_expanded_ranges(ExpandedRange *eranges, int neranges,
|
|
DistanceValue *distances, int max_values,
|
|
FmgrInfo *cmp, Oid colloid)
|
|
{
|
|
int i;
|
|
int nvalues;
|
|
Datum *values;
|
|
|
|
compare_context cxt;
|
|
|
|
/* total number of gaps between ranges */
|
|
int ndistances = (neranges - 1);
|
|
|
|
/* number of gaps to keep */
|
|
int keep = (max_values / 2 - 1);
|
|
|
|
/*
|
|
* Maybe we have a sufficiently low number of ranges already?
|
|
*
|
|
* XXX This should happen before we actually do the expensive stuff like
|
|
* sorting, so maybe this should be just an assert.
|
|
*/
|
|
if (keep >= ndistances)
|
|
return neranges;
|
|
|
|
/* sort the values */
|
|
cxt.colloid = colloid;
|
|
cxt.cmpFn = cmp;
|
|
|
|
/* allocate space for the boundary values */
|
|
nvalues = 0;
|
|
values = (Datum *) palloc(sizeof(Datum) * max_values);
|
|
|
|
/* add the global min/max values, from the first/last range */
|
|
values[nvalues++] = eranges[0].minval;
|
|
values[nvalues++] = eranges[neranges - 1].maxval;
|
|
|
|
/* add boundary values for enough gaps */
|
|
for (i = 0; i < keep; i++)
|
|
{
|
|
/* index of the gap between (index) and (index+1) ranges */
|
|
int index = distances[i].index;
|
|
|
|
Assert((index >= 0) && ((index + 1) < neranges));
|
|
|
|
/* add max from the preceding range, minval from the next one */
|
|
values[nvalues++] = eranges[index].maxval;
|
|
values[nvalues++] = eranges[index + 1].minval;
|
|
|
|
Assert(nvalues <= max_values);
|
|
}
|
|
|
|
/* We should have an even number of range values. */
|
|
Assert(nvalues % 2 == 0);
|
|
|
|
/*
|
|
* Sort the values using the comparator function, and form ranges from the
|
|
* sorted result.
|
|
*/
|
|
qsort_arg(values, nvalues, sizeof(Datum),
|
|
compare_values, &cxt);
|
|
|
|
/* We have nvalues boundary values, which means nvalues/2 ranges. */
|
|
for (i = 0; i < (nvalues / 2); i++)
|
|
{
|
|
eranges[i].minval = values[2 * i];
|
|
eranges[i].maxval = values[2 * i + 1];
|
|
|
|
/* if the boundary values are the same, it's a collapsed range */
|
|
eranges[i].collapsed = (compare_values(&values[2 * i],
|
|
&values[2 * i + 1],
|
|
&cxt) == 0);
|
|
}
|
|
|
|
return (nvalues / 2);
|
|
}
|
|
|
|
/*
|
|
* Store the boundary values from ExpandedRanges back into 'ranges' (using
|
|
* only the minimal number of values needed).
|
|
*/
|
|
static void
|
|
store_expanded_ranges(Ranges *ranges, ExpandedRange *eranges, int neranges)
|
|
{
|
|
int i;
|
|
int idx = 0;
|
|
|
|
/* first copy in the regular ranges */
|
|
ranges->nranges = 0;
|
|
for (i = 0; i < neranges; i++)
|
|
{
|
|
if (!eranges[i].collapsed)
|
|
{
|
|
ranges->values[idx++] = eranges[i].minval;
|
|
ranges->values[idx++] = eranges[i].maxval;
|
|
ranges->nranges++;
|
|
}
|
|
}
|
|
|
|
/* now copy in the collapsed ones */
|
|
ranges->nvalues = 0;
|
|
for (i = 0; i < neranges; i++)
|
|
{
|
|
if (eranges[i].collapsed)
|
|
{
|
|
ranges->values[idx++] = eranges[i].minval;
|
|
ranges->nvalues++;
|
|
}
|
|
}
|
|
|
|
/* all the values are sorted */
|
|
ranges->nsorted = ranges->nvalues;
|
|
|
|
Assert(count_values(eranges, neranges) == 2 * ranges->nranges + ranges->nvalues);
|
|
Assert(2 * ranges->nranges + ranges->nvalues <= ranges->maxvalues);
|
|
}
|
|
|
|
|
|
/*
|
|
* Consider freeing space in the ranges. Checks if there's space for at least
|
|
* one new value, and performs compaction if needed.
|
|
*
|
|
* Returns true if the value was actually modified.
|
|
*/
|
|
static bool
|
|
ensure_free_space_in_buffer(BrinDesc *bdesc, Oid colloid,
|
|
AttrNumber attno, Form_pg_attribute attr,
|
|
Ranges *range)
|
|
{
|
|
MemoryContext ctx;
|
|
MemoryContext oldctx;
|
|
|
|
FmgrInfo *cmpFn,
|
|
*distanceFn;
|
|
|
|
/* expanded ranges */
|
|
ExpandedRange *eranges;
|
|
int neranges;
|
|
DistanceValue *distances;
|
|
|
|
/*
|
|
* If there is free space in the buffer, we're done without having to
|
|
* modify anything.
|
|
*/
|
|
if (2 * range->nranges + range->nvalues < range->maxvalues)
|
|
return false;
|
|
|
|
/* we'll certainly need the comparator, so just look it up now */
|
|
cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
|
BTLessStrategyNumber);
|
|
|
|
/* deduplicate values, if there's an unsorted part */
|
|
range_deduplicate_values(range);
|
|
|
|
/*
|
|
* Did we reduce enough free space by just the deduplication?
|
|
*
|
|
* We don't simply check against range->maxvalues again. The deduplication
|
|
* might have freed very little space (e.g. just one value), forcing us to
|
|
* do deduplication very often. In that case, it's better to do the
|
|
* compaction and reduce more space.
|
|
*/
|
|
if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR)
|
|
return true;
|
|
|
|
/*
|
|
* We need to combine some of the existing ranges, to reduce the number of
|
|
* values we have to store.
|
|
*
|
|
* The distanceFn calls (which may internally call e.g. numeric_le) may
|
|
* allocate quite a bit of memory, and we must not leak it (we might have
|
|
* to do this repeatedly, even for a single BRIN page range). Otherwise
|
|
* we'd have problems e.g. when building new indexes. So we use a memory
|
|
* context and make sure we free the memory at the end (so if we call the
|
|
* distance function many times, it might be an issue, but meh).
|
|
*/
|
|
ctx = AllocSetContextCreate(CurrentMemoryContext,
|
|
"minmax-multi context",
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
oldctx = MemoryContextSwitchTo(ctx);
|
|
|
|
/* build the expanded ranges */
|
|
eranges = build_expanded_ranges(cmpFn, colloid, range, &neranges);
|
|
|
|
/* Is the expanded representation of ranges correct? */
|
|
AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
|
|
|
|
/* and we'll also need the 'distance' procedure */
|
|
distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
|
|
|
|
/* build array of gap distances and sort them in ascending order */
|
|
distances = build_distances(distanceFn, colloid, eranges, neranges);
|
|
|
|
/*
|
|
* Combine ranges until we release at least 50% of the space. This
|
|
* threshold is somewhat arbitrary, perhaps needs tuning. We must not use
|
|
* too low or high value.
|
|
*/
|
|
neranges = reduce_expanded_ranges(eranges, neranges, distances,
|
|
range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR,
|
|
cmpFn, colloid);
|
|
|
|
/* Is the result of reducing expanded ranges correct? */
|
|
AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
|
|
|
|
/* Make sure we've sufficiently reduced the number of ranges. */
|
|
Assert(count_values(eranges, neranges) <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR);
|
|
|
|
/* decompose the expanded ranges into regular ranges and single values */
|
|
store_expanded_ranges(range, eranges, neranges);
|
|
|
|
MemoryContextSwitchTo(oldctx);
|
|
MemoryContextDelete(ctx);
|
|
|
|
/* Did we break the ranges somehow? */
|
|
AssertCheckRanges(range, cmpFn, colloid);
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* range_add_value
|
|
* Add the new value to the minmax-multi range.
|
|
*/
|
|
static bool
|
|
range_add_value(BrinDesc *bdesc, Oid colloid,
|
|
AttrNumber attno, Form_pg_attribute attr,
|
|
Ranges *ranges, Datum newval)
|
|
{
|
|
FmgrInfo *cmpFn;
|
|
bool modified = false;
|
|
|
|
/* we'll certainly need the comparator, so just look it up now */
|
|
cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
|
BTLessStrategyNumber);
|
|
|
|
/* comprehensive checks of the input ranges */
|
|
AssertCheckRanges(ranges, cmpFn, colloid);
|
|
|
|
/*
|
|
* Make sure there's enough free space in the buffer. We only trigger this
|
|
* when the buffer is full, which means it had to be modified as we size
|
|
* it to be larger than what is stored on disk.
|
|
*
|
|
* This needs to happen before we check if the value is contained in the
|
|
* range, because the value might be in the unsorted part, and we don't
|
|
* check that in range_contains_value. The deduplication would then move
|
|
* it to the sorted part, and we'd add the value too, which violates the
|
|
* rule that we never have duplicates with the ranges or sorted values.
|
|
*
|
|
* We might also deduplicate and recheck if the value is contained, but
|
|
* that seems like overkill. We'd need to deduplicate anyway, so why not
|
|
* do it now.
|
|
*/
|
|
modified = ensure_free_space_in_buffer(bdesc, colloid,
|
|
attno, attr, ranges);
|
|
|
|
/*
|
|
* Bail out if the value already is covered by the range.
|
|
*
|
|
* We could also add values until we hit values_per_range, and then do the
|
|
* deduplication in a batch, hoping for better efficiency. But that would
|
|
* mean we actually modify the range every time, which means having to
|
|
* serialize the value, which does palloc, walks the values, copies them,
|
|
* etc. Not exactly cheap.
|
|
*
|
|
* So instead we do the check, which should be fairly cheap - assuming the
|
|
* comparator function is not very expensive.
|
|
*
|
|
* This also implies the values array can't contain duplicate values.
|
|
*/
|
|
if (range_contains_value(bdesc, colloid, attno, attr, ranges, newval, false))
|
|
return modified;
|
|
|
|
/* Make a copy of the value, if needed. */
|
|
newval = datumCopy(newval, attr->attbyval, attr->attlen);
|
|
|
|
/*
|
|
* If there's space in the values array, copy it in and we're done.
|
|
*
|
|
* We do want to keep the values sorted (to speed up searches), so we do a
|
|
* simple insertion sort. We could do something more elaborate, e.g. by
|
|
* sorting the values only now and then, but for small counts (e.g. when
|
|
* maxvalues is 64) this should be fine.
|
|
*/
|
|
ranges->values[2 * ranges->nranges + ranges->nvalues] = newval;
|
|
ranges->nvalues++;
|
|
|
|
/* If we added the first value, we can consider it as sorted. */
|
|
if (ranges->nvalues == 1)
|
|
ranges->nsorted = 1;
|
|
|
|
/*
|
|
* Check we haven't broken the ordering of boundary values (checks both
|
|
* parts, but that doesn't hurt).
|
|
*/
|
|
AssertCheckRanges(ranges, cmpFn, colloid);
|
|
|
|
/* Check the range contains the value we just added. */
|
|
Assert(range_contains_value(bdesc, colloid, attno, attr, ranges, newval, true));
|
|
|
|
/* yep, we've modified the range */
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Generate range representation of data collected during "batch mode".
|
|
* This is similar to reduce_expanded_ranges, except that we can't assume
|
|
* the values are sorted and there may be duplicate values.
|
|
*/
|
|
static void
|
|
compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values)
|
|
{
|
|
FmgrInfo *cmpFn,
|
|
*distanceFn;
|
|
|
|
/* expanded ranges */
|
|
ExpandedRange *eranges;
|
|
int neranges;
|
|
DistanceValue *distances;
|
|
|
|
MemoryContext ctx;
|
|
MemoryContext oldctx;
|
|
|
|
/*
|
|
* Do we need to actually compactify anything?
|
|
*
|
|
* There are two reasons why compaction may be needed - firstly, there may
|
|
* be too many values, or some of the values may be unsorted.
|
|
*/
|
|
if ((ranges->nranges * 2 + ranges->nvalues <= max_values) &&
|
|
(ranges->nsorted == ranges->nvalues))
|
|
return;
|
|
|
|
/* we'll certainly need the comparator, so just look it up now */
|
|
cmpFn = minmax_multi_get_strategy_procinfo(bdesc, ranges->attno, ranges->typid,
|
|
BTLessStrategyNumber);
|
|
|
|
/* and we'll also need the 'distance' procedure */
|
|
distanceFn = minmax_multi_get_procinfo(bdesc, ranges->attno, PROCNUM_DISTANCE);
|
|
|
|
/*
|
|
* The distanceFn calls (which may internally call e.g. numeric_le) may
|
|
* allocate quite a bit of memory, and we must not leak it. Otherwise,
|
|
* we'd have problems e.g. when building indexes. So we create a local
|
|
* memory context and make sure we free the memory before leaving this
|
|
* function (not after every call).
|
|
*/
|
|
ctx = AllocSetContextCreate(CurrentMemoryContext,
|
|
"minmax-multi context",
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
oldctx = MemoryContextSwitchTo(ctx);
|
|
|
|
/* build the expanded ranges */
|
|
eranges = build_expanded_ranges(cmpFn, ranges->colloid, ranges, &neranges);
|
|
|
|
/* build array of gap distances and sort them in ascending order */
|
|
distances = build_distances(distanceFn, ranges->colloid,
|
|
eranges, neranges);
|
|
|
|
/*
|
|
* Combine ranges until we get below max_values. We don't use any scale
|
|
* factor, because this is used during serialization, and we don't expect
|
|
* more tuples to be inserted anytime soon.
|
|
*/
|
|
neranges = reduce_expanded_ranges(eranges, neranges, distances,
|
|
max_values, cmpFn, ranges->colloid);
|
|
|
|
Assert(count_values(eranges, neranges) <= max_values);
|
|
|
|
/* transform back into regular ranges and single values */
|
|
store_expanded_ranges(ranges, eranges, neranges);
|
|
|
|
/* check all the range invariants */
|
|
AssertCheckRanges(ranges, cmpFn, ranges->colloid);
|
|
|
|
MemoryContextSwitchTo(oldctx);
|
|
MemoryContextDelete(ctx);
|
|
}
|
|
|
|
Datum
|
|
brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS)
|
|
{
|
|
BrinOpcInfo *result;
|
|
|
|
/*
|
|
* opaque->strategy_procinfos is initialized lazily; here it is set to
|
|
* all-uninitialized by palloc0 which sets fn_oid to InvalidOid.
|
|
*/
|
|
|
|
result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) +
|
|
sizeof(MinmaxMultiOpaque));
|
|
result->oi_nstored = 1;
|
|
result->oi_regular_nulls = true;
|
|
result->oi_opaque = (MinmaxMultiOpaque *)
|
|
MAXALIGN((char *) result + SizeofBrinOpcInfo(1));
|
|
result->oi_typcache[0] = lookup_type_cache(PG_BRIN_MINMAX_MULTI_SUMMARYOID, 0);
|
|
|
|
PG_RETURN_POINTER(result);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two float4 values (plain subtraction).
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS)
|
|
{
|
|
float a1 = PG_GETARG_FLOAT4(0);
|
|
float a2 = PG_GETARG_FLOAT4(1);
|
|
|
|
/* if both values are NaN, then we consider them the same */
|
|
if (isnan(a1) && isnan(a2))
|
|
PG_RETURN_FLOAT8(0.0);
|
|
|
|
/* if one value is NaN, use infinite distance */
|
|
if (isnan(a1) || isnan(a2))
|
|
PG_RETURN_FLOAT8(get_float8_infinity());
|
|
|
|
/*
|
|
* We know the values are range boundaries, but the range may be collapsed
|
|
* (i.e. single points), with equal values.
|
|
*/
|
|
Assert(a1 <= a2);
|
|
|
|
PG_RETURN_FLOAT8((double) a2 - (double) a1);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two float8 values (plain subtraction).
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS)
|
|
{
|
|
double a1 = PG_GETARG_FLOAT8(0);
|
|
double a2 = PG_GETARG_FLOAT8(1);
|
|
|
|
/* if both values are NaN, then we consider them the same */
|
|
if (isnan(a1) && isnan(a2))
|
|
PG_RETURN_FLOAT8(0.0);
|
|
|
|
/* if one value is NaN, use infinite distance */
|
|
if (isnan(a1) || isnan(a2))
|
|
PG_RETURN_FLOAT8(get_float8_infinity());
|
|
|
|
/*
|
|
* We know the values are range boundaries, but the range may be collapsed
|
|
* (i.e. single points), with equal values.
|
|
*/
|
|
Assert(a1 <= a2);
|
|
|
|
PG_RETURN_FLOAT8(a2 - a1);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two int2 values (plain subtraction).
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS)
|
|
{
|
|
int16 a1 = PG_GETARG_INT16(0);
|
|
int16 a2 = PG_GETARG_INT16(1);
|
|
|
|
/*
|
|
* We know the values are range boundaries, but the range may be collapsed
|
|
* (i.e. single points), with equal values.
|
|
*/
|
|
Assert(a1 <= a2);
|
|
|
|
PG_RETURN_FLOAT8((double) a2 - (double) a1);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two int4 values (plain subtraction).
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS)
|
|
{
|
|
int32 a1 = PG_GETARG_INT32(0);
|
|
int32 a2 = PG_GETARG_INT32(1);
|
|
|
|
/*
|
|
* We know the values are range boundaries, but the range may be collapsed
|
|
* (i.e. single points), with equal values.
|
|
*/
|
|
Assert(a1 <= a2);
|
|
|
|
PG_RETURN_FLOAT8((double) a2 - (double) a1);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two int8 values (plain subtraction).
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS)
|
|
{
|
|
int64 a1 = PG_GETARG_INT64(0);
|
|
int64 a2 = PG_GETARG_INT64(1);
|
|
|
|
/*
|
|
* We know the values are range boundaries, but the range may be collapsed
|
|
* (i.e. single points), with equal values.
|
|
*/
|
|
Assert(a1 <= a2);
|
|
|
|
PG_RETURN_FLOAT8((double) a2 - (double) a1);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two tid values (by mapping them to float8 and
|
|
* then subtracting them).
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS)
|
|
{
|
|
double da1,
|
|
da2;
|
|
|
|
ItemPointer pa1 = (ItemPointer) PG_GETARG_DATUM(0);
|
|
ItemPointer pa2 = (ItemPointer) PG_GETARG_DATUM(1);
|
|
|
|
/*
|
|
* We know the values are range boundaries, but the range may be collapsed
|
|
* (i.e. single points), with equal values.
|
|
*/
|
|
Assert(ItemPointerCompare(pa1, pa2) <= 0);
|
|
|
|
/*
|
|
* We use the no-check variants here, because user-supplied values may
|
|
* have (ip_posid == 0). See ItemPointerCompare.
|
|
*/
|
|
da1 = ItemPointerGetBlockNumberNoCheck(pa1) * MaxHeapTuplesPerPage +
|
|
ItemPointerGetOffsetNumberNoCheck(pa1);
|
|
|
|
da2 = ItemPointerGetBlockNumberNoCheck(pa2) * MaxHeapTuplesPerPage +
|
|
ItemPointerGetOffsetNumberNoCheck(pa2);
|
|
|
|
PG_RETURN_FLOAT8(da2 - da1);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two numeric values (plain subtraction).
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum d;
|
|
Datum a1 = PG_GETARG_DATUM(0);
|
|
Datum a2 = PG_GETARG_DATUM(1);
|
|
|
|
/*
|
|
* We know the values are range boundaries, but the range may be collapsed
|
|
* (i.e. single points), with equal values.
|
|
*/
|
|
Assert(DatumGetBool(DirectFunctionCall2(numeric_le, a1, a2)));
|
|
|
|
d = DirectFunctionCall2(numeric_sub, a2, a1); /* a2 - a1 */
|
|
|
|
PG_RETURN_FLOAT8(DirectFunctionCall1(numeric_float8, d));
|
|
}
|
|
|
|
/*
|
|
* Compute the approximate distance between two UUID values.
|
|
*
|
|
* XXX We do not need a perfectly accurate value, so we approximate the
|
|
* deltas (which would have to be 128-bit integers) with a 64-bit float.
|
|
* The small inaccuracies do not matter in practice, in the worst case
|
|
* we'll decide to merge ranges that are not the closest ones.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_uuid(PG_FUNCTION_ARGS)
|
|
{
|
|
int i;
|
|
float8 delta = 0;
|
|
|
|
Datum a1 = PG_GETARG_DATUM(0);
|
|
Datum a2 = PG_GETARG_DATUM(1);
|
|
|
|
pg_uuid_t *u1 = DatumGetUUIDP(a1);
|
|
pg_uuid_t *u2 = DatumGetUUIDP(a2);
|
|
|
|
/*
|
|
* We know the values are range boundaries, but the range may be collapsed
|
|
* (i.e. single points), with equal values.
|
|
*/
|
|
Assert(DatumGetBool(DirectFunctionCall2(uuid_le, a1, a2)));
|
|
|
|
/* compute approximate delta as a double precision value */
|
|
for (i = UUID_LEN - 1; i >= 0; i--)
|
|
{
|
|
delta += (int) u2->data[i] - (int) u1->data[i];
|
|
delta /= 256;
|
|
}
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the approximate distance between two dates.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_date(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta = 0;
|
|
DateADT dateVal1 = PG_GETARG_DATEADT(0);
|
|
DateADT dateVal2 = PG_GETARG_DATEADT(1);
|
|
|
|
delta = (float8) dateVal2 - (float8) dateVal1;
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the approximate distance between two time (without tz) values.
|
|
*
|
|
* TimeADT is just an int64, so we simply subtract the values directly.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_time(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta = 0;
|
|
|
|
TimeADT ta = PG_GETARG_TIMEADT(0);
|
|
TimeADT tb = PG_GETARG_TIMEADT(1);
|
|
|
|
delta = (tb - ta);
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the approximate distance between two timetz values.
|
|
*
|
|
* Simply subtracts the TimeADT (int64) values embedded in TimeTzADT.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_timetz(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta = 0;
|
|
|
|
TimeTzADT *ta = PG_GETARG_TIMETZADT_P(0);
|
|
TimeTzADT *tb = PG_GETARG_TIMETZADT_P(1);
|
|
|
|
delta = (tb->time - ta->time) + (tb->zone - ta->zone) * USECS_PER_SEC;
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two timestamp values.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta = 0;
|
|
|
|
Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
|
|
Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
|
|
|
|
delta = (float8) dt2 - (float8) dt1;
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two interval values.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta = 0;
|
|
|
|
Interval *ia = PG_GETARG_INTERVAL_P(0);
|
|
Interval *ib = PG_GETARG_INTERVAL_P(1);
|
|
|
|
int64 dayfraction;
|
|
int64 days;
|
|
|
|
/*
|
|
* Delta is (fractional) number of days between the intervals. Assume
|
|
* months have 30 days for consistency with interval_cmp_internal. We
|
|
* don't need to be exact, in the worst case we'll build a bit less
|
|
* efficient ranges. But we should not contradict interval_cmp.
|
|
*/
|
|
dayfraction = (ib->time % USECS_PER_DAY) - (ia->time % USECS_PER_DAY);
|
|
days = (ib->time / USECS_PER_DAY) - (ia->time / USECS_PER_DAY);
|
|
days += (int64) ib->day - (int64) ia->day;
|
|
days += ((int64) ib->month - (int64) ia->month) * INT64CONST(30);
|
|
|
|
/* convert to double precision */
|
|
delta = (double) days + dayfraction / (double) USECS_PER_DAY;
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two pg_lsn values.
|
|
*
|
|
* LSN is just an int64 encoding position in the stream, so just subtract
|
|
* those int64 values directly.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_pg_lsn(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta = 0;
|
|
|
|
XLogRecPtr lsna = PG_GETARG_LSN(0);
|
|
XLogRecPtr lsnb = PG_GETARG_LSN(1);
|
|
|
|
delta = (lsnb - lsna);
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two macaddr values.
|
|
*
|
|
* mac addresses are treated as 6 unsigned chars, so do the same thing we
|
|
* already do for UUID values.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_macaddr(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta;
|
|
|
|
macaddr *a = PG_GETARG_MACADDR_P(0);
|
|
macaddr *b = PG_GETARG_MACADDR_P(1);
|
|
|
|
delta = ((float8) b->f - (float8) a->f);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->e - (float8) a->e);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->d - (float8) a->d);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->c - (float8) a->c);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->b - (float8) a->b);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->a - (float8) a->a);
|
|
delta /= 256;
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two macaddr8 values.
|
|
*
|
|
* macaddr8 addresses are 8 unsigned chars, so do the same thing we
|
|
* already do for UUID values.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_macaddr8(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta;
|
|
|
|
macaddr8 *a = PG_GETARG_MACADDR8_P(0);
|
|
macaddr8 *b = PG_GETARG_MACADDR8_P(1);
|
|
|
|
delta = ((float8) b->h - (float8) a->h);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->g - (float8) a->g);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->f - (float8) a->f);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->e - (float8) a->e);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->d - (float8) a->d);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->c - (float8) a->c);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->b - (float8) a->b);
|
|
delta /= 256;
|
|
|
|
delta += ((float8) b->a - (float8) a->a);
|
|
delta /= 256;
|
|
|
|
Assert(delta >= 0);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
/*
|
|
* Compute the distance between two inet values.
|
|
*
|
|
* The distance is defined as the difference between 32-bit/128-bit values,
|
|
* depending on the IP version. The distance is computed by subtracting
|
|
* the bytes and normalizing it to [0,1] range for each IP family.
|
|
* Addresses from different families are considered to be in maximum
|
|
* distance, which is 1.0.
|
|
*
|
|
* XXX Does this need to consider the mask (bits)? For now, it's ignored.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
|
|
{
|
|
float8 delta;
|
|
int i;
|
|
int len;
|
|
unsigned char *addra,
|
|
*addrb;
|
|
|
|
inet *ipa = PG_GETARG_INET_PP(0);
|
|
inet *ipb = PG_GETARG_INET_PP(1);
|
|
|
|
int lena,
|
|
lenb;
|
|
|
|
/*
|
|
* If the addresses are from different families, consider them to be in
|
|
* maximal possible distance (which is 1.0).
|
|
*/
|
|
if (ip_family(ipa) != ip_family(ipb))
|
|
PG_RETURN_FLOAT8(1.0);
|
|
|
|
addra = (unsigned char *) palloc(ip_addrsize(ipa));
|
|
memcpy(addra, ip_addr(ipa), ip_addrsize(ipa));
|
|
|
|
addrb = (unsigned char *) palloc(ip_addrsize(ipb));
|
|
memcpy(addrb, ip_addr(ipb), ip_addrsize(ipb));
|
|
|
|
/*
|
|
* The length is calculated from the mask length, because we sort the
|
|
* addresses by first address in the range, so A.B.C.D/24 < A.B.C.1 (the
|
|
* first range starts at A.B.C.0, which is before A.B.C.1). We don't want
|
|
* to produce a negative delta in this case, so we just cut the extra
|
|
* bytes.
|
|
*
|
|
* XXX Maybe this should be a bit more careful and cut the bits, not just
|
|
* whole bytes.
|
|
*/
|
|
lena = ip_bits(ipa);
|
|
lenb = ip_bits(ipb);
|
|
|
|
len = ip_addrsize(ipa);
|
|
|
|
/* apply the network mask to both addresses */
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
unsigned char mask;
|
|
int nbits;
|
|
|
|
nbits = Max(0, lena - (i * 8));
|
|
if (nbits < 8)
|
|
{
|
|
mask = (0xFF << (8 - nbits));
|
|
addra[i] = (addra[i] & mask);
|
|
}
|
|
|
|
nbits = Max(0, lenb - (i * 8));
|
|
if (nbits < 8)
|
|
{
|
|
mask = (0xFF << (8 - nbits));
|
|
addrb[i] = (addrb[i] & mask);
|
|
}
|
|
}
|
|
|
|
/* Calculate the difference between the addresses. */
|
|
delta = 0;
|
|
for (i = len - 1; i >= 0; i--)
|
|
{
|
|
unsigned char a = addra[i];
|
|
unsigned char b = addrb[i];
|
|
|
|
delta += (float8) b - (float8) a;
|
|
delta /= 256;
|
|
}
|
|
|
|
Assert((delta >= 0) && (delta <= 1));
|
|
|
|
pfree(addra);
|
|
pfree(addrb);
|
|
|
|
PG_RETURN_FLOAT8(delta);
|
|
}
|
|
|
|
static void
|
|
brin_minmax_multi_serialize(BrinDesc *bdesc, Datum src, Datum *dst)
|
|
{
|
|
Ranges *ranges = (Ranges *) DatumGetPointer(src);
|
|
SerializedRanges *s;
|
|
|
|
/*
|
|
* In batch mode, we need to compress the accumulated values to the
|
|
* actually requested number of values/ranges.
|
|
*/
|
|
compactify_ranges(bdesc, ranges, ranges->target_maxvalues);
|
|
|
|
/* At this point everything has to be fully sorted. */
|
|
Assert(ranges->nsorted == ranges->nvalues);
|
|
|
|
s = brin_range_serialize(ranges);
|
|
dst[0] = PointerGetDatum(s);
|
|
}
|
|
|
|
static int
|
|
brin_minmax_multi_get_values(BrinDesc *bdesc, MinMaxMultiOptions *opts)
|
|
{
|
|
return MinMaxMultiGetValuesPerRange(opts);
|
|
}
|
|
|
|
/*
|
|
* Examine the given index tuple (which contains the partial status of a
|
|
* certain page range) by comparing it to the given value that comes from
|
|
* another heap tuple. If the new value is outside the min/max range
|
|
* specified by the existing tuple values, update the index tuple and return
|
|
* true. Otherwise, return false and do not modify in this case.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
|
|
{
|
|
BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
|
|
BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
|
|
Datum newval = PG_GETARG_DATUM(2);
|
|
bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_DATUM(3);
|
|
MinMaxMultiOptions *opts = (MinMaxMultiOptions *) PG_GET_OPCLASS_OPTIONS();
|
|
Oid colloid = PG_GET_COLLATION();
|
|
bool modified = false;
|
|
Form_pg_attribute attr;
|
|
AttrNumber attno;
|
|
Ranges *ranges;
|
|
SerializedRanges *serialized = NULL;
|
|
|
|
Assert(!isnull);
|
|
|
|
attno = column->bv_attno;
|
|
attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
|
|
|
|
/* use the already deserialized value, if possible */
|
|
ranges = (Ranges *) DatumGetPointer(column->bv_mem_value);
|
|
|
|
/*
|
|
* If this is the first non-null value, we need to initialize the range
|
|
* list. Otherwise, just extract the existing range list from BrinValues.
|
|
*
|
|
* When starting with an empty range, we assume this is a batch mode and
|
|
* we use a larger buffer. The buffer size is derived from the BRIN range
|
|
* size, number of rows per page, with some sensible min/max values. A
|
|
* small buffer would be bad for performance, but a large buffer might
|
|
* require a lot of memory (because of keeping all the values).
|
|
*/
|
|
if (column->bv_allnulls)
|
|
{
|
|
MemoryContext oldctx;
|
|
|
|
int target_maxvalues;
|
|
int maxvalues;
|
|
BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
|
|
|
|
/* what was specified as a reloption? */
|
|
target_maxvalues = brin_minmax_multi_get_values(bdesc, opts);
|
|
|
|
/*
|
|
* Determine the insert buffer size - we use 10x the target, capped to
|
|
* the maximum number of values in the heap range. This is more than
|
|
* enough, considering the actual number of rows per page is likely
|
|
* much lower, but meh.
|
|
*/
|
|
maxvalues = Min(target_maxvalues * MINMAX_BUFFER_FACTOR,
|
|
MaxHeapTuplesPerPage * pagesPerRange);
|
|
|
|
/* but always at least the original value */
|
|
maxvalues = Max(maxvalues, target_maxvalues);
|
|
|
|
/* always cap by MIN/MAX */
|
|
maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
|
|
maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
|
|
|
|
oldctx = MemoryContextSwitchTo(column->bv_context);
|
|
ranges = minmax_multi_init(maxvalues);
|
|
ranges->attno = attno;
|
|
ranges->colloid = colloid;
|
|
ranges->typid = attr->atttypid;
|
|
ranges->target_maxvalues = target_maxvalues;
|
|
|
|
/* we'll certainly need the comparator, so just look it up now */
|
|
ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
|
BTLessStrategyNumber);
|
|
|
|
MemoryContextSwitchTo(oldctx);
|
|
|
|
column->bv_allnulls = false;
|
|
modified = true;
|
|
|
|
column->bv_mem_value = PointerGetDatum(ranges);
|
|
column->bv_serialize = brin_minmax_multi_serialize;
|
|
}
|
|
else if (!ranges)
|
|
{
|
|
MemoryContext oldctx;
|
|
|
|
int maxvalues;
|
|
BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
|
|
|
|
oldctx = MemoryContextSwitchTo(column->bv_context);
|
|
|
|
serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
|
|
|
|
/*
|
|
* Determine the insert buffer size - we use 10x the target, capped to
|
|
* the maximum number of values in the heap range. This is more than
|
|
* enough, considering the actual number of rows per page is likely
|
|
* much lower, but meh.
|
|
*/
|
|
maxvalues = Min(serialized->maxvalues * MINMAX_BUFFER_FACTOR,
|
|
MaxHeapTuplesPerPage * pagesPerRange);
|
|
|
|
/* but always at least the original value */
|
|
maxvalues = Max(maxvalues, serialized->maxvalues);
|
|
|
|
/* always cap by MIN/MAX */
|
|
maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
|
|
maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
|
|
|
|
ranges = brin_range_deserialize(maxvalues, serialized);
|
|
|
|
ranges->attno = attno;
|
|
ranges->colloid = colloid;
|
|
ranges->typid = attr->atttypid;
|
|
|
|
/* we'll certainly need the comparator, so just look it up now */
|
|
ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
|
BTLessStrategyNumber);
|
|
|
|
column->bv_mem_value = PointerGetDatum(ranges);
|
|
column->bv_serialize = brin_minmax_multi_serialize;
|
|
|
|
MemoryContextSwitchTo(oldctx);
|
|
}
|
|
|
|
/*
|
|
* Try to add the new value to the range. We need to update the modified
|
|
* flag, so that we serialize the updated summary later.
|
|
*/
|
|
modified |= range_add_value(bdesc, colloid, attno, attr, ranges, newval);
|
|
|
|
|
|
PG_RETURN_BOOL(modified);
|
|
}
|
|
|
|
/*
|
|
* Given an index tuple corresponding to a certain page range and a scan key,
|
|
* return whether the scan key is consistent with the index tuple's min/max
|
|
* values. Return true if so, false otherwise.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_consistent(PG_FUNCTION_ARGS)
|
|
{
|
|
BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
|
|
BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
|
|
ScanKey *keys = (ScanKey *) PG_GETARG_POINTER(2);
|
|
int nkeys = PG_GETARG_INT32(3);
|
|
|
|
Oid colloid = PG_GET_COLLATION(),
|
|
subtype;
|
|
AttrNumber attno;
|
|
Datum value;
|
|
FmgrInfo *finfo;
|
|
SerializedRanges *serialized;
|
|
Ranges *ranges;
|
|
int keyno;
|
|
int rangeno;
|
|
int i;
|
|
|
|
attno = column->bv_attno;
|
|
|
|
serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
|
|
ranges = brin_range_deserialize(serialized->maxvalues, serialized);
|
|
|
|
/* inspect the ranges, and for each one evaluate the scan keys */
|
|
for (rangeno = 0; rangeno < ranges->nranges; rangeno++)
|
|
{
|
|
Datum minval = ranges->values[2 * rangeno];
|
|
Datum maxval = ranges->values[2 * rangeno + 1];
|
|
|
|
/* assume the range is matching, and we'll try to prove otherwise */
|
|
bool matching = true;
|
|
|
|
for (keyno = 0; keyno < nkeys; keyno++)
|
|
{
|
|
bool matches;
|
|
ScanKey key = keys[keyno];
|
|
|
|
/* NULL keys are handled and filtered-out in bringetbitmap */
|
|
Assert(!(key->sk_flags & SK_ISNULL));
|
|
|
|
attno = key->sk_attno;
|
|
subtype = key->sk_subtype;
|
|
value = key->sk_argument;
|
|
switch (key->sk_strategy)
|
|
{
|
|
case BTLessStrategyNumber:
|
|
case BTLessEqualStrategyNumber:
|
|
finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
|
|
key->sk_strategy);
|
|
/* first value from the array */
|
|
matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, minval, value));
|
|
break;
|
|
|
|
case BTEqualStrategyNumber:
|
|
{
|
|
Datum compar;
|
|
FmgrInfo *cmpFn;
|
|
|
|
/* by default this range does not match */
|
|
matches = false;
|
|
|
|
/*
|
|
* Otherwise, need to compare the new value with
|
|
* boundaries of all the ranges. First check if it's
|
|
* less than the absolute minimum, which is the first
|
|
* value in the array.
|
|
*/
|
|
cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
|
|
BTGreaterStrategyNumber);
|
|
compar = FunctionCall2Coll(cmpFn, colloid, minval, value);
|
|
|
|
/* smaller than the smallest value in this range */
|
|
if (DatumGetBool(compar))
|
|
break;
|
|
|
|
cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
|
|
BTLessStrategyNumber);
|
|
compar = FunctionCall2Coll(cmpFn, colloid, maxval, value);
|
|
|
|
/* larger than the largest value in this range */
|
|
if (DatumGetBool(compar))
|
|
break;
|
|
|
|
/*
|
|
* We haven't managed to eliminate this range, so
|
|
* consider it matching.
|
|
*/
|
|
matches = true;
|
|
|
|
break;
|
|
}
|
|
case BTGreaterEqualStrategyNumber:
|
|
case BTGreaterStrategyNumber:
|
|
finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
|
|
key->sk_strategy);
|
|
/* last value from the array */
|
|
matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, maxval, value));
|
|
break;
|
|
|
|
default:
|
|
/* shouldn't happen */
|
|
elog(ERROR, "invalid strategy number %d", key->sk_strategy);
|
|
matches = false;
|
|
break;
|
|
}
|
|
|
|
/* the range has to match all the scan keys */
|
|
matching &= matches;
|
|
|
|
/* once we find a non-matching key, we're done */
|
|
if (!matching)
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* have we found a range matching all scan keys? if yes, we're done
|
|
*/
|
|
if (matching)
|
|
PG_RETURN_BOOL(true);
|
|
}
|
|
|
|
/*
|
|
* And now inspect the values. We don't bother with doing a binary search
|
|
* here, because we're dealing with serialized / fully compacted ranges,
|
|
* so there should be only very few values.
|
|
*/
|
|
for (i = 0; i < ranges->nvalues; i++)
|
|
{
|
|
Datum val = ranges->values[2 * ranges->nranges + i];
|
|
|
|
/* assume the range is matching, and we'll try to prove otherwise */
|
|
bool matching = true;
|
|
|
|
for (keyno = 0; keyno < nkeys; keyno++)
|
|
{
|
|
bool matches;
|
|
ScanKey key = keys[keyno];
|
|
|
|
/* we've already dealt with NULL keys at the beginning */
|
|
if (key->sk_flags & SK_ISNULL)
|
|
continue;
|
|
|
|
attno = key->sk_attno;
|
|
subtype = key->sk_subtype;
|
|
value = key->sk_argument;
|
|
switch (key->sk_strategy)
|
|
{
|
|
case BTLessStrategyNumber:
|
|
case BTLessEqualStrategyNumber:
|
|
case BTEqualStrategyNumber:
|
|
case BTGreaterEqualStrategyNumber:
|
|
case BTGreaterStrategyNumber:
|
|
|
|
finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
|
|
key->sk_strategy);
|
|
matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, val, value));
|
|
break;
|
|
|
|
default:
|
|
/* shouldn't happen */
|
|
elog(ERROR, "invalid strategy number %d", key->sk_strategy);
|
|
matches = false;
|
|
break;
|
|
}
|
|
|
|
/* the range has to match all the scan keys */
|
|
matching &= matches;
|
|
|
|
/* once we find a non-matching key, we're done */
|
|
if (!matching)
|
|
break;
|
|
}
|
|
|
|
/* have we found a range matching all scan keys? if yes, we're done */
|
|
if (matching)
|
|
PG_RETURN_BOOL(true);
|
|
}
|
|
|
|
PG_RETURN_BOOL(false);
|
|
}
|
|
|
|
/*
|
|
* Given two BrinValues, update the first of them as a union of the summary
|
|
* values contained in both. The second one is untouched.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_union(PG_FUNCTION_ARGS)
|
|
{
|
|
BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
|
|
BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
|
|
BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
|
|
|
|
Oid colloid = PG_GET_COLLATION();
|
|
SerializedRanges *serialized_a;
|
|
SerializedRanges *serialized_b;
|
|
Ranges *ranges_a;
|
|
Ranges *ranges_b;
|
|
AttrNumber attno;
|
|
Form_pg_attribute attr;
|
|
ExpandedRange *eranges;
|
|
int neranges;
|
|
FmgrInfo *cmpFn,
|
|
*distanceFn;
|
|
DistanceValue *distances;
|
|
MemoryContext ctx;
|
|
MemoryContext oldctx;
|
|
|
|
Assert(col_a->bv_attno == col_b->bv_attno);
|
|
Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
|
|
|
|
attno = col_a->bv_attno;
|
|
attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
|
|
|
|
serialized_a = (SerializedRanges *) PG_DETOAST_DATUM(col_a->bv_values[0]);
|
|
serialized_b = (SerializedRanges *) PG_DETOAST_DATUM(col_b->bv_values[0]);
|
|
|
|
ranges_a = brin_range_deserialize(serialized_a->maxvalues, serialized_a);
|
|
ranges_b = brin_range_deserialize(serialized_b->maxvalues, serialized_b);
|
|
|
|
/* make sure neither of the ranges is NULL */
|
|
Assert(ranges_a && ranges_b);
|
|
|
|
neranges = (ranges_a->nranges + ranges_a->nvalues) +
|
|
(ranges_b->nranges + ranges_b->nvalues);
|
|
|
|
/*
|
|
* The distanceFn calls (which may internally call e.g. numeric_le) may
|
|
* allocate quite a bit of memory, and we must not leak it. Otherwise,
|
|
* we'd have problems e.g. when building indexes. So we create a local
|
|
* memory context and make sure we free the memory before leaving this
|
|
* function (not after every call).
|
|
*/
|
|
ctx = AllocSetContextCreate(CurrentMemoryContext,
|
|
"minmax-multi context",
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
oldctx = MemoryContextSwitchTo(ctx);
|
|
|
|
/* allocate and fill */
|
|
eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
|
|
|
|
/* fill the expanded ranges with entries for the first range */
|
|
fill_expanded_ranges(eranges, ranges_a->nranges + ranges_a->nvalues,
|
|
ranges_a);
|
|
|
|
/* and now add combine ranges for the second range */
|
|
fill_expanded_ranges(&eranges[ranges_a->nranges + ranges_a->nvalues],
|
|
ranges_b->nranges + ranges_b->nvalues,
|
|
ranges_b);
|
|
|
|
cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
|
|
BTLessStrategyNumber);
|
|
|
|
/* sort the expanded ranges */
|
|
neranges = sort_expanded_ranges(cmpFn, colloid, eranges, neranges);
|
|
|
|
/*
|
|
* We've loaded two different lists of expanded ranges, so some of them
|
|
* may be overlapping. So walk through them and merge them.
|
|
*/
|
|
neranges = merge_overlapping_ranges(cmpFn, colloid, eranges, neranges);
|
|
|
|
/* check that the combine ranges are correct (no overlaps, ordering) */
|
|
AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
|
|
|
|
/*
|
|
* If needed, reduce some of the ranges.
|
|
*
|
|
* XXX This may be fairly expensive, so maybe we should do it only when
|
|
* it's actually needed (when we have too many ranges).
|
|
*/
|
|
|
|
/* build array of gap distances and sort them in ascending order */
|
|
distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
|
|
distances = build_distances(distanceFn, colloid, eranges, neranges);
|
|
|
|
/*
|
|
* See how many values would be needed to store the current ranges, and if
|
|
* needed combine as many of them to get below the threshold. The
|
|
* collapsed ranges will be stored as a single value.
|
|
*
|
|
* XXX This does not apply the load factor, as we don't expect to add more
|
|
* values to the range, so we prefer to keep as many ranges as possible.
|
|
*
|
|
* XXX Can the maxvalues be different in the two ranges? Perhaps we should
|
|
* use maximum of those?
|
|
*/
|
|
neranges = reduce_expanded_ranges(eranges, neranges, distances,
|
|
ranges_a->maxvalues,
|
|
cmpFn, colloid);
|
|
|
|
/* Is the result of reducing expanded ranges correct? */
|
|
AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
|
|
|
|
/* update the first range summary */
|
|
store_expanded_ranges(ranges_a, eranges, neranges);
|
|
|
|
MemoryContextSwitchTo(oldctx);
|
|
MemoryContextDelete(ctx);
|
|
|
|
/* cleanup and update the serialized value */
|
|
pfree(serialized_a);
|
|
col_a->bv_values[0] = PointerGetDatum(brin_range_serialize(ranges_a));
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
/*
|
|
* Cache and return minmax multi opclass support procedure
|
|
*
|
|
* Return the procedure corresponding to the given function support number
|
|
* or null if it does not exist.
|
|
*/
|
|
static FmgrInfo *
|
|
minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
|
|
{
|
|
MinmaxMultiOpaque *opaque;
|
|
uint16 basenum = procnum - PROCNUM_BASE;
|
|
|
|
/*
|
|
* We cache these in the opaque struct, to avoid repetitive syscache
|
|
* lookups.
|
|
*/
|
|
opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
|
|
|
|
/*
|
|
* If we already searched for this proc and didn't find it, don't bother
|
|
* searching again.
|
|
*/
|
|
if (opaque->extra_proc_missing[basenum])
|
|
return NULL;
|
|
|
|
if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid)
|
|
{
|
|
if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno,
|
|
procnum)))
|
|
{
|
|
fmgr_info_copy(&opaque->extra_procinfos[basenum],
|
|
index_getprocinfo(bdesc->bd_index, attno, procnum),
|
|
bdesc->bd_context);
|
|
}
|
|
else
|
|
{
|
|
opaque->extra_proc_missing[basenum] = true;
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
return &opaque->extra_procinfos[basenum];
|
|
}
|
|
|
|
/*
|
|
* Cache and return the procedure for the given strategy.
|
|
*
|
|
* Note: this function mirrors minmax_multi_get_strategy_procinfo; see notes
|
|
* there. If changes are made here, see that function too.
|
|
*/
|
|
static FmgrInfo *
|
|
minmax_multi_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype,
|
|
uint16 strategynum)
|
|
{
|
|
MinmaxMultiOpaque *opaque;
|
|
|
|
Assert(strategynum >= 1 &&
|
|
strategynum <= BTMaxStrategyNumber);
|
|
|
|
opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
|
|
|
|
/*
|
|
* We cache the procedures for the previous subtype in the opaque struct,
|
|
* to avoid repetitive syscache lookups. If the subtype changed,
|
|
* invalidate all the cached entries.
|
|
*/
|
|
if (opaque->cached_subtype != subtype)
|
|
{
|
|
uint16 i;
|
|
|
|
for (i = 1; i <= BTMaxStrategyNumber; i++)
|
|
opaque->strategy_procinfos[i - 1].fn_oid = InvalidOid;
|
|
opaque->cached_subtype = subtype;
|
|
}
|
|
|
|
if (opaque->strategy_procinfos[strategynum - 1].fn_oid == InvalidOid)
|
|
{
|
|
Form_pg_attribute attr;
|
|
HeapTuple tuple;
|
|
Oid opfamily,
|
|
oprid;
|
|
|
|
opfamily = bdesc->bd_index->rd_opfamily[attno - 1];
|
|
attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
|
|
tuple = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
|
|
ObjectIdGetDatum(attr->atttypid),
|
|
ObjectIdGetDatum(subtype),
|
|
Int16GetDatum(strategynum));
|
|
if (!HeapTupleIsValid(tuple))
|
|
elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
|
|
strategynum, attr->atttypid, subtype, opfamily);
|
|
|
|
oprid = DatumGetObjectId(SysCacheGetAttrNotNull(AMOPSTRATEGY, tuple,
|
|
Anum_pg_amop_amopopr));
|
|
ReleaseSysCache(tuple);
|
|
Assert(RegProcedureIsValid(oprid));
|
|
|
|
fmgr_info_cxt(get_opcode(oprid),
|
|
&opaque->strategy_procinfos[strategynum - 1],
|
|
bdesc->bd_context);
|
|
}
|
|
|
|
return &opaque->strategy_procinfos[strategynum - 1];
|
|
}
|
|
|
|
Datum
|
|
brin_minmax_multi_options(PG_FUNCTION_ARGS)
|
|
{
|
|
local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
|
|
|
|
init_local_reloptions(relopts, sizeof(MinMaxMultiOptions));
|
|
|
|
add_local_int_reloption(relopts, "values_per_range", "desc",
|
|
MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE, 8, 256,
|
|
offsetof(MinMaxMultiOptions, valuesPerRange));
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
/*
|
|
* brin_minmax_multi_summary_in
|
|
* - input routine for type brin_minmax_multi_summary.
|
|
*
|
|
* brin_minmax_multi_summary is only used internally to represent summaries
|
|
* in BRIN minmax-multi indexes, so it has no operations of its own, and we
|
|
* disallow input too.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_summary_in(PG_FUNCTION_ARGS)
|
|
{
|
|
/*
|
|
* brin_minmax_multi_summary stores the data in binary form and parsing
|
|
* text input is not needed, so disallow this.
|
|
*/
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary")));
|
|
|
|
PG_RETURN_VOID(); /* keep compiler quiet */
|
|
}
|
|
|
|
|
|
/*
|
|
* brin_minmax_multi_summary_out
|
|
* - output routine for type brin_minmax_multi_summary.
|
|
*
|
|
* BRIN minmax-multi summaries are serialized into a bytea value, but we
|
|
* want to output something nicer humans can understand.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_summary_out(PG_FUNCTION_ARGS)
|
|
{
|
|
int i;
|
|
int idx;
|
|
SerializedRanges *ranges;
|
|
Ranges *ranges_deserialized;
|
|
StringInfoData str;
|
|
bool isvarlena;
|
|
Oid outfunc;
|
|
FmgrInfo fmgrinfo;
|
|
ArrayBuildState *astate_values = NULL;
|
|
|
|
initStringInfo(&str);
|
|
appendStringInfoChar(&str, '{');
|
|
|
|
/*
|
|
* Detoast to get value with full 4B header (can't be stored in a toast
|
|
* table, but can use 1B header).
|
|
*/
|
|
ranges = (SerializedRanges *) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(0));
|
|
|
|
/* lookup output func for the type */
|
|
getTypeOutputInfo(ranges->typid, &outfunc, &isvarlena);
|
|
fmgr_info(outfunc, &fmgrinfo);
|
|
|
|
/* deserialize the range info easy-to-process pieces */
|
|
ranges_deserialized = brin_range_deserialize(ranges->maxvalues, ranges);
|
|
|
|
appendStringInfo(&str, "nranges: %d nvalues: %d maxvalues: %d",
|
|
ranges_deserialized->nranges,
|
|
ranges_deserialized->nvalues,
|
|
ranges_deserialized->maxvalues);
|
|
|
|
/* serialize ranges */
|
|
idx = 0;
|
|
for (i = 0; i < ranges_deserialized->nranges; i++)
|
|
{
|
|
char *a,
|
|
*b;
|
|
text *c;
|
|
StringInfoData buf;
|
|
|
|
initStringInfo(&buf);
|
|
|
|
a = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
|
|
b = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
|
|
|
|
appendStringInfo(&buf, "%s ... %s", a, b);
|
|
|
|
c = cstring_to_text_with_len(buf.data, buf.len);
|
|
|
|
astate_values = accumArrayResult(astate_values,
|
|
PointerGetDatum(c),
|
|
false,
|
|
TEXTOID,
|
|
CurrentMemoryContext);
|
|
}
|
|
|
|
if (ranges_deserialized->nranges > 0)
|
|
{
|
|
Oid typoutput;
|
|
bool typIsVarlena;
|
|
Datum val;
|
|
char *extval;
|
|
|
|
getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
|
|
|
|
val = makeArrayResult(astate_values, CurrentMemoryContext);
|
|
|
|
extval = OidOutputFunctionCall(typoutput, val);
|
|
|
|
appendStringInfo(&str, " ranges: %s", extval);
|
|
}
|
|
|
|
/* serialize individual values */
|
|
astate_values = NULL;
|
|
|
|
for (i = 0; i < ranges_deserialized->nvalues; i++)
|
|
{
|
|
Datum a;
|
|
text *b;
|
|
|
|
a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]);
|
|
b = cstring_to_text(DatumGetCString(a));
|
|
|
|
astate_values = accumArrayResult(astate_values,
|
|
PointerGetDatum(b),
|
|
false,
|
|
TEXTOID,
|
|
CurrentMemoryContext);
|
|
}
|
|
|
|
if (ranges_deserialized->nvalues > 0)
|
|
{
|
|
Oid typoutput;
|
|
bool typIsVarlena;
|
|
Datum val;
|
|
char *extval;
|
|
|
|
getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
|
|
|
|
val = makeArrayResult(astate_values, CurrentMemoryContext);
|
|
|
|
extval = OidOutputFunctionCall(typoutput, val);
|
|
|
|
appendStringInfo(&str, " values: %s", extval);
|
|
}
|
|
|
|
|
|
appendStringInfoChar(&str, '}');
|
|
|
|
PG_RETURN_CSTRING(str.data);
|
|
}
|
|
|
|
/*
|
|
* brin_minmax_multi_summary_recv
|
|
* - binary input routine for type brin_minmax_multi_summary.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_summary_recv(PG_FUNCTION_ARGS)
|
|
{
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary")));
|
|
|
|
PG_RETURN_VOID(); /* keep compiler quiet */
|
|
}
|
|
|
|
/*
|
|
* brin_minmax_multi_summary_send
|
|
* - binary output routine for type brin_minmax_multi_summary.
|
|
*
|
|
* BRIN minmax-multi summaries are serialized in a bytea value (although
|
|
* the type is named differently), so let's just send that.
|
|
*/
|
|
Datum
|
|
brin_minmax_multi_summary_send(PG_FUNCTION_ARGS)
|
|
{
|
|
return byteasend(fcinfo);
|
|
}
|