/*
 * brin_minmax_multi.c
 *		Implementation of Multi Min/Max opclass for BRIN
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * Implements a variant of minmax opclass, where the summary is composed of
 * multiple smaller intervals. This allows us to handle outliers, which
 * usually make the simple minmax opclass inefficient.
 *
 * Consider for example a page range with simple minmax interval [1000,2000],
 * and assume a new row gets inserted into the range with value 1000000.
 * As a result the interval becomes [1000,1000000]. I.e. the minmax interval
 * got 1000x wider and won't be useful to eliminate scan keys between 2001
 * and 1000000.
 *
 * With minmax-multi opclass, we may have [1000,2000] interval initially,
 * but after adding the new row we start tracking it as two intervals:
 *
 *	 [1000,2000] and [1000000,1000000]
 *
 * This allows us to still eliminate the page range when the scan keys hit
 * the gap between 2000 and 1000000, making it useful in cases when the
 * simple minmax opclass gets inefficient.
 *
 * The number of intervals tracked per page range is somewhat flexible.
 * What is restricted is the number of values per page range, and the limit
 * defaults to 32 (see values_per_range reloption). Collapsed intervals
 * (with equal minimum and maximum value) are stored as a single value,
 * while regular intervals require two values.
 *
 * When the number of values gets too high (by adding new values to the
 * summary), we merge some of the intervals to free space for more values.
 * This is done in a greedy way - we simply pick the two closest intervals,
 * merge them, and repeat this until the number of values to store gets
 * sufficiently low (below 50% of maximum values; that threshold is mostly
 * arbitrary and may be changed easily).
 *
 * To pick the closest intervals we use the "distance" support procedure,
 * which measures space between two ranges (i.e.
the length of an interval). * The computed value may be an approximation - in the worst case we will * merge two ranges that are slightly less optimal at that step, but the * index should still produce correct results. * * The compactions (reducing the number of values) is fairly expensive, as * it requires calling the distance functions, sorting etc. So when building * the summary, we use a significantly larger buffer, and only enforce the * exact limit at the very end. This improves performance, and it also helps * with building better ranges (due to the greedy approach). * * * IDENTIFICATION * src/backend/access/brin/brin_minmax_multi.c */ #include "postgres.h" /* needed for PGSQL_AF_INET */ #include #include "access/genam.h" #include "access/brin.h" #include "access/brin_internal.h" #include "access/brin_tuple.h" #include "access/reloptions.h" #include "access/stratnum.h" #include "access/htup_details.h" #include "catalog/pg_type.h" #include "catalog/pg_am.h" #include "catalog/pg_amop.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/date.h" #include "utils/datum.h" #include "utils/float.h" #include "utils/inet.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/numeric.h" #include "utils/pg_lsn.h" #include "utils/rel.h" #include "utils/syscache.h" #include "utils/timestamp.h" #include "utils/uuid.h" /* * Additional SQL level support functions * * Procedure numbers must not use values reserved for BRIN itself; see * brin_internal.h. */ #define MINMAX_MAX_PROCNUMS 1 /* maximum support procs we need */ #define PROCNUM_DISTANCE 11 /* required, distance between values */ /* * Subtract this from procnum to obtain index in MinmaxMultiOpaque arrays * (Must be equal to minimum of private procnums). */ #define PROCNUM_BASE 11 /* * Sizing the insert buffer - we use 10x the number of values specified * in the reloption, but we cap it to 8192 not to get too large. 
When
 * the buffer gets full, we reduce the number of values by half.
 */
/* multiplier applied to the reloption value to size the insert buffer */
#define MINMAX_BUFFER_FACTOR			10
/* NOTE(review): presumably the lower bound on the buffer size - confirm at the use site */
#define MINMAX_BUFFER_MIN				256
/* upper cap on the insert buffer size (number of values) */
#define MINMAX_BUFFER_MAX				8192
/* NOTE(review): presumably the "by half" compaction ratio mentioned above - confirm at the use site */
#define MINMAX_BUFFER_LOAD_FACTOR		0.5

/*
 * Cache of function lookup info for the opclass.
 *
 * The extra_* arrays have one slot per private support procedure and are
 * indexed by (procnum - PROCNUM_BASE). strategy_procinfos has one slot per
 * btree strategy number.
 *
 * NOTE(review): cached_subtype presumably records the subtype the strategy
 * cache was built for - confirm in minmax_multi_get_strategy_procinfo.
 */
typedef struct MinmaxMultiOpaque
{
	FmgrInfo	extra_procinfos[MINMAX_MAX_PROCNUMS];
	bool		extra_proc_missing[MINMAX_MAX_PROCNUMS];
	Oid			cached_subtype;
	FmgrInfo	strategy_procinfos[BTMaxStrategyNumber];
} MinmaxMultiOpaque;

/*
 * Storage type for BRIN's minmax-multi reloptions
 */
typedef struct MinMaxMultiOptions
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int			valuesPerRange; /* number of values per range */
} MinMaxMultiOptions;

/* value used when the reloption is absent or set to zero (see macro below) */
#define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE           32

/*
 * Fetch the values-per-range setting from an options pointer. Falls back to
 * MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE when 'opts' is NULL or when the stored
 * valuesPerRange is 0. Note that 'opts' is evaluated more than once.
 */
#define MinMaxMultiGetValuesPerRange(opts) \
		((opts) && (((MinMaxMultiOptions *) (opts))->valuesPerRange != 0) ? \
		 ((MinMaxMultiOptions *) (opts))->valuesPerRange : \
		 MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE)

/* true when both arguments are negative, or both are non-negative */
#define SAMESIGN(a,b) (((a) < 0) == ((b) < 0))

/*
 * The summary of minmax-multi indexes has two representations - Ranges for
 * convenient processing, and SerializedRanges for storage in bytea value.
 *
 * The Ranges struct stores the boundary values in a single array, but we
 * treat regular and single-point ranges differently to save space. For
 * regular ranges (with different boundary values) we have to store both
 * the lower and upper bound of the range, while for "single-point ranges"
 * we only need to store a single value.
 *
 * The 'values' array stores boundary values for regular ranges first (there
 * are 2*nranges values to store), and then the nvalues boundary values for
 * single-point ranges. That is, we have (2*nranges + nvalues) boundary
 * values in the array.
 *
 * +-------------------------+----------------------------------+
 * | ranges (2 * nranges of) | single point values (nvalues of) |
 * +-------------------------+----------------------------------+
 *
 * This allows us to quickly add new values, and store outliers without
 * having to widen any of the existing range values.
*
 * 'nsorted' denotes how many of 'nvalues' in the values[] array are sorted.
 * When nsorted == nvalues, all single point values are sorted.
 *
 * We never store more than maxvalues values (as set by values_per_range
 * reloption). If needed we merge some of the ranges.
 *
 * To minimize palloc overhead, we always allocate the full array with
 * space for maxvalues elements. This should be fine as long as the
 * maxvalues is reasonably small (64 seems fine), which is the case
 * thanks to values_per_range reloption being limited to 256.
 */
typedef struct Ranges
{
	/* Cache information that we need quite often. */
	Oid			typid;			/* type of the stored values */
	Oid			colloid;		/* collation passed to 'cmp' by the sort/search helpers */
	AttrNumber	attno;
	FmgrInfo   *cmp;			/* boolean "less than" procedure used for sorting/bsearch */

	/* (2*nranges + nvalues) <= maxvalues */
	int			nranges;		/* number of ranges in the values[] array */
	int			nsorted;		/* number of leading point values which are
								 * sorted; the rest (up to nvalues) is unsorted */
	int			nvalues;		/* number of point values in values[] array */
	int			maxvalues;		/* number of elements in the values[] array */

	/*
	 * We simply add the values into a large buffer, without any expensive
	 * steps (sorting, deduplication, ...). The buffer is a multiple of the
	 * target number of values, so the compaction happens less often,
	 * amortizing the costs. We keep the actual target and compact to the
	 * requested number of values at the very end, before serializing to
	 * on-disk representation.
	 */
	/* requested number of values */
	int			target_maxvalues;

	/* values stored for this range - either raw values, or ranges */
	Datum		values[FLEXIBLE_ARRAY_MEMBER];
} Ranges;

/*
 * On-disk the summary is stored as a bytea value, with a simple header
 * with basic metadata, followed by the boundary values. It has a varlena
 * header, so can be treated as varlena directly.
 *
 * See brin_range_serialize/brin_range_deserialize for serialization details.
 */
typedef struct SerializedRanges
{
	/* varlena header (do not touch directly!) */
	int32		vl_len_;

	/* type of values stored in the data array */
	Oid			typid;

	/* (2*nranges + nvalues) <= maxvalues */
	int			nranges;		/* number of ranges in the array (stored) */
	int			nvalues;		/* number of single-point values in the data
								 * array (stored after the range boundaries) */
	int			maxvalues;		/* maximum number of values (reloption) */

	/* contains the actual data, packed without alignment padding */
	char		data[FLEXIBLE_ARRAY_MEMBER];
} SerializedRanges;

/* Conversion between the in-memory and the on-disk representation. */
static SerializedRanges *brin_range_serialize(Ranges *range);

static Ranges *brin_range_deserialize(int maxvalues,
									  SerializedRanges *serialized);


/*
 * Used to represent ranges expanded to make merging and combining easier.
 *
 * Each expanded range is essentially an interval, represented by min/max
 * values, along with a flag whether it's a collapsed range (in which case
 * the min and max values are equal). We have the flag to handle by-ref
 * data types - we can't simply compare the datums, and this saves some
 * calls to the type-specific comparator function.
 */
typedef struct ExpandedRange
{
	Datum		minval;			/* lower boundary */
	Datum		maxval;			/* upper boundary */
	bool		collapsed;		/* true if minval==maxval */
} ExpandedRange;

/*
 * Represents a distance between two ranges (identified by index into
 * an array of extended ranges).
 */
typedef struct DistanceValue
{
	int			index;			/* index of the range on the left side of the gap */
	double		value;			/* size of the gap, as reported by the distance proc */
} DistanceValue;


/* Cache for support and strategy procedures. */

static FmgrInfo *minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno,
										   uint16 procnum);

static FmgrInfo *minmax_multi_get_strategy_procinfo(BrinDesc *bdesc,
													 uint16 attno, Oid subtype,
													 uint16 strategynum);

/*
 * Extra argument handed to the qsort_arg/bsearch_arg comparators: the
 * comparison procedure to call and the collation to call it with.
 */
typedef struct compare_context
{
	FmgrInfo   *cmpFn;
	Oid			colloid;
} compare_context;

static int	compare_values(const void *a, const void *b, void *arg);


#ifdef USE_ASSERT_CHECKING
/*
 * Check that the order of the array values is correct, using the cmp
 * function (which should be BTLessStrategyNumber).
*/
static void
AssertArrayOrder(FmgrInfo *cmp, Oid colloid, Datum *values, int nvalues)
{
	int			i;
	Datum		lt;

	/* every element must compare strictly "less than" its successor */
	for (i = 0; i < (nvalues - 1); i++)
	{
		lt = FunctionCall2Coll(cmp, colloid, values[i], values[i + 1]);
		Assert(DatumGetBool(lt));
	}
}
#endif

/*
 * Comprehensive check of the Ranges structure.
 *
 * ranges	- the structure to validate
 * cmpFn	- "less than" procedure used for the ordering checks
 * colloid	- collation passed to cmpFn
 *
 * The whole body is compiled only with USE_ASSERT_CHECKING; otherwise this
 * is a no-op. Note that the final duplicate check uses ranges->cmp and
 * ranges->colloid rather than the cmpFn/colloid arguments.
 */
static void
AssertCheckRanges(Ranges *ranges, FmgrInfo *cmpFn, Oid colloid)
{
#ifdef USE_ASSERT_CHECKING
	int			i;

	/* some basic sanity checks */
	Assert(ranges->nranges >= 0);
	Assert(ranges->nsorted >= 0);
	Assert(ranges->nvalues >= ranges->nsorted);
	Assert(ranges->maxvalues >= 2 * ranges->nranges + ranges->nvalues);
	Assert(ranges->typid != InvalidOid);

	/*
	 * First the ranges - there are 2*nranges boundary values, and the values
	 * have to be strictly ordered (equal values would mean the range is
	 * collapsed, and should be stored as a point). This also guarantees that
	 * the ranges do not overlap.
	 */
	AssertArrayOrder(cmpFn, colloid, ranges->values, 2 * ranges->nranges);

	/* then the sorted part of the single-point values (nsorted of them) */
	AssertArrayOrder(cmpFn, colloid, &ranges->values[2 * ranges->nranges],
					 ranges->nsorted);

	/*
	 * Check that none of the point values (both sorted and unsorted) is
	 * covered by any of the ranges.
	 */
	if (ranges->nranges > 0)
	{
		for (i = 0; i < ranges->nvalues; i++)
		{
			Datum		compar;
			int			start,
						end;
			Datum		minvalue = ranges->values[0];
			Datum		maxvalue = ranges->values[2 * ranges->nranges - 1];
			Datum		value = ranges->values[2 * ranges->nranges + i];

			compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);

			/*
			 * If the value is smaller than the lower bound in the first range
			 * then it cannot possibly be in any of the ranges.
			 */
			if (DatumGetBool(compar))
				continue;

			compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);

			/*
			 * Likewise, if the value is larger than the upper bound of the
			 * final range, then it cannot possibly be inside any of the
			 * ranges.
			 */
			if (DatumGetBool(compar))
				continue;

			/* bsearch the ranges to see if 'value' fits within any of them */
			start = 0;			/* first range */
			end = ranges->nranges - 1;	/* last range */
			while (true)
			{
				int			midpoint = (start + end) / 2;

				/* this means we ran out of ranges in the last step */
				if (start > end)
					break;

				/* copy the min/max values from the ranges */
				minvalue = ranges->values[2 * midpoint];
				maxvalue = ranges->values[2 * midpoint + 1];

				/*
				 * Is the value smaller than the minval? If yes, we'll recurse
				 * to the left side of range array.
				 */
				compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);

				/* smaller than the smallest value in this range */
				if (DatumGetBool(compar))
				{
					end = (midpoint - 1);
					continue;
				}

				/*
				 * Is the value greater than the maxval? If yes, we'll recurse
				 * to the right side of range array.
				 */
				compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);

				/* larger than the largest value in this range */
				if (DatumGetBool(compar))
				{
					start = (midpoint + 1);
					continue;
				}

				/*
				 * Neither below nor above this range, so the value lies
				 * within it - which must never happen.
				 */
				Assert(false);
			}
		}
	}

	/* and values in the unsorted part must not be in the sorted part */
	if (ranges->nsorted > 0)
	{
		compare_context cxt;

		cxt.colloid = ranges->colloid;
		cxt.cmpFn = ranges->cmp;

		for (i = ranges->nsorted; i < ranges->nvalues; i++)
		{
			Datum		value = ranges->values[2 * ranges->nranges + i];

			Assert(bsearch_arg(&value, &ranges->values[2 * ranges->nranges],
							   ranges->nsorted, sizeof(Datum),
							   compare_values, (void *) &cxt) == NULL);
		}
	}
#endif
}

/*
 * Check that the expanded ranges (built when reducing the number of ranges
 * by combining some of them) are correctly sorted and do not overlap.
*/ static void AssertCheckExpandedRanges(BrinDesc *bdesc, Oid colloid, AttrNumber attno, Form_pg_attribute attr, ExpandedRange *ranges, int nranges) { #ifdef USE_ASSERT_CHECKING int i; FmgrInfo *eq; FmgrInfo *lt; eq = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid, BTEqualStrategyNumber); lt = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid, BTLessStrategyNumber); /* * Each range independently should be valid, i.e. that for the boundary * values (lower <= upper). */ for (i = 0; i < nranges; i++) { Datum r; Datum minval = ranges[i].minval; Datum maxval = ranges[i].maxval; if (ranges[i].collapsed) /* collapsed: minval == maxval */ r = FunctionCall2Coll(eq, colloid, minval, maxval); else /* non-collapsed: minval < maxval */ r = FunctionCall2Coll(lt, colloid, minval, maxval); Assert(DatumGetBool(r)); } /* * And the ranges should be ordered and must not overlap, i.e. upper < * lower for boundaries of consecutive ranges. */ for (i = 0; i < nranges - 1; i++) { Datum r; Datum maxval = ranges[i].maxval; Datum minval = ranges[i + 1].minval; r = FunctionCall2Coll(lt, colloid, maxval, minval); Assert(DatumGetBool(r)); } #endif } /* * minmax_multi_init * Initialize the deserialized range list, allocate all the memory. * * This is only in-memory representation of the ranges, so we allocate * enough space for the maximum number of values (so as not to have to do * repallocs as the ranges grow). */ static Ranges * minmax_multi_init(int maxvalues) { Size len; Ranges *ranges; Assert(maxvalues > 0); len = offsetof(Ranges, values); /* fixed header */ len += maxvalues * sizeof(Datum); /* Datum values */ ranges = (Ranges *) palloc0(len); ranges->maxvalues = maxvalues; return ranges; } /* * range_deduplicate_values * Deduplicate the part with values in the simple points. * * This is meant to be a cheaper way of reducing the size of the ranges. 
It
 * does not touch the ranges, and only sorts the other values - it does not
 * call the distance functions, which may be quite expensive, etc.
 *
 * We do know the values are not duplicate with the ranges, because we check
 * that before adding a new value. Same for the sorted part of values.
 */
static void
range_deduplicate_values(Ranges *range)
{
	compare_context cxt;
	Datum	   *points;
	int			nkept;

	/*
	 * Nothing to do when all point values are already sorted (probably
	 * can't happen, as new values are always added to the unsorted part).
	 */
	if (range->nvalues == range->nsorted)
		return;

	/* comparator setup shared by the sort and the duplicate detection */
	cxt.cmpFn = range->cmp;
	cxt.colloid = range->colloid;

	/* point values live right behind the (always sorted) range boundaries */
	points = &range->values[2 * range->nranges];

	/*
	 * XXX A merge sort could exploit the fact that the leading part of the
	 * array is sorted already, which might be noticeably faster when that
	 * part is large.
	 */
	qsort_arg(points, range->nvalues, sizeof(Datum),
			  compare_values, &cxt);

	/*
	 * Squeeze out duplicates in place. The first value is always kept; each
	 * later value is kept only if it differs from its immediate predecessor,
	 * otherwise it is skipped.
	 */
	nkept = 1;
	for (int pos = 1; pos < range->nvalues; pos++)
	{
		if (compare_values(&points[pos - 1], &points[pos],
						   (void *) &cxt) != 0)
		{
			points[nkept] = points[pos];
			nkept++;
		}
	}

	/* everything that is left is sorted and unique */
	range->nsorted = nkept;
	range->nvalues = nkept;

	AssertCheckRanges(range, range->cmp, range->colloid);
}


/*
 * brin_range_serialize
 *	  Serialize the in-memory representation into a compact varlena value.
 *
 * Simply copy the header and then also the individual values, as stored
 * in the in-memory value array.
*/
static SerializedRanges *
brin_range_serialize(Ranges *range)
{
	Size		len;
	int			nvalues;
	SerializedRanges *serialized;
	Oid			typid;
	int			typlen;
	bool		typbyval;

	char	   *ptr;

	/* simple sanity checks */
	Assert(range->nranges >= 0);
	Assert(range->nsorted >= 0);
	Assert(range->nvalues >= 0);
	Assert(range->maxvalues > 0);
	Assert(range->target_maxvalues > 0);

	/* at this point the range should be compacted to the target size */
	Assert(2 * range->nranges + range->nvalues <= range->target_maxvalues);

	Assert(range->target_maxvalues <= range->maxvalues);

	/* range boundaries are always sorted */
	Assert(range->nvalues >= range->nsorted);

	/*
	 * Deduplicate values, if there's unsorted part. Note that this modifies
	 * the caller's Ranges (it sorts the point values and may reduce nvalues).
	 */
	range_deduplicate_values(range);

	/* see how many Datum values we actually have */
	nvalues = 2 * range->nranges + range->nvalues;

	typid = range->typid;
	typbyval = get_typbyval(typid);
	typlen = get_typlen(typid);

	/* header is always needed */
	len = offsetof(SerializedRanges, data);

	/*
	 * The space needed depends on data type - for fixed-length data types
	 * (by-value and some by-reference) it's pretty simple, just multiply
	 * (attlen * nvalues) and we're done. For variable-length by-reference
	 * types we need to actually walk all the values and sum the lengths.
	 */
	if (typlen == -1)			/* varlena */
	{
		int			i;

		for (i = 0; i < nvalues; i++)
		{
			/*
			 * VARSIZE_ANY expects a pointer, so convert the Datum explicitly
			 * (same as the copy loop below does).
			 */
			len += VARSIZE_ANY(DatumGetPointer(range->values[i]));
		}
	}
	else if (typlen == -2)		/* cstring */
	{
		int			i;

		for (i = 0; i < nvalues; i++)
		{
			/* don't forget to include the null terminator ;-) */
			len += strlen(DatumGetCString(range->values[i])) + 1;
		}
	}
	else						/* fixed-length types (even by-reference) */
	{
		Assert(typlen > 0);
		len += nvalues * typlen;
	}

	/*
	 * Allocate the serialized object, copy the basic information. The
	 * serialized object is a varlena, so update the header.
	 */
	serialized = (SerializedRanges *) palloc0(len);
	SET_VARSIZE(serialized, len);

	serialized->typid = typid;
	serialized->nranges = range->nranges;
	serialized->nvalues = range->nvalues;
	serialized->maxvalues = range->target_maxvalues;

	/*
	 * And now copy also the boundary values (like the length calculation this
	 * depends on the particular data type). The values are packed back to
	 * back, without any alignment padding.
	 */
	ptr = serialized->data;		/* start of the serialized data */

	for (int i = 0; i < nvalues; i++)
	{
		if (typbyval)			/* simple by-value data types */
		{
			Datum		tmp;

			/*
			 * For byval types, we need to copy just the significant bytes -
			 * we can't use memcpy directly, as that assumes little-endian
			 * behavior.  store_att_byval does almost what we need, but it
			 * requires a properly aligned buffer - the output buffer does not
			 * guarantee that. So we simply use a local Datum variable (which
			 * guarantees proper alignment), and then copy the value from it.
			 */
			store_att_byval(&tmp, range->values[i], typlen);

			memcpy(ptr, &tmp, typlen);
			ptr += typlen;
		}
		else if (typlen > 0)	/* fixed-length by-ref types */
		{
			memcpy(ptr, DatumGetPointer(range->values[i]), typlen);
			ptr += typlen;
		}
		else if (typlen == -1)	/* varlena */
		{
			/* use Size, matching what VARSIZE_ANY yields and 'len' holds */
			Size		vlen = VARSIZE_ANY(DatumGetPointer(range->values[i]));

			memcpy(ptr, DatumGetPointer(range->values[i]), vlen);
			ptr += vlen;
		}
		else if (typlen == -2)	/* cstring */
		{
			/* length including the null terminator */
			Size		slen = strlen(DatumGetCString(range->values[i])) + 1;

			memcpy(ptr, DatumGetCString(range->values[i]), slen);
			ptr += slen;
		}

		/* make sure we haven't overflown the buffer end */
		Assert(ptr <= ((char *) serialized + len));
	}

	/* exact size */
	Assert(ptr == ((char *) serialized + len));

	return serialized;
}

/*
 * brin_range_deserialize
 *	  Deserialize the compact varlena value into the in-memory representation.
 *
 * Simply copy the header and then also the individual values, as stored
 * in the serialized value.
*/
static Ranges *
brin_range_deserialize(int maxvalues, SerializedRanges *serialized)
{
	int			i,
				nvalues;
	char	   *ptr,
			   *dataptr;
	bool		typbyval;
	int			typlen;
	Size		datalen;

	Ranges	   *range;

	/*
	 * This function rebuilds an in-memory Ranges from a SerializedRanges
	 * value. 'maxvalues' is the capacity of the resulting values[] array,
	 * and must be at least the maxvalues recorded in the serialized header.
	 */
	Assert(serialized->nranges >= 0);
	Assert(serialized->nvalues >= 0);
	Assert(serialized->maxvalues > 0);

	/* total number of Datums stored: two per range, one per point value */
	nvalues = 2 * serialized->nranges + serialized->nvalues;

	Assert(nvalues <= serialized->maxvalues);
	Assert(serialized->maxvalues <= maxvalues);

	range = minmax_multi_init(maxvalues);

	/*
	 * Copy the header info. All point values count as sorted (nsorted is set
	 * to nvalues), and the stored maxvalues becomes the compaction target.
	 * The colloid, attno and cmp fields are not assigned here.
	 */
	range->nranges = serialized->nranges;
	range->nvalues = serialized->nvalues;
	range->nsorted = serialized->nvalues;
	range->maxvalues = maxvalues;
	range->target_maxvalues = serialized->maxvalues;

	range->typid = serialized->typid;

	typbyval = get_typbyval(serialized->typid);
	typlen = get_typlen(serialized->typid);

	/*
	 * And now deconstruct the values into Datum array. We have to copy the
	 * data because the serialized representation ignores alignment, and we
	 * don't want to rely on it being kept around anyway.
	 */
	ptr = serialized->data;

	/*
	 * We don't want to allocate many pieces, so we just allocate everything
	 * in one chunk. How much space will we need?
	 *
	 * XXX We don't need to copy simple by-value data types.
	 *
	 * First pass: sum up the MAXALIGN'ed size of each by-reference value.
	 * The loop condition skips this pass entirely for by-value types.
	 */
	datalen = 0;
	dataptr = NULL;
	for (i = 0; (i < nvalues) && (!typbyval); i++)
	{
		if (typlen > 0)			/* fixed-length by-ref types */
			datalen += MAXALIGN(typlen);
		else if (typlen == -1)	/* varlena */
		{
			datalen += MAXALIGN(VARSIZE_ANY(ptr));
			ptr += VARSIZE_ANY(ptr);
		}
		else if (typlen == -2)	/* cstring */
		{
			Size		slen = strlen(ptr) + 1;

			datalen += MAXALIGN(slen);
			ptr += slen;
		}
	}

	/* dataptr stays NULL when there is nothing to copy (e.g. by-value types) */
	if (datalen > 0)
		dataptr = palloc(datalen);

	/*
	 * Restore the source pointer (might have been modified when calculating
	 * the space we need to allocate).
	 */
	ptr = serialized->data;

	/*
	 * Second pass: build the Datum array. 'ptr' walks the packed input,
	 * 'dataptr' walks the MAXALIGN'ed output chunk.
	 */
	for (i = 0; i < nvalues; i++)
	{
		if (typbyval)			/* simple by-value data types */
		{
			Datum		v = 0;

			/* copy into an aligned local first, the input is unaligned */
			memcpy(&v, ptr, typlen);
			range->values[i] = fetch_att(&v, true, typlen);
			ptr += typlen;
		}
		else if (typlen > 0)	/* fixed-length by-ref types */
		{
			range->values[i] = PointerGetDatum(dataptr);

			memcpy(dataptr, ptr, typlen);
			dataptr += MAXALIGN(typlen);

			ptr += typlen;
		}
		else if (typlen == -1)	/* varlena */
		{
			range->values[i] = PointerGetDatum(dataptr);

			memcpy(dataptr, ptr, VARSIZE_ANY(ptr));
			dataptr += MAXALIGN(VARSIZE_ANY(ptr));
			ptr += VARSIZE_ANY(ptr);
		}
		else if (typlen == -2)	/* cstring */
		{
			/* length including the null terminator */
			Size		slen = strlen(ptr) + 1;

			range->values[i] = PointerGetDatum(dataptr);

			memcpy(dataptr, ptr, slen);
			dataptr += MAXALIGN(slen);
			ptr += slen;
		}

		/* make sure we haven't overflown the buffer end */
		Assert(ptr <= ((char *) serialized + VARSIZE_ANY(serialized)));
	}

	/* should have consumed the whole input value exactly */
	Assert(ptr == ((char *) serialized + VARSIZE_ANY(serialized)));

	/* return the deserialized value */
	return range;
}

/*
 * compare_expanded_ranges
 *	  Compare the expanded ranges - first by minimum, then by maximum.
 *
 * We do guarantee that ranges in a single Ranges object do not overlap, so it
 * may seem strange that we don't order just by minimum. But when merging two
 * Ranges (which happens in the union function), the ranges may in fact
 * overlap. So we do compare both.
*/ static int compare_expanded_ranges(const void *a, const void *b, void *arg) { ExpandedRange *ra = (ExpandedRange *) a; ExpandedRange *rb = (ExpandedRange *) b; Datum r; compare_context *cxt = (compare_context *) arg; /* first compare minvals */ r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->minval, rb->minval); if (DatumGetBool(r)) return -1; r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->minval, ra->minval); if (DatumGetBool(r)) return 1; /* then compare maxvals */ r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->maxval, rb->maxval); if (DatumGetBool(r)) return -1; r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->maxval, ra->maxval); if (DatumGetBool(r)) return 1; return 0; } /* * compare_values * Compare the values. */ static int compare_values(const void *a, const void *b, void *arg) { Datum *da = (Datum *) a; Datum *db = (Datum *) b; Datum r; compare_context *cxt = (compare_context *) arg; r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *da, *db); if (DatumGetBool(r)) return -1; r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *db, *da); if (DatumGetBool(r)) return 1; return 0; } /* * Check if the new value matches one of the existing ranges. */ static bool has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges, Datum newval, AttrNumber attno, Oid typid) { Datum compar; Datum minvalue; Datum maxvalue; FmgrInfo *cmpLessFn; FmgrInfo *cmpGreaterFn; /* binary search on ranges */ int start, end; if (ranges->nranges == 0) return false; minvalue = ranges->values[0]; maxvalue = ranges->values[2 * ranges->nranges - 1]; /* * Otherwise, need to compare the new value with boundaries of all the * ranges. First check if it's less than the absolute minimum, which is * the first value in the array. 
*/ cmpLessFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid, BTLessStrategyNumber); compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue); /* smaller than the smallest value in the range list */ if (DatumGetBool(compar)) return false; /* * And now compare it to the existing maximum (last value in the data * array). But only if we haven't already ruled out a possible match in * the minvalue check. */ cmpGreaterFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid, BTGreaterStrategyNumber); compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue); if (DatumGetBool(compar)) return false; /* * So we know it's in the general min/max, the question is whether it * falls in one of the ranges or gaps. We'll do a binary search on * individual ranges - for each range we check equality (value falls into * the range), and then check ranges either above or below the current * range. */ start = 0; /* first range */ end = (ranges->nranges - 1); /* last range */ while (true) { int midpoint = (start + end) / 2; /* this means we ran out of ranges in the last step */ if (start > end) return false; /* copy the min/max values from the ranges */ minvalue = ranges->values[2 * midpoint]; maxvalue = ranges->values[2 * midpoint + 1]; /* * Is the value smaller than the minval? If yes, we'll recurse to the * left side of range array. */ compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue); /* smaller than the smallest value in this range */ if (DatumGetBool(compar)) { end = (midpoint - 1); continue; } /* * Is the value greater than the minval? If yes, we'll recurse to the * right side of range array. */ compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue); /* larger than the largest value in this range */ if (DatumGetBool(compar)) { start = (midpoint + 1); continue; } /* hey, we found a matching range */ return true; } return false; } /* * range_contains_value * See if the new value is already contained in the range list. 
* * We first inspect the list of intervals. We use a small trick - we check * the value against min/max of the whole range (min of the first interval, * max of the last one) first, and only inspect the individual intervals if * this passes. * * If the value matches none of the intervals, we check the exact values. * We simply loop through them and invoke equality operator on them. * * The last parameter (full) determines whether we need to search all the * values, including the unsorted part. With full=false, the unsorted part * is not searched, which may produce false negatives and duplicate values * (in the unsorted part only), but when we're building the range that's * fine - we'll deduplicate before serialization, and it can only happen * if there already are unsorted values (so it was already modified). * * Serialized ranges don't have any unsorted values, so this can't cause * false negatives during querying. */ static bool range_contains_value(BrinDesc *bdesc, Oid colloid, AttrNumber attno, Form_pg_attribute attr, Ranges *ranges, Datum newval, bool full) { int i; FmgrInfo *cmpEqualFn; Oid typid = attr->atttypid; /* * First inspect the ranges, if there are any. We first check the whole * range, and only when there's still a chance of getting a match we * inspect the individual ranges. */ if (has_matching_range(bdesc, colloid, ranges, newval, attno, typid)) return true; cmpEqualFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid, BTEqualStrategyNumber); /* * There is no matching range, so let's inspect the sorted values. * * We do a sequential search for small numbers of values, and binary * search once we have more than 16 values. This threshold is somewhat * arbitrary, as it depends on how expensive the comparison function is. * * XXX If we use the threshold here, maybe we should do the same thing in * has_matching_range? Or maybe we should do the bin search all the time? 
* * XXX We could use the same optimization as for ranges, to check if the * value is between min/max, to maybe rule out all sorted values without * having to inspect all of them. */ if (ranges->nsorted >= 16) { compare_context cxt; cxt.colloid = ranges->colloid; cxt.cmpFn = ranges->cmp; if (bsearch_arg(&newval, &ranges->values[2 * ranges->nranges], ranges->nsorted, sizeof(Datum), compare_values, (void *) &cxt) != NULL) return true; } else { for (i = 2 * ranges->nranges; i < 2 * ranges->nranges + ranges->nsorted; i++) { Datum compar; compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]); /* found an exact match */ if (DatumGetBool(compar)) return true; } } /* If not asked to inspect the unsorted part, we're done. */ if (!full) return false; /* Inspect the unsorted part. */ for (i = 2 * ranges->nranges + ranges->nsorted; i < 2 * ranges->nranges + ranges->nvalues; i++) { Datum compar; compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]); /* found an exact match */ if (DatumGetBool(compar)) return true; } /* the value is not covered by this BRIN tuple */ return false; } /* * Expand ranges from Ranges into ExpandedRange array. This expects the * eranges to be pre-allocated and with the correct size - there needs to be * (nranges + nvalues) elements. * * The order of expanded ranges is arbitrary. We do expand the ranges first, * and this part is sorted. But then we expand the values, and this part may * be unsorted. */ static void fill_expanded_ranges(ExpandedRange *eranges, int neranges, Ranges *ranges) { int idx; int i; /* Check that the output array has the right size. 
*/ Assert(neranges == (ranges->nranges + ranges->nvalues)); idx = 0; for (i = 0; i < ranges->nranges; i++) { eranges[idx].minval = ranges->values[2 * i]; eranges[idx].maxval = ranges->values[2 * i + 1]; eranges[idx].collapsed = false; idx++; Assert(idx <= neranges); } for (i = 0; i < ranges->nvalues; i++) { eranges[idx].minval = ranges->values[2 * ranges->nranges + i]; eranges[idx].maxval = ranges->values[2 * ranges->nranges + i]; eranges[idx].collapsed = true; idx++; Assert(idx <= neranges); } /* Did we produce the expected number of elements? */ Assert(idx == neranges); return; } /* * Sort and deduplicate expanded ranges. * * The ranges may be deduplicated - we're simply appending values, without * checking for duplicates etc. So maybe the deduplication will reduce the * number of ranges enough, and we won't have to compute the distances etc. * * Returns the number of expanded ranges. */ static int sort_expanded_ranges(FmgrInfo *cmp, Oid colloid, ExpandedRange *eranges, int neranges) { int n; int i; compare_context cxt; Assert(neranges > 0); /* sort the values */ cxt.colloid = colloid; cxt.cmpFn = cmp; /* * XXX We do qsort on all the values, but we could also leverage the fact * that some of the input data is already sorted (all the ranges and maybe * some of the points) and do merge sort. */ qsort_arg(eranges, neranges, sizeof(ExpandedRange), compare_expanded_ranges, &cxt); /* * Deduplicate the ranges - simply compare each range to the preceding * one, and skip the duplicate ones. */ n = 1; for (i = 1; i < neranges; i++) { /* if the current range is equal to the preceding one, do nothing */ if (!compare_expanded_ranges(&eranges[i - 1], &eranges[i], (void *) &cxt)) continue; /* otherwise, copy it to n-th place (if not already there) */ if (i != n) memcpy(&eranges[n], &eranges[i], sizeof(ExpandedRange)); n++; } Assert((n > 0) && (n <= neranges)); return n; } /* * When combining multiple Range values (in union function), some of the * ranges may overlap. 
We simply merge the overlapping ranges to fix that.
 *
 * XXX This assumes the expanded ranges were previously sorted (by minval
 * and then maxval). We leverage this when detecting overlap.
 */
static int
merge_overlapping_ranges(FmgrInfo *cmp, Oid colloid,
						 ExpandedRange *eranges, int neranges)
{
	int			cur = 0;

	/*
	 * Look at each pair of neighbours (cur, cur+1). 'cmp' is expected to be
	 * the type's less-than procedure (NOTE(review): the caller is not
	 * visible here - confirm).
	 */
	while (cur < (neranges - 1))
	{
		ExpandedRange *left = &eranges[cur];
		ExpandedRange *right = &eranges[cur + 1];
		bool		disjoint;
		bool		right_reaches_further;

		/* left.maxval < right.minval means there is a gap between them */
		disjoint = DatumGetBool(FunctionCall2Coll(cmp, colloid,
												  left->maxval,
												  right->minval));
		if (disjoint)
		{
			/* nothing to merge here, move on to the next pair */
			cur++;
			continue;
		}

		/*
		 * The two ranges touch or overlap. The right one may be fully
		 * contained in the left one, so keep whichever upper bound is
		 * larger.
		 */
		right_reaches_further = DatumGetBool(FunctionCall2Coll(cmp, colloid,
																left->maxval,
																right->maxval));
		if (right_reaches_further)
			left->maxval = right->maxval;

		/* the merged range is marked as a regular range in either case */
		left->collapsed = false;

		/*
		 * Remove the right range by shifting everything after it one slot
		 * down. Elements from (cur + 2) onward move, i.e.
		 * neranges - (cur + 2) of them, which may be zero.
		 */
		memmove(right, right + 1,
				(neranges - (cur + 2)) * sizeof(ExpandedRange));

		/*
		 * One range fewer. 'cur' is not advanced, because the widened range
		 * may now overlap its new neighbour too.
		 */
		neranges--;
	}

	return neranges;
}

/*
 * Simple comparator for distance values, comparing the double value.
 * This is intentionally sorting the distances in descending order, i.e.
 * the longer gaps will be at the front.
*/ static int compare_distances(const void *a, const void *b) { DistanceValue *da = (DistanceValue *) a; DistanceValue *db = (DistanceValue *) b; if (da->value < db->value) return 1; else if (da->value > db->value) return -1; return 0; } /* * Given an array of expanded ranges, compute size of the gaps between each * range. For neranges there are (neranges-1) gaps. * * We simply call the "distance" function to compute the (max-min) for pairs * of consecutive ranges. The function may be fairly expensive, so we do that * just once (and then use it to pick as many ranges to merge as possible). * * See reduce_expanded_ranges for details. */ static DistanceValue * build_distances(FmgrInfo *distanceFn, Oid colloid, ExpandedRange *eranges, int neranges) { int i; int ndistances; DistanceValue *distances; Assert(neranges > 0); /* If there's only a single range, there's no distance to calculate. */ if (neranges == 1) return NULL; ndistances = (neranges - 1); distances = (DistanceValue *) palloc0(sizeof(DistanceValue) * ndistances); /* * Walk through the ranges once and compute the distance between the * ranges so that we can sort them once. */ for (i = 0; i < ndistances; i++) { Datum a1, a2, r; a1 = eranges[i].maxval; a2 = eranges[i + 1].minval; /* compute length of the gap (between max/min) */ r = FunctionCall2Coll(distanceFn, colloid, a1, a2); /* remember the index of the gap the distance is for */ distances[i].index = i; distances[i].value = DatumGetFloat8(r); } /* * Sort the distances in descending order, so that the longest gaps are at * the front. */ pg_qsort(distances, ndistances, sizeof(DistanceValue), compare_distances); return distances; } /* * Builds expanded ranges for the existing ranges (and single-point ranges), * and also the new value (which did not fit into the array). This expanded * representation makes the processing a bit easier, as it allows handling * ranges and points the same way. 
* * We sort and deduplicate the expanded ranges - this is necessary, because * the points may be unsorted. And moreover the two parts (ranges and * points) are sorted on their own. */ static ExpandedRange * build_expanded_ranges(FmgrInfo *cmp, Oid colloid, Ranges *ranges, int *nranges) { int neranges; ExpandedRange *eranges; /* both ranges and points are expanded into a separate element */ neranges = ranges->nranges + ranges->nvalues; eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange)); /* fill the expanded ranges */ fill_expanded_ranges(eranges, neranges, ranges); /* sort and deduplicate the expanded ranges */ neranges = sort_expanded_ranges(cmp, colloid, eranges, neranges); /* remember how many ranges we built */ *nranges = neranges; return eranges; } #ifdef USE_ASSERT_CHECKING /* * Counts boundary values needed to store the ranges. Each single-point * range is stored using a single value, each regular range needs two. */ static int count_values(ExpandedRange *cranges, int ncranges) { int i; int count; count = 0; for (i = 0; i < ncranges; i++) { if (cranges[i].collapsed) count += 1; else count += 2; } return count; } #endif /* * reduce_expanded_ranges * reduce the ranges until the number of values is low enough * * Combines ranges until the number of boundary values drops below the * threshold specified by max_values. This happens by merging enough * ranges by the distance between them. * * Returns the number of result ranges. * * We simply use the global min/max and then add boundaries for enough * largest gaps. Each gap adds 2 values, so we simply use (target/2-1) * distances. Then we simply sort all the values - each two values are * a boundary of a range (possibly collapsed). * * XXX Some of the ranges may be collapsed (i.e. the min/max values are * equal), but we ignore that for now. We could repeat the process, * adding a couple more gaps recursively. * * XXX The ranges to merge are selected solely using the distance. 
But * that may not be the best strategy, for example when multiple gaps * are of equal (or very similar) length. * * Consider for example points 1, 2, 3, .., 64, which have gaps of the * same length 1 of course. In that case, we tend to pick the first * gap of that length, which leads to this: * * step 1: [1, 2], 3, 4, 5, .., 64 * step 2: [1, 3], 4, 5, .., 64 * step 3: [1, 4], 5, .., 64 * ... * * So in the end we'll have one "large" range and multiple small points. * That may be fine, but it seems a bit strange and non-optimal. Maybe * we should consider other things when picking ranges to merge - e.g. * length of the ranges? Or perhaps randomize the choice of ranges, with * probability inversely proportional to the distance (the gap lengths * may be very close, but not exactly the same). * * XXX Or maybe we could just handle this by using random value as a * tie-break, or by adding random noise to the actual distance. */ static int reduce_expanded_ranges(ExpandedRange *eranges, int neranges, DistanceValue *distances, int max_values, FmgrInfo *cmp, Oid colloid) { int i; int nvalues; Datum *values; compare_context cxt; /* total number of gaps between ranges */ int ndistances = (neranges - 1); /* number of gaps to keep */ int keep = (max_values / 2 - 1); /* * Maybe we have a sufficiently low number of ranges already? * * XXX This should happen before we actually do the expensive stuff like * sorting, so maybe this should be just an assert. 
*/ if (keep >= ndistances) return neranges; /* sort the values */ cxt.colloid = colloid; cxt.cmpFn = cmp; /* allocate space for the boundary values */ nvalues = 0; values = (Datum *) palloc(sizeof(Datum) * max_values); /* add the global min/max values, from the first/last range */ values[nvalues++] = eranges[0].minval; values[nvalues++] = eranges[neranges - 1].maxval; /* add boundary values for enough gaps */ for (i = 0; i < keep; i++) { /* index of the gap between (index) and (index+1) ranges */ int index = distances[i].index; Assert((index >= 0) && ((index + 1) < neranges)); /* add max from the preceding range, minval from the next one */ values[nvalues++] = eranges[index].maxval; values[nvalues++] = eranges[index + 1].minval; Assert(nvalues <= max_values); } /* We should have an even number of range values. */ Assert(nvalues % 2 == 0); /* * Sort the values using the comparator function, and form ranges from the * sorted result. */ qsort_arg(values, nvalues, sizeof(Datum), compare_values, &cxt); /* We have nvalues boundary values, which means nvalues/2 ranges. */ for (i = 0; i < (nvalues / 2); i++) { eranges[i].minval = values[2 * i]; eranges[i].maxval = values[2 * i + 1]; /* if the boundary values are the same, it's a collapsed range */ eranges[i].collapsed = (compare_values(&values[2 * i], &values[2 * i + 1], &cxt) == 0); } return (nvalues / 2); } /* * Store the boundary values from ExpandedRanges back into 'ranges' (using * only the minimal number of values needed). 
*/ static void store_expanded_ranges(Ranges *ranges, ExpandedRange *eranges, int neranges) { int i; int idx = 0; /* first copy in the regular ranges */ ranges->nranges = 0; for (i = 0; i < neranges; i++) { if (!eranges[i].collapsed) { ranges->values[idx++] = eranges[i].minval; ranges->values[idx++] = eranges[i].maxval; ranges->nranges++; } } /* now copy in the collapsed ones */ ranges->nvalues = 0; for (i = 0; i < neranges; i++) { if (eranges[i].collapsed) { ranges->values[idx++] = eranges[i].minval; ranges->nvalues++; } } /* all the values are sorted */ ranges->nsorted = ranges->nvalues; Assert(count_values(eranges, neranges) == 2 * ranges->nranges + ranges->nvalues); Assert(2 * ranges->nranges + ranges->nvalues <= ranges->maxvalues); } /* * Consider freeing space in the ranges. Checks if there's space for at least * one new value, and performs compaction if needed. * * Returns true if the value was actually modified. */ static bool ensure_free_space_in_buffer(BrinDesc *bdesc, Oid colloid, AttrNumber attno, Form_pg_attribute attr, Ranges *range) { MemoryContext ctx; MemoryContext oldctx; FmgrInfo *cmpFn, *distanceFn; /* expanded ranges */ ExpandedRange *eranges; int neranges; DistanceValue *distances; /* * If there is free space in the buffer, we're done without having to * modify anything. */ if (2 * range->nranges + range->nvalues < range->maxvalues) return false; /* we'll certainly need the comparator, so just look it up now */ cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid, BTLessStrategyNumber); /* deduplicate values, if there's an unsorted part */ range_deduplicate_values(range); /* * Did we reduce enough free space by just the deduplication? * * We don't simply check against range->maxvalues again. The deduplication * might have freed very little space (e.g. just one value), forcing us to * do deduplication very often. In that case, it's better to do the * compaction and reduce more space. 
*/ if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR) return true; /* * We need to combine some of the existing ranges, to reduce the number of * values we have to store. * * The distanceFn calls (which may internally call e.g. numeric_le) may * allocate quite a bit of memory, and we must not leak it (we might have * to do this repeatedly, even for a single BRIN page range). Otherwise * we'd have problems e.g. when building new indexes. So we use a memory * context and make sure we free the memory at the end (so if we call the * distance function many times, it might be an issue, but meh). */ ctx = AllocSetContextCreate(CurrentMemoryContext, "minmax-multi context", ALLOCSET_DEFAULT_SIZES); oldctx = MemoryContextSwitchTo(ctx); /* build the expanded ranges */ eranges = build_expanded_ranges(cmpFn, colloid, range, &neranges); /* and we'll also need the 'distance' procedure */ distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE); /* build array of gap distances and sort them in ascending order */ distances = build_distances(distanceFn, colloid, eranges, neranges); /* * Combine ranges until we release at least 50% of the space. This * threshold is somewhat arbitrary, perhaps needs tuning. We must not use * too low or high value. */ neranges = reduce_expanded_ranges(eranges, neranges, distances, range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR, cmpFn, colloid); /* Make sure we've sufficiently reduced the number of ranges. */ Assert(count_values(eranges, neranges) <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR); /* decompose the expanded ranges into regular ranges and single values */ store_expanded_ranges(range, eranges, neranges); MemoryContextSwitchTo(oldctx); MemoryContextDelete(ctx); /* Did we break the ranges somehow? */ AssertCheckRanges(range, cmpFn, colloid); return true; } /* * range_add_value * Add the new value to the minmax-multi range. 
*/ static bool range_add_value(BrinDesc *bdesc, Oid colloid, AttrNumber attno, Form_pg_attribute attr, Ranges *ranges, Datum newval) { FmgrInfo *cmpFn; bool modified = false; /* we'll certainly need the comparator, so just look it up now */ cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid, BTLessStrategyNumber); /* comprehensive checks of the input ranges */ AssertCheckRanges(ranges, cmpFn, colloid); /* * Make sure there's enough free space in the buffer. We only trigger this * when the buffer is full, which means it had to be modified as we size * it to be larger than what is stored on disk. * * This needs to happen before we check if the value is contained in the * range, because the value might be in the unsorted part, and we don't * check that in range_contains_value. The deduplication would then move * it to the sorted part, and we'd add the value too, which violates the * rule that we never have duplicates with the ranges or sorted values. * * We might also deduplicate and recheck if the value is contained, but * that seems like overkill. We'd need to deduplicate anyway, so why not * do it now. */ modified = ensure_free_space_in_buffer(bdesc, colloid, attno, attr, ranges); /* * Bail out if the value already is covered by the range. * * We could also add values until we hit values_per_range, and then do the * deduplication in a batch, hoping for better efficiency. But that would * mean we actually modify the range every time, which means having to * serialize the value, which does palloc, walks the values, copies them, * etc. Not exactly cheap. * * So instead we do the check, which should be fairly cheap - assuming the * comparator function is not very expensive. * * This also implies the values array can't contain duplicate values. */ if (range_contains_value(bdesc, colloid, attno, attr, ranges, newval, false)) return modified; /* Make a copy of the value, if needed. 
*/ newval = datumCopy(newval, attr->attbyval, attr->attlen); /* * If there's space in the values array, copy it in and we're done. * * We do want to keep the values sorted (to speed up searches), so we do a * simple insertion sort. We could do something more elaborate, e.g. by * sorting the values only now and then, but for small counts (e.g. when * maxvalues is 64) this should be fine. */ ranges->values[2 * ranges->nranges + ranges->nvalues] = newval; ranges->nvalues++; /* If we added the first value, we can consider it as sorted. */ if (ranges->nvalues == 1) ranges->nsorted = 1; /* * Check we haven't broken the ordering of boundary values (checks both * parts, but that doesn't hurt). */ AssertCheckRanges(ranges, cmpFn, colloid); /* Check the range contains the value we just added. */ Assert(range_contains_value(bdesc, colloid, attno, attr, ranges, newval, true)); /* yep, we've modified the range */ return true; } /* * Generate range representation of data collected during "batch mode". * This is similar to reduce_expanded_ranges, except that we can't assume * the values are sorted and there may be duplicate values. */ static void compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values) { FmgrInfo *cmpFn, *distanceFn; /* expanded ranges */ ExpandedRange *eranges; int neranges; DistanceValue *distances; MemoryContext ctx; MemoryContext oldctx; /* * Do we need to actually compactify anything? * * There are two reasons why compaction may be needed - firstly, there may * be too many values, or some of the values may be unsorted. 
*/ if ((ranges->nranges * 2 + ranges->nvalues <= max_values) && (ranges->nsorted == ranges->nvalues)) return; /* we'll certainly need the comparator, so just look it up now */ cmpFn = minmax_multi_get_strategy_procinfo(bdesc, ranges->attno, ranges->typid, BTLessStrategyNumber); /* and we'll also need the 'distance' procedure */ distanceFn = minmax_multi_get_procinfo(bdesc, ranges->attno, PROCNUM_DISTANCE); /* * The distanceFn calls (which may internally call e.g. numeric_le) may * allocate quite a bit of memory, and we must not leak it. Otherwise, * we'd have problems e.g. when building indexes. So we create a local * memory context and make sure we free the memory before leaving this * function (not after every call). */ ctx = AllocSetContextCreate(CurrentMemoryContext, "minmax-multi context", ALLOCSET_DEFAULT_SIZES); oldctx = MemoryContextSwitchTo(ctx); /* build the expanded ranges */ eranges = build_expanded_ranges(cmpFn, ranges->colloid, ranges, &neranges); /* build array of gap distances and sort them in ascending order */ distances = build_distances(distanceFn, ranges->colloid, eranges, neranges); /* * Combine ranges until we get below max_values. We don't use any scale * factor, because this is used during serialization, and we don't expect * more tuples to be inserted anytime soon. */ neranges = reduce_expanded_ranges(eranges, neranges, distances, max_values, cmpFn, ranges->colloid); Assert(count_values(eranges, neranges) <= max_values); /* transform back into regular ranges and single values */ store_expanded_ranges(ranges, eranges, neranges); /* check all the range invariants */ AssertCheckRanges(ranges, cmpFn, ranges->colloid); MemoryContextSwitchTo(oldctx); MemoryContextDelete(ctx); } Datum brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS) { BrinOpcInfo *result; /* * opaque->strategy_procinfos is initialized lazily; here it is set to * all-uninitialized by palloc0 which sets fn_oid to InvalidOid. 
*/ result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) + sizeof(MinmaxMultiOpaque)); result->oi_nstored = 1; result->oi_regular_nulls = true; result->oi_opaque = (MinmaxMultiOpaque *) MAXALIGN((char *) result + SizeofBrinOpcInfo(1)); result->oi_typcache[0] = lookup_type_cache(PG_BRIN_MINMAX_MULTI_SUMMARYOID, 0); PG_RETURN_POINTER(result); } /* * Compute the distance between two float4 values (plain subtraction). */ Datum brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS) { float a1 = PG_GETARG_FLOAT4(0); float a2 = PG_GETARG_FLOAT4(1); /* if both values are NaN, then we consider them the same */ if (isnan(a1) && isnan(a2)) PG_RETURN_FLOAT8(0.0); /* if one value is NaN, use infinite distance */ if (isnan(a1) || isnan(a2)) PG_RETURN_FLOAT8(get_float8_infinity()); /* * We know the values are range boundaries, but the range may be collapsed * (i.e. single points), with equal values. */ Assert(a1 <= a2); PG_RETURN_FLOAT8((double) a2 - (double) a1); } /* * Compute the distance between two float8 values (plain subtraction). */ Datum brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS) { double a1 = PG_GETARG_FLOAT8(0); double a2 = PG_GETARG_FLOAT8(1); /* if both values are NaN, then we consider them the same */ if (isnan(a1) && isnan(a2)) PG_RETURN_FLOAT8(0.0); /* if one value is NaN, use infinite distance */ if (isnan(a1) || isnan(a2)) PG_RETURN_FLOAT8(get_float8_infinity()); /* * We know the values are range boundaries, but the range may be collapsed * (i.e. single points), with equal values. */ Assert(a1 <= a2); PG_RETURN_FLOAT8(a2 - a1); } /* * Compute the distance between two int2 values (plain subtraction). */ Datum brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS) { int16 a1 = PG_GETARG_INT16(0); int16 a2 = PG_GETARG_INT16(1); /* * We know the values are range boundaries, but the range may be collapsed * (i.e. single points), with equal values. 
*/ Assert(a1 <= a2); PG_RETURN_FLOAT8((double) a2 - (double) a1); } /* * Compute the distance between two int4 values (plain subtraction). */ Datum brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS) { int32 a1 = PG_GETARG_INT32(0); int32 a2 = PG_GETARG_INT32(1); /* * We know the values are range boundaries, but the range may be collapsed * (i.e. single points), with equal values. */ Assert(a1 <= a2); PG_RETURN_FLOAT8((double) a2 - (double) a1); } /* * Compute the distance between two int8 values (plain subtraction). */ Datum brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS) { int64 a1 = PG_GETARG_INT64(0); int64 a2 = PG_GETARG_INT64(1); /* * We know the values are range boundaries, but the range may be collapsed * (i.e. single points), with equal values. */ Assert(a1 <= a2); PG_RETURN_FLOAT8((double) a2 - (double) a1); } /* * Compute the distance between two tid values (by mapping them to float8 and * then subtracting them). */ Datum brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS) { double da1, da2; ItemPointer pa1 = (ItemPointer) PG_GETARG_DATUM(0); ItemPointer pa2 = (ItemPointer) PG_GETARG_DATUM(1); /* * We know the values are range boundaries, but the range may be collapsed * (i.e. single points), with equal values. */ Assert(ItemPointerCompare(pa1, pa2) <= 0); /* * We use the no-check variants here, because user-supplied values may * have (ip_posid == 0). See ItemPointerCompare. */ da1 = ItemPointerGetBlockNumberNoCheck(pa1) * MaxHeapTuplesPerPage + ItemPointerGetOffsetNumberNoCheck(pa1); da2 = ItemPointerGetBlockNumberNoCheck(pa2) * MaxHeapTuplesPerPage + ItemPointerGetOffsetNumberNoCheck(pa2); PG_RETURN_FLOAT8(da2 - da1); } /* * Compute the distance between two numeric values (plain subtraction). */ Datum brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS) { Datum d; Datum a1 = PG_GETARG_DATUM(0); Datum a2 = PG_GETARG_DATUM(1); /* * We know the values are range boundaries, but the range may be collapsed * (i.e. single points), with equal values. 
*/ Assert(DatumGetBool(DirectFunctionCall2(numeric_le, a1, a2))); d = DirectFunctionCall2(numeric_sub, a2, a1); /* a2 - a1 */ PG_RETURN_FLOAT8(DirectFunctionCall1(numeric_float8, d)); } /* * Compute the approximate distance between two UUID values. * * XXX We do not need a perfectly accurate value, so we approximate the * deltas (which would have to be 128-bit integers) with a 64-bit float. * The small inaccuracies do not matter in practice, in the worst case * we'll decide to merge ranges that are not the closest ones. */ Datum brin_minmax_multi_distance_uuid(PG_FUNCTION_ARGS) { int i; float8 delta = 0; Datum a1 = PG_GETARG_DATUM(0); Datum a2 = PG_GETARG_DATUM(1); pg_uuid_t *u1 = DatumGetUUIDP(a1); pg_uuid_t *u2 = DatumGetUUIDP(a2); /* * We know the values are range boundaries, but the range may be collapsed * (i.e. single points), with equal values. */ Assert(DatumGetBool(DirectFunctionCall2(uuid_le, a1, a2))); /* compute approximate delta as a double precision value */ for (i = UUID_LEN - 1; i >= 0; i--) { delta += (int) u2->data[i] - (int) u1->data[i]; delta /= 256; } Assert(delta >= 0); PG_RETURN_FLOAT8(delta); } /* * Compute the approximate distance between two dates. */ Datum brin_minmax_multi_distance_date(PG_FUNCTION_ARGS) { DateADT dateVal1 = PG_GETARG_DATEADT(0); DateADT dateVal2 = PG_GETARG_DATEADT(1); if (DATE_NOT_FINITE(dateVal1) || DATE_NOT_FINITE(dateVal2)) PG_RETURN_FLOAT8(0); PG_RETURN_FLOAT8(dateVal1 - dateVal2); } /* * Compute the approximate distance between two time (without tz) values. * * TimeADT is just an int64, so we simply subtract the values directly. */ Datum brin_minmax_multi_distance_time(PG_FUNCTION_ARGS) { float8 delta = 0; TimeADT ta = PG_GETARG_TIMEADT(0); TimeADT tb = PG_GETARG_TIMEADT(1); delta = (tb - ta); Assert(delta >= 0); PG_RETURN_FLOAT8(delta); } /* * Compute the approximate distance between two timetz values. * * Simply subtracts the TimeADT (int64) values embedded in TimeTzADT. 
*/ Datum brin_minmax_multi_distance_timetz(PG_FUNCTION_ARGS) { float8 delta = 0; TimeTzADT *ta = PG_GETARG_TIMETZADT_P(0); TimeTzADT *tb = PG_GETARG_TIMETZADT_P(1); delta = (tb->time - ta->time) + (tb->zone - ta->zone) * USECS_PER_SEC; Assert(delta >= 0); PG_RETURN_FLOAT8(delta); } /* * Compute the distance between two timestamp values. */ Datum brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS) { float8 delta = 0; Timestamp dt1 = PG_GETARG_TIMESTAMP(0); Timestamp dt2 = PG_GETARG_TIMESTAMP(1); if (TIMESTAMP_NOT_FINITE(dt1) || TIMESTAMP_NOT_FINITE(dt2)) PG_RETURN_FLOAT8(0); delta = dt2 - dt1; Assert(delta >= 0); PG_RETURN_FLOAT8(delta); } /* * Compute the distance between two interval values. */ Datum brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS) { float8 delta = 0; Interval *ia = PG_GETARG_INTERVAL_P(0); Interval *ib = PG_GETARG_INTERVAL_P(1); Interval *result; int64 dayfraction; int64 days; result = (Interval *) palloc(sizeof(Interval)); result->month = ib->month - ia->month; /* overflow check copied from int4mi */ if (!SAMESIGN(ib->month, ia->month) && !SAMESIGN(result->month, ib->month)) ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("interval out of range"))); result->day = ib->day - ia->day; if (!SAMESIGN(ib->day, ia->day) && !SAMESIGN(result->day, ib->day)) ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("interval out of range"))); result->time = ib->time - ia->time; if (!SAMESIGN(ib->time, ia->time) && !SAMESIGN(result->time, ib->time)) ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("interval out of range"))); /* * Delta is (fractional) number of days between the intervals. Assume * months have 30 days for consistency with interval_cmp_internal. We * don't need to be exact, in the worst case we'll build a bit less * efficient ranges. But we should not contradict interval_cmp. 
*/ dayfraction = result->time % USECS_PER_DAY; days = result->time / USECS_PER_DAY; days += result->month * INT64CONST(30); days += result->day; /* convert to double precision */ delta = (double) days + dayfraction / (double) USECS_PER_DAY; Assert(delta >= 0); PG_RETURN_FLOAT8(delta); } /* * Compute the distance between two pg_lsn values. * * LSN is just an int64 encoding position in the stream, so just subtract * those int64 values directly. */ Datum brin_minmax_multi_distance_pg_lsn(PG_FUNCTION_ARGS) { float8 delta = 0; XLogRecPtr lsna = PG_GETARG_LSN(0); XLogRecPtr lsnb = PG_GETARG_LSN(1); delta = (lsnb - lsna); Assert(delta >= 0); PG_RETURN_FLOAT8(delta); } /* * Compute the distance between two macaddr values. * * mac addresses are treated as 6 unsigned chars, so do the same thing we * already do for UUID values. */ Datum brin_minmax_multi_distance_macaddr(PG_FUNCTION_ARGS) { float8 delta; macaddr *a = PG_GETARG_MACADDR_P(0); macaddr *b = PG_GETARG_MACADDR_P(1); delta = ((float8) b->f - (float8) a->f); delta /= 256; delta += ((float8) b->e - (float8) a->e); delta /= 256; delta += ((float8) b->d - (float8) a->d); delta /= 256; delta += ((float8) b->c - (float8) a->c); delta /= 256; delta += ((float8) b->b - (float8) a->b); delta /= 256; delta += ((float8) b->a - (float8) a->a); delta /= 256; Assert(delta >= 0); PG_RETURN_FLOAT8(delta); } /* * Compute the distance between two macaddr8 values. * * macaddr8 addresses are 8 unsigned chars, so do the same thing we * already do for UUID values. 
*/ Datum brin_minmax_multi_distance_macaddr8(PG_FUNCTION_ARGS) { float8 delta; macaddr8 *a = PG_GETARG_MACADDR8_P(0); macaddr8 *b = PG_GETARG_MACADDR8_P(1); delta = ((float8) b->h - (float8) a->h); delta /= 256; delta += ((float8) b->g - (float8) a->g); delta /= 256; delta += ((float8) b->f - (float8) a->f); delta /= 256; delta += ((float8) b->e - (float8) a->e); delta /= 256; delta += ((float8) b->d - (float8) a->d); delta /= 256; delta += ((float8) b->c - (float8) a->c); delta /= 256; delta += ((float8) b->b - (float8) a->b); delta /= 256; delta += ((float8) b->a - (float8) a->a); delta /= 256; Assert(delta >= 0); PG_RETURN_FLOAT8(delta); } /* * Compute the distance between two inet values. * * The distance is defined as the difference between 32-bit/128-bit values, * depending on the IP version. The distance is computed by subtracting * the bytes and normalizing it to [0,1] range for each IP family. * Addresses from different families are considered to be in maximum * distance, which is 1.0. * * XXX Does this need to consider the mask (bits)? For now, it's ignored. */ Datum brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS) { float8 delta; int i; int len; unsigned char *addra, *addrb; inet *ipa = PG_GETARG_INET_PP(0); inet *ipb = PG_GETARG_INET_PP(1); int lena, lenb; /* * If the addresses are from different families, consider them to be in * maximal possible distance (which is 1.0). */ if (ip_family(ipa) != ip_family(ipb)) PG_RETURN_FLOAT8(1.0); addra = (unsigned char *) palloc(ip_addrsize(ipa)); memcpy(addra, ip_addr(ipa), ip_addrsize(ipa)); addrb = (unsigned char *) palloc(ip_addrsize(ipb)); memcpy(addrb, ip_addr(ipb), ip_addrsize(ipb)); /* * The length is calculated from the mask length, because we sort the * addresses by first address in the range, so A.B.C.D/24 < A.B.C.1 (the * first range starts at A.B.C.0, which is before A.B.C.1). We don't want * to produce a negative delta in this case, so we just cut the extra * bytes. 
* * XXX Maybe this should be a bit more careful and cut the bits, not just * whole bytes. */ lena = ip_bits(ipa); lenb = ip_bits(ipb); len = ip_addrsize(ipa); /* apply the network mask to both addresses */ for (i = 0; i < len; i++) { unsigned char mask; int nbits; nbits = Max(0, lena - (i * 8)); if (nbits < 8) { mask = (0xFF << (8 - nbits)); addra[i] = (addra[i] & mask); } nbits = Max(0, lenb - (i * 8)); if (nbits < 8) { mask = (0xFF << (8 - nbits)); addrb[i] = (addrb[i] & mask); } } /* Calculate the difference between the addresses. */ delta = 0; for (i = len - 1; i >= 0; i--) { unsigned char a = addra[i]; unsigned char b = addrb[i]; delta += (float8) b - (float8) a; delta /= 256; } Assert((delta >= 0) && (delta <= 1)); pfree(addra); pfree(addrb); PG_RETURN_FLOAT8(delta); } static void brin_minmax_multi_serialize(BrinDesc *bdesc, Datum src, Datum *dst) { Ranges *ranges = (Ranges *) DatumGetPointer(src); SerializedRanges *s; /* * In batch mode, we need to compress the accumulated values to the * actually requested number of values/ranges. */ compactify_ranges(bdesc, ranges, ranges->target_maxvalues); /* At this point everything has to be fully sorted. */ Assert(ranges->nsorted == ranges->nvalues); s = brin_range_serialize(ranges); dst[0] = PointerGetDatum(s); } static int brin_minmax_multi_get_values(BrinDesc *bdesc, MinMaxMultiOptions *opts) { return MinMaxMultiGetValuesPerRange(opts); } /* * Examine the given index tuple (which contains the partial status of a * certain page range) by comparing it to the given value that comes from * another heap tuple. If the new value is outside the min/max range * specified by the existing tuple values, update the index tuple and return * true. Otherwise, return false and do not modify in this case. 
*/
Datum
brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
{
	/*
	 * Arguments: 0 = BrinDesc, 1 = BrinValues of the column being updated,
	 * 2 = the new value, 3 = "is null" flag (must be false here).  The
	 * result is a bool telling whether the summary was modified.
	 */
	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
	Datum		newval = PG_GETARG_DATUM(2);

	/* a boolean argument, so fetch it as one rather than as a raw Datum */
	bool		isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3);
	MinMaxMultiOptions *opts = (MinMaxMultiOptions *) PG_GET_OPCLASS_OPTIONS();
	Oid			colloid = PG_GET_COLLATION();
	bool		modified = false;
	Form_pg_attribute attr;
	AttrNumber	attno;
	Ranges	   *ranges;
	SerializedRanges *serialized = NULL;

	Assert(!isnull);

	attno = column->bv_attno;
	attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);

	/* use the already deserialized value, if possible */
	ranges = (Ranges *) DatumGetPointer(column->bv_mem_value);

	/*
	 * If this is the first non-null value, we need to initialize the range
	 * list. Otherwise, just extract the existing range list from BrinValues.
	 *
	 * When starting with an empty range, we assume this is a batch mode and
	 * we use a larger buffer. The buffer size is derived from the BRIN range
	 * size, number of rows per page, with some sensible min/max values. A
	 * small buffer would be bad for performance, but a large buffer might
	 * require a lot of memory (because of keeping all the values).
	 */
	if (column->bv_allnulls)
	{
		MemoryContext oldctx;

		int			target_maxvalues;
		int			maxvalues;
		BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);

		/* what was specified as a reloption? */
		target_maxvalues = brin_minmax_multi_get_values(bdesc, opts);

		/*
		 * Determine the insert buffer size - we use 10x the target, capped to
		 * the maximum number of values in the heap range. This is more than
		 * enough, considering the actual number of rows per page is likely
		 * much lower, but meh.
		 */
		maxvalues = Min(target_maxvalues * MINMAX_BUFFER_FACTOR,
						MaxHeapTuplesPerPage * pagesPerRange);

		/* but always at least the original value */
		maxvalues = Max(maxvalues, target_maxvalues);

		/* always cap by MIN/MAX */
		maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
		maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);

		/* the in-memory summary has to live in the column's own context */
		oldctx = MemoryContextSwitchTo(column->bv_context);
		ranges = minmax_multi_init(maxvalues);
		ranges->attno = attno;
		ranges->colloid = colloid;
		ranges->typid = attr->atttypid;
		ranges->target_maxvalues = target_maxvalues;

		/* we'll certainly need the comparator, so just look it up now */
		ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
														 BTLessStrategyNumber);

		MemoryContextSwitchTo(oldctx);

		/* going from "all nulls" to "has a summary" is a modification */
		column->bv_allnulls = false;
		modified = true;

		column->bv_mem_value = PointerGetDatum(ranges);
		column->bv_serialize = brin_minmax_multi_serialize;
	}
	else if (!ranges)
	{
		/*
		 * There is a summary already, but only in serialized form: expand it
		 * into an in-memory Ranges struct (again in the column's context) and
		 * remember it, so later calls can reuse it.
		 */
		MemoryContext oldctx;

		int			maxvalues;
		BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);

		oldctx = MemoryContextSwitchTo(column->bv_context);

		serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);

		/*
		 * Determine the insert buffer size - we use 10x the target, capped to
		 * the maximum number of values in the heap range. This is more than
		 * enough, considering the actual number of rows per page is likely
		 * much lower, but meh.
		 */
		maxvalues = Min(serialized->maxvalues * MINMAX_BUFFER_FACTOR,
						MaxHeapTuplesPerPage * pagesPerRange);

		/* but always at least the original value */
		maxvalues = Max(maxvalues, serialized->maxvalues);

		/* always cap by MIN/MAX */
		maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
		maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);

		ranges = brin_range_deserialize(maxvalues, serialized);

		ranges->attno = attno;
		ranges->colloid = colloid;
		ranges->typid = attr->atttypid;

		/* we'll certainly need the comparator, so just look it up now */
		ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
														 BTLessStrategyNumber);

		column->bv_mem_value = PointerGetDatum(ranges);
		column->bv_serialize = brin_minmax_multi_serialize;

		MemoryContextSwitchTo(oldctx);
	}

	/*
	 * Try to add the new value to the range. We need to update the modified
	 * flag, so that we serialize the updated summary later.
	 */
	modified |= range_add_value(bdesc, colloid, attno, attr, ranges, newval);

	PG_RETURN_BOOL(modified);
}

/*
 * Given an index tuple corresponding to a certain page range and a scan key,
 * return whether the scan key is consistent with the index tuple's min/max
 * values.  Return true if so, false otherwise.
*/
Datum
brin_minmax_multi_consistent(PG_FUNCTION_ARGS)
{
	/*
	 * Arguments: 0 = BrinDesc, 1 = BrinValues of the column, 2 = array of
	 * scan keys, 3 = number of scan keys.  The page range is reported as
	 * consistent as soon as one stored interval, or one stored individual
	 * value, satisfies every scan key.
	 */
	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
	ScanKey    *keys = (ScanKey *) PG_GETARG_POINTER(2);
	int			nkeys = PG_GETARG_INT32(3);

	Oid			colloid = PG_GET_COLLATION(),
				subtype;
	AttrNumber	attno;
	Datum		value;
	FmgrInfo   *finfo;
	SerializedRanges *serialized;
	Ranges	   *ranges;
	int			keyno;
	int			rangeno;
	int			i;

	attno = column->bv_attno;

	serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
	ranges = brin_range_deserialize(serialized->maxvalues, serialized);

	/*
	 * Inspect the ranges, and for each one evaluate the scan keys.  Range
	 * number N keeps its boundaries in values[2N] (minimum) and
	 * values[2N + 1] (maximum).
	 */
	for (rangeno = 0; rangeno < ranges->nranges; rangeno++)
	{
		Datum		minval = ranges->values[2 * rangeno];
		Datum		maxval = ranges->values[2 * rangeno + 1];

		/* assume the range is matching, and we'll try to prove otherwise */
		bool		matching = true;

		for (keyno = 0; keyno < nkeys; keyno++)
		{
			/* a plain boolean; operator results are converted right away */
			bool		matches;
			ScanKey		key = keys[keyno];

			/* NULL keys are handled and filtered-out in bringetbitmap */
			Assert(!(key->sk_flags & SK_ISNULL));

			attno = key->sk_attno;
			subtype = key->sk_subtype;
			value = key->sk_argument;
			switch (key->sk_strategy)
			{
				case BTLessStrategyNumber:
				case BTLessEqualStrategyNumber:
					finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
															   key->sk_strategy);
					/* the range minimum decides "<" and "<=" keys */
					matches = DatumGetBool(FunctionCall2Coll(finfo, colloid,
															 minval, value));
					break;

				case BTEqualStrategyNumber:
					{
						FmgrInfo   *cmpFn;

						/* by default this range does not match */
						matches = false;

						/*
						 * The key can only match if it lies within
						 * [minval, maxval].  First check whether it is below
						 * the range minimum.
						 */
						cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
																   BTGreaterStrategyNumber);

						/* smaller than the smallest value in this range */
						if (DatumGetBool(FunctionCall2Coll(cmpFn, colloid,
														   minval, value)))
							break;

						cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
																   BTLessStrategyNumber);

						/* larger than the largest value in this range */
						if (DatumGetBool(FunctionCall2Coll(cmpFn, colloid,
														   maxval, value)))
							break;

						/*
						 * We haven't managed to eliminate this range, so
						 * consider it matching.
						 */
						matches = true;

						break;
					}

				case BTGreaterEqualStrategyNumber:
				case BTGreaterStrategyNumber:
					finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
															   key->sk_strategy);
					/* the range maximum decides ">" and ">=" keys */
					matches = DatumGetBool(FunctionCall2Coll(finfo, colloid,
															 maxval, value));
					break;

				default:
					/* shouldn't happen */
					elog(ERROR, "invalid strategy number %d", key->sk_strategy);
					matches = false;	/* keep compiler quiet */
					break;
			}

			/* the range has to match all the scan keys */
			matching &= matches;

			/* once we find a non-matching key, we're done */
			if (!matching)
				break;
		}

		/*
		 * have we found a range matching all scan keys? if yes, we're done
		 */
		if (matching)
			PG_RETURN_DATUM(BoolGetDatum(true));
	}

	/*
	 * And now inspect the values. We don't bother with doing a binary search
	 * here, because we're dealing with serialized / fully compacted ranges,
	 * so there should be only very few values.  The individual values are
	 * stored right after the 2 * nranges range boundaries.
	 */
	for (i = 0; i < ranges->nvalues; i++)
	{
		Datum		val = ranges->values[2 * ranges->nranges + i];

		/* assume the value is matching, and we'll try to prove otherwise */
		bool		matching = true;

		for (keyno = 0; keyno < nkeys; keyno++)
		{
			bool		matches;
			ScanKey		key = keys[keyno];

			/* we've already dealt with NULL keys at the beginning */
			if (key->sk_flags & SK_ISNULL)
				continue;

			attno = key->sk_attno;
			subtype = key->sk_subtype;
			value = key->sk_argument;
			switch (key->sk_strategy)
			{
				case BTLessStrategyNumber:
				case BTLessEqualStrategyNumber:
				case BTEqualStrategyNumber:
				case BTGreaterEqualStrategyNumber:
				case BTGreaterStrategyNumber:

					/* a single value is simply fed to the key's operator */
					finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
															   key->sk_strategy);
					matches = DatumGetBool(FunctionCall2Coll(finfo, colloid,
															 val, value));
					break;

				default:
					/* shouldn't happen */
					elog(ERROR, "invalid strategy number %d", key->sk_strategy);
					matches = false;	/* keep compiler quiet */
					break;
			}

			/* the value has to match all the scan keys */
			matching &= matches;

			/* once we find a non-matching key, we're done */
			if (!matching)
				break;
		}

		/* have we found a value matching all scan keys? if yes, we're done */
		if (matching)
			PG_RETURN_DATUM(BoolGetDatum(true));
	}

	/* neither a range nor a single value satisfied all the keys */
	PG_RETURN_DATUM(BoolGetDatum(false));
}

/*
 * Given two BrinValues, update the first of them as a union of the summary
 * values contained in both.  The second one is untouched.
*/ Datum brin_minmax_multi_union(PG_FUNCTION_ARGS) { BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0); BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1); BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2); Oid colloid = PG_GET_COLLATION(); SerializedRanges *serialized_a; SerializedRanges *serialized_b; Ranges *ranges_a; Ranges *ranges_b; AttrNumber attno; Form_pg_attribute attr; ExpandedRange *eranges; int neranges; FmgrInfo *cmpFn, *distanceFn; DistanceValue *distances; MemoryContext ctx; MemoryContext oldctx; Assert(col_a->bv_attno == col_b->bv_attno); Assert(!col_a->bv_allnulls && !col_b->bv_allnulls); attno = col_a->bv_attno; attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1); serialized_a = (SerializedRanges *) PG_DETOAST_DATUM(col_a->bv_values[0]); serialized_b = (SerializedRanges *) PG_DETOAST_DATUM(col_b->bv_values[0]); ranges_a = brin_range_deserialize(serialized_a->maxvalues, serialized_a); ranges_b = brin_range_deserialize(serialized_b->maxvalues, serialized_b); /* make sure neither of the ranges is NULL */ Assert(ranges_a && ranges_b); neranges = (ranges_a->nranges + ranges_a->nvalues) + (ranges_b->nranges + ranges_b->nvalues); /* * The distanceFn calls (which may internally call e.g. numeric_le) may * allocate quite a bit of memory, and we must not leak it. Otherwise, * we'd have problems e.g. when building indexes. So we create a local * memory context and make sure we free the memory before leaving this * function (not after every call). 
*/ ctx = AllocSetContextCreate(CurrentMemoryContext, "minmax-multi context", ALLOCSET_DEFAULT_SIZES); oldctx = MemoryContextSwitchTo(ctx); /* allocate and fill */ eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange)); /* fill the expanded ranges with entries for the first range */ fill_expanded_ranges(eranges, ranges_a->nranges + ranges_a->nvalues, ranges_a); /* and now add combine ranges for the second range */ fill_expanded_ranges(&eranges[ranges_a->nranges + ranges_a->nvalues], ranges_b->nranges + ranges_b->nvalues, ranges_b); cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid, BTLessStrategyNumber); /* sort the expanded ranges */ neranges = sort_expanded_ranges(cmpFn, colloid, eranges, neranges); /* * We've loaded two different lists of expanded ranges, so some of them * may be overlapping. So walk through them and merge them. */ neranges = merge_overlapping_ranges(cmpFn, colloid, eranges, neranges); /* check that the combine ranges are correct (no overlaps, ordering) */ AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges); /* * If needed, reduce some of the ranges. * * XXX This may be fairly expensive, so maybe we should do it only when * it's actually needed (when we have too many ranges). */ /* build array of gap distances and sort them in ascending order */ distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE); distances = build_distances(distanceFn, colloid, eranges, neranges); /* * See how many values would be needed to store the current ranges, and if * needed combine as many of them to get below the threshold. The * collapsed ranges will be stored as a single value. * * XXX This does not apply the load factor, as we don't expect to add more * values to the range, so we prefer to keep as many ranges as possible. * * XXX Can the maxvalues be different in the two ranges? Perhaps we should * use maximum of those? 
*/ neranges = reduce_expanded_ranges(eranges, neranges, distances, ranges_a->maxvalues, cmpFn, colloid); /* update the first range summary */ store_expanded_ranges(ranges_a, eranges, neranges); MemoryContextSwitchTo(oldctx); MemoryContextDelete(ctx); /* cleanup and update the serialized value */ pfree(serialized_a); col_a->bv_values[0] = PointerGetDatum(brin_range_serialize(ranges_a)); PG_RETURN_VOID(); } /* * Cache and return minmax multi opclass support procedure * * Return the procedure corresponding to the given function support number * or null if it does not exist. */ static FmgrInfo * minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum) { MinmaxMultiOpaque *opaque; uint16 basenum = procnum - PROCNUM_BASE; /* * We cache these in the opaque struct, to avoid repetitive syscache * lookups. */ opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque; /* * If we already searched for this proc and didn't find it, don't bother * searching again. */ if (opaque->extra_proc_missing[basenum]) return NULL; if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid) { if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno, procnum))) { fmgr_info_copy(&opaque->extra_procinfos[basenum], index_getprocinfo(bdesc->bd_index, attno, procnum), bdesc->bd_context); } else { opaque->extra_proc_missing[basenum] = true; return NULL; } } return &opaque->extra_procinfos[basenum]; } /* * Cache and return the procedure for the given strategy. * * Note: this function mirrors minmax_multi_get_strategy_procinfo; see notes * there. If changes are made here, see that function too. 
*/ static FmgrInfo * minmax_multi_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype, uint16 strategynum) { MinmaxMultiOpaque *opaque; Assert(strategynum >= 1 && strategynum <= BTMaxStrategyNumber); opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque; /* * We cache the procedures for the previous subtype in the opaque struct, * to avoid repetitive syscache lookups. If the subtype changed, * invalidate all the cached entries. */ if (opaque->cached_subtype != subtype) { uint16 i; for (i = 1; i <= BTMaxStrategyNumber; i++) opaque->strategy_procinfos[i - 1].fn_oid = InvalidOid; opaque->cached_subtype = subtype; } if (opaque->strategy_procinfos[strategynum - 1].fn_oid == InvalidOid) { Form_pg_attribute attr; HeapTuple tuple; Oid opfamily, oprid; opfamily = bdesc->bd_index->rd_opfamily[attno - 1]; attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1); tuple = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily), ObjectIdGetDatum(attr->atttypid), ObjectIdGetDatum(subtype), Int16GetDatum(strategynum)); if (!HeapTupleIsValid(tuple)) elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", strategynum, attr->atttypid, subtype, opfamily); oprid = DatumGetObjectId(SysCacheGetAttrNotNull(AMOPSTRATEGY, tuple, Anum_pg_amop_amopopr)); ReleaseSysCache(tuple); Assert(RegProcedureIsValid(oprid)); fmgr_info_cxt(get_opcode(oprid), &opaque->strategy_procinfos[strategynum - 1], bdesc->bd_context); } return &opaque->strategy_procinfos[strategynum - 1]; } Datum brin_minmax_multi_options(PG_FUNCTION_ARGS) { local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); init_local_reloptions(relopts, sizeof(MinMaxMultiOptions)); add_local_int_reloption(relopts, "values_per_range", "desc", MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE, 8, 256, offsetof(MinMaxMultiOptions, valuesPerRange)); PG_RETURN_VOID(); } /* * brin_minmax_multi_summary_in * - input routine for type brin_minmax_multi_summary. 
* * brin_minmax_multi_summary is only used internally to represent summaries * in BRIN minmax-multi indexes, so it has no operations of its own, and we * disallow input too. */ Datum brin_minmax_multi_summary_in(PG_FUNCTION_ARGS) { /* * brin_minmax_multi_summary stores the data in binary form and parsing * text input is not needed, so disallow this. */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary"))); PG_RETURN_VOID(); /* keep compiler quiet */ } /* * brin_minmax_multi_summary_out * - output routine for type brin_minmax_multi_summary. * * BRIN minmax-multi summaries are serialized into a bytea value, but we * want to output something nicer humans can understand. */ Datum brin_minmax_multi_summary_out(PG_FUNCTION_ARGS) { int i; int idx; SerializedRanges *ranges; Ranges *ranges_deserialized; StringInfoData str; bool isvarlena; Oid outfunc; FmgrInfo fmgrinfo; ArrayBuildState *astate_values = NULL; initStringInfo(&str); appendStringInfoChar(&str, '{'); /* * Detoast to get value with full 4B header (can't be stored in a toast * table, but can use 1B header). */ ranges = (SerializedRanges *) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(0)); /* lookup output func for the type */ getTypeOutputInfo(ranges->typid, &outfunc, &isvarlena); fmgr_info(outfunc, &fmgrinfo); /* deserialize the range info easy-to-process pieces */ ranges_deserialized = brin_range_deserialize(ranges->maxvalues, ranges); appendStringInfo(&str, "nranges: %d nvalues: %d maxvalues: %d", ranges_deserialized->nranges, ranges_deserialized->nvalues, ranges_deserialized->maxvalues); /* serialize ranges */ idx = 0; for (i = 0; i < ranges_deserialized->nranges; i++) { char *a, *b; text *c; StringInfoData buf; initStringInfo(&buf); a = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]); b = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]); appendStringInfo(&buf, "%s ... 
%s", a, b); c = cstring_to_text_with_len(buf.data, buf.len); astate_values = accumArrayResult(astate_values, PointerGetDatum(c), false, TEXTOID, CurrentMemoryContext); } if (ranges_deserialized->nranges > 0) { Oid typoutput; bool typIsVarlena; Datum val; char *extval; getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena); val = makeArrayResult(astate_values, CurrentMemoryContext); extval = OidOutputFunctionCall(typoutput, val); appendStringInfo(&str, " ranges: %s", extval); } /* serialize individual values */ astate_values = NULL; for (i = 0; i < ranges_deserialized->nvalues; i++) { Datum a; text *b; a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]); b = cstring_to_text(DatumGetCString(a)); astate_values = accumArrayResult(astate_values, PointerGetDatum(b), false, TEXTOID, CurrentMemoryContext); } if (ranges_deserialized->nvalues > 0) { Oid typoutput; bool typIsVarlena; Datum val; char *extval; getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena); val = makeArrayResult(astate_values, CurrentMemoryContext); extval = OidOutputFunctionCall(typoutput, val); appendStringInfo(&str, " values: %s", extval); } appendStringInfoChar(&str, '}'); PG_RETURN_CSTRING(str.data); } /* * brin_minmax_multi_summary_recv * - binary input routine for type brin_minmax_multi_summary. */ Datum brin_minmax_multi_summary_recv(PG_FUNCTION_ARGS) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary"))); PG_RETURN_VOID(); /* keep compiler quiet */ } /* * brin_minmax_multi_summary_send * - binary output routine for type brin_minmax_multi_summary. * * BRIN minmax-multi summaries are serialized in a bytea value (although * the type is named differently), so let's just send that. */ Datum brin_minmax_multi_summary_send(PG_FUNCTION_ARGS) { return byteasend(fcinfo); }