postgresql/src/backend/utils/adt/rangetypes_spgist.c

1001 lines
29 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* rangetypes_spgist.c
* implementation of quad tree over ranges mapped to 2d-points for SP-GiST.
*
* Quad tree is a data structure similar to a binary tree, but is adapted to
* 2d data. Each inner node of a quad tree contains a point (centroid) which
* divides the 2d-space into 4 quadrants. Each quadrant is associated with a
* child node.
*
* Ranges are mapped to 2d-points so that the lower bound is one dimension,
* and the upper bound is another. By convention, we visualize the lower bound
* to be the horizontal axis, and upper bound the vertical axis.
*
* One quirk with this mapping is the handling of empty ranges. An empty range
* doesn't have lower and upper bounds, so it cannot be mapped to 2d space in
* a straightforward way. To cope with that, the root node can have a 5th
* quadrant, which is reserved for empty ranges. Furthermore, there can be
* inner nodes in the tree with no centroid. They contain only two child nodes,
* one for empty ranges and another for non-empty ones. Such a node can appear
* as the root node, or in the tree under the 5th child of the root node (in
* which case it will only contain empty nodes).
*
* The SP-GiST picksplit function uses medians along both axes as the centroid.
* This implementation only uses the comparison function of the range element
* datatype, therefore it works for any range type.
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/adt/rangetypes_spgist.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/spgist.h"
#include "access/stratnum.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/rangetypes.h"
static int16 getQuadrant(TypeCacheEntry *typcache, const RangeType *centroid,
const RangeType *tst);
static int bound_cmp(const void *a, const void *b, void *arg);
static int adjacent_inner_consistent(TypeCacheEntry *typcache,
const RangeBound *arg, const RangeBound *centroid,
const RangeBound *prev);
static int adjacent_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *arg,
const RangeBound *centroid);
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
/*
* SP-GiST 'config' interface function.
*/
Datum
spg_range_quad_config(PG_FUNCTION_ARGS)
{
/* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
cfg->prefixType = ANYRANGEOID;
cfg->labelType = VOIDOID; /* we don't need node labels */
cfg->canReturnData = true;
cfg->longValuesOK = false;
PG_RETURN_VOID();
}
/*----------
* Determine which quadrant a 2d-mapped range falls into, relative to the
* centroid.
*
* Quadrants are numbered like this:
*
* 4 | 1
* ----+----
* 3 | 2
*
* Where the lower bound of range is the horizontal axis and upper bound the
* vertical axis.
*
* Ranges on one of the axes are taken to lie in the quadrant with higher value
* along perpendicular axis. That is, a value on the horizontal axis is taken
* to belong to quadrant 1 or 4, and a value on the vertical axis is taken to
* belong to quadrant 1 or 2. A range equal to centroid is taken to lie in
* quadrant 1.
*
* Empty ranges are taken to lie in the special quadrant 5.
*----------
*/
static int16
getQuadrant(TypeCacheEntry *typcache, const RangeType *centroid, const RangeType *tst)
{
RangeBound centroidLower,
centroidUpper;
bool centroidEmpty;
RangeBound lower,
upper;
bool empty;
range_deserialize(typcache, centroid, &centroidLower, &centroidUpper,
&centroidEmpty);
range_deserialize(typcache, tst, &lower, &upper, &empty);
if (empty)
return 5;
if (range_cmp_bounds(typcache, &lower, &centroidLower) >= 0)
{
if (range_cmp_bounds(typcache, &upper, &centroidUpper) >= 0)
return 1;
else
return 2;
}
else
{
if (range_cmp_bounds(typcache, &upper, &centroidUpper) >= 0)
return 4;
else
return 3;
}
}
/*
* Choose SP-GiST function: choose path for addition of new range.
*/
Datum
spg_range_quad_choose(PG_FUNCTION_ARGS)
{
spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
RangeType *inRange = DatumGetRangeTypeP(in->datum),
*centroid;
int16 quadrant;
TypeCacheEntry *typcache;
if (in->allTheSame)
{
out->resultType = spgMatchNode;
/* nodeN will be set by core */
out->result.matchNode.levelAdd = 0;
out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
PG_RETURN_VOID();
}
typcache = range_get_typcache(fcinfo, RangeTypeGetOid(inRange));
/*
* A node with no centroid divides ranges purely on whether they're empty
* or not. All empty ranges go to child node 0, all non-empty ranges go to
* node 1.
*/
if (!in->hasPrefix)
{
out->resultType = spgMatchNode;
if (RangeIsEmpty(inRange))
out->result.matchNode.nodeN = 0;
else
out->result.matchNode.nodeN = 1;
out->result.matchNode.levelAdd = 1;
out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
PG_RETURN_VOID();
}
centroid = DatumGetRangeTypeP(in->prefixDatum);
quadrant = getQuadrant(typcache, centroid, inRange);
Assert(quadrant <= in->nNodes);
/* Select node matching to quadrant number */
out->resultType = spgMatchNode;
out->result.matchNode.nodeN = quadrant - 1;
out->result.matchNode.levelAdd = 1;
out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
PG_RETURN_VOID();
}
/*
* Bound comparison for sorting.
*/
static int
bound_cmp(const void *a, const void *b, void *arg)
{
RangeBound *ba = (RangeBound *) a;
RangeBound *bb = (RangeBound *) b;
TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
return range_cmp_bounds(typcache, ba, bb);
}
/*
* Picksplit SP-GiST function: split ranges into nodes. Select "centroid"
* range and distribute ranges according to quadrants.
*/
Datum
spg_range_quad_picksplit(PG_FUNCTION_ARGS)
{
spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
int i;
int j;
int nonEmptyCount;
RangeType *centroid;
bool empty;
TypeCacheEntry *typcache;
/* Use the median values of lower and upper bounds as the centroid range */
RangeBound *lowerBounds,
*upperBounds;
typcache = range_get_typcache(fcinfo,
RangeTypeGetOid(DatumGetRangeTypeP(in->datums[0])));
/* Allocate memory for bounds */
lowerBounds = palloc(sizeof(RangeBound) * in->nTuples);
upperBounds = palloc(sizeof(RangeBound) * in->nTuples);
j = 0;
/* Deserialize bounds of ranges, count non-empty ranges */
for (i = 0; i < in->nTuples; i++)
{
range_deserialize(typcache, DatumGetRangeTypeP(in->datums[i]),
&lowerBounds[j], &upperBounds[j], &empty);
if (!empty)
j++;
}
nonEmptyCount = j;
/*
* All the ranges are empty. The best we can do is to construct an inner
* node with no centroid, and put all ranges into node 0. If non-empty
* ranges are added later, they will be routed to node 1.
*/
if (nonEmptyCount == 0)
{
out->nNodes = 2;
out->hasPrefix = false;
/* Prefix is empty */
out->prefixDatum = PointerGetDatum(NULL);
out->nodeLabels = NULL;
out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
/* Place all ranges into node 0 */
for (i = 0; i < in->nTuples; i++)
{
RangeType *range = DatumGetRangeTypeP(in->datums[i]);
out->leafTupleDatums[i] = RangeTypePGetDatum(range);
out->mapTuplesToNodes[i] = 0;
}
PG_RETURN_VOID();
}
/* Sort range bounds in order to find medians */
qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound),
bound_cmp, typcache);
qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound),
bound_cmp, typcache);
/* Construct "centroid" range from medians of lower and upper bounds */
centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2],
&upperBounds[nonEmptyCount / 2], false);
out->hasPrefix = true;
out->prefixDatum = RangeTypePGetDatum(centroid);
/* Create node for empty ranges only if it is a root node */
out->nNodes = (in->level == 0) ? 5 : 4;
out->nodeLabels = NULL; /* we don't need node labels */
out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
/*
* Assign ranges to corresponding nodes according to quadrants relative to
* "centroid" range.
*/
for (i = 0; i < in->nTuples; i++)
{
RangeType *range = DatumGetRangeTypeP(in->datums[i]);
int16 quadrant = getQuadrant(typcache, centroid, range);
out->leafTupleDatums[i] = RangeTypePGetDatum(range);
out->mapTuplesToNodes[i] = quadrant - 1;
}
PG_RETURN_VOID();
}
/*
* SP-GiST consistent function for inner nodes: check which nodes are
* consistent with given set of queries.
*/
Datum
spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
{
spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
int which;
int i;
MemoryContext oldCtx;
/*
* For adjacent search we need also previous centroid (if any) to improve
* the precision of the consistent check. In this case needPrevious flag
* is set and centroid is passed into traversalValue.
*/
bool needPrevious = false;
if (in->allTheSame)
{
/* Report that all nodes should be visited */
out->nNodes = in->nNodes;
out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
for (i = 0; i < in->nNodes; i++)
out->nodeNumbers[i] = i;
PG_RETURN_VOID();
}
if (!in->hasPrefix)
{
/*
* No centroid on this inner node. Such a node has two child nodes,
* the first for empty ranges, and the second for non-empty ones.
*/
Assert(in->nNodes == 2);
/*
* Nth bit of which variable means that (N - 1)th node should be
* visited. Initially all bits are set. Bits of nodes which should be
* skipped will be unset.
*/
which = (1 << 1) | (1 << 2);
for (i = 0; i < in->nkeys; i++)
{
StrategyNumber strategy = in->scankeys[i].sk_strategy;
bool empty;
/*
* The only strategy when second argument of operator is not range
* is RANGESTRAT_CONTAINS_ELEM.
*/
if (strategy != RANGESTRAT_CONTAINS_ELEM)
empty = RangeIsEmpty(DatumGetRangeTypeP(in->scankeys[i].sk_argument));
else
empty = false;
switch (strategy)
{
case RANGESTRAT_BEFORE:
case RANGESTRAT_OVERLEFT:
case RANGESTRAT_OVERLAPS:
case RANGESTRAT_OVERRIGHT:
case RANGESTRAT_AFTER:
case RANGESTRAT_ADJACENT:
/* These strategies return false if any argument is empty */
if (empty)
which = 0;
else
which &= (1 << 2);
break;
case RANGESTRAT_CONTAINS:
/*
* All ranges contain an empty range. Only non-empty
* ranges can contain a non-empty range.
*/
if (!empty)
which &= (1 << 2);
break;
case RANGESTRAT_CONTAINED_BY:
/*
* Only an empty range is contained by an empty range.
* Both empty and non-empty ranges can be contained by a
* non-empty range.
*/
if (empty)
which &= (1 << 1);
break;
case RANGESTRAT_CONTAINS_ELEM:
which &= (1 << 2);
break;
case RANGESTRAT_EQ:
if (empty)
which &= (1 << 1);
else
which &= (1 << 2);
break;
default:
elog(ERROR, "unrecognized range strategy: %d", strategy);
break;
}
if (which == 0)
break; /* no need to consider remaining conditions */
}
}
else
{
RangeBound centroidLower,
centroidUpper;
bool centroidEmpty;
TypeCacheEntry *typcache;
RangeType *centroid;
/* This node has a centroid. Fetch it. */
centroid = DatumGetRangeTypeP(in->prefixDatum);
typcache = range_get_typcache(fcinfo,
RangeTypeGetOid(DatumGetRangeTypeP(centroid)));
range_deserialize(typcache, centroid, &centroidLower, &centroidUpper,
&centroidEmpty);
Assert(in->nNodes == 4 || in->nNodes == 5);
/*
* Nth bit of which variable means that (N - 1)th node (Nth quadrant)
* should be visited. Initially all bits are set. Bits of nodes which
* can be skipped will be unset.
*/
which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5);
for (i = 0; i < in->nkeys; i++)
{
StrategyNumber strategy;
RangeBound lower,
upper;
bool empty;
RangeType *range = NULL;
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
RangeType *prevCentroid = NULL;
RangeBound prevLower,
prevUpper;
bool prevEmpty;
/* Restrictions on range bounds according to scan strategy */
RangeBound *minLower = NULL,
*maxLower = NULL,
*minUpper = NULL,
*maxUpper = NULL;
/* Are the restrictions on range bounds inclusive? */
bool inclusive = true;
bool strictEmpty = true;
int cmp,
which1,
which2;
strategy = in->scankeys[i].sk_strategy;
/*
* RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but
* the argument is a single element. Expand the single element to
* a range containing only the element, and treat it like
* RANGESTRAT_CONTAINS.
*/
if (strategy == RANGESTRAT_CONTAINS_ELEM)
{
lower.inclusive = true;
lower.infinite = false;
lower.lower = true;
lower.val = in->scankeys[i].sk_argument;
upper.inclusive = true;
upper.infinite = false;
upper.lower = false;
upper.val = in->scankeys[i].sk_argument;
empty = false;
strategy = RANGESTRAT_CONTAINS;
}
else
{
range = DatumGetRangeTypeP(in->scankeys[i].sk_argument);
range_deserialize(typcache, range, &lower, &upper, &empty);
}
/*
* Most strategies are handled by forming a bounding box from the
* search key, defined by a minLower, maxLower, minUpper,
* maxUpper. Some modify 'which' directly, to specify exactly
* which quadrants need to be visited.
*
* For most strategies, nothing matches an empty search key, and
* an empty range never matches a non-empty key. If a strategy
* does not behave like that wrt. empty ranges, set strictEmpty to
* false.
*/
switch (strategy)
{
case RANGESTRAT_BEFORE:
/*
* Range A is before range B if upper bound of A is lower
* than lower bound of B.
*/
maxUpper = &lower;
inclusive = false;
break;
case RANGESTRAT_OVERLEFT:
/*
* Range A is overleft to range B if upper bound of A is
* less or equal to upper bound of B.
*/
maxUpper = &upper;
break;
case RANGESTRAT_OVERLAPS:
/*
* Non-empty ranges overlap, if lower bound of each range
* is lower or equal to upper bound of the other range.
*/
maxLower = &upper;
minUpper = &lower;
break;
case RANGESTRAT_OVERRIGHT:
/*
* Range A is overright to range B if lower bound of A is
* greater or equal to lower bound of B.
*/
minLower = &lower;
break;
case RANGESTRAT_AFTER:
/*
* Range A is after range B if lower bound of A is greater
* than upper bound of B.
*/
minLower = &upper;
inclusive = false;
break;
case RANGESTRAT_ADJACENT:
if (empty)
break; /* Skip to strictEmpty check. */
/*
* Previously selected quadrant could exclude possibility
* for lower or upper bounds to be adjacent. Deserialize
* previous centroid range if present for checking this.
*/
if (in->traversalValue)
{
prevCentroid = DatumGetRangeTypeP(in->traversalValue);
range_deserialize(typcache, prevCentroid,
&prevLower, &prevUpper, &prevEmpty);
}
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
/*
* For a range's upper bound to be adjacent to the
* argument's lower bound, it will be found along the line
* adjacent to (and just below) Y=lower. Therefore, if the
* argument's lower bound is less than the centroid's
* upper bound, the line falls in quadrants 2 and 3; if
* greater, the line falls in quadrants 1 and 4. (see
* adjacent_cmp_bounds for description of edge cases).
*/
cmp = adjacent_inner_consistent(typcache, &lower,
&centroidUpper,
prevCentroid ? &prevUpper : NULL);
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
if (cmp > 0)
which1 = (1 << 1) | (1 << 4);
else if (cmp < 0)
which1 = (1 << 2) | (1 << 3);
else
which1 = 0;
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
/*
* Also search for ranges's adjacent to argument's upper
* bound. They will be found along the line adjacent to
2015-05-24 03:35:49 +02:00
* (and just right of) X=upper, which falls in quadrants 3
* and 4, or 1 and 2.
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
*/
cmp = adjacent_inner_consistent(typcache, &upper,
&centroidLower,
prevCentroid ? &prevLower : NULL);
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
if (cmp > 0)
which2 = (1 << 1) | (1 << 2);
else if (cmp < 0)
which2 = (1 << 3) | (1 << 4);
else
which2 = 0;
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
/* We must chase down ranges adjacent to either bound. */
which &= which1 | which2;
needPrevious = true;
break;
case RANGESTRAT_CONTAINS:
/*
* Non-empty range A contains non-empty range B if lower
* bound of A is lower or equal to lower bound of range B
* and upper bound of range A is greater or equal to upper
* bound of range A.
*
* All non-empty ranges contain an empty range.
*/
strictEmpty = false;
if (!empty)
{
which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
maxLower = &lower;
minUpper = &upper;
}
break;
case RANGESTRAT_CONTAINED_BY:
/* The opposite of contains. */
strictEmpty = false;
if (empty)
{
/* An empty range is only contained by an empty range */
which &= (1 << 5);
}
else
{
minLower = &lower;
maxUpper = &upper;
}
break;
case RANGESTRAT_EQ:
/*
* Equal range can be only in the same quadrant where
* argument would be placed to.
*/
strictEmpty = false;
which &= (1 << getQuadrant(typcache, centroid, range));
break;
default:
elog(ERROR, "unrecognized range strategy: %d", strategy);
break;
}
if (strictEmpty)
{
if (empty)
{
/* Scan key is empty, no branches are satisfying */
which = 0;
break;
}
else
{
/* Shouldn't visit tree branch with empty ranges */
which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
}
}
/*
* Using the bounding box, see which quadrants we have to descend
* into.
*/
if (minLower)
{
/*
* If the centroid's lower bound is less than or equal to the
* minimum lower bound, anything in the 3rd and 4th quadrants
* will have an even smaller lower bound, and thus can't
* match.
*/
if (range_cmp_bounds(typcache, &centroidLower, minLower) <= 0)
which &= (1 << 1) | (1 << 2) | (1 << 5);
}
if (maxLower)
{
/*
* If the centroid's lower bound is greater than the maximum
* lower bound, anything in the 1st and 2nd quadrants will
* also have a greater than or equal lower bound, and thus
* can't match. If the centroid's lower bound is equal to the
* maximum lower bound, we can still exclude the 1st and 2nd
* quadrants if we're looking for a value strictly greater
* than the maximum.
*/
int cmp;
cmp = range_cmp_bounds(typcache, &centroidLower, maxLower);
if (cmp > 0 || (!inclusive && cmp == 0))
which &= (1 << 3) | (1 << 4) | (1 << 5);
}
if (minUpper)
{
/*
* If the centroid's upper bound is less than or equal to the
* minimum upper bound, anything in the 2nd and 3rd quadrants
* will have an even smaller upper bound, and thus can't
* match.
*/
if (range_cmp_bounds(typcache, &centroidUpper, minUpper) <= 0)
which &= (1 << 1) | (1 << 4) | (1 << 5);
}
if (maxUpper)
{
/*
* If the centroid's upper bound is greater than the maximum
* upper bound, anything in the 1st and 4th quadrants will
* also have a greater than or equal upper bound, and thus
* can't match. If the centroid's upper bound is equal to the
* maximum upper bound, we can still exclude the 1st and 4th
* quadrants if we're looking for a value strictly greater
* than the maximum.
*/
int cmp;
cmp = range_cmp_bounds(typcache, &centroidUpper, maxUpper);
if (cmp > 0 || (!inclusive && cmp == 0))
which &= (1 << 2) | (1 << 3) | (1 << 5);
}
if (which == 0)
break; /* no need to consider remaining conditions */
}
}
/* We must descend into the quadrant(s) identified by 'which' */
out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
if (needPrevious)
out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes);
out->nNodes = 0;
/*
* Elements of traversalValues should be allocated in
* traversalMemoryContext
*/
oldCtx = MemoryContextSwitchTo(in->traversalMemoryContext);
for (i = 1; i <= in->nNodes; i++)
{
if (which & (1 << i))
{
/* Save previous prefix if needed */
if (needPrevious)
{
2016-06-10 00:02:36 +02:00
Datum previousCentroid;
2016-06-10 00:02:36 +02:00
/*
* We know, that in->prefixDatum in this place is varlena,
* because it's range
*/
previousCentroid = datumCopy(in->prefixDatum, false, -1);
2016-06-10 00:02:36 +02:00
out->traversalValues[out->nNodes] = (void *) previousCentroid;
}
out->nodeNumbers[out->nNodes] = i - 1;
out->nNodes++;
}
}
MemoryContextSwitchTo(oldCtx);
PG_RETURN_VOID();
}
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
/*
* adjacent_cmp_bounds
*
* Given an argument and centroid bound, this function determines if any
* bounds that are adjacent to the argument are smaller than, or greater than
* or equal to centroid. For brevity, we call the arg < centroid "left", and
* arg >= centroid case "right". This corresponds to how the quadrants are
* arranged, if you imagine that "left" is equivalent to "down" and "right"
* is equivalent to "up".
*
* For the "left" case, returns -1, and for the "right" case, returns 1.
*/
static int
adjacent_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *arg,
const RangeBound *centroid)
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
{
int cmp;
Assert(arg->lower != centroid->lower);
2015-05-24 03:35:49 +02:00
cmp = range_cmp_bounds(typcache, arg, centroid);
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
if (centroid->lower)
{
/*------
* The argument is an upper bound, we are searching for adjacent lower
* bounds. A matching adjacent lower bound must be *larger* than the
* argument, but only just.
*
* The following table illustrates the desired result with a fixed
* argument bound, and different centroids. The CMP column shows
* the value of 'cmp' variable, and ADJ shows whether the argument
* and centroid are adjacent, per bounds_adjacent(). (N) means we
* don't need to check for that case, because it's implied by CMP.
* With the argument range [..., 500), the adjacent range we're
* searching for is [500, ...):
*
2015-05-24 03:35:49 +02:00
* ARGUMENT CENTROID CMP ADJ
* [..., 500) [498, ...) > (N) [500, ...) is to the right
* [..., 500) [499, ...) = (N) [500, ...) is to the right
* [..., 500) [500, ...) < Y [500, ...) is to the right
* [..., 500) [501, ...) < N [500, ...) is to the left
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
*
* So, we must search left when the argument is smaller than, and not
* adjacent, to the centroid. Otherwise search right.
*------
*/
if (cmp < 0 && !bounds_adjacent(typcache, *arg, *centroid))
return -1;
else
return 1;
}
else
{
/*------
* The argument is a lower bound, we are searching for adjacent upper
* bounds. A matching adjacent upper bound must be *smaller* than the
* argument, but only just.
*
2015-05-24 03:35:49 +02:00
* ARGUMENT CENTROID CMP ADJ
* [500, ...) [..., 499) > (N) [..., 500) is to the right
* [500, ...) [..., 500) > (Y) [..., 500) is to the right
* [500, ...) [..., 501) = (N) [..., 500) is to the left
* [500, ...) [..., 502) < (N) [..., 500) is to the left
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
*
* We must search left when the argument is smaller than or equal to
* the centroid. Otherwise search right. We don't need to check
* whether the argument is adjacent with the centroid, because it
* doesn't matter.
*------
*/
if (cmp <= 0)
return -1;
else
return 1;
}
}
/*----------
* adjacent_inner_consistent
*
* Like adjacent_cmp_bounds, but also takes into account the previous
* level's centroid. We might've traversed left (or right) at the previous
* node, in search for ranges adjacent to the other bound, even though we
* already ruled out the possibility for any matches in that direction for
* this bound. By comparing the argument with the previous centroid, and
* the previous centroid with the current centroid, we can determine which
* direction we should've moved in at previous level, and which direction we
* actually moved.
*
* If there can be any matches to the left, returns -1. If to the right,
* returns 1. If there can be no matches below this centroid, because we
* already ruled them out at the previous level, returns 0.
*
* XXX: Comparing just the previous and current level isn't foolproof; we
* might still search some branches unnecessarily. For example, imagine that
* we are searching for value 15, and we traverse the following centroids
* (only considering one bound for the moment):
*
* Level 1: 20
* Level 2: 50
* Level 3: 25
*
* At this point, previous centroid is 50, current centroid is 25, and the
* target value is to the left. But because we already moved right from
* centroid 20 to 50 in the first level, there cannot be any values < 20 in
* the current branch. But we don't know that just by looking at the previous
* and current centroid, so we traverse left, unnecessarily. The reason we are
* down this branch is that we're searching for matches with the *other*
* bound. If we kept track of which bound we are searching for explicitly,
* instead of deducing that from the previous and current centroid, we could
* avoid some unnecessary work.
*----------
*/
static int
adjacent_inner_consistent(TypeCacheEntry *typcache, const RangeBound *arg,
const RangeBound *centroid, const RangeBound *prev)
Fix bugs in SP-GiST search with range type's -|- (adjacent) operator. The consistent function contained several bugs: * The "if (which2) { ... }" block was broken. It compared the argument's lower bound against centroid's upper bound, while it was supposed to compare the argument's upper bound against the centroid's lower bound (the comment was correct, code was wrong). Also, it cleared bits in the "which1" variable, while it was supposed to clear bits in "which2". * If the argument's upper bound was equal to the centroid's lower bound, we descended to both halves (= all quadrants). That's unnecessary, searching the right quadrants is sufficient. This didn't lead to incorrect query results, but was clearly wrong, and slowed down queries unnecessarily. * In the case that argument's lower bound is adjacent to the centroid's upper bound, we also don't need to visit all quadrants. Per similar reasoning as previous point. * The code where we compare the previous centroid with the current centroid should match the code where we compare the current centroid with the argument. The point of that code is to redo the calculation done in the previous level, to see if we were supposed to traverse left or right (or up or down), and if we actually did. If we moved in the different direction, then we know there are no matches for bound. Refactor the code and adds comments to make it more readable and easier to reason about. Backpatch to 9.3 where SP-GiST support for range types was introduced.
2014-07-16 08:10:54 +02:00
{
if (prev)
{
int prevcmp;
int cmp;
/*
* Which direction were we supposed to traverse at previous level,
* left or right?
*/
prevcmp = adjacent_cmp_bounds(typcache, arg, prev);
/* and which direction did we actually go? */
cmp = range_cmp_bounds(typcache, centroid, prev);
/* if the two don't agree, there's nothing to see here */
if ((prevcmp < 0 && cmp >= 0) || (prevcmp > 0 && cmp < 0))
return 0;
}
return adjacent_cmp_bounds(typcache, arg, centroid);
}
/*
* SP-GiST consistent function for leaf nodes: check leaf value against query
* using corresponding function.
*/
Datum
spg_range_quad_leaf_consistent(PG_FUNCTION_ARGS)
{
spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
RangeType *leafRange = DatumGetRangeTypeP(in->leafDatum);
TypeCacheEntry *typcache;
bool res;
int i;
/* all tests are exact */
out->recheck = false;
/* leafDatum is what it is... */
out->leafValue = in->leafDatum;
typcache = range_get_typcache(fcinfo, RangeTypeGetOid(leafRange));
/* Perform the required comparison(s) */
res = true;
for (i = 0; i < in->nkeys; i++)
{
Datum keyDatum = in->scankeys[i].sk_argument;
/* Call the function corresponding to the scan strategy */
switch (in->scankeys[i].sk_strategy)
{
case RANGESTRAT_BEFORE:
res = range_before_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
case RANGESTRAT_OVERLEFT:
res = range_overleft_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
case RANGESTRAT_OVERLAPS:
res = range_overlaps_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
case RANGESTRAT_OVERRIGHT:
res = range_overright_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
case RANGESTRAT_AFTER:
res = range_after_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
case RANGESTRAT_ADJACENT:
res = range_adjacent_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
case RANGESTRAT_CONTAINS:
res = range_contains_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
case RANGESTRAT_CONTAINED_BY:
res = range_contained_by_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
case RANGESTRAT_CONTAINS_ELEM:
res = range_contains_elem_internal(typcache, leafRange,
keyDatum);
break;
case RANGESTRAT_EQ:
res = range_eq_internal(typcache, leafRange,
DatumGetRangeTypeP(keyDatum));
break;
default:
elog(ERROR, "unrecognized range strategy: %d",
in->scankeys[i].sk_strategy);
break;
}
/*
* If leaf datum doesn't match to a query key, no need to check
* subsequent keys.
*/
if (!res)
break;
}
PG_RETURN_BOOL(res);
}