postgresql/contrib/ltree/_ltree_gist.c

560 lines
12 KiB
C
Raw Normal View History

/*
 * contrib/ltree/_ltree_gist.c
 *
 *
 * GiST support for ltree[]
 * Teodor Sigaev <teodor@stack.net>
 */
#include "postgres.h"
#include "access/gist.h"
#include "access/stratnum.h"
#include "crc32.h"
#include "ltree.h"
#include "port/pg_bitutils.h"
2002-09-04 22:31:48 +02:00
/* Register the SQL-callable GiST support functions defined in this file. */
PG_FUNCTION_INFO_V1(_ltree_compress);
PG_FUNCTION_INFO_V1(_ltree_same);
PG_FUNCTION_INFO_V1(_ltree_union);
PG_FUNCTION_INFO_V1(_ltree_penalty);
PG_FUNCTION_INFO_V1(_ltree_picksplit);
PG_FUNCTION_INFO_V1(_ltree_consistent);
/* Key of entry number 'pos' in entry vector 'vec', viewed as an ltree_gist pointer. */
#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
/* Address INTALIGN(VARSIZE(x)) bytes past 'x'; used to step through packed array elements. */
#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
/* Evaluates to -((a - b)^3) * c as a double; used in picksplit to bias the left/right choice. */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
/*
 * Fold every level label of ltree 't' into the signature 'sign'.
 *
 * Each label is hashed with ltree_crc32_sz() and the hash is handed to
 * AHASH() to record it in the signature.  Existing content of 'sign' is
 * kept, so the caller can accumulate several ltrees into one signature.
 */
static void
hashing(BITVECP sign, ltree *t)
{
	ltree_level *cur = LTREE_FIRST(t);
	int			remaining;

	for (remaining = t->numlevel; remaining > 0; remaining--)
	{
		/* keep the CRC unsigned, as the lookup side in gist_te() does */
		unsigned int hash = ltree_crc32_sz(cur->name, cur->len);

		AHASH(sign, hash);
		cur = LEVEL_NEXT(cur);
	}
}
Datum
_ltree_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
2002-09-04 22:31:48 +02:00
if (entry->leafkey)
{ /* ltree */
ltree_gist *key;
ArrayType *val = DatumGetArrayTypeP(entry->key);
int32 len = LTG_HDRSIZE + ASIGLEN;
2002-09-04 22:31:48 +02:00
int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
ltree *item = (ltree *) ARR_DATA_PTR(val);
if (ARR_NDIM(val) > 1)
ereport(ERROR,
2004-08-29 07:07:03 +02:00
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("array must be one-dimensional")));
if (array_contains_nulls(val))
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("array must not contain nulls")));
key = (ltree_gist *) palloc0(len);
SET_VARSIZE(key, len);
key->flag = 0;
2005-12-06 19:22:33 +01:00
MemSet(LTG_SIGN(key), 0, ASIGLEN);
2002-09-04 22:31:48 +02:00
while (num > 0)
{
hashing(LTG_SIGN(key), item);
num--;
item = NEXTVAL(item);
}
2002-09-04 22:31:48 +02:00
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
2002-09-04 22:31:48 +02:00
entry->rel, entry->page,
entry->offset, false);
2002-09-04 22:31:48 +02:00
}
2004-08-29 07:07:03 +02:00
else if (!LTG_ISALLTRUE(entry->key))
2002-09-04 22:31:48 +02:00
{
int32 i,
2002-09-04 22:31:48 +02:00
len;
ltree_gist *key;
2002-09-04 22:31:48 +02:00
BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key));
ALOOPBYTE
{
if ((sign[i] & 0xff) != 0xff)
PG_RETURN_POINTER(retval);
}
2002-09-04 22:31:48 +02:00
len = LTG_HDRSIZE;
key = (ltree_gist *) palloc0(len);
SET_VARSIZE(key, len);
key->flag = LTG_ALLTRUE;
2002-09-04 22:31:48 +02:00
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
2002-09-04 22:31:48 +02:00
entry->rel, entry->page,
entry->offset, false);
}
PG_RETURN_POINTER(retval);
}
2002-09-04 22:31:48 +02:00
Datum
_ltree_same(PG_FUNCTION_ARGS)
{
ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
2002-09-04 22:31:48 +02:00
if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
*result = true;
2002-09-04 22:31:48 +02:00
else if (LTG_ISALLTRUE(a))
*result = false;
2002-09-04 22:31:48 +02:00
else if (LTG_ISALLTRUE(b))
*result = false;
2002-09-04 22:31:48 +02:00
else
{
int32 i;
2002-09-04 22:31:48 +02:00
BITVECP sa = LTG_SIGN(a),
sb = LTG_SIGN(b);
*result = true;
ALOOPBYTE
{
if (sa[i] != sb[i])
{
*result = false;
break;
}
2002-09-04 22:31:48 +02:00
}
}
PG_RETURN_POINTER(result);
}
/*
 * OR the signature of 'add' into 'sbase'.
 *
 * Returns 1, leaving 'sbase' untouched, when 'add' is an LTG_ALLTRUE key;
 * returns 0 after merging otherwise.
 */
static int32
unionkey(BITVECP sbase, ltree_gist *add)
{
	int32		i;
	BITVECP		sadd;

	if (LTG_ISALLTRUE(add))
		return 1;

	sadd = LTG_SIGN(add);
	ALOOPBYTE
	{
		sbase[i] |= sadd[i];
	}

	return 0;
}
Datum
_ltree_union(PG_FUNCTION_ARGS)
{
2004-08-29 07:07:03 +02:00
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
2002-09-04 22:31:48 +02:00
int *size = (int *) PG_GETARG_POINTER(1);
ABITVEC base;
int32 i,
2004-08-29 07:07:03 +02:00
len;
int32 flag = 0;
2002-09-04 22:31:48 +02:00
ltree_gist *result;
MemSet((void *) base, 0, sizeof(ABITVEC));
for (i = 0; i < entryvec->n; i++)
2002-09-04 22:31:48 +02:00
{
if (unionkey(base, GETENTRY(entryvec, i)))
{
flag = LTG_ALLTRUE;
break;
}
}
2002-09-04 22:31:48 +02:00
len = LTG_HDRSIZE + ((flag & LTG_ALLTRUE) ? 0 : ASIGLEN);
result = (ltree_gist *) palloc0(len);
SET_VARSIZE(result, len);
result->flag = flag;
2002-09-04 22:31:48 +02:00
if (!LTG_ISALLTRUE(result))
memcpy((void *) LTG_SIGN(result), (void *) base, sizeof(ABITVEC));
*size = len;
2002-09-04 22:31:48 +02:00
PG_RETURN_POINTER(result);
}
static int32
2002-09-04 22:31:48 +02:00
sizebitvec(BITVECP sign)
{
Make use of compiler builtins and/or assembly for CLZ, CTZ, POPCNT. Test for the compiler builtins __builtin_clz, __builtin_ctz, and __builtin_popcount, and make use of these in preference to handwritten C code if they're available. Create src/port infrastructure for "leftmost one", "rightmost one", and "popcount" so as to centralize these decisions. On x86_64, __builtin_popcount generally won't make use of the POPCNT opcode because that's not universally supported yet. Provide code that checks CPUID and then calls POPCNT via asm() if available. This requires indirecting through a function pointer, which is an annoying amount of overhead for a one-instruction operation, but it's probably not worth working harder than this for our current use-cases. I'm not sure we've found all the existing places that could profit from this new infrastructure; but we at least touched all the ones that used copied-and-pasted versions of the bitmapset.c code, and got rid of multiple copies of the associated constant arrays. While at it, replace c-compiler.m4's one-per-builtin-function macros with a single one that can handle all the cases we need to worry about so far. Also, because I'm paranoid, make those checks into AC_LINK checks rather than just AC_COMPILE; the former coding failed to verify that libgcc has support for the builtin, in cases where it's not inline code. David Rowley, Thomas Munro, Alvaro Herrera, Tom Lane Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
2019-02-16 05:22:27 +01:00
return pg_popcount((const char *) sign, ASIGLEN);
}
2004-08-29 07:07:03 +02:00
/*
 * Hamming distance between two signatures: the number of bit positions in
 * which 'a' and 'b' differ.
 */
static int
hemdistsign(BITVECP a, BITVECP b)
{
	int			i;
	int			dist = 0;

	ALOOPBYTE
	{
		unsigned char diff = (unsigned char) (a[i] ^ b[i]);

		/* Using the popcount functions here isn't likely to win */
		dist += pg_number_of_ones[diff];
	}

	return dist;
}
/*
 * Distance between two keys.
 *
 * Two LTG_ALLTRUE keys are at distance 0.  When exactly one key is
 * LTG_ALLTRUE the distance is the number of unset bits in the other key's
 * signature.  Otherwise it is the Hamming distance of the two signatures.
 */
static int
hemdist(ltree_gist *a, ltree_gist *b)
{
	bool		a_all = (LTG_ISALLTRUE(a) != 0);
	bool		b_all = (LTG_ISALLTRUE(b) != 0);

	if (a_all && b_all)
		return 0;
	if (a_all)
		return ASIGLENBIT - sizebitvec(LTG_SIGN(b));
	if (b_all)
		return ASIGLENBIT - sizebitvec(LTG_SIGN(a));

	return hemdistsign(LTG_SIGN(a), LTG_SIGN(b));
}
Datum
_ltree_penalty(PG_FUNCTION_ARGS)
{
ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2);
2004-08-29 07:07:03 +02:00
*penalty = hemdist(origval, newval);
2002-09-04 22:31:48 +02:00
PG_RETURN_POINTER(penalty);
}
2002-09-04 22:31:48 +02:00
/*
 * Sort record used by _ltree_picksplit(): one per entry, ordered by 'cost'
 * via comparecost().
 */
typedef struct
{
OffsetNumber pos;	/* offset of the entry in the entry vector */
int32 cost;			/* |distance to left seed - distance to right seed| */
} SPLITCOST;
static int
2002-09-04 22:31:48 +02:00
comparecost(const void *a, const void *b)
{
return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
}
2002-09-04 22:31:48 +02:00
Datum
_ltree_picksplit(PG_FUNCTION_ARGS)
{
2004-08-29 07:07:03 +02:00
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
2002-09-04 22:31:48 +02:00
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
OffsetNumber k,
j;
ltree_gist *datum_l,
*datum_r;
BITVECP union_l,
2002-09-04 22:31:48 +02:00
union_r;
int32 size_alpha,
2004-08-29 07:07:03 +02:00
size_beta;
int32 size_waste,
waste = -1;
int32 nbytes;
2002-09-04 22:31:48 +02:00
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
OffsetNumber maxoff;
BITVECP ptr;
2002-09-04 22:31:48 +02:00
int i;
SPLITCOST *costvector;
ltree_gist *_k,
*_j;
maxoff = entryvec->n - 2;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
2004-08-29 07:07:03 +02:00
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
{
2002-09-04 22:31:48 +02:00
_k = GETENTRY(entryvec, k);
2004-08-29 07:07:03 +02:00
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
size_waste = hemdist(_k, GETENTRY(entryvec, j));
if (size_waste > waste)
{
waste = size_waste;
seed_1 = k;
seed_2 = j;
}
}
}
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
2002-09-04 22:31:48 +02:00
if (seed_1 == 0 || seed_2 == 0)
{
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */
2002-09-04 22:31:48 +02:00
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)))
{
datum_l = (ltree_gist *) palloc0(LTG_HDRSIZE);
SET_VARSIZE(datum_l, LTG_HDRSIZE);
2002-09-04 22:31:48 +02:00
datum_l->flag = LTG_ALLTRUE;
}
2002-09-04 22:31:48 +02:00
else
{
datum_l = (ltree_gist *) palloc0(LTG_HDRSIZE + ASIGLEN);
SET_VARSIZE(datum_l, LTG_HDRSIZE + ASIGLEN);
2002-09-04 22:31:48 +02:00
datum_l->flag = 0;
memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
}
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
{
datum_r = (ltree_gist *) palloc0(LTG_HDRSIZE);
SET_VARSIZE(datum_r, LTG_HDRSIZE);
2002-09-04 22:31:48 +02:00
datum_r->flag = LTG_ALLTRUE;
}
else
{
datum_r = (ltree_gist *) palloc0(LTG_HDRSIZE + ASIGLEN);
SET_VARSIZE(datum_r, LTG_HDRSIZE + ASIGLEN);
2002-09-04 22:31:48 +02:00
datum_r->flag = 0;
memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
}
maxoff = OffsetNumberNext(maxoff);
/* sort before ... */
2002-09-04 22:31:48 +02:00
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
_j = GETENTRY(entryvec, j);
2004-08-29 07:07:03 +02:00
size_alpha = hemdist(datum_l, _j);
size_beta = hemdist(datum_r, _j);
costvector[j - 1].cost = Abs(size_alpha - size_beta);
}
2002-09-04 22:31:48 +02:00
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
2004-08-29 07:07:03 +02:00
union_l = LTG_SIGN(datum_l);
union_r = LTG_SIGN(datum_r);
2002-09-04 22:31:48 +02:00
for (k = 0; k < maxoff; k++)
{
j = costvector[k].pos;
2002-09-04 22:31:48 +02:00
if (j == seed_1)
{
*left++ = j;
v->spl_nleft++;
continue;
2002-09-04 22:31:48 +02:00
}
else if (j == seed_2)
{
*right++ = j;
v->spl_nright++;
continue;
}
_j = GETENTRY(entryvec, j);
2004-08-29 07:07:03 +02:00
size_alpha = hemdist(datum_l, _j);
size_beta = hemdist(datum_r, _j);
2004-08-29 07:07:03 +02:00
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
2002-09-04 22:31:48 +02:00
{
2004-08-29 07:07:03 +02:00
if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
{
if (!LTG_ISALLTRUE(datum_l))
2004-08-29 07:07:03 +02:00
MemSet((void *) union_l, 0xff, sizeof(ABITVEC));
}
else
{
ptr = LTG_SIGN(_j);
ALOOPBYTE
union_l[i] |= ptr[i];
}
*left++ = j;
v->spl_nleft++;
2002-09-04 22:31:48 +02:00
}
else
{
2004-08-29 07:07:03 +02:00
if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
{
if (!LTG_ISALLTRUE(datum_r))
2004-08-29 07:07:03 +02:00
MemSet((void *) union_r, 0xff, sizeof(ABITVEC));
}
else
{
ptr = LTG_SIGN(_j);
ALOOPBYTE
union_r[i] |= ptr[i];
}
*right++ = j;
v->spl_nright++;
}
}
*right = *left = FirstOffsetNumber;
v->spl_ldatum = PointerGetDatum(datum_l);
v->spl_rdatum = PointerGetDatum(datum_r);
2002-09-04 22:31:48 +02:00
PG_RETURN_POINTER(v);
}
/*
 * Signature test for an ltree query.
 *
 * Returns true when 'key' is LTG_ALLTRUE or when the signature bit selected
 * by the CRC of every level label of 'query' is set; returns false as soon
 * as one such bit is found unset.
 */
static bool
gist_te(ltree_gist *key, ltree *query)
{
	ltree_level *level;
	BITVECP		sign;
	int			remaining;

	if (LTG_ISALLTRUE(key))
		return true;

	sign = LTG_SIGN(key);
	level = LTREE_FIRST(query);

	for (remaining = query->numlevel; remaining > 0; remaining--)
	{
		unsigned int hv = ltree_crc32_sz(level->name, level->len);

		if (!GETBIT(sign, AHASHVAL(hv)))
			return false;
		level = LEVEL_NEXT(level);
	}

	return true;
}
/*
 * Callback handed to ltree_execute() by gist_qtxt(); 'checkval' is the
 * signature being tested.
 *
 * Items whose flag does not satisfy FLG_CANLOOKSIGN() always pass; for the
 * others the result is the signature bit selected by AHASHVAL(val->val).
 */
static bool
checkcondition_bit(void *checkval, ITEM *val)
{
	if (!FLG_CANLOOKSIGN(val->flag))
		return true;

	return GETBIT(checkval, AHASHVAL(val->val));
}
/*
 * Signature test for an ltxtquery.
 *
 * An LTG_ALLTRUE key always matches.  Otherwise the query is evaluated by
 * ltree_execute() against the key's signature, using checkcondition_bit()
 * to test individual items.
 */
static bool
gist_qtxt(ltree_gist *key, ltxtquery *query)
{
	void	   *sign;

	if (LTG_ISALLTRUE(key))
		return true;

	sign = (void *) LTG_SIGN(key);

	return ltree_execute(GETQUERY(query), sign, false, checkcondition_bit);
}
/*
 * Signature test for an lquery.
 *
 * An LTG_ALLTRUE key always matches.  Otherwise every query level that has
 * variants and satisfies LQL_CANLOOKSIGN() must have at least one variant
 * whose signature bit is set; the first level without one makes the result
 * false.  Levels that do not qualify are skipped.
 */
static bool
gist_qe(ltree_gist *key, lquery *query)
{
	lquery_level *curq = LQUERY_FIRST(query);
	BITVECP		sign = LTG_SIGN(key);
	int			qlen;

	if (LTG_ISALLTRUE(key))
		return true;

	for (qlen = query->numlevel; qlen > 0; qlen--, curq = LQL_NEXT(curq))
	{
		bool		found = false;
		int			vlen;
		lquery_variant *curv;

		if (!(curq->numvar && LQL_CANLOOKSIGN(curq)))
			continue;

		curv = LQL_FIRST(curq);
		for (vlen = curq->numvar; vlen > 0 && !found; vlen--)
		{
			if (GETBIT(sign, AHASHVAL(curv->val)))
				found = true;
			else
				curv = LVAR_NEXT(curv);
		}

		if (!found)
			return false;
	}

	return true;
}
/*
 * Signature test for an array of lqueries.
 *
 * Returns true as soon as gist_qe() accepts one element of '_query', false
 * when none does.  Raises an error when the array has more than one
 * dimension or contains NULL elements.
 */
static bool
_arrq_cons(ltree_gist *key, ArrayType *_query)
{
	lquery	   *query = (lquery *) ARR_DATA_PTR(_query);
	int			num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));

	if (ARR_NDIM(_query) > 1)
		ereport(ERROR,
				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
				 errmsg("array must be one-dimensional")));
	if (array_contains_nulls(_query))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("array must not contain nulls")));

	/* elements are packed back to back; NEXTVAL steps to the next one */
	for (; num > 0; num--)
	{
		if (gist_qe(key, query))
			return true;
		query = (lquery *) NEXTVAL(query);
	}

	return false;
}
Datum
_ltree_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
void *query = (void *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
2002-09-04 22:31:48 +02:00
bool res = false;
/* All cases served by this function are inexact */
*recheck = true;
2002-09-04 22:31:48 +02:00
switch (strategy)
{
case 10:
case 11:
2002-09-04 22:31:48 +02:00
res = gist_te(key, (ltree *) query);
break;
case 12:
case 13:
2002-09-04 22:31:48 +02:00
res = gist_qe(key, (lquery *) query);
break;
case 14:
case 15:
2002-09-04 22:31:48 +02:00
res = gist_qtxt(key, (ltxtquery *) query);
break;
case 16:
case 17:
res = _arrq_cons(key, (ArrayType *) query);
break;
default:
/* internal error */
elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
}
2006-10-04 02:30:14 +02:00
PG_FREE_IF_COPY(query, 1);
PG_RETURN_BOOL(res);
}