postgresql/contrib/ltree/_ltree_gist.c
Tom Lane 02a6a54ecd Make use of compiler builtins and/or assembly for CLZ, CTZ, POPCNT.
Test for the compiler builtins __builtin_clz, __builtin_ctz, and
__builtin_popcount, and make use of these in preference to
handwritten C code if they're available.  Create src/port
infrastructure for "leftmost one", "rightmost one", and "popcount"
so as to centralize these decisions.

On x86_64, __builtin_popcount generally won't make use of the POPCNT
opcode because that's not universally supported yet.  Provide code
that checks CPUID and then calls POPCNT via asm() if available.
This requires indirecting through a function pointer, which is
an annoying amount of overhead for a one-instruction operation,
but it's probably not worth working harder than this for our
current use-cases.

I'm not sure we've found all the existing places that could profit
from this new infrastructure; but we at least touched all the
ones that used copied-and-pasted versions of the bitmapset.c code,
and got rid of multiple copies of the associated constant arrays.

While at it, replace c-compiler.m4's one-per-builtin-function
macros with a single one that can handle all the cases we need
to worry about so far.  Also, because I'm paranoid, make those
checks into AC_LINK checks rather than just AC_COMPILE; the
former coding failed to verify that libgcc has support for the
builtin, in cases where it's not inline code.

David Rowley, Thomas Munro, Alvaro Herrera, Tom Lane

Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
2019-02-15 23:22:33 -05:00

562 lines
12 KiB
C

/*
* contrib/ltree/_ltree_gist.c
*
*
* GiST support for ltree[]
* Teodor Sigaev <teodor@stack.net>
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/stratnum.h"
#include "port/pg_bitutils.h"
#include "crc32.h"
#include "ltree.h"
PG_FUNCTION_INFO_V1(_ltree_compress);
PG_FUNCTION_INFO_V1(_ltree_same);
PG_FUNCTION_INFO_V1(_ltree_union);
PG_FUNCTION_INFO_V1(_ltree_penalty);
PG_FUNCTION_INFO_V1(_ltree_picksplit);
PG_FUNCTION_INFO_V1(_ltree_consistent);
#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key))
#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
static void
hashing(BITVECP sign, ltree *t)
{
int tlen = t->numlevel;
ltree_level *cur = LTREE_FIRST(t);
int hash;
while (tlen > 0)
{
hash = ltree_crc32_sz(cur->name, cur->len);
AHASH(sign, hash);
cur = LEVEL_NEXT(cur);
tlen--;
}
}
Datum
_ltree_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{ /* ltree */
ltree_gist *key;
ArrayType *val = DatumGetArrayTypeP(entry->key);
int32 len = LTG_HDRSIZE + ASIGLEN;
int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
ltree *item = (ltree *) ARR_DATA_PTR(val);
if (ARR_NDIM(val) > 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("array must be one-dimensional")));
if (array_contains_nulls(val))
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("array must not contain nulls")));
key = (ltree_gist *) palloc0(len);
SET_VARSIZE(key, len);
key->flag = 0;
MemSet(LTG_SIGN(key), 0, ASIGLEN);
while (num > 0)
{
hashing(LTG_SIGN(key), item);
num--;
item = NEXTVAL(item);
}
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
entry->rel, entry->page,
entry->offset, false);
}
else if (!LTG_ISALLTRUE(entry->key))
{
int32 i,
len;
ltree_gist *key;
BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key));
ALOOPBYTE
{
if ((sign[i] & 0xff) != 0xff)
PG_RETURN_POINTER(retval);
}
len = LTG_HDRSIZE;
key = (ltree_gist *) palloc0(len);
SET_VARSIZE(key, len);
key->flag = LTG_ALLTRUE;
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
entry->rel, entry->page,
entry->offset, false);
}
PG_RETURN_POINTER(retval);
}
Datum
_ltree_same(PG_FUNCTION_ARGS)
{
ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
*result = true;
else if (LTG_ISALLTRUE(a))
*result = false;
else if (LTG_ISALLTRUE(b))
*result = false;
else
{
int32 i;
BITVECP sa = LTG_SIGN(a),
sb = LTG_SIGN(b);
*result = true;
ALOOPBYTE
{
if (sa[i] != sb[i])
{
*result = false;
break;
}
}
}
PG_RETURN_POINTER(result);
}
static int32
unionkey(BITVECP sbase, ltree_gist *add)
{
int32 i;
BITVECP sadd = LTG_SIGN(add);
if (LTG_ISALLTRUE(add))
return 1;
ALOOPBYTE
sbase[i] |= sadd[i];
return 0;
}
Datum
_ltree_union(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
int *size = (int *) PG_GETARG_POINTER(1);
ABITVEC base;
int32 i,
len;
int32 flag = 0;
ltree_gist *result;
MemSet((void *) base, 0, sizeof(ABITVEC));
for (i = 0; i < entryvec->n; i++)
{
if (unionkey(base, GETENTRY(entryvec, i)))
{
flag = LTG_ALLTRUE;
break;
}
}
len = LTG_HDRSIZE + ((flag & LTG_ALLTRUE) ? 0 : ASIGLEN);
result = (ltree_gist *) palloc0(len);
SET_VARSIZE(result, len);
result->flag = flag;
if (!LTG_ISALLTRUE(result))
memcpy((void *) LTG_SIGN(result), (void *) base, sizeof(ABITVEC));
*size = len;
PG_RETURN_POINTER(result);
}
static int32
sizebitvec(BITVECP sign)
{
return pg_popcount((const char *) sign, ASIGLEN);
}
static int
hemdistsign(BITVECP a, BITVECP b)
{
int i,
diff,
dist = 0;
ALOOPBYTE
{
diff = (unsigned char) (a[i] ^ b[i]);
/* Using the popcount functions here isn't likely to win */
dist += pg_number_of_ones[diff];
}
return dist;
}
static int
hemdist(ltree_gist *a, ltree_gist *b)
{
if (LTG_ISALLTRUE(a))
{
if (LTG_ISALLTRUE(b))
return 0;
else
return ASIGLENBIT - sizebitvec(LTG_SIGN(b));
}
else if (LTG_ISALLTRUE(b))
return ASIGLENBIT - sizebitvec(LTG_SIGN(a));
return hemdistsign(LTG_SIGN(a), LTG_SIGN(b));
}
Datum
_ltree_penalty(PG_FUNCTION_ARGS)
{
ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2);
*penalty = hemdist(origval, newval);
PG_RETURN_POINTER(penalty);
}
typedef struct
{
OffsetNumber pos;
int32 cost;
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
}
Datum
_ltree_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
OffsetNumber k,
j;
ltree_gist *datum_l,
*datum_r;
BITVECP union_l,
union_r;
int32 size_alpha,
size_beta;
int32 size_waste,
waste = -1;
int32 nbytes;
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
OffsetNumber maxoff;
BITVECP ptr;
int i;
SPLITCOST *costvector;
ltree_gist *_k,
*_j;
maxoff = entryvec->n - 2;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
{
_k = GETENTRY(entryvec, k);
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
size_waste = hemdist(_k, GETENTRY(entryvec, j));
if (size_waste > waste)
{
waste = size_waste;
seed_1 = k;
seed_2 = j;
}
}
}
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
if (seed_1 == 0 || seed_2 == 0)
{
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)))
{
datum_l = (ltree_gist *) palloc0(LTG_HDRSIZE);
SET_VARSIZE(datum_l, LTG_HDRSIZE);
datum_l->flag = LTG_ALLTRUE;
}
else
{
datum_l = (ltree_gist *) palloc0(LTG_HDRSIZE + ASIGLEN);
SET_VARSIZE(datum_l, LTG_HDRSIZE + ASIGLEN);
datum_l->flag = 0;
memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
}
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
{
datum_r = (ltree_gist *) palloc0(LTG_HDRSIZE);
SET_VARSIZE(datum_r, LTG_HDRSIZE);
datum_r->flag = LTG_ALLTRUE;
}
else
{
datum_r = (ltree_gist *) palloc0(LTG_HDRSIZE + ASIGLEN);
SET_VARSIZE(datum_r, LTG_HDRSIZE + ASIGLEN);
datum_r->flag = 0;
memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
}
maxoff = OffsetNumberNext(maxoff);
/* sort before ... */
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
_j = GETENTRY(entryvec, j);
size_alpha = hemdist(datum_l, _j);
size_beta = hemdist(datum_r, _j);
costvector[j - 1].cost = Abs(size_alpha - size_beta);
}
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
union_l = LTG_SIGN(datum_l);
union_r = LTG_SIGN(datum_r);
for (k = 0; k < maxoff; k++)
{
j = costvector[k].pos;
if (j == seed_1)
{
*left++ = j;
v->spl_nleft++;
continue;
}
else if (j == seed_2)
{
*right++ = j;
v->spl_nright++;
continue;
}
_j = GETENTRY(entryvec, j);
size_alpha = hemdist(datum_l, _j);
size_beta = hemdist(datum_r, _j);
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
{
if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
{
if (!LTG_ISALLTRUE(datum_l))
MemSet((void *) union_l, 0xff, sizeof(ABITVEC));
}
else
{
ptr = LTG_SIGN(_j);
ALOOPBYTE
union_l[i] |= ptr[i];
}
*left++ = j;
v->spl_nleft++;
}
else
{
if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
{
if (!LTG_ISALLTRUE(datum_r))
MemSet((void *) union_r, 0xff, sizeof(ABITVEC));
}
else
{
ptr = LTG_SIGN(_j);
ALOOPBYTE
union_r[i] |= ptr[i];
}
*right++ = j;
v->spl_nright++;
}
}
*right = *left = FirstOffsetNumber;
v->spl_ldatum = PointerGetDatum(datum_l);
v->spl_rdatum = PointerGetDatum(datum_r);
PG_RETURN_POINTER(v);
}
static bool
gist_te(ltree_gist *key, ltree *query)
{
ltree_level *curq = LTREE_FIRST(query);
BITVECP sign = LTG_SIGN(key);
int qlen = query->numlevel;
unsigned int hv;
if (LTG_ISALLTRUE(key))
return true;
while (qlen > 0)
{
hv = ltree_crc32_sz(curq->name, curq->len);
if (!GETBIT(sign, AHASHVAL(hv)))
return false;
curq = LEVEL_NEXT(curq);
qlen--;
}
return true;
}
static bool
checkcondition_bit(void *checkval, ITEM *val)
{
return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(checkval, AHASHVAL(val->val)) : true;
}
static bool
gist_qtxt(ltree_gist *key, ltxtquery *query)
{
if (LTG_ISALLTRUE(key))
return true;
return ltree_execute(
GETQUERY(query),
(void *) LTG_SIGN(key), false,
checkcondition_bit
);
}
static bool
gist_qe(ltree_gist *key, lquery *query)
{
lquery_level *curq = LQUERY_FIRST(query);
BITVECP sign = LTG_SIGN(key);
int qlen = query->numlevel;
if (LTG_ISALLTRUE(key))
return true;
while (qlen > 0)
{
if (curq->numvar && LQL_CANLOOKSIGN(curq))
{
bool isexist = false;
int vlen = curq->numvar;
lquery_variant *curv = LQL_FIRST(curq);
while (vlen > 0)
{
if (GETBIT(sign, AHASHVAL(curv->val)))
{
isexist = true;
break;
}
curv = LVAR_NEXT(curv);
vlen--;
}
if (!isexist)
return false;
}
curq = LQL_NEXT(curq);
qlen--;
}
return true;
}
static bool
_arrq_cons(ltree_gist *key, ArrayType *_query)
{
lquery *query = (lquery *) ARR_DATA_PTR(_query);
int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
if (ARR_NDIM(_query) > 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("array must be one-dimensional")));
if (array_contains_nulls(_query))
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("array must not contain nulls")));
while (num > 0)
{
if (gist_qe(key, query))
return true;
num--;
query = (lquery *) NEXTVAL(query);
}
return false;
}
Datum
_ltree_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
void *query = (void *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
bool res = false;
/* All cases served by this function are inexact */
*recheck = true;
switch (strategy)
{
case 10:
case 11:
res = gist_te(key, (ltree *) query);
break;
case 12:
case 13:
res = gist_qe(key, (lquery *) query);
break;
case 14:
case 15:
res = gist_qtxt(key, (ltxtquery *) query);
break;
case 16:
case 17:
res = _arrq_cons(key, (ArrayType *) query);
break;
default:
/* internal error */
elog(ERROR, "unrecognized StrategyNumber: %d", strategy);
}
PG_FREE_IF_COPY(query, 1);
PG_RETURN_BOOL(res);
}