postgresql/contrib/hstore/hstore_gist.c
Peter Eisentraut b8b2e3b2de Replace int2/int4 in C code with int16/int32
The latter was already the dominant use, and it's preferable because
in C the convention is that intXX means XX bits.  Therefore, allowing
mixed use of int2, int4, int8, int16, int32 is obviously confusing.

Remove the typedefs for int2 and int4 for now.  They don't seem to be
widely used outside of the PostgreSQL source tree, and the few uses
can probably be cleaned up by the time this ships.
2012-06-25 01:51:46 +03:00

599 lines
13 KiB
C

/*
* contrib/hstore/hstore_gist.c
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/skey.h"
#include "catalog/pg_type.h"
#include "crc32.h"
#include "hstore.h"
/* bigint defines */
#define BITBYTE 8
#define SIGLENINT 4 /* >122 => key will toast, so very slow!!! */
#define SIGLEN ( sizeof(int)*SIGLENINT )
#define SIGLENBIT (SIGLEN*BITBYTE)
typedef char BITVEC[SIGLEN];
typedef char *BITVECP;
#define SIGPTR(x) ( (BITVECP) ARR_DATA_PTR(x) )
#define LOOPBYTE \
for(i=0;i<SIGLEN;i++)
#define LOOPBIT \
for(i=0;i<SIGLENBIT;i++)
/* beware of multiple evaluation of arguments to these macros! */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
#define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int32 flag;
char data[1];
} GISTTYPE;
#define ALLISTRUE 0x04
#define ISALLTRUE(x) ( ((GISTTYPE*)x)->flag & ALLISTRUE )
#define GTHDRSIZE (VARHDRSZ + sizeof(int32))
#define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )
#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) )
#define SUMBIT(val) ( \
GETBITBYTE((val),0) + \
GETBITBYTE((val),1) + \
GETBITBYTE((val),2) + \
GETBITBYTE((val),3) + \
GETBITBYTE((val),4) + \
GETBITBYTE((val),5) + \
GETBITBYTE((val),6) + \
GETBITBYTE((val),7) \
)
#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
PG_FUNCTION_INFO_V1(ghstore_in);
Datum ghstore_in(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(ghstore_out);
Datum ghstore_out(PG_FUNCTION_ARGS);
Datum
ghstore_in(PG_FUNCTION_ARGS)
{
elog(ERROR, "Not implemented");
PG_RETURN_DATUM(0);
}
Datum
ghstore_out(PG_FUNCTION_ARGS)
{
elog(ERROR, "Not implemented");
PG_RETURN_DATUM(0);
}
PG_FUNCTION_INFO_V1(ghstore_consistent);
PG_FUNCTION_INFO_V1(ghstore_compress);
PG_FUNCTION_INFO_V1(ghstore_decompress);
PG_FUNCTION_INFO_V1(ghstore_penalty);
PG_FUNCTION_INFO_V1(ghstore_picksplit);
PG_FUNCTION_INFO_V1(ghstore_union);
PG_FUNCTION_INFO_V1(ghstore_same);
Datum ghstore_consistent(PG_FUNCTION_ARGS);
Datum ghstore_compress(PG_FUNCTION_ARGS);
Datum ghstore_decompress(PG_FUNCTION_ARGS);
Datum ghstore_penalty(PG_FUNCTION_ARGS);
Datum ghstore_picksplit(PG_FUNCTION_ARGS);
Datum ghstore_union(PG_FUNCTION_ARGS);
Datum ghstore_same(PG_FUNCTION_ARGS);
Datum
ghstore_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{
GISTTYPE *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
HStore *val = DatumGetHStoreP(entry->key);
HEntry *hsent = ARRPTR(val);
char *ptr = STRPTR(val);
int count = HS_COUNT(val);
int i;
SET_VARSIZE(res, CALCGTSIZE(0));
for (i = 0; i < count; ++i)
{
int h;
h = crc32_sz((char *) HS_KEY(hsent, ptr, i), HS_KEYLEN(hsent, i));
HASH(GETSIGN(res), h);
if (!HS_VALISNULL(hsent, i))
{
h = crc32_sz((char *) HS_VAL(hsent, ptr, i), HS_VALLEN(hsent, i));
HASH(GETSIGN(res), h);
}
}
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
entry->rel, entry->page,
entry->offset,
FALSE);
}
else if (!ISALLTRUE(DatumGetPointer(entry->key)))
{
int32 i;
GISTTYPE *res;
BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
LOOPBYTE
{
if ((sign[i] & 0xff) != 0xff)
PG_RETURN_POINTER(retval);
}
res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
res->flag = ALLISTRUE;
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
entry->rel, entry->page,
entry->offset,
FALSE);
}
PG_RETURN_POINTER(retval);
}
/*
* Since type ghstore isn't toastable (and doesn't need to be),
* this function can be a no-op.
*/
Datum
ghstore_decompress(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(PG_GETARG_POINTER(0));
}
Datum
ghstore_same(PG_FUNCTION_ARGS)
{
GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0);
GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
if (ISALLTRUE(a) && ISALLTRUE(b))
*result = true;
else if (ISALLTRUE(a))
*result = false;
else if (ISALLTRUE(b))
*result = false;
else
{
int32 i;
BITVECP sa = GETSIGN(a),
sb = GETSIGN(b);
*result = true;
LOOPBYTE
{
if (sa[i] != sb[i])
{
*result = false;
break;
}
}
}
PG_RETURN_POINTER(result);
}
static int32
sizebitvec(BITVECP sign)
{
int32 size = 0,
i;
LOOPBYTE
{
size += SUMBIT(sign);
sign = (BITVECP) (((char *) sign) + 1);
}
return size;
}
static int
hemdistsign(BITVECP a, BITVECP b)
{
int i,
dist = 0;
LOOPBIT
{
if (GETBIT(a, i) != GETBIT(b, i))
dist++;
}
return dist;
}
static int
hemdist(GISTTYPE *a, GISTTYPE *b)
{
if (ISALLTRUE(a))
{
if (ISALLTRUE(b))
return 0;
else
return SIGLENBIT - sizebitvec(GETSIGN(b));
}
else if (ISALLTRUE(b))
return SIGLENBIT - sizebitvec(GETSIGN(a));
return hemdistsign(GETSIGN(a), GETSIGN(b));
}
static int32
unionkey(BITVECP sbase, GISTTYPE *add)
{
int32 i;
BITVECP sadd = GETSIGN(add);
if (ISALLTRUE(add))
return 1;
LOOPBYTE
sbase[i] |= sadd[i];
return 0;
}
Datum
ghstore_union(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
int32 len = entryvec->n;
int *size = (int *) PG_GETARG_POINTER(1);
BITVEC base;
int32 i;
int32 flag = 0;
GISTTYPE *result;
MemSet((void *) base, 0, sizeof(BITVEC));
for (i = 0; i < len; i++)
{
if (unionkey(base, GETENTRY(entryvec, i)))
{
flag = ALLISTRUE;
break;
}
}
len = CALCGTSIZE(flag);
result = (GISTTYPE *) palloc(len);
SET_VARSIZE(result, len);
result->flag = flag;
if (!ISALLTRUE(result))
memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
*size = len;
PG_RETURN_POINTER(result);
}
Datum
ghstore_penalty(PG_FUNCTION_ARGS)
{
GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
float *penalty = (float *) PG_GETARG_POINTER(2);
GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
*penalty = hemdist(origval, newval);
PG_RETURN_POINTER(penalty);
}
typedef struct
{
OffsetNumber pos;
int32 cost;
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
return ((const SPLITCOST *) a)->cost - ((const SPLITCOST *) b)->cost;
}
Datum
ghstore_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
OffsetNumber maxoff = entryvec->n - 2;
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
OffsetNumber k,
j;
GISTTYPE *datum_l,
*datum_r;
BITVECP union_l,
union_r;
int32 size_alpha,
size_beta;
int32 size_waste,
waste = -1;
int32 nbytes;
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
BITVECP ptr;
int i;
SPLITCOST *costvector;
GISTTYPE *_k,
*_j;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
{
_k = GETENTRY(entryvec, k);
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
size_waste = hemdist(_k, GETENTRY(entryvec, j));
if (size_waste > waste)
{
waste = size_waste;
seed_1 = k;
seed_2 = j;
}
}
}
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
if (seed_1 == 0 || seed_2 == 0)
{
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */
if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
{
datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
SET_VARSIZE(datum_l, GTHDRSIZE);
datum_l->flag = ALLISTRUE;
}
else
{
datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
datum_l->flag = 0;
memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC))
;
}
if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
{
datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
SET_VARSIZE(datum_r, GTHDRSIZE);
datum_r->flag = ALLISTRUE;
}
else
{
datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
datum_r->flag = 0;
memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
}
maxoff = OffsetNumberNext(maxoff);
/* sort before ... */
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
_j = GETENTRY(entryvec, j);
size_alpha = hemdist(datum_l, _j);
size_beta = hemdist(datum_r, _j);
costvector[j - 1].cost = abs(size_alpha - size_beta);
}
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
union_l = GETSIGN(datum_l);
union_r = GETSIGN(datum_r);
for (k = 0; k < maxoff; k++)
{
j = costvector[k].pos;
if (j == seed_1)
{
*left++ = j;
v->spl_nleft++;
continue;
}
else if (j == seed_2)
{
*right++ = j;
v->spl_nright++;
continue;
}
_j = GETENTRY(entryvec, j);
size_alpha = hemdist(datum_l, _j);
size_beta = hemdist(datum_r, _j);
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
{
if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
{
if (!ISALLTRUE(datum_l))
MemSet((void *) union_l, 0xff, sizeof(BITVEC));
}
else
{
ptr = GETSIGN(_j);
LOOPBYTE
union_l[i] |= ptr[i];
}
*left++ = j;
v->spl_nleft++;
}
else
{
if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
{
if (!ISALLTRUE(datum_r))
MemSet((void *) union_r, 0xff, sizeof(BITVEC));
}
else
{
ptr = GETSIGN(_j);
LOOPBYTE
union_r[i] |= ptr[i];
}
*right++ = j;
v->spl_nright++;
}
}
*right = *left = FirstOffsetNumber;
v->spl_ldatum = PointerGetDatum(datum_l);
v->spl_rdatum = PointerGetDatum(datum_r);
PG_RETURN_POINTER(v);
}
Datum
ghstore_consistent(PG_FUNCTION_ARGS)
{
GISTTYPE *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
bool res = true;
BITVECP sign;
/* All cases served by this function are inexact */
*recheck = true;
if (ISALLTRUE(entry))
PG_RETURN_BOOL(true);
sign = GETSIGN(entry);
if (strategy == HStoreContainsStrategyNumber ||
strategy == HStoreOldContainsStrategyNumber)
{
HStore *query = PG_GETARG_HS(1);
HEntry *qe = ARRPTR(query);
char *qv = STRPTR(query);
int count = HS_COUNT(query);
int i;
for (i = 0; res && i < count; ++i)
{
int crc = crc32_sz((char *) HS_KEY(qe, qv, i), HS_KEYLEN(qe, i));
if (GETBIT(sign, HASHVAL(crc)))
{
if (!HS_VALISNULL(qe, i))
{
crc = crc32_sz((char *) HS_VAL(qe, qv, i), HS_VALLEN(qe, i));
if (!GETBIT(sign, HASHVAL(crc)))
res = false;
}
}
else
res = false;
}
}
else if (strategy == HStoreExistsStrategyNumber)
{
text *query = PG_GETARG_TEXT_PP(1);
int crc = crc32_sz(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
res = (GETBIT(sign, HASHVAL(crc))) ? true : false;
}
else if (strategy == HStoreExistsAllStrategyNumber)
{
ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
Datum *key_datums;
bool *key_nulls;
int key_count;
int i;
deconstruct_array(query,
TEXTOID, -1, false, 'i',
&key_datums, &key_nulls, &key_count);
for (i = 0; res && i < key_count; ++i)
{
int crc;
if (key_nulls[i])
continue;
crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
if (!(GETBIT(sign, HASHVAL(crc))))
res = FALSE;
}
}
else if (strategy == HStoreExistsAnyStrategyNumber)
{
ArrayType *query = PG_GETARG_ARRAYTYPE_P(1);
Datum *key_datums;
bool *key_nulls;
int key_count;
int i;
deconstruct_array(query,
TEXTOID, -1, false, 'i',
&key_datums, &key_nulls, &key_count);
res = FALSE;
for (i = 0; !res && i < key_count; ++i)
{
int crc;
if (key_nulls[i])
continue;
crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
if (GETBIT(sign, HASHVAL(crc)))
res = TRUE;
}
}
else
elog(ERROR, "Unsupported strategy number: %d", strategy);
PG_RETURN_BOOL(res);
}