2007-08-21 03:11:32 +02:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * tsvector_op.c
 *	  operations over tsvector
 *
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.5 2007/09/11 08:46:29 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include "catalog/namespace.h"
|
|
|
|
#include "commands/trigger.h"
|
|
|
|
#include "executor/spi.h"
|
|
|
|
#include "funcapi.h"
|
|
|
|
#include "mb/pg_wchar.h"
|
2007-08-31 04:26:29 +02:00
|
|
|
#include "miscadmin.h"
|
2007-08-21 03:11:32 +02:00
|
|
|
#include "tsearch/ts_type.h"
|
|
|
|
#include "tsearch/ts_utils.h"
|
|
|
|
#include "utils/builtins.h"
|
|
|
|
#include "utils/lsyscache.h"
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
WordEntry *arrb;
|
|
|
|
WordEntry *arre;
|
|
|
|
char *values;
|
|
|
|
char *operand;
|
|
|
|
} CHKVAL;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
uint32 cur;
|
|
|
|
TSVector stat;
|
|
|
|
} StatStorage;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
uint32 len;
|
|
|
|
uint32 pos;
|
|
|
|
uint32 ndoc;
|
|
|
|
uint32 nentry;
|
|
|
|
} StatEntry;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
|
|
|
int4 size;
|
|
|
|
int4 weight;
|
|
|
|
char data[1];
|
|
|
|
} tsstat;
|
|
|
|
|
|
|
|
/*
 * Layout helpers for a tsstat blob: a STATHDRSIZE-byte header, then the
 * StatEntry array, then the lexeme string area.
 *
 *	CALCSTATSIZE(x, lenstr)	total bytes for x entries and lenstr string bytes
 *	STATPTR(x)				start of the StatEntry array
 *	STATSTRPTR(x)			start of the string area
 *	STATSTRSIZE(x)			number of bytes in the string area
 *
 * NOTE(review): STATSTRPTR and STATSTRSIZE read the entry count through a
 * TSVector cast, so they rely on TSVector's "size" field sitting at the
 * same offset as tsstat's -- confirm against ts_type.h.
 */
#define STATHDRSIZE (sizeof(int4) * 4)
#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
#define STATPTR(x)	( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
#define STATSTRPTR(x)	( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((TSVector)(x))->size ) )
#define STATSTRSIZE(x)	( VARSIZE((TSVector)(x)) - STATHDRSIZE - ( sizeof(StatEntry) * ((TSVector)(x))->size ) )
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Forward declaration; the definition is not in this part of the file.
 * NOTE(review): config_column presumably selects whether the text search
 * configuration is read from a column -- confirm against the definition.
 */
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
|
|
|
|
|
|
|
|
|
2007-09-07 17:09:56 +02:00
|
|
|
/*
|
|
|
|
* Order: haspos, len, word, for all positions (pos, weight)
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static int
|
|
|
|
silly_cmp_tsvector(const TSVector a, const TSVector b)
|
|
|
|
{
|
|
|
|
if (VARSIZE(a) < VARSIZE(b))
|
|
|
|
return -1;
|
|
|
|
else if (VARSIZE(a) > VARSIZE(b))
|
|
|
|
return 1;
|
|
|
|
else if (a->size < b->size)
|
|
|
|
return -1;
|
|
|
|
else if (a->size > b->size)
|
|
|
|
return 1;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
WordEntry *aptr = ARRPTR(a);
|
|
|
|
WordEntry *bptr = ARRPTR(b);
|
|
|
|
int i = 0;
|
|
|
|
int res;
|
|
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < a->size; i++)
|
|
|
|
{
|
|
|
|
if (aptr->haspos != bptr->haspos)
|
|
|
|
{
|
|
|
|
return (aptr->haspos > bptr->haspos) ? -1 : 1;
|
|
|
|
}
|
|
|
|
else if (aptr->len != bptr->len)
|
|
|
|
{
|
|
|
|
return (aptr->len > bptr->len) ? -1 : 1;
|
|
|
|
}
|
|
|
|
else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
|
|
|
|
{
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
else if (aptr->haspos)
|
|
|
|
{
|
|
|
|
WordEntryPos *ap = POSDATAPTR(a, aptr);
|
|
|
|
WordEntryPos *bp = POSDATAPTR(b, bptr);
|
|
|
|
int j;
|
|
|
|
|
|
|
|
if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
|
|
|
|
return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
|
|
|
|
|
|
|
|
for (j = 0; j < POSDATALEN(a, aptr); j++)
|
|
|
|
{
|
|
|
|
if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
|
|
|
|
{
|
|
|
|
return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
|
|
|
|
}
|
|
|
|
else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
|
|
|
|
{
|
|
|
|
return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
|
|
|
|
}
|
|
|
|
ap++, bp++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
aptr++;
|
|
|
|
bptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define TSVECTORCMPFUNC( type, action, ret ) \
|
|
|
|
Datum \
|
|
|
|
tsvector_##type(PG_FUNCTION_ARGS) \
|
|
|
|
{ \
|
|
|
|
TSVector a = PG_GETARG_TSVECTOR(0); \
|
|
|
|
TSVector b = PG_GETARG_TSVECTOR(1); \
|
|
|
|
int res = silly_cmp_tsvector(a, b); \
|
|
|
|
PG_FREE_IF_COPY(a,0); \
|
|
|
|
PG_FREE_IF_COPY(b,1); \
|
|
|
|
PG_RETURN_##ret( res action 0 ); \
|
|
|
|
}
|
|
|
|
|
|
|
|
TSVECTORCMPFUNC(lt, <, BOOL);
|
|
|
|
TSVECTORCMPFUNC(le, <=, BOOL);
|
|
|
|
TSVECTORCMPFUNC(eq, ==, BOOL);
|
|
|
|
TSVECTORCMPFUNC(ge, >=, BOOL);
|
|
|
|
TSVECTORCMPFUNC(gt, >, BOOL);
|
|
|
|
TSVECTORCMPFUNC(ne, !=, BOOL);
|
|
|
|
TSVECTORCMPFUNC(cmp, +, INT32);
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_strip(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector in = PG_GETARG_TSVECTOR(0);
|
|
|
|
TSVector out;
|
|
|
|
int i,
|
|
|
|
len = 0;
|
|
|
|
WordEntry *arrin = ARRPTR(in),
|
|
|
|
*arrout;
|
|
|
|
char *cur;
|
|
|
|
|
|
|
|
for (i = 0; i < in->size; i++)
|
2007-09-07 18:03:40 +02:00
|
|
|
len += arrin[i].len;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
len = CALCDATASIZE(in->size, len);
|
|
|
|
out = (TSVector) palloc0(len);
|
|
|
|
SET_VARSIZE(out, len);
|
|
|
|
out->size = in->size;
|
|
|
|
arrout = ARRPTR(out);
|
|
|
|
cur = STRPTR(out);
|
|
|
|
for (i = 0; i < in->size; i++)
|
|
|
|
{
|
|
|
|
memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
|
|
|
|
arrout[i].haspos = 0;
|
|
|
|
arrout[i].len = arrin[i].len;
|
|
|
|
arrout[i].pos = cur - STRPTR(out);
|
2007-09-07 18:03:40 +02:00
|
|
|
cur += arrout[i].len;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
|
|
PG_RETURN_POINTER(out);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_length(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector in = PG_GETARG_TSVECTOR(0);
|
|
|
|
int4 ret = in->size;
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
|
|
PG_RETURN_INT32(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_setweight(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector in = PG_GETARG_TSVECTOR(0);
|
|
|
|
char cw = PG_GETARG_CHAR(1);
|
|
|
|
TSVector out;
|
|
|
|
int i,
|
|
|
|
j;
|
|
|
|
WordEntry *entry;
|
|
|
|
WordEntryPos *p;
|
|
|
|
int w = 0;
|
|
|
|
|
|
|
|
switch (cw)
|
|
|
|
{
|
|
|
|
case 'A':
|
|
|
|
case 'a':
|
|
|
|
w = 3;
|
|
|
|
break;
|
|
|
|
case 'B':
|
|
|
|
case 'b':
|
|
|
|
w = 2;
|
|
|
|
break;
|
|
|
|
case 'C':
|
|
|
|
case 'c':
|
|
|
|
w = 1;
|
|
|
|
break;
|
|
|
|
case 'D':
|
|
|
|
case 'd':
|
|
|
|
w = 0;
|
|
|
|
break;
|
|
|
|
/* internal error */
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized weight");
|
|
|
|
}
|
|
|
|
|
|
|
|
out = (TSVector) palloc(VARSIZE(in));
|
|
|
|
memcpy(out, in, VARSIZE(in));
|
|
|
|
entry = ARRPTR(out);
|
|
|
|
i = out->size;
|
|
|
|
while (i--)
|
|
|
|
{
|
|
|
|
if ((j = POSDATALEN(out, entry)) != 0)
|
|
|
|
{
|
|
|
|
p = POSDATAPTR(out, entry);
|
|
|
|
while (j--)
|
|
|
|
{
|
|
|
|
WEP_SETWEIGHT(*p, w);
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
entry++;
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
|
|
PG_RETURN_POINTER(out);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
|
|
|
|
{
|
|
|
|
if (a->len == b->len)
|
|
|
|
{
|
|
|
|
return strncmp(
|
|
|
|
ptra + a->pos,
|
|
|
|
ptrb + b->pos,
|
|
|
|
a->len);
|
|
|
|
}
|
|
|
|
return (a->len > b->len) ? 1 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int4
|
|
|
|
add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos)
|
|
|
|
{
|
2007-09-11 10:46:29 +02:00
|
|
|
uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
|
2007-08-21 03:11:32 +02:00
|
|
|
int i;
|
|
|
|
uint16 slen = POSDATALEN(src, srcptr),
|
|
|
|
startlen;
|
|
|
|
WordEntryPos *spos = POSDATAPTR(src, srcptr),
|
|
|
|
*dpos = POSDATAPTR(dest, destptr);
|
|
|
|
|
|
|
|
if (!destptr->haspos)
|
|
|
|
*clen = 0;
|
|
|
|
|
|
|
|
startlen = *clen;
|
|
|
|
for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); i++)
|
|
|
|
{
|
|
|
|
WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
|
|
|
|
WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
|
|
|
|
(*clen)++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*clen != startlen)
|
|
|
|
destptr->haspos = 1;
|
|
|
|
return *clen - startlen;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_concat(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector in1 = PG_GETARG_TSVECTOR(0);
|
|
|
|
TSVector in2 = PG_GETARG_TSVECTOR(1);
|
|
|
|
TSVector out;
|
|
|
|
WordEntry *ptr;
|
|
|
|
WordEntry *ptr1,
|
|
|
|
*ptr2;
|
|
|
|
WordEntryPos *p;
|
|
|
|
int maxpos = 0,
|
|
|
|
i,
|
|
|
|
j,
|
|
|
|
i1,
|
|
|
|
i2;
|
|
|
|
char *cur;
|
|
|
|
char *data,
|
|
|
|
*data1,
|
|
|
|
*data2;
|
|
|
|
|
|
|
|
ptr = ARRPTR(in1);
|
|
|
|
i = in1->size;
|
|
|
|
while (i--)
|
|
|
|
{
|
|
|
|
if ((j = POSDATALEN(in1, ptr)) != 0)
|
|
|
|
{
|
|
|
|
p = POSDATAPTR(in1, ptr);
|
|
|
|
while (j--)
|
|
|
|
{
|
|
|
|
if (WEP_GETPOS(*p) > maxpos)
|
|
|
|
maxpos = WEP_GETPOS(*p);
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
ptr1 = ARRPTR(in1);
|
|
|
|
ptr2 = ARRPTR(in2);
|
|
|
|
data1 = STRPTR(in1);
|
|
|
|
data2 = STRPTR(in2);
|
|
|
|
i1 = in1->size;
|
|
|
|
i2 = in2->size;
|
|
|
|
out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2));
|
|
|
|
SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
|
|
|
|
out->size = in1->size + in2->size;
|
|
|
|
data = cur = STRPTR(out);
|
|
|
|
ptr = ARRPTR(out);
|
|
|
|
while (i1 && i2)
|
|
|
|
{
|
|
|
|
int cmp = compareEntry(data1, ptr1, data2, ptr2);
|
|
|
|
|
|
|
|
if (cmp < 0)
|
|
|
|
{ /* in1 first */
|
|
|
|
ptr->haspos = ptr1->haspos;
|
|
|
|
ptr->len = ptr1->len;
|
|
|
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
|
|
|
ptr->pos = cur - data;
|
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
2007-09-07 18:03:40 +02:00
|
|
|
cur += SHORTALIGN(ptr1->len);
|
2007-09-11 10:46:29 +02:00
|
|
|
memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
2007-08-21 03:11:32 +02:00
|
|
|
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
else
|
|
|
|
cur += ptr1->len;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr1++;
|
|
|
|
i1--;
|
|
|
|
}
|
|
|
|
else if (cmp > 0)
|
|
|
|
{ /* in2 first */
|
|
|
|
ptr->haspos = ptr2->haspos;
|
|
|
|
ptr->len = ptr2->len;
|
|
|
|
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
|
|
|
ptr->pos = cur - data;
|
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
|
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
|
2007-09-07 18:03:40 +02:00
|
|
|
cur += SHORTALIGN(ptr2->len);
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
if (addlen == 0)
|
|
|
|
ptr->haspos = 0;
|
|
|
|
else
|
|
|
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
else
|
|
|
|
cur += ptr2->len;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr2++;
|
|
|
|
i2--;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ptr->haspos = ptr1->haspos | ptr2->haspos;
|
|
|
|
ptr->len = ptr1->len;
|
|
|
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
|
|
|
ptr->pos = cur - data;
|
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
2007-09-07 18:03:40 +02:00
|
|
|
cur += SHORTALIGN(ptr1->len);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ptr1->haspos)
|
|
|
|
{
|
2007-09-11 10:46:29 +02:00
|
|
|
memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
2007-08-21 03:11:32 +02:00
|
|
|
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
if (ptr2->haspos)
|
|
|
|
cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
|
|
|
|
}
|
|
|
|
else if (ptr2->haspos)
|
|
|
|
{
|
|
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
|
|
|
|
if (addlen == 0)
|
|
|
|
ptr->haspos = 0;
|
|
|
|
else
|
|
|
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
}
|
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
else
|
|
|
|
cur += ptr1->len;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr1++;
|
|
|
|
ptr2++;
|
|
|
|
i1--;
|
|
|
|
i2--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i1)
|
|
|
|
{
|
|
|
|
ptr->haspos = ptr1->haspos;
|
|
|
|
ptr->len = ptr1->len;
|
|
|
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
|
|
|
ptr->pos = cur - data;
|
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
2007-09-07 18:03:40 +02:00
|
|
|
cur += SHORTALIGN(ptr1->len);
|
2007-09-11 10:46:29 +02:00
|
|
|
memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
2007-08-21 03:11:32 +02:00
|
|
|
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
else
|
|
|
|
cur += ptr1->len;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr1++;
|
|
|
|
i1--;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i2)
|
|
|
|
{
|
|
|
|
ptr->haspos = ptr2->haspos;
|
|
|
|
ptr->len = ptr2->len;
|
|
|
|
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
|
|
|
ptr->pos = cur - data;
|
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
|
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
|
2007-09-07 18:03:40 +02:00
|
|
|
cur += SHORTALIGN(ptr2->len);
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
if (addlen == 0)
|
|
|
|
ptr->haspos = 0;
|
|
|
|
else
|
|
|
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
else
|
|
|
|
cur += ptr2->len;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr2++;
|
|
|
|
i2--;
|
|
|
|
}
|
|
|
|
|
|
|
|
out->size = ptr - ARRPTR(out);
|
|
|
|
SET_VARSIZE(out, CALCDATASIZE(out->size, cur - data));
|
|
|
|
if (data != STRPTR(out))
|
|
|
|
memmove(STRPTR(out), data, cur - data);
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(in1, 0);
|
|
|
|
PG_FREE_IF_COPY(in2, 1);
|
|
|
|
PG_RETURN_POINTER(out);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* compare 2 string values
|
|
|
|
*/
|
|
|
|
static int4
|
2007-09-07 17:09:56 +02:00
|
|
|
ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
if (ptr->len == item->length)
|
|
|
|
return strncmp(
|
|
|
|
&(chkval->values[ptr->pos]),
|
|
|
|
&(chkval->operand[item->distance]),
|
|
|
|
item->length);
|
|
|
|
|
|
|
|
return (ptr->len > item->length) ? 1 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check weight info
|
|
|
|
*/
|
|
|
|
static bool
|
2007-09-11 10:46:29 +02:00
|
|
|
checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-09-11 10:46:29 +02:00
|
|
|
WordEntryPosVector *posvec;
|
|
|
|
WordEntryPos *ptr;
|
|
|
|
uint16 len;
|
|
|
|
|
|
|
|
posvec = (WordEntryPosVector *)
|
|
|
|
(chkval->values + SHORTALIGN(val->pos + val->len));
|
|
|
|
|
|
|
|
len = posvec->npos;
|
|
|
|
ptr = posvec->pos;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
while (len--)
|
|
|
|
{
|
|
|
|
if (item->weight & (1 << WEP_GETWEIGHT(*ptr)))
|
|
|
|
return true;
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* is there value 'val' in array or not ?
|
|
|
|
*/
|
|
|
|
static bool
|
2007-09-07 17:09:56 +02:00
|
|
|
checkcondition_str(void *checkval, QueryOperand * val)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-09-07 17:09:56 +02:00
|
|
|
CHKVAL *chkval = (CHKVAL *) checkval;
|
|
|
|
WordEntry *StopLow = chkval->arrb;
|
|
|
|
WordEntry *StopHigh = chkval->arre;
|
2007-08-21 03:11:32 +02:00
|
|
|
WordEntry *StopMiddle;
|
|
|
|
int difference;
|
|
|
|
|
|
|
|
/* Loop invariant: StopLow <= val < StopHigh */
|
|
|
|
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
|
|
|
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
2007-09-07 17:09:56 +02:00
|
|
|
difference = ValCompare(chkval, StopMiddle, val);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (difference == 0)
|
|
|
|
return (val->weight && StopMiddle->haspos) ?
|
2007-09-07 17:09:56 +02:00
|
|
|
checkclass_str(chkval, StopMiddle, val) : true;
|
2007-08-21 03:11:32 +02:00
|
|
|
else if (difference < 0)
|
|
|
|
StopLow = StopMiddle + 1;
|
|
|
|
else
|
|
|
|
StopHigh = StopMiddle;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2007-09-07 17:09:56 +02:00
|
|
|
* check for boolean condition.
|
|
|
|
*
|
|
|
|
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
|
|
|
|
* checkval can be used to pass information to the callback. TS_execute doesn't
|
|
|
|
* do anything with it.
|
|
|
|
* chkcond is a callback function used to evaluate each VAL node in the query.
|
|
|
|
*
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
bool
|
2007-08-31 04:26:29 +02:00
|
|
|
TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
|
2007-09-07 17:09:56 +02:00
|
|
|
bool (*chkcond) (void *checkval, QueryOperand * val))
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-08-31 04:26:29 +02:00
|
|
|
/* since this function recurses, it could be driven to stack overflow */
|
|
|
|
check_stack_depth();
|
|
|
|
|
2007-09-07 17:09:56 +02:00
|
|
|
if (curitem->type == QI_VAL)
|
|
|
|
return chkcond(checkval, (QueryOperand *) curitem);
|
|
|
|
|
|
|
|
switch(curitem->operator.oper)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-09-07 17:09:56 +02:00
|
|
|
case OP_NOT:
|
|
|
|
if (calcnot)
|
|
|
|
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
|
|
|
|
else
|
|
|
|
return true;
|
|
|
|
case OP_AND:
|
|
|
|
if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
|
|
|
|
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
|
|
|
|
case OP_OR:
|
|
|
|
if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
|
|
|
|
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unknown operator %d", curitem->operator.oper);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2007-09-07 17:09:56 +02:00
|
|
|
|
|
|
|
/* not reachable, but keep compiler quiet */
|
2007-08-21 03:11:32 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* boolean operations
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
ts_match_qv(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
|
|
|
|
PG_GETARG_DATUM(1),
|
|
|
|
PG_GETARG_DATUM(0)));
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_match_vq(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector val = PG_GETARG_TSVECTOR(0);
|
|
|
|
TSQuery query = PG_GETARG_TSQUERY(1);
|
|
|
|
CHKVAL chkval;
|
|
|
|
bool result;
|
|
|
|
|
|
|
|
if (!val->size || !query->size)
|
|
|
|
{
|
|
|
|
PG_FREE_IF_COPY(val, 0);
|
|
|
|
PG_FREE_IF_COPY(query, 1);
|
|
|
|
PG_RETURN_BOOL(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
chkval.arrb = ARRPTR(val);
|
|
|
|
chkval.arre = chkval.arrb + val->size;
|
|
|
|
chkval.values = STRPTR(val);
|
|
|
|
chkval.operand = GETOPERAND(query);
|
|
|
|
result = TS_execute(
|
|
|
|
GETQUERY(query),
|
|
|
|
&chkval,
|
|
|
|
true,
|
|
|
|
checkcondition_str
|
|
|
|
);
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(val, 0);
|
|
|
|
PG_FREE_IF_COPY(query, 1);
|
|
|
|
PG_RETURN_BOOL(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_match_tt(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector vector;
|
|
|
|
TSQuery query;
|
|
|
|
bool res;
|
|
|
|
|
|
|
|
vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
|
|
|
|
PG_GETARG_DATUM(0)));
|
|
|
|
query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
|
|
|
|
PG_GETARG_DATUM(1)));
|
|
|
|
|
|
|
|
res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
|
|
|
|
TSVectorGetDatum(vector),
|
|
|
|
TSQueryGetDatum(query)));
|
|
|
|
|
|
|
|
pfree(vector);
|
|
|
|
pfree(query);
|
|
|
|
|
|
|
|
PG_RETURN_BOOL(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_match_tq(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector vector;
|
|
|
|
TSQuery query = PG_GETARG_TSQUERY(1);
|
|
|
|
bool res;
|
|
|
|
|
|
|
|
vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
|
|
|
|
PG_GETARG_DATUM(0)));
|
|
|
|
|
|
|
|
res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
|
|
|
|
TSVectorGetDatum(vector),
|
|
|
|
TSQueryGetDatum(query)));
|
|
|
|
|
|
|
|
pfree(vector);
|
|
|
|
PG_FREE_IF_COPY(query, 1);
|
|
|
|
|
|
|
|
PG_RETURN_BOOL(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Support routines for the ts_stat statistics function
 */
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns the number of positions in value 'wptr' within tsvector 'txt',
|
|
|
|
* that have a weight equal to one of the weights in 'weight' bitmask.
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
static int
|
|
|
|
check_weight(TSVector txt, WordEntry * wptr, int8 weight)
|
|
|
|
{
|
|
|
|
int len = POSDATALEN(txt, wptr);
|
|
|
|
int num = 0;
|
|
|
|
WordEntryPos *ptr = POSDATAPTR(txt, wptr);
|
|
|
|
|
|
|
|
while (len--)
|
|
|
|
{
|
|
|
|
if (weight & (1 << WEP_GETWEIGHT(*ptr)))
|
|
|
|
num++;
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
return num;
|
|
|
|
}
|
|
|
|
|
|
|
|
static WordEntry **
|
|
|
|
SEI_realloc(WordEntry ** in, uint32 *len)
|
|
|
|
{
|
|
|
|
if (*len == 0 || in == NULL)
|
|
|
|
{
|
|
|
|
*len = 8;
|
|
|
|
in = palloc(sizeof(WordEntry *) * (*len));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*len *= 2;
|
|
|
|
in = repalloc(in, sizeof(WordEntry *) * (*len));
|
|
|
|
}
|
|
|
|
return in;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
compareStatWord(StatEntry * a, WordEntry * b, tsstat * stat, TSVector txt)
|
|
|
|
{
|
|
|
|
if (a->len == b->len)
|
|
|
|
return strncmp(
|
|
|
|
STATSTRPTR(stat) + a->pos,
|
|
|
|
STRPTR(txt) + b->pos,
|
|
|
|
a->len
|
|
|
|
);
|
|
|
|
return (a->len > b->len) ? 1 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static tsstat *
|
|
|
|
formstat(tsstat * stat, TSVector txt, WordEntry ** entry, uint32 len)
|
|
|
|
{
|
|
|
|
tsstat *newstat;
|
|
|
|
uint32 totallen,
|
|
|
|
nentry;
|
|
|
|
uint32 slen = 0;
|
|
|
|
WordEntry **ptr = entry;
|
|
|
|
char *curptr;
|
|
|
|
StatEntry *sptr,
|
|
|
|
*nptr;
|
|
|
|
|
|
|
|
while (ptr - entry < len)
|
|
|
|
{
|
|
|
|
slen += (*ptr)->len;
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
nentry = stat->size + len;
|
|
|
|
slen += STATSTRSIZE(stat);
|
|
|
|
totallen = CALCSTATSIZE(nentry, slen);
|
|
|
|
newstat = palloc(totallen);
|
|
|
|
SET_VARSIZE(newstat, totallen);
|
|
|
|
newstat->weight = stat->weight;
|
|
|
|
newstat->size = nentry;
|
|
|
|
|
|
|
|
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
|
|
|
|
curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
|
|
|
|
|
|
|
|
ptr = entry;
|
|
|
|
sptr = STATPTR(stat);
|
|
|
|
nptr = STATPTR(newstat);
|
|
|
|
|
|
|
|
if (len == 1)
|
|
|
|
{
|
|
|
|
StatEntry *StopLow = STATPTR(stat);
|
|
|
|
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
|
|
|
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
|
|
|
sptr = StopLow + (StopHigh - StopLow) / 2;
|
|
|
|
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
|
|
|
StopLow = sptr + 1;
|
|
|
|
else
|
|
|
|
StopHigh = sptr;
|
|
|
|
}
|
|
|
|
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
|
|
|
|
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
|
|
|
|
if ((*ptr)->haspos)
|
|
|
|
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
|
|
|
else
|
|
|
|
nptr->nentry = 1;
|
|
|
|
nptr->ndoc = 1;
|
|
|
|
nptr->len = (*ptr)->len;
|
|
|
|
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
|
|
|
nptr->pos = curptr - STATSTRPTR(newstat);
|
|
|
|
memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
|
|
|
|
{
|
|
|
|
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
|
|
|
{
|
|
|
|
memcpy(nptr, sptr, sizeof(StatEntry));
|
|
|
|
sptr++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if ((*ptr)->haspos)
|
|
|
|
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
|
|
|
else
|
|
|
|
nptr->nentry = 1;
|
|
|
|
nptr->ndoc = 1;
|
|
|
|
nptr->len = (*ptr)->len;
|
|
|
|
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
|
|
|
nptr->pos = curptr - STATSTRPTR(newstat);
|
|
|
|
curptr += nptr->len;
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
nptr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
|
|
|
|
|
|
|
|
while (ptr - entry < len)
|
|
|
|
{
|
|
|
|
if ((*ptr)->haspos)
|
|
|
|
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
|
|
|
else
|
|
|
|
nptr->nentry = 1;
|
|
|
|
nptr->ndoc = 1;
|
|
|
|
nptr->len = (*ptr)->len;
|
|
|
|
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
|
|
|
nptr->pos = curptr - STATSTRPTR(newstat);
|
|
|
|
curptr += nptr->len;
|
|
|
|
ptr++;
|
|
|
|
nptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return newstat;
|
|
|
|
}
|
|
|
|
|
2007-09-11 10:46:29 +02:00
|
|
|
/*
|
|
|
|
* This is written like a custom aggregate function, because the
|
|
|
|
* original plan was to do just that. Unfortunately, an aggregate function
|
|
|
|
* can't return a set, so that plan was abandoned. If that limitation is
|
|
|
|
* lifted in the future, ts_stat could be a real aggregate function so that
|
|
|
|
* you could use it like this:
|
|
|
|
*
|
|
|
|
* SELECT ts_stat(vector_column) FROM vector_table;
|
|
|
|
*
|
|
|
|
* where vector_column is a tsvector-type column in vector_table.
|
|
|
|
*/
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
static tsstat *
|
|
|
|
ts_accum(tsstat * stat, Datum data)
|
|
|
|
{
|
|
|
|
tsstat *newstat;
|
|
|
|
TSVector txt = DatumGetTSVector(data);
|
|
|
|
WordEntry **newentry = NULL;
|
|
|
|
uint32 len = 0,
|
|
|
|
cur = 0;
|
|
|
|
StatEntry *sptr;
|
|
|
|
WordEntry *wptr;
|
|
|
|
int n = 0;
|
|
|
|
|
|
|
|
if (stat == NULL)
|
|
|
|
{ /* Init in first */
|
|
|
|
stat = palloc(STATHDRSIZE);
|
|
|
|
SET_VARSIZE(stat, STATHDRSIZE);
|
|
|
|
stat->size = 0;
|
|
|
|
stat->weight = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* simple check of correctness */
|
|
|
|
if (txt == NULL || txt->size == 0)
|
|
|
|
{
|
|
|
|
if (txt != (TSVector) DatumGetPointer(data))
|
|
|
|
pfree(txt);
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
sptr = STATPTR(stat);
|
|
|
|
wptr = ARRPTR(txt);
|
|
|
|
|
|
|
|
if (stat->size < 100 * txt->size)
|
|
|
|
{ /* merge */
|
|
|
|
while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
|
|
|
|
{
|
|
|
|
int cmp = compareStatWord(sptr, wptr, stat, txt);
|
|
|
|
|
|
|
|
if (cmp < 0)
|
|
|
|
sptr++;
|
|
|
|
else if (cmp == 0)
|
|
|
|
{
|
|
|
|
if (stat->weight == 0)
|
|
|
|
{
|
|
|
|
sptr->ndoc++;
|
|
|
|
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
|
|
|
|
}
|
|
|
|
else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
|
|
|
|
{
|
|
|
|
sptr->ndoc++;
|
|
|
|
sptr->nentry += n;
|
|
|
|
}
|
|
|
|
sptr++;
|
|
|
|
wptr++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
|
|
|
{
|
|
|
|
if (cur == len)
|
|
|
|
newentry = SEI_realloc(newentry, &len);
|
|
|
|
newentry[cur] = wptr;
|
|
|
|
cur++;
|
|
|
|
}
|
|
|
|
wptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (wptr - ARRPTR(txt) < txt->size)
|
|
|
|
{
|
|
|
|
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
|
|
|
{
|
|
|
|
if (cur == len)
|
|
|
|
newentry = SEI_realloc(newentry, &len);
|
|
|
|
newentry[cur] = wptr;
|
|
|
|
cur++;
|
|
|
|
}
|
|
|
|
wptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{ /* search */
|
|
|
|
while (wptr - ARRPTR(txt) < txt->size)
|
|
|
|
{
|
|
|
|
StatEntry *StopLow = STATPTR(stat);
|
|
|
|
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
|
|
|
int cmp;
|
|
|
|
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
|
|
|
sptr = StopLow + (StopHigh - StopLow) / 2;
|
|
|
|
cmp = compareStatWord(sptr, wptr, stat, txt);
|
|
|
|
if (cmp == 0)
|
|
|
|
{
|
|
|
|
if (stat->weight == 0)
|
|
|
|
{
|
|
|
|
sptr->ndoc++;
|
|
|
|
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
|
|
|
|
}
|
|
|
|
else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
|
|
|
|
{
|
|
|
|
sptr->ndoc++;
|
|
|
|
sptr->nentry += n;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (cmp < 0)
|
|
|
|
StopLow = sptr + 1;
|
|
|
|
else
|
|
|
|
StopHigh = sptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (StopLow >= StopHigh)
|
|
|
|
{ /* not found */
|
|
|
|
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
|
|
|
{
|
|
|
|
if (cur == len)
|
|
|
|
newentry = SEI_realloc(newentry, &len);
|
|
|
|
newentry[cur] = wptr;
|
|
|
|
cur++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
wptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (cur == 0)
|
|
|
|
{ /* no new words */
|
|
|
|
if (txt != (TSVector) DatumGetPointer(data))
|
|
|
|
pfree(txt);
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
newstat = formstat(stat, txt, newentry, cur);
|
|
|
|
pfree(newentry);
|
|
|
|
|
|
|
|
if (txt != (TSVector) DatumGetPointer(data))
|
|
|
|
pfree(txt);
|
|
|
|
return newstat;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
|
|
|
|
tsstat * stat)
|
|
|
|
{
|
|
|
|
TupleDesc tupdesc;
|
|
|
|
MemoryContext oldcontext;
|
|
|
|
StatStorage *st;
|
|
|
|
|
|
|
|
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
|
|
|
st = palloc(sizeof(StatStorage));
|
|
|
|
st->cur = 0;
|
|
|
|
st->stat = palloc(VARSIZE(stat));
|
|
|
|
memcpy(st->stat, stat, VARSIZE(stat));
|
|
|
|
funcctx->user_fctx = (void *) st;
|
|
|
|
|
|
|
|
tupdesc = CreateTemplateTupleDesc(3, false);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
|
|
|
|
TEXTOID, -1, 0);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
|
|
|
|
INT4OID, -1, 0);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
|
|
|
|
INT4OID, -1, 0);
|
|
|
|
funcctx->tuple_desc = BlessTupleDesc(tupdesc);
|
|
|
|
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static Datum
|
|
|
|
ts_process_call(FuncCallContext *funcctx)
|
|
|
|
{
|
|
|
|
StatStorage *st;
|
|
|
|
|
|
|
|
st = (StatStorage *) funcctx->user_fctx;
|
|
|
|
|
|
|
|
if (st->cur < st->stat->size)
|
|
|
|
{
|
|
|
|
Datum result;
|
|
|
|
char *values[3];
|
|
|
|
char ndoc[16];
|
|
|
|
char nentry[16];
|
|
|
|
StatEntry *entry = STATPTR(st->stat) + st->cur;
|
|
|
|
HeapTuple tuple;
|
|
|
|
|
|
|
|
values[0] = palloc(entry->len + 1);
|
|
|
|
memcpy(values[0], STATSTRPTR(st->stat) + entry->pos, entry->len);
|
|
|
|
(values[0])[entry->len] = '\0';
|
|
|
|
sprintf(ndoc, "%d", entry->ndoc);
|
|
|
|
values[1] = ndoc;
|
|
|
|
sprintf(nentry, "%d", entry->nentry);
|
|
|
|
values[2] = nentry;
|
|
|
|
|
|
|
|
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
|
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
|
|
|
|
pfree(values[0]);
|
|
|
|
st->cur++;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
pfree(st->stat);
|
|
|
|
pfree(st);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (Datum) 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static tsstat *
|
|
|
|
ts_stat_sql(text *txt, text *ws)
|
|
|
|
{
|
|
|
|
char *query = TextPGetCString(txt);
|
|
|
|
int i;
|
|
|
|
tsstat *newstat,
|
|
|
|
*stat;
|
|
|
|
bool isnull;
|
|
|
|
Portal portal;
|
|
|
|
void *plan;
|
|
|
|
|
|
|
|
if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
|
|
|
|
/* internal error */
|
|
|
|
elog(ERROR, "SPI_prepare(\"%s\") failed", query);
|
|
|
|
|
|
|
|
if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
|
|
|
|
/* internal error */
|
|
|
|
elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
|
|
|
|
|
|
|
|
SPI_cursor_fetch(portal, true, 100);
|
|
|
|
|
|
|
|
if (SPI_tuptable->tupdesc->natts != 1 ||
|
|
|
|
SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSVECTOROID)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("ts_stat query must return one tsvector column")));
|
|
|
|
|
|
|
|
stat = palloc(STATHDRSIZE);
|
|
|
|
SET_VARSIZE(stat, STATHDRSIZE);
|
|
|
|
stat->size = 0;
|
|
|
|
stat->weight = 0;
|
|
|
|
|
|
|
|
if (ws)
|
|
|
|
{
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
buf = VARDATA(ws);
|
|
|
|
while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ)
|
|
|
|
{
|
|
|
|
if (pg_mblen(buf) == 1)
|
|
|
|
{
|
|
|
|
switch (*buf)
|
|
|
|
{
|
|
|
|
case 'A':
|
|
|
|
case 'a':
|
|
|
|
stat->weight |= 1 << 3;
|
|
|
|
break;
|
|
|
|
case 'B':
|
|
|
|
case 'b':
|
|
|
|
stat->weight |= 1 << 2;
|
|
|
|
break;
|
|
|
|
case 'C':
|
|
|
|
case 'c':
|
|
|
|
stat->weight |= 1 << 1;
|
|
|
|
break;
|
|
|
|
case 'D':
|
|
|
|
case 'd':
|
|
|
|
stat->weight |= 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
stat->weight |= 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
buf += pg_mblen(buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (SPI_processed > 0)
|
|
|
|
{
|
|
|
|
for (i = 0; i < SPI_processed; i++)
|
|
|
|
{
|
|
|
|
Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
|
|
|
|
|
|
|
|
if (!isnull)
|
|
|
|
{
|
|
|
|
newstat = ts_accum(stat, data);
|
|
|
|
if (stat != newstat && stat)
|
|
|
|
pfree(stat);
|
|
|
|
stat = newstat;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
SPI_freetuptable(SPI_tuptable);
|
|
|
|
SPI_cursor_fetch(portal, true, 100);
|
|
|
|
}
|
|
|
|
|
|
|
|
SPI_freetuptable(SPI_tuptable);
|
|
|
|
SPI_cursor_close(portal);
|
|
|
|
SPI_freeplan(plan);
|
|
|
|
pfree(query);
|
|
|
|
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_stat1(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
FuncCallContext *funcctx;
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
|
|
{
|
|
|
|
tsstat *stat;
|
|
|
|
text *txt = PG_GETARG_TEXT_P(0);
|
|
|
|
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
|
|
SPI_connect();
|
|
|
|
stat = ts_stat_sql(txt, NULL);
|
|
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
|
|
ts_setup_firstcall(fcinfo, funcctx, stat);
|
|
|
|
SPI_finish();
|
|
|
|
}
|
|
|
|
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
if ((result = ts_process_call(funcctx)) != (Datum) 0)
|
|
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
|
|
SRF_RETURN_DONE(funcctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_stat2(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
FuncCallContext *funcctx;
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
|
|
{
|
|
|
|
tsstat *stat;
|
|
|
|
text *txt = PG_GETARG_TEXT_P(0);
|
|
|
|
text *ws = PG_GETARG_TEXT_P(1);
|
|
|
|
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
|
|
SPI_connect();
|
|
|
|
stat = ts_stat_sql(txt, ws);
|
|
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
|
|
PG_FREE_IF_COPY(ws, 1);
|
|
|
|
ts_setup_firstcall(fcinfo, funcctx, stat);
|
|
|
|
SPI_finish();
|
|
|
|
}
|
|
|
|
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
if ((result = ts_process_call(funcctx)) != (Datum) 0)
|
|
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
|
|
SRF_RETURN_DONE(funcctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Check if datatype is TEXT or binary-equivalent to it */
|
|
|
|
static bool
|
|
|
|
istexttype(Oid typid)
|
|
|
|
{
|
|
|
|
/* varchar(n) and char(n) are binary-compatible with text */
|
|
|
|
if (typid==TEXTOID || typid==VARCHAROID || typid==BPCHAROID)
|
|
|
|
return true;
|
|
|
|
/* Allow domains over these types, too */
|
|
|
|
typid = getBaseType(typid);
|
|
|
|
if (typid==TEXTOID || typid==VARCHAROID || typid==BPCHAROID)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Triggers for automatic update of a tsvector column from text column(s)
|
|
|
|
*
|
|
|
|
* Trigger arguments are either
|
|
|
|
* name of tsvector col, name of tsconfig to use, name(s) of text col(s)
|
|
|
|
* name of tsvector col, name of regconfig col, name(s) of text col(s)
|
|
|
|
* ie, tsconfig can either be specified by name, or indirectly as the
|
|
|
|
* contents of a regconfig field in the row. If the name is used, it must
|
|
|
|
* be explicitly schema-qualified.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return tsvector_update_trigger(fcinfo, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return tsvector_update_trigger(fcinfo, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static Datum
|
|
|
|
tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
|
|
|
|
{
|
|
|
|
TriggerData *trigdata;
|
|
|
|
Trigger *trigger;
|
|
|
|
Relation rel;
|
|
|
|
HeapTuple rettuple = NULL;
|
|
|
|
int tsvector_attr_num,
|
|
|
|
i;
|
|
|
|
ParsedText prs;
|
|
|
|
Datum datum;
|
|
|
|
bool isnull;
|
|
|
|
text *txt;
|
|
|
|
Oid cfgId;
|
|
|
|
|
|
|
|
/* Check call context */
|
|
|
|
if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
|
|
|
|
elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
|
|
|
|
|
|
|
|
trigdata = (TriggerData *) fcinfo->context;
|
|
|
|
if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
|
|
|
|
elog(ERROR, "tsvector_update_trigger: can't process STATEMENT events");
|
|
|
|
if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
|
|
|
|
elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
|
|
|
|
|
|
|
|
if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
|
|
|
|
rettuple = trigdata->tg_trigtuple;
|
|
|
|
else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
|
|
|
|
rettuple = trigdata->tg_newtuple;
|
|
|
|
else
|
|
|
|
elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
|
|
|
|
|
|
|
|
trigger = trigdata->tg_trigger;
|
|
|
|
rel = trigdata->tg_relation;
|
|
|
|
|
|
|
|
if (trigger->tgnargs < 3)
|
|
|
|
elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
|
|
|
|
|
|
|
|
/* Find the target tsvector column */
|
|
|
|
tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
|
|
|
|
if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("tsvector column \"%s\" does not exist",
|
|
|
|
trigger->tgargs[0])));
|
|
|
|
if (SPI_gettypeid(rel->rd_att, tsvector_attr_num) != TSVECTOROID)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
|
|
errmsg("column \"%s\" is not of tsvector type",
|
|
|
|
trigger->tgargs[0])));
|
|
|
|
|
|
|
|
/* Find the configuration to use */
|
|
|
|
if (config_column)
|
|
|
|
{
|
|
|
|
int config_attr_num;
|
|
|
|
|
|
|
|
config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
|
|
|
|
if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("config column \"%s\" does not exist",
|
|
|
|
trigger->tgargs[1])));
|
|
|
|
if (SPI_gettypeid(rel->rd_att, config_attr_num) != REGCONFIGOID)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
|
|
errmsg("column \"%s\" is not of regconfig type",
|
|
|
|
trigger->tgargs[1])));
|
|
|
|
|
|
|
|
datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
|
|
|
|
if (isnull)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
|
|
|
errmsg("config column \"%s\" must not be NULL",
|
|
|
|
trigger->tgargs[1])));
|
|
|
|
cfgId = DatumGetObjectId(datum);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
List *names;
|
|
|
|
|
|
|
|
names = stringToQualifiedNameList(trigger->tgargs[1]);
|
|
|
|
/* require a schema so that results are not search path dependent */
|
|
|
|
if (list_length(names) < 2)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("text search configuration name \"%s\" must be schema-qualified",
|
|
|
|
trigger->tgargs[1])));
|
|
|
|
cfgId = TSConfigGetCfgid(names, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize parse state */
|
|
|
|
prs.lenwords = 32;
|
|
|
|
prs.curwords = 0;
|
|
|
|
prs.pos = 0;
|
|
|
|
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
|
|
|
|
|
|
|
|
/* find all words in indexable column(s) */
|
|
|
|
for (i = 2; i < trigger->tgnargs; i++)
|
|
|
|
{
|
|
|
|
int numattr;
|
|
|
|
|
|
|
|
numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
|
|
|
|
if (numattr == SPI_ERROR_NOATTRIBUTE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("column \"%s\" does not exist",
|
|
|
|
trigger->tgargs[i])));
|
|
|
|
if (!istexttype(SPI_gettypeid(rel->rd_att, numattr)))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
|
|
errmsg("column \"%s\" is not of character type",
|
|
|
|
trigger->tgargs[i])));
|
|
|
|
|
|
|
|
datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
|
|
|
|
if (isnull)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
txt = DatumGetTextP(datum);
|
|
|
|
|
|
|
|
parsetext(cfgId, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
|
|
|
|
|
|
|
|
if (txt != (text *) DatumGetPointer(datum))
|
|
|
|
pfree(txt);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* make tsvector value */
|
|
|
|
if (prs.curwords)
|
|
|
|
{
|
|
|
|
datum = PointerGetDatum(make_tsvector(&prs));
|
|
|
|
rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
|
|
|
|
&datum, NULL);
|
|
|
|
pfree(DatumGetPointer(datum));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
TSVector out = palloc(CALCDATASIZE(0, 0));
|
|
|
|
|
|
|
|
SET_VARSIZE(out, CALCDATASIZE(0, 0));
|
|
|
|
out->size = 0;
|
|
|
|
datum = PointerGetDatum(out);
|
|
|
|
rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
|
|
|
|
&datum, NULL);
|
|
|
|
pfree(prs.words);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rettuple == NULL) /* internal error */
|
|
|
|
elog(ERROR, "tsvector_update_trigger: %d returned by SPI_modifytuple",
|
|
|
|
SPI_result);
|
|
|
|
|
|
|
|
return PointerGetDatum(rettuple);
|
|
|
|
}
|