2007-08-21 03:11:32 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* tsvector_op.c
|
|
|
|
* operations over tsvector
|
|
|
|
*
|
2016-01-02 19:33:40 +01:00
|
|
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
2007-08-21 03:11:32 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/utils/adt/tsvector_op.c
|
2007-08-21 03:11:32 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2016-03-11 17:22:36 +01:00
|
|
|
#include "access/htup_details.h"
|
2007-08-21 03:11:32 +02:00
|
|
|
#include "catalog/namespace.h"
|
2009-01-07 14:44:37 +01:00
|
|
|
#include "catalog/pg_type.h"
|
2007-08-21 03:11:32 +02:00
|
|
|
#include "commands/trigger.h"
|
|
|
|
#include "executor/spi.h"
|
|
|
|
#include "funcapi.h"
|
|
|
|
#include "mb/pg_wchar.h"
|
2007-08-31 04:26:29 +02:00
|
|
|
#include "miscadmin.h"
|
2015-09-18 13:32:09 +02:00
|
|
|
#include "parser/parse_coerce.h"
|
2007-08-21 03:11:32 +02:00
|
|
|
#include "tsearch/ts_utils.h"
|
|
|
|
#include "utils/builtins.h"
|
|
|
|
#include "utils/lsyscache.h"
|
2011-02-23 18:18:09 +01:00
|
|
|
#include "utils/rel.h"
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
WordEntry *arrb;
|
|
|
|
WordEntry *arre;
|
|
|
|
char *values;
|
|
|
|
char *operand;
|
|
|
|
} CHKVAL;
|
|
|
|
|
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
typedef struct StatEntry
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2015-05-20 15:18:11 +02:00
|
|
|
uint32 ndoc; /* zero indicates that we were already here
|
|
|
|
* while walking through the tree */
|
2007-08-21 03:11:32 +02:00
|
|
|
uint32 nentry;
|
2008-11-17 13:17:09 +01:00
|
|
|
struct StatEntry *left;
|
|
|
|
struct StatEntry *right;
|
|
|
|
uint32 lenlexeme;
|
2015-02-21 22:12:14 +01:00
|
|
|
char lexeme[FLEXIBLE_ARRAY_MEMBER];
|
2007-08-21 03:11:32 +02:00
|
|
|
} StatEntry;
|
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
#define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
typedef struct
|
|
|
|
{
|
2012-06-25 00:51:46 +02:00
|
|
|
int32 weight;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
uint32 maxdepth;
|
2009-06-11 16:49:15 +02:00
|
|
|
|
|
|
|
StatEntry **stack;
|
2008-11-17 13:17:09 +01:00
|
|
|
uint32 stackpos;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
StatEntry *root;
|
2008-11-17 13:17:09 +01:00
|
|
|
} TSVectorStat;
|
|
|
|
|
|
|
|
#define STATHDRSIZE (offsetof(TSVectorStat, data))
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
|
2016-06-10 00:02:36 +02:00
|
|
|
static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-09-07 17:09:56 +02:00
|
|
|
/*
|
|
|
|
* Order: haspos, len, word, for all positions (pos, weight)
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
static int
|
|
|
|
silly_cmp_tsvector(const TSVector a, const TSVector b)
|
|
|
|
{
|
|
|
|
if (VARSIZE(a) < VARSIZE(b))
|
|
|
|
return -1;
|
|
|
|
else if (VARSIZE(a) > VARSIZE(b))
|
|
|
|
return 1;
|
|
|
|
else if (a->size < b->size)
|
|
|
|
return -1;
|
|
|
|
else if (a->size > b->size)
|
|
|
|
return 1;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
WordEntry *aptr = ARRPTR(a);
|
|
|
|
WordEntry *bptr = ARRPTR(b);
|
|
|
|
int i = 0;
|
|
|
|
int res;
|
|
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < a->size; i++)
|
|
|
|
{
|
|
|
|
if (aptr->haspos != bptr->haspos)
|
|
|
|
{
|
|
|
|
return (aptr->haspos > bptr->haspos) ? -1 : 1;
|
|
|
|
}
|
2009-06-11 16:49:15 +02:00
|
|
|
else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
else if (aptr->haspos)
|
|
|
|
{
|
|
|
|
WordEntryPos *ap = POSDATAPTR(a, aptr);
|
|
|
|
WordEntryPos *bp = POSDATAPTR(b, bptr);
|
|
|
|
int j;
|
|
|
|
|
|
|
|
if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
|
|
|
|
return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
|
|
|
|
|
|
|
|
for (j = 0; j < POSDATALEN(a, aptr); j++)
|
|
|
|
{
|
|
|
|
if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
|
|
|
|
{
|
|
|
|
return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
|
|
|
|
}
|
|
|
|
else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
|
|
|
|
{
|
|
|
|
return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
|
|
|
|
}
|
|
|
|
ap++, bp++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
aptr++;
|
|
|
|
bptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define TSVECTORCMPFUNC( type, action, ret ) \
|
|
|
|
Datum \
|
|
|
|
tsvector_##type(PG_FUNCTION_ARGS) \
|
|
|
|
{ \
|
|
|
|
TSVector a = PG_GETARG_TSVECTOR(0); \
|
|
|
|
TSVector b = PG_GETARG_TSVECTOR(1); \
|
|
|
|
int res = silly_cmp_tsvector(a, b); \
|
|
|
|
PG_FREE_IF_COPY(a,0); \
|
|
|
|
PG_FREE_IF_COPY(b,1); \
|
|
|
|
PG_RETURN_##ret( res action 0 ); \
|
2009-06-11 16:49:15 +02:00
|
|
|
} \
|
2009-05-27 21:41:58 +02:00
|
|
|
/* keep compiler quiet - no extra ; */ \
|
|
|
|
extern int no_such_variable
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
TSVECTORCMPFUNC(lt, <, BOOL);
|
|
|
|
TSVECTORCMPFUNC(le, <=, BOOL);
|
|
|
|
TSVECTORCMPFUNC(eq, ==, BOOL);
|
|
|
|
TSVECTORCMPFUNC(ge, >=, BOOL);
|
|
|
|
TSVECTORCMPFUNC(gt, >, BOOL);
|
|
|
|
TSVECTORCMPFUNC(ne, !=, BOOL);
|
|
|
|
TSVECTORCMPFUNC(cmp, +, INT32);
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_strip(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector in = PG_GETARG_TSVECTOR(0);
|
|
|
|
TSVector out;
|
|
|
|
int i,
|
|
|
|
len = 0;
|
|
|
|
WordEntry *arrin = ARRPTR(in),
|
|
|
|
*arrout;
|
|
|
|
char *cur;
|
|
|
|
|
|
|
|
for (i = 0; i < in->size; i++)
|
2007-09-07 18:03:40 +02:00
|
|
|
len += arrin[i].len;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
len = CALCDATASIZE(in->size, len);
|
|
|
|
out = (TSVector) palloc0(len);
|
|
|
|
SET_VARSIZE(out, len);
|
|
|
|
out->size = in->size;
|
|
|
|
arrout = ARRPTR(out);
|
|
|
|
cur = STRPTR(out);
|
|
|
|
for (i = 0; i < in->size; i++)
|
|
|
|
{
|
|
|
|
memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
|
|
|
|
arrout[i].haspos = 0;
|
|
|
|
arrout[i].len = arrin[i].len;
|
|
|
|
arrout[i].pos = cur - STRPTR(out);
|
2007-09-07 18:03:40 +02:00
|
|
|
cur += arrout[i].len;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
|
|
PG_RETURN_POINTER(out);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_length(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector in = PG_GETARG_TSVECTOR(0);
|
2012-06-25 00:51:46 +02:00
|
|
|
int32 ret = in->size;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
|
|
PG_RETURN_INT32(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_setweight(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector in = PG_GETARG_TSVECTOR(0);
|
|
|
|
char cw = PG_GETARG_CHAR(1);
|
|
|
|
TSVector out;
|
|
|
|
int i,
|
|
|
|
j;
|
|
|
|
WordEntry *entry;
|
|
|
|
WordEntryPos *p;
|
|
|
|
int w = 0;
|
|
|
|
|
|
|
|
switch (cw)
|
|
|
|
{
|
|
|
|
case 'A':
|
|
|
|
case 'a':
|
|
|
|
w = 3;
|
|
|
|
break;
|
|
|
|
case 'B':
|
|
|
|
case 'b':
|
|
|
|
w = 2;
|
|
|
|
break;
|
|
|
|
case 'C':
|
|
|
|
case 'c':
|
|
|
|
w = 1;
|
|
|
|
break;
|
|
|
|
case 'D':
|
|
|
|
case 'd':
|
|
|
|
w = 0;
|
|
|
|
break;
|
|
|
|
default:
|
2007-11-28 22:56:30 +01:00
|
|
|
/* internal error */
|
|
|
|
elog(ERROR, "unrecognized weight: %d", cw);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
out = (TSVector) palloc(VARSIZE(in));
|
|
|
|
memcpy(out, in, VARSIZE(in));
|
|
|
|
entry = ARRPTR(out);
|
|
|
|
i = out->size;
|
|
|
|
while (i--)
|
|
|
|
{
|
|
|
|
if ((j = POSDATALEN(out, entry)) != 0)
|
|
|
|
{
|
|
|
|
p = POSDATAPTR(out, entry);
|
|
|
|
while (j--)
|
|
|
|
{
|
|
|
|
WEP_SETWEIGHT(*p, w);
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
entry++;
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(in, 0);
|
|
|
|
PG_RETURN_POINTER(out);
|
|
|
|
}
|
|
|
|
|
2016-03-11 17:22:36 +01:00
|
|
|
/*
|
|
|
|
* setweight(tsin tsvector, char_weight "char", lexemes "text"[])
|
|
|
|
*
|
|
|
|
* Assign weight w to elements of tsin that are listed in lexemes.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector tsin = PG_GETARG_TSVECTOR(0);
|
|
|
|
char char_weight = PG_GETARG_CHAR(1);
|
|
|
|
ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
|
|
|
|
|
|
|
|
TSVector tsout;
|
|
|
|
int i,
|
|
|
|
j,
|
|
|
|
nlexemes,
|
|
|
|
weight;
|
|
|
|
WordEntry *entry;
|
|
|
|
Datum *dlexemes;
|
|
|
|
bool *nulls;
|
|
|
|
|
|
|
|
switch (char_weight)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
case 'A':
|
|
|
|
case 'a':
|
2016-03-11 17:22:36 +01:00
|
|
|
weight = 3;
|
|
|
|
break;
|
2016-06-10 00:02:36 +02:00
|
|
|
case 'B':
|
|
|
|
case 'b':
|
2016-03-11 17:22:36 +01:00
|
|
|
weight = 2;
|
|
|
|
break;
|
2016-06-10 00:02:36 +02:00
|
|
|
case 'C':
|
|
|
|
case 'c':
|
2016-03-11 17:22:36 +01:00
|
|
|
weight = 1;
|
|
|
|
break;
|
2016-06-10 00:02:36 +02:00
|
|
|
case 'D':
|
|
|
|
case 'd':
|
2016-03-11 17:22:36 +01:00
|
|
|
weight = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* internal error */
|
|
|
|
elog(ERROR, "unrecognized weight: %c", char_weight);
|
|
|
|
}
|
|
|
|
|
|
|
|
tsout = (TSVector) palloc(VARSIZE(tsin));
|
|
|
|
memcpy(tsout, tsin, VARSIZE(tsin));
|
|
|
|
entry = ARRPTR(tsout);
|
|
|
|
|
|
|
|
deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
|
|
|
|
&dlexemes, &nulls, &nlexemes);
|
|
|
|
|
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* Assuming that lexemes array is significantly shorter than tsvector we
|
|
|
|
* can iterate through lexemes performing binary search of each lexeme
|
|
|
|
* from lexemes in tsvector.
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
|
|
|
for (i = 0; i < nlexemes; i++)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
char *lex;
|
|
|
|
int lex_len,
|
|
|
|
lex_pos;
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
if (nulls[i])
|
|
|
|
ereport(ERROR,
|
2016-08-05 21:14:08 +02:00
|
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
2016-03-11 17:22:36 +01:00
|
|
|
errmsg("lexeme array may not contain nulls")));
|
|
|
|
|
|
|
|
lex = VARDATA(dlexemes[i]);
|
|
|
|
lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
|
|
|
|
lex_pos = tsvector_bsearch(tsout, lex, lex_len);
|
|
|
|
|
|
|
|
if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
|
|
|
|
{
|
|
|
|
WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
|
2016-06-10 00:02:36 +02:00
|
|
|
|
2016-03-11 17:22:36 +01:00
|
|
|
while (j--)
|
|
|
|
{
|
|
|
|
WEP_SETWEIGHT(*p, weight);
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(tsin, 0);
|
|
|
|
PG_FREE_IF_COPY(lexemes, 2);
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(tsout);
|
|
|
|
}
|
|
|
|
|
2008-05-16 18:31:02 +02:00
|
|
|
/*
 * Compare the lexemes of two WordEntry items.  stra/strb are the string
 * storage areas the entries' pos offsets are relative to.  The result is
 * whatever tsCompareString() returns (called with its last argument false).
 */
#define compareEntry(stra, enta, strb, entb) \
	tsCompareString((stra) + (enta)->pos, (enta)->len, \
					(strb) + (entb)->pos, (entb)->len, \
					false)
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2007-10-23 02:51:23 +02:00
|
|
|
/*
|
|
|
|
* Add positions from src to dest after offsetting them by maxpos.
|
|
|
|
* Return the number added (might be less than expected due to overflow)
|
|
|
|
*/
|
2012-06-25 00:51:46 +02:00
|
|
|
static int32
|
2007-11-16 00:23:44 +01:00
|
|
|
add_pos(TSVector src, WordEntry *srcptr,
|
|
|
|
TSVector dest, WordEntry *destptr,
|
2012-06-25 00:51:46 +02:00
|
|
|
int32 maxpos)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-09-11 10:46:29 +02:00
|
|
|
uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
|
2007-08-21 03:11:32 +02:00
|
|
|
int i;
|
|
|
|
uint16 slen = POSDATALEN(src, srcptr),
|
|
|
|
startlen;
|
|
|
|
WordEntryPos *spos = POSDATAPTR(src, srcptr),
|
|
|
|
*dpos = POSDATAPTR(dest, destptr);
|
|
|
|
|
|
|
|
if (!destptr->haspos)
|
|
|
|
*clen = 0;
|
|
|
|
|
|
|
|
startlen = *clen;
|
2007-10-23 02:51:23 +02:00
|
|
|
for (i = 0;
|
|
|
|
i < slen && *clen < MAXNUMPOS &&
|
2007-11-16 00:23:44 +01:00
|
|
|
(*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
|
2007-10-23 02:51:23 +02:00
|
|
|
i++)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
|
|
|
|
WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
|
|
|
|
(*clen)++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*clen != startlen)
|
|
|
|
destptr->haspos = 1;
|
|
|
|
return *clen - startlen;
|
|
|
|
}
|
|
|
|
|
2016-03-11 17:22:36 +01:00
|
|
|
/*
|
|
|
|
* Perform binary search of given lexeme in TSVector.
|
|
|
|
* Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
|
|
|
|
* found.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
|
|
|
|
{
|
|
|
|
WordEntry *arrin = ARRPTR(tsv);
|
|
|
|
int StopLow = 0,
|
|
|
|
StopHigh = tsv->size,
|
|
|
|
StopMiddle,
|
|
|
|
cmp;
|
|
|
|
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
StopMiddle = (StopLow + StopHigh) / 2;
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
cmp = tsCompareString(lexeme, lexeme_len,
|
2016-06-10 00:02:36 +02:00
|
|
|
STRPTR(tsv) + arrin[StopMiddle].pos,
|
|
|
|
arrin[StopMiddle].len,
|
|
|
|
false);
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
if (cmp < 0)
|
|
|
|
StopHigh = StopMiddle;
|
|
|
|
else if (cmp > 0)
|
|
|
|
StopLow = StopMiddle + 1;
|
2016-06-10 00:02:36 +02:00
|
|
|
else /* found it */
|
2016-03-11 17:22:36 +01:00
|
|
|
return StopMiddle;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2016-08-05 22:09:06 +02:00
|
|
|
/*
 * qsort comparator functions
 */

/*
 * Order two ints ascending.  Returns -1, 0 or 1.  Comparison results are
 * used rather than subtraction, so there is no overflow for extreme values.
 */
static int
compare_int(const void *va, const void *vb)
{
	const int  *pa = (const int *) va;
	const int  *pb = (const int *) vb;

	return (*pa > *pb) - (*pa < *pb);
}
|
|
|
|
|
2016-08-05 22:09:06 +02:00
|
|
|
static int
|
|
|
|
compare_text_lexemes(const void *va, const void *vb)
|
|
|
|
{
|
|
|
|
Datum a = *((const Datum *) va);
|
|
|
|
Datum b = *((const Datum *) vb);
|
|
|
|
char *alex = VARDATA_ANY(a);
|
|
|
|
int alex_len = VARSIZE_ANY_EXHDR(a);
|
|
|
|
char *blex = VARDATA_ANY(b);
|
|
|
|
int blex_len = VARSIZE_ANY_EXHDR(b);
|
|
|
|
|
|
|
|
return tsCompareString(alex, alex_len, blex, blex_len, false);
|
|
|
|
}
|
|
|
|
|
2016-03-11 17:22:36 +01:00
|
|
|
/*
|
|
|
|
* Internal routine to delete lexemes from TSVector by array of offsets.
|
|
|
|
*
|
2016-08-05 21:14:08 +02:00
|
|
|
* int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
|
2016-03-11 17:22:36 +01:00
|
|
|
* int indices_count -- size of that array
|
|
|
|
*
|
|
|
|
* Returns new TSVector without given lexemes along with their positions
|
|
|
|
* and weights.
|
|
|
|
*/
|
|
|
|
static TSVector
|
|
|
|
tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
|
|
|
|
int indices_count)
|
|
|
|
{
|
|
|
|
TSVector tsout;
|
|
|
|
WordEntry *arrin = ARRPTR(tsv),
|
|
|
|
*arrout;
|
|
|
|
char *data = STRPTR(tsv),
|
|
|
|
*dataout;
|
2016-08-05 21:14:08 +02:00
|
|
|
int i, /* index in arrin */
|
|
|
|
j, /* index in arrout */
|
|
|
|
k, /* index in indices_to_delete */
|
|
|
|
curoff; /* index in dataout area */
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
/*
|
2016-08-05 21:14:08 +02:00
|
|
|
* Sort the filter array to simplify membership checks below. Also, get
|
|
|
|
* rid of any duplicate entries, so that we can assume that indices_count
|
|
|
|
* is exactly equal to the number of lexemes that will be removed.
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
|
|
|
if (indices_count > 1)
|
2016-08-05 21:14:08 +02:00
|
|
|
{
|
|
|
|
int kp;
|
|
|
|
|
2016-08-05 22:09:06 +02:00
|
|
|
qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
|
2016-08-05 21:14:08 +02:00
|
|
|
kp = 0;
|
|
|
|
for (k = 1; k < indices_count; k++)
|
|
|
|
{
|
|
|
|
if (indices_to_delete[k] != indices_to_delete[kp])
|
|
|
|
indices_to_delete[++kp] = indices_to_delete[k];
|
|
|
|
}
|
|
|
|
indices_count = ++kp;
|
|
|
|
}
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
/*
|
2016-08-05 21:14:08 +02:00
|
|
|
* Here we overestimate tsout size, since we don't know how much space is
|
|
|
|
* used by the deleted lexeme(s). We will set exact size below.
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
2016-08-05 21:14:08 +02:00
|
|
|
tsout = (TSVector) palloc0(VARSIZE(tsv));
|
|
|
|
|
|
|
|
/* This count must be correct because STRPTR(tsout) relies on it. */
|
|
|
|
tsout->size = tsv->size - indices_count;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
|
|
|
|
*/
|
|
|
|
arrout = ARRPTR(tsout);
|
2016-03-11 17:22:36 +01:00
|
|
|
dataout = STRPTR(tsout);
|
2016-08-05 21:14:08 +02:00
|
|
|
curoff = 0;
|
2016-03-11 17:22:36 +01:00
|
|
|
for (i = j = k = 0; i < tsv->size; i++)
|
|
|
|
{
|
|
|
|
/*
|
2016-08-05 21:14:08 +02:00
|
|
|
* If current i is present in indices_to_delete, skip this lexeme.
|
|
|
|
* Since indices_to_delete is already sorted, we only need to check
|
|
|
|
* the current (k'th) entry.
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
2016-06-10 00:02:36 +02:00
|
|
|
if (k < indices_count && i == indices_to_delete[k])
|
|
|
|
{
|
2016-03-11 17:22:36 +01:00
|
|
|
k++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-08-05 21:14:08 +02:00
|
|
|
/* Copy lexeme and its positions and weights */
|
2016-03-11 17:22:36 +01:00
|
|
|
memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
|
|
|
|
arrout[j].haspos = arrin[i].haspos;
|
|
|
|
arrout[j].len = arrin[i].len;
|
|
|
|
arrout[j].pos = curoff;
|
|
|
|
curoff += arrin[i].len;
|
|
|
|
if (arrin[i].haspos)
|
|
|
|
{
|
2016-08-05 21:14:08 +02:00
|
|
|
int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
|
|
|
|
+ sizeof(uint16);
|
2016-06-10 00:02:36 +02:00
|
|
|
|
2016-03-11 17:22:36 +01:00
|
|
|
curoff = SHORTALIGN(curoff);
|
|
|
|
memcpy(dataout + curoff,
|
|
|
|
STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
|
|
|
|
len);
|
|
|
|
curoff += len;
|
|
|
|
}
|
|
|
|
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-08-05 21:14:08 +02:00
|
|
|
* k should now be exactly equal to indices_count. If it isn't then the
|
|
|
|
* caller provided us with indices outside of [0, tsv->size) range and
|
|
|
|
* estimation of tsout's size is wrong.
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
|
|
|
Assert(k == indices_count);
|
|
|
|
|
|
|
|
SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
|
|
|
|
return tsout;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Delete given lexeme from tsvector.
|
2016-05-06 01:43:32 +02:00
|
|
|
* Implementation of user-level ts_delete(tsvector, text).
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsvector_delete_str(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector tsin = PG_GETARG_TSVECTOR(0),
|
|
|
|
tsout;
|
|
|
|
text *tlexeme = PG_GETARG_TEXT_P(1);
|
|
|
|
char *lexeme = VARDATA(tlexeme);
|
|
|
|
int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
|
|
|
|
skip_index;
|
|
|
|
|
|
|
|
if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
|
|
|
|
PG_RETURN_POINTER(tsin);
|
|
|
|
|
|
|
|
tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(tsin, 0);
|
|
|
|
PG_FREE_IF_COPY(tlexeme, 1);
|
|
|
|
PG_RETURN_POINTER(tsout);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Delete given array of lexemes from tsvector.
|
2016-05-06 01:43:32 +02:00
|
|
|
* Implementation of user-level ts_delete(tsvector, text[]).
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsvector_delete_arr(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector tsin = PG_GETARG_TSVECTOR(0),
|
|
|
|
tsout;
|
|
|
|
ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
|
2016-06-10 00:02:36 +02:00
|
|
|
int i,
|
|
|
|
nlex,
|
2016-03-11 17:22:36 +01:00
|
|
|
skip_count,
|
|
|
|
*skip_indices;
|
|
|
|
Datum *dlexemes;
|
|
|
|
bool *nulls;
|
|
|
|
|
|
|
|
deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
|
|
|
|
&dlexemes, &nulls, &nlex);
|
|
|
|
|
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* In typical use case array of lexemes to delete is relatively small. So
|
2016-08-05 21:14:08 +02:00
|
|
|
* here we optimize things for that scenario: iterate through lexarr
|
2016-03-11 17:22:36 +01:00
|
|
|
* performing binary search of each lexeme from lexarr in tsvector.
|
|
|
|
*/
|
|
|
|
skip_indices = palloc0(nlex * sizeof(int));
|
|
|
|
for (i = skip_count = 0; i < nlex; i++)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
char *lex;
|
|
|
|
int lex_len,
|
|
|
|
lex_pos;
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
if (nulls[i])
|
|
|
|
ereport(ERROR,
|
2016-08-05 21:14:08 +02:00
|
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
2016-03-11 17:22:36 +01:00
|
|
|
errmsg("lexeme array may not contain nulls")));
|
|
|
|
|
2016-08-05 21:14:08 +02:00
|
|
|
lex = VARDATA_ANY(dlexemes[i]);
|
2016-03-11 17:22:36 +01:00
|
|
|
lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
|
|
|
|
lex_pos = tsvector_bsearch(tsin, lex, lex_len);
|
|
|
|
|
|
|
|
if (lex_pos >= 0)
|
|
|
|
skip_indices[skip_count++] = lex_pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
|
|
|
|
|
|
|
|
pfree(skip_indices);
|
|
|
|
PG_FREE_IF_COPY(tsin, 0);
|
|
|
|
PG_FREE_IF_COPY(lexemes, 1);
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(tsout);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Expand tsvector as table with following columns:
|
2016-06-10 00:02:36 +02:00
|
|
|
* lexeme: lexeme text
|
|
|
|
* positions: integer array of lexeme positions
|
|
|
|
* weights: char array of weights corresponding to positions
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsvector_unnest(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
FuncCallContext *funcctx;
|
|
|
|
TSVector tsin;
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
|
|
{
|
|
|
|
MemoryContext oldcontext;
|
|
|
|
TupleDesc tupdesc;
|
|
|
|
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
|
|
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
|
|
|
|
|
|
|
tupdesc = CreateTemplateTupleDesc(3, false);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
|
|
|
|
TEXTOID, -1, 0);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
|
|
|
|
INT2ARRAYOID, -1, 0);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
|
|
|
|
TEXTARRAYOID, -1, 0);
|
|
|
|
funcctx->tuple_desc = BlessTupleDesc(tupdesc);
|
|
|
|
|
|
|
|
funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
}
|
|
|
|
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
tsin = (TSVector) funcctx->user_fctx;
|
|
|
|
|
|
|
|
if (funcctx->call_cntr < tsin->size)
|
|
|
|
{
|
|
|
|
WordEntry *arrin = ARRPTR(tsin);
|
|
|
|
char *data = STRPTR(tsin);
|
|
|
|
HeapTuple tuple;
|
|
|
|
int j,
|
|
|
|
i = funcctx->call_cntr;
|
|
|
|
bool nulls[] = {false, false, false};
|
|
|
|
Datum values[3];
|
|
|
|
|
|
|
|
values[0] = PointerGetDatum(
|
2016-06-10 00:02:36 +02:00
|
|
|
cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
|
|
|
|
);
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
if (arrin[i].haspos)
|
|
|
|
{
|
|
|
|
WordEntryPosVector *posv;
|
|
|
|
Datum *positions;
|
|
|
|
Datum *weights;
|
|
|
|
char weight;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Internally tsvector stores position and weight in the same
|
2016-06-10 00:02:36 +02:00
|
|
|
* uint16 (2 bits for weight, 14 for position). Here we extract
|
|
|
|
* that in two separate arrays.
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
|
|
|
posv = _POSVECPTR(tsin, arrin + i);
|
|
|
|
positions = palloc(posv->npos * sizeof(Datum));
|
2016-06-10 00:02:36 +02:00
|
|
|
weights = palloc(posv->npos * sizeof(Datum));
|
2016-03-11 17:22:36 +01:00
|
|
|
for (j = 0; j < posv->npos; j++)
|
|
|
|
{
|
|
|
|
positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
|
|
|
|
weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
|
|
|
|
weights[j] = PointerGetDatum(
|
2016-06-10 00:02:36 +02:00
|
|
|
cstring_to_text_with_len(&weight, 1)
|
|
|
|
);
|
2016-03-11 17:22:36 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
values[1] = PointerGetDatum(
|
2016-06-10 00:02:36 +02:00
|
|
|
construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
|
2016-03-11 17:22:36 +01:00
|
|
|
values[2] = PointerGetDatum(
|
2016-06-10 00:02:36 +02:00
|
|
|
construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
|
2016-03-11 17:22:36 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
nulls[1] = nulls[2] = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
|
|
|
|
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
pfree(tsin);
|
|
|
|
SRF_RETURN_DONE(funcctx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert tsvector to array of lexemes.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsvector_to_array(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
TSVector tsin = PG_GETARG_TSVECTOR(0);
|
|
|
|
WordEntry *arrin = ARRPTR(tsin);
|
|
|
|
Datum *elements;
|
|
|
|
int i;
|
|
|
|
ArrayType *array;
|
2016-03-11 17:22:36 +01:00
|
|
|
|
2016-03-11 18:10:20 +01:00
|
|
|
elements = palloc(tsin->size * sizeof(Datum));
|
|
|
|
|
2016-03-11 17:22:36 +01:00
|
|
|
for (i = 0; i < tsin->size; i++)
|
|
|
|
{
|
|
|
|
elements[i] = PointerGetDatum(
|
2016-06-10 00:02:36 +02:00
|
|
|
cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
|
|
|
|
);
|
2016-03-11 17:22:36 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
|
2016-03-11 18:10:20 +01:00
|
|
|
|
|
|
|
pfree(elements);
|
2016-03-11 17:22:36 +01:00
|
|
|
PG_FREE_IF_COPY(tsin, 0);
|
|
|
|
PG_RETURN_POINTER(array);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build tsvector from array of lexemes.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
array_to_tsvector(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
|
|
|
|
TSVector tsout;
|
|
|
|
Datum *dlexemes;
|
|
|
|
WordEntry *arrout;
|
|
|
|
bool *nulls;
|
|
|
|
int nitems,
|
|
|
|
i,
|
2016-08-05 22:09:06 +02:00
|
|
|
j,
|
2016-03-11 17:22:36 +01:00
|
|
|
tslen,
|
|
|
|
datalen = 0;
|
|
|
|
char *cur;
|
|
|
|
|
|
|
|
deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
|
|
|
|
|
2016-08-05 22:09:06 +02:00
|
|
|
/* Reject nulls (maybe we should just ignore them, instead?) */
|
2016-03-11 17:22:36 +01:00
|
|
|
for (i = 0; i < nitems; i++)
|
|
|
|
{
|
|
|
|
if (nulls[i])
|
|
|
|
ereport(ERROR,
|
2016-08-05 21:14:08 +02:00
|
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
2016-03-11 17:22:36 +01:00
|
|
|
errmsg("lexeme array may not contain nulls")));
|
2016-08-05 22:09:06 +02:00
|
|
|
}
|
2016-03-11 17:22:36 +01:00
|
|
|
|
2016-08-05 22:09:06 +02:00
|
|
|
/* Sort and de-dup, because this is required for a valid tsvector. */
|
|
|
|
if (nitems > 1)
|
|
|
|
{
|
|
|
|
qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
|
|
|
|
j = 0;
|
|
|
|
for (i = 1; i < nitems; i++)
|
|
|
|
{
|
|
|
|
if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
|
|
|
|
dlexemes[++j] = dlexemes[i];
|
|
|
|
}
|
|
|
|
nitems = ++j;
|
2016-03-11 17:22:36 +01:00
|
|
|
}
|
|
|
|
|
2016-08-05 22:09:06 +02:00
|
|
|
/* Calculate space needed for surviving lexemes. */
|
|
|
|
for (i = 0; i < nitems; i++)
|
|
|
|
datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
|
2016-03-11 17:22:36 +01:00
|
|
|
tslen = CALCDATASIZE(nitems, datalen);
|
2016-08-05 22:09:06 +02:00
|
|
|
|
|
|
|
/* Allocate and fill tsvector. */
|
2016-03-11 17:22:36 +01:00
|
|
|
tsout = (TSVector) palloc0(tslen);
|
|
|
|
SET_VARSIZE(tsout, tslen);
|
|
|
|
tsout->size = nitems;
|
2016-08-05 22:09:06 +02:00
|
|
|
|
2016-03-11 17:22:36 +01:00
|
|
|
arrout = ARRPTR(tsout);
|
|
|
|
cur = STRPTR(tsout);
|
|
|
|
for (i = 0; i < nitems; i++)
|
|
|
|
{
|
2016-08-05 22:09:06 +02:00
|
|
|
char *lex = VARDATA_ANY(dlexemes[i]);
|
2016-06-10 00:02:36 +02:00
|
|
|
int lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
memcpy(cur, lex, lex_len);
|
|
|
|
arrout[i].haspos = 0;
|
|
|
|
arrout[i].len = lex_len;
|
|
|
|
arrout[i].pos = cur - STRPTR(tsout);
|
|
|
|
cur += lex_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(v, 0);
|
|
|
|
PG_RETURN_POINTER(tsout);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-05-06 01:43:32 +02:00
|
|
|
* ts_filter(): keep only lexemes with given weights in tsvector.
|
2016-03-11 17:22:36 +01:00
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsvector_filter(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector tsin = PG_GETARG_TSVECTOR(0),
|
|
|
|
tsout;
|
|
|
|
ArrayType *weights = PG_GETARG_ARRAYTYPE_P(1);
|
|
|
|
WordEntry *arrin = ARRPTR(tsin),
|
|
|
|
*arrout;
|
|
|
|
char *datain = STRPTR(tsin),
|
|
|
|
*dataout;
|
|
|
|
Datum *dweights;
|
|
|
|
bool *nulls;
|
2016-05-06 01:43:32 +02:00
|
|
|
int nweights;
|
2016-06-10 00:02:36 +02:00
|
|
|
int i,
|
|
|
|
j;
|
2016-05-04 16:58:08 +02:00
|
|
|
int cur_pos = 0;
|
|
|
|
char mask = 0;
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
deconstruct_array(weights, CHAROID, 1, true, 'c',
|
2016-05-06 01:43:32 +02:00
|
|
|
&dweights, &nulls, &nweights);
|
2016-03-11 17:22:36 +01:00
|
|
|
|
2016-05-06 01:43:32 +02:00
|
|
|
for (i = 0; i < nweights; i++)
|
2016-03-11 17:22:36 +01:00
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
char char_weight;
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
if (nulls[i])
|
|
|
|
ereport(ERROR,
|
2016-08-05 21:14:08 +02:00
|
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
2016-03-11 17:22:36 +01:00
|
|
|
errmsg("weight array may not contain nulls")));
|
|
|
|
|
|
|
|
char_weight = DatumGetChar(dweights[i]);
|
|
|
|
switch (char_weight)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
case 'A':
|
|
|
|
case 'a':
|
2016-03-11 17:22:36 +01:00
|
|
|
mask = mask | 8;
|
|
|
|
break;
|
2016-06-10 00:02:36 +02:00
|
|
|
case 'B':
|
|
|
|
case 'b':
|
2016-03-11 17:22:36 +01:00
|
|
|
mask = mask | 4;
|
|
|
|
break;
|
2016-06-10 00:02:36 +02:00
|
|
|
case 'C':
|
|
|
|
case 'c':
|
2016-03-11 17:22:36 +01:00
|
|
|
mask = mask | 2;
|
|
|
|
break;
|
2016-06-10 00:02:36 +02:00
|
|
|
case 'D':
|
|
|
|
case 'd':
|
2016-03-11 17:22:36 +01:00
|
|
|
mask = mask | 1;
|
|
|
|
break;
|
|
|
|
default:
|
2016-06-10 00:02:36 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("unrecognized weight: \"%c\"", char_weight)));
|
2016-03-11 17:22:36 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
tsout = (TSVector) palloc0(VARSIZE(tsin));
|
|
|
|
tsout->size = tsin->size;
|
|
|
|
arrout = ARRPTR(tsout);
|
|
|
|
dataout = STRPTR(tsout);
|
|
|
|
|
|
|
|
for (i = j = 0; i < tsin->size; i++)
|
|
|
|
{
|
|
|
|
WordEntryPosVector *posvin,
|
2016-06-10 00:02:36 +02:00
|
|
|
*posvout;
|
|
|
|
int npos = 0;
|
|
|
|
int k;
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
if (!arrin[i].haspos)
|
|
|
|
continue;
|
|
|
|
|
2016-06-10 00:02:36 +02:00
|
|
|
posvin = _POSVECPTR(tsin, arrin + i);
|
2016-03-11 17:22:36 +01:00
|
|
|
posvout = (WordEntryPosVector *)
|
2016-06-10 00:02:36 +02:00
|
|
|
(dataout + SHORTALIGN(cur_pos + arrin[i].len));
|
2016-03-11 17:22:36 +01:00
|
|
|
|
|
|
|
for (k = 0; k < posvin->npos; k++)
|
|
|
|
{
|
|
|
|
if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
|
|
|
|
posvout->pos[npos++] = posvin->pos[k];
|
|
|
|
}
|
|
|
|
|
2016-05-03 16:52:25 +02:00
|
|
|
/* if no satisfactory positions found, skip lexeme */
|
|
|
|
if (!npos)
|
2016-03-11 17:22:36 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
arrout[j].haspos = true;
|
|
|
|
arrout[j].len = arrin[i].len;
|
|
|
|
arrout[j].pos = cur_pos;
|
|
|
|
|
|
|
|
memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
|
|
|
|
posvout->npos = npos;
|
|
|
|
cur_pos += SHORTALIGN(arrin[i].len);
|
2016-06-10 00:02:36 +02:00
|
|
|
cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
|
|
|
|
sizeof(uint16);
|
2016-03-11 17:22:36 +01:00
|
|
|
j++;
|
|
|
|
}
|
|
|
|
|
|
|
|
tsout->size = j;
|
|
|
|
if (dataout != STRPTR(tsout))
|
|
|
|
memmove(STRPTR(tsout), dataout, cur_pos);
|
|
|
|
|
|
|
|
SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
|
|
|
|
|
|
|
|
PG_FREE_IF_COPY(tsin, 0);
|
|
|
|
PG_RETURN_POINTER(tsout);
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_concat(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector in1 = PG_GETARG_TSVECTOR(0);
|
|
|
|
TSVector in2 = PG_GETARG_TSVECTOR(1);
|
|
|
|
TSVector out;
|
|
|
|
WordEntry *ptr;
|
|
|
|
WordEntry *ptr1,
|
|
|
|
*ptr2;
|
|
|
|
WordEntryPos *p;
|
|
|
|
int maxpos = 0,
|
|
|
|
i,
|
|
|
|
j,
|
|
|
|
i1,
|
2007-10-23 02:51:23 +02:00
|
|
|
i2,
|
2011-08-26 22:51:34 +02:00
|
|
|
dataoff,
|
|
|
|
output_bytes,
|
|
|
|
output_size;
|
2007-08-21 03:11:32 +02:00
|
|
|
char *data,
|
|
|
|
*data1,
|
|
|
|
*data2;
|
|
|
|
|
2011-08-26 22:51:34 +02:00
|
|
|
/* Get max position in in1; we'll need this to offset in2's positions */
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr = ARRPTR(in1);
|
|
|
|
i = in1->size;
|
|
|
|
while (i--)
|
|
|
|
{
|
|
|
|
if ((j = POSDATALEN(in1, ptr)) != 0)
|
|
|
|
{
|
|
|
|
p = POSDATAPTR(in1, ptr);
|
|
|
|
while (j--)
|
|
|
|
{
|
|
|
|
if (WEP_GETPOS(*p) > maxpos)
|
|
|
|
maxpos = WEP_GETPOS(*p);
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
ptr1 = ARRPTR(in1);
|
|
|
|
ptr2 = ARRPTR(in2);
|
|
|
|
data1 = STRPTR(in1);
|
|
|
|
data2 = STRPTR(in2);
|
|
|
|
i1 = in1->size;
|
|
|
|
i2 = in2->size;
|
2011-08-26 22:51:34 +02:00
|
|
|
|
|
|
|
/*
|
2012-06-10 21:20:04 +02:00
|
|
|
* Conservative estimate of space needed. We might need all the data in
|
|
|
|
* both inputs, and conceivably add a pad byte before position data for
|
|
|
|
* each item where there was none before.
|
2011-08-26 22:51:34 +02:00
|
|
|
*/
|
|
|
|
output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
|
|
|
|
|
|
|
|
out = (TSVector) palloc0(output_bytes);
|
|
|
|
SET_VARSIZE(out, output_bytes);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We must make out->size valid so that STRPTR(out) is sensible. We'll
|
|
|
|
* collapse out any unused space at the end.
|
|
|
|
*/
|
2007-08-21 03:11:32 +02:00
|
|
|
out->size = in1->size + in2->size;
|
2011-08-26 22:51:34 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr = ARRPTR(out);
|
2007-10-23 02:51:23 +02:00
|
|
|
data = STRPTR(out);
|
|
|
|
dataoff = 0;
|
2007-08-21 03:11:32 +02:00
|
|
|
while (i1 && i2)
|
|
|
|
{
|
|
|
|
int cmp = compareEntry(data1, ptr1, data2, ptr2);
|
|
|
|
|
|
|
|
if (cmp < 0)
|
|
|
|
{ /* in1 first */
|
|
|
|
ptr->haspos = ptr1->haspos;
|
|
|
|
ptr->len = ptr1->len;
|
2007-10-23 02:51:23 +02:00
|
|
|
memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
|
|
|
|
ptr->pos = dataoff;
|
|
|
|
dataoff += ptr1->len;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
2007-10-23 02:51:23 +02:00
|
|
|
dataoff = SHORTALIGN(dataoff);
|
|
|
|
memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
|
|
|
dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2007-11-16 00:23:44 +01:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr1++;
|
|
|
|
i1--;
|
|
|
|
}
|
|
|
|
else if (cmp > 0)
|
|
|
|
{ /* in2 first */
|
|
|
|
ptr->haspos = ptr2->haspos;
|
|
|
|
ptr->len = ptr2->len;
|
2007-10-23 02:51:23 +02:00
|
|
|
memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
|
|
|
|
ptr->pos = dataoff;
|
|
|
|
dataoff += ptr2->len;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
|
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
|
|
|
|
if (addlen == 0)
|
|
|
|
ptr->haspos = 0;
|
|
|
|
else
|
2007-10-23 02:51:23 +02:00
|
|
|
{
|
|
|
|
dataoff = SHORTALIGN(dataoff);
|
|
|
|
dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr2++;
|
|
|
|
i2--;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ptr->haspos = ptr1->haspos | ptr2->haspos;
|
|
|
|
ptr->len = ptr1->len;
|
2007-10-23 02:51:23 +02:00
|
|
|
memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
|
|
|
|
ptr->pos = dataoff;
|
|
|
|
dataoff += ptr1->len;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
|
|
|
if (ptr1->haspos)
|
|
|
|
{
|
2007-10-23 02:51:23 +02:00
|
|
|
dataoff = SHORTALIGN(dataoff);
|
|
|
|
memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
|
|
|
dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ptr2->haspos)
|
2007-10-23 02:51:23 +02:00
|
|
|
dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2007-11-16 02:51:22 +01:00
|
|
|
else /* must have ptr2->haspos */
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
|
|
|
|
if (addlen == 0)
|
|
|
|
ptr->haspos = 0;
|
|
|
|
else
|
2007-10-23 02:51:23 +02:00
|
|
|
{
|
|
|
|
dataoff = SHORTALIGN(dataoff);
|
|
|
|
dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr1++;
|
|
|
|
ptr2++;
|
|
|
|
i1--;
|
|
|
|
i2--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i1)
|
|
|
|
{
|
|
|
|
ptr->haspos = ptr1->haspos;
|
|
|
|
ptr->len = ptr1->len;
|
2007-10-23 02:51:23 +02:00
|
|
|
memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
|
|
|
|
ptr->pos = dataoff;
|
|
|
|
dataoff += ptr1->len;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
2007-10-23 02:51:23 +02:00
|
|
|
dataoff = SHORTALIGN(dataoff);
|
|
|
|
memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
|
|
|
dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr1++;
|
|
|
|
i1--;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i2)
|
|
|
|
{
|
|
|
|
ptr->haspos = ptr2->haspos;
|
|
|
|
ptr->len = ptr2->len;
|
2007-10-23 02:51:23 +02:00
|
|
|
memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
|
|
|
|
ptr->pos = dataoff;
|
|
|
|
dataoff += ptr2->len;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ptr->haspos)
|
|
|
|
{
|
|
|
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
|
|
|
|
|
|
|
if (addlen == 0)
|
|
|
|
ptr->haspos = 0;
|
|
|
|
else
|
2007-10-23 02:51:23 +02:00
|
|
|
{
|
|
|
|
dataoff = SHORTALIGN(dataoff);
|
|
|
|
dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2007-09-07 18:03:40 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
ptr++;
|
|
|
|
ptr2++;
|
|
|
|
i2--;
|
|
|
|
}
|
|
|
|
|
2007-10-23 02:51:23 +02:00
|
|
|
/*
|
2007-11-16 00:23:44 +01:00
|
|
|
* Instead of checking each offset individually, we check for overflow of
|
|
|
|
* pos fields once at the end.
|
2007-10-23 02:51:23 +02:00
|
|
|
*/
|
|
|
|
if (dataoff > MAXSTRPOS)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
2008-03-05 16:50:37 +01:00
|
|
|
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
|
2007-10-23 02:51:23 +02:00
|
|
|
|
2011-08-26 22:51:34 +02:00
|
|
|
/*
|
|
|
|
* Adjust sizes (asserting that we didn't overrun the original estimates)
|
|
|
|
* and collapse out any unused array entries.
|
|
|
|
*/
|
|
|
|
output_size = ptr - ARRPTR(out);
|
|
|
|
Assert(output_size <= out->size);
|
|
|
|
out->size = output_size;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (data != STRPTR(out))
|
2007-10-23 02:51:23 +02:00
|
|
|
memmove(STRPTR(out), data, dataoff);
|
2011-08-26 22:51:34 +02:00
|
|
|
output_bytes = CALCDATASIZE(out->size, dataoff);
|
|
|
|
Assert(output_bytes <= VARSIZE(out));
|
|
|
|
SET_VARSIZE(out, output_bytes);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
PG_FREE_IF_COPY(in1, 0);
|
|
|
|
PG_FREE_IF_COPY(in2, 1);
|
|
|
|
PG_RETURN_POINTER(out);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* Compare two strings by tsvector rules.
|
2011-01-09 20:34:50 +01:00
|
|
|
*
|
|
|
|
* if isPrefix = true then it returns zero value iff b has prefix a
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
2012-06-25 00:51:46 +02:00
|
|
|
int32
|
2008-05-16 18:31:02 +02:00
|
|
|
tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
int cmp;
|
2008-05-16 18:31:02 +02:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (lena == 0)
|
2008-05-16 18:31:02 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
if (prefix)
|
2011-01-09 20:34:50 +01:00
|
|
|
cmp = 0; /* empty string is prefix of anything */
|
2008-05-16 18:31:02 +02:00
|
|
|
else
|
2009-06-11 16:49:15 +02:00
|
|
|
cmp = (lenb > 0) ? -1 : 0;
|
2008-05-16 18:31:02 +02:00
|
|
|
}
|
2009-06-11 16:49:15 +02:00
|
|
|
else if (lenb == 0)
|
2008-05-16 18:31:02 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
cmp = (lena > 0) ? 1 : 0;
|
2008-05-16 18:31:02 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
cmp = memcmp(a, b, Min(lena, lenb));
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (prefix)
|
2008-05-16 18:31:02 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
if (cmp == 0 && lena > lenb)
|
2011-01-09 20:34:50 +01:00
|
|
|
cmp = 1; /* a is longer, so not a prefix of b */
|
2008-05-16 18:31:02 +02:00
|
|
|
}
|
2011-01-09 20:34:50 +01:00
|
|
|
else if (cmp == 0 && lena != lenb)
|
2008-05-16 18:31:02 +02:00
|
|
|
{
|
|
|
|
cmp = (lena < lenb) ? -1 : 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return cmp;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-04-07 17:44:18 +02:00
|
|
|
* Check weight info or/and fill 'data' with the required positions
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
static bool
|
2016-04-07 17:44:18 +02:00
|
|
|
checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
|
|
|
|
ExecPhraseData *data)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
bool result = false;
|
2007-09-11 10:46:29 +02:00
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
if (entry->haspos && (val->weight || data))
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
WordEntryPosVector *posvec;
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We can't use the _POSVECPTR macro here because the pointer to the
|
|
|
|
* tsvector's lexeme storage is already contained in chkval->values.
|
|
|
|
*/
|
|
|
|
posvec = (WordEntryPosVector *)
|
|
|
|
(chkval->values + SHORTALIGN(entry->pos + entry->len));
|
|
|
|
|
|
|
|
if (val->weight && data)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
WordEntryPos *posvec_iter = posvec->pos;
|
|
|
|
WordEntryPos *dptr;
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Filter position information by weights
|
|
|
|
*/
|
|
|
|
dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
|
|
|
|
data->allocated = true;
|
|
|
|
|
|
|
|
/* Is there a position with a matching weight? */
|
|
|
|
while (posvec_iter < posvec->pos + posvec->npos)
|
|
|
|
{
|
|
|
|
/* If true, append this position to the data->pos */
|
|
|
|
if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
|
|
|
|
{
|
|
|
|
*dptr = WEP_GETPOS(*posvec_iter);
|
|
|
|
dptr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
posvec_iter++;
|
|
|
|
}
|
2007-09-11 10:46:29 +02:00
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
data->npos = dptr - data->pos;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
if (data->npos > 0)
|
|
|
|
result = true;
|
|
|
|
}
|
|
|
|
else if (val->weight)
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
WordEntryPos *posvec_iter = posvec->pos;
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
/* Is there a position with a matching weight? */
|
|
|
|
while (posvec_iter < posvec->pos + posvec->npos)
|
|
|
|
{
|
|
|
|
if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
|
|
|
|
{
|
|
|
|
result = true;
|
2016-06-10 00:02:36 +02:00
|
|
|
break; /* no need to go further */
|
2016-04-07 17:44:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
posvec_iter++;
|
|
|
|
}
|
|
|
|
}
|
2016-06-10 00:02:36 +02:00
|
|
|
else /* data != NULL */
|
2016-04-07 17:44:18 +02:00
|
|
|
{
|
|
|
|
data->npos = posvec->npos;
|
2016-06-10 00:02:36 +02:00
|
|
|
data->pos = posvec->pos;
|
2016-04-07 17:44:18 +02:00
|
|
|
data->allocated = false;
|
|
|
|
result = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2016-04-07 17:44:18 +02:00
|
|
|
result = true;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Removes duplicate pos entries. We can't use uniquePos() from
|
|
|
|
* tsvector.c because array might be longer than MAXENTRYPOS
|
|
|
|
*
|
|
|
|
* Returns new length.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
uniqueLongPos(WordEntryPos *pos, int npos)
|
|
|
|
{
|
|
|
|
WordEntryPos *pos_iter,
|
2016-06-10 00:02:36 +02:00
|
|
|
*result;
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
if (npos <= 1)
|
|
|
|
return npos;
|
|
|
|
|
2016-04-08 11:02:45 +02:00
|
|
|
qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
result = pos;
|
|
|
|
pos_iter = pos + 1;
|
|
|
|
while (pos_iter < pos + npos)
|
|
|
|
{
|
|
|
|
if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
|
|
|
|
{
|
|
|
|
result++;
|
|
|
|
*result = WEP_GETPOS(*pos_iter);
|
|
|
|
}
|
|
|
|
|
|
|
|
pos_iter++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return result + 1 - pos;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* is there value 'val' in array or not ?
|
|
|
|
*/
|
|
|
|
static bool
|
2016-04-07 17:44:18 +02:00
|
|
|
checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-11-16 00:23:44 +01:00
|
|
|
CHKVAL *chkval = (CHKVAL *) checkval;
|
2007-09-07 17:09:56 +02:00
|
|
|
WordEntry *StopLow = chkval->arrb;
|
|
|
|
WordEntry *StopHigh = chkval->arre;
|
2008-05-16 18:31:02 +02:00
|
|
|
WordEntry *StopMiddle = StopHigh;
|
2009-06-11 16:49:15 +02:00
|
|
|
int difference = -1;
|
|
|
|
bool res = false;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/* Loop invariant: StopLow <= val < StopHigh */
|
|
|
|
while (StopLow < StopHigh)
|
|
|
|
{
|
|
|
|
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
2016-04-07 17:44:18 +02:00
|
|
|
difference = tsCompareString(chkval->operand + val->distance,
|
|
|
|
val->length,
|
|
|
|
chkval->values + StopMiddle->pos,
|
|
|
|
StopMiddle->len,
|
2009-06-11 16:49:15 +02:00
|
|
|
false);
|
2008-05-16 18:31:02 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
if (difference == 0)
|
2008-05-16 18:31:02 +02:00
|
|
|
{
|
2016-04-07 17:44:18 +02:00
|
|
|
/* Check weight info & fill 'data' with positions */
|
|
|
|
res = checkclass_str(chkval, StopMiddle, val, data);
|
2008-05-16 18:31:02 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (difference > 0)
|
2007-08-21 03:11:32 +02:00
|
|
|
StopLow = StopMiddle + 1;
|
|
|
|
else
|
|
|
|
StopHigh = StopMiddle;
|
|
|
|
}
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
if ((!res || data) && val->prefix)
|
2008-05-16 18:31:02 +02:00
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
WordEntryPos *allpos = NULL;
|
|
|
|
int npos = 0,
|
|
|
|
totalpos = 0;
|
|
|
|
|
2008-05-16 18:31:02 +02:00
|
|
|
/*
|
|
|
|
* there was a failed exact search, so we should scan further to find
|
2016-04-07 17:44:18 +02:00
|
|
|
* a prefix match. We also need to do so if caller needs position info
|
2008-05-16 18:31:02 +02:00
|
|
|
*/
|
2009-06-11 16:49:15 +02:00
|
|
|
if (StopLow >= StopHigh)
|
2008-05-16 18:31:02 +02:00
|
|
|
StopMiddle = StopHigh;
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
while ((!res || data) && StopMiddle < chkval->arre &&
|
|
|
|
tsCompareString(chkval->operand + val->distance,
|
|
|
|
val->length,
|
|
|
|
chkval->values + StopMiddle->pos,
|
|
|
|
StopMiddle->len,
|
2009-06-11 16:49:15 +02:00
|
|
|
true) == 0)
|
2008-05-16 18:31:02 +02:00
|
|
|
{
|
2016-04-07 17:44:18 +02:00
|
|
|
if (data)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We need to join position information
|
|
|
|
*/
|
|
|
|
res = checkclass_str(chkval, StopMiddle, val, data);
|
|
|
|
|
|
|
|
if (res)
|
|
|
|
{
|
|
|
|
while (npos + data->npos >= totalpos)
|
|
|
|
{
|
|
|
|
if (totalpos == 0)
|
|
|
|
{
|
|
|
|
totalpos = 256;
|
|
|
|
allpos = palloc(sizeof(WordEntryPos) * totalpos);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
totalpos *= 2;
|
|
|
|
allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
|
|
|
|
npos += data->npos;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
res = checkclass_str(chkval, StopMiddle, val, NULL);
|
|
|
|
}
|
2008-05-16 18:31:02 +02:00
|
|
|
|
|
|
|
StopMiddle++;
|
|
|
|
}
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
if (res && data)
|
|
|
|
{
|
|
|
|
/* Sort and make unique array of found positions */
|
|
|
|
data->pos = allpos;
|
|
|
|
data->npos = uniqueLongPos(allpos, npos);
|
|
|
|
data->allocated = true;
|
|
|
|
}
|
2008-05-16 18:31:02 +02:00
|
|
|
}
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
return res;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
/*
|
2016-12-16 17:50:07 +01:00
|
|
|
* Execute tsquery at or below an OP_PHRASE operator.
|
|
|
|
*
|
|
|
|
* This handles the recursion at levels where we need to care about
|
|
|
|
* match locations. In addition to the same arguments used for TS_execute,
|
|
|
|
* the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to
|
|
|
|
* be filled with lexeme match positions on success. data == NULL if no
|
|
|
|
* match data need be returned. (In practice, outside callers pass NULL,
|
|
|
|
* and only the internal recursion cases pass a data pointer.)
|
2016-04-07 17:44:18 +02:00
|
|
|
*/
|
|
|
|
static bool
|
2016-12-16 17:50:07 +01:00
|
|
|
TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
|
|
|
|
ExecPhraseData *data,
|
|
|
|
TSExecuteCallback chkcond)
|
2016-04-07 17:44:18 +02:00
|
|
|
{
|
|
|
|
/* since this function recurses, it could be driven to stack overflow */
|
|
|
|
check_stack_depth();
|
|
|
|
|
|
|
|
if (curitem->type == QI_VAL)
|
|
|
|
{
|
2016-12-16 17:50:07 +01:00
|
|
|
return chkcond(arg, (QueryOperand *) curitem, data);
|
2016-04-07 17:44:18 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2016-06-10 00:02:36 +02:00
|
|
|
ExecPhraseData Ldata = {0, false, NULL},
|
|
|
|
Rdata = {0, false, NULL};
|
|
|
|
WordEntryPos *Lpos,
|
2016-06-27 19:41:00 +02:00
|
|
|
*LposStart,
|
2016-06-10 00:02:36 +02:00
|
|
|
*Rpos,
|
|
|
|
*pos_iter = NULL;
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
Assert(curitem->qoperator.oper == OP_PHRASE);
|
|
|
|
|
|
|
|
if (!TS_phrase_execute(curitem + curitem->qoperator.left,
|
2016-12-16 17:50:07 +01:00
|
|
|
arg, flags, &Ldata, chkcond))
|
2016-04-07 17:44:18 +02:00
|
|
|
return false;
|
|
|
|
|
2016-12-16 17:50:07 +01:00
|
|
|
if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond))
|
2016-04-07 17:44:18 +02:00
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
2016-12-16 17:50:07 +01:00
|
|
|
* If either operand has no position information, then we normally
|
|
|
|
* return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
|
|
|
|
* return true, treating OP_PHRASE as if it were OP_AND.
|
2016-04-07 17:44:18 +02:00
|
|
|
*/
|
|
|
|
if (Ldata.npos == 0 || Rdata.npos == 0)
|
2016-06-27 19:47:32 +02:00
|
|
|
return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
|
2016-04-07 17:44:18 +02:00
|
|
|
|
|
|
|
/*
|
2016-12-16 17:50:07 +01:00
|
|
|
* Prepare output position array if needed.
|
2016-04-07 17:44:18 +02:00
|
|
|
*/
|
|
|
|
if (data)
|
|
|
|
{
|
2016-12-16 17:50:07 +01:00
|
|
|
/*
|
|
|
|
* We can recycle the righthand operand's result array if it was
|
|
|
|
* palloc'd, else must allocate our own. The number of matches
|
|
|
|
* couldn't be more than the smaller of the two operands' matches.
|
|
|
|
*/
|
2016-04-07 17:44:18 +02:00
|
|
|
if (!Rdata.allocated)
|
|
|
|
data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos));
|
|
|
|
else
|
|
|
|
data->pos = Rdata.pos;
|
|
|
|
|
|
|
|
data->allocated = true;
|
|
|
|
data->npos = 0;
|
|
|
|
pos_iter = data->pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-12-16 17:50:07 +01:00
|
|
|
* Find matches by distance. WEP_GETPOS() is needed because
|
|
|
|
* ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
|
|
|
|
*
|
|
|
|
* Note that the output positions are those of the matching RIGHT
|
|
|
|
* operands.
|
2016-04-07 17:44:18 +02:00
|
|
|
*/
|
2016-06-27 19:41:00 +02:00
|
|
|
Rpos = Rdata.pos;
|
|
|
|
LposStart = Ldata.pos;
|
2016-04-07 17:44:18 +02:00
|
|
|
while (Rpos < Rdata.pos + Rdata.npos)
|
|
|
|
{
|
2016-06-27 19:41:00 +02:00
|
|
|
/*
|
2016-08-05 20:58:13 +02:00
|
|
|
* We need to check all possible distances, so reset Lpos to
|
|
|
|
* guaranteed not yet satisfied position.
|
2016-06-27 19:41:00 +02:00
|
|
|
*/
|
|
|
|
Lpos = LposStart;
|
2016-04-07 17:44:18 +02:00
|
|
|
while (Lpos < Ldata.pos + Ldata.npos)
|
|
|
|
{
|
2016-06-27 19:41:00 +02:00
|
|
|
if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) ==
|
|
|
|
curitem->qoperator.distance)
|
2016-04-07 17:44:18 +02:00
|
|
|
{
|
2016-06-27 19:41:00 +02:00
|
|
|
/* MATCH! */
|
|
|
|
if (data)
|
2016-04-07 17:44:18 +02:00
|
|
|
{
|
2016-06-27 19:41:00 +02:00
|
|
|
/* Store position for upper phrase operator */
|
|
|
|
*pos_iter = WEP_GETPOS(*Rpos);
|
|
|
|
pos_iter++;
|
|
|
|
|
|
|
|
/*
|
2016-08-05 20:58:13 +02:00
|
|
|
* Set left start position to next, because current
|
|
|
|
* one could not satisfy distance for any other right
|
2016-06-27 19:41:00 +02:00
|
|
|
* position
|
|
|
|
*/
|
|
|
|
LposStart = Lpos + 1;
|
|
|
|
break;
|
2016-04-07 17:44:18 +02:00
|
|
|
}
|
2016-06-27 19:41:00 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2016-12-16 17:50:07 +01:00
|
|
|
* We are at the root of the phrase tree and hence we
|
|
|
|
* don't have to identify all the match positions.
|
|
|
|
* Just report success.
|
2016-06-27 19:41:00 +02:00
|
|
|
*/
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
}
|
2016-06-27 19:41:00 +02:00
|
|
|
else if (WEP_GETPOS(*Rpos) <= WEP_GETPOS(*Lpos) ||
|
|
|
|
WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <
|
2016-08-05 20:58:13 +02:00
|
|
|
curitem->qoperator.distance)
|
2016-04-07 17:44:18 +02:00
|
|
|
{
|
|
|
|
/*
|
2016-06-27 19:41:00 +02:00
|
|
|
* Go to the next Rpos, because Lpos is ahead or on less
|
|
|
|
* distance than required by current operator
|
2016-04-07 17:44:18 +02:00
|
|
|
*/
|
|
|
|
break;
|
2016-06-27 19:41:00 +02:00
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
Lpos++;
|
|
|
|
}
|
|
|
|
|
|
|
|
Rpos++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (data)
|
|
|
|
{
|
|
|
|
data->npos = pos_iter - data->pos;
|
|
|
|
|
|
|
|
if (data->npos > 0)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
/*
|
2011-01-09 20:34:50 +01:00
|
|
|
* Evaluate tsquery boolean expression.
|
2007-09-07 17:09:56 +02:00
|
|
|
*
|
2016-12-16 17:50:07 +01:00
|
|
|
* curitem: current tsquery item (initially, the first one)
|
|
|
|
* arg: opaque value to pass through to callback function
|
|
|
|
* flags: bitmask of flag bits shown in ts_utils.h
|
|
|
|
* chkcond: callback function to check whether a primitive value is present
|
|
|
|
*
|
|
|
|
* The logic here deals only with operators above any phrase operator, for
|
|
|
|
* which we do not need to worry about lexeme positions. As soon as we hit an
|
|
|
|
* OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
bool
|
2016-12-16 17:50:07 +01:00
|
|
|
TS_execute(QueryItem *curitem, void *arg, uint32 flags,
|
|
|
|
TSExecuteCallback chkcond)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-08-31 04:26:29 +02:00
|
|
|
/* since this function recurses, it could be driven to stack overflow */
|
|
|
|
check_stack_depth();
|
|
|
|
|
2007-09-07 17:09:56 +02:00
|
|
|
if (curitem->type == QI_VAL)
|
2016-12-16 17:50:07 +01:00
|
|
|
return chkcond(arg, (QueryOperand *) curitem,
|
2016-06-10 00:02:36 +02:00
|
|
|
NULL /* we don't need position info */ );
|
2007-09-07 17:09:56 +02:00
|
|
|
|
2009-07-16 08:33:46 +02:00
|
|
|
switch (curitem->qoperator.oper)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2007-09-07 17:09:56 +02:00
|
|
|
case OP_NOT:
|
2016-06-27 19:47:32 +02:00
|
|
|
if (flags & TS_EXEC_CALC_NOT)
|
2016-12-16 17:50:07 +01:00
|
|
|
return !TS_execute(curitem + 1, arg, flags, chkcond);
|
2007-09-07 17:09:56 +02:00
|
|
|
else
|
|
|
|
return true;
|
2011-01-09 20:34:50 +01:00
|
|
|
|
2007-09-07 17:09:56 +02:00
|
|
|
case OP_AND:
|
2016-12-16 17:50:07 +01:00
|
|
|
if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
|
|
|
|
return TS_execute(curitem + 1, arg, flags, chkcond);
|
2007-09-07 17:09:56 +02:00
|
|
|
else
|
|
|
|
return false;
|
|
|
|
|
|
|
|
case OP_OR:
|
2016-12-16 17:50:07 +01:00
|
|
|
if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
|
2007-09-07 17:09:56 +02:00
|
|
|
return true;
|
|
|
|
else
|
2016-12-16 17:50:07 +01:00
|
|
|
return TS_execute(curitem + 1, arg, flags, chkcond);
|
2007-09-07 17:09:56 +02:00
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
case OP_PHRASE:
|
2016-08-05 20:58:13 +02:00
|
|
|
|
2016-06-27 19:47:32 +02:00
|
|
|
/*
|
2016-08-05 20:58:13 +02:00
|
|
|
* do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could
|
|
|
|
* do something more if it's called from TS_phrase_execute()
|
2016-06-27 19:47:32 +02:00
|
|
|
*/
|
2016-12-16 17:50:07 +01:00
|
|
|
return TS_phrase_execute(curitem, arg, flags, NULL, chkcond);
|
2016-04-07 17:44:18 +02:00
|
|
|
|
2007-09-07 17:09:56 +02:00
|
|
|
default:
|
2009-07-16 08:33:46 +02:00
|
|
|
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
|
2011-01-09 20:34:50 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* not reachable, but keep compiler quiet */
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Detect whether a tsquery boolean expression requires any positive matches
|
|
|
|
* to values shown in the tsquery.
|
|
|
|
*
|
|
|
|
* This is needed to know whether a GIN index search requires full index scan.
|
|
|
|
* For example, 'x & !y' requires a match of x, so it's sufficient to scan
|
|
|
|
* entries for x; but 'x | !y' could match rows containing neither x nor y.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
tsquery_requires_match(QueryItem *curitem)
|
|
|
|
{
|
|
|
|
/* since this function recurses, it could be driven to stack overflow */
|
|
|
|
check_stack_depth();
|
|
|
|
|
|
|
|
if (curitem->type == QI_VAL)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
switch (curitem->qoperator.oper)
|
|
|
|
{
|
|
|
|
case OP_NOT:
|
2011-04-10 17:42:00 +02:00
|
|
|
|
2011-01-09 20:34:50 +01:00
|
|
|
/*
|
|
|
|
* Assume there are no required matches underneath a NOT. For
|
|
|
|
* some cases with nested NOTs, we could prove there's a required
|
|
|
|
* match, but it seems unlikely to be worth the trouble.
|
|
|
|
*/
|
|
|
|
return false;
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
case OP_PHRASE:
|
2016-06-10 00:02:36 +02:00
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
/*
|
|
|
|
* Treat OP_PHRASE as OP_AND here
|
|
|
|
*/
|
2011-01-09 20:34:50 +01:00
|
|
|
case OP_AND:
|
|
|
|
/* If either side requires a match, we're good */
|
|
|
|
if (tsquery_requires_match(curitem + curitem->qoperator.left))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return tsquery_requires_match(curitem + 1);
|
|
|
|
|
|
|
|
case OP_OR:
|
|
|
|
/* Both sides must require a match */
|
|
|
|
if (tsquery_requires_match(curitem + curitem->qoperator.left))
|
|
|
|
return tsquery_requires_match(curitem + 1);
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
|
|
|
|
default:
|
|
|
|
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2007-09-07 17:09:56 +02:00
|
|
|
|
|
|
|
/* not reachable, but keep compiler quiet */
|
2007-08-21 03:11:32 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* boolean operations
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
ts_match_qv(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
|
|
|
|
PG_GETARG_DATUM(1),
|
|
|
|
PG_GETARG_DATUM(0)));
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_match_vq(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector val = PG_GETARG_TSVECTOR(0);
|
|
|
|
TSQuery query = PG_GETARG_TSQUERY(1);
|
|
|
|
CHKVAL chkval;
|
|
|
|
bool result;
|
|
|
|
|
|
|
|
if (!val->size || !query->size)
|
|
|
|
{
|
|
|
|
PG_FREE_IF_COPY(val, 0);
|
|
|
|
PG_FREE_IF_COPY(query, 1);
|
|
|
|
PG_RETURN_BOOL(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
chkval.arrb = ARRPTR(val);
|
|
|
|
chkval.arre = chkval.arrb + val->size;
|
|
|
|
chkval.values = STRPTR(val);
|
|
|
|
chkval.operand = GETOPERAND(query);
|
2016-12-16 17:50:07 +01:00
|
|
|
result = TS_execute(GETQUERY(query),
|
2007-08-21 03:11:32 +02:00
|
|
|
&chkval,
|
2016-06-27 19:47:32 +02:00
|
|
|
TS_EXEC_CALC_NOT,
|
2016-12-16 17:50:07 +01:00
|
|
|
checkcondition_str);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
PG_FREE_IF_COPY(val, 0);
|
|
|
|
PG_FREE_IF_COPY(query, 1);
|
|
|
|
PG_RETURN_BOOL(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_match_tt(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector vector;
|
|
|
|
TSQuery query;
|
|
|
|
bool res;
|
|
|
|
|
|
|
|
vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
|
|
|
|
PG_GETARG_DATUM(0)));
|
|
|
|
query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
|
|
|
|
PG_GETARG_DATUM(1)));
|
|
|
|
|
|
|
|
res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
|
|
|
|
TSVectorGetDatum(vector),
|
|
|
|
TSQueryGetDatum(query)));
|
|
|
|
|
|
|
|
pfree(vector);
|
|
|
|
pfree(query);
|
|
|
|
|
|
|
|
PG_RETURN_BOOL(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_match_tq(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
TSVector vector;
|
|
|
|
TSQuery query = PG_GETARG_TSQUERY(1);
|
|
|
|
bool res;
|
|
|
|
|
|
|
|
vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
|
|
|
|
PG_GETARG_DATUM(0)));
|
|
|
|
|
|
|
|
res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
|
|
|
|
TSVectorGetDatum(vector),
|
|
|
|
TSQueryGetDatum(query)));
|
|
|
|
|
|
|
|
pfree(vector);
|
|
|
|
PG_FREE_IF_COPY(query, 1);
|
|
|
|
|
|
|
|
PG_RETURN_BOOL(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2007-09-11 10:46:29 +02:00
|
|
|
* ts_stat statistic function support
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns the number of positions in value 'wptr' within tsvector 'txt',
|
|
|
|
* that have a weight equal to one of the weights in 'weight' bitmask.
|
2007-08-21 03:11:32 +02:00
|
|
|
*/
|
|
|
|
static int
|
2007-11-16 00:23:44 +01:00
|
|
|
check_weight(TSVector txt, WordEntry *wptr, int8 weight)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
int len = POSDATALEN(txt, wptr);
|
|
|
|
int num = 0;
|
|
|
|
WordEntryPos *ptr = POSDATAPTR(txt, wptr);
|
|
|
|
|
|
|
|
while (len--)
|
|
|
|
{
|
|
|
|
if (weight & (1 << WEP_GETWEIGHT(*ptr)))
|
|
|
|
num++;
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
return num;
|
|
|
|
}
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
/*
 * Compare the lexeme stored in StatEntry 'sent' with the lexeme of WordEntry
 * 'went' inside tsvector 'tsv', using full (non-prefix) tsCompareString
 * ordering.
 */
#define compareStatWord(sent, went, tsv) \
	tsCompareString((sent)->lexeme, \
					(sent)->lenlexeme, \
					STRPTR(tsv) + (went)->pos, \
					(went)->len, \
					false)
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
static void
|
|
|
|
insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
WordEntry *we = ARRPTR(txt) + off;
|
|
|
|
StatEntry *node = stat->root,
|
|
|
|
*pnode = NULL;
|
2008-11-17 13:17:09 +01:00
|
|
|
int n,
|
2008-11-19 11:23:21 +01:00
|
|
|
res = 0;
|
2009-06-11 16:49:15 +02:00
|
|
|
uint32 depth = 1;
|
2008-11-17 13:17:09 +01:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (stat->weight == 0)
|
2008-11-17 13:17:09 +01:00
|
|
|
n = (we->haspos) ? POSDATALEN(txt, we) : 1;
|
|
|
|
else
|
|
|
|
n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (n == 0)
|
|
|
|
return; /* nothing to insert */
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
while (node)
|
2008-11-17 13:17:09 +01:00
|
|
|
{
|
|
|
|
res = compareStatWord(node, we, txt);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
if (res == 0)
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
pnode = node;
|
2009-06-11 16:49:15 +02:00
|
|
|
node = (res < 0) ? node->left : node->right;
|
2008-11-17 13:17:09 +01:00
|
|
|
}
|
|
|
|
depth++;
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
if (depth > stat->maxdepth)
|
|
|
|
stat->maxdepth = depth;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
if (node == NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
|
2008-11-17 13:17:09 +01:00
|
|
|
node->left = node->right = NULL;
|
|
|
|
node->ndoc = 1;
|
|
|
|
node->nentry = n;
|
|
|
|
node->lenlexeme = we->len;
|
|
|
|
memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (pnode == NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2008-11-17 13:17:09 +01:00
|
|
|
stat->root = node;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
else
|
2008-11-17 13:17:09 +01:00
|
|
|
{
|
|
|
|
if (res < 0)
|
|
|
|
pnode->left = node;
|
|
|
|
else
|
|
|
|
pnode->right = node;
|
|
|
|
}
|
2009-06-11 16:49:15 +02:00
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2008-11-17 13:17:09 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
node->ndoc++;
|
|
|
|
node->nentry += n;
|
|
|
|
}
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
static void
|
2009-06-11 16:49:15 +02:00
|
|
|
chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
|
|
|
|
uint32 low, uint32 high, uint32 offset)
|
2008-11-17 13:17:09 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
uint32 pos;
|
|
|
|
uint32 middle = (low + high) >> 1;
|
2008-11-17 13:17:09 +01:00
|
|
|
|
|
|
|
pos = (low + middle) >> 1;
|
|
|
|
if (low != middle && pos >= offset && pos - offset < txt->size)
|
2009-06-11 16:49:15 +02:00
|
|
|
insertStatEntry(persistentContext, stat, txt, pos - offset);
|
2008-11-17 13:17:09 +01:00
|
|
|
pos = (high + middle + 1) >> 1;
|
|
|
|
if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
|
2009-06-11 16:49:15 +02:00
|
|
|
insertStatEntry(persistentContext, stat, txt, pos - offset);
|
2008-11-17 13:17:09 +01:00
|
|
|
|
|
|
|
if (low != middle)
|
|
|
|
chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
|
|
|
|
if (high != middle + 1)
|
|
|
|
chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
2007-09-11 10:46:29 +02:00
|
|
|
/*
|
|
|
|
* This is written like a custom aggregate function, because the
|
|
|
|
* original plan was to do just that. Unfortunately, an aggregate function
|
|
|
|
* can't return a set, so that plan was abandoned. If that limitation is
|
2007-11-16 00:23:44 +01:00
|
|
|
* lifted in the future, ts_stat could be a real aggregate function so that
|
2007-09-11 10:46:29 +02:00
|
|
|
* you could use it like this:
|
|
|
|
*
|
2007-11-16 00:23:44 +01:00
|
|
|
* SELECT ts_stat(vector_column) FROM vector_table;
|
2007-09-11 10:46:29 +02:00
|
|
|
*
|
2007-11-16 00:23:44 +01:00
|
|
|
* where vector_column is a tsvector-type column in vector_table.
|
2007-09-11 10:46:29 +02:00
|
|
|
*/
|
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
static TSVectorStat *
|
|
|
|
ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
TSVector txt = DatumGetTSVector(data);
|
|
|
|
uint32 i,
|
|
|
|
nbit = 0,
|
|
|
|
offset;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (stat == NULL)
|
2009-06-11 16:49:15 +02:00
|
|
|
{ /* Init in first */
|
2008-11-17 13:17:09 +01:00
|
|
|
stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
|
|
|
|
stat->maxdepth = 1;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* simple check of correctness */
|
|
|
|
if (txt == NULL || txt->size == 0)
|
|
|
|
{
|
2008-11-17 13:17:09 +01:00
|
|
|
if (txt && txt != (TSVector) DatumGetPointer(data))
|
2007-08-21 03:11:32 +02:00
|
|
|
pfree(txt);
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
i = txt->size - 1;
|
|
|
|
for (; i > 0; i >>= 1)
|
|
|
|
nbit++;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
nbit = 1 << nbit;
|
|
|
|
offset = (nbit - txt->size) / 2;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
|
2008-11-17 13:17:09 +01:00
|
|
|
chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
return stat;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
|
2008-11-17 13:17:09 +01:00
|
|
|
TSVectorStat *stat)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
TupleDesc tupdesc;
|
|
|
|
MemoryContext oldcontext;
|
|
|
|
StatEntry *node;
|
2008-11-17 13:17:09 +01:00
|
|
|
|
|
|
|
funcctx->user_fctx = (void *) stat;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
2008-11-17 13:17:09 +01:00
|
|
|
|
|
|
|
stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
|
2009-06-11 16:49:15 +02:00
|
|
|
stat->stackpos = 0;
|
2008-11-17 13:17:09 +01:00
|
|
|
|
|
|
|
node = stat->root;
|
|
|
|
/* find leftmost value */
|
2009-10-13 16:33:14 +02:00
|
|
|
if (node == NULL)
|
|
|
|
stat->stack[stat->stackpos] = NULL;
|
|
|
|
else
|
|
|
|
for (;;)
|
2008-11-17 13:17:09 +01:00
|
|
|
{
|
2009-10-13 16:33:14 +02:00
|
|
|
stat->stack[stat->stackpos] = node;
|
|
|
|
if (node->left)
|
|
|
|
{
|
|
|
|
stat->stackpos++;
|
|
|
|
node = node->left;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2008-11-17 13:17:09 +01:00
|
|
|
}
|
2009-10-13 16:33:14 +02:00
|
|
|
Assert(stat->stackpos <= stat->maxdepth);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
tupdesc = CreateTemplateTupleDesc(3, false);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
|
|
|
|
TEXTOID, -1, 0);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
|
|
|
|
INT4OID, -1, 0);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
|
|
|
|
INT4OID, -1, 0);
|
|
|
|
funcctx->tuple_desc = BlessTupleDesc(tupdesc);
|
|
|
|
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
}
|
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
static StatEntry *
|
2009-06-11 16:49:15 +02:00
|
|
|
walkStatEntryTree(TSVectorStat *stat)
|
2008-11-17 13:17:09 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
StatEntry *node = stat->stack[stat->stackpos];
|
2008-11-17 13:17:09 +01:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (node == NULL)
|
2008-11-17 13:17:09 +01:00
|
|
|
return NULL;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (node->ndoc != 0)
|
2008-11-17 13:17:09 +01:00
|
|
|
{
|
|
|
|
/* return entry itself: we already was at left sublink */
|
|
|
|
return node;
|
|
|
|
}
|
|
|
|
else if (node->right && node->right != stat->stack[stat->stackpos + 1])
|
|
|
|
{
|
|
|
|
/* go on right sublink */
|
|
|
|
stat->stackpos++;
|
|
|
|
node = node->right;
|
|
|
|
|
|
|
|
/* find most-left value */
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
stat->stack[stat->stackpos] = node;
|
|
|
|
if (node->left)
|
|
|
|
{
|
|
|
|
stat->stackpos++;
|
|
|
|
node = node->left;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
2009-10-13 16:33:14 +02:00
|
|
|
Assert(stat->stackpos <= stat->maxdepth);
|
2008-11-17 13:17:09 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* we already return all left subtree, itself and right subtree */
|
|
|
|
if (stat->stackpos == 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
stat->stackpos--;
|
|
|
|
return walkStatEntryTree(stat);
|
|
|
|
}
|
|
|
|
|
|
|
|
return node;
|
|
|
|
}
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
static Datum
|
|
|
|
ts_process_call(FuncCallContext *funcctx)
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
TSVectorStat *st;
|
|
|
|
StatEntry *entry;
|
2008-11-17 13:17:09 +01:00
|
|
|
|
|
|
|
st = (TSVectorStat *) funcctx->user_fctx;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
entry = walkStatEntryTree(st);
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
if (entry != NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
|
|
|
Datum result;
|
|
|
|
char *values[3];
|
|
|
|
char ndoc[16];
|
|
|
|
char nentry[16];
|
|
|
|
HeapTuple tuple;
|
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
values[0] = palloc(entry->lenlexeme + 1);
|
|
|
|
memcpy(values[0], entry->lexeme, entry->lenlexeme);
|
|
|
|
(values[0])[entry->lenlexeme] = '\0';
|
2007-08-21 03:11:32 +02:00
|
|
|
sprintf(ndoc, "%d", entry->ndoc);
|
|
|
|
values[1] = ndoc;
|
|
|
|
sprintf(nentry, "%d", entry->nentry);
|
|
|
|
values[2] = nentry;
|
|
|
|
|
|
|
|
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
|
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
|
|
|
|
pfree(values[0]);
|
2008-11-17 13:17:09 +01:00
|
|
|
|
|
|
|
/* mark entry as already visited */
|
|
|
|
entry->ndoc = 0;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (Datum) 0;
|
|
|
|
}
|
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
static TSVectorStat *
|
|
|
|
ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
|
2007-08-21 03:11:32 +02:00
|
|
|
{
|
2008-03-25 23:42:46 +01:00
|
|
|
char *query = text_to_cstring(txt);
|
2008-11-17 13:17:09 +01:00
|
|
|
TSVectorStat *stat;
|
2007-08-21 03:11:32 +02:00
|
|
|
bool isnull;
|
|
|
|
Portal portal;
|
2007-10-24 05:30:03 +02:00
|
|
|
SPIPlanPtr plan;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
|
|
|
|
/* internal error */
|
|
|
|
elog(ERROR, "SPI_prepare(\"%s\") failed", query);
|
|
|
|
|
2007-10-24 05:30:03 +02:00
|
|
|
if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
|
2007-08-21 03:11:32 +02:00
|
|
|
/* internal error */
|
|
|
|
elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
|
|
|
|
|
|
|
|
SPI_cursor_fetch(portal, true, 100);
|
|
|
|
|
2007-10-24 05:30:03 +02:00
|
|
|
if (SPI_tuptable == NULL ||
|
|
|
|
SPI_tuptable->tupdesc->natts != 1 ||
|
2015-09-17 18:50:51 +02:00
|
|
|
!IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
|
2016-06-10 00:02:36 +02:00
|
|
|
TSVECTOROID))
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("ts_stat query must return one tsvector column")));
|
|
|
|
|
2008-11-17 13:17:09 +01:00
|
|
|
stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
|
|
|
|
stat->maxdepth = 1;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
if (ws)
|
|
|
|
{
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
buf = VARDATA(ws);
|
|
|
|
while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ)
|
|
|
|
{
|
|
|
|
if (pg_mblen(buf) == 1)
|
|
|
|
{
|
|
|
|
switch (*buf)
|
|
|
|
{
|
|
|
|
case 'A':
|
|
|
|
case 'a':
|
|
|
|
stat->weight |= 1 << 3;
|
|
|
|
break;
|
|
|
|
case 'B':
|
|
|
|
case 'b':
|
|
|
|
stat->weight |= 1 << 2;
|
|
|
|
break;
|
|
|
|
case 'C':
|
|
|
|
case 'c':
|
|
|
|
stat->weight |= 1 << 1;
|
|
|
|
break;
|
|
|
|
case 'D':
|
|
|
|
case 'd':
|
|
|
|
stat->weight |= 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
stat->weight |= 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
buf += pg_mblen(buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (SPI_processed > 0)
|
|
|
|
{
|
Widen query numbers-of-tuples-processed counters to uint64.
This patch widens SPI_processed, EState's es_processed field, PortalData's
portalPos field, FuncCallContext's call_cntr and max_calls fields,
ExecutorRun's count argument, PortalRunFetch's result, and the max number
of rows in a SPITupleTable to uint64, and deals with (I hope) all the
ensuing fallout. Some of these values were declared uint32 before, and
others "long".
I also removed PortalData's posOverflow field, since that logic seems
pretty useless given that portalPos is now always 64 bits.
The user-visible results are that command tags for SELECT etc will
correctly report tuple counts larger than 4G, as will plpgsql's GET
GET DIAGNOSTICS ... ROW_COUNT command. Queries processing more tuples
than that are still not exactly the norm, but they're becoming more
common.
Most values associated with FETCH/MOVE distances, such as PortalRun's count
argument and the count argument of most SPI functions that have one, remain
declared as "long". It's not clear whether it would be worth promoting
those to int64; but it would definitely be a large dollop of additional
API churn on top of this, and it would only help 32-bit platforms which
seem relatively less likely to see any benefit.
Andreas Scherbaum, reviewed by Christian Ullrich, additional hacking by me
2016-03-12 22:05:10 +01:00
|
|
|
uint64 i;
|
|
|
|
|
2007-08-21 03:11:32 +02:00
|
|
|
for (i = 0; i < SPI_processed; i++)
|
|
|
|
{
|
|
|
|
Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
|
|
|
|
|
|
|
|
if (!isnull)
|
2008-11-17 13:17:09 +01:00
|
|
|
stat = ts_accum(persistentContext, stat, data);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
SPI_freetuptable(SPI_tuptable);
|
|
|
|
SPI_cursor_fetch(portal, true, 100);
|
|
|
|
}
|
|
|
|
|
|
|
|
SPI_freetuptable(SPI_tuptable);
|
|
|
|
SPI_cursor_close(portal);
|
|
|
|
SPI_freeplan(plan);
|
|
|
|
pfree(query);
|
|
|
|
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_stat1(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
FuncCallContext *funcctx;
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
TSVectorStat *stat;
|
2007-08-21 03:11:32 +02:00
|
|
|
text *txt = PG_GETARG_TEXT_P(0);
|
|
|
|
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
|
|
SPI_connect();
|
2008-11-17 13:17:09 +01:00
|
|
|
stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
|
2007-08-21 03:11:32 +02:00
|
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
|
|
ts_setup_firstcall(fcinfo, funcctx, stat);
|
|
|
|
SPI_finish();
|
|
|
|
}
|
|
|
|
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
if ((result = ts_process_call(funcctx)) != (Datum) 0)
|
|
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
|
|
SRF_RETURN_DONE(funcctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
ts_stat2(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
FuncCallContext *funcctx;
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
if (SRF_IS_FIRSTCALL())
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
TSVectorStat *stat;
|
2007-08-21 03:11:32 +02:00
|
|
|
text *txt = PG_GETARG_TEXT_P(0);
|
|
|
|
text *ws = PG_GETARG_TEXT_P(1);
|
|
|
|
|
|
|
|
funcctx = SRF_FIRSTCALL_INIT();
|
|
|
|
SPI_connect();
|
2008-11-17 13:17:09 +01:00
|
|
|
stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
|
2007-08-21 03:11:32 +02:00
|
|
|
PG_FREE_IF_COPY(txt, 0);
|
|
|
|
PG_FREE_IF_COPY(ws, 1);
|
|
|
|
ts_setup_firstcall(fcinfo, funcctx, stat);
|
|
|
|
SPI_finish();
|
|
|
|
}
|
|
|
|
|
|
|
|
funcctx = SRF_PERCALL_SETUP();
|
|
|
|
if ((result = ts_process_call(funcctx)) != (Datum) 0)
|
|
|
|
SRF_RETURN_NEXT(funcctx, result);
|
|
|
|
SRF_RETURN_DONE(funcctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Triggers for automatic update of a tsvector column from text column(s)
|
|
|
|
*
|
|
|
|
* Trigger arguments are either
|
|
|
|
* name of tsvector col, name of tsconfig to use, name(s) of text col(s)
|
|
|
|
* name of tsvector col, name of regconfig col, name(s) of text col(s)
|
|
|
|
* ie, tsconfig can either be specified by name, or indirectly as the
|
|
|
|
* contents of a regconfig field in the row. If the name is used, it must
|
|
|
|
* be explicitly schema-qualified.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return tsvector_update_trigger(fcinfo, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return tsvector_update_trigger(fcinfo, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static Datum
|
|
|
|
tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
|
|
|
|
{
|
|
|
|
TriggerData *trigdata;
|
|
|
|
Trigger *trigger;
|
|
|
|
Relation rel;
|
|
|
|
HeapTuple rettuple = NULL;
|
|
|
|
int tsvector_attr_num,
|
|
|
|
i;
|
|
|
|
ParsedText prs;
|
|
|
|
Datum datum;
|
|
|
|
bool isnull;
|
|
|
|
text *txt;
|
|
|
|
Oid cfgId;
|
|
|
|
|
|
|
|
/* Check call context */
|
2007-11-16 00:23:44 +01:00
|
|
|
if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
|
2007-08-21 03:11:32 +02:00
|
|
|
elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
|
|
|
|
|
|
|
|
trigdata = (TriggerData *) fcinfo->context;
|
2010-10-08 19:27:31 +02:00
|
|
|
if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
|
|
|
|
elog(ERROR, "tsvector_update_trigger: must be fired for row");
|
|
|
|
if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
|
2007-08-21 03:11:32 +02:00
|
|
|
elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
|
|
|
|
|
|
|
|
if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
|
|
|
|
rettuple = trigdata->tg_trigtuple;
|
|
|
|
else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
|
|
|
|
rettuple = trigdata->tg_newtuple;
|
|
|
|
else
|
|
|
|
elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
|
|
|
|
|
|
|
|
trigger = trigdata->tg_trigger;
|
|
|
|
rel = trigdata->tg_relation;
|
|
|
|
|
|
|
|
if (trigger->tgnargs < 3)
|
|
|
|
elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
|
|
|
|
|
|
|
|
/* Find the target tsvector column */
|
|
|
|
tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
|
|
|
|
if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("tsvector column \"%s\" does not exist",
|
|
|
|
trigger->tgargs[0])));
|
2016-11-08 19:11:15 +01:00
|
|
|
/* This will effectively reject system columns, so no separate test: */
|
2015-09-17 18:50:51 +02:00
|
|
|
if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
|
2016-06-10 00:02:36 +02:00
|
|
|
TSVECTOROID))
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
|
|
errmsg("column \"%s\" is not of tsvector type",
|
|
|
|
trigger->tgargs[0])));
|
|
|
|
|
|
|
|
/* Find the configuration to use */
|
|
|
|
if (config_column)
|
|
|
|
{
|
2007-11-16 00:23:44 +01:00
|
|
|
int config_attr_num;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
|
|
|
|
if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
2007-12-27 14:02:48 +01:00
|
|
|
errmsg("configuration column \"%s\" does not exist",
|
2007-08-21 03:11:32 +02:00
|
|
|
trigger->tgargs[1])));
|
2015-09-17 18:50:51 +02:00
|
|
|
if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
|
2016-06-10 00:02:36 +02:00
|
|
|
REGCONFIGOID))
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
|
|
|
errmsg("column \"%s\" is not of regconfig type",
|
|
|
|
trigger->tgargs[1])));
|
|
|
|
|
|
|
|
datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
|
|
|
|
if (isnull)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
2007-12-27 14:02:48 +01:00
|
|
|
errmsg("configuration column \"%s\" must not be null",
|
2007-08-21 03:11:32 +02:00
|
|
|
trigger->tgargs[1])));
|
|
|
|
cfgId = DatumGetObjectId(datum);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2007-11-16 00:23:44 +01:00
|
|
|
List *names;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
names = stringToQualifiedNameList(trigger->tgargs[1]);
|
|
|
|
/* require a schema so that results are not search path dependent */
|
|
|
|
if (list_length(names) < 2)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("text search configuration name \"%s\" must be schema-qualified",
|
|
|
|
trigger->tgargs[1])));
|
2010-08-05 17:25:36 +02:00
|
|
|
cfgId = get_ts_config_oid(names, false);
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize parse state */
|
|
|
|
prs.lenwords = 32;
|
|
|
|
prs.curwords = 0;
|
|
|
|
prs.pos = 0;
|
|
|
|
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
|
|
|
|
|
|
|
|
/* find all words in indexable column(s) */
|
|
|
|
for (i = 2; i < trigger->tgnargs; i++)
|
|
|
|
{
|
|
|
|
int numattr;
|
|
|
|
|
|
|
|
numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
|
|
|
|
if (numattr == SPI_ERROR_NOATTRIBUTE)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("column \"%s\" does not exist",
|
|
|
|
trigger->tgargs[i])));
|
2015-09-17 18:50:51 +02:00
|
|
|
if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
|
2007-08-21 03:11:32 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
2008-11-10 22:49:16 +01:00
|
|
|
errmsg("column \"%s\" is not of a character type",
|
2007-08-21 03:11:32 +02:00
|
|
|
trigger->tgargs[i])));
|
|
|
|
|
|
|
|
datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
|
|
|
|
if (isnull)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
txt = DatumGetTextP(datum);
|
|
|
|
|
|
|
|
parsetext(cfgId, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
|
|
|
|
|
|
|
|
if (txt != (text *) DatumGetPointer(datum))
|
|
|
|
pfree(txt);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* make tsvector value */
|
|
|
|
if (prs.curwords)
|
|
|
|
{
|
|
|
|
datum = PointerGetDatum(make_tsvector(&prs));
|
2016-11-08 21:36:36 +01:00
|
|
|
isnull = false;
|
|
|
|
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
|
|
|
|
1, &tsvector_attr_num,
|
|
|
|
&datum, &isnull);
|
2007-08-21 03:11:32 +02:00
|
|
|
pfree(DatumGetPointer(datum));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
TSVector out = palloc(CALCDATASIZE(0, 0));
|
|
|
|
|
|
|
|
SET_VARSIZE(out, CALCDATASIZE(0, 0));
|
|
|
|
out->size = 0;
|
|
|
|
datum = PointerGetDatum(out);
|
2016-11-08 21:36:36 +01:00
|
|
|
isnull = false;
|
|
|
|
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
|
|
|
|
1, &tsvector_attr_num,
|
|
|
|
&datum, &isnull);
|
2007-08-21 03:11:32 +02:00
|
|
|
pfree(prs.words);
|
|
|
|
}
|
|
|
|
|
|
|
|
return PointerGetDatum(rettuple);
|
|
|
|
}
|