postgresql/contrib/tsearch2/tsvector.c
Tom Lane 234a02b2a8 Replace direct assignments to VARATT_SIZEP(x) with SET_VARSIZE(x, len).
Get rid of VARATT_SIZE and VARATT_DATA, which were simply redundant with
VARSIZE and VARDATA, and as a consequence almost no code was using the
longer names.  Rename the length fields of struct varlena and various
derived structures to catch anyplace that was accessing them directly;
and clean up various places so caught.  In itself this patch doesn't
change any behavior at all, but it is necessary infrastructure if we hope
to play any games with the representation of varlena headers.
Greg Stark and Tom Lane
2007-02-27 23:48:10 +00:00

1107 lines
25 KiB
C

/*
* In/Out definitions for tsvector type
* Internal structure:
* string of values, array of position lexeme in string and it's length
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "catalog/namespace.h"
#include "commands/trigger.h"
#include "executor/spi.h"
#include "nodes/pg_list.h"
#include "storage/bufpage.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
#include "mb/pg_wchar.h"
#include <ctype.h>
#include "tsvector.h"
#include "query.h"
#include "ts_cfg.h"
#include "common.h"
PG_FUNCTION_INFO_V1(tsvector_in);
Datum tsvector_in(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsvector_out);
Datum tsvector_out(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector);
Datum to_tsvector(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector_current);
Datum to_tsvector_current(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector_name);
Datum to_tsvector_name(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsearch2);
Datum tsearch2(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsvector_length);
Datum tsvector_length(PG_FUNCTION_ARGS);
/*
* in/out text index type
*/
static int
comparePos(const void *a, const void *b)
{
if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
return 0;
return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
}
static int
uniquePos(WordEntryPos * a, int4 l)
{
WordEntryPos *ptr,
*res;
res = a;
if (l == 1)
return l;
qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
ptr = a + 1;
while (ptr - a < l)
{
if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res))
{
res++;
*res = *ptr;
if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1)
break;
}
else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr));
ptr++;
}
return res + 1 - a;
}
static int
compareentry(const void *a, const void *b, void *arg)
{
char *BufferStr = (char *) arg;
if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
{
return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos],
&BufferStr[((WordEntryIN *) b)->entry.pos],
((WordEntryIN *) a)->entry.len);
}
return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
}
static int
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{
WordEntryIN *ptr,
*res;
res = a;
if (l == 1)
{
if (a->entry.haspos)
{
*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
}
return l;
}
ptr = a + 1;
qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
while (ptr - a < l)
{
if (!(ptr->entry.len == res->entry.len &&
strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
{
if (res->entry.haspos)
{
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
res++;
memcpy(res, ptr, sizeof(WordEntryIN));
}
else if (ptr->entry.haspos)
{
if (res->entry.haspos)
{
int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
&(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
pfree(ptr->pos);
}
else
{
res->entry.haspos = 1;
res->pos = ptr->pos;
}
}
ptr++;
}
if (res->entry.haspos)
{
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
return res + 1 - a;
}
#define WAITWORD 1
#define WAITENDWORD 2
#define WAITNEXTCHAR 3
#define WAITENDCMPLX 4
#define WAITPOSINFO 5
#define INPOSINFO 6
#define WAITPOSDELIM 7
#define WAITCHARCMPLX 8
#define RESIZEPRSBUF \
do { \
if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
{ \
int4 clen = state->curpos - state->word; \
state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \
state->curpos = state->word + clen; \
} \
} while (0)
int4
gettoken_tsvector(TI_IN_STATE * state)
{
int4 oldstate = 0;
state->curpos = state->word;
state->state = WAITWORD;
state->alen = 0;
while (1)
{
if (state->state == WAITWORD)
{
if (*(state->prsbuf) == '\0')
return 0;
else if (t_iseq(state->prsbuf, '\''))
state->state = WAITENDCMPLX;
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
else if (!t_isspace(state->prsbuf))
{
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDWORD;
}
}
else if (state->state == WAITNEXTCHAR)
{
if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = oldstate;
}
}
else if (state->state == WAITENDWORD)
{
if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
{
RESIZEPRSBUF;
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
*(state->curpos) = '\0';
return 1;
}
else if (t_iseq(state->prsbuf, ':'))
{
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
*(state->curpos) = '\0';
if (state->oprisdelim)
return 1;
else
state->state = INPOSINFO;
}
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITENDCMPLX)
{
if (t_iseq(state->prsbuf, '\''))
{
state->state = WAITCHARCMPLX;
}
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
}
else if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITCHARCMPLX)
{
if (t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDCMPLX;
}
else
{
RESIZEPRSBUF;
*(state->curpos) = '\0';
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
return 1;
}
else
state->state = WAITPOSINFO;
continue; /* recheck current character */
}
}
else if (state->state == WAITPOSINFO)
{
if (t_iseq(state->prsbuf, ':'))
state->state = INPOSINFO;
else
return 1;
}
else if (state->state == INPOSINFO)
{
if (t_isdigit(state->prsbuf))
{
if (state->alen == 0)
{
state->alen = 4;
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
*(uint16 *) (state->pos) = 0;
}
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
{
state->alen *= 2;
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
}
(*(uint16 *) (state->pos))++;
WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
state->state = WAITPOSDELIM;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
}
else if (state->state == WAITPOSDELIM)
{
if (t_iseq(state->prsbuf, ','))
state->state = INPOSINFO;
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
}
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
}
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
}
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
}
else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0')
return 1;
else if (!t_isdigit(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
}
else
/* internal error */
elog(ERROR, "internal error");
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
}
return 0;
}
Datum
tsvector_in(PG_FUNCTION_ARGS)
{
char *buf = PG_GETARG_CSTRING(0);
TI_IN_STATE state;
WordEntryIN *arr;
WordEntry *inarr;
int4 len = 0,
totallen = 64;
tsvector *in;
char *tmpbuf,
*cur;
int4 i,
buflen = 256;
SET_FUNCOID();
pg_verifymbstr(buf, strlen(buf), false);
state.prsbuf = buf;
state.len = 32;
state.word = (char *) palloc(state.len);
state.oprisdelim = false;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
cur = tmpbuf = (char *) palloc(buflen);
while (gettoken_tsvector(&state))
{
if (len >= totallen)
{
totallen *= 2;
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
}
while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
{
int4 dist = cur - tmpbuf;
buflen *= 2;
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
cur = tmpbuf + dist;
}
if (state.curpos - state.word >= MAXSTRLEN)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long")));
arr[len].entry.len = state.curpos - state.word;
if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("too long value")));
arr[len].entry.pos = cur - tmpbuf;
memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
cur += arr[len].entry.len;
if (state.alen)
{
arr[len].entry.haspos = 1;
arr[len].pos = state.pos;
}
else
arr[len].entry.haspos = 0;
len++;
}
pfree(state.word);
if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen);
else
buflen = 0;
totallen = CALCDATASIZE(len, buflen);
in = (tsvector *) palloc0(totallen);
SET_VARSIZE(in, totallen);
in->size = len;
cur = STRPTR(in);
inarr = ARRPTR(in);
for (i = 0; i < len; i++)
{
memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
arr[i].entry.pos = cur - STRPTR(in);
cur += SHORTALIGN(arr[i].entry.len);
if (arr[i].entry.haspos)
{
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
pfree(arr[i].pos);
}
memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
}
pfree(tmpbuf);
pfree(arr);
PG_RETURN_POINTER(in);
}
Datum
tsvector_length(PG_FUNCTION_ARGS)
{
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
int4 ret = in->size;
PG_FREE_IF_COPY(in, 0);
PG_RETURN_INT32(ret);
}
Datum
tsvector_out(PG_FUNCTION_ARGS)
{
tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
char *outbuf;
int4 i,
lenbuf = 0,
pp;
WordEntry *ptr = ARRPTR(out);
char *curbegin,
*curin,
*curout;
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
for (i = 0; i < out->size; i++)
{
lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
if (ptr[i].haspos)
lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
}
curout = outbuf = (char *) palloc(lenbuf);
for (i = 0; i < out->size; i++)
{
curbegin = curin = STRPTR(out) + ptr->pos;
if (i != 0)
*curout++ = ' ';
*curout++ = '\'';
while (curin - curbegin < ptr->len)
{
int len = pg_mblen(curin);
if (t_iseq(curin, '\''))
{
int4 pos = curout - outbuf;
outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
curout = outbuf + pos;
*curout++ = '\'';
}
while (len--)
*curout++ = *curin++;
}
*curout++ = '\'';
if ((pp = POSDATALEN(out, ptr)) != 0)
{
WordEntryPos *wptr;
*curout++ = ':';
wptr = POSDATAPTR(out, ptr);
while (pp)
{
sprintf(curout, "%d", WEP_GETPOS(*wptr));
curout = strchr(curout, '\0');
switch (WEP_GETWEIGHT(*wptr))
{
case 3:
*curout++ = 'A';
break;
case 2:
*curout++ = 'B';
break;
case 1:
*curout++ = 'C';
break;
case 0:
default:
break;
}
if (pp > 1)
*curout++ = ',';
pp--;
wptr++;
}
}
ptr++;
}
*curout = '\0';
outbuf[lenbuf - 1] = '\0';
PG_FREE_IF_COPY(out, 0);
PG_RETURN_POINTER(outbuf);
}
static int
compareWORD(const void *a, const void *b)
{
if (((TSWORD *) a)->len == ((TSWORD *) b)->len)
{
int res = strncmp(
((TSWORD *) a)->word,
((TSWORD *) b)->word,
((TSWORD *) b)->len);
if (res == 0)
return (((TSWORD *) a)->pos.pos > ((TSWORD *) b)->pos.pos) ? 1 : -1;
return res;
}
return (((TSWORD *) a)->len > ((TSWORD *) b)->len) ? 1 : -1;
}
static int
uniqueWORD(TSWORD * a, int4 l)
{
TSWORD *ptr,
*res;
int tmppos;
if (l == 1)
{
tmppos = LIMITPOS(a->pos.pos);
a->alen = 2;
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
a->pos.apos[0] = 1;
a->pos.apos[1] = tmppos;
return l;
}
res = a;
ptr = a + 1;
qsort((void *) a, l, sizeof(TSWORD), compareWORD);
tmppos = LIMITPOS(a->pos.pos);
a->alen = 2;
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
a->pos.apos[0] = 1;
a->pos.apos[1] = tmppos;
while (ptr - a < l)
{
if (!(ptr->len == res->len &&
strncmp(ptr->word, res->word, res->len) == 0))
{
res++;
res->len = ptr->len;
res->word = ptr->word;
tmppos = LIMITPOS(ptr->pos.pos);
res->alen = 2;
res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
res->pos.apos[0] = 1;
res->pos.apos[1] = tmppos;
}
else
{
pfree(ptr->word);
if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
{
if (res->pos.apos[0] + 1 >= res->alen)
{
res->alen *= 2;
res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
}
if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
{
res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
res->pos.apos[0]++;
}
}
}
ptr++;
}
return res + 1 - a;
}
/*
* make value of tsvector
*/
static tsvector *
makevalue(PRSTEXT * prs)
{
int4 i,
j,
lenstr = 0,
totallen;
tsvector *in;
WordEntry *ptr;
char *str,
*cur;
prs->curwords = uniqueWORD(prs->words, prs->curwords);
for (i = 0; i < prs->curwords; i++)
{
lenstr += SHORTALIGN(prs->words[i].len);
if (prs->words[i].alen)
lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
}
totallen = CALCDATASIZE(prs->curwords, lenstr);
in = (tsvector *) palloc0(totallen);
SET_VARSIZE(in, totallen);
in->size = prs->curwords;
ptr = ARRPTR(in);
cur = str = STRPTR(in);
for (i = 0; i < prs->curwords; i++)
{
ptr->len = prs->words[i].len;
if (cur - str > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("value is too big")));
ptr->pos = cur - str;
memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
pfree(prs->words[i].word);
cur += SHORTALIGN(prs->words[i].len);
if (prs->words[i].alen)
{
WordEntryPos *wptr;
ptr->haspos = 1;
*(uint16 *) cur = prs->words[i].pos.apos[0];
wptr = POSDATAPTR(in, ptr);
for (j = 0; j < *(uint16 *) cur; j++)
{
WEP_SETWEIGHT(wptr[j], 0);
WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
}
cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
pfree(prs->words[i].pos.apos);
}
else
ptr->haspos = 0;
ptr++;
}
pfree(prs->words);
return in;
}
Datum
to_tsvector(PG_FUNCTION_ARGS)
{
text *in = PG_GETARG_TEXT_P(1);
PRSTEXT prs;
tsvector *out;
TSCfgInfo *cfg;
SET_FUNCOID();
cfg = findcfg(PG_GETARG_INT32(0));
prs.lenwords = 32;
prs.curwords = 0;
prs.pos = 0;
prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords);
parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
PG_FREE_IF_COPY(in, 1);
if (prs.curwords)
out = makevalue(&prs);
else
{
pfree(prs.words);
out = palloc(CALCDATASIZE(0, 0));
SET_VARSIZE(out, CALCDATASIZE(0, 0));
out->size = 0;
}
PG_RETURN_POINTER(out);
}
Datum
to_tsvector_name(PG_FUNCTION_ARGS)
{
text *cfg = PG_GETARG_TEXT_P(0);
Datum res;
SET_FUNCOID();
res = DirectFunctionCall3(
to_tsvector,
Int32GetDatum(name2id_cfg(cfg)),
PG_GETARG_DATUM(1),
(Datum) 0
);
PG_FREE_IF_COPY(cfg, 0);
PG_RETURN_DATUM(res);
}
Datum
to_tsvector_current(PG_FUNCTION_ARGS)
{
Datum res;
SET_FUNCOID();
res = DirectFunctionCall3(
to_tsvector,
Int32GetDatum(get_currcfg()),
PG_GETARG_DATUM(0),
(Datum) 0
);
PG_RETURN_DATUM(res);
}
static Oid
findFunc(char *fname)
{
FuncCandidateList clist,
ptr;
Oid funcid = InvalidOid;
List *names = list_make1(makeString(fname));
ptr = clist = FuncnameGetCandidates(names, 1);
list_free(names);
if (!ptr)
return funcid;
while (ptr)
{
if (ptr->args[0] == TEXTOID && funcid == InvalidOid)
funcid = ptr->oid;
clist = ptr->next;
pfree(ptr);
ptr = clist;
}
return funcid;
}
/*
* Trigger
*/
Datum
tsearch2(PG_FUNCTION_ARGS)
{
TriggerData *trigdata;
Trigger *trigger;
Relation rel;
HeapTuple rettuple = NULL;
int numidxattr,
i;
PRSTEXT prs;
Datum datum = (Datum) 0;
Oid funcoid = InvalidOid;
TSCfgInfo *cfg;
SET_FUNCOID();
cfg = findcfg(get_currcfg());
if (!CALLED_AS_TRIGGER(fcinfo))
/* internal error */
elog(ERROR, "TSearch: Not fired by trigger manager");
trigdata = (TriggerData *) fcinfo->context;
if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
/* internal error */
elog(ERROR, "TSearch: Cannot process STATEMENT events");
if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
/* internal error */
elog(ERROR, "TSearch: Must be fired BEFORE event");
if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
rettuple = trigdata->tg_trigtuple;
else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
rettuple = trigdata->tg_newtuple;
else
/* internal error */
elog(ERROR, "TSearch: Unknown event");
trigger = trigdata->tg_trigger;
rel = trigdata->tg_relation;
if (trigger->tgnargs < 2)
/* internal error */
elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
if (numidxattr == SPI_ERROR_NOATTRIBUTE)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("tsvector column \"%s\" does not exist",
trigger->tgargs[0])));
prs.lenwords = 32;
prs.curwords = 0;
prs.pos = 0;
prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords);
/* find all words in indexable column */
for (i = 1; i < trigger->tgnargs; i++)
{
int numattr;
Oid oidtype;
Datum txt_toasted;
bool isnull;
text *txt;
numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
if (numattr == SPI_ERROR_NOATTRIBUTE)
{
funcoid = findFunc(trigger->tgargs[i]);
if (funcoid == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("could not find function or field \"%s\"",
trigger->tgargs[i])));
continue;
}
oidtype = SPI_gettypeid(rel->rd_att, numattr);
/* We assume char() and varchar() are binary-equivalent to text */
if (!(oidtype == TEXTOID ||
oidtype == VARCHAROID ||
oidtype == BPCHAROID))
{
elog(WARNING, "TSearch: '%s' is not of character type",
trigger->tgargs[i]);
continue;
}
txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
if (isnull)
continue;
if (funcoid != InvalidOid)
{
text *txttmp = (text *) DatumGetPointer(OidFunctionCall1(
funcoid,
PointerGetDatum(txt_toasted)
));
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
if (txt == txttmp)
txt_toasted = PointerGetDatum(txt);
}
else
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
if (txt != (text *) DatumGetPointer(txt_toasted))
pfree(txt);
}
/* make tsvector value */
if (prs.curwords)
{
datum = PointerGetDatum(makevalue(&prs));
rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
&datum, NULL);
pfree(DatumGetPointer(datum));
}
else
{
tsvector *out = palloc(CALCDATASIZE(0, 0));
SET_VARSIZE(out, CALCDATASIZE(0, 0));
out->size = 0;
datum = PointerGetDatum(out);
pfree(prs.words);
rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
&datum, NULL);
}
if (rettuple == NULL)
/* internal error */
elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
return PointerGetDatum(rettuple);
}
static int
silly_cmp_tsvector(const tsvector * a, const tsvector * b)
{
if (VARSIZE(a) < VARSIZE(b))
return -1;
else if (VARSIZE(a) > VARSIZE(b))
return 1;
else if (a->size < b->size)
return -1;
else if (a->size > b->size)
return 1;
else
{
WordEntry *aptr = ARRPTR(a);
WordEntry *bptr = ARRPTR(b);
int i = 0;
int res;
for (i = 0; i < a->size; i++)
{
if (aptr->haspos != bptr->haspos)
{
return (aptr->haspos > bptr->haspos) ? -1 : 1;
}
else if (aptr->len != bptr->len)
{
return (aptr->len > bptr->len) ? -1 : 1;
}
else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
{
return res;
}
else if (aptr->haspos)
{
WordEntryPos *ap = POSDATAPTR(a, aptr);
WordEntryPos *bp = POSDATAPTR(b, bptr);
int j;
if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
for (j = 0; j < POSDATALEN(a, aptr); j++)
{
if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
{
return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
}
else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
{
return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
}
ap++, bp++;
}
}
aptr++;
bptr++;
}
}
return 0;
}
PG_FUNCTION_INFO_V1(tsvector_cmp);
PG_FUNCTION_INFO_V1(tsvector_lt);
PG_FUNCTION_INFO_V1(tsvector_le);
PG_FUNCTION_INFO_V1(tsvector_eq);
PG_FUNCTION_INFO_V1(tsvector_ne);
PG_FUNCTION_INFO_V1(tsvector_ge);
PG_FUNCTION_INFO_V1(tsvector_gt);
Datum tsvector_cmp(PG_FUNCTION_ARGS);
Datum tsvector_lt(PG_FUNCTION_ARGS);
Datum tsvector_le(PG_FUNCTION_ARGS);
Datum tsvector_eq(PG_FUNCTION_ARGS);
Datum tsvector_ne(PG_FUNCTION_ARGS);
Datum tsvector_ge(PG_FUNCTION_ARGS);
Datum tsvector_gt(PG_FUNCTION_ARGS);
#define RUNCMP \
tsvector *a = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));\
tsvector *b = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));\
int res = silly_cmp_tsvector(a,b); \
PG_FREE_IF_COPY(a,0); \
PG_FREE_IF_COPY(b,1); \
Datum
tsvector_cmp(PG_FUNCTION_ARGS)
{
RUNCMP
PG_RETURN_INT32(res);
}
Datum
tsvector_lt(PG_FUNCTION_ARGS)
{
RUNCMP
PG_RETURN_BOOL((res < 0) ? true : false);
}
Datum
tsvector_le(PG_FUNCTION_ARGS)
{
RUNCMP
PG_RETURN_BOOL((res <= 0) ? true : false);
}
Datum
tsvector_eq(PG_FUNCTION_ARGS)
{
RUNCMP
PG_RETURN_BOOL((res == 0) ? true : false);
}
Datum
tsvector_ge(PG_FUNCTION_ARGS)
{
RUNCMP
PG_RETURN_BOOL((res >= 0) ? true : false);
}
Datum
tsvector_gt(PG_FUNCTION_ARGS)
{
RUNCMP
PG_RETURN_BOOL((res > 0) ? true : false);
}
Datum
tsvector_ne(PG_FUNCTION_ARGS)
{
RUNCMP
PG_RETURN_BOOL((res != 0) ? true : false);
}