From 21b748e76acc54ad0c3715e340fdff0865e201eb Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Fri, 28 Oct 2005 13:05:06 +0000 Subject: [PATCH] 1 Fix problem with lost precision in rank with OR-ed lexemes 2 Allow tsquery_in to input void tsquery: resolve dump/restore problem with tsquery --- contrib/tsearch2/expected/tsearch2.out | 18 +++++++-------- contrib/tsearch2/query.c | 23 ++++++++++++++----- contrib/tsearch2/rank.c | 31 +++++++++++++++++++------- 3 files changed, 49 insertions(+), 23 deletions(-) diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out index 6c266a29ac..a7ac240ef9 100644 --- a/contrib/tsearch2/expected/tsearch2.out +++ b/contrib/tsearch2/expected/tsearch2.out @@ -746,21 +746,21 @@ select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright'); (1 row) select rank(' a:1 s:2C d g'::tsvector, 'a | s'); - rank ------- - 0.28 + rank +----------- + 0.0911891 (1 row) select rank(' a:1 s:2B d g'::tsvector, 'a | s'); - rank ------- - 0.46 + rank +---------- + 0.151982 (1 row) select rank(' a:1 s:2 d g'::tsvector, 'a | s'); - rank ------- - 0.19 + rank +----------- + 0.0607927 (1 row) select rank(' a:1 s:2C d g'::tsvector, 'a & s'); diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c index d8b8d4c80d..013f003196 100644 --- a/contrib/tsearch2/query.c +++ b/contrib/tsearch2/query.c @@ -55,6 +55,7 @@ Datum to_tsquery_current(PG_FUNCTION_ARGS); /* parser's states */ #define WAITOPERAND 1 #define WAITOPERATOR 2 +#define WAITFIRSTOPERAND 3 /* * node of query tree, also used @@ -137,6 +138,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 { switch (state->state) { + case WAITFIRSTOPERAND: case WAITOPERAND: if (*(state->buf) == '!') { @@ -159,14 +161,16 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 else if (*(state->buf) != ' ') { state->valstate.prsbuf = state->buf; - state->state = WAITOPERATOR; if 
(gettoken_tsvector(&(state->valstate))) { *strval = state->valstate.word; *lenval = state->valstate.curpos - state->valstate.word; state->buf = get_weight(state->valstate.prsbuf, weight); + state->state = WAITOPERATOR; return VAL; } + else if ( state->state == WAITFIRSTOPERAND ) + return END; else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -596,7 +600,7 @@ static QUERYTYPE * /* init state */ state.buf = buf; - state.state = WAITOPERAND; + state.state = WAITFIRSTOPERAND; state.count = 0; state.num = 0; state.str = NULL; @@ -616,10 +620,13 @@ static QUERYTYPE * /* parse query & make polish notation (postfix, but in reverse order) */ makepol(&state, pushval); pfree(state.valstate.word); - if (!state.num) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("empty query"))); + if (!state.num) { + elog(NOTICE, "Query doesn't contain lexem(s)"); + query = (QUERYTYPE*)palloc( HDRSIZEQT ); + query->len = HDRSIZEQT; + query->size = 0; + return query; + } /* make finish struct */ commonlen = COMPUTESIZE(state.num, state.sumlen); @@ -905,6 +912,10 @@ to_tsquery(PG_FUNCTION_ARGS) PG_FREE_IF_COPY(in, 1); query = queryin(str, pushval_morph, PG_GETARG_INT32(0)); + + if ( query->size == 0 ) + PG_RETURN_POINTER(query); + res = clean_fakeval_v2(GETQUERY(query), &len); if (!res) { diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c index 081b084087..40bec1f484 100644 --- a/contrib/tsearch2/rank.c +++ b/contrib/tsearch2/rank.c @@ -257,7 +257,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) int4 dimt, j, i; - float res = -1.0; + float res = 0.0; ITEM **item; int size = q->size; @@ -266,6 +266,8 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) for (i = 0; i < size; i++) { + float resj,wjm; + int4 jm; entry = find_wordentry(t, q, item[i]); if (!entry) continue; @@ -281,14 +283,27 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q) post = POSNULL + 1; } - for (j = 0; j < dimt; j++) - { - if (res < 0) - res = wpos(post[j]); - else - res = 1.0 - 
(1.0 - res) * (1.0 - wpos(post[j])); - } + resj = 0.0; + wjm = -1.0; + jm = 0; + for (j = 0; j < dimt; j++) + { + resj = resj + wpos(post[j])/((j+1)*(j+1)); + if ( wpos(post[j]) > wjm ) { + wjm = wpos(post[j]); + jm = j; + } + } +/* + limit (sum(1/i^2),i->inf) = pi^2/6 ~ 1.64493406685 (the divisor used below) + resj = sum(wi/i^2),i=1,noccurrences, + wi - should be sorted desc, + don't sort for now, just choose maximum weight. This should be corrected + Oleg Bartunov +*/ + res = res + ( wjm + resj - wjm/((jm+1)*(jm+1)))/1.64493406685; } + res = res /size; pfree(item); return res; }