Do not fall back to AND for FTS phrase operator.

If there is no positional information for the lexemes, the phrase operator
will no longer fall back to the AND operator. This change requires modifying
the TS_execute() interface, because in some places (in indexes, for example)
positional information is inaccessible, and in those cases we need to force a
fallback to AND.

Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.com
This commit is contained in:
Teodor Sigaev 2016-06-27 20:47:32 +03:00
parent 028350f619
commit 3dbbd0f02a
7 changed files with 53 additions and 27 deletions

View File

@ -308,7 +308,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
res = TS_execute(GETQUERY(query),
&gcv,
true,
TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND,
checkcondition_gin);
}

View File

@ -361,7 +361,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(TS_execute(
GETQUERY(query),
(void *) GETSIGN(key), false,
(void *) GETSIGN(key),
TS_EXEC_PHRASE_AS_AND,
checkcondition_bit
));
}
@ -373,7 +374,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
chkval.arre = chkval.arrb + ARRNELEM(key);
PG_RETURN_BOOL(TS_execute(
GETQUERY(query),
(void *) &chkval, true,
(void *) &chkval,
TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT,
checkcondition_arr
));
}

View File

@ -662,7 +662,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
{
fillQueryRepresentationData(qr, ptr);
if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
if (TS_execute(GETQUERY(qr->query), (void *) qr,
TS_EXEC_EMPTY, checkcondition_QueryOperand))
{
if (WEP_GETPOS(ptr->pos) > ext->q)
{
@ -691,7 +692,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
*/
fillQueryRepresentationData(qr, ptr);
if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
if (TS_execute(GETQUERY(qr->query), (void *) qr,
TS_EXEC_CALC_NOT, checkcondition_QueryOperand))
{
if (WEP_GETPOS(ptr->pos) < ext->p)
{

View File

@ -1360,7 +1360,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
*/
static bool
TS_phrase_execute(QueryItem *curitem,
void *checkval, bool calcnot, ExecPhraseData *data,
void *checkval, uint32 flags, ExecPhraseData *data,
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
{
/* since this function recurses, it could be driven to stack overflow */
@ -1382,18 +1382,19 @@ TS_phrase_execute(QueryItem *curitem,
Assert(curitem->qoperator.oper == OP_PHRASE);
if (!TS_phrase_execute(curitem + curitem->qoperator.left,
checkval, calcnot, &Ldata, chkcond))
checkval, flags, &Ldata, chkcond))
return false;
if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond))
if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond))
return false;
/*
* if at least one of the operands has no position information,
* fallback to AND operation.
* then return false. But if the TS_EXEC_PHRASE_AS_AND flag is set, then
* we return true, as for an AND operation.
*/
if (Ldata.npos == 0 || Rdata.npos == 0)
return true;
return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
/*
* Result of the operation is a list of the corresponding positions of
@ -1498,13 +1499,11 @@ TS_phrase_execute(QueryItem *curitem,
* chkcond is a callback function used to evaluate each VAL node in the query.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
* if calcnot is false, NOT expressions are always evaluated to be true. This
* is used in ranking.
* It is assumed that ordinary operators are always closer to the root than the
* phrase operator, so TS_execute() need not take care of lexeme positions at all.
*/
bool
TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
{
/* since this function recurses, it could be driven to stack overflow */
@ -1517,25 +1516,29 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
switch (curitem->qoperator.oper)
{
case OP_NOT:
if (calcnot)
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
if (flags & TS_EXEC_CALC_NOT)
return !TS_execute(curitem + 1, checkval, flags, chkcond);
else
return true;
case OP_AND:
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
return TS_execute(curitem + 1, checkval, flags, chkcond);
else
return false;
case OP_OR:
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
return true;
else
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
return TS_execute(curitem + 1, checkval, flags, chkcond);
case OP_PHRASE:
return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond);
/*
* do not check TS_EXEC_PHRASE_AS_AND here because chkcond()
* could do something more if it's called from TS_phrase_execute()
*/
return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond);
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
@ -1633,7 +1636,7 @@ ts_match_vq(PG_FUNCTION_ARGS)
result = TS_execute(
GETQUERY(query),
&chkval,
true,
TS_EXEC_CALC_NOT,
checkcondition_str
);

View File

@ -111,8 +111,25 @@ typedef struct ExecPhraseData
WordEntryPos *pos;
} ExecPhraseData;
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
/*
* Evaluates a tsquery; the available flags are defined below.
*/
extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
#define TS_EXEC_EMPTY (0x00)
/*
* If TS_EXEC_CALC_NOT is not set, then NOT expressions are always evaluated
* to be true. This is used in cases where NOT cannot be accurately computed
* (GiST) or where it isn't important (ranking).
*/
#define TS_EXEC_CALC_NOT (0x01)
/*
* Treat OP_PHRASE as OP_AND. Used when positional information is not
* accessible, such as in the consistent methods of GIN/GiST indexes.
*/
#define TS_EXEC_PHRASE_AS_AND (0x02)
extern bool tsquery_requires_match(QueryItem *curitem);
/*

View File

@ -1459,13 +1459,14 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
--check OP_PHRASE on index
create temp table phrase_index_test(fts tsvector);
insert into phrase_index_test values('A fat cat has just eaten a rat.');
insert into phrase_index_test values ('A fat cat has just eaten a rat.');
insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
fts
-------------------------------------------------
'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.'
fts
-----------------------------------
'cat':3 'eaten':6 'fat':2 'rat':8
(1 row)
set enable_seqscan = on;

View File

@ -482,7 +482,8 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
--check OP_PHRASE on index
create temp table phrase_index_test(fts tsvector);
insert into phrase_index_test values('A fat cat has just eaten a rat.');
insert into phrase_index_test values ('A fat cat has just eaten a rat.');
insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');