Make exact distance match for FTS phrase operator

Phrase operator now requires exact distance between lexemes instead of
less-or-equal.

Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.com
This commit is contained in:
Teodor Sigaev 2016-06-27 20:41:00 +03:00
parent f1993038a4
commit 028350f619
4 changed files with 63 additions and 47 deletions

View File

@ -346,10 +346,10 @@ SELECT to_tsvector('error is not fatal') @@ to_tsquery('fatal <-> error');
There is a more general version of the FOLLOWED BY operator having the
form <literal>&lt;<replaceable>N</>&gt;</literal>,
where <replaceable>N</> is an integer standing for the greatest distance
where <replaceable>N</> is an integer standing for the exact distance
allowed between the matching lexemes. <literal>&lt;1&gt;</literal> is
the same as <literal>&lt;-&gt;</>, while <literal>&lt;2&gt;</literal>
allows one other lexeme to optionally appear between the matches, and so
allows one other lexeme to appear between the matches, and so
on. The <literal>phraseto_tsquery</> function makes use of this
operator to construct a <literal>tsquery</> that can match a multi-word
phrase when some of the words are stop words. For example:
@ -1529,7 +1529,7 @@ SELECT to_tsquery('fat') &lt;-&gt; to_tsquery('cat | rat');
<para>
Returns a query that searches for a match to the first given query
followed by a match to the second given query at a distance of at
most <replaceable>distance</replaceable> lexemes, using
<replaceable>distance</replaceable> lexemes, using
the <literal>&lt;<replaceable>N</>&gt;</literal>
<type>tsquery</> operator. For example:

View File

@ -1375,6 +1375,7 @@ TS_phrase_execute(QueryItem *curitem,
ExecPhraseData Ldata = {0, false, NULL},
Rdata = {0, false, NULL};
WordEntryPos *Lpos,
*LposStart,
*Rpos,
*pos_iter = NULL;
@ -1416,52 +1417,60 @@ TS_phrase_execute(QueryItem *curitem,
pos_iter = data->pos;
}
Lpos = Ldata.pos;
Rpos = Rdata.pos;
/*
* Find matches by distance, WEP_GETPOS() is needed because
* ExecPhraseData->data can point to the tsvector's WordEntryPosVector
*/
Rpos = Rdata.pos;
LposStart = Ldata.pos;
while (Rpos < Rdata.pos + Rdata.npos)
{
/*
* We need to check all possible distances, so reset Lpos
* to a position that is guaranteed not to have been satisfied yet.
*/
Lpos = LposStart;
while (Lpos < Ldata.pos + Ldata.npos)
{
if (WEP_GETPOS(*Lpos) <= WEP_GETPOS(*Rpos))
if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) ==
curitem->qoperator.distance)
{
/*
* Lpos is behind the Rpos, so we have to check the
* distance condition
*/
if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <= curitem->qoperator.distance)
/* MATCH! */
if (data)
{
/* MATCH! */
if (data)
{
*pos_iter = WEP_GETPOS(*Rpos);
pos_iter++;
/* Store position for upper phrase operator */
*pos_iter = WEP_GETPOS(*Rpos);
pos_iter++;
break; /* We need to build a unique result
* array, so go to the next Rpos */
}
else
{
/*
* We are in the root of the phrase tree and hence
* we don't have to store the resulting positions
*/
return true;
}
/*
* Set left start position to next, because current one
* could not satisfy distance for any other right
* position
*/
LposStart = Lpos + 1;
break;
}
else
{
/*
* We are in the root of the phrase tree and hence
* we don't have to store the resulting positions
*/
return true;
}
}
else
else if (WEP_GETPOS(*Rpos) <= WEP_GETPOS(*Lpos) ||
WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <
curitem->qoperator.distance)
{
/*
* Go to the next Rpos, because Lpos is ahead of the
* current Rpos
* Go to the next Rpos, because Lpos is at or ahead of Rpos, or is
* closer to it than the distance required by the current operator
*/
break;
}
Lpos++;

View File

@ -665,10 +665,10 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
t
(1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
true
------
t
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "false";
false
-------
f
(1 row)
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
@ -683,6 +683,12 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
t
(1 row)
SELECT to_tsvector('simple', '1 2 1 2') @@ '1 <3> 2' AS "true";
true
------
t
(1 row)
SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
false
-------
@ -897,7 +903,7 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
ts_rank_cd
------------
0.0714286
0
(1 row)
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
@ -924,10 +930,10 @@ SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
t
(1 row)
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
true
------
t
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "false";
false
-------
f
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
@ -954,10 +960,10 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
t
(1 row)
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
true
------
t
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
false
-------
f
(1 row)
-- tsvector editing operations

View File

@ -130,9 +130,10 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
--phrase search
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "false";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
SELECT to_tsvector('simple', '1 2 1 2') @@ '1 <3> 2' AS "true";
SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
@ -180,12 +181,12 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
-- tsvector editing operations