Fix up core tsquery GIN support for new extractQuery API.

No need for the empty-prefix-match kluge to force a full scan anymore.
This commit is contained in:
Tom Lane 2011-01-09 14:34:50 -05:00
parent 304845075c
commit 52fd2d65a3
3 changed files with 96 additions and 53 deletions

View File

@ -13,6 +13,7 @@
*/
#include "postgres.h"
#include "access/gin.h"
#include "access/skey.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
@ -26,8 +27,7 @@ gin_cmp_tslexeme(PG_FUNCTION_ARGS)
text *b = PG_GETARG_TEXT_PP(1);
int cmp;
cmp = tsCompareString(
VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
false);
@ -48,8 +48,7 @@ gin_cmp_prefix(PG_FUNCTION_ARGS)
#endif
int cmp;
cmp = tsCompareString(
VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
true);
@ -96,71 +95,72 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY(0);
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
Datum *entries = NULL;
bool *partialmatch;
*nentries = 0;
if (query->size > 0)
{
QueryItem *item = GETQUERY(query);
int4 i,
j = 0,
len;
QueryItem *item;
bool use_fullscan = false;
j;
bool *partialmatch;
int *map_item_operand;
item = clean_NOT(GETQUERY(query), &len);
if (!item)
{
use_fullscan = true;
*nentries = 1;
}
item = GETQUERY(query);
/*
* If the query doesn't have any required positive matches (for
* instance, it's something like '! foo'), we have to do a full
* index scan.
*/
if (tsquery_requires_match(item))
*searchMode = GIN_SEARCH_MODE_DEFAULT;
else
*searchMode = GIN_SEARCH_MODE_ALL;
/* count number of VAL items */
j = 0;
for (i = 0; i < query->size; i++)
{
if (item[i].type == QI_VAL)
(*nentries)++;
j++;
}
*nentries = j;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * (*nentries));
entries = (Datum *) palloc(sizeof(Datum) * j);
partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
/*
* Make map to convert item's number to corresponding operand's (the
* same, entry's) number. Entry's number is used in check array in
* consistent method. We use the same map for each entry.
*/
*extra_data = (Pointer *) palloc0(sizeof(Pointer) * (*nentries));
map_item_operand = palloc0(sizeof(int) * (query->size + 1));
*extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
map_item_operand = (int *) palloc0(sizeof(int) * query->size);
/* Now rescan the VAL items and fill in the arrays */
j = 0;
for (i = 0; i < query->size; i++)
{
if (item[i].type == QI_VAL)
{
text *txt;
QueryOperand *val = &item[i].qoperand;
text *txt;
txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
val->length);
entries[j] = PointerGetDatum(txt);
partialmatch[j] = val->prefix;
(*extra_data)[j] = (Pointer) map_item_operand;
map_item_operand[i] = j;
partialmatch[j] = val->prefix;
entries[j++] = PointerGetDatum(txt);
j++;
}
if (use_fullscan)
{
(*extra_data)[j] = (Pointer) map_item_operand;
map_item_operand[i] = j;
entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0));
}
}
else
*nentries = -1; /* nothing can be found */
PG_FREE_IF_COPY(query, 0);
@ -222,12 +222,10 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = recheck;
res = TS_execute(
GETQUERY(query),
res = TS_execute(GETQUERY(query),
&gcv,
true,
checkcondition_gin
);
checkcondition_gin);
}
PG_RETURN_BOOL(res);

View File

@ -525,7 +525,8 @@ tsvector_concat(PG_FUNCTION_ARGS)
/*
* Compare two strings by tsvector rules.
* if isPrefix = true then it returns not-zero value if b has prefix a
*
* if isPrefix = true then it returns zero value iff b has prefix a
*/
int4
tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
@ -535,8 +536,7 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
if (lena == 0)
{
if (prefix)
cmp = 0; /* emtry string is equal to any if a prefix
* match */
cmp = 0; /* empty string is prefix of anything */
else
cmp = (lenb > 0) ? -1 : 0;
}
@ -551,14 +551,9 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
if (prefix)
{
if (cmp == 0 && lena > lenb)
{
/*
* b argument is not beginning with argument a
*/
cmp = 1;
}
cmp = 1; /* a is longer, so not a prefix of b */
}
else if ((cmp == 0) && (lena != lenb))
else if (cmp == 0 && lena != lenb)
{
cmp = (lena < lenb) ? -1 : 1;
}
@ -650,13 +645,13 @@ checkcondition_str(void *checkval, QueryOperand *val)
}
/*
* check for boolean condition.
* Evaluate tsquery boolean expression.
*
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking.
* chkcond is a callback function used to evaluate each VAL node in the query.
* checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it.
* chkcond is a callback function used to evaluate each VAL node in the query.
*
* if calcnot is false, NOT expressions are always evaluated to be true. This
* is used in ranking.
*/
bool
TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
@ -675,6 +670,7 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
else
return true;
case OP_AND:
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
@ -695,6 +691,55 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
return false;
}
/*
 * Report whether a tsquery boolean expression can only be satisfied when
 * at least one of the values named in the tsquery is positively matched.
 *
 * A GIN index search uses the answer to decide whether a full index scan
 * is required.  'x & !y' cannot be true unless x matches, so scanning the
 * entries for x suffices; 'x | !y', on the other hand, can be true for rows
 * that contain neither x nor y.
 *
 * Returns true if a positive match is required, false otherwise.  Raises an
 * error if an operator node carries an operator code other than OP_NOT,
 * OP_AND or OP_OR.
 */
bool
tsquery_requires_match(QueryItem *curitem)
{
	bool		required = false;

	/* recursion depth is driven by the query, so guard against overflow */
	check_stack_depth();

	/* a value node is, by itself, a required positive match */
	if (curitem->type == QI_VAL)
		return true;

	switch (curitem->qoperator.oper)
	{
		case OP_NOT:

			/*
			 * Treat everything below a NOT as requiring no match.  Nested
			 * NOTs could sometimes be shown to require one, but that is
			 * deliberately not attempted here.
			 */
			required = false;
			break;

		case OP_AND:

			/*
			 * One operand requiring a match is enough.  The operand at
			 * curitem + 1 is only examined when the other one does not.
			 */
			required =
				tsquery_requires_match(curitem + curitem->qoperator.left) ||
				tsquery_requires_match(curitem + 1);
			break;

		case OP_OR:

			/*
			 * Every operand has to require a match.  The operand at
			 * curitem + 1 is only examined when the other one does.
			 */
			required =
				tsquery_requires_match(curitem + curitem->qoperator.left) &&
				tsquery_requires_match(curitem + 1);
			break;

		default:
			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
			break;
	}

	/* the default arm never gets here; "required" keeps the compiler quiet */
	return required;
}
/*
* boolean operations
*/

View File

@ -104,9 +104,9 @@ extern text *generateHeadline(HeadlineParsedText *prs);
/*
* Common check function for tsvector @@ tsquery
*/
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryOperand *val));
extern bool tsquery_requires_match(QueryItem *curitem);
/*
* to_ts* - text transformation to tsvector, tsquery