Fix up core tsquery GIN support for new extractQuery API.

No need for the empty-prefix-match kluge to force a full scan anymore.
This commit is contained in:
Tom Lane 2011-01-09 14:34:50 -05:00
parent 304845075c
commit 52fd2d65a3
3 changed files with 96 additions and 53 deletions

View File

@ -13,6 +13,7 @@
*/ */
#include "postgres.h" #include "postgres.h"
#include "access/gin.h"
#include "access/skey.h" #include "access/skey.h"
#include "tsearch/ts_type.h" #include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h" #include "tsearch/ts_utils.h"
@ -26,8 +27,7 @@ gin_cmp_tslexeme(PG_FUNCTION_ARGS)
text *b = PG_GETARG_TEXT_PP(1); text *b = PG_GETARG_TEXT_PP(1);
int cmp; int cmp;
cmp = tsCompareString( cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
false); false);
@ -48,8 +48,7 @@ gin_cmp_prefix(PG_FUNCTION_ARGS)
#endif #endif
int cmp; int cmp;
cmp = tsCompareString( cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
true); true);
@ -96,71 +95,72 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
{ {
TSQuery query = PG_GETARG_TSQUERY(0); TSQuery query = PG_GETARG_TSQUERY(0);
int32 *nentries = (int32 *) PG_GETARG_POINTER(1); int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */ /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3); bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
Datum *entries = NULL; Datum *entries = NULL;
bool *partialmatch;
*nentries = 0; *nentries = 0;
if (query->size > 0) if (query->size > 0)
{ {
QueryItem *item = GETQUERY(query);
int4 i, int4 i,
j = 0, j;
len; bool *partialmatch;
QueryItem *item;
bool use_fullscan = false;
int *map_item_operand; int *map_item_operand;
item = clean_NOT(GETQUERY(query), &len); /*
if (!item) * If the query doesn't have any required positive matches (for
{ * instance, it's something like '! foo'), we have to do a full
use_fullscan = true; * index scan.
*nentries = 1; */
} if (tsquery_requires_match(item))
*searchMode = GIN_SEARCH_MODE_DEFAULT;
item = GETQUERY(query); else
*searchMode = GIN_SEARCH_MODE_ALL;
/* count number of VAL items */
j = 0;
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{
if (item[i].type == QI_VAL) if (item[i].type == QI_VAL)
(*nentries)++; j++;
}
*nentries = j;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries)); entries = (Datum *) palloc(sizeof(Datum) * j);
partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * (*nentries)); partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
/* /*
* Make map to convert item's number to corresponding operand's (the * Make map to convert item's number to corresponding operand's (the
* same, entry's) number. Entry's number is used in check array in * same, entry's) number. Entry's number is used in check array in
* consistent method. We use the same map for each entry. * consistent method. We use the same map for each entry.
*/ */
*extra_data = (Pointer *) palloc0(sizeof(Pointer) * (*nentries)); *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
map_item_operand = palloc0(sizeof(int) * (query->size + 1)); map_item_operand = (int *) palloc0(sizeof(int) * query->size);
/* Now rescan the VAL items and fill in the arrays */
j = 0;
for (i = 0; i < query->size; i++) for (i = 0; i < query->size; i++)
{
if (item[i].type == QI_VAL) if (item[i].type == QI_VAL)
{ {
text *txt;
QueryOperand *val = &item[i].qoperand; QueryOperand *val = &item[i].qoperand;
text *txt;
txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance, txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
val->length); val->length);
entries[j] = PointerGetDatum(txt);
partialmatch[j] = val->prefix;
(*extra_data)[j] = (Pointer) map_item_operand; (*extra_data)[j] = (Pointer) map_item_operand;
map_item_operand[i] = j; map_item_operand[i] = j;
partialmatch[j] = val->prefix; j++;
entries[j++] = PointerGetDatum(txt);
} }
if (use_fullscan)
{
(*extra_data)[j] = (Pointer) map_item_operand;
map_item_operand[i] = j;
entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0));
} }
} }
else
*nentries = -1; /* nothing can be found */
PG_FREE_IF_COPY(query, 0); PG_FREE_IF_COPY(query, 0);
@ -222,12 +222,10 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
gcv.map_item_operand = (int *) (extra_data[0]); gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = recheck; gcv.need_recheck = recheck;
res = TS_execute( res = TS_execute(GETQUERY(query),
GETQUERY(query),
&gcv, &gcv,
true, true,
checkcondition_gin checkcondition_gin);
);
} }
PG_RETURN_BOOL(res); PG_RETURN_BOOL(res);

View File

@ -525,7 +525,8 @@ tsvector_concat(PG_FUNCTION_ARGS)
/* /*
* Compare two strings by tsvector rules. * Compare two strings by tsvector rules.
* if isPrefix = true then it returns not-zero value if b has prefix a *
* if isPrefix = true then it returns zero value iff b has prefix a
*/ */
int4 int4
tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
@ -535,8 +536,7 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
if (lena == 0) if (lena == 0)
{ {
if (prefix) if (prefix)
cmp = 0; /* emtry string is equal to any if a prefix cmp = 0; /* empty string is prefix of anything */
* match */
else else
cmp = (lenb > 0) ? -1 : 0; cmp = (lenb > 0) ? -1 : 0;
} }
@ -551,14 +551,9 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
if (prefix) if (prefix)
{ {
if (cmp == 0 && lena > lenb) if (cmp == 0 && lena > lenb)
{ cmp = 1; /* a is longer, so not a prefix of b */
/*
* b argument is not beginning with argument a
*/
cmp = 1;
}
} }
else if ((cmp == 0) && (lena != lenb)) else if (cmp == 0 && lena != lenb)
{ {
cmp = (lena < lenb) ? -1 : 1; cmp = (lena < lenb) ? -1 : 1;
} }
@ -650,13 +645,13 @@ checkcondition_str(void *checkval, QueryOperand *val)
} }
/* /*
* check for boolean condition. * Evaluate tsquery boolean expression.
* *
* if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking. * chkcond is a callback function used to evaluate each VAL node in the query.
* checkval can be used to pass information to the callback. TS_execute doesn't * checkval can be used to pass information to the callback. TS_execute doesn't
* do anything with it. * do anything with it.
* chkcond is a callback function used to evaluate each VAL node in the query. * if calcnot is false, NOT expressions are always evaluated to be true. This
* * is used in ranking.
*/ */
bool bool
TS_execute(QueryItem *curitem, void *checkval, bool calcnot, TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
@ -675,6 +670,7 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
return !TS_execute(curitem + 1, checkval, calcnot, chkcond); return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
else else
return true; return true;
case OP_AND: case OP_AND:
if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond)) if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
return TS_execute(curitem + 1, checkval, calcnot, chkcond); return TS_execute(curitem + 1, checkval, calcnot, chkcond);
@ -695,6 +691,55 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
return false; return false;
} }
/*
 * Report whether a tsquery boolean expression can only be satisfied when at
 * least one of the values it mentions is positively matched.
 *
 * A GIN index search uses the answer to decide whether a full index scan is
 * needed.  'x & !y' cannot be true unless x matches, so scanning the entries
 * for x suffices; 'x | !y', by contrast, can be true for rows that contain
 * neither x nor y.
 */
bool
tsquery_requires_match(QueryItem *curitem)
{
	/* this function recurses, so guard against stack overflow */
	check_stack_depth();

	/* a plain value node is itself a required match */
	if (curitem->type == QI_VAL)
		return true;

	switch (curitem->qoperator.oper)
	{
		case OP_NOT:

			/*
			 * Treat everything below a NOT as requiring no match.  Some
			 * nested-NOT cases could be proven to need one, but that is
			 * unlikely to be worth the trouble.
			 */
			return false;

		case OP_AND:
			/* one side requiring a match is enough; left is tested first */
			return tsquery_requires_match(curitem + curitem->qoperator.left) ||
				tsquery_requires_match(curitem + 1);

		case OP_OR:
			/* every branch must require a match; left is tested first */
			return tsquery_requires_match(curitem + curitem->qoperator.left) &&
				tsquery_requires_match(curitem + 1);

		default:
			elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
	}

	/* not reachable, but keep compiler quiet */
	return false;
}
/* /*
* boolean operations * boolean operations
*/ */

View File

@ -104,9 +104,9 @@ extern text *generateHeadline(HeadlineParsedText *prs);
/* /*
* Common check function for tsvector @@ tsquery * Common check function for tsvector @@ tsquery
*/ */
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
bool (*chkcond) (void *checkval, QueryOperand *val)); bool (*chkcond) (void *checkval, QueryOperand *val));
extern bool tsquery_requires_match(QueryItem *curitem);
/* /*
* to_ts* - text transformation to tsvector, tsquery * to_ts* - text transformation to tsvector, tsquery