From 52fd2d65a33c9c33b29788e9df89d7716f1ec0bc Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 9 Jan 2011 14:34:50 -0500 Subject: [PATCH] Fix up core tsquery GIN support for new extractQuery API. No need for the empty-prefix-match kluge to force a full scan anymore. --- src/backend/utils/adt/tsginidx.c | 74 ++++++++++++++--------------- src/backend/utils/adt/tsvector_op.c | 73 ++++++++++++++++++++++------ src/include/tsearch/ts_utils.h | 2 +- 3 files changed, 96 insertions(+), 53 deletions(-) diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index 9e7ca66132..56cd9b70d2 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "access/gin.h" #include "access/skey.h" #include "tsearch/ts_type.h" #include "tsearch/ts_utils.h" @@ -26,8 +27,7 @@ gin_cmp_tslexeme(PG_FUNCTION_ARGS) text *b = PG_GETARG_TEXT_PP(1); int cmp; - cmp = tsCompareString( - VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), + cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), false); @@ -48,8 +48,7 @@ gin_cmp_prefix(PG_FUNCTION_ARGS) #endif int cmp; - cmp = tsCompareString( - VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), + cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), true); @@ -96,71 +95,72 @@ gin_extract_tsquery(PG_FUNCTION_ARGS) { TSQuery query = PG_GETARG_TSQUERY(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); - /* StrategyNumber strategy = PG_GETARG_UINT16(2); */ bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3); Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); + /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */ + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); Datum *entries = NULL; - bool *partialmatch; *nentries = 0; if (query->size > 0) { + QueryItem *item = GETQUERY(query); int4 i, - j = 0, - len; - QueryItem *item; - bool use_fullscan = false; + j; + bool *partialmatch; int *map_item_operand; - item = clean_NOT(GETQUERY(query), &len); - if (!item) - { - use_fullscan = true; - *nentries = 1; - } - - item = GETQUERY(query); + /* + * If the query doesn't have any required positive matches (for + * instance, it's something like '! foo'), we have to do a full + * index scan. + */ + if (tsquery_requires_match(item)) + *searchMode = GIN_SEARCH_MODE_DEFAULT; + else + *searchMode = GIN_SEARCH_MODE_ALL; + /* count number of VAL items */ + j = 0; for (i = 0; i < query->size; i++) + { if (item[i].type == QI_VAL) - (*nentries)++; + j++; + } + *nentries = j; - entries = (Datum *) palloc(sizeof(Datum) * (*nentries)); - partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * (*nentries)); + entries = (Datum *) palloc(sizeof(Datum) * j); + partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j); /* * Make map to convert item's number to corresponding operand's (the * same, entry's) number. Entry's number is used in check array in * consistent method. We use the same map for each entry. */ - *extra_data = (Pointer *) palloc0(sizeof(Pointer) * (*nentries)); - map_item_operand = palloc0(sizeof(int) * (query->size + 1)); + *extra_data = (Pointer *) palloc(sizeof(Pointer) * j); + map_item_operand = (int *) palloc0(sizeof(int) * query->size); + /* Now rescan the VAL items and fill in the arrays */ + j = 0; for (i = 0; i < query->size; i++) + { if (item[i].type == QI_VAL) { - text *txt; QueryOperand *val = &item[i].qoperand; + text *txt; txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance, val->length); + entries[j] = PointerGetDatum(txt); + partialmatch[j] = val->prefix; (*extra_data)[j] = (Pointer) map_item_operand; map_item_operand[i] = j; - partialmatch[j] = val->prefix; - entries[j++] = PointerGetDatum(txt); + j++; } - - if (use_fullscan) - { - (*extra_data)[j] = (Pointer) map_item_operand; - map_item_operand[i] = j; - entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0)); } } - else - *nentries = -1; /* nothing can be found */ PG_FREE_IF_COPY(query, 0); @@ -222,12 +222,10 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = recheck; - res = TS_execute( - GETQUERY(query), + res = TS_execute(GETQUERY(query), &gcv, true, - checkcondition_gin - ); + checkcondition_gin); } PG_RETURN_BOOL(res); diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 38c1401398..b7a822d354 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -525,7 +525,8 @@ tsvector_concat(PG_FUNCTION_ARGS) /* * Compare two strings by tsvector rules. - * if isPrefix = true then it returns not-zero value if b has prefix a + * + * if isPrefix = true then it returns zero value iff b has prefix a */ int4 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) @@ -535,8 +536,7 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) if (lena == 0) { if (prefix) - cmp = 0; /* emtry string is equal to any if a prefix - * match */ + cmp = 0; /* empty string is prefix of anything */ else cmp = (lenb > 0) ? -1 : 0; } @@ -551,14 +551,9 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) if (prefix) { if (cmp == 0 && lena > lenb) - { - /* - * b argument is not beginning with argument a - */ - cmp = 1; - } + cmp = 1; /* a is longer, so not a prefix of b */ } - else if ((cmp == 0) && (lena != lenb)) + else if (cmp == 0 && lena != lenb) { cmp = (lena < lenb) ? -1 : 1; } @@ -650,13 +645,13 @@ checkcondition_str(void *checkval, QueryOperand *val) } /* - * check for boolean condition. + * Evaluate tsquery boolean expression. * - * if calcnot is false, NOT expressions are always evaluated to be true. This is used in ranking. + * chkcond is a callback function used to evaluate each VAL node in the query. * checkval can be used to pass information to the callback. TS_execute doesn't * do anything with it. - * chkcond is a callback function used to evaluate each VAL node in the query. - * + * if calcnot is false, NOT expressions are always evaluated to be true. This + * is used in ranking. */ bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, @@ -675,6 +670,7 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot, return !TS_execute(curitem + 1, checkval, calcnot, chkcond); else return true; + case OP_AND: if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond)) return TS_execute(curitem + 1, checkval, calcnot, chkcond); @@ -695,6 +691,55 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot, return false; } +/* + * Detect whether a tsquery boolean expression requires any positive matches + * to values shown in the tsquery. + * + * This is needed to know whether a GIN index search requires full index scan. + * For example, 'x & !y' requires a match of x, so it's sufficient to scan + * entries for x; but 'x | !y' could match rows containing neither x nor y. + */ +bool +tsquery_requires_match(QueryItem *curitem) +{ + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + if (curitem->type == QI_VAL) + return true; + + switch (curitem->qoperator.oper) + { + case OP_NOT: + /* + * Assume there are no required matches underneath a NOT. For + * some cases with nested NOTs, we could prove there's a required + * match, but it seems unlikely to be worth the trouble. + */ + return false; + + case OP_AND: + /* If either side requires a match, we're good */ + if (tsquery_requires_match(curitem + curitem->qoperator.left)) + return true; + else + return tsquery_requires_match(curitem + 1); + + case OP_OR: + /* Both sides must require a match */ + if (tsquery_requires_match(curitem + curitem->qoperator.left)) + return tsquery_requires_match(curitem + 1); + else + return false; + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + } + + /* not reachable, but keep compiler quiet */ + return false; +} + /* * boolean operations */ diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index 62890aabb7..1bd4034488 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -104,9 +104,9 @@ extern text *generateHeadline(HeadlineParsedText *prs); /* * Common check function for tsvector @@ tsquery */ - extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, QueryOperand *val)); +extern bool tsquery_requires_match(QueryItem *curitem); /* * to_ts* - text transformation to tsvector, tsquery