diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 76b6f9aef0..7b29062a97 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -2003,24 +2003,14 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data) * Returns -1 if no such index */ static int -hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos) +hlFirstIndex(HeadlineParsedText *prs, int pos) { int i; - /* For each word ... */ for (i = pos; i < prs->curwords; i++) { - /* ... scan the query to see if this word matches any operand */ - QueryItem *item = GETQUERY(query); - int j; - - for (j = 0; j < query->size; j++) - { - if (item->type == QI_VAL && - prs->words[i].item == &item->qoperand) - return i; - item++; - } + if (prs->words[i].item != NULL) + return i; } return -1; } @@ -2028,8 +2018,14 @@ hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos) /* * hlCover: try to find a substring of prs' word list that satisfies query * - * At entry, *p must be the first word index to consider (initialize this to - * zero, or to the next index after a previous successful search). + * At entry, *p must be the first word index to consider (initialize this + * to zero, or to the next index after a previous successful search). + * We will consider all substrings starting at or after that word, and + * containing no more than max_cover words. (We need a length limit to + * keep this from taking O(N^2) time for a long document with many query + * words but few complete matches. Actually, since checkcondition_HL is + * roughly O(N) in the length of the substring being checked, it's even + * worse than that.) * * On success, sets *p to first word index and *q to last word index of the * cover substring, and returns true. @@ -2038,7 +2034,8 @@ hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos) * words used in the query. */ static bool -hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q) +hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover, + int *p, int *q) { int pmin, pmax, @@ -2052,7 +2049,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q) * appearing in the query; there's no point in trying endpoints in between * such points. */ - pmin = hlFirstIndex(prs, query, *p); + pmin = hlFirstIndex(prs, *p); while (pmin >= 0) { /* This useless assignment just keeps stupider compilers quiet */ @@ -2073,7 +2070,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q) return true; } /* Nope, so advance pmax to next feasible endpoint */ - nextpmax = hlFirstIndex(prs, query, pmax + 1); + nextpmax = hlFirstIndex(prs, pmax + 1); /* * If this is our first advance past pmin, then the result is also @@ -2084,7 +2081,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q) nextpmin = nextpmax; pmax = nextpmax; } - while (pmax >= 0); + while (pmax >= 0 && pmax - pmin < max_cover); /* No luck here, so try next feasible startpoint */ pmin = nextpmin; } @@ -2186,7 +2183,7 @@ get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall, int shortword, int min_words, - int max_words, int max_fragments) + int max_words, int max_fragments, int max_cover) { int32 poslen, curlen, @@ -2213,7 +2210,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall, covers = palloc(maxcovers * sizeof(CoverPos)); /* get all covers */ - while (hlCover(prs, query, &p, &q)) + while (hlCover(prs, query, max_cover, &p, &q)) { startpos = p; endpos = q; @@ -2368,7 +2365,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall, */ static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall, - int shortword, int min_words, int max_words) + int shortword, int min_words, int max_words, int max_cover) { int p = 0, q = 0; @@ -2386,7 +2383,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall, if (!highlightall) { /* examine all covers, select a headline using the best one */ - while (hlCover(prs, query, &p, &q)) + while (hlCover(prs, query, max_cover, &p, &q)) { /* * Count words (curlen) and interesting words (poslen) within @@ -2542,6 +2539,7 @@ prsd_headline(PG_FUNCTION_ARGS) int shortword = 3; int max_fragments = 0; bool highlightall = false; + int max_cover; ListCell *l; /* Extract configuration option values */ @@ -2581,6 +2579,15 @@ prsd_headline(PG_FUNCTION_ARGS) defel->defname))); } + /* + * We might eventually make max_cover a user-settable parameter, but for + * now, just compute a reasonable value based on max_words and + * max_fragments. + */ + max_cover = Max(max_words * 10, 100); + if (max_fragments > 0) + max_cover *= max_fragments; + /* in HighlightAll mode these parameters are ignored */ if (!highlightall) { @@ -2605,10 +2612,10 @@ prsd_headline(PG_FUNCTION_ARGS) /* Apply appropriate headline selector */ if (max_fragments == 0) mark_hl_words(prs, query, highlightall, shortword, - min_words, max_words); + min_words, max_words, max_cover); else mark_hl_fragments(prs, query, highlightall, shortword, - min_words, max_words, max_fragments); + min_words, max_words, max_fragments, max_cover); /* Fill in default values for string options */ if (!prs->startsel) diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index f01b1ee253..756a48a167 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -1868,6 +1868,9 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); + /* ... and let's check for query cancel while we're at it */ + CHECK_FOR_INTERRUPTS(); + if (curitem->type == QI_VAL) return chkcond(arg, (QueryOperand *) curitem, NULL /* don't need position info */ );