diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c index 1c0f94e797..6491f0a715 100644 --- a/src/backend/tsearch/ts_parse.c +++ b/src/backend/tsearch/ts_parse.c @@ -433,6 +433,8 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen) /* * Headline framework */ + +/* Add a word to prs->words[] */ static void hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type) { @@ -449,6 +451,14 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type) prs->curwords++; } +/* + * Add pos and matching-query-item data to the just-added word. + * Here, buf/buflen represent a processed lexeme, not raw token text. + * + * If the query contains more than one matching item, we replicate + * the last-added word so that each item can be pointed to. The + * duplicate entries are marked with repeated = 1. + */ static void hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen) { @@ -590,6 +600,9 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata)); } +/* + * Generate the headline, as a text object, from HeadlineParsedText. 
+ */ text * generateHeadline(HeadlineParsedText *prs) { diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 2915f3b298..fd5b81279a 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -1921,10 +1921,6 @@ prsd_end(PG_FUNCTION_ARGS) */ /* token type classification macros */ -#define LEAVETOKEN(x) ( (x)==SPACE ) -#define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD ) -#define ENDPUNCTOKEN(x) ( (x)==SPACE ) - #define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY ) #define HLIDREPLACE(x) ( (x)==TAG_T ) #define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD ) diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 69fdd2fe36..cc3e3c1505 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -1616,6 +1616,9 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); + /* ... and let's check for query cancel while we're at it */ + CHECK_FOR_INTERRUPTS(); + if (curitem->type == QI_VAL) return chkcond(arg, (QueryOperand *) curitem, data); diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index 63aca2b1f5..1d6dfe8c59 100644 --- a/src/include/tsearch/ts_public.h +++ b/src/include/tsearch/ts_public.h @@ -30,33 +30,60 @@ typedef struct } LexDescr; /* - * Interface to headline generator + * Interface to headline generator (tsparser's prsheadline function) + * + * HeadlineParsedText describes the text that is to be highlighted. + * Some fields are passed from the core code to the prsheadline function, + * while others are output from the prsheadline function. + * + * The principal data is words[], an array of HeadlineWordEntry, + * one entry per token, of length curwords. 
+ * The fields of HeadlineWordEntry are: + * + * in, selected, replace, skip: these flags are initially zero + * and may be set by the prsheadline function. A consecutive group + * of tokens marked "in" forms a "fragment" to be output. + * Such tokens may additionally be marked selected, replace, or skip + * to modify how they are shown. (If you set more than one of those + * bits, you get an unspecified one of those behaviors.) + * + * type, len, pos, word: filled by core code to describe the token. + * + * item: if the token matches any operand of the tsquery of interest, + * a pointer to such an operand. (If there are multiple matching + * operands, we generate extra copies of the HeadlineWordEntry to hold + * all the pointers. The extras are marked with repeated = 1 and should + * be ignored except for checking the item pointer.) */ typedef struct { - uint32 selected:1, - in:1, - replace:1, - repeated:1, - skip:1, - unused:3, - type:8, - len:16; - WordEntryPos pos; - char *word; - QueryOperand *item; + uint32 selected:1, /* token is to be highlighted */ + in:1, /* token is part of headline */ + replace:1, /* token is to be replaced with a space */ + repeated:1, /* duplicate entry to hold item pointer */ + skip:1, /* token is to be skipped (not output) */ + unused:3, /* available bits */ + type:8, /* parser's token category */ + len:16; /* length of token */ + WordEntryPos pos; /* position of token */ + char *word; /* text of token (not null-terminated) */ + QueryOperand *item; /* a matching query operand, or NULL if none */ } HeadlineWordEntry; typedef struct { + /* Fields filled by core code before calling prsheadline function: */ HeadlineWordEntry *words; - int32 lenwords; - int32 curwords; - int32 vectorpos; /* positions a-la tsvector */ - char *startsel; + int32 lenwords; /* allocated length of words[] */ + int32 curwords; /* current number of valid entries */ + int32 vectorpos; /* used by ts_parse.c in filling pos fields */ + + /* The prsheadline function 
must fill these fields: */ + /* Strings for marking selected tokens and separating fragments: */ + char *startsel; /* palloc'd strings */ char *stopsel; char *fragdelim; - int16 startsellen; + int16 startsellen; /* lengths of strings */ int16 stopsellen; int16 fragdelimlen; } HeadlineParsedText;