postgresql/contrib/tsearch2/wparser_def.c

/*
 * default word parser
 * Teodor Sigaev <teodor@sigaev.ru>
 */
#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "postgres.h"
#include "utils/builtins.h"

#include "dict.h"
#include "wparser.h"
#include "common.h"
#include "ts_cfg.h"
#include "wordparser/parser.h"
#include "wordparser/deflex.h"

PG_FUNCTION_INFO_V1(prsd_lextype);
Datum		prsd_lextype(PG_FUNCTION_ARGS);

Datum
prsd_lextype(PG_FUNCTION_ARGS)
{
	LexDescr   *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
	int			i;

	for (i = 1; i <= LASTNUM; i++)
	{
		descr[i - 1].lexid = i;
		descr[i - 1].alias = pstrdup(tok_alias[i]);
		descr[i - 1].descr = pstrdup(lex_descr[i]);
	}

	descr[LASTNUM].lexid = 0;

	PG_RETURN_POINTER(descr);
}

PG_FUNCTION_INFO_V1(prsd_start);
Datum		prsd_start(PG_FUNCTION_ARGS);

/*
 * prsd_start
 *
 * Begin parsing: hand the input buffer (argument 0) and its length
 * (argument 1) to tsearch2_start_parse_str().  Always returns a NULL
 * pointer.
 */
Datum
prsd_start(PG_FUNCTION_ARGS)
{
	char	   *input = (char *) PG_GETARG_POINTER(0);
	int			inputlen = PG_GETARG_INT32(1);

	tsearch2_start_parse_str(input, inputlen);

	PG_RETURN_POINTER(NULL);
}

PG_FUNCTION_INFO_V1(prsd_getlexeme);
Datum		prsd_getlexeme(PG_FUNCTION_ARGS);

/*
 * prsd_getlexeme
 *
 * Fetch the next lexeme by calling tsearch2_yylex().  The current token
 * pointer and its length (the lexer's "token" and "tokenlen" variables)
 * are stored through the output pointers passed as arguments 1 and 2; the
 * lexeme type reported by the lexer is the return value.
 *
 * Argument 0 is not read here.
 */
Datum
prsd_getlexeme(PG_FUNCTION_ARGS)
{
	char	  **tokout = (char **) PG_GETARG_POINTER(1);
	int		   *lenout = (int *) PG_GETARG_POINTER(2);
	int			lextype;

	/* run the lexer first: it is what updates token / tokenlen */
	lextype = tsearch2_yylex();

	*tokout = token;
	*lenout = tokenlen;

	PG_RETURN_INT32(lextype);
}

PG_FUNCTION_INFO_V1(prsd_end);
Datum		prsd_end(PG_FUNCTION_ARGS);

/*
 * prsd_end
 *
 * Finish parsing by calling tsearch2_end_parse().  Argument 0 is not read
 * here; nothing is returned.
 */
Datum
prsd_end(PG_FUNCTION_ARGS)
{
	tsearch2_end_parse();

	PG_RETURN_VOID();
}

/*
 * Token-class predicates over numeric lexeme type ids.
 *
 * NOTE(review): the ids are presumably the lexeme type numbers declared in
 * wordparser/deflex.h (the same 1..LASTNUM range prsd_lextype enumerates);
 * confirm against that header before changing any of them.
 *
 * Every macro evaluates its argument more than once, so do not pass an
 * expression with side effects.
 */
#define LEAVETOKEN(x)	((x) == 12)
#define COMPLEXTOKEN(x) ((x) == 5 || ((x) >= 15 && (x) <= 17))
#define ENDPUNCTOKEN(x) ((x) == 12)

/* ids in the range 12..14, plus 23 */
#define IDIGNORE(x)		(((x) >= 12 && (x) <= 14) || (x) == 23)
/* token types whose words get the "replace" flag in a headline */
#define HLIDIGNORE(x)	((x) == 5 || (x) == 13 || ((x) >= 15 && (x) <= 17))
/* token types that do not count as a word when measuring headline length */
#define NONWORDTOKEN(x) (HLIDIGNORE(x) || (x) == 12)
/* token types a headline should not end on */
#define NOENDTOKEN(x)	(NONWORDTOKEN(x) || IDIGNORE(x) || \
						 (x) == 7 || (x) == 8 || ((x) >= 20 && (x) <= 22))

/*
 * Window of headline words handed to checkcondition_HL() through
 * TS_execute()'s opaque check-value pointer.
 */
typedef struct
{
	HLWORD	   *words;			/* first word of the window */
	int			len;			/* number of words in the window */
}	hlCheck;

/*
 * checkcondition_HL
 *
 * TS_execute() callback: report whether query item "val" is attached to
 * any word inside the window described by "checkval" (an hlCheck).
 */
static bool
checkcondition_HL(void *checkval, ITEM * val)
{
	hlCheck    *window = (hlCheck *) checkval;
	int			idx = 0;

	while (idx < window->len)
	{
		if (window->words[idx].item == val)
			return true;
		idx++;
	}

	return false;
}


/*
 * hlCover
 *
 * Find the next "cover" in prs->words: a span of words [*p, *q] that
 * contains an occurrence of the query's VAL items and for which
 * TS_execute() says the query is satisfied.
 *
 * On entry *p is the word index at which to start searching.  On a true
 * return *p and *q are the first and last word index of the cover.  On a
 * false return no (further) cover exists and *p / *q hold no useful value.
 *
 * A candidate span is built in two passes:
 *	 1. the right edge is the largest, over all VAL items, of the item's
 *		first occurrence at or after the start position;
 *	 2. the left edge is the smallest, over all VAL items, of the item's
 *		last occurrence between the start position and the right edge.
 * If the candidate does not satisfy the query, the search restarts one word
 * past the candidate's left edge.
 *
 * "No occurrence found" is tracked with *q = -1 rather than 0, because 0 is
 * a valid word index: a cover whose right edge is the very first word must
 * not be mistaken for "nothing found".  The retry is a loop rather than
 * recursion so that stack usage does not grow with the number of rejected
 * candidates.
 */
static bool
hlCover(HLPRSTEXT * prs, QUERYTYPE * query, int *p, int *q)
{
	for (;;)
	{
		hlCheck		ch;
		ITEM	   *item;
		int			pos = *p;
		int			i,
					j;

		*q = -1;
		*p = 0x7fffffff;

		/* pass 1: right edge */
		item = GETQUERY(query);
		for (j = 0; j < query->size; j++, item++)
		{
			if (item->type != VAL)
				continue;

			for (i = pos; i < prs->curwords; i++)
			{
				if (prs->words[i].item == item)
				{
					if (i > *q)
						*q = i;
					break;
				}
			}
		}

		/* none of the query's items occurs at or after pos */
		if (*q < 0)
			return false;

		/* pass 2: left edge */
		item = GETQUERY(query);
		for (j = 0; j < query->size; j++, item++)
		{
			if (item->type != VAL)
				continue;

			for (i = *q; i >= pos; i--)
			{
				if (prs->words[i].item == item)
				{
					if (i < *p)
						*p = i;
					break;
				}
			}
		}

		if (*p > *q)
			return false;

		/* does the candidate span really satisfy the query? */
		ch.words = &(prs->words[*p]);
		ch.len = *q - *p + 1;
		if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))
			return true;

		/* no: retry starting just past the rejected left edge */
		(*p)++;
	}
}

PG_FUNCTION_INFO_V1(prsd_headline);
Datum		prsd_headline(PG_FUNCTION_ARGS);

/*
 * prsd_headline
 *
 * Choose the span of words to show as a headline and flag them in "prs".
 *
 * Arguments:
 *	 0: HLPRSTEXT *	  parsed text; its words[] flags and the startsel /
 *					  stopsel fields are filled in here
 *	 1: text *		  option string, or NULL to use the defaults
 *	 2: QUERYTYPE *	  the query
 *
 * Recognized options (keys compared case-insensitively):
 *	 MaxWords  (default 35)		upper bound on headline length in words
 *	 MinWords  (default 15)		lower bound on headline length in words
 *	 ShortWord (default 3)		words of this length or shorter are not
 *								acceptable as the end of a headline
 *	 StartSel  (default "<b>")	stored in prs->startsel
 *	 StopSel   (default "</b>") stored in prs->stopsel
 *
 * Raises ERRCODE_INVALID_PARAMETER_VALUE when options are given and
 * MinWords >= MaxWords, MinWords <= 0 or ShortWord < 0.
 *
 * Every cover reported by hlCover() is stretched or shrunk towards the
 * MinWords..MaxWords range, and the candidate containing the most distinct
 * (non-repeated) query words wins, with preference for candidates that end
 * on an acceptable word.  When there is no cover at all, the headline is
 * simply the first MinWords words of the text.
 *
 * Returns prs.
 */
Datum
prsd_headline(PG_FUNCTION_ARGS)
{
	HLPRSTEXT  *prs = (HLPRSTEXT *) PG_GETARG_POINTER(0);
	text	   *opt = (text *) PG_GETARG_POINTER(1);	/* can't be toasted */
	QUERYTYPE  *query = (QUERYTYPE *) PG_GETARG_POINTER(2);		/* can't be toasted */

	/* defaults, possibly overridden from opt below */
	int			min_words = 15;
	int			max_words = 35;
	int			shortword = 3;

	int			p = 0,
				q = 0;
	int			bestb = -1,
				beste = -1;
	int			bestlen = -1;
	int			pose = 0,
				posb,
				poslen,
				curlen;

	int			i;

	/* config */
	prs->startsel = NULL;
	prs->stopsel = NULL;
	if (opt)
	{
		Map		   *map,
				   *mptr;

		parse_cfgdict(opt, &map);
		mptr = map;

		while (mptr && mptr->key)
		{
			if (strcasecmp(mptr->key, "MaxWords") == 0)
				max_words = pg_atoi(mptr->value, 4, 1);
			else if (strcasecmp(mptr->key, "MinWords") == 0)
				min_words = pg_atoi(mptr->value, 4, 1);
			else if (strcasecmp(mptr->key, "ShortWord") == 0)
				shortword = pg_atoi(mptr->value, 4, 1);
			else if (strcasecmp(mptr->key, "StartSel") == 0)
				prs->startsel = pstrdup(mptr->value);
			else if (strcasecmp(mptr->key, "StopSel") == 0)
				prs->stopsel = pstrdup(mptr->value);

			pfree(mptr->key);
			pfree(mptr->value);

			mptr++;
		}
		pfree(map);

		if (min_words >= max_words)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("MinWords should be less than MaxWords")));
		if (min_words <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("MinWords should be positive")));
		if (shortword < 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ShortWord should be >= 0")));
	}

	while (hlCover(prs, query, &p, &q))
	{
		/*
		 * Measure the cover: curlen counts words, poslen counts distinct
		 * query words.  Stop early once max_words is reached.
		 */
		curlen = 0;
		poslen = 0;
		for (i = p; i <= q && curlen < max_words; i++)
		{
			if (!NONWORDTOKEN(prs->words[i].type))
				curlen++;
			if (prs->words[i].item && !prs->words[i].repeated)
				poslen++;
			pose = i;
		}

		if (poslen < bestlen &&
			!(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
		{
			/* a better candidate was already found, so try one more cover */
			p++;
			continue;
		}

		posb = p;
		if (curlen < max_words)
		{
			/*
			 * The whole cover fits: extend to the right until we have at
			 * least min_words and stand on an acceptable final word.  The
			 * scan restarts on word q itself (already counted, hence the
			 * i != q test) so that q can be accepted as the end.
			 */
			for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
			{
				if (i != q)
				{
					if (!NONWORDTOKEN(prs->words[i].type))
						curlen++;
					if (prs->words[i].item && !prs->words[i].repeated)
						poslen++;
				}
				pose = i;
				if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
					continue;
				if (curlen >= min_words)
					break;
			}

			if (curlen < min_words && i >= prs->curwords)
			{
				/*
				 * Hit the end of the text and the headline is still shorter
				 * than min_words: extend to the left instead.  Word p has
				 * already been counted, so start at p - 1, and never grow
				 * beyond max_words.
				 */
				for (i = p - 1; i >= 0; i--)
				{
					if (!NONWORDTOKEN(prs->words[i].type))
						curlen++;
					if (prs->words[i].item && !prs->words[i].repeated)
						poslen++;
					if (curlen >= max_words)
						break;
					if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
						continue;
					if (curlen >= min_words)
						break;
				}
				posb = (i >= 0) ? i : 0;
			}
		}
		else
		{
			/*
			 * The cover is longer than max_words: back up towards min_words
			 * looking for an acceptable final word.  After the measuring
			 * loop i is one past the last word examined; words beyond q
			 * were never counted (and q + 1 may lie past the end of the
			 * array), so never start further right than q.
			 */
			if (i > q)
				i = q;
			for (; curlen > min_words; i--)
			{
				if (!NONWORDTOKEN(prs->words[i].type))
					curlen--;
				if (prs->words[i].item && !prs->words[i].repeated)
					poslen--;
				pose = i;
				if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
					continue;
				break;
			}
		}

		/*
		 * Take this candidate if it is the first one, or if it ends on an
		 * acceptable word and either has more query words than the current
		 * best or the current best ends on an unacceptable word.
		 */
		if (bestlen < 0 ||
			(!(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
			 (poslen > bestlen ||
			  NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
		{
			bestb = posb;
			beste = pose;
			bestlen = poslen;
		}

		p++;
	}

	if (bestlen < 0)
	{
		/* no cover at all: fall back to the first min_words words */
		curlen = 0;
		for (i = 0; i < prs->curwords && curlen < min_words; i++)
		{
			if (!NONWORDTOKEN(prs->words[i].type))
				curlen++;
			pose = i;
		}
		bestb = 0;
		beste = pose;
	}

	/*
	 * Flag the chosen span.  The curwords bound only matters for an empty
	 * text, where bestb = beste = 0 but there is no word 0 to touch.
	 */
	for (i = bestb; i <= beste && i < prs->curwords; i++)
	{
		if (prs->words[i].item)
			prs->words[i].selected = 1;
		if (prs->words[i].repeated)
			prs->words[i].skip = 1;
		if (HLIDIGNORE(prs->words[i].type))
			prs->words[i].replace = 1;

		prs->words[i].in = 1;
	}

	if (!prs->startsel)
		prs->startsel = pstrdup("<b>");
	if (!prs->stopsel)
		prs->stopsel = pstrdup("</b>");
	prs->startsellen = strlen(prs->startsel);
	prs->stopsellen = strlen(prs->stopsel);

	PG_RETURN_POINTER(prs);
}
pgindent run. 2003-08-04 02:43:34 +02:00			`/*`
			`* default word parser`
tsearch2 module 2003-07-21 12:27:44 +02:00			`* Teodor Sigaev <teodor@sigaev.ru>`
			`*/`
			`#include <errno.h>`
			`#include <stdlib.h>`
			`#include <string.h>`

			`#include "postgres.h"`
			`#include "utils/builtins.h"`

			`#include "dict.h"`
			`#include "wparser.h"`
			`#include "common.h"`
			`#include "ts_cfg.h"`
			`#include "wordparser/parser.h"`
			`#include "wordparser/deflex.h"`

			`PG_FUNCTION_INFO_V1(prsd_lextype);`
pgindent run. 2003-08-04 02:43:34 +02:00			`Datum prsd_lextype(PG_FUNCTION_ARGS);`

			`Datum`
			`prsd_lextype(PG_FUNCTION_ARGS)`
			`{`
			`LexDescr descr = (LexDescr ) palloc(sizeof(LexDescr) * (LASTNUM + 1));`
			`int i;`

			`for (i = 1; i <= LASTNUM; i++)`
			`{`
			`descr[i - 1].lexid = i;`
			`descr[i - 1].alias = pstrdup(tok_alias[i]);`
			`descr[i - 1].descr = pstrdup(lex_descr[i]);`
			`}`
tsearch2 module 2003-07-21 12:27:44 +02:00
pgindent run. 2003-08-04 02:43:34 +02:00			`descr[LASTNUM].lexid = 0;`
tsearch2 module 2003-07-21 12:27:44 +02:00
			`PG_RETURN_POINTER(descr);`
			`}`

			`PG_FUNCTION_INFO_V1(prsd_start);`
pgindent run. 2003-08-04 02:43:34 +02:00			`Datum prsd_start(PG_FUNCTION_ARGS);`
			`Datum`
			`prsd_start(PG_FUNCTION_ARGS)`
			`{`
Avoid confusion start_parse_str function with tsearch V1 2003-12-05 15:27:42 +01:00			`tsearch2_start_parse_str((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1));`
tsearch2 module 2003-07-21 12:27:44 +02:00			`PG_RETURN_POINTER(NULL);`
			`}`

			`PG_FUNCTION_INFO_V1(prsd_getlexeme);`
pgindent run. 2003-08-04 02:43:34 +02:00			`Datum prsd_getlexeme(PG_FUNCTION_ARGS);`
			`Datum`
			`prsd_getlexeme(PG_FUNCTION_ARGS)`
			`{`
tsearch2 module 2003-07-21 12:27:44 +02:00			`/* ParserState p=(ParserState)PG_GETARG_POINTER(0); */`
pgindent run. 2003-08-04 02:43:34 +02:00			`char t = (char ) PG_GETARG_POINTER(1);`
			`int tlen = (int ) PG_GETARG_POINTER(2);`
			`int type = tsearch2_yylex();`
tsearch2 module 2003-07-21 12:27:44 +02:00
			`*t = token;`
			`*tlen = tokenlen;`
			`PG_RETURN_INT32(type);`
			`}`

			`PG_FUNCTION_INFO_V1(prsd_end);`
pgindent run. 2003-08-04 02:43:34 +02:00			`Datum prsd_end(PG_FUNCTION_ARGS);`
			`Datum`
			`prsd_end(PG_FUNCTION_ARGS)`
			`{`
tsearch2 module 2003-07-21 12:27:44 +02:00			`/* ParserState p=(ParserState)PG_GETARG_POINTER(0); */`
Avoid confusion start_parse_str function with tsearch V1 2003-12-05 15:27:42 +01:00			`tsearch2_end_parse();`
tsearch2 module 2003-07-21 12:27:44 +02:00			`PG_RETURN_VOID();`
			`}`

			`#define LEAVETOKEN(x) ( (x)==12 )`
pgindent run. 2003-08-04 02:43:34 +02:00			`#define COMPLEXTOKEN(x) ( (x)==5 \|\| (x)==15 \|\| (x)==16 \|\| (x)==17 )`
			`#define ENDPUNCTOKEN(x) ( (x)==12 )`
tsearch2 module 2003-07-21 12:27:44 +02:00

			`#define IDIGNORE(x) ( (x)==13 \|\| (x)==14 \|\| (x)==12 \|\| (x)==23 )`
			`#define HLIDIGNORE(x) ( (x)==5 \|\| (x)==13 \|\| (x)==15 \|\| (x)==16 \|\| (x)==17 )`
pgindent run. 2003-08-04 02:43:34 +02:00			`#define NONWORDTOKEN(x) ( (x)==12 \|\| HLIDIGNORE(x) )`
tsearch2 module 2003-07-21 12:27:44 +02:00			`#define NOENDTOKEN(x) ( NONWORDTOKEN(x) \|\| (x)==7 \|\| (x)==8 \|\| (x)==20 \|\| (x)==21 \|\| (x)==22 \|\| IDIGNORE(x) )`

pgindent run. 2003-08-04 02:43:34 +02:00			`typedef struct`
			`{`
			`HLWORD *words;`
			`int len;`
			`} hlCheck;`
tsearch2 module 2003-07-21 12:27:44 +02:00
			`static bool`
pgindent run. 2003-08-04 02:43:34 +02:00			`checkcondition_HL(void checkval, ITEM val)`
			`{`
			`int i;`

			`for (i = 0; i < ((hlCheck *) checkval)->len; i++)`
			`{`
			`if (((hlCheck *) checkval)->words[i].item == val)`
tsearch2 module 2003-07-21 12:27:44 +02:00			`return true;`
			`}`
			`return false;`
			`}`


			`static bool`
pgindent run. 2003-08-04 02:43:34 +02:00			`hlCover(HLPRSTEXT * prs, QUERYTYPE * query, int p, int q)`
			`{`
			`int i,`
			`j;`
			`ITEM *item = GETQUERY(query);`
			`int pos = *p;`

			`*q = 0;`
			`*p = 0x7fffffff;`

			`for (j = 0; j < query->size; j++)`
			`{`
			`if (item->type != VAL)`
			`{`
tsearch2 module 2003-07-21 12:27:44 +02:00			`item++;`
			`continue;`
			`}`
pgindent run. 2003-08-04 02:43:34 +02:00			`for (i = pos; i < prs->curwords; i++)`
			`{`
			`if (prs->words[i].item == item)`
			`{`
			`if (i > *q)`
tsearch2 module 2003-07-21 12:27:44 +02:00			`*q = i;`
			`break;`
			`}`
			`}`
			`item++;`
			`}`

pgindent run. 2003-08-04 02:43:34 +02:00			`if (*q == 0)`
tsearch2 module 2003-07-21 12:27:44 +02:00			`return false;`

pgindent run. 2003-08-04 02:43:34 +02:00			`item = GETQUERY(query);`
			`for (j = 0; j < query->size; j++)`
			`{`
			`if (item->type != VAL)`
			`{`
tsearch2 module 2003-07-21 12:27:44 +02:00			`item++;`
			`continue;`
			`}`
pgindent run. 2003-08-04 02:43:34 +02:00			`for (i = *q; i >= pos; i--)`
			`{`
			`if (prs->words[i].item == item)`
			`{`
			`if (i < *p)`
			`*p = i;`
tsearch2 module 2003-07-21 12:27:44 +02:00			`break;`
			`}`
			`}`
			`item++;`
pgindent run. 2003-08-04 02:43:34 +02:00			`}`

			`if (p <= q)`
			`{`
Fix some portability issues (reliance on gcc-isms). 2004-04-02 01:44:38 +02:00			`hlCheck ch;`
tsearch2 module 2003-07-21 12:27:44 +02:00
Fix some portability issues (reliance on gcc-isms). 2004-04-02 01:44:38 +02:00			`ch.words = &(prs->words[*p]);`
			`ch.len = q - p + 1;`
pgindent run. 2003-08-04 02:43:34 +02:00			`if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))`
tsearch2 module 2003-07-21 12:27:44 +02:00			`return true;`
pgindent run. 2003-08-04 02:43:34 +02:00			`else`
			`{`
tsearch2 module 2003-07-21 12:27:44 +02:00			`(*p)++;`
pgindent run. 2003-08-04 02:43:34 +02:00			`return hlCover(prs, query, p, q);`
tsearch2 module 2003-07-21 12:27:44 +02:00			`}`
			`}`

			`return false;`
			`}`

			`PG_FUNCTION_INFO_V1(prsd_headline);`
pgindent run. 2003-08-04 02:43:34 +02:00			`Datum prsd_headline(PG_FUNCTION_ARGS);`
			`Datum`
			`prsd_headline(PG_FUNCTION_ARGS)`
			`{`
			`HLPRSTEXT prs = (HLPRSTEXT ) PG_GETARG_POINTER(0);`
			`text opt = (text ) PG_GETARG_POINTER(1); /* can't be toasted */`
			`QUERYTYPE query = (QUERYTYPE ) PG_GETARG_POINTER(2); /* can't be toasted */`

tsearch2 module 2003-07-21 12:27:44 +02:00			`/* from opt + start and and tag */`
pgindent run. 2003-08-04 02:43:34 +02:00			`int min_words = 15;`
			`int max_words = 35;`
			`int shortword = 3;`

			`int p = 0,`
			`q = 0;`
			`int bestb = -1,`
			`beste = -1;`
			`int bestlen = -1;`
More accuracy work with MinWords option of headline function 2003-08-13 16:37:55 +02:00			`int pose = 0, posb,`
pgindent run. 2003-08-04 02:43:34 +02:00			`poslen,`
			`curlen;`

			`int i;`

			`/* config */`
			`prs->startsel = NULL;`
			`prs->stopsel = NULL;`
			`if (opt)`
			`{`
			`Map *map,`
			`*mptr;`

			`parse_cfgdict(opt, &map);`
			`mptr = map;`

			`while (mptr && mptr->key)`
			`{`
			`if (strcasecmp(mptr->key, "MaxWords") == 0)`
			`max_words = pg_atoi(mptr->value, 4, 1);`
			`else if (strcasecmp(mptr->key, "MinWords") == 0)`
			`min_words = pg_atoi(mptr->value, 4, 1);`
			`else if (strcasecmp(mptr->key, "ShortWord") == 0)`
			`shortword = pg_atoi(mptr->value, 4, 1);`
			`else if (strcasecmp(mptr->key, "StartSel") == 0)`
			`prs->startsel = pstrdup(mptr->value);`
			`else if (strcasecmp(mptr->key, "StopSel") == 0)`
			`prs->stopsel = pstrdup(mptr->value);`

tsearch2 module 2003-07-21 12:27:44 +02:00			`pfree(mptr->key);`
			`pfree(mptr->value);`

			`mptr++;`
			`}`
			`pfree(map);`

pgindent run. 2003-08-04 02:43:34 +02:00			`if (min_words >= max_words)`
Error message editing in contrib (mostly by Joe Conway --- thanks Joe!) 2003-07-24 19:52:50 +02:00			`ereport(ERROR,`
			`(errcode(ERRCODE_INVALID_PARAMETER_VALUE),`
Correct error messages 2003-08-13 16:41:59 +02:00			`errmsg("MinWords should be less than MaxWords")));`
pgindent run. 2003-08-04 02:43:34 +02:00			`if (min_words <= 0)`
Error message editing in contrib (mostly by Joe Conway --- thanks Joe!) 2003-07-24 19:52:50 +02:00			`ereport(ERROR,`
			`(errcode(ERRCODE_INVALID_PARAMETER_VALUE),`
More accuracy work with MinWords option of headline function 2003-08-13 16:37:55 +02:00			`errmsg("MinWords should be positive")));`
pgindent run. 2003-08-04 02:43:34 +02:00			`if (shortword < 0)`
Error message editing in contrib (mostly by Joe Conway --- thanks Joe!) 2003-07-24 19:52:50 +02:00			`ereport(ERROR,`
			`(errcode(ERRCODE_INVALID_PARAMETER_VALUE),`
Correct error messages 2003-08-13 16:41:59 +02:00			`errmsg("ShortWord should be >= 0")));`
tsearch2 module 2003-07-21 12:27:44 +02:00			`}`

pgindent run. 2003-08-04 02:43:34 +02:00			`while (hlCover(prs, query, &p, &q))`
			`{`
tsearch2 module 2003-07-21 12:27:44 +02:00			`/* find cover len in words */`
pgindent run. 2003-08-04 02:43:34 +02:00			`curlen = 0;`
			`poslen = 0;`
			`for (i = p; i <= q && curlen < max_words; i++)`
			`{`
			`if (!NONWORDTOKEN(prs->words[i].type))`
tsearch2 module 2003-07-21 12:27:44 +02:00			`curlen++;`
pgindent run. 2003-08-04 02:43:34 +02:00			`if (prs->words[i].item && !prs->words[i].repeated)`
			`poslen++;`
			`pose = i;`
tsearch2 module 2003-07-21 12:27:44 +02:00			`}`

pgindent run. 2003-08-04 02:43:34 +02:00			`if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) \|\| prs->words[beste].len <= shortword))`
			`{`
tsearch2 module 2003-07-21 12:27:44 +02:00			`/* best already finded, so try one more cover */`
			`p++;`
			`continue;`
			`}`

More accuracy work with MinWords option of headline function 2003-08-13 16:37:55 +02:00			`posb=p;`
pgindent run. 2003-08-04 02:43:34 +02:00			`if (curlen < max_words)`
			`{ /* find good end */`
			`for (i = i - 1; i < prs->curwords && curlen < max_words; i++)`
			`{`
			`if (i != q)`
			`{`
			`if (!NONWORDTOKEN(prs->words[i].type))`
tsearch2 module 2003-07-21 12:27:44 +02:00			`curlen++;`
pgindent run. 2003-08-04 02:43:34 +02:00			`if (prs->words[i].item && !prs->words[i].repeated)`
tsearch2 module 2003-07-21 12:27:44 +02:00			`poslen++;`
			`}`
pgindent run. 2003-08-04 02:43:34 +02:00			`pose = i;`
			`if (NOENDTOKEN(prs->words[i].type) \|\| prs->words[i].len <= shortword)`
tsearch2 module 2003-07-21 12:27:44 +02:00			`continue;`
pgindent run. 2003-08-04 02:43:34 +02:00			`if (curlen >= min_words)`
tsearch2 module 2003-07-21 12:27:44 +02:00			`break;`
			`}`
More accuracy work with MinWords option of headline function 2003-08-13 16:37:55 +02:00			`if ( curlen < min_words && i>=prs->curwords ) { /* got end of text and our cover is shoter than min_words */`
			`for(i=p; i>= 0; i--) {`
			`if (!NONWORDTOKEN(prs->words[i].type))`
			`curlen++;`
			`if (prs->words[i].item && !prs->words[i].repeated)`
			`poslen++;`
			`if (NOENDTOKEN(prs->words[i].type) \|\| prs->words[i].len <= shortword)`
			`continue;`
			`if (curlen >= min_words)`
			`break;`
			`}`
			`posb=(i>=0) ? i : 0;`
			`}`
pgindent run. 2003-08-04 02:43:34 +02:00			`}`
			`else`
			`{ /* shorter cover :((( */`
			`for (; curlen > min_words; i--)`
			`{`
			`if (!NONWORDTOKEN(prs->words[i].type))`
tsearch2 module 2003-07-21 12:27:44 +02:00			`curlen--;`
pgindent run. 2003-08-04 02:43:34 +02:00			`if (prs->words[i].item && !prs->words[i].repeated)`
tsearch2 module 2003-07-21 12:27:44 +02:00			`poslen--;`
pgindent run. 2003-08-04 02:43:34 +02:00			`pose = i;`
			`if (NOENDTOKEN(prs->words[i].type) \|\| prs->words[i].len <= shortword)`
tsearch2 module 2003-07-21 12:27:44 +02:00			`continue;`
			`break;`
			`}`
			`}`

pgindent run. 2003-08-04 02:43:34 +02:00			`if (bestlen < 0 \|\| (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) \|\| prs->words[pose].len <= shortword)) \|\|`
			`(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) \|\| prs->words[pose].len <= shortword) &&`
			`(NOENDTOKEN(prs->words[beste].type) \|\| prs->words[beste].len <= shortword)))`
			`{`
More accuracy work with MinWords option of headline function 2003-08-13 16:37:55 +02:00			`bestb = posb;`
pgindent run. 2003-08-04 02:43:34 +02:00			`beste = pose;`
			`bestlen = poslen;`
			`}`
tsearch2 module 2003-07-21 12:27:44 +02:00
			`p++;`
			`}`

pgindent run. 2003-08-04 02:43:34 +02:00			`if (bestlen < 0)`
			`{`
			`curlen = 0;`
			`poslen = 0;`
			`for (i = 0; i < prs->curwords && curlen < min_words; i++)`
			`{`
			`if (!NONWORDTOKEN(prs->words[i].type))`
tsearch2 module 2003-07-21 12:27:44 +02:00			`curlen++;`
pgindent run. 2003-08-04 02:43:34 +02:00			`pose = i;`
tsearch2 module 2003-07-21 12:27:44 +02:00			`}`
pgindent run. 2003-08-04 02:43:34 +02:00			`bestb = 0;`
			`beste = pose;`
tsearch2 module 2003-07-21 12:27:44 +02:00			`}`

pgindent run. 2003-08-04 02:43:34 +02:00			`for (i = bestb; i <= beste; i++)`
			`{`
			`if (prs->words[i].item)`
			`prs->words[i].selected = 1;`
			`if (prs->words[i].repeated)`
			`prs->words[i].skip = 1;`
			`if (HLIDIGNORE(prs->words[i].type))`
			`prs->words[i].replace = 1;`
tsearch2 module 2003-07-21 12:27:44 +02:00
pgindent run. 2003-08-04 02:43:34 +02:00			`prs->words[i].in = 1;`
tsearch2 module 2003-07-21 12:27:44 +02:00			`}`

			`if (!prs->startsel)`
pgindent run. 2003-08-04 02:43:34 +02:00			`prs->startsel = pstrdup("<b>");`
tsearch2 module 2003-07-21 12:27:44 +02:00			`if (!prs->stopsel)`
pgindent run. 2003-08-04 02:43:34 +02:00			`prs->stopsel = pstrdup("</b>");`
			`prs->startsellen = strlen(prs->startsel);`
			`prs->stopsellen = strlen(prs->stopsel);`
tsearch2 module 2003-07-21 12:27:44 +02:00
			`PG_RETURN_POINTER(prs);`
			`}`