Cosmetic improvements for default text search parser's ts_headline code.

This code was woefully unreadable and under-commented.  Try to improve
matters by adding comments, using some macros to make complicated
if-tests more readable, using boolean type where appropriate, etc.
There are a couple of tiny coding improvements too, but this commit
includes (I hope) no behavioral change.

Nonetheless, back-patch as far as 9.6, because a followup bug-fixing
commit depends on this.

Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org
This commit is contained in:
Tom Lane 2020-04-09 12:37:00 -04:00
parent f179e9f01b
commit ff081d6bb8
1 changed file with 176 additions and 97 deletions

View File

@ -1915,6 +1915,12 @@ prsd_end(PG_FUNCTION_ARGS)
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
/*
* ts_headline support begins here
*/
/* token type classification macros */
#define LEAVETOKEN(x) ( (x)==SPACE ) #define LEAVETOKEN(x) ( (x)==SPACE )
#define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD ) #define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
#define ENDPUNCTOKEN(x) ( (x)==SPACE ) #define ENDPUNCTOKEN(x) ( (x)==SPACE )
@ -1926,23 +1932,54 @@ prsd_end(PG_FUNCTION_ARGS)
#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) ) #define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) ) #define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
/*
* Macros useful in headline selection. These rely on availability of
* "HeadlineParsedText *prs" describing some text, and "int shortword"
* describing the "short word" length parameter.
*/
/* Interesting words are non-repeated search terms */
#define INTERESTINGWORD(j) \
(prs->words[j].item && !prs->words[j].repeated)
/* Don't want to end at a non-word or a short word */
#define BADENDPOINT(j) \
(NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword)
typedef struct typedef struct
{ {
/* one cover (well, really one fragment) for mark_hl_fragments */
int32 startpos; /* fragment's starting word index */
int32 endpos; /* ending word index (inclusive) */
int32 poslen; /* number of interesting words */
int32 curlen; /* total number of words */
bool chosen; /* chosen? */
bool excluded; /* excluded? */
} CoverPos;
typedef struct
{
/* callback data for checkcondition_HL */
HeadlineWordEntry *words; HeadlineWordEntry *words;
int len; int len;
} hlCheck; } hlCheck;
/*
* TS_execute callback for matching a tsquery operand to headline words
*/
static bool static bool
checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data) checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
{ {
int i;
hlCheck *checkval = (hlCheck *) opaque; hlCheck *checkval = (hlCheck *) opaque;
int i;
/* scan words array for matching items */
for (i = 0; i < checkval->len; i++) for (i = 0; i < checkval->len; i++)
{ {
if (checkval->words[i].item == val) if (checkval->words[i].item == val)
{ {
/* don't need to find all positions */ /* if data == NULL, don't need to report positions */
if (!data) if (!data)
return true; return true;
@ -2038,8 +2075,14 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
return false; return false;
} }
/*
* Apply suitable highlight marking to words selected by headline selector
*
* The words from startpos to endpos inclusive are marked per highlightall
*/
static void static void
mark_fragment(HeadlineParsedText *prs, int highlight, int startpos, int endpos) mark_fragment(HeadlineParsedText *prs, bool highlightall,
int startpos, int endpos)
{ {
int i; int i;
@ -2047,7 +2090,7 @@ mark_fragment(HeadlineParsedText *prs, int highlight, int startpos, int endpos)
{ {
if (prs->words[i].item) if (prs->words[i].item)
prs->words[i].selected = 1; prs->words[i].selected = 1;
if (highlight == 0) if (!highlightall)
{ {
if (HLIDREPLACE(prs->words[i].type)) if (HLIDREPLACE(prs->words[i].type))
prs->words[i].replace = 1; prs->words[i].replace = 1;
@ -2064,16 +2107,15 @@ mark_fragment(HeadlineParsedText *prs, int highlight, int startpos, int endpos)
} }
} }
typedef struct /*
{ * split a cover substring into fragments not longer than max_words
int32 startpos; *
int32 endpos; * At entry, *startpos and *endpos are the (remaining) bounds of the cover
int32 poslen; * substring. They are updated to hold the bounds of the next fragment.
int32 curlen; *
int16 in; * *curlen and *poslen are set to the fragment's length, in words and
int16 excluded; * interesting words respectively.
} CoverPos; */
static void static void
get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
int *curlen, int *poslen, int max_words) int *curlen, int *poslen, int max_words)
@ -2081,17 +2123,17 @@ get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
int i; int i;
/* /*
* Objective: Generate a fragment of words between startpos and endpos * Objective: select a fragment of words between startpos and endpos such
* such that it has at most max_words and both ends has query words. If * that it has at most max_words and both ends have query words. If the
* the startpos and endpos are the endpoints of the cover and the cover * startpos and endpos are the endpoints of the cover and the cover has
* has fewer words than max_words, then this function should just return * fewer words than max_words, then this function should just return the
* the cover * cover
*/ */
/* first move startpos to an item */ /* first move startpos to an item */
for (i = *startpos; i <= *endpos; i++) for (i = *startpos; i <= *endpos; i++)
{ {
*startpos = i; *startpos = i;
if (prs->words[i].item && !prs->words[i].repeated) if (INTERESTINGWORD(i))
break; break;
} }
/* cut endpos to have only max_words */ /* cut endpos to have only max_words */
@ -2101,7 +2143,7 @@ get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
{ {
if (!NONWORDTOKEN(prs->words[i].type)) if (!NONWORDTOKEN(prs->words[i].type))
*curlen += 1; *curlen += 1;
if (prs->words[i].item && !prs->words[i].repeated) if (INTERESTINGWORD(i))
*poslen += 1; *poslen += 1;
} }
/* if the cover was cut then move back endpos to a query item */ /* if the cover was cut then move back endpos to a query item */
@ -2111,7 +2153,7 @@ get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
for (i = *endpos; i >= *startpos; i--) for (i = *endpos; i >= *startpos; i--)
{ {
*endpos = i; *endpos = i;
if (prs->words[i].item && !prs->words[i].repeated) if (INTERESTINGWORD(i))
break; break;
if (!NONWORDTOKEN(prs->words[i].type)) if (!NONWORDTOKEN(prs->words[i].type))
*curlen -= 1; *curlen -= 1;
@ -2119,8 +2161,14 @@ get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
} }
} }
/*
* Headline selector used when MaxFragments > 0
*
* Note: in this mode, highlightall is disregarded for phrase selection;
* it only controls presentation details.
*/
static void static void
mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight, mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
int shortword, int min_words, int shortword, int min_words,
int max_words, int max_fragments) int max_words, int max_fragments)
{ {
@ -2156,7 +2204,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
/* /*
* Break the cover into smaller fragments such that each fragment has * Break the cover into smaller fragments such that each fragment has
* at most max_words. Also ensure that each end of the fragment is a * at most max_words. Also ensure that each end of each fragment is a
* query word. This will allow us to stretch the fragment in either * query word. This will allow us to stretch the fragment in either
* direction * direction
*/ */
@ -2173,12 +2221,13 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
covers[numcovers].endpos = endpos; covers[numcovers].endpos = endpos;
covers[numcovers].curlen = curlen; covers[numcovers].curlen = curlen;
covers[numcovers].poslen = poslen; covers[numcovers].poslen = poslen;
covers[numcovers].in = 0; covers[numcovers].chosen = false;
covers[numcovers].excluded = 0; covers[numcovers].excluded = false;
numcovers++; numcovers++;
startpos = endpos + 1; startpos = endpos + 1;
endpos = q; endpos = q;
} }
/* move p to generate the next cover */ /* move p to generate the next cover */
p++; p++;
} }
@ -2196,9 +2245,10 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
*/ */
for (i = 0; i < numcovers; i++) for (i = 0; i < numcovers; i++)
{ {
if (!covers[i].in && !covers[i].excluded && if (!covers[i].chosen && !covers[i].excluded &&
(maxitems < covers[i].poslen || (maxitems == covers[i].poslen (maxitems < covers[i].poslen ||
&& minwords > covers[i].curlen))) (maxitems == covers[i].poslen &&
minwords > covers[i].curlen)))
{ {
maxitems = covers[i].poslen; maxitems = covers[i].poslen;
minwords = covers[i].curlen; minwords = covers[i].curlen;
@ -2208,7 +2258,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
/* if a cover was found mark it */ /* if a cover was found mark it */
if (minI >= 0) if (minI >= 0)
{ {
covers[minI].in = 1; covers[minI].chosen = true;
/* adjust the size of cover */ /* adjust the size of cover */
startpos = covers[minI].startpos; startpos = covers[minI].startpos;
endpos = covers[minI].endpos; endpos = covers[minI].endpos;
@ -2235,8 +2285,8 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
} }
posmarker = i; posmarker = i;
} }
/* cut back startpos till we find a non short token */ /* cut back startpos till we find a good endpoint */
for (i = posmarker; i < startpos && (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword); i++) for (i = posmarker; i < startpos && BADENDPOINT(i); i++)
{ {
if (!NONWORDTOKEN(prs->words[i].type)) if (!NONWORDTOKEN(prs->words[i].type))
curlen--; curlen--;
@ -2250,8 +2300,8 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
curlen++; curlen++;
posmarker = i; posmarker = i;
} }
/* cut back endpos till we find a non-short token */ /* cut back endpos till we find a good endpoint */
for (i = posmarker; i > endpos && (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword); i--) for (i = posmarker; i > endpos && BADENDPOINT(i); i--)
{ {
if (!NONWORDTOKEN(prs->words[i].type)) if (!NONWORDTOKEN(prs->words[i].type))
curlen--; curlen--;
@ -2262,20 +2312,24 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
covers[minI].endpos = endpos; covers[minI].endpos = endpos;
covers[minI].curlen = curlen; covers[minI].curlen = curlen;
/* Mark the chosen fragments (covers) */ /* Mark the chosen fragments (covers) */
mark_fragment(prs, highlight, startpos, endpos); mark_fragment(prs, highlightall, startpos, endpos);
num_f++; num_f++;
/* exclude overlapping covers */ /* exclude overlapping covers */
for (i = 0; i < numcovers; i++) for (i = 0; i < numcovers; i++)
{ {
if (i != minI && ((covers[i].startpos >= covers[minI].startpos && covers[i].startpos <= covers[minI].endpos) || (covers[i].endpos >= covers[minI].startpos && covers[i].endpos <= covers[minI].endpos))) if (i != minI &&
covers[i].excluded = 1; ((covers[i].startpos >= covers[minI].startpos &&
covers[i].startpos <= covers[minI].endpos) ||
(covers[i].endpos >= covers[minI].startpos &&
covers[i].endpos <= covers[minI].endpos)))
covers[i].excluded = true;
} }
} }
else else
break; break;
} }
/* show at least min_words we have not marked anything */ /* show at least min_words if we have not marked anything */
if (num_f <= 0) if (num_f <= 0)
{ {
startpos = endpos = curlen = 0; startpos = endpos = curlen = 0;
@ -2285,13 +2339,17 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, int highlight,
curlen++; curlen++;
endpos = i; endpos = i;
} }
mark_fragment(prs, highlight, startpos, endpos); mark_fragment(prs, highlightall, startpos, endpos);
} }
pfree(covers); pfree(covers);
} }
/*
* Headline selector used when MaxFragments == 0
*/
static void static void
mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight, mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
int shortword, int min_words, int max_words) int shortword, int min_words, int max_words)
{ {
int p = 0, int p = 0,
@ -2299,66 +2357,81 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
int bestb = -1, int bestb = -1,
beste = -1; beste = -1;
int bestlen = -1; int bestlen = -1;
int pose = 0, int pose,
posb, posb,
poslen, poslen,
curlen; curlen;
int i; int i;
if (highlight == 0) if (!highlightall)
{ {
/* examine all covers, select a headline using the best one */
while (hlCover(prs, query, &p, &q)) while (hlCover(prs, query, &p, &q))
{ {
/* find cover len in words */ /*
* Count words (curlen) and interesting words (poslen) within
* cover, but stop once we reach max_words. This step doesn't
* consider whether that's a good stopping point. posb and pose
* are set to the start and end indexes of the possible headline.
*/
curlen = 0; curlen = 0;
poslen = 0; poslen = 0;
posb = pose = p;
for (i = p; i <= q && curlen < max_words; i++) for (i = p; i <= q && curlen < max_words; i++)
{ {
if (!NONWORDTOKEN(prs->words[i].type)) if (!NONWORDTOKEN(prs->words[i].type))
curlen++; curlen++;
if (prs->words[i].item && !prs->words[i].repeated) if (INTERESTINGWORD(i))
poslen++; poslen++;
pose = i; pose = i;
} }
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)) /* XXX this optimization seems unnecessary and wrong */
if (poslen < bestlen && !BADENDPOINT(beste))
{ {
/* best already found, so try one more cover */ /* better cover already found, so try next cover */
p++; p++;
continue; continue;
} }
posb = p;
if (curlen < max_words) if (curlen < max_words)
{ /* find good end */ {
/*
* We have room to lengthen the headline, so search forward
* until it's full or we find a good stopping point. We'll
* reconsider the word at "q", then move forward.
*/
for (i = i - 1; i < prs->curwords && curlen < max_words; i++) for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
{ {
if (i != q) if (i > q)
{ {
if (!NONWORDTOKEN(prs->words[i].type)) if (!NONWORDTOKEN(prs->words[i].type))
curlen++; curlen++;
if (prs->words[i].item && !prs->words[i].repeated) if (INTERESTINGWORD(i))
poslen++; poslen++;
} }
pose = i; pose = i;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) if (BADENDPOINT(i))
continue; continue;
if (curlen >= min_words) if (curlen >= min_words)
break; break;
} }
if (curlen < min_words && i >= prs->curwords) if (curlen < min_words)
{ /* got end of text and our cover is shorter {
* than min_words */ /*
* Reached end of text and our headline is still shorter
* than min_words, so try to extend it to the left.
*/
for (i = p - 1; i >= 0; i--) for (i = p - 1; i >= 0; i--)
{ {
if (!NONWORDTOKEN(prs->words[i].type)) if (!NONWORDTOKEN(prs->words[i].type))
curlen++; curlen++;
if (prs->words[i].item && !prs->words[i].repeated) if (INTERESTINGWORD(i))
poslen++; poslen++;
if (curlen >= max_words) if (curlen >= max_words)
break; break;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) if (BADENDPOINT(i))
continue; continue;
if (curlen >= min_words) if (curlen >= min_words)
break; break;
@ -2367,34 +2440,48 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
} }
} }
else else
{ /* shorter cover :((( */ {
/*
* Can't make headline longer, so consider making it shorter
* if needed to avoid a bad endpoint.
*/
if (i > q) if (i > q)
i = q; i = q;
for (; curlen > min_words; i--) for (; curlen > min_words; i--)
{ {
if (!NONWORDTOKEN(prs->words[i].type)) if (!NONWORDTOKEN(prs->words[i].type))
curlen--; curlen--;
if (prs->words[i].item && !prs->words[i].repeated) if (INTERESTINGWORD(i))
poslen--; poslen--;
pose = i; pose = i;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) if (!BADENDPOINT(i))
continue; break;
break;
} }
} }
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || /*
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) && * Adopt this headline if it's the first, or if it has more
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))) * interesting words and isn't ending at a bad endpoint, or if it
* replaces a bad endpoint with a good one (XXX even if it has
* fewer interesting words? Really?)
*/
if (bestlen < 0 ||
(poslen > bestlen && !BADENDPOINT(pose)) ||
(!BADENDPOINT(pose) && BADENDPOINT(beste)))
{ {
bestb = posb; bestb = posb;
beste = pose; beste = pose;
bestlen = poslen; bestlen = poslen;
} }
/* move p to generate the next cover */
p++; p++;
} }
/*
* If we found nothing acceptable, select min_words words starting at
* the beginning.
*/
if (bestlen < 0) if (bestlen < 0)
{ {
curlen = 0; curlen = 0;
@ -2410,32 +2497,17 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
} }
else else
{ {
/* highlightall mode: headline is whole document */
bestb = 0; bestb = 0;
beste = prs->curwords - 1; beste = prs->curwords - 1;
} }
for (i = bestb; i <= beste; i++) mark_fragment(prs, highlightall, bestb, beste);
{
if (prs->words[i].item)
prs->words[i].selected = 1;
if (highlight == 0)
{
if (HLIDREPLACE(prs->words[i].type))
prs->words[i].replace = 1;
else if (HLIDSKIP(prs->words[i].type))
prs->words[i].skip = 1;
}
else
{
if (XMLHLIDSKIP(prs->words[i].type))
prs->words[i].skip = 1;
}
prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
}
} }
/*
* Default parser's prsheadline function
*/
Datum Datum
prsd_headline(PG_FUNCTION_ARGS) prsd_headline(PG_FUNCTION_ARGS)
{ {
@ -2443,17 +2515,18 @@ prsd_headline(PG_FUNCTION_ARGS)
List *prsoptions = (List *) PG_GETARG_POINTER(1); List *prsoptions = (List *) PG_GETARG_POINTER(1);
TSQuery query = PG_GETARG_TSQUERY(2); TSQuery query = PG_GETARG_TSQUERY(2);
/* from opt + start and end tag */ /* default option values: */
int min_words = 15; int min_words = 15;
int max_words = 35; int max_words = 35;
int shortword = 3; int shortword = 3;
int max_fragments = 0; int max_fragments = 0;
int highlight = 0; bool highlightall = false;
ListCell *l; ListCell *l;
/* config */ /* Extract configuration option values */
prs->startsel = NULL; prs->startsel = NULL;
prs->stopsel = NULL; prs->stopsel = NULL;
prs->fragdelim = NULL;
foreach(l, prsoptions) foreach(l, prsoptions)
{ {
DefElem *defel = (DefElem *) lfirst(l); DefElem *defel = (DefElem *) lfirst(l);
@ -2474,12 +2547,12 @@ prsd_headline(PG_FUNCTION_ARGS)
else if (pg_strcasecmp(defel->defname, "FragmentDelimiter") == 0) else if (pg_strcasecmp(defel->defname, "FragmentDelimiter") == 0)
prs->fragdelim = pstrdup(val); prs->fragdelim = pstrdup(val);
else if (pg_strcasecmp(defel->defname, "HighlightAll") == 0) else if (pg_strcasecmp(defel->defname, "HighlightAll") == 0)
highlight = (pg_strcasecmp(val, "1") == 0 || highlightall = (pg_strcasecmp(val, "1") == 0 ||
pg_strcasecmp(val, "on") == 0 || pg_strcasecmp(val, "on") == 0 ||
pg_strcasecmp(val, "true") == 0 || pg_strcasecmp(val, "true") == 0 ||
pg_strcasecmp(val, "t") == 0 || pg_strcasecmp(val, "t") == 0 ||
pg_strcasecmp(val, "y") == 0 || pg_strcasecmp(val, "y") == 0 ||
pg_strcasecmp(val, "yes") == 0); pg_strcasecmp(val, "yes") == 0);
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@ -2487,7 +2560,8 @@ prsd_headline(PG_FUNCTION_ARGS)
defel->defname))); defel->defname)));
} }
if (highlight == 0) /* in HighlightAll mode these parameters are ignored */
if (!highlightall)
{ {
if (min_words >= max_words) if (min_words >= max_words)
ereport(ERROR, ereport(ERROR,
@ -2507,18 +2581,23 @@ prsd_headline(PG_FUNCTION_ARGS)
errmsg("MaxFragments should be >= 0"))); errmsg("MaxFragments should be >= 0")));
} }
/* Apply appropriate headline selector */
if (max_fragments == 0) if (max_fragments == 0)
/* call the default headline generator */ mark_hl_words(prs, query, highlightall, shortword,
mark_hl_words(prs, query, highlight, shortword, min_words, max_words); min_words, max_words);
else else
mark_hl_fragments(prs, query, highlight, shortword, min_words, max_words, max_fragments); mark_hl_fragments(prs, query, highlightall, shortword,
min_words, max_words, max_fragments);
/* Fill in default values for string options */
if (!prs->startsel) if (!prs->startsel)
prs->startsel = pstrdup("<b>"); prs->startsel = pstrdup("<b>");
if (!prs->stopsel) if (!prs->stopsel)
prs->stopsel = pstrdup("</b>"); prs->stopsel = pstrdup("</b>");
if (!prs->fragdelim) if (!prs->fragdelim)
prs->fragdelim = pstrdup(" ... "); prs->fragdelim = pstrdup(" ... ");
/* Caller will need these lengths, too */
prs->startsellen = strlen(prs->startsel); prs->startsellen = strlen(prs->startsel);
prs->stopsellen = strlen(prs->stopsel); prs->stopsellen = strlen(prs->stopsel);
prs->fragdelimlen = strlen(prs->fragdelim); prs->fragdelimlen = strlen(prs->fragdelim);