/*
 * postgresql/contrib/tsearch2/wordparser/parser.c
 */

#include "postgres.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
#include "mb/pg_wchar.h"
#include "deflex.h"
#include "parser.h"
#include "ts_locale.h"
/*
 * Allocate a new parser position record.
 *
 * When prev is given, the new record starts as a byte copy of it;
 * otherwise it starts zero-filled.  In both cases the new record is
 * linked to prev through ->prev and its pushedAtAction is reset to NULL.
 *
 * Returns the freshly palloc'd record.
 */
static TParserPosition *
newTParserPosition(TParserPosition *prev)
{
    TParserPosition *pos = (TParserPosition *) palloc(sizeof(TParserPosition));

    if (prev == NULL)
        memset(pos, 0, sizeof(TParserPosition));
    else
        memcpy(pos, prev, sizeof(TParserPosition));

    /* these two fields are never inherited from the copied record */
    pos->pushedAtAction = NULL;
    pos->prev = prev;

    return pos;
}
TParser*
TParserInit( char *str, int len ) {
TParser *prs = (TParser*)palloc0( sizeof(TParser) );
prs->charmaxlen = pg_database_encoding_max_length();
prs->str = str;
prs->lenstr = len;
#ifdef TS_USE_WIDE
/*
* Use wide char code only when max encoding length > 1 and ctype != C.
* Some operating systems fail with multi-byte encodings and a C locale.
* Also, for a C locale there is no need to process as multibyte.
* From backend/utils/adt/oracle_compat.c Teodor
*/
if ( prs->charmaxlen > 1 && !lc_ctype_is_c() ) {
prs->usewide=true;
prs->wstr = (wchar_t*)palloc( sizeof(wchar_t) * prs->lenstr );
prs->lenwstr = char2wchar( prs->wstr, prs->str, prs->lenstr );
} else
#endif
prs->usewide=false;
prs->state = newTParserPosition(NULL);
prs->state->state = TPS_Base;
return prs;
}
/*
 * Release a parser created by TParserInit: every position record still
 * on the state stack, the wide-character copy of the input (if one was
 * made), and the TParser itself.  The input buffer prs->str is not freed.
 */
void
TParserClose(TParser *prs)
{
    TParserPosition *below;

    /* unwind the position stack from the top down */
    for (; prs->state != NULL; prs->state = below)
    {
        below = prs->state->prev;
        pfree(prs->state);
    }

    if (prs->wstr != NULL)
        pfree(prs->wstr);

    pfree(prs);
}
/*
 * Define support functions equivalent to the is* macros, but
 * working with any possible encoding and locale
 */
#ifdef TS_USE_WIDE
/*
 * p_iswhat(type) expands to two static predicates on the character at the
 * parser's current position:
 *   p_is<type>(prs)    - isw<type>() applied to prs->wstr[poschar] when
 *                        prs->usewide is set, otherwise is<type>() applied
 *                        to the byte at prs->str[posbyte];
 *   p_isnot<type>(prs) - logical negation of p_is<type>(prs).
 */
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \
is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
}
/*
 * Return 1 when the current character is exactly one byte long
 * (charlen == 1) and that byte equals c, otherwise 0.
 * p_iseq should be used only for ASCII symbols.
 */
static int
p_iseq(TParser *prs, char c) {
Assert( prs->state );
return ( ( prs->state->charlen==1 && *( prs->str + prs->state->posbyte ) == c ) ) ? 1 : 0;
}
#else /* TS_USE_WIDE */
/*
 * Variant without wide-character support: p_is<type>(prs) always applies
 * is<type>() to the byte at prs->str[posbyte], and p_isnot<type>(prs) is
 * its logical negation.
 */
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
}
/* Return 1 when the byte at the current position equals c, otherwise 0. */
static int
p_iseq(TParser *prs, char c) {
Assert( prs->state );
return ( *( prs->str + prs->state->posbyte ) == c ) ? 1 : 0;
}
#endif /* TS_USE_WIDE */
/*
 * Instantiate the p_is<type>() / p_isnot<type>() predicate pair for each
 * character class listed here.
 */
p_iswhat(alnum)
p_iswhat(alpha)
p_iswhat(digit)
p_iswhat(lower)
p_iswhat(print)
p_iswhat(punct)
p_iswhat(space)
p_iswhat(upper)
p_iswhat(xdigit)
/*
 * Return 1 when the parser is at end of input: either the byte position
 * equals the input length or the current character length is zero.
 * Otherwise return 0.
 */
static int
p_isEOF(TParser *prs)
{
    Assert(prs->state);

    if (prs->state->posbyte == prs->lenstr)
        return 1;

    return (prs->state->charlen == 0) ? 1 : 0;
}
/*
 * Return the result of p_iseq() for the character stored in prs->c,
 * i.e. whether the current input character matches it.
 */
static int
p_iseqC(TParser *prs)
{
    int matches = p_iseq(prs, prs->c);

    return matches;
}
/*
 * Inverse of p_iseqC: return 1 when the current input character does NOT
 * match prs->c according to p_iseq(), otherwise 0.
 */
static int
p_isneC(TParser *prs)
{
    if (p_iseq(prs, prs->c))
        return 0;

    return 1;
}
/*
 * Return 1 when the current character is a single byte (charlen == 1)
 * for which isascii() is true, otherwise 0.
 */
static int
p_isascii(TParser *prs)
{
    unsigned char cur;

    /* multi-byte (or zero-length) characters are never ASCII */
    if (prs->state->charlen != 1)
        return 0;

    cur = (unsigned char) prs->str[prs->state->posbyte];

    return isascii(cur) ? 1 : 0;
}
/*
 * Return 1 when the current character is alphabetic (p_isalpha) and
 * ASCII (p_isascii), otherwise 0.
 */
static int
p_islatin(TParser *prs)
{
    if (!p_isalpha(prs))
        return 0;

    return p_isascii(prs) ? 1 : 0;
}
/*
 * Return 1 when the current character is alphabetic (p_isalpha) but not
 * ASCII (p_isascii), otherwise 0.
 */
static int
p_isnonlatin(TParser *prs)
{
    if (!p_isalpha(prs))
        return 0;

    return p_isascii(prs) ? 0 : 1;
}
void _make_compiler_happy(void);

/*
 * References every generated p_is*() / p_isnot*() predicate, plus p_isEOF,
 * p_iseqC and p_isneC, once each.
 *
 * NOTE(review): judging by the name, this exists only so that the static
 * predicates count as "used" and the compiler does not warn about them.
 * Every call passes NULL, so this function is presumably never meant to
 * be executed -- confirm before calling it.
 */
void
_make_compiler_happy(void)
{
    (void) p_isalnum(NULL);
    (void) p_isnotalnum(NULL);
    (void) p_isalpha(NULL);
    (void) p_isnotalpha(NULL);
    (void) p_isdigit(NULL);
    (void) p_isnotdigit(NULL);
    (void) p_islower(NULL);
    (void) p_isnotlower(NULL);
    (void) p_isprint(NULL);
    (void) p_isnotprint(NULL);
    (void) p_ispunct(NULL);
    (void) p_isnotpunct(NULL);
    (void) p_isspace(NULL);
    (void) p_isnotspace(NULL);
    (void) p_isupper(NULL);
    (void) p_isnotupper(NULL);
    (void) p_isxdigit(NULL);
    (void) p_isnotxdigit(NULL);
    (void) p_isEOF(NULL);
    (void) p_iseqC(NULL);
    (void) p_isneC(NULL);
}
/*
 * Special handler for tag states: toggles prs->ignore depending on the
 * tag text collected so far.
 *
 * The lexeme is compared case-insensitively, and only when its length in
 * characters matches the candidate exactly:
 *   "<script" or "<style"    -> prs->ignore = true
 *   "</script" or "</style"  -> prs->ignore = false
 * Any other lexeme leaves prs->ignore untouched.
 */
static void
SpecialTags(TParser *prs)
{
    if (prs->state->lencharlexeme == 8)
    {
        /* </script */
        if (pg_strncasecmp(prs->lexeme, "</script", 8) == 0)
            prs->ignore = false;
    }
    else if (prs->state->lencharlexeme == 7)
    {
        /* </style takes precedence, then <script */
        if (pg_strncasecmp(prs->lexeme, "</style", 7) == 0)
            prs->ignore = false;
        else if (pg_strncasecmp(prs->lexeme, "<script", 7) == 0)
            prs->ignore = true;
    }
    else if (prs->state->lencharlexeme == 6)
    {
        /* <style */
        if (pg_strncasecmp(prs->lexeme, "<style", 6) == 0)
            prs->ignore = true;
    }
}
/*
 * Special handler used when a full URL has been recognised: sets
 * prs->wanthost and moves the current position back by the length of the
 * lexeme collected so far (both the byte and the character counters).
 */
static void
SpecialFURL(TParser *prs)
{
    TParserPosition *pos = prs->state;

    prs->wanthost = true;

    /* rewind to the start of the lexeme */
    pos->poschar -= pos->lencharlexeme;
    pos->posbyte -= pos->lenbytelexeme;
}
/*
 * Special handler used when a hyphenated word has been recognised: moves
 * the current position back by the length of the lexeme collected so far
 * (both the byte and the character counters).
 */
static void
SpecialHyphen(TParser *prs)
{
    TParserPosition *pos = prs->state;

    /* rewind to the start of the lexeme */
    pos->poschar -= pos->lencharlexeme;
    pos->posbyte -= pos->lenbytelexeme;
}
/*
 * One-shot predicate: when prs->wanthost is set, clear it and return 1;
 * otherwise return 0 and leave the parser untouched.
 */
static int
p_isstophost(TParser *prs)
{
    if (!prs->wanthost)
        return 0;

    prs->wanthost = false;
    return 1;
}
/*
 * Return 1 while prs->ignore is set, otherwise 0.
 */
static int
p_isignore(TParser *prs)
{
    if (prs->ignore)
        return 1;

    return 0;
}
/*
 * Check whether a HOST lexeme starts at the current position.
 *
 * A temporary parser is run over the remainder of the input.  If its
 * first lexeme has type HOST, the current position and the current
 * lexeme lengths are advanced by that lexeme's length, charlen is taken
 * from the temporary parser, and 1 is returned.  Otherwise nothing is
 * changed and 0 is returned.  The temporary parser is always closed.
 */
static int
p_ishost(TParser *prs)
{
    TParserPosition *cur = prs->state;
    TParser *sub = TParserInit(prs->str + cur->posbyte,
                               prs->lenstr - cur->posbyte);
    int matched = 0;

    if (TParserGet(sub) && sub->type == HOST)
    {
        /* absorb the host into the lexeme being built */
        cur->posbyte += sub->lenbytelexeme;
        cur->poschar += sub->lencharlexeme;
        cur->lenbytelexeme += sub->lenbytelexeme;
        cur->lencharlexeme += sub->lencharlexeme;
        cur->charlen = sub->state->charlen;
        matched = 1;
    }

    TParserClose(sub);
    return matched;
}
/*
 * Check whether a URI or file path starts at the current position.
 *
 * A temporary parser is run over the remainder of the input, with an
 * extra position pushed on its stack and set to TPS_InFileFirst.  If its
 * first lexeme has type URI or FILEPATH, the current position and the
 * current lexeme lengths are advanced by that lexeme's length, charlen is
 * taken from the temporary parser, and 1 is returned.  Otherwise nothing
 * is changed and 0 is returned.  The temporary parser is always closed.
 */
static int
p_isURI(TParser *prs)
{
    TParserPosition *cur = prs->state;
    TParser *sub = TParserInit(prs->str + cur->posbyte,
                               prs->lenstr - cur->posbyte);
    int matched = 0;

    /* start the sub-parser in the file-path state, above its base state */
    sub->state = newTParserPosition(sub->state);
    sub->state->state = TPS_InFileFirst;

    if (TParserGet(sub) && (sub->type == URI || sub->type == FILEPATH))
    {
        /* absorb the URI / path into the lexeme being built */
        cur->posbyte += sub->lenbytelexeme;
        cur->poschar += sub->lencharlexeme;
        cur->lenbytelexeme += sub->lenbytelexeme;
        cur->lencharlexeme += sub->lencharlexeme;
        cur->charlen = sub->state->charlen;
        matched = 1;
    }

    TParserClose(sub);
    return matched;
}
/*
 * Table of state/action of parser
 *
 * Action flags stored in the flags field of a table entry and interpreted
 * by TParserGet:
 *   A_NEXT   - no flag bits: consume the current character and continue.
 *   A_BINGO  - a lexeme is complete; its lengths and type are copied into
 *              the TParser and TParserGet returns true.
 *   A_POP    - free the current position and return to the one below it;
 *              the loop then reruns without consuming a character.
 *   A_PUSH   - remember the pushing entry in pushedAtAction and push a
 *              copy of the current position.
 *   A_RERUN  - rerun the loop without consuming a character.
 *   A_CLEAR  - free the position directly below the current one.
 *   A_MERGE  - copy the current position/length info into the position
 *              below and free the current one.
 *   A_CLRALL - free every position below the current one.
 * A_POP, A_PUSH, A_CLEAR, A_CLRALL and A_MERGE are tested in that order in
 * an else-if chain, so only the first one set in an entry takes effect.
 */
#define A_NEXT 0x0000
#define A_BINGO 0x0001
#define A_POP 0x0002
#define A_PUSH 0x0004
#define A_RERUN 0x0008
#define A_CLEAR 0x0010
#define A_MERGE 0x0020
#define A_CLRALL 0x0040
/*
 * Per-state action tables: base state, plain words, spaces and numbers.
 *
 * TParserGet scans a state's table from the top and takes the first entry
 * whose predicate returns non-zero; the entry with a NULL predicate ends
 * the table and acts as the default.  After an A_PUSH path fails and is
 * popped, scanning resumes after the entry that pushed, which is why some
 * tables list the same predicate more than once.
 *
 * NOTE(review): the initializer fields appear to be
 * {isclass, c, flags, tostate, type, special}; the struct is declared in
 * parser.h -- confirm there.
 */

/* Start state: pick the next state from the first character of a lexeme. */
static TParserStateActionItem actionTPS_Base[] = {
{p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL},
{p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InLatWord, 0, NULL},
{p_isnonlatin, 0, A_NEXT, TPS_InCyrWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
{p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
{p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_NEXT, TPS_InSpace, 0, NULL}
};
/* Alphanumeric word; emits UWORD. */
static TParserStateActionItem actionTPS_InUWord[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, UWORD, NULL},
{p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, UWORD, NULL}
};
/* Word of ASCII letters; emits LATWORD unless a pushed alternative matches. */
static TParserStateActionItem actionTPS_InLatWord[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, LATWORD, NULL},
{p_islatin, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, LATWORD, NULL}
};
/* Word of non-ASCII letters; emits CYRWORD. */
static TParserStateActionItem actionTPS_InCyrWord[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, CYRWORD, NULL},
{p_isnonlatin, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL},
{NULL, 0, A_BINGO, TPS_Base, CYRWORD, NULL}
};
/* Run of digits; emits UNSIGNEDINT unless a pushed alternative matches. */
static TParserStateActionItem actionTPS_InUnsignedInt[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_islatin, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InUWord, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
};
/* After a leading '+' or '-': a digit is required, otherwise pop. */
static TParserStateActionItem actionTPS_InSignedIntFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT|A_CLEAR, TPS_InSignedInt, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Signed run of digits; emits SIGNEDINT. */
static TParserStateActionItem actionTPS_InSignedInt[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL},
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}
};
/* Run of non-alphanumeric characters (or ignored text); emits SPACE. */
static TParserStateActionItem actionTPS_InSpace[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL},
{p_isignore, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL},
{p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL},
{p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SPACE, NULL}
};
/* After '.' following an unsigned integer: a digit is required. */
static TParserStateActionItem actionTPS_InUDecimalFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Fraction digits of an unsigned number; emits DECIMAL. */
static TParserStateActionItem actionTPS_InUDecimal[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
{p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
};
/* After '.' following a signed integer: a digit is required. */
static TParserStateActionItem actionTPS_InDecimalFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Fraction digits of a signed number; emits DECIMAL. */
static TParserStateActionItem actionTPS_InDecimal[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
{p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
};
/* After a further '.' in a number: a digit is required. */
static TParserStateActionItem actionTPS_InVersionFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Dotted digit groups; emits VERSIONNUMBER. */
static TParserStateActionItem actionTPS_InVersion[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
{p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}
};
/* After 'e' or 'E' in a number: expects a digit or a sign. */
static TParserStateActionItem actionTPS_InMantissaFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
{p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* After the sign following 'e' or 'E': a digit is required. */
static TParserStateActionItem actionTPS_InMantissaSign[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Digits following 'e' or 'E'; emits SCIENTIFIC. */
static TParserStateActionItem actionTPS_InMantissa[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL},
{p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}
};
/*
 * Action tables for HTML entities, tags and comments.  In each table the
 * entry with a NULL predicate is the default taken when no predicate
 * above it matches.
 */

/* After '&': expects '#' (numeric entity) or an ASCII letter (named entity). */
static TParserStateActionItem actionTPS_InHTMLEntityFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst,0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Named entity: ASCII letters terminated by ';'. */
static TParserStateActionItem actionTPS_InHTMLEntity[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* After "&#": a digit is required. */
static TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Numeric entity: digits terminated by ';'. */
static TParserStateActionItem actionTPS_InHTMLEntityNum[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
{p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Entity complete; unconditionally emits HTMLENTITY. */
static TParserStateActionItem actionTPS_InHTMLEntityEnd[] = {
{NULL, 0, A_BINGO|A_CLEAR,TPS_Base, HTMLENTITY, NULL}
};
/* After '<': expects '/', '!' or an ASCII letter. */
static TParserStateActionItem actionTPS_InTagFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL},
{p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL},
{p_islatin, 0, A_PUSH, TPS_InTag, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* After "</": an ASCII letter is required. */
static TParserStateActionItem actionTPS_InTagCloseFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Inside a tag; SpecialTags is invoked on '>' and on whitespace. */
static TParserStateActionItem actionTPS_InTag[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
{p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL},
{p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL},
{p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Inside a single-quoted string within a tag. */
static TParserStateActionItem actionTPS_InTagEscapeK[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
{p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL}
};
/* Inside a double-quoted string within a tag. */
static TParserStateActionItem actionTPS_InTagEscapeKK[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
{p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL},
{NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL}
};
/* After a backslash inside a quoted string: merge back into the pushed state. */
static TParserStateActionItem actionTPS_InTagBackSleshed[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{NULL, 0, A_MERGE, TPS_Null, 0, NULL}
};
/* Tag complete; unconditionally emits TAG. */
static TParserStateActionItem actionTPS_InTagEnd[] = {
{NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL}
};
/* After "<!": expects '-'. */
static TParserStateActionItem actionTPS_InCommentFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* After "<!-": expects a second '-'. */
static TParserStateActionItem actionTPS_InCommentLast[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Inside a comment body; a '-' may begin the closing sequence. */
static TParserStateActionItem actionTPS_InComment[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst,0, NULL},
{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
};
/* After one '-' inside a comment. */
static TParserStateActionItem actionTPS_InCloseCommentFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL},
{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
};
/* After "--" inside a comment: '>' ends the comment. */
static TParserStateActionItem actionTPS_InCloseCommentLast[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
{p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL},
{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
};
/* Comment complete; unconditionally emits TAG. */
static TParserStateActionItem actionTPS_InCommentEnd[] = {
{NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL}
};
/*
 * Action tables for hosts, ports, e-mail addresses, file paths, URIs and
 * protocol prefixes.  In each table the entry with a NULL predicate is
 * the default taken when no predicate above it matches.
 */

/* After '.' in a candidate host name: expects an ASCII letter or a digit. */
static TParserStateActionItem actionTPS_InHostFirstDomen[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHostDomenSecond, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* One letter seen after the '.'; a second letter moves to InHostDomen. */
static TParserStateActionItem actionTPS_InHostDomenSecond[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/*
 * At least two letters seen after the '.'; emits HOST.  The second
 * p_isdigit entry is only reached after the pushed InHost path has failed.
 */
static TParserStateActionItem actionTPS_InHostDomen[] = {
{p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL},
{p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL},
{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
{p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
{p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
{NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}
};
/* After ':' following a host: a digit is required. */
static TParserStateActionItem actionTPS_InPortFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Port digits; emits HOST. */
static TParserStateActionItem actionTPS_InPort[] = {
{p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL},
{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
{p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
{NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}
};
/* After '-' in a candidate host name: expects a digit or an ASCII letter. */
static TParserStateActionItem actionTPS_InHostFirstAN[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Inside a host name label; pops if no domain part follows. */
static TParserStateActionItem actionTPS_InHost[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL},
{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* After '@': emits EMAIL when p_ishost finds a host here. */
static TParserStateActionItem actionTPS_InEmail[] = {
{p_ishost, 0, A_BINGO|A_CLRALL, TPS_Base, EMAIL, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* First character of a file path component. */
static TParserStateActionItem actionTPS_InFileFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_CLEAR, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Inside a file path; emits FILEPATH. */
static TParserStateActionItem actionTPS_InFile[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL},
{p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
};
/* After '.' inside a file path: expects a letter, a digit or '_'. */
static TParserStateActionItem actionTPS_InFileNext[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* After '?': expects a non-space character other than a quote. */
static TParserStateActionItem actionTPS_InURIFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '"', A_POP, TPS_Null, 0, NULL},
{p_iseqC, '\'', A_POP, TPS_Null, 0, NULL},
{p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL},
};
/* Entered after a HOST emitted via p_isstophost; moves straight to InURI. */
static TParserStateActionItem actionTPS_InURIStart[] = {
{NULL, 0, A_NEXT, TPS_InURI, 0, NULL}
};
/* Inside a URI: runs to whitespace, a quote or end of input; emits URI. */
static TParserStateActionItem actionTPS_InURI[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL},
{p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL},
{p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL},
{p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, URI, NULL}
};
/* After '/' following a host: emits FURL when p_isURI matches. */
static TParserStateActionItem actionTPS_InFURL[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isURI, 0, A_BINGO|A_CLRALL,TPS_Base, FURL, SpecialFURL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* After ':' following an ASCII word: expects '/'. */
static TParserStateActionItem actionTPS_InProtocolFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* After ":/": expects a second '/'. */
static TParserStateActionItem actionTPS_InProtocolSecond[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* "://" seen; unconditionally emits PROTOCOL. */
static TParserStateActionItem actionTPS_InProtocolEnd[] = {
{NULL, 0, A_BINGO|A_CLRALL,TPS_Base, PROTOCOL, NULL}
};
/*
 * Action tables for hyphenated words.
 *
 * The InHyphen*Word / InHyphenValue* states recognise the whole
 * hyphenated lexeme and emit it with SpecialHyphen, which rewinds the
 * position to the start of that lexeme.  Parsing then continues in
 * TPS_InParseHyphen, whose *Part states emit the individual pieces.
 * In each table the entry with a NULL predicate is the default taken when
 * no predicate above it matches.
 */

/* After '-' following an ASCII word. */
static TParserStateActionItem actionTPS_InHyphenLatWordFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL},
{p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Hyphenated word of ASCII letters; emits LATHYPHENWORD. */
static TParserStateActionItem actionTPS_InHyphenLatWord[] = {
{p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen},
{p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL},
{p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL},
{NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen}
};
/* After '-' following a non-ASCII word. */
static TParserStateActionItem actionTPS_InHyphenCyrWordFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Hyphenated word of non-ASCII letters; emits CYRHYPHENWORD. */
static TParserStateActionItem actionTPS_InHyphenCyrWord[] = {
{p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen},
{p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL},
{NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen}
};
/* After '-' following a mixed alphanumeric word. */
static TParserStateActionItem actionTPS_InHyphenUWordFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
{p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Hyphenated mixed alphanumeric word; emits HYPHENWORD. */
static TParserStateActionItem actionTPS_InHyphenUWord[] = {
{p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
{p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL},
{NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
};
/* After '.' inside a numeric part of a hyphenated word: a digit is required. */
static TParserStateActionItem actionTPS_InHyphenValueFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Numeric part of a hyphenated word; emits HYPHENWORD. */
static TParserStateActionItem actionTPS_InHyphenValue[] = {
{p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
{p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
{NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
};
/* Digits after '.' in a numeric part of a hyphenated word; emits HYPHENWORD. */
static TParserStateActionItem actionTPS_InHyphenValueExact[] = {
{p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
{p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL},
{NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
};
/* Second pass over a hyphenated word: dispatch on the first character of a part. */
static TParserStateActionItem actionTPS_InParseHyphen[] = {
{p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL},
{p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL},
{p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen,0, NULL},
{NULL, 0, A_RERUN, TPS_Base, 0, NULL}
};
/* After a '-' between parts: emits SPACE when an alphanumeric follows. */
static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isalnum, 0, A_BINGO|A_CLEAR,TPS_InParseHyphen, SPACE, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Non-ASCII-letter part; emits CYRPARTHYPHENWORD. */
static TParserStateActionItem actionTPS_InHyphenCyrWordPart[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, CYRPARTHYPHENWORD,NULL},
{p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL},
{p_islatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
{NULL, 0, A_BINGO, TPS_InParseHyphen, CYRPARTHYPHENWORD,NULL}
};
/* ASCII-letter part; emits LATPARTHYPHENWORD. */
static TParserStateActionItem actionTPS_InHyphenLatWordPart[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, LATPARTHYPHENWORD,NULL},
{p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL},
{p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
{NULL, 0, A_BINGO, TPS_InParseHyphen, LATPARTHYPHENWORD,NULL}
};
/* Mixed alphanumeric part; emits PARTHYPHENWORD. */
static TParserStateActionItem actionTPS_InHyphenUWordPart[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, PARTHYPHENWORD, NULL},
{p_isalnum, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
{NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHYPHENWORD, NULL}
};
/* Digit part; emits UNSIGNEDINT. */
static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL},
{p_isalpha, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHDecimalPartFirst,0, NULL},
{NULL, 0, A_BINGO, TPS_InParseHyphen, UNSIGNEDINT, NULL}
};
/* After '.' in a digit part: a digit is required. */
static TParserStateActionItem actionTPS_InHDecimalPartFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InHDecimalPart, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Fraction digits of a part; emits DECIMAL. */
static TParserStateActionItem actionTPS_InHDecimalPart[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHDecimalPart, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL},
{NULL, 0, A_BINGO, TPS_InParseHyphen, DECIMAL, NULL}
};
/* After a further '.' in a digit part: a digit is required. */
static TParserStateActionItem actionTPS_InHVersionPartFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_isdigit, 0, A_CLEAR, TPS_InHVersionPart, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
/* Dotted digit groups of a part; emits VERSIONNUMBER. */
static TParserStateActionItem actionTPS_InHVersionPart[] = {
{p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
{p_isdigit, 0, A_NEXT, TPS_InHVersionPart, 0, NULL},
{p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL},
{NULL, 0, A_BINGO, TPS_InParseHyphen, VERSIONNUMBER, NULL}
};
/*
 * Maps every parser state to its action table.
 *
 * The order must be the same as in typedef enum {} TParserState:
 * TParserGet indexes this array directly by the state value and asserts
 * that Actions[state].state == state.  The final { TPS_Null, NULL } entry
 * is a terminator, not a usable state.
 */
static const TParserStateAction Actions[] = {
{ TPS_Base, actionTPS_Base },
{ TPS_InUWord, actionTPS_InUWord },
{ TPS_InLatWord, actionTPS_InLatWord },
{ TPS_InCyrWord, actionTPS_InCyrWord },
{ TPS_InUnsignedInt, actionTPS_InUnsignedInt },
{ TPS_InSignedIntFirst, actionTPS_InSignedIntFirst },
{ TPS_InSignedInt, actionTPS_InSignedInt },
{ TPS_InSpace, actionTPS_InSpace },
{ TPS_InUDecimalFirst, actionTPS_InUDecimalFirst },
{ TPS_InUDecimal, actionTPS_InUDecimal },
{ TPS_InDecimalFirst, actionTPS_InDecimalFirst },
{ TPS_InDecimal, actionTPS_InDecimal },
{ TPS_InVersionFirst, actionTPS_InVersionFirst },
{ TPS_InVersion, actionTPS_InVersion },
{ TPS_InMantissaFirst, actionTPS_InMantissaFirst },
{ TPS_InMantissaSign, actionTPS_InMantissaSign },
{ TPS_InMantissa, actionTPS_InMantissa },
{ TPS_InHTMLEntityFirst, actionTPS_InHTMLEntityFirst },
{ TPS_InHTMLEntity, actionTPS_InHTMLEntity },
{ TPS_InHTMLEntityNumFirst, actionTPS_InHTMLEntityNumFirst },
{ TPS_InHTMLEntityNum, actionTPS_InHTMLEntityNum },
{ TPS_InHTMLEntityEnd, actionTPS_InHTMLEntityEnd },
{ TPS_InTagFirst, actionTPS_InTagFirst },
{ TPS_InTagCloseFirst, actionTPS_InTagCloseFirst },
{ TPS_InTag, actionTPS_InTag },
{ TPS_InTagEscapeK, actionTPS_InTagEscapeK },
{ TPS_InTagEscapeKK, actionTPS_InTagEscapeKK },
{ TPS_InTagBackSleshed, actionTPS_InTagBackSleshed },
{ TPS_InTagEnd, actionTPS_InTagEnd },
{ TPS_InCommentFirst, actionTPS_InCommentFirst },
{ TPS_InCommentLast, actionTPS_InCommentLast },
{ TPS_InComment, actionTPS_InComment },
{ TPS_InCloseCommentFirst, actionTPS_InCloseCommentFirst },
{ TPS_InCloseCommentLast, actionTPS_InCloseCommentLast },
{ TPS_InCommentEnd, actionTPS_InCommentEnd },
{ TPS_InHostFirstDomen, actionTPS_InHostFirstDomen },
{ TPS_InHostDomenSecond, actionTPS_InHostDomenSecond },
{ TPS_InHostDomen, actionTPS_InHostDomen },
{ TPS_InPortFirst, actionTPS_InPortFirst },
{ TPS_InPort, actionTPS_InPort },
{ TPS_InHostFirstAN, actionTPS_InHostFirstAN },
{ TPS_InHost, actionTPS_InHost },
{ TPS_InEmail, actionTPS_InEmail },
{ TPS_InFileFirst, actionTPS_InFileFirst },
{ TPS_InFile, actionTPS_InFile },
{ TPS_InFileNext, actionTPS_InFileNext },
{ TPS_InURIFirst, actionTPS_InURIFirst },
{ TPS_InURIStart, actionTPS_InURIStart },
{ TPS_InURI, actionTPS_InURI },
{ TPS_InFURL, actionTPS_InFURL },
{ TPS_InProtocolFirst, actionTPS_InProtocolFirst },
{ TPS_InProtocolSecond, actionTPS_InProtocolSecond },
{ TPS_InProtocolEnd, actionTPS_InProtocolEnd },
{ TPS_InHyphenLatWordFirst, actionTPS_InHyphenLatWordFirst },
{ TPS_InHyphenLatWord, actionTPS_InHyphenLatWord },
{ TPS_InHyphenCyrWordFirst, actionTPS_InHyphenCyrWordFirst },
{ TPS_InHyphenCyrWord, actionTPS_InHyphenCyrWord },
{ TPS_InHyphenUWordFirst, actionTPS_InHyphenUWordFirst },
{ TPS_InHyphenUWord, actionTPS_InHyphenUWord },
{ TPS_InHyphenValueFirst, actionTPS_InHyphenValueFirst },
{ TPS_InHyphenValue, actionTPS_InHyphenValue },
{ TPS_InHyphenValueExact, actionTPS_InHyphenValueExact },
{ TPS_InParseHyphen, actionTPS_InParseHyphen },
{ TPS_InParseHyphenHyphen, actionTPS_InParseHyphenHyphen },
{ TPS_InHyphenCyrWordPart, actionTPS_InHyphenCyrWordPart },
{ TPS_InHyphenLatWordPart, actionTPS_InHyphenLatWordPart },
{ TPS_InHyphenUWordPart, actionTPS_InHyphenUWordPart },
{ TPS_InHyphenUnsignedInt, actionTPS_InHyphenUnsignedInt },
{ TPS_InHDecimalPartFirst, actionTPS_InHDecimalPartFirst },
{ TPS_InHDecimalPart, actionTPS_InHDecimalPart },
{ TPS_InHVersionPartFirst, actionTPS_InHVersionPartFirst },
{ TPS_InHVersionPart, actionTPS_InHVersionPart },
{ TPS_Null, NULL }
};
/*
 * Extract the next lexeme from the input.
 *
 * Runs the state machine described by Actions[] from the parser's current
 * position until an entry flagged A_BINGO fires or the input is used up.
 *
 * On success returns true and leaves the result in the TParser:
 *   prs->lexeme         - start of the lexeme inside prs->str
 *   prs->lenbytelexeme  - its length in bytes
 *   prs->lencharlexeme  - its length in characters
 *   prs->type           - the lexeme type taken from the table entry
 * Returns false when the position is already at or past the end of the
 * input, or when the loop ends without an A_BINGO entry.
 */
bool
TParserGet(TParser *prs)
{
    TParserStateActionItem *item = NULL;

    /* validate the state stack before it is dereferenced for the first time */
    Assert(prs->state);

    if (prs->state->posbyte >= prs->lenstr)
        return false;

    prs->lexeme = prs->str + prs->state->posbyte;
    prs->state->pushedAtAction = NULL;

    /* look at string */
    while (prs->state->posbyte <= prs->lenstr)
    {
        /* length of the current character; zero marks end of input */
        if (prs->state->posbyte == prs->lenstr)
            prs->state->charlen = 0;
        else
            prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
                pg_mblen(prs->str + prs->state->posbyte);

        Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
        Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
        Assert(Actions[prs->state->state].state == prs->state->state);

        item = Actions[prs->state->state].action;
        Assert(item != NULL);

        /*
         * If this position pushed a state earlier and that path was popped,
         * resume scanning at the entry that pushed instead of at the top.
         * The NULL test is explicit so that no ordering comparison is made
         * against a null pointer.
         */
        if (prs->state->pushedAtAction != NULL &&
            item < prs->state->pushedAtAction)
            item = prs->state->pushedAtAction;

        /* find action by character class */
        while (item->isclass)
        {
            prs->c = item->c;
            if (item->isclass(prs) != 0)
            {
                /*
                 * Accept only entries after the one that pushed: that path
                 * has already been tried and turned out to be a false way.
                 */
                if (prs->state->pushedAtAction == NULL ||
                    item > prs->state->pushedAtAction)
                    break;
            }
            item++;
        }

        prs->state->pushedAtAction = NULL;

        /* call special handler if exists */
        if (item->special)
            item->special(prs);

        /* BINGO, lexeme is found */
        if (item->flags & A_BINGO)
        {
            Assert(item->type > 0);
            prs->lenbytelexeme = prs->state->lenbytelexeme;
            prs->lencharlexeme = prs->state->lencharlexeme;
            prs->state->lenbytelexeme = prs->state->lencharlexeme = 0;
            prs->type = item->type;
        }

        /* do various actions by flags; only the first matching one applies */
        if (item->flags & A_POP)
        {
            /* pop stored state in stack */
            TParserPosition *ptr = prs->state->prev;

            pfree(prs->state);
            prs->state = ptr;
            Assert(prs->state);
        }
        else if (item->flags & A_PUSH)
        {
            /* push (store) state in stack */
            prs->state->pushedAtAction = item;  /* remember where we push */
            prs->state = newTParserPosition(prs->state);
        }
        else if (item->flags & A_CLEAR)
        {
            /* clear previous pushed state */
            TParserPosition *ptr;

            Assert(prs->state->prev);
            ptr = prs->state->prev->prev;
            pfree(prs->state->prev);
            prs->state->prev = ptr;
        }
        else if (item->flags & A_CLRALL)
        {
            /* clear all previous pushed state */
            TParserPosition *ptr;

            while (prs->state->prev)
            {
                ptr = prs->state->prev->prev;
                pfree(prs->state->prev);
                prs->state->prev = ptr;
            }
        }
        else if (item->flags & A_MERGE)
        {
            /* merge posinfo with current and pushed state */
            TParserPosition *ptr = prs->state;

            Assert(prs->state->prev);
            prs->state = prs->state->prev;

            prs->state->posbyte = ptr->posbyte;
            prs->state->poschar = ptr->poschar;
            prs->state->charlen = ptr->charlen;
            prs->state->lenbytelexeme = ptr->lenbytelexeme;
            prs->state->lencharlexeme = ptr->lencharlexeme;
            pfree(ptr);
        }

        /* set new state if pointed */
        if (item->tostate != TPS_Null)
            prs->state->state = item->tostate;

        /*
         * Leave the loop when a lexeme was found, or when the input is
         * exhausted and the entry did not ask for a rerun.
         */
        if ((item->flags & A_BINGO) ||
            (prs->state->posbyte >= prs->lenstr && (item->flags & A_RERUN) == 0))
            break;

        /* go to beginning of loop if we should rerun or we just restore state */
        if (item->flags & (A_RERUN | A_POP))
            continue;

        /* move forward by one character */
        if (prs->state->charlen)
        {
            prs->state->posbyte += prs->state->charlen;
            prs->state->lenbytelexeme += prs->state->charlen;
            prs->state->poschar++;
            prs->state->lencharlexeme++;
        }
    }

    return (item && (item->flags & A_BINGO)) ? true : false;
}