#include "postgres.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
#include "mb/pg_wchar.h"
#include "deflex.h"
#include "parser.h"
#include "ts_locale.h"

/*
 * Allocate a new parser position and link it to 'prev'.
 *
 * When 'prev' is non-NULL the new position starts as a byte copy of it;
 * otherwise it is zero-filled.  In both cases the 'prev' link is set to the
 * argument and 'pushedAtAction' is reset to NULL.
 *
 * Returns the newly palloc'ed position.
 */
static TParserPosition *
newTParserPosition(TParserPosition * prev)
{
    TParserPosition *res = (TParserPosition *) palloc(sizeof(TParserPosition));

    if (prev)
        memcpy(res, prev, sizeof(TParserPosition));
    else
        memset(res, 0, sizeof(TParserPosition));

    /* the memcpy above copied prev's own link; overwrite it */
    res->prev = prev;

    res->pushedAtAction = NULL;

    return res;
}

/*
 * Create a parser over the buffer 'str' of 'len' bytes.
 *
 * The TParser is allocated zero-filled.  The buffer pointer is stored as is
 * (no copy is made here).  The position stack is initialized with a single
 * zeroed position whose state is TPS_Base.
 *
 * Returns the new parser; release it with TParserClose().
 */
TParser *
TParserInit(char *str, int len)
{
    TParser *prs = (TParser *) palloc0(sizeof(TParser));

    prs->charmaxlen = pg_database_encoding_max_length();
    prs->str = str;
    prs->lenstr = len;

#ifdef TS_USE_WIDE

    /*
     * Use wide char code only when max encoding length > 1 and ctype != C.
     * Some operating systems fail with multi-byte encodings and a C locale.
     * Also, for a C locale there is no need to process as multibyte. From
     * backend/utils/adt/oracle_compat.c Teodor
     *
     * Note that the 'else' below is closed by the statement following the
     * #endif: without TS_USE_WIDE that statement runs unconditionally.
     *
     * NOTE(review): wstr is sized for exactly lenstr wide characters (no
     * room for a terminator) and the result of char2wchar() is stored
     * without being checked -- confirm both against char2wchar()'s contract.
     */
    if (prs->charmaxlen > 1 && !lc_ctype_is_c())
    {
        prs->usewide = true;
        prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr);
        prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr);
    }
    else
#endif
        prs->usewide = false;

    prs->state = newTParserPosition(NULL);
    prs->state->state = TPS_Base;

    return prs;
}

/*
 * Release a parser created by TParserInit().
 *
 * Frees every position on the 'prev' chain, the wide-character buffer when
 * one was allocated, and the TParser itself.  prs->str is not freed here.
 */
void
TParserClose(TParser * prs)
{
    while (prs->state)
    {
        TParserPosition *ptr = prs->state->prev;

        pfree(prs->state);
        prs->state = ptr;
    }

    /* wstr stays NULL (from palloc0) unless the wide path allocated it */
    if (prs->wstr)
        pfree(prs->wstr);
    pfree(prs);
}

/*
 * Define support functions equivalent to the is* macros, but
 * working with any possible encodings and locales.
 *
 * p_iswhat(type) expands to two static functions:
 *   p_is<type>(prs)    - classifies the character at the current position
 *   p_isnot<type>(prs) - logical negation of p_is<type>(prs)
 *
 * With TS_USE_WIDE, p_is<type> calls isw<type>() on the wide character at
 * state->poschar when prs->usewide is set, and is<type>() on the byte at
 * state->posbyte otherwise.
 */
#ifdef TS_USE_WIDE

#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
    Assert( prs->state ); \
    return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \
        is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
    return !p_is##type(prs); \
}

/* p_iseq should be used only for ascii symbols */

/*
 * Returns 1 when the current character is one byte long and that byte
 * equals 'c', otherwise 0.
 */
static int
p_iseq(TParser * prs, char c)
{
    Assert(prs->state);
    return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
}
#else /* TS_USE_WIDE */

/*
 * Single-byte variant: p_is<type> always calls is<type>() on the byte at
 * state->posbyte.
 */
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
    Assert( prs->state ); \
    return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
    return !p_is##type(prs); \
}

/*
 * Returns 1 when the byte at the current position equals 'c', otherwise 0.
 */
static int
p_iseq(TParser * prs, char c)
{
    Assert(prs->state);
    return (*(prs->str + prs->state->posbyte) == c) ? 1 : 0;
}
#endif /* TS_USE_WIDE */

/* Instantiate p_is<type>/p_isnot<type> for each character class. */
p_iswhat(alnum)
p_iswhat(alpha)
p_iswhat(digit)
p_iswhat(lower)
p_iswhat(print)
p_iswhat(punct)
p_iswhat(space)
p_iswhat(upper)
p_iswhat(xdigit)

/*
 * Returns 1 at end of input: either the byte position has reached lenstr
 * or the current character length is 0.  Otherwise returns 0.
 */
static int
p_isEOF(TParser * prs)
{
    Assert(prs->state);
    return (prs->state->posbyte == prs->lenstr || prs->state->charlen == 0) ? 1 : 0;
}

/* Returns p_iseq() of the current character against prs->c. */
static int
p_iseqC(TParser * prs)
{
    return p_iseq(prs, prs->c);
}

/* Negation of p_iseqC(): nonzero when the current character differs from prs->c. */
static int
p_isneC(TParser * prs)
{
    return !p_iseq(prs, prs->c);
}

/*
 * Returns 1 when the current character is one byte long and isascii()
 * accepts that byte, otherwise 0.
 */
static int
p_isascii(TParser * prs)
{
    return (prs->state->charlen == 1 && isascii((unsigned char) *(prs->str + prs->state->posbyte))) ? 1 : 0;
}

/* Returns 1 for an alphabetic character that is also ASCII, otherwise 0. */
static int
p_islatin(TParser * prs)
{
    return (p_isalpha(prs) && p_isascii(prs)) ? 1 : 0;
}

/* Returns 1 for an alphabetic character that is not ASCII, otherwise 0. */
static int
p_isnonlatin(TParser * prs)
{
    return (p_isalpha(prs) && !p_isascii(prs)) ? 1 : 0;
}

/*
 * References every generated predicate (plus p_isEOF, p_iseqC and p_isneC)
 * once.
 *
 * NOTE(review): judging by the name, this presumably exists only to silence
 * unused-static-function warnings.  Every call passes NULL, which the
 * predicates would dereference, so it does not look meant to be executed --
 * confirm.
 */
void _make_compiler_happy(void);
void
_make_compiler_happy(void)
{
    p_isalnum(NULL);
    p_isnotalnum(NULL);
    p_isalpha(NULL);
    p_isnotalpha(NULL);
    p_isdigit(NULL);
    p_isnotdigit(NULL);
    p_islower(NULL);
    p_isnotlower(NULL);
    p_isprint(NULL);
    p_isnotprint(NULL);
    p_ispunct(NULL);
    p_isnotpunct(NULL);
    p_isspace(NULL);
    p_isnotspace(NULL);
    p_isupper(NULL);
    p_isnotupper(NULL);
    p_isxdigit(NULL);
    p_isnotxdigit(NULL);
    p_isEOF(NULL);
    p_iseqC(NULL);
    p_isneC(NULL);
}

static void
SpecialTags(TParser * prs)
{
    switch (prs->state->lencharlexeme)
    {
        case 8: /* lexeme, "ignore = false; break; case 7: /*