%{
/*
 * Flex scanner that splits text into the token classes returned by the
 * rules below (tag symbols, e-mail, numbers, URL pieces, file paths,
 * compound words and plain words).  After each returned token the globals
 * `token` and `tokenlen` describe the matched text.
 *
 * NOTE(review): the operand of the first #include below is missing in this
 * copy (possibly an angle-bracket header name lost in extraction) --
 * restore it before building.
 * NOTE(review): `tokenlen` is assigned by every rule but is not declared
 * anywhere in the visible text; presumably one of the headers declares
 * it -- confirm.
 */
#include
#include "deflex.h"
#include "parser.h"

/* Route the scanner's allocator calls to the PostgreSQL p* functions. */
#include "postgres.h"
#define free pfree
#define malloc palloc
#define realloc repalloc

#ifdef strdup
#undef strdup
#endif
#define strdup pstrdup

char *token = NULL; /* start of the text of the current token */
char *s = NULL; /* allocated copy of a whole compound word or URL */

YY_BUFFER_STATE buf = NULL; /* current scan buffer; non-NULL while a parse is active */

int lrlimit = -1; /* bytes still allowed to be read from the file handle (-1 = unlimited) */
int bytestoread = 0; /* size of the next fread() request made by YY_INPUT */

/*
 * Replacement for flex's YY_INPUT that honours the lrlimit read limit.
 *
 * Interactive buffers: read with getc() until max_size characters, EOF or
 * a newline (the newline is stored too).
 * Other buffers: if lrlimit is 0 report end of input (YY_NULL); if it is
 * positive read min(lrlimit, max_size) bytes and subtract that request
 * from lrlimit; otherwise read max_size bytes.
 * In both branches a read error on tsearch_yyin is fatal.
 *
 * NOTE(review): `%option never-interactive` is set below, so presumably
 * only the second branch is ever taken -- confirm.
 */
#define YY_INPUT(buf,result,max_size) \
	if ( yy_current_buffer->yy_is_interactive ) { \
		int c = '*', n; \
		for ( n = 0; n < max_size && \
			(c = getc( tsearch_yyin )) != EOF && c != '\n'; ++n ) \
			buf[n] = (char) c; \
		if ( c == '\n' ) \
			buf[n++] = (char) c; \
		if ( c == EOF && ferror( tsearch_yyin ) ) \
			YY_FATAL_ERROR( "input in flex scanner failed" ); \
		result = n; \
	} else { \
		if ( lrlimit == 0 ) \
			result=YY_NULL; \
		else { \
			if ( lrlimit>0 ) { \
				bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
				lrlimit -= bytestoread; \
			} else \
				bytestoread = max_size; \
			if ( ((result = fread( buf, 1, bytestoread, tsearch_yyin )) == 0) \
				&& ferror( tsearch_yyin ) ) \
				YY_FATAL_ERROR( "input in flex scanner failed" ); \
		} \
	}

%}

%option 8bit
%option never-interactive
%option nounput
%option noyywrap

/* exclusive state entered while re-scanning the parts of a compound word */
%x DELIM
/* exclusive states entered while scanning a URL */
%x URL
%x SERVER

/* exclusive states: INTAG is entered after "<" plus a letter, QINTAG after a double quote */
%x INTAG
%x QINTAG

/* character classes; bytes \200-\377 are what the NONLATIN classes add to digits */
NONLATINALNUM [0-9\200-\377]
NONLATINALPHA [\200-\377]
ALPHA [a-zA-Z\200-\377]
ALNUM [0-9a-zA-Z\200-\377]

HOSTNAME ([-_[:alnum:]]+\.)+[[:alpha:]]+
URI [-_[:alnum:]/%,\.;=&?#]+

%%

	/*
	 * Every rule records the match in token/tokenlen and returns a token
	 * code.
	 *
	 * NOTE(review): none of the rules below carries a start-condition
	 * prefix even though five exclusive states are declared above.  As
	 * written, several patterns occur more than once (the any-character
	 * rule four times, the double-quote rule twice, each word pattern
	 * twice) and one pattern is the empty string.  This looks like the
	 * angle-bracketed state lists, and possibly a whole rule, were lost
	 * from this copy -- compare with the upstream file before changing
	 * any rule.
	 */

"<"[[:alpha:]] {
	BEGIN INTAG;
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SYMTAG;
}

"" {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SYMTAG;
}

"<"[^>[:alpha:]] {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SPACE;
}

"\"" {
	BEGIN QINTAG;
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SYMTAG;
}

"\\\"" {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SYMTAG;
}

"\"" {
	BEGIN INTAG;
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SYMTAG;
}

.|\n {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SYMTAG;
}

">" {
	BEGIN INITIAL;
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SYMTAG;
}

.|\n {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SYMTAG;
}

[-_\.[:alnum:]]+@{HOSTNAME} /* e-mail address */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return EMAIL;
}

[0-9] /* a single digit */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return FINT;
}

[0-9]+[0-9\.]*[0-9] /* digits and dots (might be a version number) */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return FINT;
}

[+-]?[0-9\.]+[eE][+-]?[0-9]+ /* number with an exponent */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return FLOAT;
}

http"://" {
	BEGIN URL;
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return HTTP;
}

ftp"://" {
	BEGIN URL;
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return HTTP;
}

	/*
	 * Host followed by a path or port: return the whole text as FURL from
	 * a private copy in s, then give the match back with yyless(0) and
	 * switch to SERVER so the pieces are scanned again.
	 */
{HOSTNAME}[/:]{URI} {
	BEGIN SERVER;
	if (s) { free(s); s=NULL; }
	s = strdup( tsearch_yytext );
	tokenlen = tsearch_yyleng;
	yyless( 0 );
	token = s;
	return FURL;
}

{HOSTNAME} {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return HOST;
}

[/:]{URI} {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return URI;
}

[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return FILEPATH;
}

	/*
	 * The three compound-word rules below work like the FURL rule: copy
	 * the whole match into s, return it, push the match back with
	 * yyless(0) and switch to DELIM so the parts are scanned again.
	 */
({NONLATINALNUM}+-)+{NONLATINALPHA}+ /* hyphenated compound word */ {
	BEGIN DELIM;
	if (s) { free(s); s=NULL; }
	s = strdup( tsearch_yytext );
	tokenlen = tsearch_yyleng;
	yyless( 0 );
	token = s;
	return DEFISNONLATINWORD;
}

([[:alnum:]]+-)+[[:alpha:]]+ /* hyphenated compound word */ {
	BEGIN DELIM;
	if (s) { free(s); s=NULL; }
	tokenlen = tsearch_yyleng;
	s = strdup( tsearch_yytext );
	yyless( 0 );
	token = s;
	return DEFISLATWORD;
}

({ALNUM}+-)+{ALPHA}+ /* hyphenated compound word */ {
	BEGIN DELIM;
	if (s) { free(s); s=NULL; }
	s = strdup( tsearch_yytext );
	tokenlen = tsearch_yyleng;
	yyless( 0 );
	token = s;
	return DEFISWORD;
}

{NONLATINALNUM}+ /* one part of a compound word */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return NONLATINPARTWORD;
}

[[:alnum:]]+ /* one part of a compound word */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return LATPARTWORD;
}

{ALNUM}+ /* one part of a compound word */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return PARTWORD;
}

- {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SPACE;
}

.|\n /* anything else: switch back to INITIAL and rescan, no token returned */ {
	BEGIN INITIAL;
	tokenlen = tsearch_yyleng;
	yyless( 0 );
}

{NONLATINALNUM}+ /* ordinary word */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return NONLATINWORD;
}

[[:alnum:]]+ /* ordinary word */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return LATWORD;
}

{ALNUM}+ /* ordinary word */ {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return UWORD;
}

.|\n {
	token = tsearch_yytext;
	tokenlen = tsearch_yyleng;
	return SPACE;
}

%%

/*
 * Finish a parse: release the saved compound-word/URL copy in s (if any),
 * delete the current scan buffer and reset buf to NULL.
 */
void end_parse() {
	if (s) { free(s); s=NULL; }
	tsearch_yy_delete_buffer( buf );
	buf = NULL;
}

/*
 * Start scanning an in-memory string.
 *
 * str   - first byte of the text to scan
 * limit - number of bytes passed to tsearch_yy_scan_bytes()
 *
 * A parse that is still active (buf != NULL) is ended first.  The scanner
 * is left in the INITIAL state.
 */
void start_parse_str(char* str, int limit) {
	if (buf) end_parse();
	buf = tsearch_yy_scan_bytes( str, limit );
	tsearch_yy_switch_to_buffer( buf );
	BEGIN INITIAL;
}

/*
 * Start scanning from an open file handle.
 *
 * fh    - stream the new buffer is created for
 * limit - maximum number of bytes to read; 0 is stored as -1 (unlimited)
 *
 * A parse that is still active (buf != NULL) is ended first.  The limit is
 * stored in lrlimit, which YY_INPUT consults on each refill.  The scanner
 * is left in the INITIAL state.
 */
void start_parse_fh( FILE* fh, int limit ) {
	if (buf) end_parse();
	lrlimit = ( limit ) ? limit : -1;
	buf = tsearch_yy_create_buffer( fh, YY_BUF_SIZE );
	tsearch_yy_switch_to_buffer( buf );
	BEGIN INITIAL;
}