postgresql/contrib/seg/segscan.l
Michael Paquier ae6d06f096 Handle \v as a whitespace character in parsers
This commit comes as a continuation of the discussion that has led to
d522b05, as \v was handled inconsistently when parsing array values or
anything going through the parsers, and changing a parser behavior in
stable branches is a scary thing to do.  The parsing of array values now
uses the more central scanner_isspace() and array_isspace() is removed.

As pointed out by Peter Eisentraut, fix a confusing reference to
horizontal space in the parsers with the term "horiz_space".  \f was
included in this set since 3cfdd8f from 2000, but it is not horizontal.
"horiz_space" is renamed to "non_newline_space", to refer to all
whitespace characters except newlines.

The changes impact the parsers for the backend, psql, seg, cube, ecpg
and replication commands.  Note that JSON should not escape \v, as per
RFC 7159, so these are not touched.

Reviewed-by: Peter Eisentraut, Tom Lane
Discussion: https://postgr.es/m/ZJKcjNwWHHvw9ksQ@paquier.xyz
2023-07-06 08:16:24 +09:00

130 lines
2.8 KiB
Plaintext

%top{
/*
* A scanner for EMP-style numeric ranges
*/
#include "postgres.h"
#include "nodes/miscnodes.h"
/*
* NB: include segparse.h only AFTER including segdata.h, because segdata.h
* contains the definition for SEG.
*/
#include "segdata.h"
#include "segparse.h"
}
%{
/* LCOV_EXCL_START */
/* No reason to constrain amount of data slurped */
#define YY_READ_BUF_SIZE 16777216
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf
#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
static void
fprintf_to_ereport(const char *fmt, const char *msg)
{
ereport(ERROR, (errmsg_internal("%s", msg)));
}
/*
 * Handles to the buffer that the lexer uses internally.
 *
 * scanbufhandle holds the value returned by yy_scan_buffer() in
 * seg_scanner_init(); scanbuf is the palloc'd copy of the input string
 * that was handed to yy_scan_buffer().  seg_scanner_finish() releases both.
 */
static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf;
%}
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="seg_yy"
/*
 * Named patterns used by the rules section.
 *
 * range:   two dots, optionally followed by a third dot
 * plumin:  the characters +- enclosed either in single quotes or in
 *          parentheses
 * integer: optional sign followed by one or more digits
 * real:    optional sign, one or more digits, a dot, one or more digits
 * float:   an integer or a real, optionally followed by e or E and an
 *          integer exponent
 */
range (\.\.)(\.)?
plumin (\'\+\-\')|(\(\+\-)\)
integer [+-]?[0-9]+
real [+-]?[0-9]+\.[0-9]+
float ({integer}|{real})([eE]{integer})?
%%
{range} seg_yylval.text = yytext; return RANGE; /* text points at the matched input, not a copy */
{plumin} seg_yylval.text = yytext; return PLUMIN; /* likewise hands over yytext directly */
{float} seg_yylval.text = yytext; return SEGFLOAT; /* number is passed on as text, unconverted here */
\< seg_yylval.text = "<"; return EXTENSION; /* the three EXTENSION rules pass a constant string */
\> seg_yylval.text = ">"; return EXTENSION;
\~ seg_yylval.text = "~"; return EXTENSION;
[ \t\n\r\f\v]+ /* discard whitespace: space, tab, newline, CR, form feed, vertical tab */
. return yytext[0]; /* any other single character: alert parser of the garbage */
%%
/* LCOV_EXCL_STOP */
/*
 * seg_yyerror
 *		Report a parse failure for a seg input string.
 *
 * result:    not used by this function.
 * escontext: error-saving context handed to errsave(); it is also checked
 *            first to see whether a soft error has already been recorded.
 * message:   text supplied by the caller, typically "syntax error".
 *
 * Every report uses ERRCODE_SYNTAX_ERROR and the primary message
 * "bad seg representation".  Only the detail line varies: it names the
 * current token text, or says "at end of input" when the current token
 * starts with the end-of-buffer marker.
 */
void
seg_yyerror(SEG *result, struct Node *escontext, const char *message)
{
	/* The first error wins: never overwrite one that was already saved. */
	if (SOFT_ERROR_OCCURRED(escontext))
		return;

	if (*yytext != YY_END_OF_BUFFER_CHAR)
	{
		/* There is token text to show, so quote it in the detail. */
		errsave(escontext,
				errcode(ERRCODE_SYNTAX_ERROR),
				errmsg("bad seg representation"),
		/* translator: first %s is typically "syntax error" */
				errdetail("%s at or near \"%s\"", message, yytext));
		return;
	}

	/* Scanner is at the end-of-buffer marker: nothing to quote. */
	errsave(escontext,
			errcode(ERRCODE_SYNTAX_ERROR),
			errmsg("bad seg representation"),
	/* translator: %s is typically "syntax error" */
			errdetail("%s at end of input", message));
}
/*
 * seg_scanner_init
 *		Point the scanner at "str"; called before any actual parsing is done.
 *
 * The input is copied into a freshly palloc'd buffer that carries the
 * two-byte termination flex needs, and a flex buffer is created over that
 * copy with yy_scan_buffer().  seg_scanner_finish() releases both.
 */
void
seg_scanner_init(const char *str)
{
	Size		input_len = strlen(str);
	Size		buf_len = input_len + 2;	/* input plus two terminator bytes */

	/* A buffer might be left over after ereport(); get rid of it. */
	if (YY_CURRENT_BUFFER)
		yy_delete_buffer(YY_CURRENT_BUFFER);

	/* Make a scan buffer with the special termination needed by flex. */
	scanbuf = palloc(buf_len);
	memcpy(scanbuf, str, input_len);
	scanbuf[input_len] = YY_END_OF_BUFFER_CHAR;
	scanbuf[input_len + 1] = YY_END_OF_BUFFER_CHAR;
	scanbufhandle = yy_scan_buffer(scanbuf, buf_len);

	/* Begin in the INITIAL start condition. */
	BEGIN(INITIAL);
}
/*
 * seg_scanner_finish
 *		Called after parsing is done to clean up after seg_scanner_init().
 *
 * Deletes the flex buffer recorded in scanbufhandle, then frees the
 * palloc'd input copy recorded in scanbuf.
 */
void
seg_scanner_finish(void)
{
	/* Drop the flex buffer first ... */
	yy_delete_buffer(scanbufhandle);

	/* ... then the input copy that seg_scanner_init() allocated for it. */
	pfree(scanbuf);
}