Split psql's lexer into two separate .l files for SQL and backslash cases.

This gets us to a point where psqlscan.l can be used by other frontend
programs for the same purpose psql uses it for, ie to detect when it's
collected a complete SQL command from input that is divided across
line boundaries.  Moreover, other programs can supply their own lexers
for backslash commands of their own choosing.  A follow-on patch will
use this in pgbench.

The end result here is roughly the same as in Kyotaro Horiguchi's
0001-Make-SQL-parser-part-of-psqlscan-independent-from-ps.patch, although
the details of the method for switching between lexers are quite different.
Basically, in this patch we share the entire PsqlScanState, YY_BUFFER_STATE
stack, *and* yyscan_t between different lexers.  The only thing we need
to do to switch to a different lexer is to make sure the start_state is
valid for the new lexer.  This works because flex doesn't keep any other
persistent state that depends on the specific lexing tables generated for
a particular .l file.  (We are assuming that both lexers are built with
the same flex version, or at least versions that are compatible with
respect to the contents of yyscan_t; but that doesn't seem likely to
be a big problem in practice, considering how slowly flex changes.)

Aside from being more efficient than Horiguchi-san's original solution,
this avoids possible corner-case changes in semantics: the original code
was capable of popping the input buffer stack while still staying in
backslash-related parsing states.  I'm not sure that that equates to any
useful user-visible behaviors, but I'm not sure it doesn't either, so
I'm loath to assume that we only need to consider the topmost buffer when
parsing a backslash command.

I've attempted to update the MSVC build scripts for the added .l file,
but will rely on the buildfarm to see if I missed anything.

Kyotaro Horiguchi and Tom Lane
This commit is contained in:
Tom Lane 2016-03-19 00:24:55 -04:00
parent 27199058d9
commit 0ea9efbe9e
12 changed files with 1008 additions and 758 deletions

View File

@ -1,4 +1,5 @@
/psqlscan.c
/psqlscanslash.c
/sql_help.h
/sql_help.c
/dumputils.c

View File

@ -23,7 +23,7 @@ override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) -I$(top_srcdir)/src/bin/p
OBJS= command.o common.o help.o input.o stringutils.o mainloop.o copy.o \
startup.o prompt.o variables.o large_obj.o print.o describe.o \
tab-complete.o mbprint.o dumputils.o keywords.o kwlookup.o \
sql_help.o psqlscan.o \
sql_help.o psqlscan.o psqlscanslash.o \
$(WIN32RES)
@ -47,12 +47,16 @@ sql_help.h: create_help.pl $(wildcard $(REFDOCDIR)/*.sgml)
psqlscan.c: FLEXFLAGS = -Cfe -p -p
psqlscan.c: FLEX_NO_BACKUP=yes
# Latest flex causes warnings in this file.
psqlscanslash.c: FLEXFLAGS = -Cfe -p -p
psqlscanslash.c: FLEX_NO_BACKUP=yes
# Latest flex causes warnings in these files.
ifeq ($(GCC),yes)
psqlscan.o: CFLAGS += -Wno-error
psqlscanslash.o: CFLAGS += -Wno-error
endif
distprep: sql_help.h psqlscan.c
distprep: sql_help.h psqlscan.c psqlscanslash.c
install: all installdirs
$(INSTALL_PROGRAM) psql$(X) '$(DESTDIR)$(bindir)/psql$(X)'
@ -64,9 +68,10 @@ installdirs:
uninstall:
rm -f '$(DESTDIR)$(bindir)/psql$(X)' '$(DESTDIR)$(datadir)/psqlrc.sample'
# psqlscan.c is in the distribution tarball, so is not cleaned here
clean distclean:
rm -f psql$(X) $(OBJS) dumputils.c keywords.c kwlookup.c lex.backup
# files removed here are supposed to be in the distribution tarball,
# so do not clean them in the clean/distclean rules
maintainer-clean: distclean
rm -f sql_help.h sql_help.c psqlscan.c
rm -f sql_help.h sql_help.c psqlscan.c psqlscanslash.c

View File

@ -45,7 +45,7 @@
#include "large_obj.h"
#include "mainloop.h"
#include "print.h"
#include "psqlscan.h"
#include "psqlscanslash.h"
#include "settings.h"
#include "variables.h"

View File

@ -2,7 +2,8 @@
CATALOG_NAME = psql
AVAIL_LANGUAGES = cs de es fr it ja pl pt_BR ru zh_CN zh_TW
GETTEXT_FILES = command.c common.c copy.c help.c input.c large_obj.c \
mainloop.c print.c psqlscan.c startup.c describe.c sql_help.h sql_help.c \
mainloop.c print.c psqlscan.c psqlscanslash.c startup.c \
describe.c sql_help.h sql_help.c \
tab-complete.c variables.c \
../../common/exec.c ../../common/fe_memutils.c ../../common/username.c \
../../common/wait_error.c

View File

@ -25,17 +25,6 @@ typedef enum
PSCAN_EOL /* end of line, SQL possibly complete */
} PsqlScanResult;
/* Different ways for scan_slash_option to handle parameter words */
enum slash_option_type
{
OT_NORMAL, /* normal case */
OT_SQLID, /* treat as SQL identifier */
OT_SQLIDHACK, /* SQL identifier, but don't downcase */
OT_FILEPIPE, /* it's a filename or pipe */
OT_WHOLE_LINE, /* just snarf the rest of the line */
OT_NO_EVAL /* no expansion of backticks or variables */
};
/* Callback functions to be used by the lexer */
typedef struct PsqlScanCallbacks
{
@ -61,15 +50,8 @@ extern PsqlScanResult psql_scan(PsqlScanState state,
extern void psql_scan_reset(PsqlScanState state);
extern void psql_scan_reselect_sql_lexer(PsqlScanState state);
extern bool psql_scan_in_quote(PsqlScanState state);
extern char *psql_scan_slash_command(PsqlScanState state);
extern char *psql_scan_slash_option(PsqlScanState state,
enum slash_option_type type,
char *quote,
bool semicolon);
extern void psql_scan_slash_command_end(PsqlScanState state);
#endif /* PSQLSCAN_H */

File diff suppressed because it is too large Load Diff

129
src/bin/psql/psqlscan_int.h Normal file
View File

@ -0,0 +1,129 @@
/*
* psqlscan_int.h
* lexical scanner internal declarations
*
* This file declares the PsqlScanStateData structure used by psqlscan.l
* and shared by other lexers compatible with it, such as psqlscanslash.l.
*
* One difficult aspect of this code is that we need to work in multibyte
* encodings that are not ASCII-safe. A "safe" encoding is one in which each
* byte of a multibyte character has the high bit set (it's >= 0x80). Since
* all our lexing rules treat all high-bit-set characters alike, we don't
* really need to care whether such a byte is part of a sequence or not.
* In an "unsafe" encoding, we still expect the first byte of a multibyte
* sequence to be >= 0x80, but later bytes might not be. If we scan such
* a sequence as-is, the lexing rules could easily be fooled into matching
* such bytes to ordinary ASCII characters. Our solution for this is to
* substitute 0xFF for each non-first byte within the data presented to flex.
* The flex rules will then pass the FF's through unmolested. The
* psqlscan_emit() subroutine is responsible for looking back to the original
* string and replacing FF's with the corresponding original bytes.
*
* Another interesting thing we do here is scan different parts of the same
* input with physically separate flex lexers (ie, lexers written in separate
* .l files). We can get away with this because the only part of the
* persistent state of a flex lexer that depends on its parsing rule tables
* is the start state number, which is easy enough to manage --- usually,
* in fact, we just need to set it to INITIAL when changing lexers. But to
* make that work at all, we must use re-entrant lexers, so that all the
* relevant state is in the yyscan_t attached to the PsqlScanState;
* if we were using lexers with separate static state we would soon end up
* with dangling buffer pointers in one or the other. Also note that this
* is unlikely to work very nicely if the lexers aren't all built with the
* same flex version.
*
* Copyright (c) 2000-2016, PostgreSQL Global Development Group
*
* src/bin/psql/psqlscan_int.h
*/
#ifndef PSQLSCAN_INT_H
#define PSQLSCAN_INT_H
#include "psqlscan.h"
/* This is just to allow this file to be compilable standalone */
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
/*
 * We use a stack of flex buffers to handle substitution of psql variables.
 * Each stacked buffer contains the as-yet-unread text from one psql variable.
 * When we pop the stack all the way, we resume reading from the outer buffer
 * identified by scanbufhandle.
 *
 * Entries are chained through "next"; PsqlScanStateData.buffer_stack points
 * at the entry currently being read.
 */
typedef struct StackElem
{
	YY_BUFFER_STATE buf;		/* flex input control structure */
	char	   *bufstring;		/* data actually being scanned by flex */
	char	   *origstring;		/* copy of original data, if needed */
	/* NOTE(review): origstring presumably only set for unsafe encodings,
	 * where bufstring has 0xFF substituted for trailing bytes --- confirm
	 * against psqlscan_prepare_buffer */
	char	   *varname;		/* name of variable providing data, or NULL */
	struct StackElem *next;		/* next entry in the buffer stack */
} StackElem;
/*
 * All working state of the lexer must be stored in PsqlScanStateData
 * between calls. This allows us to have multiple open lexer operations,
 * which is needed for nested include files. The lexer itself is not
 * recursive, but it must be re-entrant.
 *
 * The same structure (including the yyscan_t) is shared by every lexer that
 * cooperates with psqlscan.l, such as psqlscanslash.l; only start_state has
 * to be made valid for whichever lexer is about to run.
 */
typedef struct PsqlScanStateData
{
	yyscan_t	scanner;		/* Flex's state for this PsqlScanState */

	PQExpBuffer output_buf;		/* current output buffer */

	StackElem  *buffer_stack;	/* stack of variable expansion buffers */

	/*
	 * These variables always refer to the outer buffer, never to any stacked
	 * variable-expansion buffer.
	 */
	YY_BUFFER_STATE scanbufhandle;	/* flex buffer for the outer-level input;
									 * non-NULL while a scan is in progress */
	char	   *scanbuf;		/* start of outer-level input buffer */
	const char *scanline;		/* current input line at outer level */

	/*
	 * safe_encoding, curline, refline are used by psqlscan_emit() to replace
	 * FFs
	 */
	int			encoding;		/* encoding being used now */
	bool		safe_encoding;	/* is current encoding "safe"? */
	bool		std_strings;	/* are string literals standard? */
	const char *curline;		/* actual flex input string for cur buf */
	const char *refline;		/* original data for cur buffer */

	/*
	 * All this state lives across successive input lines, until explicitly
	 * reset by psql_scan_reset. start_state is adopted by yylex() on entry,
	 * and updated with its finishing state on exit.
	 */
	int			start_state;	/* yylex's starting/finishing state */
	int			paren_depth;	/* depth of nesting in parentheses */
	int			xcdepth;		/* depth of nesting in slash-star comments */
	char	   *dolqstart;		/* current $foo$ quote start string */

	/*
	 * Callback functions provided by the program making use of the lexer.
	 */
	const PsqlScanCallbacks *callbacks;
} PsqlScanStateData;
/*
* Functions exported by psqlscan.l, but only meant for use within
* compatible lexers.
*/
extern void psqlscan_push_new_buffer(PsqlScanState state,
const char *newstr, const char *varname);
extern void psqlscan_pop_buffer_stack(PsqlScanState state);
extern void psqlscan_select_top_buffer(PsqlScanState state);
extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state,
const char *txt, int len,
char **txtcopy);
extern void psqlscan_emit(PsqlScanState state, const char *txt, int len);
extern char *psqlscan_extract_substring(PsqlScanState state,
const char *txt, int len);
extern void psqlscan_escape_variable(PsqlScanState state,
const char *txt, int len,
bool as_ident);
#endif /* PSQLSCAN_INT_H */

View File

@ -0,0 +1,35 @@
/*
* psql - the PostgreSQL interactive terminal
*
* Copyright (c) 2000-2016, PostgreSQL Global Development Group
*
* src/bin/psql/psqlscanslash.h
*/
#ifndef PSQLSCANSLASH_H
#define PSQLSCANSLASH_H
#include "psqlscan.h"
/*
 * Different ways for psql_scan_slash_option to handle parameter words.
 * The value is passed as that function's "type" argument.
 */
enum slash_option_type
{
	OT_NORMAL,					/* normal case */
	OT_SQLID,					/* treat as SQL identifier */
	OT_SQLIDHACK,				/* SQL identifier, but don't downcase */
	OT_FILEPIPE,				/* it's a filename or pipe */
	OT_WHOLE_LINE,				/* just snarf the rest of the line */
	OT_NO_EVAL					/* no expansion of backticks or variables */
};
extern char *psql_scan_slash_command(PsqlScanState state);
extern char *psql_scan_slash_option(PsqlScanState state,
enum slash_option_type type,
char *quote,
bool semicolon);
extern void psql_scan_slash_command_end(PsqlScanState state);
#endif /* PSQLSCANSLASH_H */

View File

@ -0,0 +1,735 @@
%top{
/*-------------------------------------------------------------------------
*
* psqlscanslash.l
* lexical scanner for psql backslash commands
*
* XXX Avoid creating backtracking cases --- see the backend lexer for info.
*
* See psqlscan_int.h for additional commentary.
*
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/bin/psql/psqlscanslash.l
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include "psqlscanslash.h"
#include "libpq-fe.h"
}
%{
#include "psqlscan_int.h"
/*
* Set the type of yyextra; we use it as a pointer back to the containing
* PsqlScanState.
*/
#define YY_EXTRA_TYPE PsqlScanState
/*
* These variables do not need to be saved across calls. Yeah, it's a bit
* of a hack, but putting them into PsqlScanStateData would be klugy too.
*/
static enum slash_option_type option_type;
static char *option_quote;
static int unquoted_option_chars;
static int backtick_start_offset;
/* Return values from yylex() */
#define LEXRES_EOL 0 /* end of input */
#define LEXRES_OK 1 /* OK completion of backslash argument */
static void evaluate_backtick(PsqlScanState state);
#define ECHO psqlscan_emit(cur_state, yytext, yyleng)
/*
* Work around a bug in flex 2.5.35: it emits a couple of functions that
* it forgets to emit declarations for. Since we use -Wmissing-prototypes,
* this would cause warnings. Providing our own declarations should be
* harmless even when the bug gets fixed.
*/
extern int slash_yyget_column(yyscan_t yyscanner);
extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
%}
%option reentrant
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="slash_yy"
/*
* OK, here is a short description of lex/flex rules behavior.
* The longest pattern which matches an input string is always chosen.
* For equal-length patterns, the first occurring in the rules list is chosen.
* INITIAL is the starting state, to which all non-conditional rules apply.
* Exclusive states change parsing rules while the state is active. When in
* an exclusive state, only those rules defined for that state apply.
*/
/* Exclusive states for lexing backslash commands */
%x xslashcmd
%x xslashargstart
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdquote
%x xslashwholeline
%x xslashend
/*
* Assorted character class definitions that should match psqlscan.l.
*/
space [ \t\n\r\f]
quote '
xeoctesc [\\][0-7]{1,3}
xehexesc [\\]x[0-9A-Fa-f]{1,2}
xqdouble {quote}{quote}
dquote \"
variable_char [A-Za-z\200-\377_0-9]
other .
%%
%{
/* Declare some local variables inside yylex(), for convenience */
PsqlScanState cur_state = yyextra;
PQExpBuffer output_buf = cur_state->output_buf;
/*
* Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a
* new starting state without ugly direct access to flex variables,
* and it allows us to transition from one flex lexer to another
* so that we can lex different parts of the source string using
* separate lexers.
*/
BEGIN(cur_state->start_state);
%}
	/*
	 * We don't really expect to be invoked in the INITIAL state in this
	 * lexer; but if we are, just spit data to the output_buf until EOF.
	 *
	 * {other} is ".", which does not match newline in flex, hence the
	 * explicit \n alternative; with %option nodefault every input byte
	 * must be matched by some rule.
	 */

{other}|\n		{ ECHO; }
/*
* Exclusive lexer states to handle backslash command lexing
*/
<xslashcmd>{
	/* command name ends at whitespace or backslash; eat all else */

{space}|"\\"	{
					/*
					 * Push the terminator back onto the input so that the
					 * next scan sees it, then record the finishing state
					 * for the caller.
					 */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

	/* any other character is part of the command name */
{other}			{ ECHO; }

}
<xslashargstart>{
	/*
	 * Discard any whitespace before argument, then go to xslasharg state.
	 * An exception is that "|" is only special at start of argument, so we
	 * check for it here.
	 */

{space}+		{ }

"|"				{
					if (option_type == OT_FILEPIPE)
					{
						/* treat like whole-string case */
						ECHO;
						BEGIN(xslashwholeline);
					}
					else
					{
						/* vertical bar is not special otherwise */
						yyless(0);
						BEGIN(xslasharg);
					}
				}

	/*
	 * First non-space character: give it back and let the xslasharg rules
	 * decide what it means.
	 */
{other}			{
					yyless(0);
					BEGIN(xslasharg);
				}

}
<xslasharg>{
	/*
	 * Default processing of text in a slash command's argument.
	 *
	 * Note: unquoted_option_chars counts the number of characters at the
	 * end of the argument that were not subject to any form of quoting.
	 * psql_scan_slash_option needs this to strip trailing semicolons safely.
	 *
	 * *option_quote records the most recent quoting construct seen
	 * ('\'', '`', '"' or ':'); option_quote always points at valid storage
	 * because psql_scan_slash_option substitutes a local when the caller
	 * passes NULL.
	 */

{space}|"\\"	{
					/*
					 * Unquoted space is end of arg; do not eat. Likewise
					 * backslash is end of command or next command, do not eat
					 *
					 * XXX this means we can't conveniently accept options
					 * that include unquoted backslashes; therefore, option
					 * processing that encourages use of backslashes is rather
					 * broken.
					 */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{quote}			{
					/* opening single quote is not copied to the output */
					*option_quote = '\'';
					unquoted_option_chars = 0;
					BEGIN(xslashquote);
				}

"`"				{
					/*
					 * Remember where the command text starts in output_buf;
					 * evaluate_backtick() replaces everything from this
					 * offset onward.
					 */
					backtick_start_offset = output_buf->len;
					*option_quote = '`';
					unquoted_option_chars = 0;
					BEGIN(xslashbackquote);
				}

{dquote}		{
					/*
					 * Unlike single quotes, double quotes are kept in the
					 * output; psql_scan_slash_option strips them later when
					 * SQL-identifier processing is requested.
					 */
					ECHO;
					*option_quote = '"';
					unquoted_option_chars = 0;
					BEGIN(xslashdquote);
				}

:{variable_char}+	{
					/* Possible psql variable substitution */
					if (option_type == OT_NO_EVAL ||
						cur_state->callbacks->get_variable == NULL)
						ECHO;
					else
					{
						char	   *varname;
						char	   *value;

						/* yytext + 1 skips the leading colon */
						varname = psqlscan_extract_substring(cur_state,
															 yytext + 1,
															 yyleng - 1);
						value = cur_state->callbacks->get_variable(varname,
																   false,
																   false);
						free(varname);

						/*
						 * The variable value is just emitted without any
						 * further examination. This is consistent with the
						 * pre-8.0 code behavior, if not with the way that
						 * variables are handled outside backslash commands.
						 * Note that we needn't guard against recursion here.
						 */
						if (value)
						{
							appendPQExpBufferStr(output_buf, value);
							free(value);
						}
						else
							ECHO;

						*option_quote = ':';
					}
					unquoted_option_chars = 0;
				}

:'{variable_char}+'	{
					/* :'name' --- the whole token is handed to the helper */
					if (option_type == OT_NO_EVAL)
						ECHO;
					else
					{
						psqlscan_escape_variable(cur_state, yytext, yyleng, false);
						*option_quote = ':';
					}
					unquoted_option_chars = 0;
				}


:\"{variable_char}+\"	{
					/* :"name" --- same, with as_ident = true */
					if (option_type == OT_NO_EVAL)
						ECHO;
					else
					{
						psqlscan_escape_variable(cur_state, yytext, yyleng, true);
						*option_quote = ':';
					}
					unquoted_option_chars = 0;
				}

	/*
	 * The next two rules catch :'name and :"name lacking the closing quote.
	 * They exist so that the lexer never has to back up out of a partial
	 * match of the rules above (see the no-backtracking note at the top of
	 * this file).
	 */
:'{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

:\"{variable_char}*	{
					/* Throw back everything but the colon */
					yyless(1);
					unquoted_option_chars++;
					ECHO;
				}

{other}			{
					/* ordinary unquoted character */
					unquoted_option_chars++;
					ECHO;
				}

}
<xslashquote>{
	/*
	 * single-quoted text: copy literally except for '' and backslash
	 * sequences
	 */

	/* closing quote: not copied, resume normal argument processing */
{quote}			{ BEGIN(xslasharg); }

	/* '' is longer than ' so this rule wins for a doubled quote */
{xqdouble}		{ appendPQExpBufferChar(output_buf, '\''); }

"\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
"\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
"\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
"\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
"\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

{xeoctesc}		{
					/* octal case */
					/* yytext + 1 skips the backslash; result is cast to char */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 1, NULL, 8));
				}

{xehexesc}		{
					/* hex case */
					/* yytext + 2 skips the backslash and the 'x' */
					appendPQExpBufferChar(output_buf,
										  (char) strtol(yytext + 2, NULL, 16));
				}

	/* any other backslash sequence: emit the escaped character itself */
"\\".			{ psqlscan_emit(cur_state, yytext + 1, 1); }

{other}|\n		{ ECHO; }

}
<xslashbackquote>{
	/*
	 * backticked text: copy everything until next backquote, then evaluate.
	 *
	 * XXX Possible future behavioral change: substitute for :VARIABLE?
	 */

"`"				{
					/* In NO_EVAL mode, don't evaluate the command */
					/* (the command text then simply stays in output_buf) */
					if (option_type != OT_NO_EVAL)
						evaluate_backtick(cur_state);
					BEGIN(xslasharg);
				}

{other}|\n		{ ECHO; }

}
<xslashdquote>{
	/* double-quoted text: copy verbatim, including the double quotes */

{dquote}		{
					/* closing quote is emitted too, then back to plain text */
					ECHO;
					BEGIN(xslasharg);
				}

{other}|\n		{ ECHO; }

}
<xslashwholeline>{
	/* copy everything until end of input line */
	/* but suppress leading whitespace */

{space}+		{
					/*
					 * output_buf is still empty until the first character
					 * has been emitted, so whitespace seen before then is
					 * dropped.
					 */
					if (output_buf->len > 0)
						ECHO;
				}

{other}			{ ECHO; }

}
<xslashend>{
	/* at end of command, eat a double backslash, but not anything else */

"\\\\"			{
					/* the pattern is two literal backslashes; consume them */
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

{other}|\n		{
					/* anything else is returned to the input untouched */
					yyless(0);
					cur_state->start_state = YY_START;
					return LEXRES_OK;
				}

}
	/*
	 * psql uses a single <<EOF>> rule, unlike the backend.
	 *
	 * This rule applies in every start state.  The finishing state stored in
	 * start_state is how psql_scan_slash_option detects that input ran out
	 * inside a quoted string.
	 */

<<EOF>>			{
					if (cur_state->buffer_stack == NULL)
					{
						cur_state->start_state = YY_START;
						return LEXRES_EOL;		/* end of input reached */
					}

					/*
					 * We were expanding a variable, so pop the inclusion
					 * stack and keep lexing
					 */
					psqlscan_pop_buffer_stack(cur_state);
					psqlscan_select_top_buffer(cur_state);
				}
%%
/*
 * Lex the name of a psql backslash command.
 *
 * Call this after psql_scan() has returned PSCAN_BACKSLASH; the input is
 * assumed to have been consumed through the leading backslash.
 *
 * Returns the command name, as parsed off from the input, in a malloc'd
 * string.
 */
char *
psql_scan_slash_command(PsqlScanState state)
{
	PQExpBufferData cmdname;

	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* Collect lexer output in a local buffer; we return its data */
	initPQExpBuffer(&cmdname);
	state->output_buf = &cmdname;

	/* Read from the top stacked buffer if there is one, else the outer one */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle,
						state->scanner);

	/*
	 * Selecting a start state belonging to this file is all that is needed
	 * to make state->scanner use this lexer's tables.
	 */
	state->start_state = xslashcmd;

	/* Run the lexer; nothing can go wrong in this state, so ignore result */
	(void) yylex(state->scanner);

	/*
	 * The caller might go back to the regular SQL lexer next, so put a start
	 * state appropriate for it back in place.
	 */
	psql_scan_reselect_sql_lexer(state);

	return cmdname.data;
}
/*
 * Parse off the next argument for a backslash command, and return it as a
 * malloc'd string.  If there are no more arguments, returns NULL.
 *
 * type tells what processing, if any, to perform on the option string;
 * for example, if it's a SQL identifier, we want to downcase any unquoted
 * letters.
 *
 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
 * the last quote symbol used in the argument.
 *
 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
 * be taken as part of the option string will be stripped.
 *
 * NOTE: the only possible syntax errors for backslash options are unmatched
 * quotes, which are detected when we run out of input.  Therefore, on a
 * syntax error we just throw away the string and return NULL; there is no
 * need to worry about flushing remaining input.
 */
char *
psql_scan_slash_option(PsqlScanState state,
					   enum slash_option_type type,
					   char *quote,
					   bool semicolon)
{
	PQExpBufferData mybuf;
	int			lexresult PG_USED_FOR_ASSERTS_ONLY;
	int			final_state;
	char		local_quote;

	/* Must be scanning already */
	Assert(state->scanbufhandle != NULL);

	/* The lexer rules write through option_quote unconditionally */
	if (quote == NULL)
		quote = &local_quote;
	*quote = 0;

	/* Build a local buffer that we'll return the data of */
	initPQExpBuffer(&mybuf);

	/* Set up static variables that will be used by yylex */
	option_type = type;
	option_quote = quote;
	unquoted_option_chars = 0;

	/* Set current output target */
	state->output_buf = &mybuf;

	/* Set input source */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
	else
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);

	/* Set lexer start state */
	if (type == OT_WHOLE_LINE)
		state->start_state = xslashwholeline;
	else
		state->start_state = xslashargstart;

	/* And lex. */
	lexresult = yylex(state->scanner);

	/* Save final state for a moment... */
	final_state = state->start_state;

	/*
	 * In case the caller returns to using the regular SQL lexer, reselect the
	 * appropriate initial state.
	 */
	psql_scan_reselect_sql_lexer(state);

	/*
	 * Check the lex result: we should have gotten back either LEXRES_OK
	 * or LEXRES_EOL (the latter indicating end of string).  If we were inside
	 * a quoted string, as indicated by final_state, EOL is an error.
	 */
	Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);

	switch (final_state)
	{
		case xslashargstart:
			/* empty arg */
			break;
		case xslasharg:
			/* Strip any unquoted trailing semi-colons if requested */
			if (semicolon)
			{
				while (unquoted_option_chars-- > 0 &&
					   mybuf.len > 0 &&
					   mybuf.data[mybuf.len - 1] == ';')
				{
					mybuf.data[--mybuf.len] = '\0';
				}
			}

			/*
			 * If SQL identifier processing was requested, then we strip out
			 * excess double quotes and downcase unquoted letters.
			 * Doubled double-quotes become output double-quotes, per spec.
			 *
			 * Note that a string like FOO"BAR"BAZ will be converted to
			 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
			 * which would have us parse it as several identifiers.  But
			 * for psql's purposes, we want a string like "foo"."bar" to
			 * be treated as one option, so there's little choice.
			 */
			if (type == OT_SQLID || type == OT_SQLIDHACK)
			{
				bool		inquotes = false;
				char	   *cp = mybuf.data;

				while (*cp)
				{
					if (*cp == '"')
					{
						if (inquotes && cp[1] == '"')
						{
							/* Keep the first quote, remove the second */
							cp++;
						}
						inquotes = !inquotes;
						/* Collapse out quote at *cp */
						memmove(cp, cp + 1, strlen(cp));
						mybuf.len--;
						/* do not advance cp */
					}
					else
					{
						int			chlen;

						if (!inquotes && type == OT_SQLID)
							*cp = pg_tolower((unsigned char) *cp);

						/*
						 * Advance over the whole (possibly multibyte)
						 * character, but never past the terminating NUL.
						 * PQmblen() derives the length from the lead byte
						 * alone, so an argument ending in a truncated
						 * multibyte sequence would otherwise push cp beyond
						 * the end of the buffer.
						 */
						chlen = PQmblen(cp, state->encoding);
						cp++;	/* *cp was non-NUL, so one step is safe */
						while (--chlen > 0 && *cp != '\0')
							cp++;
					}
				}
			}
			break;
		case xslashquote:
		case xslashbackquote:
		case xslashdquote:
			/* must have hit EOL inside quotes */
			state->callbacks->write_error("unterminated quoted string\n");
			termPQExpBuffer(&mybuf);
			return NULL;
		case xslashwholeline:
			/* always okay */
			break;
		default:
			/* can't get here */
			fprintf(stderr, "invalid YY_START\n");
			exit(1);
	}

	/*
	 * An unquoted empty argument isn't possible unless we are at end of
	 * command.  Return NULL instead.
	 */
	if (mybuf.len == 0 && *quote == 0)
	{
		termPQExpBuffer(&mybuf);
		return NULL;
	}

	/* Else return the completed string. */
	return mybuf.data;
}
/*
 * Finish off a backslash command by consuming a trailing \\ if one is
 * present.  Nothing is written to any output buffer.
 */
void
psql_scan_slash_command_end(PsqlScanState state)
{
	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* This state produces no output, so no output target is needed */
	state->output_buf = NULL;

	/* Read from the top stacked buffer if there is one, else the outer one */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle,
						state->scanner);

	/* Start in the end-of-command state and run the lexer */
	state->start_state = xslashend;
	(void) yylex(state->scanner);

	/* No errors are possible in xslashend, so there is nothing to check */

	/*
	 * The caller is expected to go back to the regular SQL lexer, so put a
	 * start state appropriate for it back in place.
	 */
	psql_scan_reselect_sql_lexer(state);
}
/*
 * Run the backticked part of a slash command's argument through the shell.
 *
 * The text in output_buf from backtick_start_offset onward is taken as the
 * command.  It is always removed from output_buf; if the command ran without
 * error, its output (minus one trailing newline, if any) is appended in its
 * place.  Failures are reported through the write_error callback.
 */
static void
evaluate_backtick(PsqlScanState state)
{
	PQExpBuffer target = state->output_buf;
	char	   *shell_cmd = target->data + backtick_start_offset;
	PQExpBufferData captured;
	FILE	   *pipe_fp;
	bool		failed = false;
	char		chunk[512];
	size_t		nread;

	initPQExpBuffer(&captured);

	pipe_fp = popen(shell_cmd, PG_BINARY_R);
	if (pipe_fp == NULL)
	{
		state->callbacks->write_error("%s: %s\n", shell_cmd, strerror(errno));
		failed = true;
	}
	else
	{
		/* Slurp the command's output until EOF or a read error */
		for (;;)
		{
			nread = fread(chunk, 1, sizeof(chunk), pipe_fp);
			if (ferror(pipe_fp))
			{
				state->callbacks->write_error("%s: %s\n", shell_cmd, strerror(errno));
				failed = true;
				break;
			}
			appendBinaryPQExpBuffer(&captured, chunk, nread);
			if (feof(pipe_fp))
				break;
		}

		/* The pipe is closed even if reading failed */
		if (pclose(pipe_fp) == -1)
		{
			state->callbacks->write_error("%s: %s\n", shell_cmd, strerror(errno));
			failed = true;
		}
	}

	if (PQExpBufferDataBroken(captured))
	{
		state->callbacks->write_error("%s: out of memory\n", shell_cmd);
		failed = true;
	}

	/* shell_cmd is no longer needed: truncate it out of the output buffer */
	target->len = backtick_start_offset;
	target->data[target->len] = '\0';

	/* On success, put the command's output where the command text was */
	if (!failed)
	{
		/* drop a single trailing newline */
		if (captured.len > 0 &&
			captured.data[captured.len - 1] == '\n')
			captured.len--;
		appendBinaryPQExpBuffer(target, captured.data, captured.len);
	}

	termPQExpBuffer(&captured);
}

View File

@ -16,7 +16,7 @@
*
* We allow any non-ASCII character, as well as ASCII letters, digits, and
* underscore. Keep this in sync with the definition of variable_char in
* psqlscan.l.
* psqlscan.l and psqlscanslash.l.
*/
static bool
valid_variable_name(const char *name)

View File

@ -64,7 +64,7 @@ my $frontend_extraincludes = {
'initdb' => ['src/timezone'],
'psql' => [ 'src/bin/pg_dump', 'src/backend' ] };
my $frontend_extrasource = {
'psql' => ['src/bin/psql/psqlscan.l'],
'psql' => ['src/bin/psql/psqlscan.l', 'src/bin/psql/psqlscanslash.l'],
'pgbench' =>
[ 'src/bin/pgbench/exprscan.l', 'src/bin/pgbench/exprparse.y' ], };
my @frontend_excludes = (

View File

@ -76,6 +76,7 @@ if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.c del /q src\pl\plpgsql\src\pl_
if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.h del /q src\pl\plpgsql\src\pl_gram.h
if %DIST%==1 if exist src\bin\psql\psqlscan.c del /q src\bin\psql\psqlscan.c
if %DIST%==1 if exist src\bin\psql\psqlscanslash.c del /q src\bin\psql\psqlscanslash.c
if %DIST%==1 if exist contrib\cube\cubescan.c del /q contrib\cube\cubescan.c
if %DIST%==1 if exist contrib\cube\cubeparse.c del /q contrib\cube\cubeparse.c