diff --git a/src/bin/psql/.cvsignore b/src/bin/psql/.cvsignore index 0447dbea53..770d87f588 100644 --- a/src/bin/psql/.cvsignore +++ b/src/bin/psql/.cvsignore @@ -1 +1,2 @@ +psqlscan.c sql_help.h diff --git a/src/bin/psql/Makefile b/src/bin/psql/Makefile index 76f0dc7a95..7856b3e9ce 100644 --- a/src/bin/psql/Makefile +++ b/src/bin/psql/Makefile @@ -5,7 +5,7 @@ # Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # -# $PostgreSQL: pgsql/src/bin/psql/Makefile,v 1.38 2003/11/29 19:52:06 pgsql Exp $ +# $PostgreSQL: pgsql/src/bin/psql/Makefile,v 1.39 2004/02/19 19:40:08 tgl Exp $ # #------------------------------------------------------------------------- @@ -19,7 +19,10 @@ override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DFRONTEND OBJS= command.o common.o help.o input.o stringutils.o mainloop.o copy.o \ startup.o prompt.o variables.o large_obj.o print.o describe.o \ - tab-complete.o mbprint.o + psqlscan.o tab-complete.o mbprint.o + +FLEXFLAGS = -Cfe + all: submake-libpq submake-libpgport psql @@ -36,6 +39,13 @@ $(srcdir)/sql_help.h: @echo "*** Perl is needed to build psql help." 
endif +$(srcdir)/psqlscan.c: psqlscan.l +ifdef FLEX + $(FLEX) $(FLEXFLAGS) -o'$@' $< +else + @$(missing) flex $< $@ +endif + distprep: $(srcdir)/sql_help.h install: all installdirs @@ -47,8 +57,9 @@ installdirs: uninstall: rm -f $(DESTDIR)$(bindir)/psql$(X) +# psqlscan.c is in the distribution tarball, so is not cleaned here clean distclean: rm -f psql$(X) $(OBJS) maintainer-clean: distclean - rm -f $(srcdir)/sql_help.h + rm -f $(srcdir)/sql_help.h $(srcdir)/psqlscan.c diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 2b8fcb9dcd..6b03e35c6f 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/command.c,v 1.112 2004/01/26 22:35:32 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/command.c,v 1.113 2004/02/19 19:40:08 tgl Exp $ */ #include "postgres_fe.h" #include "command.h" @@ -36,47 +36,33 @@ #include "large_obj.h" #include "mainloop.h" #include "print.h" +#include "psqlscan.h" #include "settings.h" #include "variables.h" #include "mb/pg_wchar.h" + /* functions for use in this file */ - static backslashResult exec_command(const char *cmd, - const char *options_string, - const char **continue_parse, - PQExpBuffer query_buf, - volatile int *paren_level); - -/* different ways for scan_option to handle parameter words */ -enum option_type -{ - OT_NORMAL, /* normal case */ - OT_SQLID, /* treat as SQL identifier */ - OT_SQLIDHACK, /* SQL identifier, but don't downcase */ - OT_FILEPIPE /* it's a filename or pipe */ -}; - -static char *scan_option(char **string, enum option_type type, - char *quote, bool semicolon); -static char *unescape(const unsigned char *source, size_t len); - + PsqlScanState scan_state, + PQExpBuffer query_buf); static bool do_edit(const char *filename_arg, PQExpBuffer query_buf); static bool do_connect(const char *new_dbname, const char *new_user); static bool do_shell(const char *command); + /*---------- * 
HandleSlashCmds: * * Handles all the different commands that start with '\', * ordinarily called by MainLoop(). * - * 'line' is the current input line, which should not start with a '\' - * but with the actual command name - * (that is taken care of by MainLoop) + * scan_state is a lexer working state that is set to continue scanning + * just after the '\'. The lexer is advanced past the command and all + * arguments on return. * * 'query_buf' contains the query-so-far, which may be modified by - * execution of the backslash command (for example, \r clears it) + * execution of the backslash command (for example, \r clears it). * query_buf can be NULL if there is no query so far. * * Returns a status code indicating what action is desired, see command.h. @@ -84,124 +70,88 @@ static bool do_shell(const char *command); */ backslashResult -HandleSlashCmds(const char *line, - PQExpBuffer query_buf, - const char **end_of_cmd, - volatile int *paren_level) +HandleSlashCmds(PsqlScanState scan_state, + PQExpBuffer query_buf) { backslashResult status = CMD_SKIP_LINE; - char *my_line; - char *options_string = NULL; - size_t blank_loc; - const char *continue_parse = NULL; /* tell the mainloop where the - * backslash command ended */ + char *cmd; + char *arg; - psql_assert(line); - my_line = pg_strdup(line); + psql_assert(scan_state); - /* - * Find the first whitespace. line[blank_loc] will now be the - * whitespace character or the \0 at the end - * - * Also look for a backslash, so stuff like \p\g works. - */ - blank_loc = strcspn(my_line, " \t\n\r\\"); + /* Parse off the command name */ + cmd = psql_scan_slash_command(scan_state); - if (my_line[blank_loc] == '\\') - { - continue_parse = &my_line[blank_loc]; - my_line[blank_loc] = '\0'; - /* If it's a double backslash, we skip it. */ - if (my_line[blank_loc + 1] == '\\') - continue_parse += 2; - } - /* do we have an option string? 
*/ - else if (my_line[blank_loc] != '\0') - { - options_string = &my_line[blank_loc + 1]; - my_line[blank_loc] = '\0'; - } + /* And try to execute it */ + status = exec_command(cmd, scan_state, query_buf); - status = exec_command(my_line, options_string, &continue_parse, query_buf, paren_level); - - if (status == CMD_UNKNOWN) + if (status == CMD_UNKNOWN && strlen(cmd) > 1) { /* * If the command was not recognized, try to parse it as a * one-letter command with immediately following argument (a * still-supported, but no longer encouraged, syntax). */ - char new_cmd[2]; + char new_cmd[2]; - new_cmd[0] = my_line[0]; + /* don't change cmd until we know it's okay */ + new_cmd[0] = cmd[0]; new_cmd[1] = '\0'; - /* use line for options, because my_line was clobbered above */ - status = exec_command(new_cmd, line + 1, &continue_parse, query_buf, paren_level); + psql_scan_slash_pushback(scan_state, cmd + 1); - /* - * continue_parse must be relative to my_line for calculation - * below - */ - continue_parse += my_line - line; + status = exec_command(new_cmd, scan_state, query_buf); + + if (status != CMD_UNKNOWN) + { + /* adjust cmd for possible messages below */ + cmd[1] = '\0'; #if 0 /* turned out to be too annoying */ - if (status != CMD_UNKNOWN && isalpha((unsigned char) new_cmd[0])) - psql_error("Warning: This syntax is deprecated.\n"); + if (isalpha((unsigned char) cmd[0])) + psql_error("Warning: This syntax is deprecated.\n"); #endif + } } if (status == CMD_UNKNOWN) { if (pset.cur_cmd_interactive) - fprintf(stderr, gettext("Invalid command \\%s. Try \\? for help.\n"), my_line); + fprintf(stderr, gettext("Invalid command \\%s. Try \\? 
for help.\n"), cmd); else - psql_error("invalid command \\%s\n", my_line); + psql_error("invalid command \\%s\n", cmd); status = CMD_ERROR; } - if (continue_parse && *continue_parse && *(continue_parse + 1) == '\\') - continue_parse += 2; - - if (end_of_cmd) + /* eat the rest of the options, if any */ + while ((arg = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false))) { - if (continue_parse) - *end_of_cmd = line + (continue_parse - my_line); - else - *end_of_cmd = line + strlen(line); + if (status != CMD_ERROR) + psql_error("\\%s: extra argument \"%s\" ignored\n", cmd, arg); + free(arg); } - free(my_line); + /* if there is a trailing \\, swallow it */ + psql_scan_slash_command_end(scan_state); + + free(cmd); return status; } - - +/* + * Subroutine to actually try to execute a backslash command. + */ static backslashResult exec_command(const char *cmd, - const char *options_string, - const char **continue_parse, - PQExpBuffer query_buf, - volatile int *paren_level) + PsqlScanState scan_state, + PQExpBuffer query_buf) { bool success = true; /* indicate here if the command ran ok or * failed */ bool quiet = QUIET(); backslashResult status = CMD_SKIP_LINE; - char *string, - *string_cpy, - *val; - - /* - * The 'string' variable will be overwritten to point to the next - * token, hence we need an extra pointer so we can free this at the - * end. 
- */ - if (options_string) - string = string_cpy = pg_strdup(options_string); - else - string = string_cpy = NULL; /* * \a -- toggle field alignment This makes little sense but we keep it @@ -218,7 +168,8 @@ exec_command(const char *cmd, /* \C -- override table title (formerly change HTML caption) */ else if (strcmp(cmd, "C") == 0) { - char *opt = scan_option(&string, OT_NORMAL, NULL, true); + char *opt = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); success = do_pset("title", opt, &pset.popt, quiet); free(opt); @@ -249,8 +200,10 @@ exec_command(const char *cmd, * files can be expected to double-quote all mixed-case \connect * arguments, and then we can get rid of OT_SQLIDHACK. */ - opt1 = scan_option(&string, OT_SQLIDHACK, &opt1q, true); - opt2 = scan_option(&string, OT_SQLIDHACK, &opt2q, true); + opt1 = psql_scan_slash_option(scan_state, + OT_SQLIDHACK, &opt1q, true); + opt2 = psql_scan_slash_option(scan_state, + OT_SQLIDHACK, &opt2q, true); if (opt2) /* gave username */ @@ -270,7 +223,8 @@ exec_command(const char *cmd, /* \cd */ else if (strcmp(cmd, "cd") == 0) { - char *opt = scan_option(&string, OT_NORMAL, NULL, true); + char *opt = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); char *dir; if (opt) @@ -311,9 +265,11 @@ exec_command(const char *cmd, /* \copy */ else if (strcasecmp(cmd, "copy") == 0) { - success = do_copy(options_string); - if (options_string) - string += strlen(string); + char *opt = psql_scan_slash_option(scan_state, + OT_WHOLE_LINE, NULL, false); + + success = do_copy(opt); + free(opt); } /* \copyright */ @@ -327,7 +283,8 @@ exec_command(const char *cmd, bool show_verbose; /* We don't do SQLID reduction on the pattern yet */ - pattern = scan_option(&string, OT_NORMAL, NULL, true); + pattern = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); show_verbose = strchr(cmd, '+') ? 
true : false; @@ -412,7 +369,8 @@ exec_command(const char *cmd, } else { - fname = scan_option(&string, OT_NORMAL, NULL, true); + fname = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); expand_tilde(&fname); status = do_edit(fname, query_buf) ? CMD_NEWEDIT : CMD_ERROR; free(fname); @@ -433,7 +391,8 @@ exec_command(const char *cmd, else fout = stdout; - while ((value = scan_option(&string, OT_NORMAL, &quoted, false))) + while ((value = psql_scan_slash_option(scan_state, + OT_NORMAL, &quoted, false))) { if (!quoted && strcmp(value, "-n") == 0) no_newline = true; @@ -454,7 +413,8 @@ exec_command(const char *cmd, /* \encoding -- set/show client side encoding */ else if (strcmp(cmd, "encoding") == 0) { - char *encoding = scan_option(&string, OT_NORMAL, NULL, false); + char *encoding = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false); if (!encoding) { @@ -481,7 +441,8 @@ exec_command(const char *cmd, /* \f -- change field separator */ else if (strcmp(cmd, "f") == 0) { - char *fname = scan_option(&string, OT_NORMAL, NULL, false); + char *fname = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false); success = do_pset("fieldsep", fname, &pset.popt, quiet); free(fname); @@ -490,7 +451,8 @@ exec_command(const char *cmd, /* \g means send query */ else if (strcmp(cmd, "g") == 0) { - char *fname = scan_option(&string, OT_FILEPIPE, NULL, false); + char *fname = psql_scan_slash_option(scan_state, + OT_FILEPIPE, NULL, false); if (!fname) pset.gfname = NULL; @@ -506,11 +468,11 @@ exec_command(const char *cmd, /* help */ else if (strcmp(cmd, "h") == 0 || strcmp(cmd, "help") == 0) { - helpSQL(options_string ? 
&options_string[strspn(options_string, " \t\n\r")] : NULL, - pset.popt.topt.pager); - /* set pointer to end of line */ - if (string) - string += strlen(string); + char *opt = psql_scan_slash_option(scan_state, + OT_WHOLE_LINE, NULL, false); + + helpSQL(opt, pset.popt.topt.pager); + free(opt); } /* HTML mode */ @@ -526,7 +488,8 @@ exec_command(const char *cmd, /* \i is include file */ else if (strcmp(cmd, "i") == 0 || strcmp(cmd, "include") == 0) { - char *fname = scan_option(&string, OT_NORMAL, NULL, true); + char *fname = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); if (!fname) { @@ -555,8 +518,10 @@ exec_command(const char *cmd, char *opt1, *opt2; - opt1 = scan_option(&string, OT_NORMAL, NULL, true); - opt2 = scan_option(&string, OT_NORMAL, NULL, true); + opt1 = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); + opt2 = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); if (strcmp(cmd + 3, "export") == 0) { @@ -611,7 +576,8 @@ exec_command(const char *cmd, /* \o -- set query output */ else if (strcmp(cmd, "o") == 0 || strcmp(cmd, "out") == 0) { - char *fname = scan_option(&string, OT_FILEPIPE, NULL, true); + char *fname = psql_scan_slash_option(scan_state, + OT_FILEPIPE, NULL, true); expand_tilde(&fname); success = setQFout(fname); @@ -631,8 +597,10 @@ exec_command(const char *cmd, /* \pset -- set printing parameters */ else if (strcmp(cmd, "pset") == 0) { - char *opt0 = scan_option(&string, OT_NORMAL, NULL, false); - char *opt1 = scan_option(&string, OT_NORMAL, NULL, false); + char *opt0 = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false); + char *opt1 = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false); if (!opt0) { @@ -654,8 +622,7 @@ exec_command(const char *cmd, else if (strcmp(cmd, "r") == 0 || strcmp(cmd, "reset") == 0) { resetPQExpBuffer(query_buf); - if (paren_level) - *paren_level = 0; + psql_scan_reset(scan_state); if (!quiet) puts(gettext("Query buffer reset (cleared).")); } @@ -663,7 +630,8 
@@ exec_command(const char *cmd, /* \s save history in a file or show it on the screen */ else if (strcmp(cmd, "s") == 0) { - char *fname = scan_option(&string, OT_NORMAL, NULL, true); + char *fname = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); expand_tilde(&fname); success = saveHistory(fname ? fname : "/dev/tty"); @@ -676,7 +644,8 @@ exec_command(const char *cmd, /* \set -- generalized set variable/option command */ else if (strcmp(cmd, "set") == 0) { - char *opt0 = scan_option(&string, OT_NORMAL, NULL, false); + char *opt0 = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false); if (!opt0) { @@ -689,14 +658,16 @@ exec_command(const char *cmd, /* * Set variable to the concatenation of the arguments. */ - char *newval = NULL; + char *newval; char *opt; - opt = scan_option(&string, OT_NORMAL, NULL, false); + opt = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false); newval = pg_strdup(opt ? opt : ""); free(opt); - while ((opt = scan_option(&string, OT_NORMAL, NULL, false))) + while ((opt = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false))) { newval = realloc(newval, strlen(newval) + strlen(opt) + 1); if (!newval) @@ -732,7 +703,8 @@ exec_command(const char *cmd, /* \T -- define html attributes */ else if (strcmp(cmd, "T") == 0) { - char *value = scan_option(&string, OT_NORMAL, NULL, false); + char *value = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false); success = do_pset("tableattr", value, &pset.popt, quiet); free(value); @@ -754,7 +726,8 @@ exec_command(const char *cmd, /* \unset */ else if (strcmp(cmd, "unset") == 0) { - char *opt = scan_option(&string, OT_NORMAL, NULL, false); + char *opt = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, false); if (!opt) { @@ -783,7 +756,8 @@ exec_command(const char *cmd, } else { - fname = scan_option(&string, OT_FILEPIPE, NULL, true); + fname = psql_scan_slash_option(scan_state, + OT_FILEPIPE, NULL, true); expand_tilde(&fname); if (!fname) @@ -839,7 +813,8 @@ 
exec_command(const char *cmd, /* \z -- list table rights (equivalent to \dp) */ else if (strcmp(cmd, "z") == 0) { - char *pattern = scan_option(&string, OT_NORMAL, NULL, true); + char *pattern = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true); success = permissionsList(pattern); if (pattern) @@ -849,10 +824,11 @@ exec_command(const char *cmd, /* \! -- shell escape */ else if (strcmp(cmd, "!") == 0) { - success = do_shell(options_string); - /* wind pointer to end of line */ - if (string) - string += strlen(string); + char *opt = psql_scan_slash_option(scan_state, + OT_WHOLE_LINE, NULL, false); + + success = do_shell(opt); + free(opt); } /* \? -- slash command help */ @@ -870,8 +846,8 @@ exec_command(const char *cmd, int i = 0; char *value; - fprintf(stderr, "+ optstr = |%s|\n", options_string); - while ((value = scan_option(&string, OT_NORMAL, NULL, true))) + while ((value = psql_scan_slash_option(scan_state, + OT_NORMAL, NULL, true))) { fprintf(stderr, "+ opt(%d) = |%s|\n", i++, value); free(value); @@ -885,431 +861,11 @@ exec_command(const char *cmd, if (!success) status = CMD_ERROR; - /* eat the rest of the options string */ - while ((val = scan_option(&string, OT_NORMAL, NULL, false))) - { - if (status != CMD_UNKNOWN) - psql_error("\\%s: extra argument \"%s\" ignored\n", cmd, val); - if (val) - free(val); - } - - if (options_string && continue_parse) - *continue_parse = options_string + (string - string_cpy); - free(string_cpy); - return status; } -/* - * scan_option() - * - * *string points to possible option string on entry; on exit, it's updated - * to point past the option string (if any). - * - * type tells what processing, if any, to perform on the option string; - * for example, if it's a SQL identifier, we want to downcase any unquoted - * letters. - * - * if quote is not NULL, *quote is set to 0 if no quoting was found, else - * the quote symbol. 
- * - * if semicolon is true, trailing semicolon(s) that would otherwise be taken - * as part of the option string will be stripped. - * - * Return value is NULL if no option found, else a malloc'd copy of the - * processed option value. - */ -static char * -scan_option(char **string, enum option_type type, char *quote, bool semicolon) -{ - unsigned int pos; - char *options_string; - char *return_val; - - if (quote) - *quote = 0; - - if (!string || !(*string)) - return NULL; - - options_string = *string; - /* skip leading whitespace */ - pos = strspn(options_string, " \t\n\r"); - - switch (options_string[pos]) - { - /* - * End of line: no option present - */ - case '\0': - *string = &options_string[pos]; - return NULL; - - /* - * Next command: treat like end of line - * - * XXX this means we can't conveniently accept options that start - * with a backslash; therefore, option processing that - * encourages use of backslashes is rather broken. - */ - case '\\': - *string = &options_string[pos]; - return NULL; - - /* - * A single quote has a psql internal meaning, such as for - * delimiting file names, and it also allows for such escape - * sequences as \t. 
- */ - case '\'': - { - unsigned int jj; - unsigned short int bslash_count = 0; - - for (jj = pos + 1; options_string[jj]; jj += PQmblen(&options_string[jj], pset.encoding)) - { - if (options_string[jj] == '\'' && bslash_count % 2 == 0) - break; - - if (options_string[jj] == '\\') - bslash_count++; - else - bslash_count = 0; - } - - if (options_string[jj] == 0) - { - psql_error("parse error at the end of line\n"); - *string = &options_string[jj]; - return NULL; - } - - return_val = unescape(&options_string[pos + 1], jj - pos - 1); - *string = &options_string[jj + 1]; - if (quote) - *quote = '\''; - return return_val; - } - - /* - * Backticks are for command substitution, like in shells - */ - case '`': - { - bool error = false; - FILE *fd; - char *file; - PQExpBufferData output; - char buf[512]; - size_t result, - len; - - len = strcspn(options_string + pos + 1, "`"); - if (options_string[pos + 1 + len] == 0) - { - psql_error("parse error at the end of line\n"); - *string = &options_string[pos + 1 + len]; - return NULL; - } - - options_string[pos + 1 + len] = '\0'; - file = options_string + pos + 1; - - fd = popen(file, "r"); - if (!fd) - { - psql_error("%s: %s\n", file, strerror(errno)); - error = true; - } - - initPQExpBuffer(&output); - - if (!error) - { - do - { - result = fread(buf, 1, 512, fd); - if (ferror(fd)) - { - psql_error("%s: %s\n", file, strerror(errno)); - error = true; - break; - } - appendBinaryPQExpBuffer(&output, buf, result); - } while (!feof(fd)); - appendPQExpBufferChar(&output, '\0'); - } - - if (fd && pclose(fd) == -1) - { - psql_error("%s: %s\n", file, strerror(errno)); - error = true; - } - - if (!error) - { - if (output.data[strlen(output.data) - 1] == '\n') - output.data[strlen(output.data) - 1] = '\0'; - return_val = output.data; - } - else - { - return_val = pg_strdup(""); - termPQExpBuffer(&output); - } - - options_string[pos + 1 + len] = '`'; - *string = options_string + pos + len + 2; - if (quote) - *quote = '`'; - return 
return_val; - } - - /* - * Variable substitution - */ - case ':': - { - size_t token_end; - const char *value; - char save_char; - - token_end = strcspn(&options_string[pos + 1], " \t\n\r"); - save_char = options_string[pos + token_end + 1]; - options_string[pos + token_end + 1] = '\0'; - value = GetVariable(pset.vars, options_string + pos + 1); - return_val = pg_strdup(value ? value : ""); - options_string[pos + token_end + 1] = save_char; - *string = &options_string[pos + token_end + 1]; - /* XXX should we set *quote to ':' here? */ - return return_val; - } - - /* - * | could be the beginning of a pipe if so, take rest of line - * as command - */ - case '|': - if (type == OT_FILEPIPE) - { - *string += strlen(*string); - return pg_strdup(options_string + pos); - } - /* fallthrough for other option types */ - - /* - * Default case: token extends to next whitespace, except that - * whitespace within double quotes doesn't end the token. - * - * If we are processing the option as a SQL identifier, then - * downcase unquoted letters and remove double-quotes --- but - * doubled double-quotes become output double-quotes, per - * spec. - * - * Note that a string like FOO"BAR"BAZ will be converted to - * fooBARbaz; this is somewhat inconsistent with the SQL spec, - * which would have us parse it as several identifiers. But - * for psql's purposes, we want a string like "foo"."bar" to - * be treated as one option, so there's little choice. 
- */ - default: - { - bool inquotes = false; - size_t token_len; - char *cp; - - /* Find end of option */ - - cp = &options_string[pos]; - for (;;) - { - /* Find next quote, whitespace, or end of string */ - cp += strcspn(cp, "\" \t\n\r"); - if (inquotes) - { - if (*cp == '\0') - { - psql_error("parse error at the end of line\n"); - *string = cp; - return NULL; - } - if (*cp == '"') - inquotes = false; - cp++; - } - else - { - if (*cp != '"') - break; /* whitespace or end of string */ - if (quote) - *quote = '"'; - inquotes = true; - cp++; - } - } - - *string = cp; - - /* Copy the option */ - token_len = cp - &options_string[pos]; - - return_val = pg_malloc(token_len + 1); - memcpy(return_val, &options_string[pos], token_len); - return_val[token_len] = '\0'; - - /* Strip any trailing semi-colons if requested */ - if (semicolon) - { - int i; - - for (i = token_len - 1; - i >= 0 && return_val[i] == ';'; - i--) - /* skip */ ; - - if (i < 0) - { - /* nothing left after stripping the semicolon... */ - free(return_val); - return NULL; - } - - if (i < (int) token_len - 1) - return_val[i + 1] = '\0'; - } - - /* - * If SQL identifier processing was requested, then we - * strip out excess double quotes and downcase unquoted - * letters. - */ - if (type == OT_SQLID || type == OT_SQLIDHACK) - { - inquotes = false; - cp = return_val; - - while (*cp) - { - if (*cp == '"') - { - if (inquotes && cp[1] == '"') - { - /* Keep the first quote, remove the second */ - cp++; - } - inquotes = !inquotes; - /* Collapse out quote at *cp */ - memmove(cp, cp + 1, strlen(cp)); - /* do not advance cp */ - } - else - { - if (!inquotes && type == OT_SQLID) - { - if (isupper((unsigned char) *cp)) - *cp = tolower((unsigned char) *cp); - } - cp += PQmblen(cp, pset.encoding); - } - } - } - - return return_val; - } - } -} - - - -/* - * unescape - * - * Replaces \n, \t, and the like. - * - * The return value is malloc'ed. 
- */ -static char * -unescape(const unsigned char *source, size_t len) -{ - const unsigned char *p; - bool esc = false; /* Last character we saw was the escape - * character */ - char *destination, - *tmp; - size_t length; - - psql_assert(source); - - length = Min(len, strlen(source)) + 1; - - tmp = destination = pg_malloc(length); - - for (p = source; p - source < (int) len && *p; p += PQmblen(p, pset.encoding)) - { - if (esc) - { - char c; - - switch (*p) - { - case 'n': - c = '\n'; - break; - case 't': - c = '\t'; - break; - case 'b': - c = '\b'; - break; - case 'r': - c = '\r'; - break; - case 'f': - c = '\f'; - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - c = parse_char((char **) &p); - break; - - default: - c = *p; - } - *tmp++ = c; - esc = false; - } - - else if (*p == '\\') - esc = true; - - else - { - int i; - const unsigned char *mp = p; - - for (i = 0; i < PQmblen(p, pset.encoding); i++) - *tmp++ = *mp++; - esc = false; - } - } - - *tmp = '\0'; - return destination; -} - - - /* do_connect * -- handler for \connect * diff --git a/src/bin/psql/command.h b/src/bin/psql/command.h index dca0de8286..62a66ade3f 100644 --- a/src/bin/psql/command.h +++ b/src/bin/psql/command.h @@ -3,15 +3,14 @@ * * Copyright (c) 2000-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/command.h,v 1.18 2003/11/29 19:52:06 pgsql Exp $ + * $PostgreSQL: pgsql/src/bin/psql/command.h,v 1.19 2004/02/19 19:40:09 tgl Exp $ */ #ifndef COMMAND_H #define COMMAND_H -#include "pqexpbuffer.h" - #include "settings.h" #include "print.h" +#include "psqlscan.h" typedef enum _backslashResult @@ -26,10 +25,8 @@ typedef enum _backslashResult } backslashResult; -extern backslashResult HandleSlashCmds(const char *line, - PQExpBuffer query_buf, - const char **end_of_cmd, - volatile int *paren_level); +extern backslashResult HandleSlashCmds(PsqlScanState scan_state, + PQExpBuffer query_buf); 
extern int process_file(char *filename); diff --git a/src/bin/psql/mainloop.c b/src/bin/psql/mainloop.c index b02ffe2a8c..7153181425 100644 --- a/src/bin/psql/mainloop.c +++ b/src/bin/psql/mainloop.c @@ -3,18 +3,19 @@ * * Copyright (c) 2000-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/mainloop.c,v 1.61 2004/01/25 03:07:22 neilc Exp $ + * $PostgreSQL: pgsql/src/bin/psql/mainloop.c,v 1.62 2004/02/19 19:40:09 tgl Exp $ */ #include "postgres_fe.h" #include "mainloop.h" #include "pqexpbuffer.h" -#include "settings.h" -#include "prompt.h" -#include "input.h" -#include "common.h" #include "command.h" +#include "common.h" +#include "input.h" +#include "prompt.h" +#include "psqlscan.h" +#include "settings.h" #ifndef WIN32 #include <setjmp.h> @@ -28,48 +29,39 @@ sigjmp_buf main_loop_jmp; * * This loop is re-entrant. May be called by \i command * which reads input from a file. - * - * FIXME: rewrite this whole thing with flex */ int MainLoop(FILE *source) { + PsqlScanState scan_state; /* lexer working state */ PQExpBuffer query_buf; /* buffer for query being accumulated */ PQExpBuffer previous_buf; /* if there isn't anything in the new * buffer yet, use this one for \e, etc. 
*/ char *line; /* current line of input */ - int len; /* length of the line */ + int added_nl_pos; + bool success; volatile int successResult = EXIT_SUCCESS; volatile backslashResult slashCmdStatus = CMD_UNKNOWN; - - bool success; - volatile char in_quote = 0; /* == 0 for no in_quote */ - volatile int in_xcomment = 0; /* in extended comment */ - volatile int paren_level = 0; - unsigned int query_start; + volatile promptStatus_t prompt_status = PROMPT_READY; volatile int count_eof = 0; - volatile unsigned int bslash_count = 0; - - int i, - prevlen, - thislen; - + volatile bool die_on_error = false; /* Save the prior command source */ FILE *prev_cmd_source; bool prev_cmd_interactive; - unsigned int prev_lineno; - volatile bool die_on_error = false; - /* Save old settings */ prev_cmd_source = pset.cur_cmd_source; prev_cmd_interactive = pset.cur_cmd_interactive; + prev_lineno = pset.lineno; /* Establish new source */ pset.cur_cmd_source = source; pset.cur_cmd_interactive = ((source == stdin) && !pset.notty); + pset.lineno = 0; + /* Create working state */ + scan_state = psql_scan_create(); query_buf = createPQExpBuffer(); previous_buf = createPQExpBuffer(); @@ -79,10 +71,6 @@ MainLoop(FILE *source) exit(EXIT_FAILURE); } - prev_lineno = pset.lineno; - pset.lineno = 0; - - /* main loop to get queries and execute them */ while (successResult == EXIT_SUCCESS) { @@ -110,17 +98,17 @@ MainLoop(FILE *source) { /* got here with longjmp */ + /* reset parsing state */ + resetPQExpBuffer(query_buf); + psql_scan_finish(scan_state); + psql_scan_reset(scan_state); + count_eof = 0; + slashCmdStatus = CMD_UNKNOWN; + prompt_status = PROMPT_READY; + if (pset.cur_cmd_interactive) { putc('\n', stdout); - resetPQExpBuffer(query_buf); - - /* reset parsing state */ - in_xcomment = 0; - in_quote = 0; - paren_level = 0; - count_eof = 0; - slashCmdStatus = CMD_UNKNOWN; } else { @@ -145,48 +133,30 @@ MainLoop(FILE *source) * input buffer */ line = pg_strdup(query_buf->data); - 
resetPQExpBuffer(query_buf); /* reset parsing state since we are rescanning whole line */ - in_xcomment = 0; - in_quote = 0; - paren_level = 0; + resetPQExpBuffer(query_buf); + psql_scan_reset(scan_state); slashCmdStatus = CMD_UNKNOWN; + prompt_status = PROMPT_READY; } /* - * otherwise, set interactive prompt if necessary and get another - * line + * otherwise, get another line */ else if (pset.cur_cmd_interactive) { - int prompt_status; - - if (in_quote && in_quote == '\'') - prompt_status = PROMPT_SINGLEQUOTE; - else if (in_quote && in_quote == '"') - prompt_status = PROMPT_DOUBLEQUOTE; - else if (in_xcomment) - prompt_status = PROMPT_COMMENT; - else if (paren_level) - prompt_status = PROMPT_PAREN; - else if (query_buf->len > 0) - prompt_status = PROMPT_CONTINUE; - else + /* May need to reset prompt, eg after \r command */ + if (query_buf->len == 0) prompt_status = PROMPT_READY; - line = gets_interactive(get_prompt(prompt_status)); } else line = gets_fromFile(source); - /* Setting this will not have effect until next line. */ - die_on_error = GetVariableBool(pset.vars, "ON_ERROR_STOP"); - /* * query_buf holds query already accumulated. line is the * malloc'd new line of input (note it must be freed before - * looping around!) query_start is the next command start location - * within the line. + * looping around!) */ /* No more input. Time to quit, or \i done */ @@ -214,165 +184,52 @@ MainLoop(FILE *source) pset.lineno++; /* nothing left on line? 
then ignore */ - if (line[0] == '\0' && !in_quote) + if (line[0] == '\0' && !psql_scan_in_quote(scan_state)) { free(line); continue; } /* echo back if flag is set */ - if (!pset.cur_cmd_interactive && VariableEquals(pset.vars, "ECHO", "all")) + if (!pset.cur_cmd_interactive && + VariableEquals(pset.vars, "ECHO", "all")) puts(line); fflush(stdout); - len = strlen(line); - query_start = 0; + /* insert newlines into query buffer between source lines */ + if (query_buf->len > 0) + { + appendPQExpBufferChar(query_buf, '\n'); + added_nl_pos = query_buf->len; + } + else + added_nl_pos = -1; /* flag we didn't add one */ + + /* Setting this will not have effect until next line. */ + die_on_error = GetVariableBool(pset.vars, "ON_ERROR_STOP"); /* * Parse line, looking for command separators. - * - * The current character is at line[i], the prior character at line[i - * - prevlen], the next character at line[i + thislen]. */ -#define ADVANCE_1 (prevlen = thislen, i += thislen, thislen = PQmblen(line+i, pset.encoding)) - + psql_scan_setup(scan_state, line, strlen(line)); success = true; - prevlen = 0; - thislen = ((len > 0) ? PQmblen(line, pset.encoding) : 0); - for (i = 0; (i < len) && (success || !die_on_error); ADVANCE_1) + while (success || !die_on_error) { - /* was the previous character a backslash? */ - if (i > 0 && line[i - prevlen] == '\\') - bslash_count++; - else - bslash_count = 0; + PsqlScanResult scan_result; + promptStatus_t prompt_tmp = prompt_status; - rescan: + scan_result = psql_scan(scan_state, query_buf, &prompt_tmp); + prompt_status = prompt_tmp; /* - * It is important to place the in_* test routines before the - * in_* detection routines. i.e. we have to test if we are in - * a quote before testing for comments. bjm 2000-06-30 + * Send command if semicolon found, or if end of line and + * we're in single-line mode. */ - - /* in quote? 
*/ - if (in_quote) + if (scan_result == PSCAN_SEMICOLON || + (scan_result == PSCAN_EOL && + GetVariableBool(pset.vars, "SINGLELINE"))) { - /* - * end of quote if matching non-backslashed character. - * backslashes don't count for double quotes, though. - */ - if (line[i] == in_quote && - (bslash_count % 2 == 0 || in_quote == '"')) - in_quote = 0; - } - - /* start of extended comment? */ - else if (line[i] == '/' && line[i + thislen] == '*') - { - in_xcomment++; - if (in_xcomment == 1) - ADVANCE_1; - } - - /* in or end of extended comment? */ - else if (in_xcomment) - { - if (line[i] == '*' && line[i + thislen] == '/' && - !--in_xcomment) - ADVANCE_1; - } - - /* start of quote? */ - else if (line[i] == '\'' || line[i] == '"') - in_quote = line[i]; - - /* single-line comment? truncate line */ - else if (line[i] == '-' && line[i + thislen] == '-') - { - line[i] = '\0'; /* remove comment */ - break; - } - - /* count nested parentheses */ - else if (line[i] == '(') - paren_level++; - - else if (line[i] == ')' && paren_level > 0) - paren_level--; - - /* colon -> substitute variable */ - /* we need to be on the watch for the '::' operator */ - else if (line[i] == ':' && !bslash_count - && strspn(line + i + thislen, VALID_VARIABLE_CHARS) > 0 - && !(prevlen > 0 && line[i - prevlen] == ':') - ) - { - size_t in_length, - out_length; - const char *value; - char *new; - char after; /* the character after the - * variable name will be - * temporarily overwritten */ - - in_length = strspn(&line[i + thislen], VALID_VARIABLE_CHARS); - /* mark off the possible variable name */ - after = line[i + thislen + in_length]; - line[i + thislen + in_length] = '\0'; - - value = GetVariable(pset.vars, &line[i + thislen]); - - /* restore overwritten character */ - line[i + thislen + in_length] = after; - - if (value) - { - /* It is a variable, perform substitution */ - out_length = strlen(value); - - new = pg_malloc(len + out_length - in_length + 1); - sprintf(new, "%.*s%s%s", i, line, value, - 
&line[i + thislen + in_length]); - - free(line); - line = new; - len = strlen(new); - - if (i < len) - { - thislen = PQmblen(line + i, pset.encoding); - goto rescan; /* reparse the just substituted */ - } - } - else - { - /* - * if the variable doesn't exist we'll leave the - * string as is ... move on ... - */ - } - } - - /* semicolon? then send query */ - else if (line[i] == ';' && !bslash_count && !paren_level) - { - line[i] = '\0'; - /* is there anything else on the line? */ - if (line[query_start + strspn(line + query_start, " \t\n\r")] != '\0') - { - /* - * insert a cosmetic newline, if this is not the first - * line in the buffer - */ - if (query_buf->len > 0) - appendPQExpBufferChar(query_buf, '\n'); - /* append the line to the query buffer */ - appendPQExpBufferStr(query_buf, line + query_start); - appendPQExpBufferChar(query_buf, ';'); - } - /* execute query */ success = SendQuery(query_buf->data); slashCmdStatus = success ? CMD_SEND : CMD_ERROR; @@ -380,46 +237,26 @@ MainLoop(FILE *source) resetPQExpBuffer(previous_buf); appendPQExpBufferStr(previous_buf, query_buf->data); resetPQExpBuffer(query_buf); - query_start = i + thislen; + added_nl_pos = -1; + /* we need not do psql_scan_reset() here */ } - - /* - * if you have a burning need to send a semicolon or colon to - * the backend ... - */ - else if (bslash_count && (line[i] == ';' || line[i] == ':')) + else if (scan_result == PSCAN_BACKSLASH) { - /* remove the backslash */ - memmove(line + i - prevlen, line + i, len - i + 1); - len--; - i--; - } - - /* backslash command */ - else if (bslash_count) - { - const char *end_of_cmd = NULL; - - line[i - prevlen] = '\0'; /* overwrites backslash */ - - /* is there anything else on the line for the command? 
*/ - if (line[query_start + strspn(line + query_start, " \t\n\r")] != '\0') - { - /* - * insert a cosmetic newline, if this is not the first - * line in the buffer - */ - if (query_buf->len > 0) - appendPQExpBufferChar(query_buf, '\n'); - /* append the line to the query buffer */ - appendPQExpBufferStr(query_buf, line + query_start); - } - /* handle backslash command */ - slashCmdStatus = HandleSlashCmds(&line[i], - query_buf->len > 0 ? query_buf : previous_buf, - &end_of_cmd, - &paren_level); + + /* + * If we added a newline to query_buf, and nothing else has + * been inserted in query_buf by the lexer, then strip off + * the newline again. This avoids any change to query_buf + * when a line contains only a backslash command. + */ + if (query_buf->len == added_nl_pos) + query_buf->data[--query_buf->len] = '\0'; + added_nl_pos = -1; + + slashCmdStatus = HandleSlashCmds(scan_state, + query_buf->len > 0 ? + query_buf : previous_buf); success = slashCmdStatus != CMD_ERROR; @@ -433,22 +270,27 @@ MainLoop(FILE *source) if (slashCmdStatus == CMD_SEND) { success = SendQuery(query_buf->data); - query_start = i + thislen; resetPQExpBuffer(previous_buf); appendPQExpBufferStr(previous_buf, query_buf->data); resetPQExpBuffer(query_buf); + + /* flush any paren nesting info after forced send */ + psql_scan_reset(scan_state); } - if (query_buf->len == 0 && previous_buf->len == 0) - paren_level = 0; - - /* process anything left after the backslash command */ - i = end_of_cmd - line; - query_start = i; + if (slashCmdStatus == CMD_TERMINATE) + break; } - } /* for (line) */ + /* fall out of loop if lexer reached EOL */ + if (scan_result == PSCAN_INCOMPLETE || + scan_result == PSCAN_EOL) + break; + } + + psql_scan_finish(scan_state); + free(line); if (slashCmdStatus == CMD_TERMINATE) { @@ -456,28 +298,6 @@ MainLoop(FILE *source) break; } - - /* Put the rest of the line in the query buffer. 
*/ - if (in_quote || line[query_start + strspn(line + query_start, " \t\n\r")] != '\0') - { - if (query_buf->len > 0) - appendPQExpBufferChar(query_buf, '\n'); - appendPQExpBufferStr(query_buf, line + query_start); - } - - free(line); - - - /* In single line mode, send off the query if any */ - if (query_buf->data[0] != '\0' && GetVariableBool(pset.vars, "SINGLELINE")) - { - success = SendQuery(query_buf->data); - slashCmdStatus = (success ? CMD_SEND : CMD_ERROR); - resetPQExpBuffer(previous_buf); - appendPQExpBufferStr(previous_buf, query_buf->data); - resetPQExpBuffer(query_buf); - } - if (!pset.cur_cmd_interactive) { if (!success && die_on_error) @@ -515,6 +335,8 @@ MainLoop(FILE *source) destroyPQExpBuffer(query_buf); destroyPQExpBuffer(previous_buf); + psql_scan_destroy(scan_state); + pset.cur_cmd_source = prev_cmd_source; pset.cur_cmd_interactive = prev_cmd_interactive; pset.lineno = prev_lineno; diff --git a/src/bin/psql/psqlscan.h b/src/bin/psql/psqlscan.h new file mode 100644 index 0000000000..7dbfc673c5 --- /dev/null +++ b/src/bin/psql/psqlscan.h @@ -0,0 +1,65 @@ +/* + * psql - the PostgreSQL interactive terminal + * + * Copyright (c) 2000-2003, PostgreSQL Global Development Group + * + * $PostgreSQL: pgsql/src/bin/psql/psqlscan.h,v 1.1 2004/02/19 19:40:09 tgl Exp $ + */ +#ifndef PSQLSCAN_H +#define PSQLSCAN_H + +#include "pqexpbuffer.h" + +#include "prompt.h" + + +/* Abstract type for lexer's internal state */ +typedef struct PsqlScanStateData *PsqlScanState; + +/* Termination states for psql_scan() */ +typedef enum +{ + PSCAN_SEMICOLON, /* found command-ending semicolon */ + PSCAN_BACKSLASH, /* found backslash command */ + PSCAN_INCOMPLETE, /* end of line, SQL statement incomplete */ + PSCAN_EOL /* end of line, SQL possibly complete */ +} PsqlScanResult; + +/* Different ways for scan_slash_option to handle parameter words */ +enum slash_option_type +{ + OT_NORMAL, /* normal case */ + OT_SQLID, /* treat as SQL identifier */ + OT_SQLIDHACK, /* SQL 
identifier, but don't downcase */ + OT_FILEPIPE, /* it's a filename or pipe */ + OT_WHOLE_LINE /* just snarf the rest of the line */ +}; + + +extern PsqlScanState psql_scan_create(void); +extern void psql_scan_destroy(PsqlScanState state); + +extern void psql_scan_setup(PsqlScanState state, + const char *line, int line_len); +extern void psql_scan_finish(PsqlScanState state); + +extern PsqlScanResult psql_scan(PsqlScanState state, + PQExpBuffer query_buf, + promptStatus_t *prompt); + +extern void psql_scan_reset(PsqlScanState state); + +extern bool psql_scan_in_quote(PsqlScanState state); + +extern char *psql_scan_slash_command(PsqlScanState state); + +extern char *psql_scan_slash_option(PsqlScanState state, + enum slash_option_type type, + char *quote, + bool semicolon); + +extern void psql_scan_slash_command_end(PsqlScanState state); + +extern void psql_scan_slash_pushback(PsqlScanState state, const char *str); + +#endif /* PSQLSCAN_H */ diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l new file mode 100644 index 0000000000..46886b2f94 --- /dev/null +++ b/src/bin/psql/psqlscan.l @@ -0,0 +1,1506 @@ +%{ +/*------------------------------------------------------------------------- + * + * psqlscan.l + * lexical scanner for psql + * + * This code is mainly needed to determine where the end of a SQL statement + * is: we are looking for semicolons that are not within quotes, comments, + * or parentheses. The most reliable way to handle this is to borrow the + * backend's flex lexer rules, lock, stock, and barrel. The rules below + * are (except for a few) the same as the backend's, but their actions are + * just ECHO whereas the backend's actions generally do other things. + * + * XXX The rules in this file must be kept in sync with the main parser!!! + * + * The most difficult aspect of this code is that we need to work in multibyte + * encodings that are not ASCII-safe. 
A "safe" encoding is one in which each + * byte of a multibyte character has the high bit set (it's >= 0x80). Since + * all our lexing rules treat all high-bit-set characters alike, we don't + * really need to care whether such a byte is part of a sequence or not. + * In an "unsafe" encoding, we still expect the first byte of a multibyte + * sequence to be >= 0x80, but later bytes might not be. If we scan such + * a sequence as-is, the lexing rules could easily be fooled into matching + * such bytes to ordinary ASCII characters. Our solution for this is to + * substitute 0xFF for each non-first byte within the data presented to flex. + * The flex rules will then pass the FF's through unmolested. The emit() + * subroutine is responsible for looking back to the original string and + * replacing FF's with the corresponding original bytes. + * + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.1 2004/02/19 19:40:09 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include "psqlscan.h" + +#include + +#include "mb/pg_wchar.h" + +#include "common.h" +#include "settings.h" +#include "variables.h" + + +/* + * We use a stack of flex buffers to handle substitution of psql variables. + * Each stacked buffer contains the as-yet-unread text from one psql variable. + * When we pop the stack all the way, we resume reading from the outer buffer + * identified by scanbufhandle. + */ +typedef struct StackElem +{ + YY_BUFFER_STATE buf; /* flex input control structure */ + char *bufstring; /* data actually being scanned by flex */ + char *origstring; /* copy of original data, if needed */ + struct StackElem *next; +} StackElem; + +/* + * All working state of the lexer must be stored in PsqlScanStateData + * between calls. 
This allows us to have multiple open lexer operations, + * which is needed for nested include files. The lexer itself is not + * recursive, but it must be re-entrant. + */ +typedef struct PsqlScanStateData +{ + StackElem *buffer_stack; /* stack of variable expansion buffers */ + /* + * These variables always refer to the outer buffer, never to any + * stacked variable-expansion buffer. + */ + YY_BUFFER_STATE scanbufhandle; + char *scanbuf; /* start of outer-level input buffer */ + const char *scanline; /* current input line at outer level */ + + /* safe_encoding, curline, refline are used by emit() to replace FFs */ + int encoding; /* encoding being used now */ + bool safe_encoding; /* is current encoding "safe"? */ + const char *curline; /* actual flex input string for cur buf */ + const char *refline; /* original data for cur buffer */ + + /* + * All this state lives across successive input lines, until explicitly + * reset by psql_scan_reset. + */ + int start_state; /* saved YY_START */ + int paren_depth; /* depth of nesting in parentheses */ + int xcdepth; /* depth of nesting in slash-star comments */ +} PsqlScanStateData; + +static PsqlScanState cur_state; /* current state while active */ + +static PQExpBuffer output_buf; /* current output buffer */ + +/* these variables do not need to be saved across calls */ +static enum slash_option_type option_type; +static char *option_quote; + + +/* Return values from yylex() */ +#define LEXRES_EOL 0 /* end of input */ +#define LEXRES_SEMI 1 /* command-terminating semicolon found */ +#define LEXRES_BACKSLASH 2 /* backslash command start */ +#define LEXRES_OK 3 /* OK completion of backslash argument */ + + +int yylex(void); + +static void push_new_buffer(const char *newstr); +static YY_BUFFER_STATE prepare_buffer(const char *txt, int len, + char **txtcopy); +static void emit(const char *txt, int len); + +#define ECHO emit(yytext, yyleng) + +%} + +%option 8bit +%option never-interactive +%option nounput +%option noyywrap + 
+/* + * All of the following definitions and rules should exactly match + * src/backend/parser/scan.l so far as the flex patterns are concerned. + * The rule bodies are just ECHO as opposed to what the backend does, + * however. (But be sure to duplicate code that affects the lexing process, + * such as BEGIN().) Also, psqlscan uses a single <<EOF>> rule whereas + * scan.l has a separate one for each exclusive state. + */ + +/* + * OK, here is a short description of lex/flex rules behavior. + * The longest pattern which matches an input string is always chosen. + * For equal-length patterns, the first occurring in the rules list is chosen. + * INITIAL is the starting state, to which all non-conditional rules apply. + * Exclusive states change parsing rules while the state is active. When in + * an exclusive state, only those rules defined for that state apply. + * + * We use exclusive states for quoted strings, extended comments, + * and to eliminate parsing troubles for numeric strings. + * Exclusive states: + * <xb> bit string literal + * <xc> extended C-style comments + * <xd> delimited identifiers (double-quoted identifiers) + * <xh> hexadecimal numeric string + * <xq> quoted strings + */ + +%x xb +%x xc +%x xd +%x xh +%x xq +/* Additional exclusive states for psql only: lex backslash commands */ +%x xslashcmd +%x xslasharg +%x xslashquote +%x xslashbackquote +%x xslashdefaultarg +%x xslashquotedarg +%x xslashwholeline +%x xslashend + +/* + * In order to make the world safe for Windows and Mac clients as well as + * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n + * sequence will be seen as two successive newlines, but that doesn't cause + * any problems. Comments that start with -- and extend to the next + * newline are treated as equivalent to a single whitespace character. + * + * NOTE a fine point: if there is no newline following --, we will absorb + * everything to the end of the input as a comment. This is correct.
Older + * versions of Postgres failed to recognize -- as a comment if the input + * did not end with a newline. + * + * XXX perhaps \f (formfeed) should be treated as a newline as well? + */ + +space [ \t\n\r\f] +horiz_space [ \t\f] +newline [\n\r] +non_newline [^\n\r] + +comment ("--"{non_newline}*) + +whitespace ({space}+|{comment}) + +/* + * SQL requires at least one newline in the whitespace separating + * string literals that are to be concatenated. Silly, but who are we + * to argue? Note that {whitespace_with_newline} should not have * after + * it, whereas {whitespace} should generally have a * after it... + */ + +special_whitespace ({space}+|{comment}{newline}) +horiz_whitespace ({horiz_space}|{comment}) +whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*) + +/* Bit string + * It is tempting to scan the string for only those characters + * which are allowed. However, this leads to silently swallowed + * characters if illegal characters are included in the string. + * For example, if xbinside is [01] then B'ABCD' is interpreted + * as a zero-length string, and the ABCD' is lost! + * Better to pass the string forward and let the input routines + * validate the contents. + */ +xbstart [bB]{quote} +xbstop {quote} +xbinside [^']* +xbcat {quote}{whitespace_with_newline}{quote} + +/* Hexadecimal number + */ +xhstart [xX]{quote} +xhstop {quote} +xhinside [^']* +xhcat {quote}{whitespace_with_newline}{quote} + +/* National character + */ +xnstart [nN]{quote} + +/* Extended quote + * xqdouble implements embedded quote + * xqcat allows strings to cross input lines + */ +quote ' +xqstart {quote} +xqstop {quote} +xqdouble {quote}{quote} +xqinside [^\\']+ +xqescape [\\][^0-7] +xqoctesc [\\][0-7]{1,3} +xqcat {quote}{whitespace_with_newline}{quote} + +/* Double quote + * Allows embedded spaces and other special characters into identifiers. 
+ */ +dquote \" +xdstart {dquote} +xdstop {dquote} +xddouble {dquote}{dquote} +xdinside [^"]+ + +/* C-style comments + * + * The "extended comment" syntax closely resembles allowable operator syntax. + * The tricky part here is to get lex to recognize a string starting with + * slash-star as a comment, when interpreting it as an operator would produce + * a longer match --- remember lex will prefer a longer match! Also, if we + * have something like plus-slash-star, lex will think this is a 3-character + * operator whereas we want to see it as a + operator and a comment start. + * The solution is two-fold: + * 1. append {op_chars}* to xcstart so that it matches as much text as + * {operator} would. Then the tie-breaker (first matching rule of same + * length) ensures xcstart wins. We put back the extra stuff with yyless() + * in case it contains a star-slash that should terminate the comment. + * 2. In the operator rule, check for slash-star within the operator, and + * if found throw it back with yyless(). This handles the plus-slash-star + * problem. + * Dash-dash comments have similar interactions with the operator rule. + */ +xcstart \/\*{op_chars}* +xcstop \*+\/ +xcinside [^*/]+ + +digit [0-9] +ident_start [A-Za-z\200-\377_] +ident_cont [A-Za-z\200-\377_0-9\$] + +identifier {ident_start}{ident_cont}* + +typecast "::" + +/* + * "self" is the set of chars that should be returned as single-character + * tokens. "op_chars" is the set of chars that can make up "Op" tokens, + * which can be one or more characters long (but if a single-char token + * appears in the "self" set, it is not to be returned as an Op). Note + * that the sets overlap, but each has some chars that are not in the other. + * + * If you change either set, adjust the character lists appearing in the + * rule for "operator"! + */ +self [,()\[\].;\:\+\-\*\/\%\^\<\>\=] +op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=] +operator {op_chars}+ + +/* we no longer allow unary minus in numbers. 
+ * instead we pass it separately to parser. there it gets + * coerced via doNegate() -- Leon aug 20 1999 + */ + +integer {digit}+ +decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) +real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+)) + +param \${integer} + +other . + +/* + * Quoted strings must allow some special characters such as single-quote + * and newline. + * Embedded single-quotes are implemented both in the SQL standard + * style of two adjacent single quotes "''" and in the Postgres/Java style + * of escaped-quote "\'". + * Other embedded escaped characters are matched explicitly and the leading + * backslash is dropped from the string. + * Note that xcstart must appear before operator, as explained above! + * Also whitespace (comment) must appear before operator. + */ + +%% + +{whitespace} { + /* + * Note that the whitespace rule includes both true + * whitespace and single-line ("--" style) comments. + * We suppress whitespace at the start of the query + * buffer. We also suppress all single-line comments, + * which is pretty dubious but is the historical + * behavior. + */ + if (!(output_buf->len == 0 || yytext[0] == '-')) + ECHO; + } + +{xcstart} { + cur_state->xcdepth = 0; + BEGIN(xc); + /* Put back any characters past slash-star; see above */ + yyless(2); + ECHO; + } + +{xcstart} { + cur_state->xcdepth++; + /* Put back any characters past slash-star; see above */ + yyless(2); + ECHO; + } + +{xcstop} { + if (cur_state->xcdepth <= 0) + { + BEGIN(INITIAL); + } + else + cur_state->xcdepth--; + ECHO; + } + +{xcinside} { + ECHO; + } + +{op_chars} { + ECHO; + } + +{xbstart} { + BEGIN(xb); + ECHO; + } +{xbstop} { + BEGIN(INITIAL); + ECHO; + } +{xhinside} | +{xbinside} { + ECHO; + } +{xhcat} | +{xbcat} { + ECHO; + } + +{xhstart} { + /* Hexadecimal bit type. + * At some point we should simply pass the string + * forward to the parser and label it there. 
+ * In the meantime, place a leading "x" on the string + * to mark it for the input routine as a hex string. + */ + BEGIN(xh); + ECHO; + } +{xhstop} { + BEGIN(INITIAL); + ECHO; + } + +{xnstart} { + BEGIN(xq); + ECHO; + } + +{xqstart} { + BEGIN(xq); + ECHO; + } +{xqstop} { + BEGIN(INITIAL); + ECHO; + } +{xqdouble} { + ECHO; + } +{xqinside} { + ECHO; + } +{xqescape} { + ECHO; + } +{xqoctesc} { + ECHO; + } +{xqcat} { + ECHO; + } + +{xdstart} { + BEGIN(xd); + ECHO; + } +{xdstop} { + BEGIN(INITIAL); + ECHO; + } +{xddouble} { + ECHO; + } +{xdinside} { + ECHO; + } + +{typecast} { + ECHO; + } + + /* + * These rules are specific to psql --- they implement parenthesis + * counting and detection of command-ending semicolon. These must + * appear before the {self} rule so that they take precedence over it. + */ + +"(" { + cur_state->paren_depth++; + ECHO; + } + +")" { + if (cur_state->paren_depth > 0) + cur_state->paren_depth--; + ECHO; + } + +";" { + ECHO; + if (cur_state->paren_depth == 0) + { + /* Terminate lexing temporarily */ + return LEXRES_SEMI; + } + } + + /* + * psql-specific rules to handle backslash commands and variable + * substitution. We want these before {self}, also. + */ + +"\\"[;:] { + /* Force a semicolon or colon into the query buffer */ + emit(yytext + 1, 1); + } + +"\\" { + /* Terminate lexing temporarily */ + return LEXRES_BACKSLASH; + } + +:[A-Za-z0-9_]+ { + /* Possible psql variable substitution */ + const char *value; + + value = GetVariable(pset.vars, yytext + 1); + + if (value) + { + /* It is a variable, perform substitution */ + push_new_buffer(value); + /* yy_scan_string already made buffer active */ + } + else + { + /* + * if the variable doesn't exist we'll copy the + * string as is + */ + ECHO; + } + } + + /* + * Back to backend-compatible rules. + */ + +{self} { + ECHO; + } + +{operator} { + /* + * Check for embedded slash-star or dash-dash; those + * are comment starts, so operator must stop there. 
+ * Note that slash-star or dash-dash at the first + * character will match a prior rule, not this one. + */ + int nchars = yyleng; + char *slashstar = strstr(yytext, "/*"); + char *dashdash = strstr(yytext, "--"); + + if (slashstar && dashdash) + { + /* if both appear, take the first one */ + if (slashstar > dashdash) + slashstar = dashdash; + } + else if (!slashstar) + slashstar = dashdash; + if (slashstar) + nchars = slashstar - yytext; + + /* + * For SQL compatibility, '+' and '-' cannot be the + * last char of a multi-char operator unless the operator + * contains chars that are not in SQL operators. + * The idea is to lex '=-' as two operators, but not + * to forbid operator names like '?-' that could not be + * sequences of SQL operators. + */ + while (nchars > 1 && + (yytext[nchars-1] == '+' || + yytext[nchars-1] == '-')) + { + int ic; + + for (ic = nchars-2; ic >= 0; ic--) + { + if (strchr("~!@#^&|`?%", yytext[ic])) + break; + } + if (ic >= 0) + break; /* found a char that makes it OK */ + nchars--; /* else remove the +/-, and check again */ + } + + if (nchars < yyleng) + { + /* Strip the unwanted chars from the token */ + yyless(nchars); + } + ECHO; + } + +{param} { + ECHO; + } + +{integer} { + ECHO; + } +{decimal} { + ECHO; + } +{real} { + ECHO; + } + + +{identifier} { + ECHO; + } + +{other} { + ECHO; + } + + + /* + * Everything from here down is psql-specific. 
+ */ + +<> { + StackElem *stackelem = cur_state->buffer_stack; + + if (stackelem == NULL) + return LEXRES_EOL; /* end of input reached */ + + /* + * We were expanding a variable, so pop the inclusion + * stack and keep lexing + */ + cur_state->buffer_stack = stackelem->next; + yy_delete_buffer(stackelem->buf); + free(stackelem->bufstring); + if (stackelem->origstring) + free(stackelem->origstring); + free(stackelem); + + stackelem = cur_state->buffer_stack; + if (stackelem != NULL) + { + yy_switch_to_buffer(stackelem->buf); + cur_state->curline = stackelem->bufstring; + cur_state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring; + } + else + { + yy_switch_to_buffer(cur_state->scanbufhandle); + cur_state->curline = cur_state->scanbuf; + cur_state->refline = cur_state->scanline; + } + } + + /* + * Exclusive lexer states to handle backslash command lexing + */ + +{ + /* command name ends at whitespace or backslash; eat all else */ + +{space}|"\\" { + yyless(0); + return LEXRES_OK; + } + +{other} { ECHO; } + +} + +{ + /* eat any whitespace, then decide what to do at first nonblank */ + +{space}+ { } + +"\\" { + /* + * backslash is end of command or next command, do not eat + * + * XXX this means we can't conveniently accept options + * that start with a backslash; therefore, option + * processing that encourages use of backslashes is rather + * broken. + */ + yyless(0); + return LEXRES_OK; + } + +{quote} { + *option_quote = '\''; + BEGIN(xslashquote); + } + +"`" { + *option_quote = '`'; + BEGIN(xslashbackquote); + } + +:[A-Za-z0-9_]* { + /* Possible psql variable substitution */ + const char *value; + + value = GetVariable(pset.vars, yytext + 1); + + /* + * The variable value is just emitted without any + * further examination. This is consistent with the + * pre-7.5 code behavior, if not with the way that + * variables are handled outside backslash commands. 
+ */ + if (value) + appendPQExpBufferStr(output_buf, value); + + *option_quote = ':'; + + return LEXRES_OK; + } + +"|" { + ECHO; + if (option_type == OT_FILEPIPE) + { + /* treat like whole-string case */ + BEGIN(xslashwholeline); + } + else + { + /* treat like default case */ + BEGIN(xslashdefaultarg); + } + } + +{dquote} { + *option_quote = '"'; + ECHO; + BEGIN(xslashquotedarg); + } + +{other} { + ECHO; + BEGIN(xslashdefaultarg); + } + +} + +{ + /* single-quoted text: copy literally except for backslash sequences */ + +{quote} { return LEXRES_OK; } + +"\\n" { appendPQExpBufferChar(output_buf, '\n'); } +"\\t" { appendPQExpBufferChar(output_buf, '\t'); } +"\\b" { appendPQExpBufferChar(output_buf, '\b'); } +"\\r" { appendPQExpBufferChar(output_buf, '\r'); } +"\\f" { appendPQExpBufferChar(output_buf, '\f'); } + +"\\"[1-9][0-9]* { + /* decimal case */ + appendPQExpBufferChar(output_buf, + (char) strtol(yytext + 1, NULL, 0)); + } + +"\\"0[0-7]* { + /* octal case */ + appendPQExpBufferChar(output_buf, + (char) strtol(yytext + 1, NULL, 0)); + } + +"\\"0[xX][0-9A-Fa-f]+ { + /* hex case */ + appendPQExpBufferChar(output_buf, + (char) strtol(yytext + 1, NULL, 0)); + } + +"\\". { emit(yytext + 1, 1); } + +{other} { ECHO; } + +} + +{ + /* + * backticked text: copy everything until next backquote or end of line. + * Invocation of the command will happen in psql_scan_slash_option. + */ + +"`" { return LEXRES_OK; } + +{other} { ECHO; } + +} + +{ + /* + * Copy everything until unquoted whitespace or end of line. Quotes + * do not get stripped yet. 
+ */ + +{space} { + yyless(0); + return LEXRES_OK; + } + +"\\" { + /* + * unquoted backslash is end of command or next command, + * do not eat + * + * (this was not the behavior pre-7.5, but it seems + * consistent) + */ + yyless(0); + return LEXRES_OK; + } + +{dquote} { + *option_quote = '"'; + ECHO; + BEGIN(xslashquotedarg); + } + +{other} { ECHO; } + +} + +{ + /* double-quoted text within a default-type argument: copy */ + +{dquote} { + ECHO; + BEGIN(xslashdefaultarg); + } + +{other} { ECHO; } + +} + +{ + /* copy everything until end of input line */ + /* but suppress leading whitespace */ + +{space}+ { + if (output_buf->len > 0) + ECHO; + } + +{other} { ECHO; } + +} + +{ + /* at end of command, eat a double backslash, but not anything else */ + +"\\\\" { return LEXRES_OK; } + +{other} { + yyless(0); + return LEXRES_OK; + } + +} + +%% + +/* + * Create a lexer working state struct. + */ +PsqlScanState +psql_scan_create(void) +{ + PsqlScanState state; + + state = (PsqlScanStateData *) pg_malloc_zero(sizeof(PsqlScanStateData)); + + psql_scan_reset(state); + + return state; +} + +/* + * Destroy a lexer working state struct, releasing all resources. + */ +void +psql_scan_destroy(PsqlScanState state) +{ + psql_scan_finish(state); + + free(state); +} + +/* + * Set up to perform lexing of the given input line. + * + * The text at *line, extending for line_len bytes, will be scanned by + * subsequent calls to the psql_scan routines. psql_scan_finish should + * be called when scanning is complete. Note that the lexer retains + * a pointer to the storage at *line --- this string must not be altered + * or freed until after psql_scan_finish is called. + */ +void +psql_scan_setup(PsqlScanState state, + const char *line, int line_len) +{ + /* Mustn't be scanning already */ + psql_assert(state->scanbufhandle == NULL); + psql_assert(state->buffer_stack == NULL); + + /* Do we need to hack the character set encoding? 
*/ + state->encoding = pset.encoding; + state->safe_encoding = PG_VALID_BE_ENCODING(state->encoding); + + /* needed for prepare_buffer */ + cur_state = state; + + /* Set up flex input buffer with appropriate translation and padding */ + state->scanbufhandle = prepare_buffer(line, line_len, + &state->scanbuf); + state->scanline = line; + + /* Set lookaside data in case we have to map unsafe encoding */ + state->curline = state->scanbuf; + state->refline = state->scanline; +} + +/* + * Do lexical analysis of SQL command text. + * + * The text previously passed to psql_scan_setup is scanned, and appended + * (possibly with transformation) to query_buf. + * + * The return value indicates the condition that stopped scanning: + * + * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is + * transferred to query_buf.) The command accumulated in query_buf should + * be executed, then clear query_buf and call again to scan the remainder + * of the line. + * + * PSCAN_BACKSLASH: found a backslash that starts a psql special command. + * Any previous data on the line has been transferred to query_buf. + * The caller will typically next call psql_scan_slash_command(), + * perhaps psql_scan_slash_option(), and psql_scan_slash_command_end(). + * + * PSCAN_INCOMPLETE: the end of the line was reached, but we have an + * incomplete SQL command. *prompt is set to the appropriate prompt type. + * + * PSCAN_EOL: the end of the line was reached, and there is no lexical + * reason to consider the command incomplete. The caller may or may not + * choose to send it. *prompt is set to the appropriate prompt type if + * the caller chooses to collect more input. + * + * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should + * be called next, then the cycle may be repeated with a fresh input line. + * + * In all cases, *prompt is set to an appropriate prompt type code for the + * next line-input operation. 
+ */ +PsqlScanResult +psql_scan(PsqlScanState state, + PQExpBuffer query_buf, + promptStatus_t *prompt) +{ + PsqlScanResult result; + int lexresult; + + /* Must be scanning already */ + psql_assert(state->scanbufhandle); + + /* Set up static variables that will be used by yylex */ + cur_state = state; + output_buf = query_buf; + + if (state->buffer_stack != NULL) + yy_switch_to_buffer(state->buffer_stack->buf); + else + yy_switch_to_buffer(state->scanbufhandle); + + BEGIN(state->start_state); + + /* And lex. */ + lexresult = yylex(); + + /* Update static vars back to the state struct */ + state->start_state = YY_START; + + /* + * Check termination state and return appropriate result info. + */ + switch (lexresult) + { + case LEXRES_EOL: /* end of input */ + switch (state->start_state) + { + case INITIAL: + if (state->paren_depth > 0) + { + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_PAREN; + } + else if (query_buf->len > 0) + { + result = PSCAN_EOL; + *prompt = PROMPT_CONTINUE; + } + else + { + /* never bother to send an empty buffer */ + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_READY; + } + break; + case xb: + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_SINGLEQUOTE; + break; + case xc: + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_COMMENT; + break; + case xd: + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_DOUBLEQUOTE; + break; + case xh: + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_SINGLEQUOTE; + break; + case xq: + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_SINGLEQUOTE; + break; + default: + /* can't get here */ + fprintf(stderr, "invalid YY_START\n"); + exit(1); + } + break; + case LEXRES_SEMI: /* semicolon */ + result = PSCAN_SEMICOLON; + *prompt = PROMPT_READY; + break; + case LEXRES_BACKSLASH: /* backslash */ + result = PSCAN_BACKSLASH; + *prompt = PROMPT_READY; + break; + default: + /* can't get here */ + fprintf(stderr, "invalid yylex result\n"); + exit(1); + } + + return result; +} + +/* + * Clean up after scanning a string. 
+ * Any input not yet consumed is discarded, and the flex buffers together
+ * with their text copies are released.  The PsqlScanState itself stays
+ * allocated, and the lexer start state is not touched; resetting that is
+ * the job of psql_scan_reset(), which is an independent operation.
+ *
+ * Calling this while nothing is being scanned is allowed, which keeps
+ * error recovery paths simple.
+ */
+void
+psql_scan_finish(PsqlScanState state)
+{
+    StackElem  *elem;
+
+    /* Pop and destroy every buffer left over from an unfinished expansion */
+    while ((elem = state->buffer_stack) != NULL)
+    {
+        state->buffer_stack = elem->next;
+        yy_delete_buffer(elem->buf);
+        free(elem->bufstring);
+        free(elem->origstring);     /* may be NULL; free(NULL) is a no-op */
+        free(elem);
+    }
+
+    /* Then release the outer line buffer and its text copy */
+    if (state->scanbufhandle != NULL)
+    {
+        yy_delete_buffer(state->scanbufhandle);
+        state->scanbufhandle = NULL;
+    }
+    free(state->scanbuf);
+    state->scanbuf = NULL;
+}
+
+/*
+ * Put the lexer state back to its start conditions.  Do this whenever the
+ * accumulated query_buf contents are thrown away, for example by the \r
+ * psql command.  It is not needed after executing and clearing a buffer
+ * that ended with PSCAN_SEMICOLON or PSCAN_EOL, because those results are
+ * only returned while the scan state is INITIAL.
+ *
+ * Discarding unread input is a separate job, handled by psql_scan_finish().
+ */
+void
+psql_scan_reset(PsqlScanState state)
+{
+    state->paren_depth = 0;
+    state->xcdepth = 0;         /* not really necessary */
+    state->start_state = INITIAL;
+}
+
+/*
+ * Return true if lexer is currently in an "inside quotes" state.
+ *
+ * This is pretty grotty but is needed to preserve the old behavior
+ * that mainloop.c drops blank lines not inside quotes without even
+ * echoing them.
+ */
+bool
+psql_scan_in_quote(PsqlScanState state)
+{
+    /* Any start condition other than INITIAL counts as "inside" */
+    bool        at_top_level = (state->start_state == INITIAL);
+
+    return !at_top_level;
+}
+
+/*
+ * Read the name of a psql backslash command.  Call this right after
+ * psql_scan() has returned PSCAN_BACKSLASH; the input is assumed to have
+ * been consumed up to and including the leading backslash.
+ *
+ * Returns the command name, as parsed off the input, in malloc'd storage.
+ */
+char *
+psql_scan_slash_command(PsqlScanState state)
+{
+    PQExpBufferData cmdname;
+
+    /* A scan must already have been set up */
+    psql_assert(state->scanbufhandle);
+
+    /* The lexer appends into this buffer; its data is what we return */
+    initPQExpBuffer(&cmdname);
+
+    /* Hand the per-call context to yylex through the file-level statics */
+    cur_state = state;
+    output_buf = &cmdname;
+
+    /* Read from the innermost pushed buffer if any, else the line buffer */
+    yy_switch_to_buffer(state->buffer_stack != NULL ?
+                        state->buffer_stack->buf :
+                        state->scanbufhandle);
+
+    BEGIN(xslashcmd);
+
+    /* There are no possible errors in this lex state, so ignore the result */
+    (void) yylex();
+
+    return cmdname.data;
+}
+
+/*
+ * Parse off the next argument for a backslash command, and return it as a
+ * malloc'd string.  If there are no more arguments, returns NULL.
+ *
+ * type tells what processing, if any, to perform on the option string;
+ * for example, if it's a SQL identifier, we want to downcase any unquoted
+ * letters.
+ *
+ * if quote is not NULL, *quote is set to 0 if no quoting was found, else
+ * the quote symbol.
+ *
+ * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
+ * be taken as part of the option string will be stripped.
+ *
+ * NOTE: the only possible syntax errors for backslash options are unmatched
+ * quotes, which are detected when we run out of input.  Therefore, on a
+ * syntax error we just throw away the string and return NULL; there is no
+ * need to worry about flushing remaining input.
+ */
+char *
+psql_scan_slash_option(PsqlScanState state,
+                       enum slash_option_type type,
+                       char *quote,
+                       bool semicolon)
+{
+    PQExpBufferData mybuf;
+    int         lexresult;
+    char        local_quote;
+    bool        badarg;
+
+    /* Must be scanning already */
+    psql_assert(state->scanbufhandle);
+
+    /* Use a scratch variable when the caller doesn't want the quote char */
+    if (quote == NULL)
+        quote = &local_quote;
+    *quote = 0;
+
+    /* Build a local buffer that we'll return the data of */
+    initPQExpBuffer(&mybuf);
+
+    /* Set up static variables that will be used by yylex */
+    cur_state = state;
+    output_buf = &mybuf;
+    option_type = type;
+    option_quote = quote;
+
+    /* Read from the innermost pushed buffer if any, else the line buffer */
+    if (state->buffer_stack != NULL)
+        yy_switch_to_buffer(state->buffer_stack->buf);
+    else
+        yy_switch_to_buffer(state->scanbufhandle);
+
+    if (type == OT_WHOLE_LINE)
+        BEGIN(xslashwholeline);
+    else
+        BEGIN(xslasharg);
+
+    /* And lex. */
+    lexresult = yylex();
+
+    /*
+     * Check the lex result: we should have gotten back either LEXRES_OK
+     * or LEXRES_EOL (the latter indicating end of string).  If we were inside
+     * a quoted string, as indicated by YY_START, EOL is an error.
+     */
+    psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
+    badarg = false;
+    switch (YY_START)
+    {
+        case xslasharg:
+            /* empty arg, or possibly a psql variable substitution */
+            break;
+        case xslashquote:
+            if (lexresult != LEXRES_OK)
+                badarg = true;      /* hit EOL not ending quote */
+            break;
+        case xslashbackquote:
+            if (lexresult != LEXRES_OK)
+                badarg = true;      /* hit EOL not ending quote */
+            else
+            {
+                /* Perform evaluation of backticked command */
+                char       *cmd = mybuf.data;
+                FILE       *fd;
+                bool        error = false;
+                PQExpBufferData output;
+                char        buf[512];
+                size_t      result;
+
+                fd = popen(cmd, "r");
+                if (!fd)
+                {
+                    psql_error("%s: %s\n", cmd, strerror(errno));
+                    error = true;
+                }
+
+                initPQExpBuffer(&output);
+
+                if (!error)
+                {
+                    /* Collect everything the command writes to stdout */
+                    do
+                    {
+                        result = fread(buf, 1, sizeof(buf), fd);
+                        if (ferror(fd))
+                        {
+                            psql_error("%s: %s\n", cmd, strerror(errno));
+                            error = true;
+                            break;
+                        }
+                        appendBinaryPQExpBuffer(&output, buf, result);
+                    } while (!feof(fd));
+                }
+
+                /* Close the pipe even if reading failed */
+                if (fd && pclose(fd) == -1)
+                {
+                    psql_error("%s: %s\n", cmd, strerror(errno));
+                    error = true;
+                }
+
+                /* Now done with cmd, transfer result to mybuf */
+                resetPQExpBuffer(&mybuf);
+
+                if (!error)
+                {
+                    /* strip any trailing newline */
+                    if (output.len > 0 &&
+                        output.data[output.len - 1] == '\n')
+                        output.len--;
+                    appendBinaryPQExpBuffer(&mybuf, output.data, output.len);
+                }
+
+                termPQExpBuffer(&output);
+            }
+            break;
+        case xslashdefaultarg:
+            /* Strip any trailing semi-colons if requested */
+            if (semicolon)
+            {
+                while (mybuf.len > 0 &&
+                       mybuf.data[mybuf.len - 1] == ';')
+                {
+                    mybuf.data[--mybuf.len] = '\0';
+                }
+            }
+
+            /*
+             * If SQL identifier processing was requested, then we strip out
+             * excess double quotes and downcase unquoted letters.
+             * Doubled double-quotes become output double-quotes, per spec.
+             *
+             * Note that a string like FOO"BAR"BAZ will be converted to
+             * fooBARbaz; this is somewhat inconsistent with the SQL spec,
+             * which would have us parse it as several identifiers.  But
+             * for psql's purposes, we want a string like "foo"."bar" to
+             * be treated as one option, so there's little choice.
+             */
+            if (type == OT_SQLID || type == OT_SQLIDHACK)
+            {
+                bool        inquotes = false;
+                char       *cp = mybuf.data;
+
+                while (*cp)
+                {
+                    if (*cp == '"')
+                    {
+                        if (inquotes && cp[1] == '"')
+                        {
+                            /*
+                             * Doubled quote inside a quoted section: keep
+                             * the first quote as data and remove the second.
+                             * We are still inside the quoted section, so the
+                             * quoting state must NOT be toggled here;
+                             * otherwise "A""B" would come out as A"b.
+                             */
+                            cp++;
+                        }
+                        else
+                            inquotes = !inquotes;
+                        /* Collapse out quote at *cp (moves the NUL too) */
+                        memmove(cp, cp + 1, strlen(cp));
+                        mybuf.len--;
+                        /* do not advance cp */
+                    }
+                    else
+                    {
+                        /* OT_SQLIDHACK dequotes but never downcases */
+                        if (!inquotes && type == OT_SQLID)
+                        {
+                            if (isupper((unsigned char) *cp))
+                                *cp = tolower((unsigned char) *cp);
+                        }
+                        /* step over a whole (possibly multibyte) character */
+                        cp += PQmblen(cp, pset.encoding);
+                    }
+                }
+            }
+            break;
+        case xslashquotedarg:
+            /* must have hit EOL inside double quotes */
+            badarg = true;
+            break;
+        case xslashwholeline:
+            /* always okay */
+            break;
+        default:
+            /* can't get here */
+            fprintf(stderr, "invalid YY_START\n");
+            exit(1);
+    }
+
+    if (badarg)
+    {
+        psql_error("unterminated quoted string\n");
+        termPQExpBuffer(&mybuf);
+        return NULL;
+    }
+
+    /*
+     * An unquoted empty argument isn't possible unless we are at end of
+     * command.  Return NULL instead.
+     */
+    if (mybuf.len == 0 && *quote == 0)
+    {
+        termPQExpBuffer(&mybuf);
+        return NULL;
+    }
+
+    /* Else return the completed string. */
+    return mybuf.data;
+}
+
+/*
+ * Eat up any unused \\ to complete a backslash command.
+ */
+void
+psql_scan_slash_command_end(PsqlScanState state)
+{
+    /* Must be scanning already */
+    psql_assert(state->scanbufhandle);
+
+    /* Set up static variables that will be used by yylex */
+    cur_state = state;
+    output_buf = NULL;          /* this call passes no output buffer */
+
+    if (state->buffer_stack != NULL)
+        yy_switch_to_buffer(state->buffer_stack->buf);
+    else
+        yy_switch_to_buffer(state->scanbufhandle);
+
+    BEGIN(xslashend);
+
+    /* And lex.  There are no possible errors in this lex state... */
+    (void) yylex();
+}
+
+/*
+ * "Push back" the passed string so that it will be rescanned by subsequent
+ * psql_scan_slash_option calls.  This is presently only used in the case
+ * where a single-letter command has been concatenated with its argument.
+ *
+ * We use the same buffer stack mechanism as for variable expansion.
+ */
+void
+psql_scan_slash_pushback(PsqlScanState state, const char *str)
+{
+    /* needed for push_new_buffer */
+    cur_state = state;
+
+    push_new_buffer(str);
+}
+
+
+/*
+ * Push the given string onto the stack of stuff to scan.
+ *
+ * cur_state must point to the active PsqlScanState.
+ *
+ * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
+ */
+static void
+push_new_buffer(const char *newstr)
+{
+    StackElem  *stackelem;
+
+    stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
+    stackelem->buf = prepare_buffer(newstr, strlen(newstr),
+                                    &stackelem->bufstring);
+
+    /*
+     * Update the lookaside pointers: curline is the text flex scans,
+     * refline is the text holding the real bytes.  In a safe encoding the
+     * two are the same; otherwise keep a private copy of the original
+     * string to serve as the reference.
+     */
+    cur_state->curline = stackelem->bufstring;
+    if (cur_state->safe_encoding)
+    {
+        stackelem->origstring = NULL;
+        cur_state->refline = stackelem->bufstring;
+    }
+    else
+    {
+        stackelem->origstring = pg_strdup(newstr);
+        cur_state->refline = stackelem->origstring;
+    }
+
+    /* Link the new element in at the top of the stack */
+    stackelem->next = cur_state->buffer_stack;
+    cur_state->buffer_stack = stackelem;
+}
+
+/*
+ * Set up a flex input buffer to scan the given data.  We always make a
+ * copy of the data.  If working in an unsafe encoding, the copy has
+ * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
+ *
+ * cur_state must point to the active PsqlScanState.
+ *
+ * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
+ */
+static YY_BUFFER_STATE
+prepare_buffer(const char *txt, int len, char **txtcopy)
+{
+    char       *newtxt;
+
+    /* Flex wants two \0 characters after the actual data */
+    newtxt = pg_malloc(len + 2);
+    *txtcopy = newtxt;
+    newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
+
+    if (cur_state->safe_encoding)
+        memcpy(newtxt, txt, len);
+    else
+    {
+        /* Gotta do it the hard way */
+        int         i = 0;
+
+        while (i < len)
+        {
+            int         thislen = PQmblen(txt + i, cur_state->encoding);
+
+            /* first byte should always be okay... */
+            newtxt[i] = txt[i];
+            i++;
+
+            /*
+             * Replace the trailing bytes of the character with FFs.  Stop
+             * at len even if PQmblen reports a longer character: a
+             * truncated multibyte sequence at the end of the input must
+             * not overwrite the two terminators or run past the
+             * allocation.
+             */
+            while (--thislen > 0 && i < len)
+                newtxt[i++] = (char) 0xFF;
+        }
+    }
+
+    return yy_scan_buffer(newtxt, len + 2);
+}
+
+/*
+ * emit() --- body for ECHO macro
+ *
+ * NB: this must be used for ALL and ONLY the text copied from the flex
+ * input data.  If you pass it something that is not part of the yytext
+ * string, you are making a mistake.  Internally generated text can be
+ * appended directly to output_buf.
+ */
+static void
+emit(const char *txt, int len)
+{
+    if (cur_state->safe_encoding)
+        appendBinaryPQExpBuffer(output_buf, txt, len);
+    else
+    {
+        /* Gotta do it the hard way */
+        const char *reference = cur_state->refline;
+        int         i;
+
+        /*
+         * txt points into the scanned copy (curline); the same offset in
+         * refline is where the corresponding original bytes are.
+         */
+        reference += (txt - cur_state->curline);
+
+        for (i = 0; i < len; i++)
+        {
+            char        ch = txt[i];
+
+            /* an FF placeholder stands for the byte at the same offset */
+            if (ch == (char) 0xFF)
+                ch = reference[i];
+            appendPQExpBufferChar(output_buf, ch);
+        }
+    }
+}
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c
index a29ff23d79..400f7d4578 100644
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2003, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/startup.c,v 1.84 2004/02/12 19:58:16 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/startup.c,v 1.85 2004/02/19 19:40:09 tgl Exp $
  */
 #include "postgres_fe.h"

@@ -238,11 +238,20 @@ main(int argc, char *argv[])
      */
     else if (options.action == ACT_SINGLE_SLASH)
     {
+        PsqlScanState scan_state;
+
         if (VariableEquals(pset.vars, "ECHO", "all"))
puts(options.action_string); - successResult = HandleSlashCmds(options.action_string, NULL, NULL, NULL) != CMD_ERROR + scan_state = psql_scan_create(); + psql_scan_setup(scan_state, + options.action_string, + strlen(options.action_string)); + + successResult = HandleSlashCmds(scan_state, NULL) != CMD_ERROR ? EXIT_SUCCESS : EXIT_FAILURE; + + psql_scan_destroy(scan_state); } /*