postgresql/src/bin/psql/copy.c

717 lines
17 KiB
C
Raw Normal View History

/*
 * psql - the PostgreSQL interactive terminal
 *
 * Copyright (c) 2000-2019, PostgreSQL Global Development Group
 *
 * src/bin/psql/copy.c
 */
#include "postgres_fe.h"
#include "copy.h"
#include <signal.h>
#include <sys/stat.h>
#ifndef WIN32
1999-11-05 00:14:30 +01:00
#include <unistd.h> /* for isatty */
#else
1999-11-05 00:14:30 +01:00
#include <io.h> /* I think */
#endif
2000-02-16 14:15:26 +01:00
#include "libpq-fe.h"
#include "pqexpbuffer.h"
#include "settings.h"
#include "common.h"
#include "prompt.h"
#include "stringutils.h"
/*
* parse_slash_copy
* -- parses \copy command line
*
* The documented syntax is:
* \copy tablename [(columnlist)] from|to filename [options]
* \copy ( query stmt ) to filename [options]
*
* where 'filename' can be one of the following:
* '<file path>' | PROGRAM '<command>' | stdin | stdout | pstdin | pstdout
* and 'query' can be one of the following:
* SELECT | UPDATE | INSERT | DELETE
*
* An undocumented fact is that you can still write BINARY before the
* tablename; this is a hangover from the pre-7.3 syntax. The options
* syntax varies across backend versions, but we avoid all that mess
* by just transmitting the stuff after the filename literally.
*
* table name can be double-quoted and can have a schema part.
* column names can be double-quoted.
* filename can be single-quoted like SQL literals.
* command must be single-quoted like SQL literals.
*
* returns a malloc'ed structure with the options, or NULL on parsing error
*/
1999-11-05 00:14:30 +01:00
/*
 * Parsed form of a \copy command line, as produced by parse_slash_copy().
 * All string members are separately allocated and owned by the struct.
 */
struct copy_options
{
	char	   *before_tofrom;	/* command text preceding the TO/FROM keyword */
	char	   *after_tofrom;	/* command text following the filename, or
								 * NULL if there was none */
	char	   *file;			/* path or shell command; NULL means
								 * stdin/stdout */
	bool		program;		/* should 'file' be run through popen()? */
	bool		psql_inout;		/* true = use psql's own stdin/stdout */
	bool		from;			/* direction: true = FROM, false = TO */
};
static void
2017-06-21 20:39:04 +02:00
free_copy_options(struct copy_options *ptr)
{
1999-11-05 00:14:30 +01:00
if (!ptr)
return;
free(ptr->before_tofrom);
free(ptr->after_tofrom);
1999-11-05 00:14:30 +01:00
free(ptr->file);
free(ptr);
}
/*
 * Append "more" to the string *var.
 *
 * A new string holding the concatenation is built, the previous value of
 * *var is freed, and *var is pointed at the new string.
 */
static void
xstrcat(char **var, const char *more)
{
	char	   *joined = psprintf("%s%s", *var, more);

	free(*var);
	*var = joined;
}
static struct copy_options *
2000-01-19 00:30:24 +01:00
parse_slash_copy(const char *args)
{
1999-11-05 00:14:30 +01:00
struct copy_options *result;
char *token;
const char *whitespace = " \t\n\r";
char nonstd_backslash = standard_strings() ? 0 : '\\';
1999-11-05 00:14:30 +01:00
if (!args)
{
psql_error("\\copy: arguments required\n");
return NULL;
2001-03-22 05:01:46 +01:00
}
1999-11-05 00:14:30 +01:00
result = pg_malloc0(sizeof(struct copy_options));
1999-11-05 00:14:30 +01:00
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
result->before_tofrom = pg_strdup(""); /* initialize for appending */
token = strtokx(args, whitespace, ".,()", "\"",
0, false, false, pset.encoding);
1999-11-05 00:14:30 +01:00
if (!token)
goto error;
/* The following can be removed when we drop 7.3 syntax support */
if (pg_strcasecmp(token, "binary") == 0)
{
xstrcat(&result->before_tofrom, token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, false, pset.encoding);
if (!token)
goto error;
}
/* Handle COPY (query) case */
if (token[0] == '(')
{
2006-10-04 02:30:14 +02:00
int parens = 1;
while (parens > 0)
{
xstrcat(&result->before_tofrom, " ");
xstrcat(&result->before_tofrom, token);
token = strtokx(NULL, whitespace, "()", "\"'",
nonstd_backslash, true, false, pset.encoding);
if (!token)
goto error;
if (token[0] == '(')
parens++;
else if (token[0] == ')')
parens--;
}
}
xstrcat(&result->before_tofrom, " ");
xstrcat(&result->before_tofrom, token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, false, pset.encoding);
if (!token)
goto error;
/*
2005-10-15 04:49:52 +02:00
* strtokx() will not have returned a multi-character token starting with
* '.', so we don't need strcmp() here. Likewise for '(', etc, below.
*/
if (token[0] == '.')
1999-11-05 00:14:30 +01:00
{
/* handle schema . table */
xstrcat(&result->before_tofrom, token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, false, pset.encoding);
1999-11-05 00:14:30 +01:00
if (!token)
goto error;
xstrcat(&result->before_tofrom, token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, false, pset.encoding);
if (!token)
goto error;
}
1999-11-05 00:14:30 +01:00
if (token[0] == '(')
{
/* handle parenthesized column list */
for (;;)
{
xstrcat(&result->before_tofrom, " ");
xstrcat(&result->before_tofrom, token);
token = strtokx(NULL, whitespace, "()", "\"",
0, false, false, pset.encoding);
if (!token)
goto error;
if (token[0] == ')')
break;
}
xstrcat(&result->before_tofrom, " ");
xstrcat(&result->before_tofrom, token);
token = strtokx(NULL, whitespace, ".,()", "\"",
0, false, false, pset.encoding);
if (!token)
goto error;
}
if (pg_strcasecmp(token, "from") == 0)
result->from = true;
else if (pg_strcasecmp(token, "to") == 0)
result->from = false;
else
goto error;
/* { 'filename' | PROGRAM 'command' | STDIN | STDOUT | PSTDIN | PSTDOUT } */
token = strtokx(NULL, whitespace, ";", "'",
0, false, false, pset.encoding);
if (!token)
goto error;
if (pg_strcasecmp(token, "program") == 0)
{
int toklen;
token = strtokx(NULL, whitespace, ";", "'",
0, false, false, pset.encoding);
if (!token)
goto error;
/*
* The shell command must be quoted. This isn't fool-proof, but
* catches most quoting errors.
*/
toklen = strlen(token);
if (token[0] != '\'' || toklen < 2 || token[toklen - 1] != '\'')
goto error;
strip_quotes(token, '\'', 0, pset.encoding);
result->program = true;
result->file = pg_strdup(token);
}
else if (pg_strcasecmp(token, "stdin") == 0 ||
pg_strcasecmp(token, "stdout") == 0)
{
result->file = NULL;
}
else if (pg_strcasecmp(token, "pstdin") == 0 ||
2004-08-29 07:07:03 +02:00
pg_strcasecmp(token, "pstdout") == 0)
{
result->psql_inout = true;
result->file = NULL;
}
else
{
/* filename can be optionally quoted */
strip_quotes(token, '\'', 0, pset.encoding);
result->file = pg_strdup(token);
expand_tilde(&result->file);
}
/* Collect the rest of the line (COPY options) */
token = strtokx(NULL, "", NULL, NULL,
0, false, false, pset.encoding);
if (token)
result->after_tofrom = pg_strdup(token);
return result;
error:
if (token)
psql_error("\\copy: parse error at \"%s\"\n", token);
else
psql_error("\\copy: parse error at end of line\n");
free_copy_options(result);
return NULL;
1999-11-05 00:14:30 +01:00
}
/*
* Execute a \copy command (frontend copy). We have to open a file (or execute
* a command), then submit a COPY query to the backend and either feed it data
* from the file or route its response into the file.
1999-11-05 00:14:30 +01:00
*/
bool
2000-01-19 00:30:24 +01:00
do_copy(const char *args)
{
PQExpBufferData query;
1999-11-05 00:14:30 +01:00
FILE *copystream;
struct copy_options *options;
bool success;
2004-08-29 07:07:03 +02:00
1999-11-05 00:14:30 +01:00
/* parse options */
2000-01-19 00:30:24 +01:00
options = parse_slash_copy(args);
1999-11-05 00:14:30 +01:00
if (!options)
return false;
/* prepare to read or write the target file */
if (options->file && !options->program)
canonicalize_path(options->file);
1999-11-05 00:14:30 +01:00
if (options->from)
{
if (options->file)
{
if (options->program)
{
fflush(stdout);
fflush(stderr);
errno = 0;
copystream = popen(options->file, PG_BINARY_R);
}
else
copystream = fopen(options->file, PG_BINARY_R);
}
else if (!options->psql_inout)
2004-08-29 07:07:03 +02:00
copystream = pset.cur_cmd_source;
else
2004-08-29 07:07:03 +02:00
copystream = stdin;
}
1999-11-05 00:14:30 +01:00
else
{
if (options->file)
{
if (options->program)
{
fflush(stdout);
fflush(stderr);
errno = 0;
disable_sigpipe_trap();
copystream = popen(options->file, PG_BINARY_W);
}
else
copystream = fopen(options->file, PG_BINARY_W);
}
else if (!options->psql_inout)
2004-08-29 07:07:03 +02:00
copystream = pset.queryFout;
else
copystream = stdout;
}
1999-11-05 00:14:30 +01:00
if (!copystream)
{
if (options->program)
psql_error("could not execute command \"%s\": %s\n",
options->file, strerror(errno));
else
psql_error("%s: %s\n",
options->file, strerror(errno));
1999-11-05 00:14:30 +01:00
free_copy_options(options);
return false;
}
if (!options->program)
{
struct stat st;
int result;
/* make sure the specified file is not a directory */
if ((result = fstat(fileno(copystream), &st)) < 0)
2014-08-29 06:01:34 +02:00
psql_error("could not stat file \"%s\": %s\n",
options->file, strerror(errno));
if (result == 0 && S_ISDIR(st.st_mode))
psql_error("%s: cannot copy from/to a directory\n",
options->file);
if (result < 0 || S_ISDIR(st.st_mode))
{
fclose(copystream);
free_copy_options(options);
return false;
}
}
/* build the command we will send to the backend */
initPQExpBuffer(&query);
printfPQExpBuffer(&query, "COPY ");
appendPQExpBufferStr(&query, options->before_tofrom);
if (options->from)
appendPQExpBufferStr(&query, " FROM STDIN ");
else
appendPQExpBufferStr(&query, " TO STDOUT ");
if (options->after_tofrom)
appendPQExpBufferStr(&query, options->after_tofrom);
/* run it like a user command, but with copystream as data source/sink */
pset.copyStream = copystream;
success = SendQuery(query.data);
pset.copyStream = NULL;
termPQExpBuffer(&query);
1999-11-05 00:14:30 +01:00
2004-08-29 07:07:03 +02:00
if (options->file != NULL)
{
if (options->program)
{
int pclose_rc = pclose(copystream);
if (pclose_rc != 0)
{
if (pclose_rc < 0)
psql_error("could not close pipe to external command: %s\n",
strerror(errno));
else
{
char *reason = wait_result_to_str(pclose_rc);
psql_error("%s: %s\n", options->file,
reason ? reason : "");
if (reason)
free(reason);
}
success = false;
}
restore_sigpipe_trap();
}
else
{
if (fclose(copystream) != 0)
{
psql_error("%s: %s\n", options->file, strerror(errno));
success = false;
}
}
}
1999-11-05 00:14:30 +01:00
free_copy_options(options);
return success;
}
/*
* Functions for handling COPY IN/OUT data transfer.
*
* If you want to use COPY TO STDOUT/FROM STDIN in your application,
* this is the code to steal ;)
*/
/*
 * handleCopyOut
 * receives data as a result of a COPY ... TO STDOUT command
 *
 * conn should be a database connection that you just issued COPY TO on
 * and got back a PGRES_COPY_OUT result.
 *
 * copystream is the file stream for the data to go to.
 * copystream can be NULL to eat the data without writing it anywhere.
 *
 * The final status for the COPY is returned into *res (but note
 * we already reported the error, if it's not a success result).
 *
 * result is true if successful, false if not.
 */
bool
handleCopyOut(PGconn *conn, FILE *copystream, PGresult **res)
{
	bool		OK = true;
	char	   *buf;
	int			ret;

	for (;;)
	{
		ret = PQgetCopyData(conn, &buf, 0);

		if (ret < 0)
			break;				/* done or server/connection error */

		if (buf)
		{
			/* ret is non-negative here, so the conversion is value-preserving */
			if (OK && copystream &&
				fwrite(buf, 1, ret, copystream) != (size_t) ret)
			{
				psql_error("could not write COPY data: %s\n",
						   strerror(errno));
				/* complain only once, keep reading data from server */
				OK = false;
			}
			PQfreemem(buf);
		}
	}

	/* push out buffered data, unless a write failure was already reported */
	if (OK && copystream && fflush(copystream))
	{
		psql_error("could not write COPY data: %s\n",
				   strerror(errno));
		OK = false;
	}

	/* -2 is treated as a failed transfer; any other negative value is not */
	if (ret == -2)
	{
		psql_error("COPY data transfer failed: %s", PQerrorMessage(conn));
		OK = false;
	}

	/*
	 * Check command status and return to normal libpq state.
	 *
	 * If for some reason libpq is still reporting PGRES_COPY_OUT state, we
	 * would like to forcibly exit that state, since our caller would be
	 * unable to distinguish that situation from reaching the next COPY in a
	 * command string that happened to contain two consecutive COPY TO STDOUT
	 * commands.  However, libpq provides no API for doing that, and in
	 * principle it's a libpq bug anyway if PQgetCopyData() returns -1 or -2
	 * but hasn't exited COPY_OUT state internally.  So we ignore the
	 * possibility here.
	 */
	*res = PQgetResult(conn);
	if (PQresultStatus(*res) != PGRES_COMMAND_OK)
	{
		psql_error("%s", PQerrorMessage(conn));
		OK = false;
	}

	return OK;
}
/*
 * handleCopyIn
 * sends data to complete a COPY ... FROM STDIN command
 *
 * conn should be a database connection that you just issued COPY FROM on
 * and got back a PGRES_COPY_IN result.
 * copystream is the file stream to read the data from.
 * isbinary can be set from PQbinaryTuples().
 * The final status for the COPY is returned into *res (but note
 * we already reported the error, if it's not a success result).
 *
 * result is true if successful, false if not.
 */

/* read chunk size for COPY IN - size is not critical */
#define COPYBUFSIZ 8192

bool
handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
{
	bool		OK;
	char		buf[COPYBUFSIZ];
	bool		showprompt;

	/*
	 * Establish longjmp destination for exiting from wait-for-input. (This is
	 * only effective while sigint_interrupt_enabled is TRUE.)
	 */
	if (sigsetjmp(sigint_interrupt_jmp, 1) != 0)
	{
		/* got here with longjmp */

		/* Terminate data transfer */
		PQputCopyEnd(conn,
					 (PQprotocolVersion(conn) < 3) ? NULL :
					 _("canceled by user"));

		OK = false;
		goto copyin_cleanup;
	}

	/* Prompt if interactive input */
	if (isatty(fileno(copystream)))
	{
		showprompt = true;
		if (!pset.quiet)
			puts(_("Enter data to be copied followed by a newline.\n"
				   "End with a backslash and a period on a line by itself, or an EOF signal."));
	}
	else
		showprompt = false;

	OK = true;

	if (isbinary)
	{
		/* interactive input probably silly, but give one prompt anyway */
		if (showprompt)
		{
			const char *prompt = get_prompt(PROMPT_COPY, NULL);

			fputs(prompt, stdout);
			fflush(stdout);
		}

		/* binary data: forward fixed-size chunks until nothing more is read */
		for (;;)
		{
			int			buflen;

			/* enable longjmp while waiting for input */
			sigint_interrupt_enabled = true;

			buflen = fread(buf, 1, COPYBUFSIZ, copystream);

			sigint_interrupt_enabled = false;

			if (buflen <= 0)
				break;

			if (PQputCopyData(conn, buf, buflen) <= 0)
			{
				OK = false;
				break;
			}
		}
	}
	else
	{
		bool		copydone = false;

		while (!copydone)
		{						/* for each input line ... */
			bool		firstload;
			bool		linedone;

			if (showprompt)
			{
				const char *prompt = get_prompt(PROMPT_COPY, NULL);

				fputs(prompt, stdout);
				fflush(stdout);
			}

			firstload = true;
			linedone = false;

			while (!linedone)
			{					/* for each bufferload in line ... */
				int			linelen;
				char	   *fgresult;

				/* enable longjmp while waiting for input */
				sigint_interrupt_enabled = true;

				fgresult = fgets(buf, sizeof(buf), copystream);

				sigint_interrupt_enabled = false;

				/* NULL from fgets() (EOF or read error) ends the copy */
				if (!fgresult)
				{
					copydone = true;
					break;
				}

				linelen = strlen(buf);

				/* current line is done? */
				if (linelen > 0 && buf[linelen - 1] == '\n')
					linedone = true;

				/* check for EOF marker, but not on a partial line */
				if (firstload)
				{
					/*
					 * This code erroneously assumes '\.' on a line alone
					 * inside a quoted CSV string terminates the \copy.
					 * http://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org
					 */
					if (strcmp(buf, "\\.\n") == 0 ||
						strcmp(buf, "\\.\r\n") == 0)
					{
						copydone = true;
						break;
					}

					firstload = false;
				}

				if (PQputCopyData(conn, buf, linelen) <= 0)
				{
					OK = false;
					copydone = true;
					break;
				}
			}

			/* keep psql's line counters in step when reading the script itself */
			if (copystream == pset.cur_cmd_source)
			{
				pset.lineno++;
				pset.stmt_lineno++;
			}
		}
	}

	/* Check for read error */
	if (ferror(copystream))
		OK = false;

	/*
	 * Terminate data transfer.  We can't send an error message if we're using
	 * protocol version 2.
	 */
	if (PQputCopyEnd(conn,
					 (OK || PQprotocolVersion(conn) < 3) ? NULL :
					 _("aborted because of read failure")) <= 0)
		OK = false;

copyin_cleanup:

	/*
	 * Clear the EOF flag on the stream, in case copying ended due to an EOF
	 * signal.  This allows an interactive TTY session to perform another COPY
	 * FROM STDIN later.  (In non-STDIN cases, we're about to close the file
	 * anyway, so it doesn't matter.)  Although we don't ever test the flag
	 * with feof(), some fread() implementations won't read more data if it's
	 * set.  This also clears the error flag, but we already checked that.
	 */
	clearerr(copystream);

	/*
	 * Check command status and return to normal libpq state.
	 *
	 * We do not want to return with the status still PGRES_COPY_IN: our
	 * caller would be unable to distinguish that situation from reaching the
	 * next COPY in a command string that happened to contain two consecutive
	 * COPY FROM STDIN commands.  We keep trying PQputCopyEnd() in the hope
	 * it'll work eventually.  (What's actually likely to happen is that in
	 * attempting to flush the data, libpq will eventually realize that the
	 * connection is lost.  But that's fine; it will get us out of COPY_IN
	 * state, which is what we need.)
	 */
	while (*res = PQgetResult(conn), PQresultStatus(*res) == PGRES_COPY_IN)
	{
		OK = false;
		PQclear(*res);
		/* We can't send an error message if we're using protocol version 2 */
		PQputCopyEnd(conn,
					 (PQprotocolVersion(conn) < 3) ? NULL :
					 _("trying to exit copy mode"));
	}
	if (PQresultStatus(*res) != PGRES_COMMAND_OK)
	{
		psql_error("%s", PQerrorMessage(conn));
		OK = false;
	}

	return OK;
}