Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or range, the entire COPY fails. However, in some cases, such data can be ignored and just copying normal data is preferable. This commit adds a new option SAVE_ERROR_TO, which specifies where to save the error information. When this option is specified, COPY skips soft errors and continues copying. Currently, SAVE_ERROR_TO only supports "none". This indicates error information is not saved and COPY just skips the unexpected data and continues running. Later works are expected to add more choices, such as 'log' and 'table'. Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson, Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada Reviewed-by: Vignesh C, Atsushi Torikoshi
This commit is contained in:
parent
c7e5e994b2
commit
9e2d870119
|
@ -43,6 +43,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
|||
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
||||
FORCE_NOT_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
||||
FORCE_NULL { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
||||
SAVE_ERROR_TO '<replaceable class="parameter">location</replaceable>'
|
||||
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
|
||||
</synopsis>
|
||||
</refsynopsisdiv>
|
||||
|
@ -373,6 +374,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
|||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><literal>SAVE_ERROR_TO</literal></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Specifies to save error information to <replaceable class="parameter">
|
||||
location</replaceable> when there is malformed data in the input.
|
||||
Currently, only <literal>error</literal> (default) and <literal>none</literal>
|
||||
values are supported.
|
||||
If the <literal>error</literal> value is specified,
|
||||
<command>COPY</command> stops operation at the first error.
|
||||
If the <literal>none</literal> value is specified,
|
||||
<command>COPY</command> skips malformed data and continues copying data.
|
||||
The option is allowed only in <command>COPY FROM</command>.
|
||||
The <literal>none</literal> value is allowed only when
|
||||
not using <literal>binary</literal> format.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><literal>ENCODING</literal></term>
|
||||
<listitem>
|
||||
|
@ -556,7 +576,8 @@ COPY <replaceable class="parameter">count</replaceable>
|
|||
</para>
|
||||
|
||||
<para>
|
||||
<command>COPY</command> stops operation at the first error. This
|
||||
<command>COPY</command> stops operation at the first error when
|
||||
<literal>SAVE_ERROR_TO</literal> is not specified. This
|
||||
should not lead to problems in the event of a <command>COPY
|
||||
TO</command>, but the target table will already have received
|
||||
earlier rows in a <command>COPY FROM</command>. These rows will not
|
||||
|
|
|
@ -394,6 +394,42 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
|
|||
return COPY_HEADER_FALSE; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract a CopySaveErrorToChoice value from a DefElem.
|
||||
*/
|
||||
static CopySaveErrorToChoice
|
||||
defGetCopySaveErrorToChoice(DefElem *def, ParseState *pstate, bool is_from)
|
||||
{
|
||||
char *sval;
|
||||
|
||||
if (!is_from)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("COPY SAVE_ERROR_TO cannot be used with COPY TO"),
|
||||
parser_errposition(pstate, def->location)));
|
||||
|
||||
/*
|
||||
* If no parameter value given, assume the default value.
|
||||
*/
|
||||
if (def->arg == NULL)
|
||||
return COPY_SAVE_ERROR_TO_ERROR;
|
||||
|
||||
/*
|
||||
* Allow "error", or "none" values.
|
||||
*/
|
||||
sval = defGetString(def);
|
||||
if (pg_strcasecmp(sval, "error") == 0)
|
||||
return COPY_SAVE_ERROR_TO_ERROR;
|
||||
if (pg_strcasecmp(sval, "none") == 0)
|
||||
return COPY_SAVE_ERROR_TO_NONE;
|
||||
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("COPY save_error_to \"%s\" not recognized", sval),
|
||||
parser_errposition(pstate, def->location)));
|
||||
return COPY_SAVE_ERROR_TO_ERROR; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
/*
|
||||
* Process the statement option list for COPY.
|
||||
*
|
||||
|
@ -419,6 +455,7 @@ ProcessCopyOptions(ParseState *pstate,
|
|||
bool format_specified = false;
|
||||
bool freeze_specified = false;
|
||||
bool header_specified = false;
|
||||
bool save_error_to_specified = false;
|
||||
ListCell *option;
|
||||
|
||||
/* Support external use for option sanity checking */
|
||||
|
@ -571,6 +608,13 @@ ProcessCopyOptions(ParseState *pstate,
|
|||
defel->defname),
|
||||
parser_errposition(pstate, defel->location)));
|
||||
}
|
||||
else if (strcmp(defel->defname, "save_error_to") == 0)
|
||||
{
|
||||
if (save_error_to_specified)
|
||||
errorConflictingDefElem(defel, pstate);
|
||||
save_error_to_specified = true;
|
||||
opts_out->save_error_to = defGetCopySaveErrorToChoice(defel, pstate, is_from);
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
|
@ -598,6 +642,11 @@ ProcessCopyOptions(ParseState *pstate,
|
|||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("cannot specify DEFAULT in BINARY mode")));
|
||||
|
||||
if (opts_out->binary && opts_out->save_error_to != COPY_SAVE_ERROR_TO_ERROR)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("cannot specify SAVE_ERROR_TO in BINARY mode")));
|
||||
|
||||
/* Set defaults for omitted options */
|
||||
if (!opts_out->delim)
|
||||
opts_out->delim = opts_out->csv_mode ? "," : "\t";
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include "libpq/libpq.h"
|
||||
#include "libpq/pqformat.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/miscnodes.h"
|
||||
#include "optimizer/optimizer.h"
|
||||
#include "pgstat.h"
|
||||
#include "rewrite/rewriteHandler.h"
|
||||
|
@ -656,6 +657,9 @@ CopyFrom(CopyFromState cstate)
|
|||
Assert(cstate->rel);
|
||||
Assert(list_length(cstate->range_table) == 1);
|
||||
|
||||
if (cstate->opts.save_error_to != COPY_SAVE_ERROR_TO_ERROR)
|
||||
Assert(cstate->escontext);
|
||||
|
||||
/*
|
||||
* The target must be a plain, foreign, or partitioned relation, or have
|
||||
* an INSTEAD OF INSERT row trigger. (Currently, such triggers are only
|
||||
|
@ -992,6 +996,25 @@ CopyFrom(CopyFromState cstate)
|
|||
if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
|
||||
break;
|
||||
|
||||
if (cstate->opts.save_error_to != COPY_SAVE_ERROR_TO_ERROR &&
|
||||
cstate->escontext->error_occurred)
|
||||
{
|
||||
/*
|
||||
* Soft error occured, skip this tuple and save error information
|
||||
* according to SAVE_ERROR_TO.
|
||||
*/
|
||||
if (cstate->opts.save_error_to == COPY_SAVE_ERROR_TO_NONE)
|
||||
|
||||
/*
|
||||
* Just make ErrorSaveContext ready for the next NextCopyFrom.
|
||||
* Since we don't set details_wanted and error_data is not to
|
||||
* be filled, just resetting error_occurred is enough.
|
||||
*/
|
||||
cstate->escontext->error_occurred = false;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
ExecStoreVirtualTuple(myslot);
|
||||
|
||||
/*
|
||||
|
@ -1284,6 +1307,14 @@ CopyFrom(CopyFromState cstate)
|
|||
/* Done, clean up */
|
||||
error_context_stack = errcallback.previous;
|
||||
|
||||
if (cstate->opts.save_error_to != COPY_SAVE_ERROR_TO_ERROR &&
|
||||
cstate->num_errors > 0)
|
||||
ereport(NOTICE,
|
||||
errmsg_plural("%zd row were skipped due to data type incompatibility",
|
||||
"%zd rows were skipped due to data type incompatibility",
|
||||
cstate->num_errors,
|
||||
cstate->num_errors));
|
||||
|
||||
if (bistate != NULL)
|
||||
FreeBulkInsertState(bistate);
|
||||
|
||||
|
@ -1419,6 +1450,23 @@ BeginCopyFrom(ParseState *pstate,
|
|||
}
|
||||
}
|
||||
|
||||
/* Set up soft error handler for SAVE_ERROR_TO */
|
||||
if (cstate->opts.save_error_to != COPY_SAVE_ERROR_TO_ERROR)
|
||||
{
|
||||
cstate->escontext = makeNode(ErrorSaveContext);
|
||||
cstate->escontext->type = T_ErrorSaveContext;
|
||||
cstate->escontext->error_occurred = false;
|
||||
|
||||
/*
|
||||
* Currently we only support COPY_SAVE_ERROR_TO_NONE. We'll add other
|
||||
* options later
|
||||
*/
|
||||
if (cstate->opts.save_error_to == COPY_SAVE_ERROR_TO_NONE)
|
||||
cstate->escontext->details_wanted = false;
|
||||
}
|
||||
else
|
||||
cstate->escontext = NULL;
|
||||
|
||||
/* Convert FORCE_NULL name list to per-column flags, check validity */
|
||||
cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
|
||||
if (cstate->opts.force_null_all)
|
||||
|
|
|
@ -70,6 +70,7 @@
|
|||
#include "libpq/pqformat.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/miscnodes.h"
|
||||
#include "pgstat.h"
|
||||
#include "port/pg_bswap.h"
|
||||
#include "utils/builtins.h"
|
||||
|
@ -955,11 +956,17 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
|
|||
|
||||
values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
|
||||
}
|
||||
else
|
||||
values[m] = InputFunctionCall(&in_functions[m],
|
||||
string,
|
||||
typioparams[m],
|
||||
att->atttypmod);
|
||||
/* If SAVE_ERROR_TO is specified, skip rows with soft errors */
|
||||
else if (!InputFunctionCallSafe(&in_functions[m],
|
||||
string,
|
||||
typioparams[m],
|
||||
att->atttypmod,
|
||||
(Node *) cstate->escontext,
|
||||
&values[m]))
|
||||
{
|
||||
cstate->num_errors++;
|
||||
return true;
|
||||
}
|
||||
|
||||
cstate->cur_attname = NULL;
|
||||
cstate->cur_attval = NULL;
|
||||
|
|
|
@ -2898,12 +2898,17 @@ psql_completion(const char *text, int start, int end)
|
|||
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "("))
|
||||
COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
|
||||
"HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
|
||||
"FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT");
|
||||
"FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
|
||||
"SAVE_ERROR_TO");
|
||||
|
||||
/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
|
||||
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
|
||||
COMPLETE_WITH("binary", "csv", "text");
|
||||
|
||||
/* Complete COPY <sth> FROM filename WITH (SAVE_ERROR_TO */
|
||||
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "SAVE_ERROR_TO"))
|
||||
COMPLETE_WITH("error", "none");
|
||||
|
||||
/* Complete COPY <sth> FROM <sth> WITH (<options>) */
|
||||
else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAny, "WITH", MatchAny))
|
||||
COMPLETE_WITH("WHERE");
|
||||
|
|
|
@ -30,6 +30,16 @@ typedef enum CopyHeaderChoice
|
|||
COPY_HEADER_MATCH,
|
||||
} CopyHeaderChoice;
|
||||
|
||||
/*
|
||||
* Represents where to save input processing errors. More values to be added
|
||||
* in the future.
|
||||
*/
|
||||
typedef enum CopySaveErrorToChoice
|
||||
{
|
||||
COPY_SAVE_ERROR_TO_ERROR = 0, /* immediately throw errors */
|
||||
COPY_SAVE_ERROR_TO_NONE, /* ignore errors */
|
||||
} CopySaveErrorToChoice;
|
||||
|
||||
/*
|
||||
* A struct to hold COPY options, in a parsed form. All of these are related
|
||||
* to formatting, except for 'freeze', which doesn't really belong here, but
|
||||
|
@ -62,6 +72,7 @@ typedef struct CopyFormatOptions
|
|||
bool force_null_all; /* FORCE_NULL *? */
|
||||
bool *force_null_flags; /* per-column CSV FN flags */
|
||||
bool convert_selectively; /* do selective binary conversion? */
|
||||
CopySaveErrorToChoice save_error_to; /* where to save error information */
|
||||
List *convert_select; /* list of column names (can be NIL) */
|
||||
} CopyFormatOptions;
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "commands/copy.h"
|
||||
#include "commands/trigger.h"
|
||||
#include "nodes/miscnodes.h"
|
||||
|
||||
/*
|
||||
* Represents the different source cases we need to worry about at
|
||||
|
@ -94,6 +95,10 @@ typedef struct CopyFromStateData
|
|||
* default value */
|
||||
FmgrInfo *in_functions; /* array of input functions for each attrs */
|
||||
Oid *typioparams; /* array of element types for in_functions */
|
||||
ErrorSaveContext *escontext; /* soft error trapper during in_functions
|
||||
* execution */
|
||||
uint64 num_errors; /* total number of rows which contained soft
|
||||
* errors */
|
||||
int *defmap; /* array of default att numbers related to
|
||||
* missing att */
|
||||
ExprState **defexprs; /* array of default att expressions for all
|
||||
|
|
|
@ -77,11 +77,21 @@ COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
|
|||
ERROR: conflicting or redundant options
|
||||
LINE 1: COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii...
|
||||
^
|
||||
COPY x from stdin (save_error_to none,save_error_to none);
|
||||
ERROR: conflicting or redundant options
|
||||
LINE 1: COPY x from stdin (save_error_to none,save_error_to none);
|
||||
^
|
||||
-- incorrect options
|
||||
COPY x to stdin (format BINARY, delimiter ',');
|
||||
ERROR: cannot specify DELIMITER in BINARY mode
|
||||
COPY x to stdin (format BINARY, null 'x');
|
||||
ERROR: cannot specify NULL in BINARY mode
|
||||
COPY x from stdin (format BINARY, save_error_to none);
|
||||
ERROR: cannot specify SAVE_ERROR_TO in BINARY mode
|
||||
COPY x to stdin (save_error_to none);
|
||||
ERROR: COPY SAVE_ERROR_TO cannot be used with COPY TO
|
||||
LINE 1: COPY x to stdin (save_error_to none);
|
||||
^
|
||||
COPY x to stdin (format TEXT, force_quote(a));
|
||||
ERROR: COPY FORCE_QUOTE requires CSV mode
|
||||
COPY x from stdin (format CSV, force_quote(a));
|
||||
|
@ -94,6 +104,10 @@ COPY x to stdout (format TEXT, force_null(a));
|
|||
ERROR: COPY FORCE_NULL requires CSV mode
|
||||
COPY x to stdin (format CSV, force_null(a));
|
||||
ERROR: COPY FORCE_NULL cannot be used with COPY TO
|
||||
COPY x to stdin (format BINARY, save_error_to unsupported);
|
||||
ERROR: COPY SAVE_ERROR_TO cannot be used with COPY TO
|
||||
LINE 1: COPY x to stdin (format BINARY, save_error_to unsupported);
|
||||
^
|
||||
-- too many columns in column list: should fail
|
||||
COPY x (a, b, c, d, e, d, c) from stdin;
|
||||
ERROR: column "d" specified more than once
|
||||
|
@ -710,6 +724,33 @@ SELECT * FROM instead_of_insert_tbl;
|
|||
(2 rows)
|
||||
|
||||
COMMIT;
|
||||
-- tests for SAVE_ERROR_TO option
|
||||
CREATE TABLE check_ign_err (n int, m int[], k int);
|
||||
COPY check_ign_err FROM STDIN WITH (save_error_to error);
|
||||
ERROR: invalid input syntax for type integer: "a"
|
||||
CONTEXT: COPY check_ign_err, line 2, column n: "a"
|
||||
COPY check_ign_err FROM STDIN WITH (save_error_to none);
|
||||
NOTICE: 4 rows were skipped due to data type incompatibility
|
||||
SELECT * FROM check_ign_err;
|
||||
n | m | k
|
||||
---+-----+---
|
||||
1 | {1} | 1
|
||||
5 | {5} | 5
|
||||
(2 rows)
|
||||
|
||||
-- test datatype error that can't be handled as soft: should fail
|
||||
CREATE TABLE hard_err(foo widget);
|
||||
COPY hard_err FROM STDIN WITH (save_error_to none);
|
||||
ERROR: invalid input syntax for type widget: "1"
|
||||
CONTEXT: COPY hard_err, line 1, column foo: "1"
|
||||
-- test missing data: should fail
|
||||
COPY check_ign_err FROM STDIN WITH (save_error_to none);
|
||||
ERROR: missing data for column "k"
|
||||
CONTEXT: COPY check_ign_err, line 1: "1 {1}"
|
||||
-- test extra data: should fail
|
||||
COPY check_ign_err FROM STDIN WITH (save_error_to none);
|
||||
ERROR: extra data after last expected column
|
||||
CONTEXT: COPY check_ign_err, line 1: "1 {1} 3 abc"
|
||||
-- clean up
|
||||
DROP TABLE forcetest;
|
||||
DROP TABLE vistest;
|
||||
|
@ -724,6 +765,8 @@ DROP TABLE instead_of_insert_tbl;
|
|||
DROP VIEW instead_of_insert_tbl_view;
|
||||
DROP VIEW instead_of_insert_tbl_view_2;
|
||||
DROP FUNCTION fun_instead_of_insert_tbl();
|
||||
DROP TABLE check_ign_err;
|
||||
DROP TABLE hard_err;
|
||||
--
|
||||
-- COPY FROM ... DEFAULT
|
||||
--
|
||||
|
|
|
@ -66,16 +66,20 @@ COPY x from stdin (force_not_null (a), force_not_null (b));
|
|||
COPY x from stdin (force_null (a), force_null (b));
|
||||
COPY x from stdin (convert_selectively (a), convert_selectively (b));
|
||||
COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
|
||||
COPY x from stdin (save_error_to none,save_error_to none);
|
||||
|
||||
-- incorrect options
|
||||
COPY x to stdin (format BINARY, delimiter ',');
|
||||
COPY x to stdin (format BINARY, null 'x');
|
||||
COPY x from stdin (format BINARY, save_error_to none);
|
||||
COPY x to stdin (save_error_to none);
|
||||
COPY x to stdin (format TEXT, force_quote(a));
|
||||
COPY x from stdin (format CSV, force_quote(a));
|
||||
COPY x to stdout (format TEXT, force_not_null(a));
|
||||
COPY x to stdin (format CSV, force_not_null(a));
|
||||
COPY x to stdout (format TEXT, force_null(a));
|
||||
COPY x to stdin (format CSV, force_null(a));
|
||||
COPY x to stdin (format BINARY, save_error_to unsupported);
|
||||
|
||||
-- too many columns in column list: should fail
|
||||
COPY x (a, b, c, d, e, d, c) from stdin;
|
||||
|
@ -494,6 +498,42 @@ test1
|
|||
SELECT * FROM instead_of_insert_tbl;
|
||||
COMMIT;
|
||||
|
||||
-- tests for SAVE_ERROR_TO option
|
||||
CREATE TABLE check_ign_err (n int, m int[], k int);
|
||||
COPY check_ign_err FROM STDIN WITH (save_error_to error);
|
||||
1 {1} 1
|
||||
a {2} 2
|
||||
3 {3} 3333333333
|
||||
4 {a, 4} 4
|
||||
|
||||
5 {5} 5
|
||||
\.
|
||||
COPY check_ign_err FROM STDIN WITH (save_error_to none);
|
||||
1 {1} 1
|
||||
a {2} 2
|
||||
3 {3} 3333333333
|
||||
4 {a, 4} 4
|
||||
|
||||
5 {5} 5
|
||||
\.
|
||||
SELECT * FROM check_ign_err;
|
||||
|
||||
-- test datatype error that can't be handled as soft: should fail
|
||||
CREATE TABLE hard_err(foo widget);
|
||||
COPY hard_err FROM STDIN WITH (save_error_to none);
|
||||
1
|
||||
\.
|
||||
|
||||
-- test missing data: should fail
|
||||
COPY check_ign_err FROM STDIN WITH (save_error_to none);
|
||||
1 {1}
|
||||
\.
|
||||
|
||||
-- test extra data: should fail
|
||||
COPY check_ign_err FROM STDIN WITH (save_error_to none);
|
||||
1 {1} 3 abc
|
||||
\.
|
||||
|
||||
-- clean up
|
||||
DROP TABLE forcetest;
|
||||
DROP TABLE vistest;
|
||||
|
@ -508,6 +548,8 @@ DROP TABLE instead_of_insert_tbl;
|
|||
DROP VIEW instead_of_insert_tbl_view;
|
||||
DROP VIEW instead_of_insert_tbl_view_2;
|
||||
DROP FUNCTION fun_instead_of_insert_tbl();
|
||||
DROP TABLE check_ign_err;
|
||||
DROP TABLE hard_err;
|
||||
|
||||
--
|
||||
-- COPY FROM ... DEFAULT
|
||||
|
|
|
@ -4041,3 +4041,4 @@ manifest_writer
|
|||
rfile
|
||||
ws_options
|
||||
ws_file_info
|
||||
CopySaveErrorToChoice
|
||||
|
|
Loading…
Reference in New Issue