Complete TODO item:

o -Allow dump/load of CSV format

This adds new keywords to COPY and \copy:

        CSV - enable CSV mode (comma separated variable)
        QUOTE - specify quote character
        ESCAPE - specify escape character
        FORCE - force quoting of specified column
	LITERAL - suppress null comparison for columns

Doc changes included.  Regression updates coming from Andrew.
This commit is contained in:
Bruce Momjian 2004-04-19 17:22:31 +00:00
parent 83ab1c0475
commit 862b20b382
7 changed files with 776 additions and 63 deletions

View File

@ -1,5 +1,5 @@
<!--
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.55 2003/12/13 23:59:07 neilc Exp $
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.56 2004/04/19 17:22:30 momjian Exp $
PostgreSQL documentation
-->
@ -26,7 +26,10 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
[ BINARY ]
[ OIDS ]
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
[ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
[ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
[ LITERAL <replaceable class="parameter">column</replaceable> [, ...] ] ] ]
COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ]
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
@ -34,7 +37,10 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
[ BINARY ]
[ OIDS ]
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
[ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
[ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
[ FORCE <replaceable class="parameter">column</replaceable> [, ...] ] ] ]
</synopsis>
</refsynopsisdiv>
@ -146,7 +152,8 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
<listitem>
<para>
The single character that separates columns within each row
(line) of the file. The default is a tab character.
(line) of the file. The default is a tab character in text mode,
a comma in <literal>CSV</> mode.
</para>
</listitem>
</varlistentry>
@ -156,20 +163,86 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
<listitem>
<para>
The string that represents a null value. The default is
<literal>\N</literal> (backslash-N). You might prefer an empty
string, for example.
<literal>\N</literal> (backslash-N) in text mode, and an empty
value with no quotes in <literal>CSV</> mode. You might prefer an
empty string even in text mode for cases where you don't want to
distinguish nulls from empty strings.
</para>
<note>
<para>
On a <command>COPY FROM</command>, any data item that matches
When using <command>COPY FROM</command>, any data item that matches
this string will be stored as a null value, so you should make
sure that you use the same string as you used with
<command>COPY TO</command>.
</para>
</note>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>CSV</literal></term>
<listitem>
<para>
Enables Comma Separated Variable (<literal>CSV</>) mode (also called
Comma Separated Value). It sets the default <literal>DELIMITER</> to
comma, and <literal>QUOTE</> and <literal>ESCAPE</> values to
double-quote.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">quote</replaceable></term>
<listitem>
<para>
Specifies the quotation character in <literal>CSV</> mode.
The default is double-quote.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">escape</replaceable></term>
<listitem>
<para>
Specifies the character that should appear before a <literal>QUOTE</>
data character value in <literal>CSV</> mode. The default is the
<literal>QUOTE</> value (usually double-quote).
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>FORCE</></term>
<listitem>
<para>
In <literal>CSV</> <command>COPY TO</> mode, forces quoting
to be used for all non-<literal>NULL</> values in each specified
column. <literal>NULL</> output is never quoted.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>LITERAL</></term>
<listitem>
<para>
In <literal>CSV</> <command>COPY FROM</> mode, for each column specified,
do not do a <literal>null string</> comparison; instead load the value
literally. <literal>QUOTE</> and <literal>ESCAPE</> processing are still
performed.
</para>
<para>
If the <literal>null string</> is <literal>''</> (the default
in <literal>CSV</> mode), a missing input value (<literal>delimiter,
delimiter</>) will load as a zero-length string. <literal>Delimiter, quote,
quote, delimiter</> is always treated as a zero-length string on input.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
@ -233,6 +306,17 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
constraints on the destination table. However, it will not invoke rules.
</para>
<para>
<command>COPY</command> input and output are affected by
<varname>DateStyle</varname>. For portability with other
<productname>PostgreSQL</productname> installations which might use
non-default <varname>DateStyle</varname> settings,
<varname>DateStyle</varname> should be set to <literal>ISO</> before
using <command>COPY</>. In <literal>CSV</> mode, use <literal>ISO</>
or a <varname>DateStyle</varname> setting appropriate for the
external application.
</para>
<para>
<command>COPY</command> stops operation at the first error. This
should not lead to problems in the event of a <command>COPY
@ -253,7 +337,8 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
<para>
When <command>COPY</command> is used without the <literal>BINARY</literal> option,
the data read or written is a text file with one line per table row.
the data read or written is a text file with one line per table row,
unless <literal>CSV</> mode is used.
Columns in a row are separated by the delimiter character.
The column values themselves are strings generated by the
output function, or acceptable to the input function, of each
@ -379,6 +464,63 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
</para>
</refsect2>
<refsect2>
<title>CSV Format</title>
<para>
This format is used for importing and exporting the Comma
Separated Variable (<literal>CSV</>) file format used by many other
programs, such as spreadsheets. Instead of the escaping used by
<productname>PostgreSQL</productname>'s standard text mode, it
produces and recognizes the common CSV escaping mechanism.
</para>
<para>
The values in each record are separated by the <literal>DELIMITER</>
character. If the value contains the delimiter character, the
<literal>QUOTE</> character, the <literal>NULL</> string, a carriage
return, or line feed character, then the whole value is prefixed and
suffixed by the <literal>QUOTE</> character, and any occurrence
within the value of a <literal>QUOTE</> character or the
<literal>ESCAPE</> character is preceded by the escape character.
You can also use <literal>FORCE</> to force quotes when outputting
non-<literal>NULL</> values in specific columns.
</para>
<para>
In general, the <literal>CSV</> format has no way to distinguish a
<literal>NULL</> from an empty string.
<productname>PostgreSQL</productname>'s COPY handles this by
quoting. A <literal>NULL</> is output as the <literal>NULL</> string
and is not quoted, while a data value matching the <literal>NULL</> string
is quoted. Therefore, using the default settings, a <literal>NULL</> is
written as an unquoted empty string, while an empty string is
written with double quotes (<literal>""</>). Reading values follows
similar rules. You can use <literal>LITERAL</> to prevent <literal>NULL</>
input comparisons for specific columns.
</para>
<note>
<para>
CSV mode will both recognize and produce CSV files with quoted
values containing embedded carriage returns and line feeds. Thus
the files are not strictly one line per table row like text-mode
files.
</para>
</note>
<note>
<para>
Many programs produce strange and occasionally perverse CSV files,
so the file format is more a convention than a standard. Thus you
might encounter some files that cannot be imported using this
mechanism, and <command>COPY</> might produce files that other
programs cannot process.
</para>
</note>
</refsect2>
<refsect2>
<title>Binary Format</title>

View File

@ -1,5 +1,5 @@
<!--
$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.110 2004/04/12 15:58:52 momjian Exp $
$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.111 2004/04/19 17:22:30 momjian Exp $
PostgreSQL documentation
-->
@ -711,6 +711,10 @@ testdb=>
[ <literal>oids</literal> ]
[ <literal>delimiter [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
[ <literal>null [as] </literal> '<replaceable class="parameter">string</replaceable>' ]</literal>
[ <literal>csv [ quote [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
[ <literal>escape [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
[ <literal>force</> <replaceable class="parameter">column_list</replaceable> ]
[ <literal>literal</> <replaceable class="parameter">column_list</replaceable> ] ]
</term>
<listitem>

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.220 2004/04/15 22:36:03 momjian Exp $
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.221 2004/04/19 17:22:30 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@ -70,7 +70,8 @@ typedef enum CopyDest
typedef enum CopyReadResult
{
NORMAL_ATTR,
END_OF_LINE
END_OF_LINE,
UNTERMINATED_FIELD
} CopyReadResult;
/*
@ -130,15 +131,22 @@ static bool line_buf_converted;
/* non-export function prototypes */
static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
char *delim, char *null_print);
char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
List *force_atts);
static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
char *delim, char *null_print);
char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
List *literal_atts);
static bool CopyReadLine(void);
static char *CopyReadAttribute(const char *delim, const char *null_print,
CopyReadResult *result, bool *isnull);
static char *CopyReadAttributeCSV(const char *delim, const char *null_print,
char *quote, char *escape,
CopyReadResult *result, bool *isnull);
static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
Oid typelem, bool *isnull);
static void CopyAttributeOut(char *string, char *delim);
static void CopyAttributeOutCSV(char *string, char *delim, char *quote,
char *escape, bool force_quote);
static List *CopyGetAttnums(Relation rel, List *attnamelist);
static void limit_printout_length(StringInfo buf);
@ -682,8 +690,15 @@ DoCopy(const CopyStmt *stmt)
List *attnumlist;
bool binary = false;
bool oids = false;
bool csv_mode = false;
char *delim = NULL;
char *quote = NULL;
char *escape = NULL;
char *null_print = NULL;
List *force = NIL;
List *literal = NIL;
List *force_atts = NIL;
List *literal_atts = NIL;
Relation rel;
AclMode required_access = (is_from ? ACL_INSERT : ACL_SELECT);
AclResult aclresult;
@ -725,6 +740,46 @@ DoCopy(const CopyStmt *stmt)
errmsg("conflicting or redundant options")));
null_print = strVal(defel->arg);
}
else if (strcmp(defel->defname, "csv") == 0)
{
if (csv_mode)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
csv_mode = intVal(defel->arg);
}
else if (strcmp(defel->defname, "quote") == 0)
{
if (quote)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
quote = strVal(defel->arg);
}
else if (strcmp(defel->defname, "escape") == 0)
{
if (escape)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
escape = strVal(defel->arg);
}
else if (strcmp(defel->defname, "force") == 0)
{
if (force)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
force = (List *)defel->arg;
}
else if (strcmp(defel->defname, "literal") == 0)
{
if (literal)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
literal = (List *)defel->arg;
}
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
@ -735,6 +790,11 @@ DoCopy(const CopyStmt *stmt)
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify DELIMITER in BINARY mode")));
if (binary && csv_mode)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify CSV in BINARY mode")));
if (binary && null_print)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@ -742,10 +802,92 @@ DoCopy(const CopyStmt *stmt)
/* Set defaults */
if (!delim)
delim = "\t";
delim = csv_mode ? "," : "\t";
if (!null_print)
null_print = "\\N";
null_print = csv_mode ? "" : "\\N";
if (csv_mode)
{
if (!quote)
quote = "\"";
if (!escape)
escape = quote;
}
/*
* Only single-character delimiter strings are supported.
*/
if (strlen(delim) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must be a single character")));
/*
* Check quote
*/
if (!csv_mode && quote != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY quote available only in CSV mode")));
if (csv_mode && strlen(quote) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY quote must be a single character")));
/*
* Check escape
*/
if (!csv_mode && escape != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY escape available only in CSV mode")));
if (csv_mode && strlen(escape) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY escape must be a single character")));
/*
* Check force
*/
if (!csv_mode && force != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force available only in CSV mode")));
if (force != NIL && is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force only available using COPY TO")));
/*
* Check literal
*/
if (!csv_mode && literal != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY literal available only in CSV mode")));
if (literal != NIL && !is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY literal only available using COPY FROM")));
/*
* Don't allow the delimiter to appear in the null string.
*/
if (strchr(null_print, delim[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must not appear in the NULL specification")));
/*
* Don't allow the csv quote char to appear in the null string.
*/
if (csv_mode && strchr(null_print, quote[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in the NULL specification")));
/*
* Open and lock the relation, using the appropriate lock type.
@ -771,22 +913,6 @@ DoCopy(const CopyStmt *stmt)
errhint("Anyone can COPY to stdout or from stdin. "
"psql's \\copy command also works for anyone.")));
/*
* Presently, only single-character delimiter strings are supported.
*/
if (strlen(delim) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must be a single character")));
/*
* Don't allow the delimiter to appear in the null string.
*/
if (strchr(null_print, delim[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must not appear in the NULL specification")));
/*
* Don't allow COPY w/ OIDs to or from a table without them
*/
@ -801,6 +927,52 @@ DoCopy(const CopyStmt *stmt)
*/
attnumlist = CopyGetAttnums(rel, attnamelist);
/*
* Check that FORCE references valid COPY columns
*/
if (force)
{
TupleDesc tupDesc = RelationGetDescr(rel);
Form_pg_attribute *attr = tupDesc->attrs;
List *cur;
force_atts = CopyGetAttnums(rel, force);
foreach(cur, force_atts)
{
int attnum = lfirsti(cur);
if (!intMember(attnum, attnumlist))
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("FORCE column \"%s\" not referenced by COPY",
NameStr(attr[attnum - 1]->attname))));
}
}
/*
* Check that LITERAL references valid COPY columns
*/
if (literal)
{
List *cur;
TupleDesc tupDesc = RelationGetDescr(rel);
Form_pg_attribute *attr = tupDesc->attrs;
literal_atts = CopyGetAttnums(rel, literal);
foreach(cur, literal_atts)
{
int attnum = lfirsti(cur);
if (!intMember(attnum, attnumlist))
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("LITERAL column \"%s\" not referenced by COPY",
NameStr(attr[attnum - 1]->attname))));
}
}
/*
* Set up variables to avoid per-attribute overhead.
*/
@ -864,7 +1036,8 @@ DoCopy(const CopyStmt *stmt)
errmsg("\"%s\" is a directory", filename)));
}
}
CopyFrom(rel, attnumlist, binary, oids, delim, null_print);
CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
quote, escape, literal_atts);
}
else
{ /* copy from database to file */
@ -926,7 +1099,8 @@ DoCopy(const CopyStmt *stmt)
errmsg("\"%s\" is a directory", filename)));
}
}
CopyTo(rel, attnumlist, binary, oids, delim, null_print);
CopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
quote, escape, force_atts);
}
if (!pipe)
@ -958,7 +1132,8 @@ DoCopy(const CopyStmt *stmt)
*/
static void
CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
char *delim, char *null_print)
char *delim, char *null_print, bool csv_mode, char *quote,
char *escape, List *force_atts)
{
HeapTuple tuple;
TupleDesc tupDesc;
@ -967,6 +1142,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
int attr_count;
Form_pg_attribute *attr;
FmgrInfo *out_functions;
bool *force_quote;
Oid *elements;
bool *isvarlena;
char *string;
@ -988,11 +1164,12 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
out_functions = (FmgrInfo *) palloc((num_phys_attrs + 1) * sizeof(FmgrInfo));
elements = (Oid *) palloc((num_phys_attrs + 1) * sizeof(Oid));
isvarlena = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
force_quote = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
foreach(cur, attnumlist)
{
int attnum = lfirsti(cur);
Oid out_func_oid;
if (binary)
getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
&out_func_oid, &elements[attnum - 1],
@ -1002,6 +1179,11 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
&out_func_oid, &elements[attnum - 1],
&isvarlena[attnum - 1]);
fmgr_info(out_func_oid, &out_functions[attnum - 1]);
if (intMember(attnum, force_atts))
force_quote[attnum - 1] = true;
else
force_quote[attnum - 1] = false;
}
/*
@ -1051,7 +1233,6 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
{
bool need_delim = false;
CHECK_FOR_INTERRUPTS();
MemoryContextReset(mycontext);
@ -1113,7 +1294,15 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
value,
ObjectIdGetDatum(elements[attnum - 1]),
Int32GetDatum(attr[attnum - 1]->atttypmod)));
CopyAttributeOut(string, delim);
if (csv_mode)
{
CopyAttributeOutCSV(string, delim, quote, escape,
(strcmp(string, null_print) == 0 ||
force_quote[attnum - 1]));
}
else
CopyAttributeOut(string, delim);
}
else
{
@ -1148,6 +1337,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
pfree(out_functions);
pfree(elements);
pfree(isvarlena);
pfree(force_quote);
}
@ -1243,7 +1433,8 @@ limit_printout_length(StringInfo buf)
*/
static void
CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
char *delim, char *null_print)
char *delim, char *null_print, bool csv_mode, char *quote,
char *escape, List *literal_atts)
{
HeapTuple tuple;
TupleDesc tupDesc;
@ -1256,9 +1447,10 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
Oid *elements;
Oid oid_in_element;
ExprState **constraintexprs;
bool *literal_nullstr;
bool hasConstraints = false;
int i;
int attnum;
int i;
List *cur;
Oid in_func_oid;
Datum *values;
@ -1317,6 +1509,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
defmap = (int *) palloc((num_phys_attrs + 1) * sizeof(int));
defexprs = (ExprState **) palloc((num_phys_attrs + 1) * sizeof(ExprState *));
constraintexprs = (ExprState **) palloc0((num_phys_attrs + 1) * sizeof(ExprState *));
literal_nullstr = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
for (attnum = 1; attnum <= num_phys_attrs; attnum++)
{
@ -1333,6 +1526,11 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
&in_func_oid, &elements[attnum - 1]);
fmgr_info(in_func_oid, &in_functions[attnum - 1]);
if (intMember(attnum, literal_atts))
literal_nullstr[attnum - 1] = true;
else
literal_nullstr[attnum - 1] = false;
/* Get default info if needed */
if (!intMember(attnum, attnumlist))
{
@ -1389,9 +1587,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
ExecBSInsertTriggers(estate, resultRelInfo);
if (!binary)
{
file_has_oids = oids; /* must rely on user to tell us this... */
}
else
{
/* Read and verify binary header */
@ -1500,6 +1696,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
if (file_has_oids)
{
/* can't be in CSV mode here */
string = CopyReadAttribute(delim, null_print,
&result, &isnull);
@ -1538,14 +1735,27 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
errmsg("missing data for column \"%s\"",
NameStr(attr[m]->attname))));
string = CopyReadAttribute(delim, null_print,
&result, &isnull);
if (isnull)
if (csv_mode)
{
/* we read an SQL NULL, no need to do anything */
string = CopyReadAttributeCSV(delim, null_print, quote,
escape, &result, &isnull);
if (result == UNTERMINATED_FIELD)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unterminated CSV quoted field")));
}
else
string = CopyReadAttribute(delim, null_print,
&result, &isnull);
if (csv_mode && isnull && literal_nullstr[m])
{
string = null_print; /* set to NULL string */
isnull = false;
}
/* we read an SQL NULL, no need to do anything */
if (!isnull)
{
copy_attname = NameStr(attr[m]->attname);
values[m] = FunctionCall3(&in_functions[m],
@ -1732,11 +1942,12 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
pfree(values);
pfree(nulls);
if (!binary)
{
pfree(in_functions);
pfree(elements);
}
pfree(in_functions);
pfree(elements);
pfree(defmap);
pfree(defexprs);
pfree(constraintexprs);
pfree(literal_nullstr);
ExecDropTupleTable(tupleTable, true);
@ -2070,6 +2281,152 @@ CopyReadAttribute(const char *delim, const char *null_print,
return attribute_buf.data;
}
/*
 * Read the value of a single attribute in CSV mode,
 * performing de-escaping as needed. Escaping does not follow the normal
 * PostgreSQL text mode, but instead "standard" (i.e. common) CSV usage.
 *
 * Input is consumed from line_buf starting at line_buf.cursor, and the
 * de-escaped value is accumulated in attribute_buf.  Only the first byte
 * of delim, quote and escape is used.
 *
 * Quoted fields can span lines, in which case the line end is embedded
 * in the returned string and further lines are fetched with CopyReadLine().
 *
 * null_print is the null marker string. Note that this is compared to
 * the pre-de-escaped input string (thus if it is quoted it is not a NULL).
 *
 * *result is set to indicate what terminated the read:
 *		NORMAL_ATTR:		column delimiter
 *		END_OF_LINE:		end of line
 *		UNTERMINATED_FIELD:	no quote detected at end of a quoted field
 *
 * In any case, the string read up to the terminator (or end of file)
 * is returned.
 *
 * *isnull is set true or false depending on whether the input matched
 * the null marker. Note that the caller cannot check this since the
 * returned string will be the post-de-escaping equivalent, which may
 * look the same as some valid data string.
 *----------
 */
static char *
CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
					 char *escape, CopyReadResult *result, bool *isnull)
{
	char		delimc = delim[0];
	char		quotec = quote[0];
	char		escapec = escape[0];
	char		c;
	int			start_cursor = line_buf.cursor;
	int			end_cursor = start_cursor;
	int			input_len;
	bool		in_quote = false;
	bool		saw_quote = false;

	/* reset attribute_buf to empty */
	attribute_buf.len = 0;
	attribute_buf.data[0] = '\0';

	/* set default status */
	*result = END_OF_LINE;

	for (;;)
	{
		/* handle multiline quoted fields */
		if (in_quote && line_buf.cursor >= line_buf.len)
		{
			bool		done;

			/* the line terminator is part of the quoted value */
			switch (eol_type)
			{
				case EOL_NL:
					appendStringInfoString(&attribute_buf, "\n");
					break;
				case EOL_CR:
					appendStringInfoString(&attribute_buf, "\r");
					break;
				case EOL_CRNL:
					appendStringInfoString(&attribute_buf, "\r\n");
					break;
				case EOL_UNKNOWN:
					/* shouldn't happen - just keep going */
					break;
			}

			copy_lineno++;
			done = CopyReadLine();

			/* ran out of input while still inside quotes */
			if (done && line_buf.len == 0)
				break;

			start_cursor = line_buf.cursor;
		}

		/* remember where the raw field text ends (terminator excluded) */
		end_cursor = line_buf.cursor;
		if (line_buf.cursor >= line_buf.len)
			break;

		c = line_buf.data[line_buf.cursor++];

		/*
		 * unquoted field delimiter
		 */
		if (!in_quote && c == delimc)
		{
			*result = NORMAL_ATTR;
			break;
		}

		/*
		 * start of quoted field (or part of field)
		 */
		if (!in_quote && c == quotec)
		{
			saw_quote = true;
			in_quote = true;
			continue;
		}

		/*
		 * escape within a quoted field
		 */
		if (in_quote && c == escapec)
		{
			/*
			 * peek at the next char if available, and escape it if it
			 * is an escape char or a quote char.  Use a strict bound so
			 * we never look at the byte past the end of the line data.
			 */
			if (line_buf.cursor < line_buf.len)
			{
				char		nextc = line_buf.data[line_buf.cursor];

				if (nextc == escapec || nextc == quotec)
				{
					appendStringInfoCharMacro(&attribute_buf, nextc);
					line_buf.cursor++;
					continue;
				}
			}
		}

		/*
		 * end of quoted field.
		 * Must do this test after testing for escape in case quote char
		 * and escape char are the same (which is the common case).
		 */
		if (in_quote && c == quotec)
		{
			in_quote = false;
			continue;
		}

		appendStringInfoCharMacro(&attribute_buf, c);
	}

	if (in_quote)
		*result = UNTERMINATED_FIELD;

	/* check whether raw input matched null marker */
	input_len = end_cursor - start_cursor;
	if (!saw_quote && input_len == (int) strlen(null_print) &&
		strncmp(&line_buf.data[start_cursor], null_print, input_len) == 0)
		*isnull = true;
	else
		*isnull = false;

	return attribute_buf.data;
}
/*
* Read a binary attribute
*/
@ -2195,6 +2552,73 @@ CopyAttributeOut(char *server_string, char *delim)
}
}
/*
 * Send CSV representation of one attribute, with conversion and
 * CSV type escaping
 *
 * server_string is the attribute's text form; it is passed through
 * pg_server_to_client() when server_encoding and client_encoding differ.
 * Only the first byte of delim, quote and escape is used.
 *
 * The value is wrapped in quote characters when force_quote is true or
 * when it contains the delimiter, the quote character, a newline or a
 * carriage return.  Quote and escape characters inside the value are
 * prefixed with the escape character only when the value is quoted:
 * CopyReadAttributeCSV interprets the escape character solely inside a
 * quoted field, so escaping an unquoted value would change the data
 * that is read back.
 */
static void
CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
					char *escape, bool force_quote)
{
	char	   *string;
	char		c;
	char		delimc = delim[0];
	char		quotec = quote[0];
	char		escapec = escape[0];
	bool		need_quote = force_quote;
	char	   *test_string;
	bool		same_encoding;
	int			mblen = 1;
	int			i;

	same_encoding = (server_encoding == client_encoding);
	if (!same_encoding)
		string = (char *) pg_server_to_client((unsigned char *) server_string,
											  strlen(server_string));
	else
		string = server_string;

	/*
	 * have to run through the string twice,
	 * first time to see if it needs quoting, second to actually send it
	 */
	for (test_string = string;
		 !need_quote && (c = *test_string) != '\0';
		 test_string += mblen)
	{
		if (c == delimc || c == quotec || c == '\n' || c == '\r')
			need_quote = true;

		/* step over the whole (possibly multibyte) character */
		if (!same_encoding)
			mblen = pg_encoding_mblen(client_encoding, test_string);
		else
			mblen = 1;
	}

	if (need_quote)
		CopySendChar(quotec);

	for (; (c = *string) != '\0'; string += mblen)
	{
		/* escaping is only meaningful (and only undone) inside quotes */
		if (need_quote && (c == quotec || c == escapec))
			CopySendChar(escapec);

		CopySendChar(c);

		if (!same_encoding)
		{
			/* send additional bytes of the char, if any */
			mblen = pg_encoding_mblen(client_encoding, string);
			for (i = 1; i < mblen; i++)
				CopySendChar(string[i]);
		}
		else
			mblen = 1;
	}

	if (need_quote)
		CopySendChar(quotec);
}
/*
* CopyGetAttnums - build an integer list of attnums to be copied
*

View File

@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.450 2004/04/05 03:07:26 momjian Exp $
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.451 2004/04/19 17:22:30 momjian Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@ -343,7 +343,7 @@ static void doNegateFloat(Value *v);
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB
CREATEUSER CROSS CURRENT_DATE CURRENT_TIME
CREATEUSER CROSS CSV CURRENT_DATE CURRENT_TIME
CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
@ -370,7 +370,7 @@ static void doNegateFloat(Value *v);
KEY
LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT
LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
LISTEN LITERAL LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
LOCK_P
MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
@ -386,6 +386,8 @@ static void doNegateFloat(Value *v);
PRECISION PRESERVE PREPARE PRIMARY
PRIOR PRIVILEGES PROCEDURAL PROCEDURE
QUOTE
READ REAL RECHECK REFERENCES REINDEX RELATIVE_P RENAME REPEATABLE REPLACE
RESET RESTART RESTRICT RETURNS REVOKE RIGHT ROLLBACK ROW ROWS
RULE
@ -1360,6 +1362,26 @@ copy_opt_item:
{
$$ = makeDefElem("null", (Node *)makeString($3));
}
| CSV
{
$$ = makeDefElem("csv", (Node *)makeInteger(TRUE));
}
| QUOTE opt_as Sconst
{
$$ = makeDefElem("quote", (Node *)makeString($3));
}
| ESCAPE opt_as Sconst
{
$$ = makeDefElem("escape", (Node *)makeString($3));
}
| FORCE columnList
{
$$ = makeDefElem("force", (Node *)$2);
}
| LITERAL columnList
{
$$ = makeDefElem("literal", (Node *)$2);
}
;
/* The following exist for backward compatibility */
@ -7420,6 +7442,7 @@ unreserved_keyword:
| COPY
| CREATEDB
| CREATEUSER
| CSV
| CURSOR
| CYCLE
| DATABASE
@ -7473,6 +7496,7 @@ unreserved_keyword:
| LAST_P
| LEVEL
| LISTEN
| LITERAL
| LOAD
| LOCAL
| LOCATION
@ -7507,6 +7531,7 @@ unreserved_keyword:
| PRIVILEGES
| PROCEDURAL
| PROCEDURE
| QUOTE
| READ
| RECHECK
| REINDEX

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.147 2004/03/11 01:47:40 ishii Exp $
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.148 2004/04/19 17:22:31 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@ -90,6 +90,7 @@ static const ScanKeyword ScanKeywords[] = {
{"createdb", CREATEDB},
{"createuser", CREATEUSER},
{"cross", CROSS},
{"csv", CSV},
{"current_date", CURRENT_DATE},
{"current_time", CURRENT_TIME},
{"current_timestamp", CURRENT_TIMESTAMP},
@ -186,6 +187,7 @@ static const ScanKeyword ScanKeywords[] = {
{"like", LIKE},
{"limit", LIMIT},
{"listen", LISTEN},
{"literal", LITERAL},
{"load", LOAD},
{"local", LOCAL},
{"localtime", LOCALTIME},
@ -248,6 +250,7 @@ static const ScanKeyword ScanKeywords[] = {
{"privileges", PRIVILEGES},
{"procedural", PROCEDURAL},
{"procedure", PROCEDURE},
{"quote", QUOTE},
{"read", READ},
{"real", REAL},
{"recheck", RECHECK},

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.71 2004/01/07 18:56:27 neilc Exp $
* $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.72 2004/04/19 17:22:31 momjian Exp $
*
* NOTES
* This cruft is the server side of PQfn.
@ -154,8 +154,7 @@ SendFunctionResult(Datum retval, bool isnull, Oid rettype, int16 format)
bool typisvarlena;
char *outputstr;
getTypeOutputInfo(rettype,
&typoutput, &typelem, &typisvarlena);
getTypeOutputInfo(rettype, &typoutput, &typelem, &typisvarlena);
outputstr = DatumGetCString(OidFunctionCall3(typoutput,
retval,
ObjectIdGetDatum(typelem),

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2000-2003, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.43 2004/04/12 15:58:52 momjian Exp $
* $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.44 2004/04/19 17:22:31 momjian Exp $
*/
#include "postgres_fe.h"
#include "copy.h"
@ -66,8 +66,13 @@ struct copy_options
bool from;
bool binary;
bool oids;
bool csv_mode;
char *delim;
char *null;
char *quote;
char *escape;
char *force_list;
char *literal_list;
};
@ -81,6 +86,10 @@ free_copy_options(struct copy_options * ptr)
free(ptr->file);
free(ptr->delim);
free(ptr->null);
free(ptr->quote);
free(ptr->escape);
free(ptr->force_list);
free(ptr->literal_list);
free(ptr);
}
@ -272,11 +281,19 @@ parse_slash_copy(const char *args)
while (token)
{
bool fetch_next;
fetch_next = true;
/* someday allow BINARY here */
if (strcasecmp(token, "oids") == 0)
{
result->oids = true;
}
else if (strcasecmp(token, "csv") == 0)
{
result->csv_mode = true;
}
else if (strcasecmp(token, "delimiter") == 0)
{
token = strtokx(NULL, whitespace, NULL, "'",
@ -301,11 +318,78 @@ parse_slash_copy(const char *args)
else
goto error;
}
else if (strcasecmp(token, "quote") == 0)
{
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token && strcasecmp(token, "as") == 0)
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token)
result->quote = pg_strdup(token);
else
goto error;
}
else if (strcasecmp(token, "escape") == 0)
{
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token && strcasecmp(token, "as") == 0)
token = strtokx(NULL, whitespace, NULL, "'",
'\\', false, pset.encoding);
if (token)
result->escape = pg_strdup(token);
else
goto error;
}
else if (strcasecmp(token, "force") == 0)
{
/* handle column list */
fetch_next = false;
for (;;)
{
token = strtokx(NULL, whitespace, ",", "\"",
0, false, pset.encoding);
if (!token || strchr(",", token[0]))
goto error;
if (!result->force_list)
result->force_list = pg_strdup(token);
else
xstrcat(&result->force_list, token);
token = strtokx(NULL, whitespace, ",", "\"",
0, false, pset.encoding);
if (!token || token[0] != ',')
break;
xstrcat(&result->force_list, token);
}
}
else if (strcasecmp(token, "literal") == 0)
{
/* handle column list */
fetch_next = false;
for (;;)
{
token = strtokx(NULL, whitespace, ",", "\"",
0, false, pset.encoding);
if (!token || strchr(",", token[0]))
goto error;
if (!result->literal_list)
result->literal_list = pg_strdup(token);
else
xstrcat(&result->literal_list, token);
token = strtokx(NULL, whitespace, ",", "\"",
0, false, pset.encoding);
if (!token || token[0] != ',')
break;
xstrcat(&result->literal_list, token);
}
}
else
goto error;
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
if (fetch_next)
token = strtokx(NULL, whitespace, NULL, NULL,
0, false, pset.encoding);
}
}
@ -340,7 +424,7 @@ do_copy(const char *args)
PGresult *result;
bool success;
struct stat st;
/* parse options */
options = parse_slash_copy(args);
@ -379,6 +463,7 @@ do_copy(const char *args)
options->delim);
}
/* There is no backward-compatible CSV syntax */
if (options->null)
{
if (options->null[0] == '\'')
@ -387,6 +472,37 @@ do_copy(const char *args)
appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
}
if (options->csv_mode)
{
appendPQExpBuffer(&query, " CSV");
}
if (options->quote)
{
if (options->quote[0] == '\'')
appendPQExpBuffer(&query, " QUOTE AS %s", options->quote);
else
appendPQExpBuffer(&query, " QUOTE AS '%s'", options->quote);
}
if (options->escape)
{
if (options->escape[0] == '\'')
appendPQExpBuffer(&query, " ESCAPE AS %s", options->escape);
else
appendPQExpBuffer(&query, " ESCAPE AS '%s'", options->escape);
}
if (options->force_list)
{
appendPQExpBuffer(&query, " FORCE %s", options->force_list);
}
if (options->literal_list)
{
appendPQExpBuffer(&query, " LITERAL %s", options->literal_list);
}
if (options->from)
{
if (options->file)