Add regression tests for CSV and \., and add automatic quoting of a

single-column dump that has a \. value, so the load works properly.  I
also added documentation describing this issue.
This commit is contained in:
Bruce Momjian 2005-12-28 03:25:32 +00:00
parent 1b184c990f
commit 87289ff35c
4 changed files with 61 additions and 25 deletions

View File

@ -1,5 +1,5 @@
<!--
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.70 2005/10/15 20:12:33 neilc Exp $
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.71 2005/12/28 03:25:32 momjian Exp $
PostgreSQL documentation
-->
@ -511,17 +511,28 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
comparisons for specific columns.
</para>
<para>
Because backslash is not a special character in the <literal>CSV</>
format, <literal>\.</>, the end-of-data marker, could also appear
as a data value. To avoid any misinterpretation, a <literal>\.</>
data value appearing as a lone entry on a line is automatically
quoted on output, and on input, if quoted, is not interpreted as the
end-of-data marker. Files written by <command>COPY TO</> are
therefore safe to reload. If you are loading a single-column file
that was created by another application and might contain an
unquoted <literal>\.</> data value, you need to quote that value in
the input file; otherwise it is taken as the end-of-data marker.
</para>
<note>
<para>
In <literal>CSV</> mode, all characters are significant. A quoted value
surrounded by white space, or any characters other than
<literal>DELIMITER</>, will include those characters. This can cause
errors if you import data from a system that pads <literal>CSV</>
lines with white space out to some fixed width. If such a situation
arises you might need to preprocess the <literal>CSV</> file to remove
the trailing white space, before importing the data into
<productname>PostgreSQL</>.
</para>
<para>
In <literal>CSV</> mode, all characters are significant. A quoted value
surrounded by white space, or any characters other than
<literal>DELIMITER</>, will include those characters. This can cause
errors if you import data from a system that pads <literal>CSV</>
lines with white space out to some fixed width. If such a situation
arises you might need to preprocess the <literal>CSV</> file to remove
the trailing white space, before importing the data into
<productname>PostgreSQL</>.
</para>
</note>
<note>

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.256 2005/12/27 18:10:48 momjian Exp $
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.257 2005/12/28 03:25:32 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@ -244,7 +244,7 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
bool *isnull);
static void CopyAttributeOutText(CopyState cstate, char *server_string);
static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
bool use_quote);
bool use_quote, bool single_attr);
static List *CopyGetAttnums(Relation rel, List *attnamelist);
static char *limit_printout_length(const char *str);
@ -1284,7 +1284,8 @@ CopyTo(CopyState cstate)
colname = NameStr(attr[attnum - 1]->attname);
CopyAttributeOutCSV(cstate, colname, false);
CopyAttributeOutCSV(cstate, colname, false,
list_length(cstate->attnumlist) == 1);
}
CopySendEndOfRow(cstate);
@ -1359,7 +1360,8 @@ CopyTo(CopyState cstate)
value));
if (cstate->csv_mode)
CopyAttributeOutCSV(cstate, string,
force_quote[attnum - 1]);
force_quote[attnum - 1],
list_length(cstate->attnumlist) == 1);
else
CopyAttributeOutText(cstate, string);
}
@ -2968,7 +2970,7 @@ CopyAttributeOutText(CopyState cstate, char *server_string)
*/
static void
CopyAttributeOutCSV(CopyState cstate, char *server_string,
bool use_quote)
bool use_quote, bool single_attr)
{
char *string;
char c;
@ -2993,17 +2995,27 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
*/
if (!use_quote)
{
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
{
if (c == delimc || c == quotec || c == '\n' || c == '\r')
/*
* Because '\.' can be a data value, quote it if it appears
* alone on a line so it is not interpreted as the end-of-data
* marker.
*/
if (single_attr && strcmp(string, "\\.") == 0)
use_quote = true;
else
{
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
{
use_quote = true;
break;
if (c == delimc || c == quotec || c == '\n' || c == '\r')
{
use_quote = true;
break;
}
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
else
mblen = 1;
}
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
else
mblen = 1;
}
}

View File

@ -194,6 +194,9 @@ COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
--test that we read consecutive LFs properly
CREATE TEMP TABLE testnl (a int, b text, c int);
COPY testnl FROM stdin CSV;
-- test end of copy marker
CREATE TEMP TABLE testeoc (a text);
COPY testeoc FROM stdin CSV;
DROP TABLE x, y;
DROP FUNCTION fn_x_before();
DROP FUNCTION fn_x_after();

View File

@ -139,6 +139,16 @@ COPY testnl FROM stdin CSV;
inside",2
\.
-- test end of copy marker
CREATE TEMP TABLE testeoc (a text);
COPY testeoc FROM stdin CSV;
a\.
\.b
c\.d
"\."
\.
DROP TABLE x, y;
DROP FUNCTION fn_x_before();