mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-10-01 21:11:22 +02:00
83af58f6b5
Jeff Janes
914 lines
34 KiB
Plaintext
914 lines
34 KiB
Plaintext
<!--
|
|
doc/src/sgml/ref/copy.sgml
|
|
PostgreSQL documentation
|
|
-->
|
|
|
|
|
|
<refentry id="SQL-COPY">
|
|
<refmeta>
|
|
<refentrytitle>COPY</refentrytitle>
|
|
<manvolnum>7</manvolnum>
|
|
<refmiscinfo>SQL - Language Statements</refmiscinfo>
|
|
</refmeta>
|
|
|
|
<refnamediv>
|
|
<refname>COPY</refname>
|
|
<refpurpose>copy data between a file and a table</refpurpose>
|
|
</refnamediv>
|
|
|
|
<indexterm zone="sql-copy">
|
|
<primary>COPY</primary>
|
|
</indexterm>
|
|
|
|
<refsynopsisdiv>
|
|
<synopsis>
|
|
COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
|
|
FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
|
|
[ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
|
|
|
|
COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
|
|
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
|
|
[ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
|
|
|
|
<phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase>
|
|
|
|
FORMAT <replaceable class="parameter">format_name</replaceable>
|
|
OIDS [ <replaceable class="parameter">boolean</replaceable> ]
|
|
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
|
|
NULL '<replaceable class="parameter">null_string</replaceable>'
|
|
HEADER [ <replaceable class="parameter">boolean</replaceable> ]
|
|
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
|
|
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
|
|
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
|
FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) |
|
|
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
|
|
</synopsis>
|
|
</refsynopsisdiv>
|
|
|
|
<refsect1>
|
|
<title>Description</title>
|
|
|
|
<para>
|
|
<command>COPY</command> moves data between
|
|
<productname>PostgreSQL</productname> tables and standard file-system
|
|
files. <command>COPY TO</command> copies the contents of a table
|
|
<emphasis>to</> a file, while <command>COPY FROM</command> copies
|
|
data <emphasis>from</> a file to a table (appending the data to
|
|
whatever is in the table already). <command>COPY TO</command>
|
|
can also copy the results of a <command>SELECT</> query.
|
|
</para>
|
|
|
|
<para>
|
|
If a list of columns is specified, <command>COPY</command> will
|
|
only copy the data in the specified columns to or from the file.
|
|
If there are any columns in the table that are not in the column list,
|
|
<command>COPY FROM</command> will insert the default values for
|
|
those columns.
|
|
</para>
|
|
|
|
<para>
|
|
<command>COPY</command> with a file name instructs the
|
|
<productname>PostgreSQL</productname> server to directly read from
|
|
or write to a file. The file must be accessible to the server and
|
|
the name must be specified from the viewpoint of the server. When
|
|
<literal>STDIN</literal> or <literal>STDOUT</literal> is
|
|
specified, data is transmitted via the connection between the
|
|
client and the server.
|
|
</para>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Parameters</title>
|
|
|
|
<variablelist>
|
|
<varlistentry>
|
|
<term><replaceable class="parameter">table_name</replaceable></term>
|
|
<listitem>
|
|
<para>
|
|
The name (optionally schema-qualified) of an existing table.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable class="parameter">column_name</replaceable></term>
|
|
<listitem>
|
|
<para>
|
|
An optional list of columns to be copied. If no column list is
|
|
specified, all columns of the table will be copied.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable class="parameter">query</replaceable></term>
|
|
<listitem>
|
|
<para>
|
|
A <xref linkend="sql-select"> or
|
|
<xref linkend="sql-values"> command
|
|
whose results are to be copied.
|
|
Note that parentheses are required around the query.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable class="parameter">filename</replaceable></term>
|
|
<listitem>
|
|
<para>
|
|
The absolute path name of the input or output file. Windows users
|
|
might need to use an <literal>E''</> string and double any backslashes
|
|
used in the path name.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>STDIN</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies that input comes from the client application.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>STDOUT</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies that output goes to the client application.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable class="parameter">boolean</replaceable></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies whether the selected option should be turned on or off.
|
|
You can write <literal>TRUE</literal>, <literal>ON</>, or
|
|
<literal>1</literal> to enable the option, and <literal>FALSE</literal>,
|
|
<literal>OFF</>, or <literal>0</literal> to disable it. The
|
|
<replaceable class="parameter">boolean</replaceable> value can also
|
|
be omitted, in which case <literal>TRUE</literal> is assumed.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>FORMAT</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Selects the data format to be read or written:
|
|
<literal>text</>,
|
|
<literal>csv</> (Comma Separated Values),
|
|
or <literal>binary</>.
|
|
The default is <literal>text</>.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>OIDS</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies copying the OID for each row. (An error is raised if
|
|
<literal>OIDS</literal> is specified for a table that does not
|
|
have OIDs, or in the case of copying a <replaceable
|
|
class="parameter">query</replaceable>.)
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>DELIMITER</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies the character that separates columns within each row
|
|
(line) of the file. The default is a tab character in text format,
|
|
a comma in <literal>CSV</> format.
|
|
This must be a single one-byte character.
|
|
This option is not allowed when using <literal>binary</> format.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>NULL</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies the string that represents a null value. The default is
|
|
<literal>\N</literal> (backslash-N) in text format, and an unquoted empty
|
|
string in <literal>CSV</> format. You might prefer an
|
|
empty string even in text format for cases where you don't want to
|
|
distinguish nulls from empty strings.
|
|
This option is not allowed when using <literal>binary</> format.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
When using <command>COPY FROM</command>, any data item that matches
|
|
this string will be stored as a null value, so you should make
|
|
sure that you use the same string as you used with
|
|
<command>COPY TO</command>.
|
|
</para>
|
|
</note>
|
|
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>HEADER</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies that the file contains a header line with the names of each
|
|
column in the file. On output, the first line contains the column
|
|
names from the table, and on input, the first line is ignored.
|
|
This option is allowed only when using <literal>CSV</> format.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>QUOTE</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies the quoting character to be used when a data value is quoted.
|
|
The default is double-quote.
|
|
This must be a single one-byte character.
|
|
This option is allowed only when using <literal>CSV</> format.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>ESCAPE</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies the character that should appear before a
|
|
data character that matches the <literal>QUOTE</> value.
|
|
The default is the same as the <literal>QUOTE</> value (so that
|
|
the quoting character is doubled if it appears in the data).
|
|
This must be a single one-byte character.
|
|
This option is allowed only when using <literal>CSV</> format.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>FORCE_QUOTE</></term>
|
|
<listitem>
|
|
<para>
|
|
Forces quoting to be
|
|
used for all non-<literal>NULL</> values in each specified column.
|
|
<literal>NULL</> output is never quoted. If <literal>*</> is specified,
|
|
non-<literal>NULL</> values will be quoted in all columns.
|
|
This option is allowed only in <command>COPY TO</>, and only when
|
|
using <literal>CSV</> format.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>FORCE_NOT_NULL</></term>
|
|
<listitem>
|
|
<para>
|
|
Do not match the specified columns' values against the null string.
|
|
In the default case where the null string is empty, this means that
|
|
empty values will be read as zero-length strings rather than nulls,
|
|
even when they are not quoted.
|
|
This option is allowed only in <command>COPY FROM</>, and only when
|
|
using <literal>CSV</> format.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>ENCODING</></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies that the file is encoded in the <replaceable
|
|
class="parameter">encoding_name</replaceable>. If this option is
|
|
omitted, the current client encoding is used. See the Notes below
|
|
for more details.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
</variablelist>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Outputs</title>
|
|
|
|
<para>
|
|
On successful completion, a <command>COPY</> command returns a command
|
|
tag of the form
|
|
<screen>
|
|
COPY <replaceable class="parameter">count</replaceable>
|
|
</screen>
|
|
The <replaceable class="parameter">count</replaceable> is the number
|
|
of rows copied.
|
|
</para>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Notes</title>
|
|
|
|
<para>
|
|
<command>COPY</command> can only be used with plain tables, not
|
|
with views. However, you can write <literal>COPY (SELECT * FROM
|
|
<replaceable class="parameter">viewname</replaceable>) TO ...</literal>.
|
|
</para>
|
|
|
|
<para>
|
|
<command>COPY</command> only deals with the specific table named;
|
|
it does not copy data to or from child tables. Thus for example
|
|
<literal>COPY <replaceable class="parameter">table</> TO</literal>
|
|
shows the same data as <literal>SELECT * FROM ONLY <replaceable
|
|
class="parameter">table</></literal>. But <literal>COPY
|
|
(SELECT * FROM <replaceable class="parameter">table</>) TO ...</literal>
|
|
can be used to dump all of the data in an inheritance hierarchy.
|
|
</para>
|
|
|
|
<para>
|
|
You must have select privilege on the table
|
|
whose values are read by <command>COPY TO</command>, and
|
|
insert privilege on the table into which values
|
|
are inserted by <command>COPY FROM</command>. It is sufficient
|
|
to have column privileges on the column(s) listed in the command.
|
|
</para>
|
|
|
|
<para>
|
|
Files named in a <command>COPY</command> command are read or written
|
|
directly by the server, not by the client application. Therefore,
|
|
they must reside on or be accessible to the database server machine,
|
|
not the client. They must be accessible to and readable or writable
|
|
by the <productname>PostgreSQL</productname> user (the user ID the
|
|
server runs as), not the client. <command>COPY</command> naming a
|
|
file is only allowed to database superusers, since it allows reading
|
|
or writing any file that the server has privileges to access.
|
|
</para>
|
|
|
|
<para>
|
|
Do not confuse <command>COPY</command> with the
|
|
<application>psql</application> instruction
|
|
<command><link linkend="APP-PSQL-meta-commands-copy">\copy</link></command>. <command>\copy</command> invokes
|
|
<command>COPY FROM STDIN</command> or <command>COPY TO
|
|
STDOUT</command>, and then fetches/stores the data in a file
|
|
accessible to the <application>psql</application> client. Thus,
|
|
file accessibility and access rights depend on the client rather
|
|
than the server when <command>\copy</command> is used.
|
|
</para>
|
|
|
|
<para>
|
|
It is recommended that the file name used in <command>COPY</command>
|
|
always be specified as an absolute path. This is enforced by the
|
|
server in the case of <command>COPY TO</command>, but for
|
|
<command>COPY FROM</command> you do have the option of reading from
|
|
a file specified by a relative path. The path will be interpreted
|
|
relative to the working directory of the server process (normally
|
|
the cluster's data directory), not the client's working directory.
|
|
</para>
|
|
|
|
<para>
|
|
<command>COPY FROM</command> will invoke any triggers and check
|
|
constraints on the destination table. However, it will not invoke rules.
|
|
</para>
|
|
|
|
<para>
|
|
<command>COPY</command> input and output is affected by
|
|
<varname>DateStyle</varname>. To ensure portability to other
|
|
<productname>PostgreSQL</productname> installations that might use
|
|
non-default <varname>DateStyle</varname> settings,
|
|
<varname>DateStyle</varname> should be set to <literal>ISO</> before
|
|
using <command>COPY TO</>. It is also a good idea to avoid dumping
|
|
data with <varname>IntervalStyle</varname> set to
|
|
<literal>sql_standard</>, because negative interval values might be
|
|
misinterpreted by a server that has a different setting for
|
|
<varname>IntervalStyle</varname>.
|
|
</para>
|
|
|
|
<para>
|
|
Input data is interpreted according to <literal>ENCODING</literal>
|
|
option or the current client encoding, and output data is encoded
|
|
in <literal>ENCODING</literal> or the current client encoding, even
|
|
if the data does not pass through the client but is read from or
|
|
written to a file directly by the server.
|
|
</para>
|
|
|
|
<para>
|
|
<command>COPY</command> stops operation at the first error. This
|
|
should not lead to problems in the event of a <command>COPY
|
|
TO</command>, but the target table will already have received
|
|
earlier rows in a <command>COPY FROM</command>. These rows will not
|
|
be visible or accessible, but they still occupy disk space. This might
|
|
amount to a considerable amount of wasted disk space if the failure
|
|
happened well into a large copy operation. You might wish to invoke
|
|
<command>VACUUM</command> to recover the wasted space.
|
|
</para>
|
|
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>File Formats</title>
|
|
|
|
<refsect2>
|
|
<title>Text Format</title>
|
|
|
|
<para>
|
|
When the <literal>text</> format is used,
|
|
the data read or written is a text file with one line per table row.
|
|
Columns in a row are separated by the delimiter character.
|
|
The column values themselves are strings generated by the
|
|
output function, or acceptable to the input function, of each
|
|
attribute's data type. The specified null string is used in
|
|
place of columns that are null.
|
|
<command>COPY FROM</command> will raise an error if any line of the
|
|
input file contains more or fewer columns than are expected.
|
|
If <literal>OIDS</literal> is specified, the OID is read or written as the first column,
|
|
preceding the user data columns.
|
|
</para>
|
|
|
|
<para>
|
|
End of data can be represented by a single line containing just
|
|
backslash-period (<literal>\.</>). An end-of-data marker is
|
|
not necessary when reading from a file, since the end of file
|
|
serves perfectly well; it is needed only when copying data to or from
|
|
client applications using pre-3.0 client protocol.
|
|
</para>
|
|
|
|
<para>
|
|
Backslash characters (<literal>\</>) can be used in the
|
|
<command>COPY</command> data to quote data characters that might
|
|
otherwise be taken as row or column delimiters. In particular, the
|
|
following characters <emphasis>must</> be preceded by a backslash if
|
|
they appear as part of a column value: backslash itself,
|
|
newline, carriage return, and the current delimiter character.
|
|
</para>
|
|
|
|
<para>
|
|
The specified null string is sent by <command>COPY TO</command> without
|
|
adding any backslashes; conversely, <command>COPY FROM</command> matches
|
|
the input against the null string before removing backslashes. Therefore,
|
|
a null string such as <literal>\N</literal> cannot be confused with
|
|
the actual data value <literal>\N</literal> (which would be represented
|
|
as <literal>\\N</literal>).
|
|
</para>
|
|
|
|
<para>
|
|
The following special backslash sequences are recognized by
|
|
<command>COPY FROM</command>:
|
|
|
|
<informaltable>
|
|
<tgroup cols="2">
|
|
<thead>
|
|
<row>
|
|
<entry>Sequence</entry>
|
|
<entry>Represents</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>\b</></entry>
|
|
<entry>Backspace (ASCII 8)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>\f</></entry>
|
|
<entry>Form feed (ASCII 12)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>\n</></entry>
|
|
<entry>Newline (ASCII 10)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>\r</></entry>
|
|
<entry>Carriage return (ASCII 13)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>\t</></entry>
|
|
<entry>Tab (ASCII 9)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>\v</></entry>
|
|
<entry>Vertical tab (ASCII 11)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>\</><replaceable>digits</></entry>
|
|
<entry>Backslash followed by one to three octal digits specifies
|
|
the character with that numeric code</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>\x</><replaceable>digits</></entry>
|
|
<entry>Backslash <literal>x</> followed by one or two hex digits specifies
|
|
the character with that numeric code</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</informaltable>
|
|
|
|
Presently, <command>COPY TO</command> will never emit an octal or
|
|
hex-digits backslash sequence, but it does use the other sequences
|
|
listed above for those control characters.
|
|
</para>
|
|
|
|
<para>
|
|
Any other backslashed character that is not mentioned in the above table
|
|
will be taken to represent itself. However, beware of adding backslashes
|
|
unnecessarily, since that might accidentally produce a string matching the
|
|
end-of-data marker (<literal>\.</>) or the null string (<literal>\N</> by
|
|
default). These strings will be recognized before any other backslash
|
|
processing is done.
|
|
</para>
|
|
|
|
<para>
|
|
It is strongly recommended that applications generating <command>COPY</command> data convert
|
|
data newlines and carriage returns to the <literal>\n</> and
|
|
<literal>\r</> sequences respectively. At present it is
|
|
possible to represent a data carriage return by a backslash and carriage
|
|
return, and to represent a data newline by a backslash and newline.
|
|
However, these representations might not be accepted in future releases.
|
|
They are also highly vulnerable to corruption if the <command>COPY</command> file is
|
|
transferred across different machines (for example, from Unix to Windows
|
|
or vice versa).
|
|
</para>
|
|
|
|
<para>
|
|
<command>COPY TO</command> will terminate each row with a Unix-style
|
|
newline (<quote><literal>\n</></>). Servers running on Microsoft Windows instead
|
|
output carriage return/newline (<quote><literal>\r\n</></>), but only for
|
|
<command>COPY</> to a server file; for consistency across platforms,
|
|
<command>COPY TO STDOUT</> always sends <quote><literal>\n</></>
|
|
regardless of server platform.
|
|
<command>COPY FROM</command> can handle lines ending with newlines,
|
|
carriage returns, or carriage return/newlines. To reduce the risk of
|
|
error due to un-backslashed newlines or carriage returns that were
|
|
meant as data, <command>COPY FROM</command> will complain if the line
|
|
endings in the input are not all alike.
|
|
</para>
|
|
</refsect2>
|
|
|
|
<refsect2>
|
|
<title>CSV Format</title>
|
|
|
|
<para>
|
|
This format option is used for importing and exporting the Comma
|
|
Separated Value (<literal>CSV</>) file format used by many other
|
|
programs, such as spreadsheets. Instead of the escaping rules used by
|
|
<productname>PostgreSQL</productname>'s standard text format, it
|
|
produces and recognizes the common CSV escaping mechanism.
|
|
</para>
|
|
|
|
<para>
|
|
The values in each record are separated by the <literal>DELIMITER</>
|
|
character. If the value contains the delimiter character, the
|
|
<literal>QUOTE</> character, the <literal>NULL</> string, a carriage
|
|
return, or line feed character, then the whole value is prefixed and
|
|
suffixed by the <literal>QUOTE</> character, and any occurrence
|
|
within the value of a <literal>QUOTE</> character or the
|
|
<literal>ESCAPE</> character is preceded by the escape character.
|
|
You can also use <literal>FORCE_QUOTE</> to force quotes when outputting
|
|
non-<literal>NULL</> values in specific columns.
|
|
</para>
|
|
|
|
<para>
|
|
The <literal>CSV</> format has no standard way to distinguish a
|
|
<literal>NULL</> value from an empty string.
|
|
<productname>PostgreSQL</>'s <command>COPY</> handles this by quoting.
|
|
A <literal>NULL</> is output as the <literal>NULL</> parameter string
|
|
and is not quoted, while a non-<literal>NULL</> value matching the
|
|
<literal>NULL</> parameter string is quoted. For example, with the
|
|
default settings, a <literal>NULL</> is written as an unquoted empty
|
|
string, while an empty string data value is written with double quotes
|
|
(<literal>""</>). Reading values follows similar rules. You can
|
|
use <literal>FORCE_NOT_NULL</> to prevent <literal>NULL</> input
|
|
comparisons for specific columns.
|
|
</para>
|
|
|
|
<para>
|
|
Because backslash is not a special character in the <literal>CSV</>
|
|
format, <literal>\.</>, the end-of-data marker, could also appear
|
|
as a data value. To avoid any misinterpretation, a <literal>\.</>
|
|
data value appearing as a lone entry on a line is automatically
|
|
quoted on output, and on input, if quoted, is not interpreted as the
|
|
end-of-data marker. If you are loading a file created by another
|
|
application that has a single unquoted column and might have a
|
|
value of <literal>\.</>, you might need to quote that value in the
|
|
input file.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
In <literal>CSV</> format, all characters are significant. A quoted value
|
|
surrounded by white space, or any characters other than
|
|
<literal>DELIMITER</>, will include those characters. This can cause
|
|
errors if you import data from a system that pads <literal>CSV</>
|
|
lines with white space out to some fixed width. If such a situation
|
|
arises you might need to preprocess the <literal>CSV</> file to remove
|
|
the trailing white space, before importing the data into
|
|
<productname>PostgreSQL</>.
|
|
</para>
|
|
</note>
|
|
|
|
<note>
|
|
<para>
|
|
CSV format will both recognize and produce CSV files with quoted
|
|
values containing embedded carriage returns and line feeds. Thus
|
|
the files are not strictly one line per table row like text-format
|
|
files.
|
|
</para>
|
|
</note>
|
|
|
|
<note>
|
|
<para>
|
|
Many programs produce strange and occasionally perverse CSV files,
|
|
so the file format is more a convention than a standard. Thus you
|
|
might encounter some files that cannot be imported using this
|
|
mechanism, and <command>COPY</> might produce files that other
|
|
programs cannot process.
|
|
</para>
|
|
</note>
|
|
|
|
</refsect2>
|
|
|
|
<refsect2>
|
|
<title>Binary Format</title>
|
|
|
|
<para>
|
|
The <literal>binary</literal> format option causes all data to be
|
|
stored/read as binary format rather than as text. It is
|
|
somewhat faster than the text and <literal>CSV</> formats,
|
|
but a binary-format file is less portable across machine architectures and
|
|
<productname>PostgreSQL</productname> versions.
|
|
Also, the binary format is very data type specific; for example
|
|
it will not work to output binary data from a <type>smallint</> column
|
|
and read it into an <type>integer</> column, even though that would work
|
|
fine in text format.
|
|
</para>
|
|
|
|
<para>
|
|
The <literal>binary</> file format consists
|
|
of a file header, zero or more tuples containing the row data, and
|
|
a file trailer. Headers and data are in network byte order.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
<productname>PostgreSQL</productname> releases before 7.4 used a
|
|
different binary file format.
|
|
</para>
|
|
</note>
|
|
|
|
<refsect3>
|
|
<title>File Header</title>
|
|
|
|
<para>
|
|
The file header consists of 15 bytes of fixed fields, followed
|
|
by a variable-length header extension area. The fixed fields are:
|
|
|
|
<variablelist>
|
|
<varlistentry>
|
|
<term>Signature</term>
|
|
<listitem>
|
|
<para>
|
|
11-byte sequence <literal>PGCOPY\n\377\r\n\0</> — note that the zero byte
|
|
is a required part of the signature. (The signature is designed to allow
|
|
easy identification of files that have been munged by a non-8-bit-clean
|
|
transfer. This signature will be changed by end-of-line-translation
|
|
filters, dropped zero bytes, dropped high bits, or parity changes.)
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term>Flags field</term>
|
|
<listitem>
|
|
<para>
|
|
32-bit integer bit mask to denote important aspects of the file format. Bits
|
|
are numbered from 0 (<acronym>LSB</>) to 31 (<acronym>MSB</>). Note that
|
|
this field is stored in network byte order (most significant byte first),
|
|
as are all the integer fields used in the file format. Bits
|
|
16-31 are reserved to denote critical file format issues; a reader
|
|
should abort if it finds an unexpected bit set in this range. Bits 0-15
|
|
are reserved to signal backwards-compatible format issues; a reader
|
|
should simply ignore any unexpected bits set in this range. Currently
|
|
only one flag bit is defined, and the rest must be zero:
|
|
<variablelist>
|
|
<varlistentry>
|
|
<term>Bit 16</term>
|
|
<listitem>
|
|
<para>
|
|
if 1, OIDs are included in the data; if 0, not
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
</variablelist></para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term>Header extension area length</term>
|
|
<listitem>
|
|
<para>
|
|
32-bit integer, length in bytes of remainder of header, not including self.
|
|
Currently, this is zero, and the first tuple follows
|
|
immediately. Future changes to the format might allow additional data
|
|
to be present in the header. A reader should silently skip over any header
|
|
extension data it does not know what to do with.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
</variablelist>
|
|
</para>
|
|
|
|
<para>
|
|
The header extension area is envisioned to contain a sequence of
|
|
self-identifying chunks. The flags field is not intended to tell readers
|
|
what is in the extension area. Specific design of header extension contents
|
|
is left for a later release.
|
|
</para>
|
|
|
|
<para>
|
|
This design allows for both backwards-compatible header additions (add
|
|
header extension chunks, or set low-order flag bits) and
|
|
non-backwards-compatible changes (set high-order flag bits to signal such
|
|
changes, and add supporting data to the extension area if needed).
|
|
</para>
|
|
</refsect3>
|
|
|
|
<refsect3>
|
|
<title>Tuples</title>
|
|
<para>
|
|
Each tuple begins with a 16-bit integer count of the number of fields in the
|
|
tuple. (Presently, all tuples in a table will have the same count, but that
|
|
might not always be true.) Then, repeated for each field in the tuple, there
|
|
is a 32-bit length word followed by that many bytes of field data. (The
|
|
length word does not include itself, and can be zero.) As a special case,
|
|
-1 indicates a NULL field value. No value bytes follow in the NULL case.
|
|
</para>
|
|
|
|
<para>
|
|
There is no alignment padding or any other extra data between fields.
|
|
</para>
|
|
|
|
<para>
|
|
Presently, all data values in a binary-format file are
|
|
assumed to be in binary format (format code one). It is anticipated that a
|
|
future extension might add a header field that allows per-column format codes
|
|
to be specified.
|
|
</para>
|
|
|
|
<para>
|
|
To determine the appropriate binary format for the actual tuple data you
|
|
should consult the <productname>PostgreSQL</productname> source, in
|
|
particular the <function>*send</> and <function>*recv</> functions for
|
|
each column's data type (typically these functions are found in the
|
|
<filename>src/backend/utils/adt/</filename> directory of the source
|
|
distribution).
|
|
</para>
|
|
|
|
<para>
|
|
If OIDs are included in the file, the OID field immediately follows the
|
|
field-count word. It is a normal field except that it's not included
|
|
in the field-count. In particular it has a length word — this will allow
|
|
handling of 4-byte vs. 8-byte OIDs without too much pain, and will allow
|
|
OIDs to be shown as null if that ever proves desirable.
|
|
</para>
|
|
</refsect3>
|
|
|
|
<refsect3>
|
|
<title>File Trailer</title>
|
|
|
|
<para>
|
|
The file trailer consists of a 16-bit integer word containing -1. This
|
|
is easily distinguished from a tuple's field-count word.
|
|
</para>
|
|
|
|
<para>
|
|
A reader should report an error if a field-count word is neither -1
|
|
nor the expected number of columns. This provides an extra
|
|
check against somehow getting out of sync with the data.
|
|
</para>
|
|
</refsect3>
|
|
</refsect2>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Examples</title>
|
|
|
|
<para>
|
|
The following example copies a table to the client
|
|
using the vertical bar (<literal>|</literal>) as the field delimiter:
|
|
<programlisting>
|
|
COPY country TO STDOUT (DELIMITER '|');
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
To copy data from a file into the <literal>country</> table:
|
|
<programlisting>
|
|
COPY country FROM '/usr1/proj/bray/sql/country_data';
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
To copy into a file just the countries whose names start with 'A':
|
|
<programlisting>
|
|
COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy';
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
Here is a sample of data suitable for copying into a table from
|
|
<literal>STDIN</literal>:
|
|
<programlisting>
|
|
AF AFGHANISTAN
|
|
AL ALBANIA
|
|
DZ ALGERIA
|
|
ZM ZAMBIA
|
|
ZW ZIMBABWE
|
|
</programlisting>
|
|
Note that the white space on each line is actually a tab character.
|
|
</para>
|
|
|
|
<para>
|
|
The following is the same data, output in binary format.
|
|
The data is shown after filtering through the
|
|
Unix utility <command>od -c</command>. The table has three columns;
|
|
the first has type <type>char(2)</type>, the second has type <type>text</type>,
|
|
and the third has type <type>integer</type>. All the rows have a null value
|
|
in the third column.
|
|
<programlisting>
|
|
0000000 P G C O P Y \n 377 \r \n \0 \0 \0 \0 \0 \0
|
|
0000020 \0 \0 \0 \0 003 \0 \0 \0 002 A F \0 \0 \0 013 A
|
|
0000040 F G H A N I S T A N 377 377 377 377 \0 003
|
|
0000060 \0 \0 \0 002 A L \0 \0 \0 007 A L B A N I
|
|
0000100 A 377 377 377 377 \0 003 \0 \0 \0 002 D Z \0 \0 \0
|
|
0000120 007 A L G E R I A 377 377 377 377 \0 003 \0 \0
|
|
0000140 \0 002 Z M \0 \0 \0 006 Z A M B I A 377 377
|
|
0000160 377 377 \0 003 \0 \0 \0 002 Z W \0 \0 \0 \b Z I
|
|
0000200 M B A B W E 377 377 377 377 377 377
|
|
</programlisting></para>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Compatibility</title>
|
|
|
|
<para>
|
|
There is no <command>COPY</command> statement in the SQL standard.
|
|
</para>
|
|
|
|
<para>
|
|
The following syntax was used before <productname>PostgreSQL</>
|
|
version 9.0 and is still supported:
|
|
|
|
<synopsis>
|
|
COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
|
|
FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
|
|
[ [ WITH ]
|
|
[ BINARY ]
|
|
[ OIDS ]
|
|
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
|
|
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
|
|
[ CSV [ HEADER ]
|
|
[ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
|
|
[ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
|
|
[ FORCE NOT NULL <replaceable class="parameter">column_name</replaceable> [, ...] ] ] ]
|
|
|
|
COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
|
|
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
|
|
[ [ WITH ]
|
|
[ BINARY ]
|
|
[ OIDS ]
|
|
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
|
|
[ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
|
|
[ CSV [ HEADER ]
|
|
[ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
|
|
[ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
|
|
[ FORCE QUOTE { <replaceable class="parameter">column_name</replaceable> [, ...] | * } ] ] ]
|
|
</synopsis>
|
|
|
|
Note that in this syntax, <literal>BINARY</> and <literal>CSV</> are
|
|
treated as independent keywords, not as arguments of a <literal>FORMAT</>
|
|
option.
|
|
</para>
|
|
|
|
<para>
|
|
The following syntax was used before <productname>PostgreSQL</>
|
|
version 7.3 and is still supported:
|
|
|
|
<synopsis>
|
|
COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> [ WITH OIDS ]
|
|
FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
|
|
[ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ]
|
|
[ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ]
|
|
|
|
COPY [ BINARY ] <replaceable class="parameter">table_name</replaceable> [ WITH OIDS ]
|
|
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
|
|
[ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ]
|
|
[ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ]
|
|
</synopsis></para>
|
|
</refsect1>
|
|
</refentry>
|