1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* copy.c
|
2006-08-31 02:35:32 +02:00
|
|
|
* Implements the COPY utility command
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2024-01-04 02:49:05 +01:00
|
|
|
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/commands/copy.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
2000-12-02 21:49:24 +01:00
|
|
|
#include "postgres.h"
|
1996-08-27 09:42:29 +02:00
|
|
|
|
2005-09-01 17:34:31 +02:00
|
|
|
#include <ctype.h>
|
1996-11-06 09:21:43 +01:00
|
|
|
#include <unistd.h>
|
1999-07-16 07:00:38 +02:00
|
|
|
#include <sys/stat.h>
|
1996-11-06 09:21:43 +01:00
|
|
|
|
2010-07-22 02:47:59 +02:00
|
|
|
#include "access/sysattr.h"
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
#include "access/table.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "access/xact.h"
|
2018-04-06 20:47:10 +02:00
|
|
|
#include "catalog/pg_authid.h"
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "commands/copy.h"
|
2009-09-21 22:10:21 +02:00
|
|
|
#include "commands/defrem.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "executor/executor.h"
|
2002-09-20 05:52:50 +02:00
|
|
|
#include "mb/pg_wchar.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "miscadmin.h"
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
#include "nodes/makefuncs.h"
|
2019-11-12 04:00:16 +01:00
|
|
|
#include "optimizer/optimizer.h"
|
2019-01-19 23:48:16 +01:00
|
|
|
#include "parser/parse_coerce.h"
|
|
|
|
#include "parser/parse_collate.h"
|
|
|
|
#include "parser/parse_expr.h"
|
2017-04-18 05:22:04 +02:00
|
|
|
#include "parser/parse_relation.h"
|
2020-03-10 10:22:52 +01:00
|
|
|
#include "utils/acl.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "utils/builtins.h"
|
2002-08-22 02:01:51 +02:00
|
|
|
#include "utils/lsyscache.h"
|
2011-02-23 18:18:09 +01:00
|
|
|
#include "utils/rel.h"
|
Fix column-privilege leak in error-message paths
While building error messages to return to the user,
BuildIndexValueDescription, ExecBuildSlotValueDescription and
ri_ReportViolation would happily include the entire key or entire row in
the result returned to the user, even if the user didn't have access to
view all of the columns being included.
Instead, include only those columns which the user is providing or which
the user has select rights on. If the user does not have any rights
to view the table or any of the columns involved then no detail is
provided and a NULL value is returned from BuildIndexValueDescription
and ExecBuildSlotValueDescription. Note that, for key cases, the user
must have access to all of the columns for the key to be shown; a
partial key will not be returned.
Further, in master only, do not return any data for cases where row
security is enabled on the relation and row security should be applied
for the user. This required a bit of refactoring and moving of things
around related to RLS- note the addition of utils/misc/rls.c.
Back-patch all the way, as column-level privileges are now in all
supported versions.
This has been assigned CVE-2014-8161, but since the issue and the patch
have already been publicized on pgsql-hackers, there's no point in trying
to hide this commit.
2015-01-12 23:04:11 +01:00
|
|
|
#include "utils/rls.h"
|
1999-01-11 04:56:11 +01:00
|
|
|
|
1996-11-02 03:01:48 +01:00
|
|
|
/*
|
2006-08-31 05:17:50 +02:00
|
|
|
* DoCopy executes the SQL COPY statement
|
2001-08-10 20:57:42 +02:00
|
|
|
*
|
2002-03-29 20:06:29 +01:00
|
|
|
* Either unload or reload contents of table <relation>, depending on <from>.
|
2017-08-16 06:22:32 +02:00
|
|
|
* (<from> = true means we are inserting into the table.) In the "TO" case
|
2015-11-27 17:11:22 +01:00
|
|
|
* we also support copying the output of an arbitrary SELECT, INSERT, UPDATE
|
|
|
|
* or DELETE query.
|
2001-08-10 20:57:42 +02:00
|
|
|
*
|
|
|
|
* If <pipe> is false, transfer is between the table and the file named
|
|
|
|
* <filename>. Otherwise, transfer is between the table and our regular
|
|
|
|
* input/output stream. The latter could be either stdin/stdout or a
|
|
|
|
* socket, depending on whether we're running under Postmaster control.
|
|
|
|
*
|
2019-06-17 09:13:16 +02:00
|
|
|
* Do not allow a Postgres user without the 'pg_read_server_files' or
|
|
|
|
* 'pg_write_server_files' role to read from or write to a file.
|
2011-02-16 03:19:11 +01:00
|
|
|
*
|
|
|
|
* Do not allow the copy if user doesn't have proper permission to access
|
|
|
|
* the table or the specifically requested columns.
|
|
|
|
*/
|
Change representation of statement lists, and add statement location info.
This patch makes several changes that improve the consistency of
representation of lists of statements. It's always been the case
that the output of parse analysis is a list of Query nodes, whatever
the types of the individual statements in the list. This patch brings
similar consistency to the outputs of raw parsing and planning steps:
* The output of raw parsing is now always a list of RawStmt nodes;
the statement-type-dependent nodes are one level down from that.
* The output of pg_plan_queries() is now always a list of PlannedStmt
nodes, even for utility statements. In the case of a utility statement,
"planning" just consists of wrapping a CMD_UTILITY PlannedStmt around
the utility node. This list representation is now used in Portal and
CachedPlan plan lists, replacing the former convention of intermixing
PlannedStmts with bare utility-statement nodes.
Now, every list of statements has a consistent head-node type depending
on how far along it is in processing. This allows changing many places
that formerly used generic "Node *" pointers to use a more specific
pointer type, thus reducing the number of IsA() tests and casts needed,
as well as improving code clarity.
Also, the post-parse-analysis representation of DECLARE CURSOR is changed
so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained
SELECT remains a child of the DeclareCursorStmt rather than getting flipped
around to be the other way. It's now true for both Query and PlannedStmt
that utilityStmt is non-null if and only if commandType is CMD_UTILITY.
That allows simplifying a lot of places that were testing both fields.
(I think some of those were just defensive programming, but in many places,
it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.)
Because PlannedStmt carries a canSetTag field, we're also able to get rid
of some ad-hoc rules about how to reconstruct canSetTag for a bare utility
statement; specifically, the assumption that a utility is canSetTag if and
only if it's the only one in its list. While I see no near-term need for
relaxing that restriction, it's nice to get rid of the ad-hocery.
The API of ProcessUtility() is changed so that what it's passed is the
wrapper PlannedStmt not just the bare utility statement. This will affect
all users of ProcessUtility_hook, but the changes are pretty trivial; see
the affected contrib modules for examples of the minimum change needed.
(Most compilers should give pointer-type-mismatch warnings for uncorrected
code.)
There's also a change in the API of ExplainOneQuery_hook, to pass through
cursorOptions instead of expecting hook functions to know what to pick.
This is needed because of the DECLARE CURSOR changes, but really should
have been done in 9.6; it's unlikely that any extant hook functions
know about using CURSOR_OPT_PARALLEL_OK.
Finally, teach gram.y to save statement boundary locations in RawStmt
nodes, and pass those through to Query and PlannedStmt nodes. This allows
more intelligent handling of cases where a source query string contains
multiple statements. This patch doesn't actually do anything with the
information, but a follow-on patch will. (Passing this information through
cleanly is the true motivation for these changes; while I think this is all
good cleanup, it's unlikely we'd have bothered without this end goal.)
catversion bump because addition of location fields to struct Query
affects stored rules.
This patch is by me, but it owes a good deal to Fabien Coelho who did
a lot of preliminary work on the problem, and also reviewed the patch.
Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
|
|
|
void
|
|
|
|
DoCopy(ParseState *pstate, const CopyStmt *stmt,
|
|
|
|
int stmt_location, int stmt_len,
|
|
|
|
uint64 *processed)
|
2011-02-16 03:19:11 +01:00
|
|
|
{
|
|
|
|
bool is_from = stmt->is_from;
|
|
|
|
bool pipe = (stmt->filename == NULL);
|
|
|
|
Relation rel;
|
2012-12-29 13:55:37 +01:00
|
|
|
Oid relid;
|
Change representation of statement lists, and add statement location info.
This patch makes several changes that improve the consistency of
representation of lists of statements. It's always been the case
that the output of parse analysis is a list of Query nodes, whatever
the types of the individual statements in the list. This patch brings
similar consistency to the outputs of raw parsing and planning steps:
* The output of raw parsing is now always a list of RawStmt nodes;
the statement-type-dependent nodes are one level down from that.
* The output of pg_plan_queries() is now always a list of PlannedStmt
nodes, even for utility statements. In the case of a utility statement,
"planning" just consists of wrapping a CMD_UTILITY PlannedStmt around
the utility node. This list representation is now used in Portal and
CachedPlan plan lists, replacing the former convention of intermixing
PlannedStmts with bare utility-statement nodes.
Now, every list of statements has a consistent head-node type depending
on how far along it is in processing. This allows changing many places
that formerly used generic "Node *" pointers to use a more specific
pointer type, thus reducing the number of IsA() tests and casts needed,
as well as improving code clarity.
Also, the post-parse-analysis representation of DECLARE CURSOR is changed
so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained
SELECT remains a child of the DeclareCursorStmt rather than getting flipped
around to be the other way. It's now true for both Query and PlannedStmt
that utilityStmt is non-null if and only if commandType is CMD_UTILITY.
That allows simplifying a lot of places that were testing both fields.
(I think some of those were just defensive programming, but in many places,
it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.)
Because PlannedStmt carries a canSetTag field, we're also able to get rid
of some ad-hoc rules about how to reconstruct canSetTag for a bare utility
statement; specifically, the assumption that a utility is canSetTag if and
only if it's the only one in its list. While I see no near-term need for
relaxing that restriction, it's nice to get rid of the ad-hocery.
The API of ProcessUtility() is changed so that what it's passed is the
wrapper PlannedStmt not just the bare utility statement. This will affect
all users of ProcessUtility_hook, but the changes are pretty trivial; see
the affected contrib modules for examples of the minimum change needed.
(Most compilers should give pointer-type-mismatch warnings for uncorrected
code.)
There's also a change in the API of ExplainOneQuery_hook, to pass through
cursorOptions instead of expecting hook functions to know what to pick.
This is needed because of the DECLARE CURSOR changes, but really should
have been done in 9.6; it's unlikely that any extant hook functions
know about using CURSOR_OPT_PARALLEL_OK.
Finally, teach gram.y to save statement boundary locations in RawStmt
nodes, and pass those through to Query and PlannedStmt nodes. This allows
more intelligent handling of cases where a source query string contains
multiple statements. This patch doesn't actually do anything with the
information, but a follow-on patch will. (Passing this information through
cleanly is the true motivation for these changes; while I think this is all
good cleanup, it's unlikely we'd have bothered without this end goal.)
catversion bump because addition of location fields to struct Query
affects stored rules.
This patch is by me, but it owes a good deal to Fabien Coelho who did
a lot of preliminary work on the problem, and also reviewed the patch.
Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
|
|
|
RawStmt *query = NULL;
|
tableam: Add table_multi_insert() and revamp/speed-up COPY FROM buffering.
This adds table_multi_insert(), and converts COPY FROM, the only user
of heap_multi_insert, to it.
A simple conversion of COPY FROM use slots would have yielded a
slowdown when inserting into a partitioned table for some
workloads. Different partitions might need different slots (both slot
types and their descriptors), and dropping / creating slots when
there's constant partition changes is measurable.
Thus instead revamp the COPY FROM buffering for partitioned tables to
allow to buffer inserts into multiple tables, flushing only when
limits are reached across all partition buffers. By only dropping
slots when there've been inserts into too many different partitions,
the aforementioned overhead is gone. By allowing larger batches, even
when there are frequent partition changes, we actuall speed such cases
up significantly.
By using slots COPY of very narrow rows into unlogged / temporary
might slow down very slightly (due to the indirect function calls).
Author: David Rowley, Andres Freund, Haribabu Kommi
Discussion:
https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
https://postgr.es/m/20190327054923.t3epfuewxfqdt22e@alap3.anarazel.de
2019-04-05 00:47:19 +02:00
|
|
|
Node *whereClause = NULL;
|
2011-02-16 03:19:11 +01:00
|
|
|
|
2018-04-06 20:47:10 +02:00
|
|
|
/*
|
|
|
|
* Disallow COPY to/from file or program except to users with the
|
|
|
|
* appropriate role.
|
|
|
|
*/
|
|
|
|
if (!pipe)
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
2013-02-27 17:17:21 +01:00
|
|
|
{
|
|
|
|
if (stmt->is_program)
|
2018-04-06 20:47:10 +02:00
|
|
|
{
|
2022-03-28 21:10:04 +02:00
|
|
|
if (!has_privs_of_role(GetUserId(), ROLE_PG_EXECUTE_SERVER_PROGRAM))
|
2018-04-06 20:47:10 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2023-03-17 10:14:16 +01:00
|
|
|
errmsg("permission denied to COPY to or from an external program"),
|
|
|
|
errdetail("Only roles with privileges of the \"%s\" role may COPY to or from an external program.",
|
|
|
|
"pg_execute_server_program"),
|
2018-04-06 20:47:10 +02:00
|
|
|
errhint("Anyone can COPY to stdout or from stdin. "
|
|
|
|
"psql's \\copy command also works for anyone.")));
|
|
|
|
}
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
2013-02-27 17:17:21 +01:00
|
|
|
else
|
2018-04-06 20:47:10 +02:00
|
|
|
{
|
2022-03-28 21:10:04 +02:00
|
|
|
if (is_from && !has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES))
|
2018-04-06 20:47:10 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2023-03-17 10:14:16 +01:00
|
|
|
errmsg("permission denied to COPY from a file"),
|
|
|
|
errdetail("Only roles with privileges of the \"%s\" role may COPY from a file.",
|
|
|
|
"pg_read_server_files"),
|
2018-04-06 20:47:10 +02:00
|
|
|
errhint("Anyone can COPY to stdout or from stdin. "
|
|
|
|
"psql's \\copy command also works for anyone.")));
|
|
|
|
|
2022-03-28 21:10:04 +02:00
|
|
|
if (!is_from && !has_privs_of_role(GetUserId(), ROLE_PG_WRITE_SERVER_FILES))
|
2018-04-06 20:47:10 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2023-03-17 10:14:16 +01:00
|
|
|
errmsg("permission denied to COPY to a file"),
|
|
|
|
errdetail("Only roles with privileges of the \"%s\" role may COPY to a file.",
|
|
|
|
"pg_write_server_files"),
|
2018-04-06 20:47:10 +02:00
|
|
|
errhint("Anyone can COPY to stdout or from stdin. "
|
|
|
|
"psql's \\copy command also works for anyone.")));
|
|
|
|
}
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
2013-02-27 17:17:21 +01:00
|
|
|
}
|
2011-02-16 03:19:11 +01:00
|
|
|
|
|
|
|
if (stmt->relation)
|
|
|
|
{
|
Create an RTE field to record the query's lock mode for each relation.
Add RangeTblEntry.rellockmode, which records the appropriate lock mode for
each RTE_RELATION rangetable entry (either AccessShareLock, RowShareLock,
or RowExclusiveLock depending on the RTE's role in the query).
This patch creates the field and makes all creators of RTE nodes fill it
in reasonably, but for the moment nothing much is done with it. The plan
is to replace assorted post-parser logic that re-determines the right
lockmode to use with simple uses of rte->rellockmode. For now, just add
Asserts in each of those places that the rellockmode matches what they are
computing today. (In some cases the match isn't perfect, so the Asserts
are weaker than you might expect; but this seems OK, as per discussion.)
This passes check-world for me, but it seems worth pushing in this state
to see if the buildfarm finds any problems in cases I failed to test.
catversion bump due to change of stored rules.
Amit Langote, reviewed by David Rowley and Jesper Pedersen,
and whacked around a bit more by me
Discussion: https://postgr.es/m/468c85d9-540e-66a2-1dde-fec2b741e688@lab.ntt.co.jp
2018-09-30 19:55:51 +02:00
|
|
|
LOCKMODE lockmode = is_from ? RowExclusiveLock : AccessShareLock;
|
Make parser rely more heavily on the ParseNamespaceItem data structure.
When I added the ParseNamespaceItem data structure (in commit 5ebaaa494),
it wasn't very tightly integrated into the parser's APIs. In the wake of
adding p_rtindex to that struct (commit b541e9acc), there is a good reason
to make more use of it: by passing around ParseNamespaceItem pointers
instead of bare RTE pointers, we can get rid of various messy methods for
passing back or deducing the rangetable index of an RTE during parsing.
Hence, refactor the addRangeTableEntryXXX functions to build and return
a ParseNamespaceItem struct, not just the RTE proper; and replace
addRTEtoQuery with addNSItemToQuery, which is passed a ParseNamespaceItem
rather than building one internally.
Also, add per-column data (a ParseNamespaceColumn array) to each
ParseNamespaceItem. These arrays are built during addRangeTableEntryXXX,
where we have column type data at hand so that it's nearly free to fill
the data structure. Later, when we need to build Vars referencing RTEs,
we can use the ParseNamespaceColumn info to avoid the rather expensive
operations done in get_rte_attribute_type() or expandRTE().
get_rte_attribute_type() is indeed dead code now, so I've removed it.
This makes for a useful improvement in parse analysis speed, around 20%
in one moderately-complex test query.
The ParseNamespaceColumn structs also include Var identity information
(varno/varattno). That info isn't actually being used in this patch,
except that p_varno == 0 is a handy test for a dropped column.
A follow-on patch will make more use of it.
Discussion: https://postgr.es/m/2461.1577764221@sss.pgh.pa.us
2020-01-02 17:29:01 +01:00
|
|
|
ParseNamespaceItem *nsitem;
|
2022-12-06 16:09:24 +01:00
|
|
|
RTEPermissionInfo *perminfo;
|
2011-02-16 03:19:11 +01:00
|
|
|
TupleDesc tupDesc;
|
|
|
|
List *attnums;
|
|
|
|
ListCell *cur;
|
|
|
|
|
|
|
|
Assert(!stmt->query);
|
|
|
|
|
|
|
|
/* Open and lock the relation, using the appropriate lock type. */
|
2019-01-21 19:32:19 +01:00
|
|
|
rel = table_openrv(stmt->relation, lockmode);
|
2011-02-16 03:19:11 +01:00
|
|
|
|
2012-12-29 13:55:37 +01:00
|
|
|
relid = RelationGetRelid(rel);
|
|
|
|
|
Make parser rely more heavily on the ParseNamespaceItem data structure.
When I added the ParseNamespaceItem data structure (in commit 5ebaaa494),
it wasn't very tightly integrated into the parser's APIs. In the wake of
adding p_rtindex to that struct (commit b541e9acc), there is a good reason
to make more use of it: by passing around ParseNamespaceItem pointers
instead of bare RTE pointers, we can get rid of various messy methods for
passing back or deducing the rangetable index of an RTE during parsing.
Hence, refactor the addRangeTableEntryXXX functions to build and return
a ParseNamespaceItem struct, not just the RTE proper; and replace
addRTEtoQuery with addNSItemToQuery, which is passed a ParseNamespaceItem
rather than building one internally.
Also, add per-column data (a ParseNamespaceColumn array) to each
ParseNamespaceItem. These arrays are built during addRangeTableEntryXXX,
where we have column type data at hand so that it's nearly free to fill
the data structure. Later, when we need to build Vars referencing RTEs,
we can use the ParseNamespaceColumn info to avoid the rather expensive
operations done in get_rte_attribute_type() or expandRTE().
get_rte_attribute_type() is indeed dead code now, so I've removed it.
This makes for a useful improvement in parse analysis speed, around 20%
in one moderately-complex test query.
The ParseNamespaceColumn structs also include Var identity information
(varno/varattno). That info isn't actually being used in this patch,
except that p_varno == 0 is a handy test for a dropped column.
A follow-on patch will make more use of it.
Discussion: https://postgr.es/m/2461.1577764221@sss.pgh.pa.us
2020-01-02 17:29:01 +01:00
|
|
|
nsitem = addRangeTableEntryForRelation(pstate, rel, lockmode,
|
|
|
|
NULL, false, false);
|
2022-12-06 16:09:24 +01:00
|
|
|
|
|
|
|
perminfo = nsitem->p_perminfo;
|
|
|
|
perminfo->requiredPerms = (is_from ? ACL_INSERT : ACL_SELECT);
|
2011-02-16 03:19:11 +01:00
|
|
|
|
2019-01-19 23:48:16 +01:00
|
|
|
if (stmt->whereClause)
|
|
|
|
{
|
Make parser rely more heavily on the ParseNamespaceItem data structure.
When I added the ParseNamespaceItem data structure (in commit 5ebaaa494),
it wasn't very tightly integrated into the parser's APIs. In the wake of
adding p_rtindex to that struct (commit b541e9acc), there is a good reason
to make more use of it: by passing around ParseNamespaceItem pointers
instead of bare RTE pointers, we can get rid of various messy methods for
passing back or deducing the rangetable index of an RTE during parsing.
Hence, refactor the addRangeTableEntryXXX functions to build and return
a ParseNamespaceItem struct, not just the RTE proper; and replace
addRTEtoQuery with addNSItemToQuery, which is passed a ParseNamespaceItem
rather than building one internally.
Also, add per-column data (a ParseNamespaceColumn array) to each
ParseNamespaceItem. These arrays are built during addRangeTableEntryXXX,
where we have column type data at hand so that it's nearly free to fill
the data structure. Later, when we need to build Vars referencing RTEs,
we can use the ParseNamespaceColumn info to avoid the rather expensive
operations done in get_rte_attribute_type() or expandRTE().
get_rte_attribute_type() is indeed dead code now, so I've removed it.
This makes for a useful improvement in parse analysis speed, around 20%
in one moderately-complex test query.
The ParseNamespaceColumn structs also include Var identity information
(varno/varattno). That info isn't actually being used in this patch,
except that p_varno == 0 is a handy test for a dropped column.
A follow-on patch will make more use of it.
Discussion: https://postgr.es/m/2461.1577764221@sss.pgh.pa.us
2020-01-02 17:29:01 +01:00
|
|
|
/* add nsitem to query namespace */
|
|
|
|
addNSItemToQuery(pstate, nsitem, false, true, true);
|
2019-01-19 23:48:16 +01:00
|
|
|
|
|
|
|
/* Transform the raw expression tree */
|
|
|
|
whereClause = transformExpr(pstate, stmt->whereClause, EXPR_KIND_COPY_WHERE);
|
|
|
|
|
tableam: Add table_multi_insert() and revamp/speed-up COPY FROM buffering.
This adds table_multi_insert(), and converts COPY FROM, the only user
of heap_multi_insert, to it.
A simple conversion of COPY FROM use slots would have yielded a
slowdown when inserting into a partitioned table for some
workloads. Different partitions might need different slots (both slot
types and their descriptors), and dropping / creating slots when
there's constant partition changes is measurable.
Thus instead revamp the COPY FROM buffering for partitioned tables to
allow to buffer inserts into multiple tables, flushing only when
limits are reached across all partition buffers. By only dropping
slots when there've been inserts into too many different partitions,
the aforementioned overhead is gone. By allowing larger batches, even
when there are frequent partition changes, we actuall speed such cases
up significantly.
By using slots COPY of very narrow rows into unlogged / temporary
might slow down very slightly (due to the indirect function calls).
Author: David Rowley, Andres Freund, Haribabu Kommi
Discussion:
https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
https://postgr.es/m/20190327054923.t3epfuewxfqdt22e@alap3.anarazel.de
2019-04-05 00:47:19 +02:00
|
|
|
/* Make sure it yields a boolean result. */
|
2019-01-19 23:48:16 +01:00
|
|
|
whereClause = coerce_to_boolean(pstate, whereClause, "WHERE");
|
|
|
|
|
|
|
|
/* we have to fix its collations too */
|
|
|
|
assign_expr_collations(pstate, whereClause);
|
|
|
|
|
|
|
|
whereClause = eval_const_expressions(NULL, whereClause);
|
|
|
|
|
|
|
|
whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
|
|
|
|
whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
|
|
|
|
}
|
|
|
|
|
2011-02-16 03:19:11 +01:00
|
|
|
tupDesc = RelationGetDescr(rel);
|
|
|
|
attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
|
2011-02-20 20:06:59 +01:00
|
|
|
foreach(cur, attnums)
|
2011-02-16 03:19:11 +01:00
|
|
|
{
|
2022-12-06 16:09:24 +01:00
|
|
|
int attno;
|
|
|
|
Bitmapset **bms;
|
2011-02-16 03:19:11 +01:00
|
|
|
|
2022-12-06 16:09:24 +01:00
|
|
|
attno = lfirst_int(cur) - FirstLowInvalidHeapAttributeNumber;
|
|
|
|
bms = is_from ? &perminfo->insertedCols : &perminfo->selectedCols;
|
|
|
|
|
|
|
|
*bms = bms_add_member(*bms, attno);
|
2011-02-16 03:19:11 +01:00
|
|
|
}
|
2022-12-06 16:09:24 +01:00
|
|
|
ExecCheckPermissions(pstate->p_rtable, list_make1(perminfo), true);
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
|
|
|
/*
|
Rename pg_rowsecurity -> pg_policy and other fixes
As pointed out by Robert, we should really have named pg_rowsecurity
pg_policy, as the objects stored in that catalog are policies. This
patch fixes that and updates the column names to start with 'pol' to
match the new catalog name.
The security consideration for COPY with row level security, also
pointed out by Robert, has also been addressed by remembering and
re-checking the OID of the relation initially referenced during COPY
processing, to make sure it hasn't changed under us by the time we
finish planning out the query which has been built.
Robert and Alvaro also commented on missing OCLASS and OBJECT entries
for POLICY (formerly ROWSECURITY or POLICY, depending) in various
places. This patch fixes that too, which also happens to add the
ability to COMMENT on policies.
In passing, attempt to improve the consistency of messages, comments,
and documentation as well. This removes various incarnations of
'row-security', 'row-level security', 'Row-security', etc, in favor
of 'policy', 'row level security' or 'row_security' as appropriate.
Happy Thanksgiving!
2014-11-27 07:06:36 +01:00
|
|
|
* Permission check for row security policies.
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
*
|
|
|
|
* check_enable_rls will ereport(ERROR) if the user has requested
|
|
|
|
* something invalid and will otherwise indicate if we should enable
|
|
|
|
* RLS (returns RLS_ENABLED) or not for this COPY statement.
|
|
|
|
*
|
|
|
|
* If the relation has a row security policy and we are to apply it
|
|
|
|
* then perform a "query" copy and allow the normal query processing
|
|
|
|
* to handle the policies.
|
|
|
|
*
|
|
|
|
* If RLS is not enabled for this, then just fall through to the
|
|
|
|
* normal non-filtering relation handling.
|
|
|
|
*/
|
2022-12-06 16:09:24 +01:00
|
|
|
if (check_enable_rls(relid, InvalidOid, false) == RLS_ENABLED)
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
{
|
|
|
|
SelectStmt *select;
|
|
|
|
ColumnRef *cr;
|
|
|
|
ResTarget *target;
|
|
|
|
RangeVar *from;
|
2016-10-03 22:22:57 +02:00
|
|
|
List *targetList = NIL;
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
|
|
|
if (is_from)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2015-11-17 12:53:07 +01:00
|
|
|
errmsg("COPY FROM not supported with row-level security"),
|
2015-10-29 01:23:53 +01:00
|
|
|
errhint("Use INSERT statements instead.")));
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
2016-10-03 22:22:57 +02:00
|
|
|
/*
|
|
|
|
* Build target list
|
|
|
|
*
|
|
|
|
* If no columns are specified in the attribute list of the COPY
|
|
|
|
* command, then the target list is 'all' columns. Therefore, '*'
|
|
|
|
* should be used as the target list for the resulting SELECT
|
|
|
|
* statement.
|
|
|
|
*
|
|
|
|
* In the case that columns are specified in the attribute list,
|
2016-11-10 20:13:43 +01:00
|
|
|
* create a ColumnRef and ResTarget for each column and add them
|
|
|
|
* to the target list for the resulting SELECT statement.
|
2016-10-03 22:22:57 +02:00
|
|
|
*/
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
if (!stmt->attlist)
|
2016-10-03 22:22:57 +02:00
|
|
|
{
|
|
|
|
cr = makeNode(ColumnRef);
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
cr->fields = list_make1(makeNode(A_Star));
|
2016-10-03 22:22:57 +02:00
|
|
|
cr->location = -1;
|
|
|
|
|
|
|
|
target = makeNode(ResTarget);
|
|
|
|
target->name = NULL;
|
|
|
|
target->indirection = NIL;
|
|
|
|
target->val = (Node *) cr;
|
|
|
|
target->location = -1;
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
2016-10-03 22:22:57 +02:00
|
|
|
targetList = list_make1(target);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ListCell *lc;
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
2016-10-03 22:22:57 +02:00
|
|
|
foreach(lc, stmt->attlist)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Build the ColumnRef for each column. The ColumnRef
|
Remove Value node struct
The Value node struct is a weird construct. It is its own node type,
but most of the time, it actually has a node type of Integer, Float,
String, or BitString. As a consequence, the struct name and the node
type don't match most of the time, and so it has to be treated
specially a lot. There doesn't seem to be any value in the special
construct. There is very little code that wants to accept all Value
variants but nothing else (and even if it did, this doesn't provide
any convenient way to check it), and most code wants either just one
particular node type (usually String), or it accepts a broader set of
node types besides just Value.
This change removes the Value struct and node type and replaces them
by separate Integer, Float, String, and BitString node types that are
proper node types and structs of their own and behave mostly like
normal node types.
Also, this removes the T_Null node tag, which was previously also a
possible variant of Value but wasn't actually used outside of the
Value contained in A_Const. Replace that by an isnull field in
A_Const.
Reviewed-by: Dagfinn Ilmari Mannsåker <ilmari@ilmari.org>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/5ba6bc5b-3f95-04f2-2419-f8ddb4c046fb@enterprisedb.com
2021-09-09 07:58:12 +02:00
|
|
|
* 'fields' property is a String node that corresponds to
|
|
|
|
* the column name respectively.
|
2016-10-03 22:22:57 +02:00
|
|
|
*/
|
|
|
|
cr = makeNode(ColumnRef);
|
|
|
|
cr->fields = list_make1(lfirst(lc));
|
|
|
|
cr->location = -1;
|
|
|
|
|
|
|
|
/* Build the ResTarget and add the ColumnRef to it. */
|
|
|
|
target = makeNode(ResTarget);
|
|
|
|
target->name = NULL;
|
|
|
|
target->indirection = NIL;
|
|
|
|
target->val = (Node *) cr;
|
|
|
|
target->location = -1;
|
|
|
|
|
|
|
|
/* Add each column to the SELECT statement's target list */
|
|
|
|
targetList = lappend(targetList, target);
|
|
|
|
}
|
|
|
|
}
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
2015-07-27 22:48:26 +02:00
|
|
|
/*
|
|
|
|
* Build RangeVar for from clause, fully qualified based on the
|
2023-03-10 19:52:28 +01:00
|
|
|
* relation which we have opened and locked. Use "ONLY" so that
|
|
|
|
* COPY retrieves rows from only the target table not any
|
|
|
|
* inheritance children, the same as when RLS doesn't apply.
|
2015-07-27 22:48:26 +02:00
|
|
|
*/
|
|
|
|
from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
|
2017-03-06 22:50:47 +01:00
|
|
|
pstrdup(RelationGetRelationName(rel)),
|
|
|
|
-1);
|
2023-03-10 19:52:28 +01:00
|
|
|
from->inh = false; /* apply ONLY */
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
|
|
|
/* Build query */
|
|
|
|
select = makeNode(SelectStmt);
|
2016-10-03 22:22:57 +02:00
|
|
|
select->targetList = targetList;
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
select->fromClause = list_make1(from);
|
|
|
|
|
Change representation of statement lists, and add statement location info.
This patch makes several changes that improve the consistency of
representation of lists of statements. It's always been the case
that the output of parse analysis is a list of Query nodes, whatever
the types of the individual statements in the list. This patch brings
similar consistency to the outputs of raw parsing and planning steps:
* The output of raw parsing is now always a list of RawStmt nodes;
the statement-type-dependent nodes are one level down from that.
* The output of pg_plan_queries() is now always a list of PlannedStmt
nodes, even for utility statements. In the case of a utility statement,
"planning" just consists of wrapping a CMD_UTILITY PlannedStmt around
the utility node. This list representation is now used in Portal and
CachedPlan plan lists, replacing the former convention of intermixing
PlannedStmts with bare utility-statement nodes.
Now, every list of statements has a consistent head-node type depending
on how far along it is in processing. This allows changing many places
that formerly used generic "Node *" pointers to use a more specific
pointer type, thus reducing the number of IsA() tests and casts needed,
as well as improving code clarity.
Also, the post-parse-analysis representation of DECLARE CURSOR is changed
so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained
SELECT remains a child of the DeclareCursorStmt rather than getting flipped
around to be the other way. It's now true for both Query and PlannedStmt
that utilityStmt is non-null if and only if commandType is CMD_UTILITY.
That allows simplifying a lot of places that were testing both fields.
(I think some of those were just defensive programming, but in many places,
it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.)
Because PlannedStmt carries a canSetTag field, we're also able to get rid
of some ad-hoc rules about how to reconstruct canSetTag for a bare utility
statement; specifically, the assumption that a utility is canSetTag if and
only if it's the only one in its list. While I see no near-term need for
relaxing that restriction, it's nice to get rid of the ad-hocery.
The API of ProcessUtility() is changed so that what it's passed is the
wrapper PlannedStmt not just the bare utility statement. This will affect
all users of ProcessUtility_hook, but the changes are pretty trivial; see
the affected contrib modules for examples of the minimum change needed.
(Most compilers should give pointer-type-mismatch warnings for uncorrected
code.)
There's also a change in the API of ExplainOneQuery_hook, to pass through
cursorOptions instead of expecting hook functions to know what to pick.
This is needed because of the DECLARE CURSOR changes, but really should
have been done in 9.6; it's unlikely that any extant hook functions
know about using CURSOR_OPT_PARALLEL_OK.
Finally, teach gram.y to save statement boundary locations in RawStmt
nodes, and pass those through to Query and PlannedStmt nodes. This allows
more intelligent handling of cases where a source query string contains
multiple statements. This patch doesn't actually do anything with the
information, but a follow-on patch will. (Passing this information through
cleanly is the true motivation for these changes; while I think this is all
good cleanup, it's unlikely we'd have bothered without this end goal.)
catversion bump because addition of location fields to struct Query
affects stored rules.
This patch is by me, but it owes a good deal to Fabien Coelho who did
a lot of preliminary work on the problem, and also reviewed the patch.
Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
|
|
|
query = makeNode(RawStmt);
|
|
|
|
query->stmt = (Node *) select;
|
|
|
|
query->stmt_location = stmt_location;
|
|
|
|
query->stmt_len = stmt_len;
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
|
2015-07-27 22:48:26 +02:00
|
|
|
/*
|
|
|
|
* Close the relation for now, but keep the lock on it to prevent
|
|
|
|
* changes between now and when we start the query-based COPY.
|
|
|
|
*
|
|
|
|
* We'll reopen it later as part of the query-based COPY.
|
|
|
|
*/
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(rel, NoLock);
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
rel = NULL;
|
|
|
|
}
|
2011-02-16 03:19:11 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Assert(stmt->query);
|
|
|
|
|
Change representation of statement lists, and add statement location info.
This patch makes several changes that improve the consistency of
representation of lists of statements. It's always been the case
that the output of parse analysis is a list of Query nodes, whatever
the types of the individual statements in the list. This patch brings
similar consistency to the outputs of raw parsing and planning steps:
* The output of raw parsing is now always a list of RawStmt nodes;
the statement-type-dependent nodes are one level down from that.
* The output of pg_plan_queries() is now always a list of PlannedStmt
nodes, even for utility statements. In the case of a utility statement,
"planning" just consists of wrapping a CMD_UTILITY PlannedStmt around
the utility node. This list representation is now used in Portal and
CachedPlan plan lists, replacing the former convention of intermixing
PlannedStmts with bare utility-statement nodes.
Now, every list of statements has a consistent head-node type depending
on how far along it is in processing. This allows changing many places
that formerly used generic "Node *" pointers to use a more specific
pointer type, thus reducing the number of IsA() tests and casts needed,
as well as improving code clarity.
Also, the post-parse-analysis representation of DECLARE CURSOR is changed
so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained
SELECT remains a child of the DeclareCursorStmt rather than getting flipped
around to be the other way. It's now true for both Query and PlannedStmt
that utilityStmt is non-null if and only if commandType is CMD_UTILITY.
That allows simplifying a lot of places that were testing both fields.
(I think some of those were just defensive programming, but in many places,
it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.)
Because PlannedStmt carries a canSetTag field, we're also able to get rid
of some ad-hoc rules about how to reconstruct canSetTag for a bare utility
statement; specifically, the assumption that a utility is canSetTag if and
only if it's the only one in its list. While I see no near-term need for
relaxing that restriction, it's nice to get rid of the ad-hocery.
The API of ProcessUtility() is changed so that what it's passed is the
wrapper PlannedStmt not just the bare utility statement. This will affect
all users of ProcessUtility_hook, but the changes are pretty trivial; see
the affected contrib modules for examples of the minimum change needed.
(Most compilers should give pointer-type-mismatch warnings for uncorrected
code.)
There's also a change in the API of ExplainOneQuery_hook, to pass through
cursorOptions instead of expecting hook functions to know what to pick.
This is needed because of the DECLARE CURSOR changes, but really should
have been done in 9.6; it's unlikely that any extant hook functions
know about using CURSOR_OPT_PARALLEL_OK.
Finally, teach gram.y to save statement boundary locations in RawStmt
nodes, and pass those through to Query and PlannedStmt nodes. This allows
more intelligent handling of cases where a source query string contains
multiple statements. This patch doesn't actually do anything with the
information, but a follow-on patch will. (Passing this information through
cleanly is the true motivation for these changes; while I think this is all
good cleanup, it's unlikely we'd have bothered without this end goal.)
catversion bump because addition of location fields to struct Query
affects stored rules.
This patch is by me, but it owes a good deal to Fabien Coelho who did
a lot of preliminary work on the problem, and also reviewed the patch.
Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
|
|
|
query = makeNode(RawStmt);
|
|
|
|
query->stmt = stmt->query;
|
|
|
|
query->stmt_location = stmt_location;
|
|
|
|
query->stmt_len = stmt_len;
|
|
|
|
|
2012-12-31 06:13:40 +01:00
|
|
|
relid = InvalidOid;
|
2011-02-16 03:19:11 +01:00
|
|
|
rel = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_from)
|
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
CopyFromState cstate;
|
|
|
|
|
2012-03-14 21:44:40 +01:00
|
|
|
Assert(rel);
|
|
|
|
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 21:02:14 +02:00
|
|
|
/* check read-only transaction and parallel mode */
|
2012-12-18 02:15:32 +01:00
|
|
|
if (XactReadOnly && !rel->rd_islocaltemp)
|
2011-02-16 03:19:11 +01:00
|
|
|
PreventCommandIfReadOnly("COPY FROM");
|
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
cstate = BeginCopyFrom(pstate, rel, whereClause,
|
|
|
|
stmt->filename, stmt->is_program,
|
2017-03-23 13:36:36 +01:00
|
|
|
NULL, stmt->attlist, stmt->options);
|
2012-12-29 13:55:37 +01:00
|
|
|
*processed = CopyFrom(cstate); /* copy from file to database */
|
2011-02-16 03:19:11 +01:00
|
|
|
EndCopyFrom(cstate);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
CopyToState cstate;
|
|
|
|
|
2016-09-06 18:00:00 +02:00
|
|
|
cstate = BeginCopyTo(pstate, rel, query, relid,
|
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
2013-02-27 17:17:21 +01:00
|
|
|
stmt->filename, stmt->is_program,
|
2022-10-11 04:45:52 +02:00
|
|
|
NULL, stmt->attlist, stmt->options);
|
2012-12-29 13:55:37 +01:00
|
|
|
*processed = DoCopyTo(cstate); /* copy from database to file */
|
2011-02-16 03:19:11 +01:00
|
|
|
EndCopyTo(cstate);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rel != NULL)
|
2020-05-15 04:40:00 +02:00
|
|
|
table_close(rel, NoLock);
|
2011-02-16 03:19:11 +01:00
|
|
|
}
|
|
|
|
|
2022-03-30 08:56:58 +02:00
|
|
|
/*
|
|
|
|
* Extract a CopyHeaderChoice value from a DefElem. This is like
|
|
|
|
* defGetBoolean() but also accepts the special value "match".
|
|
|
|
*/
|
|
|
|
static CopyHeaderChoice
|
2022-06-23 03:49:20 +02:00
|
|
|
defGetCopyHeaderChoice(DefElem *def, bool is_from)
|
2022-03-30 08:56:58 +02:00
|
|
|
{
|
|
|
|
/*
|
2022-07-11 04:07:33 +02:00
|
|
|
* If no parameter value given, assume "true" is meant.
|
2022-03-30 08:56:58 +02:00
|
|
|
*/
|
|
|
|
if (def->arg == NULL)
|
|
|
|
return COPY_HEADER_TRUE;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allow 0, 1, "true", "false", "on", "off", or "match".
|
|
|
|
*/
|
|
|
|
switch (nodeTag(def->arg))
|
|
|
|
{
|
|
|
|
case T_Integer:
|
|
|
|
switch (intVal(def->arg))
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
return COPY_HEADER_FALSE;
|
|
|
|
case 1:
|
|
|
|
return COPY_HEADER_TRUE;
|
|
|
|
default:
|
|
|
|
/* otherwise, error out below */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
{
|
|
|
|
char *sval = defGetString(def);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The set of strings accepted here should match up with the
|
|
|
|
* grammar's opt_boolean_or_string production.
|
|
|
|
*/
|
|
|
|
if (pg_strcasecmp(sval, "true") == 0)
|
|
|
|
return COPY_HEADER_TRUE;
|
|
|
|
if (pg_strcasecmp(sval, "false") == 0)
|
|
|
|
return COPY_HEADER_FALSE;
|
|
|
|
if (pg_strcasecmp(sval, "on") == 0)
|
|
|
|
return COPY_HEADER_TRUE;
|
|
|
|
if (pg_strcasecmp(sval, "off") == 0)
|
|
|
|
return COPY_HEADER_FALSE;
|
|
|
|
if (pg_strcasecmp(sval, "match") == 0)
|
2022-06-23 03:49:20 +02:00
|
|
|
{
|
|
|
|
if (!is_from)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("cannot use \"%s\" with HEADER in COPY TO",
|
|
|
|
sval)));
|
2022-03-30 08:56:58 +02:00
|
|
|
return COPY_HEADER_MATCH;
|
2022-06-23 03:49:20 +02:00
|
|
|
}
|
2022-03-30 08:56:58 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("%s requires a Boolean value or \"match\"",
|
|
|
|
def->defname)));
|
|
|
|
return COPY_HEADER_FALSE; /* keep compiler quiet */
|
|
|
|
}
|
|
|
|
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
/*
|
2024-01-19 14:08:40 +01:00
|
|
|
* Extract a CopyOnErrorChoice value from a DefElem.
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
*/
|
2024-01-19 14:08:40 +01:00
|
|
|
static CopyOnErrorChoice
|
|
|
|
defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
{
|
2024-04-17 04:31:27 +02:00
|
|
|
char *sval = defGetString(def);
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
|
|
|
|
if (!is_from)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
2024-01-19 14:08:40 +01:00
|
|
|
errmsg("COPY ON_ERROR cannot be used with COPY TO"),
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
parser_errposition(pstate, def->location)));
|
|
|
|
|
|
|
|
/*
|
2024-01-19 14:08:40 +01:00
|
|
|
* Allow "stop", or "ignore" values.
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
*/
|
2024-01-19 14:08:40 +01:00
|
|
|
if (pg_strcasecmp(sval, "stop") == 0)
|
|
|
|
return COPY_ON_ERROR_STOP;
|
|
|
|
if (pg_strcasecmp(sval, "ignore") == 0)
|
|
|
|
return COPY_ON_ERROR_IGNORE;
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
2024-01-19 14:08:40 +01:00
|
|
|
errmsg("COPY ON_ERROR \"%s\" not recognized", sval),
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
parser_errposition(pstate, def->location)));
|
2024-01-19 14:08:40 +01:00
|
|
|
return COPY_ON_ERROR_STOP; /* keep compiler quiet */
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
}
|
|
|
|
|
2024-04-01 08:25:25 +02:00
|
|
|
/*
|
|
|
|
* Extract a CopyLogVerbosityChoice value from a DefElem.
|
|
|
|
*/
|
|
|
|
static CopyLogVerbosityChoice
|
|
|
|
defGetCopyLogVerbosityChoice(DefElem *def, ParseState *pstate)
|
|
|
|
{
|
|
|
|
char *sval;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allow "default", or "verbose" values.
|
|
|
|
*/
|
|
|
|
sval = defGetString(def);
|
|
|
|
if (pg_strcasecmp(sval, "default") == 0)
|
|
|
|
return COPY_LOG_VERBOSITY_DEFAULT;
|
|
|
|
if (pg_strcasecmp(sval, "verbose") == 0)
|
|
|
|
return COPY_LOG_VERBOSITY_VERBOSE;
|
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("COPY LOG_VERBOSITY \"%s\" not recognized", sval),
|
|
|
|
parser_errposition(pstate, def->location)));
|
|
|
|
return COPY_LOG_VERBOSITY_DEFAULT; /* keep compiler quiet */
|
|
|
|
}
|
|
|
|
|
2011-02-16 03:19:11 +01:00
|
|
|
/*
|
2011-02-20 20:06:59 +01:00
|
|
|
* Process the statement option list for COPY.
|
2011-02-16 03:19:11 +01:00
|
|
|
*
|
2011-02-20 20:06:59 +01:00
|
|
|
* Scan the options list (a list of DefElem) and transpose the information
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
* into *opts_out, applying appropriate error checking.
|
2001-08-10 20:57:42 +02:00
|
|
|
*
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
* If 'opts_out' is not NULL, it is assumed to be filled with zeroes initially.
|
2001-08-10 20:57:42 +02:00
|
|
|
*
|
2011-02-20 20:06:59 +01:00
|
|
|
* This is exported so that external users of the COPY API can sanity-check
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
* a list of options. In that usage, 'opts_out' can be passed as NULL and
|
|
|
|
* the collected data is just leaked until CurrentMemoryContext is reset.
|
2011-02-20 20:06:59 +01:00
|
|
|
*
|
|
|
|
* Note that additional checking, such as whether column names listed in FORCE
|
|
|
|
* QUOTE actually exist, has to be applied later. This just checks for
|
|
|
|
* self-consistency of the options list.
|
1996-11-02 03:01:48 +01:00
|
|
|
*/
|
2011-02-20 20:06:59 +01:00
|
|
|
void
|
2016-09-06 18:00:00 +02:00
|
|
|
ProcessCopyOptions(ParseState *pstate,
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
CopyFormatOptions *opts_out,
|
2011-02-20 20:06:59 +01:00
|
|
|
bool is_from,
|
|
|
|
List *options)
|
1996-11-02 03:01:48 +01:00
|
|
|
{
|
2009-09-21 22:10:21 +02:00
|
|
|
bool format_specified = false;
|
2020-10-05 02:43:17 +02:00
|
|
|
bool freeze_specified = false;
|
|
|
|
bool header_specified = false;
|
2024-01-19 14:08:40 +01:00
|
|
|
bool on_error_specified = false;
|
2024-04-01 08:25:25 +02:00
|
|
|
bool log_verbosity_specified = false;
|
2005-08-06 22:41:58 +02:00
|
|
|
ListCell *option;
|
2011-02-16 03:19:11 +01:00
|
|
|
|
2011-02-20 20:06:59 +01:00
|
|
|
/* Support external use for option sanity checking */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out == NULL)
|
|
|
|
opts_out = (CopyFormatOptions *) palloc0(sizeof(CopyFormatOptions));
|
Handle EPIPE more sanely when we close a pipe reading from a program.
Previously, any program launched by COPY TO/FROM PROGRAM inherited the
server's setting of SIGPIPE handling, i.e. SIG_IGN. Hence, if we were
doing COPY FROM PROGRAM and closed the pipe early, the child process
would see EPIPE on its output file and typically would treat that as
a fatal error, in turn causing the COPY to report error. Similarly,
one could get a failure report from a query that didn't read all of
the output from a contrib/file_fdw foreign table that uses file_fdw's
PROGRAM option.
To fix, ensure that child programs inherit SIG_DFL not SIG_IGN processing
of SIGPIPE. This seems like an all-around better situation since if
the called program wants some non-default treatment of SIGPIPE, it would
expect to have to set that up for itself. Then in COPY, if it's COPY
FROM PROGRAM and we stop reading short of detecting EOF, treat a SIGPIPE
exit from the called program as a non-error condition. This still allows
us to report an error for any case where the called program gets SIGPIPE
on some other file descriptor.
As coded, we won't report a SIGPIPE if we stop reading as a result of
seeing an in-band EOF marker (e.g. COPY BINARY EOF marker). It's
somewhat debatable whether we should complain if the called program
continues to transmit data after an EOF marker. However, it seems like
we should avoid throwing error in any questionable cases, especially in a
back-patched fix, and anyway it would take additional code to make such
an error get reported consistently.
Back-patch to v10. We could go further back, since COPY FROM PROGRAM
has been around awhile, but AFAICS the only way to reach this situation
using core or contrib is via file_fdw, which has only supported PROGRAM
sources since v10. The COPY statement per se has no feature whereby
it'd stop reading without having hit EOF or an error already. Therefore,
I don't see any upside to back-patching further that'd outweigh the
risk of complaints about behavioral change.
Per bug #15449 from Eric Cyr.
Patch by me, review by Etsuro Fujita and Kyotaro Horiguchi
Discussion: https://postgr.es/m/15449-1cf737dd5929450e@postgresql.org
2018-11-19 23:02:25 +01:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->file_encoding = -1;
|
2011-02-21 06:08:04 +01:00
|
|
|
|
2002-06-20 18:00:44 +02:00
|
|
|
/* Extract options from the statement node tree */
|
2011-02-16 03:19:11 +01:00
|
|
|
foreach(option, options)
|
2002-06-20 18:00:44 +02:00
|
|
|
{
|
Improve castNode notation by introducing list-extraction-specific variants.
This extends the castNode() notation introduced by commit 5bcab1114 to
provide, in one step, extraction of a list cell's pointer and coercion to
a concrete node type. For example, "lfirst_node(Foo, lc)" is the same
as "castNode(Foo, lfirst(lc))". Almost half of the uses of castNode
that have appeared so far include a list extraction call, so this is
pretty widely useful, and it saves a few more keystrokes compared to the
old way.
As with the previous patch, back-patch the addition of these macros to
pg_list.h, so that the notation will be available when back-patching.
Patch by me, after an idea of Andrew Gierth's.
Discussion: https://postgr.es/m/14197.1491841216@sss.pgh.pa.us
2017-04-10 19:51:29 +02:00
|
|
|
DefElem *defel = lfirst_node(DefElem, option);
|
2002-06-20 18:00:44 +02:00
|
|
|
|
2009-09-21 22:10:21 +02:00
|
|
|
if (strcmp(defel->defname, "format") == 0)
|
2002-06-20 18:00:44 +02:00
|
|
|
{
|
2009-09-21 22:10:21 +02:00
|
|
|
char *fmt = defGetString(defel);
|
|
|
|
|
|
|
|
if (format_specified)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
2009-09-21 22:10:21 +02:00
|
|
|
format_specified = true;
|
|
|
|
if (strcmp(fmt, "text") == 0)
|
|
|
|
/* default format */ ;
|
|
|
|
else if (strcmp(fmt, "csv") == 0)
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->csv_mode = true;
|
2009-09-21 22:10:21 +02:00
|
|
|
else if (strcmp(fmt, "binary") == 0)
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->binary = true;
|
2009-09-21 22:10:21 +02:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
2016-09-06 18:00:00 +02:00
|
|
|
errmsg("COPY format \"%s\" not recognized", fmt),
|
|
|
|
parser_errposition(pstate, defel->location)));
|
2002-06-20 18:00:44 +02:00
|
|
|
}
|
2012-12-01 13:54:20 +01:00
|
|
|
else if (strcmp(defel->defname, "freeze") == 0)
|
|
|
|
{
|
2020-10-05 02:43:17 +02:00
|
|
|
if (freeze_specified)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
2020-10-05 02:43:17 +02:00
|
|
|
freeze_specified = true;
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->freeze = defGetBoolean(defel);
|
2012-12-01 13:54:20 +01:00
|
|
|
}
|
2002-06-20 18:00:44 +02:00
|
|
|
else if (strcmp(defel->defname, "delimiter") == 0)
|
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->delim)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->delim = defGetString(defel);
|
2002-06-20 18:00:44 +02:00
|
|
|
}
|
|
|
|
else if (strcmp(defel->defname, "null") == 0)
|
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->null_print)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->null_print = defGetString(defel);
|
2004-04-19 19:22:31 +02:00
|
|
|
}
|
2023-03-13 15:01:56 +01:00
|
|
|
else if (strcmp(defel->defname, "default") == 0)
|
|
|
|
{
|
|
|
|
if (opts_out->default_print)
|
|
|
|
errorConflictingDefElem(defel, pstate);
|
|
|
|
opts_out->default_print = defGetString(defel);
|
|
|
|
}
|
2005-05-07 04:22:49 +02:00
|
|
|
else if (strcmp(defel->defname, "header") == 0)
|
|
|
|
{
|
2020-10-05 02:43:17 +02:00
|
|
|
if (header_specified)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
2020-10-05 02:43:17 +02:00
|
|
|
header_specified = true;
|
2022-06-23 03:49:20 +02:00
|
|
|
opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
|
2005-05-07 04:22:49 +02:00
|
|
|
}
|
2004-04-19 19:22:31 +02:00
|
|
|
else if (strcmp(defel->defname, "quote") == 0)
|
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->quote)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->quote = defGetString(defel);
|
2004-04-19 19:22:31 +02:00
|
|
|
}
|
|
|
|
else if (strcmp(defel->defname, "escape") == 0)
|
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->escape)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->escape = defGetString(defel);
|
2004-04-19 19:22:31 +02:00
|
|
|
}
|
2004-04-21 02:34:18 +02:00
|
|
|
else if (strcmp(defel->defname, "force_quote") == 0)
|
2004-04-19 19:22:31 +02:00
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->force_quote || opts_out->force_quote_all)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
2009-07-25 19:04:19 +02:00
|
|
|
if (defel->arg && IsA(defel->arg, A_Star))
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->force_quote_all = true;
|
2009-09-21 22:10:21 +02:00
|
|
|
else if (defel->arg && IsA(defel->arg, List))
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->force_quote = castNode(List, defel->arg);
|
2009-09-21 22:10:21 +02:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("argument to option \"%s\" must be a list of column names",
|
2016-09-06 18:00:00 +02:00
|
|
|
defel->defname),
|
|
|
|
parser_errposition(pstate, defel->location)));
|
2004-04-19 19:22:31 +02:00
|
|
|
}
|
2009-09-21 22:10:21 +02:00
|
|
|
else if (strcmp(defel->defname, "force_not_null") == 0)
|
2004-04-19 19:22:31 +02:00
|
|
|
{
|
2023-09-30 18:34:41 +02:00
|
|
|
if (opts_out->force_notnull || opts_out->force_notnull_all)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
2023-09-30 18:34:41 +02:00
|
|
|
if (defel->arg && IsA(defel->arg, A_Star))
|
|
|
|
opts_out->force_notnull_all = true;
|
|
|
|
else if (defel->arg && IsA(defel->arg, List))
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->force_notnull = castNode(List, defel->arg);
|
2009-09-21 22:10:21 +02:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("argument to option \"%s\" must be a list of column names",
|
2016-09-06 18:00:00 +02:00
|
|
|
defel->defname),
|
|
|
|
parser_errposition(pstate, defel->location)));
|
2004-04-19 19:22:31 +02:00
|
|
|
}
|
2014-03-04 23:31:59 +01:00
|
|
|
else if (strcmp(defel->defname, "force_null") == 0)
|
|
|
|
{
|
2023-09-30 18:34:41 +02:00
|
|
|
if (opts_out->force_null || opts_out->force_null_all)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
2023-09-30 18:34:41 +02:00
|
|
|
if (defel->arg && IsA(defel->arg, A_Star))
|
|
|
|
opts_out->force_null_all = true;
|
|
|
|
else if (defel->arg && IsA(defel->arg, List))
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->force_null = castNode(List, defel->arg);
|
2014-03-04 23:31:59 +01:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("argument to option \"%s\" must be a list of column names",
|
2016-09-06 18:00:00 +02:00
|
|
|
defel->defname),
|
|
|
|
parser_errposition(pstate, defel->location)));
|
2014-03-04 23:31:59 +01:00
|
|
|
}
|
2012-07-12 22:26:59 +02:00
|
|
|
else if (strcmp(defel->defname, "convert_selectively") == 0)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Undocumented, not-accessible-from-SQL option: convert only the
|
|
|
|
* named columns to binary form, storing the rest as NULLs. It's
|
|
|
|
* allowed for the column list to be NIL.
|
|
|
|
*/
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->convert_selectively)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->convert_selectively = true;
|
2012-07-12 22:26:59 +02:00
|
|
|
if (defel->arg == NULL || IsA(defel->arg, List))
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->convert_select = castNode(List, defel->arg);
|
2012-07-12 22:26:59 +02:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("argument to option \"%s\" must be a list of column names",
|
2016-09-06 18:00:00 +02:00
|
|
|
defel->defname),
|
|
|
|
parser_errposition(pstate, defel->location)));
|
2012-07-12 22:26:59 +02:00
|
|
|
}
|
2011-02-21 06:08:04 +01:00
|
|
|
else if (strcmp(defel->defname, "encoding") == 0)
|
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->file_encoding >= 0)
|
Improve reporting of "conflicting or redundant options" errors.
When reporting "conflicting or redundant options" errors, try to
ensure that errposition() is used, to help the user identify the
offending option.
Formerly, errposition() was invoked in less than 60% of cases. This
patch raises that to over 90%, but there remain a few places where the
ParseState is not readily available. Using errdetail() might improve
the error in such cases, but that is left as a task for the future.
Additionally, since this error is thrown from over 100 places in the
codebase, introduce a dedicated function to throw it, reducing code
duplication.
Extracted from a slightly larger patch by Vignesh C. Reviewed by
Bharath Rupireddy, Alvaro Herrera, Dilip Kumar, Hou Zhijie, Peter
Smith, Daniel Gustafsson, Julien Rouhaud and me.
Discussion: https://postgr.es/m/CALDaNm33FFSS5tVyvmkoK2cCMuDVxcui=gFrjti9ROfynqSAGA@mail.gmail.com
2021-07-15 09:49:45 +02:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->file_encoding = pg_char_to_encoding(defGetString(defel));
|
|
|
|
if (opts_out->file_encoding < 0)
|
2011-02-21 06:08:04 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("argument to option \"%s\" must be a valid encoding name",
|
2016-09-06 18:00:00 +02:00
|
|
|
defel->defname),
|
|
|
|
parser_errposition(pstate, defel->location)));
|
2011-02-21 06:08:04 +01:00
|
|
|
}
|
2024-01-19 14:08:40 +01:00
|
|
|
else if (strcmp(defel->defname, "on_error") == 0)
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
{
|
2024-01-19 14:08:40 +01:00
|
|
|
if (on_error_specified)
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
errorConflictingDefElem(defel, pstate);
|
2024-01-19 14:08:40 +01:00
|
|
|
on_error_specified = true;
|
|
|
|
opts_out->on_error = defGetCopyOnErrorChoice(defel, pstate, is_from);
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
}
|
2024-04-01 08:25:25 +02:00
|
|
|
else if (strcmp(defel->defname, "log_verbosity") == 0)
|
|
|
|
{
|
|
|
|
if (log_verbosity_specified)
|
|
|
|
errorConflictingDefElem(defel, pstate);
|
|
|
|
log_verbosity_specified = true;
|
|
|
|
opts_out->log_verbosity = defGetCopyLogVerbosityChoice(defel, pstate);
|
|
|
|
}
|
2002-06-20 18:00:44 +02:00
|
|
|
else
|
2009-09-21 22:10:21 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("option \"%s\" not recognized",
|
2016-09-06 18:00:00 +02:00
|
|
|
defel->defname),
|
2016-09-07 05:55:55 +02:00
|
|
|
parser_errposition(pstate, defel->location)));
|
2002-06-20 18:00:44 +02:00
|
|
|
}
|
|
|
|
|
2009-09-21 22:10:21 +02:00
|
|
|
/*
|
|
|
|
* Check for incompatible options (must do these two before inserting
|
|
|
|
* defaults)
|
|
|
|
*/
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->binary && opts_out->delim)
|
2003-07-20 23:56:35 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("cannot specify DELIMITER in BINARY mode")));
|
2002-06-20 18:00:44 +02:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->binary && opts_out->null_print)
|
2003-07-20 23:56:35 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("cannot specify NULL in BINARY mode")));
|
2002-07-30 18:55:06 +02:00
|
|
|
|
2023-03-13 15:01:56 +01:00
|
|
|
if (opts_out->binary && opts_out->default_print)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("cannot specify DEFAULT in BINARY mode")));
|
|
|
|
|
2024-01-19 14:08:40 +01:00
|
|
|
if (opts_out->binary && opts_out->on_error != COPY_ON_ERROR_STOP)
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
2024-01-19 14:08:40 +01:00
|
|
|
errmsg("only ON_ERROR STOP is allowed in BINARY mode")));
|
Add new COPY option SAVE_ERROR_TO
Currently, when source data contains unexpected data regarding data type or
range, the entire COPY fails. However, in some cases, such data can be ignored
and just copying normal data is preferable.
This commit adds a new option SAVE_ERROR_TO, which specifies where to save the
error information. When this option is specified, COPY skips soft errors and
continues copying.
Currently, SAVE_ERROR_TO only supports "none". This indicates error information
is not saved and COPY just skips the unexpected data and continues running.
Later works are expected to add more choices, such as 'log' and 'table'.
Author: Damir Belyalov, Atsushi Torikoshi, Alex Shulgin, Jian He
Discussion: https://postgr.es/m/87k31ftoe0.fsf_-_%40commandprompt.com
Reviewed-by: Pavel Stehule, Andres Freund, Tom Lane, Daniel Gustafsson,
Reviewed-by: Alena Rybakina, Andy Fan, Andrei Lepikhov, Masahiko Sawada
Reviewed-by: Vignesh C, Atsushi Torikoshi
2024-01-16 22:08:53 +01:00
|
|
|
|
2005-08-06 22:41:58 +02:00
|
|
|
/* Set defaults for omitted options */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->delim)
|
|
|
|
opts_out->delim = opts_out->csv_mode ? "," : "\t";
|
2004-08-29 07:07:03 +02:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->null_print)
|
|
|
|
opts_out->null_print = opts_out->csv_mode ? "" : "\\N";
|
|
|
|
opts_out->null_print_len = strlen(opts_out->null_print);
|
2004-04-19 19:22:31 +02:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->csv_mode)
|
2004-04-19 19:22:31 +02:00
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->quote)
|
|
|
|
opts_out->quote = "\"";
|
|
|
|
if (!opts_out->escape)
|
|
|
|
opts_out->escape = opts_out->quote;
|
2004-04-19 19:22:31 +02:00
|
|
|
}
|
2004-08-29 07:07:03 +02:00
|
|
|
|
2009-03-26 20:24:54 +01:00
|
|
|
/* Only single-byte delimiter strings are supported. */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (strlen(opts_out->delim) != 1)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2009-03-26 20:24:54 +01:00
|
|
|
errmsg("COPY delimiter must be a single one-byte character")));
|
2004-04-19 19:22:31 +02:00
|
|
|
|
2006-02-03 13:41:07 +01:00
|
|
|
/* Disallow end-of-line characters */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (strchr(opts_out->delim, '\r') != NULL ||
|
|
|
|
strchr(opts_out->delim, '\n') != NULL)
|
2006-02-03 13:41:07 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("COPY delimiter cannot be newline or carriage return")));
|
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (strchr(opts_out->null_print, '\r') != NULL ||
|
|
|
|
strchr(opts_out->null_print, '\n') != NULL)
|
2006-02-03 13:41:07 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
2006-10-06 19:14:01 +02:00
|
|
|
errmsg("COPY null representation cannot use newline or carriage return")));
|
2006-02-03 13:41:07 +01:00
|
|
|
|
2023-03-13 15:01:56 +01:00
|
|
|
if (opts_out->default_print)
|
|
|
|
{
|
|
|
|
opts_out->default_print_len = strlen(opts_out->default_print);
|
|
|
|
|
|
|
|
if (strchr(opts_out->default_print, '\r') != NULL ||
|
|
|
|
strchr(opts_out->default_print, '\n') != NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("COPY default representation cannot use newline or carriage return")));
|
|
|
|
}
|
|
|
|
|
Disallow digits and lower-case ASCII letters as the delimiter in non-CSV
COPY. We need a restriction here because when the delimiter occurs as a
data character, it is emitted with a backslash, and that will only work
as desired if CopyReadAttributesText() will interpret the backslash sequence
as representing the second character literally. This is currently untrue
for 'b', 'f', 'n', 'r', 't', 'v', 'x', and octal digits. For future-proofing
and simplicity of explanation, it seems best to disallow a-z and 0-9.
We must also disallow dot, since "\." by itself would look like copy EOF.
Note: "\N" is by default the null print string, so N would also cause a
problem, but that is already tested for.
2007-12-27 19:28:58 +01:00
|
|
|
/*
|
|
|
|
* Disallow unsafe delimiter characters in non-CSV mode. We can't allow
|
|
|
|
* backslash because it would be ambiguous. We can't allow the other
|
|
|
|
* cases because data characters matching the delimiter must be
|
|
|
|
* backslashed, and certain backslash combinations are interpreted
|
|
|
|
* non-literally by COPY IN. Disallowing all lower case ASCII letters is
|
|
|
|
* more than strictly necessary, but seems best for consistency and
|
|
|
|
* future-proofing. Likewise we disallow all digits though only octal
|
|
|
|
* digits are actually dangerous.
|
|
|
|
*/
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->csv_mode &&
|
Disallow digits and lower-case ASCII letters as the delimiter in non-CSV
COPY. We need a restriction here because when the delimiter occurs as a
data character, it is emitted with a backslash, and that will only work
as desired if CopyReadAttributesText() will interpret the backslash sequence
as representing the second character literally. This is currently untrue
for 'b', 'f', 'n', 'r', 't', 'v', 'x', and octal digits. For future-proofing
and simplicity of explanation, it seems best to disallow a-z and 0-9.
We must also disallow dot, since "\." by itself would look like copy EOF.
Note: "\N" is by default the null print string, so N would also cause a
problem, but that is already tested for.
2007-12-27 19:28:58 +01:00
|
|
|
strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
opts_out->delim[0]) != NULL)
|
2006-02-03 13:41:07 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
|
2006-02-03 13:41:07 +01:00
|
|
|
|
2005-05-07 04:22:49 +02:00
|
|
|
/* Check header */
|
2022-01-28 09:22:53 +01:00
|
|
|
if (opts_out->binary && opts_out->header_line)
|
2005-05-07 04:22:49 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2022-01-28 09:22:53 +01:00
|
|
|
errmsg("cannot specify HEADER in BINARY mode")));
|
2005-05-07 04:22:49 +02:00
|
|
|
|
2005-05-06 04:56:42 +02:00
|
|
|
/* Check quote */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->csv_mode && opts_out->quote != NULL)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2023-11-13 18:53:03 +01:00
|
|
|
errmsg("COPY QUOTE requires CSV mode")));
|
2004-04-19 19:22:31 +02:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->csv_mode && strlen(opts_out->quote) != 1)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2009-03-26 20:24:54 +01:00
|
|
|
errmsg("COPY quote must be a single one-byte character")));
|
2004-04-19 19:22:31 +02:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->csv_mode && opts_out->delim[0] == opts_out->quote[0])
|
2007-12-30 15:46:52 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("COPY delimiter and quote must be different")));
|
|
|
|
|
2005-05-06 04:56:42 +02:00
|
|
|
/* Check escape */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->csv_mode && opts_out->escape != NULL)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2023-11-13 18:53:03 +01:00
|
|
|
errmsg("COPY ESCAPE requires CSV mode")));
|
2004-04-19 19:22:31 +02:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->csv_mode && strlen(opts_out->escape) != 1)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2009-03-26 20:24:54 +01:00
|
|
|
errmsg("COPY escape must be a single one-byte character")));
|
2004-04-19 19:22:31 +02:00
|
|
|
|
2005-05-06 04:56:42 +02:00
|
|
|
/* Check force_quote */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->csv_mode && (opts_out->force_quote || opts_out->force_quote_all))
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2023-11-13 18:53:03 +01:00
|
|
|
errmsg("COPY FORCE_QUOTE requires CSV mode")));
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if ((opts_out->force_quote || opts_out->force_quote_all) && is_from)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2023-11-13 18:53:03 +01:00
|
|
|
errmsg("COPY FORCE_QUOTE cannot be used with COPY FROM")));
|
2004-04-19 19:22:31 +02:00
|
|
|
|
2005-05-06 04:56:42 +02:00
|
|
|
/* Check force_notnull */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->csv_mode && opts_out->force_notnull != NIL)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2023-11-13 18:53:03 +01:00
|
|
|
errmsg("COPY FORCE_NOT_NULL requires CSV mode")));
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->force_notnull != NIL && !is_from)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
2023-11-13 18:53:03 +01:00
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("COPY FORCE_NOT_NULL cannot be used with COPY TO")));
|
2004-04-19 19:22:31 +02:00
|
|
|
|
2014-03-04 23:31:59 +01:00
|
|
|
/* Check force_null */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (!opts_out->csv_mode && opts_out->force_null != NIL)
|
2014-03-04 23:31:59 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2023-11-13 18:53:03 +01:00
|
|
|
errmsg("COPY FORCE_NULL requires CSV mode")));
|
2014-03-04 23:31:59 +01:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->force_null != NIL && !is_from)
|
2014-03-04 23:31:59 +01:00
|
|
|
ereport(ERROR,
|
2023-11-13 18:53:03 +01:00
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("COPY FORCE_NULL cannot be used with COPY TO")));
|
2014-03-04 23:31:59 +01:00
|
|
|
|
2005-05-06 04:56:42 +02:00
|
|
|
/* Don't allow the delimiter to appear in the null string. */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (strchr(opts_out->null_print, opts_out->delim[0]) != NULL)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
2023-11-13 18:53:03 +01:00
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("COPY delimiter character must not appear in the NULL specification")));
|
2004-04-19 19:22:31 +02:00
|
|
|
|
2005-08-06 22:41:58 +02:00
|
|
|
/* Don't allow the CSV quote char to appear in the null string. */
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (opts_out->csv_mode &&
|
|
|
|
strchr(opts_out->null_print, opts_out->quote[0]) != NULL)
|
2004-04-19 19:22:31 +02:00
|
|
|
ereport(ERROR,
|
2023-11-13 18:53:03 +01:00
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
2004-04-19 19:22:31 +02:00
|
|
|
errmsg("CSV quote character must not appear in the NULL specification")));
|
2023-03-13 15:01:56 +01:00
|
|
|
|
2023-11-13 18:53:03 +01:00
|
|
|
/* Check freeze */
|
|
|
|
if (opts_out->freeze && !is_from)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("COPY FREEZE cannot be used with COPY TO")));
|
|
|
|
|
2023-03-13 15:01:56 +01:00
|
|
|
if (opts_out->default_print)
|
|
|
|
{
|
|
|
|
if (!is_from)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("COPY DEFAULT only available using COPY FROM")));
|
|
|
|
|
|
|
|
/* Don't allow the delimiter to appear in the default string. */
|
|
|
|
if (strchr(opts_out->default_print, opts_out->delim[0]) != NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("COPY delimiter must not appear in the DEFAULT specification")));
|
|
|
|
|
|
|
|
/* Don't allow the CSV quote char to appear in the default string. */
|
|
|
|
if (opts_out->csv_mode &&
|
|
|
|
strchr(opts_out->default_print, opts_out->quote[0]) != NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("CSV quote character must not appear in the DEFAULT specification")));
|
|
|
|
|
|
|
|
/* Don't allow the NULL and DEFAULT string to be the same */
|
|
|
|
if (opts_out->null_print_len == opts_out->default_print_len &&
|
|
|
|
strncmp(opts_out->null_print, opts_out->default_print,
|
|
|
|
opts_out->null_print_len) == 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("NULL specification and DEFAULT specification cannot be the same")));
|
|
|
|
}
|
2011-02-20 20:06:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
* CopyGetAttnums - build an integer list of attnums to be copied
|
2011-02-20 20:06:59 +01:00
|
|
|
*
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
* The input attnamelist is either the user-specified column list,
|
|
|
|
* or NIL if there was none (in which case we want all the non-dropped
|
|
|
|
* columns).
|
2011-02-20 20:06:59 +01:00
|
|
|
*
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
* We don't include generated columns in the generated full list and we don't
|
|
|
|
* allow them to be specified explicitly. They don't make sense for COPY
|
|
|
|
* FROM, but we could possibly allow them for COPY TO. But this way it's at
|
|
|
|
* least ensured that whatever we copy out can be copied back in.
|
2011-02-20 20:06:59 +01:00
|
|
|
*
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
* rel can be NULL ... it's only used for error reports.
|
2011-02-20 20:06:59 +01:00
|
|
|
*/
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
List *
|
|
|
|
CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
|
2011-02-20 20:06:59 +01:00
|
|
|
{
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
List *attnums = NIL;
|
2002-09-04 22:31:48 +02:00
|
|
|
|
Split copy.c into four files.
Copy.c has grown really large. Split it into more manageable parts:
- copy.c now contains only a few functions that are common to COPY FROM
and COPY TO.
- copyto.c contains code for COPY TO.
- copyfrom.c contains code for initializing COPY FROM, and inserting the
tuples to the correct table.
- copyfromparse.c contains code for reading from the client/file/program,
and parsing the input text/CSV/binary format into tuples.
All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.
The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.
Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
2020-11-23 09:50:50 +01:00
|
|
|
if (attnamelist == NIL)
|
2002-07-30 18:55:06 +02:00
|
|
|
{
|
2002-08-02 20:15:10 +02:00
|
|
|
/* Generate default column list */
|
|
|
|
int attr_count = tupDesc->natts;
|
|
|
|
int i;
|
2002-07-30 18:55:06 +02:00
|
|
|
|
|
|
|
for (i = 0; i < attr_count; i++)
|
|
|
|
{
|
2017-08-20 20:19:07 +02:00
|
|
|
if (TupleDescAttr(tupDesc, i)->attisdropped)
|
2002-08-02 20:15:10 +02:00
|
|
|
continue;
|
2019-03-30 08:13:09 +01:00
|
|
|
if (TupleDescAttr(tupDesc, i)->attgenerated)
|
|
|
|
continue;
|
2004-05-26 06:41:50 +02:00
|
|
|
attnums = lappend_int(attnums, i + 1);
|
2002-08-02 20:15:10 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Validate the user-supplied list and extract attnums */
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
2002-08-02 20:15:10 +02:00
|
|
|
|
|
|
|
foreach(l, attnamelist)
|
|
|
|
{
|
2002-08-19 17:08:47 +02:00
|
|
|
char *name = strVal(lfirst(l));
|
2002-08-02 20:15:10 +02:00
|
|
|
int attnum;
|
2006-08-31 01:34:22 +02:00
|
|
|
int i;
|
2002-08-02 20:15:10 +02:00
|
|
|
|
2006-03-23 01:19:30 +01:00
|
|
|
/* Lookup column name */
|
2006-08-31 01:34:22 +02:00
|
|
|
attnum = InvalidAttrNumber;
|
|
|
|
for (i = 0; i < tupDesc->natts; i++)
|
|
|
|
{
|
2017-08-20 20:19:07 +02:00
|
|
|
Form_pg_attribute att = TupleDescAttr(tupDesc, i);
|
|
|
|
|
|
|
|
if (att->attisdropped)
|
2006-08-31 01:34:22 +02:00
|
|
|
continue;
|
2017-08-20 20:19:07 +02:00
|
|
|
if (namestrcmp(&(att->attname), name) == 0)
|
2006-08-31 01:34:22 +02:00
|
|
|
{
|
2019-03-30 08:13:09 +01:00
|
|
|
if (att->attgenerated)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
|
|
|
|
errmsg("column \"%s\" is a generated column",
|
|
|
|
name),
|
|
|
|
errdetail("Generated columns cannot be used in COPY.")));
|
2017-08-20 20:19:07 +02:00
|
|
|
attnum = att->attnum;
|
2006-08-31 01:34:22 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2006-03-23 01:19:30 +01:00
|
|
|
if (attnum == InvalidAttrNumber)
|
2006-08-31 01:34:22 +02:00
|
|
|
{
|
|
|
|
if (rel != NULL)
|
|
|
|
ereport(ERROR,
|
2006-03-23 01:19:30 +01:00
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("column \"%s\" of relation \"%s\" does not exist",
|
2006-08-31 01:34:22 +02:00
|
|
|
name, RelationGetRelationName(rel))));
|
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("column \"%s\" does not exist",
|
|
|
|
name)));
|
|
|
|
}
|
2002-08-02 20:15:10 +02:00
|
|
|
/* Check for duplicates */
|
2004-05-26 06:41:50 +02:00
|
|
|
if (list_member_int(attnums, attnum))
|
2003-07-20 23:56:35 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DUPLICATE_COLUMN),
|
2003-09-25 08:58:07 +02:00
|
|
|
errmsg("column \"%s\" specified more than once",
|
2003-07-20 23:56:35 +02:00
|
|
|
name)));
|
2004-05-26 06:41:50 +02:00
|
|
|
attnums = lappend_int(attnums, attnum);
|
2002-07-30 18:55:06 +02:00
|
|
|
}
|
|
|
|
}
|
2002-07-18 06:43:51 +02:00
|
|
|
|
2002-08-02 20:15:10 +02:00
|
|
|
return attnums;
|
|
|
|
}
|