postgresql/src/backend/commands/copy.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

725 lines
22 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* copy.c
* Implements the COPY utility command
*
* Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/commands/copy.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
1996-11-06 09:21:43 +01:00
#include <unistd.h>
1999-07-16 07:00:38 +02:00
#include <sys/stat.h>
1996-11-06 09:21:43 +01:00
#include "access/sysattr.h"
#include "access/table.h"
#include "access/xact.h"
#include "catalog/pg_authid.h"
#include "commands/copy.h"
#include "commands/defrem.h"
1999-07-16 07:00:38 +02:00
#include "executor/executor.h"
#include "mb/pg_wchar.h"
1999-07-16 07:00:38 +02:00
#include "miscadmin.h"
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
#include "nodes/makefuncs.h"
#include "optimizer/optimizer.h"
#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
#include "parser/parse_expr.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteHandler.h"
#include "utils/acl.h"
1999-07-16 07:00:38 +02:00
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/rls.h"
/*
* DoCopy executes the SQL COPY statement
*
* Either unload or reload contents of table <relation>, depending on <from>.
* (<from> = true means we are inserting into the table.) In the "TO" case
* we also support copying the output of an arbitrary SELECT, INSERT, UPDATE
* or DELETE query.
*
* If <pipe> is false, transfer is between the table and the file named
* <filename>. Otherwise, transfer is between the table and our regular
* input/output stream. The latter could be either stdin/stdout or a
* socket, depending on whether we're running under Postmaster control.
*
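* Do not allow a Postgres user without the 'pg_read_server_files' or
* 'pg_write_server_files' role to read from or write to a file, and do not
* allow a user without the 'pg_execute_server_program' role to COPY to or
* from an external program.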
*
* Do not allow the copy if user doesn't have proper permission to access
* the table or the specifically requested columns.
*/
Change representation of statement lists, and add statement location info. This patch makes several changes that improve the consistency of representation of lists of statements. It's always been the case that the output of parse analysis is a list of Query nodes, whatever the types of the individual statements in the list. This patch brings similar consistency to the outputs of raw parsing and planning steps: * The output of raw parsing is now always a list of RawStmt nodes; the statement-type-dependent nodes are one level down from that. * The output of pg_plan_queries() is now always a list of PlannedStmt nodes, even for utility statements. In the case of a utility statement, "planning" just consists of wrapping a CMD_UTILITY PlannedStmt around the utility node. This list representation is now used in Portal and CachedPlan plan lists, replacing the former convention of intermixing PlannedStmts with bare utility-statement nodes. Now, every list of statements has a consistent head-node type depending on how far along it is in processing. This allows changing many places that formerly used generic "Node *" pointers to use a more specific pointer type, thus reducing the number of IsA() tests and casts needed, as well as improving code clarity. Also, the post-parse-analysis representation of DECLARE CURSOR is changed so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained SELECT remains a child of the DeclareCursorStmt rather than getting flipped around to be the other way. It's now true for both Query and PlannedStmt that utilityStmt is non-null if and only if commandType is CMD_UTILITY. That allows simplifying a lot of places that were testing both fields. (I think some of those were just defensive programming, but in many places, it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.) 
Because PlannedStmt carries a canSetTag field, we're also able to get rid of some ad-hoc rules about how to reconstruct canSetTag for a bare utility statement; specifically, the assumption that a utility is canSetTag if and only if it's the only one in its list. While I see no near-term need for relaxing that restriction, it's nice to get rid of the ad-hocery. The API of ProcessUtility() is changed so that what it's passed is the wrapper PlannedStmt not just the bare utility statement. This will affect all users of ProcessUtility_hook, but the changes are pretty trivial; see the affected contrib modules for examples of the minimum change needed. (Most compilers should give pointer-type-mismatch warnings for uncorrected code.) There's also a change in the API of ExplainOneQuery_hook, to pass through cursorOptions instead of expecting hook functions to know what to pick. This is needed because of the DECLARE CURSOR changes, but really should have been done in 9.6; it's unlikely that any extant hook functions know about using CURSOR_OPT_PARALLEL_OK. Finally, teach gram.y to save statement boundary locations in RawStmt nodes, and pass those through to Query and PlannedStmt nodes. This allows more intelligent handling of cases where a source query string contains multiple statements. This patch doesn't actually do anything with the information, but a follow-on patch will. (Passing this information through cleanly is the true motivation for these changes; while I think this is all good cleanup, it's unlikely we'd have bothered without this end goal.) catversion bump because addition of location fields to struct Query affects stored rules. This patch is by me, but it owes a good deal to Fabien Coelho who did a lot of preliminary work on the problem, and also reviewed the patch. Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
void
DoCopy(ParseState *pstate, const CopyStmt *stmt,
int stmt_location, int stmt_len,
uint64 *processed)
{
bool is_from = stmt->is_from;
bool pipe = (stmt->filename == NULL);
Relation rel;
Oid relid;
Change representation of statement lists, and add statement location info. This patch makes several changes that improve the consistency of representation of lists of statements. It's always been the case that the output of parse analysis is a list of Query nodes, whatever the types of the individual statements in the list. This patch brings similar consistency to the outputs of raw parsing and planning steps: * The output of raw parsing is now always a list of RawStmt nodes; the statement-type-dependent nodes are one level down from that. * The output of pg_plan_queries() is now always a list of PlannedStmt nodes, even for utility statements. In the case of a utility statement, "planning" just consists of wrapping a CMD_UTILITY PlannedStmt around the utility node. This list representation is now used in Portal and CachedPlan plan lists, replacing the former convention of intermixing PlannedStmts with bare utility-statement nodes. Now, every list of statements has a consistent head-node type depending on how far along it is in processing. This allows changing many places that formerly used generic "Node *" pointers to use a more specific pointer type, thus reducing the number of IsA() tests and casts needed, as well as improving code clarity. Also, the post-parse-analysis representation of DECLARE CURSOR is changed so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained SELECT remains a child of the DeclareCursorStmt rather than getting flipped around to be the other way. It's now true for both Query and PlannedStmt that utilityStmt is non-null if and only if commandType is CMD_UTILITY. That allows simplifying a lot of places that were testing both fields. (I think some of those were just defensive programming, but in many places, it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.) 
Because PlannedStmt carries a canSetTag field, we're also able to get rid of some ad-hoc rules about how to reconstruct canSetTag for a bare utility statement; specifically, the assumption that a utility is canSetTag if and only if it's the only one in its list. While I see no near-term need for relaxing that restriction, it's nice to get rid of the ad-hocery. The API of ProcessUtility() is changed so that what it's passed is the wrapper PlannedStmt not just the bare utility statement. This will affect all users of ProcessUtility_hook, but the changes are pretty trivial; see the affected contrib modules for examples of the minimum change needed. (Most compilers should give pointer-type-mismatch warnings for uncorrected code.) There's also a change in the API of ExplainOneQuery_hook, to pass through cursorOptions instead of expecting hook functions to know what to pick. This is needed because of the DECLARE CURSOR changes, but really should have been done in 9.6; it's unlikely that any extant hook functions know about using CURSOR_OPT_PARALLEL_OK. Finally, teach gram.y to save statement boundary locations in RawStmt nodes, and pass those through to Query and PlannedStmt nodes. This allows more intelligent handling of cases where a source query string contains multiple statements. This patch doesn't actually do anything with the information, but a follow-on patch will. (Passing this information through cleanly is the true motivation for these changes; while I think this is all good cleanup, it's unlikely we'd have bothered without this end goal.) catversion bump because addition of location fields to struct Query affects stored rules. This patch is by me, but it owes a good deal to Fabien Coelho who did a lot of preliminary work on the problem, and also reviewed the patch. Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
RawStmt *query = NULL;
Node *whereClause = NULL;
/*
* Disallow COPY to/from file or program except to users with the
* appropriate role.
*/
if (!pipe)
{
if (stmt->is_program)
{
if (!is_member_of_role(GetUserId(), ROLE_PG_EXECUTE_SERVER_PROGRAM))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or a member of the pg_execute_server_program role to COPY to or from an external program"),
errhint("Anyone can COPY to stdout or from stdin. "
"psql's \\copy command also works for anyone.")));
}
else
{
if (is_from && !is_member_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or a member of the pg_read_server_files role to COPY from a file"),
errhint("Anyone can COPY to stdout or from stdin. "
"psql's \\copy command also works for anyone.")));
if (!is_from && !is_member_of_role(GetUserId(), ROLE_PG_WRITE_SERVER_FILES))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser or a member of the pg_write_server_files role to COPY to a file"),
errhint("Anyone can COPY to stdout or from stdin. "
"psql's \\copy command also works for anyone.")));
}
}
if (stmt->relation)
{
LOCKMODE lockmode = is_from ? RowExclusiveLock : AccessShareLock;
Make parser rely more heavily on the ParseNamespaceItem data structure. When I added the ParseNamespaceItem data structure (in commit 5ebaaa494), it wasn't very tightly integrated into the parser's APIs. In the wake of adding p_rtindex to that struct (commit b541e9acc), there is a good reason to make more use of it: by passing around ParseNamespaceItem pointers instead of bare RTE pointers, we can get rid of various messy methods for passing back or deducing the rangetable index of an RTE during parsing. Hence, refactor the addRangeTableEntryXXX functions to build and return a ParseNamespaceItem struct, not just the RTE proper; and replace addRTEtoQuery with addNSItemToQuery, which is passed a ParseNamespaceItem rather than building one internally. Also, add per-column data (a ParseNamespaceColumn array) to each ParseNamespaceItem. These arrays are built during addRangeTableEntryXXX, where we have column type data at hand so that it's nearly free to fill the data structure. Later, when we need to build Vars referencing RTEs, we can use the ParseNamespaceColumn info to avoid the rather expensive operations done in get_rte_attribute_type() or expandRTE(). get_rte_attribute_type() is indeed dead code now, so I've removed it. This makes for a useful improvement in parse analysis speed, around 20% in one moderately-complex test query. The ParseNamespaceColumn structs also include Var identity information (varno/varattno). That info isn't actually being used in this patch, except that p_varno == 0 is a handy test for a dropped column. A follow-on patch will make more use of it. Discussion: https://postgr.es/m/2461.1577764221@sss.pgh.pa.us
2020-01-02 17:29:01 +01:00
ParseNamespaceItem *nsitem;
RangeTblEntry *rte;
TupleDesc tupDesc;
List *attnums;
ListCell *cur;
Assert(!stmt->query);
/* Open and lock the relation, using the appropriate lock type. */
rel = table_openrv(stmt->relation, lockmode);
relid = RelationGetRelid(rel);
Make parser rely more heavily on the ParseNamespaceItem data structure. When I added the ParseNamespaceItem data structure (in commit 5ebaaa494), it wasn't very tightly integrated into the parser's APIs. In the wake of adding p_rtindex to that struct (commit b541e9acc), there is a good reason to make more use of it: by passing around ParseNamespaceItem pointers instead of bare RTE pointers, we can get rid of various messy methods for passing back or deducing the rangetable index of an RTE during parsing. Hence, refactor the addRangeTableEntryXXX functions to build and return a ParseNamespaceItem struct, not just the RTE proper; and replace addRTEtoQuery with addNSItemToQuery, which is passed a ParseNamespaceItem rather than building one internally. Also, add per-column data (a ParseNamespaceColumn array) to each ParseNamespaceItem. These arrays are built during addRangeTableEntryXXX, where we have column type data at hand so that it's nearly free to fill the data structure. Later, when we need to build Vars referencing RTEs, we can use the ParseNamespaceColumn info to avoid the rather expensive operations done in get_rte_attribute_type() or expandRTE(). get_rte_attribute_type() is indeed dead code now, so I've removed it. This makes for a useful improvement in parse analysis speed, around 20% in one moderately-complex test query. The ParseNamespaceColumn structs also include Var identity information (varno/varattno). That info isn't actually being used in this patch, except that p_varno == 0 is a handy test for a dropped column. A follow-on patch will make more use of it. Discussion: https://postgr.es/m/2461.1577764221@sss.pgh.pa.us
2020-01-02 17:29:01 +01:00
nsitem = addRangeTableEntryForRelation(pstate, rel, lockmode,
NULL, false, false);
rte = nsitem->p_rte;
rte->requiredPerms = (is_from ? ACL_INSERT : ACL_SELECT);
if (stmt->whereClause)
{
Make parser rely more heavily on the ParseNamespaceItem data structure. When I added the ParseNamespaceItem data structure (in commit 5ebaaa494), it wasn't very tightly integrated into the parser's APIs. In the wake of adding p_rtindex to that struct (commit b541e9acc), there is a good reason to make more use of it: by passing around ParseNamespaceItem pointers instead of bare RTE pointers, we can get rid of various messy methods for passing back or deducing the rangetable index of an RTE during parsing. Hence, refactor the addRangeTableEntryXXX functions to build and return a ParseNamespaceItem struct, not just the RTE proper; and replace addRTEtoQuery with addNSItemToQuery, which is passed a ParseNamespaceItem rather than building one internally. Also, add per-column data (a ParseNamespaceColumn array) to each ParseNamespaceItem. These arrays are built during addRangeTableEntryXXX, where we have column type data at hand so that it's nearly free to fill the data structure. Later, when we need to build Vars referencing RTEs, we can use the ParseNamespaceColumn info to avoid the rather expensive operations done in get_rte_attribute_type() or expandRTE(). get_rte_attribute_type() is indeed dead code now, so I've removed it. This makes for a useful improvement in parse analysis speed, around 20% in one moderately-complex test query. The ParseNamespaceColumn structs also include Var identity information (varno/varattno). That info isn't actually being used in this patch, except that p_varno == 0 is a handy test for a dropped column. A follow-on patch will make more use of it. Discussion: https://postgr.es/m/2461.1577764221@sss.pgh.pa.us
2020-01-02 17:29:01 +01:00
/* add nsitem to query namespace */
addNSItemToQuery(pstate, nsitem, false, true, true);
/* Transform the raw expression tree */
whereClause = transformExpr(pstate, stmt->whereClause, EXPR_KIND_COPY_WHERE);
/* Make sure it yields a boolean result. */
whereClause = coerce_to_boolean(pstate, whereClause, "WHERE");
/* we have to fix its collations too */
assign_expr_collations(pstate, whereClause);
whereClause = eval_const_expressions(NULL, whereClause);
whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
}
tupDesc = RelationGetDescr(rel);
attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
foreach(cur, attnums)
{
int attno = lfirst_int(cur) -
FirstLowInvalidHeapAttributeNumber;
if (is_from)
rte->insertedCols = bms_add_member(rte->insertedCols, attno);
else
rte->selectedCols = bms_add_member(rte->selectedCols, attno);
}
ExecCheckRTPerms(pstate->p_rtable, true);
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
/*
* Permission check for row security policies.
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
*
* check_enable_rls will ereport(ERROR) if the user has requested
* something invalid and will otherwise indicate if we should enable
* RLS (returns RLS_ENABLED) or not for this COPY statement.
*
* If the relation has a row security policy and we are to apply it
* then perform a "query" copy and allow the normal query processing
* to handle the policies.
*
* If RLS is not enabled for this, then just fall through to the
* normal non-filtering relation handling.
*/
if (check_enable_rls(rte->relid, InvalidOid, false) == RLS_ENABLED)
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
{
SelectStmt *select;
ColumnRef *cr;
ResTarget *target;
RangeVar *from;
List *targetList = NIL;
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
if (is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2015-11-17 12:53:07 +01:00
errmsg("COPY FROM not supported with row-level security"),
errhint("Use INSERT statements instead.")));
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
/*
* Build target list
*
* If no columns are specified in the attribute list of the COPY
* command, then the target list is 'all' columns. Therefore, '*'
* should be used as the target list for the resulting SELECT
* statement.
*
* In the case that columns are specified in the attribute list,
* create a ColumnRef and ResTarget for each column and add them
* to the target list for the resulting SELECT statement.
*/
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
if (!stmt->attlist)
{
cr = makeNode(ColumnRef);
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
cr->fields = list_make1(makeNode(A_Star));
cr->location = -1;
target = makeNode(ResTarget);
target->name = NULL;
target->indirection = NIL;
target->val = (Node *) cr;
target->location = -1;
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
targetList = list_make1(target);
}
else
{
ListCell *lc;
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
foreach(lc, stmt->attlist)
{
/*
* Build the ColumnRef for each column. The ColumnRef's 'fields'
* list holds a single entry: the String node from the attribute
* list that gives the column's name.
*/
cr = makeNode(ColumnRef);
cr->fields = list_make1(lfirst(lc));
cr->location = -1;
/* Build the ResTarget and add the ColumnRef to it. */
target = makeNode(ResTarget);
target->name = NULL;
target->indirection = NIL;
target->val = (Node *) cr;
target->location = -1;
/* Add each column to the SELECT statement's target list */
targetList = lappend(targetList, target);
}
}
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
/*
* Build RangeVar for from clause, fully qualified based on the
* relation which we have opened and locked.
*/
from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
pstrdup(RelationGetRelationName(rel)),
-1);
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
/* Build query */
select = makeNode(SelectStmt);
select->targetList = targetList;
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
select->fromClause = list_make1(from);
Change representation of statement lists, and add statement location info. This patch makes several changes that improve the consistency of representation of lists of statements. It's always been the case that the output of parse analysis is a list of Query nodes, whatever the types of the individual statements in the list. This patch brings similar consistency to the outputs of raw parsing and planning steps: * The output of raw parsing is now always a list of RawStmt nodes; the statement-type-dependent nodes are one level down from that. * The output of pg_plan_queries() is now always a list of PlannedStmt nodes, even for utility statements. In the case of a utility statement, "planning" just consists of wrapping a CMD_UTILITY PlannedStmt around the utility node. This list representation is now used in Portal and CachedPlan plan lists, replacing the former convention of intermixing PlannedStmts with bare utility-statement nodes. Now, every list of statements has a consistent head-node type depending on how far along it is in processing. This allows changing many places that formerly used generic "Node *" pointers to use a more specific pointer type, thus reducing the number of IsA() tests and casts needed, as well as improving code clarity. Also, the post-parse-analysis representation of DECLARE CURSOR is changed so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained SELECT remains a child of the DeclareCursorStmt rather than getting flipped around to be the other way. It's now true for both Query and PlannedStmt that utilityStmt is non-null if and only if commandType is CMD_UTILITY. That allows simplifying a lot of places that were testing both fields. (I think some of those were just defensive programming, but in many places, it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.) 
Because PlannedStmt carries a canSetTag field, we're also able to get rid of some ad-hoc rules about how to reconstruct canSetTag for a bare utility statement; specifically, the assumption that a utility is canSetTag if and only if it's the only one in its list. While I see no near-term need for relaxing that restriction, it's nice to get rid of the ad-hocery. The API of ProcessUtility() is changed so that what it's passed is the wrapper PlannedStmt not just the bare utility statement. This will affect all users of ProcessUtility_hook, but the changes are pretty trivial; see the affected contrib modules for examples of the minimum change needed. (Most compilers should give pointer-type-mismatch warnings for uncorrected code.) There's also a change in the API of ExplainOneQuery_hook, to pass through cursorOptions instead of expecting hook functions to know what to pick. This is needed because of the DECLARE CURSOR changes, but really should have been done in 9.6; it's unlikely that any extant hook functions know about using CURSOR_OPT_PARALLEL_OK. Finally, teach gram.y to save statement boundary locations in RawStmt nodes, and pass those through to Query and PlannedStmt nodes. This allows more intelligent handling of cases where a source query string contains multiple statements. This patch doesn't actually do anything with the information, but a follow-on patch will. (Passing this information through cleanly is the true motivation for these changes; while I think this is all good cleanup, it's unlikely we'd have bothered without this end goal.) catversion bump because addition of location fields to struct Query affects stored rules. This patch is by me, but it owes a good deal to Fabien Coelho who did a lot of preliminary work on the problem, and also reviewed the patch. Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
query = makeNode(RawStmt);
query->stmt = (Node *) select;
query->stmt_location = stmt_location;
query->stmt_len = stmt_len;
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
/*
* Close the relation for now, but keep the lock on it to prevent
* changes between now and when we start the query-based COPY.
*
* We'll reopen it later as part of the query-based COPY.
*/
table_close(rel, NoLock);
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. 
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
rel = NULL;
}
}
else
{
Assert(stmt->query);
Change representation of statement lists, and add statement location info. This patch makes several changes that improve the consistency of representation of lists of statements. It's always been the case that the output of parse analysis is a list of Query nodes, whatever the types of the individual statements in the list. This patch brings similar consistency to the outputs of raw parsing and planning steps: * The output of raw parsing is now always a list of RawStmt nodes; the statement-type-dependent nodes are one level down from that. * The output of pg_plan_queries() is now always a list of PlannedStmt nodes, even for utility statements. In the case of a utility statement, "planning" just consists of wrapping a CMD_UTILITY PlannedStmt around the utility node. This list representation is now used in Portal and CachedPlan plan lists, replacing the former convention of intermixing PlannedStmts with bare utility-statement nodes. Now, every list of statements has a consistent head-node type depending on how far along it is in processing. This allows changing many places that formerly used generic "Node *" pointers to use a more specific pointer type, thus reducing the number of IsA() tests and casts needed, as well as improving code clarity. Also, the post-parse-analysis representation of DECLARE CURSOR is changed so that it looks more like EXPLAIN, PREPARE, etc. That is, the contained SELECT remains a child of the DeclareCursorStmt rather than getting flipped around to be the other way. It's now true for both Query and PlannedStmt that utilityStmt is non-null if and only if commandType is CMD_UTILITY. That allows simplifying a lot of places that were testing both fields. (I think some of those were just defensive programming, but in many places, it was actually necessary to avoid confusing DECLARE CURSOR with SELECT.) 
Because PlannedStmt carries a canSetTag field, we're also able to get rid of some ad-hoc rules about how to reconstruct canSetTag for a bare utility statement; specifically, the assumption that a utility is canSetTag if and only if it's the only one in its list. While I see no near-term need for relaxing that restriction, it's nice to get rid of the ad-hocery. The API of ProcessUtility() is changed so that what it's passed is the wrapper PlannedStmt not just the bare utility statement. This will affect all users of ProcessUtility_hook, but the changes are pretty trivial; see the affected contrib modules for examples of the minimum change needed. (Most compilers should give pointer-type-mismatch warnings for uncorrected code.) There's also a change in the API of ExplainOneQuery_hook, to pass through cursorOptions instead of expecting hook functions to know what to pick. This is needed because of the DECLARE CURSOR changes, but really should have been done in 9.6; it's unlikely that any extant hook functions know about using CURSOR_OPT_PARALLEL_OK. Finally, teach gram.y to save statement boundary locations in RawStmt nodes, and pass those through to Query and PlannedStmt nodes. This allows more intelligent handling of cases where a source query string contains multiple statements. This patch doesn't actually do anything with the information, but a follow-on patch will. (Passing this information through cleanly is the true motivation for these changes; while I think this is all good cleanup, it's unlikely we'd have bothered without this end goal.) catversion bump because addition of location fields to struct Query affects stored rules. This patch is by me, but it owes a good deal to Fabien Coelho who did a lot of preliminary work on the problem, and also reviewed the patch. Discussion: https://postgr.es/m/alpine.DEB.2.20.1612200926310.29821@lancre
2017-01-14 22:02:35 +01:00
query = makeNode(RawStmt);
query->stmt = stmt->query;
query->stmt_location = stmt_location;
query->stmt_len = stmt_len;
relid = InvalidOid;
rel = NULL;
}
if (is_from)
{
CopyFromState cstate;
Assert(rel);
/* check read-only transaction and parallel mode */
if (XactReadOnly && !rel->rd_islocaltemp)
PreventCommandIfReadOnly("COPY FROM");
cstate = BeginCopyFrom(pstate, rel, whereClause,
stmt->filename, stmt->is_program,
NULL, stmt->attlist, stmt->options);
*processed = CopyFrom(cstate); /* copy from file to database */
EndCopyFrom(cstate);
}
else
{
CopyToState cstate;
cstate = BeginCopyTo(pstate, rel, query, relid,
stmt->filename, stmt->is_program,
stmt->attlist, stmt->options);
*processed = DoCopyTo(cstate); /* copy from database to file */
EndCopyTo(cstate);
}
if (rel != NULL)
table_close(rel, NoLock);
}
/*
* Process the statement option list for COPY.
*
* Scan the options list (a list of DefElem) and transfer the information
* into *opts_out, applying appropriate error checking.
*
* If 'opts_out' is not NULL, it is assumed to be filled with zeroes initially.
*
* This is exported so that external users of the COPY API can sanity-check
* a list of options. In that usage, 'opts_out' can be passed as NULL and
* the collected data is just leaked until CurrentMemoryContext is reset.
*
* Note that additional checking, such as whether column names listed in FORCE
* QUOTE actually exist, has to be applied later. This just checks for
* self-consistency of the options list.
*/
void
ProcessCopyOptions(ParseState *pstate,
CopyFormatOptions *opts_out,
bool is_from,
List *options)
{
bool format_specified = false;
bool freeze_specified = false;
bool header_specified = false;
ListCell *option;
/* Support external use for option sanity checking */
if (opts_out == NULL)
opts_out = (CopyFormatOptions *) palloc0(sizeof(CopyFormatOptions));
Handle EPIPE more sanely when we close a pipe reading from a program. Previously, any program launched by COPY TO/FROM PROGRAM inherited the server's setting of SIGPIPE handling, i.e. SIG_IGN. Hence, if we were doing COPY FROM PROGRAM and closed the pipe early, the child process would see EPIPE on its output file and typically would treat that as a fatal error, in turn causing the COPY to report error. Similarly, one could get a failure report from a query that didn't read all of the output from a contrib/file_fdw foreign table that uses file_fdw's PROGRAM option. To fix, ensure that child programs inherit SIG_DFL not SIG_IGN processing of SIGPIPE. This seems like an all-around better situation since if the called program wants some non-default treatment of SIGPIPE, it would expect to have to set that up for itself. Then in COPY, if it's COPY FROM PROGRAM and we stop reading short of detecting EOF, treat a SIGPIPE exit from the called program as a non-error condition. This still allows us to report an error for any case where the called program gets SIGPIPE on some other file descriptor. As coded, we won't report a SIGPIPE if we stop reading as a result of seeing an in-band EOF marker (e.g. COPY BINARY EOF marker). It's somewhat debatable whether we should complain if the called program continues to transmit data after an EOF marker. However, it seems like we should avoid throwing error in any questionable cases, especially in a back-patched fix, and anyway it would take additional code to make such an error get reported consistently. Back-patch to v10. We could go further back, since COPY FROM PROGRAM has been around awhile, but AFAICS the only way to reach this situation using core or contrib is via file_fdw, which has only supported PROGRAM sources since v10. The COPY statement per se has no feature whereby it'd stop reading without having hit EOF or an error already. 
Therefore, I don't see any upside to back-patching further that'd outweigh the risk of complaints about behavioral change. Per bug #15449 from Eric Cyr. Patch by me, review by Etsuro Fujita and Kyotaro Horiguchi Discussion: https://postgr.es/m/15449-1cf737dd5929450e@postgresql.org
2018-11-19 23:02:25 +01:00
opts_out->file_encoding = -1;
/* Extract options from the statement node tree */
foreach(option, options)
{
DefElem *defel = lfirst_node(DefElem, option);
if (strcmp(defel->defname, "format") == 0)
{
char *fmt = defGetString(defel);
if (format_specified)
errorConflictingDefElem(defel, pstate);
format_specified = true;
if (strcmp(fmt, "text") == 0)
/* default format */ ;
else if (strcmp(fmt, "csv") == 0)
opts_out->csv_mode = true;
else if (strcmp(fmt, "binary") == 0)
opts_out->binary = true;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("COPY format \"%s\" not recognized", fmt),
parser_errposition(pstate, defel->location)));
}
else if (strcmp(defel->defname, "freeze") == 0)
{
if (freeze_specified)
errorConflictingDefElem(defel, pstate);
freeze_specified = true;
opts_out->freeze = defGetBoolean(defel);
}
else if (strcmp(defel->defname, "delimiter") == 0)
{
if (opts_out->delim)
errorConflictingDefElem(defel, pstate);
opts_out->delim = defGetString(defel);
}
else if (strcmp(defel->defname, "null") == 0)
{
if (opts_out->null_print)
errorConflictingDefElem(defel, pstate);
opts_out->null_print = defGetString(defel);
}
else if (strcmp(defel->defname, "header") == 0)
{
if (header_specified)
errorConflictingDefElem(defel, pstate);
header_specified = true;
opts_out->header_line = defGetBoolean(defel);
}
else if (strcmp(defel->defname, "quote") == 0)
{
if (opts_out->quote)
errorConflictingDefElem(defel, pstate);
opts_out->quote = defGetString(defel);
}
else if (strcmp(defel->defname, "escape") == 0)
{
if (opts_out->escape)
errorConflictingDefElem(defel, pstate);
opts_out->escape = defGetString(defel);
}
else if (strcmp(defel->defname, "force_quote") == 0)
{
if (opts_out->force_quote || opts_out->force_quote_all)
errorConflictingDefElem(defel, pstate);
if (defel->arg && IsA(defel->arg, A_Star))
opts_out->force_quote_all = true;
else if (defel->arg && IsA(defel->arg, List))
opts_out->force_quote = castNode(List, defel->arg);
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname),
parser_errposition(pstate, defel->location)));
}
else if (strcmp(defel->defname, "force_not_null") == 0)
{
if (opts_out->force_notnull)
errorConflictingDefElem(defel, pstate);
if (defel->arg && IsA(defel->arg, List))
opts_out->force_notnull = castNode(List, defel->arg);
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname),
parser_errposition(pstate, defel->location)));
}
else if (strcmp(defel->defname, "force_null") == 0)
{
if (opts_out->force_null)
errorConflictingDefElem(defel, pstate);
if (defel->arg && IsA(defel->arg, List))
opts_out->force_null = castNode(List, defel->arg);
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname),
parser_errposition(pstate, defel->location)));
}
else if (strcmp(defel->defname, "convert_selectively") == 0)
{
/*
* Undocumented, not-accessible-from-SQL option: convert only the
* named columns to binary form, storing the rest as NULLs. It's
* allowed for the column list to be NIL.
*/
if (opts_out->convert_selectively)
errorConflictingDefElem(defel, pstate);
opts_out->convert_selectively = true;
if (defel->arg == NULL || IsA(defel->arg, List))
opts_out->convert_select = castNode(List, defel->arg);
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname),
parser_errposition(pstate, defel->location)));
}
else if (strcmp(defel->defname, "encoding") == 0)
{
if (opts_out->file_encoding >= 0)
errorConflictingDefElem(defel, pstate);
opts_out->file_encoding = pg_char_to_encoding(defGetString(defel));
if (opts_out->file_encoding < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument to option \"%s\" must be a valid encoding name",
defel->defname),
parser_errposition(pstate, defel->location)));
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("option \"%s\" not recognized",
defel->defname),
parser_errposition(pstate, defel->location)));
}
/*
* Check for incompatible options (must do these two before inserting
* defaults)
*/
if (opts_out->binary && opts_out->delim)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify DELIMITER in BINARY mode")));
if (opts_out->binary && opts_out->null_print)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify NULL in BINARY mode")));
/* Set defaults for omitted options */
if (!opts_out->delim)
opts_out->delim = opts_out->csv_mode ? "," : "\t";
2004-08-29 07:07:03 +02:00
if (!opts_out->null_print)
opts_out->null_print = opts_out->csv_mode ? "" : "\\N";
opts_out->null_print_len = strlen(opts_out->null_print);
if (opts_out->csv_mode)
{
if (!opts_out->quote)
opts_out->quote = "\"";
if (!opts_out->escape)
opts_out->escape = opts_out->quote;
}
2004-08-29 07:07:03 +02:00
/* Only single-byte delimiter strings are supported. */
if (strlen(opts_out->delim) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must be a single one-byte character")));
/* Disallow end-of-line characters */
if (strchr(opts_out->delim, '\r') != NULL ||
strchr(opts_out->delim, '\n') != NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("COPY delimiter cannot be newline or carriage return")));
if (strchr(opts_out->null_print, '\r') != NULL ||
strchr(opts_out->null_print, '\n') != NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2006-10-06 19:14:01 +02:00
errmsg("COPY null representation cannot use newline or carriage return")));
/*
* Disallow unsafe delimiter characters in non-CSV mode. We can't allow
* backslash because it would be ambiguous. We can't allow the other
* cases because data characters matching the delimiter must be
* backslashed, and certain backslash combinations are interpreted
* non-literally by COPY IN. Disallowing all lower case ASCII letters is
* more than strictly necessary, but seems best for consistency and
* future-proofing. Likewise we disallow all digits though only octal
* digits are actually dangerous.
*/
if (!opts_out->csv_mode &&
strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
opts_out->delim[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
/* Check header */
if (opts_out->binary && opts_out->header_line)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot specify HEADER in BINARY mode")));
/* Check quote */
if (!opts_out->csv_mode && opts_out->quote != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY quote available only in CSV mode")));
if (opts_out->csv_mode && strlen(opts_out->quote) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY quote must be a single one-byte character")));
if (opts_out->csv_mode && opts_out->delim[0] == opts_out->quote[0])
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("COPY delimiter and quote must be different")));
/* Check escape */
if (!opts_out->csv_mode && opts_out->escape != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY escape available only in CSV mode")));
if (opts_out->csv_mode && strlen(opts_out->escape) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY escape must be a single one-byte character")));
/* Check force_quote */
if (!opts_out->csv_mode && (opts_out->force_quote || opts_out->force_quote_all))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force quote available only in CSV mode")));
if ((opts_out->force_quote || opts_out->force_quote_all) && is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force quote only available using COPY TO")));
/* Check force_notnull */
if (!opts_out->csv_mode && opts_out->force_notnull != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force not null available only in CSV mode")));
if (opts_out->force_notnull != NIL && !is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force not null only available using COPY FROM")));
/* Check force_null */
if (!opts_out->csv_mode && opts_out->force_null != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force null available only in CSV mode")));
if (opts_out->force_null != NIL && !is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force null only available using COPY FROM")));
/* Don't allow the delimiter to appear in the null string. */
if (strchr(opts_out->null_print, opts_out->delim[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must not appear in the NULL specification")));
/* Don't allow the CSV quote char to appear in the null string. */
if (opts_out->csv_mode &&
strchr(opts_out->null_print, opts_out->quote[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in the NULL specification")));
}
/*
 * CopyGetAttnums - produce the integer list of attribute numbers to copy
 *
 * attnamelist is the column list written by the user, or NIL when no list
 * was given.  With NIL we return every column of tupDesc that is neither
 * dropped nor generated, in tuple-descriptor order.  Otherwise each name is
 * resolved against tupDesc and the attnums are returned in the order the
 * names were given.
 *
 * Generated columns are left out of the default list and are rejected when
 * named explicitly.  They don't make sense for COPY FROM; they could
 * possibly be allowed for COPY TO, but excluding them everywhere at least
 * ensures that whatever is copied out can be copied back in.
 *
 * Errors are raised for a generated column, for a name that matches no
 * live column, and for a column named more than once.
 *
 * rel may be NULL; it is used only to mention the relation name in the
 * "does not exist" error report.
 */
List *
CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
{
	List	   *result = NIL;
	ListCell   *lc;
	int			natts = tupDesc->natts;
	int			idx;

	if (attnamelist == NIL)
	{
		/* No explicit list: take every live, non-generated column */
		for (idx = 0; idx < natts; idx++)
		{
			Form_pg_attribute att = TupleDescAttr(tupDesc, idx);

			if (att->attisdropped || att->attgenerated)
				continue;

			/* attnums are 1-based, descriptor positions are 0-based */
			result = lappend_int(result, idx + 1);
		}

		return result;
	}

	/* Explicit list: resolve and validate each requested column name */
	foreach(lc, attnamelist)
	{
		char	   *colname = strVal(lfirst(lc));
		int			found = InvalidAttrNumber;

		/* Scan the descriptor for a live column carrying this name */
		for (idx = 0; idx < natts; idx++)
		{
			Form_pg_attribute att = TupleDescAttr(tupDesc, idx);

			if (att->attisdropped)
				continue;
			if (namestrcmp(&(att->attname), colname) != 0)
				continue;

			if (att->attgenerated)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
						 errmsg("column \"%s\" is a generated column",
								colname),
						 errdetail("Generated columns cannot be used in COPY.")));

			found = att->attnum;
			break;
		}

		if (found == InvalidAttrNumber)
		{
			/* Mention the relation in the message only when we have one */
			if (rel != NULL)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("column \"%s\" of relation \"%s\" does not exist",
								colname, RelationGetRelationName(rel))));
			else
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("column \"%s\" does not exist",
								colname)));
		}

		/* A column may be listed only once */
		if (list_member_int(result, found))
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_COLUMN),
					 errmsg("column \"%s\" specified more than once",
							colname)));

		result = lappend_int(result, found);
	}

	return result;
}