postgresql/src/backend/commands/createas.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

646 lines
19 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* createas.c
* Execution of CREATE TABLE ... AS, a/k/a SELECT INTO.
* Since CREATE MATERIALIZED VIEW shares syntax and most behaviors,
* we implement that here, too.
*
* We implement this by diverting the query's normal output to a
* specialized DestReceiver type.
*
* Formerly, CTAS was implemented as a variant of SELECT, which led
* to assorted legacy behaviors that we still try to preserve, notably that
* we must return a tuples-processed count in the QueryCompletion. (We no
* longer do that for CTAS ... WITH NO DATA, however.)
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/commands/createas.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
2019-01-15 00:54:18 +01:00
#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/toasting.h"
#include "commands/createas.h"
#include "commands/matview.h"
#include "commands/prepare.h"
#include "commands/tablecmds.h"
#include "commands/view.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "rewrite/rewriteHandler.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
typedef struct
{
DestReceiver pub; /* publicly-known function pointers */
IntoClause *into; /* target relation specification */
/* These fields are filled by intorel_startup: */
Relation rel; /* relation to write to */
ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */
CommandId output_cid; /* cmin to insert in output tuples */
int ti_options; /* table_tuple_insert performance options */
BulkInsertState bistate; /* bulk insert state */
} DR_intorel;
/* utility functions for CTAS definition creation */
static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into);
static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into);
/* DestReceiver routines for collecting data */
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);
/*
* create_ctas_internal
*
* Internal utility used for the creation of the definition of a relation
* created via CREATE TABLE AS or a materialized view. Caller needs to
* provide a list of attributes (ColumnDef nodes).
*/
static ObjectAddress
create_ctas_internal(List *attrList, IntoClause *into)
{
CreateStmt *create = makeNode(CreateStmt);
bool is_matview;
char relkind;
Datum toast_options;
static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
ObjectAddress intoRelationAddr;
const TableAmRoutine *tableam = NULL;
Oid accessMethodId = InvalidOid;
Relation rel;
/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
is_matview = (into->viewQuery != NULL);
relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;
/*
* Create the target relation by faking up a CREATE TABLE parsetree and
* passing it to DefineRelation.
*/
create->relation = into->rel;
create->tableElts = attrList;
create->inhRelations = NIL;
create->ofTypename = NULL;
create->constraints = NIL;
create->options = into->options;
create->oncommit = into->onCommit;
create->tablespacename = into->tableSpaceName;
create->if_not_exists = false;
tableam: introduce table AM infrastructure. This introduces the concept of table access methods, i.e. CREATE ACCESS METHOD ... TYPE TABLE and CREATE TABLE ... USING (storage-engine). No table access functionality is delegated to table AMs as of this commit, that'll be done in following commits. Subsequent commits will incrementally abstract table access functionality to be routed through table access methods. That change is too large to be reviewed & committed at once, so it'll be done incrementally. Docs will be updated at the end, as adding them incrementally would likely make them less coherent, and definitely is a lot more work, without a lot of benefit. Table access methods are specified similar to index access methods, i.e. pg_am.amhandler returns, as INTERNAL, a pointer to a struct with callbacks. In contrast to index AMs that struct needs to live as long as a backend, typically that's achieved by just returning a pointer to a constant struct. Psql's \d+ now displays a table's access method. That can be disabled with HIDE_TABLEAM=true, which is mainly useful so regression tests can be run against different AMs. It's quite possible that this behaviour still needs to be fine tuned. For now it's not allowed to set a table AM for a partitioned table, as we've not resolved how partitions would inherit that. Disallowing allows us to introduce, if we decide that's the way forward, such a behaviour without a compatibility break. Catversion bumped, to add the heap table AM and references to it. Author: Haribabu Kommi, Andres Freund, Alvaro Herrera, Dimitri Golgov and others Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de https://postgr.es/m/20160812231527.GA690404@alvherre.pgsql https://postgr.es/m/20190107235616.6lur25ph22u5u5av@alap3.anarazel.de https://postgr.es/m/20190304234700.w5tmhducs5wxgzls@alap3.anarazel.de
2019-03-06 18:54:38 +01:00
create->accessMethod = into->accessMethod;
/*
* Create the relation. (This will error out if there's an existing view,
* so we don't need more code to complain if "replace" is false.)
*/
Implement table partitioning. Table partitioning is like table inheritance and reuses much of the existing infrastructure, but there are some important differences. The parent is called a partitioned table and is always empty; it may not have indexes or non-inherited constraints, since those make no sense for a relation with no data of its own. The children are called partitions and contain all of the actual data. Each partition has an implicit partitioning constraint. Multiple inheritance is not allowed, and partitioning and inheritance can't be mixed. Partitions can't have extra columns and may not allow nulls unless the parent does. Tuples inserted into the parent are automatically routed to the correct partition, so tuple-routing ON INSERT triggers are not needed. Tuple routing isn't yet supported for partitions which are foreign tables, and it doesn't handle updates that cross partition boundaries. Currently, tables can be range-partitioned or list-partitioned. List partitioning is limited to a single column, but range partitioning can involve multiple columns. A partitioning "column" can be an expression. Because table partitioning is less general than table inheritance, it is hoped that it will be easier to reason about properties of partitions, and therefore that this will serve as a better foundation for a variety of possible optimizations, including query planner optimizations. The tuple routing based which this patch does based on the implicit partitioning constraints is an example of this, but it seems likely that many other useful optimizations are also possible. Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat, Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova, Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
/*
* If necessary, create a TOAST table for the target table. Note that
* NewRelationCreateToastTable ends with CommandCounterIncrement(), so
* that the TOAST table will be visible for insertion.
*/
CommandCounterIncrement();
/* parse and validate reloptions for the toast table */
toast_options = transformRelOptions((Datum) 0,
create->options,
"toast",
validnsps,
true, false);
rel = relation_open(intoRelationAddr.objectId, AccessShareLock);
accessMethodId = table_relation_toast_am(rel);
relation_close(rel, AccessShareLock);
if (OidIsValid(accessMethodId))
{
tableam = GetTableAmRoutineByAmOid(accessMethodId);
(void) tableam_reloptions(tableam, RELKIND_TOASTVALUE, toast_options, NULL, true);
}
NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options);
/* Create the "view" part of a materialized view. */
if (is_matview)
{
/* StoreViewQuery scribbles on tree, so make a copy */
Query *query = (Query *) copyObject(into->viewQuery);
StoreViewQuery(intoRelationAddr.objectId, query, false);
CommandCounterIncrement();
}
return intoRelationAddr;
}
/*
* create_ctas_nodata
*
* Create CTAS or materialized view when WITH NO DATA is used, starting from
* the targetlist of the SELECT or view definition.
*/
static ObjectAddress
create_ctas_nodata(List *tlist, IntoClause *into)
{
List *attrList;
ListCell *t,
*lc;
/*
* Build list of ColumnDefs from non-junk elements of the tlist. If a
* column name list was specified in CREATE TABLE AS, override the column
* names in the query. (Too few column names are OK, too many are not.)
*/
attrList = NIL;
lc = list_head(into->colNames);
foreach(t, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(t);
if (!tle->resjunk)
{
ColumnDef *col;
char *colname;
if (lc)
{
colname = strVal(lfirst(lc));
Represent Lists as expansible arrays, not chains of cons-cells. Originally, Postgres Lists were a more or less exact reimplementation of Lisp lists, which consist of chains of separately-allocated cons cells, each having a value and a next-cell link. We'd hacked that once before (commit d0b4399d8) to add a separate List header, but the data was still in cons cells. That makes some operations -- notably list_nth() -- O(N), and it's bulky because of the next-cell pointers and per-cell palloc overhead, and it's very cache-unfriendly if the cons cells end up scattered around rather than being adjacent. In this rewrite, we still have List headers, but the data is in a resizable array of values, with no next-cell links. Now we need at most two palloc's per List, and often only one, since we can allocate some values in the same palloc call as the List header. (Of course, extending an existing List may require repalloc's to enlarge the array. But this involves just O(log N) allocations not O(N).) Of course this is not without downsides. The key difficulty is that addition or deletion of a list entry may now cause other entries to move, which it did not before. For example, that breaks foreach() and sister macros, which historically used a pointer to the current cons-cell as loop state. We can repair those macros transparently by making their actual loop state be an integer list index; the exposed "ListCell *" pointer is no longer state carried across loop iterations, but is just a derived value. (In practice, modern compilers can optimize things back to having just one loop state value, at least for simple cases with inline loop bodies.) In principle, this is a semantics change for cases where the loop body inserts or deletes list entries ahead of the current loop index; but I found no such cases in the Postgres code. The change is not at all transparent for code that doesn't use foreach() but chases lists "by hand" using lnext(). The largest share of such code in the backend is in loops that were maintaining "prev" and "next" variables in addition to the current-cell pointer, in order to delete list cells efficiently using list_delete_cell(). However, we no longer need a previous-cell pointer to delete a list cell efficiently. Keeping a next-cell pointer doesn't work, as explained above, but we can improve matters by changing such code to use a regular foreach() loop and then using the new macro foreach_delete_current() to delete the current cell. (This macro knows how to update the associated foreach loop's state so that no cells will be missed in the traversal.) There remains a nontrivial risk of code assuming that a ListCell * pointer will remain good over an operation that could now move the list contents. To help catch such errors, list.c can be compiled with a new define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents whenever that could possibly happen. This makes list operations significantly more expensive so it's not normally turned on (though it is on by default if USE_VALGRIND is on). There are two notable API differences from the previous code: * lnext() now requires the List's header pointer in addition to the current cell's address. * list_delete_cell() no longer requires a previous-cell argument. These changes are somewhat unfortunate, but on the other hand code using either function needs inspection to see if it is assuming anything it shouldn't, so it's not all bad. Programmers should be aware of these significant performance changes: * list_nth() and related functions are now O(1); so there's no major access-speed difference between a list and an array. * Inserting or deleting a list element now takes time proportional to the distance to the end of the list, due to moving the array elements. (However, it typically *doesn't* require palloc or pfree, so except in long lists it's probably still faster than before.) Notably, lcons() used to be about the same cost as lappend(), but that's no longer true if the list is long. Code that uses lcons() and list_delete_first() to maintain a stack might usefully be rewritten to push and pop at the end of the list rather than the beginning. * There are now list_insert_nth...() and list_delete_nth...() functions that add or remove a list cell identified by index. These have the data-movement penalty explained above, but there's no search penalty. * list_concat() and variants now copy the second list's data into storage belonging to the first list, so there is no longer any sharing of cells between the input lists. The second argument is now declared "const List *" to reflect that it isn't changed. This patch just does the minimum needed to get the new implementation in place and fix bugs exposed by the regression tests. As suggested by the foregoing, there's a fair amount of followup work remaining to do. Also, the ENABLE_LIST_COMPAT macros are finally removed in this commit. Code using those should have been gone a dozen years ago. Patch by me; thanks to David Rowley, Jesper Pedersen, and others for review. Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
lc = lnext(into->colNames, lc);
}
else
colname = tle->resname;
col = makeColumnDef(colname,
exprType((Node *) tle->expr),
exprTypmod((Node *) tle->expr),
exprCollation((Node *) tle->expr));
/*
* It's possible that the column is of a collatable type but the
* collation could not be resolved, so double-check. (We must
* check this here because DefineRelation would adopt the type's
* default collation rather than complaining.)
*/
if (!OidIsValid(col->collOid) &&
type_is_collatable(col->typeName->typeOid))
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("no collation was derived for column \"%s\" with collatable type %s",
col->colname,
format_type_be(col->typeName->typeOid)),
errhint("Use the COLLATE clause to set the collation explicitly.")));
attrList = lappend(attrList, col);
}
}
if (lc != NULL)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("too many column names were specified")));
/* Create the relation definition using the ColumnDef list */
return create_ctas_internal(attrList, into);
}
/*
* ExecCreateTableAs -- execute a CREATE TABLE AS command
*/
ObjectAddress
ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
ParamListInfo params, QueryEnvironment *queryEnv,
QueryCompletion *qc)
{
Query *query = castNode(Query, stmt->query);
IntoClause *into = stmt->into;
bool is_matview = (into->viewQuery != NULL);
DestReceiver *dest;
Oid save_userid = InvalidOid;
int save_sec_context = 0;
int save_nestlevel = 0;
ObjectAddress address;
List *rewritten;
PlannedStmt *plan;
QueryDesc *queryDesc;
/* Check if the relation exists or not */
if (CreateTableAsRelExists(stmt))
return InvalidObjectAddress;
/*
* Create the tuple receiver object and insert info it will need
*/
dest = CreateIntoRelDestReceiver(into);
/*
* The contained Query could be a SELECT, or an EXECUTE utility command.
* If the latter, we just pass it off to ExecuteQuery.
*/
if (query->commandType == CMD_UTILITY &&
IsA(query->utilityStmt, ExecuteStmt))
{
ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt);
Assert(!is_matview); /* excluded by syntax */
ExecuteQuery(pstate, estmt, into, params, dest, qc);
/* get object address that intorel_startup saved for us */
address = ((DR_intorel *) dest)->reladdr;
return address;
}
Assert(query->commandType == CMD_SELECT);
/*
* For materialized views, lock down security-restricted operations and
* arrange to make GUC variable changes local to this command. This is
* not necessary for security, but this keeps the behavior similar to
* REFRESH MATERIALIZED VIEW. Otherwise, one could create a materialized
* view not possible to refresh.
*/
if (is_matview)
{
GetUserIdAndSecContext(&save_userid, &save_sec_context);
SetUserIdAndSecContext(save_userid,
save_sec_context | SECURITY_RESTRICTED_OPERATION);
save_nestlevel = NewGUCNestLevel();
}
if (into->skipData)
{
/*
* If WITH NO DATA was specified, do not go through the rewriter,
* planner and executor. Just define the relation using a code path
* similar to CREATE VIEW. This avoids dump/restore problems stemming
* from running the planner before all dependencies are set up.
*/
address = create_ctas_nodata(query->targetList, into);
}
else
{
/*
* Parse analysis was done already, but we still have to run the rule
* rewriter. We do not do AcquireRewriteLocks: we assume the query
* either came straight from the parser, or suitable locks were
* acquired by plancache.c.
*/
Centralize the logic for protective copying of utility statements. In the "simple Query" code path, it's fine for parse analysis or execution of a utility statement to scribble on the statement's node tree, since that'll just be thrown away afterwards. However it's not fine if the node tree is in the plan cache, as then it'd be corrupted for subsequent executions. Up to now we've dealt with that by having individual utility-statement functions apply copyObject() if they were going to modify the tree. But that's prone to errors of omission. Bug #17053 from Charles Samborski shows that CREATE/ALTER DOMAIN didn't get this memo, and can crash if executed repeatedly from plan cache. In the back branches, we'll just apply a narrow band-aid for that, but in HEAD it seems prudent to have a more principled fix that will close off the possibility of other similar bugs in future. Hence, let's hoist the responsibility for doing copyObject up into ProcessUtility from its children, thus ensuring that it happens for all utility statement types. Also, modify ProcessUtility's API so that its callers can tell it whether a copy step is necessary. It turns out that in all cases, the immediate caller knows whether the node tree is transient, so this doesn't involve a huge amount of code thrashing. In this way, while we lose a little bit in the execute-from-cache code path due to sometimes copying node trees that wouldn't be mutated anyway, we gain something in the simple-Query code path by not copying throwaway node trees. Statements that are complex enough to be expensive to copy are almost certainly ones that would have to be copied anyway, so the loss in the cache code path shouldn't be much. (Note that this whole problem applies only to utility statements. Optimizable statements don't have the issue because we long ago made the executor treat Plan trees as read-only. Perhaps someday we will make utility statement execution act likewise, but I'm not holding my breath.) Discussion: https://postgr.es/m/931771.1623893989@sss.pgh.pa.us Discussion: https://postgr.es/m/17053-3ca3f501bbc212b4@postgresql.org
2021-06-18 17:22:58 +02:00
rewritten = QueryRewrite(query);
/* SELECT should never rewrite to more or less than one SELECT query */
if (list_length(rewritten) != 1)
elog(ERROR, "unexpected rewrite result for %s",
is_matview ? "CREATE MATERIALIZED VIEW" :
"CREATE TABLE AS SELECT");
query = linitial_node(Query, rewritten);
Assert(query->commandType == CMD_SELECT);
/* plan the query */
plan = pg_plan_query(query, pstate->p_sourcetext,
CURSOR_OPT_PARALLEL_OK, params);
/*
* Use a snapshot with an updated command ID to ensure this query sees
* results of any previously executed queries. (This could only
* matter if the planner executed an allegedly-stable function that
* changed the database contents, but let's do it anyway to be
* parallel to the EXPLAIN code path.)
*/
PushCopiedSnapshot(GetActiveSnapshot());
UpdateActiveSnapshotCommandId();
/* Create a QueryDesc, redirecting output to our tuple receiver */
queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
GetActiveSnapshot(), InvalidSnapshot,
dest, params, queryEnv, 0);
/* call ExecutorStart to prepare the plan for execution */
ExecutorStart(queryDesc, GetIntoRelEFlags(into));
/* run the plan to completion */
ExecutorRun(queryDesc, ForwardScanDirection, 0, true);
/* save the rowcount if we're given a qc to fill */
if (qc)
SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed);
/* get object address that intorel_startup saved for us */
address = ((DR_intorel *) dest)->reladdr;
/* and clean up */
ExecutorFinish(queryDesc);
ExecutorEnd(queryDesc);
FreeQueryDesc(queryDesc);
PopActiveSnapshot();
}
if (is_matview)
{
/* Roll back any GUC changes */
AtEOXact_GUC(false, save_nestlevel);
/* Restore userid and security context */
SetUserIdAndSecContext(save_userid, save_sec_context);
}
return address;
}
/*
* GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS
*
* This is exported because EXPLAIN and PREPARE need it too. (Note: those
* callers still need to deal explicitly with the skipData flag; since they
* use different methods for suppressing execution, it doesn't seem worth
* trying to encapsulate that part.)
*/
int
GetIntoRelEFlags(IntoClause *intoClause)
{
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
int flags = 0;
if (intoClause->skipData)
flags |= EXEC_FLAG_WITH_NO_DATA;
return flags;
}
/*
* CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt
*
* Utility wrapper checking if the relation pending for creation in this
* CreateTableAsStmt query already exists or not. Returns true if the
* relation exists, otherwise false.
*/
bool
CreateTableAsRelExists(CreateTableAsStmt *ctas)
{
Oid nspid;
Oid oldrelid;
ObjectAddress address;
IntoClause *into = ctas->into;
nspid = RangeVarGetCreationNamespace(into->rel);
oldrelid = get_relname_relid(into->rel->relname, nspid);
if (OidIsValid(oldrelid))
{
if (!ctas->if_not_exists)
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_TABLE),
errmsg("relation \"%s\" already exists",
into->rel->relname)));
/*
* The relation exists and IF NOT EXISTS has been specified.
*
* If we are in an extension script, insist that the pre-existing
* object be a member of the extension, to avoid security risks.
*/
ObjectAddressSet(address, RelationRelationId, oldrelid);
checkMembershipInCurrentExtension(&address);
/* OK to skip */
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_TABLE),
errmsg("relation \"%s\" already exists, skipping",
into->rel->relname)));
return true;
}
/* Relation does not exist, it can be created */
return false;
}
/*
* CreateIntoRelDestReceiver -- create a suitable DestReceiver object
*
* intoClause will be NULL if called from CreateDestReceiver(), in which
* case it has to be provided later. However, it is convenient to allow
* self->into to be filled in immediately for other callers.
*/
DestReceiver *
CreateIntoRelDestReceiver(IntoClause *intoClause)
{
DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
self->pub.receiveSlot = intorel_receive;
self->pub.rStartup = intorel_startup;
self->pub.rShutdown = intorel_shutdown;
self->pub.rDestroy = intorel_destroy;
self->pub.mydest = DestIntoRel;
self->into = intoClause;
/* other private fields will be set during intorel_startup */
return (DestReceiver *) self;
}
/*
* intorel_startup --- executor startup
*/
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
DR_intorel *myState = (DR_intorel *) self;
IntoClause *into = myState->into;
bool is_matview;
List *attrList;
ObjectAddress intoRelationAddr;
Relation intoRelationDesc;
ListCell *lc;
int attnum;
Assert(into != NULL); /* else somebody forgot to set it */
/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
is_matview = (into->viewQuery != NULL);
/*
* Build column definitions using "pre-cooked" type and collation info. If
* a column name list was specified in CREATE TABLE AS, override the
* column names derived from the query. (Too few column names are OK, too
* many are not.)
*/
attrList = NIL;
lc = list_head(into->colNames);
for (attnum = 0; attnum < typeinfo->natts; attnum++)
{
Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum);
ColumnDef *col;
char *colname;
if (lc)
{
colname = strVal(lfirst(lc));
Represent Lists as expansible arrays, not chains of cons-cells. Originally, Postgres Lists were a more or less exact reimplementation of Lisp lists, which consist of chains of separately-allocated cons cells, each having a value and a next-cell link. We'd hacked that once before (commit d0b4399d8) to add a separate List header, but the data was still in cons cells. That makes some operations -- notably list_nth() -- O(N), and it's bulky because of the next-cell pointers and per-cell palloc overhead, and it's very cache-unfriendly if the cons cells end up scattered around rather than being adjacent. In this rewrite, we still have List headers, but the data is in a resizable array of values, with no next-cell links. Now we need at most two palloc's per List, and often only one, since we can allocate some values in the same palloc call as the List header. (Of course, extending an existing List may require repalloc's to enlarge the array. But this involves just O(log N) allocations not O(N).) Of course this is not without downsides. The key difficulty is that addition or deletion of a list entry may now cause other entries to move, which it did not before. For example, that breaks foreach() and sister macros, which historically used a pointer to the current cons-cell as loop state. We can repair those macros transparently by making their actual loop state be an integer list index; the exposed "ListCell *" pointer is no longer state carried across loop iterations, but is just a derived value. (In practice, modern compilers can optimize things back to having just one loop state value, at least for simple cases with inline loop bodies.) In principle, this is a semantics change for cases where the loop body inserts or deletes list entries ahead of the current loop index; but I found no such cases in the Postgres code. The change is not at all transparent for code that doesn't use foreach() but chases lists "by hand" using lnext(). The largest share of such code in the backend is in loops that were maintaining "prev" and "next" variables in addition to the current-cell pointer, in order to delete list cells efficiently using list_delete_cell(). However, we no longer need a previous-cell pointer to delete a list cell efficiently. Keeping a next-cell pointer doesn't work, as explained above, but we can improve matters by changing such code to use a regular foreach() loop and then using the new macro foreach_delete_current() to delete the current cell. (This macro knows how to update the associated foreach loop's state so that no cells will be missed in the traversal.) There remains a nontrivial risk of code assuming that a ListCell * pointer will remain good over an operation that could now move the list contents. To help catch such errors, list.c can be compiled with a new define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents whenever that could possibly happen. This makes list operations significantly more expensive so it's not normally turned on (though it is on by default if USE_VALGRIND is on). There are two notable API differences from the previous code: * lnext() now requires the List's header pointer in addition to the current cell's address. * list_delete_cell() no longer requires a previous-cell argument. These changes are somewhat unfortunate, but on the other hand code using either function needs inspection to see if it is assuming anything it shouldn't, so it's not all bad. Programmers should be aware of these significant performance changes: * list_nth() and related functions are now O(1); so there's no major access-speed difference between a list and an array. * Inserting or deleting a list element now takes time proportional to the distance to the end of the list, due to moving the array elements. (However, it typically *doesn't* require palloc or pfree, so except in long lists it's probably still faster than before.) Notably, lcons() used to be about the same cost as lappend(), but that's no longer true if the list is long. Code that uses lcons() and list_delete_first() to maintain a stack might usefully be rewritten to push and pop at the end of the list rather than the beginning. * There are now list_insert_nth...() and list_delete_nth...() functions that add or remove a list cell identified by index. These have the data-movement penalty explained above, but there's no search penalty. * list_concat() and variants now copy the second list's data into storage belonging to the first list, so there is no longer any sharing of cells between the input lists. The second argument is now declared "const List *" to reflect that it isn't changed. This patch just does the minimum needed to get the new implementation in place and fix bugs exposed by the regression tests. As suggested by the foregoing, there's a fair amount of followup work remaining to do. Also, the ENABLE_LIST_COMPAT macros are finally removed in this commit. Code using those should have been gone a dozen years ago. Patch by me; thanks to David Rowley, Jesper Pedersen, and others for review. Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
lc = lnext(into->colNames, lc);
}
else
colname = NameStr(attribute->attname);
col = makeColumnDef(colname,
attribute->atttypid,
attribute->atttypmod,
attribute->attcollation);
/*
* It's possible that the column is of a collatable type but the
* collation could not be resolved, so double-check. (We must check
* this here because DefineRelation would adopt the type's default
* collation rather than complaining.)
*/
if (!OidIsValid(col->collOid) &&
type_is_collatable(col->typeName->typeOid))
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("no collation was derived for column \"%s\" with collatable type %s",
col->colname,
format_type_be(col->typeName->typeOid)),
errhint("Use the COLLATE clause to set the collation explicitly.")));
attrList = lappend(attrList, col);
}
if (lc != NULL)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("too many column names were specified")));
/*
* Actually create the target table
*/
intoRelationAddr = create_ctas_internal(attrList, into);
/*
* Finally we can open the target table
*/
intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock);
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
/*
* Make sure the constructed table does not have RLS enabled.
*
* check_enable_rls() will ereport(ERROR) itself if the user has requested
* something invalid, and otherwise will return RLS_ENABLED if RLS should
* be enabled here. We don't actually support that currently, so throw
* our own ereport(ERROR) if that happens.
*/
if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED)
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("policies not yet implemented for this command")));
Row-Level Security Policies (RLS) Building on the updatable security-barrier views work, add the ability to define policies on tables to limit the set of rows which are returned from a query and which are allowed to be added to a table. Expressions defined by the policy for filtering are added to the security barrier quals of the query, while expressions defined to check records being added to a table are added to the with-check options of the query. New top-level commands are CREATE/ALTER/DROP POLICY and are controlled by the table owner. Row Security is able to be enabled and disabled by the owner on a per-table basis using ALTER TABLE .. ENABLE/DISABLE ROW SECURITY. Per discussion, ROW SECURITY is disabled on tables by default and must be enabled for policies on the table to be used. If no policies exist on a table with ROW SECURITY enabled, a default-deny policy is used and no records will be visible. By default, row security is applied at all times except for the table owner and the superuser. A new GUC, row_security, is added which can be set to ON, OFF, or FORCE. When set to FORCE, row security will be applied even for the table owner and superusers. When set to OFF, row security will be disabled when allowed and an error will be thrown if the user does not have rights to bypass row security. Per discussion, pg_dump sets row_security = OFF by default to ensure that exports and backups will have all data in the table or will error if there are insufficient privileges to bypass row security. A new option has been added to pg_dump, --enable-row-security, to ask pg_dump to export with row security enabled. A new role capability, BYPASSRLS, which can only be set by the superuser, is added to allow other users to be able to bypass row security using row_security = OFF. Many thanks to the various individuals who have helped with the design, particularly Robert Haas for his feedback. Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean Rasheed, with additional changes and rework by me. Reviewers have included all of the above, Greg Smith, Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
/*
* Tentatively mark the target as populated, if it's a matview and we're
* going to fill it; otherwise, no change needed.
*/
if (is_matview && !into->skipData)
SetMatViewPopulatedState(intoRelationDesc, true);
/*
* Fill private fields of myState for use by later routines
*/
myState->rel = intoRelationDesc;
myState->reladdr = intoRelationAddr;
myState->output_cid = GetCurrentCommandId(true);
Skip WAL for new relfilenodes, under wal_level=minimal. Until now, only selected bulk operations (e.g. COPY) did this. If a given relfilenode received both a WAL-skipping COPY and a WAL-logged operation (e.g. INSERT), recovery could lose tuples from the COPY. See src/backend/access/transam/README section "Skipping WAL for New RelFileNode" for the new coding rules. Maintainers of table access methods should examine that section. To maintain data durability, just before commit, we choose between an fsync of the relfilenode and copying its contents to WAL. A new GUC, wal_skip_threshold, guides that choice. If this change slows a workload that creates small, permanent relfilenodes under wal_level=minimal, try adjusting wal_skip_threshold. Users setting a timeout on COMMIT may need to adjust that timeout, and log_min_duration_statement analysis will reflect time consumption moving to COMMIT from commands like COPY. Internally, this requires a reliable determination of whether RollbackAndReleaseCurrentSubTransaction() would unlink a relation's current relfilenode. Introduce rd_firstRelfilenodeSubid. Amend the specification of rd_createSubid such that the field is zero when a new rel has an old rd_node. Make relcache.c retain entries for certain dropped relations until end of transaction. Bump XLOG_PAGE_MAGIC, since this introduces XLOG_GIST_ASSIGN_LSN. Future servers accept older WAL, so this bump is discretionary. Kyotaro Horiguchi, reviewed (in earlier, similar versions) by Robert Haas. Heikki Linnakangas and Michael Paquier implemented earlier designs that materially clarified the problem. Reviewed, in earlier designs, by Andrew Dunstan, Andres Freund, Alvaro Herrera, Tom Lane, Fujii Masao, and Simon Riggs. Reported by Martijn van Oosterhout. Discussion: https://postgr.es/m/20150702220524.GA9392@svana.org
2020-04-04 21:25:34 +02:00
myState->ti_options = TABLE_INSERT_SKIP_FSM;
/*
* If WITH NO DATA is specified, there is no need to set up the state for
* bulk inserts as there are no tuples to insert.
*/
if (!into->skipData)
myState->bistate = GetBulkInsertState();
else
myState->bistate = NULL;
/*
Skip WAL for new relfilenodes, under wal_level=minimal. Until now, only selected bulk operations (e.g. COPY) did this. If a given relfilenode received both a WAL-skipping COPY and a WAL-logged operation (e.g. INSERT), recovery could lose tuples from the COPY. See src/backend/access/transam/README section "Skipping WAL for New RelFileNode" for the new coding rules. Maintainers of table access methods should examine that section. To maintain data durability, just before commit, we choose between an fsync of the relfilenode and copying its contents to WAL. A new GUC, wal_skip_threshold, guides that choice. If this change slows a workload that creates small, permanent relfilenodes under wal_level=minimal, try adjusting wal_skip_threshold. Users setting a timeout on COMMIT may need to adjust that timeout, and log_min_duration_statement analysis will reflect time consumption moving to COMMIT from commands like COPY. Internally, this requires a reliable determination of whether RollbackAndReleaseCurrentSubTransaction() would unlink a relation's current relfilenode. Introduce rd_firstRelfilenodeSubid. Amend the specification of rd_createSubid such that the field is zero when a new rel has an old rd_node. Make relcache.c retain entries for certain dropped relations until end of transaction. Bump XLOG_PAGE_MAGIC, since this introduces XLOG_GIST_ASSIGN_LSN. Future servers accept older WAL, so this bump is discretionary. Kyotaro Horiguchi, reviewed (in earlier, similar versions) by Robert Haas. Heikki Linnakangas and Michael Paquier implemented earlier designs that materially clarified the problem. Reviewed, in earlier designs, by Andrew Dunstan, Andres Freund, Alvaro Herrera, Tom Lane, Fujii Masao, and Simon Riggs. Reported by Martijn van Oosterhout. Discussion: https://postgr.es/m/20150702220524.GA9392@svana.org
2020-04-04 21:25:34 +02:00
* Valid smgr_targblock implies something already wrote to the relation.
* This may be harmless, but this function hasn't planned for it.
*/
Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
}
/*
* intorel_receive --- receive one tuple
*/
static bool
intorel_receive(TupleTableSlot *slot, DestReceiver *self)
{
DR_intorel *myState = (DR_intorel *) self;
bool insertIndexes;
/* Nothing to insert if WITH NO DATA is specified. */
if (!myState->into->skipData)
{
/*
* Note that the input slot might not be of the type of the target
* relation. That's supported by table_tuple_insert(), but slightly
* less efficient than inserting with the right slot - but the
* alternative would be to copy into a slot of the right type, which
* would not be cheap either. This also doesn't allow accessing per-AM
* data (say a tuple's xmin), but since we don't do that here...
*/
table_tuple_insert(myState->rel,
slot,
myState->output_cid,
myState->ti_options,
myState->bistate,
&insertIndexes);
}
/* We know this is a newly created relation, so there are no indexes */
return true;
}
/*
* intorel_shutdown --- executor end
*/
static void
intorel_shutdown(DestReceiver *self)
{
DR_intorel *myState = (DR_intorel *) self;
IntoClause *into = myState->into;
if (!into->skipData)
{
FreeBulkInsertState(myState->bistate);
table_finish_bulk_insert(myState->rel, myState->ti_options);
}
/* close rel, but keep lock until commit */
table_close(myState->rel, NoLock);
myState->rel = NULL;
}
/*
* intorel_destroy --- release DestReceiver object
*/
static void
intorel_destroy(DestReceiver *self)
{
pfree(self);
}