/*------------------------------------------------------------------------- * * createas.c * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO. * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors, * we implement that here, too. * * We implement this by diverting the query's normal output to a * specialized DestReceiver type. * * Formerly, CTAS was implemented as a variant of SELECT, which led * to assorted legacy behaviors that we still try to preserve, notably that * we must return a tuples-processed count in the QueryCompletion. (We no * longer do that for CTAS ... WITH NO DATA, however.) * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/commands/createas.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/heapam.h" #include "access/reloptions.h" #include "access/tableam.h" #include "access/xact.h" #include "catalog/namespace.h" #include "catalog/toasting.h" #include "commands/createas.h" #include "commands/matview.h" #include "commands/prepare.h" #include "commands/tablecmds.h" #include "commands/view.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "rewrite/rewriteHandler.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/rls.h" #include "utils/snapmgr.h" typedef struct { DestReceiver pub; /* publicly-known function pointers */ IntoClause *into; /* target relation specification */ /* These fields are filled by intorel_startup: */ Relation rel; /* relation to write to */ ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ CommandId output_cid; /* cmin to insert in output tuples */ int ti_options; /* table_tuple_insert performance options */ BulkInsertState bistate; /* bulk insert state */ } DR_intorel; /* utility functions for CTAS definition creation */ static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into); static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into); /* DestReceiver routines for collecting data */ static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self); static void intorel_shutdown(DestReceiver *self); static void intorel_destroy(DestReceiver *self); /* * create_ctas_internal * * Internal utility used for the creation of the definition of a relation * created via CREATE TABLE AS or a materialized view. Caller needs to * provide a list of attributes (ColumnDef nodes). */ static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into) { CreateStmt *create = makeNode(CreateStmt); bool is_matview; char relkind; Datum toast_options; static char *validnsps[] = HEAP_RELOPT_NAMESPACES; ObjectAddress intoRelationAddr; /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ is_matview = (into->viewQuery != NULL); relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; /* * Create the target relation by faking up a CREATE TABLE parsetree and * passing it to DefineRelation. */ create->relation = into->rel; create->tableElts = attrList; create->inhRelations = NIL; create->ofTypename = NULL; create->constraints = NIL; create->options = into->options; create->oncommit = into->onCommit; create->tablespacename = into->tableSpaceName; create->if_not_exists = false; create->accessMethod = into->accessMethod; /* * Create the relation. (This will error out if there's an existing view, * so we don't need more code to complain if "replace" is false.) */ intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL); /* * If necessary, create a TOAST table for the target table. Note that * NewRelationCreateToastTable ends with CommandCounterIncrement(), so * that the TOAST table will be visible for insertion. */ CommandCounterIncrement(); /* parse and validate reloptions for the toast table */ toast_options = transformRelOptions((Datum) 0, create->options, "toast", validnsps, true, false); (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true); NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options); /* Create the "view" part of a materialized view. */ if (is_matview) { /* StoreViewQuery scribbles on tree, so make a copy */ Query *query = (Query *) copyObject(into->viewQuery); StoreViewQuery(intoRelationAddr.objectId, query, false); CommandCounterIncrement(); } return intoRelationAddr; } /* * create_ctas_nodata * * Create CTAS or materialized view when WITH NO DATA is used, starting from * the targetlist of the SELECT or view definition. */ static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into) { List *attrList; ListCell *t, *lc; /* * Build list of ColumnDefs from non-junk elements of the tlist. If a * column name list was specified in CREATE TABLE AS, override the column * names in the query. (Too few column names are OK, too many are not.) */ attrList = NIL; lc = list_head(into->colNames); foreach(t, tlist) { TargetEntry *tle = (TargetEntry *) lfirst(t); if (!tle->resjunk) { ColumnDef *col; char *colname; if (lc) { colname = strVal(lfirst(lc)); lc = lnext(into->colNames, lc); } else colname = tle->resname; col = makeColumnDef(colname, exprType((Node *) tle->expr), exprTypmod((Node *) tle->expr), exprCollation((Node *) tle->expr)); /* * It's possible that the column is of a collatable type but the * collation could not be resolved, so double-check. (We must * check this here because DefineRelation would adopt the type's * default collation rather than complaining.) */ if (!OidIsValid(col->collOid) && type_is_collatable(col->typeName->typeOid)) ereport(ERROR, (errcode(ERRCODE_INDETERMINATE_COLLATION), errmsg("no collation was derived for column \"%s\" with collatable type %s", col->colname, format_type_be(col->typeName->typeOid)), errhint("Use the COLLATE clause to set the collation explicitly."))); attrList = lappend(attrList, col); } } if (lc != NULL) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("too many column names were specified"))); /* Create the relation definition using the ColumnDef list */ return create_ctas_internal(attrList, into); } /* * ExecCreateTableAs -- execute a CREATE TABLE AS command */ ObjectAddress ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, ParamListInfo params, QueryEnvironment *queryEnv, QueryCompletion *qc) { Query *query = castNode(Query, stmt->query); IntoClause *into = stmt->into; bool is_matview = (into->viewQuery != NULL); DestReceiver *dest; Oid save_userid = InvalidOid; int save_sec_context = 0; int save_nestlevel = 0; ObjectAddress address; List *rewritten; PlannedStmt *plan; QueryDesc *queryDesc; /* Check if the relation exists or not */ if (CreateTableAsRelExists(stmt)) return InvalidObjectAddress; /* * Create the tuple receiver object and insert info it will need */ dest = CreateIntoRelDestReceiver(into); /* * The contained Query could be a SELECT, or an EXECUTE utility command. * If the latter, we just pass it off to ExecuteQuery. */ if (query->commandType == CMD_UTILITY && IsA(query->utilityStmt, ExecuteStmt)) { ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt); Assert(!is_matview); /* excluded by syntax */ ExecuteQuery(pstate, estmt, into, params, dest, qc); /* get object address that intorel_startup saved for us */ address = ((DR_intorel *) dest)->reladdr; return address; } Assert(query->commandType == CMD_SELECT); /* * For materialized views, lock down security-restricted operations and * arrange to make GUC variable changes local to this command. This is * not necessary for security, but this keeps the behavior similar to * REFRESH MATERIALIZED VIEW. Otherwise, one could create a materialized * view not possible to refresh. */ if (is_matview) { GetUserIdAndSecContext(&save_userid, &save_sec_context); SetUserIdAndSecContext(save_userid, save_sec_context | SECURITY_RESTRICTED_OPERATION); save_nestlevel = NewGUCNestLevel(); } if (into->skipData) { /* * If WITH NO DATA was specified, do not go through the rewriter, * planner and executor. Just define the relation using a code path * similar to CREATE VIEW. This avoids dump/restore problems stemming * from running the planner before all dependencies are set up. */ address = create_ctas_nodata(query->targetList, into); } else { /* * Parse analysis was done already, but we still have to run the rule * rewriter. We do not do AcquireRewriteLocks: we assume the query * either came straight from the parser, or suitable locks were * acquired by plancache.c. */ rewritten = QueryRewrite(query); /* SELECT should never rewrite to more or less than one SELECT query */ if (list_length(rewritten) != 1) elog(ERROR, "unexpected rewrite result for %s", is_matview ? "CREATE MATERIALIZED VIEW" : "CREATE TABLE AS SELECT"); query = linitial_node(Query, rewritten); Assert(query->commandType == CMD_SELECT); /* plan the query */ plan = pg_plan_query(query, pstate->p_sourcetext, CURSOR_OPT_PARALLEL_OK, params); /* * Use a snapshot with an updated command ID to ensure this query sees * results of any previously executed queries. (This could only * matter if the planner executed an allegedly-stable function that * changed the database contents, but let's do it anyway to be * parallel to the EXPLAIN code path.) */ PushCopiedSnapshot(GetActiveSnapshot()); UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0); /* call ExecutorStart to prepare the plan for execution */ ExecutorStart(queryDesc, GetIntoRelEFlags(into)); /* run the plan to completion */ ExecutorRun(queryDesc, ForwardScanDirection, 0, true); /* save the rowcount if we're given a qc to fill */ if (qc) SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed); /* get object address that intorel_startup saved for us */ address = ((DR_intorel *) dest)->reladdr; /* and clean up */ ExecutorFinish(queryDesc); ExecutorEnd(queryDesc); FreeQueryDesc(queryDesc); PopActiveSnapshot(); } if (is_matview) { /* Roll back any GUC changes */ AtEOXact_GUC(false, save_nestlevel); /* Restore userid and security context */ SetUserIdAndSecContext(save_userid, save_sec_context); } return address; } /* * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS * * This is exported because EXPLAIN and PREPARE need it too. (Note: those * callers still need to deal explicitly with the skipData flag; since they * use different methods for suppressing execution, it doesn't seem worth * trying to encapsulate that part.) */ int GetIntoRelEFlags(IntoClause *intoClause) { int flags = 0; if (intoClause->skipData) flags |= EXEC_FLAG_WITH_NO_DATA; return flags; } /* * CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt * * Utility wrapper checking if the relation pending for creation in this * CreateTableAsStmt query already exists or not. Returns true if the * relation exists, otherwise false. */ bool CreateTableAsRelExists(CreateTableAsStmt *ctas) { Oid nspid; Oid oldrelid; ObjectAddress address; IntoClause *into = ctas->into; nspid = RangeVarGetCreationNamespace(into->rel); oldrelid = get_relname_relid(into->rel->relname, nspid); if (OidIsValid(oldrelid)) { if (!ctas->if_not_exists) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_TABLE), errmsg("relation \"%s\" already exists", into->rel->relname))); /* * The relation exists and IF NOT EXISTS has been specified. * * If we are in an extension script, insist that the pre-existing * object be a member of the extension, to avoid security risks. */ ObjectAddressSet(address, RelationRelationId, oldrelid); checkMembershipInCurrentExtension(&address); /* OK to skip */ ereport(NOTICE, (errcode(ERRCODE_DUPLICATE_TABLE), errmsg("relation \"%s\" already exists, skipping", into->rel->relname))); return true; } /* Relation does not exist, it can be created */ return false; } /* * CreateIntoRelDestReceiver -- create a suitable DestReceiver object * * intoClause will be NULL if called from CreateDestReceiver(), in which * case it has to be provided later. However, it is convenient to allow * self->into to be filled in immediately for other callers. */ DestReceiver * CreateIntoRelDestReceiver(IntoClause *intoClause) { DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel)); self->pub.receiveSlot = intorel_receive; self->pub.rStartup = intorel_startup; self->pub.rShutdown = intorel_shutdown; self->pub.rDestroy = intorel_destroy; self->pub.mydest = DestIntoRel; self->into = intoClause; /* other private fields will be set during intorel_startup */ return (DestReceiver *) self; } /* * intorel_startup --- executor startup */ static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) { DR_intorel *myState = (DR_intorel *) self; IntoClause *into = myState->into; bool is_matview; List *attrList; ObjectAddress intoRelationAddr; Relation intoRelationDesc; ListCell *lc; int attnum; Assert(into != NULL); /* else somebody forgot to set it */ /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ is_matview = (into->viewQuery != NULL); /* * Build column definitions using "pre-cooked" type and collation info. If * a column name list was specified in CREATE TABLE AS, override the * column names derived from the query. (Too few column names are OK, too * many are not.) */ attrList = NIL; lc = list_head(into->colNames); for (attnum = 0; attnum < typeinfo->natts; attnum++) { Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum); ColumnDef *col; char *colname; if (lc) { colname = strVal(lfirst(lc)); lc = lnext(into->colNames, lc); } else colname = NameStr(attribute->attname); col = makeColumnDef(colname, attribute->atttypid, attribute->atttypmod, attribute->attcollation); /* * It's possible that the column is of a collatable type but the * collation could not be resolved, so double-check. (We must check * this here because DefineRelation would adopt the type's default * collation rather than complaining.) */ if (!OidIsValid(col->collOid) && type_is_collatable(col->typeName->typeOid)) ereport(ERROR, (errcode(ERRCODE_INDETERMINATE_COLLATION), errmsg("no collation was derived for column \"%s\" with collatable type %s", col->colname, format_type_be(col->typeName->typeOid)), errhint("Use the COLLATE clause to set the collation explicitly."))); attrList = lappend(attrList, col); } if (lc != NULL) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("too many column names were specified"))); /* * Actually create the target table */ intoRelationAddr = create_ctas_internal(attrList, into); /* * Finally we can open the target table */ intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock); /* * Make sure the constructed table does not have RLS enabled. * * check_enable_rls() will ereport(ERROR) itself if the user has requested * something invalid, and otherwise will return RLS_ENABLED if RLS should * be enabled here. We don't actually support that currently, so throw * our own ereport(ERROR) if that happens. */ if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("policies not yet implemented for this command"))); /* * Tentatively mark the target as populated, if it's a matview and we're * going to fill it; otherwise, no change needed. */ if (is_matview && !into->skipData) SetMatViewPopulatedState(intoRelationDesc, true); /* * Fill private fields of myState for use by later routines */ myState->rel = intoRelationDesc; myState->reladdr = intoRelationAddr; myState->output_cid = GetCurrentCommandId(true); myState->ti_options = TABLE_INSERT_SKIP_FSM; /* * If WITH NO DATA is specified, there is no need to set up the state for * bulk inserts as there are no tuples to insert. */ if (!into->skipData) myState->bistate = GetBulkInsertState(); else myState->bistate = NULL; /* * Valid smgr_targblock implies something already wrote to the relation. * This may be harmless, but this function hasn't planned for it. */ Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); } /* * intorel_receive --- receive one tuple */ static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self) { DR_intorel *myState = (DR_intorel *) self; /* Nothing to insert if WITH NO DATA is specified. */ if (!myState->into->skipData) { /* * Note that the input slot might not be of the type of the target * relation. That's supported by table_tuple_insert(), but slightly * less efficient than inserting with the right slot - but the * alternative would be to copy into a slot of the right type, which * would not be cheap either. This also doesn't allow accessing per-AM * data (say a tuple's xmin), but since we don't do that here... */ table_tuple_insert(myState->rel, slot, myState->output_cid, myState->ti_options, myState->bistate); } /* We know this is a newly created relation, so there are no indexes */ return true; } /* * intorel_shutdown --- executor end */ static void intorel_shutdown(DestReceiver *self) { DR_intorel *myState = (DR_intorel *) self; IntoClause *into = myState->into; if (!into->skipData) { FreeBulkInsertState(myState->bistate); table_finish_bulk_insert(myState->rel, myState->ti_options); } /* close rel, but keep lock until commit */ table_close(myState->rel, NoLock); myState->rel = NULL; } /* * intorel_destroy --- release DestReceiver object */ static void intorel_destroy(DestReceiver *self) { pfree(self); }