diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index d3e33132f3..94f7651c34 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -1216,7 +1216,11 @@ WITH ( MODULUS numeric_literal, REM of the tablespace in which the new table is to be created. If not specified, is consulted, or - if the table is temporary. + if the table is temporary. For + partitioned tables, since no storage is required for the table itself, + the tablespace specified here only serves to mark the default tablespace + for any newly created partitions when no other tablespace is explicitly + specified. diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 651e86b71e..4d5b82aaa9 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -333,7 +333,6 @@ heap_create(const char *relname, case RELKIND_VIEW: case RELKIND_COMPOSITE_TYPE: case RELKIND_FOREIGN_TABLE: - case RELKIND_PARTITIONED_TABLE: create_storage = false; /* @@ -343,10 +342,11 @@ heap_create(const char *relname, reltablespace = InvalidOid; break; + case RELKIND_PARTITIONED_TABLE: case RELKIND_PARTITIONED_INDEX: /* - * Preserve tablespace so that it's used as tablespace for indexes - * on future partitions. + * For partitioned tables and indexes, preserve tablespace so that + * it's used as the tablespace for future partitions. */ create_storage = false; break; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 9149dfd328..eef3b3a26c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -305,7 +305,7 @@ static void truncate_check_activity(Relation rel); static void RangeVarCallbackForTruncate(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg); static List *MergeAttributes(List *schema, List *supers, char relpersistence, - bool is_partition, List **supOids, List **supconstr); + bool is_partition, List **supconstr); static bool MergeCheckConstraint(List *constraints, char *name, Node *expr); static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel); static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel); @@ -446,7 +446,7 @@ static bool ATPrepChangePersistence(Relation rel, bool toLogged); static void ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel, const char *tablespacename, LOCKMODE lockmode); static void ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode); -static void ATExecPartedIdxSetTableSpace(Relation rel, Oid newTableSpace); +static void ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace); static void ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, LOCKMODE lockmode); @@ -536,6 +536,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, static char *validnsps[] = HEAP_RELOPT_NAMESPACES; Oid ofTypeId; ObjectAddress address; + LOCKMODE parentLockmode; /* * Truncate relname to appropriate length (probably a waste of time, as @@ -580,6 +581,46 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("cannot create temporary table within security-restricted operation"))); + /* + * Determine the lockmode to use when scanning parents. A self-exclusive + * lock is needed here. + * + * For regular inheritance, if two backends attempt to add children to the + * same parent simultaneously, and that parent has no pre-existing + * children, then both will attempt to update the parent's relhassubclass + * field, leading to a "tuple concurrently updated" error. Also, this + * interlocks against a concurrent ANALYZE on the parent table, which + * might otherwise be attempting to clear the parent's relhassubclass + * field, if its previous children were recently dropped. + * + * If the child table is a partition, then we instead grab an exclusive + * lock on the parent because its partition descriptor will be changed by + * addition of the new partition. + */ + parentLockmode = (stmt->partbound != NULL ? AccessExclusiveLock : + ShareUpdateExclusiveLock); + + /* Determine the list of OIDs of the parents. */ + inheritOids = NIL; + foreach(listptr, stmt->inhRelations) + { + RangeVar *rv = (RangeVar *) lfirst(listptr); + Oid parentOid; + + parentOid = RangeVarGetRelid(rv, parentLockmode, false); + + /* + * Reject duplications in the list of parents. + */ + if (list_member_oid(inheritOids, parentOid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" would be inherited from more than once", + get_rel_name(parentOid)))); + + inheritOids = lappend_oid(inheritOids, parentOid); + } + /* * Select tablespace to use. If not specified, use default tablespace * (which may in turn default to database's default). @@ -588,6 +629,25 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, { tablespaceId = get_tablespace_oid(stmt->tablespacename, false); } + else if (stmt->partbound) + { + HeapTuple tup; + + /* + * For partitions, when no other tablespace is specified, we default + * the tablespace to the parent partitioned table's. + */ + Assert(list_length(inheritOids) == 1); + tup = SearchSysCache1(RELOID, + DatumGetObjectId(linitial_oid(inheritOids))); + + tablespaceId = ((Form_pg_class) GETSTRUCT(tup))->reltablespace; + + if (!OidIsValid(tablespaceId)) + tablespaceId = GetDefaultTablespace(stmt->relation->relpersistence); + + ReleaseSysCache(tup); + } else { tablespaceId = GetDefaultTablespace(stmt->relation->relpersistence); @@ -646,10 +706,10 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, * modified by MergeAttributes.) */ stmt->tableElts = - MergeAttributes(stmt->tableElts, stmt->inhRelations, + MergeAttributes(stmt->tableElts, inheritOids, stmt->relation->relpersistence, stmt->partbound != NULL, - &inheritOids, &old_constraints); + &old_constraints); /* * Create a tuple descriptor from the relation schema. Note that this @@ -1781,12 +1841,11 @@ storage_name(char c) * Input arguments: * 'schema' is the column/attribute definition for the table. (It's a list * of ColumnDef's.) It is destructively changed. - * 'supers' is a list of names (as RangeVar nodes) of parent relations. + * 'supers' is a list of OIDs of parent relations, already locked by caller. * 'relpersistence' is a persistence type of the table. * 'is_partition' tells if the table is a partition * * Output arguments: - * 'supOids' receives a list of the OIDs of the parent relations. * 'supconstr' receives a list of constraints belonging to the parents, * updated as necessary to be valid for the child. * @@ -1834,11 +1893,10 @@ storage_name(char c) */ static List * MergeAttributes(List *schema, List *supers, char relpersistence, - bool is_partition, List **supOids, List **supconstr) + bool is_partition, List **supconstr) { ListCell *entry; List *inhSchema = NIL; - List *parentOids = NIL; List *constraints = NIL; bool have_bogus_defaults = false; int child_attno; @@ -1939,31 +1997,15 @@ MergeAttributes(List *schema, List *supers, char relpersistence, child_attno = 0; foreach(entry, supers) { - RangeVar *parent = (RangeVar *) lfirst(entry); + Oid parent = lfirst_oid(entry); Relation relation; TupleDesc tupleDesc; TupleConstr *constr; AttrNumber *newattno; AttrNumber parent_attno; - /* - * A self-exclusive lock is needed here. If two backends attempt to - * add children to the same parent simultaneously, and that parent has - * no pre-existing children, then both will attempt to update the - * parent's relhassubclass field, leading to a "tuple concurrently - * updated" error. Also, this interlocks against a concurrent ANALYZE - * on the parent table, which might otherwise be attempting to clear - * the parent's relhassubclass field, if its previous children were - * recently dropped. - * - * If the child table is a partition, then we instead grab an - * exclusive lock on the parent because its partition descriptor will - * be changed by addition of the new partition. - */ - if (!is_partition) - relation = heap_openrv(parent, ShareUpdateExclusiveLock); - else - relation = heap_openrv(parent, AccessExclusiveLock); + /* caller already got lock */ + relation = heap_open(parent, NoLock); /* * Check for active uses of the parent partitioned table in the @@ -1982,12 +2024,12 @@ MergeAttributes(List *schema, List *supers, char relpersistence, ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit from partitioned table \"%s\"", - parent->relname))); + RelationGetRelationName(relation)))); if (relation->rd_rel->relispartition && !is_partition) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit from partition \"%s\"", - parent->relname))); + RelationGetRelationName(relation)))); if (relation->rd_rel->relkind != RELKIND_RELATION && relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && @@ -1995,7 +2037,7 @@ MergeAttributes(List *schema, List *supers, char relpersistence, ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("inherited relation \"%s\" is not a table or foreign table", - parent->relname))); + RelationGetRelationName(relation)))); /* * If the parent is permanent, so must be all of its partitions. Note @@ -2017,7 +2059,7 @@ MergeAttributes(List *schema, List *supers, char relpersistence, errmsg(!is_partition ? "cannot inherit from temporary relation \"%s\"" : "cannot create a permanent relation as partition of temporary relation \"%s\"", - parent->relname))); + RelationGetRelationName(relation)))); /* If existing rel is temp, it must belong to this session */ if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP && @@ -2036,17 +2078,6 @@ MergeAttributes(List *schema, List *supers, char relpersistence, aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(relation->rd_rel->relkind), RelationGetRelationName(relation)); - /* - * Reject duplications in the list of parents. - */ - if (list_member_oid(parentOids, RelationGetRelid(relation))) - ereport(ERROR, - (errcode(ERRCODE_DUPLICATE_TABLE), - errmsg("relation \"%s\" would be inherited from more than once", - parent->relname))); - - parentOids = lappend_oid(parentOids, RelationGetRelid(relation)); - tupleDesc = RelationGetDescr(relation); constr = tupleDesc->constr; @@ -2463,7 +2494,6 @@ MergeAttributes(List *schema, List *supers, char relpersistence, } } - *supOids = parentOids; *supconstr = constraints; return schema; } @@ -4157,11 +4187,13 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, break; case AT_SetTableSpace: /* SET TABLESPACE */ /* - * Only do this for partitioned indexes, for which this is just - * a catalog change. Other relation types are handled by Phase 3. + * Only do this for partitioned tables and indexes, for which this + * is just a catalog change. Other relation types which have + * storage are handled by Phase 3. */ - if (rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) - ATExecPartedIdxSetTableSpace(rel, tab->newTableSpace); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + ATExecSetTableSpaceNoStorage(rel, tab->newTableSpace); break; case AT_SetRelOptions: /* SET (...) */ @@ -10935,19 +10967,26 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) } /* - * Special handling of ALTER TABLE SET TABLESPACE for partitioned indexes, - * which have no storage (so not handled in Phase 3 like other relation types) + * Special handling of ALTER TABLE SET TABLESPACE for relations with no + * storage that have an interest in preserving tablespace. + * + * Since these have no storage the tablespace can be updated with a simple + * metadata only operation to update the tablespace. */ static void -ATExecPartedIdxSetTableSpace(Relation rel, Oid newTableSpace) +ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace) { HeapTuple tuple; Oid oldTableSpace; Relation pg_class; Form_pg_class rd_rel; - Oid indexOid = RelationGetRelid(rel); + Oid reloid = RelationGetRelid(rel); - Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX); + /* + * Shouldn't be called on relations having storage; these are processed + * in phase 3. + */ + Assert(!RELKIND_CAN_HAVE_STORAGE(rel->rd_rel->relkind)); /* Can't allow a non-shared relation in pg_global */ if (newTableSpace == GLOBALTABLESPACE_OID) @@ -10962,24 +11001,23 @@ ATExecPartedIdxSetTableSpace(Relation rel, Oid newTableSpace) if (newTableSpace == oldTableSpace || (newTableSpace == MyDatabaseTableSpace && oldTableSpace == 0)) { - InvokeObjectPostAlterHook(RelationRelationId, - indexOid, 0); + InvokeObjectPostAlterHook(RelationRelationId, reloid, 0); return; } /* Get a modifiable copy of the relation's pg_class row */ pg_class = heap_open(RelationRelationId, RowExclusiveLock); - tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(indexOid)); + tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(reloid)); if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for relation %u", indexOid); + elog(ERROR, "cache lookup failed for relation %u", reloid); rd_rel = (Form_pg_class) GETSTRUCT(tuple); /* update the pg_class row */ rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace; CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); - InvokeObjectPostAlterHook(RelationRelationId, indexOid, 0); + InvokeObjectPostAlterHook(RelationRelationId, reloid, 0); heap_freetuple(tuple); diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 84e63c6d06..321c9a1973 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -122,6 +122,19 @@ typedef FormData_pg_class *Form_pg_class; */ #define REPLICA_IDENTITY_INDEX 'i' +/* + * Relation kinds that have physical storage. These relations normally have + * relfilenode set to non-zero, but it can also be zero if the relation is + * mapped. + */ +#define RELKIND_CAN_HAVE_STORAGE(relkind) \ + ((relkind) == RELKIND_RELATION || \ + (relkind) == RELKIND_INDEX || \ + (relkind) == RELKIND_SEQUENCE || \ + (relkind) == RELKIND_TOASTVALUE || \ + (relkind) == RELKIND_MATVIEW) + + #endif /* EXPOSE_TO_CLIENT_CODE */ #endif /* PG_CLASS_H */ diff --git a/src/test/regress/input/tablespace.source b/src/test/regress/input/tablespace.source index 2ac757cfab..47ae73af95 100644 --- a/src/test/regress/input/tablespace.source +++ b/src/test/regress/input/tablespace.source @@ -44,6 +44,18 @@ CREATE INDEX foo_idx on testschema.foo(i) TABLESPACE regress_tblspace; SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c where c.reltablespace = t.oid AND c.relname = 'foo_idx'; +-- partitioned table +CREATE TABLE testschema.part (a int) PARTITION BY LIST (a); +CREATE TABLE testschema.part12 PARTITION OF testschema.part FOR VALUES IN(1,2) PARTITION BY LIST (a) TABLESPACE regress_tblspace; +CREATE TABLE testschema.part12_1 PARTITION OF testschema.part12 FOR VALUES IN (1); +ALTER TABLE testschema.part12 SET TABLESPACE pg_default; +CREATE TABLE testschema.part12_2 PARTITION OF testschema.part12 FOR VALUES IN (2); +-- Ensure part12_1 defaulted to regress_tblspace and part12_2 defaulted to pg_default. +SELECT relname, spcname FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_tablespace t ON c.reltablespace = t.oid + where c.relname LIKE 'part%' order by relname; +DROP TABLE testschema.part; + -- partitioned index CREATE TABLE testschema.part (a int) PARTITION BY LIST (a); CREATE TABLE testschema.part1 PARTITION OF testschema.part FOR VALUES IN (1); diff --git a/src/test/regress/output/tablespace.source b/src/test/regress/output/tablespace.source index 2e78e5ece6..29d6d2232b 100644 --- a/src/test/regress/output/tablespace.source +++ b/src/test/regress/output/tablespace.source @@ -61,6 +61,25 @@ SELECT relname, spcname FROM pg_catalog.pg_tablespace t, pg_catalog.pg_class c foo_idx | regress_tblspace (1 row) +-- partitioned table +CREATE TABLE testschema.part (a int) PARTITION BY LIST (a); +CREATE TABLE testschema.part12 PARTITION OF testschema.part FOR VALUES IN(1,2) PARTITION BY LIST (a) TABLESPACE regress_tblspace; +CREATE TABLE testschema.part12_1 PARTITION OF testschema.part12 FOR VALUES IN (1); +ALTER TABLE testschema.part12 SET TABLESPACE pg_default; +CREATE TABLE testschema.part12_2 PARTITION OF testschema.part12 FOR VALUES IN (2); +-- Ensure part12_1 defaulted to regress_tblspace and part12_2 defaulted to pg_default. +SELECT relname, spcname FROM pg_catalog.pg_class c + LEFT JOIN pg_catalog.pg_tablespace t ON c.reltablespace = t.oid + where c.relname LIKE 'part%' order by relname; + relname | spcname +----------+------------------ + part | + part12 | + part12_1 | regress_tblspace + part12_2 | +(4 rows) + +DROP TABLE testschema.part; -- partitioned index CREATE TABLE testschema.part (a int) PARTITION BY LIST (a); CREATE TABLE testschema.part1 PARTITION OF testschema.part FOR VALUES IN (1);