diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 6584a9cb8d..4d179881f2 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -566,10 +566,14 @@ heapam_finish_bulk_insert(Relation relation, int options) */ static void -heapam_relation_set_new_filenode(Relation rel, char persistence, +heapam_relation_set_new_filenode(Relation rel, + const RelFileNode *newrnode, + char persistence, TransactionId *freezeXid, MultiXactId *minmulti) { + SMgrRelation srel; + /* * Initialize to the minimum XID that could put tuples in the table. We * know that no xacts older than RecentXmin are still running, so that @@ -587,7 +591,7 @@ heapam_relation_set_new_filenode(Relation rel, char persistence, */ *minmulti = GetOldestMultiXactId(); - RelationCreateStorage(rel->rd_node, persistence); + srel = RelationCreateStorage(*newrnode, persistence); /* * If required, set up an init fork for an unlogged table so that it can @@ -598,16 +602,17 @@ heapam_relation_set_new_filenode(Relation rel, char persistence, * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE * record. Therefore, logging is necessary even if wal_level=minimal. 
*/ - if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) + if (persistence == RELPERSISTENCE_UNLOGGED) { Assert(rel->rd_rel->relkind == RELKIND_RELATION || rel->rd_rel->relkind == RELKIND_MATVIEW || rel->rd_rel->relkind == RELKIND_TOASTVALUE); - RelationOpenSmgr(rel); - smgrcreate(rel->rd_smgr, INIT_FORKNUM, false); - log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM); - smgrimmedsync(rel->rd_smgr, INIT_FORKNUM); + smgrcreate(srel, INIT_FORKNUM, false); + log_smgrcreate(newrnode, INIT_FORKNUM); + smgrimmedsync(srel, INIT_FORKNUM); } + + smgrclose(srel); } static void @@ -617,13 +622,21 @@ heapam_relation_nontransactional_truncate(Relation rel) } static void -heapam_relation_copy_data(Relation rel, RelFileNode newrnode) +heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode) { SMgrRelation dstrel; - dstrel = smgropen(newrnode, rel->rd_backend); + dstrel = smgropen(*newrnode, rel->rd_backend); RelationOpenSmgr(rel); + /* + * Since we copy the file directly without looking at the shared buffers, + * we'd better first flush out any pages of the source relation that are + * in shared buffers. We assume no new changes will be made while we are + * holding exclusive lock on the rel. + */ + FlushRelationBuffers(rel); + /* * Create and copy all forks of the relation, and schedule unlinking of * old physical files. @@ -631,7 +644,7 @@ heapam_relation_copy_data(Relation rel, RelFileNode newrnode) * NOTE: any conflict in relfilenode value will be caught in * RelationCreateStorage(). 
*/ - RelationCreateStorage(newrnode, rel->rd_rel->relpersistence); + RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence); /* copy main fork */ RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM, @@ -652,7 +665,7 @@ heapam_relation_copy_data(Relation rel, RelFileNode newrnode) if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT || (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED && forkNum == INIT_FORKNUM)) - log_smgrcreate(&newrnode, forkNum); + log_smgrcreate(newrnode, forkNum); RelationCopyStorage(rel->rd_smgr, dstrel, forkNum, rel->rd_rel->relpersistence); } diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 6b77eff0af..ee6b72e550 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -435,8 +435,9 @@ heap_create(const char *relname, case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_MATVIEW: - table_relation_set_new_filenode(rel, relpersistence, - relfrozenxid, relminmxid); + table_relation_set_new_filenode(rel, &rel->rd_node, + relpersistence, + relfrozenxid, relminmxid); break; } } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 72242b2476..fb41f223ad 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -75,7 +75,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ * This function is transactional. The creation is WAL-logged, and if the * transaction aborts later on, the storage will be destroyed. 
*/ -void +SMgrRelation RelationCreateStorage(RelFileNode rnode, char relpersistence) { PendingRelDelete *pending; @@ -99,7 +99,7 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) break; default: elog(ERROR, "invalid relpersistence: %c", relpersistence); - return; /* placate compiler */ + return NULL; /* placate compiler */ } srel = smgropen(rnode, backend); @@ -117,13 +117,15 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence) pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; pendingDeletes = pending; + + return srel; } /* * Perform XLogInsert of an XLOG_SMGR_CREATE record to WAL. */ void -log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum) +log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum) { xl_smgr_create xlrec; @@ -294,6 +296,10 @@ RelationTruncate(Relation rel, BlockNumber nblocks) /* * Copy a fork's data, block by block. + * + * Note that this requires that there is no dirty data in shared buffers. If + * it's possible that there are, callers need to flush those using + * e.g. FlushRelationBuffers(rel). */ void RelationCopyStorage(SMgrRelation src, SMgrRelation dst, diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 14fcad9034..2d0ef92bad 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -12236,14 +12236,6 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) elog(ERROR, "cache lookup failed for relation %u", tableOid); rd_rel = (Form_pg_class) GETSTRUCT(tuple); - /* - * Since we copy the file directly without looking at the shared buffers, - * we'd better first flush out any pages of the source relation that are - * in shared buffers. We assume no new changes will be made while we are - * holding exclusive lock on the rel. - */ - FlushRelationBuffers(rel); - /* * Relfilenodes are not unique in databases across tablespaces, so we need * to allocate a new one in the new tablespace. 
@@ -12266,10 +12258,16 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) Assert(rel->rd_rel->relkind == RELKIND_RELATION || rel->rd_rel->relkind == RELKIND_MATVIEW || rel->rd_rel->relkind == RELKIND_TOASTVALUE); - table_relation_copy_data(rel, newrnode); + table_relation_copy_data(rel, &newrnode); } - /* update the pg_class row */ + /* + * Update the pg_class row. + * + * NB: This wouldn't work if ATExecSetTableSpace() were allowed to be + * executed on pg_class or its indexes (the above copy wouldn't contain + * the updated pg_class entry), but that's forbidden above. + */ rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace; rd_rel->relfilenode = newrelfilenode; CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); @@ -12537,6 +12535,14 @@ index_copy_data(Relation rel, RelFileNode newrnode) dstrel = smgropen(newrnode, rel->rd_backend); RelationOpenSmgr(rel); + /* + * Since we copy the file directly without looking at the shared buffers, + * we'd better first flush out any pages of the source relation that are + * in shared buffers. We assume no new changes will be made while we are + * holding exclusive lock on the rel. + */ + FlushRelationBuffers(rel); + /* * Create and copy all forks of the relation, and schedule unlinking of * old physical files. 
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index bab59f16e6..90ff8ccf54 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3440,6 +3440,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence) Form_pg_class classform; MultiXactId minmulti = InvalidMultiXactId; TransactionId freezeXid = InvalidTransactionId; + RelFileNode newrnode; /* Allocate a new relfilenode */ newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL, @@ -3462,39 +3463,23 @@ RelationSetNewRelfilenode(Relation relation, char persistence) */ RelationDropStorage(relation); - /* - * Now update the pg_class row. However, if we're dealing with a mapped - * index, pg_class.relfilenode doesn't change; instead we have to send the - * update to the relation mapper. - */ - if (RelationIsMapped(relation)) - RelationMapUpdateMap(RelationGetRelid(relation), - newrelfilenode, - relation->rd_rel->relisshared, - true); - else - { - relation->rd_rel->relfilenode = newrelfilenode; - classform->relfilenode = newrelfilenode; - } - - RelationInitPhysicalAddr(relation); + /* initialize new relfilenode from old relfilenode */ + newrnode = relation->rd_node; /* * Create storage for the main fork of the new relfilenode. If it's * table-like object, call into table AM to do so, which'll also create * the table's init fork. * - * NOTE: any conflict in relfilenode value will be caught here, if - * GetNewRelFileNode messes up for any reason. + * NOTE: If relevant for the AM, any conflict in relfilenode value will be + * caught here, if GetNewRelFileNode messes up for any reason. */ + newrnode = relation->rd_node; + newrnode.relNode = newrelfilenode; - /* - * Create storage for relation. 
- */ switch (relation->rd_rel->relkind) { - /* shouldn't be called for these */ + /* shouldn't be called for these */ case RELKIND_VIEW: case RELKIND_COMPOSITE_TYPE: case RELKIND_FOREIGN_TABLE: @@ -3505,18 +3490,36 @@ RelationSetNewRelfilenode(Relation relation, char persistence) case RELKIND_INDEX: case RELKIND_SEQUENCE: - RelationCreateStorage(relation->rd_node, persistence); - RelationOpenSmgr(relation); + { + SMgrRelation srel; + + srel = RelationCreateStorage(newrnode, persistence); + smgrclose(srel); + } break; case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_MATVIEW: - table_relation_set_new_filenode(relation, persistence, + table_relation_set_new_filenode(relation, &newrnode, + persistence, &freezeXid, &minmulti); break; } + /* + * However, if we're dealing with a mapped index, pg_class.relfilenode + * doesn't change; instead we have to send the update to the relation + * mapper. + */ + if (RelationIsMapped(relation)) + RelationMapUpdateMap(RelationGetRelid(relation), + newrelfilenode, + relation->rd_rel->relisshared, + false); + else + classform->relfilenode = newrelfilenode; + /* These changes are safe even for a mapped relation */ if (relation->rd_rel->relkind != RELKIND_SEQUENCE) { diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index c018a44267..ebfa0d5185 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -416,7 +416,12 @@ typedef struct TableAmRoutine * This callback needs to create a new relation filenode for `rel`, with * appropriate durability behaviour for `persistence`. * - * On output *freezeXid, *minmulti must be set to the values appropriate + * Note that only the subset of the relcache filled by + * RelationBuildLocalRelation() can be relied upon and that the relation's + * catalog entries will either not yet exist (new relation), or + * will still reference the old relfilenode. 
+ * + * As output *freezeXid, *minmulti must be set to the values appropriate * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those * fields to be filled they can be set to InvalidTransactionId and * InvalidMultiXactId, respectively. @@ -424,6 +429,7 @@ typedef struct TableAmRoutine * See also table_relation_set_new_filenode(). */ void (*relation_set_new_filenode) (Relation rel, + const RelFileNode *newrnode, char persistence, TransactionId *freezeXid, MultiXactId *minmulti); @@ -444,7 +450,8 @@ typedef struct TableAmRoutine * This can typically be implemented by directly copying the underlying * storage, unless it contains references to the tablespace internally. */ - void (*relation_copy_data) (Relation rel, RelFileNode newrnode); + void (*relation_copy_data) (Relation rel, + const RelFileNode *newrnode); /* See table_relation_copy_for_cluster() */ void (*relation_copy_for_cluster) (Relation NewHeap, @@ -1251,21 +1258,25 @@ table_finish_bulk_insert(Relation rel, int options) */ /* - * Create a new relation filenode for `rel`, with persistence set to + * Create storage for `rel` in `newrnode`, with persistence set to * `persistence`. * * This is used both during relation creation and various DDL operations to - * create a new relfilenode that can be filled from scratch. + * create a new relfilenode that can be filled from scratch. When creating + * new storage for an existing relfilenode, this should be called before the + * relcache entry has been updated. * * *freezeXid, *minmulti are set to the xid / multixact horizon for the table * that pg_class.{relfrozenxid, relminmxid} have to be set to. 
*/ static inline void -table_relation_set_new_filenode(Relation rel, char persistence, +table_relation_set_new_filenode(Relation rel, + const RelFileNode *newrnode, + char persistence, TransactionId *freezeXid, MultiXactId *minmulti) { - rel->rd_tableam->relation_set_new_filenode(rel, persistence, + rel->rd_tableam->relation_set_new_filenode(rel, newrnode, persistence, freezeXid, minmulti); } @@ -1288,7 +1299,7 @@ table_relation_nontransactional_truncate(Relation rel) * changing a relation's tablespace. */ static inline void -table_relation_copy_data(Relation rel, RelFileNode newrnode) +table_relation_copy_data(Relation rel, const RelFileNode *newrnode) { rel->rd_tableam->relation_copy_data(rel, newrnode); } diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h index 882dc65c89..3579d3f3eb 100644 --- a/src/include/catalog/storage.h +++ b/src/include/catalog/storage.h @@ -19,7 +19,7 @@ #include "storage/smgr.h" #include "utils/relcache.h" -extern void RelationCreateStorage(RelFileNode rnode, char relpersistence); +extern SMgrRelation RelationCreateStorage(RelFileNode rnode, char relpersistence); extern void RelationDropStorage(Relation rel); extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit); extern void RelationTruncate(Relation rel, BlockNumber nblocks); diff --git a/src/include/catalog/storage_xlog.h b/src/include/catalog/storage_xlog.h index dfca3611ea..40419efd37 100644 --- a/src/include/catalog/storage_xlog.h +++ b/src/include/catalog/storage_xlog.h @@ -50,7 +50,7 @@ typedef struct xl_smgr_truncate int flags; } xl_smgr_truncate; -extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum); +extern void log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum); extern void smgr_redo(XLogReaderState *record); extern void smgr_desc(StringInfo buf, XLogReaderState *record);