diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index c4b38ddb6c..46bb03432d 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -1,4 +1,4 @@ - + @@ -133,16 +133,20 @@ there. -Each table and index is stored in a separate file, named after the table -or index's filenode number, which can be found in -pg_class.relfilenode. In addition to the -main file (a/k/a main fork), each table and index has a free space -map (see ), which stores information about free -space available in the relation. The free space map is stored in a file named -with the filenode number plus the suffix _fsm. Tables also have a -visibility map, stored in a fork with the suffix -_vm, to track which pages are known to have no dead tuples. -The visibility map is described further in . +Each table and index is stored in a separate file. For ordinary relations, +these files are named after the table or index's filenode number, +which can be found in pg_class.relfilenode. But +for temporary relations, the file name is of the form +tBBB_FFF, where BBB +is the backend ID of the backend which created the file, and FFF +is the filenode number. In either case, in addition to the main file (a/k/a +main fork), each table and index has a free space map (see ), which stores information about free space available in +the relation. The free space map is stored in a file named with the filenode +number plus the suffix _fsm. Tables also have a +visibility map, stored in a fork with the suffix _vm, +to track which pages are known to have no dead tuples. The visibility map is +described further in . 
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 88a0c74e32..9bc65acae5 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/visibilitymap.c,v 1.10 2010/04/23 23:21:44 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/visibilitymap.c,v 1.11 2010/08/13 20:10:50 rhaas Exp $ * * INTERFACE ROUTINES * visibilitymap_clear - clear a bit in the visibility map @@ -373,8 +373,7 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks) } /* Truncate the unused VM pages, and send smgr inval message */ - smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks, - rel->rd_istemp); + smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks); /* * We might as well update the local smgr_vm_nblocks setting. smgrtruncate diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 15964e127e..e7048e7211 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -59,7 +59,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.125 2010/04/28 16:10:40 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.126 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -295,9 +295,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) } /* - * Now write the page. We say isTemp = true even if it's not a temp - * index, because there's no need for smgr to schedule an fsync for this - * write; we'll do it ourselves before ending the build. + * Now write the page. There's no need for smgr to schedule an fsync for + * this write; we'll do it ourselves before ending the build. 
*/ if (blkno == wstate->btws_pages_written) { diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index d6dca97bce..e3c3bc8dbc 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.62 2010/07/06 19:18:55 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.63 2010/08/13 20:10:50 rhaas Exp $ * * NOTES * Each global transaction is associated with a global transaction @@ -865,8 +865,8 @@ StartPrepare(GlobalTransaction gxact) hdr.prepared_at = gxact->prepared_at; hdr.owner = gxact->owner; hdr.nsubxacts = xactGetCommittedChildren(&children); - hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels, NULL); - hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels, NULL); + hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels); + hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels); hdr.ninvalmsgs = xactGetCommittedInvalidationMessages(&invalmsgs, &hdr.initfileinval); StrNCpy(hdr.gid, gxact->gid, GIDSIZE); @@ -1320,13 +1320,13 @@ FinishPreparedTransaction(const char *gid, bool isCommit) } for (i = 0; i < ndelrels; i++) { - SMgrRelation srel = smgropen(delrels[i]); + SMgrRelation srel = smgropen(delrels[i], InvalidBackendId); ForkNumber fork; for (fork = 0; fork <= MAX_FORKNUM; fork++) { if (smgrexists(srel, fork)) - smgrdounlink(srel, fork, false, false); + smgrdounlink(srel, fork, false); } smgrclose(srel); } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 0491d2c8d9..6015eaab1d 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.297 2010/08/13 15:42:21 rhaas Exp $ + * $PostgreSQL: 
pgsql/src/backend/access/transam/xact.c,v 1.298 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -912,7 +912,6 @@ RecordTransactionCommit(void) TransactionId latestXid = InvalidTransactionId; int nrels; RelFileNode *rels; - bool haveNonTemp; int nchildren; TransactionId *children; int nmsgs = 0; @@ -920,7 +919,7 @@ RecordTransactionCommit(void) bool RelcacheInitFileInval = false; /* Get data needed for commit record */ - nrels = smgrGetPendingDeletes(true, &rels, &haveNonTemp); + nrels = smgrGetPendingDeletes(true, &rels); nchildren = xactGetCommittedChildren(&children); if (XLogStandbyInfoActive()) nmsgs = xactGetCommittedInvalidationMessages(&invalMessages, @@ -1048,7 +1047,7 @@ RecordTransactionCommit(void) * asynchronous commit if all to-be-deleted tables are temporary though, * since they are lost anyway if we crash.) */ - if (XactSyncCommit || forceSyncCommit || haveNonTemp) + if (XactSyncCommit || forceSyncCommit || nrels > 0) { /* * Synchronous commit case: @@ -1334,7 +1333,7 @@ RecordTransactionAbort(bool isSubXact) xid); /* Fetch the data we need for the abort record */ - nrels = smgrGetPendingDeletes(false, &rels, NULL); + nrels = smgrGetPendingDeletes(false, &rels); nchildren = xactGetCommittedChildren(&children); /* XXX do we really need a critical section here? 
*/ @@ -4474,7 +4473,7 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn) /* Make sure files supposed to be dropped are dropped */ for (i = 0; i < xlrec->nrels; i++) { - SMgrRelation srel = smgropen(xlrec->xnodes[i]); + SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId); ForkNumber fork; for (fork = 0; fork <= MAX_FORKNUM; fork++) @@ -4482,7 +4481,7 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn) if (smgrexists(srel, fork)) { XLogDropRelation(xlrec->xnodes[i], fork); - smgrdounlink(srel, fork, false, true); + smgrdounlink(srel, fork, true); } } smgrclose(srel); @@ -4579,7 +4578,7 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid) /* Make sure files supposed to be dropped are dropped */ for (i = 0; i < xlrec->nrels; i++) { - SMgrRelation srel = smgropen(xlrec->xnodes[i]); + SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId); ForkNumber fork; for (fork = 0; fork <= MAX_FORKNUM; fork++) @@ -4587,7 +4586,7 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid) if (smgrexists(srel, fork)) { XLogDropRelation(xlrec->xnodes[i], fork); - smgrdounlink(srel, fork, false, true); + smgrdounlink(srel, fork, true); } } smgrclose(srel); @@ -4661,7 +4660,7 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec) appendStringInfo(buf, "; rels:"); for (i = 0; i < xlrec->nrels; i++) { - char *path = relpath(xlrec->xnodes[i], MAIN_FORKNUM); + char *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM); appendStringInfo(buf, " %s", path); pfree(path); @@ -4716,7 +4715,7 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec) appendStringInfo(buf, "; rels:"); for (i = 0; i < xlrec->nrels; i++) { - char *path = relpath(xlrec->xnodes[i], MAIN_FORKNUM); + char *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM); appendStringInfo(buf, " %s", path); pfree(path); diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 3d7c7cf69e..31479eabff 100644 --- 
a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.71 2010/07/08 16:08:30 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.72 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -68,7 +68,7 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno, */ if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1) { - char *path = relpath(node, forkno); + char *path = relpathperm(node, forkno); if (present) elog(DEBUG1, "page %u of relation %s is uninitialized", @@ -133,7 +133,7 @@ forget_invalid_pages(RelFileNode node, ForkNumber forkno, BlockNumber minblkno) { if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2) { - char *path = relpath(hentry->key.node, forkno); + char *path = relpathperm(hentry->key.node, forkno); elog(DEBUG2, "page %u of relation %s has been dropped", hentry->key.blkno, path); @@ -166,7 +166,7 @@ forget_invalid_pages_db(Oid dbid) { if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2) { - char *path = relpath(hentry->key.node, hentry->key.forkno); + char *path = relpathperm(hentry->key.node, hentry->key.forkno); elog(DEBUG2, "page %u of relation %s has been dropped", hentry->key.blkno, path); @@ -200,7 +200,7 @@ XLogCheckInvalidPages(void) */ while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL) { - char *path = relpath(hentry->key.node, hentry->key.forkno); + char *path = relpathperm(hentry->key.node, hentry->key.forkno); if (hentry->present) elog(WARNING, "page %u of relation %s was uninitialized", @@ -276,7 +276,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, Assert(blkno != P_NEW); /* Open the relation at smgr level 
*/ - smgr = smgropen(rnode); + smgr = smgropen(rnode, InvalidBackendId); /* * Create the target file if it doesn't already exist. This lets us cope @@ -293,7 +293,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, if (blkno < lastblock) { /* page exists in file */ - buffer = ReadBufferWithoutRelcache(rnode, false, forknum, blkno, + buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, mode, NULL); } else @@ -312,7 +312,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, { if (buffer != InvalidBuffer) ReleaseBuffer(buffer); - buffer = ReadBufferWithoutRelcache(rnode, false, forknum, + buffer = ReadBufferWithoutRelcache(rnode, forknum, P_NEW, mode, NULL); lastblock++; } diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index 1739085ffd..016081a7bf 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.90 2010/04/20 23:48:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.91 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -78,12 +78,37 @@ forkname_to_number(char *forkName) } /* - * relpath - construct path to a relation's file + * forkname_chars + * We use this to figure out whether a filename could be a relation + * fork (as opposed to an oddly named stray file that somehow ended + * up in the database directory). If the passed string begins with + * a fork name (other than the main fork name), we return its length. + * If not, we return 0. + * + * Note that the present coding assumes that there are no fork names which + * are prefixes of other fork names. 
+ */ +int +forkname_chars(const char *str) +{ + ForkNumber forkNum; + + for (forkNum = 1; forkNum <= MAX_FORKNUM; forkNum++) + { + int len = strlen(forkNames[forkNum]); + if (strncmp(forkNames[forkNum], str, len) == 0) + return len; + } + return 0; +} + +/* + * relpathbackend - construct path to a relation's file * * Result is a palloc'd string. */ char * -relpath(RelFileNode rnode, ForkNumber forknum) +relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) { int pathlen; char *path; @@ -92,6 +117,7 @@ relpath(RelFileNode rnode, ForkNumber forknum) { /* Shared system relations live in {datadir}/global */ Assert(rnode.dbNode == 0); + Assert(backend == InvalidBackendId); pathlen = 7 + OIDCHARS + 1 + FORKNAMECHARS + 1; path = (char *) palloc(pathlen); if (forknum != MAIN_FORKNUM) @@ -103,29 +129,69 @@ relpath(RelFileNode rnode, ForkNumber forknum) else if (rnode.spcNode == DEFAULTTABLESPACE_OID) { /* The default tablespace is {datadir}/base */ - pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; - path = (char *) palloc(pathlen); - if (forknum != MAIN_FORKNUM) - snprintf(path, pathlen, "base/%u/%u_%s", - rnode.dbNode, rnode.relNode, forkNames[forknum]); + if (backend == InvalidBackendId) + { + pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char *) palloc(pathlen); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "base/%u/%u_%s", + rnode.dbNode, rnode.relNode, + forkNames[forknum]); + else + snprintf(path, pathlen, "base/%u/%u", + rnode.dbNode, rnode.relNode); + } else - snprintf(path, pathlen, "base/%u/%u", - rnode.dbNode, rnode.relNode); + { + /* OIDCHARS will suffice for an integer, too */ + pathlen = 5 + OIDCHARS + 2 + OIDCHARS + 1 + OIDCHARS + 1 + + FORKNAMECHARS + 1; + path = (char *) palloc(pathlen); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "base/%u/t%d_%u_%s", + rnode.dbNode, backend, rnode.relNode, + forkNames[forknum]); + else + snprintf(path, pathlen, "base/%u/t%d_%u", + 
rnode.dbNode, backend, rnode.relNode); + } } else { /* All other tablespaces are accessed via symlinks */ - pathlen = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + - 1 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; - path = (char *) palloc(pathlen); - if (forknum != MAIN_FORKNUM) - snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u_%s", - rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, - rnode.dbNode, rnode.relNode, forkNames[forknum]); + if (backend == InvalidBackendId) + { + pathlen = 9 + 1 + OIDCHARS + 1 + + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + 1 + + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char *) palloc(pathlen); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u_%s", + rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, rnode.relNode, + forkNames[forknum]); + else + snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u", + rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, rnode.relNode); + } else - snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u", - rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, - rnode.dbNode, rnode.relNode); + { + /* OIDCHARS will suffice for an integer, too */ + pathlen = 9 + 1 + OIDCHARS + 1 + + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + 2 + + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char *) palloc(pathlen); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/t%d_%u_%s", + rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, backend, rnode.relNode, + forkNames[forknum]); + else + snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/t%d_%u", + rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, backend, rnode.relNode); + } } return path; } @@ -458,16 +524,23 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn) * created by bootstrap have preassigned OIDs, so there's no need. 
*/ Oid -GetNewRelFileNode(Oid reltablespace, Relation pg_class) +GetNewRelFileNode(Oid reltablespace, Relation pg_class, BackendId backend) { - RelFileNode rnode; + RelFileNodeBackend rnode; char *rpath; int fd; bool collides; /* This logic should match RelationInitPhysicalAddr */ - rnode.spcNode = reltablespace ? reltablespace : MyDatabaseTableSpace; - rnode.dbNode = (rnode.spcNode == GLOBALTABLESPACE_OID) ? InvalidOid : MyDatabaseId; + rnode.node.spcNode = reltablespace ? reltablespace : MyDatabaseTableSpace; + rnode.node.dbNode = (rnode.node.spcNode == GLOBALTABLESPACE_OID) ? InvalidOid : MyDatabaseId; + + /* + * The relpath will vary based on the backend ID, so we must initialize + * that properly here to make sure that any collisions based on filename + * are properly detected. + */ + rnode.backend = backend; do { @@ -475,9 +548,9 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class) /* Generate the OID */ if (pg_class) - rnode.relNode = GetNewOid(pg_class); + rnode.node.relNode = GetNewOid(pg_class); else - rnode.relNode = GetNewObjectId(); + rnode.node.relNode = GetNewObjectId(); /* Check for existing file of same name */ rpath = relpath(rnode, MAIN_FORKNUM); @@ -508,5 +581,5 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class) pfree(rpath); } while (collides); - return rnode.relNode; + return rnode.node.relNode; } diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index a0268f7177..7754b73d73 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.374 2010/07/25 23:21:21 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.375 2010/08/13 20:10:50 rhaas Exp $ * * * INTERFACE ROUTINES @@ -39,6 +39,7 @@ #include "catalog/heap.h" #include "catalog/index.h" #include "catalog/indexing.h" +#include "catalog/namespace.h" #include "catalog/pg_attrdef.h" #include "catalog/pg_constraint.h" #include 
"catalog/pg_inherits.h" @@ -994,7 +995,9 @@ heap_create_with_catalog(const char *relname, binary_upgrade_next_toast_relfilenode = InvalidOid; } else - relid = GetNewRelFileNode(reltablespace, pg_class_desc); + relid = GetNewRelFileNode(reltablespace, pg_class_desc, + isTempOrToastNamespace(relnamespace) ? + MyBackendId : InvalidBackendId); } /* diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index dea6889075..b36402c755 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.337 2010/02/26 02:00:36 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.338 2010/08/13 20:10:50 rhaas Exp $ * * * INTERFACE ROUTINES @@ -645,7 +645,12 @@ index_create(Oid heapRelationId, binary_upgrade_next_index_relfilenode = InvalidOid; } else - indexRelationId = GetNewRelFileNode(tableSpaceId, pg_class); + { + indexRelationId = + GetNewRelFileNode(tableSpaceId, pg_class, + heapRelation->rd_istemp ? + MyBackendId : InvalidBackendId); + } } /* diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 71ec8f8250..624c8337b0 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -13,7 +13,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.128 2010/08/13 16:27:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.129 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -2588,7 +2588,7 @@ isOtherTempNamespace(Oid namespaceId) * GetTempNamespaceBackendId - if the given namespace is a temporary-table * namespace (either my own, or another backend's), return the BackendId * that owns it. Temporary-toast-table namespaces are included, too. - * If it isn't a temp namespace, return -1. 
+ * If it isn't a temp namespace, return InvalidBackendId. */ int GetTempNamespaceBackendId(Oid namespaceId) @@ -2599,13 +2599,13 @@ GetTempNamespaceBackendId(Oid namespaceId) /* See if the namespace name starts with "pg_temp_" or "pg_toast_temp_" */ nspname = get_namespace_name(namespaceId); if (!nspname) - return -1; /* no such namespace? */ + return InvalidBackendId; /* no such namespace? */ if (strncmp(nspname, "pg_temp_", 8) == 0) result = atoi(nspname + 8); else if (strncmp(nspname, "pg_toast_temp_", 14) == 0) result = atoi(nspname + 14); else - result = -1; + result = InvalidBackendId; pfree(nspname); return result; } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 2165341e0e..5a1131945c 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/storage.c,v 1.10 2010/02/09 21:43:30 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/storage.c,v 1.11 2010/08/13 20:10:50 rhaas Exp $ * * NOTES * Some of this code used to be in storage/smgr/smgr.c, and the @@ -52,7 +52,7 @@ typedef struct PendingRelDelete { RelFileNode relnode; /* relation that may need to be deleted */ - bool isTemp; /* is it a temporary relation? */ + BackendId backend; /* InvalidBackendId if not a temp rel */ bool atCommit; /* T=delete at commit; F=delete at abort */ int nestLevel; /* xact nesting level of request */ struct PendingRelDelete *next; /* linked-list link */ @@ -102,8 +102,9 @@ RelationCreateStorage(RelFileNode rnode, bool istemp) XLogRecData rdata; xl_smgr_create xlrec; SMgrRelation srel; + BackendId backend = istemp ? 
MyBackendId : InvalidBackendId; - srel = smgropen(rnode); + srel = smgropen(rnode, backend); smgrcreate(srel, MAIN_FORKNUM, false); if (!istemp) @@ -125,7 +126,7 @@ RelationCreateStorage(RelFileNode rnode, bool istemp) pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = rnode; - pending->isTemp = istemp; + pending->backend = backend; pending->atCommit = false; /* delete if abort */ pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; @@ -145,7 +146,7 @@ RelationDropStorage(Relation rel) pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = rel->rd_node; - pending->isTemp = rel->rd_istemp; + pending->backend = rel->rd_backend; pending->atCommit = true; /* delete if commit */ pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; @@ -283,7 +284,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) } /* Do the real work */ - smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks, rel->rd_istemp); + smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks); } /* @@ -291,6 +292,11 @@ RelationTruncate(Relation rel, BlockNumber nblocks) * * This also runs when aborting a subxact; we want to clean up a failed * subxact immediately. + * + * Note: It's possible that we're being asked to remove a relation that has + * no physical storage in any fork. In particular, it's possible that we're + * cleaning up an old temporary relation for which RemovePgTempFiles has + * already recovered the physical storage. 
*/ void smgrDoPendingDeletes(bool isCommit) @@ -322,14 +328,11 @@ smgrDoPendingDeletes(bool isCommit) SMgrRelation srel; int i; - srel = smgropen(pending->relnode); + srel = smgropen(pending->relnode, pending->backend); for (i = 0; i <= MAX_FORKNUM; i++) { if (smgrexists(srel, i)) - smgrdounlink(srel, - i, - pending->isTemp, - false); + smgrdounlink(srel, i, false); } smgrclose(srel); } @@ -341,20 +344,24 @@ smgrDoPendingDeletes(bool isCommit) } /* - * smgrGetPendingDeletes() -- Get a list of relations to be deleted. + * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted. * * The return value is the number of relations scheduled for termination. * *ptr is set to point to a freshly-palloc'd array of RelFileNodes. * If there are no relations to be deleted, *ptr is set to NULL. * - * If haveNonTemp isn't NULL, the bool it points to gets set to true if - * there is any non-temp table pending to be deleted; false if not. + * Only non-temporary relations are included in the returned list. This is OK + * because the list is used only in contexts where temporary relations don't + * matter: we're either writing to the two-phase state file (and transactions + * that have touched temp tables can't be prepared) or we're writing to xlog + * (and all temporary files will be zapped if we restart anyway, so no need + * for redo to do it also). * * Note that the list does not include anything scheduled for termination * by upper-level transactions. 
*/ int -smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) +smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr) { int nestLevel = GetCurrentTransactionNestLevel(); int nrels; @@ -362,11 +369,10 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) PendingRelDelete *pending; nrels = 0; - if (haveNonTemp) - *haveNonTemp = false; for (pending = pendingDeletes; pending != NULL; pending = pending->next) { - if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit) + if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit + && pending->backend == InvalidBackendId) nrels++; } if (nrels == 0) @@ -378,13 +384,12 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) *ptr = rptr; for (pending = pendingDeletes; pending != NULL; pending = pending->next) { - if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit) + if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit + && pending->backend == InvalidBackendId) { *rptr = pending->relnode; rptr++; } - if (haveNonTemp && !pending->isTemp) - *haveNonTemp = true; } return nrels; } @@ -456,7 +461,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record); SMgrRelation reln; - reln = smgropen(xlrec->rnode); + reln = smgropen(xlrec->rnode, InvalidBackendId); smgrcreate(reln, MAIN_FORKNUM, true); } else if (info == XLOG_SMGR_TRUNCATE) @@ -465,7 +470,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) SMgrRelation reln; Relation rel; - reln = smgropen(xlrec->rnode); + reln = smgropen(xlrec->rnode, InvalidBackendId); /* * Forcibly create relation if it doesn't exist (which suggests that @@ -475,7 +480,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) */ smgrcreate(reln, MAIN_FORKNUM, true); - smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno, false); + smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno); /* Also tell xlogutils.c about it */ 
XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno); @@ -502,7 +507,7 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec) if (info == XLOG_SMGR_CREATE) { xl_smgr_create *xlrec = (xl_smgr_create *) rec; - char *path = relpath(xlrec->rnode, MAIN_FORKNUM); + char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM); appendStringInfo(buf, "file create: %s", path); pfree(path); @@ -510,7 +515,7 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec) else if (info == XLOG_SMGR_TRUNCATE) { xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec; - char *path = relpath(xlrec->rnode, MAIN_FORKNUM); + char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM); appendStringInfo(buf, "file truncate: %s to %u blocks", path, xlrec->blkno); diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 6f658321b4..14757eed52 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.33 2010/07/25 23:21:21 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.34 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -195,7 +195,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, Datum reloptio * Toast tables for regular relations go in pg_toast; those for temp * relations go into the per-backend temp-toast-table namespace. 
*/ - if (rel->rd_islocaltemp) + if (rel->rd_backend == MyBackendId) namespaceid = GetTempToastNamespace(); else namespaceid = PG_TOAST_NAMESPACE; diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 681a7aaa92..19e1b7251e 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.328 2010/07/22 00:47:52 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.329 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -1019,7 +1019,7 @@ DoCopy(const CopyStmt *stmt, const char *queryString) ExecCheckRTPerms(list_make1(rte), true); /* check read-only transaction */ - if (XactReadOnly && is_from && !cstate->rel->rd_islocaltemp) + if (XactReadOnly && is_from && cstate->rel->rd_backend != MyBackendId) PreventCommandIfReadOnly("COPY FROM"); /* Don't allow COPY w/ OIDs to or from a table without them */ diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 0f06bba803..6e0930f8d0 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.169 2010/07/25 23:21:21 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.170 2010/08/13 20:10:51 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -472,7 +472,7 @@ nextval_internal(Oid relid) RelationGetRelationName(seqrel)))); /* read-only transactions may only modify temp sequences */ - if (!seqrel->rd_islocaltemp) + if (seqrel->rd_backend != MyBackendId) PreventCommandIfReadOnly("nextval()"); if (elm->last != elm->cached) /* some numbers were cached */ @@ -749,7 +749,7 @@ do_setval(Oid relid, int64 next, bool iscalled) RelationGetRelationName(seqrel)))); /* read-only transactions may only modify temp sequences */ - if 
(!seqrel->rd_islocaltemp) + if (seqrel->rd_backend != MyBackendId) PreventCommandIfReadOnly("setval()"); /* lock page' buffer and read tuple */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 221e6417eb..703fd7e71b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.339 2010/08/05 14:45:01 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.340 2010/08/13 20:10:51 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -7165,13 +7165,13 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) * Relfilenodes are not unique across tablespaces, so we need to allocate * a new one in the new tablespace. */ - newrelfilenode = GetNewRelFileNode(newTableSpace, NULL); + newrelfilenode = GetNewRelFileNode(newTableSpace, NULL, rel->rd_backend); /* Open old and new relation */ newrnode = rel->rd_node; newrnode.relNode = newrelfilenode; newrnode.spcNode = newTableSpace; - dstrel = smgropen(newrnode); + dstrel = smgropen(newrnode, rel->rd_backend); RelationOpenSmgr(rel); @@ -7262,7 +7262,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, /* XLOG stuff */ if (use_wal) - log_newpage(&dst->smgr_rnode, forkNum, blkno, page); + log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page); /* * Now write the page. 
We say isTemp = true even if it's not a temp diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 72737ab226..67f0d5c636 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -38,7 +38,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.68 2010/04/28 16:54:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.69 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -113,7 +113,7 @@ */ typedef struct { - RelFileNode rnode; + RelFileNodeBackend rnode; ForkNumber forknum; BlockNumber segno; /* see md.c for special values */ /* might add a real request-type field later; not needed yet */ @@ -1071,7 +1071,8 @@ RequestCheckpoint(int flags) * than we have to here. */ bool -ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) +ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, + BlockNumber segno) { BgWriterRequest *request; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 3b6938135a..4c09df1ba7 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.256 2010/02/26 02:00:59 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.257 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -95,7 +95,8 @@ static void WaitIO(volatile BufferDesc *buf); static bool StartBufferIO(volatile BufferDesc *buf, bool forInput); static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty, int set_flag_bits); -static void buffer_write_error_callback(void *arg); +static void shared_buffer_write_error_callback(void *arg); +static void local_buffer_write_error_callback(void *arg); static volatile 
BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, @@ -141,7 +142,8 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) int buf_id; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node, + forkNum, blockNum); /* determine its hash code and partition lock ID */ newHash = BufTableHashCode(&newTag); @@ -251,18 +253,21 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, * ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require * a relcache entry for the relation. * - * NB: caller is assumed to know what it's doing if isTemp is true. + * NB: At present, this function may not be used on temporary relations, which + * is OK, because we only use it during XLOG replay. If in the future we + * want to use it on temporary relations, we could pass the backend ID as an + * additional parameter. 
*/ Buffer -ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, - ForkNumber forkNum, BlockNumber blockNum, - ReadBufferMode mode, BufferAccessStrategy strategy) +ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, + BlockNumber blockNum, ReadBufferMode mode, + BufferAccessStrategy strategy) { bool hit; - SMgrRelation smgr = smgropen(rnode); + SMgrRelation smgr = smgropen(rnode, InvalidBackendId); - return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, mode, strategy, + return ReadBuffer_common(smgr, false, forkNum, blockNum, mode, strategy, &hit); } @@ -414,7 +419,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, { /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); - smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, isLocalBuf); + smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false); } else { @@ -465,10 +470,10 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, VacuumCostBalance += VacuumCostPageMiss; TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum, - smgr->smgr_rnode.spcNode, - smgr->smgr_rnode.dbNode, - smgr->smgr_rnode.relNode, - isLocalBuf, + smgr->smgr_rnode.node.spcNode, + smgr->smgr_rnode.node.dbNode, + smgr->smgr_rnode.node.relNode, + smgr->smgr_rnode.backend, isExtend, found); @@ -512,7 +517,7 @@ BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, bool valid; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* determine its hash code and partition lock ID */ newHash = BufTableHashCode(&newTag); @@ -1693,21 +1698,24 @@ PrintBufferLeakWarning(Buffer buffer) volatile BufferDesc *buf; int32 loccount; char *path; + BackendId backend; Assert(BufferIsValid(buffer)); if (BufferIsLocal(buffer)) { buf = &LocalBufferDescriptors[-buffer - 1]; loccount = LocalRefCount[-buffer - 1]; + backend = MyBackendId; } else { buf = 
&BufferDescriptors[buffer - 1]; loccount = PrivateRefCount[buffer - 1]; + backend = InvalidBackendId; } /* theoretically we should lock the bufhdr here */ - path = relpath(buf->tag.rnode, buf->tag.forkNum); + path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum); elog(WARNING, "buffer refcount leak: [%03d] " "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)", @@ -1831,14 +1839,14 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) return; /* Setup error traceback support for ereport() */ - errcontext.callback = buffer_write_error_callback; + errcontext.callback = shared_buffer_write_error_callback; errcontext.arg = (void *) buf; errcontext.previous = error_context_stack; error_context_stack = &errcontext; /* Find smgr relation for buffer */ if (reln == NULL) - reln = smgropen(buf->tag.rnode); + reln = smgropen(buf->tag.rnode, InvalidBackendId); TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum, buf->tag.blockNum, @@ -1929,14 +1937,15 @@ RelationGetNumberOfBlocks(Relation relation) * -------------------------------------------------------------------- */ void -DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp, +DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock) { int i; - if (istemp) + if (rnode.backend != InvalidBackendId) { - DropRelFileNodeLocalBuffers(rnode, forkNum, firstDelBlock); + if (rnode.backend == MyBackendId) + DropRelFileNodeLocalBuffers(rnode.node, forkNum, firstDelBlock); return; } @@ -1945,7 +1954,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp, volatile BufferDesc *bufHdr = &BufferDescriptors[i]; LockBufHdr(bufHdr); - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) && + if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) && bufHdr->tag.forkNum == forkNum && bufHdr->tag.blockNum >= firstDelBlock) InvalidateBuffer(bufHdr); /* releases spinlock */ @@ -2008,7 +2017,7 @@ PrintBufferDescs(void) "[%02d] (freeNext=%d, rel=%s, " 
"blockNum=%u, flags=0x%x, refcount=%u %d)", i, buf->freeNext, - relpath(buf->tag.rnode, buf->tag.forkNum), + relpathbackend(buf->tag.rnode, InvalidBackendId, buf->tag.forkNum), buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } @@ -2078,7 +2087,7 @@ FlushRelationBuffers(Relation rel) ErrorContextCallback errcontext; /* Setup error traceback support for ereport() */ - errcontext.callback = buffer_write_error_callback; + errcontext.callback = local_buffer_write_error_callback; errcontext.arg = (void *) bufHdr; errcontext.previous = error_context_stack; error_context_stack = &errcontext; @@ -2087,7 +2096,7 @@ FlushRelationBuffers(Relation rel) bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *) LocalBufHdrGetBlock(bufHdr), - true); + false); bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); @@ -2699,8 +2708,9 @@ AbortBufferIO(void) if (sv_flags & BM_IO_ERROR) { /* Buffer is pinned, so we can read tag without spinlock */ - char *path = relpath(buf->tag.rnode, buf->tag.forkNum); + char *path; + path = relpathperm(buf->tag.rnode, buf->tag.forkNum); ereport(WARNING, (errcode(ERRCODE_IO_ERROR), errmsg("could not write block %u of %s", @@ -2714,17 +2724,36 @@ AbortBufferIO(void) } /* - * Error context callback for errors occurring during buffer writes. + * Error context callback for errors occurring during shared buffer writes. */ static void -buffer_write_error_callback(void *arg) +shared_buffer_write_error_callback(void *arg) { volatile BufferDesc *bufHdr = (volatile BufferDesc *) arg; /* Buffer is pinned, so we can read the tag without locking the spinlock */ if (bufHdr != NULL) { - char *path = relpath(bufHdr->tag.rnode, bufHdr->tag.forkNum); + char *path = relpathperm(bufHdr->tag.rnode, bufHdr->tag.forkNum); + + errcontext("writing block %u of relation %s", + bufHdr->tag.blockNum, path); + pfree(path); + } +} + +/* + * Error context callback for errors occurring during local buffer writes. 
+ */ +static void +local_buffer_write_error_callback(void *arg) +{ + volatile BufferDesc *bufHdr = (volatile BufferDesc *) arg; + + if (bufHdr != NULL) + { + char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId, + bufHdr->tag.forkNum); errcontext("writing block %u of relation %s", bufHdr->tag.blockNum, path); diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 2b783f87f4..dd067737c9 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.89 2010/01/02 16:57:51 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.90 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -68,7 +68,7 @@ LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum, BufferTag newTag; /* identity of requested block */ LocalBufferLookupEnt *hresult; - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* Initialize local buffers if first request in this session */ if (LocalBufHash == NULL) @@ -110,7 +110,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, int trycounter; bool found; - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* Initialize local buffers if first request in this session */ if (LocalBufHash == NULL) @@ -127,7 +127,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag)); #ifdef LBDEBUG fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n", - smgr->smgr_rnode.relNode, forkNum, blockNum, -b - 1); + smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1); #endif /* this part is equivalent to PinBuffer for a shared buffer */ if (LocalRefCount[b] == 0) @@ 
-150,7 +150,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, #ifdef LBDEBUG fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n", - smgr->smgr_rnode.relNode, forkNum, blockNum, -nextFreeLocalBuf - 1); + smgr->smgr_rnode.node.relNode, forkNum, blockNum, + -nextFreeLocalBuf - 1); #endif /* @@ -198,14 +199,14 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, SMgrRelation oreln; /* Find smgr relation for buffer */ - oreln = smgropen(bufHdr->tag.rnode); + oreln = smgropen(bufHdr->tag.rnode, MyBackendId); /* And write... */ smgrwrite(oreln, bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *) LocalBufHdrGetBlock(bufHdr), - true); + false); /* Mark not-dirty now in case we error out below */ bufHdr->flags &= ~BM_DIRTY; @@ -309,7 +310,8 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum, if (LocalRefCount[i] != 0) elog(ERROR, "block %u of %s is still referenced (local %u)", bufHdr->tag.blockNum, - relpath(bufHdr->tag.rnode, bufHdr->tag.forkNum), + relpathbackend(bufHdr->tag.rnode, MyBackendId, + bufHdr->tag.forkNum), LocalRefCount[i]); /* Remove entry from hashtable */ hresult = (LocalBufferLookupEnt *) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 91bf4af8e4..18d6de1dec 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.157 2010/07/06 22:55:26 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.158 2010/08/13 20:10:52 rhaas Exp $ * * NOTES: * @@ -249,6 +249,9 @@ static File OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError); static void AtProcExit_Files(int code, Datum arg); static void CleanupTempFiles(bool isProcExit); static void RemovePgTempFilesInDir(const char *tmpdirname); +static void RemovePgTempRelationFiles(const char *tsdirname); +static 
void RemovePgTempRelationFilesInDbspace(const char *dbspacedirname); +static bool looks_like_temp_rel_name(const char *name); /* @@ -1824,10 +1827,12 @@ CleanupTempFiles(bool isProcExit) /* - * Remove temporary files left over from a prior postmaster session + * Remove temporary and temporary relation files left over from a prior + * postmaster session * * This should be called during postmaster startup. It will forcibly - * remove any leftover files created by OpenTemporaryFile. + * remove any leftover files created by OpenTemporaryFile and any leftover + * temporary relation files created by mdcreate. * * NOTE: we could, but don't, call this during a post-backend-crash restart * cycle. The argument for not doing it is that someone might want to examine @@ -1847,6 +1852,7 @@ RemovePgTempFiles(void) */ snprintf(temp_path, sizeof(temp_path), "base/%s", PG_TEMP_FILES_DIR); RemovePgTempFilesInDir(temp_path); + RemovePgTempRelationFiles("base"); /* * Cycle through temp directories for all non-default tablespaces. 
@@ -1862,6 +1868,10 @@ RemovePgTempFiles(void) snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s/%s", spc_de->d_name, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR); RemovePgTempFilesInDir(temp_path); + + snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s", + spc_de->d_name, TABLESPACE_VERSION_DIRECTORY); + RemovePgTempRelationFiles(temp_path); } FreeDir(spc_dir); @@ -1915,3 +1925,123 @@ RemovePgTempFilesInDir(const char *tmpdirname) FreeDir(temp_dir); } + +/* Process one tablespace directory, look for per-DB subdirectories */ +static void +RemovePgTempRelationFiles(const char *tsdirname) +{ + DIR *ts_dir; + struct dirent *de; + char dbspace_path[MAXPGPATH]; + + ts_dir = AllocateDir(tsdirname); + if (ts_dir == NULL) + { + /* anything except ENOENT is fishy */ + if (errno != ENOENT) + elog(LOG, + "could not open tablespace directory \"%s\": %m", + tsdirname); + return; + } + + while ((de = ReadDir(ts_dir, tsdirname)) != NULL) + { + int i = 0; + + /* + * We're only interested in the per-database directories, which have + * numeric names. Note that this code will also (properly) ignore "." + * and "..". 
+ */ + while (isdigit((unsigned char) de->d_name[i])) + ++i; + if (de->d_name[i] != '\0' || i == 0) + continue; + + snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", + tsdirname, de->d_name); + RemovePgTempRelationFilesInDbspace(dbspace_path); + } + + FreeDir(ts_dir); +} + +/* Process one per-dbspace directory for RemovePgTempRelationFiles */ +static void +RemovePgTempRelationFilesInDbspace(const char *dbspacedirname) +{ + DIR *dbspace_dir; + struct dirent *de; + char rm_path[MAXPGPATH]; + + dbspace_dir = AllocateDir(dbspacedirname); + if (dbspace_dir == NULL) + { + /* we just saw this directory, so it really ought to be there */ + elog(LOG, + "could not open dbspace directory \"%s\": %m", + dbspacedirname); + return; + } + + while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) + { + if (!looks_like_temp_rel_name(de->d_name)) + continue; + + snprintf(rm_path, sizeof(rm_path), "%s/%s", + dbspacedirname, de->d_name); + + unlink(rm_path); /* note we ignore any error */ + } + + FreeDir(dbspace_dir); +} + +/* tBBB_FFF or tBBB_FFF_forkname, optionally followed by .SEGNO; BBB, FFF and SEGNO are non-empty digit strings */ +static bool +looks_like_temp_rel_name(const char *name) +{ + int pos; + int savepos; + + /* Must start with "t". */ + if (name[0] != 't') + return false; + + /* Followed by a non-empty string of digits and then an underscore. */ + for (pos = 1; isdigit((unsigned char) name[pos]); ++pos) + ; + if (pos == 1 || name[pos] != '_') + return false; + + /* Followed by another nonempty string of digits. */ + for (savepos = ++pos; isdigit((unsigned char) name[pos]); ++pos) + ; + if (savepos == pos) + return false; + + /* We might have _forkname or .segment or both. */ + if (name[pos] == '_') + { + int forkchar = forkname_chars(&name[pos+1]); + if (forkchar <= 0) + return false; + pos += forkchar + 1; + } + if (name[pos] == '.') + { + int segchar; + for (segchar = 1; isdigit((unsigned char) name[pos+segchar]); ++segchar) + ; + if (segchar <= 1) + return false; + pos += segchar; + } + + /* Now we should be at the end. 
*/ + if (name[pos] != '\0') + return false; + return true; +} diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index a872f1e78f..040dd3344c 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.77 2010/02/26 02:00:59 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.78 2010/08/13 20:10:52 rhaas Exp $ * * * NOTES: @@ -303,7 +303,7 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks) } /* Truncate the unused FSM pages, and send smgr inval message */ - smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks, rel->rd_istemp); + smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks); /* * We might as well update the local smgr_fsm_nblocks setting. diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index eb5c73d6f8..f1ff2fe15e 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.151 2010/02/26 02:01:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.152 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -119,7 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */ */ typedef struct { - RelFileNode rnode; /* the targeted relation */ + RelFileNodeBackend rnode; /* the targeted relation */ ForkNumber forknum; BlockNumber segno; /* which segment */ } PendingOperationTag; @@ -135,7 +135,7 @@ typedef struct typedef struct { - RelFileNode rnode; /* the dead relation to delete */ + RelFileNodeBackend rnode; /* the dead relation to delete */ CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */ } 
PendingUnlinkEntry; @@ -158,14 +158,14 @@ static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior); static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); -static void register_unlink(RelFileNode rnode); +static void register_unlink(RelFileNodeBackend rnode); static MdfdVec *_fdvec_alloc(void); static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno); static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags); static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno, - BlockNumber blkno, bool isTemp, ExtensionBehavior behavior); + BlockNumber blkno, bool skipFsync, ExtensionBehavior behavior); static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); @@ -321,7 +321,7 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) * we are usually not in a transaction anymore when this is called. */ void -mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) +mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) { char *path; int ret; @@ -417,7 +417,7 @@ mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) */ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { off_t seekpos; int nbytes; @@ -440,7 +440,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, relpath(reln->smgr_rnode, forknum), InvalidBlockNumber))); - v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE); + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -478,7 +478,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, errhint("Check free disk space."))); } - if (!isTemp) + if (!skipFsync && !SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); Assert(_mdnblocks(reln, 
forknum, v) <= ((BlockNumber) RELSEG_SIZE)); @@ -605,9 +605,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MdfdVec *v; TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); @@ -624,9 +625,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, nbytes, BLCKSZ); @@ -666,7 +668,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, */ void mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { off_t seekpos; int nbytes; @@ -678,11 +680,12 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, #endif TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); - v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL); + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_FAIL); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -697,9 +700,10 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - 
reln->smgr_rnode.relNode, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, nbytes, BLCKSZ); @@ -720,7 +724,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, errhint("Check free disk space."))); } - if (!isTemp) + if (!skipFsync && !SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); } @@ -794,8 +798,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * mdtruncate() -- Truncate relation to specified number of blocks. */ void -mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, - bool isTemp) +mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) { MdfdVec *v; BlockNumber curnblk; @@ -839,7 +842,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, errmsg("could not truncate file \"%s\": %m", FilePathName(v->mdfd_vfd)))); - if (!isTemp) + if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st @@ -864,7 +867,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, errmsg("could not truncate file \"%s\" to %u blocks: %m", FilePathName(v->mdfd_vfd), nblocks))); - if (!isTemp) + if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; ov->mdfd_chain = NULL; @@ -1052,7 +1055,8 @@ mdsync(void) * the relation will have been dirtied through this same smgr * relation, and so we can save a file open/close cycle. */ - reln = smgropen(entry->tag.rnode); + reln = smgropen(entry->tag.rnode.node, + entry->tag.rnode.backend); /* * It is possible that the relation has been dropped or @@ -1235,7 +1239,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) * a remote pending-ops table. 
*/ static void -register_unlink(RelFileNode rnode) +register_unlink(RelFileNodeBackend rnode) { if (pendingOpsTable) { @@ -1278,7 +1282,8 @@ register_unlink(RelFileNode rnode) * structure for them.) */ void -RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) +RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, + BlockNumber segno) { Assert(pendingOpsTable); @@ -1291,7 +1296,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (RelFileNodeEquals(entry->tag.rnode, rnode) && + if (RelFileNodeBackendEquals(entry->tag.rnode, rnode) && entry->tag.forknum == forknum) { /* Okay, cancel this entry */ @@ -1312,7 +1317,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (entry->tag.rnode.dbNode == rnode.dbNode) + if (entry->tag.rnode.node.dbNode == rnode.node.dbNode) { /* Okay, cancel this entry */ entry->canceled = true; @@ -1326,7 +1331,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell); next = lnext(cell); - if (entry->rnode.dbNode == rnode.dbNode) + if (entry->rnode.node.dbNode == rnode.node.dbNode) { pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev); pfree(entry); @@ -1393,7 +1398,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) * ForgetRelationFsyncRequests -- forget any fsyncs for a rel */ void -ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) +ForgetRelationFsyncRequests(RelFileNodeBackend rnode, ForkNumber forknum) { if (pendingOpsTable) { @@ -1428,11 +1433,12 @@ ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) void 
ForgetDatabaseFsyncRequests(Oid dbid) { - RelFileNode rnode; + RelFileNodeBackend rnode; - rnode.dbNode = dbid; - rnode.spcNode = 0; - rnode.relNode = 0; + rnode.node.dbNode = dbid; + rnode.node.spcNode = 0; + rnode.node.relNode = 0; + rnode.backend = InvalidBackendId; if (pendingOpsTable) { @@ -1523,12 +1529,12 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, * specified block. * * If the segment doesn't exist, we ereport, return NULL, or create the - * segment, according to "behavior". Note: isTemp need only be correct - * in the EXTENSION_CREATE case. + * segment, according to "behavior". Note: skipFsync is only used in the + * EXTENSION_CREATE case. */ static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, - bool isTemp, ExtensionBehavior behavior) + bool skipFsync, ExtensionBehavior behavior) { MdfdVec *v = mdopen(reln, forknum, behavior); BlockNumber targetseg; @@ -1566,7 +1572,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, mdextend(reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, - zerobuf, isTemp); + zerobuf, skipFsync); pfree(zerobuf); } v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT); diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7a35b0a833..c1d1449222 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.121 2010/02/26 02:01:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.122 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -45,19 +45,19 @@ typedef struct f_smgr void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, bool isRedo); bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_unlink) (RelFileNode rnode, ForkNumber forknum, + void (*smgr_unlink) 
(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks, bool isTemp); + BlockNumber nblocks); void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); void (*smgr_pre_ckpt) (void); /* may be NULL */ void (*smgr_sync) (void); /* may be NULL */ @@ -83,8 +83,6 @@ static HTAB *SMgrRelationHash = NULL; /* local function prototypes */ static void smgrshutdown(int code, Datum arg); -static void smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, - int which, bool isTemp, bool isRedo); /* @@ -131,8 +129,9 @@ smgrshutdown(int code, Datum arg) * This does not attempt to actually open the object. 
*/ SMgrRelation -smgropen(RelFileNode rnode) +smgropen(RelFileNode rnode, BackendId backend) { + RelFileNodeBackend brnode; SMgrRelation reln; bool found; @@ -142,7 +141,7 @@ smgropen(RelFileNode rnode) HASHCTL ctl; MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(RelFileNode); + ctl.keysize = sizeof(RelFileNodeBackend); ctl.entrysize = sizeof(SMgrRelationData); ctl.hash = tag_hash; SMgrRelationHash = hash_create("smgr relation table", 400, @@ -150,8 +149,10 @@ smgropen(RelFileNode rnode) } /* Look up or create an entry */ + brnode.node = rnode; + brnode.backend = backend; reln = (SMgrRelation) hash_search(SMgrRelationHash, - (void *) &rnode, + (void *) &brnode, HASH_ENTER, &found); /* Initialize it if not present before */ @@ -261,7 +262,7 @@ smgrcloseall(void) * such entry exists already. */ void -smgrclosenode(RelFileNode rnode) +smgrclosenode(RelFileNodeBackend rnode) { SMgrRelation reln; @@ -305,8 +306,8 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * should be here and not in commands/tablespace.c? But that would imply * importing a lot of stuff that smgr.c oughtn't know, either. */ - TablespaceCreateDbspace(reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, + TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, isRedo); (*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo); @@ -323,29 +324,19 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * already. */ void -smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo) +smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo) { - RelFileNode rnode = reln->smgr_rnode; + RelFileNodeBackend rnode = reln->smgr_rnode; int which = reln->smgr_which; /* Close the fork */ (*(smgrsw[which].smgr_close)) (reln, forknum); - smgr_internal_unlink(rnode, forknum, which, isTemp, isRedo); -} - -/* - * Shared subroutine that actually does the unlink ... 
- */ -static void -smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, - int which, bool isTemp, bool isRedo) -{ /* * Get rid of any remaining buffers for the relation. bufmgr will just * drop them without bothering to write the contents. */ - DropRelFileNodeBuffers(rnode, forknum, isTemp, 0); + DropRelFileNodeBuffers(rnode, forknum, 0); /* * It'd be nice to tell the stats collector to forget it immediately, too. @@ -385,10 +376,10 @@ smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, */ void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum, - buffer, isTemp); + buffer, skipFsync); } /* @@ -426,16 +417,16 @@ smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * on disk at return, only dumped out to the kernel. However, * provisions will be made to fsync the write before the next checkpoint. * - * isTemp indicates that the relation is a temp table (ie, is managed - * by the local-buffer manager). In this case no provisions need be - * made to fsync the write before checkpointing. + * skipFsync indicates that the caller will make other provisions to + * fsync the relation, so we needn't bother. Temporary relations also + * do not require fsync. */ void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { (*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum, - buffer, isTemp); + buffer, skipFsync); } /* @@ -455,14 +446,13 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) * The truncation is done immediately, so this can't be rolled back. */ void -smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, - bool isTemp) +smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) { /* * Get rid of any buffers for the about-to-be-deleted blocks. 
bufmgr will * just drop them without bothering to write the contents. */ - DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks); + DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks); /* * Send a shared-inval message to force other backends to close any smgr @@ -479,8 +469,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, /* * Do the truncation. */ - (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks, - isTemp); + (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks); } /* @@ -499,7 +488,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, * to use the WAL log for PITR or replication purposes: in that case * we have to make WAL entries as well.) * - * The preceding writes should specify isTemp = true to avoid + * The preceding writes should specify skipFsync = true to avoid * duplicative fsyncs. * * Note that you need to do FlushRelationBuffers() first if there is diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index e11c13a9cc..01a4a17915 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -5,7 +5,7 @@ * Copyright (c) 2002-2010, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.32 2010/08/05 14:45:04 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.33 2010/08/13 20:10:52 rhaas Exp $ * */ @@ -244,14 +244,14 @@ pg_tablespace_size_name(PG_FUNCTION_ARGS) * calculate size of (one fork of) a relation */ static int64 -calculate_relation_size(RelFileNode *rfn, ForkNumber forknum) +calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum) { int64 totalsize = 0; char *relationpath; char pathname[MAXPGPATH]; unsigned int segcount = 0; - relationpath = relpath(*rfn, forknum); + relationpath = relpathbackend(*rfn, backend, forknum); for (segcount = 0;; segcount++) { @@ -291,7 +291,7 @@ 
pg_relation_size(PG_FUNCTION_ARGS) rel = relation_open(relOid, AccessShareLock); - size = calculate_relation_size(&(rel->rd_node), + size = calculate_relation_size(&(rel->rd_node), rel->rd_backend, forkname_to_number(text_to_cstring(forkName))); relation_close(rel, AccessShareLock); @@ -315,12 +315,14 @@ calculate_toast_table_size(Oid toastrelid) /* toast heap size, including FSM and VM size */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) - size += calculate_relation_size(&(toastRel->rd_node), forkNum); + size += calculate_relation_size(&(toastRel->rd_node), + toastRel->rd_backend, forkNum); /* toast index size, including FSM and VM size */ toastIdxRel = relation_open(toastRel->rd_rel->reltoastidxid, AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) - size += calculate_relation_size(&(toastIdxRel->rd_node), forkNum); + size += calculate_relation_size(&(toastIdxRel->rd_node), + toastIdxRel->rd_backend, forkNum); relation_close(toastIdxRel, AccessShareLock); relation_close(toastRel, AccessShareLock); @@ -349,7 +351,8 @@ calculate_table_size(Oid relOid) * heap size, including FSM and VM */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) - size += calculate_relation_size(&(rel->rd_node), forkNum); + size += calculate_relation_size(&(rel->rd_node), rel->rd_backend, + forkNum); /* * Size of toast relation @@ -392,7 +395,9 @@ calculate_indexes_size(Oid relOid) idxRel = relation_open(idxOid, AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) - size += calculate_relation_size(&(idxRel->rd_node), forkNum); + size += calculate_relation_size(&(idxRel->rd_node), + idxRel->rd_backend, + forkNum); relation_close(idxRel, AccessShareLock); } @@ -563,6 +568,7 @@ pg_relation_filepath(PG_FUNCTION_ARGS) HeapTuple tuple; Form_pg_class relform; RelFileNode rnode; + BackendId backend; char *path; tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); @@ -600,12 +606,27 @@ pg_relation_filepath(PG_FUNCTION_ARGS) break; } + if 
(!OidIsValid(rnode.relNode)) + { + ReleaseSysCache(tuple); + PG_RETURN_NULL(); + } + + /* If temporary, determine owning backend. */ + if (!relform->relistemp) + backend = InvalidBackendId; + else if (isTempOrToastNamespace(relform->relnamespace)) + backend = MyBackendId; + else + { + /* Do it the hard way. */ + backend = GetTempNamespaceBackendId(relform->relnamespace); + Assert(backend != InvalidBackendId); + } + ReleaseSysCache(tuple); - if (!OidIsValid(rnode.relNode)) - PG_RETURN_NULL(); - - path = relpath(rnode, MAIN_FORKNUM); + path = relpathbackend(rnode, backend, MAIN_FORKNUM); PG_RETURN_TEXT_P(cstring_to_text(path)); } diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 7a67f4a85e..1490483922 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -80,7 +80,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.98 2010/02/26 02:01:11 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.99 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -319,7 +319,8 @@ AddCatcacheInvalidationMessage(InvalidationListHeader *hdr, { SharedInvalidationMessage msg; - msg.cc.id = (int16) id; + Assert(id < CHAR_MAX); + msg.cc.id = (int8) id; msg.cc.tuplePtr = *tuplePtr; msg.cc.dbId = dbId; msg.cc.hashValue = hashValue; @@ -513,7 +514,10 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) * We could have smgr entries for relations of other databases, so no * short-circuit test is possible here. */ - smgrclosenode(msg->sm.rnode); + RelFileNodeBackend rnode; + rnode.node = msg->sm.rnode; + rnode.backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo; + smgrclosenode(rnode); } else if (msg->id == SHAREDINVALRELMAP_ID) { @@ -1163,14 +1167,20 @@ CacheInvalidateRelcacheByRelid(Oid relid) * in commit/abort WAL entries. 
Instead, calls to CacheInvalidateSmgr() * should happen in low-level smgr.c routines, which are executed while * replaying WAL as well as when creating it. + * + * Note: In order to avoid bloating SharedInvalidationMessage, we store only + * three bytes of the backend ID using what would otherwise be padding space. + * Thus, the maximum possible backend ID is 2^23-1. */ void -CacheInvalidateSmgr(RelFileNode rnode) +CacheInvalidateSmgr(RelFileNodeBackend rnode) { SharedInvalidationMessage msg; msg.sm.id = SHAREDINVALSMGR_ID; - msg.sm.rnode = rnode; + msg.sm.backend_hi = rnode.backend >> 16; + msg.sm.backend_lo = rnode.backend & 0xffff; + msg.sm.rnode = rnode.node; SendSharedInvalidMessages(&msg, 1); } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index f4304bce72..166beb25b1 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.311 2010/07/06 19:18:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.312 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -858,10 +858,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_createSubid = InvalidSubTransactionId; relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; relation->rd_istemp = relation->rd_rel->relistemp; - if (relation->rd_istemp) - relation->rd_islocaltemp = isTempOrToastNamespace(relation->rd_rel->relnamespace); + if (!relation->rd_istemp) + relation->rd_backend = InvalidBackendId; + else if (isTempOrToastNamespace(relation->rd_rel->relnamespace)) + relation->rd_backend = MyBackendId; else - relation->rd_islocaltemp = false; + { + /* + * If it's a temporary table, but not one of ours, we have to use + * the slow, grotty method to figure out the owning backend. 
+ */ + relation->rd_backend = + GetTempNamespaceBackendId(relation->rd_rel->relnamespace); + Assert(relation->rd_backend != InvalidBackendId); + } /* * initialize the tuple descriptor (relation->rd_att). @@ -1424,7 +1434,7 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_createSubid = InvalidSubTransactionId; relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; relation->rd_istemp = false; - relation->rd_islocaltemp = false; + relation->rd_backend = InvalidBackendId; /* * initialize relation tuple form @@ -2515,7 +2525,7 @@ RelationBuildLocalRelation(const char *relname, /* it is temporary if and only if it is in my temp-table namespace */ rel->rd_istemp = isTempOrToastNamespace(relnamespace); - rel->rd_islocaltemp = rel->rd_istemp; + rel->rd_backend = rel->rd_istemp ? MyBackendId : InvalidBackendId; /* * create a new tuple descriptor from the one passed in. We do this @@ -2629,7 +2639,7 @@ void RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) { Oid newrelfilenode; - RelFileNode newrnode; + RelFileNodeBackend newrnode; Relation pg_class; HeapTuple tuple; Form_pg_class classform; @@ -2640,7 +2650,8 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) TransactionIdIsNormal(freezeXid)); /* Allocate a new relfilenode */ - newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL); + newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL, + relation->rd_backend); /* * Get a writable copy of the pg_class tuple for the given relation. @@ -2660,9 +2671,10 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) * NOTE: any conflict in relfilenode value will be caught here, if * GetNewRelFileNode messes up for any reason. 
*/ - newrnode = relation->rd_node; - newrnode.relNode = newrelfilenode; - RelationCreateStorage(newrnode, relation->rd_istemp); + newrnode.node = relation->rd_node; + newrnode.node.relNode = newrelfilenode; + newrnode.backend = relation->rd_backend; + RelationCreateStorage(newrnode.node, relation->rd_istemp); smgrclosenode(newrnode); /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 97ed5b7247..dac704ee4c 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.566 2010/08/06 14:51:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.567 2010/08/13 20:10:53 rhaas Exp $ * *-------------------------------------------------------------------- */ @@ -96,6 +96,16 @@ #define MAX_KILOBYTES (INT_MAX / 1024) #endif +/* + * Note: MAX_BACKENDS is limited to 2^23-1 because inval.c stores the + * backend ID as a 3-byte signed integer. Even if that limitation were + * removed, we still could not exceed INT_MAX/4 because some places compute + * 4*MaxBackends without any overflow check. This is rechecked in + * assign_maxconnections, since MaxBackends is computed as MaxConnections + * plus autovacuum_max_workers plus one (for the autovacuum launcher). + */ +#define MAX_BACKENDS 0x7fffff + #define KB_PER_MB (1024) #define KB_PER_GB (1024*1024) @@ -1414,23 +1424,13 @@ static struct config_int ConfigureNamesInt[] = 30 * 1000, -1, INT_MAX / 1000, NULL, NULL }, - /* - * Note: MaxBackends is limited to INT_MAX/4 because some places compute - * 4*MaxBackends without any overflow check. This check is made in - * assign_maxconnections, since MaxBackends is computed as MaxConnections - * plus autovacuum_max_workers plus one (for the autovacuum launcher). - * - * Likewise we have to limit NBuffers to INT_MAX/2. 
- * - * See also CheckRequiredParameterValues() if this parameter changes - */ { {"max_connections", PGC_POSTMASTER, CONN_AUTH_SETTINGS, gettext_noop("Sets the maximum number of concurrent connections."), NULL }, &MaxConnections, - 100, 1, INT_MAX / 4, assign_maxconnections, NULL + 100, 1, MAX_BACKENDS, assign_maxconnections, NULL }, { @@ -1439,9 +1439,13 @@ static struct config_int ConfigureNamesInt[] = NULL }, &ReservedBackends, - 3, 0, INT_MAX / 4, NULL, NULL + 3, 0, MAX_BACKENDS, NULL, NULL }, + /* + * We sometimes multiply the number of shared buffers by two without + * checking for overflow, so we mustn't allow more than INT_MAX / 2. + */ { {"shared_buffers", PGC_POSTMASTER, RESOURCES_MEM, gettext_noop("Sets the number of shared memory buffers used by the server."), @@ -1618,7 +1622,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &max_prepared_xacts, - 0, 0, INT_MAX / 4, NULL, NULL + 0, 0, MAX_BACKENDS, NULL, NULL }, #ifdef LOCK_DEBUG @@ -1782,7 +1786,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &max_wal_senders, - 0, 0, INT_MAX / 4, NULL, NULL + 0, 0, MAX_BACKENDS, NULL, NULL }, { @@ -2022,7 +2026,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &autovacuum_max_workers, - 3, 1, INT_MAX / 4, assign_autovacuum_max_workers, NULL + 3, 1, MAX_BACKENDS, assign_autovacuum_max_workers, NULL }, { @@ -7995,7 +7999,7 @@ show_tcp_keepalives_count(void) static bool assign_maxconnections(int newval, bool doit, GucSource source) { - if (newval + autovacuum_max_workers + 1 > INT_MAX / 4) + if (newval + autovacuum_max_workers + 1 > MAX_BACKENDS) return false; if (doit) @@ -8007,7 +8011,7 @@ assign_maxconnections(int newval, bool doit, GucSource source) static bool assign_autovacuum_max_workers(int newval, bool doit, GucSource source) { - if (MaxConnections + newval + 1 > INT_MAX / 4) + if (MaxConnections + newval + 1 > MAX_BACKENDS) return false; if (doit) diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d index 
2874bf5141..2ea6b7798f 100644 --- a/src/backend/utils/probes.d +++ b/src/backend/utils/probes.d @@ -3,7 +3,7 @@ * * Copyright (c) 2006-2010, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/utils/probes.d,v 1.12 2010/01/02 16:57:53 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/probes.d,v 1.13 2010/08/13 20:10:52 rhaas Exp $ * ---------- */ @@ -55,7 +55,7 @@ provider postgresql { probe sort__done(bool, long); probe buffer__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, bool, bool); - probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, bool, bool, bool); + probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool); probe buffer__flush__start(ForkNumber, BlockNumber, Oid, Oid, Oid); probe buffer__flush__done(ForkNumber, BlockNumber, Oid, Oid, Oid); @@ -81,10 +81,10 @@ provider postgresql { probe twophase__checkpoint__start(); probe twophase__checkpoint__done(); - probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid); - probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int); - probe smgr__md__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid); - probe smgr__md__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int); + probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int); + probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int); + probe smgr__md__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int); + probe smgr__md__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int); probe xlog__insert(unsigned char, unsigned char); probe xlog__switch(); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 3f0930f395..f026728dd1 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of 
California * - * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.33 2010/04/28 16:10:43 heikki Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.34 2010/08/13 20:10:53 rhaas Exp $ */ #ifndef XLOG_INTERNAL_H #define XLOG_INTERNAL_H @@ -71,7 +71,7 @@ typedef struct XLogContRecord /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD064 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD065 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index ccbb5a1b28..6ba729a251 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.49 2010/02/26 02:01:21 momjian Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.50 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -25,10 +25,20 @@ extern const char *forkNames[]; extern ForkNumber forkname_to_number(char *forkName); +extern int forkname_chars(const char *str); -extern char *relpath(RelFileNode rnode, ForkNumber forknum); +extern char *relpathbackend(RelFileNode rnode, BackendId backend, + ForkNumber forknum); extern char *GetDatabasePath(Oid dbNode, Oid spcNode); +/* First argument is a RelFileNodeBackend */ +#define relpath(rnode, forknum) \ + relpathbackend((rnode).node, (rnode).backend, (forknum)) + +/* First argument is a RelFileNode */ +#define relpathperm(rnode, forknum) \ + relpathbackend((rnode), InvalidBackendId, (forknum)) + extern bool IsSystemRelation(Relation relation); extern bool IsToastRelation(Relation relation); @@ -45,6 +55,7 @@ extern bool IsSharedRelation(Oid relationId); extern Oid GetNewOid(Relation relation); 
extern Oid GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn); -extern Oid GetNewRelFileNode(Oid reltablespace, Relation pg_class); +extern Oid GetNewRelFileNode(Oid reltablespace, Relation pg_class, + BackendId backend); #endif /* CATALOG_H */ diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h index f86cf9bbf5..8449a7775e 100644 --- a/src/include/catalog/storage.h +++ b/src/include/catalog/storage.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/storage.h,v 1.5 2010/02/07 20:48:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/storage.h,v 1.6 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -30,8 +30,7 @@ extern void RelationTruncate(Relation rel, BlockNumber nblocks); * naming */ extern void smgrDoPendingDeletes(bool isCommit); -extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, - bool *haveNonTemp); +extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr); extern void AtSubCommit_smgr(void); extern void AtSubAbort_smgr(void); extern void PostPrepare_smgr(void); diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h index a72e31724c..e4ec6ad5b0 100644 --- a/src/include/postmaster/bgwriter.h +++ b/src/include/postmaster/bgwriter.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.15 2010/01/02 16:58:08 momjian Exp $ + * $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.16 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -27,7 +27,7 @@ extern void BackgroundWriterMain(void); extern void RequestCheckpoint(int flags); extern void CheckpointWriteDelay(int flags, double 
progress); -extern bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, +extern bool ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno); extern void AbsorbFsyncRequests(void); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index dc4376ee9a..68416ee1b5 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.124 2010/01/23 16:37:12 sriggs Exp $ + * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.125 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -160,7 +160,7 @@ extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy); -extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, +extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy); extern void ReleaseBuffer(Buffer buffer); @@ -180,8 +180,8 @@ extern BlockNumber BufferGetBlockNumber(Buffer buffer); extern BlockNumber RelationGetNumberOfBlocks(Relation relation); extern void FlushRelationBuffers(Relation rel); extern void FlushDatabaseBuffers(Oid dbid); -extern void DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, - bool istemp, BlockNumber firstDelBlock); +extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, + ForkNumber forkNum, BlockNumber firstDelBlock); extern void DropDatabaseBuffers(Oid dbid); #ifdef NOT_USED diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h index b5e4e1134d..9bf170b2c8 100644 --- 
a/src/include/storage/relfilenode.h +++ b/src/include/storage/relfilenode.h @@ -7,13 +7,15 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.25 2010/02/07 20:48:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.26 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ #ifndef RELFILENODE_H #define RELFILENODE_H +#include "storage/backendid.h" + /* * The physical storage of a relation consists of one or more forks. The * main fork is always created, but in addition to that there can be @@ -37,7 +39,8 @@ typedef enum ForkNumber /* * RelFileNode must provide all that we need to know to physically access - * a relation. Note, however, that a "physical" relation is comprised of + * a relation, with the exception of the backend ID, which can be provided + * separately. Note, however, that a "physical" relation is comprised of * multiple files on the filesystem, as each fork is stored as a separate * file, and each fork can be divided into multiple segments. See md.c. * @@ -74,14 +77,30 @@ typedef struct RelFileNode } RelFileNode; /* - * Note: RelFileNodeEquals compares relNode first since that is most likely - * to be different in two unequal RelFileNodes. It is probably redundant - * to compare spcNode if the other two fields are found equal, but do it - * anyway to be sure. + * Augmenting a relfilenode with the backend ID provides all the information + * we need to locate the physical storage. + */ +typedef struct RelFileNodeBackend +{ + RelFileNode node; + BackendId backend; +} RelFileNodeBackend; + +/* + * Note: RelFileNodeEquals and RelFileNodeBackendEquals compare relNode first + * since that is most likely to be different in two unequal RelFileNodes. 
It + * is probably redundant to compare spcNode if the other fields are found equal, + * but do it anyway to be sure. */ #define RelFileNodeEquals(node1, node2) \ ((node1).relNode == (node2).relNode && \ (node1).dbNode == (node2).dbNode && \ (node1).spcNode == (node2).spcNode) +#define RelFileNodeBackendEquals(node1, node2) \ + ((node1).node.relNode == (node2).node.relNode && \ + (node1).node.dbNode == (node2).node.dbNode && \ + (node1).backend == (node2).backend && \ + (node1).node.spcNode == (node2).node.spcNode) + #endif /* RELFILENODE_H */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 864a28fde8..b35fe7f1fb 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.59 2010/02/26 02:01:28 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.60 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -26,7 +26,7 @@ * * invalidate an smgr cache entry for a specific physical relation * * invalidate the mapped-relation mapping for a given database * More types could be added if needed. The message type is identified by - * the first "int16" field of the message struct. Zero or positive means a + * the first "int8" field of the message struct. Zero or positive means a * specific-catcache inval message (and also serves as the catcache ID field). * Negative values identify the other message types, as per codes below. 
* @@ -63,7 +63,7 @@ typedef struct { /* note: field layout chosen with an eye to alignment concerns */ - int16 id; /* cache ID --- must be first */ + int8 id; /* cache ID --- must be first */ ItemPointerData tuplePtr; /* tuple identifier in cached relation */ Oid dbId; /* database ID, or 0 if a shared relation */ uint32 hashValue; /* hash value of key for this catcache */ @@ -73,7 +73,7 @@ typedef struct typedef struct { - int16 id; /* type field --- must be first */ + int8 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 if a shared catalog */ Oid catId; /* ID of catalog whose contents are invalid */ } SharedInvalCatalogMsg; @@ -82,7 +82,7 @@ typedef struct typedef struct { - int16 id; /* type field --- must be first */ + int8 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 if a shared relation */ Oid relId; /* relation ID */ } SharedInvalRelcacheMsg; @@ -91,21 +91,23 @@ typedef struct typedef struct { - int16 id; /* type field --- must be first */ - RelFileNode rnode; /* physical file ID */ + int8 id; /* type field --- must be first */ + int8 backend_hi; /* high bits of backend ID, if temprel */ + uint16 backend_lo; /* low bits of backend ID, if temprel */ + RelFileNode rnode; /* spcNode, dbNode, relNode */ } SharedInvalSmgrMsg; #define SHAREDINVALRELMAP_ID (-4) typedef struct { - int16 id; /* type field --- must be first */ + int8 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 for shared catalogs */ } SharedInvalRelmapMsg; typedef union { - int16 id; /* type field --- must be first */ + int8 id; /* type field --- must be first */ SharedInvalCatcacheMsg cc; SharedInvalCatalogMsg cat; SharedInvalRelcacheMsg rc; diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index c037190b4b..55028556fa 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, 
Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.71 2010/02/26 02:01:28 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.72 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "access/xlog.h" #include "fmgr.h" +#include "storage/backendid.h" #include "storage/block.h" #include "storage/relfilenode.h" @@ -38,7 +39,7 @@ typedef struct SMgrRelationData { /* rnode is the hashtable lookup key, so it must be first! */ - RelFileNode smgr_rnode; /* relation physical identifier */ + RelFileNodeBackend smgr_rnode; /* relation physical identifier */ /* pointer to owning pointer, or NULL if none */ struct SMgrRelationData **smgr_owner; @@ -68,28 +69,30 @@ typedef struct SMgrRelationData typedef SMgrRelationData *SMgrRelation; +#define SmgrIsTemp(smgr) \ + ((smgr)->smgr_rnode.backend != InvalidBackendId) extern void smgrinit(void); -extern SMgrRelation smgropen(RelFileNode rnode); +extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend); extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln); extern void smgrclose(SMgrRelation reln); extern void smgrcloseall(void); -extern void smgrclosenode(RelFileNode rnode); +extern void smgrclosenode(RelFileNodeBackend rnode); extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum, - bool isTemp, bool isRedo); + bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); extern void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); extern void smgrwrite(SMgrRelation reln, ForkNumber 
forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum); extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks, bool isTemp); + BlockNumber nblocks); extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum); extern void smgrpreckpt(void); extern void smgrsync(void); @@ -103,27 +106,28 @@ extern void mdinit(void); extern void mdclose(SMgrRelation reln, ForkNumber forknum); extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern bool mdexists(SMgrRelation reln, ForkNumber forknum); -extern void mdunlink(RelFileNode rnode, ForkNumber forknum, bool isRedo); +extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); extern void mdextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); extern void mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); extern void mdwrite(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum); extern void mdtruncate(SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks, bool isTemp); + BlockNumber nblocks); extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum); extern void mdpreckpt(void); extern void mdsync(void); extern void mdpostckpt(void); extern void SetForwardFsyncRequests(void); -extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, +extern void RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno); -extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum); +extern 
void ForgetRelationFsyncRequests(RelFileNodeBackend rnode, + ForkNumber forknum); extern void ForgetDatabaseFsyncRequests(Oid dbid); /* smgrtype.c */ diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 8fe710d718..328f73c543 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.49 2010/02/08 04:33:55 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.50 2010/08/13 20:10:54 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -49,7 +49,7 @@ extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple); extern void CacheInvalidateRelcacheByRelid(Oid relid); -extern void CacheInvalidateSmgr(RelFileNode rnode); +extern void CacheInvalidateSmgr(RelFileNodeBackend rnode); extern void CacheInvalidateRelmap(Oid databaseId); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index a0a9b301c4..b615f81c65 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.124 2010/02/26 02:01:29 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.125 2010/08/13 20:10:54 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -126,8 +126,8 @@ typedef struct RelationData /* use "struct" here to avoid needing to include smgr.h: */ struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */ int rd_refcnt; /* reference count */ + BackendId rd_backend; /* owning backend id, if temporary relation */ bool rd_istemp; /* rel is a temporary relation */ - bool rd_islocaltemp; /* rel is a temp rel of this 
session */ bool rd_isnailed; /* rel is nailed in cache */ bool rd_isvalid; /* relcache entry is valid */ char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 = @@ -347,7 +347,7 @@ typedef struct StdRdOptions #define RelationOpenSmgr(relation) \ do { \ if ((relation)->rd_smgr == NULL) \ - smgrsetowner(&((relation)->rd_smgr), smgropen((relation)->rd_node)); \ + smgrsetowner(&((relation)->rd_smgr), smgropen((relation)->rd_node, (relation)->rd_backend)); \ } while (0) /* @@ -393,7 +393,7 @@ typedef struct StdRdOptions * Beware of multiple eval of argument */ #define RELATION_IS_LOCAL(relation) \ - ((relation)->rd_islocaltemp || \ + ((relation)->rd_backend == MyBackendId || \ (relation)->rd_createSubid != InvalidSubTransactionId) /* @@ -403,7 +403,7 @@ typedef struct StdRdOptions * Beware of multiple eval of argument */ #define RELATION_IS_OTHER_TEMP(relation) \ - ((relation)->rd_istemp && !(relation)->rd_islocaltemp) + ((relation)->rd_istemp && (relation)->rd_backend != MyBackendId) /* routines in utils/cache/relcache.c */ extern void RelationIncrementReferenceCount(Relation rel);