From debcec7dc31a992703911a9953e299c8d730c778 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 13 Aug 2010 20:10:54 +0000 Subject: [PATCH] Include the backend ID in the relpath of temporary relations. This allows us to reliably remove all leftover temporary relation files on cluster startup without reference to system catalogs or WAL; therefore, we no longer include temporary relations in XLOG_XACT_COMMIT and XLOG_XACT_ABORT WAL records. Since these changes require including a backend ID in each SharedInvalSmgrMsg, the size of the SharedInvalidationMessage.id field has been reduced from two bytes to one, and the maximum number of connections has been reduced from INT_MAX / 4 to 2^23-1. It would be possible to remove these restrictions by increasing the size of SharedInvalidationMessage by 4 bytes, but right now that doesn't seem like a good trade-off. Review by Jaime Casanova and Tom Lane. --- doc/src/sgml/storage.sgml | 26 +++-- src/backend/access/heap/visibilitymap.c | 5 +- src/backend/access/nbtree/nbtsort.c | 7 +- src/backend/access/transam/twophase.c | 10 +- src/backend/access/transam/xact.c | 21 ++-- src/backend/access/transam/xlogutils.c | 16 +-- src/backend/catalog/catalog.c | 127 +++++++++++++++----- src/backend/catalog/heap.c | 7 +- src/backend/catalog/index.c | 9 +- src/backend/catalog/namespace.c | 8 +- src/backend/catalog/storage.c | 57 ++++----- src/backend/catalog/toasting.c | 4 +- src/backend/commands/copy.c | 4 +- src/backend/commands/sequence.c | 6 +- src/backend/commands/tablecmds.c | 8 +- src/backend/postmaster/bgwriter.c | 7 +- src/backend/storage/buffer/bufmgr.c | 87 +++++++++----- src/backend/storage/buffer/localbuf.c | 18 +-- src/backend/storage/file/fd.c | 136 +++++++++++++++++++++- src/backend/storage/freespace/freespace.c | 4 +- src/backend/storage/smgr/md.c | 92 ++++++++------- src/backend/storage/smgr/smgr.c | 67 +++++------ src/backend/utils/adt/dbsize.c | 45 +++++-- src/backend/utils/cache/inval.c | 20 +++- 
src/backend/utils/cache/relcache.c | 34 ++++-- src/backend/utils/misc/guc.c | 40 ++++--- src/backend/utils/probes.d | 12 +- src/include/access/xlog_internal.h | 4 +- src/include/catalog/catalog.h | 17 ++- src/include/catalog/storage.h | 5 +- src/include/postmaster/bgwriter.h | 4 +- src/include/storage/bufmgr.h | 8 +- src/include/storage/relfilenode.h | 31 ++++- src/include/storage/sinval.h | 20 ++-- src/include/storage/smgr.h | 32 ++--- src/include/utils/inval.h | 4 +- src/include/utils/rel.h | 10 +- 37 files changed, 669 insertions(+), 343 deletions(-) diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index c4b38ddb6c..46bb03432d 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -1,4 +1,4 @@ - + @@ -133,16 +133,20 @@ there. -Each table and index is stored in a separate file, named after the table -or index's filenode number, which can be found in -pg_class.relfilenode. In addition to the -main file (a/k/a main fork), each table and index has a free space -map (see ), which stores information about free -space available in the relation. The free space map is stored in a file named -with the filenode number plus the suffix _fsm. Tables also have a -visibility map, stored in a fork with the suffix -_vm, to track which pages are known to have no dead tuples. -The visibility map is described further in . +Each table and index is stored in a separate file. For ordinary relations, +these files are named after the table or index's filenode number, +which can be found in pg_class.relfilenode. But +for temporary relations, the file name is of the form +tBBB_FFF, where BBB +is the backend ID of the backend which created the file, and FFF +is the filenode number. In either case, in addition to the main file (a/k/a +main fork), each table and index has a free space map (see ), which stores information about free space available in +the relation. The free space map is stored in a file named with the filenode +number plus the suffix _fsm. 
Tables also have a +visibility map, stored in a fork with the suffix _vm, +to track which pages are known to have no dead tuples. The visibility map is +described further in . diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 88a0c74e32..9bc65acae5 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/visibilitymap.c,v 1.10 2010/04/23 23:21:44 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/visibilitymap.c,v 1.11 2010/08/13 20:10:50 rhaas Exp $ * * INTERFACE ROUTINES * visibilitymap_clear - clear a bit in the visibility map @@ -373,8 +373,7 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks) } /* Truncate the unused VM pages, and send smgr inval message */ - smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks, - rel->rd_istemp); + smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks); /* * We might as well update the local smgr_vm_nblocks setting. smgrtruncate diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 15964e127e..e7048e7211 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -59,7 +59,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.125 2010/04/28 16:10:40 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.126 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -295,9 +295,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) } /* - * Now write the page. We say isTemp = true even if it's not a temp - * index, because there's no need for smgr to schedule an fsync for this - * write; we'll do it ourselves before ending the build. 
+ * Now write the page. There's no need for smgr to schedule an fsync for + * this write; we'll do it ourselves before ending the build. */ if (blkno == wstate->btws_pages_written) { diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index d6dca97bce..e3c3bc8dbc 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.62 2010/07/06 19:18:55 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.63 2010/08/13 20:10:50 rhaas Exp $ * * NOTES * Each global transaction is associated with a global transaction @@ -865,8 +865,8 @@ StartPrepare(GlobalTransaction gxact) hdr.prepared_at = gxact->prepared_at; hdr.owner = gxact->owner; hdr.nsubxacts = xactGetCommittedChildren(&children); - hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels, NULL); - hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels, NULL); + hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels); + hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels); hdr.ninvalmsgs = xactGetCommittedInvalidationMessages(&invalmsgs, &hdr.initfileinval); StrNCpy(hdr.gid, gxact->gid, GIDSIZE); @@ -1320,13 +1320,13 @@ FinishPreparedTransaction(const char *gid, bool isCommit) } for (i = 0; i < ndelrels; i++) { - SMgrRelation srel = smgropen(delrels[i]); + SMgrRelation srel = smgropen(delrels[i], InvalidBackendId); ForkNumber fork; for (fork = 0; fork <= MAX_FORKNUM; fork++) { if (smgrexists(srel, fork)) - smgrdounlink(srel, fork, false, false); + smgrdounlink(srel, fork, false); } smgrclose(srel); } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 0491d2c8d9..6015eaab1d 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * 
$PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.297 2010/08/13 15:42:21 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.298 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -912,7 +912,6 @@ RecordTransactionCommit(void) TransactionId latestXid = InvalidTransactionId; int nrels; RelFileNode *rels; - bool haveNonTemp; int nchildren; TransactionId *children; int nmsgs = 0; @@ -920,7 +919,7 @@ RecordTransactionCommit(void) bool RelcacheInitFileInval = false; /* Get data needed for commit record */ - nrels = smgrGetPendingDeletes(true, &rels, &haveNonTemp); + nrels = smgrGetPendingDeletes(true, &rels); nchildren = xactGetCommittedChildren(&children); if (XLogStandbyInfoActive()) nmsgs = xactGetCommittedInvalidationMessages(&invalMessages, @@ -1048,7 +1047,7 @@ RecordTransactionCommit(void) * asynchronous commit if all to-be-deleted tables are temporary though, * since they are lost anyway if we crash.) */ - if (XactSyncCommit || forceSyncCommit || haveNonTemp) + if (XactSyncCommit || forceSyncCommit || nrels > 0) { /* * Synchronous commit case: @@ -1334,7 +1333,7 @@ RecordTransactionAbort(bool isSubXact) xid); /* Fetch the data we need for the abort record */ - nrels = smgrGetPendingDeletes(false, &rels, NULL); + nrels = smgrGetPendingDeletes(false, &rels); nchildren = xactGetCommittedChildren(&children); /* XXX do we really need a critical section here? 
*/ @@ -4474,7 +4473,7 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn) /* Make sure files supposed to be dropped are dropped */ for (i = 0; i < xlrec->nrels; i++) { - SMgrRelation srel = smgropen(xlrec->xnodes[i]); + SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId); ForkNumber fork; for (fork = 0; fork <= MAX_FORKNUM; fork++) @@ -4482,7 +4481,7 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn) if (smgrexists(srel, fork)) { XLogDropRelation(xlrec->xnodes[i], fork); - smgrdounlink(srel, fork, false, true); + smgrdounlink(srel, fork, true); } } smgrclose(srel); @@ -4579,7 +4578,7 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid) /* Make sure files supposed to be dropped are dropped */ for (i = 0; i < xlrec->nrels; i++) { - SMgrRelation srel = smgropen(xlrec->xnodes[i]); + SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId); ForkNumber fork; for (fork = 0; fork <= MAX_FORKNUM; fork++) @@ -4587,7 +4586,7 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid) if (smgrexists(srel, fork)) { XLogDropRelation(xlrec->xnodes[i], fork); - smgrdounlink(srel, fork, false, true); + smgrdounlink(srel, fork, true); } } smgrclose(srel); @@ -4661,7 +4660,7 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec) appendStringInfo(buf, "; rels:"); for (i = 0; i < xlrec->nrels; i++) { - char *path = relpath(xlrec->xnodes[i], MAIN_FORKNUM); + char *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM); appendStringInfo(buf, " %s", path); pfree(path); @@ -4716,7 +4715,7 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec) appendStringInfo(buf, "; rels:"); for (i = 0; i < xlrec->nrels; i++) { - char *path = relpath(xlrec->xnodes[i], MAIN_FORKNUM); + char *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM); appendStringInfo(buf, " %s", path); pfree(path); diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 3d7c7cf69e..31479eabff 100644 --- 
a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.71 2010/07/08 16:08:30 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.72 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -68,7 +68,7 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno, */ if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1) { - char *path = relpath(node, forkno); + char *path = relpathperm(node, forkno); if (present) elog(DEBUG1, "page %u of relation %s is uninitialized", @@ -133,7 +133,7 @@ forget_invalid_pages(RelFileNode node, ForkNumber forkno, BlockNumber minblkno) { if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2) { - char *path = relpath(hentry->key.node, forkno); + char *path = relpathperm(hentry->key.node, forkno); elog(DEBUG2, "page %u of relation %s has been dropped", hentry->key.blkno, path); @@ -166,7 +166,7 @@ forget_invalid_pages_db(Oid dbid) { if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2) { - char *path = relpath(hentry->key.node, hentry->key.forkno); + char *path = relpathperm(hentry->key.node, hentry->key.forkno); elog(DEBUG2, "page %u of relation %s has been dropped", hentry->key.blkno, path); @@ -200,7 +200,7 @@ XLogCheckInvalidPages(void) */ while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL) { - char *path = relpath(hentry->key.node, hentry->key.forkno); + char *path = relpathperm(hentry->key.node, hentry->key.forkno); if (hentry->present) elog(WARNING, "page %u of relation %s was uninitialized", @@ -276,7 +276,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, Assert(blkno != P_NEW); /* Open the relation at smgr level 
*/ - smgr = smgropen(rnode); + smgr = smgropen(rnode, InvalidBackendId); /* * Create the target file if it doesn't already exist. This lets us cope @@ -293,7 +293,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, if (blkno < lastblock) { /* page exists in file */ - buffer = ReadBufferWithoutRelcache(rnode, false, forknum, blkno, + buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, mode, NULL); } else @@ -312,7 +312,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, { if (buffer != InvalidBuffer) ReleaseBuffer(buffer); - buffer = ReadBufferWithoutRelcache(rnode, false, forknum, + buffer = ReadBufferWithoutRelcache(rnode, forknum, P_NEW, mode, NULL); lastblock++; } diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index 1739085ffd..016081a7bf 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.90 2010/04/20 23:48:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.91 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -78,12 +78,37 @@ forkname_to_number(char *forkName) } /* - * relpath - construct path to a relation's file + * forkname_chars + * We use this to figure out whether a filename could be a relation + * fork (as opposed to an oddly named stray file that somehow ended + * up in the database directory). If the passed string begins with + * a fork name (other than the main fork name), we return its length. + * If not, we return 0. + * + * Note that the present coding assumes that there are no fork names which + * are prefixes of other fork names. 
+ */ +int +forkname_chars(const char *str) +{ + ForkNumber forkNum; + + for (forkNum = 1; forkNum <= MAX_FORKNUM; forkNum++) + { + int len = strlen(forkNames[forkNum]); + if (strncmp(forkNames[forkNum], str, len) == 0) + return len; + } + return 0; +} + +/* + * relpathbackend - construct path to a relation's file * * Result is a palloc'd string. */ char * -relpath(RelFileNode rnode, ForkNumber forknum) +relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) { int pathlen; char *path; @@ -92,6 +117,7 @@ relpath(RelFileNode rnode, ForkNumber forknum) { /* Shared system relations live in {datadir}/global */ Assert(rnode.dbNode == 0); + Assert(backend == InvalidBackendId); pathlen = 7 + OIDCHARS + 1 + FORKNAMECHARS + 1; path = (char *) palloc(pathlen); if (forknum != MAIN_FORKNUM) @@ -103,29 +129,69 @@ relpath(RelFileNode rnode, ForkNumber forknum) else if (rnode.spcNode == DEFAULTTABLESPACE_OID) { /* The default tablespace is {datadir}/base */ - pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; - path = (char *) palloc(pathlen); - if (forknum != MAIN_FORKNUM) - snprintf(path, pathlen, "base/%u/%u_%s", - rnode.dbNode, rnode.relNode, forkNames[forknum]); + if (backend == InvalidBackendId) + { + pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char *) palloc(pathlen); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "base/%u/%u_%s", + rnode.dbNode, rnode.relNode, + forkNames[forknum]); + else + snprintf(path, pathlen, "base/%u/%u", + rnode.dbNode, rnode.relNode); + } else - snprintf(path, pathlen, "base/%u/%u", - rnode.dbNode, rnode.relNode); + { + /* OIDCHARS will suffice for an integer, too */ + pathlen = 5 + OIDCHARS + 2 + OIDCHARS + 1 + OIDCHARS + 1 + + FORKNAMECHARS + 1; + path = (char *) palloc(pathlen); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "base/%u/t%d_%u_%s", + rnode.dbNode, backend, rnode.relNode, + forkNames[forknum]); + else + snprintf(path, pathlen, "base/%u/t%d_%u", + 
rnode.dbNode, backend, rnode.relNode); + } } else { /* All other tablespaces are accessed via symlinks */ - pathlen = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + - 1 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; - path = (char *) palloc(pathlen); - if (forknum != MAIN_FORKNUM) - snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u_%s", - rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, - rnode.dbNode, rnode.relNode, forkNames[forknum]); + if (backend == InvalidBackendId) + { + pathlen = 9 + 1 + OIDCHARS + 1 + + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + 1 + + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char *) palloc(pathlen); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u_%s", + rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, rnode.relNode, + forkNames[forknum]); + else + snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u", + rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, rnode.relNode); + } else - snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u", - rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, - rnode.dbNode, rnode.relNode); + { + /* OIDCHARS will suffice for an integer, too */ + pathlen = 9 + 1 + OIDCHARS + 1 + + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + 2 + + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char *) palloc(pathlen); + if (forknum != MAIN_FORKNUM) + snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/t%d_%u_%s", + rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, backend, rnode.relNode, + forkNames[forknum]); + else + snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/t%d_%u", + rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, backend, rnode.relNode); + } } return path; } @@ -458,16 +524,23 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn) * created by bootstrap have preassigned OIDs, so there's no need. 
*/ Oid -GetNewRelFileNode(Oid reltablespace, Relation pg_class) +GetNewRelFileNode(Oid reltablespace, Relation pg_class, BackendId backend) { - RelFileNode rnode; + RelFileNodeBackend rnode; char *rpath; int fd; bool collides; /* This logic should match RelationInitPhysicalAddr */ - rnode.spcNode = reltablespace ? reltablespace : MyDatabaseTableSpace; - rnode.dbNode = (rnode.spcNode == GLOBALTABLESPACE_OID) ? InvalidOid : MyDatabaseId; + rnode.node.spcNode = reltablespace ? reltablespace : MyDatabaseTableSpace; + rnode.node.dbNode = (rnode.node.spcNode == GLOBALTABLESPACE_OID) ? InvalidOid : MyDatabaseId; + + /* + * The relpath will vary based on the backend ID, so we must initialize + * that properly here to make sure that any collisions based on filename + * are properly detected. + */ + rnode.backend = backend; do { @@ -475,9 +548,9 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class) /* Generate the OID */ if (pg_class) - rnode.relNode = GetNewOid(pg_class); + rnode.node.relNode = GetNewOid(pg_class); else - rnode.relNode = GetNewObjectId(); + rnode.node.relNode = GetNewObjectId(); /* Check for existing file of same name */ rpath = relpath(rnode, MAIN_FORKNUM); @@ -508,5 +581,5 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class) pfree(rpath); } while (collides); - return rnode.relNode; + return rnode.node.relNode; } diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index a0268f7177..7754b73d73 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.374 2010/07/25 23:21:21 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.375 2010/08/13 20:10:50 rhaas Exp $ * * * INTERFACE ROUTINES @@ -39,6 +39,7 @@ #include "catalog/heap.h" #include "catalog/index.h" #include "catalog/indexing.h" +#include "catalog/namespace.h" #include "catalog/pg_attrdef.h" #include "catalog/pg_constraint.h" #include 
"catalog/pg_inherits.h" @@ -994,7 +995,9 @@ heap_create_with_catalog(const char *relname, binary_upgrade_next_toast_relfilenode = InvalidOid; } else - relid = GetNewRelFileNode(reltablespace, pg_class_desc); + relid = GetNewRelFileNode(reltablespace, pg_class_desc, + isTempOrToastNamespace(relnamespace) ? + MyBackendId : InvalidBackendId); } /* diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index dea6889075..b36402c755 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.337 2010/02/26 02:00:36 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.338 2010/08/13 20:10:50 rhaas Exp $ * * * INTERFACE ROUTINES @@ -645,7 +645,12 @@ index_create(Oid heapRelationId, binary_upgrade_next_index_relfilenode = InvalidOid; } else - indexRelationId = GetNewRelFileNode(tableSpaceId, pg_class); + { + indexRelationId = + GetNewRelFileNode(tableSpaceId, pg_class, + heapRelation->rd_istemp ? + MyBackendId : InvalidBackendId); + } } /* diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 71ec8f8250..624c8337b0 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -13,7 +13,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.128 2010/08/13 16:27:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.129 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -2588,7 +2588,7 @@ isOtherTempNamespace(Oid namespaceId) * GetTempNamespaceBackendId - if the given namespace is a temporary-table * namespace (either my own, or another backend's), return the BackendId * that owns it. Temporary-toast-table namespaces are included, too. - * If it isn't a temp namespace, return -1. 
+ * If it isn't a temp namespace, return InvalidBackendId. */ int GetTempNamespaceBackendId(Oid namespaceId) @@ -2599,13 +2599,13 @@ GetTempNamespaceBackendId(Oid namespaceId) /* See if the namespace name starts with "pg_temp_" or "pg_toast_temp_" */ nspname = get_namespace_name(namespaceId); if (!nspname) - return -1; /* no such namespace? */ + return InvalidBackendId; /* no such namespace? */ if (strncmp(nspname, "pg_temp_", 8) == 0) result = atoi(nspname + 8); else if (strncmp(nspname, "pg_toast_temp_", 14) == 0) result = atoi(nspname + 14); else - result = -1; + result = InvalidBackendId; pfree(nspname); return result; } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 2165341e0e..5a1131945c 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/storage.c,v 1.10 2010/02/09 21:43:30 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/storage.c,v 1.11 2010/08/13 20:10:50 rhaas Exp $ * * NOTES * Some of this code used to be in storage/smgr/smgr.c, and the @@ -52,7 +52,7 @@ typedef struct PendingRelDelete { RelFileNode relnode; /* relation that may need to be deleted */ - bool isTemp; /* is it a temporary relation? */ + BackendId backend; /* InvalidBackendId if not a temp rel */ bool atCommit; /* T=delete at commit; F=delete at abort */ int nestLevel; /* xact nesting level of request */ struct PendingRelDelete *next; /* linked-list link */ @@ -102,8 +102,9 @@ RelationCreateStorage(RelFileNode rnode, bool istemp) XLogRecData rdata; xl_smgr_create xlrec; SMgrRelation srel; + BackendId backend = istemp ? 
MyBackendId : InvalidBackendId; - srel = smgropen(rnode); + srel = smgropen(rnode, backend); smgrcreate(srel, MAIN_FORKNUM, false); if (!istemp) @@ -125,7 +126,7 @@ RelationCreateStorage(RelFileNode rnode, bool istemp) pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = rnode; - pending->isTemp = istemp; + pending->backend = backend; pending->atCommit = false; /* delete if abort */ pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; @@ -145,7 +146,7 @@ RelationDropStorage(Relation rel) pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = rel->rd_node; - pending->isTemp = rel->rd_istemp; + pending->backend = rel->rd_backend; pending->atCommit = true; /* delete if commit */ pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; @@ -283,7 +284,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) } /* Do the real work */ - smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks, rel->rd_istemp); + smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks); } /* @@ -291,6 +292,11 @@ RelationTruncate(Relation rel, BlockNumber nblocks) * * This also runs when aborting a subxact; we want to clean up a failed * subxact immediately. + * + * Note: It's possible that we're being asked to remove a relation that has + * no physical storage in any fork. In particular, it's possible that we're + * cleaning up an old temporary relation for which RemovePgTempFiles has + * already recovered the physical storage. 
*/ void smgrDoPendingDeletes(bool isCommit) @@ -322,14 +328,11 @@ smgrDoPendingDeletes(bool isCommit) SMgrRelation srel; int i; - srel = smgropen(pending->relnode); + srel = smgropen(pending->relnode, pending->backend); for (i = 0; i <= MAX_FORKNUM; i++) { if (smgrexists(srel, i)) - smgrdounlink(srel, - i, - pending->isTemp, - false); + smgrdounlink(srel, i, false); } smgrclose(srel); } @@ -341,20 +344,24 @@ smgrDoPendingDeletes(bool isCommit) } /* - * smgrGetPendingDeletes() -- Get a list of relations to be deleted. + * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted. * * The return value is the number of relations scheduled for termination. * *ptr is set to point to a freshly-palloc'd array of RelFileNodes. * If there are no relations to be deleted, *ptr is set to NULL. * - * If haveNonTemp isn't NULL, the bool it points to gets set to true if - * there is any non-temp table pending to be deleted; false if not. + * Only non-temporary relations are included in the returned list. This is OK + * because the list is used only in contexts where temporary relations don't + * matter: we're either writing to the two-phase state file (and transactions + * that have touched temp tables can't be prepared) or we're writing to xlog + * (and all temporary files will be zapped if we restart anyway, so no need + * for redo to do it also). * * Note that the list does not include anything scheduled for termination * by upper-level transactions. 
*/ int -smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) +smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr) { int nestLevel = GetCurrentTransactionNestLevel(); int nrels; @@ -362,11 +369,10 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) PendingRelDelete *pending; nrels = 0; - if (haveNonTemp) - *haveNonTemp = false; for (pending = pendingDeletes; pending != NULL; pending = pending->next) { - if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit) + if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit + && pending->backend == InvalidBackendId) nrels++; } if (nrels == 0) @@ -378,13 +384,12 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) *ptr = rptr; for (pending = pendingDeletes; pending != NULL; pending = pending->next) { - if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit) + if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit + && pending->backend == InvalidBackendId) { *rptr = pending->relnode; rptr++; } - if (haveNonTemp && !pending->isTemp) - *haveNonTemp = true; } return nrels; } @@ -456,7 +461,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record); SMgrRelation reln; - reln = smgropen(xlrec->rnode); + reln = smgropen(xlrec->rnode, InvalidBackendId); smgrcreate(reln, MAIN_FORKNUM, true); } else if (info == XLOG_SMGR_TRUNCATE) @@ -465,7 +470,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) SMgrRelation reln; Relation rel; - reln = smgropen(xlrec->rnode); + reln = smgropen(xlrec->rnode, InvalidBackendId); /* * Forcibly create relation if it doesn't exist (which suggests that @@ -475,7 +480,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) */ smgrcreate(reln, MAIN_FORKNUM, true); - smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno, false); + smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno); /* Also tell xlogutils.c about it */ 
XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno); @@ -502,7 +507,7 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec) if (info == XLOG_SMGR_CREATE) { xl_smgr_create *xlrec = (xl_smgr_create *) rec; - char *path = relpath(xlrec->rnode, MAIN_FORKNUM); + char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM); appendStringInfo(buf, "file create: %s", path); pfree(path); @@ -510,7 +515,7 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec) else if (info == XLOG_SMGR_TRUNCATE) { xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec; - char *path = relpath(xlrec->rnode, MAIN_FORKNUM); + char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM); appendStringInfo(buf, "file truncate: %s to %u blocks", path, xlrec->blkno); diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 6f658321b4..14757eed52 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.33 2010/07/25 23:21:21 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.34 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -195,7 +195,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, Datum reloptio * Toast tables for regular relations go in pg_toast; those for temp * relations go into the per-backend temp-toast-table namespace. 
*/ - if (rel->rd_islocaltemp) + if (rel->rd_backend == MyBackendId) namespaceid = GetTempToastNamespace(); else namespaceid = PG_TOAST_NAMESPACE; diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 681a7aaa92..19e1b7251e 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.328 2010/07/22 00:47:52 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.329 2010/08/13 20:10:50 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -1019,7 +1019,7 @@ DoCopy(const CopyStmt *stmt, const char *queryString) ExecCheckRTPerms(list_make1(rte), true); /* check read-only transaction */ - if (XactReadOnly && is_from && !cstate->rel->rd_islocaltemp) + if (XactReadOnly && is_from && cstate->rel->rd_backend != MyBackendId) PreventCommandIfReadOnly("COPY FROM"); /* Don't allow COPY w/ OIDs to or from a table without them */ diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 0f06bba803..6e0930f8d0 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.169 2010/07/25 23:21:21 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.170 2010/08/13 20:10:51 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -472,7 +472,7 @@ nextval_internal(Oid relid) RelationGetRelationName(seqrel)))); /* read-only transactions may only modify temp sequences */ - if (!seqrel->rd_islocaltemp) + if (seqrel->rd_backend != MyBackendId) PreventCommandIfReadOnly("nextval()"); if (elm->last != elm->cached) /* some numbers were cached */ @@ -749,7 +749,7 @@ do_setval(Oid relid, int64 next, bool iscalled) RelationGetRelationName(seqrel)))); /* read-only transactions may only modify temp sequences */ - if 
(!seqrel->rd_islocaltemp) + if (seqrel->rd_backend != MyBackendId) PreventCommandIfReadOnly("setval()"); /* lock page' buffer and read tuple */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 221e6417eb..703fd7e71b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.339 2010/08/05 14:45:01 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.340 2010/08/13 20:10:51 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -7165,13 +7165,13 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) * Relfilenodes are not unique across tablespaces, so we need to allocate * a new one in the new tablespace. */ - newrelfilenode = GetNewRelFileNode(newTableSpace, NULL); + newrelfilenode = GetNewRelFileNode(newTableSpace, NULL, rel->rd_backend); /* Open old and new relation */ newrnode = rel->rd_node; newrnode.relNode = newrelfilenode; newrnode.spcNode = newTableSpace; - dstrel = smgropen(newrnode); + dstrel = smgropen(newrnode, rel->rd_backend); RelationOpenSmgr(rel); @@ -7262,7 +7262,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, /* XLOG stuff */ if (use_wal) - log_newpage(&dst->smgr_rnode, forkNum, blkno, page); + log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page); /* * Now write the page. 
We say isTemp = true even if it's not a temp diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 72737ab226..67f0d5c636 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -38,7 +38,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.68 2010/04/28 16:54:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.69 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -113,7 +113,7 @@ */ typedef struct { - RelFileNode rnode; + RelFileNodeBackend rnode; ForkNumber forknum; BlockNumber segno; /* see md.c for special values */ /* might add a real request-type field later; not needed yet */ @@ -1071,7 +1071,8 @@ RequestCheckpoint(int flags) * than we have to here. */ bool -ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) +ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, + BlockNumber segno) { BgWriterRequest *request; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 3b6938135a..4c09df1ba7 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.256 2010/02/26 02:00:59 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.257 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -95,7 +95,8 @@ static void WaitIO(volatile BufferDesc *buf); static bool StartBufferIO(volatile BufferDesc *buf, bool forInput); static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty, int set_flag_bits); -static void buffer_write_error_callback(void *arg); +static void shared_buffer_write_error_callback(void *arg); +static void local_buffer_write_error_callback(void *arg); static volatile 
BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, @@ -141,7 +142,8 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) int buf_id; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node, + forkNum, blockNum); /* determine its hash code and partition lock ID */ newHash = BufTableHashCode(&newTag); @@ -251,18 +253,21 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, * ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require * a relcache entry for the relation. * - * NB: caller is assumed to know what it's doing if isTemp is true. + * NB: At present, this function may not be used on temporary relations, which + * is OK, because we only use it during XLOG replay. If in the future we + * want to use it on temporary relations, we could pass the backend ID as an + * additional parameter. 
*/ Buffer -ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, - ForkNumber forkNum, BlockNumber blockNum, - ReadBufferMode mode, BufferAccessStrategy strategy) +ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, + BlockNumber blockNum, ReadBufferMode mode, + BufferAccessStrategy strategy) { bool hit; - SMgrRelation smgr = smgropen(rnode); + SMgrRelation smgr = smgropen(rnode, InvalidBackendId); - return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, mode, strategy, + return ReadBuffer_common(smgr, false, forkNum, blockNum, mode, strategy, &hit); } @@ -414,7 +419,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, { /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); - smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, isLocalBuf); + smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false); } else { @@ -465,10 +470,10 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, VacuumCostBalance += VacuumCostPageMiss; TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum, - smgr->smgr_rnode.spcNode, - smgr->smgr_rnode.dbNode, - smgr->smgr_rnode.relNode, - isLocalBuf, + smgr->smgr_rnode.node.spcNode, + smgr->smgr_rnode.node.dbNode, + smgr->smgr_rnode.node.relNode, + smgr->smgr_rnode.backend, isExtend, found); @@ -512,7 +517,7 @@ BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, bool valid; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* determine its hash code and partition lock ID */ newHash = BufTableHashCode(&newTag); @@ -1693,21 +1698,24 @@ PrintBufferLeakWarning(Buffer buffer) volatile BufferDesc *buf; int32 loccount; char *path; + BackendId backend; Assert(BufferIsValid(buffer)); if (BufferIsLocal(buffer)) { buf = &LocalBufferDescriptors[-buffer - 1]; loccount = LocalRefCount[-buffer - 1]; + backend = MyBackendId; } else { buf = 
&BufferDescriptors[buffer - 1]; loccount = PrivateRefCount[buffer - 1]; + backend = InvalidBackendId; } /* theoretically we should lock the bufhdr here */ - path = relpath(buf->tag.rnode, buf->tag.forkNum); + path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum); elog(WARNING, "buffer refcount leak: [%03d] " "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)", @@ -1831,14 +1839,14 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) return; /* Setup error traceback support for ereport() */ - errcontext.callback = buffer_write_error_callback; + errcontext.callback = shared_buffer_write_error_callback; errcontext.arg = (void *) buf; errcontext.previous = error_context_stack; error_context_stack = &errcontext; /* Find smgr relation for buffer */ if (reln == NULL) - reln = smgropen(buf->tag.rnode); + reln = smgropen(buf->tag.rnode, InvalidBackendId); TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum, buf->tag.blockNum, @@ -1929,14 +1937,15 @@ RelationGetNumberOfBlocks(Relation relation) * -------------------------------------------------------------------- */ void -DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp, +DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock) { int i; - if (istemp) + if (rnode.backend != InvalidBackendId) { - DropRelFileNodeLocalBuffers(rnode, forkNum, firstDelBlock); + if (rnode.backend == MyBackendId) + DropRelFileNodeLocalBuffers(rnode.node, forkNum, firstDelBlock); return; } @@ -1945,7 +1954,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp, volatile BufferDesc *bufHdr = &BufferDescriptors[i]; LockBufHdr(bufHdr); - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) && + if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) && bufHdr->tag.forkNum == forkNum && bufHdr->tag.blockNum >= firstDelBlock) InvalidateBuffer(bufHdr); /* releases spinlock */ @@ -2008,7 +2017,7 @@ PrintBufferDescs(void) "[%02d] (freeNext=%d, rel=%s, " 
"blockNum=%u, flags=0x%x, refcount=%u %d)", i, buf->freeNext, - relpath(buf->tag.rnode, buf->tag.forkNum), + relpathbackend(buf->tag.rnode, InvalidBackendId, buf->tag.forkNum), buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } @@ -2078,7 +2087,7 @@ FlushRelationBuffers(Relation rel) ErrorContextCallback errcontext; /* Setup error traceback support for ereport() */ - errcontext.callback = buffer_write_error_callback; + errcontext.callback = local_buffer_write_error_callback; errcontext.arg = (void *) bufHdr; errcontext.previous = error_context_stack; error_context_stack = &errcontext; @@ -2087,7 +2096,7 @@ FlushRelationBuffers(Relation rel) bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *) LocalBufHdrGetBlock(bufHdr), - true); + false); bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); @@ -2699,8 +2708,9 @@ AbortBufferIO(void) if (sv_flags & BM_IO_ERROR) { /* Buffer is pinned, so we can read tag without spinlock */ - char *path = relpath(buf->tag.rnode, buf->tag.forkNum); + char *path; + path = relpathperm(buf->tag.rnode, buf->tag.forkNum); ereport(WARNING, (errcode(ERRCODE_IO_ERROR), errmsg("could not write block %u of %s", @@ -2714,17 +2724,36 @@ AbortBufferIO(void) } /* - * Error context callback for errors occurring during buffer writes. + * Error context callback for errors occurring during shared buffer writes. */ static void -buffer_write_error_callback(void *arg) +shared_buffer_write_error_callback(void *arg) { volatile BufferDesc *bufHdr = (volatile BufferDesc *) arg; /* Buffer is pinned, so we can read the tag without locking the spinlock */ if (bufHdr != NULL) { - char *path = relpath(bufHdr->tag.rnode, bufHdr->tag.forkNum); + char *path = relpathperm(bufHdr->tag.rnode, bufHdr->tag.forkNum); + + errcontext("writing block %u of relation %s", + bufHdr->tag.blockNum, path); + pfree(path); + } +} + +/* + * Error context callback for errors occurring during local buffer writes. 
+ */ +static void +local_buffer_write_error_callback(void *arg) +{ + volatile BufferDesc *bufHdr = (volatile BufferDesc *) arg; + + if (bufHdr != NULL) + { + char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId, + bufHdr->tag.forkNum); errcontext("writing block %u of relation %s", bufHdr->tag.blockNum, path); diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 2b783f87f4..dd067737c9 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.89 2010/01/02 16:57:51 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.90 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -68,7 +68,7 @@ LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum, BufferTag newTag; /* identity of requested block */ LocalBufferLookupEnt *hresult; - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* Initialize local buffers if first request in this session */ if (LocalBufHash == NULL) @@ -110,7 +110,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, int trycounter; bool found; - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* Initialize local buffers if first request in this session */ if (LocalBufHash == NULL) @@ -127,7 +127,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag)); #ifdef LBDEBUG fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n", - smgr->smgr_rnode.relNode, forkNum, blockNum, -b - 1); + smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1); #endif /* this part is equivalent to PinBuffer for a shared buffer */ if (LocalRefCount[b] == 0) @@ 
-150,7 +150,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, #ifdef LBDEBUG fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n", - smgr->smgr_rnode.relNode, forkNum, blockNum, -nextFreeLocalBuf - 1); + smgr->smgr_rnode.node.relNode, forkNum, blockNum, + -nextFreeLocalBuf - 1); #endif /* @@ -198,14 +199,14 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, SMgrRelation oreln; /* Find smgr relation for buffer */ - oreln = smgropen(bufHdr->tag.rnode); + oreln = smgropen(bufHdr->tag.rnode, MyBackendId); /* And write... */ smgrwrite(oreln, bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *) LocalBufHdrGetBlock(bufHdr), - true); + false); /* Mark not-dirty now in case we error out below */ bufHdr->flags &= ~BM_DIRTY; @@ -309,7 +310,8 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum, if (LocalRefCount[i] != 0) elog(ERROR, "block %u of %s is still referenced (local %u)", bufHdr->tag.blockNum, - relpath(bufHdr->tag.rnode, bufHdr->tag.forkNum), + relpathbackend(bufHdr->tag.rnode, MyBackendId, + bufHdr->tag.forkNum), LocalRefCount[i]); /* Remove entry from hashtable */ hresult = (LocalBufferLookupEnt *) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 91bf4af8e4..18d6de1dec 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.157 2010/07/06 22:55:26 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.158 2010/08/13 20:10:52 rhaas Exp $ * * NOTES: * @@ -249,6 +249,9 @@ static File OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError); static void AtProcExit_Files(int code, Datum arg); static void CleanupTempFiles(bool isProcExit); static void RemovePgTempFilesInDir(const char *tmpdirname); +static void RemovePgTempRelationFiles(const char *tsdirname); +static 
void RemovePgTempRelationFilesInDbspace(const char *dbspacedirname); +static bool looks_like_temp_rel_name(const char *name); /* @@ -1824,10 +1827,12 @@ CleanupTempFiles(bool isProcExit) /* - * Remove temporary files left over from a prior postmaster session + * Remove temporary and temporary relation files left over from a prior + * postmaster session * * This should be called during postmaster startup. It will forcibly - * remove any leftover files created by OpenTemporaryFile. + * remove any leftover files created by OpenTemporaryFile and any leftover + * temporary relation files created by mdcreate. * * NOTE: we could, but don't, call this during a post-backend-crash restart * cycle. The argument for not doing it is that someone might want to examine @@ -1847,6 +1852,7 @@ RemovePgTempFiles(void) */ snprintf(temp_path, sizeof(temp_path), "base/%s", PG_TEMP_FILES_DIR); RemovePgTempFilesInDir(temp_path); + RemovePgTempRelationFiles("base"); /* * Cycle through temp directories for all non-default tablespaces. 
@@ -1862,6 +1868,10 @@ RemovePgTempFiles(void) snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s/%s", spc_de->d_name, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR); RemovePgTempFilesInDir(temp_path); + + snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s", + spc_de->d_name, TABLESPACE_VERSION_DIRECTORY); + RemovePgTempRelationFiles(temp_path); } FreeDir(spc_dir); @@ -1915,3 +1925,123 @@ RemovePgTempFilesInDir(const char *tmpdirname) FreeDir(temp_dir); } + +/* Process one tablespace directory, look for per-DB subdirectories */ +static void +RemovePgTempRelationFiles(const char *tsdirname) +{ + DIR *ts_dir; + struct dirent *de; + char dbspace_path[MAXPGPATH]; + + ts_dir = AllocateDir(tsdirname); + if (ts_dir == NULL) + { + /* anything except ENOENT is fishy */ + if (errno != ENOENT) + elog(LOG, + "could not open tablespace directory \"%s\": %m", + tsdirname); + return; + } + + while ((de = ReadDir(ts_dir, tsdirname)) != NULL) + { + int i = 0; + + /* + * We're only interested in the per-database directories, which have + * numeric names. Note that this code will also (properly) ignore "." + * and "..". 
+ */ + while (isdigit((unsigned char) de->d_name[i])) + ++i; + if (de->d_name[i] != '\0' || i == 0) + continue; + + snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", + tsdirname, de->d_name); + RemovePgTempRelationFilesInDbspace(dbspace_path); + } + + FreeDir(ts_dir); +} + +/* Process one per-dbspace directory for RemovePgTempRelationFiles */ +static void +RemovePgTempRelationFilesInDbspace(const char *dbspacedirname) +{ + DIR *dbspace_dir; + struct dirent *de; + char rm_path[MAXPGPATH]; + + dbspace_dir = AllocateDir(dbspacedirname); + if (dbspace_dir == NULL) + { + /* we just saw this directory, so it really ought to be there */ + elog(LOG, + "could not open dbspace directory \"%s\": %m", + dbspacedirname); + return; + } + + while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) + { + if (!looks_like_temp_rel_name(de->d_name)) + continue; + + snprintf(rm_path, sizeof(rm_path), "%s/%s", + dbspacedirname, de->d_name); + + unlink(rm_path); /* note we ignore any error */ + } + + FreeDir(dbspace_dir); +} + +/* t<digits>_<digits>, or t<digits>_<digits>_<forkname> */ +static bool +looks_like_temp_rel_name(const char *name) +{ + int pos; + int savepos; + + /* Must start with "t". */ + if (name[0] != 't') + return false; + + /* Followed by a non-empty string of digits and then an underscore. */ + for (pos = 1; isdigit((unsigned char) name[pos]); ++pos) + ; + if (pos == 1 || name[pos] != '_') + return false; + + /* Followed by another nonempty string of digits. */ + for (savepos = ++pos; isdigit((unsigned char) name[pos]); ++pos) + ; + if (savepos == pos) + return false; + + /* We might have _forkname or .segment or both. */ + if (name[pos] == '_') + { + int forkchar = forkname_chars(&name[pos+1]); + if (forkchar <= 0) + return false; + pos += forkchar + 1; + } + if (name[pos] == '.') + { + int segchar; + for (segchar = 1; isdigit((unsigned char) name[pos+segchar]); ++segchar) + ; + if (segchar <= 1) + return false; + pos += segchar; + } + + /* Now we should be at the end. 
*/ + if (name[pos] != '\0') + return false; + return true; +} diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index a872f1e78f..040dd3344c 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.77 2010/02/26 02:00:59 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.78 2010/08/13 20:10:52 rhaas Exp $ * * * NOTES: @@ -303,7 +303,7 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks) } /* Truncate the unused FSM pages, and send smgr inval message */ - smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks, rel->rd_istemp); + smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks); /* * We might as well update the local smgr_fsm_nblocks setting. diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index eb5c73d6f8..f1ff2fe15e 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.151 2010/02/26 02:01:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.152 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -119,7 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */ */ typedef struct { - RelFileNode rnode; /* the targeted relation */ + RelFileNodeBackend rnode; /* the targeted relation */ ForkNumber forknum; BlockNumber segno; /* which segment */ } PendingOperationTag; @@ -135,7 +135,7 @@ typedef struct typedef struct { - RelFileNode rnode; /* the dead relation to delete */ + RelFileNodeBackend rnode; /* the dead relation to delete */ CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */ } 
PendingUnlinkEntry; @@ -158,14 +158,14 @@ static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior); static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); -static void register_unlink(RelFileNode rnode); +static void register_unlink(RelFileNodeBackend rnode); static MdfdVec *_fdvec_alloc(void); static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno); static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags); static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno, - BlockNumber blkno, bool isTemp, ExtensionBehavior behavior); + BlockNumber blkno, bool skipFsync, ExtensionBehavior behavior); static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); @@ -321,7 +321,7 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) * we are usually not in a transaction anymore when this is called. */ void -mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) +mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) { char *path; int ret; @@ -417,7 +417,7 @@ mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) */ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { off_t seekpos; int nbytes; @@ -440,7 +440,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, relpath(reln->smgr_rnode, forknum), InvalidBlockNumber))); - v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE); + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -478,7 +478,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, errhint("Check free disk space."))); } - if (!isTemp) + if (!skipFsync && !SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); Assert(_mdnblocks(reln, 
forknum, v) <= ((BlockNumber) RELSEG_SIZE)); @@ -605,9 +605,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MdfdVec *v; TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); @@ -624,9 +625,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, nbytes, BLCKSZ); @@ -666,7 +668,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, */ void mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { off_t seekpos; int nbytes; @@ -678,11 +680,12 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, #endif TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); - v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL); + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_FAIL); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -697,9 +700,10 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - 
reln->smgr_rnode.relNode, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, nbytes, BLCKSZ); @@ -720,7 +724,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, errhint("Check free disk space."))); } - if (!isTemp) + if (!skipFsync && !SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); } @@ -794,8 +798,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * mdtruncate() -- Truncate relation to specified number of blocks. */ void -mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, - bool isTemp) +mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) { MdfdVec *v; BlockNumber curnblk; @@ -839,7 +842,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, errmsg("could not truncate file \"%s\": %m", FilePathName(v->mdfd_vfd)))); - if (!isTemp) + if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st @@ -864,7 +867,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, errmsg("could not truncate file \"%s\" to %u blocks: %m", FilePathName(v->mdfd_vfd), nblocks))); - if (!isTemp) + if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; ov->mdfd_chain = NULL; @@ -1052,7 +1055,8 @@ mdsync(void) * the relation will have been dirtied through this same smgr * relation, and so we can save a file open/close cycle. */ - reln = smgropen(entry->tag.rnode); + reln = smgropen(entry->tag.rnode.node, + entry->tag.rnode.backend); /* * It is possible that the relation has been dropped or @@ -1235,7 +1239,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) * a remote pending-ops table. 
*/ static void -register_unlink(RelFileNode rnode) +register_unlink(RelFileNodeBackend rnode) { if (pendingOpsTable) { @@ -1278,7 +1282,8 @@ register_unlink(RelFileNode rnode) * structure for them.) */ void -RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) +RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, + BlockNumber segno) { Assert(pendingOpsTable); @@ -1291,7 +1296,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (RelFileNodeEquals(entry->tag.rnode, rnode) && + if (RelFileNodeBackendEquals(entry->tag.rnode, rnode) && entry->tag.forknum == forknum) { /* Okay, cancel this entry */ @@ -1312,7 +1317,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (entry->tag.rnode.dbNode == rnode.dbNode) + if (entry->tag.rnode.node.dbNode == rnode.node.dbNode) { /* Okay, cancel this entry */ entry->canceled = true; @@ -1326,7 +1331,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell); next = lnext(cell); - if (entry->rnode.dbNode == rnode.dbNode) + if (entry->rnode.node.dbNode == rnode.node.dbNode) { pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev); pfree(entry); @@ -1393,7 +1398,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) * ForgetRelationFsyncRequests -- forget any fsyncs for a rel */ void -ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) +ForgetRelationFsyncRequests(RelFileNodeBackend rnode, ForkNumber forknum) { if (pendingOpsTable) { @@ -1428,11 +1433,12 @@ ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) void 
ForgetDatabaseFsyncRequests(Oid dbid) { - RelFileNode rnode; + RelFileNodeBackend rnode; - rnode.dbNode = dbid; - rnode.spcNode = 0; - rnode.relNode = 0; + rnode.node.dbNode = dbid; + rnode.node.spcNode = 0; + rnode.node.relNode = 0; + rnode.backend = InvalidBackendId; if (pendingOpsTable) { @@ -1523,12 +1529,12 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, * specified block. * * If the segment doesn't exist, we ereport, return NULL, or create the - * segment, according to "behavior". Note: isTemp need only be correct - * in the EXTENSION_CREATE case. + * segment, according to "behavior". Note: skipFsync is only used in the + * EXTENSION_CREATE case. */ static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, - bool isTemp, ExtensionBehavior behavior) + bool skipFsync, ExtensionBehavior behavior) { MdfdVec *v = mdopen(reln, forknum, behavior); BlockNumber targetseg; @@ -1566,7 +1572,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, mdextend(reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, - zerobuf, isTemp); + zerobuf, skipFsync); pfree(zerobuf); } v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT); diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7a35b0a833..c1d1449222 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.121 2010/02/26 02:01:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.122 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -45,19 +45,19 @@ typedef struct f_smgr void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, bool isRedo); bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_unlink) (RelFileNode rnode, ForkNumber forknum, + void (*smgr_unlink) 
(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks, bool isTemp); + BlockNumber nblocks); void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); void (*smgr_pre_ckpt) (void); /* may be NULL */ void (*smgr_sync) (void); /* may be NULL */ @@ -83,8 +83,6 @@ static HTAB *SMgrRelationHash = NULL; /* local function prototypes */ static void smgrshutdown(int code, Datum arg); -static void smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, - int which, bool isTemp, bool isRedo); /* @@ -131,8 +129,9 @@ smgrshutdown(int code, Datum arg) * This does not attempt to actually open the object. 
*/ SMgrRelation -smgropen(RelFileNode rnode) +smgropen(RelFileNode rnode, BackendId backend) { + RelFileNodeBackend brnode; SMgrRelation reln; bool found; @@ -142,7 +141,7 @@ smgropen(RelFileNode rnode) HASHCTL ctl; MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(RelFileNode); + ctl.keysize = sizeof(RelFileNodeBackend); ctl.entrysize = sizeof(SMgrRelationData); ctl.hash = tag_hash; SMgrRelationHash = hash_create("smgr relation table", 400, @@ -150,8 +149,10 @@ smgropen(RelFileNode rnode) } /* Look up or create an entry */ + brnode.node = rnode; + brnode.backend = backend; reln = (SMgrRelation) hash_search(SMgrRelationHash, - (void *) &rnode, + (void *) &brnode, HASH_ENTER, &found); /* Initialize it if not present before */ @@ -261,7 +262,7 @@ smgrcloseall(void) * such entry exists already. */ void -smgrclosenode(RelFileNode rnode) +smgrclosenode(RelFileNodeBackend rnode) { SMgrRelation reln; @@ -305,8 +306,8 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * should be here and not in commands/tablespace.c? But that would imply * importing a lot of stuff that smgr.c oughtn't know, either. */ - TablespaceCreateDbspace(reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, + TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, isRedo); (*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo); @@ -323,29 +324,19 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * already. */ void -smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo) +smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo) { - RelFileNode rnode = reln->smgr_rnode; + RelFileNodeBackend rnode = reln->smgr_rnode; int which = reln->smgr_which; /* Close the fork */ (*(smgrsw[which].smgr_close)) (reln, forknum); - smgr_internal_unlink(rnode, forknum, which, isTemp, isRedo); -} - -/* - * Shared subroutine that actually does the unlink ... 
- */ -static void -smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, - int which, bool isTemp, bool isRedo) -{ /* * Get rid of any remaining buffers for the relation. bufmgr will just * drop them without bothering to write the contents. */ - DropRelFileNodeBuffers(rnode, forknum, isTemp, 0); + DropRelFileNodeBuffers(rnode, forknum, 0); /* * It'd be nice to tell the stats collector to forget it immediately, too. @@ -385,10 +376,10 @@ smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, */ void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum, - buffer, isTemp); + buffer, skipFsync); } /* @@ -426,16 +417,16 @@ smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * on disk at return, only dumped out to the kernel. However, * provisions will be made to fsync the write before the next checkpoint. * - * isTemp indicates that the relation is a temp table (ie, is managed - * by the local-buffer manager). In this case no provisions need be - * made to fsync the write before checkpointing. + * skipFsync indicates that the caller will make other provisions to + * fsync the relation, so we needn't bother. Temporary relations also + * do not require fsync. */ void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { (*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum, - buffer, isTemp); + buffer, skipFsync); } /* @@ -455,14 +446,13 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) * The truncation is done immediately, so this can't be rolled back. */ void -smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, - bool isTemp) +smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) { /* * Get rid of any buffers for the about-to-be-deleted blocks. 
bufmgr will * just drop them without bothering to write the contents. */ - DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks); + DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks); /* * Send a shared-inval message to force other backends to close any smgr @@ -479,8 +469,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, /* * Do the truncation. */ - (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks, - isTemp); + (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks); } /* @@ -499,7 +488,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, * to use the WAL log for PITR or replication purposes: in that case * we have to make WAL entries as well.) * - * The preceding writes should specify isTemp = true to avoid + * The preceding writes should specify skipFsync = true to avoid * duplicative fsyncs. * * Note that you need to do FlushRelationBuffers() first if there is diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index e11c13a9cc..01a4a17915 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -5,7 +5,7 @@ * Copyright (c) 2002-2010, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.32 2010/08/05 14:45:04 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.33 2010/08/13 20:10:52 rhaas Exp $ * */ @@ -244,14 +244,14 @@ pg_tablespace_size_name(PG_FUNCTION_ARGS) * calculate size of (one fork of) a relation */ static int64 -calculate_relation_size(RelFileNode *rfn, ForkNumber forknum) +calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum) { int64 totalsize = 0; char *relationpath; char pathname[MAXPGPATH]; unsigned int segcount = 0; - relationpath = relpath(*rfn, forknum); + relationpath = relpathbackend(*rfn, backend, forknum); for (segcount = 0;; segcount++) { @@ -291,7 +291,7 @@ 
pg_relation_size(PG_FUNCTION_ARGS) rel = relation_open(relOid, AccessShareLock); - size = calculate_relation_size(&(rel->rd_node), + size = calculate_relation_size(&(rel->rd_node), rel->rd_backend, forkname_to_number(text_to_cstring(forkName))); relation_close(rel, AccessShareLock); @@ -315,12 +315,14 @@ calculate_toast_table_size(Oid toastrelid) /* toast heap size, including FSM and VM size */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) - size += calculate_relation_size(&(toastRel->rd_node), forkNum); + size += calculate_relation_size(&(toastRel->rd_node), + toastRel->rd_backend, forkNum); /* toast index size, including FSM and VM size */ toastIdxRel = relation_open(toastRel->rd_rel->reltoastidxid, AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) - size += calculate_relation_size(&(toastIdxRel->rd_node), forkNum); + size += calculate_relation_size(&(toastIdxRel->rd_node), + toastIdxRel->rd_backend, forkNum); relation_close(toastIdxRel, AccessShareLock); relation_close(toastRel, AccessShareLock); @@ -349,7 +351,8 @@ calculate_table_size(Oid relOid) * heap size, including FSM and VM */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) - size += calculate_relation_size(&(rel->rd_node), forkNum); + size += calculate_relation_size(&(rel->rd_node), rel->rd_backend, + forkNum); /* * Size of toast relation @@ -392,7 +395,9 @@ calculate_indexes_size(Oid relOid) idxRel = relation_open(idxOid, AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) - size += calculate_relation_size(&(idxRel->rd_node), forkNum); + size += calculate_relation_size(&(idxRel->rd_node), + idxRel->rd_backend, + forkNum); relation_close(idxRel, AccessShareLock); } @@ -563,6 +568,7 @@ pg_relation_filepath(PG_FUNCTION_ARGS) HeapTuple tuple; Form_pg_class relform; RelFileNode rnode; + BackendId backend; char *path; tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); @@ -600,12 +606,27 @@ pg_relation_filepath(PG_FUNCTION_ARGS) break; } + if 
(!OidIsValid(rnode.relNode)) + { + ReleaseSysCache(tuple); + PG_RETURN_NULL(); + } + + /* If temporary, determine owning backend. */ + if (!relform->relistemp) + backend = InvalidBackendId; + else if (isTempOrToastNamespace(relform->relnamespace)) + backend = MyBackendId; + else + { + /* Do it the hard way. */ + backend = GetTempNamespaceBackendId(relform->relnamespace); + Assert(backend != InvalidBackendId); + } + ReleaseSysCache(tuple); - if (!OidIsValid(rnode.relNode)) - PG_RETURN_NULL(); - - path = relpath(rnode, MAIN_FORKNUM); + path = relpathbackend(rnode, backend, MAIN_FORKNUM); PG_RETURN_TEXT_P(cstring_to_text(path)); } diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 7a67f4a85e..1490483922 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -80,7 +80,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.98 2010/02/26 02:01:11 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.99 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -319,7 +319,8 @@ AddCatcacheInvalidationMessage(InvalidationListHeader *hdr, { SharedInvalidationMessage msg; - msg.cc.id = (int16) id; + Assert(id < CHAR_MAX); + msg.cc.id = (int8) id; msg.cc.tuplePtr = *tuplePtr; msg.cc.dbId = dbId; msg.cc.hashValue = hashValue; @@ -513,7 +514,10 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) * We could have smgr entries for relations of other databases, so no * short-circuit test is possible here. */ - smgrclosenode(msg->sm.rnode); + RelFileNodeBackend rnode; + rnode.node = msg->sm.rnode; + rnode.backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo; + smgrclosenode(rnode); } else if (msg->id == SHAREDINVALRELMAP_ID) { @@ -1163,14 +1167,20 @@ CacheInvalidateRelcacheByRelid(Oid relid) * in commit/abort WAL entries. 
Instead, calls to CacheInvalidateSmgr() * should happen in low-level smgr.c routines, which are executed while * replaying WAL as well as when creating it. + * + * Note: In order to avoid bloating SharedInvalidationMessage, we store only + * three bytes of the backend ID using what would otherwise be padding space. + * Thus, the maximum possible backend ID is 2^23-1. */ void -CacheInvalidateSmgr(RelFileNode rnode) +CacheInvalidateSmgr(RelFileNodeBackend rnode) { SharedInvalidationMessage msg; msg.sm.id = SHAREDINVALSMGR_ID; - msg.sm.rnode = rnode; + msg.sm.backend_hi = rnode.backend >> 16; + msg.sm.backend_lo = rnode.backend & 0xffff; + msg.sm.rnode = rnode.node; SendSharedInvalidMessages(&msg, 1); } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index f4304bce72..166beb25b1 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.311 2010/07/06 19:18:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.312 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -858,10 +858,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_createSubid = InvalidSubTransactionId; relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; relation->rd_istemp = relation->rd_rel->relistemp; - if (relation->rd_istemp) - relation->rd_islocaltemp = isTempOrToastNamespace(relation->rd_rel->relnamespace); + if (!relation->rd_istemp) + relation->rd_backend = InvalidBackendId; + else if (isTempOrToastNamespace(relation->rd_rel->relnamespace)) + relation->rd_backend = MyBackendId; else - relation->rd_islocaltemp = false; + { + /* + * If it's a temporary table, but not one of ours, we have to use + * the slow, grotty method to figure out the owning backend. 
+ */ + relation->rd_backend = + GetTempNamespaceBackendId(relation->rd_rel->relnamespace); + Assert(relation->rd_backend != InvalidBackendId); + } /* * initialize the tuple descriptor (relation->rd_att). @@ -1424,7 +1434,7 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_createSubid = InvalidSubTransactionId; relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; relation->rd_istemp = false; - relation->rd_islocaltemp = false; + relation->rd_backend = InvalidBackendId; /* * initialize relation tuple form @@ -2515,7 +2525,7 @@ RelationBuildLocalRelation(const char *relname, /* it is temporary if and only if it is in my temp-table namespace */ rel->rd_istemp = isTempOrToastNamespace(relnamespace); - rel->rd_islocaltemp = rel->rd_istemp; + rel->rd_backend = rel->rd_istemp ? MyBackendId : InvalidBackendId; /* * create a new tuple descriptor from the one passed in. We do this @@ -2629,7 +2639,7 @@ void RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) { Oid newrelfilenode; - RelFileNode newrnode; + RelFileNodeBackend newrnode; Relation pg_class; HeapTuple tuple; Form_pg_class classform; @@ -2640,7 +2650,8 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) TransactionIdIsNormal(freezeXid)); /* Allocate a new relfilenode */ - newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL); + newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL, + relation->rd_backend); /* * Get a writable copy of the pg_class tuple for the given relation. @@ -2660,9 +2671,10 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) * NOTE: any conflict in relfilenode value will be caught here, if * GetNewRelFileNode messes up for any reason. 
*/ - newrnode = relation->rd_node; - newrnode.relNode = newrelfilenode; - RelationCreateStorage(newrnode, relation->rd_istemp); + newrnode.node = relation->rd_node; + newrnode.node.relNode = newrelfilenode; + newrnode.backend = relation->rd_backend; + RelationCreateStorage(newrnode.node, relation->rd_istemp); smgrclosenode(newrnode); /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 97ed5b7247..dac704ee4c 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.566 2010/08/06 14:51:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.567 2010/08/13 20:10:53 rhaas Exp $ * *-------------------------------------------------------------------- */ @@ -96,6 +96,16 @@ #define MAX_KILOBYTES (INT_MAX / 1024) #endif +/* + * Note: MAX_BACKENDS is limited to 2^23-1 because inval.c stores the + * backend ID as a 3-byte signed integer. Even if that limitation were + * removed, we still could not exceed INT_MAX/4 because some places compute + * 4*MaxBackends without any overflow check. This is rechecked in + * assign_maxconnections, since MaxBackends is computed as MaxConnections + * plus autovacuum_max_workers plus one (for the autovacuum launcher). + */ +#define MAX_BACKENDS 0x7fffff + #define KB_PER_MB (1024) #define KB_PER_GB (1024*1024) @@ -1414,23 +1424,13 @@ static struct config_int ConfigureNamesInt[] = 30 * 1000, -1, INT_MAX / 1000, NULL, NULL }, - /* - * Note: MaxBackends is limited to INT_MAX/4 because some places compute - * 4*MaxBackends without any overflow check. This check is made in - * assign_maxconnections, since MaxBackends is computed as MaxConnections - * plus autovacuum_max_workers plus one (for the autovacuum launcher). - * - * Likewise we have to limit NBuffers to INT_MAX/2. 
- * - * See also CheckRequiredParameterValues() if this parameter changes - */ { {"max_connections", PGC_POSTMASTER, CONN_AUTH_SETTINGS, gettext_noop("Sets the maximum number of concurrent connections."), NULL }, &MaxConnections, - 100, 1, INT_MAX / 4, assign_maxconnections, NULL + 100, 1, MAX_BACKENDS, assign_maxconnections, NULL }, { @@ -1439,9 +1439,13 @@ static struct config_int ConfigureNamesInt[] = NULL }, &ReservedBackends, - 3, 0, INT_MAX / 4, NULL, NULL + 3, 0, MAX_BACKENDS, NULL, NULL }, + /* + * We sometimes multiply the number of shared buffers by two without + * checking for overflow, so we mustn't allow more than INT_MAX / 2. + */ { {"shared_buffers", PGC_POSTMASTER, RESOURCES_MEM, gettext_noop("Sets the number of shared memory buffers used by the server."), @@ -1618,7 +1622,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &max_prepared_xacts, - 0, 0, INT_MAX / 4, NULL, NULL + 0, 0, MAX_BACKENDS, NULL, NULL }, #ifdef LOCK_DEBUG @@ -1782,7 +1786,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &max_wal_senders, - 0, 0, INT_MAX / 4, NULL, NULL + 0, 0, MAX_BACKENDS, NULL, NULL }, { @@ -2022,7 +2026,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &autovacuum_max_workers, - 3, 1, INT_MAX / 4, assign_autovacuum_max_workers, NULL + 3, 1, MAX_BACKENDS, assign_autovacuum_max_workers, NULL }, { @@ -7995,7 +7999,7 @@ show_tcp_keepalives_count(void) static bool assign_maxconnections(int newval, bool doit, GucSource source) { - if (newval + autovacuum_max_workers + 1 > INT_MAX / 4) + if (newval + autovacuum_max_workers + 1 > MAX_BACKENDS) return false; if (doit) @@ -8007,7 +8011,7 @@ assign_maxconnections(int newval, bool doit, GucSource source) static bool assign_autovacuum_max_workers(int newval, bool doit, GucSource source) { - if (MaxConnections + newval + 1 > INT_MAX / 4) + if (MaxConnections + newval + 1 > MAX_BACKENDS) return false; if (doit) diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d index 
2874bf5141..2ea6b7798f 100644 --- a/src/backend/utils/probes.d +++ b/src/backend/utils/probes.d @@ -3,7 +3,7 @@ * * Copyright (c) 2006-2010, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/utils/probes.d,v 1.12 2010/01/02 16:57:53 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/probes.d,v 1.13 2010/08/13 20:10:52 rhaas Exp $ * ---------- */ @@ -55,7 +55,7 @@ provider postgresql { probe sort__done(bool, long); probe buffer__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, bool, bool); - probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, bool, bool, bool); + probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool); probe buffer__flush__start(ForkNumber, BlockNumber, Oid, Oid, Oid); probe buffer__flush__done(ForkNumber, BlockNumber, Oid, Oid, Oid); @@ -81,10 +81,10 @@ provider postgresql { probe twophase__checkpoint__start(); probe twophase__checkpoint__done(); - probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid); - probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int); - probe smgr__md__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid); - probe smgr__md__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int); + probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int); + probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int); + probe smgr__md__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int); + probe smgr__md__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int); probe xlog__insert(unsigned char, unsigned char); probe xlog__switch(); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 3f0930f395..f026728dd1 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of 
California * - * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.33 2010/04/28 16:10:43 heikki Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.34 2010/08/13 20:10:53 rhaas Exp $ */ #ifndef XLOG_INTERNAL_H #define XLOG_INTERNAL_H @@ -71,7 +71,7 @@ typedef struct XLogContRecord /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD064 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD065 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index ccbb5a1b28..6ba729a251 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.49 2010/02/26 02:01:21 momjian Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.50 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -25,10 +25,20 @@ extern const char *forkNames[]; extern ForkNumber forkname_to_number(char *forkName); +extern int forkname_chars(const char *str); -extern char *relpath(RelFileNode rnode, ForkNumber forknum); +extern char *relpathbackend(RelFileNode rnode, BackendId backend, + ForkNumber forknum); extern char *GetDatabasePath(Oid dbNode, Oid spcNode); +/* First argument is a RelFileNodeBackend */ +#define relpath(rnode, forknum) \ + relpathbackend((rnode).node, (rnode).backend, (forknum)) + +/* First argument is a RelFileNode */ +#define relpathperm(rnode, forknum) \ + relpathbackend((rnode), InvalidBackendId, (forknum)) + extern bool IsSystemRelation(Relation relation); extern bool IsToastRelation(Relation relation); @@ -45,6 +55,7 @@ extern bool IsSharedRelation(Oid relationId); extern Oid GetNewOid(Relation relation); 
extern Oid GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn); -extern Oid GetNewRelFileNode(Oid reltablespace, Relation pg_class); +extern Oid GetNewRelFileNode(Oid reltablespace, Relation pg_class, + BackendId backend); #endif /* CATALOG_H */ diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h index f86cf9bbf5..8449a7775e 100644 --- a/src/include/catalog/storage.h +++ b/src/include/catalog/storage.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/storage.h,v 1.5 2010/02/07 20:48:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/storage.h,v 1.6 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -30,8 +30,7 @@ extern void RelationTruncate(Relation rel, BlockNumber nblocks); * naming */ extern void smgrDoPendingDeletes(bool isCommit); -extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, - bool *haveNonTemp); +extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr); extern void AtSubCommit_smgr(void); extern void AtSubAbort_smgr(void); extern void PostPrepare_smgr(void); diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h index a72e31724c..e4ec6ad5b0 100644 --- a/src/include/postmaster/bgwriter.h +++ b/src/include/postmaster/bgwriter.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.15 2010/01/02 16:58:08 momjian Exp $ + * $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.16 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -27,7 +27,7 @@ extern void BackgroundWriterMain(void); extern void RequestCheckpoint(int flags); extern void CheckpointWriteDelay(int flags, double 
progress); -extern bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, +extern bool ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno); extern void AbsorbFsyncRequests(void); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index dc4376ee9a..68416ee1b5 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.124 2010/01/23 16:37:12 sriggs Exp $ + * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.125 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -160,7 +160,7 @@ extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy); -extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, +extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy); extern void ReleaseBuffer(Buffer buffer); @@ -180,8 +180,8 @@ extern BlockNumber BufferGetBlockNumber(Buffer buffer); extern BlockNumber RelationGetNumberOfBlocks(Relation relation); extern void FlushRelationBuffers(Relation rel); extern void FlushDatabaseBuffers(Oid dbid); -extern void DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, - bool istemp, BlockNumber firstDelBlock); +extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, + ForkNumber forkNum, BlockNumber firstDelBlock); extern void DropDatabaseBuffers(Oid dbid); #ifdef NOT_USED diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h index b5e4e1134d..9bf170b2c8 100644 --- 
a/src/include/storage/relfilenode.h +++ b/src/include/storage/relfilenode.h @@ -7,13 +7,15 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.25 2010/02/07 20:48:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.26 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ #ifndef RELFILENODE_H #define RELFILENODE_H +#include "storage/backendid.h" + /* * The physical storage of a relation consists of one or more forks. The * main fork is always created, but in addition to that there can be @@ -37,7 +39,8 @@ typedef enum ForkNumber /* * RelFileNode must provide all that we need to know to physically access - * a relation. Note, however, that a "physical" relation is comprised of + * a relation, with the exception of the backend ID, which can be provided + * separately. Note, however, that a "physical" relation is comprised of * multiple files on the filesystem, as each fork is stored as a separate * file, and each fork can be divided into multiple segments. See md.c. * @@ -74,14 +77,30 @@ typedef struct RelFileNode } RelFileNode; /* - * Note: RelFileNodeEquals compares relNode first since that is most likely - * to be different in two unequal RelFileNodes. It is probably redundant - * to compare spcNode if the other two fields are found equal, but do it - * anyway to be sure. + * Augmenting a relfilenode with the backend ID provides all the information + * we need to locate the physical storage. + */ +typedef struct RelFileNodeBackend +{ + RelFileNode node; + BackendId backend; +} RelFileNodeBackend; + +/* + * Note: RelFileNodeEquals and RelFileNodeBackendEquals compare relNode first + * since that is most likely to be different in two unequal RelFileNodes. 
It + * is probably redundant to compare spcNode if the other fields are found equal, + * but do it anyway to be sure. */ #define RelFileNodeEquals(node1, node2) \ ((node1).relNode == (node2).relNode && \ (node1).dbNode == (node2).dbNode && \ (node1).spcNode == (node2).spcNode) +#define RelFileNodeBackendEquals(node1, node2) \ + ((node1).node.relNode == (node2).node.relNode && \ + (node1).node.dbNode == (node2).node.dbNode && \ + (node1).backend == (node2).backend && \ + (node1).node.spcNode == (node2).node.spcNode) + #endif /* RELFILENODE_H */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 864a28fde8..b35fe7f1fb 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.59 2010/02/26 02:01:28 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.60 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -26,7 +26,7 @@ * * invalidate an smgr cache entry for a specific physical relation * * invalidate the mapped-relation mapping for a given database * More types could be added if needed. The message type is identified by - * the first "int16" field of the message struct. Zero or positive means a + * the first "int8" field of the message struct. Zero or positive means a * specific-catcache inval message (and also serves as the catcache ID field). * Negative values identify the other message types, as per codes below. 
* @@ -63,7 +63,7 @@ typedef struct { /* note: field layout chosen with an eye to alignment concerns */ - int16 id; /* cache ID --- must be first */ + int8 id; /* cache ID --- must be first */ ItemPointerData tuplePtr; /* tuple identifier in cached relation */ Oid dbId; /* database ID, or 0 if a shared relation */ uint32 hashValue; /* hash value of key for this catcache */ @@ -73,7 +73,7 @@ typedef struct typedef struct { - int16 id; /* type field --- must be first */ + int8 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 if a shared catalog */ Oid catId; /* ID of catalog whose contents are invalid */ } SharedInvalCatalogMsg; @@ -82,7 +82,7 @@ typedef struct typedef struct { - int16 id; /* type field --- must be first */ + int8 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 if a shared relation */ Oid relId; /* relation ID */ } SharedInvalRelcacheMsg; @@ -91,21 +91,23 @@ typedef struct typedef struct { - int16 id; /* type field --- must be first */ - RelFileNode rnode; /* physical file ID */ + int8 id; /* type field --- must be first */ + int8 backend_hi; /* high bits of backend ID, if temprel */ + uint16 backend_lo; /* low bits of backend ID, if temprel */ + RelFileNode rnode; /* spcNode, dbNode, relNode */ } SharedInvalSmgrMsg; #define SHAREDINVALRELMAP_ID (-4) typedef struct { - int16 id; /* type field --- must be first */ + int8 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 for shared catalogs */ } SharedInvalRelmapMsg; typedef union { - int16 id; /* type field --- must be first */ + int8 id; /* type field --- must be first */ SharedInvalCatcacheMsg cc; SharedInvalCatalogMsg cat; SharedInvalRelcacheMsg rc; diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index c037190b4b..55028556fa 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, 
Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.71 2010/02/26 02:01:28 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.72 2010/08/13 20:10:53 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "access/xlog.h" #include "fmgr.h" +#include "storage/backendid.h" #include "storage/block.h" #include "storage/relfilenode.h" @@ -38,7 +39,7 @@ typedef struct SMgrRelationData { /* rnode is the hashtable lookup key, so it must be first! */ - RelFileNode smgr_rnode; /* relation physical identifier */ + RelFileNodeBackend smgr_rnode; /* relation physical identifier */ /* pointer to owning pointer, or NULL if none */ struct SMgrRelationData **smgr_owner; @@ -68,28 +69,30 @@ typedef struct SMgrRelationData typedef SMgrRelationData *SMgrRelation; +#define SmgrIsTemp(smgr) \ + ((smgr)->smgr_rnode.backend != InvalidBackendId) extern void smgrinit(void); -extern SMgrRelation smgropen(RelFileNode rnode); +extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend); extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln); extern void smgrclose(SMgrRelation reln); extern void smgrcloseall(void); -extern void smgrclosenode(RelFileNode rnode); +extern void smgrclosenode(RelFileNodeBackend rnode); extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum, - bool isTemp, bool isRedo); + bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); extern void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); extern void smgrwrite(SMgrRelation reln, ForkNumber 
forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum); extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks, bool isTemp); + BlockNumber nblocks); extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum); extern void smgrpreckpt(void); extern void smgrsync(void); @@ -103,27 +106,28 @@ extern void mdinit(void); extern void mdclose(SMgrRelation reln, ForkNumber forknum); extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern bool mdexists(SMgrRelation reln, ForkNumber forknum); -extern void mdunlink(RelFileNode rnode, ForkNumber forknum, bool isRedo); +extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); extern void mdextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); extern void mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); extern void mdwrite(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum); extern void mdtruncate(SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks, bool isTemp); + BlockNumber nblocks); extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum); extern void mdpreckpt(void); extern void mdsync(void); extern void mdpostckpt(void); extern void SetForwardFsyncRequests(void); -extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, +extern void RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno); -extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum); +extern 
void ForgetRelationFsyncRequests(RelFileNodeBackend rnode, + ForkNumber forknum); extern void ForgetDatabaseFsyncRequests(Oid dbid); /* smgrtype.c */ diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 8fe710d718..328f73c543 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.49 2010/02/08 04:33:55 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.50 2010/08/13 20:10:54 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -49,7 +49,7 @@ extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple); extern void CacheInvalidateRelcacheByRelid(Oid relid); -extern void CacheInvalidateSmgr(RelFileNode rnode); +extern void CacheInvalidateSmgr(RelFileNodeBackend rnode); extern void CacheInvalidateRelmap(Oid databaseId); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index a0a9b301c4..b615f81c65 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.124 2010/02/26 02:01:29 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.125 2010/08/13 20:10:54 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -126,8 +126,8 @@ typedef struct RelationData /* use "struct" here to avoid needing to include smgr.h: */ struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */ int rd_refcnt; /* reference count */ + BackendId rd_backend; /* owning backend id, if temporary relation */ bool rd_istemp; /* rel is a temporary relation */ - bool rd_islocaltemp; /* rel is a temp rel of this 
session */ bool rd_isnailed; /* rel is nailed in cache */ bool rd_isvalid; /* relcache entry is valid */ char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 = @@ -347,7 +347,7 @@ typedef struct StdRdOptions #define RelationOpenSmgr(relation) \ do { \ if ((relation)->rd_smgr == NULL) \ - smgrsetowner(&((relation)->rd_smgr), smgropen((relation)->rd_node)); \ + smgrsetowner(&((relation)->rd_smgr), smgropen((relation)->rd_node, (relation)->rd_backend)); \ } while (0) /* @@ -393,7 +393,7 @@ typedef struct StdRdOptions * Beware of multiple eval of argument */ #define RELATION_IS_LOCAL(relation) \ - ((relation)->rd_islocaltemp || \ + ((relation)->rd_backend == MyBackendId || \ (relation)->rd_createSubid != InvalidSubTransactionId) /* @@ -403,7 +403,7 @@ typedef struct StdRdOptions * Beware of multiple eval of argument */ #define RELATION_IS_OTHER_TEMP(relation) \ - ((relation)->rd_istemp && !(relation)->rd_islocaltemp) + ((relation)->rd_istemp && (relation)->rd_backend != MyBackendId) /* routines in utils/cache/relcache.c */ extern void RelationIncrementReferenceCount(Relation rel);