From 1bf3d615047eb214b1ddde31bd268dabf96cc3fa Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 28 Jul 2004 14:23:31 +0000 Subject: [PATCH] Fix subtransaction behavior for large objects, temp namespace, files, password/group files. Also allow read-only subtransactions of a read-write parent, but not vice versa. These are the reasonably noncontroversial parts of Alvaro's recent mop-up patch, plus further work on large objects to minimize use of the TopTransactionResourceOwner. --- src/backend/access/transam/xact.c | 81 ++++++-- src/backend/catalog/namespace.c | 47 ++++- src/backend/commands/user.c | 93 ++++++--- src/backend/libpq/be-fsstubs.c | 73 +++++-- src/backend/storage/file/fd.c | 209 +++++++++++++++------ src/backend/storage/large_object/inv_api.c | 122 ++++++++---- src/backend/storage/lmgr/lmgr.c | 16 +- src/backend/utils/cache/inval.c | 6 +- src/backend/utils/misc/guc.c | 15 +- src/backend/utils/time/tqual.c | 65 +++++-- src/bin/psql/tab-complete.c | 4 +- src/include/catalog/namespace.h | 4 +- src/include/commands/user.h | 4 +- src/include/libpq/be-fsstubs.h | 8 +- src/include/storage/fd.h | 4 +- src/include/storage/large_object.h | 22 +-- src/include/utils/inval.h | 4 +- 17 files changed, 572 insertions(+), 205 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 55d5ef9b80..f938cdcc5b 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.172 2004/07/27 05:10:49 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.173 2004/07/28 14:23:27 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -224,6 +224,7 @@ typedef struct TransactionStateData ResourceOwner curTransactionOwner; /* my query resources */ List *childXids; /* subcommitted child XIDs */ AclId currentUser; /* subxact start current_user */ + bool prevXactReadOnly; /* entry-time xact r/o 
state */ struct TransactionStateData *parent; /* back link to parent */ } TransactionStateData; @@ -284,6 +285,7 @@ static TransactionStateData TopTransactionStateData = { NULL, /* cur transaction resource owner */ NIL, /* subcommitted child Xids */ 0, /* entry-time current userid */ + false, /* entry-time xact r/o state */ NULL /* link to parent state block */ }; @@ -1242,7 +1244,8 @@ StartTransaction(void) * check the current transaction state */ if (s->state != TRANS_DEFAULT) - elog(WARNING, "StartTransaction and not in default state"); + elog(WARNING, "StartTransaction while in %s state", + TransStateAsString(s->state)); /* * set the current transaction state information appropriately during @@ -1287,6 +1290,8 @@ StartTransaction(void) * you won't because it doesn't work during startup; the userid isn't * set yet during a backend's first transaction start. We only use * the currentUser field in sub-transaction state structs. + * + * prevXactReadOnly is also valid only in sub-transactions. 
*/ /* @@ -1319,7 +1324,8 @@ CommitTransaction(void) * check the current transaction state */ if (s->state != TRANS_INPROGRESS) - elog(WARNING, "CommitTransaction and not in in-progress state"); + elog(WARNING, "CommitTransaction while in %s state", + TransStateAsString(s->state)); Assert(s->parent == NULL); /* @@ -1351,14 +1357,14 @@ CommitTransaction(void) AtCommit_Portals(); - /* handle commit for large objects [ PA, 7/17/98 ] */ - /* XXX probably this does not belong here */ - lo_commit(true); + /* close large objects before lower-level cleanup */ + AtEOXact_LargeObject(true); /* NOTIFY commit must come before lower-level cleanup */ AtCommit_Notify(); /* Update the flat password file if we changed pg_shadow or pg_group */ + /* This should be the last step before commit */ AtEOXact_UpdatePasswordFile(true); /* @@ -1486,7 +1492,8 @@ AbortTransaction(void) * check the current transaction state */ if (s->state != TRANS_INPROGRESS) - elog(WARNING, "AbortTransaction and not in in-progress state"); + elog(WARNING, "AbortTransaction while in %s state", + TransStateAsString(s->state)); Assert(s->parent == NULL); /* @@ -1515,7 +1522,7 @@ AbortTransaction(void) */ DeferredTriggerAbortXact(); AtAbort_Portals(); - lo_commit(false); /* 'false' means it's abort */ + AtEOXact_LargeObject(false); /* 'false' means it's abort */ AtAbort_Notify(); AtEOXact_UpdatePasswordFile(false); @@ -1870,6 +1877,9 @@ CleanupAbortedSubTransactions(bool returnName) s = CurrentTransactionState; } + AssertState(s->blockState == TBLOCK_SUBINPROGRESS || + s->blockState == TBLOCK_INPROGRESS); + return name; } @@ -2866,7 +2876,8 @@ StartSubTransaction(void) TransactionState s = CurrentTransactionState; if (s->state != TRANS_DEFAULT) - elog(WARNING, "StartSubTransaction and not in default state"); + elog(WARNING, "StartSubTransaction while in %s state", + TransStateAsString(s->state)); s->state = TRANS_START; @@ -2889,6 +2900,7 @@ StartSubTransaction(void) * Finish setup of other transaction state 
fields. */ s->currentUser = GetUserId(); + s->prevXactReadOnly = XactReadOnly; /* * Initialize other subsystems for new subtransaction @@ -2913,7 +2925,8 @@ CommitSubTransaction(void) ShowTransactionState("CommitSubTransaction"); if (s->state != TRANS_INPROGRESS) - elog(WARNING, "CommitSubTransaction and not in in-progress state"); + elog(WARNING, "CommitSubTransaction while in %s state", + TransStateAsString(s->state)); /* Pre-commit processing */ AtSubCommit_Portals(s->parent->transactionIdData, @@ -2930,9 +2943,18 @@ CommitSubTransaction(void) /* Post-commit cleanup */ AtSubCommit_smgr(); - AtSubEOXact_Inval(true); + AtEOSubXact_Inval(true); AtEOSubXact_SPI(true, s->transactionIdData); + AtEOSubXact_LargeObject(true, s->transactionIdData, + s->parent->transactionIdData); + AtEOSubXact_UpdatePasswordFile(true, s->transactionIdData, + s->parent->transactionIdData); + AtEOSubXact_Files(true, s->transactionIdData, + s->parent->transactionIdData); + AtEOSubXact_Namespace(true, s->transactionIdData, + s->parent->transactionIdData); + /* * Note that we just release the resource owner's resources and don't * delete it. This is because locks are not actually released here. @@ -2953,6 +2975,13 @@ CommitSubTransaction(void) AtEOSubXact_on_commit_actions(true, s->transactionIdData, s->parent->transactionIdData); + /* + * We need to restore the upper transaction's read-only state, + * in case the upper is read-write while the child is read-only; + * GUC will incorrectly think it should leave the child state in place. 
+ */ + XactReadOnly = s->prevXactReadOnly; + CurrentResourceOwner = s->parent->curTransactionOwner; CurTransactionResourceOwner = s->parent->curTransactionOwner; s->curTransactionOwner = NULL; @@ -2973,7 +3002,8 @@ AbortSubTransaction(void) ShowTransactionState("AbortSubTransaction"); if (s->state != TRANS_INPROGRESS) - elog(WARNING, "AbortSubTransaction and not in in-progress state"); + elog(WARNING, "AbortSubTransaction while in %s state", + TransStateAsString(s->state)); HOLD_INTERRUPTS(); @@ -3010,7 +3040,16 @@ AbortSubTransaction(void) AtEOSubXact_SPI(false, s->transactionIdData); AtSubAbort_Portals(s->parent->transactionIdData, s->parent->curTransactionOwner); - AtSubEOXact_Inval(false); + AtEOSubXact_Inval(false); + + AtEOSubXact_LargeObject(false, s->transactionIdData, + s->parent->transactionIdData); + AtEOSubXact_UpdatePasswordFile(false, s->transactionIdData, + s->parent->transactionIdData); + AtEOSubXact_Files(false, s->transactionIdData, + s->parent->transactionIdData); + AtEOSubXact_Namespace(false, s->transactionIdData, + s->parent->transactionIdData); ResourceOwnerRelease(s->curTransactionOwner, RESOURCE_RELEASE_BEFORE_LOCKS, @@ -3041,6 +3080,13 @@ AbortSubTransaction(void) */ SetUserId(s->currentUser); + /* + * Restore the upper transaction's read-only state, too. This should + * be redundant with GUC's cleanup but we may as well do it for + * consistency with the commit case. 
+ */ + XactReadOnly = s->prevXactReadOnly; + CommandCounterIncrement(); RESUME_INTERRUPTS(); @@ -3057,7 +3103,8 @@ CleanupSubTransaction(void) ShowTransactionState("CleanupSubTransaction"); if (s->state != TRANS_ABORT) - elog(WARNING, "CleanupSubTransaction and not in aborted state"); + elog(WARNING, "CleanupSubTransaction while in %s state", + TransStateAsString(s->state)); AtSubCleanup_Portals(); @@ -3088,7 +3135,8 @@ StartAbortedSubTransaction(void) TransactionState s = CurrentTransactionState; if (s->state != TRANS_DEFAULT) - elog(WARNING, "StartAbortedSubTransaction and not in default state"); + elog(WARNING, "StartAbortedSubTransaction while in %s state", + TransStateAsString(s->state)); s->state = TRANS_START; @@ -3168,7 +3216,8 @@ PopTransaction(void) TransactionState s = CurrentTransactionState; if (s->state != TRANS_DEFAULT) - elog(WARNING, "PopTransaction and not in default state"); + elog(WARNING, "PopTransaction while in %s state", + TransStateAsString(s->state)); if (s->parent == NULL) elog(FATAL, "PopTransaction with no parent"); diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index b412023fe2..7bc388cf14 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -13,7 +13,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.67 2004/06/18 06:13:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.68 2004/07/28 14:23:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -107,12 +107,17 @@ static bool namespaceSearchPathValid = true; * myTempNamespace is InvalidOid until and unless a TEMP namespace is set up * in a particular backend session (this happens when a CREATE TEMP TABLE * command is first executed). Thereafter it's the OID of the temp namespace. 
- * firstTempTransaction flags whether we've committed creation of the TEMP - * namespace or not. + * + * myTempNamespaceXID shows whether we've created the TEMP namespace in the + * current transaction. The TransactionId propagates up the transaction tree, + * so the main transaction will correctly recognize the flag if all + * intermediate subtransactions commit. When it is InvalidTransactionId, + * we either haven't made the TEMP namespace yet, or have successfully + * committed its creation, depending on whether myTempNamespace is valid. */ static Oid myTempNamespace = InvalidOid; -static bool firstTempTransaction = false; +static TransactionId myTempNamespaceXID = InvalidTransactionId; /* * "Special" namespace for CREATE SCHEMA. If set, it's the first search @@ -1688,7 +1693,9 @@ InitTempTableNamespace(void) */ myTempNamespace = namespaceId; - firstTempTransaction = true; + /* It should not be done already. */ + AssertState(myTempNamespaceXID == InvalidTransactionId); + myTempNamespaceXID = GetCurrentTransactionId(); namespaceSearchPathValid = false; /* need to rebuild list */ } @@ -1707,7 +1714,7 @@ AtEOXact_Namespace(bool isCommit) * temp tables at backend shutdown. (We only want to register the * callback once per session, so this is a good place to do it.) */ - if (firstTempTransaction) + if (myTempNamespaceXID == GetCurrentTransactionId()) { if (isCommit) on_shmem_exit(RemoveTempRelationsCallback, 0); @@ -1716,7 +1723,7 @@ AtEOXact_Namespace(bool isCommit) myTempNamespace = InvalidOid; namespaceSearchPathValid = false; /* need to rebuild list */ } - firstTempTransaction = false; + myTempNamespaceXID = InvalidTransactionId; } /* @@ -1729,6 +1736,32 @@ AtEOXact_Namespace(bool isCommit) } } +/* + * AtEOSubXact_Namespace + * + * At subtransaction commit, propagate the temp-namespace-creation + * flag to the parent transaction. + * + * At subtransaction abort, forget the flag if we set it up. 
+ */ +void +AtEOSubXact_Namespace(bool isCommit, TransactionId myXid, + TransactionId parentXid) +{ + if (myTempNamespaceXID == myXid) + { + if (isCommit) + myTempNamespaceXID = parentXid; + else + { + myTempNamespaceXID = InvalidTransactionId; + /* TEMP namespace creation failed, so reset state */ + myTempNamespace = InvalidOid; + namespaceSearchPathValid = false; /* need to rebuild list */ + } + } +} + /* * Remove all relations in the specified temp namespace. * diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index 255428fadc..da8f92aee7 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.141 2004/05/26 04:41:12 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.142 2004/07/28 14:23:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -44,8 +44,30 @@ extern bool Password_encryption; -static bool user_file_update_needed = false; -static bool group_file_update_needed = false; +/* + * The need-to-update-files flags are a pair of TransactionIds that show what + * level of the transaction tree requested the update. To register an update, + * the transaction saves its own TransactionId in the flag, unless the value + * was already set to a valid TransactionId. If it aborts and the value is its + * TransactionId, it resets the value to InvalidTransactionId. If it commits, + * it changes the value to its parent's TransactionId. This way the value is + * propagated up to the topmost transaction, which will update the files if a + * valid TransactionId is detected. 
+ */ +static TransactionId user_file_update_xid = InvalidTransactionId; +static TransactionId group_file_update_xid = InvalidTransactionId; + +#define user_file_update_needed() \ + do { \ + if (user_file_update_xid == InvalidTransactionId) \ + user_file_update_xid = GetCurrentTransactionId(); \ + } while (0) + +#define group_file_update_needed() \ + do { \ + if (group_file_update_xid == InvalidTransactionId) \ + group_file_update_xid = GetCurrentTransactionId(); \ + } while (0) static void CheckPgUserAclNotNull(void); @@ -402,8 +424,8 @@ write_user_file(Relation urel) Datum update_pg_pwd_and_pg_group(PG_FUNCTION_ARGS) { - user_file_update_needed = true; - group_file_update_needed = true; + user_file_update_needed(); + group_file_update_needed(); return PointerGetDatum(NULL); } @@ -429,13 +451,14 @@ AtEOXact_UpdatePasswordFile(bool isCommit) Relation urel = NULL; Relation grel = NULL; - if (!(user_file_update_needed || group_file_update_needed)) + if (user_file_update_xid == InvalidTransactionId && + group_file_update_xid == InvalidTransactionId) return; if (!isCommit) { - user_file_update_needed = false; - group_file_update_needed = false; + user_file_update_xid = InvalidTransactionId; + group_file_update_xid = InvalidTransactionId; return; } @@ -447,22 +470,22 @@ AtEOXact_UpdatePasswordFile(bool isCommit) * pg_shadow or pg_group, which likely won't have gotten a strong * enough lock), so get the locks we need before writing anything. 
*/ - if (user_file_update_needed) + if (user_file_update_xid != InvalidTransactionId) urel = heap_openr(ShadowRelationName, ExclusiveLock); - if (group_file_update_needed) + if (group_file_update_xid != InvalidTransactionId) grel = heap_openr(GroupRelationName, ExclusiveLock); /* Okay to write the files */ - if (user_file_update_needed) + if (user_file_update_xid != InvalidTransactionId) { - user_file_update_needed = false; + user_file_update_xid = InvalidTransactionId; write_user_file(urel); heap_close(urel, NoLock); } - if (group_file_update_needed) + if (group_file_update_xid != InvalidTransactionId) { - group_file_update_needed = false; + group_file_update_xid = InvalidTransactionId; write_group_file(grel); heap_close(grel, NoLock); } @@ -473,7 +496,33 @@ AtEOXact_UpdatePasswordFile(bool isCommit) SendPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE); } +/* + * AtEOSubXact_UpdatePasswordFile + * + * Called at subtransaction end, this routine resets or updates the + * need-to-update-files flags. + */ +void +AtEOSubXact_UpdatePasswordFile(bool isCommit, TransactionId myXid, + TransactionId parentXid) +{ + if (isCommit) + { + if (user_file_update_xid == myXid) + user_file_update_xid = parentXid; + if (group_file_update_xid == myXid) + group_file_update_xid = parentXid; + } + else + { + if (user_file_update_xid == myXid) + user_file_update_xid = InvalidTransactionId; + + if (group_file_update_xid == myXid) + group_file_update_xid = InvalidTransactionId; + } +} /* * CREATE USER @@ -728,7 +777,7 @@ CreateUser(CreateUserStmt *stmt) /* * Set flag to update flat password file at commit. */ - user_file_update_needed = true; + user_file_update_needed(); } @@ -925,7 +974,7 @@ AlterUser(AlterUserStmt *stmt) /* * Set flag to update flat password file at commit. */ - user_file_update_needed = true; + user_file_update_needed(); } @@ -1147,7 +1196,7 @@ DropUser(DropUserStmt *stmt) /* * Set flag to update flat password file at commit. 
*/ - user_file_update_needed = true; + user_file_update_needed(); } @@ -1233,7 +1282,7 @@ RenameUser(const char *oldname, const char *newname) ReleaseSysCache(oldtuple); heap_close(rel, NoLock); - user_file_update_needed = true; + user_file_update_needed(); } @@ -1438,7 +1487,7 @@ CreateGroup(CreateGroupStmt *stmt) /* * Set flag to update flat group file at commit. */ - group_file_update_needed = true; + group_file_update_needed(); } @@ -1590,7 +1639,7 @@ AlterGroup(AlterGroupStmt *stmt, const char *tag) /* * Set flag to update flat group file at commit. */ - group_file_update_needed = true; + group_file_update_needed(); } /* @@ -1730,7 +1779,7 @@ DropGroup(DropGroupStmt *stmt) /* * Set flag to update flat group file at commit. */ - group_file_update_needed = true; + group_file_update_needed(); } @@ -1776,5 +1825,5 @@ RenameGroup(const char *oldname, const char *newname) heap_close(rel, NoLock); heap_freetuple(tup); - group_file_update_needed = true; + group_file_update_needed(); } diff --git a/src/backend/libpq/be-fsstubs.c b/src/backend/libpq/be-fsstubs.c index ed19e76db2..21d1f3ddcf 100644 --- a/src/backend/libpq/be-fsstubs.c +++ b/src/backend/libpq/be-fsstubs.c @@ -1,24 +1,22 @@ /*------------------------------------------------------------------------- * * be-fsstubs.c - * support for filesystem operations on large objects + * Builtin functions for open/close/read/write operations on large objects * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.70 2004/02/10 01:55:25 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.71 2004/07/28 14:23:28 tgl Exp $ * * NOTES * This should be moved to a more appropriate place. It is here * for lack of a better place. * - * Builtin functions for open/close/read/write operations on large objects. 
- * * These functions operate in a private MemoryContext, which means - * that large object descriptors hang around until we destroy the context. - * That happens in lo_commit(). It'd be possible to prolong the lifetime + * that large object descriptors hang around until we destroy the context + * at transaction end. It'd be possible to prolong the lifetime * of the context so that LO FDs are good across transactions (for example, * we could release the context only if we see that no FDs remain open). * But we'd need additional state in order to do the right thing at the @@ -29,6 +27,11 @@ * existing documented semantics of LO FDs: they're only good within a * transaction. * + * As of PostgreSQL 7.5, much of the angst expressed above is no longer + * relevant, and in fact it'd be pretty easy to allow LO FDs to stay + * open across transactions. However backwards compatibility suggests + * that we should stick to the status quo. + * *------------------------------------------------------------------------- */ @@ -46,8 +49,6 @@ #include "utils/memutils.h" -/* [PA] is Pascal André */ - /*#define FSDB 1*/ #define BUFSIZE 8192 @@ -68,6 +69,7 @@ static MemoryContext fscxt = NULL; static int newLOfd(LargeObjectDesc *lobjCookie); static void deleteLOfd(int fd); + /***************************************************************************** * File Interfaces for Large Objects *****************************************************************************/ @@ -399,7 +401,7 @@ lo_import(PG_FUNCTION_ARGS) lobjOid = lobj->id; /* - * read in from the Unix file and write to the inversion file + * read in from the filesystem and write to the inversion file */ while ((nbytes = FileRead(fd, buf, BUFSIZE)) > 0) { @@ -471,7 +473,7 @@ lo_export(PG_FUNCTION_ARGS) fnamebuf))); /* - * read in from the inversion file and write to the Unix file + * read in from the inversion file and write to the filesystem */ while ((nbytes = inv_read(lobj, buf, BUFSIZE)) > 0) { @@ -490,11 +492,11 @@ 
lo_export(PG_FUNCTION_ARGS) } /* - * lo_commit - - * prepares large objects for transaction commit [PA, 7/17/98] + * AtEOXact_LargeObject - + * prepares large objects for transaction commit */ void -lo_commit(bool isCommit) +AtEOXact_LargeObject(bool isCommit) { int i; MemoryContext currentContext; @@ -505,8 +507,8 @@ lo_commit(bool isCommit) currentContext = MemoryContextSwitchTo(fscxt); /* - * Clean out still-open index scans (not necessary if aborting) and - * clear cookies array so that LO fds are no longer good. + * Close LO fds and clear cookies array so that LO fds are no longer good. + * On abort we skip the close step. */ for (i = 0; i < cookies_size; i++) { @@ -514,7 +516,7 @@ lo_commit(bool isCommit) { if (isCommit) inv_close(cookies[i]); - cookies[i] = NULL; + deleteLOfd(i); } } @@ -527,8 +529,47 @@ lo_commit(bool isCommit) /* Release the LO memory context to prevent permanent memory leaks. */ MemoryContextDelete(fscxt); fscxt = NULL; + + /* Give inv_api.c a chance to clean up, too */ + close_lo_relation(isCommit); } +/* + * AtEOSubXact_LargeObject + * Take care of large objects at subtransaction commit/abort + * + * Reassign LOs created/opened during a committing subtransaction + * to the parent transaction. On abort, just close them. + */ +void +AtEOSubXact_LargeObject(bool isCommit, TransactionId myXid, + TransactionId parentXid) +{ + int i; + + if (fscxt == NULL) /* no LO operations in this xact */ + return; + + for (i = 0; i < cookies_size; i++) + { + LargeObjectDesc *lo = cookies[i]; + + if (lo != NULL && lo->xid == myXid) + { + if (isCommit) + lo->xid = parentXid; + else + { + /* + * Make sure we do not call inv_close twice if it errors out + * for some reason. Better a leak than a crash. 
+ */ + deleteLOfd(i); + inv_close(lo); + } + } + } +} /***************************************************************************** * Support routines for this file diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 96de54110c..918d541e2a 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.109 2004/05/31 03:48:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.110 2004/07/28 14:23:28 tgl Exp $ * * NOTES: * @@ -47,6 +47,7 @@ #include #include "miscadmin.h" +#include "access/xact.h" #include "storage/fd.h" #include "storage/ipc.h" @@ -122,6 +123,7 @@ typedef struct vfd { signed short fd; /* current FD, or VFD_CLOSED if none */ unsigned short fdstate; /* bitflags for VFD's state */ + TransactionId create_xid; /* for XACT_TEMPORARY fds, creating Xid */ File nextFree; /* link to next free VFD, if in freelist */ File lruMoreRecently; /* doubly linked recency-of-use list */ File lruLessRecently; @@ -146,27 +148,31 @@ static Size SizeVfdCache = 0; static int nfile = 0; /* - * List of stdio FILEs opened with AllocateFile. + * List of stdio FILEs and DIRs opened with AllocateFile + * and AllocateDir. * - * Since we don't want to encourage heavy use of AllocateFile, it seems - * OK to put a pretty small maximum limit on the number of simultaneously - * allocated files. + * Since we don't want to encourage heavy use of AllocateFile or AllocateDir, + * it seems OK to put a pretty small maximum limit on the number of + * simultaneously allocated descs. */ -#define MAX_ALLOCATED_FILES 32 +#define MAX_ALLOCATED_DESCS 32 -static int numAllocatedFiles = 0; -static FILE *allocatedFiles[MAX_ALLOCATED_FILES]; +typedef enum { + AllocateDescFile, + AllocateDescDir +} AllocateDescKind; -/* - * List of DIRs opened with AllocateDir. 
- * - * Since we don't have heavy use of AllocateDir, it seems OK to put a pretty - * small maximum limit on the number of simultaneously allocated dirs. - */ -#define MAX_ALLOCATED_DIRS 10 +typedef struct { + AllocateDescKind kind; + union { + FILE *file; + DIR *dir; + } desc; + TransactionId create_xid; +} AllocateDesc; -static int numAllocatedDirs = 0; -static DIR *allocatedDirs[MAX_ALLOCATED_DIRS]; +static int numAllocatedDescs = 0; +static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS]; /* * Number of temporary files opened during the current session; @@ -499,7 +505,7 @@ LruInsert(File file) if (FileIsNotOpen(file)) { - while (nfile + numAllocatedFiles + numAllocatedDirs >= max_safe_fds) + while (nfile + numAllocatedDescs >= max_safe_fds) { if (!ReleaseLruFile()) break; @@ -759,7 +765,7 @@ fileNameOpenFile(FileName fileName, file = AllocateVfd(); vfdP = &VfdCache[file]; - while (nfile + numAllocatedFiles + numAllocatedDirs >= max_safe_fds) + while (nfile + numAllocatedDescs >= max_safe_fds) { if (!ReleaseLruFile()) break; @@ -876,7 +882,10 @@ OpenTemporaryFile(bool interXact) /* Mark it for deletion at EOXact */ if (!interXact) + { VfdCache[file].fdstate |= FD_XACT_TEMPORARY; + VfdCache[file].create_xid = GetCurrentTransactionId(); + } return file; } @@ -1134,24 +1143,29 @@ AllocateFile(char *name, char *mode) { FILE *file; - DO_DB(elog(LOG, "AllocateFile: Allocated %d", numAllocatedFiles)); + DO_DB(elog(LOG, "AllocateFile: Allocated %d (%s)", + numAllocatedDescs, name)); /* - * The test against MAX_ALLOCATED_FILES prevents us from overflowing + * The test against MAX_ALLOCATED_DESCS prevents us from overflowing * allocatedFiles[]; the test against max_safe_fds prevents AllocateFile * from hogging every one of the available FDs, which'd lead to infinite * looping. 
*/ - if (numAllocatedFiles >= MAX_ALLOCATED_FILES || - numAllocatedFiles + numAllocatedDirs >= max_safe_fds - 1) + if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || + numAllocatedDescs >= max_safe_fds - 1) elog(ERROR, "too many private files demanded"); TryAgain: if ((file = fopen(name, mode)) != NULL) { - allocatedFiles[numAllocatedFiles] = file; - numAllocatedFiles++; - return file; + AllocateDesc *desc = &allocatedDescs[numAllocatedDescs]; + + desc->kind = AllocateDescFile; + desc->desc.file = file; + desc->create_xid = GetCurrentTransactionId(); + numAllocatedDescs++; + return desc->desc.file; } if (errno == EMFILE || errno == ENFILE) @@ -1170,6 +1184,38 @@ TryAgain: return NULL; } +/* + * Free an AllocateDesc of either type. + * + * The argument *must* point into the allocatedDescs[] array. + */ +static int +FreeDesc(AllocateDesc *desc) +{ + int result; + + /* Close the underlying object */ + switch (desc->kind) + { + case AllocateDescFile: + result = fclose(desc->desc.file); + break; + case AllocateDescDir: + result = closedir(desc->desc.dir); + break; + default: + elog(ERROR, "AllocateDesc kind not recognized"); + result = 0; /* keep compiler quiet */ + break; + } + + /* Compact storage in the allocatedDescs array */ + numAllocatedDescs--; + *desc = allocatedDescs[numAllocatedDescs]; + + return result; +} + /* * Close a file returned by AllocateFile. 
* @@ -1181,20 +1227,19 @@ FreeFile(FILE *file) { int i; - DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedFiles)); + DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedDescs)); /* Remove file from list of allocated files, if it's present */ - for (i = numAllocatedFiles; --i >= 0;) + for (i = numAllocatedDescs; --i >= 0;) { - if (allocatedFiles[i] == file) - { - numAllocatedFiles--; - allocatedFiles[i] = allocatedFiles[numAllocatedFiles]; - break; - } + AllocateDesc *desc = &allocatedDescs[i]; + + if (desc->kind == AllocateDescFile && desc->desc.file == file) + return FreeDesc(desc); } - if (i < 0) - elog(WARNING, "file passed to FreeFile was not obtained from AllocateFile"); + + /* Only get here if someone passes us a file not in allocatedDescs */ + elog(WARNING, "file passed to FreeFile was not obtained from AllocateFile"); return fclose(file); } @@ -1213,24 +1258,29 @@ AllocateDir(const char *dirname) { DIR *dir; - DO_DB(elog(LOG, "AllocateDir: Allocated %d", numAllocatedDirs)); + DO_DB(elog(LOG, "AllocateDir: Allocated %d (%s)", + numAllocatedDescs, dirname)); /* - * The test against MAX_ALLOCATED_DIRS prevents us from overflowing - * allocatedDirs[]; the test against max_safe_fds prevents AllocateDir + * The test against MAX_ALLOCATED_DESCS prevents us from overflowing + * allocatedDescs[]; the test against max_safe_fds prevents AllocateDir * from hogging every one of the available FDs, which'd lead to infinite * looping. 
*/ - if (numAllocatedDirs >= MAX_ALLOCATED_DIRS || - numAllocatedDirs + numAllocatedFiles >= max_safe_fds - 1) + if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || + numAllocatedDescs >= max_safe_fds - 1) elog(ERROR, "too many private dirs demanded"); TryAgain: if ((dir = opendir(dirname)) != NULL) { - allocatedDirs[numAllocatedDirs] = dir; - numAllocatedDirs++; - return dir; + AllocateDesc *desc = &allocatedDescs[numAllocatedDescs]; + + desc->kind = AllocateDescDir; + desc->desc.dir = dir; + desc->create_xid = GetCurrentTransactionId(); + numAllocatedDescs++; + return desc->desc.dir; } if (errno == EMFILE || errno == ENFILE) @@ -1260,20 +1310,19 @@ FreeDir(DIR *dir) { int i; - DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDirs)); + DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDescs)); /* Remove dir from list of allocated dirs, if it's present */ - for (i = numAllocatedDirs; --i >= 0;) + for (i = numAllocatedDescs; --i >= 0;) { - if (allocatedDirs[i] == dir) - { - numAllocatedDirs--; - allocatedDirs[i] = allocatedDirs[numAllocatedDirs]; - break; - } + AllocateDesc *desc = &allocatedDescs[i]; + + if (desc->kind == AllocateDescDir && desc->desc.dir == dir) + return FreeDesc(desc); } - if (i < 0) - elog(WARNING, "dir passed to FreeDir was not obtained from AllocateDir"); + + /* Only get here if someone passes us a dir not in allocatedDescs */ + elog(WARNING, "dir passed to FreeDir was not obtained from AllocateDir"); return closedir(dir); } @@ -1302,6 +1351,51 @@ closeAllVfds(void) } } +/* + * AtEOSubXact_Files + * + * Take care of subtransaction commit/abort. At abort, we close temp files + * that the subtransaction may have opened. At commit, we reassign the + * files that were opened to the parent transaction. 
+ */ +void +AtEOSubXact_Files(bool isCommit, TransactionId myXid, TransactionId parentXid) +{ + Index i; + + if (SizeVfdCache > 0) + { + Assert(FileIsNotOpen(0)); /* Make sure ring not corrupted */ + for (i = 1; i < SizeVfdCache; i++) + { + unsigned short fdstate = VfdCache[i].fdstate; + + if ((fdstate & FD_XACT_TEMPORARY) && + VfdCache[i].create_xid == myXid) + { + if (isCommit) + VfdCache[i].create_xid = parentXid; + else if (VfdCache[i].fileName != NULL) + FileClose(i); + } + } + } + + for (i = 0; i < numAllocatedDescs; i++) + { + if (allocatedDescs[i].create_xid == myXid) + { + if (isCommit) + allocatedDescs[i].create_xid = parentXid; + else + { + /* have to recheck the item after FreeDesc (ugly) */ + FreeDesc(&allocatedDescs[i--]); + } + } + } +} + /* * AtEOXact_Files * @@ -1362,11 +1456,8 @@ CleanupTempFiles(bool isProcExit) } } - while (numAllocatedFiles > 0) - FreeFile(allocatedFiles[0]); - - while (numAllocatedDirs > 0) - FreeDir(allocatedDirs[0]); + while (numAllocatedDescs > 0) + FreeDesc(&allocatedDescs[0]); } diff --git a/src/backend/storage/large_object/inv_api.c b/src/backend/storage/large_object/inv_api.c index 5f75e06e18..470dcf11aa 100644 --- a/src/backend/storage/large_object/inv_api.c +++ b/src/backend/storage/large_object/inv_api.c @@ -9,36 +9,92 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.102 2003/11/29 19:51:56 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.103 2004/07/28 14:23:29 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include -#include -#include - #include "access/genam.h" #include "access/heapam.h" -#include "access/htup.h" #include "access/tuptoaster.h" #include "catalog/catalog.h" #include "catalog/catname.h" -#include "catalog/heap.h" -#include "catalog/index.h" #include "catalog/indexing.h" -#include "catalog/pg_opclass.h" #include "catalog/pg_largeobject.h" -#include 
"catalog/pg_type.h" #include "commands/comment.h" #include "libpq/libpq-fs.h" -#include "miscadmin.h" #include "storage/large_object.h" -#include "storage/smgr.h" -#include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" +#include "utils/resowner.h" + + +/* + * All accesses to pg_largeobject and its index make use of a single Relation + * reference, so that we only need to open pg_largeobject once per transaction. + * To avoid problems when the first such reference occurs inside a + * subtransaction, we execute a slightly klugy maneuver to assign ownership of + * the Relation reference to TopTransactionResourceOwner. + */ +static Relation lo_heap_r = NULL; +static Relation lo_index_r = NULL; + + +/* + * Open pg_largeobject and its index, if not already done in current xact + */ +static void +open_lo_relation(void) +{ + ResourceOwner currentOwner; + + if (lo_heap_r && lo_index_r) + return; /* already open in current xact */ + + /* Arrange for the top xact to own these relation references */ + currentOwner = CurrentResourceOwner; + CurrentResourceOwner = TopTransactionResourceOwner; + + /* Use RowExclusiveLock since we might either read or write */ + if (lo_heap_r == NULL) + lo_heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock); + if (lo_index_r == NULL) + lo_index_r = index_openr(LargeObjectLOidPNIndex); + + CurrentResourceOwner = currentOwner; +} + +/* + * Clean up at main transaction end + */ +void +close_lo_relation(bool isCommit) +{ + if (lo_heap_r || lo_index_r) + { + /* + * Only bother to close if committing; else abort cleanup will + * handle it + */ + if (isCommit) + { + ResourceOwner currentOwner; + + currentOwner = CurrentResourceOwner; + CurrentResourceOwner = TopTransactionResourceOwner; + + if (lo_index_r) + index_close(lo_index_r); + if (lo_heap_r) + heap_close(lo_heap_r, NoLock); + + CurrentResourceOwner = currentOwner; + } + lo_heap_r = NULL; + lo_index_r = NULL; + } +} static int32 @@ -50,6 +106,7 @@
getbytealen(bytea *data) return (VARSIZE(data) - VARHDRSZ); } + /* * inv_create -- create a new large object. * @@ -92,23 +149,20 @@ inv_create(int flags) retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); retval->id = file_oid; + retval->xid = GetCurrentTransactionId(); retval->offset = 0; if (flags & INV_WRITE) { retval->flags = IFS_WRLOCK | IFS_RDLOCK; - retval->heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock); } else if (flags & INV_READ) { retval->flags = IFS_RDLOCK; - retval->heap_r = heap_openr(LargeObjectRelationName, AccessShareLock); } else elog(ERROR, "invalid flags: %d", flags); - retval->index_r = index_openr(LargeObjectLOidPNIndex); - return retval; } @@ -131,23 +185,20 @@ inv_open(Oid lobjId, int flags) retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); retval->id = lobjId; + retval->xid = GetCurrentTransactionId(); retval->offset = 0; if (flags & INV_WRITE) { retval->flags = IFS_WRLOCK | IFS_RDLOCK; - retval->heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock); } else if (flags & INV_READ) { retval->flags = IFS_RDLOCK; - retval->heap_r = heap_openr(LargeObjectRelationName, AccessShareLock); } else elog(ERROR, "invalid flags: %d", flags); - retval->index_r = index_openr(LargeObjectLOidPNIndex); - return retval; } @@ -158,13 +209,6 @@ void inv_close(LargeObjectDesc *obj_desc) { Assert(PointerIsValid(obj_desc)); - - if (obj_desc->flags & IFS_WRLOCK) - heap_close(obj_desc->heap_r, RowExclusiveLock); - else if (obj_desc->flags & IFS_RDLOCK) - heap_close(obj_desc->heap_r, AccessShareLock); - index_close(obj_desc->index_r); - pfree(obj_desc); } @@ -212,12 +256,14 @@ inv_getsize(LargeObjectDesc *obj_desc) Assert(PointerIsValid(obj_desc)); + open_lo_relation(); + ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(obj_desc->id)); - sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r, + sd = index_beginscan(lo_heap_r, lo_index_r, SnapshotNow, 1, skey); /* @@ 
-316,6 +362,8 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes) if (nbytes <= 0) return 0; + open_lo_relation(); + ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, @@ -326,7 +374,7 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes) BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); - sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r, + sd = index_beginscan(lo_heap_r, lo_index_r, SnapshotNow, 2, skey); while ((tuple = index_getnext(sd, ForwardScanDirection)) != NULL) @@ -421,7 +469,9 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) if (nbytes <= 0) return 0; - indstate = CatalogOpenIndexes(obj_desc->heap_r); + open_lo_relation(); + + indstate = CatalogOpenIndexes(lo_heap_r); ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, @@ -433,7 +483,7 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); - sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r, + sd = index_beginscan(lo_heap_r, lo_index_r, SnapshotNow, 2, skey); oldtuple = NULL; @@ -510,9 +560,9 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) memset(replace, ' ', sizeof(replace)); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); replace[Anum_pg_largeobject_data - 1] = 'r'; - newtup = heap_modifytuple(oldtuple, obj_desc->heap_r, + newtup = heap_modifytuple(oldtuple, lo_heap_r, values, nulls, replace); - simple_heap_update(obj_desc->heap_r, &newtup->t_self, newtup); + simple_heap_update(lo_heap_r, &newtup->t_self, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); @@ -554,8 +604,8 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id); values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); - newtup = heap_formtuple(obj_desc->heap_r->rd_att, values, 
nulls); - simple_heap_insert(obj_desc->heap_r, newtup); + newtup = heap_formtuple(lo_heap_r->rd_att, values, nulls); + simple_heap_insert(lo_heap_r, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 176767507c..11d73c5830 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.65 2004/07/27 05:10:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.66 2004/07/28 14:23:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -137,7 +137,7 @@ LockRelation(Relation relation, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = InvalidBlockNumber; - if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), + if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(), lockmode, false)) elog(ERROR, "LockAcquire failed"); @@ -171,7 +171,7 @@ ConditionalLockRelation(Relation relation, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = InvalidBlockNumber; - if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), + if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(), lockmode, true)) return false; @@ -201,7 +201,7 @@ UnlockRelation(Relation relation, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = InvalidBlockNumber; - LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode); + LockRelease(LockTableId, &tag, GetTopTransactionId(), lockmode); } /* @@ -264,7 +264,7 @@ LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = blkno; - if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), + if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(), lockmode, false)) elog(ERROR, 
"LockAcquire failed"); } @@ -285,7 +285,7 @@ ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = blkno; - return LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), + return LockAcquire(LockTableId, &tag, GetTopTransactionId(), lockmode, true); } @@ -302,7 +302,7 @@ UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = blkno; - LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode); + LockRelease(LockTableId, &tag, GetTopTransactionId(), lockmode); } /* @@ -343,7 +343,7 @@ void XactLockTableWait(TransactionId xid) { LOCKTAG tag; - TransactionId myxid = GetCurrentTransactionId(); + TransactionId myxid = GetTopTransactionId(); Assert(!SubTransXidsHaveCommonAncestor(xid, myxid)); diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index e54a74fae4..946bd0c9eb 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -80,7 +80,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.63 2004/07/01 00:51:17 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.64 2004/07/28 14:23:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -678,7 +678,7 @@ AtEOXact_Inval(bool isCommit) } /* - * AtSubEOXact_Inval + * AtEOSubXact_Inval * Process queued-up invalidation messages at end of subtransaction. * * If isCommit, process CurrentCmdInvalidMsgs if any (there probably aren't), @@ -695,7 +695,7 @@ AtEOXact_Inval(bool isCommit) * (if aborting). 
*/ void -AtSubEOXact_Inval(bool isCommit) +AtEOSubXact_Inval(bool isCommit) { TransInvalidationInfo *myInfo = transInvalInfo; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index b9865462a4..22df3effc3 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.224 2004/07/24 19:51:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.225 2004/07/28 14:23:29 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -5436,10 +5436,15 @@ assign_log_stats(bool newval, bool doit, GucSource source) static bool assign_transaction_read_only(bool newval, bool doit, GucSource source) { - if (doit && source >= PGC_S_INTERACTIVE && IsSubTransaction()) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("cannot set transaction read only mode inside a subtransaction"))); + /* Can't go to r/w mode inside a r/o transaction */ + if (newval == false && XactReadOnly && IsSubTransaction()) + { + if (source >= PGC_S_INTERACTIVE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot set transaction read-write mode inside a read-only transaction"))); + return false; + } return true; } diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index 446ee4b72c..d1a7179484 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -16,7 +16,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.73 2004/07/01 00:51:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.74 2004/07/28 14:23:30 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -118,7 +118,10 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple) /* deleting subtransaction aborted 
*/ if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + { + tuple->t_infomask |= HEAP_XMAX_INVALID; return true; + } Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); @@ -268,7 +271,10 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple) /* deleting subtransaction aborted */ if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + { + tuple->t_infomask |= HEAP_XMAX_INVALID; return true; + } Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); @@ -452,7 +458,10 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid) /* deleting subtransaction aborted */ if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + { + tuple->t_infomask |= HEAP_XMAX_INVALID; return HeapTupleMayBeUpdated; + } Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); @@ -590,7 +599,10 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple) /* deleting subtransaction aborted */ if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + { + tuple->t_infomask |= HEAP_XMAX_INVALID; return true; + } Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); @@ -732,7 +744,10 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) /* deleting subtransaction aborted */ /* FIXME -- is this correct w.r.t. the cmax of the tuple? */ if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple))) + { + tuple->t_infomask |= HEAP_XMAX_INVALID; return true; + } Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); @@ -757,21 +772,36 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) /* * By here, the inserting transaction has committed - have to check * when... + * + * Note that the provided snapshot contains only top-level XIDs, so + * we have to convert a subxact XID to its parent for comparison. 
+ * However, we can make first-pass range checks with the given XID, + * because a subxact with XID < xmin has surely also got a parent with + * XID < xmin, while one with XID >= xmax must belong to a parent that + * was not yet committed at the time of this snapshot. */ if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmin(tuple), snapshot->xmin)) { - uint32 i; + TransactionId parentXid; if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmin(tuple), snapshot->xmax)) return false; - for (i = 0; i < snapshot->xcnt; i++) + parentXid = SubTransGetTopmostTransaction(HeapTupleHeaderGetXmin(tuple)); + + if (TransactionIdFollowsOrEquals(parentXid, snapshot->xmin)) { - if (SubTransXidsHaveCommonAncestor(HeapTupleHeaderGetXmin(tuple), - snapshot->xip[i])) - return false; + uint32 i; + + /* no point in checking parentXid against xmax here */ + + for (i = 0; i < snapshot->xcnt; i++) + { + if (TransactionIdEquals(parentXid, snapshot->xip[i])) + return false; + } } } @@ -804,18 +834,31 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) /* * OK, the deleting transaction committed too ... but when? + * + * See notes for the similar tests on tuple xmin, above. 
*/ - if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xmin)) + if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple), + snapshot->xmin)) { - uint32 i; + TransactionId parentXid; if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xmax)) return true; - for (i = 0; i < snapshot->xcnt; i++) + + parentXid = SubTransGetTopmostTransaction(HeapTupleHeaderGetXmax(tuple)); + + if (TransactionIdFollowsOrEquals(parentXid, snapshot->xmin)) { - if (SubTransXidsHaveCommonAncestor(HeapTupleHeaderGetXmax(tuple), snapshot->xip[i])) - return true; + uint32 i; + + /* no point in checking parentXid against xmax here */ + + for (i = 0; i < snapshot->xcnt; i++) + { + if (TransactionIdEquals(parentXid, snapshot->xip[i])) + return true; + } } } diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 0dfaebe38b..130fcd33f4 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/tab-complete.c,v 1.108 2004/07/27 05:11:11 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/tab-complete.c,v 1.109 2004/07/28 14:23:30 tgl Exp $ */ /*---------------------------------------------------------------------- @@ -722,7 +722,7 @@ psql_completion(char *text, int start, int end) else if (pg_strcasecmp(prev2_wd, "ANALYZE") == 0) COMPLETE_WITH_CONST(";"); -/* BEGIN, COMMIT, ABORT */ +/* BEGIN, END, COMMIT, ABORT */ else if (pg_strcasecmp(prev_wd, "BEGIN") == 0 || pg_strcasecmp(prev_wd, "END") == 0 || pg_strcasecmp(prev_wd, "COMMIT") == 0 || diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h index 7a0cfd7099..986a26b96f 100644 --- a/src/include/catalog/namespace.h +++ b/src/include/catalog/namespace.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * 
$PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.30 2004/01/19 19:04:40 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.31 2004/07/28 14:23:30 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -91,6 +91,8 @@ extern Oid FindDefaultConversionProc(int4 for_encoding, int4 to_encoding); /* initialization & transaction cleanup code */ extern void InitializeSearchPath(void); extern void AtEOXact_Namespace(bool isCommit); +extern void AtEOSubXact_Namespace(bool isCommit, TransactionId myXid, + TransactionId parentXid); /* stuff for search_path GUC variable */ extern char *namespace_search_path; diff --git a/src/include/commands/user.h b/src/include/commands/user.h index 8dba146aa5..7a72319277 100644 --- a/src/include/commands/user.h +++ b/src/include/commands/user.h @@ -4,7 +4,7 @@ * Commands for manipulating users and groups. * * - * $PostgreSQL: pgsql/src/include/commands/user.h,v 1.22 2003/11/29 22:40:59 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/commands/user.h,v 1.23 2004/07/28 14:23:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,5 +32,7 @@ extern void RenameGroup(const char *oldname, const char *newname); extern Datum update_pg_pwd_and_pg_group(PG_FUNCTION_ARGS); extern void AtEOXact_UpdatePasswordFile(bool isCommit); +extern void AtEOSubXact_UpdatePasswordFile(bool isCommit, TransactionId myXid, + TransactionId parentXid); #endif /* USER_H */ diff --git a/src/include/libpq/be-fsstubs.h b/src/include/libpq/be-fsstubs.h index 9c45876be5..b2d8b3d340 100644 --- a/src/include/libpq/be-fsstubs.h +++ b/src/include/libpq/be-fsstubs.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/libpq/be-fsstubs.h,v 1.18 2003/11/29 22:41:03 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/libpq/be-fsstubs.h,v 1.19 
2004/07/28 14:23:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -43,8 +43,10 @@ extern int lo_read(int fd, char *buf, int len); extern int lo_write(int fd, char *buf, int len); /* - * Cleanup LOs at xact commit/abort [ Pascal André ] + * Cleanup LOs at xact commit/abort */ -extern void lo_commit(bool isCommit); +extern void AtEOXact_LargeObject(bool isCommit); +extern void AtEOSubXact_LargeObject(bool isCommit, TransactionId myXid, + TransactionId parentXid); #endif /* BE_FSSTUBS_H */ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 430ed5d8c7..c62f6fff76 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.45 2004/05/31 03:48:10 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.46 2004/07/28 14:23:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -85,6 +85,8 @@ extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode); extern void set_max_safe_fds(void); extern void closeAllVfds(void); extern void AtEOXact_Files(void); +extern void AtEOSubXact_Files(bool isCommit, TransactionId myXid, + TransactionId parentXid); extern void RemovePgTempFiles(void); extern int pg_fsync(int fd); extern int pg_fdatasync(int fd); diff --git a/src/include/storage/large_object.h b/src/include/storage/large_object.h index 23118aaf5a..164d3abb5d 100644 --- a/src/include/storage/large_object.h +++ b/src/include/storage/large_object.h @@ -1,47 +1,44 @@ /*------------------------------------------------------------------------- * * large_object.h - * file of info for Postgres large objects. POSTGRES 4.2 supports + * Declarations for PostgreSQL large objects. 
POSTGRES 4.2 supported * zillions of large objects (internal, external, jaquith, inversion). * Now we only support inversion. * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/large_object.h,v 1.27 2003/11/29 22:41:13 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/storage/large_object.h,v 1.28 2004/07/28 14:23:31 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef LARGE_OBJECT_H #define LARGE_OBJECT_H -#include "utils/rel.h" - /*---------- * Data about a currently-open large object. * * id is the logical OID of the large object + * xid is the transaction Id that opened the LO (or currently owns it) * offset is the current seek offset within the LO - * heap_r holds an open-relation reference to pg_largeobject - * index_r holds an open-relation reference to pg_largeobject_loid_pn_index + * flags contains some flag bits * - * NOTE: before 7.1, heap_r and index_r held references to the separate - * table and index of a specific large object. Now they all live in one rel. + * NOTE: before 7.1, we also had to store references to the separate table + * and index of a specific large object. Now they all live in pg_largeobject + * and are accessed via a common relation descriptor. 
*---------- */ typedef struct LargeObjectDesc { - Oid id; + Oid id; /* LO's identifier */ + TransactionId xid; /* owning XID */ uint32 offset; /* current seek pointer */ int flags; /* locking info, etc */ /* flag bits: */ #define IFS_RDLOCK (1 << 0) #define IFS_WRLOCK (1 << 1) - - Relation heap_r; - Relation index_r; } LargeObjectDesc; @@ -67,6 +64,7 @@ typedef struct LargeObjectDesc */ /* inversion stuff in inv_api.c */ +extern void close_lo_relation(bool isCommit); extern LargeObjectDesc *inv_create(int flags); extern LargeObjectDesc *inv_open(Oid lobjId, int flags); extern void inv_close(LargeObjectDesc *obj_desc); diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index add5ca83c7..cf12122ea5 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.32 2004/07/01 00:51:44 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.33 2004/07/28 14:23:31 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,7 +28,7 @@ extern void AtSubStart_Inval(void); extern void AtEOXact_Inval(bool isCommit); -extern void AtSubEOXact_Inval(bool isCommit); +extern void AtEOSubXact_Inval(bool isCommit); extern void CommandEndInvalidationMessages(void);