diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 0709b35c04..1466606c34 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -10215,7 +10215,8 @@ SCRAM-SHA-256$<iteration count>:&l and general database objects (identified by class OID and object OID, in the same way as in pg_description or pg_depend). Also, the right to extend a - relation is represented as a separate lockable object. + relation is represented as a separate lockable object, as is the right to + update pg_database.datfrozenxid. Also, advisory locks can be taken on numbers that have user-defined meanings. @@ -10243,6 +10244,7 @@ SCRAM-SHA-256$<iteration count>:&l Type of the lockable object: relation, extend, + frozenid, page, tuple, transactionid, diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 408ecbcae0..5243f5f346 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1738,6 +1738,12 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser extend Waiting to extend a relation. + + frozenid + Waiting to + update pg_database.datfrozenxid + and pg_database.datminmxid. + object Waiting to acquire a lock on a non-relation database object. @@ -1906,6 +1912,11 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser NotifyQueue Waiting to read or update NOTIFY messages. + + NotifyQueueTail + Waiting to update limit on NOTIFY message + storage. + NotifySLRU Waiting to access the NOTIFY message SLRU @@ -2082,6 +2093,11 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser WALWrite Waiting for WAL buffers to be written to disk. + + WrapLimitsVacuum + Waiting to update limits on transaction id and multixact + consumption. + XactBuffer Waiting for I/O on a transaction status SLRU buffer. diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 61249f4a12..275058036a 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1208,6 +1208,14 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied) /* * Remove all segments before the one holding the passed page number + * + * All SLRUs prevent concurrent calls to this function, either with an LWLock + * or by calling it only as part of a checkpoint. Mutual exclusion must begin + * before computing cutoffPage. Mutual exclusion must end after any limit + * update that would permit other backends to write fresh data into the + * segment immediately preceding the one containing cutoffPage. Otherwise, + * when the SLRU is quite full, SimpleLruTruncate() might delete that segment + * after it has accrued freshly-written data. */ void SimpleLruTruncate(SlruCtl ctl, int cutoffPage) diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index f33ae407a6..d9a31d3c8d 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -349,8 +349,8 @@ ExtendSUBTRANS(TransactionId newestXact) /* * Remove all SUBTRANS segments before the one holding the passed transaction ID * - * This is normally called during checkpoint, with oldestXact being the - * oldest TransactionXmin of any running transaction. + * oldestXact is the oldest TransactionXmin of any running transaction. This + * is called only during checkpoint. */ void TruncateSUBTRANS(TransactionId oldestXact) diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 71b7577afc..4c1286eb98 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -244,19 +244,22 @@ typedef struct QueueBackendStatus /* * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff) * - * The AsyncQueueControl structure is protected by the NotifyQueueLock. + * The AsyncQueueControl structure is protected by the NotifyQueueLock and + * NotifyQueueTailLock. * - * When holding the lock in SHARED mode, backends may only inspect their own - * entries as well as the head and tail pointers. Consequently we can allow a - * backend to update its own record while holding only SHARED lock (since no - * other backend will inspect it). + * When holding NotifyQueueLock in SHARED mode, backends may only inspect + * their own entries as well as the head and tail pointers. Consequently we + * can allow a backend to update its own record while holding only SHARED lock + * (since no other backend will inspect it). * - * When holding the lock in EXCLUSIVE mode, backends can inspect the entries - * of other backends and also change the head and tail pointers. + * When holding NotifyQueueLock in EXCLUSIVE mode, backends can inspect the + * entries of other backends and also change the head pointer. When holding + * both NotifyQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends + * can change the tail pointer. * * NotifySLRULock is used as the control lock for the pg_notify SLRU buffers. - * In order to avoid deadlocks, whenever we need both locks, we always first - * get NotifyQueueLock and then NotifySLRULock. + * In order to avoid deadlocks, whenever we need multiple locks, we first get + * NotifyQueueTailLock, then NotifyQueueLock, and lastly NotifySLRULock. * * Each backend uses the backend[] array entry with index equal to its * BackendId (which can range from 1 to MaxBackends). We rely on this to make @@ -2177,6 +2180,10 @@ asyncQueueAdvanceTail(void) int newtailpage; int boundary; + /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */ + LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE); + + /* Compute the new tail. */ LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); min = QUEUE_HEAD; for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i)) @@ -2185,7 +2192,6 @@ asyncQueueAdvanceTail(void) min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i)); } oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL); - QUEUE_TAIL = min; LWLockRelease(NotifyQueueLock); /* @@ -2205,6 +2211,17 @@ asyncQueueAdvanceTail(void) */ SimpleLruTruncate(NotifyCtl, newtailpage); } + + /* + * Advertise the new tail. This changes asyncQueueIsFull()'s verdict for + * the segment immediately prior to the new tail, allowing fresh data into + * that segment. + */ + LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); + QUEUE_TAIL = min; + LWLockRelease(NotifyQueueLock); + + LWLockRelease(NotifyQueueTailLock); } /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index d32de23e62..92c4eb6a87 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1344,6 +1344,14 @@ vac_update_datfrozenxid(void) bool bogus = false; bool dirty = false; + /* + * Restrict this task to one backend per database. This avoids race + * conditions that would move datfrozenxid or datminmxid backward. It + * avoids calling vac_truncate_clog() with a datfrozenxid preceding a + * datfrozenxid passed to an earlier vac_truncate_clog() call. + */ + LockDatabaseFrozenIds(ExclusiveLock); + /* * Initialize the "min" calculation with GetOldestXmin, which is a * reasonable approximation to the minimum relfrozenxid for not-yet- @@ -1533,6 +1541,9 @@ vac_truncate_clog(TransactionId frozenXID, bool bogus = false; bool frozenAlreadyWrapped = false; + /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */ + LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE); + /* init oldest datoids to sync with my frozenXID/minMulti values */ oldestxid_datoid = MyDatabaseId; minmulti_datoid = MyDatabaseId; @@ -1642,6 +1653,8 @@ vac_truncate_clog(TransactionId frozenXID, */ SetTransactionIdLimit(frozenXID, oldestxid_datoid); SetMultiXactIdLimit(minMulti, minmulti_datoid, false); + + LWLockRelease(WrapLimitsVacuumLock); } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 2010320095..7409de9405 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -460,6 +460,21 @@ UnlockRelationForExtension(Relation relation, LOCKMODE lockmode) LockRelease(&tag, lockmode, false); } +/* + * LockDatabaseFrozenIds + * + * This allows one backend per database to execute vac_update_datfrozenxid(). + */ +void +LockDatabaseFrozenIds(LOCKMODE lockmode) +{ + LOCKTAG tag; + + SET_LOCKTAG_DATABASE_FROZEN_IDS(tag, MyDatabaseId); + + (void) LockAcquire(&tag, lockmode, false, false); +} + /* * LockPage * @@ -1098,6 +1113,11 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field2, tag->locktag_field1); break; + case LOCKTAG_DATABASE_FROZEN_IDS: + appendStringInfo(buf, + _("pg_database.datfrozenxid of database %u"), + tag->locktag_field1); + break; case LOCKTAG_PAGE: appendStringInfo(buf, _("page %u of relation %u of database %u"), diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index e6985e8eed..774292fd94 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -50,3 +50,6 @@ MultiXactTruncationLock 41 OldSnapshotTimeMapLock 42 LogicalRepWorkerLock 43 XactTruncationLock 44 +# 45 was XactTruncationLock until removal of BackendRandomLock +WrapLimitsVacuumLock 46 +NotifyQueueTailLock 47 diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index e992d1bbfc..f592292d06 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -29,6 +29,7 @@ const char *const LockTagTypeNames[] = { "relation", "extend", + "frozenid", "page", "tuple", "transactionid", @@ -254,6 +255,17 @@ pg_lock_status(PG_FUNCTION_ARGS) nulls[8] = true; nulls[9] = true; break; + case LOCKTAG_DATABASE_FROZEN_IDS: + values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 3acc11aa5a..f7cabcbbf5 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -59,6 +59,9 @@ extern bool ConditionalLockRelationForExtension(Relation relation, LOCKMODE lockmode); extern int RelationExtensionLockWaiterCount(Relation relation); +/* Lock to recompute pg_database.datfrozenxid in the current database */ +extern void LockDatabaseFrozenIds(LOCKMODE lockmode); + /* Lock a page (currently only used within indexes) */ extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index fdabf42721..1c3e9c1999 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -138,6 +138,7 @@ typedef enum LockTagType { LOCKTAG_RELATION, /* whole relation */ LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */ + LOCKTAG_DATABASE_FROZEN_IDS, /* pg_database.datfrozenxid */ LOCKTAG_PAGE, /* one page of a relation */ LOCKTAG_TUPLE, /* one physical tuple */ LOCKTAG_TRANSACTION, /* transaction (for waiting for xact done) */ @@ -194,6 +195,15 @@ typedef struct LOCKTAG (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) +/* ID info for frozen IDs is DB OID */ +#define SET_LOCKTAG_DATABASE_FROZEN_IDS(locktag,dboid) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = 0, \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_DATABASE_FROZEN_IDS, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + /* ID info for a page is RELATION info + BlockNumber */ #define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \ ((locktag).locktag_field1 = (dboid), \