Improve heuristics for compressing the KnownAssignedXids array.

Previously, we'd compress only when the active range of array entries
reached Max(4 * PROCARRAY_MAXPROCS, 2 * pArray->numKnownAssignedXids).
If max_connections is large, the first term could result in not
compressing for a long time, resulting in much wastage of cycles in
hot-standby backends scanning the array to take snapshots.  Get rid
of that term, and just bound it to 2 * pArray->numKnownAssignedXids.

That however creates the opposite risk, that we might spend too much
effort compressing.  Hence, consider compressing only once every 128
commit records.  (This frequency was chosen by benchmarking.  While
we only tried one benchmark scenario, the results seem stable over
a fairly wide range of frequencies.)

Also, force compression when processing RecoveryInfo WAL records
(which should be infrequent); the old code could perform compression
then, but would do so only after the same array-range check as for
the transaction-commit path.

Also, opportunistically run compression if the startup process is about
to wait for WAL, though not oftener than once a second.  This should
prevent cases where we waste lots of time by leaving the array
not-compressed for long intervals due to low WAL traffic.

Lastly, add a simple check to keep us from uselessly compressing
when the array storage is already compact.

Back-patch, as the performance problem is worse in pre-v14 branches
than in HEAD.

Simon Riggs and Michail Nikolaev, with help from Tom Lane and
Andres Freund.

Discussion: https://postgr.es/m/CALdSSPgahNUD_=pB_j=1zSnDBaiOtqVfzo8Ejt5J_k7qZiU1Tw@mail.gmail.com
This commit is contained in:
Tom Lane 2022-11-29 15:43:17 -05:00
parent 8b47ccb624
commit 8242752f9c
3 changed files with 111 additions and 31 deletions

View File

@@ -3566,6 +3569,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
elog(LOG, "waiting for WAL to become available at %X/%X",
LSN_FORMAT_ARGS(RecPtr));
/* Do background tasks that might benefit us later. */
KnownAssignedTransactionIdsIdleMaintenance();
(void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
WL_LATCH_SET | WL_TIMEOUT |
WL_EXIT_ON_PM_DEATH,
@@ -3832,6 +3835,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
streaming_reply_sent = true;
}
/* Do any background tasks that might benefit us later. */
KnownAssignedTransactionIdsIdleMaintenance();
/* Update pg_stat_recovery_prefetch before sleeping. */
XLogPrefetcherComputeStats(xlogprefetcher);

View File

@@ -257,6 +257,17 @@ typedef enum GlobalVisHorizonKind
VISHORIZON_TEMP
} GlobalVisHorizonKind;
/*
 * Reason codes for KnownAssignedXidsCompress().
 *
 * KAX_NO_SPACE forces compression unconditionally; for the other reasons,
 * KnownAssignedXidsCompress() applies heuristics and may decide to skip
 * the work.
 */
typedef enum KAXCompressReason
{
	KAX_NO_SPACE,				/* need to free up space at array end */
	KAX_PRUNE,					/* we just pruned old entries */
	KAX_TRANSACTION_END,		/* we just committed/removed some XIDs */
	KAX_STARTUP_PROCESS_IDLE	/* startup process is about to sleep */
} KAXCompressReason;
static ProcArrayStruct *procArray;
@@ -336,7 +347,7 @@ static void DisplayXidCache(void);
#endif /* XIDCACHE_DEBUG */
/* Primitives for KnownAssignedXids array handling for standby */
static void KnownAssignedXidsCompress(bool force);
static void KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock);
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
bool exclusive_lock);
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
@@ -4509,6 +4520,17 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
LWLockRelease(ProcArrayLock);
}
/*
 * KnownAssignedTransactionIdsIdleMaintenance
 *		Opportunistically do maintenance work when the startup process
 *		is about to go idle.
 *
 * Currently this just considers compressing the KnownAssignedXids array;
 * KnownAssignedXidsCompress rate-limits KAX_STARTUP_PROCESS_IDLE requests
 * internally, so it is cheap to call this each time we are about to sleep.
 * We do not hold ProcArrayLock here, hence haveLock = false.
 */
void
KnownAssignedTransactionIdsIdleMaintenance(void)
{
	KnownAssignedXidsCompress(KAX_STARTUP_PROCESS_IDLE, false);
}
/*
* Private module functions to manipulate KnownAssignedXids
@@ -4591,7 +4613,9 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
* so there is an optimal point for any workload mix. We use a heuristic to
* decide when to compress the array, though trimming also helps reduce
* frequency of compressing. The heuristic requires us to track the number of
* currently valid XIDs in the array.
* currently valid XIDs in the array (N). Except in special cases, we'll
* compress when S >= 2N. Bounding S at 2N in turn bounds the time for
* taking a snapshot to be O(N), which it would have to be anyway.
*/
@@ -4599,42 +4623,91 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
* Compress KnownAssignedXids by shifting valid data down to the start of the
* array, removing any gaps.
*
* A compression step is forced if "force" is true, otherwise we do it
* only if a heuristic indicates it's a good time to do it.
* A compression step is forced if "reason" is KAX_NO_SPACE, otherwise
* we do it only if a heuristic indicates it's a good time to do it.
*
* Caller must hold ProcArrayLock in exclusive mode.
* Compression requires holding ProcArrayLock in exclusive mode.
* Caller must pass haveLock = true if it already holds the lock.
*/
static void
KnownAssignedXidsCompress(bool force)
KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock)
{
ProcArrayStruct *pArray = procArray;
int head,
tail;
tail,
nelements;
int compress_index;
int i;
/* no spinlock required since we hold ProcArrayLock exclusively */
/* Counters for compression heuristics */
static unsigned int transactionEndsCounter;
static TimestampTz lastCompressTs;
/* Tuning constants */
#define KAX_COMPRESS_FREQUENCY 128 /* in transactions */
#define KAX_COMPRESS_IDLE_INTERVAL 1000 /* in ms */
/*
* Since only the startup process modifies the head/tail pointers, we
* don't need a lock to read them here.
*/
head = pArray->headKnownAssignedXids;
tail = pArray->tailKnownAssignedXids;
nelements = head - tail;
if (!force)
/*
* If we can choose whether to compress, use a heuristic to avoid
* compressing too often or not often enough. "Compress" here simply
* means moving the values to the beginning of the array, so it is not as
* complex or costly as typical data compression algorithms.
*/
if (nelements == pArray->numKnownAssignedXids)
{
/*
* If we can choose how much to compress, use a heuristic to avoid
* compressing too often or not often enough.
*
* Heuristic is if we have a large enough current spread and less than
* 50% of the elements are currently in use, then compress. This
* should ensure we compress fairly infrequently. We could compress
* less often though the virtual array would spread out more and
* snapshots would become more expensive.
* When there are no gaps between head and tail, don't bother to
* compress, except in the KAX_NO_SPACE case where we must compress to
* create some space after the head.
*/
int nelements = head - tail;
if (nelements < 4 * PROCARRAY_MAXPROCS ||
nelements < 2 * pArray->numKnownAssignedXids)
if (reason != KAX_NO_SPACE)
return;
}
else if (reason == KAX_TRANSACTION_END)
{
/*
* Consider compressing only once every so many commits. Frequency
* determined by benchmarks.
*/
if ((transactionEndsCounter++) % KAX_COMPRESS_FREQUENCY != 0)
return;
/*
* Furthermore, compress only if the used part of the array is less
* than 50% full (see comments above).
*/
if (nelements < 2 * pArray->numKnownAssignedXids)
return;
}
else if (reason == KAX_STARTUP_PROCESS_IDLE)
{
/*
* We're about to go idle for lack of new WAL, so we might as well
* compress. But not too often, to avoid ProcArray lock contention
* with readers.
*/
if (lastCompressTs != 0)
{
TimestampTz compress_after;
compress_after = TimestampTzPlusMilliseconds(lastCompressTs,
KAX_COMPRESS_IDLE_INTERVAL);
if (GetCurrentTimestamp() < compress_after)
return;
}
}
/* Need to compress, so get the lock if we don't have it. */
if (!haveLock)
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/*
* We compress the array by reading the valid values from tail to head,
@@ -4650,9 +4723,16 @@ KnownAssignedXidsCompress(bool force)
compress_index++;
}
}
Assert(compress_index == pArray->numKnownAssignedXids);
pArray->tailKnownAssignedXids = 0;
pArray->headKnownAssignedXids = compress_index;
if (!haveLock)
LWLockRelease(ProcArrayLock);
/* Update timestamp for maintenance. No need to hold lock for this. */
lastCompressTs = GetCurrentTimestamp();
}
/*
@@ -4724,18 +4804,11 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
*/
if (head + nxids > pArray->maxKnownAssignedXids)
{
/* must hold lock to compress */
if (!exclusive_lock)
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
KnownAssignedXidsCompress(true);
KnownAssignedXidsCompress(KAX_NO_SPACE, exclusive_lock);
head = pArray->headKnownAssignedXids;
/* note: we no longer care about the tail pointer */
if (!exclusive_lock)
LWLockRelease(ProcArrayLock);
/*
* If it still won't fit then we're out of memory
*/
@@ -4929,7 +5002,7 @@ KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
KnownAssignedXidsRemove(subxids[i]);
/* Opportunistically compress the array */
KnownAssignedXidsCompress(false);
KnownAssignedXidsCompress(KAX_TRANSACTION_END, true);
}
/*
@@ -5004,7 +5077,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
}
/* Opportunistically compress the array */
KnownAssignedXidsCompress(false);
KnownAssignedXidsCompress(KAX_PRUNE, true);
}
/*

View File

@@ -39,6 +39,7 @@ extern void ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
TransactionId max_xid);
extern void ExpireAllKnownAssignedTransactionIds(void);
extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid);
extern void KnownAssignedTransactionIdsIdleMaintenance(void);
extern int GetMaxSnapshotXidCount(void);
extern int GetMaxSnapshotSubxidCount(void);