From 667912aee649c3608e003568e4b47d95251b1c8c Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sun, 21 Jun 2015 18:57:28 +0200 Subject: [PATCH] Improve multixact emergency autovacuum logic. Previously autovacuum was not necessarily triggered if space in the members slru got tight. The first problem was that the signalling was tied to values in the offsets slru, but members can advance much faster. That's especially a problem if old sessions had been around that previously prevented the multixact horizon from advancing. Secondly the skipping logic doesn't work if the database was restarted after autovacuum was triggered - that knowledge is not preserved across restart. This is especially a problem because it's a common panic-reaction to restart the database if it gets slow due to anti-wraparound vacuums. Fix the first problem by separating the logic for members from offsets. Trigger autovacuum whenever a multixact crosses a segment boundary, as the current member offset increases in irregular values, so we can't use a simple modulo logic as for offsets. Add a stopgap for the second problem, by signalling autovacuum whenever ERRORing out because of boundaries. Discussion: 20150608163707.GD20772@alap3.anarazel.de Backpatch into 9.3, where it became more likely that multixacts wrap around. --- src/backend/access/transam/multixact.c | 65 +++++++++++++++++++------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 4daa5ae9b0..377d0842bd 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -977,10 +977,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * Note these are pretty much the same protections in GetNewTransactionId. 
*---------- */ - if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit) || - !MultiXactState->oldestOffsetKnown || - (MultiXactState->nextOffset - MultiXactState->oldestOffset - > MULTIXACT_MEMBER_SAFE_THRESHOLD)) + if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit)) { /* * For safety's sake, we release MultiXactGenLock while sending @@ -996,19 +993,17 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) LWLockRelease(MultiXactGenLock); - /* - * To avoid swamping the postmaster with signals, we issue the autovac - * request only once per 64K multis generated. This still gives - * plenty of chances before we get into real trouble. - */ - if (IsUnderPostmaster && (result % 65536) == 0) - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (IsUnderPostmaster && !MultiXactIdPrecedes(result, multiStopLimit)) { char *oldest_datname = get_database_name(oldest_datoid); + /* + * Immediately kick autovacuum into action as we're already + * in ERROR territory. + */ + SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); + /* complain even if that DB has disappeared */ if (oldest_datname) ereport(ERROR, @@ -1025,7 +1020,16 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) errhint("Execute a database-wide VACUUM in that database.\n" "You might also need to commit or roll back old prepared transactions."))); } - else if (!MultiXactIdPrecedes(result, multiWarnLimit)) + + /* + * To avoid swamping the postmaster with signals, we issue the autovac + * request only once per 64K multis generated. This still gives + * plenty of chances before we get into real trouble. 
+ */ + if (IsUnderPostmaster && (result % 65536) == 0) + SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); + + if (!MultiXactIdPrecedes(result, multiWarnLimit)) { char *oldest_datname = get_database_name(oldest_datoid); @@ -1096,6 +1100,10 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) if (MultiXactState->offsetStopLimitKnown && MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, nmembers)) + { + /* see comment in the corresponding offsets wraparound case */ + SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("multixact \"members\" limit exceeded"), @@ -1106,10 +1114,33 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) MultiXactState->offsetStopLimit - nextOffset - 1), errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.", MultiXactState->oldestMultiXactDB))); - else if (MultiXactState->offsetStopLimitKnown && - MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, - nextOffset, - nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS)) + } + + /* + * Check whether we should kick autovacuum into action, to prevent members + * wraparound. NB we use a much larger window to trigger autovacuum than + * just the warning limit. The warning is just a measure of last resort - + * this is in line with GetNewTransactionId's behaviour. + */ + if (!MultiXactState->oldestOffsetKnown || + (MultiXactState->nextOffset - MultiXactState->oldestOffset + > MULTIXACT_MEMBER_SAFE_THRESHOLD)) + { + /* + * To avoid swamping the postmaster with signals, we issue the autovac + * request only when crossing a segment boundary. With default + * compilation settings that's rougly after 50k members. This still + * gives plenty of chances before we get into real trouble. 
+ */ + if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) != + (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT)) + SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); + } + + if (MultiXactState->offsetStopLimitKnown && + MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, + nextOffset, + nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS)) ereport(WARNING, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("database with OID %u must be vacuumed before %d more multixact members are used",