From 722acf51a0d074d19782ad7e97ebe3fdb63fbb87 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 2 Jan 2014 18:16:54 -0300 Subject: [PATCH] Handle wraparound during truncation in multixact/members In pg_multixact/members, relying on modulo-2^32 arithmetic for wraparound handling doesn't work all that well. Because we don't explicitely track wraparound of the allocation counter for members, it is possible that the "live" area exceeds 2^31 entries; trying to remove SLRU segments that are "old" according to the original logic might lead to removal of segments still in use. To fix, have the truncation routine use a tailored SlruScanDirectory callback that keeps track of the live area in actual use; that way, when the live range exceeds 2^31 entries, the oldest segments still live will not get removed untimely. This new SlruScanDir callback needs to take care not to remove segments that are "in the future": if new SLRU segments appear while the truncation is ongoing, make sure we don't remove them. This requires examination of shared memory state to recheck for false positives, but testing suggests that this doesn't cause a problem. The original coding didn't suffer from this pitfall because segments created when truncation is running are never considered to be removable. Per Andres Freund's investigation of bug #8673 reported by Serge Negodyuck. --- src/backend/access/transam/multixact.c | 90 ++++++++++++++++++++++++-- src/backend/access/transam/slru.c | 31 +++++---- src/include/access/slru.h | 1 + 3 files changed, 104 insertions(+), 18 deletions(-) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 03581bea66..60c3370ece 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -577,8 +577,13 @@ MultiXactIdSetOldestMember(void) * another someone else could compute an OldestVisibleMXactId that * would be after the value we are going to store when we get control * back. Which would be wrong. + * + * Note that a shared lock is sufficient, because it's enough to stop + * someone from advancing nextMXact; and nobody else could be trying to + * write to our OldestMember entry, only reading (and we assume storing + * it is atomic.) */ - LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); + LWLockAcquire(MultiXactGenLock, LW_SHARED); /* * We have to beware of the possibility that nextMXact is in the @@ -1559,7 +1564,7 @@ AtEOXact_MultiXact(void) /* * AtPrepare_MultiXact - * Save multixact state at 2PC tranasction prepare + * Save multixact state at 2PC transaction prepare * * In this phase, we only store our OldestMemberMXactId value in the two-phase * state file. @@ -2335,6 +2340,65 @@ GetOldestMultiXactId(void) return oldestMXact; } +/* + * SlruScanDirectory callback. + * This callback deletes segments that are outside the range determined by + * the given page numbers. + * + * Both range endpoints are exclusive (that is, segments containing any of + * those pages are kept.) + */ +typedef struct MembersLiveRange +{ + int rangeStart; + int rangeEnd; +} MembersLiveRange; + +static bool +SlruScanDirCbRemoveMembers(SlruCtl ctl, char *filename, int segpage, + void *data) +{ + MembersLiveRange *range = (MembersLiveRange *) data; + MultiXactOffset nextOffset; + + if ((segpage == range->rangeStart) || + (segpage == range->rangeEnd)) + return false; /* easy case out */ + + /* + * To ensure that no segment is spuriously removed, we must keep track + * of new segments added since the start of the directory scan; to do this, + * we update our end-of-range point as we run. + * + * As an optimization, we can skip looking at shared memory if we know for + * certain that the current segment must be kept. This is so because + * nextOffset never decreases, and we never increase rangeStart during any + * one run. + */ + if (!((range->rangeStart > range->rangeEnd && + segpage > range->rangeEnd && segpage < range->rangeStart) || + (range->rangeStart < range->rangeEnd && + (segpage < range->rangeStart || segpage > range->rangeEnd)))) + return false; + + /* + * Update our idea of the end of the live range. + */ + LWLockAcquire(MultiXactGenLock, LW_SHARED); + nextOffset = MultiXactState->nextOffset; + LWLockRelease(MultiXactGenLock); + range->rangeEnd = MXOffsetToMemberPage(nextOffset); + + /* Recheck the deletion condition. If it still holds, perform deletion */ + if ((range->rangeStart > range->rangeEnd && + segpage > range->rangeEnd && segpage < range->rangeStart) || + (range->rangeStart < range->rangeEnd && + (segpage < range->rangeStart || segpage > range->rangeEnd))) + SlruDeleteSegment(ctl, filename); + + return false; /* keep going */ +} + typedef struct mxtruncinfo { int earliestExistingPage; @@ -2376,8 +2440,10 @@ void TruncateMultiXact(MultiXactId oldestMXact) { MultiXactOffset oldestOffset; + MultiXactOffset nextOffset; mxtruncinfo trunc; MultiXactId earliest; + MembersLiveRange range; /* * Note we can't just plow ahead with the truncation; it's possible that @@ -2424,9 +2490,23 @@ TruncateMultiXact(MultiXactId oldestMXact) SimpleLruTruncate(MultiXactOffsetCtl, MultiXactIdToOffsetPage(oldestMXact)); - /* truncate MultiXactMembers and we're done */ - SimpleLruTruncate(MultiXactMemberCtl, - MXOffsetToMemberPage(oldestOffset)); + /* + * To truncate MultiXactMembers, we need to figure out the active page + * range and delete all files outside that range. The start point is the + * start of the segment containing the oldest offset; an end point of the + * segment containing the next offset to use is enough. The end point is + * updated as MultiXactMember gets extended concurrently, elsewhere. + */ + range.rangeStart = MXOffsetToMemberPage(oldestOffset); + range.rangeStart -= range.rangeStart % SLRU_PAGES_PER_SEGMENT; + + LWLockAcquire(MultiXactGenLock, LW_SHARED); + nextOffset = MultiXactState->nextOffset; + LWLockRelease(MultiXactGenLock); + + range.rangeEnd = MXOffsetToMemberPage(nextOffset); + + SlruScanDirectory(MultiXactMemberCtl, SlruScanDirCbRemoveMembers, &range); } /* diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 5e53593a8f..9dc566e162 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1210,6 +1210,17 @@ restart:; (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage); } +void +SlruDeleteSegment(SlruCtl ctl, char *filename) +{ + char path[MAXPGPATH]; + + snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename); + ereport(DEBUG2, + (errmsg("removing file \"%s\"", path))); + unlink(path); +} + /* * SlruScanDirectory callback * This callback reports true if there's any segment prior to the one @@ -1235,16 +1246,10 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) { - char path[MAXPGPATH]; int cutoffPage = *(int *) data; if (ctl->PagePrecedes(segpage, cutoffPage)) - { - snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename); - ereport(DEBUG2, - (errmsg("removing file \"%s\"", path))); - unlink(path); - } + SlruDeleteSegment(ctl, filename); return false; /* keep going */ } @@ -1256,12 +1261,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data) { - char path[MAXPGPATH]; - - snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename); - ereport(DEBUG2, - (errmsg("removing file \"%s\"", path))); - unlink(path); + SlruDeleteSegment(ctl, filename); return false; /* keep going */ } @@ -1272,6 +1272,11 @@ SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data) * If the callback returns true, the scan is stopped. The last return value * from the callback is returned. * + * The callback receives the following arguments: 1. the SlruCtl struct for the + * slru being truncated; 2. the filename being considered; 3. the page number + * for the first page of that file; 4. a pointer to the opaque data given to us + * by the caller. + * * Note that the ordering in which the directory is scanned is not guaranteed. * * Note that no locking is applied. diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 7e81e0f113..fc2c5035a5 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -150,6 +150,7 @@ extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage, void *data); extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data); +extern void SlruDeleteSegment(SlruCtl ctl, char *filename); /* SlruScanDirectory public callbacks */ extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename,