Handle wraparound during truncation in multixact/members

In pg_multixact/members, relying on modulo-2^32 arithmetic for
wraparound handling doesn't work all that well.  Because we don't
explicitly track wraparound of the allocation counter for members, it
is possible that the "live" area exceeds 2^31 entries; trying to remove
SLRU segments that are "old" according to the original logic might lead
to removal of segments still in use.  To fix, have the truncation
routine use a tailored SlruScanDirectory callback that keeps track of
the live area in actual use; that way, when the live range exceeds 2^31
entries, the oldest segments still live will not be removed prematurely.

This new SlruScanDir callback needs to take care not to remove segments
that are "in the future": if new SLRU segments appear while the
truncation is ongoing, make sure we don't remove them.  This requires
examination of shared memory state to recheck for false positives, but
testing suggests that this doesn't cause a problem.  The original coding
didn't suffer from this pitfall because segments created when truncation
is running are never considered to be removable.

Per Andres Freund's investigation of bug #8673 reported by Serge
Negodyuck.
This commit is contained in:
Alvaro Herrera 2014-01-02 18:16:54 -03:00
parent 3cff1879f8
commit 722acf51a0
3 changed files with 104 additions and 18 deletions

View File

@ -577,8 +577,13 @@ MultiXactIdSetOldestMember(void)
* another someone else could compute an OldestVisibleMXactId that
* would be after the value we are going to store when we get control
* back. Which would be wrong.
*
* Note that a shared lock is sufficient, because it's enough to stop
* someone from advancing nextMXact; and nobody else could be trying to
* write to our OldestMember entry, only reading (and we assume storing
* it is atomic.)
*/
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
LWLockAcquire(MultiXactGenLock, LW_SHARED);
/*
* We have to beware of the possibility that nextMXact is in the
@ -1559,7 +1564,7 @@ AtEOXact_MultiXact(void)
/*
* AtPrepare_MultiXact
* Save multixact state at 2PC transaction prepare
*
* In this phase, we only store our OldestMemberMXactId value in the two-phase
* state file.
@ -2335,6 +2340,65 @@ GetOldestMultiXactId(void)
return oldestMXact;
}
/*
* SlruScanDirectory callback.
* This callback deletes segments that are outside the range determined by
* the given page numbers.
*
* Both range endpoints are exclusive (that is, segments containing any of
* those pages are kept.)
*/
/*
 * Page range of pg_multixact/members that is still in use.  A pointer to
 * this struct is handed to SlruScanDirCbRemoveMembers as its opaque "data"
 * argument.
 */
typedef struct MembersLiveRange
{
/*
 * First page of the segment containing the oldest offset; fixed for the
 * duration of one truncation run.
 */
int rangeStart;
/*
 * Page containing the next offset to be assigned; refreshed by the callback
 * from MultiXactState->nextOffset while the directory scan is running.
 */
int rangeEnd;
} MembersLiveRange;
static bool
SlruScanDirCbRemoveMembers(SlruCtl ctl, char *filename, int segpage,
void *data)
{
MembersLiveRange *range = (MembersLiveRange *) data;
MultiXactOffset nextOffset;
if ((segpage == range->rangeStart) ||
(segpage == range->rangeEnd))
return false; /* easy case out */
/*
* To ensure that no segment is spuriously removed, we must keep track
* of new segments added since the start of the directory scan; to do this,
* we update our end-of-range point as we run.
*
* As an optimization, we can skip looking at shared memory if we know for
* certain that the current segment must be kept. This is so because
* nextOffset never decreases, and we never increase rangeStart during any
* one run.
*/
if (!((range->rangeStart > range->rangeEnd &&
segpage > range->rangeEnd && segpage < range->rangeStart) ||
(range->rangeStart < range->rangeEnd &&
(segpage < range->rangeStart || segpage > range->rangeEnd))))
return false;
/*
* Update our idea of the end of the live range.
*/
LWLockAcquire(MultiXactGenLock, LW_SHARED);
nextOffset = MultiXactState->nextOffset;
LWLockRelease(MultiXactGenLock);
range->rangeEnd = MXOffsetToMemberPage(nextOffset);
/* Recheck the deletion condition. If it still holds, perform deletion */
if ((range->rangeStart > range->rangeEnd &&
segpage > range->rangeEnd && segpage < range->rangeStart) ||
(range->rangeStart < range->rangeEnd &&
(segpage < range->rangeStart || segpage > range->rangeEnd)))
SlruDeleteSegment(ctl, filename);
return false; /* keep going */
}
typedef struct mxtruncinfo
{
int earliestExistingPage;
@ -2376,8 +2440,10 @@ void
TruncateMultiXact(MultiXactId oldestMXact)
{
MultiXactOffset oldestOffset;
MultiXactOffset nextOffset;
mxtruncinfo trunc;
MultiXactId earliest;
MembersLiveRange range;
/*
* Note we can't just plow ahead with the truncation; it's possible that
@ -2424,9 +2490,23 @@ TruncateMultiXact(MultiXactId oldestMXact)
SimpleLruTruncate(MultiXactOffsetCtl,
MultiXactIdToOffsetPage(oldestMXact));
/* truncate MultiXactMembers and we're done */
SimpleLruTruncate(MultiXactMemberCtl,
MXOffsetToMemberPage(oldestOffset));
/*
* To truncate MultiXactMembers, we need to figure out the active page
* range and delete all files outside that range. The start point is the
* start of the segment containing the oldest offset; an end point of the
* segment containing the next offset to use is enough. The end point is
* updated as MultiXactMember gets extended concurrently, elsewhere.
*/
range.rangeStart = MXOffsetToMemberPage(oldestOffset);
range.rangeStart -= range.rangeStart % SLRU_PAGES_PER_SEGMENT;
LWLockAcquire(MultiXactGenLock, LW_SHARED);
nextOffset = MultiXactState->nextOffset;
LWLockRelease(MultiXactGenLock);
range.rangeEnd = MXOffsetToMemberPage(nextOffset);
SlruScanDirectory(MultiXactMemberCtl, SlruScanDirCbRemoveMembers, &range);
}
/*

View File

@ -1210,6 +1210,17 @@ restart:;
(void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
}
/*
 * Remove the segment file named "filename" from the directory of the given
 * SLRU (ctl->Dir), logging the removal at DEBUG2.
 *
 * The result of unlink() is intentionally not examined; removal is
 * best-effort.
 */
void
SlruDeleteSegment(SlruCtl ctl, char *filename)
{
	char		path[MAXPGPATH];

	snprintf(path, sizeof(path), "%s/%s", ctl->Dir, filename);

	ereport(DEBUG2,
			(errmsg("removing file \"%s\"", path)));

	(void) unlink(path);
}
/*
* SlruScanDirectory callback
* This callback reports true if there's any segment prior to the one
@ -1235,16 +1246,10 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data
static bool
SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
{
	/* "data" points to the cutoff page number supplied by the caller. */
	int			cutoffPage = *(int *) data;

	/*
	 * Only segments whose first page precedes the cutoff may go.  The
	 * deletion must stay under this guard: calling SlruDeleteSegment
	 * unconditionally would remove every segment visited by the scan,
	 * including those still in use.
	 */
	if (ctl->PagePrecedes(segpage, cutoffPage))
		SlruDeleteSegment(ctl, filename);

	return false;				/* keep going */
}
@ -1256,12 +1261,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
bool
SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
{
	/*
	 * Every segment visited is removed; segpage and data are not consulted.
	 * The removal is done once, through SlruDeleteSegment, so the file is
	 * not unlinked (and the DEBUG2 message not emitted) a second time.
	 */
	SlruDeleteSegment(ctl, filename);

	return false;				/* keep going */
}
@ -1272,6 +1272,11 @@ SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
* If the callback returns true, the scan is stopped. The last return value
* from the callback is returned.
*
* The callback receives the following arguments: 1. the SlruCtl struct for the
* slru being truncated; 2. the filename being considered; 3. the page number
* for the first page of that file; 4. a pointer to the opaque data given to us
* by the caller.
*
* Note that the ordering in which the directory is scanned is not guaranteed.
*
* Note that no locking is applied.

View File

@ -150,6 +150,7 @@ extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);
/*
 * Callback type for SlruScanDirectory.  The callback receives the SlruCtl
 * being scanned, the name of the file under consideration, the page number
 * of the first page of that file, and the opaque data pointer supplied by
 * the caller.  Returning true stops the scan.
 */
typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage,
void *data);
extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data);
/* Remove the file "filename" from the SLRU's directory. */
extern void SlruDeleteSegment(SlruCtl ctl, char *filename);
/* SlruScanDirectory public callbacks */
extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename,