Patch summary (descriptive text only; patch tools skip any text that precedes
the first "diff --git" header, so this preamble does not affect application).

src/backend/access/transam/multixact.c
  * MultiXactIdSetOldestMember: acquire MultiXactGenLock in LW_SHARED instead
    of LW_EXCLUSIVE mode, and extend the comment to explain why that suffices.
  * AtPrepare_MultiXact: fix the "tranasction" typo in the header comment.
  * Add struct MembersLiveRange and the SlruScanDirectory callback
    SlruScanDirCbRemoveMembers.  The callback keeps the segments at
    rangeStart/rangeEnd, handles a live range that wraps around
    (rangeStart > rangeEnd), and re-reads MultiXactState->nextOffset under
    MultiXactGenLock to refresh rangeEnd before it deletes any segment.
  * TruncateMultiXact: replace SimpleLruTruncate(MultiXactMemberCtl, ...) with
    a SlruScanDirectory pass that uses the new callback; rangeStart is rounded
    down to the first page of its segment.

src/backend/access/transam/slru.c, src/include/access/slru.h
  * Move the duplicated "build path, log at DEBUG2, unlink" code from
    SlruScanDirCbDeleteCutoff and SlruScanDirCbDeleteAll into a new exported
    function SlruDeleteSegment(ctl, filename).  The unlink() result is not
    checked, same as in the code it replaces.
  * Document the four arguments passed to a SlruScanDirectory callback.

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 03581bea66..60c3370ece 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -577,8 +577,13 @@ MultiXactIdSetOldestMember(void) * another someone else could compute an OldestVisibleMXactId that * would be after the value we are going to store when we get control * back. Which would be wrong. + * + * Note that a shared lock is sufficient, because it's enough to stop + * someone from advancing nextMXact; and nobody else could be trying to + * write to our OldestMember entry, only reading (and we assume storing + * it is atomic.) */ - LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); + LWLockAcquire(MultiXactGenLock, LW_SHARED); /* * We have to beware of the possibility that nextMXact is in the @@ -1559,7 +1564,7 @@ AtEOXact_MultiXact(void) /* * AtPrepare_MultiXact - * Save multixact state at 2PC tranasction prepare + * Save multixact state at 2PC transaction prepare * * In this phase, we only store our OldestMemberMXactId value in the two-phase * state file. @@ -2335,6 +2340,65 @@ GetOldestMultiXactId(void) return oldestMXact; } +/* + * SlruScanDirectory callback. + * This callback deletes segments that are outside the range determined by + * the given page numbers. + * + * Both range endpoints are exclusive (that is, segments containing any of + * those pages are kept.) 
+ */ +typedef struct MembersLiveRange +{ + int rangeStart; + int rangeEnd; +} MembersLiveRange; + +static bool +SlruScanDirCbRemoveMembers(SlruCtl ctl, char *filename, int segpage, + void *data) +{ + MembersLiveRange *range = (MembersLiveRange *) data; + MultiXactOffset nextOffset; + + if ((segpage == range->rangeStart) || + (segpage == range->rangeEnd)) + return false; /* easy case out */ + + /* + * To ensure that no segment is spuriously removed, we must keep track + * of new segments added since the start of the directory scan; to do this, + * we update our end-of-range point as we run. + * + * As an optimization, we can skip looking at shared memory if we know for + * certain that the current segment must be kept. This is so because + * nextOffset never decreases, and we never increase rangeStart during any + * one run. + */ + if (!((range->rangeStart > range->rangeEnd && + segpage > range->rangeEnd && segpage < range->rangeStart) || + (range->rangeStart < range->rangeEnd && + (segpage < range->rangeStart || segpage > range->rangeEnd)))) + return false; + + /* + * Update our idea of the end of the live range. + */ + LWLockAcquire(MultiXactGenLock, LW_SHARED); + nextOffset = MultiXactState->nextOffset; + LWLockRelease(MultiXactGenLock); + range->rangeEnd = MXOffsetToMemberPage(nextOffset); + + /* Recheck the deletion condition. 
 If it still holds, perform deletion */ + if ((range->rangeStart > range->rangeEnd && + segpage > range->rangeEnd && segpage < range->rangeStart) || + (range->rangeStart < range->rangeEnd && + (segpage < range->rangeStart || segpage > range->rangeEnd))) + SlruDeleteSegment(ctl, filename); + + return false; /* keep going */ +} + typedef struct mxtruncinfo { int earliestExistingPage; @@ -2376,8 +2440,10 @@ void TruncateMultiXact(MultiXactId oldestMXact) { MultiXactOffset oldestOffset; + MultiXactOffset nextOffset; mxtruncinfo trunc; MultiXactId earliest; + MembersLiveRange range; /* * Note we can't just plow ahead with the truncation; it's possible that @@ -2424,9 +2490,23 @@ TruncateMultiXact(MultiXactId oldestMXact) SimpleLruTruncate(MultiXactOffsetCtl, MultiXactIdToOffsetPage(oldestMXact)); - /* truncate MultiXactMembers and we're done */ - SimpleLruTruncate(MultiXactMemberCtl, - MXOffsetToMemberPage(oldestOffset)); + /* + * To truncate MultiXactMembers, we need to figure out the active page + * range and delete all files outside that range. The start point is the + * start of the segment containing the oldest offset; an end point of the + * segment containing the next offset to use is enough. The end point is + * updated as MultiXactMember gets extended concurrently, elsewhere. 
+ */ + range.rangeStart = MXOffsetToMemberPage(oldestOffset); + range.rangeStart -= range.rangeStart % SLRU_PAGES_PER_SEGMENT; + + LWLockAcquire(MultiXactGenLock, LW_SHARED); + nextOffset = MultiXactState->nextOffset; + LWLockRelease(MultiXactGenLock); + + range.rangeEnd = MXOffsetToMemberPage(nextOffset); + + SlruScanDirectory(MultiXactMemberCtl, SlruScanDirCbRemoveMembers, &range); } /* diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 5e53593a8f..9dc566e162 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1210,6 +1210,17 @@ restart:; (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage); } +void +SlruDeleteSegment(SlruCtl ctl, char *filename) +{ + char path[MAXPGPATH]; + + snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename); + ereport(DEBUG2, + (errmsg("removing file \"%s\"", path))); + unlink(path); +} + /* * SlruScanDirectory callback * This callback reports true if there's any segment prior to the one @@ -1235,16 +1246,10 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) { - char path[MAXPGPATH]; int cutoffPage = *(int *) data; if (ctl->PagePrecedes(segpage, cutoffPage)) - { - snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename); - ereport(DEBUG2, - (errmsg("removing file \"%s\"", path))); - unlink(path); - } + SlruDeleteSegment(ctl, filename); return false; /* keep going */ } @@ -1256,12 +1261,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data) { - char path[MAXPGPATH]; - - snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename); - ereport(DEBUG2, - (errmsg("removing file \"%s\"", path))); - unlink(path); + SlruDeleteSegment(ctl, filename); return false; /* keep going */ } @@ -1272,6 +1272,11 @@ 
 SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data) * If the callback returns true, the scan is stopped. The last return value * from the callback is returned. * + * The callback receives the following arguments: 1. the SlruCtl struct for the + * slru being truncated; 2. the filename being considered; 3. the page number + * for the first page of that file; 4. a pointer to the opaque data given to us + * by the caller. + * * Note that the ordering in which the directory is scanned is not guaranteed. * * Note that no locking is applied. diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 7e81e0f113..fc2c5035a5 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -150,6 +150,7 @@ extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage, void *data); extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data); +extern void SlruDeleteSegment(SlruCtl ctl, char *filename); /* SlruScanDirectory public callbacks */ extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename,