Fix pg_upgrade failure from servers older than 9.3

When upgrading from a server of version 9.2 or older in which MultiXactIds
had been used beyond the first page (that is, 2048 multis or more in the
default 8kB-page build), pg_upgrade would set the next multixact offset to
a value beyond what had been allocated in the new cluster.  This would
cause a failure the first time the new cluster needed to use this value,
because the pg_multixact/offsets/ file wouldn't exist or wouldn't be large
enough.  To fix, ensure that the transient server instances launched by
pg_upgrade extend the file as necessary.

Per report from Jesse Denardo in
CANiVXAj4c88YqipsyFQPboqMudnjcNTdB3pqe8ReXqAFQ=HXyA@mail.gmail.com
This commit is contained in:
Alvaro Herrera 2013-08-19 12:33:07 -04:00
parent 1bc5935b67
commit 78e1220104
3 changed files with 92 additions and 0 deletions

View File

@ -1722,6 +1722,46 @@ ZeroMultiXactMemberPage(int pageno, bool writeXlog)
return slotno;
}
/*
 * MaybeExtendOffsetSlru
 *		Create the offsets SLRU page for the next MultiXactId if it is
 *		missing on disk.
 *
 * Following a binary upgrade from a <= 9.2 server, the files under
 * pg_multixact/offsets may be shorter than required.  That happens when the
 * old installation had consumed multixacts past the first page: the old
 * files cannot simply be copied over, since their on-disk representation
 * differs.  pg_upgrade sets the next offset value in pg_control to that
 * later position, so tuples marked as locked by those old MultiXacts are
 * treated as visible without consulting multixact at all.  Creating and
 * using a brand new MultiXactId, however, would fail because the page that
 * should hold it does not exist.  This routine creates that page.
 */
static void
MaybeExtendOffsetSlru(void)
{
	int			nextpage = MultiXactIdToOffsetPage(MultiXactState->nextMXact);

	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);

	if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, nextpage))
	{
		/*
		 * Zero a buffer slot for the page and write it straight out.
		 * SimpleLruWritePage already copes with creating a new segment file
		 * even when the page being written is not the first one in it, so
		 * nothing more is needed here.
		 */
		SimpleLruWritePage(MultiXactOffsetCtl,
						   ZeroMultiXactOffsetPage(nextpage, false));
	}

	LWLockRelease(MultiXactOffsetControlLock);
}
/*
* This must be called ONCE during postmaster or standalone-backend startup.
*
@ -1742,6 +1782,13 @@ StartupMultiXact(void)
int entryno;
int flagsoff;
/*
* During a binary upgrade, make sure that the offsets SLRU is large
* enough to contain the next value that would be created.
*/
if (IsBinaryUpgrade)
MaybeExtendOffsetSlru();
/* Clean up offsets state */
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);

View File

@ -563,6 +563,50 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
SlruInternalWritePage(ctl, slotno, NULL);
}
/*
 * Return whether the given page exists on disk.
 *
 * ctl identifies the SLRU area; pageno is the page to look for.
 *
 * A false return means that either the file does not exist, or that it's not
 * large enough to contain the given page.  Any other failure to open or
 * measure the segment file is handed to SlruReportIOError.
 */
bool
SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
{
	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
	int			offset = rpageno * BLCKSZ;	/* byte offset within segment */
	char		path[MAXPGPATH];
	int			fd;
	bool		result;
	off_t		endpos;

	SlruFileName(ctl, path, segno);

	/*
	 * We only measure the file's length, so open it read-only: asking for
	 * write access would make this probe fail needlessly on a file we are
	 * allowed to read but not to modify.
	 */
	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
	if (fd < 0)
	{
		/* expected: file doesn't exist */
		if (errno == ENOENT)
			return false;

		/*
		 * Report error normally.  The code below uses fd unconditionally, so
		 * SlruReportIOError is presumably expected not to return here.
		 */
		slru_errcause = SLRU_OPEN_FAILED;
		slru_errno = errno;
		SlruReportIOError(ctl, pageno, 0);
	}

	if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
	{
		/*
		 * NOTE(review): this is a seek failure but it is reported with the
		 * "open failed" cause; if a seek-specific cause code is available in
		 * this file, it would yield a more accurate message -- confirm.
		 */
		slru_errcause = SLRU_OPEN_FAILED;
		slru_errno = errno;
		SlruReportIOError(ctl, pageno, 0);
	}

	/* The page exists only if the file extends through the page's last byte */
	result = endpos >= (off_t) (offset + BLCKSZ);

	CloseTransientFile(fd);

	return result;
}
/*
* Physical read of a (previously existing) page into a buffer slot

View File

@ -145,6 +145,7 @@ extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint);
extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);
/* Reports whether pageno is fully contained in its on-disk segment file */
extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);
typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage,
void *data);