Set the metapage's pd_lower correctly in brin, gin, and spgist indexes.

Previously, these index types left the pd_lower field set to the default
SizeOfPageHeaderData, which is really a lie because it ought to point past
whatever space is being used for metadata.  The coding accidentally failed
to fail because we never told xlog.c that the metapage is of standard
format --- but that's not very good, because it impedes WAL consistency
checking, and in some cases prevents compression of full-page images.

To fix, ensure that we set pd_lower correctly, not only when creating a
metapage but whenever we write it out (these apparently redundant steps are
needed to cope with pg_upgrade'd indexes that don't yet contain the right
value).  This allows telling xlog.c that the page is of standard format.

The WAL consistency check mask functions are made to mask only if pd_lower
appears valid, which I think is likely unnecessary complication, since
any metapage appearing in a v11 WAL stream should contain valid pd_lower.
But it doesn't cost much to be paranoid.

Amit Langote, reviewed by Michael Paquier and Amit Kapila

Discussion: https://postgr.es/m/0d273805-0e9e-ec1a-cb84-d4da400b8f85@lab.ntt.co.jp
This commit is contained in:
Tom Lane 2017-11-02 17:22:08 -04:00
parent 6976a4f05f
commit 81e334ce4e
11 changed files with 125 additions and 35 deletions

View File

@ -685,7 +685,7 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT);
XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT | REGBUF_STANDARD);
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
@ -742,7 +742,7 @@ brinbuildempty(Relation index)
brin_metapage_init(BufferGetPage(metabuf), BrinGetPagesPerRange(index),
BRIN_CURRENT_VERSION);
MarkBufferDirty(metabuf);
log_newpage_buffer(metabuf, false);
log_newpage_buffer(metabuf, true);
END_CRIT_SECTION();
UnlockReleaseBuffer(metabuf);

View File

@ -476,7 +476,7 @@ brin_page_init(Page page, uint16 type)
}
/*
* Initialize a new BRIN index' metapage.
* Initialize a new BRIN index's metapage.
*/
void
brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
@ -497,6 +497,14 @@ brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
* revmap page to be created when the index is.
*/
metadata->lastRevmapPage = 0;
/*
* Set pd_lower just past the end of the metadata. This is essential,
* because without doing so, metadata will be lost if xlog.c compresses
* the page.
*/
((PageHeader) page)->pd_lower =
((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
}
/*

View File

@ -615,7 +615,7 @@ revmap_physical_extend(BrinRevmap *revmap)
/*
* Ok, we have now locked the metapage and the target block. Re-initialize
* it as a revmap page.
* the target block as a revmap page, and update the metapage.
*/
START_CRIT_SECTION();
@ -624,6 +624,17 @@ revmap_physical_extend(BrinRevmap *revmap)
MarkBufferDirty(buf);
metadata->lastRevmapPage = mapBlk;
/*
* Set pd_lower just past the end of the metadata. This is essential,
* because without doing so, metadata will be lost if xlog.c compresses
* the page. (We must do this here because pre-v11 versions of PG did not
* set the metapage's pd_lower correctly, so a pg_upgraded index might
* contain the wrong value.)
*/
((PageHeader) metapage)->pd_lower =
((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapage;
MarkBufferDirty(revmap->rm_metaBuf);
if (RelationNeedsWAL(revmap->rm_irel))
@ -635,7 +646,7 @@ revmap_physical_extend(BrinRevmap *revmap)
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
XLogRegisterBuffer(0, revmap->rm_metaBuf, REGBUF_STANDARD);
XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);

View File

@ -234,6 +234,17 @@ brin_xlog_revmap_extend(XLogReaderState *record)
metadata->lastRevmapPage = xlrec->targetBlk;
PageSetLSN(metapg, lsn);
/*
* Set pd_lower just past the end of the metadata. This is essential,
* because without doing so, metadata will be lost if xlog.c
* compresses the page. (We must do this here because pre-v11
* versions of PG did not set the metapage's pd_lower correctly, so a
* pg_upgraded index might contain the wrong value.)
*/
((PageHeader) metapg)->pd_lower =
((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapg;
MarkBufferDirty(metabuf);
}
@ -331,14 +342,20 @@ void
brin_mask(char *pagedata, BlockNumber blkno)
{
Page page = (Page) pagedata;
PageHeader pagehdr = (PageHeader) page;
mask_page_lsn_and_checksum(page);
mask_page_hint_bits(page);
if (BRIN_IS_REGULAR_PAGE(page))
/*
* Regular brin pages contain unused space which needs to be masked.
* Similarly for meta pages, but mask it only if pd_lower appears to have
* been set correctly.
*/
if (BRIN_IS_REGULAR_PAGE(page) ||
(BRIN_IS_META_PAGE(page) && pagehdr->pd_lower > SizeOfPageHeaderData))
{
/* Regular brin pages contain unused space which needs to be masked. */
mask_unused_space(page);
}
}

View File

@ -396,6 +396,16 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
MarkBufferDirty(buffer);
}
/*
* Set pd_lower just past the end of the metadata. This is essential,
* because without doing so, metadata will be lost if xlog.c compresses
* the page. (We must do this here because pre-v11 versions of PG did not
* set the metapage's pd_lower correctly, so a pg_upgraded index might
* contain the wrong value.)
*/
((PageHeader) metapage)->pd_lower =
((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage;
/*
* Write metabuffer, make xlog entry
*/
@ -407,7 +417,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
@ -572,6 +582,16 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
metadata->nPendingHeapTuples = 0;
}
/*
* Set pd_lower just past the end of the metadata. This is essential,
* because without doing so, metadata will be lost if xlog.c
* compresses the page. (We must do this here because pre-v11
* versions of PG did not set the metapage's pd_lower correctly, so a
* pg_upgraded index might contain the wrong value.)
*/
((PageHeader) metapage)->pd_lower =
((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage;
MarkBufferDirty(metabuffer);
for (i = 0; i < data.ndeleted; i++)
@ -586,7 +606,8 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
XLogRecPtr recptr;
XLogBeginInsert();
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
XLogRegisterBuffer(0, metabuffer,
REGBUF_WILL_INIT | REGBUF_STANDARD);
for (i = 0; i < data.ndeleted; i++)
XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);
@ -968,7 +989,6 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
if (fsm_vac && fill_fsm)
IndexFreeSpaceMapVacuum(index);
/* Clean up temporary space */
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(opCtx);

View File

@ -348,7 +348,7 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Page page;
XLogBeginInsert();
XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT);
XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
@ -447,7 +447,7 @@ ginbuildempty(Relation index)
START_CRIT_SECTION();
GinInitMetabuffer(MetaBuffer);
MarkBufferDirty(MetaBuffer);
log_newpage_buffer(MetaBuffer, false);
log_newpage_buffer(MetaBuffer, true);
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
log_newpage_buffer(RootBuffer, false);

View File

@ -374,6 +374,14 @@ GinInitMetabuffer(Buffer b)
metadata->nDataPages = 0;
metadata->nEntries = 0;
metadata->ginVersion = GIN_CURRENT_VERSION;
/*
* Set pd_lower just past the end of the metadata. This is essential,
* because without doing so, metadata will be lost if xlog.c compresses
* the page.
*/
((PageHeader) page)->pd_lower =
((char *) metadata + sizeof(GinMetaPageData)) - (char *) page;
}
/*
@ -676,6 +684,16 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
metadata->nDataPages = stats->nDataPages;
metadata->nEntries = stats->nEntries;
/*
* Set pd_lower just past the end of the metadata. This is essential,
* because without doing so, metadata will be lost if xlog.c compresses
* the page. (We must do this here because pre-v11 versions of PG did not
* set the metapage's pd_lower correctly, so a pg_upgraded index might
* contain the wrong value.)
*/
((PageHeader) metapage)->pd_lower =
((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage;
MarkBufferDirty(metabuffer);
if (RelationNeedsWAL(index))
@ -690,7 +708,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
XLogBeginInsert();
XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
PageSetLSN(metapage, recptr);

View File

@ -514,7 +514,7 @@ ginRedoUpdateMetapage(XLogReaderState *record)
Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer);
GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
GinInitMetabuffer(metabuffer);
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
PageSetLSN(metapage, lsn);
MarkBufferDirty(metabuffer);
@ -656,7 +656,7 @@ ginRedoDeleteListPages(XLogReaderState *record)
Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer);
GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
GinInitMetabuffer(metabuffer);
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
PageSetLSN(metapage, lsn);
@ -768,6 +768,7 @@ void
gin_mask(char *pagedata, BlockNumber blkno)
{
Page page = (Page) pagedata;
PageHeader pagehdr = (PageHeader) page;
GinPageOpaque opaque;
mask_page_lsn_and_checksum(page);
@ -776,18 +777,12 @@ gin_mask(char *pagedata, BlockNumber blkno)
mask_page_hint_bits(page);
/*
* GIN metapage doesn't use pd_lower/pd_upper. Other page types do. Hence,
* we need to apply masking for those pages.
* For a GIN_DELETED page, the page is initialized to empty. Hence, mask
* the whole page content. For other pages, mask the hole if pd_lower
* appears to have been set correctly.
*/
if (opaque->flags != GIN_META)
{
/*
* For GIN_DELETED page, the page is initialized to empty. Hence, mask
* the page content.
*/
if (opaque->flags & GIN_DELETED)
mask_page_content(page);
else
mask_unused_space(page);
}
if (opaque->flags & GIN_DELETED)
mask_page_content(page);
else if (pagehdr->pd_lower > SizeOfPageHeaderData)
mask_unused_space(page);
}

View File

@ -110,7 +110,7 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Replay will re-initialize the pages, so don't take full pages
* images. No other data to log.
*/
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
@ -173,7 +173,7 @@ spgbuildempty(Relation index)
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
(char *) page, true);
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
SPGIST_METAPAGE_BLKNO, page, false);
SPGIST_METAPAGE_BLKNO, page, true);
/* Likewise for the root page. */
SpGistInitPage(page, SPGIST_LEAF);

View File

@ -256,15 +256,27 @@ SpGistUpdateMetaPage(Relation index)
if (cache != NULL)
{
Buffer metabuffer;
SpGistMetaPageData *metadata;
metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
if (ConditionalLockBuffer(metabuffer))
{
metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));
Page metapage = BufferGetPage(metabuffer);
SpGistMetaPageData *metadata = SpGistPageGetMeta(metapage);
metadata->lastUsedPages = cache->lastUsedPages;
/*
* Set pd_lower just past the end of the metadata. This is
* essential, because without doing so, metadata will be lost if
* xlog.c compresses the page. (We must do this here because
* pre-v11 versions of PG did not set the metapage's pd_lower
* correctly, so a pg_upgraded index might contain the wrong
* value.)
*/
((PageHeader) metapage)->pd_lower =
((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) metapage;
MarkBufferDirty(metabuffer);
UnlockReleaseBuffer(metabuffer);
}
@ -534,6 +546,14 @@ SpGistInitMetapage(Page page)
/* initialize last-used-page cache to empty */
for (i = 0; i < SPGIST_CACHED_PAGES; i++)
metadata->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber;
/*
* Set pd_lower just past the end of the metadata. This is essential,
* because without doing so, metadata will be lost if xlog.c compresses
* the page.
*/
((PageHeader) page)->pd_lower =
((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) page;
}
/*

View File

@ -1033,15 +1033,16 @@ void
spg_mask(char *pagedata, BlockNumber blkno)
{
Page page = (Page) pagedata;
PageHeader pagehdr = (PageHeader) page;
mask_page_lsn_and_checksum(page);
mask_page_hint_bits(page);
/*
* Any SpGist page other than meta contains unused space which needs to be
* masked.
* Mask the unused space, but only if the page's pd_lower appears to have
* been set correctly.
*/
if (!SpGistPageIsMeta(page))
if (pagehdr->pd_lower > SizeOfPageHeaderData)
mask_unused_space(page);
}