/*-------------------------------------------------------------------------
 *
 * hash_xlog.c
 *	  WAL replay logic for hash index.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hash_xlog.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2017-03-14 19:58:56 +01:00
|
|
|
#include "access/bufmask.h"
|
2017-03-14 18:27:02 +01:00
|
|
|
#include "access/hash.h"
|
|
|
|
#include "access/hash_xlog.h"
|
2017-03-16 03:18:56 +01:00
|
|
|
#include "access/transam.h"
|
2019-11-12 04:00:16 +01:00
|
|
|
#include "access/xlog.h"
|
|
|
|
#include "access/xlogutils.h"
|
2017-03-16 03:18:56 +01:00
|
|
|
#include "miscadmin.h"
|
2019-11-12 04:00:16 +01:00
|
|
|
#include "storage/procarray.h"
|
2017-03-14 18:27:02 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* replay a hash index meta page
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
hash_xlog_init_meta_page(XLogReaderState *record)
|
|
|
|
{
|
|
|
|
XLogRecPtr lsn = record->EndRecPtr;
|
|
|
|
Page page;
|
|
|
|
Buffer metabuf;
|
2017-07-17 18:03:35 +02:00
|
|
|
ForkNumber forknum;
|
2017-03-14 18:27:02 +01:00
|
|
|
|
|
|
|
xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
|
|
|
|
|
|
|
|
/* create the index' metapage */
|
|
|
|
metabuf = XLogInitBufferForRedo(record, 0);
|
|
|
|
Assert(BufferIsValid(metabuf));
|
|
|
|
_hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
|
|
|
|
xlrec->ffactor, true);
|
|
|
|
page = (Page) BufferGetPage(metabuf);
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
MarkBufferDirty(metabuf);
|
2017-07-17 18:03:35 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Force the on-disk state of init forks to always be in sync with the
|
|
|
|
* state in shared buffers. See XLogReadBufferForRedoExtended. We need
|
|
|
|
* special handling for init forks as create index operations don't log a
|
|
|
|
* full page image of the metapage.
|
|
|
|
*/
|
|
|
|
XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
|
|
|
|
if (forknum == INIT_FORKNUM)
|
|
|
|
FlushOneBuffer(metabuf);
|
|
|
|
|
2017-03-14 18:27:02 +01:00
|
|
|
/* all done */
|
|
|
|
UnlockReleaseBuffer(metabuf);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * replay a hash index bitmap page
 *
 * Block 0 is the freshly-initialized bitmap page; block 1 (optional redo)
 * is the metapage, which must record the new bitmap page in hashm_mapp[].
 */
static void
hash_xlog_init_bitmap_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	Buffer		bitmapbuf;
	Buffer		metabuf;
	Page		page;
	HashMetaPage metap;
	uint32		num_buckets;
	ForkNumber	forknum;

	xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);

	/*
	 * Initialize bitmap page
	 */
	bitmapbuf = XLogInitBufferForRedo(record, 0);
	_hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
	PageSetLSN(BufferGetPage(bitmapbuf), lsn);
	MarkBufferDirty(bitmapbuf);

	/*
	 * Force the on-disk state of init forks to always be in sync with the
	 * state in shared buffers. See XLogReadBufferForRedoExtended. We need
	 * special handling for init forks as create index operations don't log a
	 * full page image of the metapage.
	 */
	XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
	if (forknum == INIT_FORKNUM)
		FlushOneBuffer(bitmapbuf);
	UnlockReleaseBuffer(bitmapbuf);

	/* add the new bitmap page to the metapage's list of bitmaps */
	if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
	{
		/*
		 * Note: in normal operation, we'd update the metapage while still
		 * holding lock on the bitmap page. But during replay it's not
		 * necessary to hold that lock, since nobody can see it yet; the
		 * creating transaction hasn't yet committed.
		 */
		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);

		/*
		 * The bitmap page's block number is num_buckets + 1 — presumably
		 * because it directly follows the initial bucket pages; confirm
		 * against _hash_init().
		 */
		num_buckets = metap->hashm_maxbucket + 1;
		metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
		metap->hashm_nmaps++;

		PageSetLSN(page, lsn);
		MarkBufferDirty(metabuf);

		/* Same init-fork flush rule applies to the metapage itself. */
		XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
		if (forknum == INIT_FORKNUM)
			FlushOneBuffer(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* replay a hash index insert without split
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
hash_xlog_insert(XLogReaderState *record)
|
|
|
|
{
|
|
|
|
HashMetaPage metap;
|
|
|
|
XLogRecPtr lsn = record->EndRecPtr;
|
|
|
|
xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
|
|
|
|
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
|
|
|
{
|
|
|
|
Size datalen;
|
|
|
|
char *datapos = XLogRecGetBlockData(record, 0, &datalen);
|
|
|
|
|
|
|
|
page = BufferGetPage(buffer);
|
|
|
|
|
|
|
|
if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
|
|
|
|
false, false) == InvalidOffsetNumber)
|
|
|
|
elog(PANIC, "hash_xlog_insert: failed to add item");
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
}
|
|
|
|
if (BufferIsValid(buffer))
|
|
|
|
UnlockReleaseBuffer(buffer);
|
|
|
|
|
|
|
|
if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Note: in normal operation, we'd update the metapage while still
|
|
|
|
* holding lock on the page we inserted into. But during replay it's
|
|
|
|
* not necessary to hold that lock, since no other index updates can
|
|
|
|
* be happening concurrently.
|
|
|
|
*/
|
|
|
|
page = BufferGetPage(buffer);
|
|
|
|
metap = HashPageGetMeta(page);
|
|
|
|
metap->hashm_ntuples += 1;
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
}
|
|
|
|
if (BufferIsValid(buffer))
|
|
|
|
UnlockReleaseBuffer(buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * replay addition of overflow page for hash index
 *
 * Block refs: 0 = new overflow page, 1 = the page it is linked after,
 * 2 = existing bitmap page (optional), 3 = new bitmap page (optional),
 * 4 = metapage.
 */
static void
hash_xlog_add_ovfl_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
	Buffer		leftbuf;
	Buffer		ovflbuf;
	Buffer		metabuf;
	BlockNumber leftblk;
	BlockNumber rightblk;
	BlockNumber newmapblk = InvalidBlockNumber;
	Page		ovflpage;
	HashPageOpaque ovflopaque;
	uint32	   *num_bucket;
	char	   *data;
	Size		datalen PG_USED_FOR_ASSERTS_ONLY;
	bool		new_bmpage = false;

	/* Block numbers of the new overflow page and its left neighbor. */
	XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
	XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);

	ovflbuf = XLogInitBufferForRedo(record, 0);
	Assert(BufferIsValid(ovflbuf));

	/* Block 0's payload is just the bucket number for the new page. */
	data = XLogRecGetBlockData(record, 0, &datalen);
	num_bucket = (uint32 *) data;
	Assert(datalen == sizeof(uint32));
	_hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
				  true);
	/* update backlink */
	ovflpage = BufferGetPage(ovflbuf);
	ovflopaque = HashPageGetOpaque(ovflpage);
	ovflopaque->hasho_prevblkno = leftblk;

	PageSetLSN(ovflpage, lsn);
	MarkBufferDirty(ovflbuf);

	/* Link the left page forward to the new overflow page. */
	if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
	{
		Page		leftpage;
		HashPageOpaque leftopaque;

		leftpage = BufferGetPage(leftbuf);
		leftopaque = HashPageGetOpaque(leftpage);
		leftopaque->hasho_nextblkno = rightblk;

		PageSetLSN(leftpage, lsn);
		MarkBufferDirty(leftbuf);
	}

	if (BufferIsValid(leftbuf))
		UnlockReleaseBuffer(leftbuf);
	UnlockReleaseBuffer(ovflbuf);

	/*
	 * Note: in normal operation, we'd update the bitmap and meta page while
	 * still holding lock on the overflow pages. But during replay it's not
	 * necessary to hold those locks, since no other index updates can be
	 * happening concurrently.
	 */
	if (XLogRecHasBlockRef(record, 2))
	{
		Buffer		mapbuffer;

		if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
		{
			Page		mappage = (Page) BufferGetPage(mapbuffer);
			uint32	   *freep = NULL;
			uint32	   *bitmap_page_bit;

			freep = HashPageGetBitmap(mappage);

			/* Block 2's payload is the bit number to set (page now in use). */
			data = XLogRecGetBlockData(record, 2, &datalen);
			bitmap_page_bit = (uint32 *) data;

			SETBIT(freep, *bitmap_page_bit);

			PageSetLSN(mappage, lsn);
			MarkBufferDirty(mapbuffer);
		}
		if (BufferIsValid(mapbuffer))
			UnlockReleaseBuffer(mapbuffer);
	}

	/* A brand-new bitmap page may also have been created by this record. */
	if (XLogRecHasBlockRef(record, 3))
	{
		Buffer		newmapbuf;

		newmapbuf = XLogInitBufferForRedo(record, 3);

		_hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);

		/* Remember it so the metapage redo below can register it. */
		new_bmpage = true;
		newmapblk = BufferGetBlockNumber(newmapbuf);

		MarkBufferDirty(newmapbuf);
		PageSetLSN(BufferGetPage(newmapbuf), lsn);

		UnlockReleaseBuffer(newmapbuf);
	}

	if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
	{
		HashMetaPage metap;
		Page		page;
		uint32	   *firstfree_ovflpage;

		/* Block 4's payload is the new first-free overflow page number. */
		data = XLogRecGetBlockData(record, 4, &datalen);
		firstfree_ovflpage = (uint32 *) data;

		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);
		metap->hashm_firstfree = *firstfree_ovflpage;

		/*
		 * If the overflow page was not recycled from the bitmap, it extends
		 * the current splitpoint; account for it (and any new bitmap page)
		 * in hashm_spares.
		 */
		if (!xlrec->bmpage_found)
		{
			metap->hashm_spares[metap->hashm_ovflpoint]++;

			if (new_bmpage)
			{
				Assert(BlockNumberIsValid(newmapblk));

				metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
				metap->hashm_nmaps++;
				metap->hashm_spares[metap->hashm_ovflpoint]++;
			}
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
|
|
|
|
|
|
|
|
/*
 * replay allocation of page for split operation
 *
 * Block refs: 0 = old bucket's primary page, 1 = new bucket's primary page
 * (always re-initialized), 2 = metapage.
 */
static void
hash_xlog_split_allocate_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
	Buffer		oldbuf;
	Buffer		newbuf;
	Buffer		metabuf;
	Size		datalen PG_USED_FOR_ASSERTS_ONLY;
	char	   *data;
	XLogRedoAction action;

	/*
	 * To be consistent with normal operation, here we take cleanup locks on
	 * both the old and new buckets even though there can't be any concurrent
	 * inserts.
	 */

	/* replay the record for old bucket */
	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the special space is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		oldpage;
		HashPageOpaque oldopaque;

		oldpage = BufferGetPage(oldbuf);
		oldopaque = HashPageGetOpaque(oldpage);

		/* hasho_prevblkno of a bucket's primary page holds the split point */
		oldopaque->hasho_flag = xlrec->old_bucket_flag;
		oldopaque->hasho_prevblkno = xlrec->new_bucket;

		PageSetLSN(oldpage, lsn);
		MarkBufferDirty(oldbuf);
	}

	/* replay the record for new bucket */
	XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
								  &newbuf);
	_hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
				  xlrec->new_bucket_flag, true);
	MarkBufferDirty(newbuf);
	PageSetLSN(BufferGetPage(newbuf), lsn);

	/*
	 * We can release the lock on old bucket early as well but doing here to
	 * consistent with normal operation.
	 */
	if (BufferIsValid(oldbuf))
		UnlockReleaseBuffer(oldbuf);
	if (BufferIsValid(newbuf))
		UnlockReleaseBuffer(newbuf);

	/*
	 * Note: in normal operation, we'd update the meta page while still
	 * holding lock on the old and new bucket pages. But during replay it's
	 * not necessary to hold those locks, since no other bucket splits can be
	 * happening concurrently.
	 */

	/* replay the record for metapage changes */
	if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
	{
		Page		page;
		HashMetaPage metap;

		page = BufferGetPage(metabuf);
		metap = HashPageGetMeta(page);
		metap->hashm_maxbucket = xlrec->new_bucket;

		/*
		 * Block 2's payload is a variable-length sequence: optional mask
		 * pair, then optional splitpoint info, selected by xlrec->flags.
		 */
		data = XLogRecGetBlockData(record, 2, &datalen);

		if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
		{
			uint32		lowmask;
			uint32	   *highmask;

			/* extract low and high masks. */
			memcpy(&lowmask, data, sizeof(uint32));
			highmask = (uint32 *) ((char *) data + sizeof(uint32));

			/* update metapage */
			metap->hashm_lowmask = lowmask;
			metap->hashm_highmask = *highmask;

			data += sizeof(uint32) * 2;
		}

		if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
		{
			uint32		ovflpoint;
			uint32	   *ovflpages;

			/* extract information of overflow pages. */
			memcpy(&ovflpoint, data, sizeof(uint32));
			ovflpages = (uint32 *) ((char *) data + sizeof(uint32));

			/* update metapage */
			metap->hashm_spares[ovflpoint] = *ovflpages;
			metap->hashm_ovflpoint = ovflpoint;
		}

		MarkBufferDirty(metabuf);
		PageSetLSN(BufferGetPage(metabuf), lsn);
	}

	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* replay of split operation
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
hash_xlog_split_page(XLogReaderState *record)
|
|
|
|
{
|
|
|
|
Buffer buf;
|
|
|
|
|
|
|
|
if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
|
|
|
|
elog(ERROR, "Hash split record did not contain a full-page image");
|
|
|
|
|
|
|
|
UnlockReleaseBuffer(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * replay completion of split operation
 *
 * Clears/updates the bucket flags on both halves of a finished split:
 * block 0 = old bucket's primary page, block 1 = new bucket's primary page.
 */
static void
hash_xlog_split_complete(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
	Buffer		oldbuf;
	Buffer		newbuf;
	XLogRedoAction action;

	/* replay the record for old bucket */
	action = XLogReadBufferForRedo(record, 0, &oldbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the bucket flag is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		oldpage;
		HashPageOpaque oldopaque;

		oldpage = BufferGetPage(oldbuf);
		oldopaque = HashPageGetOpaque(oldpage);

		oldopaque->hasho_flag = xlrec->old_bucket_flag;

		PageSetLSN(oldpage, lsn);
		MarkBufferDirty(oldbuf);
	}
	if (BufferIsValid(oldbuf))
		UnlockReleaseBuffer(oldbuf);

	/* replay the record for new bucket */
	action = XLogReadBufferForRedo(record, 1, &newbuf);

	/*
	 * Note that we still update the page even if it was restored from a full
	 * page image, because the bucket flag is not included in the image.
	 */
	if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
	{
		Page		newpage;
		HashPageOpaque nopaque;

		newpage = BufferGetPage(newbuf);
		nopaque = HashPageGetOpaque(newpage);

		nopaque->hasho_flag = xlrec->new_bucket_flag;

		PageSetLSN(newpage, lsn);
		MarkBufferDirty(newbuf);
	}
	if (BufferIsValid(newbuf))
		UnlockReleaseBuffer(newbuf);
}
|
|
|
|
|
|
|
|
/*
 * replay move of page contents for squeeze operation of hash index
 *
 * Block refs: 0 = primary bucket page (cleanup-lock only, when distinct from
 * the write page), 1 = page receiving the moved tuples, 2 = overflow page
 * the tuples are deleted from.
 */
static void
hash_xlog_move_page_contents(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		writebuf = InvalidBuffer;
	Buffer		deletebuf = InvalidBuffer;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on primary bucket page before we start
	 * with the actual replay operation. This is to ensure that neither a
	 * scan can start nor a scan can be already-in-progress during the replay
	 * of this operation. If we allow scans during this operation, then they
	 * can miss some records or show the same record multiple times.
	 */
	if (xldata->is_prim_bucket_same_wrt)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
	else
	{
		/*
		 * we don't care for return value as the purpose of reading bucketbuf
		 * is to ensure a cleanup lock on primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		action = XLogReadBufferForRedo(record, 1, &writebuf);
	}

	/* replay the record for adding entries in overflow buffer */
	if (action == BLK_NEEDS_REDO)
	{
		Page		writepage;
		char	   *begin;
		char	   *data;
		Size		datalen;
		uint16		ninserted = 0;

		/*
		 * Block 1's payload layout: an array of ntups target offsets,
		 * followed by the MAXALIGNed index tuples themselves.
		 */
		data = begin = XLogRecGetBlockData(record, 1, &datalen);

		writepage = (Page) BufferGetPage(writebuf);

		if (xldata->ntups > 0)
		{
			OffsetNumber *towrite = (OffsetNumber *) data;

			data += sizeof(OffsetNumber) * xldata->ntups;

			while (data - begin < datalen)
			{
				IndexTuple	itup = (IndexTuple) data;
				Size		itemsz;
				OffsetNumber l;

				itemsz = IndexTupleSize(itup);
				itemsz = MAXALIGN(itemsz);

				data += itemsz;

				l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
				if (l == InvalidOffsetNumber)
					elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
						 (int) itemsz);

				ninserted++;
			}
		}

		/*
		 * number of tuples inserted must be same as requested in REDO record.
		 */
		Assert(ninserted == xldata->ntups);

		PageSetLSN(writepage, lsn);
		MarkBufferDirty(writebuf);
	}

	/* replay the record for deleting entries from overflow buffer */
	if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
	{
		Page		page;
		char	   *ptr;
		Size		len;

		/* Block 2's payload is an array of line-pointer offsets to delete. */
		ptr = XLogRecGetBlockData(record, 2, &len);

		page = (Page) BufferGetPage(deletebuf);

		if (len > 0)
		{
			OffsetNumber *unused;
			OffsetNumber *unend;

			unused = (OffsetNumber *) ptr;
			unend = (OffsetNumber *) ((char *) ptr + len);

			if ((unend - unused) > 0)
				PageIndexMultiDelete(page, unused, unend - unused);
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(deletebuf);
	}

	/*
	 * Replay is complete, now we can release the buffers. We release locks at
	 * end of replay operation to ensure that we hold lock on primary bucket
	 * page till end of operation. We can optimize by releasing the lock on
	 * write buffer as soon as the operation for same is complete, if it is
	 * not same as primary bucket page, but that doesn't seem to be worth
	 * complicating the code.
	 */
	if (BufferIsValid(deletebuf))
		UnlockReleaseBuffer(deletebuf);

	if (BufferIsValid(writebuf))
		UnlockReleaseBuffer(writebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);
}
|
|
|
|
|
|
|
|
/*
 * replay squeeze page operation of hash index
 *
 * Block refs: 0 = primary bucket page (cleanup lock), 1 = write page,
 * 2 = the freed overflow page (re-initialized as unused), 3 = page before
 * the freed page, 4 = page after it, 5 = bitmap page, 6 = metapage.
 */
static void
hash_xlog_squeeze_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		writebuf = InvalidBuffer;
	Buffer		ovflbuf;
	Buffer		prevbuf = InvalidBuffer;
	Buffer		mapbuf;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on primary bucket page before we start
	 * with the actual replay operation. This is to ensure that neither a
	 * scan can start nor a scan can be already-in-progress during the replay
	 * of this operation. If we allow scans during this operation, then they
	 * can miss some records or show the same record multiple times.
	 */
	if (xldata->is_prim_bucket_same_wrt)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
	else
	{
		/*
		 * we don't care for return value as the purpose of reading bucketbuf
		 * is to ensure a cleanup lock on primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		/* The write page is only touched when there is something to redo. */
		if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
			action = XLogReadBufferForRedo(record, 1, &writebuf);
		else
			action = BLK_NOTFOUND;
	}

	/* replay the record for adding entries in overflow buffer */
	if (action == BLK_NEEDS_REDO)
	{
		Page		writepage;
		char	   *begin;
		char	   *data;
		Size		datalen;
		uint16		ninserted = 0;

		/*
		 * Block 1's payload layout: an array of ntups target offsets,
		 * followed by the MAXALIGNed index tuples themselves.
		 */
		data = begin = XLogRecGetBlockData(record, 1, &datalen);

		writepage = (Page) BufferGetPage(writebuf);

		if (xldata->ntups > 0)
		{
			OffsetNumber *towrite = (OffsetNumber *) data;

			data += sizeof(OffsetNumber) * xldata->ntups;

			while (data - begin < datalen)
			{
				IndexTuple	itup = (IndexTuple) data;
				Size		itemsz;
				OffsetNumber l;

				itemsz = IndexTupleSize(itup);
				itemsz = MAXALIGN(itemsz);

				data += itemsz;

				l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false);
				if (l == InvalidOffsetNumber)
					elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
						 (int) itemsz);

				ninserted++;
			}
		}

		/*
		 * number of tuples inserted must be same as requested in REDO record.
		 */
		Assert(ninserted == xldata->ntups);

		/*
		 * if the page on which are adding tuples is a page previous to freed
		 * overflow page, then update its nextblkno.
		 */
		if (xldata->is_prev_bucket_same_wrt)
		{
			HashPageOpaque writeopaque = HashPageGetOpaque(writepage);

			writeopaque->hasho_nextblkno = xldata->nextblkno;
		}

		PageSetLSN(writepage, lsn);
		MarkBufferDirty(writebuf);
	}

	/* replay the record for initializing overflow buffer */
	if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
	{
		Page		ovflpage;
		HashPageOpaque ovflopaque;

		ovflpage = BufferGetPage(ovflbuf);

		/* Wipe the freed overflow page and mark it unused. */
		_hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));

		ovflopaque = HashPageGetOpaque(ovflpage);

		ovflopaque->hasho_prevblkno = InvalidBlockNumber;
		ovflopaque->hasho_nextblkno = InvalidBlockNumber;
		ovflopaque->hasho_bucket = InvalidBucket;
		ovflopaque->hasho_flag = LH_UNUSED_PAGE;
		ovflopaque->hasho_page_id = HASHO_PAGE_ID;

		PageSetLSN(ovflpage, lsn);
		MarkBufferDirty(ovflbuf);
	}
	if (BufferIsValid(ovflbuf))
		UnlockReleaseBuffer(ovflbuf);

	/* replay the record for page previous to the freed overflow page */
	if (!xldata->is_prev_bucket_same_wrt &&
		XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
	{
		Page		prevpage = BufferGetPage(prevbuf);
		HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);

		prevopaque->hasho_nextblkno = xldata->nextblkno;

		PageSetLSN(prevpage, lsn);
		MarkBufferDirty(prevbuf);
	}
	if (BufferIsValid(prevbuf))
		UnlockReleaseBuffer(prevbuf);

	/* replay the record for page next to the freed overflow page */
	if (XLogRecHasBlockRef(record, 4))
	{
		Buffer		nextbuf;

		if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
		{
			Page		nextpage = BufferGetPage(nextbuf);
			HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);

			nextopaque->hasho_prevblkno = xldata->prevblkno;

			PageSetLSN(nextpage, lsn);
			MarkBufferDirty(nextbuf);
		}
		if (BufferIsValid(nextbuf))
			UnlockReleaseBuffer(nextbuf);
	}

	if (BufferIsValid(writebuf))
		UnlockReleaseBuffer(writebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);

	/*
	 * Note: in normal operation, we'd update the bitmap and meta page while
	 * still holding lock on the primary bucket page and overflow pages. But
	 * during replay it's not necessary to hold those locks, since no other
	 * index updates can be happening concurrently.
	 */
	/* replay the record for bitmap page */
	if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
	{
		Page		mappage = (Page) BufferGetPage(mapbuf);
		uint32	   *freep = NULL;
		char	   *data;
		uint32	   *bitmap_page_bit;
		Size		datalen;

		freep = HashPageGetBitmap(mappage);

		/* Block 5's payload is the bit number to clear (page now free). */
		data = XLogRecGetBlockData(record, 5, &datalen);
		bitmap_page_bit = (uint32 *) data;

		CLRBIT(freep, *bitmap_page_bit);

		PageSetLSN(mappage, lsn);
		MarkBufferDirty(mapbuf);
	}
	if (BufferIsValid(mapbuf))
		UnlockReleaseBuffer(mapbuf);

	/* replay the record for meta page */
	if (XLogRecHasBlockRef(record, 6))
	{
		Buffer		metabuf;

		if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
		{
			HashMetaPage metap;
			Page		page;
			char	   *data;
			uint32	   *firstfree_ovflpage;
			Size		datalen;

			/* Block 6's payload is the new first-free overflow page number. */
			data = XLogRecGetBlockData(record, 6, &datalen);
			firstfree_ovflpage = (uint32 *) data;

			page = BufferGetPage(metabuf);
			metap = HashPageGetMeta(page);
			metap->hashm_firstfree = *firstfree_ovflpage;

			PageSetLSN(page, lsn);
			MarkBufferDirty(metabuf);
		}
		if (BufferIsValid(metabuf))
			UnlockReleaseBuffer(metabuf);
	}
}
|
|
|
|
|
|
|
|
/*
 * replay delete operation of hash index
 *
 * Block refs: 0 = primary bucket page (cleanup-lock only, when distinct),
 * 1 = the page entries are deleted from.
 */
static void
hash_xlog_delete(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
	Buffer		bucketbuf = InvalidBuffer;
	Buffer		deletebuf;
	Page		page;
	XLogRedoAction action;

	/*
	 * Ensure we have a cleanup lock on primary bucket page before we start
	 * with the actual replay operation. This is to ensure that neither a
	 * scan can start nor a scan can be already-in-progress during the replay
	 * of this operation. If we allow scans during this operation, then they
	 * can miss some records or show the same record multiple times.
	 */
	if (xldata->is_primary_bucket_page)
		action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
	else
	{
		/*
		 * we don't care for return value as the purpose of reading bucketbuf
		 * is to ensure a cleanup lock on primary bucket page.
		 */
		(void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

		action = XLogReadBufferForRedo(record, 1, &deletebuf);
	}

	/* replay the record for deleting entries in bucket page */
	if (action == BLK_NEEDS_REDO)
	{
		char	   *ptr;
		Size		len;

		/* Block 1's payload is an array of line-pointer offsets to delete. */
		ptr = XLogRecGetBlockData(record, 1, &len);

		page = (Page) BufferGetPage(deletebuf);

		if (len > 0)
		{
			OffsetNumber *unused;
			OffsetNumber *unend;

			unused = (OffsetNumber *) ptr;
			unend = (OffsetNumber *) ((char *) ptr + len);

			if ((unend - unused) > 0)
				PageIndexMultiDelete(page, unused, unend - unused);
		}

		/*
		 * Mark the page as not containing any LP_DEAD items only if
		 * clear_dead_marking flag is set to true. See comments in
		 * hashbucketcleanup() for details.
		 */
		if (xldata->clear_dead_marking)
		{
			HashPageOpaque pageopaque;

			pageopaque = HashPageGetOpaque(page);
			pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(deletebuf);
	}
	if (BufferIsValid(deletebuf))
		UnlockReleaseBuffer(deletebuf);

	if (BufferIsValid(bucketbuf))
		UnlockReleaseBuffer(bucketbuf);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* replay split cleanup flag operation for primary bucket page.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
hash_xlog_split_cleanup(XLogReaderState *record)
|
|
|
|
{
|
|
|
|
XLogRecPtr lsn = record->EndRecPtr;
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
|
|
|
|
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
|
|
|
{
|
|
|
|
HashPageOpaque bucket_opaque;
|
|
|
|
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
|
2022-04-01 06:24:50 +02:00
|
|
|
bucket_opaque = HashPageGetOpaque(page);
|
2017-03-14 18:27:02 +01:00
|
|
|
bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
}
|
|
|
|
if (BufferIsValid(buffer))
|
|
|
|
UnlockReleaseBuffer(buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* replay for update meta page
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
hash_xlog_update_meta_page(XLogReaderState *record)
|
|
|
|
{
|
|
|
|
HashMetaPage metap;
|
|
|
|
XLogRecPtr lsn = record->EndRecPtr;
|
|
|
|
xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
|
|
|
|
Buffer metabuf;
|
|
|
|
Page page;
|
|
|
|
|
|
|
|
if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
|
|
|
|
{
|
|
|
|
page = BufferGetPage(metabuf);
|
|
|
|
metap = HashPageGetMeta(page);
|
|
|
|
|
|
|
|
metap->hashm_ntuples = xldata->ntuples;
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
MarkBufferDirty(metabuf);
|
|
|
|
}
|
|
|
|
if (BufferIsValid(metabuf))
|
|
|
|
UnlockReleaseBuffer(metabuf);
|
|
|
|
}
|
|
|
|
|
2017-03-16 03:18:56 +01:00
|
|
|
/*
 * replay delete operation in hash index to remove
 * tuples marked as DEAD during index tuple insertion.
 *
 * Block 0 is the bucket/overflow page holding the dead tuples; block 1 is
 * the meta page, whose tuple count is decremented to match.
 */
static void
hash_xlog_vacuum_one_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_vacuum_one_page *xldata;
	Buffer		buffer;
	Buffer		metabuf;
	Page		page;
	XLogRedoAction action;
	HashPageOpaque pageopaque;
	OffsetNumber *toDelete;

	xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
	/* offsets to delete are stored as a flexible array in the record */
	toDelete = xldata->offsets;

	/*
	 * If we have any conflict processing to do, it must happen before we
	 * update the page.
	 *
	 * Hash index records that are marked as LP_DEAD and being removed during
	 * hash index tuple insertion can conflict with standby queries. You might
	 * think that vacuum records would conflict as well, but we've handled
	 * that already.  XLOG_HEAP2_PRUNE records provide the highest xid cleaned
	 * by the vacuum of the heap and so we can resolve any conflicts just once
	 * when that arrives.  After that we know that no conflicts exist from
	 * individual hash index vacuum records on that index.
	 */
	if (InHotStandby)
	{
		RelFileLocator rlocator;

		XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
		ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
											xldata->isCatalogRel,
											rlocator);
	}

	/*
	 * Read block 0 with get_cleanup_lock = true, matching the cleanup lock
	 * taken when the deletion was originally performed.
	 */
	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);

	if (action == BLK_NEEDS_REDO)
	{
		page = (Page) BufferGetPage(buffer);

		/* Remove all the dead tuples in one pass. */
		PageIndexMultiDelete(page, toDelete, xldata->ntuples);

		/*
		 * Mark the page as not containing any LP_DEAD items. See comments in
		 * _hash_vacuum_one_page() for details.
		 */
		pageopaque = HashPageGetOpaque(page);
		pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/* Keep the meta page's tuple count in sync with the deletion. */
	if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
	{
		Page		metapage;
		HashMetaPage metap;

		metapage = BufferGetPage(metabuf);
		metap = HashPageGetMeta(metapage);

		metap->hashm_ntuples -= xldata->ntuples;

		PageSetLSN(metapage, lsn);
		MarkBufferDirty(metabuf);
	}
	if (BufferIsValid(metabuf))
		UnlockReleaseBuffer(metabuf);
}
|
|
|
|
|
2017-03-14 18:27:02 +01:00
|
|
|
/*
 * Redo dispatcher for the hash index resource manager: route each WAL
 * record to its type-specific replay routine based on the record info bits.
 */
void
hash_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	switch (info)
	{
		case XLOG_HASH_INIT_META_PAGE:
			hash_xlog_init_meta_page(record);
			break;
		case XLOG_HASH_INIT_BITMAP_PAGE:
			hash_xlog_init_bitmap_page(record);
			break;
		case XLOG_HASH_INSERT:
			hash_xlog_insert(record);
			break;
		case XLOG_HASH_ADD_OVFL_PAGE:
			hash_xlog_add_ovfl_page(record);
			break;
		case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
			hash_xlog_split_allocate_page(record);
			break;
		case XLOG_HASH_SPLIT_PAGE:
			hash_xlog_split_page(record);
			break;
		case XLOG_HASH_SPLIT_COMPLETE:
			hash_xlog_split_complete(record);
			break;
		case XLOG_HASH_MOVE_PAGE_CONTENTS:
			hash_xlog_move_page_contents(record);
			break;
		case XLOG_HASH_SQUEEZE_PAGE:
			hash_xlog_squeeze_page(record);
			break;
		case XLOG_HASH_DELETE:
			hash_xlog_delete(record);
			break;
		case XLOG_HASH_SPLIT_CLEANUP:
			hash_xlog_split_cleanup(record);
			break;
		case XLOG_HASH_UPDATE_META_PAGE:
			hash_xlog_update_meta_page(record);
			break;
		case XLOG_HASH_VACUUM_ONE_PAGE:
			hash_xlog_vacuum_one_page(record);
			break;
		default:
			/* An unrecognized opcode means corrupt WAL; don't continue. */
			elog(PANIC, "hash_redo: unknown op code %u", info);
	}
}
|
2017-03-14 19:58:56 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Mask a hash page before performing consistency checks on it.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
hash_mask(char *pagedata, BlockNumber blkno)
|
|
|
|
{
|
|
|
|
Page page = (Page) pagedata;
|
|
|
|
HashPageOpaque opaque;
|
2017-04-14 23:04:25 +02:00
|
|
|
int pagetype;
|
2017-03-14 19:58:56 +01:00
|
|
|
|
2017-09-22 20:28:22 +02:00
|
|
|
mask_page_lsn_and_checksum(page);
|
2017-03-14 19:58:56 +01:00
|
|
|
|
|
|
|
mask_page_hint_bits(page);
|
|
|
|
mask_unused_space(page);
|
|
|
|
|
2022-04-01 06:24:50 +02:00
|
|
|
opaque = HashPageGetOpaque(page);
|
2017-03-14 19:58:56 +01:00
|
|
|
|
2017-04-14 23:04:25 +02:00
|
|
|
pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
|
|
|
|
if (pagetype == LH_UNUSED_PAGE)
|
2017-03-14 19:58:56 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Mask everything on a UNUSED page.
|
|
|
|
*/
|
|
|
|
mask_page_content(page);
|
|
|
|
}
|
2017-04-14 23:04:25 +02:00
|
|
|
else if (pagetype == LH_BUCKET_PAGE ||
|
|
|
|
pagetype == LH_OVERFLOW_PAGE)
|
2017-03-14 19:58:56 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* In hash bucket and overflow pages, it is possible to modify the
|
|
|
|
* LP_FLAGS without emitting any WAL record. Hence, mask the line
|
2017-03-20 20:55:27 +01:00
|
|
|
* pointer flags. See hashgettuple(), _hash_kill_items() for details.
|
2017-03-14 19:58:56 +01:00
|
|
|
*/
|
|
|
|
mask_lp_flags(page);
|
|
|
|
}
|
2017-03-20 20:55:27 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
|
|
|
|
* unlogged. So, mask it. See _hash_kill_items() for details.
|
|
|
|
*/
|
|
|
|
opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
|
2017-03-14 19:58:56 +01:00
|
|
|
}
|