postgresql/src/backend/access/spgist/spgxlog.c

1011 lines
26 KiB
C

/*-------------------------------------------------------------------------
*
* spgxlog.c
* WAL replay logic for SP-GiST
*
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/access/spgist/spgxlog.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/bufmask.h"
#include "access/spgist_private.h"
#include "access/spgxlog.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "storage/standby.h"
#include "utils/memutils.h"
/*
 * Working memory for replay operations.  The context is created by
 * spg_xlog_startup(), made current for the duration of each spg_redo() call
 * and reset once that call has finished, and deleted by spg_xlog_cleanup().
 */
static MemoryContext opCtx;
/*
 * Build a stand-in SpGistState carrying only what WAL replay needs.
 *
 * The struct is zeroed in full, then the isBuild flag and myXid are copied
 * from the spgxlogState stored in the WAL record, and a zero-filled scratch
 * buffer of SGDTSIZE bytes is attached as deadTupleStorage.  That is enough
 * to support spgFormDeadTuple() during replay.
 *
 * state:    caller-provided struct to fill in (previous contents are lost)
 * stateSrc: state snapshot taken from the WAL record
 */
static void
fillFakeState(SpGistState *state, spgxlogState stateSrc)
{
	/* start from an all-zeroes struct; every field not set below stays zero */
	memset(state, 0, sizeof(*state));

	state->isBuild = stateSrc.isBuild;
	state->myXid = stateSrc.myXid;

	/* zero-initialized scratch space of SGDTSIZE bytes */
	state->deadTupleStorage = palloc0(SGDTSIZE);
}
/*
 * Place a tuple at a given offset, consuming a placeholder if one is there.
 *
 * This replays SpGistPageAddNewItem() operations.  When "offset" already
 * refers to an item on the page, that item must be a placeholder tuple: it
 * is deleted and the page's placeholder counter is decremented before the
 * new tuple is added at the same offset.  Otherwise the tuple is simply
 * added at "offset", which may be at most one past the page's current
 * maximum offset.
 *
 * Raises ERROR if the item being replaced is not a placeholder, or if the
 * page does not store the new item at the requested offset.
 */
static void
addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
{
	bool slotOccupied = (offset <= PageGetMaxOffsetNumber(page));
	OffsetNumber placedAt;

	if (slotOccupied)
	{
		ItemId oldItemId = PageGetItemId(page, offset);
		SpGistDeadTuple oldTuple = (SpGistDeadTuple) PageGetItem(page, oldItemId);

		/* only a placeholder may be overwritten */
		if (oldTuple->tupstate != SPGIST_PLACEHOLDER)
			elog(ERROR, "SPGiST tuple to be replaced is not a placeholder");

		/* one fewer placeholder on this page */
		Assert(SpGistPageGetOpaque(page)->nPlaceholder > 0);
		SpGistPageGetOpaque(page)->nPlaceholder--;

		PageIndexTupleDelete(page, offset);
	}

	/* the target offset must be an existing slot or the next free one */
	Assert(offset <= PageGetMaxOffsetNumber(page) + 1);

	placedAt = PageAddItem(page, tuple, size, offset, false, false);
	if (placedAt != offset)
		elog(ERROR, "failed to add item of size %u to SPGiST index page",
			 size);
}
/*
 * Replay an XLOG_SPGIST_ADD_LEAF record.
 *
 * Block reference 0 is the leaf page that receives the tuple.  Block
 * reference 1 is consulted only when xldata->offnumParent is valid; it holds
 * the parent inner tuple whose node link must be pointed at the new leaf
 * tuple.  The leaf tuple image follows the fixed-size spgxlogAddLeaf header
 * in the record's main data.
 */
static void
spgRedoAddLeaf(XLogReaderState *record)
{
	XLogRecPtr lsn = record->EndRecPtr;
	char *recData = XLogRecGetData(record);
	spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) recData;
	char *leafTuple = recData + sizeof(spgxlogAddLeaf);
	SpGistLeafTupleData leafTupleHdr;
	Buffer buffer;
	XLogRedoAction action;

	/* the tuple image is unaligned, so read its header through a copy */
	memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));

	/*
	 * During normal operation the current and parent pages are locked
	 * together; in WAL replay it should be safe to finish the leaf page
	 * before touching the parent.
	 */
	if (!xldata->newPage)
		action = XLogReadBufferForRedo(record, 0, &buffer);
	else
	{
		uint16 pageFlags = SPGIST_LEAF;

		if (xldata->storesNulls)
			pageFlags |= SPGIST_NULLS;

		/* page is (re)initialized from scratch, so it always needs redo */
		buffer = XLogInitBufferForRedo(record, 0);
		SpGistInitBuffer(buffer, pageFlags);
		action = BLK_NEEDS_REDO;
	}

	if (action == BLK_NEEDS_REDO)
	{
		Page leafPage = BufferGetPage(buffer);

		if (xldata->offnumLeaf == xldata->offnumHeadLeaf)
		{
			/* the new tuple takes the place of a DEAD tuple */
			PageIndexTupleDelete(leafPage, xldata->offnumLeaf);
			if (PageAddItem(leafPage,
							(Item) leafTuple, leafTupleHdr.size,
							xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
				elog(ERROR, "failed to add item of size %u to SPGiST index page",
					 leafTupleHdr.size);
		}
		else
		{
			/* usual case: tuple was added by SpGistPageAddNewItem */
			addOrReplaceTuple(leafPage, (Item) leafTuple, leafTupleHdr.size,
							  xldata->offnumLeaf);

			/* if there is a chain head, link it to the new tuple */
			if (xldata->offnumHeadLeaf != InvalidOffsetNumber)
			{
				ItemId headItemId = PageGetItemId(leafPage, xldata->offnumHeadLeaf);
				SpGistLeafTuple head = (SpGistLeafTuple) PageGetItem(leafPage, headItemId);

				Assert(SGLT_GET_NEXTOFFSET(head) == SGLT_GET_NEXTOFFSET(&leafTupleHdr));
				SGLT_SET_NEXTOFFSET(head, xldata->offnumLeaf);
			}
		}

		PageSetLSN(leafPage, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/* nothing more to do unless a parent downlink has to be updated */
	if (xldata->offnumParent == InvalidOffsetNumber)
		return;

	if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
	{
		Page parentPage;
		SpGistInnerTuple parentTuple;
		BlockNumber blknoLeaf;

		/* the downlink target is the block number of block reference 0 */
		XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf);

		parentPage = BufferGetPage(buffer);
		parentTuple = (SpGistInnerTuple) PageGetItem(parentPage,
													 PageGetItemId(parentPage, xldata->offnumParent));

		spgUpdateNodeLink(parentTuple, xldata->nodeI,
						  blknoLeaf, xldata->offnumLeaf);

		PageSetLSN(parentPage, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Replay an XLOG_SPGIST_MOVE_LEAFS record.
 *
 * Block reference 0 is the source page, 1 the destination page and 2 the
 * page holding the parent inner tuple.  After the fixed-size header the
 * record's main data holds nMoves offsets to delete from the source page,
 * then the offsets at which to insert on the destination page (one entry if
 * replaceDead is set, otherwise nMoves + 1 entries), then the leaf tuple
 * images themselves, packed without alignment.
 */
static void
spgRedoMoveLeafs(XLogReaderState *record)
{
	XLogRecPtr lsn = record->EndRecPtr;
	char *cursor = XLogRecGetData(record);
	spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) cursor;
	SpGistState state;
	OffsetNumber *toDelete;
	OffsetNumber *toInsert;
	int nInsert;
	Buffer buffer;
	XLogRedoAction action;
	BlockNumber blknoDst;

	XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst);

	fillFakeState(&state, xldata->stateSrc);

	if (xldata->replaceDead)
		nInsert = 1;
	else
		nInsert = xldata->nMoves + 1;

	/* walk past the header and the two offset arrays */
	cursor += SizeOfSpgxlogMoveLeafs;
	toDelete = (OffsetNumber *) cursor;
	cursor += sizeof(OffsetNumber) * xldata->nMoves;
	toInsert = (OffsetNumber *) cursor;
	cursor += sizeof(OffsetNumber) * nInsert;

	/* cursor is now positioned on the first leaf tuple image */

	/*
	 * During normal operation the source, destination and parent pages are
	 * all locked at once; in WAL replay it should be safe to update them one
	 * at a time, provided the order below is respected.
	 */

	/* Step 1: fill the destination page, so the redirect will be valid */
	if (!xldata->newPage)
		action = XLogReadBufferForRedo(record, 1, &buffer);
	else
	{
		uint16 pageFlags = SPGIST_LEAF;

		if (xldata->storesNulls)
			pageFlags |= SPGIST_NULLS;

		buffer = XLogInitBufferForRedo(record, 1);
		SpGistInitBuffer(buffer, pageFlags);
		action = BLK_NEEDS_REDO;
	}

	if (action == BLK_NEEDS_REDO)
	{
		Page dstPage = BufferGetPage(buffer);
		int k;

		for (k = 0; k < nInsert; k++)
		{
			char *leafTuple = cursor;
			SpGistLeafTupleData leafTupleHdr;

			/* tuples are unaligned; copy the header to read the size field */
			memcpy(&leafTupleHdr, leafTuple,
				   sizeof(SpGistLeafTupleData));

			addOrReplaceTuple(dstPage, (Item) leafTuple,
							  leafTupleHdr.size, toInsert[k]);

			/* next tuple image starts right after this one */
			cursor += leafTupleHdr.size;
		}

		PageSetLSN(dstPage, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/* Step 2: remove tuples from the source page, leaving a redirection */
	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		Page srcPage = BufferGetPage(buffer);
		int firstState;

		/* during an index build a placeholder is left instead of a redirect */
		if (state.isBuild)
			firstState = SPGIST_PLACEHOLDER;
		else
			firstState = SPGIST_REDIRECT;

		spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nMoves,
								firstState,
								SPGIST_PLACEHOLDER,
								blknoDst,
								toInsert[nInsert - 1]);

		PageSetLSN(srcPage, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/* Step 3: repoint the parent downlink at the last inserted tuple */
	if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
	{
		Page parentPage = BufferGetPage(buffer);
		ItemId parentItemId = PageGetItemId(parentPage, xldata->offnumParent);
		SpGistInnerTuple parentTuple = (SpGistInnerTuple) PageGetItem(parentPage, parentItemId);

		spgUpdateNodeLink(parentTuple, xldata->nodeI,
						  blknoDst, toInsert[nInsert - 1]);

		PageSetLSN(parentPage, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Replay an XLOG_SPGIST_ADD_NODE record.
 *
 * The replacement inner tuple image follows the fixed-size spgxlogAddNode
 * header in the record's main data.  Two shapes of record are handled:
 *
 * - No block reference 1: the tuple at xldata->offnum on block 0 is replaced
 *   in place by the new inner tuple.
 *
 * - Block reference 1 present: the new inner tuple is installed at
 *   xldata->offnumNew on block 1, the old tuple at xldata->offnum on block 0
 *   is replaced by a redirect tuple (or a placeholder when state.isBuild is
 *   set), and the parent's node link is updated on whichever block
 *   xldata->parentBlk designates (0, 1 or 2).
 */
static void
spgRedoAddNode(XLogReaderState *record)
{
	XLogRecPtr lsn = record->EndRecPtr;
	char *ptr = XLogRecGetData(record);
	spgxlogAddNode *xldata = (spgxlogAddNode *) ptr;
	char *innerTuple;
	SpGistInnerTupleData innerTupleHdr;
	SpGistState state;
	Buffer buffer;
	Page page;
	XLogRedoAction action;
	/* the inner tuple image starts right after the fixed-size header */
	ptr += sizeof(spgxlogAddNode);
	innerTuple = ptr;
	/* the tuple is unaligned, so make a copy to access its header */
	memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData));
	/* minimal state, needed below for spgFormDeadTuple() and isBuild */
	fillFakeState(&state, xldata->stateSrc);
	if (!XLogRecHasBlockRef(record, 1))
	{
		/* update in place */
		Assert(xldata->parentBlk == -1);
		if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
		{
			page = BufferGetPage(buffer);
			/* swap the old tuple for the new one at the same offset */
			PageIndexTupleDelete(page, xldata->offnum);
			if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
							xldata->offnum,
							false, false) != xldata->offnum)
				elog(ERROR, "failed to add item of size %u to SPGiST index page",
					 innerTupleHdr.size);
			PageSetLSN(page, lsn);
			MarkBufferDirty(buffer);
		}
		if (BufferIsValid(buffer))
			UnlockReleaseBuffer(buffer);
	}
	else
	{
		BlockNumber blkno;
		BlockNumber blknoNew;
		/* block 0 holds the old tuple, block 1 receives the new tuple */
		XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno);
		XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew);
		/*
		 * In normal operation we would have all three pages (source, dest,
		 * and parent) locked simultaneously; but in WAL replay it should be
		 * safe to update them one at a time, as long as we do it in the right
		 * order. We must insert the new tuple before replacing the old tuple
		 * with the redirect tuple.
		 */
		/* Install new tuple first so redirect is valid */
		if (xldata->newPage)
		{
			/* AddNode is not used for nulls pages */
			buffer = XLogInitBufferForRedo(record, 1);
			SpGistInitBuffer(buffer, 0);
			action = BLK_NEEDS_REDO;
		}
		else
			action = XLogReadBufferForRedo(record, 1, &buffer);
		if (action == BLK_NEEDS_REDO)
		{
			page = BufferGetPage(buffer);
			addOrReplaceTuple(page, (Item) innerTuple,
							  innerTupleHdr.size, xldata->offnumNew);
			/*
			 * If parent is in this same page, update it now.
			 */
			if (xldata->parentBlk == 1)
			{
				SpGistInnerTuple parentTuple;
				parentTuple = (SpGistInnerTuple) PageGetItem(page,
															 PageGetItemId(page, xldata->offnumParent));
				spgUpdateNodeLink(parentTuple, xldata->nodeI,
								  blknoNew, xldata->offnumNew);
			}
			PageSetLSN(page, lsn);
			MarkBufferDirty(buffer);
		}
		if (BufferIsValid(buffer))
			UnlockReleaseBuffer(buffer);
		/* Delete old tuple, replacing it with redirect or placeholder tuple */
		if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
		{
			SpGistDeadTuple dt;
			page = BufferGetPage(buffer);
			/* index build: plain placeholder; otherwise redirect to new tuple */
			if (state.isBuild)
				dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER,
									  InvalidBlockNumber,
									  InvalidOffsetNumber);
			else
				dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
									  blknoNew,
									  xldata->offnumNew);
			PageIndexTupleDelete(page, xldata->offnum);
			if (PageAddItem(page, (Item) dt, dt->size,
							xldata->offnum,
							false, false) != xldata->offnum)
				elog(ERROR, "failed to add item of size %u to SPGiST index page",
					 dt->size);
			/* keep the page's dead-tuple counters in step with what we added */
			if (state.isBuild)
				SpGistPageGetOpaque(page)->nPlaceholder++;
			else
				SpGistPageGetOpaque(page)->nRedirection++;
			/*
			 * If parent is in this same page, update it now.
			 */
			if (xldata->parentBlk == 0)
			{
				SpGistInnerTuple parentTuple;
				parentTuple = (SpGistInnerTuple) PageGetItem(page,
															 PageGetItemId(page, xldata->offnumParent));
				spgUpdateNodeLink(parentTuple, xldata->nodeI,
								  blknoNew, xldata->offnumNew);
			}
			PageSetLSN(page, lsn);
			MarkBufferDirty(buffer);
		}
		if (BufferIsValid(buffer))
			UnlockReleaseBuffer(buffer);
		/*
		 * Update parent downlink (if we didn't do it as part of the source or
		 * destination page update already).
		 */
		if (xldata->parentBlk == 2)
		{
			if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
			{
				SpGistInnerTuple parentTuple;
				page = BufferGetPage(buffer);
				parentTuple = (SpGistInnerTuple) PageGetItem(page,
															 PageGetItemId(page, xldata->offnumParent));
				spgUpdateNodeLink(parentTuple, xldata->nodeI,
								  blknoNew, xldata->offnumNew);
				PageSetLSN(page, lsn);
				MarkBufferDirty(buffer);
			}
			if (BufferIsValid(buffer))
				UnlockReleaseBuffer(buffer);
		}
	}
}
/*
 * Replay an XLOG_SPGIST_SPLIT_TUPLE record.
 *
 * The record's main data holds the fixed-size spgxlogSplitTuple header
 * followed by the prefix tuple image and then the postfix tuple image, both
 * unaligned.  The prefix tuple replaces the existing tuple at offnumPrefix
 * on block 0.  The postfix tuple goes to offnumPostfix, either on the same
 * page (postfixBlkSame) or on block 1.
 */
static void
spgRedoSplitTuple(XLogReaderState *record)
{
	XLogRecPtr lsn = record->EndRecPtr;
	char *recData = XLogRecGetData(record);
	spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) recData;
	char *prefixTuple = recData + sizeof(spgxlogSplitTuple);
	char *postfixTuple;
	SpGistInnerTupleData prefixTupleHdr;
	SpGistInnerTupleData postfixTupleHdr;
	Buffer buffer;

	/* neither tuple image is aligned, so headers are read through copies */
	memcpy(&prefixTupleHdr, prefixTuple, sizeof(SpGistInnerTupleData));
	postfixTuple = prefixTuple + prefixTupleHdr.size;
	memcpy(&postfixTupleHdr, postfixTuple, sizeof(SpGistInnerTupleData));

	/*
	 * During normal operation both pages are locked at once; in WAL replay
	 * it should be safe to update them one after the other, provided the
	 * order below is respected.
	 */

	/* a postfix tuple on another page goes in first, to avoid a dangling link */
	if (!xldata->postfixBlkSame)
	{
		XLogRedoAction action;

		if (!xldata->newPage)
			action = XLogReadBufferForRedo(record, 1, &buffer);
		else
		{
			buffer = XLogInitBufferForRedo(record, 1);
			/* SplitTuple is not used for nulls pages */
			SpGistInitBuffer(buffer, 0);
			action = BLK_NEEDS_REDO;
		}

		if (action == BLK_NEEDS_REDO)
		{
			Page postfixPage = BufferGetPage(buffer);

			addOrReplaceTuple(postfixPage, (Item) postfixTuple,
							  postfixTupleHdr.size, xldata->offnumPostfix);

			PageSetLSN(postfixPage, lsn);
			MarkBufferDirty(buffer);
		}
		if (BufferIsValid(buffer))
			UnlockReleaseBuffer(buffer);
	}

	/* then the original page: swap in the prefix tuple */
	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		Page origPage = BufferGetPage(buffer);

		PageIndexTupleDelete(origPage, xldata->offnumPrefix);
		if (PageAddItem(origPage, (Item) prefixTuple, prefixTupleHdr.size,
						xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
			elog(ERROR, "failed to add item of size %u to SPGiST index page",
				 prefixTupleHdr.size);

		/* a same-page postfix tuple is added only after the prefix is in place */
		if (xldata->postfixBlkSame)
			addOrReplaceTuple(origPage, (Item) postfixTuple,
							  postfixTupleHdr.size,
							  xldata->offnumPostfix);

		PageSetLSN(origPage, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Replay an XLOG_SPGIST_PICKSPLIT record.
 *
 * Block references used here: 0 = source leaf page, 1 = destination leaf
 * page (optional), 2 = page receiving the new inner tuple, 3 = parent page
 * (optional; absent when the inner page is also the parent or on a root
 * split, per the Assert at the end).
 *
 * Main-data layout after the fixed-size header, as walked below:
 * nDelete offsets to delete from the source page, nInsert offsets at which
 * to insert leaf tuples, nInsert one-byte page selectors (nonzero selects
 * the destination page, zero the source page), the new inner tuple image,
 * and finally the nInsert leaf tuple images.
 *
 * The source and destination buffers stay locked until the inner tuple has
 * been restored; only then are they released, followed by the parent
 * downlink update.
 */
static void
spgRedoPickSplit(XLogReaderState *record)
{
	XLogRecPtr lsn = record->EndRecPtr;
	char *ptr = XLogRecGetData(record);
	spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr;
	char *innerTuple;
	SpGistInnerTupleData innerTupleHdr;
	SpGistState state;
	OffsetNumber *toDelete;
	OffsetNumber *toInsert;
	uint8 *leafPageSelect;
	Buffer srcBuffer;
	Buffer destBuffer;
	Buffer innerBuffer;
	Page srcPage;
	Page destPage;
	Page page;
	int i;
	BlockNumber blknoInner;
	XLogRedoAction action;
	/* block number of the page that will hold the new inner tuple */
	XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner);
	fillFakeState(&state, xldata->stateSrc);
	/* step through the variable-length arrays that follow the header */
	ptr += SizeOfSpgxlogPickSplit;
	toDelete = (OffsetNumber *) ptr;
	ptr += sizeof(OffsetNumber) * xldata->nDelete;
	toInsert = (OffsetNumber *) ptr;
	ptr += sizeof(OffsetNumber) * xldata->nInsert;
	leafPageSelect = (uint8 *) ptr;
	ptr += sizeof(uint8) * xldata->nInsert;
	innerTuple = ptr;
	/* the inner tuple is unaligned, so make a copy to access its header */
	memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData));
	ptr += innerTupleHdr.size;
	/* now ptr points to the list of leaf tuples */
	if (xldata->isRootSplit)
	{
		/* when splitting root, we touch it only in the guise of new inner */
		srcBuffer = InvalidBuffer;
		srcPage = NULL;
	}
	else if (xldata->initSrc)
	{
		/* just re-init the source page */
		srcBuffer = XLogInitBufferForRedo(record, 0);
		srcPage = (Page) BufferGetPage(srcBuffer);
		SpGistInitBuffer(srcBuffer,
						 SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
		/* don't update LSN etc till we're done with it */
	}
	else
	{
		/*
		 * Delete the specified tuples from source page. (In case we're in
		 * Hot Standby, we need to hold lock on the page till we're done
		 * inserting leaf tuples and the new inner tuple, else the added
		 * redirect tuple will be a dangling link.)
		 */
		/* srcPage stays NULL unless the page actually needs redo */
		srcPage = NULL;
		if (XLogReadBufferForRedo(record, 0, &srcBuffer) == BLK_NEEDS_REDO)
		{
			srcPage = BufferGetPage(srcBuffer);
			/*
			 * We have it a bit easier here than in doPickSplit(), because we
			 * know the inner tuple's location already, so we can inject the
			 * correct redirection tuple now.
			 */
			if (!state.isBuild)
				spgPageIndexMultiDelete(&state, srcPage,
										toDelete, xldata->nDelete,
										SPGIST_REDIRECT,
										SPGIST_PLACEHOLDER,
										blknoInner,
										xldata->offnumInner);
			else
				spgPageIndexMultiDelete(&state, srcPage,
										toDelete, xldata->nDelete,
										SPGIST_PLACEHOLDER,
										SPGIST_PLACEHOLDER,
										InvalidBlockNumber,
										InvalidOffsetNumber);
			/* don't update LSN etc till we're done with it */
		}
	}
	/* try to access dest page if any */
	if (!XLogRecHasBlockRef(record, 1))
	{
		destBuffer = InvalidBuffer;
		destPage = NULL;
	}
	else if (xldata->initDest)
	{
		/* just re-init the dest page */
		destBuffer = XLogInitBufferForRedo(record, 1);
		destPage = (Page) BufferGetPage(destBuffer);
		SpGistInitBuffer(destBuffer,
						 SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
		/* don't update LSN etc till we're done with it */
	}
	else
	{
		/*
		 * We could probably release the page lock immediately in the
		 * full-page-image case, but for safety let's hold it till later.
		 */
		if (XLogReadBufferForRedo(record, 1, &destBuffer) == BLK_NEEDS_REDO)
			destPage = (Page) BufferGetPage(destBuffer);
		else
			destPage = NULL; /* don't do any page updates */
	}
	/* restore leaf tuples to src and/or dest page */
	for (i = 0; i < xldata->nInsert; i++)
	{
		char *leafTuple;
		SpGistLeafTupleData leafTupleHdr;
		/* the tuples are not aligned, so must copy to access the size field. */
		leafTuple = ptr;
		memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));
		/* advance even when the tuple is skipped, to stay in step with the data */
		ptr += leafTupleHdr.size;
		/* selector byte: nonzero means dest page, zero means source page */
		page = leafPageSelect[i] ? destPage : srcPage;
		if (page == NULL)
			continue; /* no need to touch this page */
		addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size,
						  toInsert[i]);
	}
	/* Now update src and dest page LSNs if needed */
	if (srcPage != NULL)
	{
		PageSetLSN(srcPage, lsn);
		MarkBufferDirty(srcBuffer);
	}
	if (destPage != NULL)
	{
		PageSetLSN(destPage, lsn);
		MarkBufferDirty(destBuffer);
	}
	/* restore new inner tuple */
	if (xldata->initInner)
	{
		innerBuffer = XLogInitBufferForRedo(record, 2);
		SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0));
		action = BLK_NEEDS_REDO;
	}
	else
		action = XLogReadBufferForRedo(record, 2, &innerBuffer);
	if (action == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(innerBuffer);
		addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size,
						  xldata->offnumInner);
		/* if inner is also parent, update link while we're here */
		if (xldata->innerIsParent)
		{
			SpGistInnerTuple parent;
			parent = (SpGistInnerTuple) PageGetItem(page,
													PageGetItemId(page, xldata->offnumParent));
			spgUpdateNodeLink(parent, xldata->nodeI,
							  blknoInner, xldata->offnumInner);
		}
		PageSetLSN(page, lsn);
		MarkBufferDirty(innerBuffer);
	}
	if (BufferIsValid(innerBuffer))
		UnlockReleaseBuffer(innerBuffer);
	/*
	 * Now we can release the leaf-page locks. It's okay to do this before
	 * updating the parent downlink.
	 */
	if (BufferIsValid(srcBuffer))
		UnlockReleaseBuffer(srcBuffer);
	if (BufferIsValid(destBuffer))
		UnlockReleaseBuffer(destBuffer);
	/* update parent downlink, unless we did it above */
	if (XLogRecHasBlockRef(record, 3))
	{
		Buffer parentBuffer;
		if (XLogReadBufferForRedo(record, 3, &parentBuffer) == BLK_NEEDS_REDO)
		{
			SpGistInnerTuple parent;
			page = BufferGetPage(parentBuffer);
			parent = (SpGistInnerTuple) PageGetItem(page,
													PageGetItemId(page, xldata->offnumParent));
			spgUpdateNodeLink(parent, xldata->nodeI,
							  blknoInner, xldata->offnumInner);
			PageSetLSN(page, lsn);
			MarkBufferDirty(parentBuffer);
		}
		if (BufferIsValid(parentBuffer))
			UnlockReleaseBuffer(parentBuffer);
	}
	else
		Assert(xldata->innerIsParent || xldata->isRootSplit);
}
/*
 * Replay an XLOG_SPGIST_VACUUM_LEAF record against block reference 0.
 *
 * After the fixed-size header the record's main data holds six consecutive
 * OffsetNumber arrays: nDead offsets to mark dead, nPlaceholder offsets to
 * turn into placeholders, nMove move sources, nMove move destinations,
 * nChain chain-link sources and nChain chain-link destinations.
 */
static void
spgRedoVacuumLeaf(XLogReaderState *record)
{
	XLogRecPtr lsn = record->EndRecPtr;
	char *recData = XLogRecGetData(record);
	spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) recData;
	OffsetNumber *toDead;
	OffsetNumber *toPlaceholder;
	OffsetNumber *moveSrc;
	OffsetNumber *moveDest;
	OffsetNumber *chainSrc;
	OffsetNumber *chainDest;
	SpGistState state;
	Buffer buffer;

	fillFakeState(&state, xldata->stateSrc);

	/* the arrays are laid out back to back, each sized by its own counter */
	toDead = (OffsetNumber *) (recData + SizeOfSpgxlogVacuumLeaf);
	toPlaceholder = toDead + xldata->nDead;
	moveSrc = toPlaceholder + xldata->nPlaceholder;
	moveDest = moveSrc + xldata->nMove;
	chainSrc = moveDest + xldata->nMove;
	chainDest = chainSrc + xldata->nChain;

	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		Page leafPage = BufferGetPage(buffer);
		int k;

		/* first the tuples that become DEAD ... */
		spgPageIndexMultiDelete(&state, leafPage,
								toDead, xldata->nDead,
								SPGIST_DEAD, SPGIST_DEAD,
								InvalidBlockNumber,
								InvalidOffsetNumber);

		/* ... then the ones that become placeholders */
		spgPageIndexMultiDelete(&state, leafPage,
								toPlaceholder, xldata->nPlaceholder,
								SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
								InvalidBlockNumber,
								InvalidOffsetNumber);

		/* see comments in vacuumLeafPage() */
		for (k = 0; k < xldata->nMove; k++)
		{
			ItemId srcItemId = PageGetItemId(leafPage, moveSrc[k]);
			ItemId destItemId = PageGetItemId(leafPage, moveDest[k]);
			ItemIdData saved = *srcItemId;

			/* exchange the two line pointers */
			*srcItemId = *destItemId;
			*destItemId = saved;
		}

		/* the move sources are then converted to placeholders */
		spgPageIndexMultiDelete(&state, leafPage,
								moveSrc, xldata->nMove,
								SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
								InvalidBlockNumber,
								InvalidOffsetNumber);

		/* finally rewrite the chain links of the listed live tuples */
		for (k = 0; k < xldata->nChain; k++)
		{
			ItemId linkItemId = PageGetItemId(leafPage, chainSrc[k]);
			SpGistLeafTuple lt = (SpGistLeafTuple) PageGetItem(leafPage, linkItemId);

			Assert(lt->tupstate == SPGIST_LIVE);
			SGLT_SET_NEXTOFFSET(lt, chainDest[k]);
		}

		PageSetLSN(leafPage, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Replay an XLOG_SPGIST_VACUUM_ROOT record: delete the xldata->nDelete items
 * listed in xldata->offsets from the page of block reference 0.
 */
static void
spgRedoVacuumRoot(XLogReaderState *record)
{
	spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) XLogRecGetData(record);
	XLogRecPtr lsn = record->EndRecPtr;
	XLogRedoAction action;
	Buffer buffer;

	action = XLogReadBufferForRedo(record, 0, &buffer);
	if (action == BLK_NEEDS_REDO)
	{
		Page rootPage = BufferGetPage(buffer);

		/* The tuple numbers are in order */
		PageIndexMultiDelete(rootPage, xldata->offsets, xldata->nDelete);

		PageSetLSN(rootPage, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Replay an XLOG_SPGIST_VACUUM_REDIRECT record against block reference 0.
 *
 * The nToPlaceholder redirect tuples listed in xldata->offsets are turned
 * into placeholders.  If firstPlaceholder is valid, every item from that
 * offset through the end of the page is then deleted.  In Hot Standby, a
 * recovery conflict is resolved against snapshotConflictHorizon before the
 * page is touched.
 */
static void
spgRedoVacuumRedirect(XLogReaderState *record)
{
	XLogRecPtr lsn = record->EndRecPtr;
	spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) XLogRecGetData(record);
	OffsetNumber *redirects = xldata->offsets;
	Buffer buffer;

	/*
	 * Redirection tuples may be about to disappear; make sure no live Hot
	 * Standby transaction could still need to see them.
	 */
	if (InHotStandby)
	{
		RelFileLocator locator;

		XLogRecGetBlockTag(record, 0, &locator, NULL, NULL);
		ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
											locator);
	}

	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
	{
		Page page = BufferGetPage(buffer);
		SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
		int k;

		/* Downgrade each listed redirect pointer to a plain placeholder */
		for (k = 0; k < xldata->nToPlaceholder; k++)
		{
			ItemId itemId = PageGetItemId(page, redirects[k]);
			SpGistDeadTuple dt = (SpGistDeadTuple) PageGetItem(page, itemId);

			Assert(dt->tupstate == SPGIST_REDIRECT);
			dt->tupstate = SPGIST_PLACEHOLDER;
			ItemPointerSetInvalid(&dt->pointer);
		}

		/* shift the converted tuples from one page counter to the other */
		Assert(opaque->nRedirection >= xldata->nToPlaceholder);
		opaque->nRedirection -= xldata->nToPlaceholder;
		opaque->nPlaceholder += xldata->nToPlaceholder;

		/* Trim the run of placeholder tuples at the end of the page */
		if (xldata->firstPlaceholder != InvalidOffsetNumber)
		{
			int max = PageGetMaxOffsetNumber(page);
			int nTail = max - xldata->firstPlaceholder + 1;
			OffsetNumber *toDelete = palloc(sizeof(OffsetNumber) * max);

			/* consecutive offsets firstPlaceholder .. max, in ascending order */
			for (k = 0; k < nTail; k++)
				toDelete[k] = xldata->firstPlaceholder + k;

			Assert(opaque->nPlaceholder >= nTail);
			opaque->nPlaceholder -= nTail;

			/* The array is sorted, so can use PageIndexMultiDelete */
			PageIndexMultiDelete(page, toDelete, nTail);

			pfree(toDelete);
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Redo entry point for SP-GiST WAL records.
 *
 * The record's info bits (with XLR_INFO_MASK cleared) select one of the
 * per-record-type replay routines in this file.  The routine runs with
 * opCtx as the current memory context; afterwards the previous context is
 * restored and opCtx is reset.  An unrecognized op code raises PANIC.
 */
void
spg_redo(XLogReaderState *record)
{
	uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
	void (*handler) (XLogReaderState *) = NULL;
	MemoryContext oldCxt;

	oldCxt = MemoryContextSwitchTo(opCtx);

	/* map the op code onto its replay routine */
	switch (info)
	{
		case XLOG_SPGIST_ADD_LEAF:
			handler = spgRedoAddLeaf;
			break;
		case XLOG_SPGIST_MOVE_LEAFS:
			handler = spgRedoMoveLeafs;
			break;
		case XLOG_SPGIST_ADD_NODE:
			handler = spgRedoAddNode;
			break;
		case XLOG_SPGIST_SPLIT_TUPLE:
			handler = spgRedoSplitTuple;
			break;
		case XLOG_SPGIST_PICKSPLIT:
			handler = spgRedoPickSplit;
			break;
		case XLOG_SPGIST_VACUUM_LEAF:
			handler = spgRedoVacuumLeaf;
			break;
		case XLOG_SPGIST_VACUUM_ROOT:
			handler = spgRedoVacuumRoot;
			break;
		case XLOG_SPGIST_VACUUM_REDIRECT:
			handler = spgRedoVacuumRedirect;
			break;
		default:
			elog(PANIC, "spg_redo: unknown op code %u", info);
	}

	if (handler != NULL)
		handler(record);

	/* restore the caller's context and drop this record's allocations */
	MemoryContextSwitchTo(oldCxt);
	MemoryContextReset(opCtx);
}
void
spg_xlog_startup(void)
{
opCtx = AllocSetContextCreate(CurrentMemoryContext,
"SP-GiST temporary context",
ALLOCSET_DEFAULT_SIZES);
}
void
spg_xlog_cleanup(void)
{
MemoryContextDelete(opCtx);
opCtx = NULL;
}
/*
 * Mask an SP-GiST page before consistency checks are performed on it.
 *
 * The page LSN, checksum and hint bits are always masked.  The unused space
 * is masked as well, but only when pd_lower looks sane (at least the size of
 * the page header).  The blkno argument is not used.
 */
void
spg_mask(char *pagedata, BlockNumber blkno)
{
	Page page = (Page) pagedata;
	PageHeader hdr = (PageHeader) page;

	mask_page_lsn_and_checksum(page);
	mask_page_hint_bits(page);

	/* pd_lower does not appear to have been set correctly: leave the rest */
	if (hdr->pd_lower < SizeOfPageHeaderData)
		return;

	mask_unused_space(page);
}