Derive latestRemovedXid for btree deletes by reading heap pages. The
WAL record for btree delete contains a list of tids, even when backup blocks are present. We follow the tids to their heap tuples, taking care to follow LP_REDIRECT tuples. We ignore LP_DEAD tuples on the understanding that they will always have xmin/xmax earlier than any LP_NORMAL tuples referred to by killed index tuples. Iff all tuples are LP_DEAD we return InvalidTransactionId. The heap relfilenode is added to the WAL record, requiring API changes to pass down the heap Relation. XLOG_PAGE_MAGIC updated.
This commit is contained in:
parent
59292f28ca
commit
a760893dbd
|
@ -8,7 +8,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.177 2010/02/26 02:00:34 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.178 2010/03/28 09:27:01 sriggs Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -57,7 +57,8 @@ static void _bt_findinsertloc(Relation rel,
|
||||||
OffsetNumber *offsetptr,
|
OffsetNumber *offsetptr,
|
||||||
int keysz,
|
int keysz,
|
||||||
ScanKey scankey,
|
ScanKey scankey,
|
||||||
IndexTuple newtup);
|
IndexTuple newtup,
|
||||||
|
Relation heapRel);
|
||||||
static void _bt_insertonpg(Relation rel, Buffer buf,
|
static void _bt_insertonpg(Relation rel, Buffer buf,
|
||||||
BTStack stack,
|
BTStack stack,
|
||||||
IndexTuple itup,
|
IndexTuple itup,
|
||||||
|
@ -78,7 +79,7 @@ static void _bt_pgaddtup(Relation rel, Page page,
|
||||||
OffsetNumber itup_off, const char *where);
|
OffsetNumber itup_off, const char *where);
|
||||||
static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
|
static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
|
||||||
int keysz, ScanKey scankey);
|
int keysz, ScanKey scankey);
|
||||||
static void _bt_vacuum_one_page(Relation rel, Buffer buffer);
|
static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -175,7 +176,7 @@ top:
|
||||||
if (checkUnique != UNIQUE_CHECK_EXISTING)
|
if (checkUnique != UNIQUE_CHECK_EXISTING)
|
||||||
{
|
{
|
||||||
/* do the insertion */
|
/* do the insertion */
|
||||||
_bt_findinsertloc(rel, &buf, &offset, natts, itup_scankey, itup);
|
_bt_findinsertloc(rel, &buf, &offset, natts, itup_scankey, itup, heapRel);
|
||||||
_bt_insertonpg(rel, buf, stack, itup, offset, false);
|
_bt_insertonpg(rel, buf, stack, itup, offset, false);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -491,7 +492,8 @@ _bt_findinsertloc(Relation rel,
|
||||||
OffsetNumber *offsetptr,
|
OffsetNumber *offsetptr,
|
||||||
int keysz,
|
int keysz,
|
||||||
ScanKey scankey,
|
ScanKey scankey,
|
||||||
IndexTuple newtup)
|
IndexTuple newtup,
|
||||||
|
Relation heapRel)
|
||||||
{
|
{
|
||||||
Buffer buf = *bufptr;
|
Buffer buf = *bufptr;
|
||||||
Page page = BufferGetPage(buf);
|
Page page = BufferGetPage(buf);
|
||||||
|
@ -556,7 +558,7 @@ _bt_findinsertloc(Relation rel,
|
||||||
*/
|
*/
|
||||||
if (P_ISLEAF(lpageop) && P_HAS_GARBAGE(lpageop))
|
if (P_ISLEAF(lpageop) && P_HAS_GARBAGE(lpageop))
|
||||||
{
|
{
|
||||||
_bt_vacuum_one_page(rel, buf);
|
_bt_vacuum_one_page(rel, buf, heapRel);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* remember that we vacuumed this page, because that makes the
|
* remember that we vacuumed this page, because that makes the
|
||||||
|
@ -1998,7 +2000,7 @@ _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
|
||||||
* super-exclusive "cleanup" lock (see nbtree/README).
|
* super-exclusive "cleanup" lock (see nbtree/README).
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
_bt_vacuum_one_page(Relation rel, Buffer buffer)
|
_bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel)
|
||||||
{
|
{
|
||||||
OffsetNumber deletable[MaxOffsetNumber];
|
OffsetNumber deletable[MaxOffsetNumber];
|
||||||
int ndeletable = 0;
|
int ndeletable = 0;
|
||||||
|
@ -2025,7 +2027,7 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ndeletable > 0)
|
if (ndeletable > 0)
|
||||||
_bt_delitems(rel, buffer, deletable, ndeletable, false, 0);
|
_bt_delitems_delete(rel, buffer, deletable, ndeletable, heapRel);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: if we didn't find any LP_DEAD items, then the page's
|
* Note: if we didn't find any LP_DEAD items, then the page's
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.121 2010/03/19 10:41:21 sriggs Exp $
|
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.122 2010/03/28 09:27:01 sriggs Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||||
|
@ -719,15 +719,12 @@ _bt_page_recyclable(Page page)
|
||||||
* ensure correct locking.
|
* ensure correct locking.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
_bt_delitems(Relation rel, Buffer buf,
|
_bt_delitems_vacuum(Relation rel, Buffer buf,
|
||||||
OffsetNumber *itemnos, int nitems, bool isVacuum,
|
OffsetNumber *itemnos, int nitems, BlockNumber lastBlockVacuumed)
|
||||||
BlockNumber lastBlockVacuumed)
|
|
||||||
{
|
{
|
||||||
Page page = BufferGetPage(buf);
|
Page page = BufferGetPage(buf);
|
||||||
BTPageOpaque opaque;
|
BTPageOpaque opaque;
|
||||||
|
|
||||||
Assert(isVacuum || lastBlockVacuumed == 0);
|
|
||||||
|
|
||||||
/* No ereport(ERROR) until changes are logged */
|
/* No ereport(ERROR) until changes are logged */
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
|
@ -759,35 +756,14 @@ _bt_delitems(Relation rel, Buffer buf,
|
||||||
XLogRecPtr recptr;
|
XLogRecPtr recptr;
|
||||||
XLogRecData rdata[2];
|
XLogRecData rdata[2];
|
||||||
|
|
||||||
if (isVacuum)
|
xl_btree_vacuum xlrec_vacuum;
|
||||||
{
|
|
||||||
xl_btree_vacuum xlrec_vacuum;
|
|
||||||
|
|
||||||
xlrec_vacuum.node = rel->rd_node;
|
xlrec_vacuum.node = rel->rd_node;
|
||||||
xlrec_vacuum.block = BufferGetBlockNumber(buf);
|
xlrec_vacuum.block = BufferGetBlockNumber(buf);
|
||||||
|
|
||||||
xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
|
|
||||||
rdata[0].data = (char *) &xlrec_vacuum;
|
|
||||||
rdata[0].len = SizeOfBtreeVacuum;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
xl_btree_delete xlrec_delete;
|
|
||||||
|
|
||||||
xlrec_delete.node = rel->rd_node;
|
|
||||||
xlrec_delete.block = BufferGetBlockNumber(buf);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* XXX: We would like to set an accurate latestRemovedXid, but
|
|
||||||
* there is no easy way of obtaining a useful value. So we punt
|
|
||||||
* and store InvalidTransactionId, which forces the standby to
|
|
||||||
* wait for/cancel all currently running transactions.
|
|
||||||
*/
|
|
||||||
xlrec_delete.latestRemovedXid = InvalidTransactionId;
|
|
||||||
rdata[0].data = (char *) &xlrec_delete;
|
|
||||||
rdata[0].len = SizeOfBtreeDelete;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
|
||||||
|
rdata[0].data = (char *) &xlrec_vacuum;
|
||||||
|
rdata[0].len = SizeOfBtreeVacuum;
|
||||||
rdata[0].buffer = InvalidBuffer;
|
rdata[0].buffer = InvalidBuffer;
|
||||||
rdata[0].next = &(rdata[1]);
|
rdata[0].next = &(rdata[1]);
|
||||||
|
|
||||||
|
@ -810,10 +786,82 @@ _bt_delitems(Relation rel, Buffer buf,
|
||||||
rdata[1].buffer_std = true;
|
rdata[1].buffer_std = true;
|
||||||
rdata[1].next = NULL;
|
rdata[1].next = NULL;
|
||||||
|
|
||||||
if (isVacuum)
|
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
|
||||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
|
|
||||||
else
|
PageSetLSN(page, recptr);
|
||||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
|
PageSetTLI(page, ThisTimeLineID);
|
||||||
|
}
|
||||||
|
|
||||||
|
END_CRIT_SECTION();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
_bt_delitems_delete(Relation rel, Buffer buf,
|
||||||
|
OffsetNumber *itemnos, int nitems, Relation heapRel)
|
||||||
|
{
|
||||||
|
Page page = BufferGetPage(buf);
|
||||||
|
BTPageOpaque opaque;
|
||||||
|
|
||||||
|
Assert(nitems > 0);
|
||||||
|
|
||||||
|
/* No ereport(ERROR) until changes are logged */
|
||||||
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
|
/* Fix the page */
|
||||||
|
PageIndexMultiDelete(page, itemnos, nitems);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can clear the vacuum cycle ID since this page has certainly been
|
||||||
|
* processed by the current vacuum scan.
|
||||||
|
*/
|
||||||
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||||
|
opaque->btpo_cycleid = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mark the page as not containing any LP_DEAD items. This is not
|
||||||
|
* certainly true (there might be some that have recently been marked, but
|
||||||
|
* weren't included in our target-item list), but it will almost always be
|
||||||
|
* true and it doesn't seem worth an additional page scan to check it.
|
||||||
|
* Remember that BTP_HAS_GARBAGE is only a hint anyway.
|
||||||
|
*/
|
||||||
|
opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
|
||||||
|
|
||||||
|
MarkBufferDirty(buf);
|
||||||
|
|
||||||
|
/* XLOG stuff */
|
||||||
|
if (!rel->rd_istemp)
|
||||||
|
{
|
||||||
|
XLogRecPtr recptr;
|
||||||
|
XLogRecData rdata[3];
|
||||||
|
|
||||||
|
xl_btree_delete xlrec_delete;
|
||||||
|
|
||||||
|
xlrec_delete.node = rel->rd_node;
|
||||||
|
xlrec_delete.hnode = heapRel->rd_node;
|
||||||
|
xlrec_delete.block = BufferGetBlockNumber(buf);
|
||||||
|
xlrec_delete.nitems = nitems;
|
||||||
|
|
||||||
|
rdata[0].data = (char *) &xlrec_delete;
|
||||||
|
rdata[0].len = SizeOfBtreeDelete;
|
||||||
|
rdata[0].buffer = InvalidBuffer;
|
||||||
|
rdata[0].next = &(rdata[1]);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need the target-offsets array whether or not we store the whole
|
||||||
|
* buffer, to allow us to find the latestRemovedXid on a standby server.
|
||||||
|
*/
|
||||||
|
rdata[1].data = (char *) itemnos;
|
||||||
|
rdata[1].len = nitems * sizeof(OffsetNumber);
|
||||||
|
rdata[1].buffer = InvalidBuffer;
|
||||||
|
rdata[1].next = &(rdata[2]);
|
||||||
|
|
||||||
|
rdata[2].data = NULL;
|
||||||
|
rdata[2].len = 0;
|
||||||
|
rdata[2].buffer = buf;
|
||||||
|
rdata[2].buffer_std = true;
|
||||||
|
rdata[2].next = NULL;
|
||||||
|
|
||||||
|
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
|
||||||
|
|
||||||
PageSetLSN(page, recptr);
|
PageSetLSN(page, recptr);
|
||||||
PageSetTLI(page, ThisTimeLineID);
|
PageSetTLI(page, ThisTimeLineID);
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.176 2010/02/26 02:00:34 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.177 2010/03/28 09:27:01 sriggs Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -708,7 +708,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||||
buf = ReadBufferExtended(rel, MAIN_FORKNUM, num_pages - 1, RBM_NORMAL,
|
buf = ReadBufferExtended(rel, MAIN_FORKNUM, num_pages - 1, RBM_NORMAL,
|
||||||
info->strategy);
|
info->strategy);
|
||||||
LockBufferForCleanup(buf);
|
LockBufferForCleanup(buf);
|
||||||
_bt_delitems(rel, buf, NULL, 0, true, vstate.lastBlockVacuumed);
|
_bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
|
||||||
_bt_relbuf(rel, buf);
|
_bt_relbuf(rel, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -889,7 +889,7 @@ restart:
|
||||||
{
|
{
|
||||||
BlockNumber lastBlockVacuumed = BufferGetBlockNumber(buf);
|
BlockNumber lastBlockVacuumed = BufferGetBlockNumber(buf);
|
||||||
|
|
||||||
_bt_delitems(rel, buf, deletable, ndeletable, true, vstate->lastBlockVacuumed);
|
_bt_delitems_vacuum(rel, buf, deletable, ndeletable, vstate->lastBlockVacuumed);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Keep track of the block number of the lastBlockVacuumed, so we
|
* Keep track of the block number of the lastBlockVacuumed, so we
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.63 2010/03/19 10:41:22 sriggs Exp $
|
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.64 2010/03/28 09:27:01 sriggs Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -553,6 +553,139 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
|
||||||
UnlockReleaseBuffer(buffer);
|
UnlockReleaseBuffer(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the latestRemovedXid from the heap pages pointed at by the index
|
||||||
|
* tuples being deleted. This puts the work for calculating latestRemovedXid
|
||||||
|
* into the recovery path rather than the primary path.
|
||||||
|
*
|
||||||
|
* It's possible that this generates a fair amount of I/O, since an index
|
||||||
|
* block may have hundreds of tuples being deleted. Repeat accesses to the
|
||||||
|
* same heap blocks are common, though are not yet optimised.
|
||||||
|
*
|
||||||
|
* XXX optimise later with something like XLogPrefetchBuffer()
|
||||||
|
*/
|
||||||
|
static TransactionId
|
||||||
|
btree_xlog_delete_get_latestRemovedXid(XLogRecord *record)
|
||||||
|
{
|
||||||
|
OffsetNumber *unused;
|
||||||
|
Buffer ibuffer, hbuffer;
|
||||||
|
Page ipage, hpage;
|
||||||
|
ItemId iitemid, hitemid;
|
||||||
|
IndexTuple itup;
|
||||||
|
HeapTupleHeader htuphdr;
|
||||||
|
BlockNumber hblkno;
|
||||||
|
OffsetNumber hoffnum;
|
||||||
|
TransactionId latestRemovedXid = InvalidTransactionId;
|
||||||
|
TransactionId htupxid = InvalidTransactionId;
|
||||||
|
int i;
|
||||||
|
int num_unused, num_redirect, num_dead;
|
||||||
|
|
||||||
|
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get index page
|
||||||
|
*/
|
||||||
|
ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
|
||||||
|
if (!BufferIsValid(ibuffer))
|
||||||
|
return InvalidTransactionId;
|
||||||
|
ipage = (Page) BufferGetPage(ibuffer);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Loop through the deleted index items to obtain the TransactionId
|
||||||
|
* from the heap items they point to.
|
||||||
|
*/
|
||||||
|
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
|
||||||
|
|
||||||
|
for (i = 0; i < xlrec->nitems; i++)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Identify the index tuple about to be deleted
|
||||||
|
*/
|
||||||
|
iitemid = PageGetItemId(ipage, unused[i]);
|
||||||
|
itup = (IndexTuple) PageGetItem(ipage, iitemid);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Locate the heap page that the index tuple points at
|
||||||
|
*/
|
||||||
|
hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
|
||||||
|
hbuffer = XLogReadBuffer(xlrec->hnode, hblkno, false);
|
||||||
|
if (!BufferIsValid(hbuffer))
|
||||||
|
{
|
||||||
|
UnlockReleaseBuffer(ibuffer);
|
||||||
|
return InvalidTransactionId;
|
||||||
|
}
|
||||||
|
hpage = (Page) BufferGetPage(hbuffer);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look up the heap tuple header that the index tuple points at
|
||||||
|
* by using the heap node supplied with the xlrec. We can't use
|
||||||
|
* heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer.
|
||||||
|
* Note that we are not looking at tuple data here, just headers.
|
||||||
|
*/
|
||||||
|
hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid));
|
||||||
|
hitemid = PageGetItemId(hpage, hoffnum);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Follow any redirections until we find something useful.
|
||||||
|
*/
|
||||||
|
while (ItemIdIsRedirected(hitemid))
|
||||||
|
{
|
||||||
|
num_redirect++;
|
||||||
|
hoffnum = ItemIdGetRedirect(hitemid);
|
||||||
|
hitemid = PageGetItemId(hpage, hoffnum);
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the heap item has storage, then read the header. Some LP_DEAD
|
||||||
|
* items may not be accessible, so we ignore them.
|
||||||
|
*/
|
||||||
|
if (ItemIdHasStorage(hitemid))
|
||||||
|
{
|
||||||
|
htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the heap tuple's xmin/xmax and ratchet up the latestRemovedXid.
|
||||||
|
* No need to consider xvac values here.
|
||||||
|
*/
|
||||||
|
htupxid = HeapTupleHeaderGetXmin(htuphdr);
|
||||||
|
if (TransactionIdFollows(htupxid, latestRemovedXid))
|
||||||
|
latestRemovedXid = htupxid;
|
||||||
|
|
||||||
|
htupxid = HeapTupleHeaderGetXmax(htuphdr);
|
||||||
|
if (TransactionIdFollows(htupxid, latestRemovedXid))
|
||||||
|
latestRemovedXid = htupxid;
|
||||||
|
}
|
||||||
|
else if (ItemIdIsDead(hitemid))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Conjecture: if hitemid is dead then it had xids before the xids
|
||||||
|
* marked on LP_NORMAL items. So we just ignore this item and move
|
||||||
|
* onto the next, for the purposes of calculating latestRemovedxids.
|
||||||
|
*/
|
||||||
|
num_dead++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Assert(!ItemIdIsUsed(hitemid));
|
||||||
|
num_unused++;
|
||||||
|
}
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(hbuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(ibuffer);
|
||||||
|
|
||||||
|
Assert(num_unused == 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that if all heap tuples were LP_DEAD then we will be
|
||||||
|
* returning InvalidTransactionId here. This seems very unlikely
|
||||||
|
* in practice.
|
||||||
|
*/
|
||||||
|
return latestRemovedXid;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
||||||
{
|
{
|
||||||
|
@ -584,12 +717,10 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
||||||
if (record->xl_len > SizeOfBtreeDelete)
|
if (record->xl_len > SizeOfBtreeDelete)
|
||||||
{
|
{
|
||||||
OffsetNumber *unused;
|
OffsetNumber *unused;
|
||||||
OffsetNumber *unend;
|
|
||||||
|
|
||||||
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
|
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
|
||||||
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
|
|
||||||
|
|
||||||
PageIndexMultiDelete(page, unused, unend - unused);
|
PageIndexMultiDelete(page, unused, xlrec->nitems);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -830,6 +961,7 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||||
* from individual btree vacuum records on that index.
|
* from individual btree vacuum records on that index.
|
||||||
*/
|
*/
|
||||||
{
|
{
|
||||||
|
TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(record);
|
||||||
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
|
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -839,7 +971,7 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||||
* here is worth some thought and possibly some effort to
|
* here is worth some thought and possibly some effort to
|
||||||
* improve.
|
* improve.
|
||||||
*/
|
*/
|
||||||
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
|
ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1012,10 +1144,10 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||||
{
|
{
|
||||||
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
|
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
|
||||||
|
|
||||||
appendStringInfo(buf, "delete: rel %u/%u/%u; blk %u, latestRemovedXid %u",
|
appendStringInfo(buf, "delete: index %u/%u/%u; iblk %u, heap %u/%u/%u;",
|
||||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
|
||||||
xlrec->node.relNode, xlrec->block,
|
xlrec->block,
|
||||||
xlrec->latestRemovedXid);
|
xlrec->hnode.spcNode, xlrec->hnode.dbNode, xlrec->hnode.relNode);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case XLOG_BTREE_DELETE_PAGE:
|
case XLOG_BTREE_DELETE_PAGE:
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.133 2010/03/20 07:49:48 sriggs Exp $
|
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.134 2010/03/28 09:27:02 sriggs Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -314,14 +314,15 @@ typedef struct xl_btree_split
|
||||||
*/
|
*/
|
||||||
typedef struct xl_btree_delete
|
typedef struct xl_btree_delete
|
||||||
{
|
{
|
||||||
RelFileNode node;
|
RelFileNode node; /* RelFileNode of the index */
|
||||||
BlockNumber block;
|
BlockNumber block;
|
||||||
TransactionId latestRemovedXid;
|
RelFileNode hnode; /* RelFileNode of the heap the index currently points at */
|
||||||
|
int nitems;
|
||||||
|
|
||||||
/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
|
/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
|
||||||
} xl_btree_delete;
|
} xl_btree_delete;
|
||||||
|
|
||||||
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId))
|
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nitems) + sizeof(int))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is what we need to know about page reuse within btree.
|
* This is what we need to know about page reuse within btree.
|
||||||
|
@ -349,13 +350,12 @@ typedef struct xl_btree_reuse_page
|
||||||
* heap tuples.
|
* heap tuples.
|
||||||
*
|
*
|
||||||
* Any changes to any one block are registered on just one WAL record. All
|
* Any changes to any one block are registered on just one WAL record. All
|
||||||
* blocks that we need to run EnsureBlockUnpinned() before we touch the changed
|
* blocks that we need to run EnsureBlockUnpinned() are listed as a block range
|
||||||
* block are also given on this record as a variable length array. The array
|
* starting from the last block vacuumed through until this one. Individual
|
||||||
* is compressed by way of storing an array of block ranges, rather than an
|
* block numbers aren't given.
|
||||||
* actual array of blockids.
|
|
||||||
*
|
*
|
||||||
* Note that the *last* WAL record in any vacuum of an index is allowed to
|
* Note that the *last* WAL record in any vacuum of an index is allowed to
|
||||||
* have numItems == 0. All other WAL records must have numItems > 0.
|
* have a zero length array of offsets. Earlier records must have at least one.
|
||||||
*/
|
*/
|
||||||
typedef struct xl_btree_vacuum
|
typedef struct xl_btree_vacuum
|
||||||
{
|
{
|
||||||
|
@ -588,9 +588,10 @@ extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
|
||||||
extern void _bt_relbuf(Relation rel, Buffer buf);
|
extern void _bt_relbuf(Relation rel, Buffer buf);
|
||||||
extern void _bt_pageinit(Page page, Size size);
|
extern void _bt_pageinit(Page page, Size size);
|
||||||
extern bool _bt_page_recyclable(Page page);
|
extern bool _bt_page_recyclable(Page page);
|
||||||
extern void _bt_delitems(Relation rel, Buffer buf,
|
extern void _bt_delitems_delete(Relation rel, Buffer buf,
|
||||||
OffsetNumber *itemnos, int nitems, bool isVacuum,
|
OffsetNumber *itemnos, int nitems, Relation heapRel);
|
||||||
BlockNumber lastBlockVacuumed);
|
extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
|
||||||
|
OffsetNumber *itemnos, int nitems, BlockNumber lastBlockVacuumed);
|
||||||
extern int _bt_pagedel(Relation rel, Buffer buf, BTStack stack);
|
extern int _bt_pagedel(Relation rel, Buffer buf, BTStack stack);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.30 2010/03/19 17:42:10 sriggs Exp $
|
* $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.31 2010/03/28 09:27:02 sriggs Exp $
|
||||||
*/
|
*/
|
||||||
#ifndef XLOG_INTERNAL_H
|
#ifndef XLOG_INTERNAL_H
|
||||||
#define XLOG_INTERNAL_H
|
#define XLOG_INTERNAL_H
|
||||||
|
@ -71,7 +71,7 @@ typedef struct XLogContRecord
|
||||||
/*
|
/*
|
||||||
* Each page of XLOG file has a header like this:
|
* Each page of XLOG file has a header like this:
|
||||||
*/
|
*/
|
||||||
#define XLOG_PAGE_MAGIC 0x9002 /* can be used as WAL version indicator */
|
#define XLOG_PAGE_MAGIC 0x9003 /* can be used as WAL version indicator */
|
||||||
|
|
||||||
typedef struct XLogPageHeaderData
|
typedef struct XLogPageHeaderData
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue