Introduce WAL records to log reuse of btree pages, allowing conflict

resolution during Hot Standby. Page reuse interlock requested by Tom.
Analysis and patch by me.
This commit is contained in:
Simon Riggs 2010-02-13 00:59:58 +00:00
parent 4688869f41
commit fafa374f2d
3 changed files with 111 additions and 22 deletions

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.119 2010/02/13 00:59:58 sriggs Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@ -446,6 +446,48 @@ _bt_checkpage(Relation rel, Buffer buf)
errhint("Please REINDEX it.")));
}
/*
 * Log the reuse of a page from the FSM.
 *
 * Emits an XLOG_BTREE_REUSE_PAGE record carrying the relation node, the
 * block number, and latestRemovedXid, so that Hot Standby replay can
 * resolve conflicts with standby queries before the page is recycled.
 * Temporary relations are never WAL-logged, so this is a no-op for them.
 */
static void
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
{
	/* Temp relations are not WAL-logged; nothing to do. */
	if (rel->rd_istemp)
		return;

	/* No ereport(ERROR) until changes are logged */
	START_CRIT_SECTION();

	/*
	 * We don't do MarkBufferDirty here because we're about to initialise
	 * the page, and nobody else can see it yet.
	 */

	/* XLOG stuff */
	{
		XLogRecPtr recptr;
		XLogRecData rdata[1];
		xl_btree_reuse_page xlrec_reuse;

		xlrec_reuse.node = rel->rd_node;
		xlrec_reuse.block = blkno;
		xlrec_reuse.latestRemovedXid = latestRemovedXid;

		/* Single rdata entry; record is not associated with any buffer. */
		rdata[0].data = (char *) &xlrec_reuse;
		rdata[0].len = SizeOfBtreeReusePage;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = NULL;

		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);

		/*
		 * We don't do PageSetLSN or PageSetTLI here because we're about to
		 * initialise the page, so no need.
		 */
	}

	END_CRIT_SECTION();
}
/*
* _bt_getbuf() -- Get a buffer by block number for read or write.
*
@ -510,7 +552,19 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
{
page = BufferGetPage(buf);
if (_bt_page_recyclable(page))
{
{
/*
* If we are generating WAL for Hot Standby then create
* a WAL record that will allow us to conflict with
* queries running on standby.
*/
if (XLogStandbyInfoActive())
{
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
_bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
}
/* Okay to use page. Re-initialize and return it */
_bt_pageinit(page, BufferGetPageSize(buf));
return buf;

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.61 2010/02/13 00:59:58 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@ -814,26 +814,48 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
{
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/*
* Btree delete records can conflict with standby queries. You might
* think that vacuum records would conflict as well, but we've handled
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
* cleaned by the vacuum of the heap and so we can resolve any conflicts
just once when that arrives. After that we know that no conflicts
* exist from individual btree vacuum records on that index.
*/
if (InHotStandby && info == XLOG_BTREE_DELETE)
if (InHotStandby)
{
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
switch (info)
{
case XLOG_BTREE_DELETE:
/*
* Btree delete records can conflict with standby queries. You might
* think that vacuum records would conflict as well, but we've handled
* that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
* cleaned by the vacuum of the heap and so we can resolve any conflicts
* just once when that arrives. After that any we know that no conflicts
* exist from individual btree vacuum records on that index.
*/
{
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
/*
* XXX Currently we put everybody on death row, because
* currently _bt_delitems() supplies InvalidTransactionId.
* This can be fairly painful, so providing a better value
* here is worth some thought and possibly some effort to
* improve.
*/
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
/*
* XXX Currently we put everybody on death row, because
* currently _bt_delitems() supplies InvalidTransactionId.
* This can be fairly painful, so providing a better value
* here is worth some thought and possibly some effort to
* improve.
*/
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
}
break;
case XLOG_BTREE_REUSE_PAGE:
/*
* Btree reuse page records exist to provide a conflict point when we
* reuse pages in the index via the FSM. That's all it does though.
*/
{
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
}
return;
default:
break;
}
}
/*

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.129 2010/02/13 00:59:58 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@ -221,6 +221,7 @@ typedef struct BTMetaPageData
#define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes
* parent half-dead */
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from FSM */
/*
* All that we need to find changed index tuple
@ -321,6 +322,18 @@ typedef struct xl_btree_delete
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId))
/*
 * This is what we need to know about page reuse within btree.
 *
 * Written by _bt_log_reuse_page() when a recyclable page is taken from the
 * FSM; on replay it provides the conflict point for Hot Standby queries.
 */
typedef struct xl_btree_reuse_page
{
	RelFileNode node;			/* relation containing the reused page */
	BlockNumber block;			/* block number being reused */
	TransactionId latestRemovedXid;	/* xid used for standby conflict
									 * resolution */
} xl_btree_reuse_page;

#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
/*
* This is what we need to know about vacuum of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a