From fafa374f2d1e04ab265d56cdadb634124364646f Mon Sep 17 00:00:00 2001 From: Simon Riggs Date: Sat, 13 Feb 2010 00:59:58 +0000 Subject: [PATCH] Introduce WAL records to log reuse of btree pages, allowing conflict resolution during Hot Standby. Page reuse interlock requested by Tom. Analysis and patch by me. --- src/backend/access/nbtree/nbtpage.c | 58 +++++++++++++++++++++++++++- src/backend/access/nbtree/nbtxlog.c | 60 ++++++++++++++++++++--------- src/include/access/nbtree.h | 15 +++++++- 3 files changed, 111 insertions(+), 22 deletions(-) diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index b0eff770d0..5df975e4ec 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.119 2010/02/13 00:59:58 sriggs Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -446,6 +446,48 @@ _bt_checkpage(Relation rel, Buffer buf) errhint("Please REINDEX it."))); } +/* + * Log the reuse of a page from the FSM. + */ +static void +_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid) +{ + if (rel->rd_istemp) + return; + + /* No ereport(ERROR) until changes are logged */ + START_CRIT_SECTION(); + + /* + * We don't do MarkBufferDirty here because we're about to initialise + * the page, and nobody else can see it yet. 
+ */ + + /* XLOG stuff */ + { + XLogRecPtr recptr; + XLogRecData rdata[1]; + xl_btree_reuse_page xlrec_reuse; + + xlrec_reuse.node = rel->rd_node; + xlrec_reuse.block = blkno; + xlrec_reuse.latestRemovedXid = latestRemovedXid; + rdata[0].data = (char *) &xlrec_reuse; + rdata[0].len = SizeOfBtreeReusePage; + rdata[0].buffer = InvalidBuffer; + rdata[0].next = NULL; + + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata); + + /* + * We don't do PageSetLSN or PageSetTLI here because + * we're about to initialise the page, so no need. + */ + } + + END_CRIT_SECTION(); +} + /* * _bt_getbuf() -- Get a buffer by block number for read or write. * @@ -510,7 +552,19 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) { page = BufferGetPage(buf); if (_bt_page_recyclable(page)) - { + { + /* + * If we are generating WAL for Hot Standby then create + * a WAL record that will allow us to conflict with + * queries running on standby. + */ + if (XLogStandbyInfoActive()) + { + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + _bt_log_reuse_page(rel, blkno, opaque->btpo.xact); + } + /* Okay to use page. Re-initialize and return it */ _bt_pageinit(page, BufferGetPageSize(buf)); return buf; diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 83a7c98c14..f5320fb103 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.61 2010/02/13 00:59:58 sriggs Exp $ * *------------------------------------------------------------------------- */ @@ -814,26 +814,48 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record) { uint8 info = record->xl_info & ~XLR_INFO_MASK; - /* - * Btree delete records can conflict with standby queries. 
You might - * think that vacuum records would conflict as well, but we've handled - * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid - * cleaned by the vacuum of the heap and so we can resolve any conflicts - * just once when that arrives. After that any we know that no conflicts - * exist from individual btree vacuum records on that index. - */ - if (InHotStandby && info == XLOG_BTREE_DELETE) + if (InHotStandby) { - xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record); + switch (info) + { + case XLOG_BTREE_DELETE: + /* + * Btree delete records can conflict with standby queries. You might + * think that vacuum records would conflict as well, but we've handled + * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid + * cleaned by the vacuum of the heap and so we can resolve any conflicts + * just once when that arrives. After that we know that no conflicts + * exist from individual btree vacuum records on that index. + */ + { + xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record); - /* - * XXX Currently we put everybody on death row, because - * currently _bt_delitems() supplies InvalidTransactionId. - * This can be fairly painful, so providing a better value - * here is worth some thought and possibly some effort to - * improve. - */ - ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node); + /* + * XXX Currently we put everybody on death row, because + * currently _bt_delitems() supplies InvalidTransactionId. + * This can be fairly painful, so providing a better value + * here is worth some thought and possibly some effort to + * improve. + */ + ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node); + } + break; + + case XLOG_BTREE_REUSE_PAGE: + /* + * Btree reuse page records exist to provide a conflict point when we + * reuse pages in the index via the FSM. That's all they do, though. 
+ */ + { + xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record); + + ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node); + } + return; + + default: + break; + } } /* diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index acbb0cbc7d..f3898a4140 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.129 2010/02/13 00:59:58 sriggs Exp $ * *------------------------------------------------------------------------- */ @@ -221,6 +221,7 @@ typedef struct BTMetaPageData #define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes * parent half-dead */ #define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */ +#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from FSM */ /* * All that we need to find changed index tuple @@ -321,6 +322,18 @@ typedef struct xl_btree_delete #define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId)) +/* + * This is what we need to know about page reuse within btree. + */ +typedef struct xl_btree_reuse_page +{ + RelFileNode node; + BlockNumber block; + TransactionId latestRemovedXid; +} xl_btree_reuse_page; + +#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page)) + /* * This is what we need to know about vacuum of individual leaf index tuples. * The WAL record can represent deletion of any number of index tuples on a