Update FSM on WAL replay. This is a bit limited; the FSM is only updated

on non-full-page-image WAL records, and quite arbitrarily, only if there's
less than 20% free space on the page after the insert/update (not on HOT
updates, though). The 20% cutoff should avoid most of the overhead, when
replaying a bulk insertion, for example, while ensuring that pages that
are full are marked as full in the FSM.

This is mostly to avoid the nasty worst case scenario, where you replay
from a PITR archive, and the FSM information in the base backup is really
out of date. If there were a lot of pages that the outdated FSM claims to
have free space, but don't actually have any, the first unlucky inserter
after the recovery would traverse through all those pages, just to find
out that they're full. We didn't have this problem with the old FSM
implementation, because we simply threw the FSM information away on a
non-clean shutdown.
This commit is contained in:
Heikki Linnakangas 2008-10-31 19:40:27 +00:00
parent 9b46abb7c4
commit e9816533e3
3 changed files with 92 additions and 9 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.267 2008/10/31 15:04:59 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $
*
*
* INTERFACE ROUTINES
@ -54,6 +54,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
@ -4022,6 +4023,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
int nredirected;
int ndead;
int nunused;
Size freespace;
if (record->xl_info & XLR_BKP_BLOCK_1)
return;
@ -4053,6 +4055,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
nowunused, nunused,
clean_move);
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
/*
* Note: we don't worry about updating the page's prunability hints.
* At worst this will cause an extra prune cycle to occur soon.
@ -4062,6 +4066,15 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move)
PageSetTLI(page, ThisTimeLineID);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/*
* Update the FSM as well.
*
* XXX: We don't get here if the page was restored from full page image.
* We don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway.
*/
XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
}
static void
@ -4205,15 +4218,17 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
HeapTupleHeader htup;
xl_heap_header xlhdr;
uint32 newlen;
Size freespace;
BlockNumber blkno;
if (record->xl_info & XLR_BKP_BLOCK_1)
return;
blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
if (record->xl_info & XLOG_HEAP_INIT_PAGE)
{
buffer = XLogReadBuffer(xlrec->target.node,
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
true);
buffer = XLogReadBuffer(xlrec->target.node, blkno, true);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer);
@ -4221,9 +4236,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
}
else
{
buffer = XLogReadBuffer(xlrec->target.node,
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
false);
buffer = XLogReadBuffer(xlrec->target.node, blkno, false);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
@ -4261,10 +4274,25 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_insert_redo: failed to add tuple");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/*
* If the page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do
* much better than that without knowing the fill-factor for the table.
*
* XXX: We don't get here if the page was restored from full page image.
* We don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway.
*/
if (freespace < BLCKSZ / 5)
XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace);
}
/*
@ -4289,6 +4317,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update)
xl_heap_header xlhdr;
int hsize;
uint32 newlen;
Size freespace;
if (record->xl_info & XLR_BKP_BLOCK_1)
{
@ -4446,10 +4475,32 @@ newsame:;
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_update_redo: failed to add tuple");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
/*
* If the page is running low on free space, update the FSM as well.
* Arbitrarily, our definition of "low" is less than 20%. We can't do
* much better than that without knowing the fill-factor for the table.
*
* However, don't update the FSM on HOT updates, because after crash
* recovery, either the old or the new tuple will certainly be dead and
* prunable. After pruning, the page will have roughly as much free space
* as it did before the update, assuming the new tuple is about the same
* size as the old one.
*
* XXX: We don't get here if the page was restored from full page image.
* We don't bother to update the FSM in that case, it doesn't need to be
* totally accurate anyway.
*/
if (!hot_update && freespace < BLCKSZ / 5)
XLogRecordPageWithFreeSpace(xlrec->target.node,
ItemPointerGetBlockNumber(&(xlrec->newtid)), freespace);
}
static void

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.65 2008/10/31 15:05:00 heikki Exp $
* $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.66 2008/10/31 19:40:27 heikki Exp $
*
*
* NOTES:
@ -202,6 +202,36 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
fsm_set_and_search(rel, addr, slot, new_cat, 0);
}
/*
 * XLogRecordPageWithFreeSpace - WAL-replay variant of RecordPageWithFreeSpace
 *
 * Identifies the relation by RelFileNode instead of a Relation, and stores
 * the free-space category computed from spaceAvail into the FSM slot that
 * corresponds to heap block heapBlk.
 */
void
XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
							Size spaceAvail)
{
	int			category = fsm_space_avail_to_cat(spaceAvail);
	uint16		fsm_slot;
	FSMAddress	fsm_addr;
	BlockNumber fsm_blkno;
	Buffer		fsm_buf;
	Page		fsm_page;

	/* Map the heap block to its FSM address/slot, then to a physical block */
	fsm_addr = fsm_get_location(heapBlk, &fsm_slot);
	fsm_blkno = fsm_logical_to_physical(fsm_addr);

	/* Read the FSM page; if it doesn't exist already, it gets extended */
	fsm_buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, fsm_blkno,
									 RBM_ZERO_ON_ERROR);
	fsm_page = BufferGetPage(fsm_buf);

	/* A brand-new page must be initialized before a slot can be set */
	if (PageIsNew(fsm_page))
	{
		PageInit(fsm_page, BLCKSZ, 0);
	}

	/* Dirty the buffer only when fsm_set_avail reports a change */
	if (fsm_set_avail(fsm_page, fsm_slot, category))
	{
		MarkBufferDirty(fsm_buf);
	}

	/* NOTE(review): presumes the buffer was returned locked -- confirm */
	UnlockReleaseBuffer(fsm_buf);
}
/*
* GetRecordedFreePage - return the amount of free space on a particular page,
* according to the FSM.

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.29 2008/09/30 10:52:13 heikki Exp $
* $PostgreSQL: pgsql/src/include/storage/freespace.h,v 1.30 2008/10/31 19:40:27 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@ -27,6 +27,8 @@ extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel,
Size spaceNeeded);
extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
Size spaceAvail);
extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
Size spaceAvail);
extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
extern void FreeSpaceMapVacuum(Relation rel);