2003-02-21 01:06:22 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* nbtxlog.c
|
|
|
|
* WAL replay logic for btrees.
|
|
|
|
*
|
|
|
|
*
|
2008-01-01 20:46:01 +01:00
|
|
|
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
2003-02-21 01:06:22 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2008-06-12 11:12:31 +02:00
|
|
|
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.52 2008/06/12 09:12:30 heikki Exp $
|
2003-02-21 01:06:22 +01:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include "access/nbtree.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "access/transam.h"
|
2008-05-12 02:00:54 +02:00
|
|
|
#include "storage/bufmgr.h"
|
2003-02-21 01:06:22 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We must keep track of expected insertions due to page splits, and apply
|
|
|
|
* them manually if they are not seen in the WAL log during replay. This
|
|
|
|
* makes it safe for page insertion to be a multiple-WAL-action process.
|
|
|
|
*
|
2006-11-01 20:43:17 +01:00
|
|
|
* Similarly, deletion of an only child page and deletion of its parent page
|
|
|
|
* form multiple WAL log entries, and we have to be prepared to follow through
|
|
|
|
* with the deletion if the log ends between.
|
|
|
|
*
|
2003-02-21 01:06:22 +01:00
|
|
|
* The data structure is a simple linked list --- this should be good enough,
|
2006-11-01 20:43:17 +01:00
|
|
|
* since we don't expect a page split or multi deletion to remain incomplete
|
|
|
|
* for long. In any case we need to respect the order of operations.
|
2003-02-21 01:06:22 +01:00
|
|
|
*/
|
2006-11-01 20:43:17 +01:00
|
|
|
typedef struct bt_incomplete_action
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
|
|
|
RelFileNode node; /* the index */
|
2006-11-01 20:43:17 +01:00
|
|
|
bool is_split; /* T = pending split, F = pending delete */
|
|
|
|
/* these fields are for a split: */
|
|
|
|
bool is_root; /* we split the root */
|
2003-08-04 02:43:34 +02:00
|
|
|
BlockNumber leftblk; /* left half of split */
|
|
|
|
BlockNumber rightblk; /* right half of split */
|
2006-11-01 20:43:17 +01:00
|
|
|
/* these fields are for a delete: */
|
|
|
|
BlockNumber delblk; /* parent block to be deleted */
|
2007-11-15 23:25:18 +01:00
|
|
|
} bt_incomplete_action;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-11-01 20:43:17 +01:00
|
|
|
static List *incomplete_actions;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
log_incomplete_split(RelFileNode node, BlockNumber leftblk,
|
|
|
|
BlockNumber rightblk, bool is_root)
|
|
|
|
{
|
2006-11-01 20:43:17 +01:00
|
|
|
bt_incomplete_action *action = palloc(sizeof(bt_incomplete_action));
|
|
|
|
|
|
|
|
action->node = node;
|
|
|
|
action->is_split = true;
|
|
|
|
action->is_root = is_root;
|
|
|
|
action->leftblk = leftblk;
|
|
|
|
action->rightblk = rightblk;
|
|
|
|
incomplete_actions = lappend(incomplete_actions, action);
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-04-13 05:53:05 +02:00
|
|
|
forget_matching_split(RelFileNode node, BlockNumber downlink, bool is_root)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-11-01 20:43:17 +01:00
|
|
|
foreach(l, incomplete_actions)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2006-11-01 20:43:17 +01:00
|
|
|
bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-11-01 20:43:17 +01:00
|
|
|
if (RelFileNodeEquals(node, action->node) &&
|
|
|
|
action->is_split &&
|
|
|
|
downlink == action->rightblk)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2006-11-01 20:43:17 +01:00
|
|
|
if (is_root != action->is_root)
|
2006-04-01 01:32:07 +02:00
|
|
|
elog(LOG, "forget_matching_split: fishy is_root data (expected %d, got %d)",
|
2006-11-01 20:43:17 +01:00
|
|
|
action->is_root, is_root);
|
|
|
|
incomplete_actions = list_delete_ptr(incomplete_actions, action);
|
|
|
|
pfree(action);
|
|
|
|
break; /* need not look further */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
log_incomplete_deletion(RelFileNode node, BlockNumber delblk)
|
|
|
|
{
|
|
|
|
bt_incomplete_action *action = palloc(sizeof(bt_incomplete_action));
|
|
|
|
|
|
|
|
action->node = node;
|
|
|
|
action->is_split = false;
|
|
|
|
action->delblk = delblk;
|
|
|
|
incomplete_actions = lappend(incomplete_actions, action);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
forget_matching_deletion(RelFileNode node, BlockNumber delblk)
|
|
|
|
{
|
|
|
|
ListCell *l;
|
|
|
|
|
|
|
|
foreach(l, incomplete_actions)
|
|
|
|
{
|
|
|
|
bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);
|
|
|
|
|
|
|
|
if (RelFileNodeEquals(node, action->node) &&
|
|
|
|
!action->is_split &&
|
|
|
|
delblk == action->delblk)
|
|
|
|
{
|
|
|
|
incomplete_actions = list_delete_ptr(incomplete_actions, action);
|
|
|
|
pfree(action);
|
2003-02-21 01:06:22 +01:00
|
|
|
break; /* need not look further */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-04-13 05:53:05 +02:00
|
|
|
/*
|
|
|
|
* _bt_restore_page -- re-enter all the index tuples on a page
|
|
|
|
*
|
|
|
|
* The page is freshly init'd, and *from (length len) is a copy of what
|
|
|
|
* had been its upper part (pd_upper to pd_special). We assume that the
|
|
|
|
* tuples had been added to the page in item-number order, and therefore
|
|
|
|
* the one with highest item number appears first (lowest on the page).
|
|
|
|
*
|
|
|
|
* NOTE: the way this routine is coded, the rebuilt page will have the items
|
|
|
|
* in correct itemno sequence, but physically the opposite order from the
|
|
|
|
* original, because we insert them in the opposite of itemno order. This
|
|
|
|
* does not matter in any current btree code, but it's something to keep an
|
2007-04-11 22:47:38 +02:00
|
|
|
* eye on. Is it worth changing just on general principles? See also the
|
|
|
|
* notes in btree_xlog_split().
|
2006-04-13 05:53:05 +02:00
|
|
|
*/
|
2003-02-21 01:06:22 +01:00
|
|
|
static void
|
|
|
|
_bt_restore_page(Page page, char *from, int len)
|
|
|
|
{
|
2006-01-26 00:04:21 +01:00
|
|
|
IndexTupleData itupdata;
|
2003-02-21 01:06:22 +01:00
|
|
|
Size itemsz;
|
|
|
|
char *end = from + len;
|
|
|
|
|
|
|
|
for (; from < end;)
|
|
|
|
{
|
2006-01-26 00:04:21 +01:00
|
|
|
/* Need to copy tuple header due to alignment considerations */
|
|
|
|
memcpy(&itupdata, from, sizeof(IndexTupleData));
|
|
|
|
itemsz = IndexTupleDSize(itupdata);
|
2003-02-21 01:06:22 +01:00
|
|
|
itemsz = MAXALIGN(itemsz);
|
2007-09-20 19:56:33 +02:00
|
|
|
if (PageAddItem(page, (Item) from, itemsz, FirstOffsetNumber,
|
|
|
|
false, false) == InvalidOffsetNumber)
|
Wording cleanup for error messages. Also change can't -> cannot.
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 20:10:30 +01:00
|
|
|
elog(PANIC, "_bt_restore_page: cannot add item to page");
|
2003-02-21 01:06:22 +01:00
|
|
|
from += itemsz;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2008-06-12 11:12:31 +02:00
|
|
|
_bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
|
2003-02-21 01:06:22 +01:00
|
|
|
BlockNumber root, uint32 level,
|
2004-06-02 19:28:18 +02:00
|
|
|
BlockNumber fastroot, uint32 fastlevel)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
|
|
|
Buffer metabuf;
|
|
|
|
Page metapg;
|
|
|
|
BTMetaPageData *md;
|
|
|
|
BTPageOpaque pageop;
|
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
metabuf = XLogReadBuffer(rnode, BTREE_METAPAGE, true);
|
2006-03-29 23:17:39 +02:00
|
|
|
Assert(BufferIsValid(metabuf));
|
2003-02-21 01:06:22 +01:00
|
|
|
metapg = BufferGetPage(metabuf);
|
2006-03-29 23:17:39 +02:00
|
|
|
|
2003-02-21 01:06:22 +01:00
|
|
|
_bt_pageinit(metapg, BufferGetPageSize(metabuf));
|
|
|
|
|
|
|
|
md = BTPageGetMeta(metapg);
|
2004-06-02 19:28:18 +02:00
|
|
|
md->btm_magic = BTREE_MAGIC;
|
2003-02-21 01:06:22 +01:00
|
|
|
md->btm_version = BTREE_VERSION;
|
|
|
|
md->btm_root = root;
|
|
|
|
md->btm_level = level;
|
|
|
|
md->btm_fastroot = fastroot;
|
|
|
|
md->btm_fastlevel = fastlevel;
|
|
|
|
|
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
|
|
|
|
pageop->btpo_flags = BTP_META;
|
|
|
|
|
2005-06-02 07:55:29 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Set pd_lower just past the end of the metadata. This is not essential
|
|
|
|
* but it makes the page look compressible to xlog.c.
|
2005-06-02 07:55:29 +02:00
|
|
|
*/
|
|
|
|
((PageHeader) metapg)->pd_lower =
|
|
|
|
((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
|
|
|
|
|
2003-02-21 01:06:22 +01:00
|
|
|
PageSetLSN(metapg, lsn);
|
2004-07-22 00:31:26 +02:00
|
|
|
PageSetTLI(metapg, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(metabuf);
|
|
|
|
UnlockReleaseBuffer(metabuf);
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_insert(bool isleaf, bool ismeta,
|
2003-02-21 01:06:22 +01:00
|
|
|
XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
|
|
|
xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
char *datapos;
|
|
|
|
int datalen;
|
|
|
|
xl_btree_metadata md;
|
2006-10-04 02:30:14 +02:00
|
|
|
BlockNumber downlink = 0;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
|
|
|
datapos = (char *) xlrec + SizeOfBtreeInsert;
|
|
|
|
datalen = record->xl_len - SizeOfBtreeInsert;
|
2006-04-13 05:53:05 +02:00
|
|
|
if (!isleaf)
|
|
|
|
{
|
|
|
|
memcpy(&downlink, datapos, sizeof(BlockNumber));
|
|
|
|
datapos += sizeof(BlockNumber);
|
|
|
|
datalen -= sizeof(BlockNumber);
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
if (ismeta)
|
|
|
|
{
|
|
|
|
memcpy(&md, datapos, sizeof(xl_btree_metadata));
|
|
|
|
datapos += sizeof(xl_btree_metadata);
|
|
|
|
datalen -= sizeof(xl_btree_metadata);
|
|
|
|
}
|
|
|
|
|
2006-04-13 05:53:05 +02:00
|
|
|
if ((record->xl_info & XLR_BKP_BLOCK_1) && !ismeta && isleaf)
|
2003-02-21 01:06:22 +01:00
|
|
|
return; /* nothing to do */
|
|
|
|
|
2005-06-06 19:01:25 +02:00
|
|
|
if (!(record->xl_info & XLR_BKP_BLOCK_1))
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec->target.node,
|
2006-10-04 02:30:14 +02:00
|
|
|
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
2006-03-29 23:17:39 +02:00
|
|
|
false);
|
|
|
|
if (BufferIsValid(buffer))
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2006-03-29 23:17:39 +02:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-03-29 23:17:39 +02:00
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
|
|
|
{
|
2006-04-01 01:32:07 +02:00
|
|
|
UnlockReleaseBuffer(buffer);
|
2006-03-29 23:17:39 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (PageAddItem(page, (Item) datapos, datalen,
|
2006-10-04 02:30:14 +02:00
|
|
|
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
2007-09-20 19:56:33 +02:00
|
|
|
false, false) == InvalidOffsetNumber)
|
2006-03-29 23:17:39 +02:00
|
|
|
elog(PANIC, "btree_insert_redo: failed to add item");
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2006-03-29 23:17:39 +02:00
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-06-06 19:01:25 +02:00
|
|
|
if (ismeta)
|
2008-06-12 11:12:31 +02:00
|
|
|
_bt_restore_meta(xlrec->target.node, lsn,
|
2005-06-06 19:01:25 +02:00
|
|
|
md.root, md.level,
|
|
|
|
md.fastroot, md.fastlevel);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
|
|
|
/* Forget any split this insertion completes */
|
2006-04-13 05:53:05 +02:00
|
|
|
if (!isleaf)
|
|
|
|
forget_matching_split(xlrec->target.node, downlink, false);
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_split(bool onleft, bool isroot,
|
2003-02-21 01:06:22 +01:00
|
|
|
XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
|
|
|
xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
|
2007-04-11 22:47:38 +02:00
|
|
|
Buffer rbuf;
|
|
|
|
Page rpage;
|
|
|
|
BTPageOpaque ropaque;
|
2007-02-08 06:05:53 +01:00
|
|
|
char *datapos;
|
|
|
|
int datalen;
|
2007-04-11 22:47:38 +02:00
|
|
|
OffsetNumber newitemoff = 0;
|
2007-11-15 22:14:46 +01:00
|
|
|
Item newitem = NULL;
|
|
|
|
Size newitemsz = 0;
|
2007-11-16 20:53:50 +01:00
|
|
|
Item left_hikey = NULL;
|
|
|
|
Size left_hikeysz = 0;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
datapos = (char *) xlrec + SizeOfBtreeSplit;
|
|
|
|
datalen = record->xl_len - SizeOfBtreeSplit;
|
2006-03-29 23:17:39 +02:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
/* Forget any split this insertion completes */
|
|
|
|
if (xlrec->level > 0)
|
|
|
|
{
|
2007-04-11 22:47:38 +02:00
|
|
|
/* we assume SizeOfBtreeSplit is at least 16-bit aligned */
|
2007-02-08 06:05:53 +01:00
|
|
|
BlockNumber downlink = BlockIdGetBlockNumber((BlockId) datapos);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
datapos += sizeof(BlockIdData);
|
|
|
|
datalen -= sizeof(BlockIdData);
|
|
|
|
|
|
|
|
forget_matching_split(xlrec->node, downlink, false);
|
2007-11-16 20:53:50 +01:00
|
|
|
|
|
|
|
/* Extract left hikey and its size (still assuming 16-bit alignment) */
|
|
|
|
if (!(record->xl_info & XLR_BKP_BLOCK_1))
|
|
|
|
{
|
|
|
|
/* We assume 16-bit alignment is enough for IndexTupleSize */
|
|
|
|
left_hikey = (Item) datapos;
|
|
|
|
left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
|
|
|
|
|
|
|
|
datapos += left_hikeysz;
|
|
|
|
datalen -= left_hikeysz;
|
|
|
|
}
|
2007-02-08 06:05:53 +01:00
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/* Extract newitem and newitemoff, if present */
|
2007-05-20 23:08:19 +02:00
|
|
|
if (onleft)
|
2006-04-13 05:53:05 +02:00
|
|
|
{
|
2007-04-11 22:47:38 +02:00
|
|
|
/* Extract the offset (still assuming 16-bit alignment) */
|
2007-02-08 06:05:53 +01:00
|
|
|
memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
|
|
|
|
datapos += sizeof(OffsetNumber);
|
|
|
|
datalen -= sizeof(OffsetNumber);
|
2007-05-20 23:08:19 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (onleft && !(record->xl_info & XLR_BKP_BLOCK_1))
|
|
|
|
{
|
2007-04-11 22:47:38 +02:00
|
|
|
/*
|
2007-11-16 20:53:50 +01:00
|
|
|
* We assume that 16-bit alignment is enough to apply IndexTupleSize
|
|
|
|
* (since it's fetching from a uint16 field) and also enough for
|
|
|
|
* PageAddItem to insert the tuple.
|
2007-04-11 22:47:38 +02:00
|
|
|
*/
|
2007-11-16 20:53:50 +01:00
|
|
|
newitem = (Item) datapos;
|
|
|
|
newitemsz = MAXALIGN(IndexTupleSize(newitem));
|
2007-02-08 06:05:53 +01:00
|
|
|
datapos += newitemsz;
|
|
|
|
datalen -= newitemsz;
|
2006-04-13 05:53:05 +02:00
|
|
|
}
|
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/* Reconstruct right (new) sibling from scratch */
|
2008-06-12 11:12:31 +02:00
|
|
|
rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true);
|
2007-02-08 06:05:53 +01:00
|
|
|
Assert(BufferIsValid(rbuf));
|
|
|
|
rpage = (Page) BufferGetPage(rbuf);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
_bt_pageinit(rpage, BufferGetPageSize(rbuf));
|
|
|
|
ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
|
2006-03-29 23:17:39 +02:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
ropaque->btpo_prev = xlrec->leftsib;
|
|
|
|
ropaque->btpo_next = xlrec->rnext;
|
|
|
|
ropaque->btpo.level = xlrec->level;
|
|
|
|
ropaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
|
|
|
|
ropaque->btpo_cycleid = 0;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
_bt_restore_page(rpage, datapos, datalen);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-11-16 20:53:50 +01:00
|
|
|
/*
|
|
|
|
* On leaf level, the high key of the left page is equal to the
|
|
|
|
* first key on the right page.
|
|
|
|
*/
|
|
|
|
if (xlrec->level == 0)
|
|
|
|
{
|
|
|
|
ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));
|
|
|
|
|
|
|
|
left_hikey = PageGetItem(rpage, hiItemId);
|
|
|
|
left_hikeysz = ItemIdGetLength(hiItemId);
|
|
|
|
}
|
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
PageSetLSN(rpage, lsn);
|
|
|
|
PageSetTLI(rpage, ThisTimeLineID);
|
|
|
|
MarkBufferDirty(rbuf);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/* don't release the buffer yet; we touch right page's first item below */
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/*
|
|
|
|
* Reconstruct left (original) sibling if needed. Note that this code
|
|
|
|
* ensures that the items remaining on the left page are in the correct
|
|
|
|
* item number order, but it does not reproduce the physical order they
|
2007-11-15 22:14:46 +01:00
|
|
|
* would have had. Is this worth changing? See also _bt_restore_page().
|
2007-04-11 22:47:38 +02:00
|
|
|
*/
|
|
|
|
if (!(record->xl_info & XLR_BKP_BLOCK_1))
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2008-06-12 11:12:31 +02:00
|
|
|
Buffer lbuf = XLogReadBuffer(xlrec->node, xlrec->leftsib, false);
|
2007-02-08 06:05:53 +01:00
|
|
|
|
|
|
|
if (BufferIsValid(lbuf))
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
Page lpage = (Page) BufferGetPage(lbuf);
|
2007-04-11 22:47:38 +02:00
|
|
|
BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
|
2006-03-29 23:17:39 +02:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
if (!XLByteLE(lsn, PageGetLSN(lpage)))
|
|
|
|
{
|
|
|
|
OffsetNumber off;
|
|
|
|
OffsetNumber maxoff = PageGetMaxOffsetNumber(lpage);
|
2007-04-11 22:47:38 +02:00
|
|
|
OffsetNumber deletable[MaxOffsetNumber];
|
2007-11-15 22:14:46 +01:00
|
|
|
int ndeletable = 0;
|
2007-02-08 06:05:53 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Remove the items from the left page that were copied to the
|
|
|
|
* right page. Also remove the old high key, if any. (We must
|
|
|
|
* remove everything before trying to insert any items, else
|
|
|
|
* we risk not having enough space.)
|
2007-04-11 22:47:38 +02:00
|
|
|
*/
|
|
|
|
if (!P_RIGHTMOST(lopaque))
|
|
|
|
{
|
|
|
|
deletable[ndeletable++] = P_HIKEY;
|
2007-11-15 22:14:46 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/*
|
|
|
|
* newitemoff is given to us relative to the original
|
|
|
|
* page's item numbering, so adjust it for this deletion.
|
|
|
|
*/
|
|
|
|
newitemoff--;
|
|
|
|
}
|
|
|
|
for (off = xlrec->firstright; off <= maxoff; off++)
|
|
|
|
deletable[ndeletable++] = off;
|
|
|
|
if (ndeletable > 0)
|
|
|
|
PageIndexMultiDelete(lpage, deletable, ndeletable);
|
2007-02-08 06:05:53 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/*
|
|
|
|
* Add the new item if it was inserted on left page.
|
|
|
|
*/
|
2007-02-08 06:05:53 +01:00
|
|
|
if (onleft)
|
2006-03-29 23:17:39 +02:00
|
|
|
{
|
2007-04-11 22:47:38 +02:00
|
|
|
if (PageAddItem(lpage, newitem, newitemsz, newitemoff,
|
2007-09-20 19:56:33 +02:00
|
|
|
false, false) == InvalidOffsetNumber)
|
2007-04-11 22:47:38 +02:00
|
|
|
elog(PANIC, "failed to add new item to left page after split");
|
2006-03-29 23:17:39 +02:00
|
|
|
}
|
2007-04-11 22:47:38 +02:00
|
|
|
|
2007-11-16 20:53:50 +01:00
|
|
|
/* Set high key */
|
|
|
|
if (PageAddItem(lpage, left_hikey, left_hikeysz,
|
2007-09-20 19:56:33 +02:00
|
|
|
P_HIKEY, false, false) == InvalidOffsetNumber)
|
2007-04-11 22:47:38 +02:00
|
|
|
elog(PANIC, "failed to add high key to left page after split");
|
2007-02-08 06:05:53 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/* Fix opaque fields */
|
2007-02-08 06:05:53 +01:00
|
|
|
lopaque->btpo_flags = (xlrec->level == 0) ? BTP_LEAF : 0;
|
|
|
|
lopaque->btpo_next = xlrec->rightsib;
|
|
|
|
lopaque->btpo_cycleid = 0;
|
|
|
|
|
|
|
|
PageSetLSN(lpage, lsn);
|
|
|
|
PageSetTLI(lpage, ThisTimeLineID);
|
|
|
|
MarkBufferDirty(lbuf);
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
2007-02-08 06:05:53 +01:00
|
|
|
|
|
|
|
UnlockReleaseBuffer(lbuf);
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
/* We no longer need the right buffer */
|
2007-02-08 06:05:53 +01:00
|
|
|
UnlockReleaseBuffer(rbuf);
|
|
|
|
|
|
|
|
/* Fix left-link of the page to the right of the new right sibling */
|
2007-04-11 22:47:38 +02:00
|
|
|
if (xlrec->rnext != P_NONE && !(record->xl_info & XLR_BKP_BLOCK_2))
|
2007-02-08 06:05:53 +01:00
|
|
|
{
|
2008-06-12 11:12:31 +02:00
|
|
|
Buffer buffer = XLogReadBuffer(xlrec->node, xlrec->rnext, false);
|
2007-04-11 22:47:38 +02:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
if (BufferIsValid(buffer))
|
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
Page page = (Page) BufferGetPage(buffer);
|
2007-02-08 06:05:53 +01:00
|
|
|
|
|
|
|
if (!XLByteLE(lsn, PageGetLSN(page)))
|
|
|
|
{
|
|
|
|
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
2007-04-11 22:47:38 +02:00
|
|
|
|
2007-02-08 06:05:53 +01:00
|
|
|
pageop->btpo_prev = xlrec->rightsib;
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
}
|
|
|
|
UnlockReleaseBuffer(buffer);
|
|
|
|
}
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
|
|
|
|
/* The job ain't done till the parent link is inserted... */
|
2007-02-08 06:05:53 +01:00
|
|
|
log_incomplete_split(xlrec->node,
|
|
|
|
xlrec->leftsib, xlrec->rightsib, isroot);
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
|
|
|
xl_btree_delete *xlrec;
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
2006-07-25 21:13:00 +02:00
|
|
|
BTPageOpaque opaque;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2005-06-06 19:01:25 +02:00
|
|
|
if (record->xl_info & XLR_BKP_BLOCK_1)
|
2003-02-21 01:06:22 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
xlrec = (xl_btree_delete *) XLogRecGetData(record);
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
|
2003-02-21 01:06:22 +01:00
|
|
|
if (!BufferIsValid(buffer))
|
2006-03-29 23:17:39 +02:00
|
|
|
return;
|
2003-02-21 01:06:22 +01:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
|
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
|
|
|
{
|
2006-04-01 01:32:07 +02:00
|
|
|
UnlockReleaseBuffer(buffer);
|
2003-02-21 01:06:22 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2003-02-23 23:43:09 +01:00
|
|
|
if (record->xl_len > SizeOfBtreeDelete)
|
|
|
|
{
|
|
|
|
OffsetNumber *unused;
|
|
|
|
OffsetNumber *unend;
|
|
|
|
|
|
|
|
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
|
|
|
|
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
|
|
|
|
|
2005-03-22 07:17:03 +01:00
|
|
|
PageIndexMultiDelete(page, unused, unend - unused);
|
2003-02-23 23:43:09 +01:00
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-07-25 21:13:00 +02:00
|
|
|
/*
|
2007-09-13 00:10:26 +02:00
|
|
|
* Mark the page as not containing any LP_DEAD items --- see comments in
|
2006-10-04 02:30:14 +02:00
|
|
|
* _bt_delitems().
|
2006-07-25 21:13:00 +02:00
|
|
|
*/
|
|
|
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
|
|
|
|
|
2003-02-21 01:06:22 +01:00
|
|
|
PageSetLSN(page, lsn);
|
2004-07-22 00:31:26 +02:00
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
|
2003-02-23 07:17:13 +01:00
|
|
|
static void
|
2006-11-01 20:43:17 +01:00
|
|
|
btree_xlog_delete_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
2003-02-23 07:17:13 +01:00
|
|
|
{
|
|
|
|
xl_btree_delete_page *xlrec = (xl_btree_delete_page *) XLogRecGetData(record);
|
2003-08-04 02:43:34 +02:00
|
|
|
BlockNumber parent;
|
|
|
|
BlockNumber target;
|
|
|
|
BlockNumber leftsib;
|
|
|
|
BlockNumber rightsib;
|
2003-02-23 07:17:13 +01:00
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
BTPageOpaque pageop;
|
|
|
|
|
|
|
|
parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
|
|
|
|
target = xlrec->deadblk;
|
|
|
|
leftsib = xlrec->leftblk;
|
|
|
|
rightsib = xlrec->rightblk;
|
|
|
|
|
|
|
|
/* parent page */
|
2005-06-06 19:01:25 +02:00
|
|
|
if (!(record->xl_info & XLR_BKP_BLOCK_1))
|
2003-02-23 07:17:13 +01:00
|
|
|
{
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec->target.node, parent, false);
|
2006-03-29 23:17:39 +02:00
|
|
|
if (BufferIsValid(buffer))
|
2003-02-23 07:17:13 +01:00
|
|
|
{
|
2006-03-29 23:17:39 +02:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
2003-02-23 07:17:13 +01:00
|
|
|
{
|
2006-04-01 01:32:07 +02:00
|
|
|
UnlockReleaseBuffer(buffer);
|
2003-02-23 07:17:13 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2006-03-29 23:17:39 +02:00
|
|
|
OffsetNumber poffset;
|
|
|
|
|
|
|
|
poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
|
|
|
|
if (poffset >= PageGetMaxOffsetNumber(page))
|
|
|
|
{
|
2006-11-01 20:43:17 +01:00
|
|
|
Assert(info == XLOG_BTREE_DELETE_PAGE_HALF);
|
2006-03-29 23:17:39 +02:00
|
|
|
Assert(poffset == P_FIRSTDATAKEY(pageop));
|
|
|
|
PageIndexTupleDelete(page, poffset);
|
|
|
|
pageop->btpo_flags |= BTP_HALF_DEAD;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ItemId itemid;
|
|
|
|
IndexTuple itup;
|
|
|
|
OffsetNumber nextoffset;
|
|
|
|
|
2006-11-01 20:43:17 +01:00
|
|
|
Assert(info != XLOG_BTREE_DELETE_PAGE_HALF);
|
2006-03-29 23:17:39 +02:00
|
|
|
itemid = PageGetItemId(page, poffset);
|
|
|
|
itup = (IndexTuple) PageGetItem(page, itemid);
|
|
|
|
ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
|
|
|
|
nextoffset = OffsetNumberNext(poffset);
|
|
|
|
PageIndexTupleDelete(page, nextoffset);
|
|
|
|
}
|
2003-02-23 07:17:13 +01:00
|
|
|
|
2006-03-29 23:17:39 +02:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2006-03-29 23:17:39 +02:00
|
|
|
}
|
2003-02-23 07:17:13 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Fix left-link of right sibling */
|
2005-06-06 19:01:25 +02:00
|
|
|
if (!(record->xl_info & XLR_BKP_BLOCK_2))
|
2003-02-23 07:17:13 +01:00
|
|
|
{
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec->target.node, rightsib, false);
|
2006-03-29 23:17:39 +02:00
|
|
|
if (BufferIsValid(buffer))
|
2003-02-23 07:17:13 +01:00
|
|
|
{
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
2003-12-14 01:34:47 +01:00
|
|
|
{
|
2006-04-01 01:32:07 +02:00
|
|
|
UnlockReleaseBuffer(buffer);
|
2003-12-14 01:34:47 +01:00
|
|
|
}
|
2003-02-23 07:17:13 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
2006-03-29 23:17:39 +02:00
|
|
|
pageop->btpo_prev = leftsib;
|
2003-02-23 07:17:13 +01:00
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
2004-07-22 00:31:26 +02:00
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2003-02-23 07:17:13 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-03-29 23:17:39 +02:00
|
|
|
/* Fix right-link of left sibling, if any */
|
|
|
|
if (!(record->xl_info & XLR_BKP_BLOCK_3))
|
|
|
|
{
|
|
|
|
if (leftsib != P_NONE)
|
|
|
|
{
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec->target.node, leftsib, false);
|
2006-03-29 23:17:39 +02:00
|
|
|
if (BufferIsValid(buffer))
|
|
|
|
{
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
|
|
|
{
|
2006-04-01 01:32:07 +02:00
|
|
|
UnlockReleaseBuffer(buffer);
|
2006-03-29 23:17:39 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
pageop->btpo_next = rightsib;
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2006-03-29 23:17:39 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-02-23 07:17:13 +01:00
|
|
|
/* Rewrite target page as empty deleted page */
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec->target.node, target, true);
|
2006-03-29 23:17:39 +02:00
|
|
|
Assert(BufferIsValid(buffer));
|
2003-02-23 07:17:13 +01:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
2006-03-29 23:17:39 +02:00
|
|
|
|
2005-06-06 19:01:25 +02:00
|
|
|
_bt_pageinit(page, BufferGetPageSize(buffer));
|
2003-02-23 07:17:13 +01:00
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
|
2005-06-06 19:01:25 +02:00
|
|
|
pageop->btpo_prev = leftsib;
|
|
|
|
pageop->btpo_next = rightsib;
|
|
|
|
pageop->btpo.xact = FrozenTransactionId;
|
|
|
|
pageop->btpo_flags = BTP_DELETED;
|
2006-05-08 02:00:17 +02:00
|
|
|
pageop->btpo_cycleid = 0;
|
2003-02-23 07:17:13 +01:00
|
|
|
|
2005-06-06 19:01:25 +02:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2003-02-23 07:17:13 +01:00
|
|
|
|
|
|
|
/* Update metapage if needed */
|
2006-11-01 20:43:17 +01:00
|
|
|
if (info == XLOG_BTREE_DELETE_PAGE_META)
|
2003-02-23 07:17:13 +01:00
|
|
|
{
|
2005-06-06 19:01:25 +02:00
|
|
|
xl_btree_metadata md;
|
2003-02-23 07:17:13 +01:00
|
|
|
|
2005-06-06 19:01:25 +02:00
|
|
|
memcpy(&md, (char *) xlrec + SizeOfBtreeDeletePage,
|
|
|
|
sizeof(xl_btree_metadata));
|
2008-06-12 11:12:31 +02:00
|
|
|
_bt_restore_meta(xlrec->target.node, lsn,
|
2005-06-06 19:01:25 +02:00
|
|
|
md.root, md.level,
|
|
|
|
md.fastroot, md.fastlevel);
|
2003-02-23 07:17:13 +01:00
|
|
|
}
|
2006-11-01 20:43:17 +01:00
|
|
|
|
|
|
|
/* Forget any completed deletion */
|
|
|
|
forget_matching_deletion(xlrec->target.node, target);
|
|
|
|
|
|
|
|
/* If parent became half-dead, remember it for deletion */
|
|
|
|
if (info == XLOG_BTREE_DELETE_PAGE_HALF)
|
|
|
|
log_incomplete_deletion(xlrec->target.node, parent);
|
2003-02-23 07:17:13 +01:00
|
|
|
}
|
|
|
|
|
2003-02-21 01:06:22 +01:00
|
|
|
static void
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
|
|
|
xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
BTPageOpaque pageop;
|
2006-10-04 02:30:14 +02:00
|
|
|
BlockNumber downlink = 0;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
buffer = XLogReadBuffer(xlrec->node, xlrec->rootblk, true);
|
2006-03-29 23:17:39 +02:00
|
|
|
Assert(BufferIsValid(buffer));
|
2003-02-21 01:06:22 +01:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
2006-03-29 23:17:39 +02:00
|
|
|
|
2003-02-21 01:06:22 +01:00
|
|
|
_bt_pageinit(page, BufferGetPageSize(buffer));
|
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
|
|
|
|
pageop->btpo_flags = BTP_ROOT;
|
|
|
|
pageop->btpo_prev = pageop->btpo_next = P_NONE;
|
|
|
|
pageop->btpo.level = xlrec->level;
|
|
|
|
if (xlrec->level == 0)
|
|
|
|
pageop->btpo_flags |= BTP_LEAF;
|
2006-05-08 02:00:17 +02:00
|
|
|
pageop->btpo_cycleid = 0;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
|
|
|
if (record->xl_len > SizeOfBtreeNewroot)
|
2006-04-13 05:53:05 +02:00
|
|
|
{
|
|
|
|
IndexTuple itup;
|
|
|
|
|
2003-02-21 01:06:22 +01:00
|
|
|
_bt_restore_page(page,
|
|
|
|
(char *) xlrec + SizeOfBtreeNewroot,
|
|
|
|
record->xl_len - SizeOfBtreeNewroot);
|
2006-04-13 05:53:05 +02:00
|
|
|
/* extract downlink to the right-hand split page */
|
|
|
|
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY));
|
|
|
|
downlink = ItemPointerGetBlockNumber(&(itup->t_tid));
|
|
|
|
Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
|
|
|
|
PageSetLSN(page, lsn);
|
2004-07-22 00:31:26 +02:00
|
|
|
PageSetTLI(page, ThisTimeLineID);
|
2006-04-01 01:32:07 +02:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
UnlockReleaseBuffer(buffer);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
_bt_restore_meta(xlrec->node, lsn,
|
2003-02-21 01:06:22 +01:00
|
|
|
xlrec->rootblk, xlrec->level,
|
2004-06-02 19:28:18 +02:00
|
|
|
xlrec->rootblk, xlrec->level);
|
2003-02-21 01:06:22 +01:00
|
|
|
|
|
|
|
/* Check to see if this satisfies any incomplete insertions */
|
2006-04-13 05:53:05 +02:00
|
|
|
if (record->xl_len > SizeOfBtreeNewroot)
|
|
|
|
forget_matching_split(xlrec->node, downlink, true);
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
btree_redo(XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
|
|
|
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
|
|
|
|
|
|
|
switch (info)
|
|
|
|
{
|
|
|
|
case XLOG_BTREE_INSERT_LEAF:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_insert(true, false, lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_INSERT_UPPER:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_insert(false, false, lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_INSERT_META:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_insert(false, true, lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_SPLIT_L:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_split(true, false, lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_SPLIT_R:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_split(false, false, lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_SPLIT_L_ROOT:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_split(true, true, lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_SPLIT_R_ROOT:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_split(false, true, lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_DELETE:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_delete(lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_DELETE_PAGE:
|
|
|
|
case XLOG_BTREE_DELETE_PAGE_META:
|
2006-11-01 20:43:17 +01:00
|
|
|
case XLOG_BTREE_DELETE_PAGE_HALF:
|
|
|
|
btree_xlog_delete_page(info, lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
case XLOG_BTREE_NEWROOT:
|
2005-06-06 19:01:25 +02:00
|
|
|
btree_xlog_newroot(lsn, record);
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
elog(PANIC, "btree_redo: unknown op code %u", info);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-03-24 05:32:13 +01:00
|
|
|
out_target(StringInfo buf, xl_btreetid *target)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
|
2006-10-04 02:30:14 +02:00
|
|
|
target->node.spcNode, target->node.dbNode, target->node.relNode,
|
|
|
|
ItemPointerGetBlockNumber(&(target->tid)),
|
|
|
|
ItemPointerGetOffsetNumber(&(target->tid)));
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2006-03-24 05:32:13 +01:00
|
|
|
btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
|
|
|
uint8 info = xl_info & ~XLR_INFO_MASK;
|
|
|
|
|
|
|
|
switch (info)
|
|
|
|
{
|
|
|
|
case XLOG_BTREE_INSERT_LEAF:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "insert: ");
|
2003-08-04 02:43:34 +02:00
|
|
|
out_target(buf, &(xlrec->target));
|
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_INSERT_UPPER:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "insert_upper: ");
|
2003-08-04 02:43:34 +02:00
|
|
|
out_target(buf, &(xlrec->target));
|
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_INSERT_META:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "insert_meta: ");
|
2003-08-04 02:43:34 +02:00
|
|
|
out_target(buf, &(xlrec->target));
|
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_SPLIT_L:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
appendStringInfo(buf, "split_l: rel %u/%u/%u ",
|
2007-02-08 06:05:53 +01:00
|
|
|
xlrec->node.spcNode, xlrec->node.dbNode,
|
|
|
|
xlrec->node.relNode);
|
2007-04-11 22:47:38 +02:00
|
|
|
appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
|
2007-11-15 22:14:46 +01:00
|
|
|
xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
|
2007-04-11 22:47:38 +02:00
|
|
|
xlrec->level, xlrec->firstright);
|
2003-08-04 02:43:34 +02:00
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_SPLIT_R:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
appendStringInfo(buf, "split_r: rel %u/%u/%u ",
|
2007-02-08 06:05:53 +01:00
|
|
|
xlrec->node.spcNode, xlrec->node.dbNode,
|
|
|
|
xlrec->node.relNode);
|
2007-04-11 22:47:38 +02:00
|
|
|
appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
|
2007-11-15 22:14:46 +01:00
|
|
|
xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
|
2007-04-11 22:47:38 +02:00
|
|
|
xlrec->level, xlrec->firstright);
|
2003-08-04 02:43:34 +02:00
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_SPLIT_L_ROOT:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
appendStringInfo(buf, "split_l_root: rel %u/%u/%u ",
|
2007-02-08 06:05:53 +01:00
|
|
|
xlrec->node.spcNode, xlrec->node.dbNode,
|
|
|
|
xlrec->node.relNode);
|
2007-04-11 22:47:38 +02:00
|
|
|
appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
|
2007-11-15 22:14:46 +01:00
|
|
|
xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
|
2007-04-11 22:47:38 +02:00
|
|
|
xlrec->level, xlrec->firstright);
|
2003-08-04 02:43:34 +02:00
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_SPLIT_R_ROOT:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2007-04-11 22:47:38 +02:00
|
|
|
appendStringInfo(buf, "split_r_root: rel %u/%u/%u ",
|
2007-02-08 06:05:53 +01:00
|
|
|
xlrec->node.spcNode, xlrec->node.dbNode,
|
|
|
|
xlrec->node.relNode);
|
2007-04-11 22:47:38 +02:00
|
|
|
appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
|
2007-11-15 22:14:46 +01:00
|
|
|
xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
|
2007-04-11 22:47:38 +02:00
|
|
|
xlrec->level, xlrec->firstright);
|
2003-08-04 02:43:34 +02:00
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_DELETE:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "delete: rel %u/%u/%u; blk %u",
|
2006-10-04 02:30:14 +02:00
|
|
|
xlrec->node.spcNode, xlrec->node.dbNode,
|
|
|
|
xlrec->node.relNode, xlrec->block);
|
2003-08-04 02:43:34 +02:00
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_DELETE_PAGE:
|
|
|
|
case XLOG_BTREE_DELETE_PAGE_META:
|
2006-11-01 20:43:17 +01:00
|
|
|
case XLOG_BTREE_DELETE_PAGE_HALF:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_delete_page *xlrec = (xl_btree_delete_page *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "delete_page: ");
|
2003-08-04 02:43:34 +02:00
|
|
|
out_target(buf, &(xlrec->target));
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "; dead %u; left %u; right %u",
|
2006-10-04 02:30:14 +02:00
|
|
|
xlrec->deadblk, xlrec->leftblk, xlrec->rightblk);
|
2003-08-04 02:43:34 +02:00
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
case XLOG_BTREE_NEWROOT:
|
2003-08-04 02:43:34 +02:00
|
|
|
{
|
|
|
|
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "newroot: rel %u/%u/%u; root %u lev %u",
|
2006-10-04 02:30:14 +02:00
|
|
|
xlrec->node.spcNode, xlrec->node.dbNode,
|
|
|
|
xlrec->node.relNode,
|
|
|
|
xlrec->rootblk, xlrec->level);
|
2003-08-04 02:43:34 +02:00
|
|
|
break;
|
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
default:
|
2006-03-24 05:32:13 +01:00
|
|
|
appendStringInfo(buf, "UNKNOWN");
|
2003-02-21 01:06:22 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
btree_xlog_startup(void)
|
|
|
|
{
|
2006-11-01 20:43:17 +01:00
|
|
|
incomplete_actions = NIL;
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
btree_xlog_cleanup(void)
|
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
2003-02-21 01:06:22 +01:00
|
|
|
|
2006-11-01 20:43:17 +01:00
|
|
|
foreach(l, incomplete_actions)
|
2003-02-21 01:06:22 +01:00
|
|
|
{
|
2006-11-01 20:43:17 +01:00
|
|
|
bt_incomplete_action *action = (bt_incomplete_action *) lfirst(l);
|
|
|
|
|
|
|
|
if (action->is_split)
|
|
|
|
{
|
|
|
|
/* finish an incomplete split */
|
|
|
|
Buffer lbuf,
|
|
|
|
rbuf;
|
|
|
|
Page lpage,
|
|
|
|
rpage;
|
|
|
|
BTPageOpaque lpageop,
|
|
|
|
rpageop;
|
|
|
|
bool is_only;
|
2008-06-12 11:12:31 +02:00
|
|
|
Relation reln;
|
2006-11-01 20:43:17 +01:00
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
lbuf = XLogReadBuffer(action->node, action->leftblk, false);
|
2006-11-01 20:43:17 +01:00
|
|
|
/* failure is impossible because we wrote this page earlier */
|
|
|
|
if (!BufferIsValid(lbuf))
|
|
|
|
elog(PANIC, "btree_xlog_cleanup: left block unfound");
|
|
|
|
lpage = (Page) BufferGetPage(lbuf);
|
|
|
|
lpageop = (BTPageOpaque) PageGetSpecialPointer(lpage);
|
2008-06-12 11:12:31 +02:00
|
|
|
rbuf = XLogReadBuffer(action->node, action->rightblk, false);
|
2006-11-01 20:43:17 +01:00
|
|
|
/* failure is impossible because we wrote this page earlier */
|
|
|
|
if (!BufferIsValid(rbuf))
|
|
|
|
elog(PANIC, "btree_xlog_cleanup: right block unfound");
|
|
|
|
rpage = (Page) BufferGetPage(rbuf);
|
|
|
|
rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);
|
|
|
|
|
|
|
|
/* if the pages are all of their level, it's a only-page split */
|
|
|
|
is_only = P_LEFTMOST(lpageop) && P_RIGHTMOST(rpageop);
|
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
reln = CreateFakeRelcacheEntry(action->node);
|
2006-11-01 20:43:17 +01:00
|
|
|
_bt_insert_parent(reln, lbuf, rbuf, NULL,
|
|
|
|
action->is_root, is_only);
|
2008-06-12 11:12:31 +02:00
|
|
|
FreeFakeRelcacheEntry(reln);
|
2006-11-01 20:43:17 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* finish an incomplete deletion (of a half-dead page) */
|
|
|
|
Buffer buf;
|
|
|
|
|
2008-06-12 11:12:31 +02:00
|
|
|
buf = XLogReadBuffer(action->node, action->delblk, false);
|
2006-11-01 20:43:17 +01:00
|
|
|
if (BufferIsValid(buf))
|
2008-06-12 11:12:31 +02:00
|
|
|
{
|
|
|
|
Relation reln;
|
|
|
|
|
|
|
|
reln = CreateFakeRelcacheEntry(action->node);
|
2006-11-01 20:43:17 +01:00
|
|
|
if (_bt_pagedel(reln, buf, NULL, true) == 0)
|
|
|
|
elog(PANIC, "btree_xlog_cleanup: _bt_pagdel failed");
|
2008-06-12 11:12:31 +02:00
|
|
|
FreeFakeRelcacheEntry(reln);
|
|
|
|
}
|
2006-11-01 20:43:17 +01:00
|
|
|
}
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
2006-11-01 20:43:17 +01:00
|
|
|
incomplete_actions = NIL;
|
2003-02-21 01:06:22 +01:00
|
|
|
}
|
2006-08-07 18:57:57 +02:00
|
|
|
|
|
|
|
bool
|
|
|
|
btree_safe_restartpoint(void)
|
|
|
|
{
|
2006-11-01 20:43:17 +01:00
|
|
|
if (incomplete_actions)
|
2006-08-07 18:57:57 +02:00
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|