diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index c1671ce333..775eaca242 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.155 2007/03/25 19:45:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.156 2007/04/11 20:47:37 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -49,7 +49,7 @@ static TransactionId _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, Buffer buf, OffsetNumber ioffset, ScanKey itup_scankey); static void _bt_findinsertloc(Relation rel, - Buffer *bufptr, + Buffer *bufptr, OffsetNumber *offsetptr, int keysz, ScanKey scankey, @@ -66,7 +66,7 @@ static OffsetNumber _bt_findsplitloc(Relation rel, Page page, OffsetNumber newitemoff, Size newitemsz, bool *newitemonleft); -static void _bt_checksplitloc(FindSplitData *state, +static void _bt_checksplitloc(FindSplitData *state, OffsetNumber firstoldonright, bool newitemonleft, int dataitemstoleft, Size firstoldonrightsz); static void _bt_pgaddtup(Relation rel, Page page, @@ -459,7 +459,7 @@ _bt_findinsertloc(Relation rel, * the hint supplied by the caller invalid */ vacuumed = true; - if (PageGetFreeSpace(page) >= itemsz) + if (PageGetFreeSpace(page) >= itemsz) break; /* OK, now we have enough space */ } @@ -506,7 +506,7 @@ _bt_findinsertloc(Relation rel, * moved right at all, we know we should insert at the start of the * page. If we didn't move right, we can use the firstlegaloff hint * if the caller supplied one, unless we vacuumed the page which - * might have moved tuples around making the hint invalid. If we + * might have moved tuples around making the hint invalid. If we * didn't move right or can't use the hint, find the position * by searching. 
*/ @@ -779,8 +779,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, Buffer sbuf = InvalidBuffer; Page spage = NULL; BTPageOpaque sopaque = NULL; - OffsetNumber itup_off = 0; - BlockNumber itup_blkno = 0; Size itemsz; ItemId itemid; IndexTuple item; @@ -798,6 +796,14 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, _bt_pageinit(leftpage, BufferGetPageSize(buf)); /* rightpage was already initialized by _bt_getbuf */ + /* + * Copy the original page's LSN and TLI into leftpage, which will become + * the updated version of the page. We need this because XLogInsert will + * examine these fields and possibly dump them in a page image. + */ + PageSetLSN(leftpage, PageGetLSN(origpage)); + PageSetTLI(leftpage, PageGetTLI(origpage)); + /* init btree private data */ oopaque = (BTPageOpaque) PageGetSpecialPointer(origpage); lopaque = (BTPageOpaque) PageGetSpecialPointer(leftpage); @@ -864,7 +870,10 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, leftoff = OffsetNumberNext(leftoff); /* - * Now transfer all the data items to the appropriate page + * Now transfer all the data items to the appropriate page. + * + * Note: we *must* insert at least the right page's items in item-number + * order, for the benefit of _bt_restore_page(). 
*/ maxoff = PageGetMaxOffsetNumber(origpage); @@ -881,16 +890,12 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, { _bt_pgaddtup(rel, leftpage, newitemsz, newitem, leftoff, "left sibling"); - itup_off = leftoff; - itup_blkno = BufferGetBlockNumber(buf); leftoff = OffsetNumberNext(leftoff); } else { _bt_pgaddtup(rel, rightpage, newitemsz, newitem, rightoff, "right sibling"); - itup_off = rightoff; - itup_blkno = BufferGetBlockNumber(rbuf); rightoff = OffsetNumberNext(rightoff); } } @@ -921,8 +926,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, Assert(!newitemonleft); _bt_pgaddtup(rel, rightpage, newitemsz, newitem, rightoff, "right sibling"); - itup_off = rightoff; - itup_blkno = BufferGetBlockNumber(rbuf); rightoff = OffsetNumberNext(rightoff); } @@ -961,7 +964,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, /* * Right sibling is locked, new siblings are prepared, but original page - * is not updated yet. Log changes before continuing. + * is not updated yet. * * NO EREPORT(ERROR) till right sibling is updated. We can get away with * not starting the critical section till here because we haven't been @@ -970,15 +973,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, */ START_CRIT_SECTION(); - MarkBufferDirty(buf); - MarkBufferDirty(rbuf); - - if (!P_RIGHTMOST(ropaque)) - { - sopaque->btpo_prev = BufferGetBlockNumber(rbuf); - MarkBufferDirty(sbuf); - } - /* * By here, the original data page has been split into two new halves, and * these are correct. 
The algorithm requires that the left page never @@ -994,6 +988,15 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, */ PageRestoreTempPage(leftpage, origpage); + MarkBufferDirty(buf); + MarkBufferDirty(rbuf); + + if (!P_RIGHTMOST(ropaque)) + { + sopaque->btpo_prev = BufferGetBlockNumber(rbuf); + MarkBufferDirty(sbuf); + } + /* XLOG stuff */ if (!rel->rd_istemp) { @@ -1006,9 +1009,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, xlrec.node = rel->rd_node; xlrec.leftsib = BufferGetBlockNumber(buf); xlrec.rightsib = BufferGetBlockNumber(rbuf); - xlrec.firstright = firstright; xlrec.rnext = ropaque->btpo_next; xlrec.level = ropaque->btpo.level; + xlrec.firstright = firstright; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeSplit; @@ -1027,14 +1030,18 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, lastrdata->buffer = InvalidBuffer; } - /* Log the new item, if it was inserted on the left page. If it was - * put on the right page, we don't need to explicitly WAL log it - * because it's included with all the other items on the right page. + /* + * Log the new item and its offset, if it was inserted on the left + * page. (If it was put on the right page, we don't need to explicitly + * WAL log it because it's included with all the other items on the + * right page.) Show these as belonging to the left page buffer, + * so that they are not stored if XLogInsert decides it needs a + * full-page image of the left page. 
*/ - lastrdata->next = lastrdata + 1; - lastrdata++; if (newitemonleft) { + lastrdata->next = lastrdata + 1; + lastrdata++; lastrdata->data = (char *) &newitemoff; lastrdata->len = sizeof(OffsetNumber); lastrdata->buffer = buf; /* backup block 1 */ @@ -1042,39 +1049,49 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, lastrdata->next = lastrdata + 1; lastrdata++; - lastrdata->data = (char *)newitem; - lastrdata->len = newitemsz; + lastrdata->data = (char *) newitem; + lastrdata->len = MAXALIGN(newitemsz); lastrdata->buffer = buf; /* backup block 1 */ lastrdata->buffer_std = true; } else { + /* + * Although we don't need to WAL-log the new item, we still + * need XLogInsert to consider storing a full-page image of the + * left page, so make an empty entry referencing that buffer. + * This also ensures that the left page is always backup block 1. + */ + lastrdata->next = lastrdata + 1; + lastrdata++; lastrdata->data = NULL; lastrdata->len = 0; lastrdata->buffer = buf; /* backup block 1 */ lastrdata->buffer_std = true; } - /* Log the contents of the right page in the format understood by + /* + * Log the contents of the right page in the format understood by * _bt_restore_page(). We set lastrdata->buffer to InvalidBuffer, - * because we're going to recreate the whole page anyway. + * because we're going to recreate the whole page anyway, so it + * should never be stored by XLogInsert. * * Direct access to page is not good but faster - we should implement * some new func in page API. Note we only store the tuples - * themselves, knowing that the item pointers are in the same order - * and can be reconstructed by scanning the tuples. See comments for + * themselves, knowing that they were inserted in item-number order + * and so the item pointers can be reconstructed. See comments for * _bt_restore_page(). 
*/ lastrdata->next = lastrdata + 1; lastrdata++; - lastrdata->data = (char *) rightpage + + lastrdata->data = (char *) rightpage + ((PageHeader) rightpage)->pd_upper; lastrdata->len = ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper; lastrdata->buffer = InvalidBuffer; - /* Log the right sibling, because we've changed it's prev-pointer. */ + /* Log the right sibling, because we've changed its prev-pointer. */ if (!P_RIGHTMOST(ropaque)) { lastrdata->next = lastrdata + 1; @@ -1216,7 +1233,7 @@ _bt_findsplitloc(Relation rel, olddataitemstoleft = 0; goodenoughfound = false; maxoff = PageGetMaxOffsetNumber(page); - + for (offnum = P_FIRSTDATAKEY(opaque); offnum <= maxoff; offnum = OffsetNumberNext(offnum)) @@ -1234,7 +1251,7 @@ _bt_findsplitloc(Relation rel, olddataitemstoleft, itemsz); else if (offnum < newitemoff) - _bt_checksplitloc(&state, offnum, false, + _bt_checksplitloc(&state, offnum, false, olddataitemstoleft, itemsz); else { @@ -1285,11 +1302,11 @@ _bt_findsplitloc(Relation rel, * items go to the left page and only the new item goes to the right page. * In that case, firstoldonrightsz is not used. * - * olddataitemstoleft is the total size of all old items to the left of - * firstoldonright. + * olddataitemstoleft is the total size of all old items to the left of + * firstoldonright. 
*/ static void -_bt_checksplitloc(FindSplitData *state, +_bt_checksplitloc(FindSplitData *state, OffsetNumber firstoldonright, bool newitemonleft, int olddataitemstoleft, @@ -1311,7 +1328,7 @@ _bt_checksplitloc(FindSplitData *state, /* Account for all the old tuples */ leftfree = state->leftspace - olddataitemstoleft; - rightfree = state->rightspace - + rightfree = state->rightspace - (state->olddataitemstotal - olddataitemstoleft); /* @@ -1854,7 +1871,7 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer) BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* - * Scan over all items to see which ones need to be deleted + * Scan over all items to see which ones need to be deleted * according to LP_DELETE flags. */ minoff = P_FIRSTDATAKEY(opaque); diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index dd6fd8571a..ff41be3767 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.42 2007/02/08 05:05:53 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.43 2007/04/11 20:47:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -125,7 +125,8 @@ forget_matching_deletion(RelFileNode node, BlockNumber delblk) * in correct itemno sequence, but physically the opposite order from the * original, because we insert them in the opposite of itemno order. This * does not matter in any current btree code, but it's something to keep an - * eye on. Is it worth changing just on general principles? + * eye on. Is it worth changing just on general principles? See also the + * notes in btree_xlog_split(). 
*/ static void _bt_restore_page(Page page, char *from, int len) @@ -264,14 +265,12 @@ btree_xlog_split(bool onleft, bool isroot, { xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record); Relation reln; - Buffer lbuf, rbuf; - Page lpage, rpage; - BTPageOpaque ropaque, lopaque; + Buffer rbuf; + Page rpage; + BTPageOpaque ropaque; char *datapos; int datalen; - bool bkp_left = record->xl_info & XLR_BKP_BLOCK_1; - bool bkp_nextsib = record->xl_info & XLR_BKP_BLOCK_2; - OffsetNumber newitemoff; + OffsetNumber newitemoff = 0; Item newitem = NULL; Size newitemsz = 0; @@ -283,6 +282,7 @@ btree_xlog_split(bool onleft, bool isroot, /* Forget any split this insertion completes */ if (xlrec->level > 0) { + /* we assume SizeOfBtreeSplit is at least 16-bit aligned */ BlockNumber downlink = BlockIdGetBlockNumber((BlockId) datapos); datapos += sizeof(BlockIdData); @@ -291,19 +291,22 @@ btree_xlog_split(bool onleft, bool isroot, forget_matching_split(xlrec->node, downlink, false); } - - /* Extract newitem and newitemoff */ - if (!bkp_left && onleft) + /* Extract newitem and newitemoff, if present */ + if (onleft && !(record->xl_info & XLR_BKP_BLOCK_1)) { IndexTupleData itupdata; - /* Extract the offset of the new tuple and it's contents */ + /* Extract the offset (still assuming 16-bit alignment) */ memcpy(&newitemoff, datapos, sizeof(OffsetNumber)); datapos += sizeof(OffsetNumber); datalen -= sizeof(OffsetNumber); + /* + * We need to copy the tuple header to apply IndexTupleDSize, because + * of alignment considerations. However, we assume that PageAddItem + * doesn't care about the alignment of the newitem pointer it's given. 
+ */ newitem = datapos; - /* Need to copy tuple header due to alignment considerations */ memcpy(&itupdata, datapos, sizeof(IndexTupleData)); newitemsz = IndexTupleDSize(itupdata); newitemsz = MAXALIGN(newitemsz); @@ -311,7 +314,7 @@ btree_xlog_split(bool onleft, bool isroot, datalen -= newitemsz; } - /* Reconstruct right (new) sibling */ + /* Reconstruct right (new) sibling from scratch */ rbuf = XLogReadBuffer(reln, xlrec->rightsib, true); Assert(BufferIsValid(rbuf)); rpage = (Page) BufferGetPage(rbuf); @@ -331,57 +334,71 @@ btree_xlog_split(bool onleft, bool isroot, PageSetTLI(rpage, ThisTimeLineID); MarkBufferDirty(rbuf); - /* don't release the buffer yet, because reconstructing the left sibling - * needs to access the data on the right page + /* don't release the buffer yet; we touch right page's first item below */ + + /* + * Reconstruct left (original) sibling if needed. Note that this code + * ensures that the items remaining on the left page are in the correct + * item number order, but it does not reproduce the physical order they + * would have had. Is this worth changing? See also _bt_restore_page(). */ - - - /* Reconstruct left (original) sibling */ - - if(!bkp_left) + if (!(record->xl_info & XLR_BKP_BLOCK_1)) { - lbuf = XLogReadBuffer(reln, xlrec->leftsib, false); + Buffer lbuf = XLogReadBuffer(reln, xlrec->leftsib, false); if (BufferIsValid(lbuf)) { - lpage = (Page) BufferGetPage(lbuf); - lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage); + Page lpage = (Page) BufferGetPage(lbuf); + BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage); if (!XLByteLE(lsn, PageGetLSN(lpage))) { - /* Remove the items from the left page that were copied to - * right page, and add the new item if it was inserted to - * left page. 
- */ OffsetNumber off; OffsetNumber maxoff = PageGetMaxOffsetNumber(lpage); + OffsetNumber deletable[MaxOffsetNumber]; + int ndeletable = 0; ItemId hiItemId; Item hiItem; - for(off = maxoff ; off >= xlrec->firstright; off--) - PageIndexTupleDelete(lpage, off); + /* + * Remove the items from the left page that were copied to + * the right page. Also remove the old high key, if any. + * (We must remove everything before trying to insert any + * items, else we risk not having enough space.) + */ + if (!P_RIGHTMOST(lopaque)) + { + deletable[ndeletable++] = P_HIKEY; + /* + * newitemoff is given to us relative to the original + * page's item numbering, so adjust it for this deletion. + */ + newitemoff--; + } + for (off = xlrec->firstright; off <= maxoff; off++) + deletable[ndeletable++] = off; + if (ndeletable > 0) + PageIndexMultiDelete(lpage, deletable, ndeletable); + /* + * Add the new item if it was inserted on left page. + */ if (onleft) { - if (PageAddItem(lpage, newitem, newitemsz, newitemoff, + if (PageAddItem(lpage, newitem, newitemsz, newitemoff, LP_USED) == InvalidOffsetNumber) - elog(PANIC, "can't add new item to left sibling after split"); + elog(PANIC, "failed to add new item to left page after split"); } + /* Set high key equal to the first key on the right page */ hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque)); hiItem = PageGetItem(rpage, hiItemId); - if(!P_RIGHTMOST(lopaque)) - { - /* but remove the old high key first */ - PageIndexTupleDelete(lpage, P_HIKEY); - } + if (PageAddItem(lpage, hiItem, ItemIdGetLength(hiItemId), + P_HIKEY, LP_USED) == InvalidOffsetNumber) + elog(PANIC, "failed to add high key to left page after split"); - if(PageAddItem(lpage, hiItem, ItemIdGetLength(hiItemId), - P_HIKEY, LP_USED) == InvalidOffsetNumber) - elog(PANIC, "can't add high key after split to left page"); - - /* Fix opaque fields */ + /* Fix opaque fields */ lopaque->btpo_flags = (xlrec->level == 0) ? 
BTP_LEAF : 0; lopaque->btpo_next = xlrec->rightsib; lopaque->btpo_cycleid = 0; @@ -393,16 +410,16 @@ btree_xlog_split(bool onleft, bool isroot, UnlockReleaseBuffer(lbuf); } - } - /* we no longer need the right buffer. */ + /* We no longer need the right buffer */ UnlockReleaseBuffer(rbuf); /* Fix left-link of the page to the right of the new right sibling */ - if (!bkp_nextsib && xlrec->rnext != P_NONE) + if (xlrec->rnext != P_NONE && !(record->xl_info & XLR_BKP_BLOCK_2)) { Buffer buffer = XLogReadBuffer(reln, xlrec->rnext, false); + if (BufferIsValid(buffer)) { Page page = (Page) BufferGetPage(buffer); @@ -410,6 +427,7 @@ btree_xlog_split(bool onleft, bool isroot, if (!XLByteLE(lsn, PageGetLSN(page))) { BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page); + pageop->btpo_prev = xlrec->rightsib; PageSetLSN(page, lsn); @@ -770,48 +788,48 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec) { xl_btree_split *xlrec = (xl_btree_split *) rec; - appendStringInfo(buf, "split_l: rel %u/%u/%u ", + appendStringInfo(buf, "split_l: rel %u/%u/%u ", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode); - appendStringInfo(buf, "left %u, right %u off %u level %u", - xlrec->leftsib, xlrec->rightsib, - xlrec->firstright, xlrec->level); + appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d", + xlrec->leftsib, xlrec->rightsib, xlrec->rnext, + xlrec->level, xlrec->firstright); break; } case XLOG_BTREE_SPLIT_R: { xl_btree_split *xlrec = (xl_btree_split *) rec; - appendStringInfo(buf, "split_r: rel %u/%u/%u ", + appendStringInfo(buf, "split_r: rel %u/%u/%u ", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode); - appendStringInfo(buf, "left %u, right %u off %u level %u", - xlrec->leftsib, xlrec->rightsib, - xlrec->firstright, xlrec->level); + appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d", + xlrec->leftsib, xlrec->rightsib, xlrec->rnext, + xlrec->level, xlrec->firstright); break; } case 
XLOG_BTREE_SPLIT_L_ROOT: { xl_btree_split *xlrec = (xl_btree_split *) rec; - appendStringInfo(buf, "split_l_root: rel %u/%u/%u ", + appendStringInfo(buf, "split_l_root: rel %u/%u/%u ", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode); - appendStringInfo(buf, "left %u, right %u off %u level %u", - xlrec->leftsib, xlrec->rightsib, - xlrec->firstright, xlrec->level); + appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d", + xlrec->leftsib, xlrec->rightsib, xlrec->rnext, + xlrec->level, xlrec->firstright); break; } case XLOG_BTREE_SPLIT_R_ROOT: { xl_btree_split *xlrec = (xl_btree_split *) rec; - appendStringInfo(buf, "split_r_root: rel %u/%u/%u ", + appendStringInfo(buf, "split_r_root: rel %u/%u/%u ", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode); - appendStringInfo(buf, "left %u, right %u off %u level %u", - xlrec->leftsib, xlrec->rightsib, - xlrec->firstright, xlrec->level); + appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d", + xlrec->leftsib, xlrec->rightsib, xlrec->rnext, + xlrec->level, xlrec->firstright); break; } case XLOG_BTREE_DELETE: diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 53e10c80e8..c1a7d06240 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.112 2007/04/09 22:04:08 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.113 2007/04/11 20:47:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -260,17 +260,17 @@ typedef struct xl_btree_insert #define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData) /* - * On insert with split we save items of both left and right siblings - * and restore content of both pages from log record. 
This way takes less - * xlog space than the normal approach, because if we did it standardly, + * On insert with split, we save all the items going into the right sibling + * so that we can restore it completely from the log record. This way takes + * less xlog space than the normal approach, because if we did it standardly, * XLogInsert would almost always think the right page is new and store its - * whole page image. + * whole page image. The left page, however, is handled in the normal + * incremental-update fashion. * * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record. * The _L and _R variants indicate whether the inserted tuple went into the * left or right split page (and thus, whether newitemoff and the new item - * are stored or not. - * page of the split pair). The _ROOT variants indicate that we are splitting + * are stored or not). The _ROOT variants indicate that we are splitting * the root page, and thus that a newroot record rather than an insert or * split record should follow. Note that a split record never carries a * metapage update --- we'll do that in the parent-level update. @@ -278,20 +278,25 @@ typedef struct xl_btree_insert typedef struct xl_btree_split { RelFileNode node; - BlockNumber leftsib; /* orig page / new left page */ - BlockNumber rightsib; /* new right page */ - OffsetNumber firstright; /* first item stored on right page */ - BlockNumber rnext; /* next/right block pointer */ - uint32 level; /* tree level of page being split */ + BlockNumber leftsib; /* orig page / new left page */ + BlockNumber rightsib; /* new right page */ + BlockNumber rnext; /* next block (orig page's rightlink) */ + uint32 level; /* tree level of page being split */ + OffsetNumber firstright; /* first item moved to right page */ - /* BlockIdData downlink follows if level > 0 */ - - /* OffsetNumber newitemoff follows in the _L variants. 
*/ - /* New item follows in the _L variants */ - /* RIGHT PAGES TUPLES FOLLOW AT THE END */ + /* + * If level > 0, BlockIdData downlink follows. (We use BlockIdData + * rather than BlockNumber for alignment reasons: SizeOfBtreeSplit + * is only 16-bit aligned.) + * + * In the _L variants, next are OffsetNumber newitemoff and the new item. + * (In the _R variants, the new item is one of the right page's tuples.) + * + * Last are the right page's tuples in the form used by _bt_restore_page. + */ } xl_btree_split; -#define SizeOfBtreeSplit (offsetof(xl_btree_split, level) + sizeof(uint32)) +#define SizeOfBtreeSplit (offsetof(xl_btree_split, firstright) + sizeof(OffsetNumber)) /* * This is what we need to know about delete of individual leaf index tuples. diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 7b2c966101..e2511988f3 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.402 2007/04/09 22:04:08 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.403 2007/04/11 20:47:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200704091 +#define CATALOG_VERSION_NO 200704111 #endif