450 lines
11 KiB
C
450 lines
11 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* ginbtree.c
|
|
* page utilities routines for the postgres inverted index access method.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.11 2008/01/01 19:45:46 momjian Exp $
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
#include "access/gin.h"
|
|
#include "miscadmin.h"
|
|
|
|
/*
|
|
* Locks buffer by needed method for search.
|
|
*/
|
|
static int
|
|
ginTraverseLock(Buffer buffer, bool searchMode)
|
|
{
|
|
Page page;
|
|
int access = GIN_SHARE;
|
|
|
|
LockBuffer(buffer, GIN_SHARE);
|
|
page = BufferGetPage(buffer);
|
|
if (GinPageIsLeaf(page))
|
|
{
|
|
if (searchMode == FALSE)
|
|
{
|
|
/* we should relock our page */
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
|
|
|
/* But root can become non-leaf during relock */
|
|
if (!GinPageIsLeaf(page))
|
|
{
|
|
/* restore old lock type (very rare) */
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
LockBuffer(buffer, GIN_SHARE);
|
|
}
|
|
else
|
|
access = GIN_EXCLUSIVE;
|
|
}
|
|
}
|
|
|
|
return access;
|
|
}
|
|
|
|
GinBtreeStack *
|
|
ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno)
|
|
{
|
|
GinBtreeStack *stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
|
|
|
stack->blkno = blkno;
|
|
stack->buffer = ReadBuffer(btree->index, stack->blkno);
|
|
stack->parent = NULL;
|
|
stack->predictNumber = 1;
|
|
|
|
ginTraverseLock(stack->buffer, btree->searchMode);
|
|
|
|
return stack;
|
|
}
|
|
|
|
/*
|
|
* Locates leaf page contained tuple
|
|
*/
|
|
GinBtreeStack *
|
|
ginFindLeafPage(GinBtree btree, GinBtreeStack *stack)
|
|
{
|
|
bool isfirst = TRUE;
|
|
BlockNumber rootBlkno;
|
|
|
|
if (!stack)
|
|
stack = ginPrepareFindLeafPage(btree, GIN_ROOT_BLKNO);
|
|
rootBlkno = stack->blkno;
|
|
|
|
for (;;)
|
|
{
|
|
Page page;
|
|
BlockNumber child;
|
|
int access = GIN_SHARE;
|
|
|
|
stack->off = InvalidOffsetNumber;
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
|
|
if (isfirst)
|
|
{
|
|
if (GinPageIsLeaf(page) && !btree->searchMode)
|
|
access = GIN_EXCLUSIVE;
|
|
isfirst = FALSE;
|
|
}
|
|
else
|
|
access = ginTraverseLock(stack->buffer, btree->searchMode);
|
|
|
|
/*
|
|
* ok, page is correctly locked, we should check to move right ..,
|
|
* root never has a right link, so small optimization
|
|
*/
|
|
while (btree->fullScan == FALSE && stack->blkno != rootBlkno && btree->isMoveRight(btree, page))
|
|
{
|
|
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
|
|
|
|
if (rightlink == InvalidBlockNumber)
|
|
/* rightmost page */
|
|
break;
|
|
|
|
stack->blkno = rightlink;
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
|
|
LockBuffer(stack->buffer, access);
|
|
page = BufferGetPage(stack->buffer);
|
|
}
|
|
|
|
if (GinPageIsLeaf(page)) /* we found, return locked page */
|
|
return stack;
|
|
|
|
/* now we have correct buffer, try to find child */
|
|
child = btree->findChildPage(btree, stack);
|
|
|
|
LockBuffer(stack->buffer, GIN_UNLOCK);
|
|
Assert(child != InvalidBlockNumber);
|
|
Assert(stack->blkno != child);
|
|
|
|
if (btree->searchMode)
|
|
{
|
|
/* in search mode we may forget path to leaf */
|
|
stack->blkno = child;
|
|
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
|
|
}
|
|
else
|
|
{
|
|
GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
|
|
|
ptr->parent = stack;
|
|
stack = ptr;
|
|
stack->blkno = child;
|
|
stack->buffer = ReadBuffer(btree->index, stack->blkno);
|
|
stack->predictNumber = 1;
|
|
}
|
|
}
|
|
|
|
/* keep compiler happy */
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
freeGinBtreeStack(GinBtreeStack *stack)
|
|
{
|
|
while (stack)
|
|
{
|
|
GinBtreeStack *tmp = stack->parent;
|
|
|
|
if (stack->buffer != InvalidBuffer)
|
|
ReleaseBuffer(stack->buffer);
|
|
|
|
pfree(stack);
|
|
stack = tmp;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Try to find parent for current stack position, returns correct
|
|
* parent and child's offset in stack->parent.
|
|
* Function should never release root page to prevent conflicts
|
|
* with vacuum process
|
|
*/
|
|
void
|
|
findParents(GinBtree btree, GinBtreeStack *stack,
|
|
BlockNumber rootBlkno)
|
|
{
|
|
|
|
Page page;
|
|
Buffer buffer;
|
|
BlockNumber blkno,
|
|
leftmostBlkno;
|
|
OffsetNumber offset;
|
|
GinBtreeStack *root = stack->parent;
|
|
GinBtreeStack *ptr;
|
|
|
|
if (!root)
|
|
{
|
|
/* XLog mode... */
|
|
root = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
|
root->blkno = rootBlkno;
|
|
root->buffer = ReadBuffer(btree->index, rootBlkno);
|
|
LockBuffer(root->buffer, GIN_EXCLUSIVE);
|
|
root->parent = NULL;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* find root, we should not release root page until update is
|
|
* finished!!
|
|
*/
|
|
while (root->parent)
|
|
{
|
|
ReleaseBuffer(root->buffer);
|
|
root = root->parent;
|
|
}
|
|
|
|
Assert(root->blkno == rootBlkno);
|
|
Assert(BufferGetBlockNumber(root->buffer) == rootBlkno);
|
|
LockBuffer(root->buffer, GIN_EXCLUSIVE);
|
|
}
|
|
root->off = InvalidOffsetNumber;
|
|
|
|
page = BufferGetPage(root->buffer);
|
|
Assert(!GinPageIsLeaf(page));
|
|
|
|
/* check trivial case */
|
|
if ((root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber)
|
|
{
|
|
stack->parent = root;
|
|
return;
|
|
}
|
|
|
|
leftmostBlkno = blkno = btree->getLeftMostPage(btree, page);
|
|
LockBuffer(root->buffer, GIN_UNLOCK);
|
|
Assert(blkno != InvalidBlockNumber);
|
|
|
|
|
|
for (;;)
|
|
{
|
|
buffer = ReadBuffer(btree->index, blkno);
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
|
page = BufferGetPage(buffer);
|
|
if (GinPageIsLeaf(page))
|
|
elog(ERROR, "Lost path");
|
|
|
|
leftmostBlkno = btree->getLeftMostPage(btree, page);
|
|
|
|
while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
|
|
{
|
|
blkno = GinPageGetOpaque(page)->rightlink;
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
ReleaseBuffer(buffer);
|
|
if (blkno == InvalidBlockNumber)
|
|
break;
|
|
buffer = ReadBuffer(btree->index, blkno);
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
|
page = BufferGetPage(buffer);
|
|
}
|
|
|
|
if (blkno != InvalidBlockNumber)
|
|
{
|
|
ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
|
ptr->blkno = blkno;
|
|
ptr->buffer = buffer;
|
|
ptr->parent = root; /* it's may be wrong, but in next call we will
|
|
* correct */
|
|
ptr->off = offset;
|
|
stack->parent = ptr;
|
|
return;
|
|
}
|
|
|
|
blkno = leftmostBlkno;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Insert value (stored in GinBtree) to tree described by stack
|
|
*/
|
|
void
|
|
ginInsertValue(GinBtree btree, GinBtreeStack *stack)
|
|
{
|
|
GinBtreeStack *parent = stack;
|
|
BlockNumber rootBlkno = InvalidBuffer;
|
|
Page page,
|
|
rpage,
|
|
lpage;
|
|
|
|
/* remember root BlockNumber */
|
|
while (parent)
|
|
{
|
|
rootBlkno = parent->blkno;
|
|
parent = parent->parent;
|
|
}
|
|
|
|
while (stack)
|
|
{
|
|
XLogRecData *rdata;
|
|
BlockNumber savedRightLink;
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
savedRightLink = GinPageGetOpaque(page)->rightlink;
|
|
|
|
if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
|
|
{
|
|
START_CRIT_SECTION();
|
|
btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
|
|
|
|
MarkBufferDirty(stack->buffer);
|
|
|
|
if (!btree->index->rd_istemp)
|
|
{
|
|
XLogRecPtr recptr;
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
|
|
PageSetLSN(page, recptr);
|
|
PageSetTLI(page, ThisTimeLineID);
|
|
}
|
|
|
|
UnlockReleaseBuffer(stack->buffer);
|
|
END_CRIT_SECTION();
|
|
|
|
freeGinBtreeStack(stack->parent);
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
Buffer rbuffer = GinNewBuffer(btree->index);
|
|
Page newlpage;
|
|
|
|
/*
|
|
* newlpage is a pointer to memory page, it doesn't associate with
|
|
* buffer, stack->buffer should be untouched
|
|
*/
|
|
newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
|
|
|
|
|
|
((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
|
|
|
|
parent = stack->parent;
|
|
|
|
if (parent == NULL)
|
|
{
|
|
/*
|
|
* split root, so we need to allocate new left page and place
|
|
* pointer on root to left and right page
|
|
*/
|
|
Buffer lbuffer = GinNewBuffer(btree->index);
|
|
|
|
((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
|
|
((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;
|
|
|
|
|
|
page = BufferGetPage(stack->buffer);
|
|
lpage = BufferGetPage(lbuffer);
|
|
rpage = BufferGetPage(rbuffer);
|
|
|
|
GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
|
|
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
|
|
((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF);
|
|
PageRestoreTempPage(newlpage, lpage);
|
|
btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer);
|
|
|
|
MarkBufferDirty(rbuffer);
|
|
MarkBufferDirty(lbuffer);
|
|
MarkBufferDirty(stack->buffer);
|
|
|
|
if (!btree->index->rd_istemp)
|
|
{
|
|
XLogRecPtr recptr;
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
|
|
PageSetLSN(page, recptr);
|
|
PageSetTLI(page, ThisTimeLineID);
|
|
PageSetLSN(lpage, recptr);
|
|
PageSetTLI(lpage, ThisTimeLineID);
|
|
PageSetLSN(rpage, recptr);
|
|
PageSetTLI(rpage, ThisTimeLineID);
|
|
}
|
|
|
|
UnlockReleaseBuffer(rbuffer);
|
|
UnlockReleaseBuffer(lbuffer);
|
|
UnlockReleaseBuffer(stack->buffer);
|
|
|
|
END_CRIT_SECTION();
|
|
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
/* split non-root page */
|
|
((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE;
|
|
((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink;
|
|
|
|
lpage = BufferGetPage(stack->buffer);
|
|
rpage = BufferGetPage(rbuffer);
|
|
|
|
GinPageGetOpaque(rpage)->rightlink = savedRightLink;
|
|
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
|
|
|
|
START_CRIT_SECTION();
|
|
PageRestoreTempPage(newlpage, lpage);
|
|
|
|
MarkBufferDirty(rbuffer);
|
|
MarkBufferDirty(stack->buffer);
|
|
|
|
if (!btree->index->rd_istemp)
|
|
{
|
|
XLogRecPtr recptr;
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
|
|
PageSetLSN(lpage, recptr);
|
|
PageSetTLI(lpage, ThisTimeLineID);
|
|
PageSetLSN(rpage, recptr);
|
|
PageSetTLI(rpage, ThisTimeLineID);
|
|
}
|
|
UnlockReleaseBuffer(rbuffer);
|
|
END_CRIT_SECTION();
|
|
}
|
|
}
|
|
|
|
btree->isDelete = FALSE;
|
|
|
|
/* search parent to lock */
|
|
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
|
|
|
|
/* move right if it's needed */
|
|
page = BufferGetPage(parent->buffer);
|
|
while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
|
|
{
|
|
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
|
|
|
|
LockBuffer(parent->buffer, GIN_UNLOCK);
|
|
|
|
if (rightlink == InvalidBlockNumber)
|
|
{
|
|
/*
|
|
* rightmost page, but we don't find parent, we should use
|
|
* plain search...
|
|
*/
|
|
findParents(btree, stack, rootBlkno);
|
|
parent = stack->parent;
|
|
page = BufferGetPage(parent->buffer);
|
|
break;
|
|
}
|
|
|
|
parent->blkno = rightlink;
|
|
parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno);
|
|
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
|
|
page = BufferGetPage(parent->buffer);
|
|
}
|
|
|
|
UnlockReleaseBuffer(stack->buffer);
|
|
pfree(stack);
|
|
stack = parent;
|
|
}
|
|
}
|