Misc GIN refactoring.

Merge the isEnoughSpace and placeToPage functions in the b-tree interface
into one function that tries to put a tuple on page, and returns false if
it doesn't fit.

Move createPostingTree function to gindatapage.c, and change its contract
so that it can be passed more items than fit on the root page. It's in a
better position than the callers to know how many items fit.

Move ginMergeItemPointers out of gindatapage.c, into a separate file.

These changes make no difference now, but reduce the footprint of Alexander
Korotkov's upcoming patch to pack item pointers more tightly.
This commit is contained in:
Heikki Linnakangas 2013-11-06 10:31:38 +02:00
parent 920c8261d5
commit ecaa4708e5
9 changed files with 223 additions and 181 deletions

View File

@ -14,6 +14,6 @@ include $(top_builddir)/src/Makefile.global
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
ginbulk.o ginfast.o
ginbulk.o ginfast.o ginpostinglist.o
include $(top_srcdir)/src/backend/common.mk

View File

@ -104,7 +104,7 @@ a few thousand entries can be much faster than retail insertion. (The win
comes mainly from not having to do multiple searches/insertions when the
same key appears in multiple new heap tuples.)
Key entries are nominally of the same IndexEntry format as used in other
Key entries are nominally of the same IndexTuple format as used in other
index types, but since a leaf key entry typically refers to multiple heap
tuples, there are significant differences. (See GinFormTuple, which works
by building a "normal" index tuple and then modifying it.) The points to

View File

@ -264,7 +264,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack,
* Insert value (stored in GinBtree) to tree described by stack
*
* During an index build, buildStats is non-null and the counters
* it contains should be incremented as needed.
* it contains are incremented as needed.
*
* NB: the passed-in stack is freed, as though by freeGinBtreeStack.
*/
@ -290,15 +290,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
{
XLogRecData *rdata;
BlockNumber savedRightLink;
bool fit;
page = BufferGetPage(stack->buffer);
savedRightLink = GinPageGetOpaque(page)->rightlink;
if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
START_CRIT_SECTION();
fit = btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
if (fit)
{
START_CRIT_SECTION();
btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
MarkBufferDirty(stack->buffer);
if (RelationNeedsWAL(btree->index))
@ -318,12 +318,17 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
}
else
{
Buffer rbuffer = GinNewBuffer(btree->index);
/* Didn't fit, have to split */
Buffer rbuffer;
Page newlpage;
END_CRIT_SECTION();
rbuffer = GinNewBuffer(btree->index);
/*
* newlpage is a pointer to memory page, it doesn't associate with
* buffer, stack->buffer should be untouched
* newlpage is a pointer to memory page, it is not associated with
* a buffer. stack->buffer is not touched yet.
*/
newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);

View File

@ -15,47 +15,9 @@
#include "postgres.h"
#include "access/gin_private.h"
#include "miscadmin.h"
#include "utils/rel.h"
/*
* Merge two ordered arrays of itempointers, eliminating any duplicates.
*
* 'a' (na items) and 'b' (nb items) are the sorted inputs; the merged
* sequence is written to 'dst'.  Returns the number of items in the result.
*
* Caller is responsible that there is enough space at *dst; the result never
* holds more than na + nb items.
*/
uint32
ginMergeItemPointers(ItemPointerData *dst,
ItemPointerData *a, uint32 na,
ItemPointerData *b, uint32 nb)
{
ItemPointerData *dptr = dst;
ItemPointerData *aptr = a,
*bptr = b;
/* walk both inputs in step until one of them is exhausted */
while (aptr - a < na && bptr - b < nb)
{
int cmp = ginCompareItemPointers(aptr, bptr);
if (cmp > 0)
*dptr++ = *bptr++;
else if (cmp == 0)
{
/* we want only one copy of the identical items */
*dptr++ = *bptr++;
aptr++;
}
else
*dptr++ = *aptr++;
}
/* at most one of the two loops below still has items left to copy */
while (aptr - a < na)
*dptr++ = *aptr++;
while (bptr - b < nb)
*dptr++ = *bptr++;
return dptr - dst;
}
/*
* Checks, should we move to right link...
* Compares inserting itemp pointer with right bound of current page
@ -387,9 +349,12 @@ dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
/*
* Places keys to page and fills WAL record. In case leaf page and
* build mode puts all ItemPointers to page.
*
* If none of the keys fit, returns false without modifying the page.
*/
static void
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
static bool
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
XLogRecData **prdata)
{
Page page = BufferGetPage(buf);
int sizeofitem = GinSizeOfDataPageItem(page);
@ -399,6 +364,10 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda
static XLogRecData rdata[3];
static ginxlogInsert data;
/* quick exit if it doesn't fit */
if (!dataIsEnoughSpace(btree, buf, off))
return false;
*prdata = rdata;
Assert(GinPageIsData(page));
@ -464,6 +433,8 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda
}
else
GinDataPageAddPostingItem(page, &(btree->pitem), off);
return true;
}
/*
@ -545,8 +516,8 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
}
/*
* we suppose that during index creation table scaned from begin to end,
* so ItemPointers are monotonically increased..
* we assume that during index creation the table is scanned from beginning
* to end, so ItemPointers are in monotonically increasing order.
*/
if (btree->isBuild && GinPageRightMost(lpage))
separator = freeSpace / sizeofitem;
@ -575,15 +546,6 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
if (GinPageIsLeaf(lpage))
btree->pitem.key = *GinDataPageGetItemPointer(lpage,
GinPageGetOpaque(lpage)->maxoff);
else
btree->pitem.key = GinDataPageGetPostingItem(lpage,
GinPageGetOpaque(lpage)->maxoff)->key;
btree->rightblkno = BufferGetBlockNumber(rbuf);
/* set up right bound for left page */
bound = GinDataPageGetRightBound(lpage);
*bound = btree->pitem.key;
@ -613,6 +575,16 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
rdata[1].len = MAXALIGN(maxoff * sizeofitem);
rdata[1].next = NULL;
/* Prepare a downlink tuple for insertion to the parent */
PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
if (GinPageIsLeaf(lpage))
btree->pitem.key = *GinDataPageGetItemPointer(lpage,
GinPageGetOpaque(lpage)->maxoff);
else
btree->pitem.key = GinDataPageGetPostingItem(lpage,
GinPageGetOpaque(lpage)->maxoff)->key;
btree->rightblkno = BufferGetBlockNumber(rbuf);
return lpage;
}
@ -638,6 +610,92 @@ ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
GinDataPageAddPostingItem(page, &ri, InvalidOffsetNumber);
}
/*
 * Creates new posting tree containing the given TIDs. Returns the page
 * number of the root of the new posting tree.
 *
 * items[] must be in sorted order with no duplicates.
 *
 * nitems may exceed what fits on a single leaf page: only the first
 * Min(nitems, GinMaxLeafDataItems) TIDs are placed on the root page, and the
 * remainder is added afterwards through ginInsertItemPointers.
 *
 * buildStats is non-NULL during an index build; in that case the data page
 * counter is incremented for the newly-created root page, and the follow-up
 * insertion is run in build mode.
 */
BlockNumber
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
				  GinStatsData *buildStats)
{
	BlockNumber blkno;
	Buffer		buffer;
	Page		page;
	uint32		nrootitems;

	/* Calculate how many TIDs will fit on first page. */
	nrootitems = Min(nitems, GinMaxLeafDataItems);

	/*
	 * Create the root page.
	 */
	buffer = GinNewBuffer(index);
	page = BufferGetPage(buffer);
	blkno = BufferGetBlockNumber(buffer);

	START_CRIT_SECTION();

	GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);

	/*
	 * Copy only as many items as fit on the root page.  Using nitems here
	 * would write past the end of the page whenever the caller passes more
	 * items than fit, and would insert the overflow items twice.
	 */
	memcpy(GinDataPageGetData(page), items,
		   sizeof(ItemPointerData) * nrootitems);
	GinPageGetOpaque(page)->maxoff = nrootitems;

	MarkBufferDirty(buffer);

	if (RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;
		XLogRecData rdata[2];
		ginxlogCreatePostingTree data;

		data.node = index->rd_node;
		data.blkno = blkno;
		/* must agree with the length of the item payload below */
		data.nitem = nrootitems;

		/* fixed-size record header ... */
		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char *) &data;
		rdata[0].len = sizeof(ginxlogCreatePostingTree);
		rdata[0].next = &rdata[1];

		/* ... followed by the TIDs stored on the root page */
		rdata[1].buffer = InvalidBuffer;
		rdata[1].data = (char *) items;
		rdata[1].len = sizeof(ItemPointerData) * nrootitems;
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
		PageSetLSN(page, recptr);
	}

	UnlockReleaseBuffer(buffer);

	END_CRIT_SECTION();

	/* During index build, count the newly-added data page */
	if (buildStats)
		buildStats->nDataPages++;

	/*
	 * Add any remaining TIDs to the newly-created posting tree.
	 */
	if (nrootitems < nitems)
	{
		GinPostingTreeScan *gdi;

		gdi = ginPrepareScanPostingTree(index, blkno, FALSE);
		gdi->btree.isBuild = (buildStats != NULL);

		ginInsertItemPointers(gdi,
							  items + nrootitems,
							  nitems - nrootitems,
							  buildStats);

		pfree(gdi);
	}

	return blkno;
}
void
ginPrepareDataScan(GinBtree btree, Relation index)
{
@ -650,7 +708,6 @@ ginPrepareDataScan(GinBtree btree, Relation index)
btree->findItem = dataLocateLeafItem;
btree->findChildPtr = dataFindChildPtr;
btree->getLeftMostPage = dataGetLeftMostPage;
btree->isEnoughSpace = dataIsEnoughSpace;
btree->placeToPage = dataPlaceToPage;
btree->splitPage = dataSplitPage;
btree->fillRoot = ginDataFillRoot;

View File

@ -486,9 +486,12 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
/*
* Place tuple on page and fills WAL record
*
* If the tuple doesn't fit, returns false without modifying the page.
*/
static void
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
static bool
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
XLogRecData **prdata)
{
Page page = BufferGetPage(buf);
OffsetNumber placed;
@ -498,6 +501,10 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd
static XLogRecData rdata[3];
static ginxlogInsert data;
/* quick exit if it doesn't fit */
if (!entryIsEnoughSpace(btree, buf, off))
return false;
*prdata = rdata;
data.updateBlkno = entryPreparePage(btree, page, off);
@ -543,6 +550,8 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd
rdata[cnt].next = NULL;
btree->entry = NULL;
return true;
}
/*
@ -724,7 +733,6 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
btree->findItem = entryLocateLeafEntry;
btree->findChildPtr = entryFindChildPtr;
btree->getLeftMostPage = entryGetLeftMostPage;
btree->isEnoughSpace = entryIsEnoughSpace;
btree->placeToPage = entryPlaceToPage;
btree->splitPage = entrySplitPage;
btree->fillRoot = ginEntryFillRoot;

View File

@ -35,64 +35,6 @@ typedef struct
BuildAccumulator accum;
} GinBuildState;
/*
* Creates new posting tree with one page, containing the given TIDs.
* Returns the page number (which will be the root of this posting tree).
*
* items[] must be in sorted order with no duplicates, and nitems must not
* exceed GinMaxLeafDataItems (this is only checked by an assertion).
*/
static BlockNumber
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
{
BlockNumber blkno;
Buffer buffer = GinNewBuffer(index);
Page page;
/* Assert that the items[] array will fit on one page */
Assert(nitems <= GinMaxLeafDataItems);
START_CRIT_SECTION();
/* initialize the new buffer as a data leaf page and fill in the TIDs */
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
page = BufferGetPage(buffer);
blkno = BufferGetBlockNumber(buffer);
memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems);
GinPageGetOpaque(page)->maxoff = nitems;
MarkBufferDirty(buffer);
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
XLogRecData rdata[2];
ginxlogCreatePostingTree data;
data.node = index->rd_node;
data.blkno = blkno;
data.nitem = nitems;
/* first chunk: the fixed-size record header */
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogCreatePostingTree);
rdata[0].next = &rdata[1];
/* second chunk: the TID array itself */
rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) items;
rdata[1].len = sizeof(ItemPointerData) * nitems;
rdata[1].next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
PageSetLSN(page, recptr);
}
UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
return blkno;
}
/*
* Adds array of item pointers to tuple's posting list, or
@ -148,11 +90,8 @@ addItemPointersToLeafTuple(GinState *ginstate,
*/
postingRoot = createPostingTree(ginstate->index,
GinGetPosting(old),
GinGetNPosting(old));
/* During index build, count the newly-added data page */
if (buildStats)
buildStats->nDataPages++;
GinGetNPosting(old),
buildStats);
/* Now insert the TIDs-to-be-added into the posting tree */
gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE);
@ -186,7 +125,7 @@ buildFreshLeafTuple(GinState *ginstate,
{
IndexTuple res;
/* try to build tuple with room for all the items */
/* try to build a posting list tuple with all the items */
res = GinFormTuple(ginstate, attnum, key, category,
items, nitem, false);
@ -202,32 +141,9 @@ buildFreshLeafTuple(GinState *ginstate,
res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true);
/*
* Initialize posting tree with as many TIDs as will fit on the first
* page.
* Initialize a new posting tree with the TIDs.
*/
postingRoot = createPostingTree(ginstate->index,
items,
Min(nitem, GinMaxLeafDataItems));
/* During index build, count the newly-added data page */
if (buildStats)
buildStats->nDataPages++;
/* Add any remaining TIDs to the posting tree */
if (nitem > GinMaxLeafDataItems)
{
GinPostingTreeScan *gdi;
gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE);
gdi->btree.isBuild = (buildStats != NULL);
ginInsertItemPointers(gdi,
items + GinMaxLeafDataItems,
nitem - GinMaxLeafDataItems,
buildStats);
pfree(gdi);
}
postingRoot = createPostingTree(ginstate->index, items, nitem);
/* And save the root link in the result tuple */
GinSetPostingTree(res, postingRoot);

View File

@ -0,0 +1,56 @@
/*-------------------------------------------------------------------------
*
* ginpostinglist.c
* routines for dealing with posting lists.
*
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/access/gin/ginpostinglist.c
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/gin_private.h"
/*
 * Merge the sorted item pointer arrays 'a' (na entries) and 'b' (nb entries)
 * into 'dst'.  An item that appears in both inputs is emitted only once.
 *
 * Returns the number of items written to 'dst'.  The caller must provide
 * room for the result at *dst.
 */
uint32
ginMergeItemPointers(ItemPointerData *dst,
					 ItemPointerData *a, uint32 na,
					 ItemPointerData *b, uint32 nb)
{
	uint32		ia = 0;
	uint32		ib = 0;
	uint32		nout = 0;

	/* consume both inputs in parallel while each still has items */
	while (ia < na && ib < nb)
	{
		int			cmp = ginCompareItemPointers(&a[ia], &b[ib]);

		if (cmp < 0)
		{
			/* a's item sorts first */
			dst[nout++] = a[ia++];
		}
		else
		{
			/* equal items: keep b's copy and skip the one in a */
			if (cmp == 0)
				ia++;
			dst[nout++] = b[ib++];
		}
	}

	/* append whatever is left over in either input */
	for (; ia < na; ia++)
		dst[nout++] = a[ia];
	for (; ib < nb; ib++)
		dst[nout++] = b[ib];

	return nout;
}

View File

@ -33,23 +33,26 @@ typedef struct
/*
* Cleans array of ItemPointer (removes dead pointers)
* Results are always stored in *cleaned, which will be allocated
* if it's needed. In case of *cleaned!=NULL caller is responsible to
* have allocated enough space. *cleaned and items may point to the same
* memory address.
* Vacuums a list of item pointers. The original size of the list is 'nitem',
* returns the number of items remaining afterwards.
*
* If *cleaned == NULL on entry, the original array is left unmodified; if
* any items are removed, a palloc'd copy of the result is stored in *cleaned.
* Otherwise *cleaned should point to the original array, in which case it's
* modified directly.
*/
static uint32
ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, uint32 nitem, ItemPointerData **cleaned)
static int
ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, int nitem,
ItemPointerData **cleaned)
{
uint32 i,
int i,
j = 0;
Assert(*cleaned == NULL || *cleaned == items);
/*
* just scan over ItemPointer array
*/
for (i = 0; i < nitem; i++)
{
if (gvs->callback(items + i, gvs->callback_state))
@ -385,7 +388,8 @@ typedef struct DataPageDeleteStack
* scans posting tree and deletes empty pages
*/
static bool
ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff)
ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
DataPageDeleteStack *parent, OffsetNumber myoff)
{
DataPageDeleteStack *me;
Buffer buffer;
@ -431,15 +435,13 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDel
if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
{
/* the page is empty */
if (!(me->leftBlkno == InvalidBlockNumber && GinPageRightMost(page)))
{
/* we never delete right most branch */
Assert(!isRoot);
if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
{
ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot);
meDelete = TRUE;
}
ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot);
meDelete = TRUE;
}
}
@ -517,11 +519,12 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
else if (GinGetNPosting(itup) > 0)
{
/*
* if we already create temporary page, we will make changes in
* place
* if we already created a temporary page, make changes in place
*/
ItemPointerData *cleaned = (tmppage == origpage) ? NULL : GinGetPosting(itup);
uint32 newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned);
int newN;
newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned);
if (GinGetNPosting(itup) != newN)
{
@ -530,15 +533,13 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
GinNullCategory category;
/*
* Some ItemPointers was deleted, so we should remake our
* tuple
* Some ItemPointers were deleted, recreate tuple.
*/
if (tmppage == origpage)
{
/*
* On first difference we create temporary page in memory
* and copies content in to it.
* On first difference, create a temporary copy of the
* page and copy the tuple's posting list to it.
*/
tmppage = PageGetTempPageCopy(origpage);

View File

@ -485,8 +485,7 @@ typedef struct GinBtreeData
/* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
BlockNumber (*getLeftMostPage) (GinBtree, Page);
bool (*isEnoughSpace) (GinBtree, Buffer, OffsetNumber);
void (*placeToPage) (GinBtree, Buffer, OffsetNumber, XLogRecData **);
bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, XLogRecData **);
Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData **);
void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer);