From ecaa4708e5dde5e9f72cdb066780acb4b12ee0ec Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 6 Nov 2013 10:31:38 +0200 Subject: [PATCH] Misc GIN refactoring. Merge the isEnoughSpace and placeToPage functions in the b-tree interface into one function that tries to put a tuple on page, and returns false if it doesn't fit. Move createPostingTree function to gindatapage.c, and change its contract so that it can be passed more items than fit on the root page. It's in a better position than the callers to know how many items fit. Move ginMergeItemPointers out of gindatapage.c, into a separate file. These changes make no difference now, but reduce the footprint of Alexander Korotkov's upcoming patch to pack item pointers more tightly. --- src/backend/access/gin/Makefile | 2 +- src/backend/access/gin/README | 2 +- src/backend/access/gin/ginbtree.c | 21 +-- src/backend/access/gin/gindatapage.c | 163 ++++++++++++++++-------- src/backend/access/gin/ginentrypage.c | 14 +- src/backend/access/gin/gininsert.c | 94 +------------- src/backend/access/gin/ginpostinglist.c | 56 ++++++++ src/backend/access/gin/ginvacuum.c | 49 +++---- src/include/access/gin_private.h | 3 +- 9 files changed, 223 insertions(+), 181 deletions(-) create mode 100644 src/backend/access/gin/ginpostinglist.c diff --git a/src/backend/access/gin/Makefile b/src/backend/access/gin/Makefile index 889dde6a27..aabc62ff22 100644 --- a/src/backend/access/gin/Makefile +++ b/src/backend/access/gin/Makefile @@ -14,6 +14,6 @@ include $(top_builddir)/src/Makefile.global OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \ ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \ - ginbulk.o ginfast.o + ginbulk.o ginfast.o ginpostinglist.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index 67159d8529..434d398bf7 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -104,7 +104,7 @@ a few 
thousand entries can be much faster than retail insertion. (The win comes mainly from not having to do multiple searches/insertions when the same key appears in multiple new heap tuples.) -Key entries are nominally of the same IndexEntry format as used in other +Key entries are nominally of the same IndexTuple format as used in other index types, but since a leaf key entry typically refers to multiple heap tuples, there are significant differences. (See GinFormTuple, which works by building a "normal" index tuple and then modifying it.) The points to diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 2a6be4b1a9..f032faa22e 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -264,7 +264,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack, * Insert value (stored in GinBtree) to tree described by stack * * During an index build, buildStats is non-null and the counters - * it contains should be incremented as needed. + * it contains are incremented as needed. * * NB: the passed-in stack is freed, as though by freeGinBtreeStack. 
*/ @@ -290,15 +290,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) { XLogRecData *rdata; BlockNumber savedRightLink; + bool fit; page = BufferGetPage(stack->buffer); savedRightLink = GinPageGetOpaque(page)->rightlink; - if (btree->isEnoughSpace(btree, stack->buffer, stack->off)) + START_CRIT_SECTION(); + fit = btree->placeToPage(btree, stack->buffer, stack->off, &rdata); + if (fit) { - START_CRIT_SECTION(); - btree->placeToPage(btree, stack->buffer, stack->off, &rdata); - MarkBufferDirty(stack->buffer); if (RelationNeedsWAL(btree->index)) @@ -318,12 +318,17 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) } else { - Buffer rbuffer = GinNewBuffer(btree->index); + /* Didn't fit, have to split */ + Buffer rbuffer; Page newlpage; + END_CRIT_SECTION(); + + rbuffer = GinNewBuffer(btree->index); + /* - * newlpage is a pointer to memory page, it doesn't associate with - * buffer, stack->buffer should be untouched + * newlpage is a pointer to memory page, it is not associated with + * a buffer. stack->buffer is not touched yet. */ newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata); diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index 2c6447d7fa..d67e50555c 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -15,47 +15,9 @@ #include "postgres.h" #include "access/gin_private.h" +#include "miscadmin.h" #include "utils/rel.h" -/* - * Merge two ordered arrays of itempointers, eliminating any duplicates. - * Returns the number of items in the result. - * Caller is responsible that there is enough space at *dst. 
- */ -uint32 -ginMergeItemPointers(ItemPointerData *dst, - ItemPointerData *a, uint32 na, - ItemPointerData *b, uint32 nb) -{ - ItemPointerData *dptr = dst; - ItemPointerData *aptr = a, - *bptr = b; - - while (aptr - a < na && bptr - b < nb) - { - int cmp = ginCompareItemPointers(aptr, bptr); - - if (cmp > 0) - *dptr++ = *bptr++; - else if (cmp == 0) - { - /* we want only one copy of the identical items */ - *dptr++ = *bptr++; - aptr++; - } - else - *dptr++ = *aptr++; - } - - while (aptr - a < na) - *dptr++ = *aptr++; - - while (bptr - b < nb) - *dptr++ = *bptr++; - - return dptr - dst; -} - /* * Checks, should we move to right link... * Compares inserting itemp pointer with right bound of current page @@ -387,9 +349,12 @@ dataPrepareData(GinBtree btree, Page page, OffsetNumber off) /* * Places keys to page and fills WAL record. In case leaf page and * build mode puts all ItemPointers to page. + * + * If none of the keys fit, returns false without modifying the page. */ -static void -dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) +static bool +dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, + XLogRecData **prdata) { Page page = BufferGetPage(buf); int sizeofitem = GinSizeOfDataPageItem(page); @@ -399,6 +364,10 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda static XLogRecData rdata[3]; static ginxlogInsert data; + /* quick exit if it doesn't fit */ + if (!dataIsEnoughSpace(btree, buf, off)) + return false; + *prdata = rdata; Assert(GinPageIsData(page)); @@ -464,6 +433,8 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda } else GinDataPageAddPostingItem(page, &(btree->pitem), off); + + return true; } /* @@ -545,8 +516,8 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe } /* - * we suppose that during index creation table scaned from begin to end, - * so ItemPointers are monotonically increased.. 
+ * we assume that during index creation the table scanned from beginning + * to end, so ItemPointers are in monotonically increasing order. */ if (btree->isBuild && GinPageRightMost(lpage)) separator = freeSpace / sizeofitem; @@ -575,15 +546,6 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe GinPageGetOpaque(rpage)->maxoff = maxoff - separator; - PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf)); - if (GinPageIsLeaf(lpage)) - btree->pitem.key = *GinDataPageGetItemPointer(lpage, - GinPageGetOpaque(lpage)->maxoff); - else - btree->pitem.key = GinDataPageGetPostingItem(lpage, - GinPageGetOpaque(lpage)->maxoff)->key; - btree->rightblkno = BufferGetBlockNumber(rbuf); - /* set up right bound for left page */ bound = GinDataPageGetRightBound(lpage); *bound = btree->pitem.key; @@ -613,6 +575,16 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe rdata[1].len = MAXALIGN(maxoff * sizeofitem); rdata[1].next = NULL; + /* Prepare a downlink tuple for insertion to the parent */ + PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf)); + if (GinPageIsLeaf(lpage)) + btree->pitem.key = *GinDataPageGetItemPointer(lpage, + GinPageGetOpaque(lpage)->maxoff); + else + btree->pitem.key = GinDataPageGetPostingItem(lpage, + GinPageGetOpaque(lpage)->maxoff)->key; + btree->rightblkno = BufferGetBlockNumber(rbuf); + return lpage; } @@ -638,6 +610,92 @@ ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) GinDataPageAddPostingItem(page, &ri, InvalidOffsetNumber); } +/* + * Creates new posting tree containing the given TIDs. Returns the page + * number of the root of the new posting tree. + * + * items[] must be in sorted order with no duplicates. 
+ */ +BlockNumber +createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, + GinStatsData *buildStats) +{ + BlockNumber blkno; + Buffer buffer; + Page page; + int itemsCount; + + /* Calculate how many TIDs will fit on first page. */ + itemsCount = Min(nitems, GinMaxLeafDataItems); + + /* + * Create the root page, holding only the first itemsCount TIDs. + */ + buffer = GinNewBuffer(index); + page = BufferGetPage(buffer); + blkno = BufferGetBlockNumber(buffer); + + START_CRIT_SECTION(); + + GinInitBuffer(buffer, GIN_DATA | GIN_LEAF); + memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * itemsCount); + GinPageGetOpaque(page)->maxoff = itemsCount; + + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + XLogRecData rdata[2]; + ginxlogCreatePostingTree data; + + data.node = index->rd_node; + data.blkno = blkno; + data.nitem = itemsCount; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *) &data; + rdata[0].len = sizeof(ginxlogCreatePostingTree); + rdata[0].next = &rdata[1]; + + rdata[1].buffer = InvalidBuffer; + rdata[1].data = (char *) items; + rdata[1].len = sizeof(ItemPointerData) * itemsCount; + rdata[1].next = NULL; + + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata); + PageSetLSN(page, recptr); + } + + UnlockReleaseBuffer(buffer); + + END_CRIT_SECTION(); + + /* During index build, count the newly-added data page */ + if (buildStats) + buildStats->nDataPages++; + + /* + * Add any remaining TIDs to the newly-created posting tree. 
+ */ + if (itemsCount < nitems) + { + GinPostingTreeScan *gdi; + + gdi = ginPrepareScanPostingTree(index, blkno, FALSE); + gdi->btree.isBuild = (buildStats != NULL); + + ginInsertItemPointers(gdi, + items + itemsCount, + nitems - itemsCount, + buildStats); + + pfree(gdi); + } + + return blkno; +} + void ginPrepareDataScan(GinBtree btree, Relation index) { @@ -650,7 +708,6 @@ ginPrepareDataScan(GinBtree btree, Relation index) btree->findItem = dataLocateLeafItem; btree->findChildPtr = dataFindChildPtr; btree->getLeftMostPage = dataGetLeftMostPage; - btree->isEnoughSpace = dataIsEnoughSpace; btree->placeToPage = dataPlaceToPage; btree->splitPage = dataSplitPage; btree->fillRoot = ginDataFillRoot; diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index 7733028fba..0ed0a3db7e 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -486,9 +486,12 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off) /* * Place tuple on page and fills WAL record + * + * If the tuple doesn't fit, returns false without modifying the page. 
*/ -static void -entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) +static bool +entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, + XLogRecData **prdata) { Page page = BufferGetPage(buf); OffsetNumber placed; @@ -498,6 +501,10 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd static XLogRecData rdata[3]; static ginxlogInsert data; + /* quick exit if it doesn't fit */ + if (!entryIsEnoughSpace(btree, buf, off)) + return false; + *prdata = rdata; data.updateBlkno = entryPreparePage(btree, page, off); @@ -543,6 +550,8 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd rdata[cnt].next = NULL; btree->entry = NULL; + + return true; } /* @@ -724,7 +733,6 @@ ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, btree->findItem = entryLocateLeafEntry; btree->findChildPtr = entryFindChildPtr; btree->getLeftMostPage = entryGetLeftMostPage; - btree->isEnoughSpace = entryIsEnoughSpace; btree->placeToPage = entryPlaceToPage; btree->splitPage = entrySplitPage; btree->fillRoot = ginEntryFillRoot; diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index beaa65317f..125f3fb12d 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -35,64 +35,6 @@ typedef struct BuildAccumulator accum; } GinBuildState; -/* - * Creates new posting tree with one page, containing the given TIDs. - * Returns the page number (which will be the root of this posting tree). - * - * items[] must be in sorted order with no duplicates. 
- */ -static BlockNumber -createPostingTree(Relation index, ItemPointerData *items, uint32 nitems) -{ - BlockNumber blkno; - Buffer buffer = GinNewBuffer(index); - Page page; - - /* Assert that the items[] array will fit on one page */ - Assert(nitems <= GinMaxLeafDataItems); - - START_CRIT_SECTION(); - - GinInitBuffer(buffer, GIN_DATA | GIN_LEAF); - page = BufferGetPage(buffer); - blkno = BufferGetBlockNumber(buffer); - - memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems); - GinPageGetOpaque(page)->maxoff = nitems; - - MarkBufferDirty(buffer); - - if (RelationNeedsWAL(index)) - { - XLogRecPtr recptr; - XLogRecData rdata[2]; - ginxlogCreatePostingTree data; - - data.node = index->rd_node; - data.blkno = blkno; - data.nitem = nitems; - - rdata[0].buffer = InvalidBuffer; - rdata[0].data = (char *) &data; - rdata[0].len = sizeof(ginxlogCreatePostingTree); - rdata[0].next = &rdata[1]; - - rdata[1].buffer = InvalidBuffer; - rdata[1].data = (char *) items; - rdata[1].len = sizeof(ItemPointerData) * nitems; - rdata[1].next = NULL; - - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata); - PageSetLSN(page, recptr); - } - - UnlockReleaseBuffer(buffer); - - END_CRIT_SECTION(); - - return blkno; -} - /* * Adds array of item pointers to tuple's posting list, or @@ -148,11 +90,8 @@ addItemPointersToLeafTuple(GinState *ginstate, */ postingRoot = createPostingTree(ginstate->index, GinGetPosting(old), - GinGetNPosting(old)); - - /* During index build, count the newly-added data page */ - if (buildStats) - buildStats->nDataPages++; + GinGetNPosting(old), + buildStats); /* Now insert the TIDs-to-be-added into the posting tree */ gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE); @@ -186,7 +125,7 @@ buildFreshLeafTuple(GinState *ginstate, { IndexTuple res; - /* try to build tuple with room for all the items */ + /* try to build a posting list tuple with all the items */ res = GinFormTuple(ginstate, attnum, key, category, items, 
nitem, false); @@ -202,32 +141,9 @@ buildFreshLeafTuple(GinState *ginstate, res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true); /* - * Initialize posting tree with as many TIDs as will fit on the first - * page. + * Initialize a new posting tree with the TIDs. */ - postingRoot = createPostingTree(ginstate->index, - items, - Min(nitem, GinMaxLeafDataItems)); - - /* During index build, count the newly-added data page */ - if (buildStats) - buildStats->nDataPages++; - - /* Add any remaining TIDs to the posting tree */ - if (nitem > GinMaxLeafDataItems) - { - GinPostingTreeScan *gdi; - - gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE); - gdi->btree.isBuild = (buildStats != NULL); - - ginInsertItemPointers(gdi, - items + GinMaxLeafDataItems, - nitem - GinMaxLeafDataItems, - buildStats); - - pfree(gdi); - } + postingRoot = createPostingTree(ginstate->index, items, nitem, buildStats); /* And save the root link in the result tuple */ GinSetPostingTree(res, postingRoot); diff --git a/src/backend/access/gin/ginpostinglist.c b/src/backend/access/gin/ginpostinglist.c new file mode 100644 index 0000000000..e5a15bf2a9 --- /dev/null +++ b/src/backend/access/gin/ginpostinglist.c @@ -0,0 +1,56 @@ +/*------------------------------------------------------------------------- + * + * ginpostinglist.c + * routines for dealing with posting lists. + * + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/gin/ginpostinglist.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin_private.h" + +/* + * Merge two ordered arrays of itempointers, eliminating any duplicates. + * Returns the number of items in the result. + * Caller is responsible that there is enough space at *dst. 
+ */ +uint32 +ginMergeItemPointers(ItemPointerData *dst, + ItemPointerData *a, uint32 na, + ItemPointerData *b, uint32 nb) +{ + ItemPointerData *dptr = dst; + ItemPointerData *aptr = a, + *bptr = b; + + while (aptr - a < na && bptr - b < nb) + { + int cmp = ginCompareItemPointers(aptr, bptr); + + if (cmp > 0) + *dptr++ = *bptr++; + else if (cmp == 0) + { + /* we want only one copy of the identical items */ + *dptr++ = *bptr++; + aptr++; + } + else + *dptr++ = *aptr++; + } + + while (aptr - a < na) + *dptr++ = *aptr++; + + while (bptr - b < nb) + *dptr++ = *bptr++; + + return dptr - dst; +} diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index bda9c60279..7b2c39965a 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -33,23 +33,26 @@ typedef struct /* - * Cleans array of ItemPointer (removes dead pointers) - * Results are always stored in *cleaned, which will be allocated - * if it's needed. In case of *cleaned!=NULL caller is responsible to - * have allocated enough space. *cleaned and items may point to the same - * memory address. + * Vacuums a list of item pointers. The original size of the list is 'nitem', + * returns the number of items remaining afterwards. + * + * If *cleaned == NULL on entry, the original array is left unmodified; if + * any items are removed, a palloc'd copy of the result is stored in *cleaned. + * Otherwise *cleaned should point to the original array, in which case it's + * modified directly. 
*/ - -static uint32 -ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, uint32 nitem, ItemPointerData **cleaned) +static int +ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, int nitem, + ItemPointerData **cleaned) { - uint32 i, + int i, j = 0; + Assert(*cleaned == NULL || *cleaned == items); + /* * just scan over ItemPointer array */ - for (i = 0; i < nitem; i++) { if (gvs->callback(items + i, gvs->callback_state)) @@ -385,7 +388,8 @@ typedef struct DataPageDeleteStack * scans posting tree and deletes empty pages */ static bool -ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff) +ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, + DataPageDeleteStack *parent, OffsetNumber myoff) { DataPageDeleteStack *me; Buffer buffer; @@ -431,15 +435,13 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDel if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { + /* the page is empty */ if (!(me->leftBlkno == InvalidBlockNumber && GinPageRightMost(page))) { /* we never delete right most branch */ Assert(!isRoot); - if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) - { - ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot); - meDelete = TRUE; - } + ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot); + meDelete = TRUE; } } @@ -517,11 +519,12 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3 else if (GinGetNPosting(itup) > 0) { /* - * if we already create temporary page, we will make changes in - * place + * if we already created a temporary page, make changes in place */ ItemPointerData *cleaned = (tmppage == origpage) ? 
NULL : GinGetPosting(itup); - uint32 newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned); + int newN; + + newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned); if (GinGetNPosting(itup) != newN) { @@ -530,15 +533,13 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3 GinNullCategory category; /* - * Some ItemPointers was deleted, so we should remake our - * tuple + * Some ItemPointers were deleted, recreate tuple. */ - if (tmppage == origpage) { /* - * On first difference we create temporary page in memory - * and copies content in to it. + * On first difference, create a temporary copy of the + * page and copy the tuple's posting list to it. */ tmppage = PageGetTempPageCopy(origpage); diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 7af3a870c7..c1462ee2fe 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -485,8 +485,7 @@ typedef struct GinBtreeData /* insert methods */ OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber); BlockNumber (*getLeftMostPage) (GinBtree, Page); - bool (*isEnoughSpace) (GinBtree, Buffer, OffsetNumber); - void (*placeToPage) (GinBtree, Buffer, OffsetNumber, XLogRecData **); + bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, XLogRecData **); Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData **); void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer);