Set pd_lower on internal GIN posting tree pages.

This allows squeezing out the unused space in full-page writes and, more
importantly, can be a useful debugging aid.

In hindsight we should've done this back when GIN was added - we wouldn't
need the 'maxoff' field in the page opaque struct if we had used pd_lower
and pd_upper like on normal pages. But as long as there can be pages in the
index that have been binary-upgraded from pre-9.4 versions, we can't rely
on pd_lower being set correctly, and have to continue using 'maxoff'.

Most of the code churn comes from renaming some macros, now that they're
used on internal pages, too.

This change is completely backwards-compatible and has no effect on pg_upgrade.
This commit is contained in:
Heikki Linnakangas 2014-04-14 21:03:01 +03:00
parent 69671ab548
commit f1dadd34fa
4 changed files with 79 additions and 42 deletions

View File

@ -390,7 +390,15 @@ GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
}
memcpy(ptr, data, sizeof(PostingItem));
GinPageGetOpaque(page)->maxoff++;
maxoff++;
GinPageGetOpaque(page)->maxoff = maxoff;
/*
* Also set pd_lower to the end of the posting items, to follow the
* "standard" page layout, so that we can squeeze out the unused space
* from full-page images.
*/
GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
}
/*
@ -409,7 +417,10 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
GinDataPageGetPostingItem(page, offset + 1),
sizeof(PostingItem) * (maxoff - offset));
GinPageGetOpaque(page)->maxoff--;
maxoff--;
GinPageGetOpaque(page)->maxoff = maxoff;
GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
}
/*
@ -520,7 +531,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
* a single byte, and we can use all the free space on the old page as
* well as the new page. For simplicity, ignore segment overhead etc.
*/
maxitems = Min(maxitems, freespace + GinDataLeafMaxContentSize);
maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
}
else
{
@ -535,7 +546,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
int nnewsegments;
nnewsegments = freespace / GinPostingListSegmentMaxSize;
nnewsegments += GinDataLeafMaxContentSize / GinPostingListSegmentMaxSize;
nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
}
@ -648,8 +659,8 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
}
}
Assert(leaf->lsize <= GinDataLeafMaxContentSize);
Assert(leaf->rsize <= GinDataLeafMaxContentSize);
Assert(leaf->lsize <= GinDataPageMaxDataSize);
Assert(leaf->rsize <= GinDataPageMaxDataSize);
/*
* Fetch the max item in the left page's last segment; it becomes the
@ -716,7 +727,7 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
if (seginfo->seg)
oldsegsize = SizeOfGinPostingList(seginfo->seg);
else
oldsegsize = GinDataLeafMaxContentSize;
oldsegsize = GinDataPageMaxDataSize;
cleaned = ginVacuumItemPointers(gvs,
seginfo->items,
@ -987,8 +998,8 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
}
}
Assert(newsize <= GinDataLeafMaxContentSize);
GinDataLeafPageSetPostingListSize(page, newsize);
Assert(newsize <= GinDataPageMaxDataSize);
GinDataPageSetDataSize(page, newsize);
}
/*
@ -1043,7 +1054,7 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
}
}
Assert(lsize == leaf->lsize);
GinDataLeafPageSetPostingListSize(lpage, lsize);
GinDataPageSetDataSize(lpage, lsize);
*GinDataPageGetRightBound(lpage) = lbound;
/* Copy the segments that go to the right page */
@ -1067,7 +1078,7 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
break;
}
Assert(rsize == leaf->rsize);
GinDataLeafPageSetPostingListSize(rpage, rsize);
GinDataPageSetDataSize(rpage, rsize);
*GinDataPageGetRightBound(rpage) = rbound;
/* Create WAL record */
@ -1139,7 +1150,7 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.newitem = *pitem;
rdata.buffer = buf;
rdata.buffer_std = false;
rdata.buffer_std = TRUE;
rdata.data = (char *) &data;
rdata.len = sizeof(ginxlogInsertDataInternal);
rdata.next = NULL;
@ -1183,6 +1194,8 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
Page oldpage = BufferGetPage(origbuf);
OffsetNumber off = stack->off;
int nitems = GinPageGetOpaque(oldpage)->maxoff;
int nleftitems;
int nrightitems;
Size pageSize = PageGetPageSize(oldpage);
ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
ItemPointer bound;
@ -1226,17 +1239,27 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
else
separator = nitems / 2;
nleftitems = separator;
nrightitems = nitems - separator;
memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber), allitems, separator * sizeof(PostingItem));
GinPageGetOpaque(lpage)->maxoff = separator;
memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
allitems,
nleftitems * sizeof(PostingItem));
GinPageGetOpaque(lpage)->maxoff = nleftitems;
memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
&allitems[separator], (nitems - separator) * sizeof(PostingItem));
GinPageGetOpaque(rpage)->maxoff = nitems - separator;
&allitems[separator],
nrightitems * sizeof(PostingItem));
GinPageGetOpaque(rpage)->maxoff = nrightitems;
/*
* Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
*/
GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
/* set up right bound for left page */
bound = GinDataPageGetRightBound(lpage);
*bound = GinDataPageGetPostingItem(lpage,
GinPageGetOpaque(lpage)->maxoff)->key;
*bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
/* set up right bound for right page */
*GinDataPageGetRightBound(rpage) = oldbound;
@ -1619,7 +1642,7 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
* copying to the page. Did we exceed the size that fits on one page?
*/
segsize = SizeOfGinPostingList(seginfo->seg);
if (pgused + segsize > GinDataLeafMaxContentSize)
if (pgused + segsize > GinDataPageMaxDataSize)
{
if (!needsplit)
{
@ -1659,8 +1682,8 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
else
leaf->rsize = pgused;
Assert(leaf->lsize <= GinDataLeafMaxContentSize);
Assert(leaf->rsize <= GinDataLeafMaxContentSize);
Assert(leaf->lsize <= GinDataPageMaxDataSize);
Assert(leaf->rsize <= GinDataPageMaxDataSize);
/*
* Make a palloc'd copy of every segment after the first modified one,
@ -1735,7 +1758,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
GinPostingListSegmentMaxSize,
&npacked);
segsize = SizeOfGinPostingList(segment);
if (rootsize + segsize > GinDataLeafMaxContentSize)
if (rootsize + segsize > GinDataPageMaxDataSize)
break;
memcpy(ptr, segment, segsize);
@ -1744,7 +1767,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
nrootitems += npacked;
pfree(segment);
}
GinDataLeafPageSetPostingListSize(tmppage, rootsize);
GinDataPageSetDataSize(tmppage, rootsize);
/*
* All set. Get a new physical page, and copy the in-memory page to it.

View File

@ -301,6 +301,13 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
data.leftBlkno = leftBlkno;
data.rightLink = GinPageGetOpaque(page)->rightlink;
/*
* We can't pass buffer_std = TRUE for the deleted page, because we
* didn't set pd_lower on pre-9.4 versions. The page might've been
* binary-upgraded from an older version, and hence not have pd_lower
* set correctly. Ditto for the left page. The parent page is OK,
* though: removing the item from it updated its pd_lower.
*/
rdata[0].buffer = dBuffer;
rdata[0].buffer_std = FALSE;
rdata[0].data = NULL;
@ -308,7 +315,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
rdata[0].next = rdata + 1;
rdata[1].buffer = pBuffer;
rdata[1].buffer_std = FALSE;
rdata[1].buffer_std = TRUE;
rdata[1].data = NULL;
rdata[1].len = 0;
rdata[1].next = rdata + 2;

View File

@ -96,7 +96,7 @@ ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
/* Place page data */
memcpy(GinDataLeafPageGetPostingList(page), ptr, data->size);
GinDataLeafPageSetPostingListSize(page, data->size);
GinDataPageSetDataSize(page, data->size);
PageSetLSN(page, lsn);
@ -169,7 +169,7 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
totalsize = SizeOfGinPostingList(plist);
memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
GinDataLeafPageSetPostingListSize(page, totalsize);
GinDataPageSetDataSize(page, totalsize);
GinPageSetCompressed(page);
GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
}
@ -296,7 +296,7 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
}
totalsize = segmentend - (Pointer) GinDataLeafPageGetPostingList(page);
GinDataLeafPageSetPostingListSize(page, totalsize);
GinDataPageSetDataSize(page, totalsize);
}
static void
@ -423,14 +423,14 @@ ginRedoSplitData(Page lpage, Page rpage, void *rdata)
Pointer lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf);
Pointer rptr = lptr + data->lsize;
Assert(data->lsize > 0 && data->lsize <= GinDataLeafMaxContentSize);
Assert(data->rsize > 0 && data->rsize <= GinDataLeafMaxContentSize);
Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize);
Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize);
memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize);
memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize);
GinDataLeafPageSetPostingListSize(lpage, data->lsize);
GinDataLeafPageSetPostingListSize(rpage, data->rsize);
GinDataPageSetDataSize(lpage, data->lsize);
GinDataPageSetDataSize(rpage, data->rsize);
*GinDataPageGetRightBound(lpage) = data->lrightbound;
*GinDataPageGetRightBound(rpage) = data->rrightbound;
}

View File

@ -257,11 +257,6 @@ typedef signed char GinNullCategory;
(GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
#define GinDataLeafPageGetPostingListSize(page) \
(((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
#define GinDataLeafPageSetPostingListSize(page, size) \
{ \
Assert(size <= GinDataLeafMaxContentSize); \
((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
}
#define GinDataLeafPageIsEmpty(page) \
(GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
@ -281,13 +276,25 @@ typedef signed char GinNullCategory;
#define GinDataPageGetPostingItem(page, i) \
((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
#define GinNonLeafDataPageGetFreeSpace(page) \
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
- MAXALIGN(sizeof(ItemPointerData)) \
- GinPageGetOpaque(page)->maxoff * sizeof(PostingItem) \
- MAXALIGN(sizeof(GinPageOpaqueData)))
/*
* Note: there is no GinDataPageGetDataSize macro, because before version
* 9.4, we didn't set pd_lower on data pages. There can be pages in the index
* that were binary-upgraded from earlier versions and still have an invalid
* pd_lower, so we cannot trust it in general. Compressed posting tree leaf
* pages are new in 9.4, however, so we can trust them; see
* GinDataLeafPageGetPostingListSize.
*/
#define GinDataPageSetDataSize(page, size) \
{ \
Assert(size <= GinDataPageMaxDataSize); \
((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
}
#define GinDataLeafMaxContentSize \
#define GinNonLeafDataPageGetFreeSpace(page) \
(GinDataPageMaxDataSize - \
GinPageGetOpaque(page)->maxoff * sizeof(PostingItem))
#define GinDataPageMaxDataSize \
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
- MAXALIGN(sizeof(ItemPointerData)) \
- MAXALIGN(sizeof(GinPageOpaqueData)))