Truncate line pointer array during VACUUM.
Teach VACUUM to truncate the line pointer array of each heap page when a contiguous group of LP_UNUSED line pointers appears at the end of the array -- these unused and unreferenced items are removed from the array. This process occurs during VACUUM's second pass over the heap, right after LP_DEAD line pointers on the page (those encountered/pruned during the first pass) are marked LP_UNUSED.

Truncation avoids line pointer bloat with certain workloads, particularly those involving continual range DELETEs and bulk INSERTs against the same table.

Also harden heapam code to check for an out-of-range page offset number in places where we weren't already doing so.

Author: Matthias van de Meent <boekewurm+postgres@gmail.com>
Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-By: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAEze2WjgaQc55Y5f5CQd3L=eS5CZcff2Obxp=O6pto8-f0hC4w@mail.gmail.com
Discussion: https://postgr.es/m/CAH2-Wzn6a64PJM1Ggzm=uvx2otsopJMhFQj_g1rAj4GWr3ZSzw@mail.gmail.com
This commit is contained in:
parent
3db826bd55
commit
3c3b8a4b26
|
@ -635,8 +635,15 @@ heapgettup(HeapScanDesc scan,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* The previous returned tuple may have been vacuumed since the
|
||||||
|
* previous scan when we use a non-MVCC snapshot, so we must
|
||||||
|
* re-establish the lineoff <= PageGetMaxOffsetNumber(dp)
|
||||||
|
* invariant
|
||||||
|
*/
|
||||||
lineoff = /* previous offnum */
|
lineoff = /* previous offnum */
|
||||||
OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self)));
|
Min(lines,
|
||||||
|
OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self))));
|
||||||
}
|
}
|
||||||
/* page and lineoff now reference the physically previous tid */
|
/* page and lineoff now reference the physically previous tid */
|
||||||
|
|
||||||
|
@ -678,6 +685,13 @@ heapgettup(HeapScanDesc scan,
|
||||||
lpp = PageGetItemId(dp, lineoff);
|
lpp = PageGetItemId(dp, lineoff);
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Only continue scanning the page while we have lines left.
|
||||||
|
*
|
||||||
|
* Note that this protects us from accessing line pointers past
|
||||||
|
* PageGetMaxOffsetNumber(); both for forward scans when we resume the
|
||||||
|
* table scan, and for when we start scanning a new page.
|
||||||
|
*/
|
||||||
while (linesleft > 0)
|
while (linesleft > 0)
|
||||||
{
|
{
|
||||||
if (ItemIdIsNormal(lpp))
|
if (ItemIdIsNormal(lpp))
|
||||||
|
@ -8556,10 +8570,8 @@ heap_xlog_vacuum(XLogReaderState *record)
|
||||||
ItemIdSetUnused(lp);
|
ItemIdSetUnused(lp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Attempt to truncate line pointer array now */
|
||||||
* Update the page's hint bit about whether it has free pointers
|
PageTruncateLinePointerArray(page);
|
||||||
*/
|
|
||||||
PageSetHasFreeLinePointers(page);
|
|
||||||
|
|
||||||
PageSetLSN(page, lsn);
|
PageSetLSN(page, lsn);
|
||||||
MarkBufferDirty(buffer);
|
MarkBufferDirty(buffer);
|
||||||
|
|
|
@ -962,6 +962,10 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
|
||||||
*/
|
*/
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
/* Sanity check */
|
||||||
|
if (nextoffnum < FirstOffsetNumber || nextoffnum > maxoff)
|
||||||
|
break;
|
||||||
|
|
||||||
lp = PageGetItemId(page, nextoffnum);
|
lp = PageGetItemId(page, nextoffnum);
|
||||||
|
|
||||||
/* Check for broken chains */
|
/* Check for broken chains */
|
||||||
|
|
|
@ -1444,7 +1444,11 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
|
||||||
if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
|
if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Wait until lazy_vacuum_heap_rel() to save free space.
|
* Wait until lazy_vacuum_heap_rel() to save free space. This
|
||||||
|
* doesn't just save us some cycles; it also allows us to record
|
||||||
|
* any additional free space that lazy_vacuum_heap_page() will
|
||||||
|
* make available in cases where it's possible to truncate the
|
||||||
|
* page's line pointer array.
|
||||||
*
|
*
|
||||||
* Note: The one-pass (no indexes) case is only supposed to make
|
* Note: The one-pass (no indexes) case is only supposed to make
|
||||||
* it this far when there were no LP_DEAD items during pruning.
|
* it this far when there were no LP_DEAD items during pruning.
|
||||||
|
@ -2033,6 +2037,13 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
|
||||||
* Pages that never had lazy_scan_prune record LP_DEAD items are not visited
|
* Pages that never had lazy_scan_prune record LP_DEAD items are not visited
|
||||||
* at all.
|
* at all.
|
||||||
*
|
*
|
||||||
|
* We may also be able to truncate the line pointer array of the heap pages we
|
||||||
|
* visit. If there is a contiguous group of LP_UNUSED items at the end of the
|
||||||
|
* array, it can be reclaimed as free space. These LP_UNUSED items usually
|
||||||
|
* start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
|
||||||
|
* each page to LP_UNUSED, and then consider if it's possible to truncate the
|
||||||
|
* page's line pointer array).
|
||||||
|
*
|
||||||
* Note: the reason for doing this as a second pass is we cannot remove the
|
* Note: the reason for doing this as a second pass is we cannot remove the
|
||||||
* tuples until we've removed their index entries, and we want to process
|
* tuples until we've removed their index entries, and we want to process
|
||||||
* index entry removal in batches as large as possible.
|
* index entry removal in batches as large as possible.
|
||||||
|
@ -2175,7 +2186,8 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
|
||||||
|
|
||||||
Assert(uncnt > 0);
|
Assert(uncnt > 0);
|
||||||
|
|
||||||
PageSetHasFreeLinePointers(page);
|
/* Attempt to truncate line pointer array now */
|
||||||
|
PageTruncateLinePointerArray(page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mark buffer dirty before we write WAL.
|
* Mark buffer dirty before we write WAL.
|
||||||
|
|
|
@ -250,8 +250,17 @@ PageAddItemExtended(Page page,
|
||||||
/* if no free slot, we'll put it at limit (1st open slot) */
|
/* if no free slot, we'll put it at limit (1st open slot) */
|
||||||
if (PageHasFreeLinePointers(phdr))
|
if (PageHasFreeLinePointers(phdr))
|
||||||
{
|
{
|
||||||
/* Look for "recyclable" (unused) ItemId */
|
/*
|
||||||
for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
|
* Scan line pointer array to locate a "recyclable" (unused)
|
||||||
|
* ItemId.
|
||||||
|
*
|
||||||
|
* Always use earlier items first. PageTruncateLinePointerArray
|
||||||
|
* can only truncate unused items when they appear as a contiguous
|
||||||
|
* group at the end of the line pointer array.
|
||||||
|
*/
|
||||||
|
for (offsetNumber = FirstOffsetNumber;
|
||||||
|
offsetNumber < limit; /* limit is maxoff+1 */
|
||||||
|
offsetNumber++)
|
||||||
{
|
{
|
||||||
itemId = PageGetItemId(phdr, offsetNumber);
|
itemId = PageGetItemId(phdr, offsetNumber);
|
||||||
|
|
||||||
|
@ -675,11 +684,23 @@ compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorte
|
||||||
/*
|
/*
|
||||||
* PageRepairFragmentation
|
* PageRepairFragmentation
|
||||||
*
|
*
|
||||||
* Frees fragmented space on a page.
|
* Frees fragmented space on a heap page following pruning.
|
||||||
* It doesn't remove unused line pointers! Please don't change this.
|
|
||||||
*
|
*
|
||||||
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
|
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
|
||||||
*
|
*
|
||||||
|
* Never removes unused line pointers. PageTruncateLinePointerArray can
|
||||||
|
* safely remove some unused line pointers. It ought to be safe for this
|
||||||
|
* routine to free unused line pointers in roughly the same way, but it's not
|
||||||
|
* clear that that would be beneficial.
|
||||||
|
*
|
||||||
|
* PageTruncateLinePointerArray is only called during VACUUM's second pass
|
||||||
|
* over the heap. Any unused line pointers that it sees are likely to have
|
||||||
|
* been set to LP_UNUSED (from LP_DEAD) immediately before the time it is
|
||||||
|
* called. On the other hand, many tables have the vast majority of all
|
||||||
|
* required pruning performed opportunistically (not during VACUUM). And so
|
||||||
|
* there is, in general, a good chance that even large groups of unused line
|
||||||
|
* pointers that we see here will be recycled quickly.
|
||||||
|
*
|
||||||
* Caller had better have a super-exclusive lock on page's buffer. As a side
|
* Caller had better have a super-exclusive lock on page's buffer. As a side
|
||||||
* effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
|
* effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
|
||||||
* needed.
|
* needed.
|
||||||
|
@ -784,6 +805,89 @@ PageRepairFragmentation(Page page)
|
||||||
PageClearHasFreeLinePointers(page);
|
PageClearHasFreeLinePointers(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PageTruncateLinePointerArray
|
||||||
|
*
|
||||||
|
* Removes unused line pointers at the end of the line pointer array.
|
||||||
|
*
|
||||||
|
* This routine is usable for heap pages only. It is called by VACUUM during
|
||||||
|
* its second pass over the heap. We expect at least one LP_UNUSED line
|
||||||
|
* pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
|
||||||
|
* it just set to LP_UNUSED then it should not call here).
|
||||||
|
*
|
||||||
|
* We avoid truncating the line pointer array to 0 items, if necessary by
|
||||||
|
* leaving behind a single remaining LP_UNUSED item. This is a little
|
||||||
|
* arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
|
||||||
|
* page behind.
|
||||||
|
*
|
||||||
|
* Caller can have either an exclusive lock or a super-exclusive lock on
|
||||||
|
* page's buffer. The page's PD_HAS_FREE_LINES hint bit will be set or unset
|
||||||
|
* based on whether or not we leave behind any remaining LP_UNUSED items.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
PageTruncateLinePointerArray(Page page)
|
||||||
|
{
|
||||||
|
PageHeader phdr = (PageHeader) page;
|
||||||
|
bool countdone = false,
|
||||||
|
sethint = false;
|
||||||
|
int nunusedend = 0;
|
||||||
|
|
||||||
|
/* Scan line pointer array back-to-front */
|
||||||
|
for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
|
||||||
|
{
|
||||||
|
ItemId lp = PageGetItemId(page, i);
|
||||||
|
|
||||||
|
if (!countdone && i > FirstOffsetNumber)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Still determining which line pointers from the end of the array
|
||||||
|
* will be truncated away. Either count another line pointer as
|
||||||
|
* safe to truncate, or notice that it's not safe to truncate
|
||||||
|
* additional line pointers (stop counting line pointers).
|
||||||
|
*/
|
||||||
|
if (!ItemIdIsUsed(lp))
|
||||||
|
nunusedend++;
|
||||||
|
else
|
||||||
|
countdone = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Once we've stopped counting we still need to figure out if
|
||||||
|
* there are any remaining LP_UNUSED line pointers somewhere more
|
||||||
|
* towards the front of the array.
|
||||||
|
*/
|
||||||
|
if (!ItemIdIsUsed(lp))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* This is an unused line pointer that we won't be truncating
|
||||||
|
* away -- so there is at least one. Set hint on page.
|
||||||
|
*/
|
||||||
|
sethint = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nunusedend > 0)
|
||||||
|
{
|
||||||
|
phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
|
||||||
|
|
||||||
|
#ifdef CLOBBER_FREED_MEMORY
|
||||||
|
memset((char *) page + phdr->pd_lower, 0x7F,
|
||||||
|
sizeof(ItemIdData) * nunusedend);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
Assert(sethint);
|
||||||
|
|
||||||
|
/* Set hint bit for PageAddItemExtended */
|
||||||
|
if (sethint)
|
||||||
|
PageSetHasFreeLinePointers(page);
|
||||||
|
else
|
||||||
|
PageClearHasFreeLinePointers(page);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* PageGetFreeSpace
|
* PageGetFreeSpace
|
||||||
* Returns the size of the free (allocatable) space on a page,
|
* Returns the size of the free (allocatable) space on a page,
|
||||||
|
|
|
@ -441,6 +441,7 @@ extern Page PageGetTempPageCopy(Page page);
|
||||||
extern Page PageGetTempPageCopySpecial(Page page);
|
extern Page PageGetTempPageCopySpecial(Page page);
|
||||||
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
|
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
|
||||||
extern void PageRepairFragmentation(Page page);
|
extern void PageRepairFragmentation(Page page);
|
||||||
|
extern void PageTruncateLinePointerArray(Page page);
|
||||||
extern Size PageGetFreeSpace(Page page);
|
extern Size PageGetFreeSpace(Page page);
|
||||||
extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
|
extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
|
||||||
extern Size PageGetExactFreeSpace(Page page);
|
extern Size PageGetExactFreeSpace(Page page);
|
||||||
|
|
Loading…
Reference in New Issue