diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 0e5849efdc..6ed115f81c 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -1121,16 +1121,22 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, static void terminate_brin_buildstate(BrinBuildState *state) { - /* release the last index buffer used */ + /* + * Release the last index buffer used. We might as well ensure that + * whatever free space remains in that page is available in FSM, too. + */ if (!BufferIsInvalid(state->bs_currentInsertBuf)) { Page page; + Size freespace; + BlockNumber blk; page = BufferGetPage(state->bs_currentInsertBuf); - RecordPageWithFreeSpace(state->bs_irel, - BufferGetBlockNumber(state->bs_currentInsertBuf), - PageGetFreeSpace(page)); + freespace = PageGetFreeSpace(page); + blk = BufferGetBlockNumber(state->bs_currentInsertBuf); ReleaseBuffer(state->bs_currentInsertBuf); + RecordPageWithFreeSpace(state->bs_irel, blk, freespace); + FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1); } brin_free_desc(state->bs_bdesc); @@ -1445,14 +1451,15 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b) static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy) { - bool vacuum_fsm = false; + BlockNumber nblocks; BlockNumber blkno; /* * Scan the index in physical order, and clean up any possible mess in * each page. */ - for (blkno = 0; blkno < RelationGetNumberOfBlocks(idxrel); blkno++) + nblocks = RelationGetNumberOfBlocks(idxrel); + for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; @@ -1461,15 +1468,15 @@ brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy) buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno, RBM_NORMAL, strategy); - vacuum_fsm |= brin_page_cleanup(idxrel, buf); + brin_page_cleanup(idxrel, buf); ReleaseBuffer(buf); } /* - * If we made any change to the FSM, make sure the new info is visible all - * the way to the top. + * Update all upper pages in the index's FSM, as well. This ensures not + * only that we propagate leaf-page FSM updates made by brin_page_cleanup, + * but also that any pre-existing damage or out-of-dateness is repaired. */ - if (vacuum_fsm) - FreeSpaceMapVacuum(idxrel); + FreeSpaceMapVacuum(idxrel); } diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index 60a7025ec5..040cb62e55 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -64,6 +64,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinTuple *oldtup; Size oldsz; Buffer newbuf; + BlockNumber newblk = InvalidBlockNumber; bool extended; Assert(newsz == MAXALIGN(newsz)); @@ -101,6 +102,8 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, Assert(!extended); newbuf = InvalidBuffer; } + else + newblk = BufferGetBlockNumber(newbuf); } else { @@ -136,7 +139,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, brin_initialize_empty_new_buffer(idxrel, newbuf); UnlockReleaseBuffer(newbuf); if (extended) - FreeSpaceMapVacuum(idxrel); + FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1); } return false; } @@ -152,11 +155,12 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); if (BufferIsValid(newbuf)) { + /* As above, initialize and record new page if we got one */ if (extended) brin_initialize_empty_new_buffer(idxrel, newbuf); UnlockReleaseBuffer(newbuf); if (extended) - FreeSpaceMapVacuum(idxrel); + FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1); } return false; } @@ -173,14 +177,6 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) && brin_can_do_samepage_update(oldbuf, origsz, newsz)) { - if (BufferIsValid(newbuf)) - { - /* as above */ - if (extended) - brin_initialize_empty_new_buffer(idxrel, newbuf); - UnlockReleaseBuffer(newbuf); - } - START_CRIT_SECTION(); if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) newtup, newsz)) elog(ERROR, "failed to replace BRIN tuple"); @@ -210,8 +206,15 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); - if (extended) - FreeSpaceMapVacuum(idxrel); + if (BufferIsValid(newbuf)) + { + /* As above, initialize and record new page if we got one */ + if (extended) + brin_initialize_empty_new_buffer(idxrel, newbuf); + UnlockReleaseBuffer(newbuf); + if (extended) + FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1); + } return true; } @@ -234,7 +237,6 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, Buffer revmapbuf; ItemPointerData newtid; OffsetNumber newoff; - BlockNumber newblk = InvalidBlockNumber; Size freespace = 0; revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk); @@ -247,7 +249,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, * need to do that here. */ if (extended) - brin_page_init(BufferGetPage(newbuf), BRIN_PAGETYPE_REGULAR); + brin_page_init(newpage, BRIN_PAGETYPE_REGULAR); PageIndexTupleDeleteNoCompact(oldpage, oldoff); newoff = PageAddItem(newpage, (Item) newtup, newsz, @@ -259,12 +261,9 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, /* needed to update FSM below */ if (extended) - { - newblk = BufferGetBlockNumber(newbuf); freespace = br_page_get_freespace(newpage); - } - ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff); + ItemPointerSet(&newtid, newblk, newoff); brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid); MarkBufferDirty(revmapbuf); @@ -311,9 +310,8 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, if (extended) { - Assert(BlockNumberIsValid(newblk)); RecordPageWithFreeSpace(idxrel, newblk, freespace); - FreeSpaceMapVacuum(idxrel); + FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1); } return true; @@ -350,6 +348,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, Page page; BlockNumber blk; OffsetNumber off; + Size freespace = 0; Buffer revmapbuf; ItemPointerData tid; bool extended; @@ -410,15 +409,16 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, /* Execute the actual insertion */ START_CRIT_SECTION(); if (extended) - brin_page_init(BufferGetPage(*buffer), BRIN_PAGETYPE_REGULAR); + brin_page_init(page, BRIN_PAGETYPE_REGULAR); off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber, false, false); if (off == InvalidOffsetNumber) - elog(ERROR, "could not insert new index tuple to page"); + elog(ERROR, "failed to add BRIN tuple to new page"); MarkBufferDirty(*buffer); - BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u", - blk, off, heapBlk)); + /* needed to update FSM below */ + if (extended) + freespace = br_page_get_freespace(page); ItemPointerSet(&tid, blk, off); brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid); @@ -456,8 +456,14 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK); + BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u", + blk, off, heapBlk)); + if (extended) - FreeSpaceMapVacuum(idxrel); + { + RecordPageWithFreeSpace(idxrel, blk, freespace); + FreeSpaceMapVacuumRange(idxrel, blk, blk + 1); + } return off; } @@ -599,17 +605,22 @@ brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, } /* - * Given a BRIN index page, initialize it if necessary, and record it into the - * FSM if necessary. Return value is true if the FSM itself needs "vacuuming". + * Given a BRIN index page, initialize it if necessary, and record its + * current free space in the FSM. + * * The main use for this is when, during vacuuming, an uninitialized page is * found, which could be the result of relation extension followed by a crash * before the page can be used. + * + * Here, we don't bother to update upper FSM pages, instead expecting that our + * caller (brin_vacuum_scan) will fix them at the end of the scan. Elsewhere + * in this file, it's generally a good idea to propagate additions of free + * space into the upper FSM pages immediately. */ -bool +void brin_page_cleanup(Relation idxrel, Buffer buf) { Page page = BufferGetPage(buf); - Size freespace; /* * If a page was left uninitialized, initialize it now; also record it in @@ -631,7 +642,7 @@ brin_page_cleanup(Relation idxrel, Buffer buf) { brin_initialize_empty_new_buffer(idxrel, buf); LockBuffer(buf, BUFFER_LOCK_UNLOCK); - return true; + return; } LockBuffer(buf, BUFFER_LOCK_UNLOCK); } @@ -639,24 +650,18 @@ brin_page_cleanup(Relation idxrel, Buffer buf) /* Nothing to be done for non-regular index pages */ if (BRIN_IS_META_PAGE(BufferGetPage(buf)) || BRIN_IS_REVMAP_PAGE(BufferGetPage(buf))) - return false; + return; /* Measure free space and record it */ - freespace = br_page_get_freespace(page); - if (freespace > GetRecordedFreeSpace(idxrel, BufferGetBlockNumber(buf))) - { - RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf), freespace); - return true; - } - - return false; + RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf), + br_page_get_freespace(page)); } /* * Return a pinned and exclusively locked buffer which can be used to insert an * index item of size itemsz (caller must ensure not to request sizes * impossible to fulfill). If oldbuf is a valid buffer, it is also locked (in - * an order determined to avoid deadlocks.) + * an order determined to avoid deadlocks). * * If we find that the old page is no longer a regular index page (because * of a revmap extension), the old buffer is unlocked and we return @@ -665,12 +670,18 @@ brin_page_cleanup(Relation idxrel, Buffer buf) * If there's no existing page with enough free space to accommodate the new * item, the relation is extended. If this happens, *extended is set to true, * and it is the caller's responsibility to initialize the page (and WAL-log - * that fact) prior to use. + * that fact) prior to use. The caller should also update the FSM with the + * page's remaining free space after the insertion. * - * Note that in some corner cases it is possible for this routine to extend the - * relation and then not return the buffer. It is this routine's + * Note that the caller is not expected to update FSM unless *extended is set + * true. This policy means that we'll update FSM when a page is created, and + * when it's found to have too little space for a desired tuple insertion, + * but not every single time we add a tuple to the page. + * + * Note that in some corner cases it is possible for this routine to extend + * the relation and then not return the new page. It is this routine's * responsibility to WAL-log the page initialization and to record the page in - * FSM if that happens. Such a buffer may later be reused by this routine. + * FSM if that happens, since the caller certainly can't do it. */ static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, @@ -684,22 +695,22 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, /* callers must have checked */ Assert(itemsz <= BrinMaxItemSize); - *extended = false; - if (BufferIsValid(oldbuf)) oldblk = BufferGetBlockNumber(oldbuf); else oldblk = InvalidBlockNumber; - /* - * Loop until we find a page with sufficient free space. By the time we - * return to caller out of this loop, both buffers are valid and locked; - * if we have to restart here, neither buffer is locked and buf is not a - * pinned buffer. - */ + /* Choose initial target page, re-using existing target if known */ newblk = RelationGetTargetBlock(irel); if (newblk == InvalidBlockNumber) newblk = GetPageWithFreeSpace(irel, itemsz); + + /* + * Loop until we find a page with sufficient free space. By the time we + * return to caller out of this loop, both buffers are valid and locked; + * if we have to restart here, neither page is locked and newblk isn't + * pinned (if it's even valid). + */ for (;;) { Buffer buf; @@ -707,6 +718,8 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, CHECK_FOR_INTERRUPTS(); + *extended = false; + if (newblk == InvalidBlockNumber) { /* @@ -741,9 +754,9 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, /* * We lock the old buffer first, if it's earlier than the new one; but - * before we do, we need to check that it hasn't been turned into a - * revmap page concurrently; if we detect that it happened, give up - * and tell caller to start over. + * then we need to check that it hasn't been turned into a revmap page + * concurrently. If we detect that that happened, give up and tell + * caller to start over. */ if (BufferIsValid(oldbuf) && oldblk < newblk) { @@ -761,16 +774,20 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, * it first. */ if (*extended) - { brin_initialize_empty_new_buffer(irel, buf); - /* shouldn't matter, but don't confuse caller */ - *extended = false; - } if (extensionLockHeld) UnlockRelationForExtension(irel, ExclusiveLock); ReleaseBuffer(buf); + + if (*extended) + { + FreeSpaceMapVacuumRange(irel, newblk, newblk + 1); + /* shouldn't matter, but don't confuse caller */ + *extended = false; + } + return InvalidBuffer; } } @@ -785,9 +802,6 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, /* * We have a new buffer to insert into. Check that the new page has * enough free space, and return it if it does; otherwise start over. - * Note that we allow for the FSM to be out of date here, and in that - * case we update it and move on. - * * (br_page_get_freespace also checks that the FSM didn't hand us a * page that has since been repurposed for the revmap.) */ @@ -795,16 +809,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, BrinMaxItemSize : br_page_get_freespace(page); if (freespace >= itemsz) { - RelationSetTargetBlock(irel, BufferGetBlockNumber(buf)); - - /* - * Since the target block specification can get lost on cache - * invalidations, make sure we update the more permanent FSM with - * data about it before going away. - */ - if (*extended) - RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf), - freespace); + RelationSetTargetBlock(irel, newblk); /* * Lock the old buffer if not locked already. Note that in this @@ -832,6 +837,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, if (*extended) { brin_initialize_empty_new_buffer(irel, buf); + /* since this should not happen, skip FreeSpaceMapVacuum */ ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), @@ -845,6 +851,10 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, if (BufferIsValid(oldbuf) && oldblk <= newblk) LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); + /* + * Update the FSM with the new, presumably smaller, freespace value + * for this page, then search for a new target page. + */ newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz); } } @@ -859,6 +869,9 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, * there is no mechanism to get the space back and the index would bloat. * Also, because we would not WAL-log the action that would initialize the * page, the page would go uninitialized in a standby (or after recovery). + * + * While we record the page in FSM here, caller is responsible for doing FSM + * upper-page update if that seems appropriate. */ static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer) diff --git a/src/include/access/brin_pageops.h b/src/include/access/brin_pageops.h index 8b389de4a5..5189d5ddc2 100644 --- a/src/include/access/brin_pageops.h +++ b/src/include/access/brin_pageops.h @@ -33,6 +33,6 @@ extern bool brin_start_evacuating_page(Relation idxRel, Buffer buf); extern void brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer buf); -extern bool brin_page_cleanup(Relation idxrel, Buffer buf); +extern void brin_page_cleanup(Relation idxrel, Buffer buf); #endif /* BRIN_PAGEOPS_H */