/*
 * brin_pageops.c
 *		Page-handling routines for BRIN indexes
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/brin/brin_pageops.c
 */
#include "postgres.h"

#include "access/brin_page.h"
#include "access/brin_pageops.h"
#include "access/brin_revmap.h"
#include "access/brin_xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/rel.h"

/*
 * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page.  We can tolerate
 * a single item per page, unlike other index AMs.
 */
#define BrinMaxItemSize \
	MAXALIGN_DOWN(BLCKSZ - \
				  (MAXALIGN(SizeOfPageHeaderData + \
							sizeof(ItemIdData)) + \
				   MAXALIGN(sizeof(BrinSpecialSpace))))

static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
								   bool *extended);
static Size br_page_get_freespace(Page page);
static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);


/*
 * Update tuple origtup (size origsz), located in offset oldoff of buffer
 * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
 * at heapBlk.  oldbuf must not be locked on entry, and is not locked at exit.
 *
 * If samepage is true, attempt to put the new tuple in the same page, but if
 * there's no room, use some other one.
 *
 * If the update is successful, return true; the revmap is updated to point to
 * the new tuple.  If the update is not done for whatever reason, return false.
 * Caller may retry the update if this happens.
 */
bool
brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
			  BrinRevmap *revmap, BlockNumber heapBlk,
			  Buffer oldbuf, OffsetNumber oldoff,
			  const BrinTuple *origtup, Size origsz,
			  const BrinTuple *newtup, Size newsz,
			  bool samepage)
{
	Page		oldpage;
	ItemId		oldlp;
	BrinTuple  *oldtup;
	Size		oldsz;
	Buffer		newbuf;
	BlockNumber newblk = InvalidBlockNumber;
	bool		extended;

	Assert(newsz == MAXALIGN(newsz));

	/* If the item is oversized, don't bother. */
	if (newsz > BrinMaxItemSize)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
						newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
		return false;			/* keep compiler quiet */
	}

	/* make sure the revmap is long enough to contain the entry we need */
	brinRevmapExtend(revmap, heapBlk);

	if (!samepage)
	{
		/* need a page on which to put the item */
		newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
		if (!BufferIsValid(newbuf))
		{
			Assert(!extended);
			return false;
		}

		/*
		 * Note: it's possible (though unlikely) that the returned newbuf is
		 * the same as oldbuf, if brin_getinsertbuffer determined that the
		 * old buffer does in fact have enough space.
		 */
		if (newbuf == oldbuf)
		{
			Assert(!extended);
			newbuf = InvalidBuffer;
		}
		else
			newblk = BufferGetBlockNumber(newbuf);
	}
	else
	{
		LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
		newbuf = InvalidBuffer;
		extended = false;
	}
	oldpage = BufferGetPage(oldbuf);
	oldlp = PageGetItemId(oldpage, oldoff);

	/*
	 * Check that the old tuple wasn't updated concurrently: it might have
	 * moved someplace else entirely, and for that matter the whole page
	 * might've become a revmap page.  Note that in the first two cases
	 * checked here, the "oldlp" we just calculated is garbage; but
	 * PageGetItemId() is simple enough that it was safe to do that
	 * calculation anyway.
	 */
	if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
		oldoff > PageGetMaxOffsetNumber(oldpage) ||
		!ItemIdIsNormal(oldlp))
	{
		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);

		/*
		 * If this happens, and the new buffer was obtained by extending the
		 * relation, then we need to ensure we don't leave it uninitialized
		 * or forget about it.
		 */
		if (BufferIsValid(newbuf))
		{
			if (extended)
				brin_initialize_empty_new_buffer(idxrel, newbuf);
			UnlockReleaseBuffer(newbuf);
			if (extended)
				FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
		}

		return false;
	}

	oldsz = ItemIdGetLength(oldlp);
	oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);

	/*
	 * ... or it might have been updated in place to different contents.
	 */
	if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
	{
		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
		if (BufferIsValid(newbuf))
		{
			/* As above, initialize and record new page if we got one */
			if (extended)
				brin_initialize_empty_new_buffer(idxrel, newbuf);
			UnlockReleaseBuffer(newbuf);
			if (extended)
				FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
		}

		return false;
	}

	/*
	 * Great, the old tuple is intact.  We can proceed with the update.
	 *
	 * If there's enough room in the old page for the new tuple, replace it.
	 *
	 * Note that there might now be enough space on the page even though the
	 * caller told us there isn't, if a concurrent update moved another tuple
	 * elsewhere or replaced a tuple with a smaller one.
	 */
	if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
		brin_can_do_samepage_update(oldbuf, origsz, newsz))
	{
		START_CRIT_SECTION();
		if (!PageIndexTupleOverwrite(oldpage, oldoff,
									 (Item) unconstify(BrinTuple *, newtup),
									 newsz))
			elog(ERROR, "failed to replace BRIN tuple");
		MarkBufferDirty(oldbuf);

		/* XLOG stuff */
		if (RelationNeedsWAL(idxrel))
		{
			xl_brin_samepage_update xlrec;
			XLogRecPtr	recptr;
			uint8		info = XLOG_BRIN_SAMEPAGE_UPDATE;

			xlrec.offnum = oldoff;

			XLogBeginInsert();
			XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);

			XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
			XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup),
								newsz);

			recptr = XLogInsert(RM_BRIN_ID, info);

			PageSetLSN(oldpage, recptr);
		}

		END_CRIT_SECTION();

		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);

		if (BufferIsValid(newbuf))
		{
			/* As above, initialize and record new page if we got one */
			if (extended)
				brin_initialize_empty_new_buffer(idxrel, newbuf);
			UnlockReleaseBuffer(newbuf);
			if (extended)
				FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
		}

		return true;
	}
	else if (newbuf == InvalidBuffer)
	{
		/*
		 * Not enough space, but caller said that there was.  Tell them to
		 * start over.
		 */
		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
		return false;
	}
	else
	{
		/*
		 * Not enough free space on the oldpage.  Put the new tuple on the
		 * new page, and update the revmap.
		 */
		Page		newpage = BufferGetPage(newbuf);
		Buffer		revmapbuf;
		ItemPointerData newtid;
		OffsetNumber newoff;
		Size		freespace = 0;

		revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);

		START_CRIT_SECTION();

		/*
		 * We need to initialize the page if it's newly obtained.  Note we
		 * will WAL-log the initialization as part of the update, so we don't
		 * need to do that here.
		 */
		if (extended)
			brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
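
		/*
		 * Remove the old tuple without compacting the line pointer array,
		 * so that any other tuples on the old page keep their offset
		 * numbers; the revmap references those tuples by block and offset.
		 */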
		PageIndexTupleDeleteNoCompact(oldpage, oldoff);
		newoff = PageAddItem(newpage,
							 (Item) unconstify(BrinTuple *, newtup), newsz,
							 InvalidOffsetNumber, false, false);
		if (newoff == InvalidOffsetNumber)
			elog(ERROR, "failed to add BRIN tuple to new page");
		MarkBufferDirty(oldbuf);
		MarkBufferDirty(newbuf);

		/* needed to update FSM below */
		if (extended)
			freespace = br_page_get_freespace(newpage);

		ItemPointerSet(&newtid, newblk, newoff);
		brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
		MarkBufferDirty(revmapbuf);

		/* XLOG stuff */
		if (RelationNeedsWAL(idxrel))
		{
			xl_brin_update xlrec;
			XLogRecPtr	recptr;
			uint8		info;

			info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);

			xlrec.insert.offnum = newoff;
			xlrec.insert.heapBlk = heapBlk;
			xlrec.insert.pagesPerRange = pagesPerRange;
			xlrec.oldOffnum = oldoff;

			XLogBeginInsert();

			/* new page */
			XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);

			XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD |
							   (extended ? REGBUF_WILL_INIT : 0));
			XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup),
								newsz);

			/* revmap page */
			XLogRegisterBuffer(1, revmapbuf, 0);

			/* old page */
			XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);

			recptr = XLogInsert(RM_BRIN_ID, info);

			PageSetLSN(oldpage, recptr);
			PageSetLSN(newpage, recptr);
			PageSetLSN(BufferGetPage(revmapbuf), recptr);
		}

		END_CRIT_SECTION();

		LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
		UnlockReleaseBuffer(newbuf);

		if (extended)
		{
			RecordPageWithFreeSpace(idxrel, newblk, freespace);
			FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
		}

		return true;
	}
}

/*
 * Return whether brin_doupdate can do a samepage update.
 */
bool
brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
{
	return
		((newsz <= origsz) ||
		 PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
}

/*
 * Insert an index tuple into the index relation.  The revmap is updated to
 * mark the range containing the given page as pointing to the inserted entry.
 * A WAL record is written.
 *
 * The buffer, if valid, is first checked for free space to insert the new
 * entry; if there isn't enough, a new buffer is obtained and pinned.  No
 * buffer lock must be held on entry, no buffer lock is held on exit.
 *
 * Return value is the offset number where the tuple was inserted.
 */
OffsetNumber
brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
			  BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
			  BrinTuple *tup, Size itemsz)
{
	Page		page;
	BlockNumber blk;
	OffsetNumber off;
	Size		freespace = 0;
	Buffer		revmapbuf;
	ItemPointerData tid;
	bool		extended;

	Assert(itemsz == MAXALIGN(itemsz));

	/* If the item is oversized, don't even bother. */
	if (itemsz > BrinMaxItemSize)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
						itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
		return InvalidOffsetNumber; /* keep compiler quiet */
	}

	/* Make sure the revmap is long enough to contain the entry we need */
	brinRevmapExtend(revmap, heapBlk);

	/*
	 * Acquire lock on buffer supplied by caller, if any.  If it doesn't have
	 * enough space, unpin it to obtain a new one below.
	 */
	if (BufferIsValid(*buffer))
	{
		/*
		 * It's possible that another backend (or ourselves!) extended the
		 * revmap over the page we held a pin on, so we cannot assume that
		 * it's still a regular page.
		 */
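
		/*
		 * Note that br_page_get_freespace reports zero free space for pages
		 * that are no longer regular (or that are marked for evacuation),
		 * so the size check below also makes us abandon such a buffer.
		 */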
		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
		if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
		{
			UnlockReleaseBuffer(*buffer);
			*buffer = InvalidBuffer;
		}
	}

	/*
	 * If we still don't have a usable buffer, have brin_getinsertbuffer
	 * obtain one for us.
	 */
	if (!BufferIsValid(*buffer))
	{
		do
			*buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz,
										   &extended);
		while (!BufferIsValid(*buffer));
	}
	else
		extended = false;

	/* Now obtain lock on revmap buffer */
	revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);

	page = BufferGetPage(*buffer);
	blk = BufferGetBlockNumber(*buffer);

	/* Execute the actual insertion */
	START_CRIT_SECTION();
	if (extended)
		brin_page_init(page, BRIN_PAGETYPE_REGULAR);
	off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
					  false, false);
	if (off == InvalidOffsetNumber)
		elog(ERROR, "failed to add BRIN tuple to new page");
	MarkBufferDirty(*buffer);

	/* needed to update FSM below */
	if (extended)
		freespace = br_page_get_freespace(page);

	ItemPointerSet(&tid, blk, off);
	brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
	MarkBufferDirty(revmapbuf);

	/* XLOG stuff */
	if (RelationNeedsWAL(idxrel))
	{
		xl_brin_insert xlrec;
		XLogRecPtr	recptr;
		uint8		info;

		info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
		xlrec.heapBlk = heapBlk;
		xlrec.pagesPerRange = pagesPerRange;
		xlrec.offnum = off;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);

		XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD |
						   (extended ? REGBUF_WILL_INIT : 0));
		XLogRegisterBufData(0, (char *) tup, itemsz);

		XLogRegisterBuffer(1, revmapbuf, 0);

		recptr = XLogInsert(RM_BRIN_ID, info);

		PageSetLSN(page, recptr);
		PageSetLSN(BufferGetPage(revmapbuf), recptr);
	}

	END_CRIT_SECTION();

	/* Tuple is firmly on buffer; we can release our locks */
	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
	LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);

	BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
			   blk, off, heapBlk));

	if (extended)
	{
		RecordPageWithFreeSpace(idxrel, blk, freespace);
		FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
	}

	return off;
}

/*
 * Initialize a page with the given type.
 *
 * Caller is responsible for marking it dirty, as appropriate.
 */
void
brin_page_init(Page page, uint16 type)
{
	PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));

	BrinPageType(page) = type;
}

/*
 * Initialize a new BRIN index's metapage.
 */
void
brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
{
	BrinMetaPageData *metadata;

	brin_page_init(page, BRIN_PAGETYPE_META);

	metadata = (BrinMetaPageData *) PageGetContents(page);

	metadata->brinMagic = BRIN_META_MAGIC;
	metadata->brinVersion = version;
	metadata->pagesPerRange = pagesPerRange;

	/*
	 * Note we cheat here a little.  0 is not a valid revmap block number
	 * (because it's the metapage buffer), but doing this enables the first
	 * revmap page to be created when the index is.
	 */
	metadata->lastRevmapPage = 0;

	/*
	 * Set pd_lower just past the end of the metadata.  This is essential,
	 * because without doing so, metadata will be lost if xlog.c compresses
	 * the page.
	 */
	((PageHeader) page)->pd_lower =
		((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
}

/*
 * Initiate page evacuation protocol.
 *
 * The page must be locked in exclusive mode by the caller.
 *
 * If the page is not yet initialized or empty, return false without doing
 * anything; it can be used for revmap without any further changes.  If it
 * contains tuples, mark it for evacuation and return true.
 */
bool
brin_start_evacuating_page(Relation idxRel, Buffer buf)
{
	OffsetNumber off;
	OffsetNumber maxoff;
	Page		page;

	page = BufferGetPage(buf);

	if (PageIsNew(page))
		return false;

	maxoff = PageGetMaxOffsetNumber(page);
	for (off = FirstOffsetNumber; off <= maxoff; off++)
	{
		ItemId		lp;

		lp = PageGetItemId(page, off);
		if (ItemIdIsUsed(lp))
		{
			/*
			 * Prevent other backends from adding more stuff to this page:
			 * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this
			 * page can no longer be used to add new tuples.  Note that this
			 * flag is not WAL-logged, except accidentally.
			 */
			BrinPageFlags(page) |= BRIN_EVACUATE_PAGE;
			MarkBufferDirtyHint(buf, true);

			return true;
		}
	}
	return false;
}

/*
 * Move all tuples out of a page.
 *
 * The caller must hold lock on the page.  The lock and pin are released.
 */
void
brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
				   BrinRevmap *revmap, Buffer buf)
{
	OffsetNumber off;
	OffsetNumber maxoff;
	Page		page;
	BrinTuple  *btup = NULL;
	Size		btupsz = 0;

	page = BufferGetPage(buf);

	Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);

	maxoff = PageGetMaxOffsetNumber(page);
	for (off = FirstOffsetNumber; off <= maxoff; off++)
	{
		BrinTuple  *tup;
		Size		sz;
		ItemId		lp;

		CHECK_FOR_INTERRUPTS();

		lp = PageGetItemId(page, off);
		if (ItemIdIsUsed(lp))
		{
			sz = ItemIdGetLength(lp);
			tup = (BrinTuple *) PageGetItem(page, lp);
			tup = brin_copy_tuple(tup, sz, btup, &btupsz);

			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
							   buf, off, tup, sz, tup, sz, false))
				off--;			/* retry */

			LockBuffer(buf, BUFFER_LOCK_SHARE);

			/* It's possible that someone extended the revmap over this page */
			if (!BRIN_IS_REGULAR_PAGE(page))
				break;
		}
	}

	UnlockReleaseBuffer(buf);
}

/*
 * Given a BRIN index page, initialize it if necessary, and record its
 * current free space in the FSM.
 *
 * The main use for this is when, during vacuuming, an uninitialized page is
 * found, which could be the result of relation extension followed by a crash
 * before the page can be used.
 *
 * Here, we don't bother to update upper FSM pages, instead expecting that our
 * caller (brin_vacuum_scan) will fix them at the end of the scan.  Elsewhere
 * in this file, it's generally a good idea to propagate additions of free
 * space into the upper FSM pages immediately.
 */
void
brin_page_cleanup(Relation idxrel, Buffer buf)
{
	Page		page = BufferGetPage(buf);

	/*
	 * If a page was left uninitialized, initialize it now; also record it in
	 * FSM.
	 *
	 * Somebody else might be extending the relation concurrently.  To avoid
	 * re-initializing the page before they can grab the buffer lock, we
	 * acquire the extension lock momentarily.  Since they hold the extension
	 * lock from before getting the page and after it's been initialized,
	 * we're sure to see their initialization.
	 */
	if (PageIsNew(page))
	{
		LockRelationForExtension(idxrel, ShareLock);
		UnlockRelationForExtension(idxrel, ShareLock);

		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		if (PageIsNew(page))
		{
			brin_initialize_empty_new_buffer(idxrel, buf);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			return;
		}
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	}

	/* Nothing to be done for non-regular index pages */
	if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
		BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
		return;

	/* Measure free space and record it */
	RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
							br_page_get_freespace(page));
}

/*
 * Return a pinned and exclusively locked buffer which can be used to insert
 * an index item of size itemsz (caller must ensure not to request sizes
 * impossible to fulfill).
 * If oldbuf is a valid buffer, it is also locked (in an order determined to
 * avoid deadlocks).
 *
 * If we find that the old page is no longer a regular index page (because of
 * a revmap extension), the old buffer is unlocked and we return
 * InvalidBuffer.
 *
 * If there's no existing page with enough free space to accommodate the new
 * item, the relation is extended.  If this happens, *extended is set to true,
 * and it is the caller's responsibility to initialize the page (and WAL-log
 * that fact) prior to use.  The caller should also update the FSM with the
 * page's remaining free space after the insertion.
 *
 * Note that the caller is not expected to update FSM unless *extended is set
 * true.  This policy means that we'll update FSM when a page is created, and
 * when it's found to have too little space for a desired tuple insertion,
 * but not every single time we add a tuple to the page.
 *
 * Note that in some corner cases it is possible for this routine to extend
 * the relation and then not return the new page.  It is this routine's
 * responsibility to WAL-log the page initialization and to record the page
 * in FSM if that happens, since the caller certainly can't do it.
 */
static Buffer
brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
					 bool *extended)
{
	BlockNumber oldblk;
	BlockNumber newblk;
	Page		page;
	Size		freespace;

	/* callers must have checked */
	Assert(itemsz <= BrinMaxItemSize);

	if (BufferIsValid(oldbuf))
		oldblk = BufferGetBlockNumber(oldbuf);
	else
		oldblk = InvalidBlockNumber;

	/* Choose initial target page, re-using existing target if known */
	newblk = RelationGetTargetBlock(irel);
	if (newblk == InvalidBlockNumber)
		newblk = GetPageWithFreeSpace(irel, itemsz);

	/*
	 * Loop until we find a page with sufficient free space.  By the time we
	 * return to caller out of this loop, both buffers are valid and locked;
	 * if we have to restart here, neither page is locked and newblk isn't
	 * pinned (if it's even valid).
	 */
	for (;;)
	{
		Buffer		buf;
		bool		extensionLockHeld = false;

		CHECK_FOR_INTERRUPTS();

		*extended = false;

		if (newblk == InvalidBlockNumber)
		{
			/*
			 * There's not enough free space in any existing index page,
			 * according to the FSM: extend the relation to obtain a shiny
			 * new page.
			 *
			 * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
			 * which'd avoid the need to hold the extension lock during
			 * buffer reclaim.
			 */
			if (!RELATION_IS_LOCAL(irel))
			{
				LockRelationForExtension(irel, ExclusiveLock);
				extensionLockHeld = true;
			}
			buf = ReadBuffer(irel, P_NEW);
			newblk = BufferGetBlockNumber(buf);
			*extended = true;

			BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
					   BufferGetBlockNumber(buf)));
		}
		else if (newblk == oldblk)
		{
			/*
			 * There's an odd corner-case here where the FSM is out-of-date,
			 * and gave us the old page.
			 */
			buf = oldbuf;
		}
		else
		{
			buf = ReadBuffer(irel, newblk);
		}

		/*
		 * We lock the old buffer first, if it's earlier than the new one;
		 * but then we need to check that it hasn't been turned into a revmap
		 * page concurrently.  If we detect that that happened, give up and
		 * tell caller to start over.
		 */
		if (BufferIsValid(oldbuf) && oldblk < newblk)
		{
			LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
			if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
			{
				LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);

				/*
				 * It is possible that the new page was obtained from
				 * extending the relation.  In that case, we must be sure to
				 * record it in the FSM before leaving, because otherwise the
				 * space would be lost forever.
				 * However, we cannot let an uninitialized page get in the
				 * FSM, so we need to initialize it first.
				 */
				if (*extended)
					brin_initialize_empty_new_buffer(irel, buf);

				if (extensionLockHeld)
					UnlockRelationForExtension(irel, ExclusiveLock);

				ReleaseBuffer(buf);

				if (*extended)
				{
					FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
					/* shouldn't matter, but don't confuse caller */
					*extended = false;
				}

				return InvalidBuffer;
			}
		}

		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

		if (extensionLockHeld)
			UnlockRelationForExtension(irel, ExclusiveLock);

		page = BufferGetPage(buf);

		/*
		 * We have a new buffer to insert into.  Check that the new page has
		 * enough free space, and return it if it does; otherwise start over.
		 * (br_page_get_freespace also checks that the FSM didn't hand us a
		 * page that has since been repurposed for the revmap.)
		 */
		freespace = *extended ?
			BrinMaxItemSize : br_page_get_freespace(page);
		if (freespace >= itemsz)
		{
			RelationSetTargetBlock(irel, newblk);

			/*
			 * Lock the old buffer if not locked already.  Note that in this
			 * case we know for sure it's a regular page: it's later than the
			 * new page we just got, which is not a revmap page, and revmap
			 * pages are always consecutive.
			 */
			if (BufferIsValid(oldbuf) && oldblk > newblk)
			{
				LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
				Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
			}

			return buf;
		}

		/* This page is no good. */

		/*
		 * If an entirely new page does not contain enough free space for
		 * the new item, then surely that item is oversized.  Complain
		 * loudly; but first make sure we initialize the page and record it
		 * as free, for next time.
		 */
		if (*extended)
		{
			brin_initialize_empty_new_buffer(irel, buf);
			/* since this should not happen, skip FreeSpaceMapVacuum */

			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
							itemsz, freespace, RelationGetRelationName(irel))));
			return InvalidBuffer;	/* keep compiler quiet */
		}

		if (newblk != oldblk)
			UnlockReleaseBuffer(buf);
		if (BufferIsValid(oldbuf) && oldblk <= newblk)
			LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);

		/*
		 * Update the FSM with the new, presumably smaller, freespace value
		 * for this page, then search for a new target page.
		 */
		newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace,
											   itemsz);
	}
}

/*
 * Initialize a page as an empty regular BRIN page, WAL-log this, and record
 * the page in FSM.
 *
 * There are several corner situations in which we extend the relation to
 * obtain a new page and later find that we cannot use it immediately.  When
 * that happens, we don't want to let the page go unrecorded in the FSM,
 * because there is no mechanism to get the space back and the index would
 * bloat.  Also, because we would not WAL-log the action that would
 * initialize the page, the page would go uninitialized in a standby (or
 * after recovery).
 *
 * While we record the page in FSM here, caller is responsible for doing FSM
 * upper-page update if that seems appropriate.
 */
static void
brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
{
	Page		page;

	BRIN_elog((DEBUG2,
			   "brin_initialize_empty_new_buffer: initializing blank page %u",
			   BufferGetBlockNumber(buffer)));

	START_CRIT_SECTION();
	page = BufferGetPage(buffer);
	brin_page_init(page, BRIN_PAGETYPE_REGULAR);
	MarkBufferDirty(buffer);
	log_newpage_buffer(buffer, true);
	END_CRIT_SECTION();

	/*
	 * We update the FSM for this page, but this is not WAL-logged.  This is
	 * acceptable because VACUUM will scan the index and update the FSM with
	 * pages whose FSM records were forgotten in a crash.
	 */
	RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
							br_page_get_freespace(page));
}

/*
 * Return the amount of free space on a regular BRIN index page.
 *
 * If the page is not a regular page, or has been marked with the
 * BRIN_EVACUATE_PAGE flag, returns 0.
 */
static Size
br_page_get_freespace(Page page)
{
	if (!BRIN_IS_REGULAR_PAGE(page) ||
		(BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
		return 0;
	else
		return PageGetFreeSpace(page);
}