From 074251db6740a9abfbd922d13d39b27c4f338a20 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan
Date: Tue, 10 Apr 2018 18:21:03 -0400
Subject: [PATCH] Adjustments to the btree fastpath optimization.

This optimization was introduced in commit 2b272734. The changes include
some additional comments and documentation, and also these more
substantive changes:

. ensure the optimization is only applied on the leaf node of a tree
  whose root is on level 2 or more. It's of little value on small trees.
. delay calling RelationSetTargetBlock() until after the critical
  section of _bt_insertonpg().
. ensure the optimization is also applied to unlogged tables.

Pavan Deolasee and Peter Geoghegan, with some very light editing from me.

Discussion: https://postgr.es/m/CABOikdO8jhRarNC60nZLktZYhxt+TK8z_V97+Ny499YQdyAfug@mail.gmail.com
---
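
Note for reviewers: as a refresher on what the fastpath does, here is a
minimal, self-contained C sketch of the caching scheme the README hunk
below describes. This is NOT PostgreSQL code; ToyPage, fastpath_ok(),
insert() and the constants are all invented for illustration, and only
the shape of the checks loosely mirrors _bt_doinsert().

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_CAPACITY 8
    #define INVALID_BLOCK (-1)

    typedef struct
    {
        int  keys[PAGE_CAPACITY];
        int  nkeys;
        bool is_rightmost_leaf;
    } ToyPage;

    static int cached_block = INVALID_BLOCK;    /* per-backend hint */

    /* Cheap revalidation, loosely mirroring the checks in _bt_doinsert(). */
    static bool
    fastpath_ok(const ToyPage *page, int key)
    {
        return page->is_rightmost_leaf &&       /* still the rightmost leaf? */
            page->nkeys > 0 &&                  /* page has a first key... */
            key > page->keys[0] &&              /* ...that sorts before ours */
            page->nkeys < PAGE_CAPACITY;        /* and enough free space */
    }

    static void
    insert(ToyPage *pages, int npages, int key)
    {
        int blk = cached_block;

        if (blk != INVALID_BLOCK && fastpath_ok(&pages[blk], key))
            printf("key %d: fastpath to block %d\n", key, blk);
        else
        {
            cached_block = INVALID_BLOCK;       /* clear the stale hint */
            blk = npages - 1;                   /* stand-in for a full descent */
            printf("key %d: full descent to block %d\n", key, blk);
        }

        pages[blk].keys[pages[blk].nkeys++] = key;
        if (pages[blk].is_rightmost_leaf)
            cached_block = blk;                 /* remember for next time */
    }

    int
    main(void)
    {
        ToyPage pages[2] = {
            {{1, 2, 3}, 3, false},              /* some non-rightmost page */
            {{100}, 1, true}                    /* the rightmost leaf */
        };

        insert(pages, 2, 101);                  /* full descent, hint gets set */
        insert(pages, 2, 102);                  /* fastpath */
        insert(pages, 2, 103);                  /* fastpath */
        return 0;
    }

Compiled and run, the first insert reports a full descent and sets the
hint; the next two reuse the cached block.
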
 src/backend/access/nbtree/README      | 19 ++++++++
 src/backend/access/nbtree/nbtinsert.c | 63 ++++++++++++++++++++++-----
 2 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README
index aef455c122..3680e69b89 100644
--- a/src/backend/access/nbtree/README
+++ b/src/backend/access/nbtree/README
@@ -375,6 +375,25 @@ positives, so long as it never gives a false negative. This makes it
 possible to implement the test with a small counter value stored on each
 index page.
 
+Fastpath For Index Insertion
+----------------------------
+
+We optimize for the common case of inserting monotonically increasing
+index key values by caching the page into which this backend last
+inserted a value, if that page was the rightmost leaf page. On the next
+insert we can then quickly check whether the cached page is still the
+rightmost leaf page and the correct place for the new value, and so
+avoid the cost of walking down the tree.
+
+The optimization works on the assumption that there can only be one
+non-ignorable rightmost leaf page, and so even a RecentGlobalXmin style
+interlock isn't required. We cannot fail to detect that our hint was
+invalidated, because there can only be one such page in the B-Tree at
+any time. It's possible that the page will be deleted and recycled
+without a backend's cached page also being detected as invalidated, but
+only when we happen to recycle a block that is once again used as the
+rightmost leaf page.
+
 On-the-Fly Deletion Of Index Tuples
 -----------------------------------
 
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 0b93acd024..995cc61b4c 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -26,6 +26,8 @@
 #include "storage/smgr.h"
 #include "utils/tqual.h"
 
+/* Minimum tree height for application of fastpath optimization */
+#define BTREE_FASTPATH_MIN_LEVEL 2
 
 typedef struct
 {
@@ -125,7 +127,7 @@ _bt_doinsert(Relation rel, IndexTuple itup,
     /*
      * It's very common to have an index on an auto-incremented or
      * monotonically increasing value. In such cases, every insertion happens
-     * towards the end of the index. We try to optimise that case by caching
+     * towards the end of the index. We try to optimize that case by caching
      * the right-most leaf of the index. If our cached block is still the
      * rightmost leaf, has enough free space to accommodate a new entry and
      * the insertion key is strictly greater than the first key in this page,
@@ -176,13 +178,17 @@ top:
          * the first key on the page.
          */
         if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) &&
-            !P_INCOMPLETE_SPLIT(lpageop) &&
             !P_IGNORE(lpageop) &&
             (PageGetFreeSpace(page) > itemsz) &&
             PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) &&
             _bt_compare(rel, indnkeyatts, itup_scankey, page,
                         P_FIRSTDATAKEY(lpageop)) > 0)
         {
+            /*
+             * The right-most block should never have an incomplete split.
+             * But be paranoid and check for it anyway.
+             */
+            Assert(!P_INCOMPLETE_SPLIT(lpageop));
             fastpath = true;
         }
         else
@@ -868,6 +874,24 @@ _bt_insertonpg(Relation rel,
         bool        newitemonleft;
         Buffer      rbuf;
 
+        /*
+         * If we're here then a page split is needed. We should never reach
+         * here if we're using the fastpath, since we should have checked for
+         * all the required conditions, including the fact that this page has
+         * enough free space. Note that this routine can in theory deal with
+         * the situation where a NULL stack pointer is passed (that's what
+         * would happen if the fastpath is taken), like it does during crash
+         * recovery. But that path is much slower, defeating the very purpose
+         * of the optimization. The following assertion should protect us
+         * from any future code changes that invalidate those assumptions.
+         *
+         * Note that whenever we fail to take the fastpath, we clear the
+         * cached block. Checking for a valid cached block at this point is
+         * enough to decide whether we're in the fastpath or not.
+         */
+        Assert(!(P_ISLEAF(lpageop) &&
+                 BlockNumberIsValid(RelationGetTargetBlock(rel))));
+
         /* Choose the split point */
         firstright = _bt_findsplitloc(rel, page,
                                       newitemoff, itemsz,
@@ -905,6 +929,7 @@ _bt_insertonpg(Relation rel,
         BTMetaPageData *metad = NULL;
         OffsetNumber itup_off;
         BlockNumber itup_blkno;
+        BlockNumber cachedBlock = InvalidBlockNumber;
 
         itup_off = newitemoff;
         itup_blkno = BufferGetBlockNumber(buf);
@@ -962,6 +987,15 @@ _bt_insertonpg(Relation rel,
             MarkBufferDirty(cbuf);
         }
 
+        /*
+         * Cache the block information if we just inserted into the rightmost
+         * leaf page of the index and it's not the root page. For a very
+         * small index where the root is also a leaf, there is no point in
+         * trying the optimization.
+         */
+        if (P_RIGHTMOST(lpageop) && P_ISLEAF(lpageop) && !P_ISROOT(lpageop))
+            cachedBlock = BufferGetBlockNumber(buf);
+
         /* XLOG stuff */
         if (RelationNeedsWAL(rel))
         {
@@ -977,16 +1011,7 @@ _bt_insertonpg(Relation rel,
             XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
 
             if (P_ISLEAF(lpageop))
-            {
                 xlinfo = XLOG_BTREE_INSERT_LEAF;
-
-                /*
-                 * Cache the block information if we just inserted into the
-                 * rightmost leaf page of the index.
-                 */
-                if (P_RIGHTMOST(lpageop))
-                    RelationSetTargetBlock(rel, BufferGetBlockNumber(buf));
-            }
             else
             {
                 /*
@@ -1048,6 +1073,22 @@ _bt_insertonpg(Relation rel,
         if (BufferIsValid(cbuf))
             _bt_relbuf(rel, cbuf);
         _bt_relbuf(rel, buf);
+
+        /*
+         * If we decided to cache the insertion target block, set it now.
+         * But first check the height of the tree, and don't use the
+         * optimization for small indexes. We defer that check to this point
+         * to ensure that we don't call _bt_getrootheight while holding a
+         * lock on any other block.
+         *
+         * We do this after dropping locks on all buffers, so the information
+         * about whether the insertion block is still the rightmost block may
+         * have changed in between. But we will deal with that during the next
+         * insert operation. No special care is required while setting it.
+         */
+        if (BlockNumberIsValid(cachedBlock) &&
+            _bt_getrootheight(rel) >= BTREE_FASTPATH_MIN_LEVEL)
+            RelationSetTargetBlock(rel, cachedBlock);
     }
 }
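
A second sketch, again with invented names (lock_buffer(), get_root_height(),
FASTPATH_MIN_LEVEL and friends are stand-ins, not PostgreSQL APIs), showing
the ordering the last two hunks enforce: remember the candidate block inside
the critical section, but only publish the hint after all locks are released,
and only when the tree is tall enough to benefit.

    #include <stdbool.h>
    #include <stdio.h>

    #define FASTPATH_MIN_LEVEL 2
    #define INVALID_BLOCK (-1)

    static int target_block = INVALID_BLOCK;   /* stands in for the relcache hint */

    /* Stubs standing in for buffer-manager and metapage access. */
    static void lock_buffer(int blk)   { printf("lock block %d\n", blk); }
    static void unlock_buffer(int blk) { printf("unlock block %d\n", blk); }
    static int  get_root_height(void)  { return 2; }    /* pretend metapage read */

    static void
    insert_on_page(int blk, bool is_rightmost_leaf, bool is_root)
    {
        int cached = INVALID_BLOCK;

        lock_buffer(blk);
        /* ... critical section: put the tuple on the page, emit WAL ... */

        /* Only *remember* the candidate here; don't publish it yet. */
        if (is_rightmost_leaf && !is_root)
            cached = blk;

        unlock_buffer(blk);

        /*
         * Publish the hint last: reading the root height may itself visit
         * (and lock) another page, so do it with no other locks held, and
         * skip the optimization entirely for short trees.
         */
        if (cached != INVALID_BLOCK && get_root_height() >= FASTPATH_MIN_LEVEL)
            target_block = cached;
    }

    int
    main(void)
    {
        insert_on_page(7, true, false);
        printf("cached target block is now %d\n", target_block);
        return 0;
    }

Deferring the height check this way keeps the metapage read out of any
section where other buffer locks are held, mirroring the rationale given
in the final comment of the patch.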