diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index 40f09e397e..bf5bc38273 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -438,14 +438,17 @@ location of the root page --- both the true root and the current effective root ("fast" root). To avoid fetching the metapage for every single index search, we cache a copy of the meta-data information in the index's relcache entry (rd_amcache). This is a bit ticklish since using the cache -implies following a root page pointer that could be stale. We require -every metapage update to send out a SI "relcache inval" message on the -index relation. That ensures that each backend will flush its cached copy -not later than the start of its next transaction. Therefore, stale -pointers cannot be used for longer than the current transaction, which -reduces the problem to the same one already dealt with for concurrent -VACUUM --- we can just imagine that each open transaction is potentially -"already in flight" to the old root. +implies following a root page pointer that could be stale. However, a +backend following a cached pointer can sufficiently verify whether it +reached the intended page; either by checking the is-root flag when it +is going to the true root, or by checking that the page has no siblings +when going to the fast root. At worst, this could result in descending +some extra tree levels if we have a cached pointer to a fast root that is +now above the real fast root. Such cases shouldn't arise often enough to +be worth optimizing; and in any case we can expect a relcache flush will +discard the cached metapage before long, since a VACUUM that's moved the +fast root pointer can be expected to issue a statistics update for the +index. The algorithm assumes we can fit at least three items per page (a "high key" and two real data items). Therefore it's unsafe diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 49c084a5ab..9140749998 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -21,7 +21,6 @@ #include "miscadmin.h" #include "storage/lmgr.h" #include "storage/predicate.h" -#include "utils/inval.h" #include "utils/tqual.h" @@ -868,13 +867,9 @@ _bt_insertonpg(Relation rel, END_CRIT_SECTION(); - /* release buffers; send out relcache inval if metapage changed */ + /* release buffers */ if (BufferIsValid(metabuf)) - { - if (!InRecovery) - CacheInvalidateRelcache(rel); _bt_relbuf(rel, metabuf); - } _bt_relbuf(rel, buf); } @@ -1963,10 +1958,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) END_CRIT_SECTION(); - /* send out relcache inval for metapage change */ - if (!InRecovery) - CacheInvalidateRelcache(rel); - /* done with metapage */ _bt_relbuf(rel, metabuf); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index c7c65a6a40..1e78858001 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -28,7 +28,6 @@ #include "storage/indexfsm.h" #include "storage/lmgr.h" #include "storage/predicate.h" -#include "utils/inval.h" #include "utils/snapmgr.h" @@ -246,12 +245,6 @@ _bt_getroot(Relation rel, int access) END_CRIT_SECTION(); - /* - * Send out relcache inval for metapage change (probably unnecessary - * here, but let's be safe). - */ - CacheInvalidateRelcache(rel); - /* * swap root write lock for read lock. There is no danger of anyone * else accessing the new root page while it's unlocked, since no one @@ -1545,12 +1538,10 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack) END_CRIT_SECTION(); - /* release metapage; send out relcache inval if metapage changed */ + /* release metapage */ if (BufferIsValid(metabuf)) - { - CacheInvalidateRelcache(rel); _bt_relbuf(rel, metabuf); - } + /* can always release leftsib immediately */ if (BufferIsValid(lbuf)) _bt_relbuf(rel, lbuf); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index b7f9e2e160..1da9548d57 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -148,14 +148,6 @@ btbuild(PG_FUNCTION_ARGS) } #endif /* BTREE_BUILD_STATS */ - /* - * If we are reindexing a pre-existing index, it is critical to send out a - * relcache invalidation SI message to ensure all backends re-read the - * index metapage. We expect that the caller will ensure that happens - * (typically as a side effect of updating index stats, but it must happen - * even if the stats don't change!) - */ - /* * Return statistics */