From ac8bc3b6e4a28cf7cd33fe11866d72f6deb2a38f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 5 Feb 2014 13:43:37 -0500 Subject: [PATCH] Remove unnecessary relcache flushes after changing btree metapages. These flushes were added in my commit d2896a9ed, which added the btree logic that keeps a cached copy of the index metapage data in index relcache entries. The idea was to ensure that other backends would promptly update their cached copies after a change. However, this is not really necessary, since _bt_getroot() has adequate defenses against believing a stale root page link, and _bt_getrootheight() doesn't have to be 100% right. Moreover, if it were necessary, a relcache flush would be an unreliable way to do it, since the sinval mechanism believes that relcache flush requests represent transactional updates, and therefore discards them on transaction rollback. Therefore, we might as well drop these flush requests and save the time to rebuild the whole relcache entry after a metapage change. If we ever try to support in-place truncation of btree indexes, it might be necessary to revisit this issue so that _bt_getroot() can't get caught by trying to follow a metapage link to a page that no longer exists. A possible solution to that is to make use of an smgr, rather than relcache, inval request to force other backends to discard their cached metapages. But for the moment this is not worth pursuing. --- src/backend/access/nbtree/README | 19 +++++++++++-------- src/backend/access/nbtree/nbtinsert.c | 11 +---------- src/backend/access/nbtree/nbtpage.c | 13 ++----------- src/backend/access/nbtree/nbtree.c | 8 -------- 4 files changed, 14 insertions(+), 37 deletions(-) diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index 40f09e397e..bf5bc38273 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -438,14 +438,17 @@ location of the root page --- both the true root and the current effective root ("fast" root). To avoid fetching the metapage for every single index search, we cache a copy of the meta-data information in the index's relcache entry (rd_amcache). This is a bit ticklish since using the cache -implies following a root page pointer that could be stale. We require -every metapage update to send out a SI "relcache inval" message on the -index relation. That ensures that each backend will flush its cached copy -not later than the start of its next transaction. Therefore, stale -pointers cannot be used for longer than the current transaction, which -reduces the problem to the same one already dealt with for concurrent -VACUUM --- we can just imagine that each open transaction is potentially -"already in flight" to the old root. +implies following a root page pointer that could be stale. However, a +backend following a cached pointer can sufficiently verify whether it +reached the intended page; either by checking the is-root flag when it +is going to the true root, or by checking that the page has no siblings +when going to the fast root. At worst, this could result in descending +some extra tree levels if we have a cached pointer to a fast root that is +now above the real fast root. Such cases shouldn't arise often enough to +be worth optimizing; and in any case we can expect a relcache flush will +discard the cached metapage before long, since a VACUUM that's moved the +fast root pointer can be expected to issue a statistics update for the +index. The algorithm assumes we can fit at least three items per page (a "high key" and two real data items). Therefore it's unsafe diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 49c084a5ab..9140749998 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -21,7 +21,6 @@ #include "miscadmin.h" #include "storage/lmgr.h" #include "storage/predicate.h" -#include "utils/inval.h" #include "utils/tqual.h" @@ -868,13 +867,9 @@ _bt_insertonpg(Relation rel, END_CRIT_SECTION(); - /* release buffers; send out relcache inval if metapage changed */ + /* release buffers */ if (BufferIsValid(metabuf)) - { - if (!InRecovery) - CacheInvalidateRelcache(rel); _bt_relbuf(rel, metabuf); - } _bt_relbuf(rel, buf); } @@ -1963,10 +1958,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) END_CRIT_SECTION(); - /* send out relcache inval for metapage change */ - if (!InRecovery) - CacheInvalidateRelcache(rel); - /* done with metapage */ _bt_relbuf(rel, metabuf); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index c7c65a6a40..1e78858001 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -28,7 +28,6 @@ #include "storage/indexfsm.h" #include "storage/lmgr.h" #include "storage/predicate.h" -#include "utils/inval.h" #include "utils/snapmgr.h" @@ -246,12 +245,6 @@ _bt_getroot(Relation rel, int access) END_CRIT_SECTION(); - /* - * Send out relcache inval for metapage change (probably unnecessary - * here, but let's be safe). - */ - CacheInvalidateRelcache(rel); - /* * swap root write lock for read lock. There is no danger of anyone * else accessing the new root page while it's unlocked, since no one @@ -1545,12 +1538,10 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack) END_CRIT_SECTION(); - /* release metapage; send out relcache inval if metapage changed */ + /* release metapage */ if (BufferIsValid(metabuf)) - { - CacheInvalidateRelcache(rel); _bt_relbuf(rel, metabuf); - } + /* can always release leftsib immediately */ if (BufferIsValid(lbuf)) _bt_relbuf(rel, lbuf); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index b7f9e2e160..1da9548d57 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -148,14 +148,6 @@ btbuild(PG_FUNCTION_ARGS) } #endif /* BTREE_BUILD_STATS */ - /* - * If we are reindexing a pre-existing index, it is critical to send out a - * relcache invalidation SI message to ensure all backends re-read the - * index metapage. We expect that the caller will ensure that happens - * (typically as a side effect of updating index stats, but it must happen - * even if the stats don't change!) - */ - /* * Return statistics */