Fix nbtree metapage cache upgrade bug.
Commit857f9c36cd
, which taught nbtree VACUUM to avoid unnecessary index scans, bumped the nbtree version number from 2 to 3, while adding the ability for nbtree indexes to be upgraded on-the-fly. Various assertions that assumed that an nbtree index was always on version 2 had to be changed to accept any supported version (version 2 or 3 on Postgres 11). However, a few assertions were missed in the initial commit, all of which were in code paths that cache a local copy of the metapage metadata, where the index had been expected to be on the current version (no longer version 2) as a generic sanity check. Rather than simply update the assertions, follow-up commit0a64b45152
intentionally made the metapage caching code update the per-backend cached metadata version without changing the on-disk version at the same time. This could even happen when the planner needed to determine the height of a B-Tree for costing purposes. The assertions only fail on Postgres v12 when upgrading from v10, because they were adjusted to use the authoritative shared memory metapage by v12's commitdd299df8
. To fix, remove the cache-only upgrade mechanism entirely, and update the assertions themselves to accept any supported version (go back to using the cached version in v12). The fix is almost a full revert of commit0a64b45152
on the v11 branch. VACUUM only considers the authoritative metapage, and never bothers with a locally cached version, whereas everywhere else isn't interested in the metapage fields that were added by commit857f9c36cd
. It seems unlikely that this bug has affected any user on v11. Reported-By: Christoph Berg Bug: #15896 Discussion: https://postgr.es/m/15896-5b25e260fdb0b081%40postgresql.org Backpatch: 11-, where VACUUM was taught to avoid unnecessary index scans.
This commit is contained in:
parent
79d3a1e52a
commit
7772dece98
|
@ -33,7 +33,6 @@
|
||||||
#include "storage/predicate.h"
|
#include "storage/predicate.h"
|
||||||
#include "utils/snapmgr.h"
|
#include "utils/snapmgr.h"
|
||||||
|
|
||||||
static void _bt_cachemetadata(Relation rel, BTMetaPageData *input);
|
|
||||||
static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
|
static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
|
||||||
static bool _bt_mark_page_halfdead(Relation rel, Buffer buf, BTStack stack);
|
static bool _bt_mark_page_halfdead(Relation rel, Buffer buf, BTStack stack);
|
||||||
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
|
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
|
||||||
|
@ -109,48 +108,13 @@ _bt_upgrademetapage(Page page)
|
||||||
((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
|
((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Cache metadata from input meta page to rel->rd_amcache.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
_bt_cachemetadata(Relation rel, BTMetaPageData *input)
|
|
||||||
{
|
|
||||||
BTMetaPageData *cached_metad;
|
|
||||||
|
|
||||||
/* We assume rel->rd_amcache was already freed by caller */
|
|
||||||
Assert(rel->rd_amcache == NULL);
|
|
||||||
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
|
|
||||||
sizeof(BTMetaPageData));
|
|
||||||
|
|
||||||
/* Meta page should be of supported version */
|
|
||||||
Assert(input->btm_version >= BTREE_MIN_VERSION &&
|
|
||||||
input->btm_version <= BTREE_VERSION);
|
|
||||||
|
|
||||||
cached_metad = (BTMetaPageData *) rel->rd_amcache;
|
|
||||||
if (input->btm_version >= BTREE_NOVAC_VERSION)
|
|
||||||
{
|
|
||||||
/* Version with compatible meta-data, no need to upgrade */
|
|
||||||
memcpy(cached_metad, input, sizeof(BTMetaPageData));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Upgrade meta-data: copy available information from meta-page and
|
|
||||||
* fill new fields with default values.
|
|
||||||
*
|
|
||||||
* Note that we cannot upgrade to version 4+ without a REINDEX, since
|
|
||||||
* extensive on-disk changes are required.
|
|
||||||
*/
|
|
||||||
memcpy(cached_metad, input, offsetof(BTMetaPageData, btm_oldest_btpo_xact));
|
|
||||||
cached_metad->btm_version = BTREE_NOVAC_VERSION;
|
|
||||||
cached_metad->btm_oldest_btpo_xact = InvalidTransactionId;
|
|
||||||
cached_metad->btm_last_cleanup_num_heap_tuples = -1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get metadata from share-locked buffer containing metapage, while performing
|
* Get metadata from share-locked buffer containing metapage, while performing
|
||||||
* standard sanity checks. Sanity checks here must match _bt_getroot().
|
* standard sanity checks.
|
||||||
|
*
|
||||||
|
* Callers that cache data returned here in local cache should note that an
|
||||||
|
* on-the-fly upgrade using _bt_upgrademetapage() can change the version field
|
||||||
|
* and BTREE_NOVAC_VERSION specific fields without invalidating local cache.
|
||||||
*/
|
*/
|
||||||
static BTMetaPageData *
|
static BTMetaPageData *
|
||||||
_bt_getmeta(Relation rel, Buffer metabuf)
|
_bt_getmeta(Relation rel, Buffer metabuf)
|
||||||
|
@ -293,8 +257,6 @@ Buffer
|
||||||
_bt_getroot(Relation rel, int access)
|
_bt_getroot(Relation rel, int access)
|
||||||
{
|
{
|
||||||
Buffer metabuf;
|
Buffer metabuf;
|
||||||
Page metapg;
|
|
||||||
BTPageOpaque metaopaque;
|
|
||||||
Buffer rootbuf;
|
Buffer rootbuf;
|
||||||
Page rootpage;
|
Page rootpage;
|
||||||
BTPageOpaque rootopaque;
|
BTPageOpaque rootopaque;
|
||||||
|
@ -347,30 +309,13 @@ _bt_getroot(Relation rel, int access)
|
||||||
}
|
}
|
||||||
|
|
||||||
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
|
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
|
||||||
metapg = BufferGetPage(metabuf);
|
metad = _bt_getmeta(rel, metabuf);
|
||||||
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
|
|
||||||
metad = BTPageGetMeta(metapg);
|
|
||||||
|
|
||||||
/* sanity-check the metapage */
|
|
||||||
if (!P_ISMETA(metaopaque) ||
|
|
||||||
metad->btm_magic != BTREE_MAGIC)
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
|
||||||
errmsg("index \"%s\" is not a btree",
|
|
||||||
RelationGetRelationName(rel))));
|
|
||||||
|
|
||||||
if (metad->btm_version < BTREE_MIN_VERSION ||
|
|
||||||
metad->btm_version > BTREE_VERSION)
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_INDEX_CORRUPTED),
|
|
||||||
errmsg("version mismatch in index \"%s\": file version %d, "
|
|
||||||
"current version %d, minimal supported version %d",
|
|
||||||
RelationGetRelationName(rel),
|
|
||||||
metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION)));
|
|
||||||
|
|
||||||
/* if no root page initialized yet, do it */
|
/* if no root page initialized yet, do it */
|
||||||
if (metad->btm_root == P_NONE)
|
if (metad->btm_root == P_NONE)
|
||||||
{
|
{
|
||||||
|
Page metapg;
|
||||||
|
|
||||||
/* If access = BT_READ, caller doesn't want us to create root yet */
|
/* If access = BT_READ, caller doesn't want us to create root yet */
|
||||||
if (access == BT_READ)
|
if (access == BT_READ)
|
||||||
{
|
{
|
||||||
|
@ -412,6 +357,8 @@ _bt_getroot(Relation rel, int access)
|
||||||
rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT);
|
rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT);
|
||||||
rootopaque->btpo.level = 0;
|
rootopaque->btpo.level = 0;
|
||||||
rootopaque->btpo_cycleid = 0;
|
rootopaque->btpo_cycleid = 0;
|
||||||
|
/* Get raw page pointer for metapage */
|
||||||
|
metapg = BufferGetPage(metabuf);
|
||||||
|
|
||||||
/* NO ELOG(ERROR) till meta is updated */
|
/* NO ELOG(ERROR) till meta is updated */
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
@ -473,7 +420,7 @@ _bt_getroot(Relation rel, int access)
|
||||||
LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
|
LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
|
||||||
LockBuffer(rootbuf, BT_READ);
|
LockBuffer(rootbuf, BT_READ);
|
||||||
|
|
||||||
/* okay, metadata is correct, release lock on it */
|
/* okay, metadata is correct, release lock on it without caching */
|
||||||
_bt_relbuf(rel, metabuf);
|
_bt_relbuf(rel, metabuf);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -485,7 +432,9 @@ _bt_getroot(Relation rel, int access)
|
||||||
/*
|
/*
|
||||||
* Cache the metapage data for next time
|
* Cache the metapage data for next time
|
||||||
*/
|
*/
|
||||||
_bt_cachemetadata(rel, metad);
|
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
|
||||||
|
sizeof(BTMetaPageData));
|
||||||
|
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are done with the metapage; arrange to release it via first
|
* We are done with the metapage; arrange to release it via first
|
||||||
|
@ -659,16 +608,19 @@ _bt_getrootheight(Relation rel)
|
||||||
/*
|
/*
|
||||||
* Cache the metapage data for next time
|
* Cache the metapage data for next time
|
||||||
*/
|
*/
|
||||||
_bt_cachemetadata(rel, metad);
|
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
|
||||||
/* We shouldn't have cached it if any of these fail */
|
sizeof(BTMetaPageData));
|
||||||
Assert(metad->btm_magic == BTREE_MAGIC);
|
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
|
||||||
Assert(metad->btm_version >= BTREE_NOVAC_VERSION);
|
|
||||||
Assert(metad->btm_fastroot != P_NONE);
|
|
||||||
_bt_relbuf(rel, metabuf);
|
_bt_relbuf(rel, metabuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get cached page */
|
/* Get cached page */
|
||||||
metad = (BTMetaPageData *) rel->rd_amcache;
|
metad = (BTMetaPageData *) rel->rd_amcache;
|
||||||
|
/* We shouldn't have cached it if any of these fail */
|
||||||
|
Assert(metad->btm_magic == BTREE_MAGIC);
|
||||||
|
Assert(metad->btm_version >= BTREE_MIN_VERSION);
|
||||||
|
Assert(metad->btm_version <= BTREE_VERSION);
|
||||||
|
Assert(metad->btm_fastroot != P_NONE);
|
||||||
|
|
||||||
return metad->btm_fastlevel;
|
return metad->btm_fastlevel;
|
||||||
}
|
}
|
||||||
|
@ -709,17 +661,26 @@ _bt_heapkeyspace(Relation rel)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cache the metapage data for next time
|
* Cache the metapage data for next time
|
||||||
|
*
|
||||||
|
* An on-the-fly version upgrade performed by _bt_upgrademetapage()
|
||||||
|
* can change the nbtree version for an index without invalidating any
|
||||||
|
* local cache. This is okay because it can only happen when moving
|
||||||
|
* from version 2 to version 3, both of which are !heapkeyspace
|
||||||
|
* versions.
|
||||||
*/
|
*/
|
||||||
_bt_cachemetadata(rel, metad);
|
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
|
||||||
/* We shouldn't have cached it if any of these fail */
|
sizeof(BTMetaPageData));
|
||||||
Assert(metad->btm_magic == BTREE_MAGIC);
|
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
|
||||||
Assert(metad->btm_version >= BTREE_NOVAC_VERSION);
|
|
||||||
Assert(metad->btm_fastroot != P_NONE);
|
|
||||||
_bt_relbuf(rel, metabuf);
|
_bt_relbuf(rel, metabuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get cached page */
|
/* Get cached page */
|
||||||
metad = (BTMetaPageData *) rel->rd_amcache;
|
metad = (BTMetaPageData *) rel->rd_amcache;
|
||||||
|
/* We shouldn't have cached it if any of these fail */
|
||||||
|
Assert(metad->btm_magic == BTREE_MAGIC);
|
||||||
|
Assert(metad->btm_version >= BTREE_MIN_VERSION);
|
||||||
|
Assert(metad->btm_version <= BTREE_VERSION);
|
||||||
|
Assert(metad->btm_fastroot != P_NONE);
|
||||||
|
|
||||||
return metad->btm_version > BTREE_NOVAC_VERSION;
|
return metad->btm_version > BTREE_NOVAC_VERSION;
|
||||||
}
|
}
|
||||||
|
|
|
@ -107,6 +107,8 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id)
|
||||||
md->btm_level = xlrec->level;
|
md->btm_level = xlrec->level;
|
||||||
md->btm_fastroot = xlrec->fastroot;
|
md->btm_fastroot = xlrec->fastroot;
|
||||||
md->btm_fastlevel = xlrec->fastlevel;
|
md->btm_fastlevel = xlrec->fastlevel;
|
||||||
|
/* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
|
||||||
|
Assert(md->btm_version >= BTREE_NOVAC_VERSION);
|
||||||
md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact;
|
md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact;
|
||||||
md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples;
|
md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples;
|
||||||
|
|
||||||
|
|
|
@ -97,12 +97,12 @@ typedef BTPageOpaqueData *BTPageOpaque;
|
||||||
typedef struct BTMetaPageData
|
typedef struct BTMetaPageData
|
||||||
{
|
{
|
||||||
uint32 btm_magic; /* should contain BTREE_MAGIC */
|
uint32 btm_magic; /* should contain BTREE_MAGIC */
|
||||||
uint32 btm_version; /* should contain BTREE_VERSION */
|
uint32 btm_version; /* nbtree version (always <= BTREE_VERSION) */
|
||||||
BlockNumber btm_root; /* current root location */
|
BlockNumber btm_root; /* current root location */
|
||||||
uint32 btm_level; /* tree level of the root page */
|
uint32 btm_level; /* tree level of the root page */
|
||||||
BlockNumber btm_fastroot; /* current "fast" root location */
|
BlockNumber btm_fastroot; /* current "fast" root location */
|
||||||
uint32 btm_fastlevel; /* tree level of the "fast" root page */
|
uint32 btm_fastlevel; /* tree level of the "fast" root page */
|
||||||
/* following fields are available since page version 3 */
|
/* remaining fields only valid when btm_version >= BTREE_NOVAC_VERSION */
|
||||||
TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among all deleted
|
TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among all deleted
|
||||||
* pages */
|
* pages */
|
||||||
float8 btm_last_cleanup_num_heap_tuples; /* number of heap tuples
|
float8 btm_last_cleanup_num_heap_tuples; /* number of heap tuples
|
||||||
|
|
Loading…
Reference in New Issue