From 5b861baa550a369e04bf67fbe83f3a5a8c742fb4 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Tue, 23 Mar 2021 16:09:51 -0700 Subject: [PATCH] nbtree VACUUM: Cope with buggy opclasses. Teach nbtree VACUUM to press on with vacuuming in the event of a page deletion attempt that fails to "re-find" a downlink for its child/target page. There is no good reason to treat this as an irrecoverable error. But there is a good reason not to: pressing on at this point removes any question of VACUUM not making progress solely due to misbehavior from user-defined operator class code. Discussion: https://postgr.es/m/CAH2-Wzma5G9CTtMjbrXTwOym+U=aWg-R7=-htySuztgoJLvZXg@mail.gmail.com --- src/backend/access/nbtree/nbtpage.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 530d924bff..ef48679cc2 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -2791,10 +2791,26 @@ _bt_lock_subtree_parent(Relation rel, BlockNumber child, BTStack stack, */ pbuf = _bt_getstackbuf(rel, stack, child); if (pbuf == InvalidBuffer) - ereport(ERROR, + { + /* + * Failed to "re-find" a pivot tuple whose downlink matched our child + * block number on the parent level -- the index must be corrupt. + * Don't even try to delete the leafbuf subtree. Just report the + * issue and press on with vacuuming the index. + * + * Note: _bt_getstackbuf() recovers from concurrent page splits that + * take place on the parent level. Its approach is a near-exhaustive + * linear search. This also gives it a surprisingly good chance of + * recovering in the event of a buggy or inconsistent opclass. But we + * don't rely on that here. + */ + ereport(LOG, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg_internal("failed to re-find parent key in index \"%s\" for deletion target page %u", RelationGetRelationName(rel), child))); + return false; + } + parent = stack->bts_blkno; parentoffset = stack->bts_offset;