diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out index e864579774..ef5c9e1a1c 100644 --- a/contrib/amcheck/expected/check_btree.out +++ b/contrib/amcheck/expected/check_btree.out @@ -140,10 +140,30 @@ SELECT bt_index_parent_check('delete_test_table_pkey', true); (1 row) +-- +-- BUG #15597: must not assume consistent input toasting state when forming +-- tuple. Bloom filter must fingerprint normalized index tuple representation. +-- +CREATE TABLE toast_bug(buggy text); +ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain; +-- pg_attribute entry for toasty.buggy will have plain storage: +CREATE INDEX toasty ON toast_bug(buggy); +-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage: +ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended; +-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD): +INSERT INTO toast_bug SELECT repeat('a', 2200); +-- Should not get false positive report of corruption: +SELECT bt_index_check('toasty', true); + bt_index_check +---------------- + +(1 row) + -- cleanup DROP TABLE bttest_a; DROP TABLE bttest_b; DROP TABLE bttest_multi; DROP TABLE delete_test_table; +DROP TABLE toast_bug; DROP OWNED BY bttest_role; -- permissions DROP ROLE bttest_role; diff --git a/contrib/amcheck/sql/check_btree.sql b/contrib/amcheck/sql/check_btree.sql index 7b1ab4f148..0ad1631476 100644 --- a/contrib/amcheck/sql/check_btree.sql +++ b/contrib/amcheck/sql/check_btree.sql @@ -88,10 +88,26 @@ DELETE FROM delete_test_table WHERE a > 10; VACUUM delete_test_table; SELECT bt_index_parent_check('delete_test_table_pkey', true); +-- +-- BUG #15597: must not assume consistent input toasting state when forming +-- tuple. Bloom filter must fingerprint normalized index tuple representation. +-- +CREATE TABLE toast_bug(buggy text); +ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain; +-- pg_attribute entry for toasty.buggy will have plain storage: +CREATE INDEX toasty ON toast_bug(buggy); +-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage: +ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended; +-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD): +INSERT INTO toast_bug SELECT repeat('a', 2200); +-- Should not get false positive report of corruption: +SELECT bt_index_check('toasty', true); + -- cleanup DROP TABLE bttest_a; DROP TABLE bttest_b; DROP TABLE bttest_multi; DROP TABLE delete_test_table; +DROP TABLE toast_bug; DROP OWNED BY bttest_role; -- permissions DROP ROLE bttest_role; diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index a1438a2855..767d8e9e1e 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -132,6 +132,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state); static void bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *checkstate); +static IndexTuple bt_normalize_tuple(BtreeCheckState *state, + IndexTuple itup); static inline bool offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset); static inline bool invariant_leq_offset(BtreeCheckState *state, @@ -907,7 +909,16 @@ bt_target_page_check(BtreeCheckState *state) /* Fingerprint leaf page tuples (those that point to the heap) */ if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid)) - bloom_add_element(state->filter, (unsigned char *) itup, tupsize); + { + IndexTuple norm; + + norm = bt_normalize_tuple(state, itup); + bloom_add_element(state->filter, (unsigned char *) norm, + IndexTupleSize(norm)); + /* Be tidy */ + if (norm != itup) + pfree(norm); + } /* * * High key check * @@ -1671,35 +1682,18 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *checkstate) { BtreeCheckState *state = (BtreeCheckState *) checkstate; - IndexTuple itup; + IndexTuple itup, norm; Assert(state->heapallindexed); - /* - * Generate an index tuple for fingerprinting. - * - * Index tuple formation is assumed to be deterministic, and IndexTuples - * are assumed immutable. While the LP_DEAD bit is mutable in leaf pages, - * that's ItemId metadata, which was not fingerprinted. (There will often - * be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter, - * but we only try to detect the absence of needed tuples, so that's - * okay.) - * - * Note that we rely on deterministic index_form_tuple() TOAST - * compression. If index_form_tuple() was ever enhanced to compress datums - * out-of-line, or otherwise varied when or how compression was applied, - * our assumption would break, leading to false positive reports of - * corruption. It's also possible that non-pivot tuples could in the - * future have alternative equivalent representations (e.g. by using the - * INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted - * values as part of fingerprinting. - */ + /* Generate a normalized index tuple for fingerprinting */ itup = index_form_tuple(RelationGetDescr(index), values, isnull); itup->t_tid = htup->t_self; + norm = bt_normalize_tuple(state, itup); /* Probe Bloom filter -- tuple should be present */ - if (bloom_lacks_element(state->filter, (unsigned char *) itup, - IndexTupleSize(itup))) + if (bloom_lacks_element(state->filter, (unsigned char *) norm, + IndexTupleSize(norm))) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"", @@ -1713,6 +1707,115 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, state->heaptuplespresent++; pfree(itup); + /* Cannot leak memory here */ + if (norm != itup) + pfree(norm); +} + +/* + * Normalize an index tuple for fingerprinting. + * + * In general, index tuple formation is assumed to be deterministic by + * heapallindexed verification, and IndexTuples are assumed immutable. While + * the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is + * not fingerprinted. Normalization is required to compensate for corner + * cases where the determinism assumption doesn't quite work. + * + * There is currently one such case: index_form_tuple() does not try to hide + * the source TOAST state of input datums. The executor applies TOAST + * compression for heap tuples based on different criteria to the compression + * applied within btinsert()'s call to index_form_tuple(): it sometimes + * compresses more aggressively, resulting in compressed heap tuple datums but + * uncompressed corresponding index tuple datums. A subsequent heapallindexed + * verification will get a logically equivalent though bitwise unequal tuple + * from index_form_tuple(). False positive heapallindexed corruption reports + * could occur without normalizing away the inconsistency. + * + * Returned tuple is often caller's own original tuple. Otherwise, it is a + * new representation of caller's original index tuple, palloc()'d in caller's + * memory context. + * + * Note: This routine is not concerned with distinctions about the + * representation of tuples beyond those that might break heapallindexed + * verification. In particular, it won't try to normalize opclass-equal + * datums with potentially distinct representations (e.g., btree/numeric_ops + * index datums will not get their display scale normalized-away here). + * Normalization may need to be expanded to handle more cases in the future, + * though. For example, it's possible that non-pivot tuples could in the + * future have alternative logically equivalent representations due to using + * the INDEX_ALT_TID_MASK bit to implement intelligent deduplication. + */ +static IndexTuple +bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup) +{ + TupleDesc tupleDescriptor = RelationGetDescr(state->rel); + Datum normalized[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + bool toast_free[INDEX_MAX_KEYS]; + bool formnewtup = false; + IndexTuple reformed; + int i; + + /* Easy case: It's immediately clear that tuple has no varlena datums */ + if (!IndexTupleHasVarwidths(itup)) + return itup; + + for (i = 0; i < tupleDescriptor->natts; i++) + { + Form_pg_attribute att; + + att = TupleDescAttr(tupleDescriptor, i); + + /* Assume untoasted/already normalized datum initially */ + toast_free[i] = false; + normalized[i] = index_getattr(itup, att->attnum, + tupleDescriptor, + &isnull[i]); + if (att->attbyval || att->attlen != -1 || isnull[i]) + continue; + + /* + * Callers always pass a tuple that could safely be inserted into the + * index without further processing, so an external varlena header + * should never be encountered here + */ + if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i]))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"", + ItemPointerGetBlockNumber(&(itup->t_tid)), + ItemPointerGetOffsetNumber(&(itup->t_tid)), + RelationGetRelationName(state->rel)))); + else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i]))) + { + formnewtup = true; + normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i])); + toast_free[i] = true; + } + } + + /* Easier case: Tuple has varlena datums, none of which are compressed */ + if (!formnewtup) + return itup; + + /* + * Hard case: Tuple had compressed varlena datums that necessitate + * creating normalized version of the tuple from uncompressed input datums + * (normalized input datums). This is rather naive, but shouldn't be + * necessary too often. + * + * Note that we rely on deterministic index_form_tuple() TOAST compression + * of normalized input. + */ + reformed = index_form_tuple(tupleDescriptor, normalized, isnull); + reformed->t_tid = itup->t_tid; + + /* Cannot leak memory here */ + for (i = 0; i < tupleDescriptor->natts; i++) + if (toast_free[i]) + pfree(DatumGetPointer(normalized[i])); + + return reformed; } /*