Avoid amcheck inline compression false positives.

The previous tacit assumption that index_form_tuple() hides differences in the TOAST state of its input datums was wrong. Normalize input varlena datums by decompressing compressed values, and forming a new index tuple for fingerprinting using uncompressed inputs. The final normalized representation may actually be compressed once again within index_form_tuple(), though that shouldn't matter. When the original tuple is found to have no datums that are compressed inline, fingerprint the original tuple directly. Normalization avoids false positive reports of corruption in certain cases. For example, the executor can apply toasting with some inline compression to an entire heap tuple because its input has a single external TOAST pointer. Varlena datums for other attributes that are not particularly good candidates for inline compression can be compressed in the heap tuple in passing, without the representation of the same values in index tuples ever receiving concomitant inline compression. Add a test case to recreate the issue in a simpler though less realistic way: by exploiting differences in pg_attribute.attstorage between heap and index relations. This bug was discovered by me during testing of an upcoming set of nbtree enhancements. It was also independently reported by Andreas Kunert, as bug #15597. His test case was rather more realistic than the one I ended up using. Bug: #15597 Discussion: https://postgr.es/m/CAH2-WznrVd9ie+TTJ45nDT+v2nUt6YJwQrT9SebCdQKtAvfPZw@mail.gmail.com Discussion: https://postgr.es/m/15597-294e5d3e7f01c407@postgresql.org Backpatch: 11-, where heapallindexed verification was introduced.
2019-02-06 15:54:19 -08:00 · 2019-02-06 15:54:19 -08:00 · eba775345d
parent 727921f466
commit eba775345d
3 changed files with 162 additions and 23 deletions
--- a/contrib/amcheck/expected/check_btree.out
+++ b/contrib/amcheck/expected/check_btree.out
@ -140,10 +140,30 @@ SELECT bt_index_parent_check('delete_test_table_pkey', true);
 
 (1 row)

+--
+-- BUG #15597: must not assume consistent input toasting state when forming
+-- tuple.  Bloom filter must fingerprint normalized index tuple representation.
+--
+CREATE TABLE toast_bug(buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain;
+-- pg_attribute entry for toasty.buggy will have plain storage:
+CREATE INDEX toasty ON toast_bug(buggy);
+-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage:
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT bt_index_check('toasty', true);
+ bt_index_check 
+----------------
+ 
+(1 row)
+
 -- cleanup
 DROP TABLE bttest_a;
 DROP TABLE bttest_b;
 DROP TABLE bttest_multi;
 DROP TABLE delete_test_table;
+DROP TABLE toast_bug;
 DROP OWNED BY bttest_role; -- permissions
 DROP ROLE bttest_role;
--- a/contrib/amcheck/sql/check_btree.sql
+++ b/contrib/amcheck/sql/check_btree.sql
@ -88,10 +88,26 @@ DELETE FROM delete_test_table WHERE a > 10;
 VACUUM delete_test_table;
 SELECT bt_index_parent_check('delete_test_table_pkey', true);

+--
+-- BUG #15597: must not assume consistent input toasting state when forming
+-- tuple.  Bloom filter must fingerprint normalized index tuple representation.
+--
+CREATE TABLE toast_bug(buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE plain;
+-- pg_attribute entry for toasty.buggy will have plain storage:
+CREATE INDEX toasty ON toast_bug(buggy);
+-- Whereas pg_attribute entry for toast_bug.buggy now has extended storage:
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT bt_index_check('toasty', true);
+
 -- cleanup
 DROP TABLE bttest_a;
 DROP TABLE bttest_b;
 DROP TABLE bttest_multi;
 DROP TABLE delete_test_table;
+DROP TABLE toast_bug;
 DROP OWNED BY bttest_role; -- permissions
 DROP ROLE bttest_role;
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@ -133,6 +133,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state);
 static void bt_tuple_present_callback(Relation index, HeapTuple htup,
 						  Datum *values, bool *isnull,
 						  bool tupleIsAlive, void *checkstate);
+static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
+						   IndexTuple itup);
 static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
 							OffsetNumber offset);
 static inline bool invariant_leq_offset(BtreeCheckState *state,
@ -908,7 +910,16 @@ bt_target_page_check(BtreeCheckState *state)

 		/* Fingerprint leaf page tuples (those that point to the heap) */
 		if (state->heapallindexed && P_ISLEAF(topaque) && !ItemIdIsDead(itemid))
-			bloom_add_element(state->filter, (unsigned char *) itup, tupsize);
+		{
+			IndexTuple		norm;
+
+			norm = bt_normalize_tuple(state, itup);
+			bloom_add_element(state->filter, (unsigned char *) norm,
+							  IndexTupleSize(norm));
+			/* Be tidy */
+			if (norm != itup)
+				pfree(norm);
+		}

 		/*
 		 * * High key check *
@ -1672,35 +1683,18 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
 						  bool *isnull, bool tupleIsAlive, void *checkstate)
 {
 	BtreeCheckState *state = (BtreeCheckState *) checkstate;
-	IndexTuple	itup;
+	IndexTuple	itup, norm;

 	Assert(state->heapallindexed);

-	/*
-	 * Generate an index tuple for fingerprinting.
-	 *
-	 * Index tuple formation is assumed to be deterministic, and IndexTuples
-	 * are assumed immutable.  While the LP_DEAD bit is mutable in leaf pages,
-	 * that's ItemId metadata, which was not fingerprinted.  (There will often
-	 * be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter,
-	 * but we only try to detect the absence of needed tuples, so that's
-	 * okay.)
-	 *
-	 * Note that we rely on deterministic index_form_tuple() TOAST
-	 * compression. If index_form_tuple() was ever enhanced to compress datums
-	 * out-of-line, or otherwise varied when or how compression was applied,
-	 * our assumption would break, leading to false positive reports of
-	 * corruption.  It's also possible that non-pivot tuples could in the
-	 * future have alternative equivalent representations (e.g. by using the
-	 * INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted
-	 * values as part of fingerprinting.
-	 */
+	/* Generate a normalized index tuple for fingerprinting */
 	itup = index_form_tuple(RelationGetDescr(index), values, isnull);
 	itup->t_tid = htup->t_self;
+	norm = bt_normalize_tuple(state, itup);

 	/* Probe Bloom filter -- tuple should be present */
-	if (bloom_lacks_element(state->filter, (unsigned char *) itup,
-							IndexTupleSize(itup)))
+	if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+							IndexTupleSize(norm)))
 		ereport(ERROR,
 				(errcode(ERRCODE_DATA_CORRUPTED),
 				 errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
@ -1714,6 +1708,115 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,

 	state->heaptuplespresent++;
 	pfree(itup);
+	/* Cannot leak memory here */
+	if (norm != itup)
+		pfree(norm);
+}
+
+/*
+ * Normalize an index tuple for fingerprinting.
+ *
+ * In general, index tuple formation is assumed to be deterministic by
+ * heapallindexed verification, and IndexTuples are assumed immutable.  While
+ * the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is
+ * not fingerprinted.  Normalization is required to compensate for corner
+ * cases where the determinism assumption doesn't quite work.
+ *
+ * There is currently one such case: index_form_tuple() does not try to hide
+ * the source TOAST state of input datums.  The executor applies TOAST
+ * compression for heap tuples based on different criteria to the compression
+ * applied within btinsert()'s call to index_form_tuple(): it sometimes
+ * compresses more aggressively, resulting in compressed heap tuple datums but
+ * uncompressed corresponding index tuple datums.  A subsequent heapallindexed
+ * verification will get a logically equivalent though bitwise unequal tuple
+ * from index_form_tuple().  False positive heapallindexed corruption reports
+ * could occur without normalizing away the inconsistency.
+ *
+ * Returned tuple is often caller's own original tuple.  Otherwise, it is a
+ * new representation of caller's original index tuple, palloc()'d in caller's
+ * memory context.
+ *
+ * Note: This routine is not concerned with distinctions about the
+ * representation of tuples beyond those that might break heapallindexed
+ * verification.  In particular, it won't try to normalize opclass-equal
+ * datums with potentially distinct representations (e.g., btree/numeric_ops
+ * index datums will not get their display scale normalized-away here).
+ * Normalization may need to be expanded to handle more cases in the future,
+ * though.  For example, it's possible that non-pivot tuples could in the
+ * future have alternative logically equivalent representations due to using
+ * the INDEX_ALT_TID_MASK bit to implement intelligent deduplication.
+ */
+static IndexTuple
+bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
+{
+	TupleDesc	tupleDescriptor = RelationGetDescr(state->rel);
+	Datum		normalized[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	bool		toast_free[INDEX_MAX_KEYS];
+	bool		formnewtup = false;
+	IndexTuple	reformed;
+	int			i;
+
+	/* Easy case: It's immediately clear that tuple has no varlena datums */
+	if (!IndexTupleHasVarwidths(itup))
+		return itup;
+
+	for (i = 0; i < tupleDescriptor->natts; i++)
+	{
+		Form_pg_attribute	att;
+
+		att = TupleDescAttr(tupleDescriptor, i);
+
+		/* Assume untoasted/already normalized datum initially */
+		toast_free[i] = false;
+		normalized[i] = index_getattr(itup, att->attnum,
+									  tupleDescriptor,
+									  &isnull[i]);
+		if (att->attbyval || att->attlen != -1 || isnull[i])
+			continue;
+
+		/*
+		 * Callers always pass a tuple that could safely be inserted into the
+		 * index without further processing, so an external varlena header
+		 * should never be encountered here
+		 */
+		if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+							ItemPointerGetBlockNumber(&(itup->t_tid)),
+							ItemPointerGetOffsetNumber(&(itup->t_tid)),
+							RelationGetRelationName(state->rel))));
+		else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+		{
+			formnewtup = true;
+			normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+			toast_free[i] = true;
+		}
+	}
+
+	/* Easier case: Tuple has varlena datums, none of which are compressed */
+	if (!formnewtup)
+		return itup;
+
+	/*
+	 * Hard case: Tuple had compressed varlena datums that necessitate
+	 * creating normalized version of the tuple from uncompressed input datums
+	 * (normalized input datums).  This is rather naive, but shouldn't be
+	 * necessary too often.
+	 *
+	 * Note that we rely on deterministic index_form_tuple() TOAST compression
+	 * of normalized input.
+	 */
+	reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+	reformed->t_tid = itup->t_tid;
+
+	/* Cannot leak memory here */
+	for (i = 0; i < tupleDescriptor->natts; i++)
+		if (toast_free[i])
+			pfree(DatumGetPointer(normalized[i]));
+
+	return reformed;
 }

 /*