diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c index 6cb7c26b39..c13010ad9c 100644 --- a/src/backend/access/brin/brin_tuple.c +++ b/src/backend/access/brin/brin_tuple.c @@ -32,12 +32,23 @@ #include "postgres.h" #include "access/brin_tuple.h" +#include "access/detoast.h" +#include "access/heaptoast.h" #include "access/htup_details.h" +#include "access/toast_internals.h" #include "access/tupdesc.h" #include "access/tupmacs.h" #include "utils/datum.h" #include "utils/memutils.h" + +/* + * This enables de-toasting of index entries. Needed until VACUUM is + * smart enough to rebuild indexes from scratch. + */ +#define TOAST_INDEX_HACK + + static inline void brin_deconstruct_tuple(BrinDesc *brdesc, char *tp, bits8 *nullbits, bool nulls, Datum *values, bool *allnulls, bool *hasnulls); @@ -99,6 +110,12 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, Size len, hoff, data_len; + int i; + +#ifdef TOAST_INDEX_HACK + Datum *untoasted_values; + int nuntoasted = 0; +#endif Assert(brdesc->bd_totalstored > 0); @@ -107,6 +124,10 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, phony_nullbitmap = (bits8 *) palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored)); +#ifdef TOAST_INDEX_HACK + untoasted_values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored); +#endif + /* * Set up the values/nulls arrays for heap_fill_tuple */ @@ -138,10 +159,84 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, if (tuple->bt_columns[keyno].bv_hasnulls) anynulls = true; + /* + * Now obtain the values of each stored datum. Note that some values + * might be toasted, and we cannot rely on the original heap values + * sticking around forever, so we must detoast them. Also try to + * compress them. + */ for (datumno = 0; datumno < brdesc->bd_info[keyno]->oi_nstored; datumno++) - values[idxattno++] = tuple->bt_columns[keyno].bv_values[datumno]; + { + Datum value = tuple->bt_columns[keyno].bv_values[datumno]; + +#ifdef TOAST_INDEX_HACK + + /* We must look at the stored type, not at the index descriptor. */ + TypeCacheEntry *atttype = brdesc->bd_info[keyno]->oi_typcache[datumno]; + + /* Do we need to free the value at the end? */ + bool free_value = false; + + /* For non-varlena types we don't need to do anything special */ + if (atttype->typlen != -1) + { + values[idxattno++] = value; + continue; + } + + /* + * Do nothing if value is not of varlena type. We don't need to + * care about NULL values here, thanks to bv_allnulls above. + * + * If value is stored EXTERNAL, must fetch it so we are not + * depending on outside storage. + * + * XXX Is this actually true? Could it be that the summary is + * NULL even for range with non-NULL data? E.g. degenerate bloom + * filter may be thrown away, etc. + */ + if (VARATT_IS_EXTERNAL(DatumGetPointer(value))) + { + value = PointerGetDatum(detoast_external_attr((struct varlena *) + DatumGetPointer(value))); + free_value = true; + } + + /* + * If value is above size target, and is of a compressible datatype, + * try to compress it in-line. + */ + if (!VARATT_IS_EXTENDED(DatumGetPointer(value)) && + VARSIZE(DatumGetPointer(value)) > TOAST_INDEX_TARGET && + (atttype->typstorage == TYPSTORAGE_EXTENDED || + atttype->typstorage == TYPSTORAGE_MAIN)) + { + Datum cvalue = toast_compress_datum(value); + + if (DatumGetPointer(cvalue) != NULL) + { + /* successful compression */ + if (free_value) + pfree(DatumGetPointer(value)); + + value = cvalue; + free_value = true; + } + } + + /* + * If we untoasted / compressed the value, we need to free it + * after forming the index tuple. + */ + if (free_value) + untoasted_values[nuntoasted++] = value; + +#endif + + values[idxattno++] = value; + } } /* Assert we did not overrun temp arrays */ @@ -193,6 +288,11 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, pfree(nulls); pfree(phony_nullbitmap); +#ifdef TOAST_INDEX_HACK + for (i = 0; i < nuntoasted; i++) + pfree(DatumGetPointer(untoasted_values[i])); +#endif + /* * Now fill in the real null bitmasks. allnulls first. */ diff --git a/src/test/regress/expected/brin.out b/src/test/regress/expected/brin.out index 18403498df..e53d6e4885 100644 --- a/src/test/regress/expected/brin.out +++ b/src/test/regress/expected/brin.out @@ -526,3 +526,44 @@ EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1; Filter: (b = 1) (2 rows) +-- make sure data are properly de-toasted in BRIN index +CREATE TABLE brintest_3 (a text, b text, c text, d text); +-- long random strings (~2000 chars each, so ~6kB for min/max on two +-- columns) to trigger toasting +WITH rand_value AS (SELECT string_agg(md5(i::text),'') AS val FROM generate_series(1,60) s(i)) +INSERT INTO brintest_3 +SELECT val, val, val, val FROM rand_value; +CREATE INDEX brin_test_toast_idx ON brintest_3 USING brin (b, c); +DELETE FROM brintest_3; +-- We need to wait a bit for all transactions to complete, so that the +-- vacuum actually removes the TOAST rows. Creating an index concurrently +-- is a one way to achieve that, because it does exactly such wait. +CREATE INDEX CONCURRENTLY brin_test_temp_idx ON brintest_3(a); +DROP INDEX brin_test_temp_idx; +-- vacuum the table, to discard TOAST data +VACUUM brintest_3; +-- retry insert with a different random-looking (but deterministic) value +-- the value is different, and so should replace either min or max in the +-- brin summary +WITH rand_value AS (SELECT string_agg(md5((-i)::text),'') AS val FROM generate_series(1,60) s(i)) +INSERT INTO brintest_3 +SELECT val, val, val, val FROM rand_value; +-- now try some queries, accessing the brin index +SET enable_seqscan = off; +EXPLAIN (COSTS OFF) +SELECT * FROM brintest_3 WHERE b < '0'; + QUERY PLAN +------------------------------------------------ + Bitmap Heap Scan on brintest_3 + Recheck Cond: (b < '0'::text) + -> Bitmap Index Scan on brin_test_toast_idx + Index Cond: (b < '0'::text) +(4 rows) + +SELECT * FROM brintest_3 WHERE b < '0'; + a | b | c | d +---+---+---+--- +(0 rows) + +DROP TABLE brintest_3; +RESET enable_seqscan; diff --git a/src/test/regress/sql/brin.sql b/src/test/regress/sql/brin.sql index d1a82474f3..3bd866d947 100644 --- a/src/test/regress/sql/brin.sql +++ b/src/test/regress/sql/brin.sql @@ -470,3 +470,42 @@ VACUUM ANALYZE brin_test; EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1; -- Ensure brin index is not used when values are not correlated EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1; + +-- make sure data are properly de-toasted in BRIN index +CREATE TABLE brintest_3 (a text, b text, c text, d text); + +-- long random strings (~2000 chars each, so ~6kB for min/max on two +-- columns) to trigger toasting +WITH rand_value AS (SELECT string_agg(md5(i::text),'') AS val FROM generate_series(1,60) s(i)) +INSERT INTO brintest_3 +SELECT val, val, val, val FROM rand_value; + +CREATE INDEX brin_test_toast_idx ON brintest_3 USING brin (b, c); +DELETE FROM brintest_3; + +-- We need to wait a bit for all transactions to complete, so that the +-- vacuum actually removes the TOAST rows. Creating an index concurrently +-- is a one way to achieve that, because it does exactly such wait. +CREATE INDEX CONCURRENTLY brin_test_temp_idx ON brintest_3(a); +DROP INDEX brin_test_temp_idx; + +-- vacuum the table, to discard TOAST data +VACUUM brintest_3; + +-- retry insert with a different random-looking (but deterministic) value +-- the value is different, and so should replace either min or max in the +-- brin summary +WITH rand_value AS (SELECT string_agg(md5((-i)::text),'') AS val FROM generate_series(1,60) s(i)) +INSERT INTO brintest_3 +SELECT val, val, val, val FROM rand_value; + +-- now try some queries, accessing the brin index +SET enable_seqscan = off; + +EXPLAIN (COSTS OFF) +SELECT * FROM brintest_3 WHERE b < '0'; + +SELECT * FROM brintest_3 WHERE b < '0'; + +DROP TABLE brintest_3; +RESET enable_seqscan;