diff --git a/contrib/pageinspect/expected/brin.out b/contrib/pageinspect/expected/brin.out index 098ddc202f..3f6e5174bc 100644 --- a/contrib/pageinspect/expected/brin.out +++ b/contrib/pageinspect/expected/brin.out @@ -89,4 +89,22 @@ SELECT brin_revmap_data(decode(repeat('00', :block_size), 'hex')); (1 row) +-- Test that partial indexes have all pages, including empty ones. +CREATE TABLE test2 (a int); +INSERT INTO test2 SELECT i FROM generate_series(1,1000) s(i); +-- No rows match the index predicate, make sure the index has the right number +-- of ranges (same as number of page ranges). +CREATE INDEX ON test2 USING brin (a) WITH (pages_per_range=1) WHERE (a IS NULL); +ANALYZE test2; +-- Does the index have one summary of the relation? +SELECT (COUNT(*) = (SELECT relpages FROM pg_class WHERE relname = 'test2')) AS ranges_do_match + FROM generate_series((SELECT (lastrevmappage + 1) FROM brin_metapage_info(get_raw_page('test2_a_idx', 0))), + (SELECT (relpages - 1) FROM pg_class WHERE relname = 'test2_a_idx')) AS pages(p), + LATERAL brin_page_items(get_raw_page('test2_a_idx', p), 'test2_a_idx') AS items; + ranges_do_match +----------------- + t +(1 row) + DROP TABLE test1; +DROP TABLE test2; diff --git a/contrib/pageinspect/sql/brin.sql b/contrib/pageinspect/sql/brin.sql index 96b4645187..50f260b8e1 100644 --- a/contrib/pageinspect/sql/brin.sql +++ b/contrib/pageinspect/sql/brin.sql @@ -36,4 +36,21 @@ SELECT brin_page_items(decode(repeat('00', :block_size), 'hex'), 'test1_a_idx'); SELECT brin_metapage_info(decode(repeat('00', :block_size), 'hex')); SELECT brin_revmap_data(decode(repeat('00', :block_size), 'hex')); +-- Test that partial indexes have all pages, including empty ones. +CREATE TABLE test2 (a int); +INSERT INTO test2 SELECT i FROM generate_series(1,1000) s(i); + +-- No rows match the index predicate, make sure the index has the right number +-- of ranges (same as number of page ranges). 
+CREATE INDEX ON test2 USING brin (a) WITH (pages_per_range=1) WHERE (a IS NULL); + +ANALYZE test2; + +-- Does the index have one summary of the relation? +SELECT (COUNT(*) = (SELECT relpages FROM pg_class WHERE relname = 'test2')) AS ranges_do_match + FROM generate_series((SELECT (lastrevmappage + 1) FROM brin_metapage_info(get_raw_page('test2_a_idx', 0))), + (SELECT (relpages - 1) FROM pg_class WHERE relname = 'test2_a_idx')) AS pages(p), + LATERAL brin_page_items(get_raw_page('test2_a_idx', p), 'test2_a_idx') AS items; + DROP TABLE test1; +DROP TABLE test2; diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 4f2dfdd17b..14be939ad8 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -53,9 +53,13 @@ typedef struct BrinBuildState Buffer bs_currentInsertBuf; BlockNumber bs_pagesPerRange; BlockNumber bs_currRangeStart; + BlockNumber bs_maxRangeStart; BrinRevmap *bs_rmAccess; BrinDesc *bs_bdesc; BrinMemTuple *bs_dtuple; + BrinTuple *bs_emptyTuple; + Size bs_emptyTupleLen; + MemoryContext bs_context; } BrinBuildState; /* @@ -82,7 +86,9 @@ typedef struct BrinOpaque #define BRIN_ALL_BLOCKRANGES InvalidBlockNumber static BrinBuildState *initialize_brin_buildstate(Relation idxRel, - BrinRevmap *revmap, BlockNumber pagesPerRange); + BrinRevmap *revmap, + BlockNumber pagesPerRange, + BlockNumber tablePages); static BrinInsertState *initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo); static void terminate_brin_buildstate(BrinBuildState *state); static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, @@ -94,6 +100,8 @@ static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy); static bool add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, const Datum *values, const bool *nulls); static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys); +static void brin_fill_empty_ranges(BrinBuildState *state, + BlockNumber 
prevRange, BlockNumber maxRange); /* * BRIN handler function: return IndexAmRoutine with access method parameters @@ -933,7 +941,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) * Initialize our state, including the deformed tuple state. */ revmap = brinRevmapInitialize(index, &pagesPerRange); - state = initialize_brin_buildstate(index, revmap, pagesPerRange); + state = initialize_brin_buildstate(index, revmap, pagesPerRange, + RelationGetNumberOfBlocks(heap)); /* * Now scan the relation. No syncscan allowed here because we want the @@ -945,6 +954,17 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) /* process the final batch */ form_and_insert_tuple(state); + /* + * Backfill the final ranges with empty data. + * + * This saves us from doing what amounts to full table scans when the + * index has a predicate like WHERE (nonnull_column IS NULL), or other + * very selective predicates. + */ + brin_fill_empty_ranges(state, + state->bs_currRangeStart, + state->bs_maxRangeStart); + /* release resources */ idxtuples = state->bs_numtuples; brinRevmapTerminate(state->bs_rmAccess); @@ -1358,9 +1378,10 @@ brinGetStats(Relation index, BrinStatsData *stats) */ static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, - BlockNumber pagesPerRange) + BlockNumber pagesPerRange, BlockNumber tablePages) { BrinBuildState *state; + BlockNumber lastRange = 0; state = palloc_object(BrinBuildState); @@ -1373,6 +1394,22 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, state->bs_bdesc = brin_build_desc(idxRel); state->bs_dtuple = brin_new_memtuple(state->bs_bdesc); + /* Remember the memory context to use for an empty tuple, if needed. */ + state->bs_context = CurrentMemoryContext; + state->bs_emptyTuple = NULL; + state->bs_emptyTupleLen = 0; + + /* + * Calculate the start of the last page range. Page numbers are 0-based, + * so to calculate the index we need to subtract one. 
The integer division + * gives us the index of the page range. + */ + if (tablePages > 0) + lastRange = ((tablePages - 1) / pagesPerRange) * pagesPerRange; + + /* Now calculate the start of the next range. */ + state->bs_maxRangeStart = lastRange + state->bs_pagesPerRange; + return state; } @@ -1612,7 +1649,8 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, /* first time through */ Assert(!indexInfo); state = initialize_brin_buildstate(index, revmap, - pagesPerRange); + pagesPerRange, + InvalidBlockNumber); indexInfo = BuildIndexInfo(index); } summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks); @@ -1982,3 +2020,78 @@ check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys) return true; } + +/* + * brin_build_empty_tuple + * Maybe initialize a BRIN tuple representing an empty range. + * + * Stores in bs_emptyTuple a BRIN tuple for an empty page range starting at the + * specified block number. The empty tuple is initialized only once, when it's + * needed for the first time, stored in the memory context bs_context to ensure + * proper life span, and reused on following calls. All empty tuples are + * exactly the same except for the bt_blkno field, which is set to the value + * in the blkno parameter. Nothing is returned. + */ +static void +brin_build_empty_tuple(BrinBuildState *state, BlockNumber blkno) +{ + /* First time an empty tuple is requested? If yes, initialize it. */ + if (state->bs_emptyTuple == NULL) + { + MemoryContext oldcxt; + BrinMemTuple *dtuple = brin_new_memtuple(state->bs_bdesc); + + /* Allocate the tuple in context for the whole index build. */ + oldcxt = MemoryContextSwitchTo(state->bs_context); + + state->bs_emptyTuple = brin_form_tuple(state->bs_bdesc, blkno, dtuple, + &state->bs_emptyTupleLen); + + MemoryContextSwitchTo(oldcxt); + } + else + { + /* If we already have an empty tuple, just update the block. 
*/ + state->bs_emptyTuple->bt_blkno = blkno; + } +} + +/* + * brin_fill_empty_ranges + * Add BRIN index tuples representing empty page ranges. + * + * prevRange/nextRange determine for which page ranges to add empty summaries. + * Both boundaries are exclusive, i.e. only ranges starting at blkno for which + * (prevRange < blkno < nextRange) will be added to the index. + * + * If prevRange is InvalidBlockNumber, this means there was no previous page + * range (i.e. the first empty range to add is for blkno=0). + * + * The empty tuple is built only once, and then reused for all future calls. + */ +static void +brin_fill_empty_ranges(BrinBuildState *state, + BlockNumber prevRange, BlockNumber nextRange) +{ + BlockNumber blkno; + + /* + * If we already summarized some ranges, we need to start with the next + * one. Otherwise start from the first range of the table. + */ + blkno = (prevRange == InvalidBlockNumber) ? 0 : (prevRange + state->bs_pagesPerRange); + + /* Generate empty ranges until we hit the next non-empty range. */ + while (blkno < nextRange) + { + /* Build the empty tuple on the first call; later calls only set its block. */ + brin_build_empty_tuple(state, blkno); + + brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess, + &state->bs_currentInsertBuf, + blkno, state->bs_emptyTuple, state->bs_emptyTupleLen); + + /* try next page range */ + blkno += state->bs_pagesPerRange; + } +}