diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index fdf74256fc..5d17d7013a 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -591,6 +591,17 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) bval = &dtup->bt_columns[attno - 1]; + /* + * If the BRIN tuple indicates that this range is empty, + * we can skip it: there's nothing to match. We don't + * need to examine the next columns. + */ + if (dtup->bt_empty_range) + { + addrange = false; + break; + } + /* * First check if there are any IS [NOT] NULL scan keys, * and if we're violating them. In that case we can @@ -1606,6 +1617,64 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b) db = brin_deform_tuple(bdesc, b, NULL); MemoryContextSwitchTo(oldcxt); + /* + * Check if the ranges are empty. + * + * If at least one of them is empty, we don't need to call per-key union + * functions at all. If "b" is empty, we just use "a" as the result (it + * might be empty fine, but that's fine). If "a" is empty but "b" is not, + * we use "b" as the result (but we have to copy the data into "a" first). + * + * Only when both ranges are non-empty, we actually do the per-key merge. + */ + + /* If "b" is empty - ignore it and just use "a" (even if it's empty etc.). */ + if (db->bt_empty_range) + { + /* skip the per-key merge */ + MemoryContextDelete(cxt); + return; + } + + /* + * Now we know "b" is not empty. If "a" is empty, then "b" is the result. + * But we need to copy the data from "b" to "a" first, because that's how + * we pass result out. + * + * We have to copy all the global/per-key flags etc. too. + */ + if (a->bt_empty_range) + { + for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) + { + int i; + BrinValues *col_a = &a->bt_columns[keyno]; + BrinValues *col_b = &db->bt_columns[keyno]; + BrinOpcInfo *opcinfo = bdesc->bd_info[keyno]; + + col_a->bv_allnulls = col_b->bv_allnulls; + col_a->bv_hasnulls = col_b->bv_hasnulls; + + /* If "b" has no data, we're done. */ + if (col_b->bv_allnulls) + continue; + + for (i = 0; i < opcinfo->oi_nstored; i++) + col_a->bv_values[i] = + datumCopy(col_b->bv_values[i], + opcinfo->oi_typcache[i]->typbyval, + opcinfo->oi_typcache[i]->typlen); + } + + /* "a" started empty, but "b" was not empty, so remember that */ + a->bt_empty_range = false; + + /* skip the per-key merge */ + MemoryContextDelete(cxt); + return; + } + + /* Now we know neither range is empty. */ for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { FmgrInfo *unionFn; @@ -1711,7 +1780,9 @@ add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, Datum *values, bool *nulls) { int keyno; - bool modified = false; + + /* If the range starts empty, we're certainly going to modify it. */ + bool modified = dtup->bt_empty_range; /* * Compare the key values of the new tuple to the stored index values; our @@ -1725,9 +1796,24 @@ add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, Datum result; BrinValues *bval; FmgrInfo *addValue; + bool has_nulls; bval = &dtup->bt_columns[keyno]; + /* + * Does the range have actual NULL values? Either of the flags can + * be set, but we ignore the state before adding first row. + * + * We have to remember this, because we'll modify the flags and we + * need to know if the range started as empty. + */ + has_nulls = ((!dtup->bt_empty_range) && + (bval->bv_hasnulls || bval->bv_allnulls)); + + /* + * If the value we're adding is NULL, handle it locally. Otherwise + * call the BRIN_PROCNUM_ADDVALUE procedure. + */ if (bdesc->bd_info[keyno]->oi_regular_nulls && nulls[keyno]) { /* @@ -1753,8 +1839,33 @@ add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, nulls[keyno]); /* if that returned true, we need to insert the updated tuple */ modified |= DatumGetBool(result); + + /* + * If the range was had actual NULL values (i.e. did not start empty), + * make sure we don't forget about the NULL values. Either the allnulls + * flag is still set to true, or (if the opclass cleared it) we need to + * set hasnulls=true. + * + * XXX This can only happen when the opclass modified the tuple, so the + * modified flag should be set. + */ + if (has_nulls && !(bval->bv_hasnulls || bval->bv_allnulls)) + { + Assert(modified); + bval->bv_hasnulls = true; + } } + /* + * After updating summaries for all the keys, mark it as not empty. + * + * If we're actually changing the flag value (i.e. tuple started as empty), + * we should have modified the tuple. So we should not see empty range that + * was not modified. + */ + Assert(!dtup->bt_empty_range || modified); + dtup->bt_empty_range = false; + return modified; } diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c index c0e2dbd23b..4a5a4da231 100644 --- a/src/backend/access/brin/brin_tuple.c +++ b/src/backend/access/brin/brin_tuple.c @@ -372,6 +372,9 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, if (tuple->bt_placeholder) rettuple->bt_info |= BRIN_PLACEHOLDER_MASK; + if (tuple->bt_empty_range) + rettuple->bt_info |= BRIN_EMPTY_RANGE_MASK; + *size = len; return rettuple; } @@ -399,7 +402,7 @@ brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size) rettuple = palloc0(len); rettuple->bt_blkno = blkno; rettuple->bt_info = hoff; - rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK; + rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK | BRIN_EMPTY_RANGE_MASK; bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1; bitmask = HIGHBIT; @@ -489,6 +492,8 @@ brin_new_memtuple(BrinDesc *brdesc) dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); + dtup->bt_empty_range = true; + dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext, "brin dtuple", ALLOCSET_DEFAULT_SIZES); @@ -527,6 +532,8 @@ brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc) currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored; } + dtuple->bt_empty_range = true; + return dtuple; } @@ -560,6 +567,11 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple) if (BrinTupleIsPlaceholder(tuple)) dtup->bt_placeholder = true; + + /* ranges start as empty, depends on the BrinTuple */ + if (!BrinTupleIsEmptyRange(tuple)) + dtup->bt_empty_range = false; + dtup->bt_blkno = tuple->bt_blkno; values = dtup->bt_values; diff --git a/src/include/access/brin_tuple.h b/src/include/access/brin_tuple.h index ced20d4543..634aa4b20e 100644 --- a/src/include/access/brin_tuple.h +++ b/src/include/access/brin_tuple.h @@ -44,6 +44,7 @@ typedef struct BrinValues typedef struct BrinMemTuple { bool bt_placeholder; /* this is a placeholder tuple */ + bool bt_empty_range; /* range represents no tuples */ BlockNumber bt_blkno; /* heap blkno that the tuple is for */ MemoryContext bt_context; /* memcxt holding the bt_columns values */ /* output arrays for brin_deform_tuple: */ @@ -69,7 +70,7 @@ typedef struct BrinTuple * * 7th (high) bit: has nulls * 6th bit: is placeholder tuple - * 5th bit: unused + * 5th bit: range is empty * 4-0 bit: offset of data * --------------- */ @@ -82,13 +83,14 @@ typedef struct BrinTuple * bt_info manipulation macros */ #define BRIN_OFFSET_MASK 0x1F -/* bit 0x20 is not used at present */ +#define BRIN_EMPTY_RANGE_MASK 0x20 #define BRIN_PLACEHOLDER_MASK 0x40 #define BRIN_NULLS_MASK 0x80 #define BrinTupleDataOffset(tup) ((Size) (((BrinTuple *) (tup))->bt_info & BRIN_OFFSET_MASK)) #define BrinTupleHasNulls(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_NULLS_MASK)) != 0) #define BrinTupleIsPlaceholder(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_PLACEHOLDER_MASK)) != 0) +#define BrinTupleIsEmptyRange(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_EMPTY_RANGE_MASK)) != 0) extern BrinTuple *brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, diff --git a/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out b/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out index 2a4755d099..584ac2602f 100644 --- a/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out +++ b/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out @@ -4,7 +4,7 @@ starting permutation: s2check s1b s2b s1i s2summ s1c s2c s2check step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+-------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} (1 row) step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ; @@ -26,7 +26,7 @@ step s2c: COMMIT; step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+----------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} 2| 1| 1|f |f |f |{1 .. 1000} (2 rows) @@ -35,7 +35,7 @@ starting permutation: s2check s1b s1i s2vacuum s1c s2check step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+-------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} (1 row) step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ; @@ -45,7 +45,7 @@ step s1c: COMMIT; step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+----------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} 2| 1| 1|f |f |f |{1 .. 1000} (2 rows) diff --git a/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec b/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec index 19ac18a2e8..18ba92b7ba 100644 --- a/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec +++ b/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec @@ -9,6 +9,7 @@ setup ) WITH (fillfactor=10); CREATE INDEX brinidx ON brin_iso USING brin (value) WITH (pages_per_range=1); -- this fills the first page + INSERT INTO brin_iso VALUES (NULL); DO $$ DECLARE curtid tid; BEGIN