diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 0becfde113..91d49339f3 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -35,6 +35,7 @@ #include "storage/freespace.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/datum.h" #include "utils/index_selfuncs.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -173,7 +174,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, for (;;) { - bool need_insert = false; + bool need_insert; OffsetNumber off; BrinTuple *brtup; BrinMemTuple *dtup; @@ -241,6 +242,9 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, dtup = brin_deform_tuple(bdesc, brtup, NULL); + /* If the range starts empty, we're certainly going to modify it. */ + need_insert = dtup->bt_empty_range; + /* * Compare the key values of the new tuple to the stored index values; * our deformed tuple will get updated if the new tuple doesn't fit @@ -253,8 +257,20 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, Datum result; BrinValues *bval; FmgrInfo *addValue; + bool has_nulls; bval = &dtup->bt_columns[keyno]; + + /* + * Does the range have actual NULL values? Either of the flags can + * be set, but we ignore the state before adding first row. + * + * We have to remember this, because we'll modify the flags and we + * need to know if the range started as empty. + */ + has_nulls = ((!dtup->bt_empty_range) && + (bval->bv_hasnulls || bval->bv_allnulls)); + addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, @@ -265,8 +281,33 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, nulls[keyno]); /* if that returned true, we need to insert the updated tuple */ need_insert |= DatumGetBool(result); + + /* + * If the range was had actual NULL values (i.e. did not start empty), + * make sure we don't forget about the NULL values. Either the allnulls + * flag is still set to true, or (if the opclass cleared it) we need to + * set hasnulls=true. + * + * XXX This can only happen when the opclass modified the tuple, so the + * modified flag should be set. + */ + if (has_nulls && !(bval->bv_hasnulls || bval->bv_allnulls)) + { + Assert(need_insert); + bval->bv_hasnulls = true; + } } + /* + * After updating summaries for all the keys, mark it as not empty. + * + * If we're actually changing the flag value (i.e. tuple started as + * empty), we should have modified the tuple. So we should not see + * empty range that was not modified. + */ + Assert(!dtup->bt_empty_range || need_insert); + dtup->bt_empty_range = false; + if (!need_insert) { /* @@ -508,6 +549,17 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) CurrentMemoryContext); } + /* + * If the BRIN tuple indicates that this range is empty, + * we can skip it: there's nothing to match. We don't + * need to examine the next columns. + */ + if (dtup->bt_empty_range) + { + addrange = false; + break; + } + /* * Check whether the scan key is consistent with the page * range values; if so, have the pages in the range added @@ -645,8 +697,24 @@ brinbuildCallback(Relation index, FmgrInfo *addValue; BrinValues *col; Form_pg_attribute attr = TupleDescAttr(state->bs_bdesc->bd_tupdesc, i); + bool has_nulls; col = &state->bs_dtuple->bt_columns[i]; + + /* + * Does the range have actual NULL values? Either of the flags can + * be set, but we ignore the state before adding first row. + * + * We have to remember this, because we'll modify the flags and we + * need to know if the range started as empty. + */ + has_nulls = ((!state->bs_dtuple->bt_empty_range) && + (col->bv_hasnulls || col->bv_allnulls)); + + /* + * Call the BRIN_PROCNUM_ADDVALUE procedure. We do this even for NULL + * values, because who knows what the opclass is doing. + */ addValue = index_getprocinfo(index, i + 1, BRIN_PROCNUM_ADDVALUE); @@ -658,7 +726,25 @@ brinbuildCallback(Relation index, PointerGetDatum(state->bs_bdesc), PointerGetDatum(col), values[i], isnull[i]); + + /* + * If the range was had actual NULL values (i.e. did not start empty), + * make sure we don't forget about the NULL values. Either the allnulls + * flag is still set to true, or (if the opclass cleared it) we need to + * set hasnulls=true. + */ + if (has_nulls && !(col->bv_hasnulls || col->bv_allnulls)) + col->bv_hasnulls = true; } + + /* + * After updating summaries for all the keys, mark it as not empty. + * + * If we're actually changing the flag value (i.e. tuple started as + * empty), we should have modified the tuple. So we should not see + * empty range that was not modified. + */ + state->bs_dtuple->bt_empty_range = false; } /* @@ -1465,6 +1551,64 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b) db = brin_deform_tuple(bdesc, b, NULL); MemoryContextSwitchTo(oldcxt); + /* + * Check if the ranges are empty. + * + * If at least one of them is empty, we don't need to call per-key union + * functions at all. If "b" is empty, we just use "a" as the result (it + * might be empty fine, but that's fine). If "a" is empty but "b" is not, + * we use "b" as the result (but we have to copy the data into "a" first). + * + * Only when both ranges are non-empty, we actually do the per-key merge. + */ + + /* If "b" is empty - ignore it and just use "a" (even if it's empty etc.). */ + if (db->bt_empty_range) + { + /* skip the per-key merge */ + MemoryContextDelete(cxt); + return; + } + + /* + * Now we know "b" is not empty. If "a" is empty, then "b" is the result. + * But we need to copy the data from "b" to "a" first, because that's how + * we pass result out. + * + * We have to copy all the global/per-key flags etc. too. + */ + if (a->bt_empty_range) + { + for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) + { + int i; + BrinValues *col_a = &a->bt_columns[keyno]; + BrinValues *col_b = &db->bt_columns[keyno]; + BrinOpcInfo *opcinfo = bdesc->bd_info[keyno]; + + col_a->bv_allnulls = col_b->bv_allnulls; + col_a->bv_hasnulls = col_b->bv_hasnulls; + + /* If "b" has no data, we're done. */ + if (col_b->bv_allnulls) + continue; + + for (i = 0; i < opcinfo->oi_nstored; i++) + col_a->bv_values[i] = + datumCopy(col_b->bv_values[i], + opcinfo->oi_typcache[i]->typbyval, + opcinfo->oi_typcache[i]->typlen); + } + + /* "a" started empty, but "b" was not empty, so remember that */ + a->bt_empty_range = false; + + /* skip the per-key merge */ + MemoryContextDelete(cxt); + return; + } + + /* Neither range is empty, so call the union proc. */ for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++) { FmgrInfo *unionFn; diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c index b3b453aed1..aafd2f17ca 100644 --- a/src/backend/access/brin/brin_tuple.c +++ b/src/backend/access/brin/brin_tuple.c @@ -349,6 +349,9 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, if (tuple->bt_placeholder) rettuple->bt_info |= BRIN_PLACEHOLDER_MASK; + if (tuple->bt_empty_range) + rettuple->bt_info |= BRIN_EMPTY_RANGE_MASK; + *size = len; return rettuple; } @@ -376,7 +379,7 @@ brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size) rettuple = palloc0(len); rettuple->bt_blkno = blkno; rettuple->bt_info = hoff; - rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK; + rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK | BRIN_EMPTY_RANGE_MASK; bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1; bitmask = HIGHBIT; @@ -466,6 +469,8 @@ brin_new_memtuple(BrinDesc *brdesc) dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); + dtup->bt_empty_range = true; + dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext, "brin dtuple", ALLOCSET_DEFAULT_SIZES); @@ -499,6 +504,8 @@ brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc) currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored; } + dtuple->bt_empty_range = true; + return dtuple; } @@ -532,6 +539,11 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple) if (BrinTupleIsPlaceholder(tuple)) dtup->bt_placeholder = true; + + /* ranges start as empty, depends on the BrinTuple */ + if (!BrinTupleIsEmptyRange(tuple)) + dtup->bt_empty_range = false; + dtup->bt_blkno = tuple->bt_blkno; values = dtup->bt_values; diff --git a/src/include/access/brin_tuple.h b/src/include/access/brin_tuple.h index a9ccc3995b..5280872707 100644 --- a/src/include/access/brin_tuple.h +++ b/src/include/access/brin_tuple.h @@ -36,6 +36,7 @@ typedef struct BrinValues typedef struct BrinMemTuple { bool bt_placeholder; /* this is a placeholder tuple */ + bool bt_empty_range; /* range represents no tuples */ BlockNumber bt_blkno; /* heap blkno that the tuple is for */ MemoryContext bt_context; /* memcxt holding the bt_columns values */ /* output arrays for brin_deform_tuple: */ @@ -61,7 +62,7 @@ typedef struct BrinTuple * * 7th (high) bit: has nulls * 6th bit: is placeholder tuple - * 5th bit: unused + * 5th bit: range is empty * 4-0 bit: offset of data * --------------- */ @@ -74,13 +75,14 @@ typedef struct BrinTuple * bt_info manipulation macros */ #define BRIN_OFFSET_MASK 0x1F -/* bit 0x20 is not used at present */ +#define BRIN_EMPTY_RANGE_MASK 0x20 #define BRIN_PLACEHOLDER_MASK 0x40 #define BRIN_NULLS_MASK 0x80 #define BrinTupleDataOffset(tup) ((Size) (((BrinTuple *) (tup))->bt_info & BRIN_OFFSET_MASK)) #define BrinTupleHasNulls(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_NULLS_MASK)) != 0) #define BrinTupleIsPlaceholder(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_PLACEHOLDER_MASK)) != 0) +#define BrinTupleIsEmptyRange(tup) (((((BrinTuple *) (tup))->bt_info & BRIN_EMPTY_RANGE_MASK)) != 0) extern BrinTuple *brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, diff --git a/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out b/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out index 2a4755d099..584ac2602f 100644 --- a/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out +++ b/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out @@ -4,7 +4,7 @@ starting permutation: s2check s1b s2b s1i s2summ s1c s2c s2check step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+-------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} (1 row) step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ; @@ -26,7 +26,7 @@ step s2c: COMMIT; step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+----------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} 2| 1| 1|f |f |f |{1 .. 1000} (2 rows) @@ -35,7 +35,7 @@ starting permutation: s2check s1b s1i s2vacuum s1c s2check step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+-------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} (1 row) step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ; @@ -45,7 +45,7 @@ step s1c: COMMIT; step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+----------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} 2| 1| 1|f |f |f |{1 .. 1000} (2 rows) diff --git a/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec b/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec index 19ac18a2e8..18ba92b7ba 100644 --- a/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec +++ b/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec @@ -9,6 +9,7 @@ setup ) WITH (fillfactor=10); CREATE INDEX brinidx ON brin_iso USING brin (value) WITH (pages_per_range=1); -- this fills the first page + INSERT INTO brin_iso VALUES (NULL); DO $$ DECLARE curtid tid; BEGIN