Require empty Bitmapsets to be represented as NULL.

When I designed the Bitmapset module, I set things up so that an empty
Bitmapset could be represented either by a NULL pointer, or by an
allocated object all of whose bits are zero.  I've recently come to
the conclusion that that was a bad idea and we should instead have a
convention like the longstanding invariant for Lists, whereby an empty
list is represented by NIL and nothing else.

To do this, we need to fix bms_intersect, bms_difference, and a couple
of other functions to check for having produced an empty result; but
then we can replace bms_is_empty(a) by a simple "a == NULL" test.

This is very likely a (marginal) win performance-wise, because we
call bms_is_empty many more times than those other functions put
together.  However, the real reason to do it is that we have various
places that have hand-implemented a rule about "this Bitmapset
variable must be exactly NULL if empty", so that they can use
checks-for-null in place of bms_is_empty calls in particularly hot
code paths.  That is a really fragile, mistake-prone way to do things,
and I'm surprised that we've seldom been bitten by it.  It's not well
documented at all which variables have this property, so you can't
readily tell which code might be violating those conventions.  By
making the convention universal, we can eliminate a subtle source of
bugs.

Patch by me; thanks to Nathan Bossart and Richard Guo for review.

Discussion: https://postgr.es/m/1159933.1677621588@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2023-03-02 11:47:26 -05:00
parent 141225b251
commit 00b41463c2
2 changed files with 57 additions and 23 deletions

View File

@ -5,10 +5,8 @@
*
* A bitmap set can represent any set of nonnegative integers, although
* it is mainly intended for sets where the maximum value is not large,
* say at most a few hundred. By convention, a NULL pointer is always
* accepted by all operations to represent the empty set. (But beware
* that this is not the only representation of the empty set. Use
* bms_is_empty() in preference to testing for NULL.)
* say at most a few hundred. By convention, we always represent the
* empty set by a NULL pointer.
*
*
* Copyright (c) 2003-2023, PostgreSQL Global Development Group
@ -66,6 +64,8 @@
#error "invalid BITS_PER_BITMAPWORD"
#endif
static bool bms_is_empty_internal(const Bitmapset *a);
/*
* bms_copy - make a palloc'd copy of a bitmapset
@ -104,10 +104,10 @@ bms_equal(const Bitmapset *a, const Bitmapset *b)
{
if (b == NULL)
return true;
return bms_is_empty(b);
return false;
}
else if (b == NULL)
return bms_is_empty(a);
return false;
/* Identify shorter and longer input */
if (a->nwords <= b->nwords)
{
@ -151,9 +151,9 @@ bms_compare(const Bitmapset *a, const Bitmapset *b)
/* Handle cases where either input is NULL */
if (a == NULL)
return bms_is_empty(b) ? 0 : -1;
return (b == NULL) ? 0 : -1;
else if (b == NULL)
return bms_is_empty(a) ? 0 : +1;
return +1;
/* Handle cases where one input is longer than the other */
shortlen = Min(a->nwords, b->nwords);
for (i = shortlen; i < a->nwords; i++)
@ -282,6 +282,12 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b)
resultlen = result->nwords;
for (i = 0; i < resultlen; i++)
result->words[i] &= other->words[i];
/* If we computed an empty result, we must return NULL */
if (bms_is_empty_internal(result))
{
pfree(result);
return NULL;
}
return result;
}
@ -300,12 +306,22 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
return NULL;
if (b == NULL)
return bms_copy(a);
/*
* In Postgres' usage, an empty result is a very common case, so it's
* worth optimizing for that by testing bms_nonempty_difference(). This
* saves us a palloc/pfree cycle compared to checking after-the-fact.
*/
if (!bms_nonempty_difference(a, b))
return NULL;
/* Copy the left input */
result = bms_copy(a);
/* And remove b's bits from result */
shortlen = Min(a->nwords, b->nwords);
for (i = 0; i < shortlen; i++)
result->words[i] &= ~b->words[i];
/* Need not check for empty result, since we handled that case above */
return result;
}
@ -323,7 +339,7 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b)
if (a == NULL)
return true; /* empty set is a subset of anything */
if (b == NULL)
return bms_is_empty(a);
return false;
/* Check common words */
shortlen = Min(a->nwords, b->nwords);
for (i = 0; i < shortlen; i++)
@ -362,10 +378,10 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
{
if (b == NULL)
return BMS_EQUAL;
return bms_is_empty(b) ? BMS_EQUAL : BMS_SUBSET1;
return BMS_SUBSET1;
}
if (b == NULL)
return bms_is_empty(a) ? BMS_EQUAL : BMS_SUBSET2;
return BMS_SUBSET2;
/* Check common words */
result = BMS_EQUAL; /* status so far */
shortlen = Min(a->nwords, b->nwords);
@ -554,7 +570,7 @@ bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
if (a == NULL)
return false;
if (b == NULL)
return !bms_is_empty(a);
return true;
/* Check words in common */
shortlen = Min(a->nwords, b->nwords);
for (i = 0; i < shortlen; i++)
@ -696,18 +712,18 @@ bms_membership(const Bitmapset *a)
}
/*
* bms_is_empty - is a set empty?
* bms_is_empty_internal - is a set empty?
*
* This is even faster than bms_membership().
* This is now used only locally, to detect cases where a function has
* computed an empty set that we must now get rid of. Hence, we can
* assume the input isn't NULL.
*/
bool
bms_is_empty(const Bitmapset *a)
static bool
bms_is_empty_internal(const Bitmapset *a)
{
int nwords;
int wordnum;
if (a == NULL)
return true;
nwords = a->nwords;
for (wordnum = 0; wordnum < nwords; wordnum++)
{
@ -786,6 +802,12 @@ bms_del_member(Bitmapset *a, int x)
bitnum = BITNUM(x);
if (wordnum < a->nwords)
a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
/* If we computed an empty result, we must return NULL */
if (bms_is_empty_internal(a))
{
pfree(a);
return NULL;
}
return a;
}
@ -922,6 +944,12 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
a->words[i] &= b->words[i];
for (; i < a->nwords; i++)
a->words[i] = 0;
/* If we computed an empty result, we must return NULL */
if (bms_is_empty_internal(a))
{
pfree(a);
return NULL;
}
return a;
}
@ -943,6 +971,12 @@ bms_del_members(Bitmapset *a, const Bitmapset *b)
shortlen = Min(a->nwords, b->nwords);
for (i = 0; i < shortlen; i++)
a->words[i] &= ~b->words[i];
/* If we computed an empty result, we must return NULL */
if (bms_is_empty_internal(a))
{
pfree(a);
return NULL;
}
return a;
}

View File

@ -5,10 +5,8 @@
*
* A bitmap set can represent any set of nonnegative integers, although
* it is mainly intended for sets where the maximum value is not large,
* say at most a few hundred. By convention, a NULL pointer is always
* accepted by all operations to represent the empty set. (But beware
* that this is not the only representation of the empty set. Use
* bms_is_empty() in preference to testing for NULL.)
* say at most a few hundred. By convention, we always represent the
* empty set by a NULL pointer.
*
*
* Copyright (c) 2003-2023, PostgreSQL Global Development Group
@ -102,7 +100,9 @@ extern int bms_num_members(const Bitmapset *a);
/* optimized tests when we don't need to know exact membership count: */
extern BMS_Membership bms_membership(const Bitmapset *a);
extern bool bms_is_empty(const Bitmapset *a);
/* NULL is now the only allowed representation of an empty bitmapset */
#define bms_is_empty(a) ((a) == NULL)
/* these routines recycle (modify or free) their non-const inputs: */