Measure Bloom index signature-length reloption in bits, not words.

Per discussion, this is a more understandable and future-proof way of
exposing the setting to users.  On-disk, we can still store it in words,
so as to not break on-disk compatibility with beta1.

Along the way, clean up the code associated with Bloom reloptions.
Provide explicit macros for default and maximum lengths rather than
having magic numbers buried in multiple places in the code.  Drop
the adjustBloomOptions() code altogether: it was useless in view of
the fact that reloptions.c already performed default-substitution and
range checking for the options.  Rename a couple of macros and types
for more clarity.

Discussion: <23767.1464926580@sss.pgh.pa.us>
This commit is contained in:
Tom Lane 2016-06-03 10:52:36 -04:00
parent fdfaccfa79
commit ee4af347ba
4 changed files with 86 additions and 93 deletions

View File

@ -79,18 +79,31 @@ typedef BloomPageOpaqueData *BloomPageOpaque;
#define BLOOM_HEAD_BLKNO (1) /* first data page */
/*
* Maximum of bloom signature length in uint16. Actual value
* is 512 bytes
* We store Bloom signatures as arrays of uint16 words.
*/
#define MAX_BLOOM_LENGTH (256)
typedef uint16 BloomSignatureWord;
#define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
/*
* Default and maximum Bloom signature length in bits.
*/
#define DEFAULT_BLOOM_LENGTH (5 * SIGNWORDBITS)
#define MAX_BLOOM_LENGTH (256 * SIGNWORDBITS)
/*
* Default and maximum signature bits generated per index key.
*/
#define DEFAULT_BLOOM_BITS 2
#define MAX_BLOOM_BITS (MAX_BLOOM_LENGTH - 1)
/* Bloom index options */
typedef struct BloomOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int bloomLength; /* length of signature in uint16 */
int bitSize[INDEX_MAX_KEYS]; /* signature bits per index
* key */
int bloomLength; /* length of signature in words (not bits!) */
int bitSize[INDEX_MAX_KEYS]; /* # of bits generated for each
* index key */
} BloomOptions;
/*
@ -143,12 +156,10 @@ typedef struct BloomState
/*
* Tuples are very different from all other relations
*/
typedef uint16 SignType;
typedef struct BloomTuple
{
ItemPointerData heapPtr;
SignType sign[FLEXIBLE_ARRAY_MEMBER];
BloomSignatureWord sign[FLEXIBLE_ARRAY_MEMBER];
} BloomTuple;
#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
@ -156,7 +167,7 @@ typedef struct BloomTuple
/* Opaque data structure for bloom index scan */
typedef struct BloomScanOpaqueData
{
SignType *sign; /* Scan signature */
BloomSignatureWord *sign; /* Scan signature */
BloomState state;
} BloomScanOpaqueData;
@ -170,7 +181,7 @@ extern void BloomFillMetapage(Relation index, Page metaPage);
extern void BloomInitMetapage(Relation index);
extern void BloomInitPage(Page page, uint16 flags);
extern Buffer BloomNewBuffer(Relation index);
extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno);
extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);

View File

@ -93,7 +93,7 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
/* New search: have to calculate search signature */
ScanKey skey = scan->keyData;
so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength);
so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);
for (i = 0; i < scan->numberOfKeys; i++)
{

View File

@ -27,23 +27,26 @@
#include "bloom.h"
/* Signature dealing macros */
#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) )
#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
/* Signature dealing macros - note i is assumed to be of type int */
#define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
PG_FUNCTION_INFO_V1(blhandler);
/* Kind of relation optioms for bloom index */
/* Kind of relation options for bloom index */
static relopt_kind bl_relopt_kind;
/* parse table for fillRelOptions */
static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
static int32 myRand(void);
static void mySrand(uint32 seed);
/*
* Module initialize function: initilized relation options.
* Module initialize function: initialize info about Bloom relation options.
*
* Note: keep this in sync with makeDefaultBloomOptions().
*/
void
_PG_init(void)
@ -53,17 +56,46 @@ _PG_init(void)
bl_relopt_kind = add_reloption_kind();
/* Option for length of signature */
add_int_reloption(bl_relopt_kind, "length",
"Length of signature in uint16 type", 5, 1, 256);
"Length of signature in bits",
DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH);
bl_relopt_tab[0].optname = "length";
bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
/* Number of bits for each possible index column: col1, col2, ... */
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
snprintf(buf, 16, "col%d", i + 1);
snprintf(buf, sizeof(buf), "col%d", i + 1);
add_int_reloption(bl_relopt_kind, buf,
"Number of bits for corresponding column", 2, 1, 2048);
"Number of bits generated for each index column",
DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS);
bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
buf);
bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
}
}
/*
 * Build a freshly palloc0'd BloomOptions struct holding the default settings.
 *
 * bloomLength is stored in signature words, so DEFAULT_BLOOM_LENGTH (which is
 * expressed in bits) is rounded up to a whole number of words.  Every index
 * column gets DEFAULT_BLOOM_BITS.  These defaults must stay in sync with the
 * ones registered in _PG_init().
 */
static BloomOptions *
makeDefaultBloomOptions(void)
{
	BloomOptions *result = (BloomOptions *) palloc0(sizeof(BloomOptions));
	int			attno = 0;

	/* Record the struct's total size in its varlena header */
	SET_VARSIZE(result, sizeof(BloomOptions));

	/* Bits -> words, rounding up */
	result->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;

	/* Same number of generated bits for every possible index column */
	while (attno < INDEX_MAX_KEYS)
		result->bitSize[attno++] = DEFAULT_BLOOM_BITS;

	return result;
}
/*
* Bloom handler function: return IndexAmRoutine with access method parameters
* and callbacks.
@ -157,7 +189,7 @@ initBloomState(BloomState *state, Relation index)
memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
sizeof(SignType) * state->opts.bloomLength;
sizeof(BloomSignatureWord) * state->opts.bloomLength;
}
/*
@ -208,7 +240,7 @@ mySrand(uint32 seed)
* Add bits of given value to the signature.
*/
void
signValue(BloomState *state, SignType *sign, Datum value, int attno)
signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
{
uint32 hashVal;
int nBit,
@ -231,8 +263,8 @@ signValue(BloomState *state, SignType *sign, Datum value, int attno)
for (j = 0; j < state->opts.bitSize[attno]; j++)
{
/* prevent mutiple evaluation */
nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE);
/* prevent multiple evaluation in SETBIT macro */
nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
SETBIT(sign, nBit);
}
}
@ -361,39 +393,6 @@ BloomInitPage(Page page, uint16 flags)
opaque->bloom_page_id = BLOOM_PAGE_ID;
}
/*
* Adjust options of bloom index.
*
* This must produce default options when *opts is initially all-zero.
*/
static void
adjustBloomOptions(BloomOptions *opts)
{
int i;
/* Default length of bloom filter is 5 of 16-bit integers */
if (opts->bloomLength <= 0)
opts->bloomLength = 5;
else if (opts->bloomLength > MAX_BLOOM_LENGTH)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("length of bloom signature (%d) is greater than maximum %d",
opts->bloomLength, MAX_BLOOM_LENGTH)));
/* Check signature length */
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
/*
* Zero and negative number of bits is meaningless. Also setting
* more bits than signature have seems useless. Replace both cases
* with 2 bits default.
*/
if (opts->bitSize[i] <= 0
|| opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE)
opts->bitSize[i] = 2;
}
}
/*
* Fill in metapage for bloom index.
*/
@ -405,14 +404,11 @@ BloomFillMetapage(Relation index, Page metaPage)
/*
* Choose the index's options. If reloptions have been assigned, use
* those, otherwise create default options by applying adjustBloomOptions
* to a zeroed chunk of memory. We apply adjustBloomOptions to existing
* reloptions too, just out of paranoia; they should be valid already.
* those, otherwise create default options.
*/
opts = (BloomOptions *) index->rd_options;
if (!opts)
opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
adjustBloomOptions(opts);
opts = makeDefaultBloomOptions();
/*
* Initialize contents of meta page, including a copy of the options,
@ -462,30 +458,15 @@ bloptions(Datum reloptions, bool validate)
relopt_value *options;
int numoptions;
BloomOptions *rdopts;
relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
int i;
char buf[16];
/* Option for length of signature */
tab[0].optname = "length";
tab[0].opttype = RELOPT_TYPE_INT;
tab[0].offset = offsetof(BloomOptions, bloomLength);
/* Number of bits for each of possible columns: col1, col2, ... */
for (i = 0; i < INDEX_MAX_KEYS; i++)
{
snprintf(buf, sizeof(buf), "col%d", i + 1);
tab[i + 1].optname = pstrdup(buf);
tab[i + 1].opttype = RELOPT_TYPE_INT;
tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
}
/* Parse the user-given reloptions */
options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
validate, tab, INDEX_MAX_KEYS + 1);
validate, bl_relopt_tab, lengthof(bl_relopt_tab));
adjustBloomOptions(rdopts);
/* Convert signature length from # of bits to # of words, rounding up */
rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
return (bytea *) rdopts;
}

View File

@ -8,8 +8,8 @@
</indexterm>
<para>
<literal>bloom</> is a module which implements an index access method. It comes
as an example of custom access methods and generic WAL records usage. But it
<literal>bloom</> is a module that implements an index access method. It comes
as an example of custom access methods and generic WAL record usage. But it
is also useful in itself.
</para>
@ -22,8 +22,9 @@
allows fast exclusion of non-candidate tuples via signatures.
Since a signature is a lossy representation of all indexed attributes,
search results must be rechecked using heap information.
The user can specify signature length (in uint16, default is 5) and the
number of bits, which can be set per attribute (1 < colN < 2048).
The user can specify signature length in bits (default 80, maximum 4096)
and the number of bits generated for each index column (default 2,
maximum 4095).
</para>
<para>
@ -51,17 +52,17 @@
<term><literal>length</></term>
<listitem>
<para>
Length of signature in uint16 type values
Length of signature in bits
</para>
</listitem>
</varlistentry>
</variablelist>
<variablelist>
<varlistentry>
<term><literal>col1 &mdash; col16</></term>
<term><literal>col1 &mdash; col32</></term>
<listitem>
<para>
Number of bits for corresponding column
Number of bits generated for each index column
</para>
</listitem>
</varlistentry>
@ -77,12 +78,12 @@
<programlisting>
CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
WITH (length=5, col1=2, col2=2, col3=4);
WITH (length=80, col1=2, col2=2, col3=4);
</programlisting>
<para>
Here, we created a bloom index with a signature length of 80 bits,
and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits.
and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.
</para>
<para>