Bloom index contrib module
This module provides a new index access method. It is a simple Bloom filter implemented as a PostgreSQL index, and it can be beneficial for searches that involve a large number of columns. The module is also currently the only way to test the generic WAL interface committed earlier. Author: Teodor Sigaev, Alexander Korotkov. Reviewers: Aleksander Alekseev, Michael Paquier, Jim Nasby.
This commit is contained in:
parent
4e56e5a6de
commit
9ee014fc89
|
@ -8,6 +8,7 @@ SUBDIRS = \
|
||||||
adminpack \
|
adminpack \
|
||||||
auth_delay \
|
auth_delay \
|
||||||
auto_explain \
|
auto_explain \
|
||||||
|
bloom \
|
||||||
btree_gin \
|
btree_gin \
|
||||||
btree_gist \
|
btree_gist \
|
||||||
chkpass \
|
chkpass \
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Generated subdirectories
|
||||||
|
/log/
|
||||||
|
/results/
|
||||||
|
/tmp_check/
|
|
@ -0,0 +1,24 @@
|
||||||
|
# contrib/bloom/Makefile
|
||||||
|
|
||||||
|
MODULE_big = bloom
|
||||||
|
OBJS = blcost.o blinsert.o blscan.o blutils.o blvacuum.o blvalidate.o $(WIN32RES)
|
||||||
|
|
||||||
|
EXTENSION = bloom
|
||||||
|
DATA = bloom--1.0.sql
|
||||||
|
PGFILEDESC = "bloom access method - signature file based index"
|
||||||
|
|
||||||
|
REGRESS = bloom
|
||||||
|
|
||||||
|
ifdef USE_PGXS
|
||||||
|
PG_CONFIG = pg_config
|
||||||
|
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||||
|
include $(PGXS)
|
||||||
|
else
|
||||||
|
subdir = contrib/bloom
|
||||||
|
top_builddir = ../..
|
||||||
|
include $(top_builddir)/src/Makefile.global
|
||||||
|
include $(top_srcdir)/contrib/contrib-global.mk
|
||||||
|
endif
|
||||||
|
|
||||||
|
wal-check: temp-install
|
||||||
|
$(prove_check)
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* blcost.c
|
||||||
|
* Cost estimate function for bloom indexes.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* contrib/bloom/blcost.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "fmgr.h"
|
||||||
|
#include "optimizer/cost.h"
|
||||||
|
#include "utils/selfuncs.h"
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Estimate cost of bloom index scan.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
|
||||||
|
Cost *indexStartupCost, Cost *indexTotalCost,
|
||||||
|
Selectivity *indexSelectivity, double *indexCorrelation)
|
||||||
|
{
|
||||||
|
IndexOptInfo *index = path->indexinfo;
|
||||||
|
List *qinfos;
|
||||||
|
GenericCosts costs;
|
||||||
|
|
||||||
|
/* Do preliminary analysis of indexquals */
|
||||||
|
qinfos = deconstruct_indexquals(path);
|
||||||
|
|
||||||
|
MemSet(&costs, 0, sizeof(costs));
|
||||||
|
|
||||||
|
/* We have to visit all index tuples anyway */
|
||||||
|
costs.numIndexTuples = index->tuples;
|
||||||
|
|
||||||
|
/* Use generic estimate */
|
||||||
|
genericcostestimate(root, path, loop_count, qinfos, &costs);
|
||||||
|
|
||||||
|
*indexStartupCost = costs.indexStartupCost;
|
||||||
|
*indexTotalCost = costs.indexTotalCost;
|
||||||
|
*indexSelectivity = costs.indexSelectivity;
|
||||||
|
*indexCorrelation = costs.indexCorrelation;
|
||||||
|
}
|
|
@ -0,0 +1,313 @@
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* blinsert.c
|
||||||
|
* Bloom index build and insert functions.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* contrib/bloom/blinsert.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/genam.h"
|
||||||
|
#include "access/generic_xlog.h"
|
||||||
|
#include "catalog/index.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "storage/bufmgr.h"
|
||||||
|
#include "storage/indexfsm.h"
|
||||||
|
#include "utils/memutils.h"
|
||||||
|
#include "utils/rel.h"
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
|
||||||
|
PG_MODULE_MAGIC;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* State of bloom index build. We accumulate one page data here before
|
||||||
|
* flushing it to buffer manager.
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
BloomState blstate; /* bloom index state */
|
||||||
|
MemoryContext tmpCtx; /* temporary memory context reset after
|
||||||
|
* each tuple */
|
||||||
|
char data[BLCKSZ]; /* cached page */
|
||||||
|
int64 count; /* number of tuples in cached page */
|
||||||
|
} BloomBuildState;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flush page cached in BloomBuildState.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
flushCachedPage(Relation index, BloomBuildState *buildstate)
|
||||||
|
{
|
||||||
|
Page page;
|
||||||
|
Buffer buffer = BloomNewBuffer(index);
|
||||||
|
GenericXLogState *state;
|
||||||
|
|
||||||
|
state = GenericXLogStart(index);
|
||||||
|
page = GenericXLogRegister(state, buffer, true);
|
||||||
|
memcpy(page, buildstate->data, BLCKSZ);
|
||||||
|
GenericXLogFinish(state);
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* (Re)initialize cached page in BloomBuildState.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
initCachedPage(BloomBuildState *buildstate)
|
||||||
|
{
|
||||||
|
memset(buildstate->data, 0, BLCKSZ);
|
||||||
|
BloomInitPage(buildstate->data, 0);
|
||||||
|
buildstate->count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Per-tuple callback from IndexBuildHeapScan.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
|
||||||
|
bool *isnull, bool tupleIsAlive, void *state)
|
||||||
|
{
|
||||||
|
BloomBuildState *buildstate = (BloomBuildState *) state;
|
||||||
|
MemoryContext oldCtx;
|
||||||
|
BloomTuple *itup;
|
||||||
|
|
||||||
|
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
|
||||||
|
|
||||||
|
itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull);
|
||||||
|
|
||||||
|
/* Try to add next item to cached page */
|
||||||
|
if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
|
||||||
|
{
|
||||||
|
/* Next item was added successfully */
|
||||||
|
buildstate->count++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Cached page is full, flush it out and make a new one */
|
||||||
|
flushCachedPage(index, buildstate);
|
||||||
|
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
|
initCachedPage(buildstate);
|
||||||
|
|
||||||
|
if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup) == false)
|
||||||
|
{
|
||||||
|
/* We shouldn't be here since we're inserting to the empty page */
|
||||||
|
elog(ERROR, "can not add new tuple");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryContextSwitchTo(oldCtx);
|
||||||
|
MemoryContextReset(buildstate->tmpCtx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Build a new bloom index.
|
||||||
|
*/
|
||||||
|
IndexBuildResult *
|
||||||
|
blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
|
||||||
|
{
|
||||||
|
IndexBuildResult *result;
|
||||||
|
double reltuples;
|
||||||
|
BloomBuildState buildstate;
|
||||||
|
|
||||||
|
if (RelationGetNumberOfBlocks(index) != 0)
|
||||||
|
elog(ERROR, "index \"%s\" already contains data",
|
||||||
|
RelationGetRelationName(index));
|
||||||
|
|
||||||
|
/* Initialize the meta page */
|
||||||
|
BloomInitMetapage(index);
|
||||||
|
|
||||||
|
/* Initialize the bloom build state */
|
||||||
|
memset(&buildstate, 0, sizeof(buildstate));
|
||||||
|
initBloomState(&buildstate.blstate, index);
|
||||||
|
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
|
||||||
|
"Bloom build temporary context",
|
||||||
|
ALLOCSET_DEFAULT_MINSIZE,
|
||||||
|
ALLOCSET_DEFAULT_INITSIZE,
|
||||||
|
ALLOCSET_DEFAULT_MAXSIZE);
|
||||||
|
initCachedPage(&buildstate);
|
||||||
|
|
||||||
|
/* Do the heap scan */
|
||||||
|
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
|
||||||
|
bloomBuildCallback, (void *) &buildstate);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There are could be some items in cached page. Flush this page
|
||||||
|
* if needed.
|
||||||
|
*/
|
||||||
|
if (buildstate.count > 0)
|
||||||
|
flushCachedPage(index, &buildstate);
|
||||||
|
|
||||||
|
MemoryContextDelete(buildstate.tmpCtx);
|
||||||
|
|
||||||
|
result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
|
||||||
|
result->heap_tuples = result->index_tuples = reltuples;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Build an empty bloom index in the initialization fork.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
blbuildempty(Relation index)
|
||||||
|
{
|
||||||
|
if (RelationGetNumberOfBlocks(index) != 0)
|
||||||
|
elog(ERROR, "index \"%s\" already contains data",
|
||||||
|
RelationGetRelationName(index));
|
||||||
|
|
||||||
|
/* Initialize the meta page */
|
||||||
|
BloomInitMetapage(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Insert new tuple to the bloom index.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
blinsert(Relation index, Datum *values, bool *isnull,
|
||||||
|
ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique)
|
||||||
|
{
|
||||||
|
BloomState blstate;
|
||||||
|
BloomTuple *itup;
|
||||||
|
MemoryContext oldCtx;
|
||||||
|
MemoryContext insertCtx;
|
||||||
|
BloomMetaPageData *metaData;
|
||||||
|
Buffer buffer,
|
||||||
|
metaBuffer;
|
||||||
|
Page page,
|
||||||
|
metaPage;
|
||||||
|
BlockNumber blkno = InvalidBlockNumber;
|
||||||
|
OffsetNumber nStart;
|
||||||
|
GenericXLogState *state;
|
||||||
|
|
||||||
|
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
|
||||||
|
"Bloom insert temporary context",
|
||||||
|
ALLOCSET_DEFAULT_MINSIZE,
|
||||||
|
ALLOCSET_DEFAULT_INITSIZE,
|
||||||
|
ALLOCSET_DEFAULT_MAXSIZE);
|
||||||
|
|
||||||
|
oldCtx = MemoryContextSwitchTo(insertCtx);
|
||||||
|
|
||||||
|
initBloomState(&blstate, index);
|
||||||
|
itup = BloomFormTuple(&blstate, ht_ctid, values, isnull);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At first, try to insert new tuple to the first page in notFullPage
|
||||||
|
* array. If success we don't need to modify the meta page.
|
||||||
|
*/
|
||||||
|
metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
|
||||||
|
LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
|
||||||
|
metaData = BloomPageGetMeta(BufferGetPage(metaBuffer));
|
||||||
|
|
||||||
|
if (metaData->nEnd > metaData->nStart)
|
||||||
|
{
|
||||||
|
Page page;
|
||||||
|
|
||||||
|
blkno = metaData->notFullPage[metaData->nStart];
|
||||||
|
|
||||||
|
Assert(blkno != InvalidBlockNumber);
|
||||||
|
LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
|
||||||
|
|
||||||
|
buffer = ReadBuffer(index, blkno);
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
state = GenericXLogStart(index);
|
||||||
|
page = GenericXLogRegister(state, buffer, false);
|
||||||
|
|
||||||
|
if (BloomPageAddItem(&blstate, page, itup))
|
||||||
|
{
|
||||||
|
GenericXLogFinish(state);
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
ReleaseBuffer(metaBuffer);
|
||||||
|
MemoryContextSwitchTo(oldCtx);
|
||||||
|
MemoryContextDelete(insertCtx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
GenericXLogAbort(state);
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* First page in notFullPage isn't suitable */
|
||||||
|
LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try other pages in notFullPage array. We will have to change nStart in
|
||||||
|
* metapage. Thus, grab exclusive lock on metapage.
|
||||||
|
*/
|
||||||
|
LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
|
||||||
|
state = GenericXLogStart(index);
|
||||||
|
metaPage = GenericXLogRegister(state, metaBuffer, false);
|
||||||
|
metaData = BloomPageGetMeta(metaPage);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Iterate over notFullPage array. Skip page we already tried first.
|
||||||
|
*/
|
||||||
|
nStart = metaData->nStart;
|
||||||
|
if (metaData->nEnd > nStart &&
|
||||||
|
blkno == metaData->notFullPage[nStart])
|
||||||
|
nStart++;
|
||||||
|
|
||||||
|
while (metaData->nEnd > nStart)
|
||||||
|
{
|
||||||
|
blkno = metaData->notFullPage[nStart];
|
||||||
|
Assert(blkno != InvalidBlockNumber);
|
||||||
|
|
||||||
|
buffer = ReadBuffer(index, blkno);
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
page = GenericXLogRegister(state, buffer, false);
|
||||||
|
|
||||||
|
if (BloomPageAddItem(&blstate, page, itup))
|
||||||
|
{
|
||||||
|
metaData->nStart = nStart;
|
||||||
|
GenericXLogFinish(state);
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
UnlockReleaseBuffer(metaBuffer);
|
||||||
|
MemoryContextSwitchTo(oldCtx);
|
||||||
|
MemoryContextDelete(insertCtx);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
GenericXLogUnregister(state, buffer);
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
}
|
||||||
|
nStart++;
|
||||||
|
}
|
||||||
|
|
||||||
|
GenericXLogAbort(state);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Didn't find place to insert in notFullPage array. Allocate new page.
|
||||||
|
*/
|
||||||
|
buffer = BloomNewBuffer(index);
|
||||||
|
|
||||||
|
state = GenericXLogStart(index);
|
||||||
|
metaPage = GenericXLogRegister(state, metaBuffer, false);
|
||||||
|
metaData = BloomPageGetMeta(metaPage);
|
||||||
|
page = GenericXLogRegister(state, buffer, true);
|
||||||
|
BloomInitPage(page, 0);
|
||||||
|
BloomPageAddItem(&blstate, page, itup);
|
||||||
|
|
||||||
|
metaData->nStart = 0;
|
||||||
|
metaData->nEnd = 1;
|
||||||
|
metaData->notFullPage[0] = BufferGetBlockNumber(buffer);
|
||||||
|
|
||||||
|
GenericXLogFinish(state);
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
UnlockReleaseBuffer(metaBuffer);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
/* contrib/bloom/bloom--1.0.sql */

-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION bloom" to load this file. \quit

-- Handler function of the access method.  Plain CREATE (not CREATE OR
-- REPLACE) is used so that the script fails instead of silently taking over
-- a pre-existing function of the same name.
CREATE FUNCTION blhandler(internal)
RETURNS index_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C;

-- Access method
CREATE ACCESS METHOD bloom TYPE INDEX HANDLER blhandler;

-- Opclasses: one equality operator (strategy 1) and one hash support
-- function (support number 1) per indexed type

CREATE OPERATOR CLASS int4_ops
DEFAULT FOR TYPE int4 USING bloom AS
	OPERATOR	1	=(int4, int4),
	FUNCTION	1	hashint4(int4);

CREATE OPERATOR CLASS text_ops
DEFAULT FOR TYPE text USING bloom AS
	OPERATOR	1	=(text, text),
	FUNCTION	1	hashtext(text);
|
|
@ -0,0 +1,5 @@
|
||||||
|
# bloom extension
|
||||||
|
comment = 'bloom access method - signature file based index'
|
||||||
|
default_version = '1.0'
|
||||||
|
module_pathname = '$libdir/bloom'
|
||||||
|
relocatable = true
|
|
@ -0,0 +1,178 @@
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* bloom.h
|
||||||
|
* Header for bloom index.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* contrib/bloom/bloom.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef _BLOOM_H_
|
||||||
|
#define _BLOOM_H_
|
||||||
|
|
||||||
|
#include "access/amapi.h"
|
||||||
|
#include "access/generic_xlog.h"
|
||||||
|
#include "access/itup.h"
|
||||||
|
#include "access/xlog.h"
|
||||||
|
#include "nodes/relation.h"
|
||||||
|
#include "fmgr.h"
|
||||||
|
|
||||||
|
/* Support procedures numbers */
|
||||||
|
#define BLOOM_HASH_PROC 1
|
||||||
|
#define BLOOM_NPROC 1
|
||||||
|
|
||||||
|
/* Scan strategies */
|
||||||
|
#define BLOOM_EQUAL_STRATEGY 1
|
||||||
|
#define BLOOM_NSTRATEGIES 1
|
||||||
|
|
||||||
|
/* Opaque for bloom pages */
|
||||||
|
typedef struct BloomPageOpaqueData
|
||||||
|
{
|
||||||
|
OffsetNumber maxoff;
|
||||||
|
uint16 flags;
|
||||||
|
} BloomPageOpaqueData;
|
||||||
|
|
||||||
|
typedef BloomPageOpaqueData *BloomPageOpaque;
|
||||||
|
|
||||||
|
/* Bloom page flags: one bit each, stored in BloomPageOpaqueData.flags */
#define BLOOM_META		(1<<0)
#define BLOOM_DELETED	(1<<1)
|
||||||
|
|
||||||
|
/*
 * Macros for accessing bloom page structures.
 *
 * The flag tests yield the masked flag word (zero or non-zero), not a
 * normalized boolean.  Tuple offsets are 1-based, and tuples are laid out
 * back to back in the page contents, (state)->sizeOfBloomTuple bytes apart.
 */
#define BloomPageGetOpaque(page) \
	((BloomPageOpaque) PageGetSpecialPointer(page))
#define BloomPageGetMaxOffset(page) \
	(BloomPageGetOpaque(page)->maxoff)
#define BloomPageIsMeta(page) \
	(BloomPageGetOpaque(page)->flags & BLOOM_META)
#define BloomPageIsDeleted(page) \
	(BloomPageGetOpaque(page)->flags & BLOOM_DELETED)
#define BloomPageSetDeleted(page) \
	(BloomPageGetOpaque(page)->flags |= BLOOM_DELETED)
#define BloomPageSetNonDeleted(page) \
	(BloomPageGetOpaque(page)->flags &= ~BLOOM_DELETED)
#define BloomPageGetData(page) \
	((BloomTuple *)PageGetContents(page))
#define BloomPageGetTuple(state, page, offset) \
	((BloomTuple *)(PageGetContents(page) \
		+ (state)->sizeOfBloomTuple * ((offset) - 1)))
#define BloomPageGetNextTuple(state, tuple) \
	((BloomTuple *)((Pointer)(tuple) + (state)->sizeOfBloomTuple))
|
||||||
|
|
||||||
|
/* Preserved page numbers */
|
||||||
|
#define BLOOM_METAPAGE_BLKNO (0)
|
||||||
|
#define BLOOM_HEAD_BLKNO (1) /* first data page */
|
||||||
|
|
||||||
|
/* Bloom index options */
|
||||||
|
typedef struct BloomOptions
|
||||||
|
{
|
||||||
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||||
|
int bloomLength; /* length of signature in uint16 */
|
||||||
|
int bitSize[INDEX_MAX_KEYS]; /* signature bits per index
|
||||||
|
* key */
|
||||||
|
} BloomOptions;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FreeBlockNumberArray - array of block numbers sized so that metadata fill
|
||||||
|
* all space in metapage.
|
||||||
|
*/
|
||||||
|
typedef BlockNumber FreeBlockNumberArray[
|
||||||
|
MAXALIGN_DOWN(
|
||||||
|
BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(BloomPageOpaqueData))
|
||||||
|
- MAXALIGN(sizeof(uint16) * 2 + sizeof(uint32) + sizeof(BloomOptions))
|
||||||
|
) / sizeof(BlockNumber)
|
||||||
|
];
|
||||||
|
|
||||||
|
/* Metadata of bloom index */
|
||||||
|
typedef struct BloomMetaPageData
|
||||||
|
{
|
||||||
|
uint32 magickNumber;
|
||||||
|
uint16 nStart;
|
||||||
|
uint16 nEnd;
|
||||||
|
BloomOptions opts;
|
||||||
|
FreeBlockNumberArray notFullPage;
|
||||||
|
} BloomMetaPageData;
|
||||||
|
|
||||||
|
/* Magic number to distinguish bloom pages from other pages */
|
||||||
|
#define BLOOM_MAGICK_NUMBER (0xDBAC0DED)
|
||||||
|
|
||||||
|
/* Number of block numbers that fit in BloomMetaPageData */
|
||||||
|
#define BloomMetaBlockN (sizeof(FreeBlockNumberArray) / sizeof(BlockNumber))
|
||||||
|
|
||||||
|
#define BloomPageGetMeta(page) ((BloomMetaPageData *) PageGetContents(page))
|
||||||
|
|
||||||
|
typedef struct BloomState
|
||||||
|
{
|
||||||
|
FmgrInfo hashFn[INDEX_MAX_KEYS];
|
||||||
|
BloomOptions *opts; /* stored in rd_amcache and defined at
|
||||||
|
* creation time */
|
||||||
|
int32 nColumns;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sizeOfBloomTuple is index's specific, and it depends on reloptions, so
|
||||||
|
* precompute it
|
||||||
|
*/
|
||||||
|
int32 sizeOfBloomTuple;
|
||||||
|
} BloomState;
|
||||||
|
|
||||||
|
/*
 * Bytes of a block not accounted for by the (aligned) page header, the
 * tuples counted by the page's max offset, and the (aligned) opaque area.
 */
#define BloomPageGetFreeSpace(state, page) \
	(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
		- BloomPageGetMaxOffset(page) * (state)->sizeOfBloomTuple \
		- MAXALIGN(sizeof(BloomPageOpaqueData)))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tuples are very different from all other relations
|
||||||
|
*/
|
||||||
|
typedef uint16 SignType;
|
||||||
|
|
||||||
|
typedef struct BloomTuple
|
||||||
|
{
|
||||||
|
ItemPointerData heapPtr;
|
||||||
|
SignType sign[1];
|
||||||
|
} BloomTuple;
|
||||||
|
|
||||||
|
#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
|
||||||
|
|
||||||
|
/* Opaque data structure for bloom index scan */
|
||||||
|
typedef struct BloomScanOpaqueData
|
||||||
|
{
|
||||||
|
SignType *sign; /* Scan signature */
|
||||||
|
BloomState state;
|
||||||
|
} BloomScanOpaqueData;
|
||||||
|
|
||||||
|
typedef BloomScanOpaqueData *BloomScanOpaque;
|
||||||
|
|
||||||
|
/* blutils.c */
|
||||||
|
extern void _PG_init(void);
|
||||||
|
extern Datum blhandler(PG_FUNCTION_ARGS);
|
||||||
|
extern void initBloomState(BloomState * state, Relation index);
|
||||||
|
extern void BloomInitMetapage(Relation index);
|
||||||
|
extern void BloomInitPage(Page page, uint16 flags);
|
||||||
|
extern Buffer BloomNewBuffer(Relation index);
|
||||||
|
extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
|
||||||
|
extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
|
||||||
|
extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
|
||||||
|
|
||||||
|
/* blvalidate.c */
|
||||||
|
extern bool blvalidate(Oid opclassoid);
|
||||||
|
|
||||||
|
/* index access method interface functions */
|
||||||
|
extern bool blinsert(Relation index, Datum *values, bool *isnull,
|
||||||
|
ItemPointer ht_ctid, Relation heapRel,
|
||||||
|
IndexUniqueCheck checkUnique);
|
||||||
|
extern IndexScanDesc blbeginscan(Relation r, int nkeys, int norderbys);
|
||||||
|
extern int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
|
||||||
|
extern void blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
|
||||||
|
ScanKey orderbys, int norderbys);
|
||||||
|
extern void blendscan(IndexScanDesc scan);
|
||||||
|
extern IndexBuildResult *blbuild(Relation heap, Relation index,
|
||||||
|
struct IndexInfo *indexInfo);
|
||||||
|
extern void blbuildempty(Relation index);
|
||||||
|
extern IndexBulkDeleteResult *blbulkdelete(IndexVacuumInfo *info,
|
||||||
|
IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback,
|
||||||
|
void *callback_state);
|
||||||
|
extern IndexBulkDeleteResult *blvacuumcleanup(IndexVacuumInfo *info,
|
||||||
|
IndexBulkDeleteResult *stats);
|
||||||
|
extern bytea *bloptions(Datum reloptions, bool validate);
|
||||||
|
extern void blcostestimate(PlannerInfo *root, IndexPath *path,
|
||||||
|
double loop_count, Cost *indexStartupCost,
|
||||||
|
Cost *indexTotalCost, Selectivity *indexSelectivity,
|
||||||
|
double *indexCorrelation);
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,175 @@
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* blscan.c
|
||||||
|
* Bloom index scan functions.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* contrib/bloom/blscan.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/relscan.h"
|
||||||
|
#include "pgstat.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "storage/bufmgr.h"
|
||||||
|
#include "storage/lmgr.h"
|
||||||
|
#include "utils/memutils.h"
|
||||||
|
#include "utils/rel.h"
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Begin scan of bloom index.
|
||||||
|
*/
|
||||||
|
IndexScanDesc
|
||||||
|
blbeginscan(Relation r, int nkeys, int norderbys)
|
||||||
|
{
|
||||||
|
IndexScanDesc scan;
|
||||||
|
|
||||||
|
scan = RelationGetIndexScan(r, nkeys, norderbys);
|
||||||
|
|
||||||
|
return scan;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Rescan a bloom index.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
|
||||||
|
ScanKey orderbys, int norderbys)
|
||||||
|
{
|
||||||
|
BloomScanOpaque so;
|
||||||
|
|
||||||
|
so = (BloomScanOpaque) scan->opaque;
|
||||||
|
|
||||||
|
if (so == NULL)
|
||||||
|
{
|
||||||
|
/* if called from blbeginscan */
|
||||||
|
so = (BloomScanOpaque) palloc(sizeof(BloomScanOpaqueData));
|
||||||
|
initBloomState(&so->state, scan->indexRelation);
|
||||||
|
scan->opaque = so;
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (so->sign)
|
||||||
|
pfree(so->sign);
|
||||||
|
}
|
||||||
|
so->sign = NULL;
|
||||||
|
|
||||||
|
if (scankey && scan->numberOfKeys > 0)
|
||||||
|
{
|
||||||
|
memmove(scan->keyData, scankey,
|
||||||
|
scan->numberOfKeys * sizeof(ScanKeyData));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* End scan of bloom index.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
blendscan(IndexScanDesc scan)
|
||||||
|
{
|
||||||
|
BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
|
||||||
|
|
||||||
|
if (so->sign)
|
||||||
|
pfree(so->sign);
|
||||||
|
so->sign = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Insert all matching tuples into to a bitmap.
|
||||||
|
*/
|
||||||
|
int64
|
||||||
|
blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
||||||
|
{
|
||||||
|
int64 ntids = 0;
|
||||||
|
BlockNumber blkno = BLOOM_HEAD_BLKNO,
|
||||||
|
npages;
|
||||||
|
int i;
|
||||||
|
BufferAccessStrategy bas;
|
||||||
|
BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
|
||||||
|
|
||||||
|
if (so->sign == NULL && scan->numberOfKeys > 0)
|
||||||
|
{
|
||||||
|
/* New search: have to calculate search signature */
|
||||||
|
ScanKey skey = scan->keyData;
|
||||||
|
|
||||||
|
so->sign = palloc0(sizeof(SignType) * so->state.opts->bloomLength);
|
||||||
|
|
||||||
|
for (i = 0; i < scan->numberOfKeys; i++)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Assume bloom-indexable operators to be strict, so nothing could
|
||||||
|
* be found for NULL key.
|
||||||
|
*/
|
||||||
|
if (skey->sk_flags & SK_ISNULL)
|
||||||
|
{
|
||||||
|
pfree(so->sign);
|
||||||
|
so->sign = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add next value to the signature */
|
||||||
|
signValue(&so->state, so->sign, skey->sk_argument,
|
||||||
|
skey->sk_attno - 1);
|
||||||
|
|
||||||
|
skey++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We're going to read the whole index. This is why we use appropriate
|
||||||
|
* buffer access strategy.
|
||||||
|
*/
|
||||||
|
bas = GetAccessStrategy(BAS_BULKREAD);
|
||||||
|
npages = RelationGetNumberOfBlocks(scan->indexRelation);
|
||||||
|
|
||||||
|
for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
|
||||||
|
{
|
||||||
|
Buffer buffer;
|
||||||
|
Page page;
|
||||||
|
|
||||||
|
buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
|
||||||
|
blkno, RBM_NORMAL, bas);
|
||||||
|
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||||
|
page = BufferGetPage(buffer);
|
||||||
|
|
||||||
|
if (!BloomPageIsDeleted(page))
|
||||||
|
{
|
||||||
|
OffsetNumber offset,
|
||||||
|
maxOffset = BloomPageGetMaxOffset(page);
|
||||||
|
|
||||||
|
for (offset = 1; offset <= maxOffset; offset++)
|
||||||
|
{
|
||||||
|
BloomTuple *itup = BloomPageGetTuple(&so->state, page, offset);
|
||||||
|
bool res = true;
|
||||||
|
|
||||||
|
/* Check index signature with scan signature */
|
||||||
|
for (i = 0; res && i < so->state.opts->bloomLength; i++)
|
||||||
|
{
|
||||||
|
if ((itup->sign[i] & so->sign[i]) != so->sign[i])
|
||||||
|
res = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add matching tuples to bitmap */
|
||||||
|
if (res)
|
||||||
|
{
|
||||||
|
tbm_add_tuples(tbm, &itup->heapPtr, 1, true);
|
||||||
|
ntids++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
}
|
||||||
|
FreeAccessStrategy(bas);
|
||||||
|
|
||||||
|
return ntids;
|
||||||
|
}
|
|
@ -0,0 +1,463 @@
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* blutils.c
|
||||||
|
* Bloom index utilities.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 2016, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1990-1993, Regents of the University of California
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* contrib/bloom/blutils.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/amapi.h"
|
||||||
|
#include "access/generic_xlog.h"
|
||||||
|
#include "catalog/index.h"
|
||||||
|
#include "storage/lmgr.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "storage/bufmgr.h"
|
||||||
|
#include "storage/indexfsm.h"
|
||||||
|
#include "utils/memutils.h"
|
||||||
|
#include "access/reloptions.h"
|
||||||
|
#include "storage/freespace.h"
|
||||||
|
#include "storage/indexfsm.h"
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
|
||||||
|
/*
 * Signature dealing macros.  A signature is an array of SignType words;
 * bit i lives in word i / BITSIGNTYPE at position i % BITSIGNTYPE.
 */

/* Number of bits in one signature word */
#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))

/* The signature word that holds bit i */
#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )

/* Clear, set and read bit i of signature x */
#define CLRBIT(x,i)   GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
#define SETBIT(x,i)   GETWORD(x,i) |=  ( 0x01 << ( (i) % BITSIGNTYPE ) )
#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(blhandler);
|
||||||
|
|
||||||
|
/* Kind of relation options for bloom index */
|
||||||
|
static relopt_kind bl_relopt_kind;
|
||||||
|
|
||||||
|
static int32 myRand();
|
||||||
|
static void mySrand(uint32 seed);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Module initialize function: initilized relation options.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
_PG_init(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
char buf[16];
|
||||||
|
|
||||||
|
bl_relopt_kind = add_reloption_kind();
|
||||||
|
|
||||||
|
add_int_reloption(bl_relopt_kind, "length",
|
||||||
|
"Length of signature in uint16 type", 5, 1, 256);
|
||||||
|
|
||||||
|
for (i = 0; i < INDEX_MAX_KEYS; i++)
|
||||||
|
{
|
||||||
|
snprintf(buf, 16, "col%d", i + 1);
|
||||||
|
add_int_reloption(bl_relopt_kind, buf,
|
||||||
|
"Number of bits for corresponding column", 2, 1, 2048);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bloom handler function: return IndexAmRoutine with access method parameters
|
||||||
|
* and callbacks.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
blhandler(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
|
||||||
|
|
||||||
|
amroutine->amstrategies = 1;
|
||||||
|
amroutine->amsupport = 1;
|
||||||
|
amroutine->amcanorder = false;
|
||||||
|
amroutine->amcanorderbyop = false;
|
||||||
|
amroutine->amcanbackward = false;
|
||||||
|
amroutine->amcanunique = false;
|
||||||
|
amroutine->amcanmulticol = true;
|
||||||
|
amroutine->amoptionalkey = true;
|
||||||
|
amroutine->amsearcharray = false;
|
||||||
|
amroutine->amsearchnulls = false;
|
||||||
|
amroutine->amstorage = false;
|
||||||
|
amroutine->amclusterable = false;
|
||||||
|
amroutine->ampredlocks = false;
|
||||||
|
amroutine->amkeytype = 0;
|
||||||
|
|
||||||
|
amroutine->aminsert = blinsert;
|
||||||
|
amroutine->ambeginscan = blbeginscan;
|
||||||
|
amroutine->amgettuple = NULL;
|
||||||
|
amroutine->amgetbitmap = blgetbitmap;
|
||||||
|
amroutine->amrescan = blrescan;
|
||||||
|
amroutine->amendscan = blendscan;
|
||||||
|
amroutine->ammarkpos = NULL;
|
||||||
|
amroutine->amrestrpos = NULL;
|
||||||
|
amroutine->ambuild = blbuild;
|
||||||
|
amroutine->ambuildempty = blbuildempty;
|
||||||
|
amroutine->ambulkdelete = blbulkdelete;
|
||||||
|
amroutine->amvacuumcleanup = blvacuumcleanup;
|
||||||
|
amroutine->amcanreturn = NULL;
|
||||||
|
amroutine->amcostestimate = blcostestimate;
|
||||||
|
amroutine->amoptions = bloptions;
|
||||||
|
amroutine->amvalidate = blvalidate;
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(amroutine);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fill BloomState structure for particular index.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
initBloomState(BloomState *state, Relation index)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
state->nColumns = index->rd_att->natts;
|
||||||
|
|
||||||
|
/* Initialize hash function for each attribute */
|
||||||
|
for (i = 0; i < index->rd_att->natts; i++)
|
||||||
|
{
|
||||||
|
fmgr_info_copy(&(state->hashFn[i]),
|
||||||
|
index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
|
||||||
|
CurrentMemoryContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize amcache if needed with options from metapage */
|
||||||
|
if (!index->rd_amcache)
|
||||||
|
{
|
||||||
|
Buffer buffer;
|
||||||
|
Page page;
|
||||||
|
BloomMetaPageData *meta;
|
||||||
|
BloomOptions *opts;
|
||||||
|
|
||||||
|
opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
|
||||||
|
|
||||||
|
buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||||
|
|
||||||
|
page = BufferGetPage(buffer);
|
||||||
|
|
||||||
|
if (!BloomPageIsMeta(page))
|
||||||
|
elog(ERROR, "Relation is not a bloom index");
|
||||||
|
meta = BloomPageGetMeta(BufferGetPage(buffer));
|
||||||
|
|
||||||
|
if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
|
||||||
|
elog(ERROR, "Relation is not a bloom index");
|
||||||
|
|
||||||
|
*opts = meta->opts;
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
|
||||||
|
index->rd_amcache = (void *) opts;
|
||||||
|
}
|
||||||
|
|
||||||
|
state->opts = (BloomOptions *) index->rd_amcache;
|
||||||
|
state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
|
||||||
|
sizeof(SignType) * state->opts->bloomLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Random generator copied from FreeBSD. Using own random generator here for
|
||||||
|
* two reasons:
|
||||||
|
*
|
||||||
|
* 1) In this case random numbers are used for on-disk storage. Usage of
|
||||||
|
* PostgreSQL number generator would obstruct it from all possible changes.
|
||||||
|
* 2) Changing seed of PostgreSQL random generator would be undesirable side
|
||||||
|
* effect.
|
||||||
|
*/
|
||||||
|
static int32 next;
|
||||||
|
|
||||||
|
static int32
|
||||||
|
myRand()
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Compute x = (7^5 * x) mod (2^31 - 1)
|
||||||
|
* without overflowing 31 bits:
|
||||||
|
* (2^31 - 1) = 127773 * (7^5) + 2836
|
||||||
|
* From "Random number generators: good ones are hard to find",
|
||||||
|
* Park and Miller, Communications of the ACM, vol. 31, no. 10,
|
||||||
|
* October 1988, p. 1195.
|
||||||
|
*/
|
||||||
|
int32 hi, lo, x;
|
||||||
|
|
||||||
|
/* Must be in [1, 0x7ffffffe] range at this point. */
|
||||||
|
hi = next / 127773;
|
||||||
|
lo = next % 127773;
|
||||||
|
x = 16807 * lo - 2836 * hi;
|
||||||
|
if (x < 0)
|
||||||
|
x += 0x7fffffff;
|
||||||
|
next = x;
|
||||||
|
/* Transform to [0, 0x7ffffffd] range. */
|
||||||
|
return (x - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
mySrand(uint32 seed)
|
||||||
|
{
|
||||||
|
next = seed;
|
||||||
|
/* Transform to [1, 0x7ffffffe] range. */
|
||||||
|
next = (next % 0x7ffffffe) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Add bits of given value to the signature.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
signValue(BloomState *state, SignType *sign, Datum value, int attno)
|
||||||
|
{
|
||||||
|
uint32 hashVal;
|
||||||
|
int nBit,
|
||||||
|
j;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* init generator with "column's" number to get "hashed" seed for new
|
||||||
|
* value. We don't want to map the same numbers from different columns
|
||||||
|
* into the same bits!
|
||||||
|
*/
|
||||||
|
mySrand(attno);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Init hash sequence to map our value into bits. the same values in
|
||||||
|
* different columns will be mapped into different bits because of step
|
||||||
|
* above
|
||||||
|
*/
|
||||||
|
hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
|
||||||
|
mySrand(hashVal ^ myRand());
|
||||||
|
|
||||||
|
for (j = 0; j < state->opts->bitSize[attno]; j++)
|
||||||
|
{
|
||||||
|
/* prevent mutiple evaluation */
|
||||||
|
nBit = myRand() % (state->opts->bloomLength * BITSIGNTYPE);
|
||||||
|
SETBIT(sign, nBit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make bloom tuple from values.
|
||||||
|
*/
|
||||||
|
BloomTuple *
|
||||||
|
BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
|
||||||
|
|
||||||
|
res->heapPtr = *iptr;
|
||||||
|
|
||||||
|
/* Blooming each column */
|
||||||
|
for (i = 0; i < state->nColumns; i++)
|
||||||
|
{
|
||||||
|
/* skip nulls */
|
||||||
|
if (isnull[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
signValue(state, res->sign, values[i], i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Add new bloom tuple to the page. Returns true if new tuple was successfully
|
||||||
|
* added to the page. Returns false if it doesn't git the page.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
|
||||||
|
{
|
||||||
|
BloomTuple *itup;
|
||||||
|
BloomPageOpaque opaque;
|
||||||
|
Pointer ptr;
|
||||||
|
|
||||||
|
/* Does new tuple fit the page */
|
||||||
|
if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Copy new tuple to the end of page */
|
||||||
|
opaque = BloomPageGetOpaque(page);
|
||||||
|
itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
|
||||||
|
memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);
|
||||||
|
|
||||||
|
/* Adjust maxoff and pd_lower */
|
||||||
|
opaque->maxoff++;
|
||||||
|
ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
|
||||||
|
((PageHeader) page)->pd_lower = ptr - page;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate a new page (either by recycling, or by extending the index file)
|
||||||
|
* The returned buffer is already pinned and exclusive-locked
|
||||||
|
* Caller is responsible for initializing the page by calling BloomInitBuffer
|
||||||
|
*/
|
||||||
|
Buffer
|
||||||
|
BloomNewBuffer(Relation index)
|
||||||
|
{
|
||||||
|
Buffer buffer;
|
||||||
|
bool needLock;
|
||||||
|
|
||||||
|
/* First, try to get a page from FSM */
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
BlockNumber blkno = GetFreeIndexPage(index);
|
||||||
|
|
||||||
|
if (blkno == InvalidBlockNumber)
|
||||||
|
break;
|
||||||
|
|
||||||
|
buffer = ReadBuffer(index, blkno);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We have to guard against the possibility that someone else already
|
||||||
|
* recycled this page; the buffer may be locked if so.
|
||||||
|
*/
|
||||||
|
if (ConditionalLockBuffer(buffer))
|
||||||
|
{
|
||||||
|
Page page = BufferGetPage(buffer);
|
||||||
|
|
||||||
|
if (PageIsNew(page))
|
||||||
|
return buffer; /* OK to use, if never initialized */
|
||||||
|
|
||||||
|
if (BloomPageIsDeleted(page))
|
||||||
|
return buffer; /* OK to use */
|
||||||
|
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Can't use it, so release buffer and try again */
|
||||||
|
ReleaseBuffer(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Must extend the file */
|
||||||
|
needLock = !RELATION_IS_LOCAL(index);
|
||||||
|
if (needLock)
|
||||||
|
LockRelationForExtension(index, ExclusiveLock);
|
||||||
|
|
||||||
|
buffer = ReadBuffer(index, P_NEW);
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
|
||||||
|
if (needLock)
|
||||||
|
UnlockRelationForExtension(index, ExclusiveLock);
|
||||||
|
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize bloom page.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
BloomInitPage(Page page, uint16 flags)
|
||||||
|
{
|
||||||
|
BloomPageOpaque opaque;
|
||||||
|
|
||||||
|
PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
|
||||||
|
|
||||||
|
opaque = BloomPageGetOpaque(page);
|
||||||
|
memset(opaque, 0, sizeof(BloomPageOpaqueData));
|
||||||
|
opaque->flags = flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Adjust options of bloom index.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
adjustBloomOptions(BloomOptions *opts)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* Default length of bloom filter is 5 of 16-bit integers */
|
||||||
|
if (opts->bloomLength <= 0)
|
||||||
|
opts->bloomLength = 5;
|
||||||
|
else
|
||||||
|
opts->bloomLength = opts->bloomLength;
|
||||||
|
|
||||||
|
/* Check singnature length */
|
||||||
|
for (i = 0; i < INDEX_MAX_KEYS; i++)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Zero and negative number of bits is meaningless. Also setting
|
||||||
|
* more bits than signature have seems useless. Replace both cases
|
||||||
|
* with 2 bits default.
|
||||||
|
*/
|
||||||
|
if (opts->bitSize[i] <= 0
|
||||||
|
|| opts->bitSize[i] >= opts->bloomLength * sizeof(SignType))
|
||||||
|
opts->bitSize[i] = 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize metapage for bloom index.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
BloomInitMetapage(Relation index)
|
||||||
|
{
|
||||||
|
Page metaPage;
|
||||||
|
Buffer metaBuffer;
|
||||||
|
BloomMetaPageData *metadata;
|
||||||
|
GenericXLogState *state;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make a new buffer, since it first buffer it should be associated with
|
||||||
|
* block number 0 (BLOOM_METAPAGE_BLKNO).
|
||||||
|
*/
|
||||||
|
metaBuffer = BloomNewBuffer(index);
|
||||||
|
Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
|
||||||
|
|
||||||
|
/* Initialize bloom index options */
|
||||||
|
if (!index->rd_options)
|
||||||
|
index->rd_options = palloc0(sizeof(BloomOptions));
|
||||||
|
adjustBloomOptions((BloomOptions *) index->rd_options);
|
||||||
|
|
||||||
|
/* Initialize contents of meta page */
|
||||||
|
state = GenericXLogStart(index);
|
||||||
|
metaPage = GenericXLogRegister(state, metaBuffer, true);
|
||||||
|
|
||||||
|
BloomInitPage(metaPage, BLOOM_META);
|
||||||
|
metadata = BloomPageGetMeta(metaPage);
|
||||||
|
memset(metadata, 0, sizeof(BloomMetaPageData));
|
||||||
|
metadata->magickNumber = BLOOM_MAGICK_NUMBER;
|
||||||
|
metadata->opts = *((BloomOptions *) index->rd_options);
|
||||||
|
((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
|
||||||
|
|
||||||
|
GenericXLogFinish(state);
|
||||||
|
UnlockReleaseBuffer(metaBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize options for bloom index.
|
||||||
|
*/
|
||||||
|
bytea *
|
||||||
|
bloptions(Datum reloptions, bool validate)
|
||||||
|
{
|
||||||
|
relopt_value *options;
|
||||||
|
int numoptions;
|
||||||
|
BloomOptions *rdopts;
|
||||||
|
relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
|
||||||
|
int i;
|
||||||
|
char buf[16];
|
||||||
|
|
||||||
|
/* Option for length of signature */
|
||||||
|
tab[0].optname = "length";
|
||||||
|
tab[0].opttype = RELOPT_TYPE_INT;
|
||||||
|
tab[0].offset = offsetof(BloomOptions, bloomLength);
|
||||||
|
|
||||||
|
/* Number of bits for each of possible columns: col1, col2, ... */
|
||||||
|
for (i = 0; i < INDEX_MAX_KEYS; i++)
|
||||||
|
{
|
||||||
|
snprintf(buf, sizeof(buf), "col%d", i + 1);
|
||||||
|
tab[i + 1].optname = pstrdup(buf);
|
||||||
|
tab[i + 1].opttype = RELOPT_TYPE_INT;
|
||||||
|
tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
|
||||||
|
rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
|
||||||
|
fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
|
||||||
|
validate, tab, INDEX_MAX_KEYS + 1);
|
||||||
|
|
||||||
|
adjustBloomOptions(rdopts);
|
||||||
|
|
||||||
|
return (bytea *) rdopts;
|
||||||
|
}
|
|
@ -0,0 +1,212 @@
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* blvacuum.c
|
||||||
|
* Bloom VACUUM functions.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* contrib/bloom/blvacuum.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/genam.h"
|
||||||
|
#include "catalog/storage.h"
|
||||||
|
#include "commands/vacuum.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "postmaster/autovacuum.h"
|
||||||
|
#include "storage/bufmgr.h"
|
||||||
|
#include "storage/indexfsm.h"
|
||||||
|
#include "storage/lmgr.h"
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bulk deletion of all index entries pointing to a set of heap tuples.
|
||||||
|
* The set of target tuples is specified via a callback routine that tells
|
||||||
|
* whether any given heap tuple (identified by ItemPointer) is being deleted.
|
||||||
|
*
|
||||||
|
* Result: a palloc'd struct containing statistical info for VACUUM displays.
|
||||||
|
*/
|
||||||
|
IndexBulkDeleteResult *
|
||||||
|
blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||||
|
IndexBulkDeleteCallback callback, void *callback_state)
|
||||||
|
{
|
||||||
|
Relation index = info->index;
|
||||||
|
BlockNumber blkno,
|
||||||
|
npages;
|
||||||
|
FreeBlockNumberArray notFullPage;
|
||||||
|
int countPage = 0;
|
||||||
|
BloomState state;
|
||||||
|
Buffer buffer;
|
||||||
|
Page page;
|
||||||
|
GenericXLogState *gxlogState;
|
||||||
|
|
||||||
|
if (stats == NULL)
|
||||||
|
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
|
||||||
|
|
||||||
|
initBloomState(&state, index);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Interate over the pages. We don't care about concurrently added pages,
|
||||||
|
* they can't contain tuples to delete.
|
||||||
|
*/
|
||||||
|
npages = RelationGetNumberOfBlocks(index);
|
||||||
|
for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
|
||||||
|
{
|
||||||
|
BloomTuple *itup,
|
||||||
|
*itupPtr,
|
||||||
|
*itupEnd;
|
||||||
|
|
||||||
|
buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
|
||||||
|
RBM_NORMAL, info->strategy);
|
||||||
|
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
gxlogState = GenericXLogStart(index);
|
||||||
|
page = GenericXLogRegister(gxlogState, buffer, false);
|
||||||
|
|
||||||
|
if (BloomPageIsDeleted(page))
|
||||||
|
{
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Iterate over the tuples */
|
||||||
|
itup = BloomPageGetTuple(&state, page, 1);
|
||||||
|
itupPtr = BloomPageGetTuple(&state, page, 1);
|
||||||
|
itupEnd = BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1);
|
||||||
|
while (itup < itupEnd)
|
||||||
|
{
|
||||||
|
/* Do we have to delete this tuple? */
|
||||||
|
if (callback(&itup->heapPtr, callback_state))
|
||||||
|
{
|
||||||
|
stats->tuples_removed += 1;
|
||||||
|
BloomPageGetOpaque(page)->maxoff--;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (itupPtr != itup)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If we already delete something before, we have to move
|
||||||
|
* this tuple backward.
|
||||||
|
*/
|
||||||
|
memmove((Pointer) itupPtr, (Pointer) itup,
|
||||||
|
state.sizeOfBloomTuple);
|
||||||
|
}
|
||||||
|
stats->num_index_tuples++;
|
||||||
|
itupPtr = BloomPageGetNextTuple(&state, itupPtr);
|
||||||
|
}
|
||||||
|
|
||||||
|
itup = BloomPageGetNextTuple(&state, itup);
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert(itupPtr == BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1));
|
||||||
|
|
||||||
|
if (!BloomPageIsDeleted(page) &&
|
||||||
|
BloomPageGetFreeSpace(&state, page) > state.sizeOfBloomTuple &&
|
||||||
|
countPage < BloomMetaBlockN)
|
||||||
|
notFullPage[countPage++] = blkno;
|
||||||
|
|
||||||
|
/* Did we delete something? */
|
||||||
|
if (itupPtr != itup)
|
||||||
|
{
|
||||||
|
/* Is it empty page now? */
|
||||||
|
if (itupPtr == BloomPageGetData(page))
|
||||||
|
BloomPageSetDeleted(page);
|
||||||
|
/* Adjust pg_lower */
|
||||||
|
((PageHeader) page)->pd_lower = (Pointer) itupPtr - page;
|
||||||
|
/* Finish WAL-logging */
|
||||||
|
GenericXLogFinish(gxlogState);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Didn't change anything: abort WAL-logging */
|
||||||
|
GenericXLogAbort(gxlogState);
|
||||||
|
}
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (countPage > 0)
|
||||||
|
{
|
||||||
|
BloomMetaPageData *metaData;
|
||||||
|
|
||||||
|
buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||||
|
|
||||||
|
gxlogState = GenericXLogStart(index);
|
||||||
|
page = GenericXLogRegister(gxlogState, buffer, false);
|
||||||
|
|
||||||
|
metaData = BloomPageGetMeta(page);
|
||||||
|
memcpy(metaData->notFullPage, notFullPage, sizeof(FreeBlockNumberArray));
|
||||||
|
metaData->nStart = 0;
|
||||||
|
metaData->nEnd = countPage;
|
||||||
|
|
||||||
|
GenericXLogFinish(gxlogState);
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Post-VACUUM cleanup.
|
||||||
|
*
|
||||||
|
* Result: a palloc'd struct containing statistical info for VACUUM displays.
|
||||||
|
*/
|
||||||
|
IndexBulkDeleteResult *
|
||||||
|
blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
|
||||||
|
{
|
||||||
|
Relation index = info->index;
|
||||||
|
BlockNumber npages,
|
||||||
|
blkno;
|
||||||
|
BlockNumber totFreePages;
|
||||||
|
|
||||||
|
if (info->analyze_only)
|
||||||
|
return stats;
|
||||||
|
|
||||||
|
if (stats == NULL)
|
||||||
|
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Iterate over the pages: insert deleted pages into FSM and collect
|
||||||
|
* statistics.
|
||||||
|
*/
|
||||||
|
npages = RelationGetNumberOfBlocks(index);
|
||||||
|
totFreePages = 0;
|
||||||
|
for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
|
||||||
|
{
|
||||||
|
Buffer buffer;
|
||||||
|
Page page;
|
||||||
|
|
||||||
|
vacuum_delay_point();
|
||||||
|
|
||||||
|
buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
|
||||||
|
RBM_NORMAL, info->strategy);
|
||||||
|
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||||
|
page = (Page) BufferGetPage(buffer);
|
||||||
|
|
||||||
|
if (BloomPageIsDeleted(page))
|
||||||
|
{
|
||||||
|
RecordFreeIndexPage(index, blkno);
|
||||||
|
totFreePages++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
stats->num_index_tuples += BloomPageGetMaxOffset(page);
|
||||||
|
stats->estimated_count += BloomPageGetMaxOffset(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
UnlockReleaseBuffer(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexFreeSpaceMapVacuum(info->index);
|
||||||
|
stats->pages_free = totFreePages;
|
||||||
|
stats->num_pages = RelationGetNumberOfBlocks(index);
|
||||||
|
|
||||||
|
return stats;
|
||||||
|
}
|
|
@ -0,0 +1,220 @@
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* blvalidate.c
|
||||||
|
* Opclass validator for bloom.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* contrib/bloom/blvalidate.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/amvalidate.h"
|
||||||
|
#include "access/htup_details.h"
|
||||||
|
#include "catalog/pg_amop.h"
|
||||||
|
#include "catalog/pg_amproc.h"
|
||||||
|
#include "catalog/pg_opclass.h"
|
||||||
|
#include "catalog/pg_opfamily.h"
|
||||||
|
#include "catalog/pg_type.h"
|
||||||
|
#include "utils/builtins.h"
|
||||||
|
#include "utils/lsyscache.h"
|
||||||
|
#include "utils/syscache.h"
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Validator for a bloom opclass.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
blvalidate(Oid opclassoid)
|
||||||
|
{
|
||||||
|
bool result = true;
|
||||||
|
HeapTuple classtup;
|
||||||
|
Form_pg_opclass classform;
|
||||||
|
Oid opfamilyoid;
|
||||||
|
Oid opcintype;
|
||||||
|
Oid opckeytype;
|
||||||
|
char *opclassname;
|
||||||
|
HeapTuple familytup;
|
||||||
|
Form_pg_opfamily familyform;
|
||||||
|
char *opfamilyname;
|
||||||
|
CatCList *proclist,
|
||||||
|
*oprlist;
|
||||||
|
List *grouplist;
|
||||||
|
OpFamilyOpFuncGroup *opclassgroup;
|
||||||
|
int i;
|
||||||
|
ListCell *lc;
|
||||||
|
|
||||||
|
/* Fetch opclass information */
|
||||||
|
classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
|
||||||
|
if (!HeapTupleIsValid(classtup))
|
||||||
|
elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
|
||||||
|
classform = (Form_pg_opclass) GETSTRUCT(classtup);
|
||||||
|
|
||||||
|
opfamilyoid = classform->opcfamily;
|
||||||
|
opcintype = classform->opcintype;
|
||||||
|
opckeytype = classform->opckeytype;
|
||||||
|
if (!OidIsValid(opckeytype))
|
||||||
|
opckeytype = opcintype;
|
||||||
|
opclassname = NameStr(classform->opcname);
|
||||||
|
|
||||||
|
/* Fetch opfamily information */
|
||||||
|
familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid));
|
||||||
|
if (!HeapTupleIsValid(familytup))
|
||||||
|
elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid);
|
||||||
|
familyform = (Form_pg_opfamily) GETSTRUCT(familytup);
|
||||||
|
|
||||||
|
opfamilyname = NameStr(familyform->opfname);
|
||||||
|
|
||||||
|
/* Fetch all operators and support functions of the opfamily */
|
||||||
|
oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
|
||||||
|
proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
|
||||||
|
|
||||||
|
/* Check individual support functions */
|
||||||
|
for (i = 0; i < proclist->n_members; i++)
|
||||||
|
{
|
||||||
|
HeapTuple proctup = &proclist->members[i]->tuple;
|
||||||
|
Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
|
||||||
|
bool ok;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All bloom support functions should be registered with matching
|
||||||
|
* left/right types
|
||||||
|
*/
|
||||||
|
if (procform->amproclefttype != procform->amprocrighttype)
|
||||||
|
{
|
||||||
|
ereport(INFO,
|
||||||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||||
|
errmsg("bloom opfamily %s contains support procedure %s with cross-type registration",
|
||||||
|
opfamilyname,
|
||||||
|
format_procedure(procform->amproc))));
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can't check signatures except within the specific opclass, since
|
||||||
|
* we need to know the associated opckeytype in many cases.
|
||||||
|
*/
|
||||||
|
if (procform->amproclefttype != opcintype)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Check procedure numbers and function signatures */
|
||||||
|
switch (procform->amprocnum)
|
||||||
|
{
|
||||||
|
case BLOOM_HASH_PROC:
|
||||||
|
ok = check_amproc_signature(procform->amproc, INT4OID, false,
|
||||||
|
1, 1, opckeytype);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ereport(INFO,
|
||||||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||||
|
errmsg("bloom opfamily %s contains function %s with invalid support number %d",
|
||||||
|
opfamilyname,
|
||||||
|
format_procedure(procform->amproc),
|
||||||
|
procform->amprocnum)));
|
||||||
|
result = false;
|
||||||
|
continue; /* don't want additional message */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ok)
|
||||||
|
{
|
||||||
|
ereport(INFO,
|
||||||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||||
|
errmsg("gist opfamily %s contains function %s with wrong signature for support number %d",
|
||||||
|
opfamilyname,
|
||||||
|
format_procedure(procform->amproc),
|
||||||
|
procform->amprocnum)));
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check individual operators */
|
||||||
|
for (i = 0; i < oprlist->n_members; i++)
|
||||||
|
{
|
||||||
|
HeapTuple oprtup = &oprlist->members[i]->tuple;
|
||||||
|
Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
|
||||||
|
|
||||||
|
/* Check it's allowed strategy for bloom */
|
||||||
|
if (oprform->amopstrategy < 1 ||
|
||||||
|
oprform->amopstrategy > BLOOM_NSTRATEGIES)
|
||||||
|
{
|
||||||
|
ereport(INFO,
|
||||||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||||
|
errmsg("bloom opfamily %s contains operator %s with invalid strategy number %d",
|
||||||
|
opfamilyname,
|
||||||
|
format_operator(oprform->amopopr),
|
||||||
|
oprform->amopstrategy)));
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* bloom doesn't support ORDER BY operators */
|
||||||
|
if (oprform->amoppurpose != AMOP_SEARCH ||
|
||||||
|
OidIsValid(oprform->amopsortfamily))
|
||||||
|
{
|
||||||
|
ereport(INFO,
|
||||||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||||
|
errmsg("bloom opfamily %s contains invalid ORDER BY specification for operator %s",
|
||||||
|
opfamilyname,
|
||||||
|
format_operator(oprform->amopopr))));
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check operator signature --- same for all bloom strategies */
|
||||||
|
if (!check_amop_signature(oprform->amopopr, BOOLOID,
|
||||||
|
oprform->amoplefttype,
|
||||||
|
oprform->amoprighttype))
|
||||||
|
{
|
||||||
|
ereport(INFO,
|
||||||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||||
|
errmsg("bloom opfamily %s contains operator %s with wrong signature",
|
||||||
|
opfamilyname,
|
||||||
|
format_operator(oprform->amopopr))));
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now check for inconsistent groups of operators/functions */
|
||||||
|
grouplist = identify_opfamily_groups(oprlist, proclist);
|
||||||
|
opclassgroup = NULL;
|
||||||
|
foreach(lc, grouplist)
|
||||||
|
{
|
||||||
|
OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);
|
||||||
|
|
||||||
|
/* Remember the group exactly matching the test opclass */
|
||||||
|
if (thisgroup->lefttype == opcintype &&
|
||||||
|
thisgroup->righttype == opcintype)
|
||||||
|
opclassgroup = thisgroup;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There is not a lot we can do to check the operator sets, since each
|
||||||
|
* bloom opclass is more or less a law unto itself, and some contain
|
||||||
|
* only operators that are binary-compatible with the opclass datatype
|
||||||
|
* (meaning that empty operator sets can be OK). That case also means
|
||||||
|
* that we shouldn't insist on nonempty function sets except for the
|
||||||
|
* opclass's own group.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check that the originally-named opclass is complete */
|
||||||
|
for (i = 1; i <= BLOOM_NPROC; i++)
|
||||||
|
{
|
||||||
|
if (opclassgroup &&
|
||||||
|
(opclassgroup->functionset & (((uint64) 1) << i)) != 0)
|
||||||
|
continue; /* got it */
|
||||||
|
ereport(INFO,
|
||||||
|
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||||
|
errmsg("bloom opclass %s is missing support function %d",
|
||||||
|
opclassname, i)));
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ReleaseCatCacheList(proclist);
|
||||||
|
ReleaseCatCacheList(oprlist);
|
||||||
|
ReleaseSysCache(familytup);
|
||||||
|
ReleaseSysCache(classtup);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
|
@ -0,0 +1,122 @@
|
||||||
|
CREATE EXTENSION bloom;
|
||||||
|
CREATE TABLE tst (
|
||||||
|
i int4,
|
||||||
|
t text
|
||||||
|
);
|
||||||
|
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
|
||||||
|
CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
|
||||||
|
SET enable_seqscan=on;
|
||||||
|
SET enable_bitmapscan=off;
|
||||||
|
SET enable_indexscan=off;
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
6264
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
588
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SET enable_seqscan=off;
|
||||||
|
SET enable_bitmapscan=on;
|
||||||
|
SET enable_indexscan=on;
|
||||||
|
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
QUERY PLAN
|
||||||
|
-------------------------------------------
|
||||||
|
Aggregate
|
||||||
|
-> Bitmap Heap Scan on tst
|
||||||
|
Recheck Cond: (i = 7)
|
||||||
|
-> Bitmap Index Scan on bloomidx
|
||||||
|
Index Cond: (i = 7)
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
QUERY PLAN
|
||||||
|
-------------------------------------------
|
||||||
|
Aggregate
|
||||||
|
-> Bitmap Heap Scan on tst
|
||||||
|
Recheck Cond: (t = '5'::text)
|
||||||
|
-> Bitmap Index Scan on bloomidx
|
||||||
|
Index Cond: (t = '5'::text)
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
QUERY PLAN
|
||||||
|
---------------------------------------------------------
|
||||||
|
Aggregate
|
||||||
|
-> Bitmap Heap Scan on tst
|
||||||
|
Recheck Cond: ((i = 7) AND (t = '5'::text))
|
||||||
|
-> Bitmap Index Scan on bloomidx
|
||||||
|
Index Cond: ((i = 7) AND (t = '5'::text))
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
6264
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
588
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DELETE FROM tst;
|
||||||
|
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
|
||||||
|
VACUUM ANALYZE tst;
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
6264
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
588
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
VACUUM FULL tst;
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
10000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
6264
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
588
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
RESET enable_seqscan;
|
||||||
|
RESET enable_bitmapscan;
|
||||||
|
RESET enable_indexscan;
|
|
@ -0,0 +1,47 @@
|
||||||
|
CREATE EXTENSION bloom;
|
||||||
|
|
||||||
|
CREATE TABLE tst (
|
||||||
|
i int4,
|
||||||
|
t text
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
|
||||||
|
CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
|
||||||
|
|
||||||
|
SET enable_seqscan=on;
|
||||||
|
SET enable_bitmapscan=off;
|
||||||
|
SET enable_indexscan=off;
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
|
||||||
|
SET enable_seqscan=off;
|
||||||
|
SET enable_bitmapscan=on;
|
||||||
|
SET enable_indexscan=on;
|
||||||
|
|
||||||
|
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
|
||||||
|
DELETE FROM tst;
|
||||||
|
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
|
||||||
|
VACUUM ANALYZE tst;
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
|
||||||
|
VACUUM FULL tst;
|
||||||
|
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7;
|
||||||
|
SELECT count(*) FROM tst WHERE t = '5';
|
||||||
|
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
|
||||||
|
|
||||||
|
RESET enable_seqscan;
|
||||||
|
RESET enable_bitmapscan;
|
||||||
|
RESET enable_indexscan;
|
|
@ -0,0 +1,75 @@
|
||||||
|
# Test that generic xlog records work for bloom index replication.
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use PostgresNode;
|
||||||
|
use TestLib;
|
||||||
|
use Test::More tests => 31;
|
||||||
|
|
||||||
|
my $node_master;
|
||||||
|
my $node_standby;
|
||||||
|
|
||||||
|
# Run a few queries on both master and standby and check that their results match.
|
||||||
|
sub test_index_replay
{
	# $test_name labels the comparison reported by is() below.
	my ($test_name) = @_;

	# Wait for standby to catch up.  Poll replay_location rather than
	# write_location: WAL that the standby has only written is not yet
	# visible to queries run there, so waiting for the write position alone
	# lets the result comparison below race with WAL replay.
	my $applname = $node_standby->name;
	my $caughtup_query =
	  "SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$applname';";
	$node_master->poll_query_until('postgres', $caughtup_query)
	  or die "Timed out while waiting for standby 1 to catch up";

	# Turn off sequential scans (leaving bitmap and index scans enabled)
	# before running the equality queries on the indexed columns.
	my $queries = qq(SET enable_seqscan=off;
SET enable_bitmapscan=on;
SET enable_indexscan=on;
SELECT * FROM tst WHERE i = 0;
SELECT * FROM tst WHERE i = 3;
SELECT * FROM tst WHERE t = 'b';
SELECT * FROM tst WHERE t = 'f';
SELECT * FROM tst WHERE i = 3 AND t = 'c';
SELECT * FROM tst WHERE i = 7 AND t = 'e';
);

	# Run test queries on both nodes and compare their result
	my $master_result = $node_master->psql("postgres", $queries);
	my $standby_result = $node_standby->psql("postgres", $queries);

	is($master_result, $standby_result, "$test_name: query result matches");
}
|
||||||
|
|
||||||
|
# Initialize master node
|
||||||
|
$node_master = get_new_node('master');
|
||||||
|
$node_master->init(allows_streaming => 1);
|
||||||
|
$node_master->start;
|
||||||
|
my $backup_name = 'my_backup';
|
||||||
|
|
||||||
|
# Take backup
|
||||||
|
$node_master->backup($backup_name);
|
||||||
|
|
||||||
|
# Create streaming standby linking to master
|
||||||
|
$node_standby = get_new_node('standby');
|
||||||
|
$node_standby->init_from_backup($node_master, $backup_name,
|
||||||
|
has_streaming => 1);
|
||||||
|
$node_standby->start;
|
||||||
|
|
||||||
|
# Create some bloom index on master
|
||||||
|
$node_master->psql("postgres", "CREATE EXTENSION bloom;");
|
||||||
|
$node_master->psql("postgres", "CREATE TABLE tst (i int4, t text);");
|
||||||
|
$node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;");
|
||||||
|
$node_master->psql("postgres", "CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);");
|
||||||
|
|
||||||
|
# Test that queries give same result
|
||||||
|
test_index_replay('initial');
|
||||||
|
|
||||||
|
# Run 10 cycles of table modification. Run test queries after each modification.
|
||||||
|
for my $i (1..10)
|
||||||
|
{
|
||||||
|
$node_master->psql("postgres", "DELETE FROM tst WHERE i = $i;");
|
||||||
|
test_index_replay("delete $i");
|
||||||
|
$node_master->psql("postgres", "VACUUM tst;");
|
||||||
|
test_index_replay("vacuum $i");
|
||||||
|
my ($start, $end) = (100001 + ($i - 1) * 10000, 100000 + $i * 10000);
|
||||||
|
$node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series($start,$end) i;");
|
||||||
|
test_index_replay("insert $i");
|
||||||
|
}
|
|
@ -0,0 +1,218 @@
|
||||||
|
<!-- doc/src/sgml/bloom.sgml -->
|
||||||
|
|
||||||
|
<sect1 id="bloom" xreflabel="bloom">
|
||||||
|
<title>bloom</title>
|
||||||
|
|
||||||
|
<indexterm zone="bloom">
|
||||||
|
<primary>bloom</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<literal>bloom</> is a contrib which implements index access method. It comes
|
||||||
|
as an example of custom access methods and generic WAL record usage. But it
|
||||||
|
is also useful in itself.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Introduction</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Implementation of
|
||||||
|
<ulink url="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filter</ulink>
|
||||||
|
allows fast exclusion of non-candidate tuples.
|
||||||
|
Since the signature is a lossy representation of all indexed attributes,
|
||||||
|
search results should be rechecked using heap information.
|
||||||
|
The user can specify the signature length (in uint16 units, default is 5) and the number of
|
||||||
|
bits that can be set per attribute (1 &lt; colN &lt; 2048).
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
This index is useful if a table has many attributes and queries can include
|
||||||
|
their arbitrary combinations. A traditional <literal>btree</> index is faster
|
||||||
|
than a bloom index, but it would require too many indexes to support all possible
|
||||||
|
queries, while one needs only one bloom index. A bloom index supports only
|
||||||
|
equality comparison. Since it's a signature file, not a tree, it always
|
||||||
|
has to be read fully, but sequentially, so index search performance is
|
||||||
|
constant and doesn't depend on a query.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Parameters</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<literal>bloom</> indexes accept following parameters in <literal>WITH</>
|
||||||
|
clause.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term><literal>length</></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Length of signature in uint16 type values
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term><literal>col1 — col16</></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Number of bits for corresponding column
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Examples</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Example of index definition is given below.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<programlisting>
|
||||||
|
CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
|
||||||
|
WITH (length=5, col1=2, col2=2, col3=4);
|
||||||
|
</programlisting>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Here, we create a bloom index with a signature length of 80 bits and attributes
|
||||||
|
i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Example of index definition and usage is given below.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<programlisting>
|
||||||
|
CREATE TABLE tbloom AS
|
||||||
|
SELECT
|
||||||
|
random()::int as i1,
|
||||||
|
random()::int as i2,
|
||||||
|
random()::int as i3,
|
||||||
|
random()::int as i4,
|
||||||
|
random()::int as i5,
|
||||||
|
random()::int as i6,
|
||||||
|
random()::int as i7,
|
||||||
|
random()::int as i8,
|
||||||
|
random()::int as i9,
|
||||||
|
random()::int as i10,
|
||||||
|
random()::int as i11,
|
||||||
|
random()::int as i12,
|
||||||
|
random()::int as i13
|
||||||
|
FROM
|
||||||
|
generate_series(1,1000);
|
||||||
|
CREATE INDEX bloomidx ON tbloom USING
|
||||||
|
bloom (i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12);
|
||||||
|
SELECT pg_relation_size('bloomidx');
|
||||||
|
CREATE index btree_idx ON tbloom(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12);
|
||||||
|
SELECT pg_relation_size('btree_idx');
|
||||||
|
</programlisting>
|
||||||
|
|
||||||
|
<programlisting>
|
||||||
|
=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------------------------------------------------------------------------------------
|
||||||
|
Bitmap Heap Scan on tbloom (cost=1.50..5.52 rows=1 width=52) (actual time=0.057..0.057 rows=0 loops=1)
|
||||||
|
Recheck Cond: ((i2 = 20) AND (i10 = 15))
|
||||||
|
-> Bitmap Index Scan on bloomidx (cost=0.00..1.50 rows=1 width=0) (actual time=0.041..0.041 rows=9 loops=1)
|
||||||
|
Index Cond: ((i2 = 20) AND (i10 = 15))
|
||||||
|
Total runtime: 0.081 ms
|
||||||
|
(5 rows)
|
||||||
|
</programlisting>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Seqscan is slow.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<programlisting>
|
||||||
|
=# SET enable_bitmapscan = off;
|
||||||
|
=# SET enable_indexscan = off;
|
||||||
|
=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
|
||||||
|
QUERY PLAN
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
Seq Scan on tbloom (cost=0.00..25.00 rows=1 width=52) (actual time=0.162..0.162 rows=0 loops=1)
|
||||||
|
Filter: ((i2 = 20) AND (i10 = 15))
|
||||||
|
Total runtime: 0.181 ms
|
||||||
|
(3 rows)
|
||||||
|
</programlisting>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
A btree index will not be used for this query.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<programlisting>
|
||||||
|
=# DROP INDEX bloomidx;
|
||||||
|
=# CREATE INDEX btree_idx ON tbloom(i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12);
|
||||||
|
=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
|
||||||
|
QUERY PLAN
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
Seq Scan on tbloom (cost=0.00..25.00 rows=1 width=52) (actual time=0.210..0.210 rows=0 loops=1)
|
||||||
|
Filter: ((i2 = 20) AND (i10 = 15))
|
||||||
|
Total runtime: 0.250 ms
|
||||||
|
(3 rows)
|
||||||
|
</programlisting>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Opclass interface</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The bloom opclass interface is simple. It requires one supporting function:
|
||||||
|
a hash function for the indexed datatype. It also provides one search operator:
|
||||||
|
the equality operator. The example below shows the <literal>opclass</> definition
|
||||||
|
for the <literal>text</> datatype.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<programlisting>
|
||||||
|
CREATE OPERATOR CLASS text_ops
|
||||||
|
DEFAULT FOR TYPE text USING bloom AS
|
||||||
|
OPERATOR 1 =(text, text),
|
||||||
|
FUNCTION 1 hashtext(text);
|
||||||
|
</programlisting>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Limitation</title>
|
||||||
|
<para>
|
||||||
|
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
For now, only opclasses for <literal>int4</> and <literal>text</> come
|
||||||
|
with contrib. However, users may define more of them.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Only the <literal>=</literal> operator is supported for search now. But it's
|
||||||
|
possible to add support for arrays with contains and intersection
|
||||||
|
operations in the future.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Authors</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Teodor Sigaev <email>teodor@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Alexander Korotkov <email>a.korotkov@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Oleg Bartunov <email>obartunov@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
</sect1>
|
|
@ -105,6 +105,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
|
||||||
&adminpack;
|
&adminpack;
|
||||||
&auth-delay;
|
&auth-delay;
|
||||||
&auto-explain;
|
&auto-explain;
|
||||||
|
&bloom;
|
||||||
&btree-gin;
|
&btree-gin;
|
||||||
&btree-gist;
|
&btree-gist;
|
||||||
&chkpass;
|
&chkpass;
|
||||||
|
|
|
@ -107,6 +107,7 @@
|
||||||
<!ENTITY adminpack SYSTEM "adminpack.sgml">
|
<!ENTITY adminpack SYSTEM "adminpack.sgml">
|
||||||
<!ENTITY auth-delay SYSTEM "auth-delay.sgml">
|
<!ENTITY auth-delay SYSTEM "auth-delay.sgml">
|
||||||
<!ENTITY auto-explain SYSTEM "auto-explain.sgml">
|
<!ENTITY auto-explain SYSTEM "auto-explain.sgml">
|
||||||
|
<!ENTITY bloom SYSTEM "bloom.sgml">
|
||||||
<!ENTITY btree-gin SYSTEM "btree-gin.sgml">
|
<!ENTITY btree-gin SYSTEM "btree-gin.sgml">
|
||||||
<!ENTITY btree-gist SYSTEM "btree-gist.sgml">
|
<!ENTITY btree-gist SYSTEM "btree-gist.sgml">
|
||||||
<!ENTITY chkpass SYSTEM "chkpass.sgml">
|
<!ENTITY chkpass SYSTEM "chkpass.sgml">
|
||||||
|
|
Loading…
Reference in New Issue