postgresql/src/backend/access/gin/gininsert.c

621 lines
16 KiB
C

/*-------------------------------------------------------------------------
*
* gininsert.c
* insert routines for the postgres inverted index access method.
*
*
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/access/gin/gininsert.c
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/gin_private.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h"
typedef struct
{
GinState ginstate;
double indtuples;
GinStatsData buildStats;
MemoryContext tmpCtx;
MemoryContext funcCtx;
BuildAccumulator accum;
} GinBuildState;
/*
* Creates new posting tree with one page, containing the given TIDs.
* Returns the page number (which will be the root of this posting tree).
*
* items[] must be in sorted order with no duplicates.
*/
static BlockNumber
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
{
BlockNumber blkno;
Buffer buffer = GinNewBuffer(index);
Page page;
/* Assert that the items[] array will fit on one page */
Assert(nitems <= GinMaxLeafDataItems);
START_CRIT_SECTION();
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
page = BufferGetPage(buffer);
blkno = BufferGetBlockNumber(buffer);
memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems);
GinPageGetOpaque(page)->maxoff = nitems;
MarkBufferDirty(buffer);
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
XLogRecData rdata[2];
ginxlogCreatePostingTree data;
data.node = index->rd_node;
data.blkno = blkno;
data.nitem = nitems;
rdata[0].buffer = InvalidBuffer;
rdata[0].data = (char *) &data;
rdata[0].len = sizeof(ginxlogCreatePostingTree);
rdata[0].next = &rdata[1];
rdata[1].buffer = InvalidBuffer;
rdata[1].data = (char *) items;
rdata[1].len = sizeof(ItemPointerData) * nitems;
rdata[1].next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
return blkno;
}
/*
* Adds array of item pointers to tuple's posting list, or
* creates posting tree and tuple pointing to tree in case
* of not enough space. Max size of tuple is defined in
* GinFormTuple(). Returns a new, modified index tuple.
* items[] must be in sorted order with no duplicates.
*/
static IndexTuple
addItemPointersToLeafTuple(GinState *ginstate,
IndexTuple old,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats)
{
OffsetNumber attnum;
Datum key;
GinNullCategory category;
IndexTuple res;
Assert(!GinIsPostingTree(old));
attnum = gintuple_get_attrnum(ginstate, old);
key = gintuple_get_key(ginstate, old, &category);
/* try to build tuple with room for all the items */
res = GinFormTuple(ginstate, attnum, key, category,
NULL, nitem + GinGetNPosting(old),
false);
if (res)
{
/* good, small enough */
uint32 newnitem;
/* fill in the posting list with union of old and new TIDs */
newnitem = ginMergeItemPointers(GinGetPosting(res),
GinGetPosting(old),
GinGetNPosting(old),
items, nitem);
/* merge might have eliminated some duplicate items */
GinShortenTuple(res, newnitem);
}
else
{
/* posting list would be too big, convert to posting tree */
BlockNumber postingRoot;
GinPostingTreeScan *gdi;
/*
* Initialize posting tree with the old tuple's posting list. It's
* surely small enough to fit on one posting-tree page, and should
* already be in order with no duplicates.
*/
postingRoot = createPostingTree(ginstate->index,
GinGetPosting(old),
GinGetNPosting(old));
/* During index build, count the newly-added data page */
if (buildStats)
buildStats->nDataPages++;
/* Now insert the TIDs-to-be-added into the posting tree */
gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE);
gdi->btree.isBuild = (buildStats != NULL);
ginInsertItemPointers(gdi, items, nitem, buildStats);
pfree(gdi);
/* And build a new posting-tree-only result tuple */
res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true);
GinSetPostingTree(res, postingRoot);
}
return res;
}
/*
* Build a fresh leaf tuple, either posting-list or posting-tree format
* depending on whether the given items list will fit.
* items[] must be in sorted order with no duplicates.
*
* This is basically the same logic as in addItemPointersToLeafTuple,
* but working from slightly different input.
*/
static IndexTuple
buildFreshLeafTuple(GinState *ginstate,
OffsetNumber attnum, Datum key, GinNullCategory category,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats)
{
IndexTuple res;
/* try to build tuple with room for all the items */
res = GinFormTuple(ginstate, attnum, key, category,
items, nitem, false);
if (!res)
{
/* posting list would be too big, build posting tree */
BlockNumber postingRoot;
/*
* Build posting-tree-only result tuple. We do this first so as to
* fail quickly if the key is too big.
*/
res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true);
/*
* Initialize posting tree with as many TIDs as will fit on the first
* page.
*/
postingRoot = createPostingTree(ginstate->index,
items,
Min(nitem, GinMaxLeafDataItems));
/* During index build, count the newly-added data page */
if (buildStats)
buildStats->nDataPages++;
/* Add any remaining TIDs to the posting tree */
if (nitem > GinMaxLeafDataItems)
{
GinPostingTreeScan *gdi;
gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE);
gdi->btree.isBuild = (buildStats != NULL);
ginInsertItemPointers(gdi,
items + GinMaxLeafDataItems,
nitem - GinMaxLeafDataItems,
buildStats);
pfree(gdi);
}
/* And save the root link in the result tuple */
GinSetPostingTree(res, postingRoot);
}
return res;
}
/*
* Insert one or more heap TIDs associated with the given key value.
* This will either add a single key entry, or enlarge a pre-existing entry.
*
* During an index build, buildStats is non-null and the counters
* it contains should be incremented as needed.
*/
void
ginEntryInsert(GinState *ginstate,
OffsetNumber attnum, Datum key, GinNullCategory category,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats)
{
GinBtreeData btree;
GinBtreeStack *stack;
IndexTuple itup;
Page page;
/* During index build, count the to-be-inserted entry */
if (buildStats)
buildStats->nEntries++;
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
stack = ginFindLeafPage(&btree, NULL);
page = BufferGetPage(stack->buffer);
if (btree.findItem(&btree, stack))
{
/* found pre-existing entry */
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
if (GinIsPostingTree(itup))
{
/* add entries to existing posting tree */
BlockNumber rootPostingTree = GinGetPostingTree(itup);
GinPostingTreeScan *gdi;
/* release all stack */
LockBuffer(stack->buffer, GIN_UNLOCK);
freeGinBtreeStack(stack);
/* insert into posting tree */
gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, FALSE);
gdi->btree.isBuild = (buildStats != NULL);
ginInsertItemPointers(gdi, items, nitem, buildStats);
pfree(gdi);
return;
}
/* modify an existing leaf entry */
itup = addItemPointersToLeafTuple(ginstate, itup,
items, nitem, buildStats);
btree.isDelete = TRUE;
}
else
{
/* no match, so construct a new leaf entry */
itup = buildFreshLeafTuple(ginstate, attnum, key, category,
items, nitem, buildStats);
}
/* Insert the new or modified leaf tuple */
btree.entry = itup;
ginInsertValue(&btree, stack, buildStats);
pfree(itup);
}
/*
* Extract index entries for a single indexable item, and add them to the
* BuildAccumulator's state.
*
* This function is used only during initial index creation.
*/
static void
ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
Datum value, bool isNull,
ItemPointer heapptr)
{
Datum *entries;
GinNullCategory *categories;
int32 nentries;
MemoryContext oldCtx;
oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
value, isNull,
&nentries, &categories);
MemoryContextSwitchTo(oldCtx);
ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
entries, categories, nentries);
buildstate->indtuples += nentries;
MemoryContextReset(buildstate->funcCtx);
}
static void
ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
bool *isnull, bool tupleIsAlive, void *state)
{
GinBuildState *buildstate = (GinBuildState *) state;
MemoryContext oldCtx;
int i;
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
values[i], isnull[i],
&htup->t_self);
/* If we've maxed out our available memory, dump everything to the index */
if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L)
{
ItemPointerData *list;
Datum key;
GinNullCategory category;
uint32 nlist;
OffsetNumber attnum;
ginBeginBAScan(&buildstate->accum);
while ((list = ginGetBAEntry(&buildstate->accum,
&attnum, &key, &category, &nlist)) != NULL)
{
/* there could be many entries, so be willing to abort here */
CHECK_FOR_INTERRUPTS();
ginEntryInsert(&buildstate->ginstate, attnum, key, category,
list, nlist, &buildstate->buildStats);
}
MemoryContextReset(buildstate->tmpCtx);
ginInitBA(&buildstate->accum);
}
MemoryContextSwitchTo(oldCtx);
}
Datum
ginbuild(PG_FUNCTION_ARGS)
{
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
Buffer RootBuffer,
MetaBuffer;
ItemPointerData *list;
Datum key;
GinNullCategory category;
uint32 nlist;
MemoryContext oldCtx;
OffsetNumber attnum;
if (RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
initGinState(&buildstate.ginstate, index);
buildstate.indtuples = 0;
memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
/* initialize the meta page */
MetaBuffer = GinNewBuffer(index);
/* initialize the root page */
RootBuffer = GinNewBuffer(index);
START_CRIT_SECTION();
GinInitMetabuffer(MetaBuffer);
MarkBufferDirty(MetaBuffer);
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
XLogRecData rdata;
Page page;
rdata.buffer = InvalidBuffer;
rdata.data = (char *) &(index->rd_node);
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
page = BufferGetPage(MetaBuffer);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
END_CRIT_SECTION();
/* count the root as first entry page */
buildstate.buildStats.nEntryPages++;
/*
* create a temporary memory context that is reset once for each tuple
* inserted into the index
*/
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
"Gin build temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx,
"Gin build temporary context for user-defined function",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
buildstate.accum.ginstate = &buildstate.ginstate;
ginInitBA(&buildstate.accum);
/*
* Do the heap scan. We disallow sync scan here because dataPlaceToPage
* prefers to receive tuples in TID order.
*/
reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
ginBuildCallback, (void *) &buildstate);
/* dump remaining entries to the index */
oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
ginBeginBAScan(&buildstate.accum);
while ((list = ginGetBAEntry(&buildstate.accum,
&attnum, &key, &category, &nlist)) != NULL)
{
/* there could be many entries, so be willing to abort here */
CHECK_FOR_INTERRUPTS();
ginEntryInsert(&buildstate.ginstate, attnum, key, category,
list, nlist, &buildstate.buildStats);
}
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(buildstate.tmpCtx);
/*
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
ginUpdateStats(index, &buildstate.buildStats);
/*
* Return statistics
*/
result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
result->heap_tuples = reltuples;
result->index_tuples = buildstate.indtuples;
PG_RETURN_POINTER(result);
}
/*
* ginbuildempty() -- build an empty gin index in the initialization fork
*/
Datum
ginbuildempty(PG_FUNCTION_ARGS)
{
Relation index = (Relation) PG_GETARG_POINTER(0);
Buffer RootBuffer,
MetaBuffer;
/* An empty GIN index has two pages. */
MetaBuffer =
ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
RootBuffer =
ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
/* Initialize both pages, mark them dirty, unlock and release buffer. */
START_CRIT_SECTION();
GinInitMetabuffer(MetaBuffer);
MarkBufferDirty(MetaBuffer);
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
/* XLOG the new pages */
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
BufferGetBlockNumber(MetaBuffer),
BufferGetPage(MetaBuffer));
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
BufferGetBlockNumber(RootBuffer),
BufferGetPage(RootBuffer));
END_CRIT_SECTION();
/* Unlock and release the buffers. */
UnlockReleaseBuffer(MetaBuffer);
UnlockReleaseBuffer(RootBuffer);
PG_RETURN_VOID();
}
/*
* Insert index entries for a single indexable item during "normal"
* (non-fast-update) insertion
*/
static void
ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
Datum value, bool isNull,
ItemPointer item)
{
Datum *entries;
GinNullCategory *categories;
int32 i,
nentries;
entries = ginExtractEntries(ginstate, attnum, value, isNull,
&nentries, &categories);
for (i = 0; i < nentries; i++)
ginEntryInsert(ginstate, attnum, entries[i], categories[i],
item, 1, NULL);
}
Datum
gininsert(PG_FUNCTION_ARGS)
{
Relation index = (Relation) PG_GETARG_POINTER(0);
Datum *values = (Datum *) PG_GETARG_POINTER(1);
bool *isnull = (bool *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
#ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
#endif
GinState ginstate;
MemoryContext oldCtx;
MemoryContext insertCtx;
int i;
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
"Gin insert temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldCtx = MemoryContextSwitchTo(insertCtx);
initGinState(&ginstate, index);
if (GinGetUseFastUpdate(index))
{
GinTupleCollector collector;
memset(&collector, 0, sizeof(GinTupleCollector));
for (i = 0; i < ginstate.origTupdesc->natts; i++)
ginHeapTupleFastCollect(&ginstate, &collector,
(OffsetNumber) (i + 1),
values[i], isnull[i],
ht_ctid);
ginHeapTupleFastInsert(&ginstate, &collector);
}
else
{
for (i = 0; i < ginstate.origTupdesc->natts; i++)
ginHeapTupleInsert(&ginstate, (OffsetNumber) (i + 1),
values[i], isnull[i],
ht_ctid);
}
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
PG_RETURN_BOOL(false);
}