1739 lines
43 KiB
C
1739 lines
43 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* gist.c
|
|
* interface routines for the postgres GiST index access method.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.118 2005/06/06 17:01:21 tgl Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "access/genam.h"
|
|
#include "access/gist_private.h"
|
|
#include "access/gistscan.h"
|
|
#include "access/heapam.h"
|
|
#include "catalog/index.h"
|
|
#include "commands/vacuum.h"
|
|
#include "miscadmin.h"
|
|
#include "utils/memutils.h"
|
|
|
|
|
|
#undef GIST_PAGEADDITEM
|
|
|
|
#define ATTSIZE(datum, tupdesc, i, isnull) \
|
|
( \
|
|
(isnull) ? 0 : \
|
|
att_addlength(0, (tupdesc)->attrs[(i)-1]->attlen, (datum)) \
|
|
)
|
|
|
|
/* result's status */
|
|
#define INSERTED 0x01
|
|
#define SPLITED 0x02
|
|
|
|
/* group flags ( in gistSplit ) */
|
|
#define LEFT_ADDED 0x01
|
|
#define RIGHT_ADDED 0x02
|
|
#define BOTH_ADDED ( LEFT_ADDED | RIGHT_ADDED )
|
|
|
|
/*
|
|
* This defines only for shorter code, used in gistgetadjusted
|
|
* and gistadjsubkey only
|
|
*/
|
|
#define FILLITEM(evp, isnullkey, okey, okeyb, rkey, rkeyb) do { \
|
|
if (isnullkey) { \
|
|
gistentryinit((evp), rkey, r, NULL, \
|
|
(OffsetNumber) 0, rkeyb, FALSE); \
|
|
} else { \
|
|
gistentryinit((evp), okey, r, NULL, \
|
|
(OffsetNumber) 0, okeyb, FALSE); \
|
|
} \
|
|
} while(0)
|
|
|
|
#define FILLEV(isnull1, key1, key1b, isnull2, key2, key2b) do { \
|
|
FILLITEM(*ev0p, isnull1, key1, key1b, key2, key2b); \
|
|
FILLITEM(*ev1p, isnull2, key2, key2b, key1, key1b); \
|
|
} while(0);
|
|
|
|
/* Working state for gistbuild and its callback */
|
|
typedef struct
|
|
{
|
|
GISTSTATE giststate;
|
|
int numindexattrs;
|
|
double indtuples;
|
|
MemoryContext tmpCxt;
|
|
} GISTBuildState;
|
|
|
|
|
|
/* non-export function prototypes */
|
|
static void gistbuildCallback(Relation index,
|
|
HeapTuple htup,
|
|
Datum *values,
|
|
bool *isnull,
|
|
bool tupleIsAlive,
|
|
void *state);
|
|
static void gistdoinsert(Relation r,
|
|
IndexTuple itup,
|
|
GISTSTATE *GISTstate);
|
|
static int gistlayerinsert(Relation r, BlockNumber blkno,
|
|
IndexTuple **itup,
|
|
int *len,
|
|
GISTSTATE *giststate);
|
|
static OffsetNumber gistwritebuffer(Relation r,
|
|
Page page,
|
|
IndexTuple *itup,
|
|
int len,
|
|
OffsetNumber off);
|
|
static bool gistnospace(Page page, IndexTuple *itvec, int len);
|
|
static IndexTuple *gistreadbuffer(Buffer buffer, int *len);
|
|
static IndexTuple *gistjoinvector(
|
|
IndexTuple *itvec, int *len,
|
|
IndexTuple *additvec, int addlen);
|
|
static IndexTuple gistunion(Relation r, IndexTuple *itvec,
|
|
int len, GISTSTATE *giststate);
|
|
|
|
static IndexTuple gistgetadjusted(Relation r,
|
|
IndexTuple oldtup,
|
|
IndexTuple addtup,
|
|
GISTSTATE *giststate);
|
|
static int gistfindgroup(GISTSTATE *giststate,
|
|
GISTENTRY *valvec, GIST_SPLITVEC *spl);
|
|
static void gistadjsubkey(Relation r,
|
|
IndexTuple *itup, int *len,
|
|
GIST_SPLITVEC *v,
|
|
GISTSTATE *giststate);
|
|
static IndexTuple gistFormTuple(GISTSTATE *giststate,
|
|
Relation r, Datum *attdata, int *datumsize, bool *isnull);
|
|
static IndexTuple *gistSplit(Relation r,
|
|
Buffer buffer,
|
|
IndexTuple *itup,
|
|
int *len,
|
|
GISTSTATE *giststate);
|
|
static void gistnewroot(Relation r, IndexTuple *itup, int len);
|
|
static void GISTInitBuffer(Buffer b, uint32 f);
|
|
static OffsetNumber gistchoose(Relation r, Page p,
|
|
IndexTuple it,
|
|
GISTSTATE *giststate);
|
|
static void gistdelete(Relation r, ItemPointer tid);
|
|
|
|
#ifdef GIST_PAGEADDITEM
|
|
static IndexTuple gist_tuple_replacekey(Relation r,
|
|
GISTENTRY entry, IndexTuple t);
|
|
#endif
|
|
static void gistcentryinit(GISTSTATE *giststate, int nkey,
|
|
GISTENTRY *e, Datum k,
|
|
Relation r, Page pg,
|
|
OffsetNumber o, int b, bool l, bool isNull);
|
|
static void gistDeCompressAtt(GISTSTATE *giststate, Relation r,
|
|
IndexTuple tuple, Page p, OffsetNumber o,
|
|
GISTENTRY *attdata, bool *isnull);
|
|
static void gistpenalty(GISTSTATE *giststate, int attno,
|
|
GISTENTRY *key1, bool isNull1,
|
|
GISTENTRY *key2, bool isNull2,
|
|
float *penalty);
|
|
|
|
#undef GISTDEBUG
|
|
|
|
#ifdef GISTDEBUG
|
|
static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber coff);
|
|
#endif
|
|
|
|
/*
|
|
* Create and return a temporary memory context for use by GiST. We
|
|
* _always_ invoke user-provided methods in a temporary memory
|
|
* context, so that memory leaks in those functions cannot cause
|
|
* problems. Also, we use some additional temporary contexts in the
|
|
* GiST code itself, to avoid the need to do some awkward manual
|
|
* memory management.
|
|
*/
|
|
MemoryContext
|
|
createTempGistContext(void)
|
|
{
|
|
return AllocSetContextCreate(CurrentMemoryContext,
|
|
"GiST temporary context",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
|
}
|
|
|
|
/*
|
|
* Routine to build an index. Basically calls insert over and over.
|
|
*
|
|
* XXX: it would be nice to implement some sort of bulk-loading
|
|
* algorithm, but it is not clear how to do that.
|
|
*/
|
|
Datum
|
|
gistbuild(PG_FUNCTION_ARGS)
|
|
{
|
|
Relation heap = (Relation) PG_GETARG_POINTER(0);
|
|
Relation index = (Relation) PG_GETARG_POINTER(1);
|
|
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
|
|
double reltuples;
|
|
GISTBuildState buildstate;
|
|
Buffer buffer;
|
|
|
|
/*
|
|
* We expect to be called exactly once for any index relation. If
|
|
* that's not the case, big trouble's what we have.
|
|
*/
|
|
if (RelationGetNumberOfBlocks(index) != 0)
|
|
elog(ERROR, "index \"%s\" already contains data",
|
|
RelationGetRelationName(index));
|
|
|
|
/* no locking is needed */
|
|
initGISTstate(&buildstate.giststate, index);
|
|
|
|
/* initialize the root page */
|
|
buffer = ReadBuffer(index, P_NEW);
|
|
GISTInitBuffer(buffer, F_LEAF);
|
|
WriteBuffer(buffer);
|
|
|
|
/* build the index */
|
|
buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs;
|
|
buildstate.indtuples = 0;
|
|
/*
|
|
* create a temporary memory context that is reset once for each
|
|
* tuple inserted into the index
|
|
*/
|
|
buildstate.tmpCxt = createTempGistContext();
|
|
|
|
/* do the heap scan */
|
|
reltuples = IndexBuildHeapScan(heap, index, indexInfo,
|
|
gistbuildCallback, (void *) &buildstate);
|
|
|
|
/* okay, all heap tuples are indexed */
|
|
MemoryContextDelete(buildstate.tmpCxt);
|
|
|
|
/* since we just counted the # of tuples, may as well update stats */
|
|
IndexCloseAndUpdateStats(heap, reltuples, index, buildstate.indtuples);
|
|
|
|
freeGISTstate(&buildstate.giststate);
|
|
|
|
#ifdef GISTDEBUG
|
|
gist_dumptree(index, 0, GIST_ROOT_BLKNO, 0);
|
|
#endif
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
/*
|
|
* Per-tuple callback from IndexBuildHeapScan
|
|
*/
|
|
static void
|
|
gistbuildCallback(Relation index,
|
|
HeapTuple htup,
|
|
Datum *values,
|
|
bool *isnull,
|
|
bool tupleIsAlive,
|
|
void *state)
|
|
{
|
|
GISTBuildState *buildstate = (GISTBuildState *) state;
|
|
IndexTuple itup;
|
|
GISTENTRY tmpcentry;
|
|
int i;
|
|
MemoryContext oldCxt;
|
|
|
|
/* GiST cannot index tuples with leading NULLs */
|
|
if (isnull[0])
|
|
return;
|
|
|
|
oldCxt = MemoryContextSwitchTo(buildstate->tmpCxt);
|
|
|
|
/* immediately compress keys to normalize */
|
|
for (i = 0; i < buildstate->numindexattrs; i++)
|
|
{
|
|
if (isnull[i])
|
|
values[i] = (Datum) 0;
|
|
else
|
|
{
|
|
gistcentryinit(&buildstate->giststate, i, &tmpcentry, values[i],
|
|
NULL, NULL, (OffsetNumber) 0,
|
|
-1 /* size is currently bogus */, TRUE, FALSE);
|
|
values[i] = tmpcentry.key;
|
|
}
|
|
}
|
|
|
|
/* form an index tuple and point it at the heap tuple */
|
|
itup = index_form_tuple(buildstate->giststate.tupdesc, values, isnull);
|
|
itup->t_tid = htup->t_self;
|
|
|
|
/*
|
|
* Since we already have the index relation locked, we call
|
|
* gistdoinsert directly. Normal access method calls dispatch through
|
|
* gistinsert, which locks the relation for write. This is the right
|
|
* thing to do if you're inserting single tups, but not when you're
|
|
* initializing the whole index at once.
|
|
*/
|
|
gistdoinsert(index, itup, &buildstate->giststate);
|
|
|
|
buildstate->indtuples += 1;
|
|
MemoryContextSwitchTo(oldCxt);
|
|
MemoryContextReset(buildstate->tmpCxt);
|
|
}
|
|
|
|
/*
|
|
* gistinsert -- wrapper for GiST tuple insertion.
|
|
*
|
|
* This is the public interface routine for tuple insertion in GiSTs.
|
|
* It doesn't do any work; just locks the relation and passes the buck.
|
|
*/
|
|
Datum
|
|
gistinsert(PG_FUNCTION_ARGS)
|
|
{
|
|
Relation r = (Relation) PG_GETARG_POINTER(0);
|
|
Datum *values = (Datum *) PG_GETARG_POINTER(1);
|
|
bool *isnull = (bool *) PG_GETARG_POINTER(2);
|
|
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
|
|
#ifdef NOT_USED
|
|
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
|
|
bool checkUnique = PG_GETARG_BOOL(5);
|
|
#endif
|
|
IndexTuple itup;
|
|
GISTSTATE giststate;
|
|
GISTENTRY tmpentry;
|
|
int i;
|
|
MemoryContext oldCxt;
|
|
MemoryContext insertCxt;
|
|
|
|
/*
|
|
* Since GIST is not marked "amconcurrent" in pg_am, caller should
|
|
* have acquired exclusive lock on index relation. We need no locking
|
|
* here.
|
|
*/
|
|
|
|
/* GiST cannot index tuples with leading NULLs */
|
|
if (isnull[0])
|
|
PG_RETURN_BOOL(false);
|
|
|
|
insertCxt = createTempGistContext();
|
|
oldCxt = MemoryContextSwitchTo(insertCxt);
|
|
|
|
initGISTstate(&giststate, r);
|
|
|
|
/* immediately compress keys to normalize */
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
if (isnull[i])
|
|
values[i] = (Datum) 0;
|
|
else
|
|
{
|
|
gistcentryinit(&giststate, i, &tmpentry, values[i],
|
|
NULL, NULL, (OffsetNumber) 0,
|
|
-1 /* size is currently bogus */, TRUE, FALSE);
|
|
values[i] = tmpentry.key;
|
|
}
|
|
}
|
|
itup = index_form_tuple(giststate.tupdesc, values, isnull);
|
|
itup->t_tid = *ht_ctid;
|
|
|
|
gistdoinsert(r, itup, &giststate);
|
|
|
|
/* cleanup */
|
|
freeGISTstate(&giststate);
|
|
MemoryContextSwitchTo(oldCxt);
|
|
MemoryContextDelete(insertCxt);
|
|
|
|
PG_RETURN_BOOL(true);
|
|
}
|
|
|
|
#ifdef GIST_PAGEADDITEM
|
|
/*
|
|
* Take a compressed entry, and install it on a page. Since we now know
|
|
* where the entry will live, we decompress it and recompress it using
|
|
* that knowledge (some compression routines may want to fish around
|
|
* on the page, for example, or do something special for leaf nodes.)
|
|
*/
|
|
static OffsetNumber
|
|
gistPageAddItem(GISTSTATE *giststate,
|
|
Relation r,
|
|
Page page,
|
|
Item item,
|
|
Size size,
|
|
OffsetNumber offsetNumber,
|
|
ItemIdFlags flags,
|
|
GISTENTRY *dentry,
|
|
IndexTuple *newtup)
|
|
{
|
|
GISTENTRY tmpcentry;
|
|
IndexTuple itup = (IndexTuple) item;
|
|
OffsetNumber retval;
|
|
Datum datum;
|
|
bool IsNull;
|
|
|
|
/*
|
|
* recompress the item given that we now know the exact page and
|
|
* offset for insertion
|
|
*/
|
|
datum = index_getattr(itup, 1, r->rd_att, &IsNull);
|
|
gistdentryinit(giststate, 0, dentry, datum,
|
|
(Relation) 0, (Page) 0,
|
|
(OffsetNumber) InvalidOffsetNumber,
|
|
ATTSIZE(datum, r, 1, IsNull),
|
|
FALSE, IsNull);
|
|
gistcentryinit(giststate, 0, &tmpcentry, dentry->key, r, page,
|
|
offsetNumber, dentry->bytes, FALSE);
|
|
*newtup = gist_tuple_replacekey(r, tmpcentry, itup);
|
|
retval = PageAddItem(page, (Item) *newtup, IndexTupleSize(*newtup),
|
|
offsetNumber, flags);
|
|
if (retval == InvalidOffsetNumber)
|
|
elog(ERROR, "failed to add index item to \"%s\"",
|
|
RelationGetRelationName(r));
|
|
return retval;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Workhouse routine for doing insertion into a GiST index. Note that
|
|
* this routine assumes it is invoked in a short-lived memory context,
|
|
* so it does not bother releasing palloc'd allocations.
|
|
*/
|
|
static void
|
|
gistdoinsert(Relation r, IndexTuple itup, GISTSTATE *giststate)
|
|
{
|
|
IndexTuple *instup;
|
|
int ret,
|
|
len = 1;
|
|
|
|
instup = (IndexTuple *) palloc(sizeof(IndexTuple));
|
|
instup[0] = (IndexTuple) palloc(IndexTupleSize(itup));
|
|
memcpy(instup[0], itup, IndexTupleSize(itup));
|
|
|
|
ret = gistlayerinsert(r, GIST_ROOT_BLKNO, &instup, &len, giststate);
|
|
if (ret & SPLITED)
|
|
gistnewroot(r, instup, len);
|
|
}
|
|
|
|
static int
|
|
gistlayerinsert(Relation r, BlockNumber blkno,
|
|
IndexTuple **itup, /* in - out, has compressed entry */
|
|
int *len, /* in - out */
|
|
GISTSTATE *giststate)
|
|
{
|
|
Buffer buffer;
|
|
Page page;
|
|
int ret;
|
|
GISTPageOpaque opaque;
|
|
|
|
buffer = ReadBuffer(r, blkno);
|
|
page = (Page) BufferGetPage(buffer);
|
|
opaque = (GISTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
if (!(opaque->flags & F_LEAF))
|
|
{
|
|
/*
|
|
* This is an internal page, so continue to walk down the
|
|
* tree. We find the child node that has the minimum insertion
|
|
* penalty and recursively invoke ourselves to modify that
|
|
* node. Once the recursive call returns, we may need to
|
|
* adjust the parent node for two reasons: the child node
|
|
* split, or the key in this node needs to be adjusted for the
|
|
* newly inserted key below us.
|
|
*/
|
|
ItemId iid;
|
|
BlockNumber nblkno;
|
|
ItemPointerData oldtid;
|
|
IndexTuple oldtup;
|
|
OffsetNumber child;
|
|
|
|
child = gistchoose(r, page, *(*itup), giststate);
|
|
iid = PageGetItemId(page, child);
|
|
oldtup = (IndexTuple) PageGetItem(page, iid);
|
|
nblkno = ItemPointerGetBlockNumber(&(oldtup->t_tid));
|
|
|
|
/*
|
|
* After this call: 1. if child page was splited, then itup
|
|
* contains keys for each page 2. if child page wasn't splited,
|
|
* then itup contains additional for adjustment of current key
|
|
*/
|
|
ret = gistlayerinsert(r, nblkno, itup, len, giststate);
|
|
|
|
/* nothing inserted in child */
|
|
if (!(ret & INSERTED))
|
|
{
|
|
ReleaseBuffer(buffer);
|
|
return 0x00;
|
|
}
|
|
|
|
/* child did not split */
|
|
if (!(ret & SPLITED))
|
|
{
|
|
IndexTuple newtup = gistgetadjusted(r, oldtup, (*itup)[0], giststate);
|
|
|
|
if (!newtup)
|
|
{
|
|
/* not need to update key */
|
|
ReleaseBuffer(buffer);
|
|
return 0x00;
|
|
}
|
|
|
|
(*itup)[0] = newtup;
|
|
}
|
|
|
|
/*
|
|
* This node's key has been modified, either because a child
|
|
* split occurred or because we needed to adjust our key for
|
|
* an insert in a child node. Therefore, remove the old
|
|
* version of this node's key.
|
|
*/
|
|
ItemPointerSet(&oldtid, blkno, child);
|
|
gistdelete(r, &oldtid);
|
|
|
|
/*
|
|
* if child was splitted, new key for child will be inserted in
|
|
* the end list of child, so we must say to any scans that page is
|
|
* changed beginning from 'child' offset
|
|
*/
|
|
if (ret & SPLITED)
|
|
gistadjscans(r, GISTOP_SPLIT, blkno, child);
|
|
}
|
|
|
|
ret = INSERTED;
|
|
|
|
if (gistnospace(page, (*itup), *len))
|
|
{
|
|
/* no space for insertion */
|
|
IndexTuple *itvec,
|
|
*newitup;
|
|
int tlen,
|
|
oldlen;
|
|
|
|
ret |= SPLITED;
|
|
itvec = gistreadbuffer(buffer, &tlen);
|
|
itvec = gistjoinvector(itvec, &tlen, (*itup), *len);
|
|
oldlen = *len;
|
|
newitup = gistSplit(r, buffer, itvec, &tlen, giststate);
|
|
ReleaseBuffer(buffer);
|
|
*itup = newitup;
|
|
*len = tlen; /* now tlen >= 2 */
|
|
}
|
|
else
|
|
{
|
|
/* enough space */
|
|
OffsetNumber off,
|
|
l;
|
|
|
|
off = (PageIsEmpty(page)) ?
|
|
FirstOffsetNumber
|
|
:
|
|
OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
|
l = gistwritebuffer(r, page, *itup, *len, off);
|
|
WriteBuffer(buffer);
|
|
|
|
if (*len > 1)
|
|
{ /* previous insert ret & SPLITED != 0 */
|
|
/*
|
|
* child was splited, so we must form union for insertion in
|
|
* parent
|
|
*/
|
|
IndexTuple newtup = gistunion(r, (*itup), *len, giststate);
|
|
ItemPointerSet(&(newtup->t_tid), blkno, 1);
|
|
(*itup)[0] = newtup;
|
|
*len = 1;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Write itup vector to page, has no control of free space
|
|
*/
|
|
static OffsetNumber
|
|
gistwritebuffer(Relation r, Page page, IndexTuple *itup,
|
|
int len, OffsetNumber off)
|
|
{
|
|
OffsetNumber l = InvalidOffsetNumber;
|
|
int i;
|
|
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
#ifdef GIST_PAGEADDITEM
|
|
GISTENTRY tmpdentry;
|
|
IndexTuple newtup;
|
|
bool IsNull;
|
|
|
|
l = gistPageAddItem(giststate, r, page,
|
|
(Item) itup[i], IndexTupleSize(itup[i]),
|
|
off, LP_USED, &tmpdentry, &newtup);
|
|
off = OffsetNumberNext(off);
|
|
#else
|
|
l = PageAddItem(page, (Item) itup[i], IndexTupleSize(itup[i]),
|
|
off, LP_USED);
|
|
if (l == InvalidOffsetNumber)
|
|
elog(ERROR, "failed to add index item to \"%s\"",
|
|
RelationGetRelationName(r));
|
|
#endif
|
|
}
|
|
return l;
|
|
}
|
|
|
|
/*
|
|
* Check space for itup vector on page
|
|
*/
|
|
static bool
|
|
gistnospace(Page page, IndexTuple *itvec, int len)
|
|
{
|
|
unsigned int size = 0;
|
|
int i;
|
|
|
|
for (i = 0; i < len; i++)
|
|
size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
|
|
|
|
return (PageGetFreeSpace(page) < size);
|
|
}
|
|
|
|
/*
|
|
* Read buffer into itup vector
|
|
*/
|
|
static IndexTuple *
|
|
gistreadbuffer(Buffer buffer, int *len /* out */ )
|
|
{
|
|
OffsetNumber i,
|
|
maxoff;
|
|
IndexTuple *itvec;
|
|
Page p = (Page) BufferGetPage(buffer);
|
|
|
|
maxoff = PageGetMaxOffsetNumber(p);
|
|
*len = maxoff;
|
|
itvec = palloc(sizeof(IndexTuple) * maxoff);
|
|
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
|
|
itvec[i - 1] = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
|
|
|
|
return itvec;
|
|
}
|
|
|
|
/*
|
|
* join two vectors into one
|
|
*/
|
|
static IndexTuple *
|
|
gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
|
|
{
|
|
itvec = (IndexTuple *) repalloc((void *) itvec, sizeof(IndexTuple) * ((*len) + addlen));
|
|
memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
|
|
*len += addlen;
|
|
return itvec;
|
|
}
|
|
|
|
/*
|
|
* Return an IndexTuple containing the result of applying the "union"
|
|
* method to the specified IndexTuple vector.
|
|
*/
|
|
static IndexTuple
|
|
gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
|
|
{
|
|
Datum attr[INDEX_MAX_KEYS];
|
|
bool isnull[INDEX_MAX_KEYS];
|
|
GistEntryVector *evec;
|
|
int i;
|
|
GISTENTRY centry[INDEX_MAX_KEYS];
|
|
|
|
evec = (GistEntryVector *) palloc(((len == 1) ? 2 : len) * sizeof(GISTENTRY) + GEVHDRSZ);
|
|
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
Datum datum;
|
|
int j;
|
|
int real_len;
|
|
|
|
real_len = 0;
|
|
for (j = 0; j < len; j++)
|
|
{
|
|
bool IsNull;
|
|
datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
|
|
if (IsNull)
|
|
continue;
|
|
|
|
gistdentryinit(giststate, i,
|
|
&(evec->vector[real_len]),
|
|
datum,
|
|
NULL, NULL, (OffsetNumber) 0,
|
|
ATTSIZE(datum, giststate->tupdesc, i + 1, IsNull),
|
|
FALSE, IsNull);
|
|
real_len++;
|
|
}
|
|
|
|
/* If this tuple vector was all NULLs, the union is NULL */
|
|
if (real_len == 0)
|
|
{
|
|
attr[i] = (Datum) 0;
|
|
isnull[i] = TRUE;
|
|
}
|
|
else
|
|
{
|
|
int datumsize;
|
|
|
|
if (real_len == 1)
|
|
{
|
|
evec->n = 2;
|
|
gistentryinit(evec->vector[1],
|
|
evec->vector[0].key, r, NULL,
|
|
(OffsetNumber) 0, evec->vector[0].bytes, FALSE);
|
|
}
|
|
else
|
|
evec->n = real_len;
|
|
|
|
/* Compress the result of the union and store in attr array */
|
|
datum = FunctionCall2(&giststate->unionFn[i],
|
|
PointerGetDatum(evec),
|
|
PointerGetDatum(&datumsize));
|
|
|
|
gistcentryinit(giststate, i, ¢ry[i], datum,
|
|
NULL, NULL, (OffsetNumber) 0,
|
|
datumsize, FALSE, FALSE);
|
|
isnull[i] = FALSE;
|
|
attr[i] = centry[i].key;
|
|
}
|
|
}
|
|
|
|
return index_form_tuple(giststate->tupdesc, attr, isnull);
|
|
}
|
|
|
|
|
|
/*
|
|
* Forms union of oldtup and addtup, if union == oldtup then return NULL
|
|
*/
|
|
static IndexTuple
|
|
gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
|
|
{
|
|
GistEntryVector *evec;
|
|
bool neednew = false;
|
|
bool isnull[INDEX_MAX_KEYS];
|
|
Datum attr[INDEX_MAX_KEYS];
|
|
GISTENTRY centry[INDEX_MAX_KEYS],
|
|
oldatt[INDEX_MAX_KEYS],
|
|
addatt[INDEX_MAX_KEYS],
|
|
*ev0p,
|
|
*ev1p;
|
|
bool oldisnull[INDEX_MAX_KEYS],
|
|
addisnull[INDEX_MAX_KEYS];
|
|
IndexTuple newtup = NULL;
|
|
int i;
|
|
|
|
evec = palloc(2 * sizeof(GISTENTRY) + GEVHDRSZ);
|
|
evec->n = 2;
|
|
ev0p = &(evec->vector[0]);
|
|
ev1p = &(evec->vector[1]);
|
|
|
|
gistDeCompressAtt(giststate, r, oldtup, NULL,
|
|
(OffsetNumber) 0, oldatt, oldisnull);
|
|
|
|
gistDeCompressAtt(giststate, r, addtup, NULL,
|
|
(OffsetNumber) 0, addatt, addisnull);
|
|
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
if (oldisnull[i] && addisnull[i])
|
|
{
|
|
attr[i] = (Datum) 0;
|
|
isnull[i] = TRUE;
|
|
}
|
|
else
|
|
{
|
|
Datum datum;
|
|
int datumsize;
|
|
|
|
FILLEV(oldisnull[i], oldatt[i].key, oldatt[i].bytes,
|
|
addisnull[i], addatt[i].key, addatt[i].bytes);
|
|
|
|
datum = FunctionCall2(&giststate->unionFn[i],
|
|
PointerGetDatum(evec),
|
|
PointerGetDatum(&datumsize));
|
|
|
|
if (oldisnull[i] || addisnull[i])
|
|
{
|
|
if (oldisnull[i])
|
|
neednew = true;
|
|
}
|
|
else
|
|
{
|
|
bool result;
|
|
|
|
FunctionCall3(&giststate->equalFn[i],
|
|
ev0p->key,
|
|
datum,
|
|
PointerGetDatum(&result));
|
|
|
|
if (!result)
|
|
neednew = true;
|
|
}
|
|
|
|
gistcentryinit(giststate, i, ¢ry[i], datum,
|
|
NULL, NULL, (OffsetNumber) 0,
|
|
datumsize, FALSE, FALSE);
|
|
|
|
attr[i] = centry[i].key;
|
|
isnull[i] = FALSE;
|
|
}
|
|
}
|
|
|
|
if (neednew)
|
|
{
|
|
/* need to update key */
|
|
newtup = index_form_tuple(giststate->tupdesc, attr, isnull);
|
|
newtup->t_tid = oldtup->t_tid;
|
|
}
|
|
|
|
return newtup;
|
|
}
|
|
|
|
static void
|
|
gistunionsubkey(Relation r, GISTSTATE *giststate, IndexTuple *itvec, GIST_SPLITVEC *spl)
|
|
{
|
|
int lr;
|
|
|
|
for (lr = 0; lr < 2; lr++)
|
|
{
|
|
OffsetNumber *entries;
|
|
int i;
|
|
Datum *attr;
|
|
int len,
|
|
*attrsize;
|
|
bool *isnull;
|
|
GistEntryVector *evec;
|
|
|
|
if (lr)
|
|
{
|
|
attrsize = spl->spl_lattrsize;
|
|
attr = spl->spl_lattr;
|
|
len = spl->spl_nleft;
|
|
entries = spl->spl_left;
|
|
isnull = spl->spl_lisnull;
|
|
}
|
|
else
|
|
{
|
|
attrsize = spl->spl_rattrsize;
|
|
attr = spl->spl_rattr;
|
|
len = spl->spl_nright;
|
|
entries = spl->spl_right;
|
|
isnull = spl->spl_risnull;
|
|
}
|
|
|
|
evec = palloc(((len == 1) ? 2 : len) * sizeof(GISTENTRY) + GEVHDRSZ);
|
|
|
|
for (i = 1; i < r->rd_att->natts; i++)
|
|
{
|
|
int j;
|
|
Datum datum;
|
|
int datumsize;
|
|
int real_len;
|
|
|
|
real_len = 0;
|
|
for (j = 0; j < len; j++)
|
|
{
|
|
bool IsNull;
|
|
|
|
if (spl->spl_idgrp[entries[j]])
|
|
continue;
|
|
datum = index_getattr(itvec[entries[j] - 1], i + 1,
|
|
giststate->tupdesc, &IsNull);
|
|
if (IsNull)
|
|
continue;
|
|
gistdentryinit(giststate, i,
|
|
&(evec->vector[real_len]),
|
|
datum,
|
|
NULL, NULL, (OffsetNumber) 0,
|
|
ATTSIZE(datum, giststate->tupdesc, i + 1, IsNull),
|
|
FALSE, IsNull);
|
|
real_len++;
|
|
|
|
}
|
|
|
|
if (real_len == 0)
|
|
{
|
|
datum = (Datum) 0;
|
|
datumsize = 0;
|
|
isnull[i] = true;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* evec->vector[0].bytes may be not defined, so form union
|
|
* with itself
|
|
*/
|
|
if (real_len == 1)
|
|
{
|
|
evec->n = 2;
|
|
memcpy(&(evec->vector[1]), &(evec->vector[0]),
|
|
sizeof(GISTENTRY));
|
|
}
|
|
else
|
|
evec->n = real_len;
|
|
datum = FunctionCall2(&giststate->unionFn[i],
|
|
PointerGetDatum(evec),
|
|
PointerGetDatum(&datumsize));
|
|
isnull[i] = false;
|
|
}
|
|
|
|
attr[i] = datum;
|
|
attrsize[i] = datumsize;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* find group in vector with equal value
|
|
*/
|
|
static int
|
|
gistfindgroup(GISTSTATE *giststate, GISTENTRY *valvec, GIST_SPLITVEC *spl)
|
|
{
|
|
int i;
|
|
int curid = 1;
|
|
|
|
/*
|
|
* first key is always not null (see gistinsert), so we may not check
|
|
* for nulls
|
|
*/
|
|
for (i = 0; i < spl->spl_nleft; i++)
|
|
{
|
|
int j;
|
|
int len;
|
|
bool result;
|
|
|
|
if (spl->spl_idgrp[spl->spl_left[i]])
|
|
continue;
|
|
len = 0;
|
|
/* find all equal value in right part */
|
|
for (j = 0; j < spl->spl_nright; j++)
|
|
{
|
|
if (spl->spl_idgrp[spl->spl_right[j]])
|
|
continue;
|
|
FunctionCall3(&giststate->equalFn[0],
|
|
valvec[spl->spl_left[i]].key,
|
|
valvec[spl->spl_right[j]].key,
|
|
PointerGetDatum(&result));
|
|
if (result)
|
|
{
|
|
spl->spl_idgrp[spl->spl_right[j]] = curid;
|
|
len++;
|
|
}
|
|
}
|
|
/* find all other equal value in left part */
|
|
if (len)
|
|
{
|
|
/* add current val to list of equal values */
|
|
spl->spl_idgrp[spl->spl_left[i]] = curid;
|
|
/* searching .. */
|
|
for (j = i + 1; j < spl->spl_nleft; j++)
|
|
{
|
|
if (spl->spl_idgrp[spl->spl_left[j]])
|
|
continue;
|
|
FunctionCall3(&giststate->equalFn[0],
|
|
valvec[spl->spl_left[i]].key,
|
|
valvec[spl->spl_left[j]].key,
|
|
PointerGetDatum(&result));
|
|
if (result)
|
|
{
|
|
spl->spl_idgrp[spl->spl_left[j]] = curid;
|
|
len++;
|
|
}
|
|
}
|
|
spl->spl_ngrp[curid] = len + 1;
|
|
curid++;
|
|
}
|
|
}
|
|
|
|
return curid;
|
|
}
|
|
|
|
/*
|
|
* Insert equivalent tuples to left or right page with minimum
|
|
* penalty
|
|
*/
|
|
static void
|
|
gistadjsubkey(Relation r,
|
|
IndexTuple *itup, /* contains compressed entry */
|
|
int *len,
|
|
GIST_SPLITVEC *v,
|
|
GISTSTATE *giststate)
|
|
{
|
|
int curlen;
|
|
OffsetNumber *curwpos;
|
|
GISTENTRY entry,
|
|
identry[INDEX_MAX_KEYS],
|
|
*ev0p,
|
|
*ev1p;
|
|
float lpenalty,
|
|
rpenalty;
|
|
GistEntryVector *evec;
|
|
int datumsize;
|
|
bool isnull[INDEX_MAX_KEYS];
|
|
int i,
|
|
j;
|
|
|
|
/* clear vectors */
|
|
curlen = v->spl_nleft;
|
|
curwpos = v->spl_left;
|
|
for (i = 0; i < v->spl_nleft; i++)
|
|
{
|
|
if (v->spl_idgrp[v->spl_left[i]] == 0)
|
|
{
|
|
*curwpos = v->spl_left[i];
|
|
curwpos++;
|
|
}
|
|
else
|
|
curlen--;
|
|
}
|
|
v->spl_nleft = curlen;
|
|
|
|
curlen = v->spl_nright;
|
|
curwpos = v->spl_right;
|
|
for (i = 0; i < v->spl_nright; i++)
|
|
{
|
|
if (v->spl_idgrp[v->spl_right[i]] == 0)
|
|
{
|
|
*curwpos = v->spl_right[i];
|
|
curwpos++;
|
|
}
|
|
else
|
|
curlen--;
|
|
}
|
|
v->spl_nright = curlen;
|
|
|
|
evec = palloc(2 * sizeof(GISTENTRY) + GEVHDRSZ);
|
|
evec->n = 2;
|
|
ev0p = &(evec->vector[0]);
|
|
ev1p = &(evec->vector[1]);
|
|
|
|
/* add equivalent tuple */
|
|
for (i = 0; i < *len; i++)
|
|
{
|
|
Datum datum;
|
|
|
|
if (v->spl_idgrp[i + 1] == 0) /* already inserted */
|
|
continue;
|
|
gistDeCompressAtt(giststate, r, itup[i], NULL, (OffsetNumber) 0,
|
|
identry, isnull);
|
|
|
|
v->spl_ngrp[v->spl_idgrp[i + 1]]--;
|
|
if (v->spl_ngrp[v->spl_idgrp[i + 1]] == 0 &&
|
|
(v->spl_grpflag[v->spl_idgrp[i + 1]] & BOTH_ADDED) != BOTH_ADDED)
|
|
{
|
|
/* force last in group */
|
|
rpenalty = 1.0;
|
|
lpenalty = (v->spl_grpflag[v->spl_idgrp[i + 1]] & LEFT_ADDED) ? 2.0 : 0.0;
|
|
}
|
|
else
|
|
{
|
|
/* where? */
|
|
for (j = 1; j < r->rd_att->natts; j++)
|
|
{
|
|
gistentryinit(entry, v->spl_lattr[j], r, NULL,
|
|
(OffsetNumber) 0, v->spl_lattrsize[j], FALSE);
|
|
gistpenalty(giststate, j, &entry, v->spl_lisnull[j],
|
|
&identry[j], isnull[j], &lpenalty);
|
|
|
|
gistentryinit(entry, v->spl_rattr[j], r, NULL,
|
|
(OffsetNumber) 0, v->spl_rattrsize[j], FALSE);
|
|
gistpenalty(giststate, j, &entry, v->spl_risnull[j],
|
|
&identry[j], isnull[j], &rpenalty);
|
|
|
|
if (lpenalty != rpenalty)
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* add
|
|
* XXX: refactor this to avoid duplicating code
|
|
*/
|
|
if (lpenalty < rpenalty)
|
|
{
|
|
v->spl_grpflag[v->spl_idgrp[i + 1]] |= LEFT_ADDED;
|
|
v->spl_left[v->spl_nleft] = i + 1;
|
|
v->spl_nleft++;
|
|
for (j = 1; j < r->rd_att->natts; j++)
|
|
{
|
|
if (isnull[j] && v->spl_lisnull[j])
|
|
{
|
|
v->spl_lattr[j] = (Datum) 0;
|
|
v->spl_lattrsize[j] = 0;
|
|
}
|
|
else
|
|
{
|
|
FILLEV(v->spl_lisnull[j], v->spl_lattr[j], v->spl_lattrsize[j],
|
|
isnull[j], identry[j].key, identry[j].bytes);
|
|
|
|
datum = FunctionCall2(&giststate->unionFn[j],
|
|
PointerGetDatum(evec),
|
|
PointerGetDatum(&datumsize));
|
|
|
|
v->spl_lattr[j] = datum;
|
|
v->spl_lattrsize[j] = datumsize;
|
|
v->spl_lisnull[j] = false;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
v->spl_grpflag[v->spl_idgrp[i + 1]] |= RIGHT_ADDED;
|
|
v->spl_right[v->spl_nright] = i + 1;
|
|
v->spl_nright++;
|
|
for (j = 1; j < r->rd_att->natts; j++)
|
|
{
|
|
if (isnull[j] && v->spl_risnull[j])
|
|
{
|
|
v->spl_rattr[j] = (Datum) 0;
|
|
v->spl_rattrsize[j] = 0;
|
|
}
|
|
else
|
|
{
|
|
FILLEV(v->spl_risnull[j], v->spl_rattr[j], v->spl_rattrsize[j],
|
|
isnull[j], identry[j].key, identry[j].bytes);
|
|
|
|
datum = FunctionCall2(&giststate->unionFn[j],
|
|
PointerGetDatum(evec),
|
|
PointerGetDatum(&datumsize));
|
|
|
|
v->spl_rattr[j] = datum;
|
|
v->spl_rattrsize[j] = datumsize;
|
|
v->spl_risnull[j] = false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* gistSplit -- split a page in the tree.
|
|
*/
|
|
static IndexTuple *
|
|
gistSplit(Relation r,
|
|
Buffer buffer,
|
|
IndexTuple *itup, /* contains compressed entry */
|
|
int *len,
|
|
GISTSTATE *giststate)
|
|
{
|
|
Page p;
|
|
Buffer leftbuf,
|
|
rightbuf;
|
|
Page left,
|
|
right;
|
|
IndexTuple *lvectup,
|
|
*rvectup,
|
|
*newtup;
|
|
BlockNumber lbknum,
|
|
rbknum;
|
|
GISTPageOpaque opaque;
|
|
GIST_SPLITVEC v;
|
|
GistEntryVector *entryvec;
|
|
int i,
|
|
nlen;
|
|
|
|
p = (Page) BufferGetPage(buffer);
|
|
opaque = (GISTPageOpaque) PageGetSpecialPointer(p);
|
|
|
|
/*
|
|
* The root of the tree is the first block in the relation. If we're
|
|
* about to split the root, we need to do some hocus-pocus to enforce
|
|
* this guarantee.
|
|
*/
|
|
if (BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
|
|
{
|
|
leftbuf = ReadBuffer(r, P_NEW);
|
|
GISTInitBuffer(leftbuf, opaque->flags);
|
|
lbknum = BufferGetBlockNumber(leftbuf);
|
|
left = (Page) BufferGetPage(leftbuf);
|
|
}
|
|
else
|
|
{
|
|
leftbuf = buffer;
|
|
IncrBufferRefCount(buffer);
|
|
lbknum = BufferGetBlockNumber(buffer);
|
|
left = (Page) PageGetTempPage(p, sizeof(GISTPageOpaqueData));
|
|
}
|
|
|
|
rightbuf = ReadBuffer(r, P_NEW);
|
|
GISTInitBuffer(rightbuf, opaque->flags);
|
|
rbknum = BufferGetBlockNumber(rightbuf);
|
|
right = (Page) BufferGetPage(rightbuf);
|
|
|
|
/* generate the item array */
|
|
entryvec = palloc(GEVHDRSZ + (*len + 1) * sizeof(GISTENTRY));
|
|
entryvec->n = *len + 1;
|
|
|
|
for (i = 1; i <= *len; i++)
|
|
{
|
|
Datum datum;
|
|
bool IsNull;
|
|
|
|
datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
|
|
gistdentryinit(giststate, 0, &(entryvec->vector[i]),
|
|
datum, r, p, i,
|
|
ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
|
|
FALSE, IsNull);
|
|
}
|
|
|
|
/*
|
|
* now let the user-defined picksplit function set up the split
|
|
* vector; in entryvec have no null value!!
|
|
*/
|
|
FunctionCall2(&giststate->picksplitFn[0],
|
|
PointerGetDatum(entryvec),
|
|
PointerGetDatum(&v));
|
|
|
|
/* compatibility with old code */
|
|
if (v.spl_left[v.spl_nleft - 1] == InvalidOffsetNumber)
|
|
v.spl_left[v.spl_nleft - 1] = (OffsetNumber) *len;
|
|
if (v.spl_right[v.spl_nright - 1] == InvalidOffsetNumber)
|
|
v.spl_right[v.spl_nright - 1] = (OffsetNumber) *len;
|
|
|
|
v.spl_lattr[0] = v.spl_ldatum;
|
|
v.spl_rattr[0] = v.spl_rdatum;
|
|
v.spl_lisnull[0] = false;
|
|
v.spl_risnull[0] = false;
|
|
|
|
/*
|
|
* if index is multikey, then we must to try get smaller bounding box
|
|
* for subkey(s)
|
|
*/
|
|
if (r->rd_att->natts > 1)
|
|
{
|
|
int MaxGrpId;
|
|
|
|
v.spl_idgrp = (int *) palloc0(sizeof(int) * (*len + 1));
|
|
v.spl_grpflag = (char *) palloc0(sizeof(char) * (*len + 1));
|
|
v.spl_ngrp = (int *) palloc(sizeof(int) * (*len + 1));
|
|
|
|
MaxGrpId = gistfindgroup(giststate, entryvec->vector, &v);
|
|
|
|
/* form union of sub keys for each page (l,p) */
|
|
gistunionsubkey(r, giststate, itup, &v);
|
|
|
|
/*
|
|
* if possible, we insert equivalent tuples with control by
|
|
* penalty for a subkey(s)
|
|
*/
|
|
if (MaxGrpId > 1)
|
|
gistadjsubkey(r, itup, len, &v, giststate);
|
|
}
|
|
|
|
/* form left and right vector */
|
|
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nleft);
|
|
rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * v.spl_nright);
|
|
|
|
for (i = 0; i < v.spl_nleft; i++)
|
|
lvectup[i] = itup[v.spl_left[i] - 1];
|
|
|
|
for (i = 0; i < v.spl_nright; i++)
|
|
rvectup[i] = itup[v.spl_right[i] - 1];
|
|
|
|
|
|
/* write on disk (may need another split) */
|
|
if (gistnospace(right, rvectup, v.spl_nright))
|
|
{
|
|
nlen = v.spl_nright;
|
|
newtup = gistSplit(r, rightbuf, rvectup, &nlen, giststate);
|
|
ReleaseBuffer(rightbuf);
|
|
}
|
|
else
|
|
{
|
|
OffsetNumber l;
|
|
|
|
l = gistwritebuffer(r, right, rvectup, v.spl_nright, FirstOffsetNumber);
|
|
WriteBuffer(rightbuf);
|
|
|
|
nlen = 1;
|
|
newtup = (IndexTuple *) palloc(sizeof(IndexTuple) * 1);
|
|
newtup[0] = gistFormTuple(giststate, r, v.spl_rattr, v.spl_rattrsize, v.spl_risnull);
|
|
ItemPointerSet(&(newtup[0]->t_tid), rbknum, 1);
|
|
}
|
|
|
|
if (gistnospace(left, lvectup, v.spl_nleft))
|
|
{
|
|
int llen = v.spl_nleft;
|
|
IndexTuple *lntup;
|
|
|
|
lntup = gistSplit(r, leftbuf, lvectup, &llen, giststate);
|
|
ReleaseBuffer(leftbuf);
|
|
|
|
newtup = gistjoinvector(newtup, &nlen, lntup, llen);
|
|
}
|
|
else
|
|
{
|
|
OffsetNumber l;
|
|
|
|
l = gistwritebuffer(r, left, lvectup, v.spl_nleft, FirstOffsetNumber);
|
|
if (BufferGetBlockNumber(buffer) != GIST_ROOT_BLKNO)
|
|
PageRestoreTempPage(left, p);
|
|
|
|
WriteBuffer(leftbuf);
|
|
|
|
nlen += 1;
|
|
newtup = (IndexTuple *) repalloc(newtup, sizeof(IndexTuple) * nlen);
|
|
newtup[nlen - 1] = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lattrsize, v.spl_lisnull);
|
|
ItemPointerSet(&(newtup[nlen - 1]->t_tid), lbknum, 1);
|
|
}
|
|
|
|
*len = nlen;
|
|
return newtup;
|
|
}
|
|
|
|
static void
|
|
gistnewroot(Relation r, IndexTuple *itup, int len)
|
|
{
|
|
Buffer b;
|
|
Page p;
|
|
|
|
b = ReadBuffer(r, GIST_ROOT_BLKNO);
|
|
GISTInitBuffer(b, 0);
|
|
p = BufferGetPage(b);
|
|
|
|
gistwritebuffer(r, p, itup, len, FirstOffsetNumber);
|
|
WriteBuffer(b);
|
|
}
|
|
|
|
static void
|
|
GISTInitBuffer(Buffer b, uint32 f)
|
|
{
|
|
GISTPageOpaque opaque;
|
|
Page page;
|
|
Size pageSize;
|
|
|
|
pageSize = BufferGetPageSize(b);
|
|
page = BufferGetPage(b);
|
|
PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
|
|
|
|
opaque = (GISTPageOpaque) PageGetSpecialPointer(page);
|
|
opaque->flags = f;
|
|
}
|
|
|
|
/*
|
|
* find entry with lowest penalty
|
|
*/
|
|
static OffsetNumber
|
|
gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
|
|
GISTSTATE *giststate)
|
|
{
|
|
OffsetNumber maxoff;
|
|
OffsetNumber i;
|
|
OffsetNumber which;
|
|
float sum_grow,
|
|
which_grow[INDEX_MAX_KEYS];
|
|
GISTENTRY entry,
|
|
identry[INDEX_MAX_KEYS];
|
|
bool isnull[INDEX_MAX_KEYS];
|
|
|
|
maxoff = PageGetMaxOffsetNumber(p);
|
|
*which_grow = -1.0;
|
|
which = -1;
|
|
sum_grow = 1;
|
|
gistDeCompressAtt(giststate, r,
|
|
it, NULL, (OffsetNumber) 0,
|
|
identry, isnull);
|
|
|
|
for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
|
|
{
|
|
int j;
|
|
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
|
|
|
|
sum_grow = 0;
|
|
for (j = 0; j < r->rd_att->natts; j++)
|
|
{
|
|
Datum datum;
|
|
float usize;
|
|
bool IsNull;
|
|
|
|
datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
|
|
gistdentryinit(giststate, j, &entry, datum, r, p, i,
|
|
ATTSIZE(datum, giststate->tupdesc, j + 1, IsNull),
|
|
FALSE, IsNull);
|
|
gistpenalty(giststate, j, &entry, IsNull,
|
|
&identry[j], isnull[j], &usize);
|
|
|
|
if (which_grow[j] < 0 || usize < which_grow[j])
|
|
{
|
|
which = i;
|
|
which_grow[j] = usize;
|
|
if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
|
|
which_grow[j + 1] = -1;
|
|
sum_grow += which_grow[j];
|
|
}
|
|
else if (which_grow[j] == usize)
|
|
sum_grow += usize;
|
|
else
|
|
{
|
|
sum_grow = 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return which;
|
|
}
|
|
|
|
/*
|
|
* Retail deletion of a single tuple.
|
|
*
|
|
* NB: this is no longer called externally, but is still needed by
|
|
* gistlayerinsert(). That dependency will have to be fixed if GIST
|
|
* is ever going to allow concurrent insertions.
|
|
*/
|
|
static void
|
|
gistdelete(Relation r, ItemPointer tid)
|
|
{
|
|
BlockNumber blkno;
|
|
OffsetNumber offnum;
|
|
Buffer buf;
|
|
Page page;
|
|
|
|
/*
|
|
* Since GIST is not marked "amconcurrent" in pg_am, caller should
|
|
* have acquired exclusive lock on index relation. We need no locking
|
|
* here.
|
|
*/
|
|
|
|
blkno = ItemPointerGetBlockNumber(tid);
|
|
offnum = ItemPointerGetOffsetNumber(tid);
|
|
|
|
/* adjust any scans that will be affected by this deletion */
|
|
/* NB: this works only for scans in *this* backend! */
|
|
gistadjscans(r, GISTOP_DEL, blkno, offnum);
|
|
|
|
/* delete the index tuple */
|
|
buf = ReadBuffer(r, blkno);
|
|
page = BufferGetPage(buf);
|
|
|
|
PageIndexTupleDelete(page, offnum);
|
|
|
|
WriteBuffer(buf);
|
|
}
|
|
|
|
/*
|
|
* Bulk deletion of all index entries pointing to a set of heap tuples.
|
|
* The set of target tuples is specified via a callback routine that tells
|
|
* whether any given heap tuple (identified by ItemPointer) is being deleted.
|
|
*
|
|
* Result: a palloc'd struct containing statistical info for VACUUM displays.
|
|
*/
|
|
Datum
|
|
gistbulkdelete(PG_FUNCTION_ARGS)
|
|
{
|
|
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
|
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
|
|
void *callback_state = (void *) PG_GETARG_POINTER(2);
|
|
IndexBulkDeleteResult *result;
|
|
BlockNumber num_pages;
|
|
double tuples_removed;
|
|
double num_index_tuples;
|
|
IndexScanDesc iscan;
|
|
|
|
tuples_removed = 0;
|
|
num_index_tuples = 0;
|
|
|
|
/*
|
|
* Since GIST is not marked "amconcurrent" in pg_am, caller should
|
|
* have acquired exclusive lock on index relation. We need no locking
|
|
* here.
|
|
*/
|
|
|
|
/*
|
|
* XXX generic implementation --- should be improved!
|
|
*/
|
|
|
|
/* walk through the entire index */
|
|
iscan = index_beginscan(NULL, rel, SnapshotAny, 0, NULL);
|
|
/* including killed tuples */
|
|
iscan->ignore_killed_tuples = false;
|
|
|
|
while (index_getnext_indexitem(iscan, ForwardScanDirection))
|
|
{
|
|
vacuum_delay_point();
|
|
|
|
if (callback(&iscan->xs_ctup.t_self, callback_state))
|
|
{
|
|
ItemPointerData indextup = iscan->currentItemData;
|
|
BlockNumber blkno;
|
|
OffsetNumber offnum;
|
|
Buffer buf;
|
|
Page page;
|
|
|
|
blkno = ItemPointerGetBlockNumber(&indextup);
|
|
offnum = ItemPointerGetOffsetNumber(&indextup);
|
|
|
|
/* adjust any scans that will be affected by this deletion */
|
|
gistadjscans(rel, GISTOP_DEL, blkno, offnum);
|
|
|
|
/* delete the index tuple */
|
|
buf = ReadBuffer(rel, blkno);
|
|
page = BufferGetPage(buf);
|
|
|
|
PageIndexTupleDelete(page, offnum);
|
|
|
|
WriteBuffer(buf);
|
|
|
|
tuples_removed += 1;
|
|
}
|
|
else
|
|
num_index_tuples += 1;
|
|
}
|
|
|
|
index_endscan(iscan);
|
|
|
|
/* return statistics */
|
|
num_pages = RelationGetNumberOfBlocks(rel);
|
|
|
|
result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
|
|
result->num_pages = num_pages;
|
|
result->num_index_tuples = num_index_tuples;
|
|
result->tuples_removed = tuples_removed;
|
|
|
|
PG_RETURN_POINTER(result);
|
|
}
|
|
|
|
void
|
|
initGISTstate(GISTSTATE *giststate, Relation index)
|
|
{
|
|
int i;
|
|
|
|
if (index->rd_att->natts > INDEX_MAX_KEYS)
|
|
elog(ERROR, "numberOfAttributes %d > %d",
|
|
index->rd_att->natts, INDEX_MAX_KEYS);
|
|
|
|
giststate->tupdesc = index->rd_att;
|
|
|
|
for (i = 0; i < index->rd_att->natts; i++)
|
|
{
|
|
fmgr_info_copy(&(giststate->consistentFn[i]),
|
|
index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(giststate->unionFn[i]),
|
|
index_getprocinfo(index, i + 1, GIST_UNION_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(giststate->compressFn[i]),
|
|
index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(giststate->decompressFn[i]),
|
|
index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(giststate->penaltyFn[i]),
|
|
index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(giststate->picksplitFn[i]),
|
|
index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
|
|
CurrentMemoryContext);
|
|
fmgr_info_copy(&(giststate->equalFn[i]),
|
|
index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
|
|
CurrentMemoryContext);
|
|
}
|
|
}
|
|
|
|
void
|
|
freeGISTstate(GISTSTATE *giststate)
|
|
{
|
|
/* no work */
|
|
}
|
|
|
|
#ifdef GIST_PAGEADDITEM
|
|
/*
|
|
* Given an IndexTuple to be inserted on a page, this routine replaces
|
|
* the key with another key, which may involve generating a new IndexTuple
|
|
* if the sizes don't match or if the null status changes.
|
|
*
|
|
* XXX this only works for a single-column index tuple!
|
|
*/
|
|
static IndexTuple
|
|
gist_tuple_replacekey(Relation r, GISTENTRY entry, IndexTuple t)
|
|
{
|
|
bool IsNull;
|
|
Datum datum = index_getattr(t, 1, r->rd_att, &IsNull);
|
|
|
|
/*
|
|
* If new entry fits in index tuple, copy it in. To avoid worrying
|
|
* about null-value bitmask, pass it off to the general
|
|
* index_form_tuple routine if either the previous or new value is
|
|
* NULL.
|
|
*/
|
|
if (!IsNull && DatumGetPointer(entry.key) != NULL &&
|
|
(Size) entry.bytes <= ATTSIZE(datum, r, 1, IsNull))
|
|
{
|
|
memcpy(DatumGetPointer(datum),
|
|
DatumGetPointer(entry.key),
|
|
entry.bytes);
|
|
/* clear out old size */
|
|
t->t_info &= ~INDEX_SIZE_MASK;
|
|
/* or in new size */
|
|
t->t_info |= MAXALIGN(entry.bytes + sizeof(IndexTupleData));
|
|
|
|
return t;
|
|
}
|
|
else
|
|
{
|
|
/* generate a new index tuple for the compressed entry */
|
|
TupleDesc tupDesc = r->rd_att;
|
|
IndexTuple newtup;
|
|
bool isnull;
|
|
|
|
isnull = (DatumGetPointer(entry.key) == NULL);
|
|
newtup = index_form_tuple(tupDesc, &(entry.key), &isnull);
|
|
newtup->t_tid = t->t_tid;
|
|
return newtup;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* initialize a GiST entry with a decompressed version of key
|
|
*/
|
|
void
|
|
gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
|
|
Datum k, Relation r, Page pg, OffsetNumber o,
|
|
int b, bool l, bool isNull)
|
|
{
|
|
if (b && !isNull)
|
|
{
|
|
GISTENTRY *dep;
|
|
|
|
gistentryinit(*e, k, r, pg, o, b, l);
|
|
dep = (GISTENTRY *)
|
|
DatumGetPointer(FunctionCall1(&giststate->decompressFn[nkey],
|
|
PointerGetDatum(e)));
|
|
/* decompressFn may just return the given pointer */
|
|
if (dep != e)
|
|
gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
|
|
dep->bytes, dep->leafkey);
|
|
}
|
|
else
|
|
gistentryinit(*e, (Datum) 0, r, pg, o, 0, l);
|
|
}
|
|
|
|
|
|
/*
|
|
* initialize a GiST entry with a compressed version of key
|
|
*/
|
|
static void
|
|
gistcentryinit(GISTSTATE *giststate, int nkey,
|
|
GISTENTRY *e, Datum k, Relation r,
|
|
Page pg, OffsetNumber o, int b, bool l, bool isNull)
|
|
{
|
|
if (!isNull)
|
|
{
|
|
GISTENTRY *cep;
|
|
|
|
gistentryinit(*e, k, r, pg, o, b, l);
|
|
cep = (GISTENTRY *)
|
|
DatumGetPointer(FunctionCall1(&giststate->compressFn[nkey],
|
|
PointerGetDatum(e)));
|
|
/* compressFn may just return the given pointer */
|
|
if (cep != e)
|
|
gistentryinit(*e, cep->key, cep->rel, cep->page, cep->offset,
|
|
cep->bytes, cep->leafkey);
|
|
}
|
|
else
|
|
gistentryinit(*e, (Datum) 0, r, pg, o, 0, l);
|
|
}
|
|
|
|
static IndexTuple
|
|
gistFormTuple(GISTSTATE *giststate, Relation r,
|
|
Datum attdata[], int datumsize[], bool isnull[])
|
|
{
|
|
GISTENTRY centry[INDEX_MAX_KEYS];
|
|
Datum compatt[INDEX_MAX_KEYS];
|
|
int i;
|
|
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
if (isnull[i])
|
|
compatt[i] = (Datum) 0;
|
|
else
|
|
{
|
|
gistcentryinit(giststate, i, ¢ry[i], attdata[i],
|
|
NULL, NULL, (OffsetNumber) 0,
|
|
datumsize[i], FALSE, FALSE);
|
|
compatt[i] = centry[i].key;
|
|
}
|
|
}
|
|
|
|
return index_form_tuple(giststate->tupdesc, compatt, isnull);
|
|
}
|
|
|
|
static void
|
|
gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
|
|
OffsetNumber o, GISTENTRY *attdata, bool *isnull)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
|
|
gistdentryinit(giststate, i, &attdata[i],
|
|
datum, r, p, o,
|
|
ATTSIZE(datum, giststate->tupdesc, i + 1, isnull[i]),
|
|
FALSE, isnull[i]);
|
|
}
|
|
}
|
|
|
|
static void
|
|
gistpenalty(GISTSTATE *giststate, int attno,
|
|
GISTENTRY *key1, bool isNull1,
|
|
GISTENTRY *key2, bool isNull2, float *penalty)
|
|
{
|
|
if (giststate->penaltyFn[attno].fn_strict && (isNull1 || isNull2))
|
|
*penalty = 0.0;
|
|
else
|
|
FunctionCall3(&giststate->penaltyFn[attno],
|
|
PointerGetDatum(key1),
|
|
PointerGetDatum(key2),
|
|
PointerGetDatum(penalty));
|
|
}
|
|
|
|
#ifdef GISTDEBUG
|
|
static void
|
|
gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber coff)
|
|
{
|
|
Buffer buffer;
|
|
Page page;
|
|
GISTPageOpaque opaque;
|
|
IndexTuple which;
|
|
ItemId iid;
|
|
OffsetNumber i,
|
|
maxoff;
|
|
BlockNumber cblk;
|
|
char *pred;
|
|
|
|
pred = (char *) palloc(sizeof(char) * level + 1);
|
|
MemSet(pred, '\t', level);
|
|
pred[level] = '\0';
|
|
|
|
buffer = ReadBuffer(r, blk);
|
|
page = (Page) BufferGetPage(buffer);
|
|
opaque = (GISTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
|
|
elog(DEBUG4, "%sPage: %d %s blk: %d maxoff: %d free: %d", pred,
|
|
coff, (opaque->flags & F_LEAF) ? "LEAF" : "INTE", (int) blk,
|
|
(int) maxoff, PageGetFreeSpace(page));
|
|
|
|
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
|
|
{
|
|
iid = PageGetItemId(page, i);
|
|
which = (IndexTuple) PageGetItem(page, iid);
|
|
cblk = ItemPointerGetBlockNumber(&(which->t_tid));
|
|
#ifdef PRINTTUPLE
|
|
elog(DEBUG4, "%s Tuple. blk: %d size: %d", pred, (int) cblk,
|
|
IndexTupleSize(which));
|
|
#endif
|
|
|
|
if (!(opaque->flags & F_LEAF))
|
|
gist_dumptree(r, level + 1, cblk, i);
|
|
}
|
|
ReleaseBuffer(buffer);
|
|
pfree(pred);
|
|
}
|
|
#endif /* defined GISTDEBUG */
|
|
|
|
void
|
|
gist_redo(XLogRecPtr lsn, XLogRecord *record)
|
|
{
|
|
elog(PANIC, "gist_redo: unimplemented");
|
|
}
|
|
|
|
void
|
|
gist_desc(char *buf, uint8 xl_info, char *rec)
|
|
{
|
|
}
|