postgresql/src/backend/access/nbtree/nbtree.c

543 lines
14 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* btree.c--
* Implementation of Lehman and Yao's btree management algorithm for
* Postgres.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
1996-11-03 23:58:26 +01:00
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.8 1996/11/03 22:57:54 scrappy Exp $
*
* NOTES
* This file contains only the public interface routines.
*
*-------------------------------------------------------------------------
*/
#include <stdio.h>
#include <time.h>
1996-10-20 12:53:18 +02:00
#include "postgres.h"
#include "utils/rel.h"
1996-11-03 23:58:26 +01:00
#include "access/genam.h"
1996-10-20 12:53:18 +02:00
#include "storage/bufpage.h"
#include "storage/bufmgr.h"
1996-10-20 12:53:18 +02:00
#include "access/htup.h"
#include "utils/palloc.h"
#include "access/nbtree.h"
#include "executor/executor.h"
1996-10-20 12:53:18 +02:00
#include "access/heapam.h"
#include "catalog/index.h"
#include "miscadmin.h"
bool BuildingBtree = false;
bool FastBuild = false; /* turn this on to make bulk builds work*/
/*
* btbuild() -- build a new btree index.
*
* We use a global variable to record the fact that we're creating
* a new index. This is used to avoid high-concurrency locking,
* since the index won't be visible until this transaction commits
* and since building is guaranteed to be single-threaded.
*/
void
btbuild(Relation heap,
Relation index,
int natts,
AttrNumber *attnum,
IndexStrategy istrat,
uint16 pcount,
Datum *params,
FuncIndexInfo *finfo,
PredInfo *predInfo)
{
HeapScanDesc hscan;
Buffer buffer;
HeapTuple htup;
IndexTuple itup;
TupleDesc htupdesc, itupdesc;
Datum *attdata;
bool *nulls;
InsertIndexResult res = 0;
int nhtups, nitups;
int i;
BTItem btitem;
#ifndef OMIT_PARTIAL_INDEX
ExprContext *econtext;
TupleTable tupleTable;
TupleTableSlot *slot;
#endif
Oid hrelid, irelid;
Node *pred, *oldPred;
void *spool;
/* note that this is a new btree */
BuildingBtree = true;
pred = predInfo->pred;
oldPred = predInfo->oldPred;
/* initialize the btree index metadata page (if this is a new index) */
if (oldPred == NULL)
_bt_metapinit(index);
/* get tuple descriptors for heap and index relations */
htupdesc = RelationGetTupleDescriptor(heap);
itupdesc = RelationGetTupleDescriptor(index);
/* get space for data items that'll appear in the index tuple */
attdata = (Datum *) palloc(natts * sizeof(Datum));
nulls = (bool *) palloc(natts * sizeof(bool));
/*
* If this is a predicate (partial) index, we will need to evaluate the
* predicate using ExecQual, which requires the current tuple to be in a
* slot of a TupleTable. In addition, ExecQual must have an ExprContext
* referring to that slot. Here, we initialize dummy TupleTable and
* ExprContext objects for this purpose. --Nels, Feb '92
*/
#ifndef OMIT_PARTIAL_INDEX
if (pred != NULL || oldPred != NULL) {
tupleTable = ExecCreateTupleTable(1);
slot = ExecAllocTableSlot(tupleTable);
econtext = makeNode(ExprContext);
FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer);
}
else
{
econtext = NULL;
tupleTable = NULL;
slot = NULL;
}
#endif /* OMIT_PARTIAL_INDEX */
/* start a heap scan */
hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
htup = heap_getnext(hscan, 0, &buffer);
/* build the index */
nhtups = nitups = 0;
if (FastBuild) {
spool = _bt_spoolinit(index, 7);
res = (InsertIndexResult) NULL;
}
else
spool = NULL;
for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
nhtups++;
/*
* If oldPred != NULL, this is an EXTEND INDEX command, so skip
* this tuple if it was already in the existing partial index
*/
if (oldPred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
/*SetSlotContents(slot, htup);*/
slot->val = htup;
if (ExecQual((List*)oldPred, econtext) == true) {
nitups++;
continue;
}
#endif /* OMIT_PARTIAL_INDEX */
}
/* Skip this tuple if it doesn't satisfy the partial-index predicate */
if (pred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
/* SetSlotContents(slot, htup); */
slot->val = htup;
if (ExecQual((List*)pred, econtext) == false)
continue;
#endif /* OMIT_PARTIAL_INDEX */
}
nitups++;
/*
* For the current heap tuple, extract all the attributes
* we use in this index, and note which are null.
*/
for (i = 1; i <= natts; i++) {
int attoff;
bool attnull;
/*
* Offsets are from the start of the tuple, and are
* zero-based; indices are one-based. The next call
* returns i - 1. That's data hiding for you.
*/
attoff = AttrNumberGetAttrOffset(i);
attdata[attoff] = GetIndexValue(htup,
htupdesc,
attoff,
attnum,
finfo,
&attnull,
buffer);
nulls[attoff] = (attnull ? 'n' : ' ');
}
/* form an index tuple and point it at the heap tuple */
itup = index_formtuple(itupdesc, attdata, nulls);
/*
* If the single index key is null, we don't insert it into
* the index. Btrees support scans on <, <=, =, >=, and >.
* Relational algebra says that A op B (where op is one of the
* operators above) returns null if either A or B is null. This
* means that no qualification used in an index scan could ever
* return true on a null attribute. It also means that indices
* can't be used by ISNULL or NOTNULL scans, but that's an
* artifact of the strategy map architecture chosen in 1986, not
* of the way nulls are handled here.
*/
if (itup->t_info & INDEX_NULL_MASK) {
pfree(itup);
continue;
}
itup->t_tid = htup->t_ctid;
btitem = _bt_formitem(itup);
/*
* if we are doing bottom-up btree build, we insert the index
* into a spool page for subsequent processing. otherwise, we
* insert into the btree.
*/
if (FastBuild) {
_bt_spool(index, btitem, spool);
} else {
res = _bt_doinsert(index, btitem);
}
pfree(btitem);
pfree(itup);
if (res) {
pfree(res);
}
}
/* okay, all heap tuples are indexed */
heap_endscan(hscan);
if (pred != NULL || oldPred != NULL) {
#ifndef OMIT_PARTIAL_INDEX
ExecDestroyTupleTable(tupleTable, true);
pfree(econtext);
#endif /* OMIT_PARTIAL_INDEX */
}
/*
* if we are doing bottom-up btree build, we now have a bunch of
* sorted runs in the spool pages. finish the build by (1)
* merging the runs, (2) inserting the sorted tuples into btree
* pages and (3) building the upper levels.
*/
if (FastBuild) {
_bt_spool(index, (BTItem) NULL, spool); /* flush spool */
_bt_leafbuild(index, spool);
_bt_spooldestroy(spool);
}
/*
* Since we just counted the tuples in the heap, we update its
* stats in pg_class to guarantee that the planner takes advantage
* of the index we just created. Finally, only update statistics
* during normal index definitions, not for indices on system catalogs
* created during bootstrap processing. We must close the relations
* before updatings statistics to guarantee that the relcache entries
* are flushed when we increment the command counter in UpdateStats().
*/
if (IsNormalProcessingMode())
{
hrelid = heap->rd_id;
irelid = index->rd_id;
heap_close(heap);
index_close(index);
UpdateStats(hrelid, nhtups, true);
UpdateStats(irelid, nitups, false);
if (oldPred != NULL) {
if (nitups == nhtups) pred = NULL;
UpdateIndexPredicate(irelid, oldPred, pred);
}
}
/* be tidy */
pfree(nulls);
pfree(attdata);
/* all done */
BuildingBtree = false;
}
/*
* btinsert() -- insert an index tuple into a btree.
*
* Descend the tree recursively, find the appropriate location for our
* new tuple, put it there, set its unique OID as appropriate, and
* return an InsertIndexResult to the caller.
*/
InsertIndexResult
btinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid)
{
BTItem btitem;
IndexTuple itup;
InsertIndexResult res;
/* generate an index tuple */
itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls);
itup->t_tid = *ht_ctid;
if (itup->t_info & INDEX_NULL_MASK)
return ((InsertIndexResult) NULL);
btitem = _bt_formitem(itup);
res = _bt_doinsert(rel, btitem);
pfree(btitem);
pfree(itup);
return (res);
}
/*
* btgettuple() -- Get the next tuple in the scan.
*/
char *
btgettuple(IndexScanDesc scan, ScanDirection dir)
{
RetrieveIndexResult res;
/*
* If we've already initialized this scan, we can just advance it
* in the appropriate direction. If we haven't done so yet, we
* call a routine to get the first item in the scan.
*/
if (ItemPointerIsValid(&(scan->currentItemData)))
res = _bt_next(scan, dir);
else
res = _bt_first(scan, dir);
return ((char *) res);
}
/*
* btbeginscan() -- start a scan on a btree index
*/
char *
btbeginscan(Relation rel, bool fromEnd, uint16 keysz, ScanKey scankey)
{
IndexScanDesc scan;
/* get the scan */
scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
/* register scan in case we change pages it's using */
_bt_regscan(scan);
return ((char *) scan);
}
/*
* btrescan() -- rescan an index relation
*/
void
btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
{
ItemPointer iptr;
BTScanOpaque so;
StrategyNumber strat;
so = (BTScanOpaque) scan->opaque;
/* we hold a read lock on the current page in the scan */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* and we hold a read lock on the last marked item in the scan */
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
if ( so == NULL ) /* if called from btbeginscan */
{
so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
so->btso_curbuf = so->btso_mrkbuf = InvalidBuffer;
so->keyData = (ScanKey) NULL;
if ( scan->numberOfKeys > 0)
so->keyData = (ScanKey) palloc (scan->numberOfKeys * sizeof(ScanKeyData));
scan->opaque = so;
scan->flags = 0x0;
}
/* reset the scan key */
so->numberOfKeys = scan->numberOfKeys;
so->qual_ok = 1; /* may be changed by _bt_orderkeys */
if (scan->numberOfKeys > 0) {
memmove(scan->keyData,
scankey,
scan->numberOfKeys * sizeof(ScanKeyData));
memmove(so->keyData,
scankey,
so->numberOfKeys * sizeof(ScanKeyData));
/* order the keys in the qualification */
if (so->numberOfKeys > 1)
_bt_orderkeys(scan->relation, &so->numberOfKeys, so->keyData, &so->qual_ok);
}
/* finally, be sure that the scan exploits the tree order */
scan->scanFromEnd = false;
if ( so->numberOfKeys > 0 ) {
strat = _bt_getstrat(scan->relation, 1 /* XXX */,
so->keyData[0].sk_procedure);
if (strat == BTLessStrategyNumber
|| strat == BTLessEqualStrategyNumber)
scan->scanFromEnd = true;
} else {
scan->scanFromEnd = true;
}
}
void
btmovescan(IndexScanDesc scan, Datum v)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* release any locks we still hold */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* scan->keyData[0].sk_argument = v; */
so->keyData[0].sk_argument = v;
}
/*
* btendscan() -- close down a scan
*/
void
btendscan(IndexScanDesc scan)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* release any locks we still hold */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
if (BufferIsValid(so->btso_curbuf))
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
if (BufferIsValid(so->btso_mrkbuf))
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* don't need scan registered anymore */
_bt_dropscan(scan);
/* be tidy */
if ( so->keyData != (ScanKey) NULL )
pfree (so->keyData);
pfree (scan->opaque);
}
/*
* btmarkpos() -- save current scan position
*/
void
btmarkpos(IndexScanDesc scan)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* release lock on old marked data, if any */
if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* bump lock on currentItemData and copy to currentMarkData */
if (ItemPointerIsValid(&(scan->currentItemData))) {
so->btso_mrkbuf = _bt_getbuf(scan->relation,
BufferGetBlockNumber(so->btso_curbuf),
BT_READ);
scan->currentMarkData = scan->currentItemData;
}
}
/*
* btrestrpos() -- restore scan to last saved position
*/
void
btrestrpos(IndexScanDesc scan)
{
ItemPointer iptr;
BTScanOpaque so;
so = (BTScanOpaque) scan->opaque;
/* release lock on current data, if any */
if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* bump lock on currentMarkData and copy to currentItemData */
if (ItemPointerIsValid(&(scan->currentMarkData))) {
so->btso_curbuf = _bt_getbuf(scan->relation,
BufferGetBlockNumber(so->btso_mrkbuf),
BT_READ);
scan->currentItemData = scan->currentMarkData;
}
}
/* stubs */
void
btdelete(Relation rel, ItemPointer tid)
{
/* adjust any active scans that will be affected by this deletion */
_bt_adjscans(rel, tid);
/* delete the data from the page */
_bt_pagedel(rel, tid);
}