1996-08-27 23:50:29 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* nbtree.h
|
1997-09-07 07:04:48 +02:00
|
|
|
* header file for postgres btree access method implementation.
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
|
|
|
*
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
|
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
2000-02-18 07:32:39 +01:00
|
|
|
* $Id: nbtree.h,v 1.34 2000/02/18 06:32:28 tgl Exp $
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
#ifndef NBTREE_H
|
|
|
|
#define NBTREE_H
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "access/funcindex.h"
|
|
|
|
#include "access/itup.h"
|
1999-07-16 19:07:40 +02:00
|
|
|
#include "access/relscan.h"
|
|
|
|
#include "access/sdir.h"
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* BTPageOpaqueData -- At the end of every page, we store a pointer
|
|
|
|
* to both siblings in the tree. See Lehman and Yao's paper for more
|
|
|
|
* info. In addition, we need to know what sort of page this is
|
|
|
|
* (leaf or internal), and whether the page is available for reuse.
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* Lehman and Yao's algorithm requires a ``high key'' on every page.
|
|
|
|
* The high key on a page is guaranteed to be greater than or equal
|
|
|
|
* to any key that appears on this page. Our insertion algorithm
|
|
|
|
* guarantees that we can use the initial least key on our right
|
|
|
|
* sibling as the high key. We allocate space for the line pointer
|
|
|
|
* to the high key in the opaque data at the end of the page.
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* Rightmost pages in the tree have no high key.
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
typedef struct BTPageOpaqueData
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
BlockNumber btpo_prev;
|
|
|
|
BlockNumber btpo_next;
|
1999-05-25 18:15:34 +02:00
|
|
|
BlockNumber btpo_parent;
|
1997-09-08 04:41:22 +02:00
|
|
|
uint16 btpo_flags;
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#define BTP_LEAF (1 << 0)
|
|
|
|
#define BTP_ROOT (1 << 1)
|
|
|
|
#define BTP_FREE (1 << 2)
|
|
|
|
#define BTP_META (1 << 3)
|
|
|
|
#define BTP_CHAIN (1 << 4)
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1997-09-08 22:59:27 +02:00
|
|
|
} BTPageOpaqueData;
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
typedef BTPageOpaqueData *BTPageOpaque;
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* ScanOpaqueData is used to remember which buffers we're currently
|
|
|
|
* examining in the scan. We keep these buffers locked and pinned
|
|
|
|
* and recorded in the opaque entry of the scan in order to avoid
|
|
|
|
* doing a ReadBuffer() for every tuple in the index. This avoids
|
|
|
|
* semop() calls, which are expensive.
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* It is also used to remember the actual scankey info (we need it
|
1998-07-30 07:05:05 +02:00
|
|
|
* if some scankeys are evaluated at runtime).
|
|
|
|
*
|
|
|
|
* curHeapIptr & mrkHeapIptr are heap iptr-s from current/marked
|
|
|
|
* index tuples: we don't adjust scans on insertions (and, if LLL
|
|
|
|
* is ON, don't hold locks on index pages between passes) - we
|
|
|
|
* use these pointers to restore index scan positions...
|
|
|
|
* - vadim 07/29/98
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
typedef struct BTScanOpaqueData
|
|
|
|
{
|
1998-09-01 06:40:42 +02:00
|
|
|
Buffer btso_curbuf;
|
|
|
|
Buffer btso_mrkbuf;
|
|
|
|
ItemPointerData curHeapIptr;
|
|
|
|
ItemPointerData mrkHeapIptr;
|
|
|
|
uint16 qual_ok; /* 0 for quals like key == 1 && key > 2 */
|
|
|
|
uint16 numberOfKeys; /* number of keys */
|
|
|
|
uint16 numberOfFirstKeys; /* number of keys for 1st
|
|
|
|
* attribute */
|
|
|
|
ScanKey keyData; /* key descriptor */
|
1997-09-08 22:59:27 +02:00
|
|
|
} BTScanOpaqueData;
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
typedef BTScanOpaqueData *BTScanOpaque;
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* BTItems are what we store in the btree. Each item has an index
|
|
|
|
* tuple, including key and pointer values. In addition, we must
|
|
|
|
* guarantee that all tuples in the index are unique, in order to
|
|
|
|
* satisfy some assumptions in Lehman and Yao. The way that we do
|
|
|
|
* this is by generating a new OID for every insertion that we do in
|
|
|
|
* the tree. This adds eight bytes to the size of btree index
|
|
|
|
* tuples. Note that we do not use the OID as part of a composite
|
|
|
|
* key; the OID only serves as a unique identifier for a given index
|
|
|
|
* tuple (logical position within a page).
|
1997-04-16 03:21:59 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* New comments:
|
|
|
|
* actually, we must guarantee that all tuples in A LEVEL
|
|
|
|
* are unique, not in ALL INDEX. So, we can use bti_itup->t_tid
|
|
|
|
* as unique identifier for a given index tuple (logical position
|
|
|
|
* within a level). - vadim 04/09/97
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
typedef struct BTItemData
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
IndexTupleData bti_itup;
|
1997-09-08 22:59:27 +02:00
|
|
|
} BTItemData;
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
typedef BTItemData *BTItem;
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
 * BTItemSame -- true when two BTItem pointers carry the same t_tid, i.e.
 * the same block id (hi and lo halves) and the same position id.
 *
 * Both arguments are parenthesized so that any pointer-valued expression
 * (e.g. &item) can be passed safely.  Each argument is evaluated up to
 * three times, so do not pass expressions with side effects.
 */
#define BTItemSame(i1, i2) \
	( (i1)->bti_itup.t_tid.ip_blkid.bi_hi == \
	  (i2)->bti_itup.t_tid.ip_blkid.bi_hi && \
	  (i1)->bti_itup.t_tid.ip_blkid.bi_lo == \
	  (i2)->bti_itup.t_tid.ip_blkid.bi_lo && \
	  (i1)->bti_itup.t_tid.ip_posid == \
	  (i2)->bti_itup.t_tid.ip_posid )
|
1997-04-16 03:21:59 +02:00
|
|
|
|
1996-08-27 23:50:29 +02:00
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* BTStackData -- As we descend a tree, we push the (key, pointer)
|
|
|
|
* pairs from internal nodes onto a private stack. If we split a
|
|
|
|
* leaf, we use this stack to walk back up the tree and insert data
|
|
|
|
* into parent nodes (and possibly to split them, too). Lehman and
|
|
|
|
* Yao's update algorithm guarantees that under no circumstances can
|
|
|
|
* our private stack give us an irredeemably bad picture up the tree.
|
|
|
|
* Again, see the paper for details.
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
typedef struct BTStackData
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
BlockNumber bts_blkno;
|
|
|
|
OffsetNumber bts_offset;
|
|
|
|
BTItem bts_btitem;
|
1997-09-07 07:04:48 +02:00
|
|
|
struct BTStackData *bts_parent;
|
1997-09-08 22:59:27 +02:00
|
|
|
} BTStackData;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
typedef BTStackData *BTStack;
|
|
|
|
|
|
|
|
typedef struct BTPageState
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Buffer btps_buf;
|
|
|
|
Page btps_page;
|
|
|
|
BTItem btps_lastbti;
|
|
|
|
OffsetNumber btps_lastoff;
|
|
|
|
OffsetNumber btps_firstoff;
|
|
|
|
int btps_level;
|
|
|
|
bool btps_doupper;
|
1997-09-07 07:04:48 +02:00
|
|
|
struct BTPageState *btps_next;
|
1997-09-08 22:59:27 +02:00
|
|
|
} BTPageState;
|
1997-02-14 23:47:36 +01:00
|
|
|
|
1996-08-27 23:50:29 +02:00
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* We need to be able to tell the difference between read and write
|
|
|
|
* requests for pages, in order to do locking correctly.
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
1999-05-25 20:31:28 +02:00
|
|
|
#define BT_READ			BUFFER_LOCK_SHARE		/* read request for a page */
#define BT_WRITE		BUFFER_LOCK_EXCLUSIVE	/* write request for a page */
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* Similarly, the difference between insertion and non-insertion binary
|
|
|
|
* searches on a given page makes a difference when we're descending the
|
|
|
|
* tree.
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
#define BT_INSERTION	0		/* binary search on behalf of an insertion */
#define BT_DESCENT		1		/* non-insertion binary search */
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* In general, the btree code tries to localize its knowledge about
|
|
|
|
* page layout to a couple of routines. However, we need a special
|
|
|
|
* value to indicate "no page number" in those places where we expect
|
|
|
|
* page numbers.
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#define P_NONE				0	/* "no page number" marker */
/* true when the page described by 'opaque' has no left / right sibling */
#define P_LEFTMOST(opaque)	((opaque)->btpo_prev == P_NONE)
#define P_RIGHTMOST(opaque) ((opaque)->btpo_next == P_NONE)

/* presumably the item offsets of the high key and of the first real key */
#define P_HIKEY				((OffsetNumber) 1)
#define P_FIRSTKEY			((OffsetNumber) 2)
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* Strategy numbers -- ordering of these is <, <=, =, >=, >
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
#define BTLessStrategyNumber			1	/* <  */
#define BTLessEqualStrategyNumber		2	/* <= */
#define BTEqualStrategyNumber			3	/* =  */
#define BTGreaterEqualStrategyNumber	4	/* >= */
#define BTGreaterStrategyNumber			5	/* >  */
#define BTMaxStrategyNumber				5	/* highest strategy number above */
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* When a new operator class is declared, we require that the user
|
|
|
|
* supply us with an amproc procedure for determining whether, for
|
|
|
|
* two keys a and b, a < b, a = b, or a > b. This routine must
|
|
|
|
* return < 0, 0, > 0, respectively, in these three cases. Since we
|
|
|
|
* only have one such proc in amproc, it's number 1.
|
1996-08-27 23:50:29 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
#define BTORDER_PROC 1	/* amproc number of the three-way key comparison procedure */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* prototypes for functions in nbtinsert.c
|
|
|
|
*/
|
1998-09-01 06:40:42 +02:00
|
|
|
extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem,
|
2000-02-18 07:32:39 +01:00
|
|
|
bool index_is_unique, Relation heapRel);
|
|
|
|
extern int32 _bt_tuplecompare(Relation rel, Size keysz, ScanKey scankey,
|
|
|
|
IndexTuple tuple1, IndexTuple tuple2);
|
|
|
|
extern bool _bt_itemcmp(Relation rel, Size keysz, ScanKey scankey,
|
|
|
|
BTItem item1, BTItem item2, StrategyNumber strat);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* prototypes for functions in nbtpage.c
|
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
extern void _bt_metapinit(Relation rel);
|
|
|
|
extern Buffer _bt_getroot(Relation rel, int access);
|
|
|
|
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
|
|
|
|
extern void _bt_relbuf(Relation rel, Buffer buf, int access);
|
|
|
|
extern void _bt_wrtbuf(Relation rel, Buffer buf);
|
|
|
|
extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
|
|
|
|
extern void _bt_pageinit(Page page, Size size);
|
|
|
|
extern void _bt_metaproot(Relation rel, BlockNumber rootbknum, int level);
|
|
|
|
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
|
|
|
|
extern void _bt_pagedel(Relation rel, ItemPointer tid);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* prototypes for functions in nbtree.c
|
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
extern bool BuildingBtree; /* in nbtree.c */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1998-09-01 06:40:42 +02:00
|
|
|
extern void btbuild(Relation heap, Relation index, int natts,
|
1997-09-08 22:59:27 +02:00
|
|
|
AttrNumber *attnum, IndexStrategy istrat, uint16 pcount,
|
1997-09-08 23:56:23 +02:00
|
|
|
Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo);
|
1998-09-01 06:40:42 +02:00
|
|
|
extern InsertIndexResult btinsert(Relation rel, Datum *datum, char *nulls,
|
1997-09-07 07:04:48 +02:00
|
|
|
ItemPointer ht_ctid, Relation heapRel);
|
1997-09-08 04:41:22 +02:00
|
|
|
extern char *btgettuple(IndexScanDesc scan, ScanDirection dir);
|
1998-09-01 06:40:42 +02:00
|
|
|
extern char *btbeginscan(Relation rel, bool fromEnd, uint16 keysz,
|
1997-09-07 07:04:48 +02:00
|
|
|
ScanKey scankey);
|
|
|
|
|
1997-09-08 04:41:22 +02:00
|
|
|
extern void btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey);
|
|
|
|
extern void btmovescan(IndexScanDesc scan, Datum v);
|
|
|
|
extern void btendscan(IndexScanDesc scan);
|
|
|
|
extern void btmarkpos(IndexScanDesc scan);
|
|
|
|
extern void btrestrpos(IndexScanDesc scan);
|
|
|
|
extern void btdelete(Relation rel, ItemPointer tid);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* prototypes for functions in nbtscan.c
|
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
extern void _bt_regscan(IndexScanDesc scan);
|
|
|
|
extern void _bt_dropscan(IndexScanDesc scan);
|
1999-03-28 22:32:42 +02:00
|
|
|
extern void _bt_adjscans(Relation rel, ItemPointer tid);
|
1999-08-08 22:12:52 +02:00
|
|
|
extern void AtEOXact_nbtree(void);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* prototypes for functions in nbtsearch.c
|
|
|
|
*/
|
1998-09-01 06:40:42 +02:00
|
|
|
extern BTStack _bt_search(Relation rel, int keysz, ScanKey scankey,
|
1997-09-08 22:59:27 +02:00
|
|
|
Buffer *bufP);
|
1998-09-01 06:40:42 +02:00
|
|
|
extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
|
1997-09-07 07:04:48 +02:00
|
|
|
ScanKey scankey, int access);
|
1998-09-01 06:40:42 +02:00
|
|
|
extern bool _bt_skeycmp(Relation rel, Size keysz, ScanKey scankey,
|
1996-08-27 23:50:29 +02:00
|
|
|
Page page, ItemId itemid, StrategyNumber strat);
|
1998-09-01 06:40:42 +02:00
|
|
|
extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
|
1997-09-07 07:04:48 +02:00
|
|
|
ScanKey scankey, int srchtype);
|
1996-08-27 23:50:29 +02:00
|
|
|
extern RetrieveIndexResult _bt_next(IndexScanDesc scan, ScanDirection dir);
|
|
|
|
extern RetrieveIndexResult _bt_first(IndexScanDesc scan, ScanDirection dir);
|
1997-09-08 22:59:27 +02:00
|
|
|
extern bool _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* prototypes for functions in nbtstrat.c
|
|
|
|
*/
|
1998-09-01 06:40:42 +02:00
|
|
|
extern StrategyNumber _bt_getstrat(Relation rel, AttrNumber attno,
|
2000-02-18 07:32:39 +01:00
|
|
|
RegProcedure proc);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* prototypes for functions in nbtutils.c
|
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
|
2000-02-18 07:32:39 +01:00
|
|
|
extern ScanKey _bt_mkscankey_nodata(Relation rel);
|
1997-09-08 04:41:22 +02:00
|
|
|
extern void _bt_freeskey(ScanKey skey);
|
|
|
|
extern void _bt_freestack(BTStack stack);
|
|
|
|
extern void _bt_orderkeys(Relation relation, BTScanOpaque so);
|
1997-09-08 23:56:23 +02:00
|
|
|
extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok);
|
1997-09-08 04:41:22 +02:00
|
|
|
extern BTItem _bt_formitem(IndexTuple itup);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* prototypes for functions in nbtsort.c
|
|
|
|
*/
|
1999-10-18 00:15:09 +02:00
|
|
|
|
|
|
|
typedef struct BTSpool BTSpool; /* opaque type known only within nbtsort.c */
|
|
|
|
|
|
|
|
extern BTSpool *_bt_spoolinit(Relation index, bool isunique);
|
|
|
|
extern void _bt_spooldestroy(BTSpool *btspool);
|
|
|
|
extern void _bt_spool(BTItem btitem, BTSpool *btspool);
|
|
|
|
extern void _bt_leafbuild(BTSpool *btspool);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1998-09-01 06:40:42 +02:00
|
|
|
#endif /* NBTREE_H */
|