postgresql/src/include/access/gist.h

160 lines
5.4 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* gist.h
* The public API for GiST indexes. This API is exposed to
* individuals implementing GiST indexes, so backward-incompatible
* changes should be made with care.
*
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
2010-09-20 22:08:53 +02:00
* src/include/access/gist.h
*
*-------------------------------------------------------------------------
*/
#ifndef GIST_H
#define GIST_H
2005-11-07 18:36:47 +01:00
#include "access/xlog.h"
#include "access/xlogdefs.h"
#include "storage/block.h"
#include "storage/bufpage.h"
#include "utils/relcache.h"
/*
* amproc indexes for GiST indexes.
*/
#define GIST_CONSISTENT_PROC 1
#define GIST_UNION_PROC 2
#define GIST_COMPRESS_PROC 3
#define GIST_DECOMPRESS_PROC 4
#define GIST_PENALTY_PROC 5
#define GIST_PICKSPLIT_PROC 6
#define GIST_EQUAL_PROC 7
#define GIST_DISTANCE_PROC 8
#define GISTNProcs 8
2005-11-07 18:36:47 +01:00
/*
* strategy numbers for GiST opclasses that want to implement the old
* RTREE behavior.
*/
#define RTLeftStrategyNumber 1
#define RTOverLeftStrategyNumber 2
#define RTOverlapStrategyNumber 3
#define RTOverRightStrategyNumber 4
#define RTRightStrategyNumber 5
#define RTSameStrategyNumber 6
#define RTContainsStrategyNumber 7 /* for @> */
#define RTContainedByStrategyNumber 8 /* for <@ */
2005-11-07 18:36:47 +01:00
#define RTOverBelowStrategyNumber 9
#define RTBelowStrategyNumber 10
#define RTAboveStrategyNumber 11
#define RTOverAboveStrategyNumber 12
#define RTOldContainsStrategyNumber 13 /* for old spelling of @> */
#define RTOldContainedByStrategyNumber 14 /* for old spelling of <@ */
2011-04-10 17:42:00 +02:00
#define RTKNNSearchStrategyNumber 15
2005-11-07 18:36:47 +01:00
/*
* Page opaque data in a GiST index page.
*/
Rewrite the GiST insertion logic so that we don't need the post-recovery cleanup stage to finish incomplete inserts or splits anymore. There was two reasons for the cleanup step: 1. When a new tuple was inserted to a leaf page, the downlink in the parent needed to be updated to contain (ie. to be consistent with) the new key. Updating the parent in turn might require recursively updating the parent of the parent. We now handle that by updating the parent while traversing down the tree, so that when we insert the leaf tuple, all the parents are already consistent with the new key, and the tree is consistent at every step. 2. When a page is split, we need to insert the downlink for the new right page(s), and update the downlink for the original page to not include keys that moved to the right page(s). We now handle that by setting a new flag, F_FOLLOW_RIGHT, on the non-rightmost pages in the split. When that flag is set, scans always follow the rightlink, regardless of the NSN mechanism used to detect concurrent page splits. That way the tree is consistent right after split, even though the downlink is still missing. This is very similar to the way B-tree splits are handled. When the downlink is inserted in the parent, the flag is cleared. To keep the insertion algorithm simple, when an insertion sees an incomplete split, indicated by the F_FOLLOW_RIGHT flag, it finishes the split before doing anything else. These changes allow removing the whole "invalid tuple" mechanism, but I retained the scan code to still follow invalid tuples correctly. While we don't create any such tuples anymore, we want to handle them gracefully in case you pg_upgrade a GiST index that has them. If we encounter any on an insert, though, we just throw an error saying that you need to REINDEX. The issue that got me into doing this is that if you did a checkpoint while an insert or split was in progress, and the checkpoint finishes quickly so that there is no WAL record related to the insert between RedoRecPtr and the checkpoint record, recovery from that checkpoint would not know to finish the incomplete insert. IOW, we have the same issue we solved with the rm_safe_restartpoint mechanism during normal operation too. It's highly unlikely to happen in practice, and this fix is far too large to backpatch, so we're just going to live with in previous versions, but this refactoring fixes it going forward. With this patch, you don't get the annoying 'index "FOO" needs VACUUM or REINDEX to finish crash recovery' notices anymore if you crash at an unfortunate moment.
2010-12-23 15:03:08 +01:00
#define F_LEAF (1 << 0) /* leaf page */
#define F_DELETED (1 << 1) /* the page has been deleted */
#define F_TUPLES_DELETED (1 << 2) /* some tuples on the page are dead */
#define F_FOLLOW_RIGHT (1 << 3) /* page to the right has no downlink */
typedef XLogRecPtr GistNSN;
typedef struct GISTPageOpaqueData
{
GistNSN nsn; /* this value must change on page split */
BlockNumber rightlink; /* next page if any */
uint16 flags; /* see bit definitions above */
uint16 gist_page_id; /* for identification of GiST indexes */
} GISTPageOpaqueData;
typedef GISTPageOpaqueData *GISTPageOpaque;
/*
* The page ID is for the convenience of pg_filedump and similar utilities,
* which otherwise would have a hard time telling pages of different index
* types apart. It should be the last 2 bytes on the page. This is more or
* less "free" due to alignment considerations.
*/
#define GIST_PAGE_ID 0xFF81
/*
* This is the Split Vector to be returned by the PickSplit method.
* PickSplit should check spl_(r|l)datum_exists. If it is 'true',
* that corresponding spl_(r|l)datum already defined and
* PickSplit should use that value. PickSplit should always set
2006-10-04 02:30:14 +02:00
* spl_(r|l)datum_exists to false: GiST will check value to
* control supporting this feature by PickSplit...
*/
typedef struct GIST_SPLITVEC
{
OffsetNumber *spl_left; /* array of entries that go left */
int spl_nleft; /* size of this array */
Datum spl_ldatum; /* Union of keys in spl_left */
2006-10-04 02:30:14 +02:00
bool spl_ldatum_exists; /* true, if spl_ldatum already exists. */
OffsetNumber *spl_right; /* array of entries that go right */
int spl_nright; /* size of the array */
Datum spl_rdatum; /* Union of keys in spl_right */
2006-10-04 02:30:14 +02:00
bool spl_rdatum_exists; /* true, if spl_rdatum already exists. */
} GIST_SPLITVEC;
/*
* An entry on a GiST node. Contains the key, as well as its own
* location (rel,page,offset) which can supply the matching pointer.
* leafkey is a flag to tell us if the entry is in a leaf node.
*/
typedef struct GISTENTRY
{
Datum key;
Relation rel;
Page page;
OffsetNumber offset;
bool leafkey;
} GISTENTRY;
2005-10-15 04:49:52 +02:00
#define GistPageGetOpaque(page) ( (GISTPageOpaque) PageGetSpecialPointer(page) )
#define GistPageIsLeaf(page) ( GistPageGetOpaque(page)->flags & F_LEAF)
#define GIST_LEAF(entry) (GistPageIsLeaf((entry)->page))
#define GistPageSetLeaf(page) ( GistPageGetOpaque(page)->flags |= F_LEAF)
2005-10-15 04:49:52 +02:00
#define GistPageSetNonLeaf(page) ( GistPageGetOpaque(page)->flags &= ~F_LEAF)
2005-10-15 04:49:52 +02:00
#define GistPageIsDeleted(page) ( GistPageGetOpaque(page)->flags & F_DELETED)
#define GistPageSetDeleted(page) ( GistPageGetOpaque(page)->flags |= F_DELETED)
2005-10-15 04:49:52 +02:00
#define GistPageSetNonDeleted(page) ( GistPageGetOpaque(page)->flags &= ~F_DELETED)
2005-10-15 04:49:52 +02:00
#define GistTuplesDeleted(page) ( GistPageGetOpaque(page)->flags & F_TUPLES_DELETED)
#define GistMarkTuplesDeleted(page) ( GistPageGetOpaque(page)->flags |= F_TUPLES_DELETED)
#define GistClearTuplesDeleted(page) ( GistPageGetOpaque(page)->flags &= ~F_TUPLES_DELETED)
Rewrite the GiST insertion logic so that we don't need the post-recovery cleanup stage to finish incomplete inserts or splits anymore. There was two reasons for the cleanup step: 1. When a new tuple was inserted to a leaf page, the downlink in the parent needed to be updated to contain (ie. to be consistent with) the new key. Updating the parent in turn might require recursively updating the parent of the parent. We now handle that by updating the parent while traversing down the tree, so that when we insert the leaf tuple, all the parents are already consistent with the new key, and the tree is consistent at every step. 2. When a page is split, we need to insert the downlink for the new right page(s), and update the downlink for the original page to not include keys that moved to the right page(s). We now handle that by setting a new flag, F_FOLLOW_RIGHT, on the non-rightmost pages in the split. When that flag is set, scans always follow the rightlink, regardless of the NSN mechanism used to detect concurrent page splits. That way the tree is consistent right after split, even though the downlink is still missing. This is very similar to the way B-tree splits are handled. When the downlink is inserted in the parent, the flag is cleared. To keep the insertion algorithm simple, when an insertion sees an incomplete split, indicated by the F_FOLLOW_RIGHT flag, it finishes the split before doing anything else. These changes allow removing the whole "invalid tuple" mechanism, but I retained the scan code to still follow invalid tuples correctly. While we don't create any such tuples anymore, we want to handle them gracefully in case you pg_upgrade a GiST index that has them. If we encounter any on an insert, though, we just throw an error saying that you need to REINDEX. The issue that got me into doing this is that if you did a checkpoint while an insert or split was in progress, and the checkpoint finishes quickly so that there is no WAL record related to the insert between RedoRecPtr and the checkpoint record, recovery from that checkpoint would not know to finish the incomplete insert. IOW, we have the same issue we solved with the rm_safe_restartpoint mechanism during normal operation too. It's highly unlikely to happen in practice, and this fix is far too large to backpatch, so we're just going to live with in previous versions, but this refactoring fixes it going forward. With this patch, you don't get the annoying 'index "FOO" needs VACUUM or REINDEX to finish crash recovery' notices anymore if you crash at an unfortunate moment.
2010-12-23 15:03:08 +01:00
#define GistFollowRight(page) ( GistPageGetOpaque(page)->flags & F_FOLLOW_RIGHT)
#define GistMarkFollowRight(page) ( GistPageGetOpaque(page)->flags |= F_FOLLOW_RIGHT)
#define GistClearFollowRight(page) ( GistPageGetOpaque(page)->flags &= ~F_FOLLOW_RIGHT)
/*
* Vector of GISTENTRY structs; user-defined methods union and picksplit
* take it as one of their arguments
*/
2004-08-29 07:07:03 +02:00
typedef struct
{
int32 n; /* number of elements */
GISTENTRY vector[FLEXIBLE_ARRAY_MEMBER];
} GistEntryVector;
#define GEVHDRSZ (offsetof(GistEntryVector, vector))
/*
* macro to initialize a GISTENTRY
*/
#define gistentryinit(e, k, r, pg, o, l) \
do { (e).key = (k); (e).rel = (r); (e).page = (pg); \
(e).offset = (o); (e).leafkey = (l); } while (0)
#endif /* GIST_H */