/*-------------------------------------------------------------------------
 *
 * amapi.h
 *	  API for Postgres index access methods.
 *
 * Copyright (c) 2015-2024, PostgreSQL Global Development Group
 *
 * src/include/access/amapi.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef AMAPI_H
#define AMAPI_H

#include "access/genam.h"

/*
 * We don't wish to include planner header files here, since most of an index
 * AM's implementation isn't concerned with those data structures.  To allow
 * declaring amcostestimate_function here, use forward struct references.
 */
struct PlannerInfo;
struct IndexPath;

/* Likewise, this file shouldn't depend on execnodes.h. */
struct IndexInfo;

/*
 * Properties for amproperty API.  This list covers properties known to the
 * core code, but an index AM can define its own properties, by matching the
 * string property name.
 */
typedef enum IndexAMProperty
{
	AMPROP_UNKNOWN = 0,			/* anything not known to core code */
	AMPROP_ASC,					/* column properties */
	AMPROP_DESC,
	AMPROP_NULLS_FIRST,
	AMPROP_NULLS_LAST,
	AMPROP_ORDERABLE,
	AMPROP_DISTANCE_ORDERABLE,
	AMPROP_RETURNABLE,
	AMPROP_SEARCH_ARRAY,
	AMPROP_SEARCH_NULLS,
	AMPROP_CLUSTERABLE,			/* index properties */
	AMPROP_INDEX_SCAN,
	AMPROP_BITMAP_SCAN,
	AMPROP_BACKWARD_SCAN,
	AMPROP_CAN_ORDER,			/* AM properties */
	AMPROP_CAN_UNIQUE,
	AMPROP_CAN_MULTI_COL,
	AMPROP_CAN_EXCLUDE,
	AMPROP_CAN_INCLUDE,
} IndexAMProperty;

/*
 * We use lists of this struct type to keep track of both operators and
 * support functions while building or adding to an opclass or opfamily.
 * amadjustmembers functions receive lists of these structs, and are allowed
 * to alter their "ref" fields.
 *
 * The "ref" fields define how the pg_amop or pg_amproc entry should depend
 * on the associated objects (that is, which dependency type to use, and
 * which opclass or opfamily it should depend on).
 *
 * If ref_is_hard is true, the entry will have a NORMAL dependency on the
 * operator or support func, and an INTERNAL dependency on the opclass or
 * opfamily.  This forces the opclass or opfamily to be dropped if the
 * operator or support func is dropped, and requires the CASCADE option
 * to do so.  Nor will ALTER OPERATOR FAMILY DROP be allowed.  This is
 * the right behavior for objects that are essential to an opclass.
 *
 * If ref_is_hard is false, the entry will have an AUTO dependency on the
 * operator or support func, and also an AUTO dependency on the opclass or
 * opfamily.  This allows ALTER OPERATOR FAMILY DROP, and causes that to
 * happen automatically if the operator or support func is dropped.  This
 * is the right behavior for inessential ("loose") objects.
 */
typedef struct OpFamilyMember
{
	bool		is_func;		/* is this an operator, or support func? */
	Oid			object;			/* operator or support func's OID */
	int			number;			/* strategy or support func number */
	Oid			lefttype;		/* lefttype */
	Oid			righttype;		/* righttype */
	Oid			sortfamily;		/* ordering operator's sort opfamily, or 0 */
	bool		ref_is_hard;	/* hard or soft dependency? */
	bool		ref_is_family;	/* is dependency on opclass or opfamily? */
	Oid			refobjid;		/* OID of opclass or opfamily */
} OpFamilyMember;
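
/*
 * A minimal sketch (guarded out of compilation) of an amadjustmembers
 * callback that walks these lists and fills the "ref" fields.  The policy
 * shown --- support functions essential to the opclass, operators loose
 * members of the opfamily --- is only illustrative, and the function name
 * is hypothetical.  A real implementation must also cope with ALTER
 * OPERATOR FAMILY, where opclassoid is InvalidOid.
 */
#ifdef AMAPI_EXAMPLES
static void
examadjustmembers(Oid opfamilyoid, Oid opclassoid,
				  List *operators, List *functions)
{
	ListCell   *lc;

	foreach(lc, functions)
	{
		OpFamilyMember *member = (OpFamilyMember *) lfirst(lc);

		/* hard dependency: dropping the function requires dropping the
		 * opclass too (with CASCADE) */
		member->ref_is_hard = true;
		member->ref_is_family = false;
		member->refobjid = opclassoid;
	}

	foreach(lc, operators)
	{
		OpFamilyMember *member = (OpFamilyMember *) lfirst(lc);

		/* soft dependency: the operator may be dropped from the family */
		member->ref_is_hard = false;
		member->ref_is_family = true;
		member->refobjid = opfamilyoid;
	}
}
#endif							/* AMAPI_EXAMPLES */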

/*
 * Callback function signatures --- see indexam.sgml for more info.
 */

/* build new index */
typedef IndexBuildResult *(*ambuild_function) (Relation heapRelation,
											   Relation indexRelation,
											   struct IndexInfo *indexInfo);

/* build empty index */
typedef void (*ambuildempty_function) (Relation indexRelation);

/* insert this tuple */
typedef bool (*aminsert_function) (Relation indexRelation,
								   Datum *values,
								   bool *isnull,
								   ItemPointer heap_tid,
								   Relation heapRelation,
								   IndexUniqueCheck checkUnique,
								   bool indexUnchanged,
								   struct IndexInfo *indexInfo);

/* cleanup after insert */
typedef void (*aminsertcleanup_function) (struct IndexInfo *indexInfo);

/* bulk delete */
typedef IndexBulkDeleteResult *(*ambulkdelete_function) (IndexVacuumInfo *info,
														 IndexBulkDeleteResult *stats,
														 IndexBulkDeleteCallback callback,
														 void *callback_state);

/* post-VACUUM cleanup */
typedef IndexBulkDeleteResult *(*amvacuumcleanup_function) (IndexVacuumInfo *info,
															IndexBulkDeleteResult *stats);

/* can indexscan return IndexTuples? */
typedef bool (*amcanreturn_function) (Relation indexRelation, int attno);

/* estimate cost of an indexscan */
typedef void (*amcostestimate_function) (struct PlannerInfo *root,
										 struct IndexPath *path,
										 double loop_count,
										 Cost *indexStartupCost,
										 Cost *indexTotalCost,
										 Selectivity *indexSelectivity,
										 double *indexCorrelation,
										 double *indexPages);
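
/*
 * Illustrative-only sketch of an amcostestimate implementation that
 * reports fixed numbers.  A real AM would derive these from the index
 * statistics, typically via genericcostestimate() in
 * src/backend/utils/adt/selfuncs.c; the function name and the constants
 * here are hypothetical.
 */
#ifdef AMAPI_EXAMPLES
static void
examcostestimate(struct PlannerInfo *root, struct IndexPath *path,
				 double loop_count,
				 Cost *indexStartupCost, Cost *indexTotalCost,
				 Selectivity *indexSelectivity, double *indexCorrelation,
				 double *indexPages)
{
	*indexStartupCost = 0.0;	/* no setup work before the first tuple */
	*indexTotalCost = 100.0;	/* flat guess at the whole-scan cost */
	*indexSelectivity = 0.01;	/* pretend the quals match 1% of the rows */
	*indexCorrelation = 0.0;	/* index order uncorrelated with heap order */
	*indexPages = 1.0;			/* index pages the scan is expected to touch */
}
#endif							/* AMAPI_EXAMPLES */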

/* parse index reloptions */
typedef bytea *(*amoptions_function) (Datum reloptions,
									  bool validate);

/* report AM, index, or index column property */
typedef bool (*amproperty_function) (Oid index_oid, int attno,
									 IndexAMProperty prop, const char *propname,
									 bool *res, bool *isnull);
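
/*
 * Sketch of an amproperty callback.  It overrides one core-known property
 * and answers one AM-defined property by matching the string name; for
 * everything else it returns false, telling the core code to compute its
 * default answer.  The function name and the "buffering_enabled" property
 * are hypothetical; assumes the usual postgres.h environment.
 */
#ifdef AMAPI_EXAMPLES
static bool
examproperty(Oid index_oid, int attno,
			 IndexAMProperty prop, const char *propname,
			 bool *res, bool *isnull)
{
	if (prop == AMPROP_DISTANCE_ORDERABLE)
	{
		*res = false;			/* this AM never does distance ordering */
		return true;
	}
	if (prop == AMPROP_UNKNOWN &&
		strcmp(propname, "buffering_enabled") == 0)
	{
		*res = true;			/* AM-defined property, matched by name */
		return true;
	}
	return false;				/* let the core code supply the answer */
}
#endif							/* AMAPI_EXAMPLES */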

/* name of phase as used in progress reporting */
typedef char *(*ambuildphasename_function) (int64 phasenum);

/* validate definition of an opclass for this AM */
typedef bool (*amvalidate_function) (Oid opclassoid);

/* validate operators and support functions to be added to an opclass/family */
typedef void (*amadjustmembers_function) (Oid opfamilyoid,
										  Oid opclassoid,
										  List *operators,
										  List *functions);

/* prepare for index scan */
typedef IndexScanDesc (*ambeginscan_function) (Relation indexRelation,
											   int nkeys,
											   int norderbys);
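
/*
 * Sketch of the usual ambeginscan pattern: let RelationGetIndexScan()
 * (declared in access/genam.h) allocate and initialize the generic scan
 * descriptor, then hang any AM-private state off scan->opaque.  The
 * function name is hypothetical.
 */
#ifdef AMAPI_EXAMPLES
static IndexScanDesc
exambeginscan(Relation indexRelation, int nkeys, int norderbys)
{
	IndexScanDesc scan;

	scan = RelationGetIndexScan(indexRelation, nkeys, norderbys);
	scan->opaque = NULL;		/* an AM would palloc its private state here */
	return scan;
}
#endif							/* AMAPI_EXAMPLES */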

/* (re)start index scan */
typedef void (*amrescan_function) (IndexScanDesc scan,
								   ScanKey keys,
								   int nkeys,
								   ScanKey orderbys,
								   int norderbys);

/* next valid tuple */
typedef bool (*amgettuple_function) (IndexScanDesc scan,
									 ScanDirection direction);

/* fetch all valid tuples */
typedef int64 (*amgetbitmap_function) (IndexScanDesc scan,
									   TIDBitmap *tbm);

/* end index scan */
typedef void (*amendscan_function) (IndexScanDesc scan);

/* mark current scan position */
typedef void (*ammarkpos_function) (IndexScanDesc scan);

/* restore marked scan position */
typedef void (*amrestrpos_function) (IndexScanDesc scan);

/*
 * Callback function signatures - for parallel index scans.
 */

/* estimate size of parallel scan descriptor */
typedef Size (*amestimateparallelscan_function) (int nkeys, int norderbys);

/* prepare for parallel index scan */
typedef void (*aminitparallelscan_function) (void *target);

/* (re)start parallel index scan */
typedef void (*amparallelrescan_function) (IndexScanDesc scan);

/*
 * API struct for an index AM.  Note this must be stored in a single palloc'd
 * chunk of memory.
 */
typedef struct IndexAmRoutine
{
	NodeTag		type;

	/*
	 * Total number of strategies (operators) by which we can traverse/search
	 * this AM.  Zero if AM does not have a fixed set of strategy assignments.
	 */
	uint16		amstrategies;
	/* total number of support functions that this AM uses */
	uint16		amsupport;
	/* opclass options support function number or 0 */
	uint16		amoptsprocnum;
	/* does AM support ORDER BY indexed column's value? */
	bool		amcanorder;
	/* does AM support ORDER BY result of an operator on indexed column? */
	bool		amcanorderbyop;
	/* does AM support backward scanning? */
	bool		amcanbackward;
	/* does AM support UNIQUE indexes? */
	bool		amcanunique;
	/* does AM support multi-column indexes? */
	bool		amcanmulticol;
	/* does AM require scans to have a constraint on the first index column? */
	bool		amoptionalkey;
	/* does AM handle ScalarArrayOpExpr quals? */
	bool		amsearcharray;
	/* does AM handle IS NULL/IS NOT NULL quals? */
	bool		amsearchnulls;
	/* can index storage data type differ from column data type? */
	bool		amstorage;
	/* can an index of this type be clustered on? */
	bool		amclusterable;
	/* does AM handle predicate locks? */
	bool		ampredlocks;
	/* does AM support parallel scan? */
	bool		amcanparallel;
	/* does AM support parallel build? */
	bool		amcanbuildparallel;
	/* does AM support columns included with clause INCLUDE? */
	bool		amcaninclude;
	/* does AM use maintenance_work_mem? */
	bool		amusemaintenanceworkmem;
	/* does AM store tuple information only at block granularity? */
	bool		amsummarizing;
	/* OR of parallel vacuum flags.  See vacuum.h for flags. */
	uint8		amparallelvacuumoptions;
	/* type of data stored in index, or InvalidOid if variable */
	Oid			amkeytype;

	/*
	 * If you add new properties to either the above or the below lists, then
	 * they should also (usually) be exposed via the property API (see
	 * IndexAMProperty at the top of the file, and utils/adt/amutils.c).
	 */

	/* interface functions */
	ambuild_function ambuild;
	ambuildempty_function ambuildempty;
	aminsert_function aminsert;
	aminsertcleanup_function aminsertcleanup;
	ambulkdelete_function ambulkdelete;
	amvacuumcleanup_function amvacuumcleanup;
	amcanreturn_function amcanreturn;	/* can be NULL */
	amcostestimate_function amcostestimate;
	amoptions_function amoptions;
	amproperty_function amproperty; /* can be NULL */
	ambuildphasename_function ambuildphasename; /* can be NULL */
	amvalidate_function amvalidate;
	amadjustmembers_function amadjustmembers;	/* can be NULL */
	ambeginscan_function ambeginscan;
	amrescan_function amrescan;
	amgettuple_function amgettuple; /* can be NULL */
	amgetbitmap_function amgetbitmap;	/* can be NULL */
	amendscan_function amendscan;
	ammarkpos_function ammarkpos;	/* can be NULL */
	amrestrpos_function amrestrpos; /* can be NULL */

	/* interface functions to support parallel index scans */
	amestimateparallelscan_function amestimateparallelscan; /* can be NULL */
	aminitparallelscan_function aminitparallelscan; /* can be NULL */
	amparallelrescan_function amparallelrescan; /* can be NULL */
} IndexAmRoutine;
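
/*
 * Sketch of an index AM handler, per the convention described in
 * indexam.sgml: a SQL-callable function taking "internal" that returns a
 * palloc'd IndexAmRoutine.  All of the "exam*" names are hypothetical
 * stand-ins for an AM's real callbacks, and only a representative subset
 * of the flag fields is shown (makeNode() zero-fills the rest, though
 * real AMs set every field explicitly).  Assumes postgres.h and fmgr.h.
 */
#ifdef AMAPI_EXAMPLES
PG_FUNCTION_INFO_V1(examhandler);

Datum
examhandler(PG_FUNCTION_ARGS)
{
	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

	amroutine->amstrategies = 0;	/* no fixed strategy assignments */
	amroutine->amsupport = 1;
	amroutine->amcanmulticol = true;
	amroutine->amoptionalkey = true;
	amroutine->amkeytype = InvalidOid;

	amroutine->ambuild = exambuild;
	amroutine->ambuildempty = exambuildempty;
	amroutine->aminsert = examinsert;
	amroutine->ambulkdelete = exambulkdelete;
	amroutine->amvacuumcleanup = examvacuumcleanup;
	amroutine->amcostestimate = examcostestimate;
	amroutine->amoptions = examoptions;
	amroutine->amvalidate = examvalidate;
	amroutine->ambeginscan = exambeginscan;
	amroutine->amrescan = examrescan;
	amroutine->amgetbitmap = examgetbitmap;
	amroutine->amendscan = examendscan;
	amroutine->amgettuple = NULL;	/* optional callbacks may be NULL */
	amroutine->amproperty = NULL;

	PG_RETURN_POINTER(amroutine);
}
#endif							/* AMAPI_EXAMPLES */

/*
 * At the SQL level, such a handler would be paired with something like:
 *   CREATE FUNCTION examhandler(internal) RETURNS index_am_handler
 *       AS 'MODULE_PATHNAME' LANGUAGE C;
 *   CREATE ACCESS METHOD exam TYPE INDEX HANDLER examhandler;
 */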

/* Functions in access/index/amapi.c */
extern IndexAmRoutine *GetIndexAmRoutine(Oid amhandler);
extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid amoid, bool noerror);
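
/*
 * Hypothetical helper showing the lookup entry points above: fetch the
 * routine struct for a pg_am OID and test one of its flags.  With
 * noerror = true, GetIndexAmRoutineByAmId() returns NULL rather than
 * erroring if the OID does not identify a valid index AM.
 */
#ifdef AMAPI_EXAMPLES
static bool
index_am_can_unique(Oid amoid)
{
	IndexAmRoutine *routine = GetIndexAmRoutineByAmId(amoid, true);

	return routine != NULL && routine->amcanunique;
}
#endif							/* AMAPI_EXAMPLES */
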
#endif /* AMAPI_H */