/*-------------------------------------------------------------------------
 *
 * genam.h
 *	  POSTGRES generalized index access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/genam.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef GENAM_H
#define GENAM_H

#include "access/sdir.h"
#include "access/skey.h"
#include "nodes/tidbitmap.h"
#include "storage/lockdefs.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"

/* We don't want this file to depend on execnodes.h. */
struct IndexInfo;

/*
 * Struct for statistics returned by ambuild
 */
typedef struct IndexBuildResult
{
	double		heap_tuples;	/* # of tuples seen in parent table */
	double		index_tuples;	/* # of tuples inserted into index */
} IndexBuildResult;
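
/*
 * As an illustration only (not part of the real API), an ambuild
 * implementation typically fills this in from the counts produced by its
 * heap scan; "reltuples" and "tuples_indexed" are hypothetical names here:
 *
 *		IndexBuildResult *result = palloc0(sizeof(IndexBuildResult));
 *
 *		result->heap_tuples = reltuples;		(from table_index_build_scan)
 *		result->index_tuples = tuples_indexed;	(AM's own insertion counter)
 *		return result;
 */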

/*
 * Struct for input arguments passed to ambulkdelete and amvacuumcleanup
 *
 * num_heap_tuples is accurate only when estimated_count is false;
 * otherwise it's just an estimate (currently, the estimate is the
 * prior value of the relation's pg_class.reltuples field, so it could
 * even be -1).  It will always just be an estimate during ambulkdelete.
 */
typedef struct IndexVacuumInfo
{
	Relation	index;			/* the index being vacuumed */
	bool		analyze_only;	/* ANALYZE (without any actual vacuum) */
	bool		report_progress;	/* emit progress.h status reports */
	bool		estimated_count;	/* num_heap_tuples is an estimate */
	int			message_level;	/* ereport level for progress messages */
	double		num_heap_tuples;	/* tuples remaining in heap */
	BufferAccessStrategy strategy;	/* access strategy for reads */
} IndexVacuumInfo;
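
/*
 * Hedged sketch: an amvacuumcleanup implementation that derives its tuple
 * count from num_heap_tuples should propagate the estimate flag, roughly
 * like this ("stats" being the IndexBulkDeleteResult defined below):
 *
 *		if (info->num_heap_tuples >= 0)
 *		{
 *			stats->num_index_tuples = info->num_heap_tuples;
 *			stats->estimated_count = info->estimated_count;
 *		}
 */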

/*
 * Struct for statistics returned by ambulkdelete and amvacuumcleanup
 *
 * This struct is normally allocated by the first ambulkdelete call and then
 * passed along through subsequent ones until amvacuumcleanup; however,
 * amvacuumcleanup must be prepared to allocate it in the case where no
 * ambulkdelete calls were made (because no tuples needed deletion).
 * Note that an index AM could choose to return a larger struct
 * of which this is just the first field; this provides a way for ambulkdelete
 * to communicate additional private data to amvacuumcleanup.
 *
 * Note: pages_deleted and pages_free refer to free space within the index
 * file.  Some index AMs may compute num_index_tuples by reference to
 * num_heap_tuples, in which case they should copy the estimated_count field
 * from IndexVacuumInfo.
 */
typedef struct IndexBulkDeleteResult
{
	BlockNumber num_pages;		/* pages remaining in index */
	bool		estimated_count;	/* num_index_tuples is an estimate */
	double		num_index_tuples;	/* tuples remaining */
	double		tuples_removed; /* # removed during vacuum operation */
	BlockNumber pages_deleted;	/* # unused pages in index */
	BlockNumber pages_free;		/* # pages available for reuse */
} IndexBulkDeleteResult;
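
/*
 * A minimal sketch of the "larger struct" technique described above; the
 * struct and field names are illustrative, not taken from any real AM:
 *
 *		typedef struct MyAmBulkDeleteState
 *		{
 *			IndexBulkDeleteResult stats;	(must be the first field)
 *			BlockNumber lastBlockScanned;	(private ambulkdelete state)
 *		} MyAmBulkDeleteState;
 *
 * ambulkdelete returns &state->stats; amvacuumcleanup casts the pointer
 * back to MyAmBulkDeleteState * to recover the private fields.
 */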

/* Typedef for callback function to determine if a tuple is bulk-deletable */
typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr, void *state);
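
/*
 * Callers of index_bulk_delete() supply a function of this shape; the names
 * below are hypothetical.  Returning true tells the index AM to remove the
 * index entry pointing at itemptr:
 *
 *		static bool
 *		my_dead_tid_callback(ItemPointer itemptr, void *state)
 *		{
 *			MyDeadTidSet *dead = (MyDeadTidSet *) state;
 *
 *			return my_dead_tid_lookup(dead, itemptr);
 *		}
 */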

/* struct definitions appear in relscan.h */
typedef struct IndexScanDescData *IndexScanDesc;
typedef struct SysScanDescData *SysScanDesc;

typedef struct ParallelIndexScanDescData *ParallelIndexScanDesc;

/*
 * Enumeration specifying the type of uniqueness check to perform in
 * index_insert().
 *
 * UNIQUE_CHECK_YES is the traditional Postgres immediate check, possibly
 * blocking to see if a conflicting transaction commits.
 *
 * For deferrable unique constraints, UNIQUE_CHECK_PARTIAL is specified at
 * insertion time.  The index AM should test if the tuple is unique, but
 * should not throw error, block, or prevent the insertion if the tuple
 * appears not to be unique.  We'll recheck later when it is time for the
 * constraint to be enforced.  The AM must return true if the tuple is
 * known unique, false if it is possibly non-unique.  In the "true" case
 * it is safe to omit the later recheck.
 *
 * When it is time to recheck the deferred constraint, a pseudo-insertion
 * call is made with UNIQUE_CHECK_EXISTING.  The tuple is already in the
 * index in this case, so it should not be inserted again.  Rather, just
 * check for conflicting live tuples (possibly blocking).
 */
typedef enum IndexUniqueCheck
{
	UNIQUE_CHECK_NO,			/* Don't do any uniqueness checking */
	UNIQUE_CHECK_YES,			/* Enforce uniqueness at insertion time */
	UNIQUE_CHECK_PARTIAL,		/* Test uniqueness, but no error */
	UNIQUE_CHECK_EXISTING		/* Check if existing tuple is unique */
} IndexUniqueCheck;
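
/*
 * Sketch (an assumption about typical structure, not any particular AM's
 * code) of how an aminsert implementation reacts to these modes:
 *
 *		switch (checkUnique)
 *		{
 *			case UNIQUE_CHECK_NO:
 *				... insert with no uniqueness test ...
 *				break;
 *			case UNIQUE_CHECK_YES:
 *				... insert; block on a live conflict, then error if it
 *				... persists
 *				break;
 *			case UNIQUE_CHECK_PARTIAL:
 *				... insert anyway; result = (no possible conflict seen) ...
 *				break;
 *			case UNIQUE_CHECK_EXISTING:
 *				... no insertion; just probe for conflicting live tuples ...
 *				break;
 *		}
 *		return result;
 */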

/* Nullable "ORDER BY col op const" distance */
typedef struct IndexOrderByDistance
{
	double		value;
	bool		isnull;
} IndexOrderByDistance;


/*
 * generalized index_ interface routines (in indexam.c)
 */

/*
 * IndexScanIsValid
 *		True iff the index scan is valid.
 */
#define IndexScanIsValid(scan) PointerIsValid(scan)

extern Relation index_open(Oid relationId, LOCKMODE lockmode);
extern void index_close(Relation relation, LOCKMODE lockmode);

extern bool index_insert(Relation indexRelation,
						 Datum *values, bool *isnull,
						 ItemPointer heap_t_ctid,
						 Relation heapRelation,
						 IndexUniqueCheck checkUnique,
						 bool indexUnchanged,
						 struct IndexInfo *indexInfo);

extern IndexScanDesc index_beginscan(Relation heapRelation,
									 Relation indexRelation,
									 Snapshot snapshot,
									 int nkeys, int norderbys);
extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation,
											Snapshot snapshot,
											int nkeys);
extern void index_rescan(IndexScanDesc scan,
						 ScanKey keys, int nkeys,
						 ScanKey orderbys, int norderbys);
extern void index_endscan(IndexScanDesc scan);
extern void index_markpos(IndexScanDesc scan);
extern void index_restrpos(IndexScanDesc scan);
extern Size index_parallelscan_estimate(Relation indexrel, Snapshot snapshot);
extern void index_parallelscan_initialize(Relation heaprel, Relation indexrel,
										  Snapshot snapshot, ParallelIndexScanDesc target);
extern void index_parallelrescan(IndexScanDesc scan);
extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
											  Relation indexrel, int nkeys, int norderbys,
											  ParallelIndexScanDesc pscan);
extern ItemPointer index_getnext_tid(IndexScanDesc scan,
									 ScanDirection direction);
struct TupleTableSlot;
extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot);
extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction,
							   struct TupleTableSlot *slot);
extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap);
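
/*
 * Typical (non-bitmap) use of the scan routines above, sketched on the
 * assumption that "skey" was filled with ScanKeyInit() and "slot" came from
 * table_slot_create():
 *
 *		IndexScanDesc scan;
 *
 *		scan = index_beginscan(heapRel, indexRel, snapshot, 1, 0);
 *		index_rescan(scan, &skey, 1, NULL, 0);
 *		while (index_getnext_slot(scan, ForwardScanDirection, slot))
 *		{
 *			... process the tuple now stored in slot ...
 *		}
 *		index_endscan(scan);
 */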

extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info,
												IndexBulkDeleteResult *stats,
												IndexBulkDeleteCallback callback,
												void *callback_state);
extern IndexBulkDeleteResult *index_vacuum_cleanup(IndexVacuumInfo *info,
												   IndexBulkDeleteResult *stats);
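
/*
 * Rough VACUUM-side call sequence (a sketch; see vacuumlazy.c for the real
 * logic): the stats struct returned by the first call is threaded through
 * any further ambulkdelete rounds and into the final cleanup call.
 * "my_callback" and "my_state" are placeholders for the caller's
 * IndexBulkDeleteCallback and its private state:
 *
 *		IndexBulkDeleteResult *stats = NULL;
 *
 *		stats = index_bulk_delete(&ivinfo, stats, my_callback, my_state);
 *		... possibly more index_bulk_delete() rounds ...
 *		stats = index_vacuum_cleanup(&ivinfo, stats);
 */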

extern bool index_can_return(Relation indexRelation, int attno);
extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
									uint16 procnum);
extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum,
								   uint16 procnum);
extern void index_store_float8_orderby_distances(IndexScanDesc scan,
												 Oid *orderByTypes,
												 IndexOrderByDistance *distances,
												 bool recheckOrderBy);
extern bytea *index_opclass_options(Relation relation, AttrNumber attnum,
									Datum attoptions, bool validate);


/*
 * index access method support routines (in genam.c)
 */
extern IndexScanDesc RelationGetIndexScan(Relation indexRelation,
										  int nkeys, int norderbys);
extern void IndexScanEnd(IndexScanDesc scan);
extern char *BuildIndexValueDescription(Relation indexRelation,
										Datum *values, bool *isnull);
extern TransactionId index_compute_xid_horizon_for_tuples(Relation irel,
														  Relation hrel,
														  Buffer ibuf,
														  OffsetNumber *itemnos,
														  int nitems);

/*
 * heap-or-index access to system catalogs (in genam.c)
 */
extern SysScanDesc systable_beginscan(Relation heapRelation,
									  Oid indexId,
									  bool indexOK,
									  Snapshot snapshot,
									  int nkeys, ScanKey key);
extern HeapTuple systable_getnext(SysScanDesc sysscan);
extern bool systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup);
extern void systable_endscan(SysScanDesc sysscan);
extern SysScanDesc systable_beginscan_ordered(Relation heapRelation,
											  Relation indexRelation,
											  Snapshot snapshot,
											  int nkeys, ScanKey key);
extern HeapTuple systable_getnext_ordered(SysScanDesc sysscan,
										  ScanDirection direction);
extern void systable_endscan_ordered(SysScanDesc sysscan);
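
/*
 * Canonical catalog-scan pattern built from the routines above; the catalog,
 * index, and attribute names here are placeholders:
 *
 *		ScanKeyData key;
 *		SysScanDesc scan;
 *		HeapTuple	tup;
 *
 *		ScanKeyInit(&key, Anum_pg_foo_bar, BTEqualStrategyNumber,
 *					F_OIDEQ, ObjectIdGetDatum(targetOid));
 *		scan = systable_beginscan(rel, FooBarIndexId, true, NULL, 1, &key);
 *		while (HeapTupleIsValid(tup = systable_getnext(scan)))
 *		{
 *			... examine tup ...
 *		}
 *		systable_endscan(scan);
 */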

#endif							/* GENAM_H */