/*-------------------------------------------------------------------------
 *
 * nbtree.c
 *	  Implementation of Lehman and Yao's btree management algorithm for
 *	  Postgres.
 *
 * NOTES
 *	  This file contains only the public interface routines.
 *
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.169 2009/05/05 19:36:32 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "postgres.h"
|
1996-11-05 11:35:38 +01:00
|
|
|
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "access/genam.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "access/nbtree.h"
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "access/relscan.h"
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "catalog/index.h"
|
2008-11-19 11:34:52 +01:00
|
|
|
#include "catalog/storage.h"
|
2004-02-10 04:42:45 +01:00
|
|
|
#include "commands/vacuum.h"
|
2008-05-12 02:00:54 +02:00
|
|
|
#include "storage/bufmgr.h"
|
2003-02-22 01:45:05 +01:00
|
|
|
#include "storage/freespace.h"
|
2008-09-30 12:52:14 +02:00
|
|
|
#include "storage/indexfsm.h"
|
2008-04-17 01:59:40 +02:00
|
|
|
#include "storage/ipc.h"
|
2006-05-11 01:18:39 +02:00
|
|
|
#include "storage/lmgr.h"
|
2005-05-06 19:24:55 +02:00
|
|
|
#include "utils/memutils.h"
|
2000-07-21 08:42:39 +02:00
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* Working state for btbuild and its callback */
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
bool isUnique;
|
|
|
|
bool haveDead;
|
|
|
|
Relation heapRel;
|
|
|
|
BTSpool *spool;
|
2001-10-25 07:50:21 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* spool2 is needed only when the index is an unique index. Dead tuples
|
|
|
|
* are put into spool2 instead of spool in order to avoid uniqueness
|
|
|
|
* check.
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
*/
|
|
|
|
BTSpool *spool2;
|
|
|
|
double indtuples;
|
|
|
|
} BTBuildState;
|
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
/* Working state needed by btvacuumpage */
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
IndexVacuumInfo *info;
|
|
|
|
IndexBulkDeleteResult *stats;
|
|
|
|
IndexBulkDeleteCallback callback;
|
|
|
|
void *callback_state;
|
|
|
|
BTCycleId cycleid;
|
2008-09-30 12:52:14 +02:00
|
|
|
BlockNumber lastUsedPage;
|
2006-10-04 02:30:14 +02:00
|
|
|
BlockNumber totFreePages; /* true total # of free pages */
|
2006-05-08 02:00:17 +02:00
|
|
|
MemoryContext pagedelcontext;
|
|
|
|
} BTVacState;
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
static void btbuildCallback(Relation index,
|
2001-10-25 07:50:21 +02:00
|
|
|
HeapTuple htup,
|
2005-03-21 02:24:04 +01:00
|
|
|
Datum *values,
|
|
|
|
bool *isnull,
|
2001-10-25 07:50:21 +02:00
|
|
|
bool tupleIsAlive,
|
|
|
|
void *state);
|
2006-05-08 02:00:17 +02:00
|
|
|
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
|
|
|
IndexBulkDeleteCallback callback, void *callback_state,
|
|
|
|
BTCycleId cycleid);
|
|
|
|
static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
|
2006-10-04 02:30:14 +02:00
|
|
|
BlockNumber orig_blkno);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btbuild() -- build a new btree index.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btbuild(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
Relation heap = (Relation) PG_GETARG_POINTER(0);
|
|
|
|
Relation index = (Relation) PG_GETARG_POINTER(1);
|
|
|
|
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
|
2006-05-11 01:18:39 +02:00
|
|
|
IndexBuildResult *result;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
double reltuples;
|
|
|
|
BTBuildState buildstate;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
buildstate.isUnique = indexInfo->ii_Unique;
|
|
|
|
buildstate.haveDead = false;
|
|
|
|
buildstate.heapRel = heap;
|
|
|
|
buildstate.spool = NULL;
|
|
|
|
buildstate.spool2 = NULL;
|
|
|
|
buildstate.indtuples = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
#ifdef BTREE_BUILD_STATS
|
2002-11-15 02:26:09 +01:00
|
|
|
if (log_btree_build_stats)
|
1997-09-07 07:04:48 +02:00
|
|
|
ResetUsage();
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif /* BTREE_BUILD_STATS */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* We expect to be called exactly once for any index relation. If that's
|
|
|
|
* not the case, big trouble's what we have.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
if (RelationGetNumberOfBlocks(index) != 0)
|
2003-07-21 22:29:40 +02:00
|
|
|
elog(ERROR, "index \"%s\" already contains data",
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
RelationGetRelationName(index));
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2006-04-01 05:03:37 +02:00
|
|
|
buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* If building a unique index, put dead tuples in a second spool to keep
|
|
|
|
* them out of the uniqueness check.
|
2006-04-01 05:03:37 +02:00
|
|
|
*/
|
|
|
|
if (indexInfo->ii_Unique)
|
|
|
|
buildstate.spool2 = _bt_spoolinit(index, false, true);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* do the heap scan */
|
2008-11-13 18:42:10 +01:00
|
|
|
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
btbuildCallback, (void *) &buildstate);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/* okay, all heap tuples are indexed */
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
if (buildstate.spool2 && !buildstate.haveDead)
|
2000-08-10 04:33:20 +02:00
|
|
|
{
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* spool2 turns out to be unnecessary */
|
|
|
|
_bt_spooldestroy(buildstate.spool2);
|
|
|
|
buildstate.spool2 = NULL;
|
2000-08-10 04:33:20 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2006-04-01 05:03:37 +02:00
|
|
|
* Finish the build by (1) completing the sort of the spool file, (2)
|
|
|
|
* inserting the sorted tuples into btree pages and (3) building the upper
|
|
|
|
* levels.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2006-04-01 05:03:37 +02:00
|
|
|
_bt_leafbuild(buildstate.spool, buildstate.spool2);
|
|
|
|
_bt_spooldestroy(buildstate.spool);
|
|
|
|
if (buildstate.spool2)
|
|
|
|
_bt_spooldestroy(buildstate.spool2);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
#ifdef BTREE_BUILD_STATS
|
2002-11-15 02:26:09 +01:00
|
|
|
if (log_btree_build_stats)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2001-11-11 00:51:14 +01:00
|
|
|
ShowUsage("BTREE BUILD STATS");
|
1997-09-07 07:04:48 +02:00
|
|
|
ResetUsage();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif /* BTREE_BUILD_STATS */
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2006-04-26 00:46:05 +02:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* If we are reindexing a pre-existing index, it is critical to send out a
|
|
|
|
* relcache invalidation SI message to ensure all backends re-read the
|
|
|
|
* index metapage. We expect that the caller will ensure that happens
|
|
|
|
* (typically as a side effect of updating index stats, but it must happen
|
|
|
|
* even if the stats don't change!)
|
2006-04-26 00:46:05 +02:00
|
|
|
*/
|
|
|
|
|
2006-05-11 01:18:39 +02:00
|
|
|
/*
|
|
|
|
* Return statistics
|
|
|
|
*/
|
|
|
|
result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
|
|
|
|
|
|
|
|
result->heap_tuples = reltuples;
|
|
|
|
result->index_tuples = buildstate.indtuples;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
|
2006-05-11 01:18:39 +02:00
|
|
|
PG_RETURN_POINTER(result);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Per-tuple callback from IndexBuildHeapScan
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
btbuildCallback(Relation index,
|
|
|
|
HeapTuple htup,
|
2005-03-21 02:24:04 +01:00
|
|
|
Datum *values,
|
|
|
|
bool *isnull,
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
bool tupleIsAlive,
|
|
|
|
void *state)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
BTBuildState *buildstate = (BTBuildState *) state;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
IndexTuple itup;
|
|
|
|
|
|
|
|
/* form an index tuple and point it at the heap tuple */
|
2005-03-21 02:24:04 +01:00
|
|
|
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
itup->t_tid = htup->t_self;
|
|
|
|
|
|
|
|
/*
|
2006-04-01 05:03:37 +02:00
|
|
|
* insert the index tuple into the appropriate spool file for subsequent
|
|
|
|
* processing
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
*/
|
2006-04-01 05:03:37 +02:00
|
|
|
if (tupleIsAlive || buildstate->spool2 == NULL)
|
|
|
|
_bt_spool(itup, buildstate->spool);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
else
|
|
|
|
{
|
2006-04-01 05:03:37 +02:00
|
|
|
/* dead tuples are put into spool2 */
|
|
|
|
buildstate->haveDead = true;
|
|
|
|
_bt_spool(itup, buildstate->spool2);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
buildstate->indtuples += 1;
|
2000-06-13 09:35:40 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
pfree(itup);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btinsert() -- insert an index tuple into a btree.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* Descend the tree recursively, find the appropriate location for our
|
2005-03-21 02:24:04 +01:00
|
|
|
* new tuple, and put it there.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btinsert(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
2005-03-21 02:24:04 +01:00
|
|
|
Datum *values = (Datum *) PG_GETARG_POINTER(1);
|
|
|
|
bool *isnull = (bool *) PG_GETARG_POINTER(2);
|
2001-03-22 05:01:46 +01:00
|
|
|
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
|
|
|
|
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
|
2002-05-24 20:57:57 +02:00
|
|
|
bool checkUnique = PG_GETARG_BOOL(5);
|
1997-09-08 04:41:22 +02:00
|
|
|
IndexTuple itup;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/* generate an index tuple */
|
2005-03-21 02:24:04 +01:00
|
|
|
itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
|
1997-09-07 07:04:48 +02:00
|
|
|
itup->t_tid = *ht_ctid;
|
|
|
|
|
2006-01-26 00:04:21 +01:00
|
|
|
_bt_doinsert(rel, itup, checkUnique, heapRel);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
pfree(itup);
|
|
|
|
|
2005-03-21 02:24:04 +01:00
|
|
|
PG_RETURN_BOOL(true);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btgettuple() -- Get the next tuple in the scan.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btgettuple(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
|
|
|
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
|
2002-05-24 20:57:57 +02:00
|
|
|
BTScanOpaque so = (BTScanOpaque) scan->opaque;
|
|
|
|
bool res;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2008-04-13 21:18:14 +02:00
|
|
|
/* btree indexes are never lossy */
|
|
|
|
scan->xs_recheck = false;
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* If we've already initialized this scan, we can just advance it in the
|
|
|
|
* appropriate direction. If we haven't done so yet, we call a routine to
|
|
|
|
* get the first item in the scan.
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2006-05-07 03:21:30 +02:00
|
|
|
if (BTScanPosIsValid(so->currPos))
|
1998-07-30 07:05:05 +02:00
|
|
|
{
|
2002-05-24 20:57:57 +02:00
|
|
|
/*
|
|
|
|
* Check to see if we should kill the previously-fetched tuple.
|
|
|
|
*/
|
|
|
|
if (scan->kill_prior_tuple)
|
|
|
|
{
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Yes, remember it for later. (We'll deal with all such tuples
|
2006-05-07 03:21:30 +02:00
|
|
|
* at once right before leaving the index page.) The test for
|
|
|
|
* numKilled overrun is not just paranoia: if the caller reverses
|
|
|
|
* direction in the indexscan then the same item might get entered
|
2006-10-04 02:30:14 +02:00
|
|
|
* multiple times. It's not worth trying to optimize that, so we
|
2006-05-07 03:21:30 +02:00
|
|
|
* don't detect it, but instead just forget any excess entries.
|
2002-05-24 20:57:57 +02:00
|
|
|
*/
|
2006-05-07 03:21:30 +02:00
|
|
|
if (so->killedItems == NULL)
|
|
|
|
so->killedItems = (int *)
|
|
|
|
palloc(MaxIndexTuplesPerPage * sizeof(int));
|
|
|
|
if (so->numKilled < MaxIndexTuplesPerPage)
|
|
|
|
so->killedItems[so->numKilled++] = so->currPos.itemIndex;
|
2002-05-24 20:57:57 +02:00
|
|
|
}
|
2002-09-04 22:31:48 +02:00
|
|
|
|
2002-05-24 20:57:57 +02:00
|
|
|
/*
|
|
|
|
* Now continue the scan.
|
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
res = _bt_next(scan, dir);
|
1998-07-30 07:05:05 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
|
|
|
res = _bt_first(scan, dir);
|
1998-09-01 06:40:42 +02:00
|
|
|
|
2002-05-21 01:51:44 +02:00
|
|
|
PG_RETURN_BOOL(res);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2005-03-28 01:53:05 +02:00
|
|
|
/*
|
2008-04-11 00:25:26 +02:00
|
|
|
* btgetbitmap() -- gets all matching tuples, and adds them to a bitmap
|
2005-03-28 01:53:05 +02:00
|
|
|
*/
|
|
|
|
Datum
|
2008-04-11 00:25:26 +02:00
|
|
|
btgetbitmap(PG_FUNCTION_ARGS)
|
2005-03-28 01:53:05 +02:00
|
|
|
{
|
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
2008-04-11 00:25:26 +02:00
|
|
|
TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
|
2005-03-28 01:53:05 +02:00
|
|
|
BTScanOpaque so = (BTScanOpaque) scan->opaque;
|
2008-04-11 00:25:26 +02:00
|
|
|
int64 ntids = 0;
|
|
|
|
ItemPointer heapTid;
|
2005-03-28 01:53:05 +02:00
|
|
|
|
2008-04-11 00:25:26 +02:00
|
|
|
/* Fetch the first page & tuple. */
|
|
|
|
if (!_bt_first(scan, ForwardScanDirection))
|
2005-03-28 01:53:05 +02:00
|
|
|
{
|
2008-04-11 00:25:26 +02:00
|
|
|
/* empty scan */
|
|
|
|
PG_RETURN_INT64(0);
|
2005-03-28 01:53:05 +02:00
|
|
|
}
|
2008-04-11 00:25:26 +02:00
|
|
|
/* Save tuple ID, and continue scanning */
|
|
|
|
heapTid = &scan->xs_ctup.t_self;
|
|
|
|
tbm_add_tuples(tbm, heapTid, 1, false);
|
|
|
|
ntids++;
|
2005-03-28 01:53:05 +02:00
|
|
|
|
2008-04-11 00:25:26 +02:00
|
|
|
for (;;)
|
2005-03-28 01:53:05 +02:00
|
|
|
{
|
2006-05-07 03:21:30 +02:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Advance to next tuple within page. This is the same as the easy
|
|
|
|
* case in _bt_next().
|
2006-05-07 03:21:30 +02:00
|
|
|
*/
|
|
|
|
if (++so->currPos.itemIndex > so->currPos.lastItem)
|
|
|
|
{
|
|
|
|
/* let _bt_next do the heavy lifting */
|
2008-04-11 00:25:26 +02:00
|
|
|
if (!_bt_next(scan, ForwardScanDirection))
|
2006-05-07 03:21:30 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Save tuple ID, and continue scanning */
|
2008-04-11 00:25:26 +02:00
|
|
|
heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid;
|
|
|
|
tbm_add_tuples(tbm, heapTid, 1, false);
|
2006-05-07 03:21:30 +02:00
|
|
|
ntids++;
|
2005-03-28 01:53:05 +02:00
|
|
|
}
|
|
|
|
|
2008-04-11 00:25:26 +02:00
|
|
|
PG_RETURN_INT64(ntids);
|
2005-03-28 01:53:05 +02:00
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btbeginscan() -- start a scan on a btree index
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btbeginscan(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-06-13 09:35:40 +02:00
|
|
|
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
2002-05-21 01:51:44 +02:00
|
|
|
int keysz = PG_GETARG_INT32(1);
|
|
|
|
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2);
|
1997-09-08 04:41:22 +02:00
|
|
|
IndexScanDesc scan;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/* get the scan */
|
2002-05-21 01:51:44 +02:00
|
|
|
scan = RelationGetIndexScan(rel, keysz, scankey);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-06-13 09:35:40 +02:00
|
|
|
PG_RETURN_POINTER(scan);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btrescan() -- rescan an index relation
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btrescan(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
2002-05-21 01:51:44 +02:00
|
|
|
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);
|
1997-09-08 04:41:22 +02:00
|
|
|
BTScanOpaque so;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
so = (BTScanOpaque) scan->opaque;
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
if (so == NULL) /* if called from btbeginscan */
|
|
|
|
{
|
|
|
|
so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
|
2006-05-07 03:21:30 +02:00
|
|
|
so->currPos.buf = so->markPos.buf = InvalidBuffer;
|
2000-07-21 08:42:39 +02:00
|
|
|
if (scan->numberOfKeys > 0)
|
|
|
|
so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
|
2002-05-21 01:51:44 +02:00
|
|
|
else
|
2004-01-07 19:56:30 +01:00
|
|
|
so->keyData = NULL;
|
2006-10-04 02:30:14 +02:00
|
|
|
so->killedItems = NULL; /* until needed */
|
2006-05-07 03:21:30 +02:00
|
|
|
so->numKilled = 0;
|
2000-07-21 08:42:39 +02:00
|
|
|
scan->opaque = so;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we aren't holding any read locks, but gotta drop the pins */
|
2006-05-07 03:21:30 +02:00
|
|
|
if (BTScanPosIsValid(so->currPos))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2006-05-07 03:21:30 +02:00
|
|
|
/* Before leaving current page, deal with any killed items */
|
|
|
|
if (so->numKilled > 0)
|
|
|
|
_bt_killitems(scan, false);
|
|
|
|
ReleaseBuffer(so->currPos.buf);
|
|
|
|
so->currPos.buf = InvalidBuffer;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
|
2006-05-07 03:21:30 +02:00
|
|
|
if (BTScanPosIsValid(so->markPos))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2006-05-07 03:21:30 +02:00
|
|
|
ReleaseBuffer(so->markPos.buf);
|
|
|
|
so->markPos.buf = InvalidBuffer;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2006-08-24 03:18:34 +02:00
|
|
|
so->markItemIndex = -1;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
|
|
|
|
* - vadim 05/05/97
|
1997-09-07 07:04:48 +02:00
|
|
|
*/
|
2003-03-24 00:01:03 +01:00
|
|
|
if (scankey && scan->numberOfKeys > 0)
|
1997-09-07 07:04:48 +02:00
|
|
|
memmove(scan->keyData,
|
|
|
|
scankey,
|
|
|
|
scan->numberOfKeys * sizeof(ScanKeyData));
|
2003-11-12 22:15:59 +01:00
|
|
|
so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
|
1996-07-30 09:56:04 +02:00
|
|
|
|
2000-06-14 07:24:50 +02:00
|
|
|
PG_RETURN_VOID();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btendscan() -- close down a scan
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btendscan(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
2006-05-07 03:21:30 +02:00
|
|
|
BTScanOpaque so = (BTScanOpaque) scan->opaque;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/* we aren't holding any read locks, but gotta drop the pins */
|
2006-05-07 03:21:30 +02:00
|
|
|
if (BTScanPosIsValid(so->currPos))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2006-05-07 03:21:30 +02:00
|
|
|
/* Before leaving current page, deal with any killed items */
|
|
|
|
if (so->numKilled > 0)
|
|
|
|
_bt_killitems(scan, false);
|
|
|
|
ReleaseBuffer(so->currPos.buf);
|
|
|
|
so->currPos.buf = InvalidBuffer;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
|
2006-05-07 03:21:30 +02:00
|
|
|
if (BTScanPosIsValid(so->markPos))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2006-05-07 03:21:30 +02:00
|
|
|
ReleaseBuffer(so->markPos.buf);
|
|
|
|
so->markPos.buf = InvalidBuffer;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2006-08-24 03:18:34 +02:00
|
|
|
so->markItemIndex = -1;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2006-05-07 03:21:30 +02:00
|
|
|
if (so->killedItems != NULL)
|
|
|
|
pfree(so->killedItems);
|
2004-01-07 19:56:30 +01:00
|
|
|
if (so->keyData != NULL)
|
1997-09-07 07:04:48 +02:00
|
|
|
pfree(so->keyData);
|
|
|
|
pfree(so);
|
|
|
|
|
2000-06-14 07:24:50 +02:00
|
|
|
PG_RETURN_VOID();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btmarkpos() -- save current scan position
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btmarkpos(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
2006-05-07 03:21:30 +02:00
|
|
|
BTScanOpaque so = (BTScanOpaque) scan->opaque;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/* we aren't holding any read locks, but gotta drop the pin */
|
2006-05-07 03:21:30 +02:00
|
|
|
if (BTScanPosIsValid(so->markPos))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2006-05-07 03:21:30 +02:00
|
|
|
ReleaseBuffer(so->markPos.buf);
|
|
|
|
so->markPos.buf = InvalidBuffer;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
|
2006-08-24 03:18:34 +02:00
|
|
|
/*
|
|
|
|
* Just record the current itemIndex. If we later step to next page
|
2006-10-04 02:30:14 +02:00
|
|
|
* before releasing the marked position, _bt_steppage makes a full copy of
|
|
|
|
* the currPos struct in markPos. If (as often happens) the mark is moved
|
|
|
|
* before we leave the page, we don't have to do that work.
|
2006-08-24 03:18:34 +02:00
|
|
|
*/
|
2006-05-07 03:21:30 +02:00
|
|
|
if (BTScanPosIsValid(so->currPos))
|
2006-08-24 03:18:34 +02:00
|
|
|
so->markItemIndex = so->currPos.itemIndex;
|
|
|
|
else
|
|
|
|
so->markItemIndex = -1;
|
2000-06-13 09:35:40 +02:00
|
|
|
|
2000-06-14 07:24:50 +02:00
|
|
|
PG_RETURN_VOID();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btrestrpos() -- restore scan to last saved position
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btrestrpos(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
2006-05-07 03:21:30 +02:00
|
|
|
BTScanOpaque so = (BTScanOpaque) scan->opaque;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2006-08-24 03:18:34 +02:00
|
|
|
if (so->markItemIndex >= 0)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2006-08-24 03:18:34 +02:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* The mark position is on the same page we are currently on. Just
|
|
|
|
* restore the itemIndex.
|
2006-08-24 03:18:34 +02:00
|
|
|
*/
|
|
|
|
so->currPos.itemIndex = so->markItemIndex;
|
2006-10-04 02:30:14 +02:00
|
|
|
}
|
2006-08-24 03:18:34 +02:00
|
|
|
else
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2006-08-24 03:18:34 +02:00
|
|
|
/* we aren't holding any read locks, but gotta drop the pin */
|
|
|
|
if (BTScanPosIsValid(so->currPos))
|
|
|
|
{
|
|
|
|
/* Before leaving current page, deal with any killed items */
|
|
|
|
if (so->numKilled > 0 &&
|
|
|
|
so->currPos.buf != so->markPos.buf)
|
|
|
|
_bt_killitems(scan, false);
|
|
|
|
ReleaseBuffer(so->currPos.buf);
|
|
|
|
so->currPos.buf = InvalidBuffer;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (BTScanPosIsValid(so->markPos))
|
|
|
|
{
|
|
|
|
/* bump pin on mark buffer for assignment to current buffer */
|
|
|
|
IncrBufferRefCount(so->markPos.buf);
|
|
|
|
memcpy(&so->currPos, &so->markPos,
|
|
|
|
offsetof(BTScanPosData, items[1]) +
|
|
|
|
so->markPos.lastItem * sizeof(BTScanPosItem));
|
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2000-06-13 09:35:40 +02:00
|
|
|
|
2000-06-14 07:24:50 +02:00
|
|
|
PG_RETURN_VOID();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/*
|
|
|
|
* Bulk deletion of all index entries pointing to a set of heap tuples.
|
|
|
|
* The set of target tuples is specified via a callback routine that tells
|
|
|
|
* whether any given heap tuple (identified by ItemPointer) is being deleted.
|
|
|
|
*
|
|
|
|
* Result: a palloc'd struct containing statistical info for VACUUM displays.
|
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
btbulkdelete(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2006-05-03 00:25:10 +02:00
|
|
|
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
|
2006-10-04 02:30:14 +02:00
|
|
|
IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
|
2006-05-03 00:25:10 +02:00
|
|
|
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
|
|
|
|
void *callback_state = (void *) PG_GETARG_POINTER(3);
|
|
|
|
Relation rel = info->index;
|
2006-05-08 02:00:17 +02:00
|
|
|
BTCycleId cycleid;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
/* allocate stats if first time through, else re-use existing struct */
|
|
|
|
if (stats == NULL)
|
|
|
|
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
|
2006-02-12 00:31:34 +01:00
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
/* Establish the vacuum cycle ID to use for this scan */
|
2008-04-17 01:59:40 +02:00
|
|
|
/* The ENSURE stuff ensures we clean up shared memory on failure */
|
|
|
|
PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
{
|
2006-05-08 02:00:17 +02:00
|
|
|
cycleid = _bt_start_vacuum(rel);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
btvacuumscan(info, stats, callback, callback_state, cycleid);
|
|
|
|
}
|
2008-04-17 01:59:40 +02:00
|
|
|
PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
|
|
|
|
_bt_end_vacuum(rel);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
|
2006-05-03 00:25:10 +02:00
|
|
|
PG_RETURN_POINTER(stats);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1998-07-30 07:05:05 +02:00
|
|
|
|
2003-02-22 01:45:05 +01:00
|
|
|
/*
|
|
|
|
* Post-VACUUM cleanup.
|
|
|
|
*
|
|
|
|
* Result: a palloc'd struct containing statistical info for VACUUM displays.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
btvacuumcleanup(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2006-05-03 00:25:10 +02:00
|
|
|
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
|
|
|
|
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
|
2003-02-22 01:45:05 +01:00
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
/* No-op in ANALYZE ONLY mode */
|
|
|
|
if (info->analyze_only)
|
|
|
|
PG_RETURN_POINTER(stats);
|
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* If btbulkdelete was called, we need not do anything, just return the
|
|
|
|
* stats from the latest btbulkdelete call. If it wasn't called, we must
|
|
|
|
* still do a pass over the index, to recycle any newly-recyclable pages
|
|
|
|
* and to obtain index statistics.
|
2006-05-08 02:00:17 +02:00
|
|
|
*
|
|
|
|
* Since we aren't going to actually delete any leaf items, there's no
|
|
|
|
* need to go through all the vacuum-cycle-ID pushups.
|
|
|
|
*/
|
2006-05-03 00:25:10 +02:00
|
|
|
if (stats == NULL)
|
2006-05-08 02:00:17 +02:00
|
|
|
{
|
2006-05-03 00:25:10 +02:00
|
|
|
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
|
2006-05-08 02:00:17 +02:00
|
|
|
btvacuumscan(info, stats, NULL, NULL, 0);
|
|
|
|
}
|
2003-02-22 01:45:05 +01:00
|
|
|
|
2008-10-06 10:04:11 +02:00
|
|
|
/* Finally, vacuum the FSM */
|
|
|
|
IndexFreeSpaceMapVacuum(info->index);
|
|
|
|
|
2005-05-07 23:32:24 +02:00
|
|
|
/*
|
2006-05-08 02:00:17 +02:00
|
|
|
* During a non-FULL vacuum it's quite possible for us to be fooled by
|
|
|
|
* concurrent page splits into double-counting some index tuples, so
|
2006-10-04 02:30:14 +02:00
|
|
|
* disbelieve any total that exceeds the underlying heap's count. (We
|
|
|
|
* can't check this during btbulkdelete.)
|
2005-05-07 23:32:24 +02:00
|
|
|
*/
|
2006-05-08 02:00:17 +02:00
|
|
|
if (!info->vacuum_full)
|
|
|
|
{
|
|
|
|
if (stats->num_index_tuples > info->num_heap_tuples)
|
|
|
|
stats->num_index_tuples = info->num_heap_tuples;
|
|
|
|
}
|
2005-05-07 23:32:24 +02:00
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
PG_RETURN_POINTER(stats);
|
|
|
|
}
|
2003-02-22 01:45:05 +01:00
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
/*
|
|
|
|
* btvacuumscan --- scan the index for VACUUMing purposes
|
|
|
|
*
|
|
|
|
* This combines the functions of looking for leaf tuples that are deletable
|
|
|
|
* according to the vacuum callback, looking for empty pages that can be
|
|
|
|
* deleted, and looking for old deleted pages that can be recycled. Both
|
|
|
|
* btbulkdelete and btvacuumcleanup invoke this (the latter only if no
|
|
|
|
* btbulkdelete call occurred).
|
|
|
|
*
|
|
|
|
* The caller is responsible for initially allocating/zeroing a stats struct
|
|
|
|
* and for obtaining a vacuum cycle ID if necessary.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
|
|
|
IndexBulkDeleteCallback callback, void *callback_state,
|
|
|
|
BTCycleId cycleid)
|
|
|
|
{
|
|
|
|
Relation rel = info->index;
|
|
|
|
BTVacState vstate;
|
|
|
|
BlockNumber num_pages;
|
|
|
|
BlockNumber blkno;
|
|
|
|
bool needLock;
|
2005-05-07 23:32:24 +02:00
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Reset counts that will be incremented during the scan; needed in case
|
|
|
|
* of multiple scans during a single VACUUM command
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
stats->num_index_tuples = 0;
|
|
|
|
stats->pages_deleted = 0;
|
|
|
|
|
|
|
|
/* Set up info to pass down to btvacuumpage */
|
|
|
|
vstate.info = info;
|
|
|
|
vstate.stats = stats;
|
|
|
|
vstate.callback = callback;
|
|
|
|
vstate.callback_state = callback_state;
|
|
|
|
vstate.cycleid = cycleid;
|
2008-09-30 12:52:14 +02:00
|
|
|
vstate.lastUsedPage = BTREE_METAPAGE;
|
2006-09-21 22:31:22 +02:00
|
|
|
vstate.totFreePages = 0;
|
2003-02-22 01:45:05 +01:00
|
|
|
|
2003-02-23 07:17:13 +01:00
|
|
|
/* Create a temporary memory context to run _bt_pagedel in */
|
2006-05-08 02:00:17 +02:00
|
|
|
vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
"_bt_pagedel",
|
|
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
2003-02-23 07:17:13 +01:00
|
|
|
|
2003-02-22 01:45:05 +01:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* The outer loop iterates over all index pages except the metapage, in
|
|
|
|
* physical order (we hope the kernel will cooperate in providing
|
2006-05-08 02:00:17 +02:00
|
|
|
* read-ahead for speed). It is critical that we visit all leaf pages,
|
|
|
|
* including ones added after we start the scan, else we might fail to
|
|
|
|
* delete some deletable tuples. Hence, we must repeatedly check the
|
|
|
|
* relation length. We must acquire the relation-extension lock while
|
|
|
|
* doing so to avoid a race condition: if someone else is extending the
|
|
|
|
* relation, there is a window where bufmgr/smgr have created a new
|
2006-10-04 02:30:14 +02:00
|
|
|
* all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
|
|
|
|
* we manage to scan such a page here, we'll improperly assume it can be
|
|
|
|
* recycled. Taking the lock synchronizes things enough to prevent a
|
2006-05-08 02:00:17 +02:00
|
|
|
* problem: either num_pages won't include the new page, or _bt_getbuf
|
|
|
|
* already has write lock on the buffer and it will be fully initialized
|
|
|
|
* before we can examine it. (See also vacuumlazy.c, which has the same
|
2006-10-04 02:30:14 +02:00
|
|
|
* issue.) Also, we need not worry if a page is added immediately after
|
2006-05-08 02:00:17 +02:00
|
|
|
* we look; the page splitting code already has write-lock on the left
|
2006-10-04 02:30:14 +02:00
|
|
|
* page before it adds a right page, so we must already have processed any
|
|
|
|
* tuples due to be moved into such a page.
|
2006-05-08 02:00:17 +02:00
|
|
|
*
|
|
|
|
* We can skip locking for new or temp relations, however, since no one
|
|
|
|
* else could be accessing them.
|
2003-02-22 01:45:05 +01:00
|
|
|
*/
|
2006-05-08 02:00:17 +02:00
|
|
|
needLock = !RELATION_IS_LOCAL(rel);
|
2006-02-14 18:20:01 +01:00
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
blkno = BTREE_METAPAGE + 1;
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
/* Get the current relation length */
|
|
|
|
if (needLock)
|
|
|
|
LockRelationForExtension(rel, ExclusiveLock);
|
|
|
|
num_pages = RelationGetNumberOfBlocks(rel);
|
|
|
|
if (needLock)
|
|
|
|
UnlockRelationForExtension(rel, ExclusiveLock);
|
|
|
|
|
|
|
|
/* Quit if we've scanned the whole relation */
|
|
|
|
if (blkno >= num_pages)
|
|
|
|
break;
|
|
|
|
/* Iterate over pages, then loop back to recheck length */
|
|
|
|
for (; blkno < num_pages; blkno++)
|
2006-02-12 01:18:17 +01:00
|
|
|
{
|
2006-05-08 02:00:17 +02:00
|
|
|
btvacuumpage(&vstate, blkno, blkno);
|
2006-02-12 01:18:17 +01:00
|
|
|
}
|
2003-02-22 01:45:05 +01:00
|
|
|
}
|
|
|
|
|
2003-02-24 01:57:17 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* During VACUUM FULL, we truncate off any recyclable pages at the end of
|
|
|
|
* the index. In a normal vacuum it'd be unsafe to do this except by
|
|
|
|
* acquiring exclusive lock on the index and then rechecking all the
|
|
|
|
* pages; doesn't seem worth it.
|
2003-02-24 01:57:17 +01:00
|
|
|
*/
|
2008-09-30 12:52:14 +02:00
|
|
|
if (info->vacuum_full && vstate.lastUsedPage < num_pages - 1)
|
2003-02-24 01:57:17 +01:00
|
|
|
{
|
2008-09-30 12:52:14 +02:00
|
|
|
BlockNumber new_pages = vstate.lastUsedPage + 1;
|
2004-12-01 20:00:56 +01:00
|
|
|
|
2008-09-30 12:52:14 +02:00
|
|
|
/*
|
|
|
|
* Okay to truncate.
|
|
|
|
*/
|
|
|
|
RelationTruncate(rel, new_pages);
|
2004-12-01 20:00:56 +01:00
|
|
|
|
2008-09-30 12:52:14 +02:00
|
|
|
/* update statistics */
|
|
|
|
stats->pages_removed += num_pages - new_pages;
|
|
|
|
vstate.totFreePages -= (num_pages - new_pages);
|
|
|
|
num_pages = new_pages;
|
2003-02-24 01:57:17 +01:00
|
|
|
}
|
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
MemoryContextDelete(vstate.pagedelcontext);
|
2003-02-23 07:17:13 +01:00
|
|
|
|
2003-02-22 01:45:05 +01:00
|
|
|
/* update statistics */
|
|
|
|
stats->num_pages = num_pages;
|
2006-09-21 22:31:22 +02:00
|
|
|
stats->pages_free = vstate.totFreePages;
|
2006-05-08 02:00:17 +02:00
|
|
|
}
|
2003-02-22 01:45:05 +01:00
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
/*
|
|
|
|
* btvacuumpage --- VACUUM one page
|
|
|
|
*
|
|
|
|
* This processes a single page for btvacuumscan(). In some cases we
|
|
|
|
* must go back and re-examine previously-scanned pages; this routine
|
|
|
|
* recurses when necessary to handle that case.
|
|
|
|
*
|
|
|
|
* blkno is the page to process. orig_blkno is the highest block number
|
|
|
|
* reached by the outer btvacuumscan loop (the same as blkno, unless we
|
|
|
|
* are recursing to re-examine a previous page).
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
|
|
|
|
{
|
|
|
|
IndexVacuumInfo *info = vstate->info;
|
|
|
|
IndexBulkDeleteResult *stats = vstate->stats;
|
|
|
|
IndexBulkDeleteCallback callback = vstate->callback;
|
|
|
|
void *callback_state = vstate->callback_state;
|
|
|
|
Relation rel = info->index;
|
|
|
|
bool delete_now;
|
2006-10-04 02:30:14 +02:00
|
|
|
BlockNumber recurse_to;
|
2006-05-08 02:00:17 +02:00
|
|
|
Buffer buf;
|
|
|
|
Page page;
|
|
|
|
BTPageOpaque opaque;
|
|
|
|
|
|
|
|
restart:
|
|
|
|
delete_now = false;
|
|
|
|
recurse_to = P_NONE;
|
|
|
|
|
|
|
|
/* call vacuum_delay_point while not holding any buffer lock */
|
|
|
|
vacuum_delay_point();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We can't use _bt_getbuf() here because it always applies
|
|
|
|
* _bt_checkpage(), which will barf on an all-zero page. We want to
|
2007-05-30 22:12:03 +02:00
|
|
|
* recycle all-zero pages, not fail. Also, we want to use a nondefault
|
|
|
|
* buffer access strategy.
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
Unite ReadBufferWithFork, ReadBufferWithStrategy, and ZeroOrReadBuffer
functions into one ReadBufferExtended function, that takes the strategy
and mode as argument. There's three modes, RBM_NORMAL which is the default
used by plain ReadBuffer(), RBM_ZERO, which replaces ZeroOrReadBuffer, and
a new mode RBM_ZERO_ON_ERROR, which allows callers to read corrupt pages
without throwing an error. The FSM needs the new mode to recover from
corrupt pages, which could happend if we crash after extending an FSM file,
and the new page is "torn".
Add fork number to some error messages in bufmgr.c, that still lacked it.
2008-10-31 16:05:00 +01:00
|
|
|
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
|
|
|
|
info->strategy);
|
2006-05-08 02:00:17 +02:00
|
|
|
LockBuffer(buf, BT_READ);
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
if (!PageIsNew(page))
|
|
|
|
_bt_checkpage(rel, buf);
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* If we are recursing, the only case we want to do anything with is a
|
|
|
|
* live leaf page having the current vacuum cycle ID. Any other state
|
2008-09-30 12:52:14 +02:00
|
|
|
* implies we already saw the page (eg, deleted it as being empty).
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
if (blkno != orig_blkno)
|
|
|
|
{
|
|
|
|
if (_bt_page_recyclable(page) ||
|
2006-11-01 20:43:17 +01:00
|
|
|
P_IGNORE(opaque) ||
|
2006-05-08 02:00:17 +02:00
|
|
|
!P_ISLEAF(opaque) ||
|
|
|
|
opaque->btpo_cycleid != vstate->cycleid)
|
|
|
|
{
|
|
|
|
_bt_relbuf(rel, buf);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-09-30 12:52:14 +02:00
|
|
|
/* If the page is in use, update lastUsedPage */
|
|
|
|
if (!_bt_page_recyclable(page) && vstate->lastUsedPage < blkno)
|
|
|
|
vstate->lastUsedPage = blkno;
|
|
|
|
|
2006-05-08 02:00:17 +02:00
|
|
|
/* Page is valid, see what to do with it */
|
|
|
|
if (_bt_page_recyclable(page))
|
|
|
|
{
|
|
|
|
/* Okay to recycle this page */
|
2008-09-30 12:52:14 +02:00
|
|
|
RecordFreeIndexPage(rel, blkno);
|
2006-09-21 22:31:22 +02:00
|
|
|
vstate->totFreePages++;
|
2006-05-08 02:00:17 +02:00
|
|
|
stats->pages_deleted++;
|
|
|
|
}
|
|
|
|
else if (P_ISDELETED(opaque))
|
|
|
|
{
|
|
|
|
/* Already deleted, but can't recycle yet */
|
|
|
|
stats->pages_deleted++;
|
|
|
|
}
|
2006-11-01 20:43:17 +01:00
|
|
|
else if (P_ISHALFDEAD(opaque))
|
2006-05-08 02:00:17 +02:00
|
|
|
{
|
|
|
|
/* Half-dead, try to delete */
|
|
|
|
delete_now = true;
|
|
|
|
}
|
|
|
|
else if (P_ISLEAF(opaque))
|
|
|
|
{
|
|
|
|
OffsetNumber deletable[MaxOffsetNumber];
|
|
|
|
int ndeletable;
|
|
|
|
OffsetNumber offnum,
|
2006-10-04 02:30:14 +02:00
|
|
|
minoff,
|
|
|
|
maxoff;
|
2006-05-08 02:00:17 +02:00
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Trade in the initial read lock for a super-exclusive write lock on
|
|
|
|
* this page. We must get such a lock on every leaf page over the
|
|
|
|
* course of the vacuum scan, whether or not it actually contains any
|
|
|
|
* deletable tuples --- see nbtree/README.
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
|
|
|
LockBufferForCleanup(buf);
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Check whether we need to recurse back to earlier pages. What we
|
|
|
|
* are concerned about is a page split that happened since we started
|
|
|
|
* the vacuum scan. If the split moved some tuples to a lower page
|
|
|
|
* then we might have missed 'em. If so, set up for tail recursion.
|
|
|
|
* (Must do this before possibly clearing btpo_cycleid below!)
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
if (vstate->cycleid != 0 &&
|
|
|
|
opaque->btpo_cycleid == vstate->cycleid &&
|
|
|
|
!(opaque->btpo_flags & BTP_SPLIT_END) &&
|
|
|
|
!P_RIGHTMOST(opaque) &&
|
|
|
|
opaque->btpo_next < orig_blkno)
|
|
|
|
recurse_to = opaque->btpo_next;
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Scan over all items to see which ones need deleted according to the
|
|
|
|
* callback function.
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
ndeletable = 0;
|
|
|
|
minoff = P_FIRSTDATAKEY(opaque);
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
|
|
if (callback)
|
|
|
|
{
|
|
|
|
for (offnum = minoff;
|
|
|
|
offnum <= maxoff;
|
|
|
|
offnum = OffsetNumberNext(offnum))
|
|
|
|
{
|
|
|
|
IndexTuple itup;
|
|
|
|
ItemPointer htup;
|
|
|
|
|
|
|
|
itup = (IndexTuple) PageGetItem(page,
|
|
|
|
PageGetItemId(page, offnum));
|
|
|
|
htup = &(itup->t_tid);
|
|
|
|
if (callback(htup, callback_state))
|
|
|
|
deletable[ndeletable++] = offnum;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Apply any needed deletes. We issue just one _bt_delitems() call
|
|
|
|
* per page, so as to minimize WAL traffic.
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
if (ndeletable > 0)
|
|
|
|
{
|
|
|
|
_bt_delitems(rel, buf, deletable, ndeletable);
|
|
|
|
stats->tuples_removed += ndeletable;
|
|
|
|
/* must recompute maxoff */
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If the page has been split during this vacuum cycle, it seems
|
|
|
|
* worth expending a write to clear btpo_cycleid even if we don't
|
|
|
|
* have any deletions to do. (If we do, _bt_delitems takes care
|
|
|
|
* of this.) This ensures we won't process the page again.
|
|
|
|
*
|
2006-10-04 02:30:14 +02:00
|
|
|
* We treat this like a hint-bit update because there's no need to
|
|
|
|
* WAL-log it.
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
if (vstate->cycleid != 0 &&
|
|
|
|
opaque->btpo_cycleid == vstate->cycleid)
|
|
|
|
{
|
|
|
|
opaque->btpo_cycleid = 0;
|
|
|
|
SetBufferCommitInfoNeedsSave(buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* If it's now empty, try to delete; else count the live tuples. We
|
|
|
|
* don't delete when recursing, though, to avoid putting entries into
|
|
|
|
* freePages out-of-order (doesn't seem worth any extra code to handle
|
|
|
|
* the case).
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
if (minoff > maxoff)
|
|
|
|
delete_now = (blkno == orig_blkno);
|
|
|
|
else
|
|
|
|
stats->num_index_tuples += maxoff - minoff + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (delete_now)
|
|
|
|
{
|
|
|
|
MemoryContext oldcontext;
|
|
|
|
int ndel;
|
|
|
|
|
|
|
|
/* Run pagedel in a temp context to avoid memory leakage */
|
|
|
|
MemoryContextReset(vstate->pagedelcontext);
|
|
|
|
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
|
|
|
|
|
2006-11-01 20:43:17 +01:00
|
|
|
ndel = _bt_pagedel(rel, buf, NULL, info->vacuum_full);
|
2006-05-08 02:00:17 +02:00
|
|
|
|
|
|
|
/* count only this page, else may double-count parent */
|
|
|
|
if (ndel)
|
|
|
|
stats->pages_deleted++;
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* During VACUUM FULL it's okay to recycle deleted pages immediately,
|
|
|
|
* since there can be no other transactions scanning the index. Note
|
|
|
|
* that we will only recycle the current page and not any parent pages
|
|
|
|
* that _bt_pagedel might have recursed to; this seems reasonable in
|
|
|
|
* the name of simplicity. (Trying to do otherwise would mean we'd
|
|
|
|
* have to sort the list of recyclable pages we're building.)
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
if (ndel && info->vacuum_full)
|
|
|
|
{
|
2008-09-30 12:52:14 +02:00
|
|
|
RecordFreeIndexPage(rel, blkno);
|
2006-09-21 22:31:22 +02:00
|
|
|
vstate->totFreePages++;
|
2006-05-08 02:00:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
/* pagedel released buffer, so we shouldn't */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
_bt_relbuf(rel, buf);
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* This is really tail recursion, but if the compiler is too stupid to
|
|
|
|
* optimize it as such, we'd eat an uncomfortably large amount of stack
|
|
|
|
* space per recursion level (due to the deletable[] array). A failure is
|
|
|
|
* improbable since the number of levels isn't likely to be large ... but
|
|
|
|
* just in case, let's hand-optimize into a loop.
|
2006-05-08 02:00:17 +02:00
|
|
|
*/
|
|
|
|
if (recurse_to != P_NONE)
|
|
|
|
{
|
|
|
|
blkno = recurse_to;
|
|
|
|
goto restart;
|
|
|
|
}
|
2003-02-22 01:45:05 +01:00
|
|
|
}
|