1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-10-18 00:15:09 +02:00
|
|
|
* nbtree.c
|
1997-09-07 07:04:48 +02:00
|
|
|
* Implementation of Lehman and Yao's btree management algorithm for
|
|
|
|
* Postgres.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1999-10-18 00:15:09 +02:00
|
|
|
* NOTES
|
|
|
|
* This file contains only the public interface routines.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
2001-01-24 20:43:33 +01:00
|
|
|
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1999-10-18 00:15:09 +02:00
|
|
|
* IDENTIFICATION
|
2001-11-11 00:51:14 +01:00
|
|
|
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.85 2001/11/10 23:51:13 tgl Exp $
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "postgres.h"
|
1996-11-05 11:35:38 +01:00
|
|
|
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "access/genam.h"
|
|
|
|
#include "access/heapam.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "access/nbtree.h"
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "catalog/index.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "executor/executor.h"
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "miscadmin.h"
|
2000-08-10 04:33:20 +02:00
|
|
|
#include "storage/sinval.h"
|
2001-01-26 02:24:31 +01:00
|
|
|
#include "access/xlogutils.h"
|
2000-07-21 08:42:39 +02:00
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* Working state for btbuild and its callback */
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
bool usefast;
|
|
|
|
bool isUnique;
|
|
|
|
bool haveDead;
|
|
|
|
Relation heapRel;
|
|
|
|
BTSpool *spool;
|
2001-10-25 07:50:21 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/*
|
|
|
|
* spool2 is needed only when the index is an unique index. Dead
|
|
|
|
* tuples are put into spool2 instead of spool in order to avoid
|
|
|
|
* uniqueness check.
|
|
|
|
*/
|
|
|
|
BTSpool *spool2;
|
|
|
|
double indtuples;
|
|
|
|
} BTBuildState;
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/*
 * Set by btbuild() while an index build is under way (see the comment in
 * btbuild()); reset by AtEOXact_nbtree().
 */
bool		BuildingBtree = false;

/* When true, build the index via SORT instead of an insertion build. */
bool		FastBuild = true;

/*
 * Temporary flag for testing the new fix-tree code without affecting
 * anyone else.
 */
bool		FixBTree = true;
|
2000-10-21 17:43:36 +02:00
|
|
|
|
1998-09-01 06:40:42 +02:00
|
|
|
static void _bt_restscan(IndexScanDesc scan);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
static void btbuildCallback(Relation index,
|
2001-10-25 07:50:21 +02:00
|
|
|
HeapTuple htup,
|
|
|
|
Datum *attdata,
|
|
|
|
char *nulls,
|
|
|
|
bool tupleIsAlive,
|
|
|
|
void *state);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AtEOXact_nbtree() --- clean up nbtree subsystem at xact abort or commit.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
AtEOXact_nbtree(void)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Note: these actions should only be necessary during xact abort; but
|
|
|
|
* they can't hurt during a commit.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* If we were building a btree, we ain't anymore. */
|
|
|
|
BuildingBtree = false;
|
|
|
|
}
|
|
|
|
|
1998-07-30 07:05:05 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btbuild() -- build a new btree index.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* We use a global variable to record the fact that we're creating
|
|
|
|
* a new index. This is used to avoid high-concurrency locking,
|
|
|
|
* since the index won't be visible until this transaction commits
|
|
|
|
* and since building is guaranteed to be single-threaded.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btbuild(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
Relation heap = (Relation) PG_GETARG_POINTER(0);
|
|
|
|
Relation index = (Relation) PG_GETARG_POINTER(1);
|
|
|
|
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
double reltuples;
|
|
|
|
BTBuildState buildstate;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* set flag to disable locking */
|
1997-09-07 07:04:48 +02:00
|
|
|
BuildingBtree = true;
|
|
|
|
|
What looks like some *major* improvements to btree indexing...
Patches from: aoki@CS.Berkeley.EDU (Paul M. Aoki)
i gave jolly my btree bulkload code a long, long time ago but never
gave him a bunch of my bugfixes. here's a diff against the 6.0
baseline.
for some reason, this code has slowed down somewhat relative to the
insertion-build code on very small tables. don't know why -- it used
to be within about 10%. anyway, here are some (highly unscientific!)
timings on a dec 3000/300 for synthetic tables with 10k, 100k and
1000k tuples (basically, 1mb, 10mb and 100mb heaps). 'c' means
clustered (pre-sorted) inputs and 'u' means unclustered (randomly
ordered) inputs. the 10k table basically fits in the buffer pool, but
the 100k and 1000k tables don't. as you can see, insertion build is
fine if you've sorted your heaps on your index key or if your heap
fits in core, but is absolutely horrible on unordered data (yes,
that's 7.5 hours to index 100mb of data...) because of the zillions of
random i/os.
if it doesn't work for you for whatever reason, you can always turn it
back off by flipping the FastBuild flag in nbtree.c. i don't have
time to maintain it.
good luck!
baseline code:
time psql -c 'create index c10 on k10 using btree (c int4_ops)' bttest
real 8.6
time psql -c 'create index u10 on k10 using btree (b int4_ops)' bttest
real 9.1
time psql -c 'create index c100 on k100 using btree (c int4_ops)' bttest
real 59.2
time psql -c 'create index u100 on k100 using btree (b int4_ops)' bttest
real 652.4
time psql -c 'create index c1000 on k1000 using btree (c int4_ops)' bttest
real 636.1
time psql -c 'create index u1000 on k1000 using btree (b int4_ops)' bttest
real 26772.9
bulkloading code:
time psql -c 'create index c10 on k10 using btree (c int4_ops)' bttest
real 11.3
time psql -c 'create index u10 on k10 using btree (b int4_ops)' bttest
real 10.4
time psql -c 'create index c100 on k100 using btree (c int4_ops)' bttest
real 59.5
time psql -c 'create index u100 on k100 using btree (b int4_ops)' bttest
real 63.5
time psql -c 'create index c1000 on k1000 using btree (c int4_ops)' bttest
real 636.9
time psql -c 'create index u1000 on k1000 using btree (b int4_ops)' bttest
real 701.0
1997-02-12 06:04:52 +01:00
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* bootstrap processing does something strange, so don't use
|
|
|
|
* sort/build for initial catalog indices. at some point i need to
|
|
|
|
* look harder at this. (there is some kind of incremental processing
|
|
|
|
* going on there.) -- pma 08/29/95
|
What looks like some *major* improvements to btree indexing...
Patches from: aoki@CS.Berkeley.EDU (Paul M. Aoki)
i gave jolly my btree bulkload code a long, long time ago but never
gave him a bunch of my bugfixes. here's a diff against the 6.0
baseline.
for some reason, this code has slowed down somewhat relative to the
insertion-build code on very small tables. don't know why -- it used
to be within about 10%. anyway, here are some (highly unscientific!)
timings on a dec 3000/300 for synthetic tables with 10k, 100k and
1000k tuples (basically, 1mb, 10mb and 100mb heaps). 'c' means
clustered (pre-sorted) inputs and 'u' means unclustered (randomly
ordered) inputs. the 10k table basically fits in the buffer pool, but
the 100k and 1000k tables don't. as you can see, insertion build is
fine if you've sorted your heaps on your index key or if your heap
fits in core, but is absolutely horrible on unordered data (yes,
that's 7.5 hours to index 100mb of data...) because of the zillions of
random i/os.
if it doesn't work for you for whatever reason, you can always turn it
back off by flipping the FastBuild flag in nbtree.c. i don't have
time to maintain it.
good luck!
baseline code:
time psql -c 'create index c10 on k10 using btree (c int4_ops)' bttest
real 8.6
time psql -c 'create index u10 on k10 using btree (b int4_ops)' bttest
real 9.1
time psql -c 'create index c100 on k100 using btree (c int4_ops)' bttest
real 59.2
time psql -c 'create index u100 on k100 using btree (b int4_ops)' bttest
real 652.4
time psql -c 'create index c1000 on k1000 using btree (c int4_ops)' bttest
real 636.1
time psql -c 'create index u1000 on k1000 using btree (b int4_ops)' bttest
real 26772.9
bulkloading code:
time psql -c 'create index c10 on k10 using btree (c int4_ops)' bttest
real 11.3
time psql -c 'create index u10 on k10 using btree (b int4_ops)' bttest
real 10.4
time psql -c 'create index c100 on k100 using btree (c int4_ops)' bttest
real 59.5
time psql -c 'create index u100 on k100 using btree (b int4_ops)' bttest
real 63.5
time psql -c 'create index c1000 on k1000 using btree (c int4_ops)' bttest
real 636.9
time psql -c 'create index u1000 on k1000 using btree (b int4_ops)' bttest
real 701.0
1997-02-12 06:04:52 +01:00
|
|
|
*/
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
buildstate.usefast = (FastBuild && IsNormalProcessingMode());
|
|
|
|
buildstate.isUnique = indexInfo->ii_Unique;
|
|
|
|
buildstate.haveDead = false;
|
|
|
|
buildstate.heapRel = heap;
|
|
|
|
buildstate.spool = NULL;
|
|
|
|
buildstate.spool2 = NULL;
|
|
|
|
buildstate.indtuples = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
#ifdef BTREE_BUILD_STATS
|
2000-05-31 02:28:42 +02:00
|
|
|
if (Show_btree_build_stats)
|
1997-09-07 07:04:48 +02:00
|
|
|
ResetUsage();
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif /* BTREE_BUILD_STATS */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
* We expect to be called exactly once for any index relation. If
|
|
|
|
* that's not the case, big trouble's what we have.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
if (RelationGetNumberOfBlocks(index) != 0)
|
|
|
|
elog(ERROR, "%s already contains data",
|
|
|
|
RelationGetRelationName(index));
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* initialize the btree index metadata page */
|
|
|
|
_bt_metapinit(index);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
if (buildstate.usefast)
|
2000-08-10 04:33:20 +02:00
|
|
|
{
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique);
|
2001-10-25 07:50:21 +02:00
|
|
|
|
2000-08-10 04:33:20 +02:00
|
|
|
/*
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
* Different from spool, the uniqueness isn't checked for spool2.
|
2001-03-22 05:01:46 +01:00
|
|
|
*/
|
2000-08-10 04:33:20 +02:00
|
|
|
if (indexInfo->ii_Unique)
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
buildstate.spool2 = _bt_spoolinit(index, false);
|
2000-08-10 04:33:20 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* do the heap scan */
|
|
|
|
reltuples = IndexBuildHeapScan(heap, index, indexInfo,
|
|
|
|
btbuildCallback, (void *) &buildstate);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/* okay, all heap tuples are indexed */
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
if (buildstate.spool2 && !buildstate.haveDead)
|
2000-08-10 04:33:20 +02:00
|
|
|
{
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* spool2 turns out to be unnecessary */
|
|
|
|
_bt_spooldestroy(buildstate.spool2);
|
|
|
|
buildstate.spool2 = NULL;
|
2000-08-10 04:33:20 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2000-04-12 19:17:23 +02:00
|
|
|
* if we are doing bottom-up btree build, finish the build by (1)
|
|
|
|
* completing the sort of the spool file, (2) inserting the sorted
|
|
|
|
* tuples into btree pages and (3) building the upper levels.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
if (buildstate.usefast)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
_bt_leafbuild(buildstate.spool, buildstate.spool2);
|
|
|
|
_bt_spooldestroy(buildstate.spool);
|
|
|
|
if (buildstate.spool2)
|
|
|
|
_bt_spooldestroy(buildstate.spool2);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
#ifdef BTREE_BUILD_STATS
|
2000-05-31 02:28:42 +02:00
|
|
|
if (Show_btree_build_stats)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2001-11-11 00:51:14 +01:00
|
|
|
ShowUsage("BTREE BUILD STATS");
|
1997-09-07 07:04:48 +02:00
|
|
|
ResetUsage();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif /* BTREE_BUILD_STATS */
|
1996-07-09 08:22:35 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* all done */
|
|
|
|
BuildingBtree = false;
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* Since we just counted the tuples in the heap, we update its stats
|
|
|
|
* in pg_class to guarantee that the planner takes advantage of the
|
2000-04-12 19:17:23 +02:00
|
|
|
* index we just created. But, only update statistics during normal
|
|
|
|
* index definitions, not for indices on system catalogs created
|
|
|
|
* during bootstrap processing. We must close the relations before
|
|
|
|
* updating statistics to guarantee that the relcache entries are
|
|
|
|
* flushed when we increment the command counter in UpdateStats(). But
|
|
|
|
* we do not release any locks on the relations; those will be held
|
|
|
|
* until end of transaction.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
if (IsNormalProcessingMode())
|
|
|
|
{
|
2000-04-12 19:17:23 +02:00
|
|
|
Oid hrelid = RelationGetRelid(heap);
|
|
|
|
Oid irelid = RelationGetRelid(index);
|
1999-09-18 21:08:25 +02:00
|
|
|
|
|
|
|
heap_close(heap, NoLock);
|
1997-09-07 07:04:48 +02:00
|
|
|
index_close(index);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
UpdateStats(hrelid, reltuples);
|
|
|
|
UpdateStats(irelid, buildstate.indtuples);
|
|
|
|
}
|
|
|
|
|
|
|
|
PG_RETURN_VOID();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Per-tuple callback from IndexBuildHeapScan
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
btbuildCallback(Relation index,
|
|
|
|
HeapTuple htup,
|
|
|
|
Datum *attdata,
|
|
|
|
char *nulls,
|
|
|
|
bool tupleIsAlive,
|
|
|
|
void *state)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
BTBuildState *buildstate = (BTBuildState *) state;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
IndexTuple itup;
|
|
|
|
BTItem btitem;
|
|
|
|
InsertIndexResult res;
|
|
|
|
|
|
|
|
/* form an index tuple and point it at the heap tuple */
|
|
|
|
itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
|
|
|
|
itup->t_tid = htup->t_self;
|
|
|
|
|
|
|
|
btitem = _bt_formitem(itup);
|
|
|
|
|
|
|
|
/*
|
2001-10-25 07:50:21 +02:00
|
|
|
* if we are doing bottom-up btree build, we insert the index into a
|
|
|
|
* spool file for subsequent processing. otherwise, we insert into
|
|
|
|
* the btree.
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
*/
|
|
|
|
if (buildstate->usefast)
|
|
|
|
{
|
|
|
|
if (tupleIsAlive || buildstate->spool2 == NULL)
|
|
|
|
_bt_spool(btitem, buildstate->spool);
|
|
|
|
else
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* dead tuples are put into spool2 */
|
|
|
|
buildstate->haveDead = true;
|
|
|
|
_bt_spool(btitem, buildstate->spool2);
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
res = _bt_doinsert(index, btitem,
|
|
|
|
buildstate->isUnique, buildstate->heapRel);
|
|
|
|
if (res)
|
|
|
|
pfree(res);
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
buildstate->indtuples += 1;
|
2000-06-13 09:35:40 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
pfree(btitem);
|
|
|
|
pfree(itup);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btinsert() -- insert an index tuple into a btree.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* Descend the tree recursively, find the appropriate location for our
|
|
|
|
* new tuple, put it there, set its unique OID as appropriate, and
|
|
|
|
* return an InsertIndexResult to the caller.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btinsert(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
|
|
|
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
|
|
|
|
char *nulls = (char *) PG_GETARG_POINTER(2);
|
|
|
|
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
|
|
|
|
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
|
2000-06-13 09:35:40 +02:00
|
|
|
InsertIndexResult res;
|
1997-09-08 04:41:22 +02:00
|
|
|
BTItem btitem;
|
|
|
|
IndexTuple itup;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/* generate an index tuple */
|
1998-09-01 05:29:17 +02:00
|
|
|
itup = index_formtuple(RelationGetDescr(rel), datum, nulls);
|
1997-09-07 07:04:48 +02:00
|
|
|
itup->t_tid = *ht_ctid;
|
|
|
|
btitem = _bt_formitem(itup);
|
|
|
|
|
2000-07-15 00:18:02 +02:00
|
|
|
res = _bt_doinsert(rel, btitem, rel->rd_uniqueindex, heapRel);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
pfree(btitem);
|
|
|
|
pfree(itup);
|
|
|
|
|
2000-06-13 09:35:40 +02:00
|
|
|
PG_RETURN_POINTER(res);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btgettuple() -- Get the next tuple in the scan.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btgettuple(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
|
|
|
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
|
1997-09-07 07:04:48 +02:00
|
|
|
RetrieveIndexResult res;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we've already initialized this scan, we can just advance it in
|
|
|
|
* the appropriate direction. If we haven't done so yet, we call a
|
|
|
|
* routine to get the first item in the scan.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (ItemPointerIsValid(&(scan->currentItemData)))
|
1998-07-30 07:05:05 +02:00
|
|
|
{
|
|
|
|
/*
|
1999-05-25 18:15:34 +02:00
|
|
|
* Restore scan position using heap TID returned by previous call
|
2001-03-22 05:01:46 +01:00
|
|
|
* to btgettuple(). _bt_restscan() re-grabs the read lock on the
|
|
|
|
* buffer, too.
|
1998-07-30 07:05:05 +02:00
|
|
|
*/
|
|
|
|
_bt_restscan(scan);
|
1997-09-07 07:04:48 +02:00
|
|
|
res = _bt_next(scan, dir);
|
1998-07-30 07:05:05 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
|
|
|
res = _bt_first(scan, dir);
|
1998-09-01 06:40:42 +02:00
|
|
|
|
1999-05-26 00:04:56 +02:00
|
|
|
/*
|
2000-07-21 08:42:39 +02:00
|
|
|
* Save heap TID to use it in _bt_restscan. Then release the read
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
* lock on the buffer so that we aren't blocking other backends.
|
|
|
|
*
|
2001-10-25 07:50:21 +02:00
|
|
|
* NOTE: we do keep the pin on the buffer! This is essential to ensure
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
* that someone else doesn't delete the index entry we are stopped on.
|
1999-05-25 20:20:31 +02:00
|
|
|
*/
|
1998-07-30 07:05:05 +02:00
|
|
|
if (res)
|
1999-05-25 20:20:31 +02:00
|
|
|
{
|
1998-09-01 06:40:42 +02:00
|
|
|
((BTScanOpaque) scan->opaque)->curHeapIptr = res->heap_iptr;
|
2000-06-13 09:35:40 +02:00
|
|
|
LockBuffer(((BTScanOpaque) scan->opaque)->btso_curbuf,
|
|
|
|
BUFFER_LOCK_UNLOCK);
|
1999-05-25 20:20:31 +02:00
|
|
|
}
|
1998-09-01 06:40:42 +02:00
|
|
|
|
2000-06-13 09:35:40 +02:00
|
|
|
PG_RETURN_POINTER(res);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btbeginscan() -- start a scan on a btree index
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btbeginscan(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-06-13 09:35:40 +02:00
|
|
|
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
|
|
|
bool fromEnd = PG_GETARG_BOOL(1);
|
|
|
|
uint16 keysz = PG_GETARG_UINT16(2);
|
|
|
|
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(3);
|
1997-09-08 04:41:22 +02:00
|
|
|
IndexScanDesc scan;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/* get the scan */
|
|
|
|
scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
|
|
|
|
|
2000-06-13 09:35:40 +02:00
|
|
|
PG_RETURN_POINTER(scan);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btrescan() -- rescan an index relation
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btrescan(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
|
|
|
|
2000-06-13 09:35:40 +02:00
|
|
|
#ifdef NOT_USED /* XXX surely it's wrong to ignore this? */
|
2001-03-22 05:01:46 +01:00
|
|
|
bool fromEnd = PG_GETARG_BOOL(1);
|
2000-06-13 09:35:40 +02:00
|
|
|
#endif
|
2001-03-22 05:01:46 +01:00
|
|
|
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2);
|
1997-09-08 04:41:22 +02:00
|
|
|
ItemPointer iptr;
|
|
|
|
BTScanOpaque so;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
so = (BTScanOpaque) scan->opaque;
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
if (so == NULL) /* if called from btbeginscan */
|
|
|
|
{
|
|
|
|
so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
|
|
|
|
so->btso_curbuf = so->btso_mrkbuf = InvalidBuffer;
|
|
|
|
so->keyData = (ScanKey) NULL;
|
|
|
|
if (scan->numberOfKeys > 0)
|
|
|
|
so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
|
|
|
|
scan->opaque = so;
|
|
|
|
scan->flags = 0x0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we aren't holding any read locks, but gotta drop the pins */
|
1997-09-07 07:04:48 +02:00
|
|
|
if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
|
|
|
|
{
|
1999-05-25 20:20:31 +02:00
|
|
|
ReleaseBuffer(so->btso_curbuf);
|
1997-09-07 07:04:48 +02:00
|
|
|
so->btso_curbuf = InvalidBuffer;
|
|
|
|
ItemPointerSetInvalid(iptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
|
|
|
|
{
|
1999-05-25 20:20:31 +02:00
|
|
|
ReleaseBuffer(so->btso_mrkbuf);
|
1997-09-07 07:04:48 +02:00
|
|
|
so->btso_mrkbuf = InvalidBuffer;
|
|
|
|
ItemPointerSetInvalid(iptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reset the scan keys. Note that keys ordering stuff moved to
|
|
|
|
* _bt_first. - vadim 05/05/97
|
|
|
|
*/
|
|
|
|
so->numberOfKeys = scan->numberOfKeys;
|
|
|
|
if (scan->numberOfKeys > 0)
|
|
|
|
{
|
|
|
|
memmove(scan->keyData,
|
|
|
|
scankey,
|
|
|
|
scan->numberOfKeys * sizeof(ScanKeyData));
|
|
|
|
memmove(so->keyData,
|
|
|
|
scankey,
|
|
|
|
so->numberOfKeys * sizeof(ScanKeyData));
|
|
|
|
}
|
1996-07-30 09:56:04 +02:00
|
|
|
|
2000-06-14 07:24:50 +02:00
|
|
|
PG_RETURN_VOID();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
btmovescan(IndexScanDesc scan, Datum v)
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
ItemPointer iptr;
|
|
|
|
BTScanOpaque so;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
so = (BTScanOpaque) scan->opaque;
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/* we aren't holding any read locks, but gotta drop the pin */
|
1997-09-07 07:04:48 +02:00
|
|
|
if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
|
|
|
|
{
|
1999-05-25 20:20:31 +02:00
|
|
|
ReleaseBuffer(so->btso_curbuf);
|
1997-09-07 07:04:48 +02:00
|
|
|
so->btso_curbuf = InvalidBuffer;
|
|
|
|
ItemPointerSetInvalid(iptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
so->keyData[0].sk_argument = v;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btendscan() -- close down a scan
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btendscan(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
1997-09-08 04:41:22 +02:00
|
|
|
ItemPointer iptr;
|
|
|
|
BTScanOpaque so;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
so = (BTScanOpaque) scan->opaque;
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/* we aren't holding any read locks, but gotta drop the pins */
|
1997-09-07 07:04:48 +02:00
|
|
|
if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
|
|
|
|
{
|
|
|
|
if (BufferIsValid(so->btso_curbuf))
|
1999-05-25 20:20:31 +02:00
|
|
|
ReleaseBuffer(so->btso_curbuf);
|
1997-09-07 07:04:48 +02:00
|
|
|
so->btso_curbuf = InvalidBuffer;
|
|
|
|
ItemPointerSetInvalid(iptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
|
|
|
|
{
|
|
|
|
if (BufferIsValid(so->btso_mrkbuf))
|
1999-05-25 20:20:31 +02:00
|
|
|
ReleaseBuffer(so->btso_mrkbuf);
|
1997-09-07 07:04:48 +02:00
|
|
|
so->btso_mrkbuf = InvalidBuffer;
|
|
|
|
ItemPointerSetInvalid(iptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (so->keyData != (ScanKey) NULL)
|
|
|
|
pfree(so->keyData);
|
|
|
|
pfree(so);
|
|
|
|
|
2000-06-14 07:24:50 +02:00
|
|
|
PG_RETURN_VOID();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btmarkpos() -- save current scan position
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btmarkpos(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
1997-09-08 04:41:22 +02:00
|
|
|
ItemPointer iptr;
|
|
|
|
BTScanOpaque so;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
so = (BTScanOpaque) scan->opaque;
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/* we aren't holding any read locks, but gotta drop the pin */
|
1997-09-07 07:04:48 +02:00
|
|
|
if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
|
|
|
|
{
|
1999-05-25 20:20:31 +02:00
|
|
|
ReleaseBuffer(so->btso_mrkbuf);
|
1997-09-07 07:04:48 +02:00
|
|
|
so->btso_mrkbuf = InvalidBuffer;
|
|
|
|
ItemPointerSetInvalid(iptr);
|
|
|
|
}
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/* bump pin on current buffer for assignment to mark buffer */
|
1997-09-07 07:04:48 +02:00
|
|
|
if (ItemPointerIsValid(&(scan->currentItemData)))
|
|
|
|
{
|
1999-05-25 20:20:31 +02:00
|
|
|
so->btso_mrkbuf = ReadBuffer(scan->relation,
|
1999-05-26 00:04:56 +02:00
|
|
|
BufferGetBlockNumber(so->btso_curbuf));
|
1997-09-07 07:04:48 +02:00
|
|
|
scan->currentMarkData = scan->currentItemData;
|
1998-07-30 07:05:05 +02:00
|
|
|
so->mrkHeapIptr = so->curHeapIptr;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2000-06-13 09:35:40 +02:00
|
|
|
|
2000-06-14 07:24:50 +02:00
|
|
|
PG_RETURN_VOID();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-07 07:04:48 +02:00
|
|
|
* btrestrpos() -- restore scan to last saved position
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
btrestrpos(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
1997-09-08 04:41:22 +02:00
|
|
|
ItemPointer iptr;
|
|
|
|
BTScanOpaque so;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
so = (BTScanOpaque) scan->opaque;
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/* we aren't holding any read locks, but gotta drop the pin */
|
1997-09-07 07:04:48 +02:00
|
|
|
if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
|
|
|
|
{
|
1999-05-25 20:20:31 +02:00
|
|
|
ReleaseBuffer(so->btso_curbuf);
|
1997-09-07 07:04:48 +02:00
|
|
|
so->btso_curbuf = InvalidBuffer;
|
|
|
|
ItemPointerSetInvalid(iptr);
|
|
|
|
}
|
|
|
|
|
1999-05-25 20:20:31 +02:00
|
|
|
/* bump pin on marked buffer */
|
1997-09-07 07:04:48 +02:00
|
|
|
if (ItemPointerIsValid(&(scan->currentMarkData)))
|
|
|
|
{
|
1999-05-25 20:20:31 +02:00
|
|
|
so->btso_curbuf = ReadBuffer(scan->relation,
|
1999-05-26 00:04:56 +02:00
|
|
|
BufferGetBlockNumber(so->btso_mrkbuf));
|
1997-09-07 07:04:48 +02:00
|
|
|
scan->currentItemData = scan->currentMarkData;
|
1998-07-30 07:05:05 +02:00
|
|
|
so->curHeapIptr = so->mrkHeapIptr;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2000-06-13 09:35:40 +02:00
|
|
|
|
2000-06-14 07:24:50 +02:00
|
|
|
PG_RETURN_VOID();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/*
|
|
|
|
* Bulk deletion of all index entries pointing to a set of heap tuples.
|
|
|
|
* The set of target tuples is specified via a callback routine that tells
|
|
|
|
* whether any given heap tuple (identified by ItemPointer) is being deleted.
|
|
|
|
*
|
|
|
|
* Result: a palloc'd struct containing statistical info for VACUUM displays.
|
|
|
|
*/
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
btbulkdelete(PG_FUNCTION_ARGS)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
|
|
|
|
void *callback_state = (void *) PG_GETARG_POINTER(2);
|
|
|
|
IndexBulkDeleteResult *result;
|
2001-10-25 07:50:21 +02:00
|
|
|
BlockNumber num_pages;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
double tuples_removed;
|
|
|
|
double num_index_tuples;
|
|
|
|
RetrieveIndexResult res;
|
|
|
|
IndexScanDesc scan;
|
|
|
|
BTScanOpaque so;
|
|
|
|
ItemPointer current;
|
|
|
|
|
|
|
|
tuples_removed = 0;
|
|
|
|
num_index_tuples = 0;
|
|
|
|
|
|
|
|
/*
|
2001-10-25 07:50:21 +02:00
|
|
|
* We use a standard IndexScanDesc scan object, but to speed up the
|
|
|
|
* loop, we skip most of the wrapper layers of index_getnext and
|
|
|
|
* instead call _bt_step directly. This implies holding buffer lock
|
|
|
|
* on a target page throughout the loop over the page's tuples.
|
|
|
|
* Initially, we have a read lock acquired by _bt_step when we stepped
|
|
|
|
* onto the page. If we find a tuple we need to delete, we trade in
|
|
|
|
* the read lock for an exclusive write lock; after that, we hold the
|
|
|
|
* write lock until we step off the page (fortunately, _bt_relbuf
|
|
|
|
* doesn't care which kind of lock it's releasing). This should
|
|
|
|
* minimize the amount of work needed per page.
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
*/
|
|
|
|
scan = index_beginscan(rel, false, 0, (ScanKey) NULL);
|
|
|
|
so = (BTScanOpaque) scan->opaque;
|
|
|
|
current = &(scan->currentItemData);
|
2000-06-13 09:35:40 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
/* Use _bt_first to get started, then _bt_step to remaining tuples */
|
|
|
|
res = _bt_first(scan, ForwardScanDirection);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
if (res != NULL)
|
|
|
|
{
|
|
|
|
Buffer buf;
|
2001-10-25 07:50:21 +02:00
|
|
|
BlockNumber lockedBlock = InvalidBlockNumber;
|
2000-06-13 09:35:40 +02:00
|
|
|
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
pfree(res);
|
|
|
|
/* we have the buffer pinned and locked */
|
|
|
|
buf = so->btso_curbuf;
|
|
|
|
Assert(BufferIsValid(buf));
|
|
|
|
|
|
|
|
do
|
|
|
|
{
|
|
|
|
Page page;
|
2001-10-25 07:50:21 +02:00
|
|
|
BlockNumber blkno;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
OffsetNumber offnum;
|
|
|
|
BTItem btitem;
|
|
|
|
IndexTuple itup;
|
2001-10-25 07:50:21 +02:00
|
|
|
ItemPointer htup;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
|
|
|
|
/* current is the next index tuple */
|
|
|
|
blkno = ItemPointerGetBlockNumber(current);
|
|
|
|
offnum = ItemPointerGetOffsetNumber(current);
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
|
|
|
|
itup = &btitem->bti_itup;
|
|
|
|
htup = &(itup->t_tid);
|
|
|
|
|
|
|
|
if (callback(htup, callback_state))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If this is first deletion on this page, trade in read
|
2001-10-25 07:50:21 +02:00
|
|
|
* lock for a really-exclusive write lock. Then, step
|
|
|
|
* back one and re-examine the item, because someone else
|
|
|
|
* might have inserted an item while we weren't holding
|
|
|
|
* the lock!
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
*/
|
|
|
|
if (blkno != lockedBlock)
|
|
|
|
{
|
|
|
|
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
|
|
|
LockBufferForCleanup(buf);
|
|
|
|
lockedBlock = blkno;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Delete the item from the page */
|
|
|
|
_bt_itemdel(rel, buf, current);
|
|
|
|
|
|
|
|
/* Mark buffer dirty, but keep the lock and pin */
|
|
|
|
WriteNoReleaseBuffer(buf);
|
|
|
|
|
|
|
|
tuples_removed += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to back up the scan one item so that the next
|
|
|
|
* cycle will re-examine the same offnum on this page.
|
|
|
|
*
|
2001-10-25 07:50:21 +02:00
|
|
|
* For now, just hack the current-item index. Will need to
|
|
|
|
* be smarter when deletion includes removal of empty
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
* index pages.
|
|
|
|
*/
|
|
|
|
current->ip_posid--;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
num_index_tuples += 1;
|
|
|
|
} while (_bt_step(scan, &buf, ForwardScanDirection));
|
|
|
|
}
|
|
|
|
|
|
|
|
index_endscan(scan);
|
|
|
|
|
|
|
|
/* return statistics */
|
|
|
|
num_pages = RelationGetNumberOfBlocks(rel);
|
|
|
|
|
|
|
|
result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
|
|
|
|
result->num_pages = num_pages;
|
|
|
|
result->tuples_removed = tuples_removed;
|
|
|
|
result->num_index_tuples = num_index_tuples;
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(result);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1998-07-30 07:05:05 +02:00
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/*
|
|
|
|
* Restore scan position when btgettuple is called to continue a scan.
|
|
|
|
*/
|
1998-07-30 07:05:05 +02:00
|
|
|
static void
|
|
|
|
_bt_restscan(IndexScanDesc scan)
|
|
|
|
{
|
1998-09-01 06:40:42 +02:00
|
|
|
Relation rel = scan->relation;
|
|
|
|
BTScanOpaque so = (BTScanOpaque) scan->opaque;
|
|
|
|
Buffer buf = so->btso_curbuf;
|
1999-06-07 17:14:54 +02:00
|
|
|
Page page;
|
1998-09-01 06:40:42 +02:00
|
|
|
ItemPointer current = &(scan->currentItemData);
|
|
|
|
OffsetNumber offnum = ItemPointerGetOffsetNumber(current),
|
1999-06-07 17:14:54 +02:00
|
|
|
maxoff;
|
|
|
|
BTPageOpaque opaque;
|
1998-09-01 06:40:42 +02:00
|
|
|
ItemPointerData target = so->curHeapIptr;
|
|
|
|
BTItem item;
|
|
|
|
BlockNumber blkno;
|
1998-07-30 07:05:05 +02:00
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/*
|
2001-03-22 05:01:46 +01:00
|
|
|
* Get back the read lock we were holding on the buffer. (We still
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
* have a reference-count pin on it, so need not get that.)
|
2000-07-21 08:42:39 +02:00
|
|
|
*/
|
|
|
|
LockBuffer(buf, BT_READ);
|
|
|
|
|
1999-06-07 17:14:54 +02:00
|
|
|
page = BufferGetPage(buf);
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
|
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
1999-05-26 00:04:56 +02:00
|
|
|
|
1999-03-28 22:32:42 +02:00
|
|
|
/*
|
1999-05-25 18:15:34 +02:00
|
|
|
* We use this as flag when first index tuple on page is deleted but
|
|
|
|
* we do not move left (this would slowdown vacuum) - so we set
|
|
|
|
* current->ip_posid before first index tuple on the current page
|
1999-03-28 22:32:42 +02:00
|
|
|
* (_bt_step will move it right)...
|
|
|
|
*/
|
|
|
|
if (!ItemPointerIsValid(&target))
|
|
|
|
{
|
2000-07-21 08:42:39 +02:00
|
|
|
ItemPointerSetOffsetNumber(current,
|
2001-03-22 05:01:46 +01:00
|
|
|
OffsetNumberPrev(P_FIRSTDATAKEY(opaque)));
|
1999-03-28 22:32:42 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/*
|
2001-03-22 05:01:46 +01:00
|
|
|
* The item we were on may have moved right due to insertions. Find it
|
|
|
|
* again.
|
2000-07-21 08:42:39 +02:00
|
|
|
*/
|
|
|
|
for (;;)
|
1998-07-30 07:05:05 +02:00
|
|
|
{
|
2000-07-21 08:42:39 +02:00
|
|
|
/* Check for item on this page */
|
1998-09-01 06:40:42 +02:00
|
|
|
for (;
|
1998-07-30 07:05:05 +02:00
|
|
|
offnum <= maxoff;
|
|
|
|
offnum = OffsetNumberNext(offnum))
|
|
|
|
{
|
|
|
|
item = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
|
2000-07-21 08:42:39 +02:00
|
|
|
if (item->bti_itup.t_tid.ip_blkid.bi_hi ==
|
|
|
|
target.ip_blkid.bi_hi &&
|
|
|
|
item->bti_itup.t_tid.ip_blkid.bi_lo ==
|
|
|
|
target.ip_blkid.bi_lo &&
|
1998-07-30 07:05:05 +02:00
|
|
|
item->bti_itup.t_tid.ip_posid == target.ip_posid)
|
|
|
|
{
|
|
|
|
current->ip_posid = offnum;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2000-07-21 08:42:39 +02:00
|
|
|
/*
|
2001-03-22 05:01:46 +01:00
|
|
|
* By here, the item we're looking for moved right at least one
|
|
|
|
* page
|
2000-07-21 08:42:39 +02:00
|
|
|
*/
|
1998-07-30 07:05:05 +02:00
|
|
|
if (P_RIGHTMOST(opaque))
|
2000-07-21 08:42:39 +02:00
|
|
|
elog(FATAL, "_bt_restscan: my bits moved right off the end of the world!"
|
|
|
|
"\n\tRecreate index %s.", RelationGetRelationName(rel));
|
1998-07-30 07:05:05 +02:00
|
|
|
|
|
|
|
blkno = opaque->btpo_next;
|
Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers. Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc. (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.) The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method. I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions. Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.
Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).
Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error. We discovered this need long
ago for btree, but missed the other guys.
Oh, one more thing: concurrent VACUUM is now the default.
2001-07-16 00:48:19 +02:00
|
|
|
_bt_relbuf(rel, buf);
|
1998-07-30 07:05:05 +02:00
|
|
|
buf = _bt_getbuf(rel, blkno, BT_READ);
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
|
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
2000-07-21 08:42:39 +02:00
|
|
|
offnum = P_FIRSTDATAKEY(opaque);
|
|
|
|
ItemPointerSet(current, blkno, offnum);
|
|
|
|
so->btso_curbuf = buf;
|
1998-07-30 07:05:05 +02:00
|
|
|
}
|
|
|
|
}
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
static void
|
2000-12-28 14:00:29 +01:00
|
|
|
_bt_restore_page(Page page, char *from, int len)
|
2000-10-13 04:03:02 +02:00
|
|
|
{
|
2000-12-28 14:00:29 +01:00
|
|
|
BTItemData btdata;
|
|
|
|
Size itemsz;
|
|
|
|
char *end = from + len;
|
2000-10-21 17:43:36 +02:00
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
for (; from < end;)
|
2000-10-13 04:03:02 +02:00
|
|
|
{
|
2000-12-28 14:00:29 +01:00
|
|
|
memcpy(&btdata, from, sizeof(BTItemData));
|
2000-10-21 17:43:36 +02:00
|
|
|
itemsz = IndexTupleDSize(btdata.bti_itup) +
|
2001-03-22 05:01:46 +01:00
|
|
|
(sizeof(BTItemData) - sizeof(IndexTupleData));
|
2000-10-21 17:43:36 +02:00
|
|
|
itemsz = MAXALIGN(itemsz);
|
2000-12-28 14:00:29 +01:00
|
|
|
if (PageAddItem(page, (Item) from, itemsz,
|
2001-03-22 05:01:46 +01:00
|
|
|
FirstOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
2000-12-28 14:00:29 +01:00
|
|
|
elog(STOP, "_bt_restore_page: can't add item to page");
|
|
|
|
from += itemsz;
|
2000-10-21 17:43:36 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
xl_btree_delete *xlrec;
|
|
|
|
Relation reln;
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
2000-10-21 17:43:36 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
if (!redo || (record->xl_info & XLR_BKP_BLOCK_1))
|
2000-10-21 17:43:36 +02:00
|
|
|
return;
|
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
xlrec = (xl_btree_delete *) XLogRecGetData(record);
|
2000-10-21 17:43:36 +02:00
|
|
|
reln = XLogOpenRelation(redo, RM_BTREE_ID, xlrec->target.node);
|
|
|
|
if (!RelationIsValid(reln))
|
|
|
|
return;
|
2001-03-22 05:01:46 +01:00
|
|
|
buffer = XLogReadBuffer(false, reln,
|
|
|
|
ItemPointerGetBlockNumber(&(xlrec->target.tid)));
|
2000-10-21 17:43:36 +02:00
|
|
|
if (!BufferIsValid(buffer))
|
|
|
|
elog(STOP, "btree_delete_redo: block unfound");
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
if (PageIsNew((PageHeader) page))
|
|
|
|
elog(STOP, "btree_delete_redo: uninitialized page");
|
|
|
|
|
2000-11-01 21:39:58 +01:00
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
|
|
|
{
|
|
|
|
UnlockAndReleaseBuffer(buffer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
PageIndexTupleDelete(page, ItemPointerGetOffsetNumber(&(xlrec->target.tid)));
|
|
|
|
|
2000-11-01 21:39:58 +01:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetSUI(page, ThisStartUpID);
|
|
|
|
UnlockAndWriteBuffer(buffer);
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
btree_xlog_insert(bool redo, XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
xl_btree_insert *xlrec;
|
|
|
|
Relation reln;
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
BTPageOpaque pageop;
|
2000-10-21 17:43:36 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
if (redo && (record->xl_info & XLR_BKP_BLOCK_1))
|
|
|
|
return;
|
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
xlrec = (xl_btree_insert *) XLogRecGetData(record);
|
2000-10-21 17:43:36 +02:00
|
|
|
reln = XLogOpenRelation(redo, RM_BTREE_ID, xlrec->target.node);
|
|
|
|
if (!RelationIsValid(reln))
|
|
|
|
return;
|
2001-03-22 05:01:46 +01:00
|
|
|
buffer = XLogReadBuffer(false, reln,
|
|
|
|
ItemPointerGetBlockNumber(&(xlrec->target.tid)));
|
2000-10-21 17:43:36 +02:00
|
|
|
if (!BufferIsValid(buffer))
|
2000-12-28 14:00:29 +01:00
|
|
|
elog(STOP, "btree_insert_%sdo: block unfound", (redo) ? "re" : "un");
|
2000-10-21 17:43:36 +02:00
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
if (PageIsNew((PageHeader) page))
|
2000-12-28 14:00:29 +01:00
|
|
|
elog(STOP, "btree_insert_%sdo: uninitialized page", (redo) ? "re" : "un");
|
2000-10-21 17:43:36 +02:00
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
|
|
|
|
if (redo)
|
|
|
|
{
|
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
|
|
|
{
|
2000-12-28 14:00:29 +01:00
|
|
|
UnlockAndReleaseBuffer(buffer);
|
|
|
|
return;
|
|
|
|
}
|
2001-03-22 05:01:46 +01:00
|
|
|
if (PageAddItem(page, (Item) ((char *) xlrec + SizeOfBtreeInsert),
|
|
|
|
record->xl_len - SizeOfBtreeInsert,
|
|
|
|
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
|
|
|
LP_USED) == InvalidOffsetNumber)
|
|
|
|
elog(STOP, "btree_insert_redo: failed to add item");
|
2000-10-21 17:43:36 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetSUI(page, ThisStartUpID);
|
|
|
|
UnlockAndWriteBuffer(buffer);
|
2000-10-21 17:43:36 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (XLByteLT(PageGetLSN(page), lsn))
|
|
|
|
elog(STOP, "btree_insert_undo: bad page LSN");
|
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
if (!P_ISLEAF(pageop))
|
2000-10-21 17:43:36 +02:00
|
|
|
{
|
|
|
|
UnlockAndReleaseBuffer(buffer);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
elog(STOP, "btree_insert_undo: unimplemented");
|
2000-10-21 17:43:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
btree_xlog_split(bool redo, bool onleft, XLogRecPtr lsn, XLogRecord *record)
|
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
|
|
|
|
Relation reln;
|
|
|
|
BlockNumber blkno;
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
BTPageOpaque pageop;
|
|
|
|
char *op = (redo) ? "redo" : "undo";
|
|
|
|
bool isleaf = (record->xl_info & XLOG_BTREE_LEAF);
|
2000-10-21 17:43:36 +02:00
|
|
|
|
|
|
|
reln = XLogOpenRelation(redo, RM_BTREE_ID, xlrec->target.node);
|
|
|
|
if (!RelationIsValid(reln))
|
|
|
|
return;
|
2000-10-13 04:03:02 +02:00
|
|
|
|
|
|
|
/* Left (original) sibling */
|
|
|
|
blkno = (onleft) ? ItemPointerGetBlockNumber(&(xlrec->target.tid)) :
|
2001-03-22 05:01:46 +01:00
|
|
|
BlockIdGetBlockNumber(&(xlrec->otherblk));
|
2000-10-13 04:03:02 +02:00
|
|
|
buffer = XLogReadBuffer(false, reln, blkno);
|
|
|
|
if (!BufferIsValid(buffer))
|
|
|
|
elog(STOP, "btree_split_%s: lost left sibling", op);
|
|
|
|
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
2000-12-28 14:00:29 +01:00
|
|
|
if (redo)
|
|
|
|
_bt_pageinit(page, BufferGetPageSize(buffer));
|
|
|
|
else if (PageIsNew((PageHeader) page))
|
|
|
|
elog(STOP, "btree_split_undo: uninitialized left sibling");
|
2000-10-13 04:03:02 +02:00
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
|
|
|
|
|
|
|
if (redo)
|
|
|
|
{
|
2000-12-28 14:00:29 +01:00
|
|
|
pageop->btpo_parent = BlockIdGetBlockNumber(&(xlrec->parentblk));
|
|
|
|
pageop->btpo_prev = BlockIdGetBlockNumber(&(xlrec->leftblk));
|
|
|
|
if (onleft)
|
|
|
|
pageop->btpo_next = BlockIdGetBlockNumber(&(xlrec->otherblk));
|
2000-10-13 04:03:02 +02:00
|
|
|
else
|
2000-12-28 14:00:29 +01:00
|
|
|
pageop->btpo_next = ItemPointerGetBlockNumber(&(xlrec->target.tid));
|
|
|
|
pageop->btpo_flags = (isleaf) ? BTP_LEAF : 0;
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
_bt_restore_page(page, (char *) xlrec + SizeOfBtreeSplit, xlrec->leftlen);
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetSUI(page, ThisStartUpID);
|
|
|
|
UnlockAndWriteBuffer(buffer);
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
2001-03-22 05:01:46 +01:00
|
|
|
else
|
|
|
|
/* undo */
|
2000-10-13 04:03:02 +02:00
|
|
|
{
|
|
|
|
if (XLByteLT(PageGetLSN(page), lsn))
|
|
|
|
elog(STOP, "btree_split_undo: bad left sibling LSN");
|
2000-12-28 14:00:29 +01:00
|
|
|
elog(STOP, "btree_split_undo: unimplemented");
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Right (new) sibling */
|
2001-03-22 05:01:46 +01:00
|
|
|
blkno = (onleft) ? BlockIdGetBlockNumber(&(xlrec->otherblk)) :
|
|
|
|
ItemPointerGetBlockNumber(&(xlrec->target.tid));
|
2000-10-13 04:03:02 +02:00
|
|
|
buffer = XLogReadBuffer((redo) ? true : false, reln, blkno);
|
|
|
|
if (!BufferIsValid(buffer))
|
|
|
|
elog(STOP, "btree_split_%s: lost right sibling", op);
|
|
|
|
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
2000-12-28 14:00:29 +01:00
|
|
|
if (redo)
|
|
|
|
_bt_pageinit(page, BufferGetPageSize(buffer));
|
|
|
|
else if (PageIsNew((PageHeader) page))
|
|
|
|
elog(STOP, "btree_split_undo: uninitialized right sibling");
|
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
2000-10-13 04:03:02 +02:00
|
|
|
|
|
|
|
if (redo)
|
|
|
|
{
|
2000-12-28 14:00:29 +01:00
|
|
|
pageop->btpo_parent = BlockIdGetBlockNumber(&(xlrec->parentblk));
|
2001-03-22 05:01:46 +01:00
|
|
|
pageop->btpo_prev = (onleft) ?
|
|
|
|
ItemPointerGetBlockNumber(&(xlrec->target.tid)) :
|
|
|
|
BlockIdGetBlockNumber(&(xlrec->otherblk));
|
2000-12-28 14:00:29 +01:00
|
|
|
pageop->btpo_next = BlockIdGetBlockNumber(&(xlrec->rightblk));
|
|
|
|
pageop->btpo_flags = (isleaf) ? BTP_LEAF : 0;
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
_bt_restore_page(page,
|
2001-03-22 05:01:46 +01:00
|
|
|
(char *) xlrec + SizeOfBtreeSplit + xlrec->leftlen,
|
|
|
|
record->xl_len - SizeOfBtreeSplit - xlrec->leftlen);
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetSUI(page, ThisStartUpID);
|
|
|
|
UnlockAndWriteBuffer(buffer);
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
2001-03-22 05:01:46 +01:00
|
|
|
else
|
|
|
|
/* undo */
|
2000-10-13 04:03:02 +02:00
|
|
|
{
|
|
|
|
if (XLByteLT(PageGetLSN(page), lsn))
|
|
|
|
elog(STOP, "btree_split_undo: bad right sibling LSN");
|
2000-12-28 14:00:29 +01:00
|
|
|
elog(STOP, "btree_split_undo: unimplemented");
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
if (!redo || (record->xl_info & XLR_BKP_BLOCK_1))
|
|
|
|
return;
|
|
|
|
|
2000-10-13 04:03:02 +02:00
|
|
|
/* Right (next) page */
|
2000-10-21 17:43:36 +02:00
|
|
|
blkno = BlockIdGetBlockNumber(&(xlrec->rightblk));
|
2000-10-29 19:33:41 +01:00
|
|
|
if (blkno == P_NONE)
|
|
|
|
return;
|
|
|
|
|
2000-10-13 04:03:02 +02:00
|
|
|
buffer = XLogReadBuffer(false, reln, blkno);
|
|
|
|
if (!BufferIsValid(buffer))
|
2000-12-28 14:00:29 +01:00
|
|
|
elog(STOP, "btree_split_redo: lost next right page");
|
2000-10-13 04:03:02 +02:00
|
|
|
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
|
|
|
if (PageIsNew((PageHeader) page))
|
2000-12-28 14:00:29 +01:00
|
|
|
elog(STOP, "btree_split_redo: uninitialized next right page");
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
if (XLByteLE(lsn, PageGetLSN(page)))
|
2000-10-13 04:03:02 +02:00
|
|
|
{
|
|
|
|
UnlockAndReleaseBuffer(buffer);
|
2000-12-28 14:00:29 +01:00
|
|
|
return;
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
2000-12-28 14:00:29 +01:00
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
2001-03-22 05:01:46 +01:00
|
|
|
pageop->btpo_prev = (onleft) ?
|
|
|
|
BlockIdGetBlockNumber(&(xlrec->otherblk)) :
|
|
|
|
ItemPointerGetBlockNumber(&(xlrec->target.tid));
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetSUI(page, ThisStartUpID);
|
|
|
|
UnlockAndWriteBuffer(buffer);
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
static void
|
|
|
|
btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record)
|
2000-10-13 04:03:02 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
|
|
|
|
Relation reln;
|
|
|
|
Buffer buffer;
|
|
|
|
Page page;
|
|
|
|
BTPageOpaque pageop;
|
|
|
|
Buffer metabuf;
|
|
|
|
Page metapg;
|
|
|
|
BTMetaPageData md;
|
2000-10-13 04:03:02 +02:00
|
|
|
|
|
|
|
if (!redo)
|
|
|
|
return;
|
|
|
|
|
|
|
|
reln = XLogOpenRelation(redo, RM_BTREE_ID, xlrec->node);
|
|
|
|
if (!RelationIsValid(reln))
|
|
|
|
return;
|
|
|
|
buffer = XLogReadBuffer(true, reln, BlockIdGetBlockNumber(&(xlrec->rootblk)));
|
|
|
|
if (!BufferIsValid(buffer))
|
|
|
|
elog(STOP, "btree_newroot_redo: no root page");
|
|
|
|
metabuf = XLogReadBuffer(false, reln, BTREE_METAPAGE);
|
|
|
|
if (!BufferIsValid(buffer))
|
|
|
|
elog(STOP, "btree_newroot_redo: no metapage");
|
|
|
|
page = (Page) BufferGetPage(buffer);
|
2000-12-28 14:00:29 +01:00
|
|
|
_bt_pageinit(page, BufferGetPageSize(buffer));
|
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
pageop->btpo_flags |= BTP_ROOT;
|
|
|
|
pageop->btpo_prev = pageop->btpo_next = P_NONE;
|
|
|
|
pageop->btpo_parent = BTREE_METAPAGE;
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
if (record->xl_info & XLOG_BTREE_LEAF)
|
|
|
|
pageop->btpo_flags |= BTP_LEAF;
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
if (record->xl_len > SizeOfBtreeNewroot)
|
|
|
|
_bt_restore_page(page,
|
2001-03-22 05:01:46 +01:00
|
|
|
(char *) xlrec + SizeOfBtreeNewroot,
|
|
|
|
record->xl_len - SizeOfBtreeNewroot);
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
PageSetLSN(page, lsn);
|
|
|
|
PageSetSUI(page, ThisStartUpID);
|
|
|
|
UnlockAndWriteBuffer(buffer);
|
2000-10-13 04:03:02 +02:00
|
|
|
|
|
|
|
metapg = BufferGetPage(metabuf);
|
2000-12-28 14:00:29 +01:00
|
|
|
_bt_pageinit(metapg, BufferGetPageSize(metabuf));
|
|
|
|
md.btm_magic = BTREE_MAGIC;
|
|
|
|
md.btm_version = BTREE_VERSION;
|
|
|
|
md.btm_root = BlockIdGetBlockNumber(&(xlrec->rootblk));
|
|
|
|
md.btm_level = xlrec->level;
|
|
|
|
memcpy((char *) BTPageGetMeta(metapg), (char *) &md, sizeof(md));
|
|
|
|
|
2000-12-29 09:08:59 +01:00
|
|
|
pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
|
|
|
|
pageop->btpo_flags = BTP_META;
|
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
PageSetLSN(metapg, lsn);
|
|
|
|
PageSetSUI(metapg, ThisStartUpID);
|
|
|
|
UnlockAndWriteBuffer(metabuf);
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
void
|
|
|
|
btree_redo(XLogRecPtr lsn, XLogRecord *record)
|
2000-10-13 04:03:02 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
2000-10-20 13:01:21 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
info &= ~XLOG_BTREE_LEAF;
|
2000-10-21 17:43:36 +02:00
|
|
|
if (info == XLOG_BTREE_DELETE)
|
|
|
|
btree_xlog_delete(true, lsn, record);
|
|
|
|
else if (info == XLOG_BTREE_INSERT)
|
|
|
|
btree_xlog_insert(true, lsn, record);
|
|
|
|
else if (info == XLOG_BTREE_SPLIT)
|
2001-03-22 05:01:46 +01:00
|
|
|
btree_xlog_split(true, false, lsn, record); /* new item on the right */
|
2000-10-21 17:43:36 +02:00
|
|
|
else if (info == XLOG_BTREE_SPLEFT)
|
2001-03-22 05:01:46 +01:00
|
|
|
btree_xlog_split(true, true, lsn, record); /* new item on the left */
|
2000-10-21 17:43:36 +02:00
|
|
|
else if (info == XLOG_BTREE_NEWROOT)
|
|
|
|
btree_xlog_newroot(true, lsn, record);
|
|
|
|
else
|
|
|
|
elog(STOP, "btree_redo: unknown op code %u", info);
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
void
|
|
|
|
btree_undo(XLogRecPtr lsn, XLogRecord *record)
|
2000-10-13 04:03:02 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
2000-10-13 04:03:02 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
info &= ~XLOG_BTREE_LEAF;
|
2000-10-21 17:43:36 +02:00
|
|
|
if (info == XLOG_BTREE_DELETE)
|
|
|
|
btree_xlog_delete(false, lsn, record);
|
|
|
|
else if (info == XLOG_BTREE_INSERT)
|
|
|
|
btree_xlog_insert(false, lsn, record);
|
|
|
|
else if (info == XLOG_BTREE_SPLIT)
|
2001-03-22 05:01:46 +01:00
|
|
|
btree_xlog_split(false, false, lsn, record); /* new item on the right */
|
2000-10-21 17:43:36 +02:00
|
|
|
else if (info == XLOG_BTREE_SPLEFT)
|
2001-03-22 05:01:46 +01:00
|
|
|
btree_xlog_split(false, true, lsn, record); /* new item on the left */
|
2000-10-21 17:43:36 +02:00
|
|
|
else if (info == XLOG_BTREE_NEWROOT)
|
|
|
|
btree_xlog_newroot(false, lsn, record);
|
|
|
|
else
|
|
|
|
elog(STOP, "btree_undo: unknown op code %u", info);
|
2000-10-13 04:03:02 +02:00
|
|
|
}
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
static void
|
|
|
|
out_target(char *buf, xl_btreetid *target)
|
2000-10-13 14:05:22 +02:00
|
|
|
{
|
2000-10-21 17:43:36 +02:00
|
|
|
sprintf(buf + strlen(buf), "node %u/%u; tid %u/%u",
|
2001-03-22 05:01:46 +01:00
|
|
|
target->node.tblNode, target->node.relNode,
|
|
|
|
ItemPointerGetBlockNumber(&(target->tid)),
|
|
|
|
ItemPointerGetOffsetNumber(&(target->tid)));
|
2000-10-13 14:05:22 +02:00
|
|
|
}
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
void
|
2001-03-22 05:01:46 +01:00
|
|
|
btree_desc(char *buf, uint8 xl_info, char *rec)
|
2000-10-13 14:05:22 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
uint8 info = xl_info & ~XLR_INFO_MASK;
|
2000-10-13 14:05:22 +02:00
|
|
|
|
2000-12-28 14:00:29 +01:00
|
|
|
info &= ~XLOG_BTREE_LEAF;
|
2000-10-21 17:43:36 +02:00
|
|
|
if (info == XLOG_BTREE_INSERT)
|
2000-10-13 14:05:22 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
strcat(buf, "insert: ");
|
|
|
|
out_target(buf, &(xlrec->target));
|
2000-10-13 14:05:22 +02:00
|
|
|
}
|
2000-10-21 17:43:36 +02:00
|
|
|
else if (info == XLOG_BTREE_DELETE)
|
2000-10-13 14:05:22 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
strcat(buf, "delete: ");
|
|
|
|
out_target(buf, &(xlrec->target));
|
2000-10-13 14:05:22 +02:00
|
|
|
}
|
2000-10-21 17:43:36 +02:00
|
|
|
else if (info == XLOG_BTREE_SPLIT || info == XLOG_BTREE_SPLEFT)
|
2000-10-13 14:05:22 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
|
|
|
|
|
|
|
sprintf(buf + strlen(buf), "split(%s): ",
|
|
|
|
(info == XLOG_BTREE_SPLIT) ? "right" : "left");
|
2000-10-21 17:43:36 +02:00
|
|
|
out_target(buf, &(xlrec->target));
|
|
|
|
sprintf(buf + strlen(buf), "; oth %u; rgh %u",
|
2001-03-22 05:01:46 +01:00
|
|
|
BlockIdGetBlockNumber(&xlrec->otherblk),
|
|
|
|
BlockIdGetBlockNumber(&xlrec->rightblk));
|
2000-10-13 14:05:22 +02:00
|
|
|
}
|
2000-10-21 17:43:36 +02:00
|
|
|
else if (info == XLOG_BTREE_NEWROOT)
|
2000-10-13 14:05:22 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
|
|
|
|
|
2000-10-21 17:43:36 +02:00
|
|
|
sprintf(buf + strlen(buf), "root: node %u/%u; blk %u",
|
2001-03-22 05:01:46 +01:00
|
|
|
xlrec->node.tblNode, xlrec->node.relNode,
|
|
|
|
BlockIdGetBlockNumber(&xlrec->rootblk));
|
2000-10-13 14:05:22 +02:00
|
|
|
}
|
2000-10-21 17:43:36 +02:00
|
|
|
else
|
|
|
|
strcat(buf, "UNKNOWN");
|
2000-10-13 14:05:22 +02:00
|
|
|
}
|