From 799bc58dc7ed9899facfc8302040749cb0a9af2f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 22 Feb 2003 00:45:05 +0000 Subject: [PATCH] More infrastructure for btree compaction project. Tree-traversal code now knows what to do upon hitting a dead page (in theory anyway, it's untested...). Add a post-VACUUM-cleanup entry point for index AMs, to provide a place for dead-page scavenging to happen. Also, fix oversight that broke btpo_prev links in temporary indexes. initdb forced due to additions in pg_am. --- doc/src/sgml/catalogs.sgml | 9 +- src/backend/access/gist/gist.c | 5 +- src/backend/access/hash/hash.c | 5 +- src/backend/access/index/indexam.c | 34 +++- src/backend/access/nbtree/nbtinsert.c | 161 +++++++++------ src/backend/access/nbtree/nbtpage.c | 272 +++++++++++++++----------- src/backend/access/nbtree/nbtree.c | 138 +++++++++---- src/backend/access/nbtree/nbtsearch.c | 220 ++++++++++++++++----- src/backend/access/nbtree/nbtsort.c | 12 +- src/backend/access/rtree/rtree.c | 5 +- src/backend/commands/vacuum.c | 32 ++- src/backend/commands/vacuumlazy.c | 78 ++++---- src/include/access/genam.h | 24 ++- src/include/access/nbtree.h | 20 +- src/include/access/xlog.h | 15 +- src/include/catalog/catversion.h | 4 +- src/include/catalog/pg_am.h | 16 +- src/include/catalog/pg_proc.h | 4 +- 18 files changed, 709 insertions(+), 345 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 4d38bef237..4b50e8f442 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,6 +1,6 @@ @@ -446,6 +446,13 @@ bulk-delete function + + amvacuumcleanup + regproc + pg_proc.oid + post-VACUUM cleanup function + + amcostestimate regproc diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 6591e76644..472bcf4527 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.99 2002/11/13 00:39:46 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.100 2003/02/22 00:45:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1650,8 +1650,9 @@ gistbulkdelete(PG_FUNCTION_ARGS) result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult)); result->num_pages = num_pages; - result->tuples_removed = tuples_removed; result->num_index_tuples = num_index_tuples; + result->tuples_removed = tuples_removed; + result->pages_free = 0; PG_RETURN_POINTER(result); } diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 3a75265f01..0ec2380cef 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.60 2002/09/04 20:31:09 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.61 2003/02/22 00:45:03 tgl Exp $ * * NOTES * This file contains only the public interface routines. 
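The gistbulkdelete hunk above and the hashbulkdelete and rtbulkdelete hunks below all converge on one result-filling pattern: every bulk-delete now populates all four IndexBulkDeleteResult fields (in the struct's new declaration order), leaving pages_free at zero for AMs that cannot reclaim whole pages yet. A minimal sketch of that shared pattern — the helper name is hypothetical, not something the patch introduces:

	/* assumes access/genam.h for IndexBulkDeleteResult */
	static IndexBulkDeleteResult *
	make_bulkdelete_result(BlockNumber num_pages,
						   double num_index_tuples,
						   double tuples_removed)
	{
		IndexBulkDeleteResult *result;

		result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
		result->num_pages = num_pages;
		result->num_index_tuples = num_index_tuples;
		result->tuples_removed = tuples_removed;
		result->pages_free = 0;	/* only an AM with page reclamation fills this */
		return result;
	}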
@@ -491,8 +491,9 @@ hashbulkdelete(PG_FUNCTION_ARGS) result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult)); result->num_pages = num_pages; - result->tuples_removed = tuples_removed; result->num_index_tuples = num_index_tuples; + result->tuples_removed = tuples_removed; + result->pages_free = 0; PG_RETURN_POINTER(result); } diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 258eb546a4..d045bafc1c 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.63 2003/01/08 19:41:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.64 2003/02/22 00:45:03 tgl Exp $ * * INTERFACE ROUTINES * index_open - open an index relation by relation OID @@ -23,6 +23,7 @@ * index_restrpos - restore a scan position * index_getnext - get the next tuple from a scan * index_bulk_delete - bulk deletion of index tuples + * index_vacuum_cleanup - post-deletion cleanup of an index * index_cost_estimator - fetch amcostestimate procedure OID * index_getprocid - get a support procedure OID * @@ -579,6 +580,37 @@ index_bulk_delete(Relation indexRelation, return result; } +/* ---------------- + * index_vacuum_cleanup - do post-deletion cleanup of an index + * + * return value is an optional palloc'd struct of statistics + * ---------------- + */ +IndexBulkDeleteResult * +index_vacuum_cleanup(Relation indexRelation, + IndexVacuumCleanupInfo *info, + IndexBulkDeleteResult *stats) +{ + RegProcedure procedure; + IndexBulkDeleteResult *result; + + RELATION_CHECKS; + + /* It's okay for an index AM not to have a vacuumcleanup procedure */ + if (!RegProcedureIsValid(indexRelation->rd_am->amvacuumcleanup)) + return stats; + + GET_REL_PROCEDURE(vacuum_cleanup, amvacuumcleanup); + + result = (IndexBulkDeleteResult *) + DatumGetPointer(OidFunctionCall3(procedure, + PointerGetDatum(indexRelation), + PointerGetDatum((Pointer) info), + PointerGetDatum((Pointer) stats))); + + return result; +} + /* ---------------- * index_cost_estimator * diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index a93a9fed8c..e943ca96f1 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.97 2003/02/21 00:06:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.98 2003/02/22 00:45:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -280,12 +280,21 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, if (!_bt_isequal(itupdesc, page, P_HIKEY, natts, itup_scankey)) break; - nblkno = opaque->btpo_next; - if (nbuf != InvalidBuffer) - _bt_relbuf(rel, nbuf); - nbuf = _bt_getbuf(rel, nblkno, BT_READ); - page = BufferGetPage(nbuf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); + /* Advance to next non-dead page --- there must be one */ + for (;;) + { + nblkno = opaque->btpo_next; + if (nbuf != InvalidBuffer) + _bt_relbuf(rel, nbuf); + nbuf = _bt_getbuf(rel, nblkno, BT_READ); + page = BufferGetPage(nbuf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (!P_IGNORE(opaque)) + break; + if (P_RIGHTMOST(opaque)) + elog(ERROR, "_bt_check_unique: fell off the end of %s", + RelationGetRelationName(rel)); + } maxoff = PageGetMaxOffsetNumber(page); offset = 
P_FIRSTDATAKEY(opaque); } @@ -414,20 +423,34 @@ _bt_insertonpg(Relation rel, _bt_compare(rel, keysz, scankey, page, P_HIKEY) == 0 && random() > (MAX_RANDOM_VALUE / 100)) { - /* step right one page */ - BlockNumber rblkno = lpageop->btpo_next; - Buffer rbuf; - /* - * must write-lock next page before releasing write lock on + * step right to next non-dead page + * + * must write-lock that page before releasing write lock on * current page; else someone else's _bt_check_unique scan - * could fail to see our insertion. + * could fail to see our insertion. write locks on intermediate + * dead pages won't do because we don't know when they will get + * de-linked from the tree. */ - rbuf = _bt_getbuf(rel, rblkno, BT_WRITE); + Buffer rbuf = InvalidBuffer; + + for (;;) + { + BlockNumber rblkno = lpageop->btpo_next; + + if (rbuf != InvalidBuffer) + _bt_relbuf(rel, rbuf); + rbuf = _bt_getbuf(rel, rblkno, BT_WRITE); + page = BufferGetPage(rbuf); + lpageop = (BTPageOpaque) PageGetSpecialPointer(page); + if (!P_IGNORE(lpageop)) + break; + if (P_RIGHTMOST(lpageop)) + elog(ERROR, "_bt_insertonpg: fell off the end of %s", + RelationGetRelationName(rel)); + } _bt_relbuf(rel, buf); buf = rbuf; - page = BufferGetPage(buf); - lpageop = (BTPageOpaque) PageGetSpecialPointer(page); movedright = true; } @@ -633,8 +656,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, BTPageOpaque ropaque, lopaque, oopaque; - Buffer sbuf = 0; - Page spage = 0; + Buffer sbuf = InvalidBuffer; + Page spage = NULL; + BTPageOpaque sopaque = NULL; Size itemsz; ItemId itemid; BTItem item; @@ -792,6 +816,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, { sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE); spage = BufferGetPage(sbuf); + sopaque = (BTPageOpaque) PageGetSpecialPointer(spage); + if (sopaque->btpo_prev != ropaque->btpo_prev) + elog(PANIC, "btree: right sibling's left-link doesn't match"); } /* @@ -802,6 +829,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, */ START_CRIT_SECTION(); + if (!P_RIGHTMOST(ropaque)) + sopaque->btpo_prev = BufferGetBlockNumber(rbuf); + /* XLOG stuff */ if (!rel->rd_istemp) { @@ -847,10 +877,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, if (!P_RIGHTMOST(ropaque)) { - BTPageOpaque sopaque = (BTPageOpaque) PageGetSpecialPointer(spage); - - sopaque->btpo_prev = BufferGetBlockNumber(rbuf); - rdata[2].next = &(rdata[3]); rdata[3].buffer = sbuf; rdata[3].data = NULL; @@ -1250,58 +1276,63 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) Buffer buf; Page page; BTPageOpaque opaque; - OffsetNumber offnum, - minoff, - maxoff; - ItemId itemid; - BTItem item; buf = _bt_getbuf(rel, blkno, access); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); - minoff = P_FIRSTDATAKEY(opaque); - maxoff = PageGetMaxOffsetNumber(page); - /* - * start = InvalidOffsetNumber means "search the whole page". - * We need this test anyway due to possibility that - * page has a high key now when it didn't before. - */ - if (start < minoff) - start = minoff; - - /* - * These loops will check every item on the page --- but in an order - * that's attuned to the probability of where it actually is. Scan - * to the right first, then to the left. 
- */ - for (offnum = start; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) + if (!P_IGNORE(opaque)) { - itemid = PageGetItemId(page, offnum); - item = (BTItem) PageGetItem(page, itemid); - if (BTItemSame(item, &stack->bts_btitem)) + OffsetNumber offnum, + minoff, + maxoff; + ItemId itemid; + BTItem item; + + minoff = P_FIRSTDATAKEY(opaque); + maxoff = PageGetMaxOffsetNumber(page); + + /* + * start = InvalidOffsetNumber means "search the whole page". + * We need this test anyway due to possibility that + * page has a high key now when it didn't before. + */ + if (start < minoff) + start = minoff; + + /* + * These loops will check every item on the page --- but in an + * order that's attuned to the probability of where it actually + * is. Scan to the right first, then to the left. + */ + for (offnum = start; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) { - /* Return accurate pointer to where link is now */ - stack->bts_blkno = blkno; - stack->bts_offset = offnum; - return buf; + itemid = PageGetItemId(page, offnum); + item = (BTItem) PageGetItem(page, itemid); + if (BTItemSame(item, &stack->bts_btitem)) + { + /* Return accurate pointer to where link is now */ + stack->bts_blkno = blkno; + stack->bts_offset = offnum; + return buf; + } } - } - for (offnum = OffsetNumberPrev(start); - offnum >= minoff; - offnum = OffsetNumberPrev(offnum)) - { - itemid = PageGetItemId(page, offnum); - item = (BTItem) PageGetItem(page, itemid); - if (BTItemSame(item, &stack->bts_btitem)) + for (offnum = OffsetNumberPrev(start); + offnum >= minoff; + offnum = OffsetNumberPrev(offnum)) { - /* Return accurate pointer to where link is now */ - stack->bts_blkno = blkno; - stack->bts_offset = offnum; - return buf; + itemid = PageGetItemId(page, offnum); + item = (BTItem) PageGetItem(page, itemid); + if (BTItemSame(item, &stack->bts_btitem)) + { + /* Return accurate pointer to where link is now */ + stack->bts_blkno = blkno; + stack->bts_offset = offnum; + return buf; + } } } @@ -1365,6 +1396,8 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootpage = BufferGetPage(rootbuf); rootblknum = BufferGetBlockNumber(rootbuf); + + /* acquire lock on the metapage */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index c9879b73ae..0296b71c36 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.59 2003/02/21 00:06:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.60 2003/02/22 00:45:04 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -22,34 +22,17 @@ */ #include "postgres.h" -#include - #include "access/nbtree.h" #include "miscadmin.h" #include "storage/lmgr.h" -extern bool FixBTree; /* comments in nbtree.c */ -extern Buffer _bt_fixroot(Relation rel, Buffer oldrootbuf, bool release); - -/* - * We use high-concurrency locking on btrees. There are two cases in - * which we don't do locking. One is when we're building the btree. - * Since the creating transaction has not committed, no one can see - * the index, and there's no reason to share locks. The second case - * is when we're just starting up the database system. 
We use some - * special-purpose initialization code in the relation cache manager - * (see utils/cache/relcache.c) to allow us to do indexed scans on - * the system catalogs before we'd normally be able to. This happens - * before the lock table is fully initialized, so we can't use it. - * Strictly speaking, this violates 2pl, but we don't do 2pl on the - * system catalogs anyway, so I declare this to be okay. - */ - -#define USELOCKING (!BuildingBtree && !IsInitProcessingMode()) - /* * _bt_metapinit() -- Initialize the metadata page of a new btree. + * + * Note: there's no real need for any locking here. Since the transaction + * creating the index hasn't committed yet, no one else can even see the index + * much less be trying to use it. */ void _bt_metapinit(Relation rel) @@ -59,10 +42,6 @@ _bt_metapinit(Relation rel) BTMetaPageData *metad; BTPageOpaque op; - /* can't be sharing this with anyone, now... */ - if (USELOCKING) - LockRelation(rel, AccessExclusiveLock); - if (RelationGetNumberOfBlocks(rel) != 0) elog(ERROR, "Cannot initialize non-empty btree %s", RelationGetRelationName(rel)); @@ -114,10 +93,6 @@ _bt_metapinit(Relation rel) END_CRIT_SECTION(); WriteBuffer(buf); - - /* all done */ - if (USELOCKING) - UnlockRelation(rel, AccessExclusiveLock); } /* @@ -142,7 +117,8 @@ _bt_metapinit(Relation rel) * what we will return is the old root, which is now just the leftmost * page on a probably-not-very-wide level. For most purposes this is * as good as or better than the true root, so we do not bother to - * insist on finding the true root. + * insist on finding the true root. We do, however, guarantee to + * return a live (not deleted or half-dead) page. * * On successful return, the root page is pinned and read-locked. * The metadata page is not locked or pinned on exit. @@ -157,6 +133,7 @@ _bt_getroot(Relation rel, int access) Page rootpage; BTPageOpaque rootopaque; BlockNumber rootblkno; + uint32 rootlevel; BTMetaPageData *metad; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); @@ -164,6 +141,7 @@ _bt_getroot(Relation rel, int access) metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); metad = BTPageGetMeta(metapg); + /* sanity-check the metapage */ if (!(metaopaque->btpo_flags & BTP_META) || metad->btm_magic != BTREE_MAGIC) elog(ERROR, "Index %s is not a btree", @@ -191,90 +169,113 @@ _bt_getroot(Relation rel, int access) /* * Race condition: if someone else initialized the metadata * between the time we released the read lock and acquired the - * write lock, above, we must avoid doing it again. + * write lock, we must avoid doing it again. */ - if (metad->btm_root == P_NONE) - { - /* - * Get, initialize, write, and leave a lock of the appropriate - * type on the new root page. Since this is the first page in - * the tree, it's a leaf as well as the root. 
- */ - rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); - rootblkno = BufferGetBlockNumber(rootbuf); - rootpage = BufferGetPage(rootbuf); - - _bt_pageinit(rootpage, BufferGetPageSize(rootbuf)); - rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); - rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE; - rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT); - rootopaque->btpo.level = 0; - - /* NO ELOG(ERROR) till meta is updated */ - START_CRIT_SECTION(); - - metad->btm_root = rootblkno; - metad->btm_level = 0; - metad->btm_fastroot = rootblkno; - metad->btm_fastlevel = 0; - - /* XLOG stuff */ - if (!rel->rd_istemp) - { - xl_btree_newroot xlrec; - XLogRecPtr recptr; - XLogRecData rdata; - - xlrec.node = rel->rd_node; - xlrec.rootblk = rootblkno; - xlrec.level = 0; - - rdata.buffer = InvalidBuffer; - rdata.data = (char *) &xlrec; - rdata.len = SizeOfBtreeNewroot; - rdata.next = NULL; - - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata); - - PageSetLSN(rootpage, recptr); - PageSetSUI(rootpage, ThisStartUpID); - PageSetLSN(metapg, recptr); - PageSetSUI(metapg, ThisStartUpID); - } - - END_CRIT_SECTION(); - - _bt_wrtnorelbuf(rel, rootbuf); - - /* - * swap root write lock for read lock. There is no danger of - * anyone else accessing the new root page while it's unlocked, - * since no one else knows where it is yet. - */ - LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK); - LockBuffer(rootbuf, BT_READ); - - /* okay, metadata is correct, write and release it */ - _bt_wrtbuf(rel, metabuf); - } - else + if (metad->btm_root != P_NONE) { /* * Metadata initialized by someone else. In order to * guarantee no deadlocks, we have to release the metadata - * page and start all over again. + * page and start all over again. (Is that really true? + * But it's hardly worth trying to optimize this case.) */ _bt_relbuf(rel, metabuf); return _bt_getroot(rel, access); } + + /* + * Get, initialize, write, and leave a lock of the appropriate + * type on the new root page. Since this is the first page in + * the tree, it's a leaf as well as the root. + */ + rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); + rootblkno = BufferGetBlockNumber(rootbuf); + rootpage = BufferGetPage(rootbuf); + + _bt_pageinit(rootpage, BufferGetPageSize(rootbuf)); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); + rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE; + rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT); + rootopaque->btpo.level = 0; + + /* NO ELOG(ERROR) till meta is updated */ + START_CRIT_SECTION(); + + metad->btm_root = rootblkno; + metad->btm_level = 0; + metad->btm_fastroot = rootblkno; + metad->btm_fastlevel = 0; + + /* XLOG stuff */ + if (!rel->rd_istemp) + { + xl_btree_newroot xlrec; + XLogRecPtr recptr; + XLogRecData rdata; + + xlrec.node = rel->rd_node; + xlrec.rootblk = rootblkno; + xlrec.level = 0; + + rdata.buffer = InvalidBuffer; + rdata.data = (char *) &xlrec; + rdata.len = SizeOfBtreeNewroot; + rdata.next = NULL; + + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata); + + PageSetLSN(rootpage, recptr); + PageSetSUI(rootpage, ThisStartUpID); + PageSetLSN(metapg, recptr); + PageSetSUI(metapg, ThisStartUpID); + } + + END_CRIT_SECTION(); + + _bt_wrtnorelbuf(rel, rootbuf); + + /* + * swap root write lock for read lock. There is no danger of + * anyone else accessing the new root page while it's unlocked, + * since no one else knows where it is yet. 
+ */ + LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(rootbuf, BT_READ); + + /* okay, metadata is correct, write and release it */ + _bt_wrtbuf(rel, metabuf); } else { rootblkno = metad->btm_fastroot; + Assert(rootblkno != P_NONE); + rootlevel = metad->btm_fastlevel; _bt_relbuf(rel, metabuf); /* done with the meta page */ - rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); + for (;;) + { + rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); + rootpage = BufferGetPage(rootbuf); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); + + if (!P_IGNORE(rootopaque)) + break; + + /* it's dead, Jim. step right one page */ + if (P_RIGHTMOST(rootopaque)) + elog(ERROR, "No live root page found in %s", + RelationGetRelationName(rel)); + rootblkno = rootopaque->btpo_next; + + _bt_relbuf(rel, rootbuf); + } + + /* Note: can't check btpo.level on deleted pages */ + if (rootopaque->btpo.level != rootlevel) + elog(ERROR, "Root page %u of %s has level %u, expected %u", + rootblkno, RelationGetRelationName(rel), + rootopaque->btpo.level, rootlevel); } /* @@ -305,7 +306,10 @@ _bt_gettrueroot(Relation rel) Page metapg; BTPageOpaque metaopaque; Buffer rootbuf; + Page rootpage; + BTPageOpaque rootopaque; BlockNumber rootblkno; + uint32 rootlevel; BTMetaPageData *metad; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); @@ -331,10 +335,33 @@ _bt_gettrueroot(Relation rel) } rootblkno = metad->btm_root; + rootlevel = metad->btm_level; _bt_relbuf(rel, metabuf); /* done with the meta page */ - rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); + for (;;) + { + rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); + rootpage = BufferGetPage(rootbuf); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); + + if (!P_IGNORE(rootopaque)) + break; + + /* it's dead, Jim. step right one page */ + if (P_RIGHTMOST(rootopaque)) + elog(ERROR, "No live root page found in %s", + RelationGetRelationName(rel)); + rootblkno = rootopaque->btpo_next; + + _bt_relbuf(rel, rootbuf); + } + + /* Note: can't check btpo.level on deleted pages */ + if (rootopaque->btpo.level != rootlevel) + elog(ERROR, "Root page %u of %s has level %u, expected %u", + rootblkno, RelationGetRelationName(rel), + rootopaque->btpo.level, rootlevel); return rootbuf; } @@ -342,6 +369,8 @@ _bt_gettrueroot(Relation rel) /* * _bt_getbuf() -- Get a buffer by block number for read or write. * + * blkno == P_NEW means to get an unallocated index page. + * * When this routine returns, the appropriate lock is set on the * requested buffer and its reference count has been incremented * (ie, the buffer is "locked and pinned"). @@ -359,18 +388,35 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) } else { + bool needLock; Page page; + /* XXX soon: ask FSM about free space */ + /* * Extend the relation by one page. * - * Extend bufmgr code is unclean and so we have to use extra locking - * here. + * We have to use a lock to ensure no one else is extending the rel at + * the same time, else we will both try to initialize the same new + * page. We can skip locking for new or temp relations, however, + * since no one else could be accessing them. */ - LockPage(rel, 0, ExclusiveLock); - buf = ReadBuffer(rel, blkno); + needLock = !(rel->rd_isnew || rel->rd_istemp); + + if (needLock) + LockPage(rel, 0, ExclusiveLock); + + buf = ReadBuffer(rel, P_NEW); + + /* + * Release the file-extension lock; it's now OK for someone else to + * extend the relation some more. 
+ */ + if (needLock) + UnlockPage(rel, 0, ExclusiveLock); + + /* Acquire appropriate buffer lock on new page */ LockBuffer(buf, access); - UnlockPage(rel, 0, ExclusiveLock); /* Initialize the new page before returning it */ page = BufferGetPage(buf); @@ -403,10 +449,9 @@ _bt_relbuf(Relation rel, Buffer buf) * and a pin on the buffer. * * NOTE: actually, the buffer manager just marks the shared buffer page - * dirty here, the real I/O happens later. Since we can't persuade the - * Unix kernel to schedule disk writes in a particular order, there's not - * much point in worrying about this. The most we can say is that all the - * writes will occur before commit. + * dirty here; the real I/O happens later. This is okay since we are not + * relying on write ordering anyway. The WAL mechanism is responsible for + * guaranteeing correctness after a crash. */ void _bt_wrtbuf(Relation rel, Buffer buf) @@ -455,8 +500,9 @@ _bt_pageinit(Page page, Size size) * mistake. On exit, metapage data is correct and we no longer have * a pin or lock on the metapage. * - * XXX this is not used for splitting anymore, only in nbtsort.c at the - * completion of btree building. + * Actually this is not used for splitting on-the-fly anymore. It's only used + * in nbtsort.c at the completion of btree building, where we know we have + * sole access to the index anyway. */ void _bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level) @@ -512,6 +558,10 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level) /* * Delete an item from a btree page. * + * This must only be used for deleting leaf items. Deleting an item on a + * non-leaf page has to be done as part of an atomic action that includes + * deleting the page it points to. + * * This routine assumes that the caller has pinned and locked the buffer, * and will write the buffer afterwards. */ diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index de6765415f..c7f23da4c7 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.95 2003/02/21 00:06:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.96 2003/02/22 00:45:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,7 @@ #include "access/nbtree.h" #include "catalog/index.h" #include "miscadmin.h" +#include "storage/freespace.h" /* Working state for btbuild and its callback */ @@ -44,7 +45,6 @@ typedef struct } BTBuildState; -bool BuildingBtree = false; /* see comment in btbuild() */ bool FastBuild = true; /* use SORT instead of insertion build */ /* @@ -68,13 +68,7 @@ static void btbuildCallback(Relation index, void AtEOXact_nbtree(void) { - /* - * Note: these actions should only be necessary during xact abort; but - * they can't hurt during a commit. - */ - - /* If we were building a btree, we ain't anymore. */ - BuildingBtree = false; + /* nothing to do at the moment */ } @@ -95,9 +89,6 @@ btbuild(PG_FUNCTION_ARGS) double reltuples; BTBuildState buildstate; - /* set flag to disable locking */ - BuildingBtree = true; - /* * bootstrap processing does something strange, so don't use * sort/build for initial catalog indices. 
at some point i need to @@ -172,9 +163,6 @@ btbuild(PG_FUNCTION_ARGS) } #endif /* BTREE_BUILD_STATS */ - /* all done */ - BuildingBtree = false; - /* * Since we just counted the tuples in the heap, we update its stats * in pg_class to guarantee that the planner takes advantage of the @@ -689,10 +677,6 @@ btbulkdelete(PG_FUNCTION_ARGS) * We now need to back up the scan one item, so that the next * cycle will re-examine the same offnum on this page (which * now holds the next item). - * - * For now, just hack the current-item index. Will need to - * be smarter when deletion includes removal of empty - * index pages. */ current->ip_posid--; } @@ -708,12 +692,89 @@ btbulkdelete(PG_FUNCTION_ARGS) result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult)); result->num_pages = num_pages; - result->tuples_removed = tuples_removed; result->num_index_tuples = num_index_tuples; + result->tuples_removed = tuples_removed; + result->pages_free = 0; /* not computed here */ PG_RETURN_POINTER(result); } +/* + * Post-VACUUM cleanup. + * + * Here, we scan looking for pages we can delete or return to the freelist. + * + * Result: a palloc'd struct containing statistical info for VACUUM displays. + */ +Datum +btvacuumcleanup(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation) PG_GETARG_POINTER(0); +#ifdef NOT_USED + IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1); +#endif + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2); + BlockNumber num_pages; + BlockNumber blkno; + PageFreeSpaceInfo *pageSpaces; + int nFreePages, + maxFreePages; + + Assert(stats != NULL); + + num_pages = RelationGetNumberOfBlocks(rel); + + /* No point in remembering more than MaxFSMPages pages */ + maxFreePages = MaxFSMPages; + if ((BlockNumber) maxFreePages > num_pages) + maxFreePages = (int) num_pages + 1; /* +1 to avoid palloc(0) */ + pageSpaces = (PageFreeSpaceInfo *) palloc(maxFreePages * sizeof(PageFreeSpaceInfo)); + nFreePages = 0; + + /* + * Scan through all pages of index, except metapage. (Any pages added + * after we start the scan will not be examined; this should be fine, + * since they can't possibly be empty.) + */ + for (blkno = BTREE_METAPAGE+1; blkno < num_pages; blkno++) + { + Buffer buf; + Page page; + BTPageOpaque opaque; + + buf = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (P_ISDELETED(opaque)) + { + /* XXX if safe-to-reclaim... */ + if (nFreePages < maxFreePages) + { + pageSpaces[nFreePages].blkno = blkno; + /* The avail-space value is bogus, but must be < BLCKSZ */ + pageSpaces[nFreePages].avail = BLCKSZ-1; + nFreePages++; + } + } + _bt_relbuf(rel, buf); + } + + /* + * Update the shared Free Space Map with the info we now have about + * free space in the index, discarding any old info the map may have. + * We do not need to sort the page numbers; they're in order already. + */ + MultiRecordFreeSpace(&rel->rd_node, 0, nFreePages, pageSpaces); + + pfree(pageSpaces); + + /* update statistics */ + stats->num_pages = num_pages; + stats->pages_free = nFreePages; + + PG_RETURN_POINTER(stats); +} + /* * Restore scan position when btgettuple is called to continue a scan. 
* @@ -739,7 +800,7 @@ _bt_restscan(IndexScanDesc scan) maxoff; BTPageOpaque opaque; Buffer nextbuf; - ItemPointerData target = so->curHeapIptr; + ItemPointer target = &(so->curHeapIptr); BTItem item; BlockNumber blkno; @@ -759,7 +820,7 @@ _bt_restscan(IndexScanDesc scan) * current->ip_posid before first index tuple on the current page * (_bt_step will move it right)... XXX still needed? */ - if (!ItemPointerIsValid(&target)) + if (!ItemPointerIsValid(target)) { ItemPointerSetOffsetNumber(current, OffsetNumberPrev(P_FIRSTDATAKEY(opaque))); @@ -778,11 +839,7 @@ _bt_restscan(IndexScanDesc scan) offnum = OffsetNumberNext(offnum)) { item = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); - if (item->bti_itup.t_tid.ip_blkid.bi_hi == - target.ip_blkid.bi_hi && - item->bti_itup.t_tid.ip_blkid.bi_lo == - target.ip_blkid.bi_lo && - item->bti_itup.t_tid.ip_posid == target.ip_posid) + if (BTTidSame(item->bti_itup.t_tid, *target)) { /* Found it */ current->ip_posid = offnum; @@ -793,22 +850,33 @@ _bt_restscan(IndexScanDesc scan) /* * The item we're looking for moved right at least one page, so * move right. We are careful here to pin and read-lock the next - * page before releasing the current one. This ensures that a - * concurrent btbulkdelete scan cannot pass our position --- if it + * non-dead page before releasing the current one. This ensures that + * a concurrent btbulkdelete scan cannot pass our position --- if it * did, it might be able to reach and delete our target item before * we can find it again. */ if (P_RIGHTMOST(opaque)) - elog(FATAL, "_bt_restscan: my bits moved right off the end of the world!" + elog(ERROR, "_bt_restscan: my bits moved right off the end of the world!" "\n\tRecreate index %s.", RelationGetRelationName(rel)); - - blkno = opaque->btpo_next; - nextbuf = _bt_getbuf(rel, blkno, BT_READ); + /* Advance to next non-dead page --- there must be one */ + nextbuf = InvalidBuffer; + for (;;) + { + blkno = opaque->btpo_next; + if (nextbuf != InvalidBuffer) + _bt_relbuf(rel, nextbuf); + nextbuf = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(nextbuf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (!P_IGNORE(opaque)) + break; + if (P_RIGHTMOST(opaque)) + elog(ERROR, "_bt_restscan: fell off the end of %s", + RelationGetRelationName(rel)); + } _bt_relbuf(rel, buf); so->btso_curbuf = buf = nextbuf; - page = BufferGetPage(buf); maxoff = PageGetMaxOffsetNumber(page); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); offnum = P_FIRSTDATAKEY(opaque); ItemPointerSet(current, blkno, offnum); } diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 0daae3cd58..91089d8545 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * nbtsearch.c - * search code for postgres btrees. + * Search code for postgres btrees. 
* * * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.73 2003/02/21 00:06:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.74 2003/02/22 00:45:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include "access/nbtree.h" +static Buffer _bt_walk_left(Relation rel, Buffer buf); static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); @@ -79,10 +80,11 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, par_blkno = BufferGetBlockNumber(*bufP); /* - * We need to save the bit image of the index entry we chose in + * We need to save the location of the index entry we chose in * the parent page on a stack. In case we split the tree, we'll - * use this bit image to figure out what our real parent page is, - * in case the parent splits while we're working lower in the + * use the stack to work back up to the parent page. We also save + * the actual downlink (TID) to uniquely identify the index entry, + * in case it moves right while we're working lower in the * tree. See the paper by Lehman and Yao for how this is detected * and handled. (We use the child link to disambiguate duplicate * keys in the index -- Lehman and Yao disallow duplicate keys.) @@ -114,7 +116,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, /* * _bt_moveright() -- move right in the btree if necessary. * - * When we drop and reacquire a pointer to a page, it is possible that + * When we follow a pointer to reach a page, it is possible that * the page has changed in the meanwhile. If this happens, we're * guaranteed that the page has "split right" -- that is, that any * data that appeared on the page originally is either on the page @@ -148,9 +150,13 @@ _bt_moveright(Relation rel, * right. (If the scan key is equal to the high key, we might or * might not need to move right; have to scan the page first anyway.) * It could even have split more than once, so scan as far as needed. + * + * We also have to move right if we followed a link that brought us to + * a dead page. 
*/ while (!P_RIGHTMOST(opaque) && - _bt_compare(rel, keysz, scankey, page, P_HIKEY) > 0) + (P_IGNORE(opaque) || + _bt_compare(rel, keysz, scankey, page, P_HIKEY) > 0)) { /* step right one page */ BlockNumber rblkno = opaque->btpo_next; @@ -161,6 +167,10 @@ _bt_moveright(Relation rel, opaque = (BTPageOpaque) PageGetSpecialPointer(page); } + if (P_IGNORE(opaque)) + elog(ERROR, "_bt_moveright: fell off the end of %s", + RelationGetRelationName(rel)); + return buf; } @@ -796,7 +806,6 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) OffsetNumber offnum, maxoff; BlockNumber blkno; - BlockNumber obknum; /* * Don't use ItemPointerGetOffsetNumber or you risk to get assertion @@ -814,7 +823,7 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) offnum = OffsetNumberNext(offnum); else { - /* walk right to the next page with data */ + /* Walk right to the next page with data */ for (;;) { /* if we're at end of scan, release the buffer and return */ @@ -831,58 +840,56 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) *bufP = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(*bufP); opaque = (BTPageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - /* done if it's not empty */ - offnum = P_FIRSTDATAKEY(opaque); - if (!PageIsEmpty(page) && offnum <= maxoff) - break; + if (!P_IGNORE(opaque)) + { + maxoff = PageGetMaxOffsetNumber(page); + /* done if it's not empty */ + offnum = P_FIRSTDATAKEY(opaque); + if (!PageIsEmpty(page) && offnum <= maxoff) + break; + } } } } - else + else /* backwards scan */ { if (offnum > P_FIRSTDATAKEY(opaque)) offnum = OffsetNumberPrev(offnum); else { - /* walk left to the next page with data */ + /* + * Walk left to the next page with data. This is much more + * complex than the walk-right case because of the possibility + * that the page to our left splits while we are in flight to it, + * plus the possibility that the page we were on gets deleted + * after we leave it. See nbtree/README for details. + */ for (;;) { - /* if we're at end of scan, release the buffer and return */ - if (P_LEFTMOST(opaque)) + *bufP = _bt_walk_left(rel, *bufP); + + /* if we're at end of scan, return failure */ + if (*bufP == InvalidBuffer) { - _bt_relbuf(rel, *bufP); ItemPointerSetInvalid(current); - *bufP = so->btso_curbuf = InvalidBuffer; + so->btso_curbuf = InvalidBuffer; return false; } - /* step left */ - obknum = BufferGetBlockNumber(*bufP); - blkno = opaque->btpo_prev; - _bt_relbuf(rel, *bufP); - *bufP = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(*bufP); opaque = (BTPageOpaque) PageGetSpecialPointer(page); - /* - * If the adjacent page just split, then we have to walk - * right to find the block that's now adjacent to where we - * were. Because pages only split right, we don't have to - * worry about this failing to terminate. + * Okay, we managed to move left to a non-deleted page. + * Done if it's not half-dead and not empty. Else loop back + * and do it all again. 
*/ - while (opaque->btpo_next != obknum) + if (!P_IGNORE(opaque)) { - blkno = opaque->btpo_next; - _bt_relbuf(rel, *bufP); - *bufP = _bt_getbuf(rel, blkno, BT_READ); - page = BufferGetPage(*bufP); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); + maxoff = PageGetMaxOffsetNumber(page); + offnum = maxoff; + if (!PageIsEmpty(page) && + maxoff >= P_FIRSTDATAKEY(opaque)) + break; } - /* done if it's not empty */ - maxoff = PageGetMaxOffsetNumber(page); - offnum = maxoff; - if (!PageIsEmpty(page) && maxoff >= P_FIRSTDATAKEY(opaque)) - break; } } } @@ -895,11 +902,133 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) return true; } +/* + * _bt_walk_left() -- step left one page, if possible + * + * The given buffer must be pinned and read-locked. This will be dropped + * before stepping left. On return, we have pin and read lock on the + * returned page, instead. + * + * Returns InvalidBuffer if there is no page to the left (no lock is held + * in that case). + * + * When working on a non-leaf level, it is possible for the returned page + * to be half-dead; the caller should check that condition and step left + * again if it's important. + */ +static Buffer +_bt_walk_left(Relation rel, Buffer buf) +{ + Page page; + BTPageOpaque opaque; + + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + for (;;) + { + BlockNumber obknum; + BlockNumber lblkno; + BlockNumber blkno; + int tries; + + /* if we're at end of tree, release buf and return failure */ + if (P_LEFTMOST(opaque)) + { + _bt_relbuf(rel, buf); + break; + } + /* remember original page we are stepping left from */ + obknum = BufferGetBlockNumber(buf); + /* step left */ + blkno = lblkno = opaque->btpo_prev; + _bt_relbuf(rel, buf); + buf = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + /* + * If this isn't the page we want, walk right till we find + * what we want --- but go no more than four hops (an + * arbitrary limit). If we don't find the correct page by then, + * the most likely bet is that the original page got deleted + * and isn't in the sibling chain at all anymore, not that its + * left sibling got split more than four times. + * + * Note that it is correct to test P_ISDELETED not P_IGNORE + * here, because half-dead pages are still in the sibling + * chain. Caller must reject half-dead pages if wanted. + */ + tries = 0; + for (;;) + { + if (!P_ISDELETED(opaque) && opaque->btpo_next == obknum) + { + /* Found desired page, return it */ + return buf; + } + if (P_RIGHTMOST(opaque) || ++tries > 4) + break; + blkno = opaque->btpo_next; + _bt_relbuf(rel, buf); + buf = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + } + + /* Return to the original page to see what's up */ + _bt_relbuf(rel, buf); + buf = _bt_getbuf(rel, obknum, BT_READ); + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (P_ISDELETED(opaque)) + { + /* + * It was deleted. Move right to first nondeleted page + * (there must be one); that is the page that has acquired the + * deleted one's keyspace, so stepping left from it will take + * us where we want to be. 
+ */ + for (;;) + { + if (P_RIGHTMOST(opaque)) + elog(ERROR, "_bt_walk_left: fell off the end of %s", + RelationGetRelationName(rel)); + blkno = opaque->btpo_next; + _bt_relbuf(rel, buf); + buf = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (!P_ISDELETED(opaque)) + break; + } + /* + * Now return to top of loop, resetting obknum to + * point to this nondeleted page, and try again. + */ + } + else + { + /* + * It wasn't deleted; the explanation had better be + * that the page to the left got split or deleted. + * Without this check, we'd go into an infinite loop + * if there's anything wrong. + */ + if (opaque->btpo_prev == lblkno) + elog(ERROR, "_bt_walk_left: can't find left sibling in %s", + RelationGetRelationName(rel)); + /* Okay to try again with new lblkno value */ + } + } + + return InvalidBuffer; +} + /* * _bt_get_endpoint() -- Find the first or last page on a given tree level * * If the index is empty, we will return InvalidBuffer; any other failure - * condition causes elog(). + * condition causes elog(). We will not return a dead page. * * The returned buffer is pinned and read-locked. */ @@ -941,12 +1070,13 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) * step right if needed to get to it (this could happen if the * page split since we obtained a pointer to it). */ - while (P_ISDELETED(opaque) || + while (P_IGNORE(opaque) || (rightmost && !P_RIGHTMOST(opaque))) { blkno = opaque->btpo_next; if (blkno == P_NONE) - elog(ERROR, "_bt_get_endpoint: ran off end of btree"); + elog(ERROR, "_bt_get_endpoint: fell off the end of %s", + RelationGetRelationName(rel)); _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(buf); @@ -959,7 +1089,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) if (opaque->btpo.level < level) elog(ERROR, "_bt_get_endpoint: btree level %u not found", level); - /* Step to leftmost or rightmost child page */ + /* Descend to leftmost or rightmost child page */ if (rightmost) offnum = PageGetMaxOffsetNumber(page); else diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index f9d227ecd0..62f020086d 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -1,4 +1,5 @@ /*------------------------------------------------------------------------- + * * nbtsort.c * Build a btree from sorted input by loading leaf pages sequentially. 
* @@ -35,7 +36,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.71 2003/02/21 00:06:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.72 2003/02/22 00:45:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -164,8 +165,8 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) ResetUsage(); } #endif /* BTREE_BUILD_STATS */ - tuplesort_performsort(btspool->sortstate); + tuplesort_performsort(btspool->sortstate); if (btspool2) tuplesort_performsort(btspool2->sortstate); _bt_load(btspool->index, btspool, btspool2); @@ -331,7 +332,7 @@ _bt_sortaddtup(Page page, if (PageAddItem(page, (Item) btitem, itemsize, itup_off, LP_USED) == InvalidOffsetNumber) - elog(FATAL, "btree: failed to add item to the page in _bt_sort"); + elog(ERROR, "btree: failed to add item to the page in _bt_sort"); } /*---------- @@ -470,8 +471,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti) /* * Write out the old page. We never want to see it again, so we - * can give up our lock (if we had one; most likely BuildingBtree - * is set, so we aren't locking). + * can give up our lock. */ _bt_blwritepage(index, obuf); @@ -534,7 +534,7 @@ _bt_uppershutdown(Relation index, BTPageState *state) if (s->btps_next == (BTPageState *) NULL) { opaque->btpo_flags |= BTP_ROOT; - _bt_metaproot(index, blkno, s->btps_level + 1); + _bt_metaproot(index, blkno, s->btps_level); } else { diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index 650820085c..b6b2a19e10 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.75 2002/09/04 20:31:13 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.76 2003/02/22 00:45:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1250,8 +1250,9 @@ rtbulkdelete(PG_FUNCTION_ARGS) result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult)); result->num_pages = num_pages; - result->tuples_removed = tuples_removed; result->num_index_tuples = num_index_tuples; + result->tuples_removed = tuples_removed; + result->pages_free = 0; PG_RETURN_POINTER(result); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index c1b17bba86..ac45a5df69 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.247 2003/02/09 06:56:27 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.248 2003/02/22 00:45:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2603,17 +2603,25 @@ static void scan_index(Relation indrel, double num_tuples) { IndexBulkDeleteResult *stats; + IndexVacuumCleanupInfo vcinfo; VacRUsage ru0; vac_init_rusage(&ru0); /* - * Even though we're not planning to delete anything, use the - * ambulkdelete call, so that the scan happens within the index AM for - * more speed. + * Even though we're not planning to delete anything, we use the + * ambulkdelete call, because (a) the scan happens within the index AM + * for more speed, and (b) it may want to pass private statistics to + * the amvacuumcleanup call. 
*/ stats = index_bulk_delete(indrel, dummy_tid_reaped, NULL); + /* Do post-VACUUM cleanup, even though we deleted nothing */ + vcinfo.vacuum_full = true; + vcinfo.message_level = elevel; + + stats = index_vacuum_cleanup(indrel, &vcinfo, stats); + if (!stats) return; @@ -2622,9 +2630,9 @@ scan_index(Relation indrel, double num_tuples) stats->num_pages, stats->num_index_tuples, false); - elog(elevel, "Index %s: Pages %u; Tuples %.0f.\n\t%s", + elog(elevel, "Index %s: Pages %u, %u free; Tuples %.0f.\n\t%s", RelationGetRelationName(indrel), - stats->num_pages, stats->num_index_tuples, + stats->num_pages, stats->pages_free, stats->num_index_tuples, vac_show_rusage(&ru0)); /* @@ -2661,6 +2669,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, double num_tuples, int keep_tuples) { IndexBulkDeleteResult *stats; + IndexVacuumCleanupInfo vcinfo; VacRUsage ru0; vac_init_rusage(&ru0); @@ -2668,6 +2677,12 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, /* Do bulk deletion */ stats = index_bulk_delete(indrel, tid_reaped, (void *) vacpagelist); + /* Do post-VACUUM cleanup */ + vcinfo.vacuum_full = true; + vcinfo.message_level = elevel; + + stats = index_vacuum_cleanup(indrel, &vcinfo, stats); + if (!stats) return; @@ -2676,8 +2691,9 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, stats->num_pages, stats->num_index_tuples, false); - elog(elevel, "Index %s: Pages %u; Tuples %.0f: Deleted %.0f.\n\t%s", - RelationGetRelationName(indrel), stats->num_pages, + elog(elevel, "Index %s: Pages %u, %u free; Tuples %.0f: Deleted %.0f.\n\t%s", + RelationGetRelationName(indrel), + stats->num_pages, stats->pages_free, stats->num_index_tuples - keep_tuples, stats->tuples_removed, vac_show_rusage(&ru0)); diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 2974eb1bc3..9790ef30bc 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.23 2002/11/13 00:39:46 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.24 2003/02/22 00:45:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -200,7 +200,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, tups_vacuumed, nkeep, nunused; - bool did_vacuum_index = false; int i; VacRUsage ru0; @@ -244,7 +243,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, /* Remove index entries */ for (i = 0; i < nindexes; i++) lazy_vacuum_index(Irel[i], vacrelstats); - did_vacuum_index = true; /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); /* Forget the now-vacuumed tuples, and press on */ @@ -415,7 +413,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, vacrelstats->rel_tuples = num_tuples; /* If any tuples need to be deleted, perform final vacuum cycle */ - /* XXX put a threshold on min nuber of tuples here? */ + /* XXX put a threshold on min number of tuples here? 
*/ if (vacrelstats->num_dead_tuples > 0) { /* Remove index entries */ @@ -424,9 +422,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); } - else if (!did_vacuum_index) + else { - /* Scan indexes just to update pg_class statistics about them */ + /* Must do post-vacuum cleanup and statistics update anyway */ for (i = 0; i < nindexes; i++) lazy_scan_index(Irel[i], vacrelstats); } @@ -551,42 +549,36 @@ static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats) { IndexBulkDeleteResult *stats; + IndexVacuumCleanupInfo vcinfo; VacRUsage ru0; vac_init_rusage(&ru0); /* - * If the index is not partial, skip the scan, and just assume it has - * the same number of tuples as the heap. - */ - if (!vac_is_partial_index(indrel)) - { - vac_update_relstats(RelationGetRelid(indrel), - RelationGetNumberOfBlocks(indrel), - vacrelstats->rel_tuples, - false); - return; - } - - /* - * If index is unsafe for concurrent access, must lock it; but a - * shared lock should be sufficient. + * If index is unsafe for concurrent access, must lock it. */ if (!indrel->rd_am->amconcurrent) - LockRelation(indrel, AccessShareLock); + LockRelation(indrel, AccessExclusiveLock); /* - * Even though we're not planning to delete anything, use the - * ambulkdelete call, so that the scan happens within the index AM for - * more speed. + * Even though we're not planning to delete anything, we use the + * ambulkdelete call, because (a) the scan happens within the index AM + * for more speed, and (b) it may want to pass private statistics to + * the amvacuumcleanup call. */ stats = index_bulk_delete(indrel, dummy_tid_reaped, NULL); + /* Do post-VACUUM cleanup, even though we deleted nothing */ + vcinfo.vacuum_full = false; + vcinfo.message_level = elevel; + + stats = index_vacuum_cleanup(indrel, &vcinfo, stats); + /* * Release lock acquired above. */ if (!indrel->rd_am->amconcurrent) - UnlockRelation(indrel, AccessShareLock); + UnlockRelation(indrel, AccessExclusiveLock); if (!stats) return; @@ -596,9 +588,9 @@ lazy_scan_index(Relation indrel, LVRelStats *vacrelstats) stats->num_pages, stats->num_index_tuples, false); - elog(elevel, "Index %s: Pages %u; Tuples %.0f.\n\t%s", + elog(elevel, "Index %s: Pages %u, %u free; Tuples %.0f.\n\t%s", RelationGetRelationName(indrel), - stats->num_pages, stats->num_index_tuples, + stats->num_pages, stats->pages_free, stats->num_index_tuples, vac_show_rusage(&ru0)); pfree(stats); @@ -617,6 +609,7 @@ static void lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats) { IndexBulkDeleteResult *stats; + IndexVacuumCleanupInfo vcinfo; VacRUsage ru0; vac_init_rusage(&ru0); @@ -630,26 +623,33 @@ lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats) /* Do bulk deletion */ stats = index_bulk_delete(indrel, lazy_tid_reaped, (void *) vacrelstats); + /* Do post-VACUUM cleanup */ + vcinfo.vacuum_full = false; + vcinfo.message_level = elevel; + + stats = index_vacuum_cleanup(indrel, &vcinfo, stats); + /* * Release lock acquired above. 
*/ if (!indrel->rd_am->amconcurrent) UnlockRelation(indrel, AccessExclusiveLock); + if (!stats) + return; + /* now update statistics in pg_class */ - if (stats) - { - vac_update_relstats(RelationGetRelid(indrel), - stats->num_pages, stats->num_index_tuples, - false); + vac_update_relstats(RelationGetRelid(indrel), + stats->num_pages, stats->num_index_tuples, + false); - elog(elevel, "Index %s: Pages %u; Tuples %.0f: Deleted %.0f.\n\t%s", - RelationGetRelationName(indrel), stats->num_pages, - stats->num_index_tuples, stats->tuples_removed, - vac_show_rusage(&ru0)); + elog(elevel, "Index %s: Pages %u, %u free; Tuples %.0f: Deleted %.0f.\n\t%s", + RelationGetRelationName(indrel), + stats->num_pages, stats->pages_free, + stats->num_index_tuples, stats->tuples_removed, + vac_show_rusage(&ru0)); - pfree(stats); - } + pfree(stats); } /* diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 6266da47c8..59ecf1d8f4 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: genam.h,v 1.37 2002/09/04 20:31:36 momjian Exp $ + * $Id: genam.h,v 1.38 2003/02/22 00:45:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,17 +20,32 @@ #include "nodes/primnodes.h" -/* Struct for statistics returned by bulk-delete operation */ +/* + * Struct for statistics returned by bulk-delete operation + * + * This is now also passed to the index AM's vacuum-cleanup operation, + * if it has one, which can modify the results as needed. Note that + * an index AM could choose to have bulk-delete return a larger struct + * of which this is just the first field; this provides a way for bulk-delete + * to communicate additional private data to vacuum-cleanup. 
+ */ typedef struct IndexBulkDeleteResult { BlockNumber num_pages; /* pages remaining in index */ + double num_index_tuples; /* tuples remaining */ double tuples_removed; /* # removed by bulk-delete operation */ - double num_index_tuples; /* # remaining */ + BlockNumber pages_free; /* # unused pages in index */ } IndexBulkDeleteResult; /* Typedef for callback function to determine if a tuple is bulk-deletable */ typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr, void *state); +/* Struct for additional arguments passed to vacuum-cleanup operation */ +typedef struct IndexVacuumCleanupInfo +{ + bool vacuum_full; /* VACUUM FULL (we have exclusive lock) */ + int message_level; /* elog level for progress messages */ +} IndexVacuumCleanupInfo; /* Struct for heap-or-index scans of system tables */ typedef struct SysScanDescData @@ -72,6 +87,9 @@ extern bool index_getnext_indexitem(IndexScanDesc scan, extern IndexBulkDeleteResult *index_bulk_delete(Relation indexRelation, IndexBulkDeleteCallback callback, void *callback_state); +extern IndexBulkDeleteResult *index_vacuum_cleanup(Relation indexRelation, + IndexVacuumCleanupInfo *info, + IndexBulkDeleteResult *stats); extern RegProcedure index_cost_estimator(Relation indexRelation); extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index f4dce1842f..4bb5db0513 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nbtree.h,v 1.64 2003/02/21 00:06:22 tgl Exp $ + * $Id: nbtree.h,v 1.65 2003/02/22 00:45:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,6 +54,7 @@ typedef BTPageOpaqueData *BTPageOpaque; #define BTP_ROOT (1 << 1) /* root page (has no parent) */ #define BTP_DELETED (1 << 2) /* page has been deleted from tree */ #define BTP_META (1 << 3) /* meta-page */ +#define BTP_HALF_DEAD (1 << 4) /* empty, but still in tree */ /* @@ -124,12 +125,13 @@ typedef BTItemData *BTItem; #define SizeOfBTItem sizeof(BTItemData) /* Test whether items are the "same" per the above notes */ -#define BTItemSame(i1, i2) ( (i1)->bti_itup.t_tid.ip_blkid.bi_hi == \ - (i2)->bti_itup.t_tid.ip_blkid.bi_hi && \ - (i1)->bti_itup.t_tid.ip_blkid.bi_lo == \ - (i2)->bti_itup.t_tid.ip_blkid.bi_lo && \ - (i1)->bti_itup.t_tid.ip_posid == \ - (i2)->bti_itup.t_tid.ip_posid ) +#define BTTidSame(i1, i2) \ + ( (i1).ip_blkid.bi_hi == (i2).ip_blkid.bi_hi && \ + (i1).ip_blkid.bi_lo == (i2).ip_blkid.bi_lo && \ + (i1).ip_posid == (i2).ip_posid ) +#define BTItemSame(i1, i2) \ + BTTidSame((i1)->bti_itup.t_tid, (i2)->bti_itup.t_tid) + /* * In general, the btree code tries to localize its knowledge about @@ -150,6 +152,7 @@ typedef BTItemData *BTItem; #define P_ISLEAF(opaque) ((opaque)->btpo_flags & BTP_LEAF) #define P_ISROOT(opaque) ((opaque)->btpo_flags & BTP_ROOT) #define P_ISDELETED(opaque) ((opaque)->btpo_flags & BTP_DELETED) +#define P_IGNORE(opaque) ((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) /* * Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost @@ -412,8 +415,6 @@ typedef BTScanOpaqueData *BTScanOpaque; /* * prototypes for functions in nbtree.c (external entry points for btree) */ -extern bool BuildingBtree; /* in nbtree.c */ - extern void AtEOXact_nbtree(void); extern Datum btbuild(PG_FUNCTION_ARGS); 
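Taken together, the genam.h declarations above define a two-pass protocol that the vacuum.c and vacuumlazy.c hunks follow: index_bulk_delete first, then index_vacuum_cleanup with the (possibly NULL) stats threaded through. A sketch of a caller driving it, using a reap-nothing callback in the style of vacuum.c's dummy_tid_reaped; the function names here are illustrative, not from the patch:

	static bool
	reap_nothing(ItemPointer itemptr, void *state)
	{
		return false;			/* report no index entries as dead */
	}

	static void
	cleanup_one_index(Relation indrel, bool is_full, int msglevel)
	{
		IndexBulkDeleteResult *stats;
		IndexVacuumCleanupInfo vcinfo;

		/* Pass 1: scan the index, deleting entries the callback condemns */
		stats = index_bulk_delete(indrel, reap_nothing, NULL);

		/* Pass 2: let the AM reclaim empty pages, if it has amvacuumcleanup */
		vcinfo.vacuum_full = is_full;
		vcinfo.message_level = msglevel;
		stats = index_vacuum_cleanup(indrel, &vcinfo, stats);

		/* stats may still be NULL if neither pass returned anything */
		if (stats)
		{
			vac_update_relstats(RelationGetRelid(indrel),
								stats->num_pages, stats->num_index_tuples,
								false);
			pfree(stats);
		}
	}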
@@ -426,6 +427,7 @@ extern Datum btendscan(PG_FUNCTION_ARGS); extern Datum btmarkpos(PG_FUNCTION_ARGS); extern Datum btrestrpos(PG_FUNCTION_ARGS); extern Datum btbulkdelete(PG_FUNCTION_ARGS); +extern Datum btvacuumcleanup(PG_FUNCTION_ARGS); /* * prototypes for functions in nbtinsert.c diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index a1be9bacf3..cb2e6e523d 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: xlog.h,v 1.41 2003/02/21 00:06:22 tgl Exp $ + * $Id: xlog.h,v 1.42 2003/02/22 00:45:05 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -56,17 +56,18 @@ typedef struct XLogRecord #define XLR_INFO_MASK 0x0F /* - * We support backup of up to 2 disk blocks per XLOG record (could support - * more if we cared to dedicate more xl_info bits for this purpose; currently - * do not need more than 2 anyway). If we backed up any disk blocks then we - * use flag bits in xl_info to signal it. + * If we backed up any disk blocks with the XLOG record, we use flag bits in + * xl_info to signal it. We support backup of up to 3 disk blocks per XLOG + * record. (Could support 4 if we cared to dedicate all the xl_info bits for + * this purpose; currently bit 0 of xl_info is unused and available.) */ -#define XLR_BKP_BLOCK_MASK 0x0C /* all info bits used for bkp +#define XLR_BKP_BLOCK_MASK 0x0E /* all info bits used for bkp * blocks */ -#define XLR_MAX_BKP_BLOCKS 2 +#define XLR_MAX_BKP_BLOCKS 3 #define XLR_SET_BKP_BLOCK(iblk) (0x08 >> (iblk)) #define XLR_BKP_BLOCK_1 XLR_SET_BKP_BLOCK(0) /* 0x08 */ #define XLR_BKP_BLOCK_2 XLR_SET_BKP_BLOCK(1) /* 0x04 */ +#define XLR_BKP_BLOCK_3 XLR_SET_BKP_BLOCK(2) /* 0x02 */ /* * Sometimes we log records which are out of transaction control. 
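A worked example of the rearranged xl_info bits may help: XLR_SET_BKP_BLOCK(iblk) shifts 0x08 right by the block index, so backup blocks 0, 1, and 2 map to 0x08, 0x04, and 0x02; the new 0x0E mask covers exactly those three bits, leaving bit 0x01 unused as the comment says. The helper below is a sketch built only from the macros in this hunk, not part of the patch:

    /*
     * Sketch (not from the patch): test whether backup block iblk
     * accompanies an XLOG record.  XLR_SET_BKP_BLOCK(0..2) yields
     * 0x08, 0x04, 0x02 respectively; bit 0x01 of xl_info stays free.
     */
    static bool
    record_has_bkp_block(uint8 xl_info, int iblk)
    {
        Assert(iblk < XLR_MAX_BKP_BLOCKS);
        return (xl_info & XLR_SET_BKP_BLOCK(iblk)) != 0;
    }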
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 240889577a..fc24db5d2e 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: catversion.h,v 1.178 2003/02/21 00:06:22 tgl Exp $ + * $Id: catversion.h,v 1.179 2003/02/22 00:45:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200302171 +#define CATALOG_VERSION_NO 200302211 #endif diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 66b2f2621f..3ee7121812 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_am.h,v 1.23 2002/07/29 22:14:11 tgl Exp $ + * $Id: pg_am.h,v 1.24 2003/02/22 00:45:05 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -58,6 +58,7 @@ CATALOG(pg_am) regproc amrestrpos; /* "restore marked scan position" function */ regproc ambuild; /* "build new index" function */ regproc ambulkdelete; /* bulk-delete function */ + regproc amvacuumcleanup; /* post-VACUUM cleanup function */ regproc amcostestimate; /* estimate cost of an indexscan */ } FormData_pg_am; @@ -72,7 +73,7 @@ typedef FormData_pg_am *Form_pg_am; * compiler constants for pg_am * ---------------- */ -#define Natts_pg_am 19 +#define Natts_pg_am 20 #define Anum_pg_am_amname 1 #define Anum_pg_am_amowner 2 #define Anum_pg_am_amstrategies 3 @@ -91,21 +92,22 @@ typedef FormData_pg_am *Form_pg_am; #define Anum_pg_am_amrestrpos 16 #define Anum_pg_am_ambuild 17 #define Anum_pg_am_ambulkdelete 18 -#define Anum_pg_am_amcostestimate 19 +#define Anum_pg_am_amvacuumcleanup 19 +#define Anum_pg_am_amcostestimate 20 /* ---------------- * initial contents of pg_am * ---------------- */ -DATA(insert OID = 402 ( rtree PGUID 8 3 0 f f f f rtgettuple rtinsert rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos rtbuild rtbulkdelete rtcostestimate )); +DATA(insert OID = 402 ( rtree PGUID 8 3 0 f f f f rtgettuple rtinsert rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos rtbuild rtbulkdelete - rtcostestimate )); DESCR("r-tree index access method"); -DATA(insert OID = 403 ( btree PGUID 5 1 1 t t t t btgettuple btinsert btbeginscan btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btcostestimate )); +DATA(insert OID = 403 ( btree PGUID 5 1 1 t t t t btgettuple btinsert btbeginscan btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate )); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 -DATA(insert OID = 405 ( hash PGUID 1 1 0 f f f t hashgettuple hashinsert hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashcostestimate )); +DATA(insert OID = 405 ( hash PGUID 1 1 0 f f f t hashgettuple hashinsert hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete - hashcostestimate )); DESCR("hash index access method"); -DATA(insert OID = 783 ( gist PGUID 100 7 0 f t f f gistgettuple gistinsert gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistcostestimate )); +DATA(insert OID = 783 ( gist PGUID 100 7 0 f t f f gistgettuple gistinsert 
gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete - gistcostestimate )); DESCR("GiST index access method"); #define GIST_AM_OID 783 diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index f32715284b..3aab3ef8a7 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.283 2003/02/13 05:24:02 momjian Exp $ + * $Id: pg_proc.h,v 1.284 2003/02/22 00:45:05 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -710,6 +710,8 @@ DATA(insert OID = 338 ( btbuild PGNSP PGUID 12 f f t f v 3 2278 "2281 2281 DESCR("btree(internal)"); DATA(insert OID = 332 ( btbulkdelete PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" btbulkdelete - _null_ )); DESCR("btree(internal)"); +DATA(insert OID = 972 ( btvacuumcleanup PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" btvacuumcleanup - _null_ )); +DESCR("btree(internal)"); DATA(insert OID = 1268 ( btcostestimate PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" btcostestimate - _null_ )); DESCR("btree(internal)");
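The new pg_proc entry declares btvacuumcleanup with three "internal" arguments and an "internal" result, which under the usual fmgr convention means three pointers in and one pointer out. The skeleton below shows only that calling convention; the body is a placeholder, not the patch's actual implementation:

    /* Calling-convention sketch only; the real body is elsewhere in the patch */
    Datum
    btvacuumcleanup(PG_FUNCTION_ARGS)
    {
        Relation    rel = (Relation) PG_GETARG_POINTER(0);
        IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
        IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);

        /* ... scan the index, recycle dead pages, fill stats->pages_free ... */

        PG_RETURN_POINTER(stats);
    }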