From 3f4d48802271126b1343289a9d2267ff1ed3788a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 24 May 2002 18:57:57 +0000 Subject: [PATCH] Mark index entries "killed" when they are no longer visible to any transaction, so as to avoid returning them out of the index AM. Saves repeated heap_fetch operations on frequently-updated rows. Also detect queries on unique keys (equality to all columns of a unique index), and don't bother continuing scan once we have found first match. Killing is implemented in the btree and hash AMs, but not yet in rtree or gist, because there isn't an equally convenient place to do it in those AMs (the outer amgetnext routine can't do it without re-pinning the index page). Did some small cleanup on APIs of HeapTupleSatisfies, heap_fetch, and index_insert to make this a little easier. --- src/backend/access/gist/gist.c | 6 +- src/backend/access/hash/hash.c | 48 ++++++++++- src/backend/access/hash/hashscan.c | 80 ++++++++----------- src/backend/access/hash/hashsearch.c | 29 ++----- src/backend/access/heap/heapam.c | 99 ++++++++++++++--------- src/backend/access/heap/tuptoaster.c | 4 +- src/backend/access/index/genam.c | 7 +- src/backend/access/index/indexam.c | 81 ++++++++++++++++--- src/backend/access/nbtree/nbtinsert.c | 110 +++++++++++++++----------- src/backend/access/nbtree/nbtree.c | 47 ++++++++++- src/backend/access/nbtree/nbtsearch.c | 11 +-- src/backend/access/nbtree/nbtutils.c | 67 +++++++++++++--- src/backend/access/rtree/rtree.c | 6 +- src/backend/catalog/indexing.c | 5 +- src/backend/commands/analyze.c | 6 +- src/backend/commands/trigger.c | 8 +- src/backend/commands/vacuum.c | 6 +- src/backend/executor/execMain.c | 7 +- src/backend/executor/execUtils.c | 12 ++- src/backend/executor/nodeTidscan.c | 9 +-- src/backend/storage/ipc/sinval.c | 43 ++++++++-- src/backend/utils/time/tqual.c | 5 +- src/include/access/genam.h | 5 +- src/include/access/heapam.h | 6 +- src/include/access/nbtree.h | 4 +- src/include/access/relscan.h | 10 ++- src/include/access/valid.h | 32 +++----- src/include/catalog/pg_proc.h | 10 +-- src/include/executor/executor.h | 4 +- src/include/utils/tqual.h | 4 +- 30 files changed, 498 insertions(+), 273 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 1da836a364..16105696d4 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.92 2002/05/20 23:51:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.93 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -294,9 +294,9 @@ gistinsert(PG_FUNCTION_ARGS) Datum *datum = (Datum *) PG_GETARG_POINTER(1); char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); - #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); + bool checkUnique = PG_GETARG_BOOL(5); #endif InsertIndexResult res; IndexTuple itup; @@ -1607,6 +1607,8 @@ gistbulkdelete(PG_FUNCTION_ARGS) /* walk through the entire index */ iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL); + /* including killed tuples */ + iscan->ignore_killed_tuples = false; while (index_getnext_indexitem(iscan, ForwardScanDirection)) { diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 6f06ffbfa0..8db98e8a36 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.57 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.58 2002/05/24 18:57:55 tgl Exp $ * * NOTES * This file contains only the public interface routines. @@ -166,8 +166,8 @@ hashinsert(PG_FUNCTION_ARGS) ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); + bool checkUnique = PG_GETARG_BOOL(5); #endif - InsertIndexResult res; HashItem hitem; IndexTuple itup; @@ -210,6 +210,9 @@ hashgettuple(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); + HashScanOpaque so = (HashScanOpaque) scan->opaque; + Page page; + OffsetNumber offnum; bool res; /* @@ -217,12 +220,49 @@ hashgettuple(PG_FUNCTION_ARGS) * the appropriate direction. If we haven't done so yet, we call a * routine to get the first item in the scan. */ - if (ItemPointerIsValid(&(scan->currentItemData))) + { + /* + * Check to see if we should kill the previously-fetched tuple. + */ + if (scan->kill_prior_tuple) + { + /* + * Yes, so mark it by setting the LP_DELETE bit in the item flags. + */ + offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); + page = BufferGetPage(so->hashso_curbuf); + PageGetItemId(page, offnum)->lp_flags |= LP_DELETE; + /* + * Since this can be redone later if needed, it's treated the + * same as a commit-hint-bit status update for heap tuples: + * we mark the buffer dirty but don't make a WAL log entry. + */ + SetBufferCommitInfoNeedsSave(so->hashso_curbuf); + } + /* + * Now continue the scan. + */ res = _hash_next(scan, dir); + } else res = _hash_first(scan, dir); + /* + * Skip killed tuples if asked to. + */ + if (scan->ignore_killed_tuples) + { + while (res) + { + offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); + page = BufferGetPage(so->hashso_curbuf); + if (!ItemIdDeleted(PageGetItemId(page, offnum))) + break; + res = _hash_next(scan, dir); + } + } + PG_RETURN_BOOL(res); } @@ -418,6 +458,8 @@ hashbulkdelete(PG_FUNCTION_ARGS) /* walk through the entire index */ iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL); + /* including killed tuples */ + iscan->ignore_killed_tuples = false; while (index_getnext_indexitem(iscan, ForwardScanDirection)) { diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c index 87dfcd6093..724a785ac7 100644 --- a/src/backend/access/hash/hashscan.c +++ b/src/backend/access/hash/hashscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.26 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.27 2002/05/24 18:57:55 tgl Exp $ * * NOTES * Because we can be doing an index scan on a relation while we @@ -32,8 +32,6 @@ #include "access/hash.h" -static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); -static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); typedef struct HashScanListData { @@ -46,6 +44,10 @@ typedef HashScanListData *HashScanList; static HashScanList HashScans = (HashScanList) NULL; +static void _hash_scandel(IndexScanDesc scan, + BlockNumber blkno, OffsetNumber offno); + + /* * AtEOXact_hash() --- clean up hash subsystem at xact abort or commit. * @@ -129,63 +131,51 @@ static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno) { ItemPointer current; + ItemPointer mark; Buffer buf; Buffer metabuf; HashScanOpaque so; - if (!_hash_scantouched(scan, blkno, offno)) - return; - - metabuf = _hash_getbuf(scan->indexRelation, HASH_METAPAGE, HASH_READ); - so = (HashScanOpaque) scan->opaque; - buf = so->hashso_curbuf; - current = &(scan->currentItemData); + mark = &(scan->currentMarkData); + if (ItemPointerIsValid(current) && ItemPointerGetBlockNumber(current) == blkno && ItemPointerGetOffsetNumber(current) >= offno) { + metabuf = _hash_getbuf(scan->indexRelation, HASH_METAPAGE, HASH_READ); + buf = so->hashso_curbuf; _hash_step(scan, &buf, BackwardScanDirection, metabuf); - so->hashso_curbuf = buf; } - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) + if (ItemPointerIsValid(mark) + && ItemPointerGetBlockNumber(mark) == blkno + && ItemPointerGetOffsetNumber(mark) >= offno) { - ItemPointerData tmp; + /* + * The idea here is to exchange the current and mark positions, + * then step backwards (affecting current), then exchange again. + */ + ItemPointerData tmpitem; + Buffer tmpbuf; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; + tmpitem = *mark; + *mark = *current; + *current = tmpitem; + tmpbuf = so->hashso_mrkbuf; + so->hashso_mrkbuf = so->hashso_curbuf; + so->hashso_curbuf = tmpbuf; + + metabuf = _hash_getbuf(scan->indexRelation, HASH_METAPAGE, HASH_READ); + buf = so->hashso_curbuf; _hash_step(scan, &buf, BackwardScanDirection, metabuf); - so->hashso_mrkbuf = buf; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; + + tmpitem = *mark; + *mark = *current; + *current = tmpitem; + tmpbuf = so->hashso_mrkbuf; + so->hashso_mrkbuf = so->hashso_curbuf; + so->hashso_curbuf = tmpbuf; } } - -static bool -_hash_scantouched(IndexScanDesc scan, - BlockNumber blkno, - OffsetNumber offno) -{ - ItemPointer current; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return true; - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return true; - - return false; -} diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 968efa363a..db10ff055c 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.28 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.29 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,10 +54,10 @@ _hash_search(Relation rel, * _hash_next() -- Get the next item in a scan. * * On entry, we have a valid currentItemData in the scan, and a - * read lock on the page that contains that item. We do not have - * the page pinned. We return the next item in the scan. On - * exit, we have the page containing the next item locked but not - * pinned. + * pin and read lock on the page that contains that item. + * We find the next item in the scan, if any. + * On success exit, we have the page containing the next item + * pinned and locked. */ bool _hash_next(IndexScanDesc scan, ScanDirection dir) @@ -74,25 +74,12 @@ _hash_next(IndexScanDesc scan, ScanDirection dir) rel = scan->indexRelation; so = (HashScanOpaque) scan->opaque; - current = &(scan->currentItemData); - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); - - /* - * XXX 10 may 91: somewhere there's a bug in our management of the - * cached buffer for this scan. wei discovered it. the following is - * a workaround so he can work until i figure out what's going on. - */ - - if (!BufferIsValid(so->hashso_curbuf)) - { - so->hashso_curbuf = _hash_getbuf(rel, - ItemPointerGetBlockNumber(current), - HASH_READ); - } /* we still have the buffer pinned and locked */ buf = so->hashso_curbuf; + Assert(BufferIsValid(buf)); + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); /* * step to next valid tuple. note that _hash_step releases our lock diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index a8d7ca0c29..e763823a16 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.135 2002/05/21 22:05:53 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.136 2002/05/24 18:57:55 tgl Exp $ * * * INTERFACE ROUTINES @@ -306,6 +306,8 @@ heapgettup(Relation relation, { if (ItemIdIsUsed(lpp)) { + bool valid; + tuple->t_datamcxt = NULL; tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); @@ -315,8 +317,8 @@ heapgettup(Relation relation, * if current tuple qualifies, return it. */ HeapTupleSatisfies(tuple, relation, *buffer, (PageHeader) dp, - snapshot, nkeys, key); - if (tuple->t_data != NULL) + snapshot, nkeys, key, valid); + if (valid) { LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); return; @@ -864,32 +866,37 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) return ((scan->rs_ctup.t_data == NULL) ? NULL : &(scan->rs_ctup)); } -/* ---------------- - * heap_fetch - retrieve tuple with given tid +/* + * heap_fetch - retrieve tuple with given tid * - * On entry, tuple->t_self is the TID to fetch. + * On entry, tuple->t_self is the TID to fetch. We pin the buffer holding + * the tuple, fill in the remaining fields of *tuple, and check the tuple + * against the specified snapshot. * - * If successful (ie, tuple found and passes snapshot time qual), - * then the rest of *tuple is filled in, and *userbuf is set to the - * buffer holding the tuple. A pin is obtained on the buffer; the - * caller must BufferRelease the buffer when done with the tuple. + * If successful (tuple passes snapshot time qual), then *userbuf is set to + * the buffer holding the tuple and TRUE is returned. The caller must + * unpin the buffer when done with the tuple. * - * If not successful, tuple->t_data is set to NULL and *userbuf is set to - * InvalidBuffer. - * ---------------- + * If the tuple fails the time qual check, then FALSE will be returned. + * When the caller specifies keep_buf = true, we retain the pin on the + * buffer and return it in *userbuf (so the caller can still access the + * tuple); when keep_buf = false, the pin is released and *userbuf is set + * to InvalidBuffer. */ -void +bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, + bool keep_buf, PgStat_Info *pgstat_info) { + ItemPointer tid = &(tuple->t_self); ItemId lp; Buffer buffer; PageHeader dp; - ItemPointer tid = &(tuple->t_self); OffsetNumber offnum; + bool valid; /* * increment access statistics @@ -901,14 +908,16 @@ heap_fetch(Relation relation, * get the buffer from the relation descriptor. Note that this does a * buffer pin. */ - buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); if (!BufferIsValid(buffer)) - elog(ERROR, "heap_fetch: %s relation: ReadBuffer(%ld) failed", + elog(ERROR, "heap_fetch: ReadBuffer(%s, %lu) failed", RelationGetRelationName(relation), - (long) ItemPointerGetBlockNumber(tid)); + (unsigned long) ItemPointerGetBlockNumber(tid)); + /* + * Need share lock on buffer to examine tuple commit status. + */ LockBuffer(buffer, BUFFER_LOCK_SHARE); /* @@ -921,38 +930,34 @@ heap_fetch(Relation relation, /* * more sanity checks */ - if (!ItemIdIsUsed(lp)) { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); - *userbuf = InvalidBuffer; - tuple->t_datamcxt = NULL; - tuple->t_data = NULL; - return; + + elog(ERROR, "heap_fetch: invalid tuple id (%s, %lu, %u)", + RelationGetRelationName(relation), + (unsigned long) ItemPointerGetBlockNumber(tid), + offnum); } + /* + * fill in *tuple fields + */ tuple->t_datamcxt = NULL; tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = relation->rd_id; /* - * check time qualification of tid + * check time qualification of tuple, then release lock */ - HeapTupleSatisfies(tuple, relation, buffer, dp, - snapshot, 0, (ScanKey) NULL); + snapshot, 0, (ScanKey) NULL, valid); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - if (tuple->t_data == NULL) - { - /* Tuple failed time check, so we can release now. */ - ReleaseBuffer(buffer); - *userbuf = InvalidBuffer; - } - else + if (valid) { /* * All checks passed, so return the tuple as valid. Caller is now @@ -968,13 +973,28 @@ heap_fetch(Relation relation, pgstat_count_heap_fetch(pgstat_info); else pgstat_count_heap_fetch(&relation->pgstat_info); + + return true; } + + /* Tuple failed time qual, but maybe caller wants to see it anyway. */ + if (keep_buf) + { + *userbuf = buffer; + + return false; + } + + /* Okay to release pin on buffer. */ + ReleaseBuffer(buffer); + + *userbuf = InvalidBuffer; + + return false; } -/* ---------------- +/* * heap_get_latest_tid - get the latest tid of a specified tuple - * - * ---------------- */ ItemPointer heap_get_latest_tid(Relation relation, @@ -989,7 +1009,8 @@ heap_get_latest_tid(Relation relation, HeapTupleHeader t_data; ItemPointerData ctid; bool invalidBlock, - linkend; + linkend, + valid; /* * get the buffer from the relation descriptor Note that this does a @@ -1038,7 +1059,7 @@ heap_get_latest_tid(Relation relation, */ HeapTupleSatisfies(&tp, relation, buffer, dp, - snapshot, 0, (ScanKey) NULL); + snapshot, 0, (ScanKey) NULL, valid); linkend = true; if ((t_data->t_infomask & HEAP_XMIN_COMMITTED) != 0 && @@ -1048,7 +1069,7 @@ heap_get_latest_tid(Relation relation, LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); - if (tp.t_data == NULL) + if (!valid) { if (linkend) return NULL; diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 9ac1c69c8e..ab5e96f8cc 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.30 2002/05/21 22:05:53 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.31 2002/05/24 18:57:55 tgl Exp $ * * * INTERFACE ROUTINES @@ -923,7 +923,7 @@ toast_save_datum(Relation rel, Datum value) */ idxres = index_insert(toastidx, t_values, t_nulls, &(toasttup->t_self), - toastrel); + toastrel, toastidx->rd_uniqueindex); if (idxres == NULL) elog(ERROR, "Failed to insert index entry for TOAST tuple"); diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index cc8c08c177..16d0beaffe 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/genam.c,v 1.33 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/genam.c,v 1.34 2002/05/24 18:57:55 tgl Exp $ * * NOTES * many of the old access method routines have been turned into @@ -89,6 +89,11 @@ RelationGetIndexScan(Relation indexRelation, else scan->keyData = NULL; + scan->kill_prior_tuple = false; + scan->ignore_killed_tuples = true; /* default setting */ + scan->keys_are_unique = false; /* may be set by amrescan */ + scan->got_tuple = false; + scan->opaque = NULL; ItemPointerSetInvalid(&scan->currentItemData); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index b616212bb5..8c0fbaa6a1 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.58 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.59 2002/05/24 18:57:55 tgl Exp $ * * INTERFACE ROUTINES * index_open - open an index relation by relation OID @@ -204,7 +204,8 @@ index_insert(Relation indexRelation, Datum *datums, char *nulls, ItemPointer heap_t_ctid, - Relation heapRelation) + Relation heapRelation, + bool check_uniqueness) { RegProcedure procedure; InsertIndexResult specificResult; @@ -216,12 +217,13 @@ index_insert(Relation indexRelation, * have the am's insert proc do all the work. */ specificResult = (InsertIndexResult) - DatumGetPointer(OidFunctionCall5(procedure, + DatumGetPointer(OidFunctionCall6(procedure, PointerGetDatum(indexRelation), PointerGetDatum(datums), PointerGetDatum(nulls), PointerGetDatum(heap_t_ctid), - PointerGetDatum(heapRelation))); + PointerGetDatum(heapRelation), + BoolGetDatum(check_uniqueness))); /* must be pfree'ed */ return specificResult; @@ -303,6 +305,10 @@ index_rescan(IndexScanDesc scan, ScanKey key) SCAN_CHECKS; GET_SCAN_PROCEDURE(rescan, amrescan); + scan->kill_prior_tuple = false; /* for safety */ + scan->keys_are_unique = false; /* may be set by amrescan */ + scan->got_tuple = false; + OidFunctionCall2(procedure, PointerGetDatum(scan), PointerGetDatum(key)); @@ -369,6 +375,9 @@ index_restrpos(IndexScanDesc scan) SCAN_CHECKS; GET_SCAN_PROCEDURE(restrpos, amrestrpos); + scan->kill_prior_tuple = false; /* for safety */ + scan->got_tuple = false; + OidFunctionCall1(procedure, PointerGetDatum(scan)); } @@ -385,7 +394,7 @@ index_restrpos(IndexScanDesc scan) HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction) { - bool found; + HeapTuple heapTuple = &scan->xs_ctup; SCAN_CHECKS; @@ -396,8 +405,21 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) scan->xs_cbuf = InvalidBuffer; } + /* just make sure this is false... */ + scan->kill_prior_tuple = false; + + /* + * Can skip entering the index AM if we already got a tuple + * and it must be unique. + */ + if (scan->keys_are_unique && scan->got_tuple) + return NULL; + for (;;) { + bool found; + uint16 sv_infomask; + pgstat_count_index_scan(&scan->xs_pgstat_info); /* @@ -407,32 +429,62 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) found = DatumGetBool(FunctionCall2(&scan->fn_getnext, PointerGetDatum(scan), Int32GetDatum(direction))); + + /* Reset kill flag immediately for safety */ + scan->kill_prior_tuple = false; + if (!found) return NULL; /* failure exit */ + /* * Fetch the heap tuple and see if it matches the snapshot. */ - heap_fetch(scan->heapRelation, scan->xs_snapshot, - &scan->xs_ctup, &scan->xs_cbuf, - &scan->xs_pgstat_info); - if (scan->xs_ctup.t_data != NULL) + if (heap_fetch(scan->heapRelation, scan->xs_snapshot, + heapTuple, &scan->xs_cbuf, true, + &scan->xs_pgstat_info)) break; + /* - * XXX here, consider whether we can kill the index tuple. + * If we can't see it, maybe no one else can either. Check to see + * if the tuple is dead to all transactions. If so, signal the + * index AM to not return it on future indexscans. + * + * We told heap_fetch to keep a pin on the buffer, so we can + * re-access the tuple here. But we must re-lock the buffer first. + * Also, it's just barely possible for an update of hint bits to + * occur here. */ + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); + sv_infomask = heapTuple->t_data->t_infomask; + + if (HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin) == + HEAPTUPLE_DEAD) + scan->kill_prior_tuple = true; + + if (sv_infomask != heapTuple->t_data->t_infomask) + SetBufferCommitInfoNeedsSave(scan->xs_cbuf); + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(scan->xs_cbuf); + scan->xs_cbuf = InvalidBuffer; } /* Success exit */ + scan->got_tuple = true; + pgstat_count_index_getnext(&scan->xs_pgstat_info); - return &scan->xs_ctup; + return heapTuple; } /* ---------------- * index_getnext_indexitem - get the next index tuple from a scan * - * Finds the next index tuple satisfying the scan keys. Note that no - * time qual (snapshot) check is done; indeed the heap tuple is not accessed. + * Finds the next index tuple satisfying the scan keys. Note that the + * corresponding heap tuple is not accessed, and thus no time qual (snapshot) + * check is done, other than the index AM's internal check for killed tuples + * (which most callers of this routine will probably want to suppress by + * setting scan->ignore_killed_tuples = false). + * * On success (TRUE return), the found index TID is in scan->currentItemData, * and its heap TID is in scan->xs_ctup.t_self. scan->xs_cbuf is untouched. * ---------------- @@ -445,6 +497,9 @@ index_getnext_indexitem(IndexScanDesc scan, SCAN_CHECKS; + /* just make sure this is false... */ + scan->kill_prior_tuple = false; + /* * have the am's gettuple proc do all the work. index_beginscan * already set up fn_getnext. diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 3a94297188..fe3e98b982 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.90 2002/03/06 06:09:17 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.91 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -176,7 +176,6 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, Page page; BTPageOpaque opaque; Buffer nbuf = InvalidBuffer; - bool chtup = true; page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -194,70 +193,85 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, for (;;) { HeapTupleData htup; - Buffer buffer; + Buffer hbuffer; + ItemId curitemid; BTItem cbti; BlockNumber nblkno; /* - * _bt_compare returns 0 for (1,NULL) and (1,NULL) - this's how we - * handling NULLs - and so we must not use _bt_compare in real - * comparison, but only for ordering/finding items on pages. - - * vadim 03/24/97 - * * make sure the offset points to an actual key before trying to * compare it... */ if (offset <= maxoff) { + /* + * _bt_compare returns 0 for (1,NULL) and (1,NULL) - this's how we + * handling NULLs - and so we must not use _bt_compare in real + * comparison, but only for ordering/finding items on pages. - + * vadim 03/24/97 + */ if (!_bt_isequal(itupdesc, page, offset, natts, itup_scankey)) break; /* we're past all the equal tuples */ + curitemid = PageGetItemId(page, offset); /* - * Have to check is inserted heap tuple deleted one (i.e. just - * moved to another place by vacuum)! We only need to do this - * once, but don't want to do it at all unless we see equal - * tuples, so as not to slow down unequal case. + * We can skip the heap fetch if the item is marked killed. */ - if (chtup) + if (!ItemIdDeleted(curitemid)) { - htup.t_self = btitem->bti_itup.t_tid; - heap_fetch(heapRel, SnapshotDirty, &htup, &buffer, NULL); - if (htup.t_data == NULL) /* YES! */ - break; - /* Live tuple is being inserted, so continue checking */ - ReleaseBuffer(buffer); - chtup = false; - } - - cbti = (BTItem) PageGetItem(page, PageGetItemId(page, offset)); - htup.t_self = cbti->bti_itup.t_tid; - heap_fetch(heapRel, SnapshotDirty, &htup, &buffer, NULL); - if (htup.t_data != NULL) /* it is a duplicate */ - { - TransactionId xwait = - (TransactionIdIsValid(SnapshotDirty->xmin)) ? - SnapshotDirty->xmin : SnapshotDirty->xmax; - - /* - * If this tuple is being updated by other transaction - * then we have to wait for its commit/abort. - */ - ReleaseBuffer(buffer); - if (TransactionIdIsValid(xwait)) + cbti = (BTItem) PageGetItem(page, curitemid); + htup.t_self = cbti->bti_itup.t_tid; + if (heap_fetch(heapRel, SnapshotDirty, &htup, &hbuffer, + true, NULL)) { - if (nbuf != InvalidBuffer) - _bt_relbuf(rel, nbuf); - /* Tell _bt_doinsert to wait... */ - return xwait; - } + /* it is a duplicate */ + TransactionId xwait = + (TransactionIdIsValid(SnapshotDirty->xmin)) ? + SnapshotDirty->xmin : SnapshotDirty->xmax; - /* - * Otherwise we have a definite conflict. - */ - elog(ERROR, "Cannot insert a duplicate key into unique index %s", - RelationGetRelationName(rel)); + ReleaseBuffer(hbuffer); + /* + * If this tuple is being updated by other transaction + * then we have to wait for its commit/abort. + */ + if (TransactionIdIsValid(xwait)) + { + if (nbuf != InvalidBuffer) + _bt_relbuf(rel, nbuf); + /* Tell _bt_doinsert to wait... */ + return xwait; + } + + /* + * Otherwise we have a definite conflict. + */ + elog(ERROR, "Cannot insert a duplicate key into unique index %s", + RelationGetRelationName(rel)); + } + else + { + /* + * Hmm, if we can't see the tuple, maybe it can be + * marked killed. This logic should match index_getnext + * and btgettuple. + */ + uint16 sv_infomask; + + LockBuffer(hbuffer, BUFFER_LOCK_SHARE); + sv_infomask = htup.t_data->t_infomask; + if (HeapTupleSatisfiesVacuum(htup.t_data, + RecentGlobalXmin) == + HEAPTUPLE_DEAD) + { + curitemid->lp_flags |= LP_DELETE; + SetBufferCommitInfoNeedsSave(buf); + } + if (sv_infomask != htup.t_data->t_infomask) + SetBufferCommitInfoNeedsSave(hbuffer); + LockBuffer(hbuffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(hbuffer); + } } - /* htup null so no buffer to release */ } /* diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 206a1f17a3..e4163da987 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.89 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.90 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -271,6 +271,7 @@ btinsert(PG_FUNCTION_ARGS) char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); Relation heapRel = (Relation) PG_GETARG_POINTER(4); + bool checkUnique = PG_GETARG_BOOL(5); InsertIndexResult res; BTItem btitem; IndexTuple itup; @@ -280,7 +281,7 @@ btinsert(PG_FUNCTION_ARGS) itup->t_tid = *ht_ctid; btitem = _bt_formitem(itup); - res = _bt_doinsert(rel, btitem, rel->rd_uniqueindex, heapRel); + res = _bt_doinsert(rel, btitem, checkUnique, heapRel); pfree(btitem); pfree(itup); @@ -296,14 +297,16 @@ btgettuple(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); - bool res; + BTScanOpaque so = (BTScanOpaque) scan->opaque; + Page page; + OffsetNumber offnum; + bool res; /* * If we've already initialized this scan, we can just advance it in * the appropriate direction. If we haven't done so yet, we call a * routine to get the first item in the scan. */ - if (ItemPointerIsValid(&(scan->currentItemData))) { /* @@ -312,11 +315,47 @@ btgettuple(PG_FUNCTION_ARGS) * buffer, too. */ _bt_restscan(scan); + /* + * Check to see if we should kill the previously-fetched tuple. + */ + if (scan->kill_prior_tuple) + { + /* + * Yes, so mark it by setting the LP_DELETE bit in the item flags. + */ + offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); + page = BufferGetPage(so->btso_curbuf); + PageGetItemId(page, offnum)->lp_flags |= LP_DELETE; + /* + * Since this can be redone later if needed, it's treated the + * same as a commit-hint-bit status update for heap tuples: + * we mark the buffer dirty but don't make a WAL log entry. + */ + SetBufferCommitInfoNeedsSave(so->btso_curbuf); + } + /* + * Now continue the scan. + */ res = _bt_next(scan, dir); } else res = _bt_first(scan, dir); + /* + * Skip killed tuples if asked to. + */ + if (scan->ignore_killed_tuples) + { + while (res) + { + offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); + page = BufferGetPage(so->btso_curbuf); + if (!ItemIdDeleted(PageGetItemId(page, offnum))) + break; + res = _bt_next(scan, dir); + } + } + /* * Save heap TID to use it in _bt_restscan. Then release the read * lock on the buffer so that we aren't blocking other backends. diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 2cfb8c8f00..33d5dd9cbe 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.70 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.71 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -425,7 +425,8 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) bool _bt_first(IndexScanDesc scan, ScanDirection dir) { - Relation rel; + Relation rel = scan->indexRelation; + BTScanOpaque so = (BTScanOpaque) scan->opaque; Buffer buf; Page page; BTStack stack; @@ -437,7 +438,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) StrategyNumber strat; bool res; int32 result; - BTScanOpaque so; bool scanFromEnd; bool continuescan; ScanKey scankeys = NULL; @@ -447,14 +447,11 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) j; StrategyNumber strat_total; - rel = scan->indexRelation; - so = (BTScanOpaque) scan->opaque; - /* * Order the scan keys in our canonical fashion and eliminate any * redundant keys. */ - _bt_orderkeys(rel, so); + _bt_orderkeys(scan); /* * Quit now if _bt_orderkeys() discovered that the scan keys can never diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 2f596eac11..c3462a75bc 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.48 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.49 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,9 @@ #include "executor/execdebug.h" +static int _bt_getstrategynumber(RegProcedure sk_procedure, StrategyMap map); + + /* * _bt_mkscankey * Build a scan key that contains comparison data from itup @@ -174,6 +177,11 @@ _bt_formitem(IndexTuple itup) * attribute, which can be seen to be correct by considering the above * example. * + * Furthermore, we detect the case where the index is unique and we have + * equality quals for all columns. In this case there can be at most one + * (visible) matching tuple. index_getnext uses this to avoid uselessly + * continuing the scan after finding one match. + * * The initial ordering of the keys is expected to be by attribute already * (see group_clauses_by_indexkey() in indxpath.c). The task here is to * standardize the appearance of multiple keys for the same attribute. @@ -191,8 +199,10 @@ _bt_formitem(IndexTuple itup) *---------- */ void -_bt_orderkeys(Relation relation, BTScanOpaque so) +_bt_orderkeys(IndexScanDesc scan) { + Relation relation = scan->indexRelation; + BTScanOpaque so = (BTScanOpaque) scan->opaque; ScanKeyData xform[BTMaxStrategyNumber]; bool init[BTMaxStrategyNumber]; int numberOfKeys = so->numberOfKeys; @@ -208,6 +218,7 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) so->qual_ok = true; so->numberOfRequiredKeys = 0; + scan->keys_are_unique = false; if (numberOfKeys < 1) return; /* done if qual-less scan */ @@ -228,6 +239,17 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) */ if (cur->sk_flags & SK_ISNULL) so->qual_ok = false; + else if (relation->rd_index->indisunique && + relation->rd_rel->relnatts == 1) + { + /* it's a unique index, do we have an equality qual? */ + map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), + BTMaxStrategyNumber, + 1); + j = _bt_getstrategynumber(cur->sk_procedure, map); + if (j == (BTEqualStrategyNumber - 1)) + scan->keys_are_unique = true; + } so->numberOfRequiredKeys = 1; return; } @@ -390,17 +412,8 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) MemSet(init, 0, sizeof(init)); } - /* - * OK, figure out which strategy this key corresponds to - */ - for (j = BTMaxStrategyNumber; --j >= 0;) - { - if (cur->sk_procedure == map->entry[j].sk_procedure) - break; - } - if (j < 0) - elog(ERROR, "_bt_orderkeys: unable to identify operator %u", - cur->sk_procedure); + /* figure out which strategy this key's operator corresponds to */ + j = _bt_getstrategynumber(cur->sk_procedure, map); /* have we seen one of these before? */ if (init[j]) @@ -424,6 +437,34 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) } so->numberOfKeys = new_numberOfKeys; + + /* + * If unique index and we have equality keys for all columns, + * set keys_are_unique flag for higher levels. + */ + if (allEqualSoFar && relation->rd_index->indisunique && + relation->rd_rel->relnatts == new_numberOfKeys) + scan->keys_are_unique = true; +} + +/* + * Determine which btree strategy an operator procedure matches. + * + * Result is strategy number minus 1. + */ +static int +_bt_getstrategynumber(RegProcedure sk_procedure, StrategyMap map) +{ + int j; + + for (j = BTMaxStrategyNumber; --j >= 0;) + { + if (sk_procedure == map->entry[j].sk_procedure) + return j; + } + elog(ERROR, "_bt_getstrategynumber: unable to identify operator %u", + sk_procedure); + return -1; /* keep compiler quiet */ } /* diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index a72f774760..006777b931 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.71 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.72 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -223,9 +223,9 @@ rtinsert(PG_FUNCTION_ARGS) Datum *datum = (Datum *) PG_GETARG_POINTER(1); char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); - #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); + bool checkUnique = PG_GETARG_BOOL(5); #endif InsertIndexResult res; IndexTuple itup; @@ -1206,6 +1206,8 @@ rtbulkdelete(PG_FUNCTION_ARGS) /* walk through the entire index */ iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL); + /* including killed tuples */ + iscan->ignore_killed_tuples = false; while (index_getnext_indexitem(iscan, ForwardScanDirection)) { diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index cc26af128a..1b8800d193 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.92 2002/04/19 16:36:08 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.93 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -160,7 +160,8 @@ CatalogIndexInsert(Relation *idescs, nullv); indexRes = index_insert(idescs[i], datum, nullv, - &heapTuple->t_self, heapRelation); + &heapTuple->t_self, heapRelation, + idescs[i]->rd_uniqueindex); if (indexRes) pfree(indexRes); pfree(indexInfo); diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 3f5cc96f41..5785139a51 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.34 2002/05/21 22:05:54 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.35 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -655,8 +655,8 @@ pageloop:; goto pageloop; } ItemPointerSet(&targtuple.t_self, targblock, targoffset); - heap_fetch(onerel, SnapshotNow, &targtuple, &tupbuffer, NULL); - if (targtuple.t_data != NULL) + if (heap_fetch(onerel, SnapshotNow, &targtuple, &tupbuffer, + false, NULL)) { /* * Found a suitable tuple, so save it, replacing one old diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index e0b01b6fee..2cb5f2e457 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/trigger.c,v 1.118 2002/05/21 22:05:54 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/trigger.c,v 1.119 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1421,16 +1421,14 @@ DeferredTriggerExecute(DeferredTriggerEvent event, int itemno, if (ItemPointerIsValid(&(event->dte_oldctid))) { ItemPointerCopy(&(event->dte_oldctid), &(oldtuple.t_self)); - heap_fetch(rel, SnapshotAny, &oldtuple, &oldbuffer, NULL); - if (!oldtuple.t_data) + if (!heap_fetch(rel, SnapshotAny, &oldtuple, &oldbuffer, false, NULL)) elog(ERROR, "DeferredTriggerExecute: failed to fetch old tuple"); } if (ItemPointerIsValid(&(event->dte_newctid))) { ItemPointerCopy(&(event->dte_newctid), &(newtuple.t_self)); - heap_fetch(rel, SnapshotAny, &newtuple, &newbuffer, NULL); - if (!newtuple.t_data) + if (!heap_fetch(rel, SnapshotAny, &newtuple, &newbuffer, false, NULL)) elog(ERROR, "DeferredTriggerExecute: failed to fetch new tuple"); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 4889e30040..953ff39714 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.225 2002/05/20 23:51:42 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.226 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -458,7 +458,9 @@ vac_update_relstats(Oid relid, BlockNumber num_pages, double num_tuples, /* get the buffer cache tuple */ rtup.t_self = ctup->t_self; ReleaseSysCache(ctup); - heap_fetch(rd, SnapshotNow, &rtup, &buffer, NULL); + if (!heap_fetch(rd, SnapshotNow, &rtup, &buffer, false, NULL)) + elog(ERROR, "pg_class entry for relid %u vanished during vacuuming", + relid); /* overwrite the existing statistics in the tuple */ pgcform = (Form_pg_class) GETSTRUCT(&rtup); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 29687a54ba..fa276aaeec 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -27,7 +27,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execMain.c,v 1.163 2002/05/21 22:59:01 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execMain.c,v 1.164 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1489,7 +1489,7 @@ lreplace:; numIndices = resultRelInfo->ri_NumIndices; if (numIndices > 0) - ExecInsertIndexTuples(slot, &(tuple->t_self), estate, true); + ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false); /* AFTER ROW UPDATE Triggers */ if (resultRelInfo->ri_TrigDesc) @@ -1639,8 +1639,7 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid) { Buffer buffer; - heap_fetch(relation, SnapshotDirty, &tuple, &buffer, NULL); - if (tuple.t_data != NULL) + if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, false, NULL)) { TransactionId xwait = SnapshotDirty->xmax; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index a6b5048326..32cd88ed07 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.81 2002/05/12 20:10:02 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.82 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -599,7 +599,7 @@ void ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, EState *estate, - bool is_update) + bool is_vacuum) { HeapTuple heapTuple; ResultRelInfo *resultRelInfo; @@ -667,11 +667,17 @@ ExecInsertIndexTuples(TupleTableSlot *slot, datum, nullv); + /* + * The index AM does the rest. Note we suppress unique-index + * checks if we are being called from VACUUM, since VACUUM may + * need to move dead tuples that have the same keys as live ones. + */ result = index_insert(relationDescs[i], /* index relation */ datum, /* array of heaptuple Datums */ nullv, /* info on nulls */ &(heapTuple->t_self), /* tid of heap tuple */ - heapRelation); + heapRelation, + relationDescs[i]->rd_uniqueindex && !is_vacuum); /* * keep track of index inserts for debugging diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 5ee222744d..46e9daed6c 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeTidscan.c,v 1.23 2002/02/19 20:11:14 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeTidscan.c,v 1.24 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -150,11 +150,8 @@ TidNext(TidScan *node) { bool slot_is_valid = false; - tuple->t_datamcxt = NULL; - tuple->t_data = NULL; tuple->t_self = tidList[tidstate->tss_TidPtr]; - heap_fetch(heapRelation, snapshot, tuple, &buffer, NULL); - if (tuple->t_data != NULL) + if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL)) { bool prev_matches = false; int prev_tid; @@ -198,8 +195,6 @@ TidNext(TidScan *node) else ExecClearTuple(slot); } - else if (BufferIsValid(buffer)) - ReleaseBuffer(buffer); tidNumber++; if (bBackward) tidstate->tss_TidPtr--; diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index a32c8ae703..6415271bea 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.46 2002/05/21 22:05:55 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.47 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -297,6 +297,10 @@ GetOldestXmin(bool allDbs) * it is considered running or not. * This ensures that the set of transactions seen as "running" by the * current xact will not change after it takes the snapshot. + * + * Also, we compute the current global xmin (oldest xmin across all running + * transactions) and save it in RecentGlobalXmin. This is the same + * computation done by GetOldestXmin(TRUE). *---------- */ Snapshot @@ -305,6 +309,9 @@ GetSnapshotData(bool serializable) Snapshot snapshot = (Snapshot) malloc(sizeof(SnapshotData)); SISeg *segP = shmInvalBuffer; ProcState *stateP = segP->procState; + TransactionId xmin; + TransactionId xmax; + TransactionId globalxmin; int index; int count = 0; @@ -321,7 +328,7 @@ GetSnapshotData(bool serializable) if (snapshot->xip == NULL) elog(ERROR, "Memory exhausted in GetSnapshotData"); - snapshot->xmin = GetCurrentTransactionId(); + globalxmin = xmin = GetCurrentTransactionId(); /* * If we are going to set MyProc->xmin then we'd better get exclusive @@ -356,7 +363,7 @@ GetSnapshotData(bool serializable) *-------------------- */ - snapshot->xmax = ReadNewTransactionId(); + xmax = ReadNewTransactionId(); for (index = 0; index < segP->lastBackend; index++) { @@ -374,28 +381,48 @@ GetSnapshotData(bool serializable) * running a transaction, and xacts started since we read the * next transaction ID. There's no need to store XIDs above * what we got from ReadNewTransactionId, since we'll treat - * them as running anyway. + * them as running anyway. We also assume that such xacts can't + * compute an xmin older than ours, so they needn't be considered + * in computing globalxmin. */ if (proc == MyProc || !TransactionIdIsNormal(xid) || - TransactionIdFollowsOrEquals(xid, snapshot->xmax)) + TransactionIdFollowsOrEquals(xid, xmax)) continue; - if (TransactionIdPrecedes(xid, snapshot->xmin)) - snapshot->xmin = xid; + if (TransactionIdPrecedes(xid, xmin)) + xmin = xid; snapshot->xip[count] = xid; count++; + + /* Update globalxmin to be the smallest valid xmin */ + xid = proc->xmin; + if (TransactionIdIsNormal(xid)) + if (TransactionIdPrecedes(xid, globalxmin)) + globalxmin = xid; } } if (serializable) - MyProc->xmin = snapshot->xmin; + MyProc->xmin = xmin; LWLockRelease(SInvalLock); /* Serializable snapshot must be computed before any other... */ Assert(TransactionIdIsValid(MyProc->xmin)); + /* + * Update globalxmin to include actual process xids. This is a slightly + * different way of computing it than GetOldestXmin uses, but should give + * the same result. + */ + if (TransactionIdPrecedes(xmin, globalxmin)) + globalxmin = xmin; + + RecentGlobalXmin = globalxmin; + + snapshot->xmin = xmin; + snapshot->xmax = xmax; snapshot->xcnt = count; snapshot->curcid = GetCurrentCommandId(); diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index 582ec28183..9f51304e68 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -16,7 +16,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.52 2002/05/21 22:59:01 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.53 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,9 @@ Snapshot SnapshotDirty = &SnapshotDirtyData; Snapshot QuerySnapshot = NULL; Snapshot SerializableSnapshot = NULL; +/* This is updated by GetSnapshotData: */ +TransactionId RecentGlobalXmin = InvalidTransactionId; + bool ReferentialIntegritySnapshotOverride = false; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 610afa11f0..dd344043b3 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: genam.h,v 1.34 2002/05/20 23:51:43 tgl Exp $ + * $Id: genam.h,v 1.35 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,7 +54,8 @@ extern void index_close(Relation relation); extern InsertIndexResult index_insert(Relation indexRelation, Datum *datums, char *nulls, ItemPointer heap_t_ctid, - Relation heapRelation); + Relation heapRelation, + bool check_uniqueness); extern IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 7bf75f512d..73ed6e7f15 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: heapam.h,v 1.75 2002/05/21 22:05:55 tgl Exp $ + * $Id: heapam.h,v 1.76 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -152,8 +152,8 @@ extern void heap_rescan(HeapScanDesc scan, ScanKey key); extern void heap_endscan(HeapScanDesc scan); extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); -extern void heap_fetch(Relation relation, Snapshot snapshot, - HeapTuple tuple, Buffer *userbuf, +extern bool heap_fetch(Relation relation, Snapshot snapshot, + HeapTuple tuple, Buffer *userbuf, bool keep_buf, PgStat_Info *pgstat_info); extern ItemPointer heap_get_latest_tid(Relation relation, Snapshot snapshot, diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index c29defba8f..bef621dd68 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nbtree.h,v 1.60 2002/05/20 23:51:43 tgl Exp $ + * $Id: nbtree.h,v 1.61 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -383,7 +383,7 @@ extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup); extern ScanKey _bt_mkscankey_nodata(Relation rel); extern void _bt_freeskey(ScanKey skey); extern void _bt_freestack(BTStack stack); -extern void _bt_orderkeys(Relation relation, BTScanOpaque so); +extern void _bt_orderkeys(IndexScanDesc scan); extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, ScanDirection dir, bool *continuescan); extern BTItem _bt_formitem(IndexTuple itup); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 87e3b52369..e23681876d 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: relscan.h,v 1.26 2002/05/20 23:51:43 tgl Exp $ + * $Id: relscan.h,v 1.27 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -46,7 +46,15 @@ typedef struct IndexScanDescData int numberOfKeys; /* number of scan keys */ ScanKey keyData; /* array of scan key descriptors */ + /* signaling to index AM about killing index tuples */ + bool kill_prior_tuple; /* last-returned tuple is dead */ + bool ignore_killed_tuples; /* do not return killed entries */ + + /* set by index AM if scan keys satisfy index's uniqueness constraint */ + bool keys_are_unique; + /* scan current state */ + bool got_tuple; /* true after successful index_getnext */ void *opaque; /* access-method-specific info */ ItemPointerData currentItemData; /* current index pointer */ ItemPointerData currentMarkData; /* marked position, if any */ diff --git a/src/include/access/valid.h b/src/include/access/valid.h index b76e5111c2..d16c0f4e2c 100644 --- a/src/include/access/valid.h +++ b/src/include/access/valid.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: valid.h,v 1.26 2001/11/05 17:46:31 momjian Exp $ + * $Id: valid.h,v 1.27 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -77,14 +77,9 @@ do \ /* ---------------- * HeapTupleSatisfies * - * Returns a valid HeapTuple if it satisfies the timequal and keytest. - * Returns NULL otherwise. Used to be heap_satisifies (sic) which - * returned a boolean. It now returns a tuple so that we can avoid doing two - * PageGetItem's per tuple. - * - * Complete check of validity including LP_CTUP and keytest. - * This should perhaps be combined with valid somehow in the - * future. (Also, additional rule tests/time range tests.) + * res is set TRUE if the HeapTuple satisfies the timequal and keytest, + * otherwise it is set FALSE. Note that the hint bits in the HeapTuple's + * t_infomask may be updated as a side effect. * * on 8/21/92 mao says: i rearranged the tests here to do keytest before * SatisfiesTimeQual. profiling indicated that even for vacuumed relations, @@ -100,35 +95,28 @@ do \ disk_page, \ seeself, \ nKeys, \ - key) \ + key, \ + res) \ do \ { \ /* We use underscores to protect the variable passed in as parameters */ \ - bool _res; \ - \ if ((key) != NULL) \ HeapKeyTest(tuple, RelationGetDescr(relation), \ - (nKeys), (key), _res); \ + (nKeys), (key), (res)); \ else \ - _res = TRUE; \ + (res) = true; \ \ - if (_res) \ + if (res) \ { \ if ((relation)->rd_rel->relkind != RELKIND_UNCATALOGED) \ { \ uint16 _infomask = (tuple)->t_data->t_infomask; \ \ - _res = HeapTupleSatisfiesVisibility((tuple), (seeself)); \ + (res) = HeapTupleSatisfiesVisibility((tuple), (seeself)); \ if ((tuple)->t_data->t_infomask != _infomask) \ SetBufferCommitInfoNeedsSave(buffer); \ - if (!_res) \ - (tuple)->t_data = NULL; \ } \ } \ - else \ - { \ - (tuple)->t_data = NULL; \ - } \ } while (0) #endif /* VALID_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 2cc1f8aac1..a7d56dc90d 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.239 2002/05/22 17:21:01 petere Exp $ + * $Id: pg_proc.h,v 1.240 2002/05/24 18:57:56 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -675,7 +675,7 @@ DESCR("convert int4 to float4"); DATA(insert OID = 319 ( int4 PGNSP PGUID 12 f f f t f i 1 23 "700" 100 0 0 100 ftoi4 - _null_ )); DESCR("convert float4 to int4"); -DATA(insert OID = 320 ( rtinsert PGNSP PGUID 12 f f f t f v 5 23 "0 0 0 0 0" 100 0 0 100 rtinsert - _null_ )); +DATA(insert OID = 320 ( rtinsert PGNSP PGUID 12 f f f t f v 6 23 "0 0 0 0 0 0" 100 0 0 100 rtinsert - _null_ )); DESCR("r-tree(internal)"); DATA(insert OID = 322 ( rtgettuple PGNSP PGUID 12 f f f t f v 2 23 "0 0" 100 0 0 100 rtgettuple - _null_ )); DESCR("r-tree(internal)"); @@ -698,7 +698,7 @@ DESCR("r-tree(internal)"); DATA(insert OID = 330 ( btgettuple PGNSP PGUID 12 f f f t f v 2 23 "0 0" 100 0 0 100 btgettuple - _null_ )); DESCR("btree(internal)"); -DATA(insert OID = 331 ( btinsert PGNSP PGUID 12 f f f t f v 5 23 "0 0 0 0 0" 100 0 0 100 btinsert - _null_ )); +DATA(insert OID = 331 ( btinsert PGNSP PGUID 12 f f f t f v 6 23 "0 0 0 0 0 0" 100 0 0 100 btinsert - _null_ )); DESCR("btree(internal)"); DATA(insert OID = 333 ( btbeginscan PGNSP PGUID 12 f f f t f v 3 23 "0 0 0" 100 0 0 100 btbeginscan - _null_ )); DESCR("btree(internal)"); @@ -801,7 +801,7 @@ DESCR("convert char() to name"); DATA(insert OID = 440 ( hashgettuple PGNSP PGUID 12 f f f t f v 2 23 "0 0" 100 0 0 100 hashgettuple - _null_ )); DESCR("hash(internal)"); -DATA(insert OID = 441 ( hashinsert PGNSP PGUID 12 f f f t f v 5 23 "0 0 0 0 0" 100 0 0 100 hashinsert - _null_ )); +DATA(insert OID = 441 ( hashinsert PGNSP PGUID 12 f f f t f v 6 23 "0 0 0 0 0 0" 100 0 0 100 hashinsert - _null_ )); DESCR("hash(internal)"); DATA(insert OID = 443 ( hashbeginscan PGNSP PGUID 12 f f f t f v 3 23 "0 0 0" 100 0 0 100 hashbeginscan - _null_ )); DESCR("hash(internal)"); @@ -1031,7 +1031,7 @@ DESCR("smaller of two"); DATA(insert OID = 774 ( gistgettuple PGNSP PGUID 12 f f f t f v 2 23 "0 0" 100 0 0 100 gistgettuple - _null_ )); DESCR("gist(internal)"); -DATA(insert OID = 775 ( gistinsert PGNSP PGUID 12 f f f t f v 5 23 "0 0 0 0 0" 100 0 0 100 gistinsert - _null_ )); +DATA(insert OID = 775 ( gistinsert PGNSP PGUID 12 f f f t f v 6 23 "0 0 0 0 0 0" 100 0 0 100 gistinsert - _null_ )); DESCR("gist(internal)"); DATA(insert OID = 777 ( gistbeginscan PGNSP PGUID 12 f f f t f v 3 23 "0 0 0" 100 0 0 100 gistbeginscan - _null_ )); DESCR("gist(internal)"); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 6752d72ca7..293550e568 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: executor.h,v 1.64 2002/05/12 20:10:04 tgl Exp $ + * $Id: executor.h,v 1.65 2002/05/24 18:57:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -170,7 +170,7 @@ extern ExprContext *MakePerTupleExprContext(EState *estate); extern void ExecOpenIndices(ResultRelInfo *resultRelInfo); extern void ExecCloseIndices(ResultRelInfo *resultRelInfo); extern void ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, - EState *estate, bool is_update); + EState *estate, bool is_vacuum); extern void RegisterExprContextCallback(ExprContext *econtext, ExprContextCallbackFunction function, diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h index 3ddceff5f8..cdda44023b 100644 --- a/src/include/utils/tqual.h +++ b/src/include/utils/tqual.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: tqual.h,v 1.40 2002/05/21 22:59:01 tgl Exp $ + * $Id: tqual.h,v 1.41 2002/05/24 18:57:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -41,6 +41,8 @@ extern DLLIMPORT Snapshot SnapshotDirty; extern DLLIMPORT Snapshot QuerySnapshot; extern DLLIMPORT Snapshot SerializableSnapshot; +extern TransactionId RecentGlobalXmin; + extern bool ReferentialIntegritySnapshotOverride; #define IsSnapshotNow(snapshot) ((Snapshot) (snapshot) == SnapshotNow)