diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 1da836a364..16105696d4 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.92 2002/05/20 23:51:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.93 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -294,9 +294,9 @@ gistinsert(PG_FUNCTION_ARGS) Datum *datum = (Datum *) PG_GETARG_POINTER(1); char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); - #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); + bool checkUnique = PG_GETARG_BOOL(5); #endif InsertIndexResult res; IndexTuple itup; @@ -1607,6 +1607,8 @@ gistbulkdelete(PG_FUNCTION_ARGS) /* walk through the entire index */ iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL); + /* including killed tuples */ + iscan->ignore_killed_tuples = false; while (index_getnext_indexitem(iscan, ForwardScanDirection)) { diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 6f06ffbfa0..8db98e8a36 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.57 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.58 2002/05/24 18:57:55 tgl Exp $ * * NOTES * This file contains only the public interface routines. 
@@ -166,8 +166,8 @@ hashinsert(PG_FUNCTION_ARGS) ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); + bool checkUnique = PG_GETARG_BOOL(5); #endif - InsertIndexResult res; HashItem hitem; IndexTuple itup; @@ -210,6 +210,9 @@ hashgettuple(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); + HashScanOpaque so = (HashScanOpaque) scan->opaque; + Page page; + OffsetNumber offnum; bool res; /* @@ -217,12 +220,49 @@ hashgettuple(PG_FUNCTION_ARGS) * the appropriate direction. If we haven't done so yet, we call a * routine to get the first item in the scan. */ - if (ItemPointerIsValid(&(scan->currentItemData))) + { + /* + * Check to see if we should kill the previously-fetched tuple. + */ + if (scan->kill_prior_tuple) + { + /* + * Yes, so mark it by setting the LP_DELETE bit in the item flags. + */ + offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); + page = BufferGetPage(so->hashso_curbuf); + PageGetItemId(page, offnum)->lp_flags |= LP_DELETE; + /* + * Since this can be redone later if needed, it's treated the + * same as a commit-hint-bit status update for heap tuples: + * we mark the buffer dirty but don't make a WAL log entry. + */ + SetBufferCommitInfoNeedsSave(so->hashso_curbuf); + } + /* + * Now continue the scan. + */ res = _hash_next(scan, dir); + } else res = _hash_first(scan, dir); + /* + * Skip killed tuples if asked to. 
+ */ + if (scan->ignore_killed_tuples) + { + while (res) + { + offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); + page = BufferGetPage(so->hashso_curbuf); + if (!ItemIdDeleted(PageGetItemId(page, offnum))) + break; + res = _hash_next(scan, dir); + } + } + PG_RETURN_BOOL(res); } @@ -418,6 +458,8 @@ hashbulkdelete(PG_FUNCTION_ARGS) /* walk through the entire index */ iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL); + /* including killed tuples */ + iscan->ignore_killed_tuples = false; while (index_getnext_indexitem(iscan, ForwardScanDirection)) { diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c index 87dfcd6093..724a785ac7 100644 --- a/src/backend/access/hash/hashscan.c +++ b/src/backend/access/hash/hashscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.26 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.27 2002/05/24 18:57:55 tgl Exp $ * * NOTES * Because we can be doing an index scan on a relation while we @@ -32,8 +32,6 @@ #include "access/hash.h" -static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); -static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); typedef struct HashScanListData { @@ -46,6 +44,10 @@ typedef HashScanListData *HashScanList; static HashScanList HashScans = (HashScanList) NULL; +static void _hash_scandel(IndexScanDesc scan, + BlockNumber blkno, OffsetNumber offno); + + /* * AtEOXact_hash() --- clean up hash subsystem at xact abort or commit. 
* @@ -129,63 +131,51 @@ static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno) { ItemPointer current; + ItemPointer mark; Buffer buf; Buffer metabuf; HashScanOpaque so; - if (!_hash_scantouched(scan, blkno, offno)) - return; - - metabuf = _hash_getbuf(scan->indexRelation, HASH_METAPAGE, HASH_READ); - so = (HashScanOpaque) scan->opaque; - buf = so->hashso_curbuf; - current = &(scan->currentItemData); + mark = &(scan->currentMarkData); + if (ItemPointerIsValid(current) && ItemPointerGetBlockNumber(current) == blkno && ItemPointerGetOffsetNumber(current) >= offno) { + metabuf = _hash_getbuf(scan->indexRelation, HASH_METAPAGE, HASH_READ); + buf = so->hashso_curbuf; _hash_step(scan, &buf, BackwardScanDirection, metabuf); - so->hashso_curbuf = buf; } - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) + if (ItemPointerIsValid(mark) + && ItemPointerGetBlockNumber(mark) == blkno + && ItemPointerGetOffsetNumber(mark) >= offno) { - ItemPointerData tmp; + /* + * The idea here is to exchange the current and mark positions, + * then step backwards (affecting current), then exchange again. 
+ */ + ItemPointerData tmpitem; + Buffer tmpbuf; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; + tmpitem = *mark; + *mark = *current; + *current = tmpitem; + tmpbuf = so->hashso_mrkbuf; + so->hashso_mrkbuf = so->hashso_curbuf; + so->hashso_curbuf = tmpbuf; + + metabuf = _hash_getbuf(scan->indexRelation, HASH_METAPAGE, HASH_READ); + buf = so->hashso_curbuf; _hash_step(scan, &buf, BackwardScanDirection, metabuf); - so->hashso_mrkbuf = buf; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; + + tmpitem = *mark; + *mark = *current; + *current = tmpitem; + tmpbuf = so->hashso_mrkbuf; + so->hashso_mrkbuf = so->hashso_curbuf; + so->hashso_curbuf = tmpbuf; } } - -static bool -_hash_scantouched(IndexScanDesc scan, - BlockNumber blkno, - OffsetNumber offno) -{ - ItemPointer current; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return true; - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return true; - - return false; -} diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 968efa363a..db10ff055c 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.28 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.29 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,10 +54,10 @@ _hash_search(Relation rel, * _hash_next() -- Get the next item in a scan. * * On entry, we have a valid currentItemData in the scan, and a - * read lock on the page that contains that item. 
We do not have - * the page pinned. We return the next item in the scan. On - * exit, we have the page containing the next item locked but not - * pinned. + * pin and read lock on the page that contains that item. + * We find the next item in the scan, if any. + * On success exit, we have the page containing the next item + * pinned and locked. */ bool _hash_next(IndexScanDesc scan, ScanDirection dir) @@ -74,25 +74,12 @@ _hash_next(IndexScanDesc scan, ScanDirection dir) rel = scan->indexRelation; so = (HashScanOpaque) scan->opaque; - current = &(scan->currentItemData); - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); - - /* - * XXX 10 may 91: somewhere there's a bug in our management of the - * cached buffer for this scan. wei discovered it. the following is - * a workaround so he can work until i figure out what's going on. - */ - - if (!BufferIsValid(so->hashso_curbuf)) - { - so->hashso_curbuf = _hash_getbuf(rel, - ItemPointerGetBlockNumber(current), - HASH_READ); - } /* we still have the buffer pinned and locked */ buf = so->hashso_curbuf; + Assert(BufferIsValid(buf)); + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); /* * step to next valid tuple. note that _hash_step releases our lock diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index a8d7ca0c29..e763823a16 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.135 2002/05/21 22:05:53 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.136 2002/05/24 18:57:55 tgl Exp $ * * * INTERFACE ROUTINES @@ -306,6 +306,8 @@ heapgettup(Relation relation, { if (ItemIdIsUsed(lpp)) { + bool valid; + tuple->t_datamcxt = NULL; tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); @@ -315,8 +317,8 @@ heapgettup(Relation relation, * if current tuple qualifies, return it. 
*/ HeapTupleSatisfies(tuple, relation, *buffer, (PageHeader) dp, - snapshot, nkeys, key); - if (tuple->t_data != NULL) + snapshot, nkeys, key, valid); + if (valid) { LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); return; @@ -864,32 +866,37 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) return ((scan->rs_ctup.t_data == NULL) ? NULL : &(scan->rs_ctup)); } -/* ---------------- - * heap_fetch - retrieve tuple with given tid +/* + * heap_fetch - retrieve tuple with given tid * - * On entry, tuple->t_self is the TID to fetch. + * On entry, tuple->t_self is the TID to fetch. We pin the buffer holding + * the tuple, fill in the remaining fields of *tuple, and check the tuple + * against the specified snapshot. * - * If successful (ie, tuple found and passes snapshot time qual), - * then the rest of *tuple is filled in, and *userbuf is set to the - * buffer holding the tuple. A pin is obtained on the buffer; the - * caller must BufferRelease the buffer when done with the tuple. + * If successful (tuple passes snapshot time qual), then *userbuf is set to + * the buffer holding the tuple and TRUE is returned. The caller must + * unpin the buffer when done with the tuple. * - * If not successful, tuple->t_data is set to NULL and *userbuf is set to - * InvalidBuffer. - * ---------------- + * If the tuple fails the time qual check, then FALSE will be returned. + * When the caller specifies keep_buf = true, we retain the pin on the + * buffer and return it in *userbuf (so the caller can still access the + * tuple); when keep_buf = false, the pin is released and *userbuf is set + * to InvalidBuffer. 
*/ -void +bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, + bool keep_buf, PgStat_Info *pgstat_info) { + ItemPointer tid = &(tuple->t_self); ItemId lp; Buffer buffer; PageHeader dp; - ItemPointer tid = &(tuple->t_self); OffsetNumber offnum; + bool valid; /* * increment access statistics @@ -901,14 +908,16 @@ heap_fetch(Relation relation, * get the buffer from the relation descriptor. Note that this does a * buffer pin. */ - buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); if (!BufferIsValid(buffer)) - elog(ERROR, "heap_fetch: %s relation: ReadBuffer(%ld) failed", + elog(ERROR, "heap_fetch: ReadBuffer(%s, %lu) failed", RelationGetRelationName(relation), - (long) ItemPointerGetBlockNumber(tid)); + (unsigned long) ItemPointerGetBlockNumber(tid)); + /* + * Need share lock on buffer to examine tuple commit status. + */ LockBuffer(buffer, BUFFER_LOCK_SHARE); /* @@ -921,38 +930,34 @@ heap_fetch(Relation relation, /* * more sanity checks */ - if (!ItemIdIsUsed(lp)) { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); - *userbuf = InvalidBuffer; - tuple->t_datamcxt = NULL; - tuple->t_data = NULL; - return; + + elog(ERROR, "heap_fetch: invalid tuple id (%s, %lu, %u)", + RelationGetRelationName(relation), + (unsigned long) ItemPointerGetBlockNumber(tid), + offnum); } + /* + * fill in *tuple fields + */ tuple->t_datamcxt = NULL; tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = relation->rd_id; /* - * check time qualification of tid + * check time qualification of tuple, then release lock */ - HeapTupleSatisfies(tuple, relation, buffer, dp, - snapshot, 0, (ScanKey) NULL); + snapshot, 0, (ScanKey) NULL, valid); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - if (tuple->t_data == NULL) - { - /* Tuple failed time check, so we can release now. 
*/ - ReleaseBuffer(buffer); - *userbuf = InvalidBuffer; - } - else + if (valid) { /* * All checks passed, so return the tuple as valid. Caller is now @@ -968,13 +973,28 @@ heap_fetch(Relation relation, pgstat_count_heap_fetch(pgstat_info); else pgstat_count_heap_fetch(&relation->pgstat_info); + + return true; } + + /* Tuple failed time qual, but maybe caller wants to see it anyway. */ + if (keep_buf) + { + *userbuf = buffer; + + return false; + } + + /* Okay to release pin on buffer. */ + ReleaseBuffer(buffer); + + *userbuf = InvalidBuffer; + + return false; } -/* ---------------- +/* * heap_get_latest_tid - get the latest tid of a specified tuple - * - * ---------------- */ ItemPointer heap_get_latest_tid(Relation relation, @@ -989,7 +1009,8 @@ heap_get_latest_tid(Relation relation, HeapTupleHeader t_data; ItemPointerData ctid; bool invalidBlock, - linkend; + linkend, + valid; /* * get the buffer from the relation descriptor Note that this does a @@ -1038,7 +1059,7 @@ heap_get_latest_tid(Relation relation, */ HeapTupleSatisfies(&tp, relation, buffer, dp, - snapshot, 0, (ScanKey) NULL); + snapshot, 0, (ScanKey) NULL, valid); linkend = true; if ((t_data->t_infomask & HEAP_XMIN_COMMITTED) != 0 && @@ -1048,7 +1069,7 @@ heap_get_latest_tid(Relation relation, LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); - if (tp.t_data == NULL) + if (!valid) { if (linkend) return NULL; diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 9ac1c69c8e..ab5e96f8cc 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.30 2002/05/21 22:05:53 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.31 2002/05/24 18:57:55 tgl Exp $ * * * INTERFACE ROUTINES @@ -923,7 +923,7 @@ toast_save_datum(Relation rel, Datum value) */ idxres = index_insert(toastidx, t_values, t_nulls, 
&(toasttup->t_self), - toastrel); + toastrel, toastidx->rd_uniqueindex); if (idxres == NULL) elog(ERROR, "Failed to insert index entry for TOAST tuple"); diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index cc8c08c177..16d0beaffe 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/genam.c,v 1.33 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/genam.c,v 1.34 2002/05/24 18:57:55 tgl Exp $ * * NOTES * many of the old access method routines have been turned into @@ -89,6 +89,11 @@ RelationGetIndexScan(Relation indexRelation, else scan->keyData = NULL; + scan->kill_prior_tuple = false; + scan->ignore_killed_tuples = true; /* default setting */ + scan->keys_are_unique = false; /* may be set by amrescan */ + scan->got_tuple = false; + scan->opaque = NULL; ItemPointerSetInvalid(&scan->currentItemData); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index b616212bb5..8c0fbaa6a1 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.58 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.59 2002/05/24 18:57:55 tgl Exp $ * * INTERFACE ROUTINES * index_open - open an index relation by relation OID @@ -204,7 +204,8 @@ index_insert(Relation indexRelation, Datum *datums, char *nulls, ItemPointer heap_t_ctid, - Relation heapRelation) + Relation heapRelation, + bool check_uniqueness) { RegProcedure procedure; InsertIndexResult specificResult; @@ -216,12 +217,13 @@ index_insert(Relation indexRelation, * have the am's insert proc do all the work. 
*/ specificResult = (InsertIndexResult) - DatumGetPointer(OidFunctionCall5(procedure, + DatumGetPointer(OidFunctionCall6(procedure, PointerGetDatum(indexRelation), PointerGetDatum(datums), PointerGetDatum(nulls), PointerGetDatum(heap_t_ctid), - PointerGetDatum(heapRelation))); + PointerGetDatum(heapRelation), + BoolGetDatum(check_uniqueness))); /* must be pfree'ed */ return specificResult; @@ -303,6 +305,10 @@ index_rescan(IndexScanDesc scan, ScanKey key) SCAN_CHECKS; GET_SCAN_PROCEDURE(rescan, amrescan); + scan->kill_prior_tuple = false; /* for safety */ + scan->keys_are_unique = false; /* may be set by amrescan */ + scan->got_tuple = false; + OidFunctionCall2(procedure, PointerGetDatum(scan), PointerGetDatum(key)); @@ -369,6 +375,9 @@ index_restrpos(IndexScanDesc scan) SCAN_CHECKS; GET_SCAN_PROCEDURE(restrpos, amrestrpos); + scan->kill_prior_tuple = false; /* for safety */ + scan->got_tuple = false; + OidFunctionCall1(procedure, PointerGetDatum(scan)); } @@ -385,7 +394,7 @@ index_restrpos(IndexScanDesc scan) HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction) { - bool found; + HeapTuple heapTuple = &scan->xs_ctup; SCAN_CHECKS; @@ -396,8 +405,21 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) scan->xs_cbuf = InvalidBuffer; } + /* just make sure this is false... */ + scan->kill_prior_tuple = false; + + /* + * Can skip entering the index AM if we already got a tuple + * and it must be unique. + */ + if (scan->keys_are_unique && scan->got_tuple) + return NULL; + for (;;) { + bool found; + uint16 sv_infomask; + pgstat_count_index_scan(&scan->xs_pgstat_info); /* @@ -407,32 +429,62 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) found = DatumGetBool(FunctionCall2(&scan->fn_getnext, PointerGetDatum(scan), Int32GetDatum(direction))); + + /* Reset kill flag immediately for safety */ + scan->kill_prior_tuple = false; + if (!found) return NULL; /* failure exit */ + /* * Fetch the heap tuple and see if it matches the snapshot. 
*/ - heap_fetch(scan->heapRelation, scan->xs_snapshot, - &scan->xs_ctup, &scan->xs_cbuf, - &scan->xs_pgstat_info); - if (scan->xs_ctup.t_data != NULL) + if (heap_fetch(scan->heapRelation, scan->xs_snapshot, + heapTuple, &scan->xs_cbuf, true, + &scan->xs_pgstat_info)) break; + /* - * XXX here, consider whether we can kill the index tuple. + * If we can't see it, maybe no one else can either. Check to see + * if the tuple is dead to all transactions. If so, signal the + * index AM to not return it on future indexscans. + * + * We told heap_fetch to keep a pin on the buffer, so we can + * re-access the tuple here. But we must re-lock the buffer first. + * Also, it's just barely possible for an update of hint bits to + * occur here. */ + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); + sv_infomask = heapTuple->t_data->t_infomask; + + if (HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin) == + HEAPTUPLE_DEAD) + scan->kill_prior_tuple = true; + + if (sv_infomask != heapTuple->t_data->t_infomask) + SetBufferCommitInfoNeedsSave(scan->xs_cbuf); + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(scan->xs_cbuf); + scan->xs_cbuf = InvalidBuffer; } /* Success exit */ + scan->got_tuple = true; + pgstat_count_index_getnext(&scan->xs_pgstat_info); - return &scan->xs_ctup; + return heapTuple; } /* ---------------- * index_getnext_indexitem - get the next index tuple from a scan * - * Finds the next index tuple satisfying the scan keys. Note that no - * time qual (snapshot) check is done; indeed the heap tuple is not accessed. + * Finds the next index tuple satisfying the scan keys. Note that the + * corresponding heap tuple is not accessed, and thus no time qual (snapshot) + * check is done, other than the index AM's internal check for killed tuples + * (which most callers of this routine will probably want to suppress by + * setting scan->ignore_killed_tuples = false). 
+ * + * On success (TRUE return), the found index TID is in scan->currentItemData, * and its heap TID is in scan->xs_ctup.t_self. scan->xs_cbuf is untouched. * ---------------- @@ -445,6 +497,9 @@ index_getnext_indexitem(IndexScanDesc scan, SCAN_CHECKS; + /* just make sure this is false... */ + scan->kill_prior_tuple = false; + /* * have the am's gettuple proc do all the work. index_beginscan * already set up fn_getnext. diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 3a94297188..fe3e98b982 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.90 2002/03/06 06:09:17 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.91 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -176,7 +176,6 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, Page page; BTPageOpaque opaque; Buffer nbuf = InvalidBuffer; - bool chtup = true; page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -194,70 +193,85 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, for (;;) { HeapTupleData htup; - Buffer buffer; + Buffer hbuffer; + ItemId curitemid; BTItem cbti; BlockNumber nblkno; /* - * _bt_compare returns 0 for (1,NULL) and (1,NULL) - this's how we - * handling NULLs - and so we must not use _bt_compare in real - * comparison, but only for ordering/finding items on pages. - - * vadim 03/24/97 - * * make sure the offset points to an actual key before trying to * compare it... */ if (offset <= maxoff) { + /* + * _bt_compare returns 0 for (1,NULL) and (1,NULL) - this is how we + * handle NULLs - and so we must not use _bt_compare in real + * comparison, but only for ordering/finding items on pages. 
- + * vadim 03/24/97 + */ if (!_bt_isequal(itupdesc, page, offset, natts, itup_scankey)) break; /* we're past all the equal tuples */ + curitemid = PageGetItemId(page, offset); /* - * Have to check is inserted heap tuple deleted one (i.e. just - * moved to another place by vacuum)! We only need to do this - * once, but don't want to do it at all unless we see equal - * tuples, so as not to slow down unequal case. + * We can skip the heap fetch if the item is marked killed. */ - if (chtup) + if (!ItemIdDeleted(curitemid)) { - htup.t_self = btitem->bti_itup.t_tid; - heap_fetch(heapRel, SnapshotDirty, &htup, &buffer, NULL); - if (htup.t_data == NULL) /* YES! */ - break; - /* Live tuple is being inserted, so continue checking */ - ReleaseBuffer(buffer); - chtup = false; - } - - cbti = (BTItem) PageGetItem(page, PageGetItemId(page, offset)); - htup.t_self = cbti->bti_itup.t_tid; - heap_fetch(heapRel, SnapshotDirty, &htup, &buffer, NULL); - if (htup.t_data != NULL) /* it is a duplicate */ - { - TransactionId xwait = - (TransactionIdIsValid(SnapshotDirty->xmin)) ? - SnapshotDirty->xmin : SnapshotDirty->xmax; - - /* - * If this tuple is being updated by other transaction - * then we have to wait for its commit/abort. - */ - ReleaseBuffer(buffer); - if (TransactionIdIsValid(xwait)) + cbti = (BTItem) PageGetItem(page, curitemid); + htup.t_self = cbti->bti_itup.t_tid; + if (heap_fetch(heapRel, SnapshotDirty, &htup, &hbuffer, + true, NULL)) { - if (nbuf != InvalidBuffer) - _bt_relbuf(rel, nbuf); - /* Tell _bt_doinsert to wait... */ - return xwait; - } + /* it is a duplicate */ + TransactionId xwait = + (TransactionIdIsValid(SnapshotDirty->xmin)) ? + SnapshotDirty->xmin : SnapshotDirty->xmax; - /* - * Otherwise we have a definite conflict. - */ - elog(ERROR, "Cannot insert a duplicate key into unique index %s", - RelationGetRelationName(rel)); + ReleaseBuffer(hbuffer); + /* + * If this tuple is being updated by other transaction + * then we have to wait for its commit/abort. 
+ */ + if (TransactionIdIsValid(xwait)) + { + if (nbuf != InvalidBuffer) + _bt_relbuf(rel, nbuf); + /* Tell _bt_doinsert to wait... */ + return xwait; + } + + /* + * Otherwise we have a definite conflict. + */ + elog(ERROR, "Cannot insert a duplicate key into unique index %s", + RelationGetRelationName(rel)); + } + else + { + /* + * Hmm, if we can't see the tuple, maybe it can be + * marked killed. This logic should match index_getnext + * and btgettuple. + */ + uint16 sv_infomask; + + LockBuffer(hbuffer, BUFFER_LOCK_SHARE); + sv_infomask = htup.t_data->t_infomask; + if (HeapTupleSatisfiesVacuum(htup.t_data, + RecentGlobalXmin) == + HEAPTUPLE_DEAD) + { + curitemid->lp_flags |= LP_DELETE; + SetBufferCommitInfoNeedsSave(buf); + } + if (sv_infomask != htup.t_data->t_infomask) + SetBufferCommitInfoNeedsSave(hbuffer); + LockBuffer(hbuffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(hbuffer); + } } - /* htup null so no buffer to release */ } /* diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 206a1f17a3..e4163da987 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.89 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.90 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -271,6 +271,7 @@ btinsert(PG_FUNCTION_ARGS) char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); Relation heapRel = (Relation) PG_GETARG_POINTER(4); + bool checkUnique = PG_GETARG_BOOL(5); InsertIndexResult res; BTItem btitem; IndexTuple itup; @@ -280,7 +281,7 @@ btinsert(PG_FUNCTION_ARGS) itup->t_tid = *ht_ctid; btitem = _bt_formitem(itup); - res = _bt_doinsert(rel, btitem, rel->rd_uniqueindex, heapRel); 
+ res = _bt_doinsert(rel, btitem, checkUnique, heapRel); pfree(btitem); pfree(itup); @@ -296,14 +297,16 @@ btgettuple(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); - bool res; + BTScanOpaque so = (BTScanOpaque) scan->opaque; + Page page; + OffsetNumber offnum; + bool res; /* * If we've already initialized this scan, we can just advance it in * the appropriate direction. If we haven't done so yet, we call a * routine to get the first item in the scan. */ - if (ItemPointerIsValid(&(scan->currentItemData))) { /* @@ -312,11 +315,47 @@ btgettuple(PG_FUNCTION_ARGS) * buffer, too. */ _bt_restscan(scan); + /* + * Check to see if we should kill the previously-fetched tuple. + */ + if (scan->kill_prior_tuple) + { + /* + * Yes, so mark it by setting the LP_DELETE bit in the item flags. + */ + offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); + page = BufferGetPage(so->btso_curbuf); + PageGetItemId(page, offnum)->lp_flags |= LP_DELETE; + /* + * Since this can be redone later if needed, it's treated the + * same as a commit-hint-bit status update for heap tuples: + * we mark the buffer dirty but don't make a WAL log entry. + */ + SetBufferCommitInfoNeedsSave(so->btso_curbuf); + } + /* + * Now continue the scan. + */ res = _bt_next(scan, dir); } else res = _bt_first(scan, dir); + /* + * Skip killed tuples if asked to. + */ + if (scan->ignore_killed_tuples) + { + while (res) + { + offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); + page = BufferGetPage(so->btso_curbuf); + if (!ItemIdDeleted(PageGetItemId(page, offnum))) + break; + res = _bt_next(scan, dir); + } + } + /* * Save heap TID to use it in _bt_restscan. Then release the read * lock on the buffer so that we aren't blocking other backends. 
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 2cfb8c8f00..33d5dd9cbe 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.70 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.71 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -425,7 +425,8 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) bool _bt_first(IndexScanDesc scan, ScanDirection dir) { - Relation rel; + Relation rel = scan->indexRelation; + BTScanOpaque so = (BTScanOpaque) scan->opaque; Buffer buf; Page page; BTStack stack; @@ -437,7 +438,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) StrategyNumber strat; bool res; int32 result; - BTScanOpaque so; bool scanFromEnd; bool continuescan; ScanKey scankeys = NULL; @@ -447,14 +447,11 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) j; StrategyNumber strat_total; - rel = scan->indexRelation; - so = (BTScanOpaque) scan->opaque; - /* * Order the scan keys in our canonical fashion and eliminate any * redundant keys. 
*/ - _bt_orderkeys(rel, so); + _bt_orderkeys(scan); /* * Quit now if _bt_orderkeys() discovered that the scan keys can never diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 2f596eac11..c3462a75bc 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.48 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.49 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,9 @@ #include "executor/execdebug.h" +static int _bt_getstrategynumber(RegProcedure sk_procedure, StrategyMap map); + + /* * _bt_mkscankey * Build a scan key that contains comparison data from itup @@ -174,6 +177,11 @@ _bt_formitem(IndexTuple itup) * attribute, which can be seen to be correct by considering the above * example. * + * Furthermore, we detect the case where the index is unique and we have + * equality quals for all columns. In this case there can be at most one + * (visible) matching tuple. index_getnext uses this to avoid uselessly + * continuing the scan after finding one match. + * * The initial ordering of the keys is expected to be by attribute already * (see group_clauses_by_indexkey() in indxpath.c). The task here is to * standardize the appearance of multiple keys for the same attribute. 
@@ -191,8 +199,10 @@ _bt_formitem(IndexTuple itup) *---------- */ void -_bt_orderkeys(Relation relation, BTScanOpaque so) +_bt_orderkeys(IndexScanDesc scan) { + Relation relation = scan->indexRelation; + BTScanOpaque so = (BTScanOpaque) scan->opaque; ScanKeyData xform[BTMaxStrategyNumber]; bool init[BTMaxStrategyNumber]; int numberOfKeys = so->numberOfKeys; @@ -208,6 +218,7 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) so->qual_ok = true; so->numberOfRequiredKeys = 0; + scan->keys_are_unique = false; if (numberOfKeys < 1) return; /* done if qual-less scan */ @@ -228,6 +239,17 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) */ if (cur->sk_flags & SK_ISNULL) so->qual_ok = false; + else if (relation->rd_index->indisunique && + relation->rd_rel->relnatts == 1) + { + /* it's a unique index, do we have an equality qual? */ + map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), + BTMaxStrategyNumber, + 1); + j = _bt_getstrategynumber(cur->sk_procedure, map); + if (j == (BTEqualStrategyNumber - 1)) + scan->keys_are_unique = true; + } so->numberOfRequiredKeys = 1; return; } @@ -390,17 +412,8 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) MemSet(init, 0, sizeof(init)); } - /* - * OK, figure out which strategy this key corresponds to - */ - for (j = BTMaxStrategyNumber; --j >= 0;) - { - if (cur->sk_procedure == map->entry[j].sk_procedure) - break; - } - if (j < 0) - elog(ERROR, "_bt_orderkeys: unable to identify operator %u", - cur->sk_procedure); + /* figure out which strategy this key's operator corresponds to */ + j = _bt_getstrategynumber(cur->sk_procedure, map); /* have we seen one of these before? */ if (init[j]) @@ -424,6 +437,34 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) } so->numberOfKeys = new_numberOfKeys; + + /* + * If unique index and we have equality keys for all columns, + * set keys_are_unique flag for higher levels. 
+ */ + if (allEqualSoFar && relation->rd_index->indisunique && + relation->rd_rel->relnatts == new_numberOfKeys) + scan->keys_are_unique = true; +} + +/* + * Determine which btree strategy an operator procedure matches. + * + * Result is strategy number minus 1. + */ +static int +_bt_getstrategynumber(RegProcedure sk_procedure, StrategyMap map) +{ + int j; + + for (j = BTMaxStrategyNumber; --j >= 0;) + { + if (sk_procedure == map->entry[j].sk_procedure) + return j; + } + elog(ERROR, "_bt_getstrategynumber: unable to identify operator %u", + sk_procedure); + return -1; /* keep compiler quiet */ } /* diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index a72f774760..006777b931 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.71 2002/05/20 23:51:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.72 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -223,9 +223,9 @@ rtinsert(PG_FUNCTION_ARGS) Datum *datum = (Datum *) PG_GETARG_POINTER(1); char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); - #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); + bool checkUnique = PG_GETARG_BOOL(5); #endif InsertIndexResult res; IndexTuple itup; @@ -1206,6 +1206,8 @@ rtbulkdelete(PG_FUNCTION_ARGS) /* walk through the entire index */ iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL); + /* including killed tuples */ + iscan->ignore_killed_tuples = false; while (index_getnext_indexitem(iscan, ForwardScanDirection)) { diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index cc26af128a..1b8800d193 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * 
$Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.92 2002/04/19 16:36:08 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.93 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -160,7 +160,8 @@ CatalogIndexInsert(Relation *idescs, nullv); indexRes = index_insert(idescs[i], datum, nullv, - &heapTuple->t_self, heapRelation); + &heapTuple->t_self, heapRelation, + idescs[i]->rd_uniqueindex); if (indexRes) pfree(indexRes); pfree(indexInfo); diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 3f5cc96f41..5785139a51 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.34 2002/05/21 22:05:54 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.35 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -655,8 +655,8 @@ pageloop:; goto pageloop; } ItemPointerSet(&targtuple.t_self, targblock, targoffset); - heap_fetch(onerel, SnapshotNow, &targtuple, &tupbuffer, NULL); - if (targtuple.t_data != NULL) + if (heap_fetch(onerel, SnapshotNow, &targtuple, &tupbuffer, + false, NULL)) { /* * Found a suitable tuple, so save it, replacing one old diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index e0b01b6fee..2cb5f2e457 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/trigger.c,v 1.118 2002/05/21 22:05:54 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/trigger.c,v 1.119 2002/05/24 18:57:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1421,16 +1421,14 @@ 
DeferredTriggerExecute(DeferredTriggerEvent event, int itemno, if (ItemPointerIsValid(&(event->dte_oldctid))) { ItemPointerCopy(&(event->dte_oldctid), &(oldtuple.t_self)); - heap_fetch(rel, SnapshotAny, &oldtuple, &oldbuffer, NULL); - if (!oldtuple.t_data) + if (!heap_fetch(rel, SnapshotAny, &oldtuple, &oldbuffer, false, NULL)) elog(ERROR, "DeferredTriggerExecute: failed to fetch old tuple"); } if (ItemPointerIsValid(&(event->dte_newctid))) { ItemPointerCopy(&(event->dte_newctid), &(newtuple.t_self)); - heap_fetch(rel, SnapshotAny, &newtuple, &newbuffer, NULL); - if (!newtuple.t_data) + if (!heap_fetch(rel, SnapshotAny, &newtuple, &newbuffer, false, NULL)) elog(ERROR, "DeferredTriggerExecute: failed to fetch new tuple"); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 4889e30040..953ff39714 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.225 2002/05/20 23:51:42 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.226 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -458,7 +458,9 @@ vac_update_relstats(Oid relid, BlockNumber num_pages, double num_tuples, /* get the buffer cache tuple */ rtup.t_self = ctup->t_self; ReleaseSysCache(ctup); - heap_fetch(rd, SnapshotNow, &rtup, &buffer, NULL); + if (!heap_fetch(rd, SnapshotNow, &rtup, &buffer, false, NULL)) + elog(ERROR, "pg_class entry for relid %u vanished during vacuuming", + relid); /* overwrite the existing statistics in the tuple */ pgcform = (Form_pg_class) GETSTRUCT(&rtup); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 29687a54ba..fa276aaeec 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -27,7 +27,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execMain.c,v 1.163 
2002/05/21 22:59:01 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execMain.c,v 1.164 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1489,7 +1489,7 @@ lreplace:; numIndices = resultRelInfo->ri_NumIndices; if (numIndices > 0) - ExecInsertIndexTuples(slot, &(tuple->t_self), estate, true); + ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false); /* AFTER ROW UPDATE Triggers */ if (resultRelInfo->ri_TrigDesc) @@ -1639,8 +1639,7 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid) { Buffer buffer; - heap_fetch(relation, SnapshotDirty, &tuple, &buffer, NULL); - if (tuple.t_data != NULL) + if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, false, NULL)) { TransactionId xwait = SnapshotDirty->xmax; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index a6b5048326..32cd88ed07 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.81 2002/05/12 20:10:02 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.82 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -599,7 +599,7 @@ void ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, EState *estate, - bool is_update) + bool is_vacuum) { HeapTuple heapTuple; ResultRelInfo *resultRelInfo; @@ -667,11 +667,17 @@ ExecInsertIndexTuples(TupleTableSlot *slot, datum, nullv); + /* + * The index AM does the rest. Note we suppress unique-index + * checks if we are being called from VACUUM, since VACUUM may + * need to move dead tuples that have the same keys as live ones. 
+ */ result = index_insert(relationDescs[i], /* index relation */ datum, /* array of heaptuple Datums */ nullv, /* info on nulls */ &(heapTuple->t_self), /* tid of heap tuple */ - heapRelation); + heapRelation, + relationDescs[i]->rd_uniqueindex && !is_vacuum); /* * keep track of index inserts for debugging diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 5ee222744d..46e9daed6c 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeTidscan.c,v 1.23 2002/02/19 20:11:14 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeTidscan.c,v 1.24 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -150,11 +150,8 @@ TidNext(TidScan *node) { bool slot_is_valid = false; - tuple->t_datamcxt = NULL; - tuple->t_data = NULL; tuple->t_self = tidList[tidstate->tss_TidPtr]; - heap_fetch(heapRelation, snapshot, tuple, &buffer, NULL); - if (tuple->t_data != NULL) + if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL)) { bool prev_matches = false; int prev_tid; @@ -198,8 +195,6 @@ TidNext(TidScan *node) else ExecClearTuple(slot); } - else if (BufferIsValid(buffer)) - ReleaseBuffer(buffer); tidNumber++; if (bBackward) tidstate->tss_TidPtr--; diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index a32c8ae703..6415271bea 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.46 2002/05/21 22:05:55 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.47 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -297,6 +297,10 @@ GetOldestXmin(bool allDbs) * it is considered running or not. 
* This ensures that the set of transactions seen as "running" by the * current xact will not change after it takes the snapshot. + * + * Also, we compute the current global xmin (oldest xmin across all running + * transactions) and save it in RecentGlobalXmin. This is the same + * computation done by GetOldestXmin(TRUE). *---------- */ Snapshot @@ -305,6 +309,9 @@ GetSnapshotData(bool serializable) Snapshot snapshot = (Snapshot) malloc(sizeof(SnapshotData)); SISeg *segP = shmInvalBuffer; ProcState *stateP = segP->procState; + TransactionId xmin; + TransactionId xmax; + TransactionId globalxmin; int index; int count = 0; @@ -321,7 +328,7 @@ GetSnapshotData(bool serializable) if (snapshot->xip == NULL) elog(ERROR, "Memory exhausted in GetSnapshotData"); - snapshot->xmin = GetCurrentTransactionId(); + globalxmin = xmin = GetCurrentTransactionId(); /* * If we are going to set MyProc->xmin then we'd better get exclusive @@ -356,7 +363,7 @@ GetSnapshotData(bool serializable) *-------------------- */ - snapshot->xmax = ReadNewTransactionId(); + xmax = ReadNewTransactionId(); for (index = 0; index < segP->lastBackend; index++) { @@ -374,28 +381,48 @@ GetSnapshotData(bool serializable) * running a transaction, and xacts started since we read the * next transaction ID. There's no need to store XIDs above * what we got from ReadNewTransactionId, since we'll treat - * them as running anyway. + * them as running anyway. We also assume that such xacts can't + * compute an xmin older than ours, so they needn't be considered + * in computing globalxmin. 
*/ if (proc == MyProc || !TransactionIdIsNormal(xid) || - TransactionIdFollowsOrEquals(xid, snapshot->xmax)) + TransactionIdFollowsOrEquals(xid, xmax)) continue; - if (TransactionIdPrecedes(xid, snapshot->xmin)) - snapshot->xmin = xid; + if (TransactionIdPrecedes(xid, xmin)) + xmin = xid; snapshot->xip[count] = xid; count++; + + /* Update globalxmin to be the smallest valid xmin */ + xid = proc->xmin; + if (TransactionIdIsNormal(xid)) + if (TransactionIdPrecedes(xid, globalxmin)) + globalxmin = xid; } } if (serializable) - MyProc->xmin = snapshot->xmin; + MyProc->xmin = xmin; LWLockRelease(SInvalLock); /* Serializable snapshot must be computed before any other... */ Assert(TransactionIdIsValid(MyProc->xmin)); + /* + * Update globalxmin to include actual process xids. This is a slightly + * different way of computing it than GetOldestXmin uses, but should give + * the same result. + */ + if (TransactionIdPrecedes(xmin, globalxmin)) + globalxmin = xmin; + + RecentGlobalXmin = globalxmin; + + snapshot->xmin = xmin; + snapshot->xmax = xmax; snapshot->xcnt = count; snapshot->curcid = GetCurrentCommandId(); diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index 582ec28183..9f51304e68 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -16,7 +16,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.52 2002/05/21 22:59:01 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.53 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,9 @@ Snapshot SnapshotDirty = &SnapshotDirtyData; Snapshot QuerySnapshot = NULL; Snapshot SerializableSnapshot = NULL; +/* This is updated by GetSnapshotData: */ +TransactionId RecentGlobalXmin = InvalidTransactionId; + bool ReferentialIntegritySnapshotOverride = false; diff --git 
a/src/include/access/genam.h b/src/include/access/genam.h index 610afa11f0..dd344043b3 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: genam.h,v 1.34 2002/05/20 23:51:43 tgl Exp $ + * $Id: genam.h,v 1.35 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,7 +54,8 @@ extern void index_close(Relation relation); extern InsertIndexResult index_insert(Relation indexRelation, Datum *datums, char *nulls, ItemPointer heap_t_ctid, - Relation heapRelation); + Relation heapRelation, + bool check_uniqueness); extern IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 7bf75f512d..73ed6e7f15 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: heapam.h,v 1.75 2002/05/21 22:05:55 tgl Exp $ + * $Id: heapam.h,v 1.76 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -152,8 +152,8 @@ extern void heap_rescan(HeapScanDesc scan, ScanKey key); extern void heap_endscan(HeapScanDesc scan); extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); -extern void heap_fetch(Relation relation, Snapshot snapshot, - HeapTuple tuple, Buffer *userbuf, +extern bool heap_fetch(Relation relation, Snapshot snapshot, + HeapTuple tuple, Buffer *userbuf, bool keep_buf, PgStat_Info *pgstat_info); extern ItemPointer heap_get_latest_tid(Relation relation, Snapshot snapshot, diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index c29defba8f..bef621dd68 100644 --- 
a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nbtree.h,v 1.60 2002/05/20 23:51:43 tgl Exp $ + * $Id: nbtree.h,v 1.61 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -383,7 +383,7 @@ extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup); extern ScanKey _bt_mkscankey_nodata(Relation rel); extern void _bt_freeskey(ScanKey skey); extern void _bt_freestack(BTStack stack); -extern void _bt_orderkeys(Relation relation, BTScanOpaque so); +extern void _bt_orderkeys(IndexScanDesc scan); extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, ScanDirection dir, bool *continuescan); extern BTItem _bt_formitem(IndexTuple itup); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 87e3b52369..e23681876d 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: relscan.h,v 1.26 2002/05/20 23:51:43 tgl Exp $ + * $Id: relscan.h,v 1.27 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -46,7 +46,15 @@ typedef struct IndexScanDescData int numberOfKeys; /* number of scan keys */ ScanKey keyData; /* array of scan key descriptors */ + /* signaling to index AM about killing index tuples */ + bool kill_prior_tuple; /* last-returned tuple is dead */ + bool ignore_killed_tuples; /* do not return killed entries */ + + /* set by index AM if scan keys satisfy index's uniqueness constraint */ + bool keys_are_unique; + /* scan current state */ + bool got_tuple; /* true after successful index_getnext */ void *opaque; /* access-method-specific info */ 
ItemPointerData currentItemData; /* current index pointer */ ItemPointerData currentMarkData; /* marked position, if any */ diff --git a/src/include/access/valid.h b/src/include/access/valid.h index b76e5111c2..d16c0f4e2c 100644 --- a/src/include/access/valid.h +++ b/src/include/access/valid.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: valid.h,v 1.26 2001/11/05 17:46:31 momjian Exp $ + * $Id: valid.h,v 1.27 2002/05/24 18:57:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -77,14 +77,9 @@ do \ /* ---------------- * HeapTupleSatisfies * - * Returns a valid HeapTuple if it satisfies the timequal and keytest. - * Returns NULL otherwise. Used to be heap_satisifies (sic) which - * returned a boolean. It now returns a tuple so that we can avoid doing two - * PageGetItem's per tuple. - * - * Complete check of validity including LP_CTUP and keytest. - * This should perhaps be combined with valid somehow in the - * future. (Also, additional rule tests/time range tests.) + * res is set TRUE if the HeapTuple satisfies the timequal and keytest, + * otherwise it is set FALSE. Note that the hint bits in the HeapTuple's + * t_infomask may be updated as a side effect. * * on 8/21/92 mao says: i rearranged the tests here to do keytest before * SatisfiesTimeQual. 
profiling indicated that even for vacuumed relations, @@ -100,35 +95,28 @@ do \ disk_page, \ seeself, \ nKeys, \ - key) \ + key, \ + res) \ do \ { \ /* We use underscores to protect the variable passed in as parameters */ \ - bool _res; \ - \ if ((key) != NULL) \ HeapKeyTest(tuple, RelationGetDescr(relation), \ - (nKeys), (key), _res); \ + (nKeys), (key), (res)); \ else \ - _res = TRUE; \ + (res) = true; \ \ - if (_res) \ + if (res) \ { \ if ((relation)->rd_rel->relkind != RELKIND_UNCATALOGED) \ { \ uint16 _infomask = (tuple)->t_data->t_infomask; \ \ - _res = HeapTupleSatisfiesVisibility((tuple), (seeself)); \ + (res) = HeapTupleSatisfiesVisibility((tuple), (seeself)); \ if ((tuple)->t_data->t_infomask != _infomask) \ SetBufferCommitInfoNeedsSave(buffer); \ - if (!_res) \ - (tuple)->t_data = NULL; \ } \ } \ - else \ - { \ - (tuple)->t_data = NULL; \ - } \ } while (0) #endif /* VALID_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 2cc1f8aac1..a7d56dc90d 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.239 2002/05/22 17:21:01 petere Exp $ + * $Id: pg_proc.h,v 1.240 2002/05/24 18:57:56 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -675,7 +675,7 @@ DESCR("convert int4 to float4"); DATA(insert OID = 319 ( int4 PGNSP PGUID 12 f f f t f i 1 23 "700" 100 0 0 100 ftoi4 - _null_ )); DESCR("convert float4 to int4"); -DATA(insert OID = 320 ( rtinsert PGNSP PGUID 12 f f f t f v 5 23 "0 0 0 0 0" 100 0 0 100 rtinsert - _null_ )); +DATA(insert OID = 320 ( rtinsert PGNSP PGUID 12 f f f t f v 6 23 "0 0 0 0 0 0" 100 0 0 100 rtinsert - _null_ )); DESCR("r-tree(internal)"); DATA(insert OID = 322 ( rtgettuple PGNSP PGUID 12 f f f t f v 2 23 "0 0" 100 0 0 100 rtgettuple - _null_ )); 
DESCR("r-tree(internal)"); @@ -698,7 +698,7 @@ DESCR("r-tree(internal)"); DATA(insert OID = 330 ( btgettuple PGNSP PGUID 12 f f f t f v 2 23 "0 0" 100 0 0 100 btgettuple - _null_ )); DESCR("btree(internal)"); -DATA(insert OID = 331 ( btinsert PGNSP PGUID 12 f f f t f v 5 23 "0 0 0 0 0" 100 0 0 100 btinsert - _null_ )); +DATA(insert OID = 331 ( btinsert PGNSP PGUID 12 f f f t f v 6 23 "0 0 0 0 0 0" 100 0 0 100 btinsert - _null_ )); DESCR("btree(internal)"); DATA(insert OID = 333 ( btbeginscan PGNSP PGUID 12 f f f t f v 3 23 "0 0 0" 100 0 0 100 btbeginscan - _null_ )); DESCR("btree(internal)"); @@ -801,7 +801,7 @@ DESCR("convert char() to name"); DATA(insert OID = 440 ( hashgettuple PGNSP PGUID 12 f f f t f v 2 23 "0 0" 100 0 0 100 hashgettuple - _null_ )); DESCR("hash(internal)"); -DATA(insert OID = 441 ( hashinsert PGNSP PGUID 12 f f f t f v 5 23 "0 0 0 0 0" 100 0 0 100 hashinsert - _null_ )); +DATA(insert OID = 441 ( hashinsert PGNSP PGUID 12 f f f t f v 6 23 "0 0 0 0 0 0" 100 0 0 100 hashinsert - _null_ )); DESCR("hash(internal)"); DATA(insert OID = 443 ( hashbeginscan PGNSP PGUID 12 f f f t f v 3 23 "0 0 0" 100 0 0 100 hashbeginscan - _null_ )); DESCR("hash(internal)"); @@ -1031,7 +1031,7 @@ DESCR("smaller of two"); DATA(insert OID = 774 ( gistgettuple PGNSP PGUID 12 f f f t f v 2 23 "0 0" 100 0 0 100 gistgettuple - _null_ )); DESCR("gist(internal)"); -DATA(insert OID = 775 ( gistinsert PGNSP PGUID 12 f f f t f v 5 23 "0 0 0 0 0" 100 0 0 100 gistinsert - _null_ )); +DATA(insert OID = 775 ( gistinsert PGNSP PGUID 12 f f f t f v 6 23 "0 0 0 0 0 0" 100 0 0 100 gistinsert - _null_ )); DESCR("gist(internal)"); DATA(insert OID = 777 ( gistbeginscan PGNSP PGUID 12 f f f t f v 3 23 "0 0 0" 100 0 0 100 gistbeginscan - _null_ )); DESCR("gist(internal)"); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 6752d72ca7..293550e568 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -7,7 +7,7 @@ * Portions 
Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: executor.h,v 1.64 2002/05/12 20:10:04 tgl Exp $ + * $Id: executor.h,v 1.65 2002/05/24 18:57:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -170,7 +170,7 @@ extern ExprContext *MakePerTupleExprContext(EState *estate); extern void ExecOpenIndices(ResultRelInfo *resultRelInfo); extern void ExecCloseIndices(ResultRelInfo *resultRelInfo); extern void ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, - EState *estate, bool is_update); + EState *estate, bool is_vacuum); extern void RegisterExprContextCallback(ExprContext *econtext, ExprContextCallbackFunction function, diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h index 3ddceff5f8..cdda44023b 100644 --- a/src/include/utils/tqual.h +++ b/src/include/utils/tqual.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: tqual.h,v 1.40 2002/05/21 22:59:01 tgl Exp $ + * $Id: tqual.h,v 1.41 2002/05/24 18:57:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -41,6 +41,8 @@ extern DLLIMPORT Snapshot SnapshotDirty; extern DLLIMPORT Snapshot QuerySnapshot; extern DLLIMPORT Snapshot SerializableSnapshot; +extern TransactionId RecentGlobalXmin; + extern bool ReferentialIntegritySnapshotOverride; #define IsSnapshotNow(snapshot) ((Snapshot) (snapshot) == SnapshotNow)