/*-------------------------------------------------------------------------
*
* heapam_handler.c
* heap table access method code
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/access/heap/heapam_handler.c
*
*
* NOTES
* This file wires up the lower-level heapam.c et al. routines with the
* tableam abstraction.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/lmgr.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
static const TableAmRoutine heapam_methods;
/* ------------------------------------------------------------------------
* Slot related callbacks for heap AM
* ------------------------------------------------------------------------
*/
static const TupleTableSlotOps *
heapam_slot_callbacks(Relation relation)
{
return &TTSOpsBufferHeapTuple;
}
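/*
* Usage sketch (an illustration, not part of this file's API): callers
* normally reach this through the tableam wrapper rather than calling it
* directly, roughly:
*
*		const TupleTableSlotOps *ops = table_slot_callbacks(rel);
*		TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
*														ops);
*		...
*		ExecDropSingleTupleTableSlot(slot);
*
* table_slot_create() packages these steps for the common case.
*/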
/* ------------------------------------------------------------------------
* Index Scan Callbacks for heap AM
* ------------------------------------------------------------------------
*/
static IndexFetchTableData *
heapam_index_fetch_begin(Relation rel)
{
IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
hscan->xs_base.rel = rel;
hscan->xs_cbuf = InvalidBuffer;
return &hscan->xs_base;
}
static void
heapam_index_fetch_reset(IndexFetchTableData *scan)
{
IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
if (BufferIsValid(hscan->xs_cbuf))
{
ReleaseBuffer(hscan->xs_cbuf);
hscan->xs_cbuf = InvalidBuffer;
}
}
static void
heapam_index_fetch_end(IndexFetchTableData *scan)
{
IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
heapam_index_fetch_reset(scan);
pfree(hscan);
}
static bool
heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
ItemPointer tid,
Snapshot snapshot,
TupleTableSlot *slot,
bool *call_again, bool *all_dead)
{
IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
bool got_heap_tuple;
Assert(TTS_IS_BUFFERTUPLE(slot));
/* We can skip the buffer-switching logic if we're mid-way through a HOT chain. */
if (!*call_again)
{
/* Switch to correct buffer if we don't have it already */
Buffer prev_buf = hscan->xs_cbuf;
hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
hscan->xs_base.rel,
ItemPointerGetBlockNumber(tid));
/*
* Prune page, but only if we weren't already on this page
*/
if (prev_buf != hscan->xs_cbuf)
heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
}
/* Obtain share-lock on the buffer so we can examine visibility */
LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
got_heap_tuple = heap_hot_search_buffer(tid,
hscan->xs_base.rel,
hscan->xs_cbuf,
snapshot,
&bslot->base.tupdata,
all_dead,
!*call_again);
bslot->base.tupdata.t_self = *tid;
LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
if (got_heap_tuple)
{
/*
* Only in a non-MVCC snapshot can more than one member of the HOT
* chain be visible.
*/
*call_again = !IsMVCCSnapshot(snapshot);
slot->tts_tableOid = RelationGetRelid(scan->rel);
ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
}
else
{
/* We've reached the end of the HOT chain. */
*call_again = false;
}
return got_heap_tuple;
}
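/*
* Caller-side sketch (hedged; the real driver is index_fetch_heap() in
* indexam.c).  Under a non-MVCC snapshot several members of one HOT chain
* can be visible, so the caller re-invokes with the same TID while
* *call_again remains true:
*
*		bool	call_again = false;
*		bool	all_dead = false;
*
*		do
*		{
*			if (table_index_fetch_tuple(fetch, tid, snapshot, slot,
*										&call_again, &all_dead))
*				... process the visible version now stored in slot ...
*		} while (call_again);
*/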
/* ------------------------------------------------------------------------
* Callbacks for non-modifying operations on individual tuples for heap AM
* ------------------------------------------------------------------------
*/
static bool
heapam_fetch_row_version(Relation relation,
ItemPointer tid,
Snapshot snapshot,
TupleTableSlot *slot)
{
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
Buffer buffer;
Assert(TTS_IS_BUFFERTUPLE(slot));
bslot->base.tupdata.t_self = *tid;
if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer))
{
/* store in slot, transferring existing pin */
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
slot->tts_tableOid = RelationGetRelid(relation);
return true;
}
return false;
}
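/*
* Hedged example: fetching the current version of a row by TID through the
* tableam wrapper (wrapper name as of this era's tableam.h; verify against
* your tree):
*
*		TupleTableSlot *slot = table_slot_create(rel, NULL);
*
*		if (table_fetch_row_version(rel, tid, GetActiveSnapshot(), slot))
*			... the visible row version is now in slot, pin transferred ...
*/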
static bool
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
Snapshot snapshot)
{
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
bool res;
Assert(TTS_IS_BUFFERTUPLE(slot));
Assert(BufferIsValid(bslot->buffer));
/*
* We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
* Caller should be holding pin, but not lock.
*/
LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
bslot->buffer);
LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
return res;
}
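/*
* Hedged example: re-checking visibility of an already-fetched row under
* another snapshot; the slot must still hold its buffer pin, per the
* comment above:
*
*		if (table_tuple_satisfies_snapshot(rel, slot, othersnap))
*			... row is visible to othersnap as well ...
*/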
/* ----------------------------------------------------------------------------
* Functions for manipulations of physical tuples for heap AM.
* ----------------------------------------------------------------------------
*/
static void
heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
int options, BulkInsertState bistate)
{
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
/* Perform the insertion, and copy the resulting ItemPointer */
heap_insert(relation, tuple, cid, options, bistate);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
if (shouldFree)
pfree(tuple);
}
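/*
* Hedged insertion sketch from an executor-like caller (wrapper name as of
* this era's tableam.h).  Note that ExecFetchSlotHeapTuple() above
* materializes whatever slot type is passed in, so a virtual tuple is fine:
*
*		TupleTableSlot *slot = table_slot_create(rel, NULL);
*
*		ExecClearTuple(slot);
*		slot->tts_values[0] = Int32GetDatum(42);
*		slot->tts_isnull[0] = false;
*		ExecStoreVirtualTuple(slot);
*		table_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);
*/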
static void
heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid,
int options, BulkInsertState bistate, uint32 specToken)
{
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
options |= HEAP_INSERT_SPECULATIVE;
/* Perform the insertion, and copy the resulting ItemPointer */
heap_insert(relation, tuple, cid, options, bistate);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
if (shouldFree)
pfree(tuple);
}
static void
heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken,
bool succeeded)
{
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
/* adjust the tuple's state accordingly */
if (succeeded)
heap_finish_speculative(relation, &slot->tts_tid);
else
heap_abort_speculative(relation, &slot->tts_tid);
if (shouldFree)
pfree(tuple);
}
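/*
* Hedged sketch of the speculative-insertion protocol behind INSERT ...
* ON CONFLICT (the real caller is ExecInsert() in nodeModifyTable.c):
*
*		specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
*		table_insert_speculative(rel, slot, cid, 0, NULL, specToken);
*		... probe unique indexes, setting specConflict ...
*		table_complete_speculative(rel, slot, specToken, !specConflict);
*		SpeculativeInsertionLockRelease(GetCurrentTransactionId());
*
* On success the token is simply cleared in place; on conflict the
* not-yet-visible tuple is killed via heap_abort_speculative().
*/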
static TM_Result
heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
Snapshot snapshot, Snapshot crosscheck, bool wait,
TM_FailureData *tmfd, bool changingPart)
{
/*
* Currently, deletion of index tuples is handled by VACUUM. If the storage
* AM were to clean up dead tuples on its own, that would also be the time
* to delete the corresponding index tuples.
*/
return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
}
static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd,
LockTupleMode *lockmode, bool *update_indexes)
{
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
TM_Result result;
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
tmfd, lockmode);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
/*
* Decide whether new index entries are needed for the tuple
*
* Note: heap_update returns the tid (location) of the new tuple in the
* t_self field.
*
* If it's a HOT update, we mustn't insert new index entries.
*/
*update_indexes = result == TM_Ok && !HeapTupleIsHeapOnly(tuple);
if (shouldFree)
pfree(tuple);
return result;
}
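/*
* Hedged caller sketch: ExecUpdate() in nodeModifyTable.c consults
* *update_indexes to decide whether new index entries are needed (they are
* not for a HOT update):
*
*		result = table_update(rel, otid, slot, cid, snapshot, crosscheck,
*							  true, &tmfd, &lockmode, &update_indexes);
*		if (result == TM_Ok && update_indexes)
*			recheckIndexes = ExecInsertIndexTuples(slot, estate,
*												   false, NULL, NIL);
*
* (ExecInsertIndexTuples's argument list is era-specific; treat this as an
* illustration rather than a reference.)
*/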
static TM_Result
heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd)
{
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
TM_Result result;
Buffer buffer;
HeapTuple tuple = &bslot->base.tupdata;
bool follow_updates;
follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
tmfd->traversed = false;
Assert(TTS_IS_BUFFERTUPLE(slot));
tuple_lock_retry:
tuple->t_self = *tid;
result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
follow_updates, &buffer, tmfd);
if (result == TM_Updated &&
(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
{
ReleaseBuffer(buffer);
/* Should not encounter speculative tuple on recheck */
Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
{
SnapshotData SnapshotDirty;
TransactionId priorXmax;
/* it was updated, so look at the updated version */
*tid = tmfd->ctid;
/* updated row should have xmin matching this xmax */
priorXmax = tmfd->xmax;
/* signal that a tuple later in the chain is getting locked */
tmfd->traversed = true;
/*
* fetch target tuple
*
* Loop here to deal with updated or busy tuples
*/
InitDirtySnapshot(SnapshotDirty);
for (;;)
{
if (ItemPointerIndicatesMovedPartitions(tid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
tuple->t_self = *tid;
if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer))
{
/*
* If xmin isn't what we're expecting, the slot must have
* been recycled and reused for an unrelated tuple. This
* implies that the latest version of the row was deleted,
* so we need do nothing. (Should be safe to examine xmin
* without getting buffer's content lock. We assume
* reading a TransactionId to be atomic, and Xmin never
* changes in an existing tuple, except to invalid or
* frozen, and neither of those can match priorXmax.)
*/
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
priorXmax))
{
ReleaseBuffer(buffer);
return TM_Deleted;
}
/* otherwise xmin should not be dirty... */
if (TransactionIdIsValid(SnapshotDirty.xmin))
elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
/*
* If tuple is being updated by other transaction then we
* have to wait for its commit/abort, or die trying.
*/
if (TransactionIdIsValid(SnapshotDirty.xmax))
{
ReleaseBuffer(buffer);
switch (wait_policy)
{
case LockWaitBlock:
XactLockTableWait(SnapshotDirty.xmax,
relation, &tuple->t_self,
XLTW_FetchUpdated);
break;
case LockWaitSkip:
if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
/* skip instead of waiting */
return TM_WouldBlock;
break;
case LockWaitError:
if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
RelationGetRelationName(relation))));
break;
}
continue; /* loop back to repeat heap_fetch */
}
/*
* If tuple was inserted by our own transaction, we have
* to check cmin against cid: cmin >= current CID means
* our command cannot see the tuple, so we should ignore
* it. Otherwise heap_lock_tuple() will throw an error,
* and so would any later attempt to update or delete the
* tuple. (We need not check cmax because
* HeapTupleSatisfiesDirty will consider a tuple deleted
* by our transaction dead, regardless of cmax.) We just
* checked that priorXmax == xmin, so we can test that
* variable instead of doing HeapTupleHeaderGetXmin again.
*/
if (TransactionIdIsCurrentTransactionId(priorXmax) &&
HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
{
ReleaseBuffer(buffer);
return TM_Invisible;
}
/*
* This is a live tuple, so try to lock it again.
*/
ReleaseBuffer(buffer);
goto tuple_lock_retry;
}
/*
* If the referenced slot was actually empty, the latest
* version of the row must have been deleted, so we need do
* nothing.
*/
if (tuple->t_data == NULL)
{
return TM_Deleted;
}
/*
* As above, if xmin isn't what we're expecting, do nothing.
*/
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
priorXmax))
{
if (BufferIsValid(buffer))
ReleaseBuffer(buffer);
return TM_Deleted;
}
/*
* If we get here, the tuple was found but failed
* SnapshotDirty. Assuming the xmin is either a committed xact
* or our own xact (as it certainly should be if we're trying
* to modify the tuple), this must mean that the row was
* updated or deleted by either a committed xact or our own
* xact. If it was deleted, we can ignore it; if it was
* updated then chain up to the next version and repeat the
* whole process.
*
* As above, it should be safe to examine xmax and t_ctid
* without the buffer content lock, because they can't be
* changing.
*/
if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
{
/* deleted, so forget about it */
if (BufferIsValid(buffer))
ReleaseBuffer(buffer);
return TM_Deleted;
}
/* updated, so look at the updated row */
*tid = tuple->t_data->t_ctid;
/* updated row should have xmin matching this xmax */
priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
if (BufferIsValid(buffer))
ReleaseBuffer(buffer);
/* loop back to fetch next in chain */
}
}
else
{
/* tuple was deleted, so give up */
return TM_Deleted;
}
}
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
/* store in slot, transferring existing pin */
ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
return result;
}
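/*
* Hedged usage sketch: EvalPlanQual-style callers pass
* TUPLE_LOCK_FLAG_FIND_LAST_VERSION so that a TM_Updated result is chased
* to the newest row version instead of being returned:
*
*		res = table_lock_tuple(rel, tid, estate->es_snapshot, slot,
*							   estate->es_output_cid, LockTupleExclusive,
*							   LockWaitBlock,
*							   TUPLE_LOCK_FLAG_FIND_LAST_VERSION, &tmfd);
*		if (res == TM_Ok && tmfd.traversed)
*			... re-evaluate quals against the newer version in slot ...
*/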
/* ------------------------------------------------------------------------
* DDL related callbacks for heap AM.
* ------------------------------------------------------------------------
*/
static double
heapam_index_build_range_scan(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo,
bool allow_sync,
bool anyvisible,
BlockNumber start_blockno,
BlockNumber numblocks,
IndexBuildCallback callback,
void *callback_state,
TableScanDesc scan)
{
HeapScanDesc hscan;
bool is_system_catalog;
bool checking_uniqueness;
HeapTuple heapTuple;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
double reltuples;
ExprState *predicate;
TupleTableSlot *slot;
EState *estate;
ExprContext *econtext;
Snapshot snapshot;
bool need_unregister_snapshot = false;
TransactionId OldestXmin;
BlockNumber root_blkno = InvalidBlockNumber;
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
/*
* sanity checks
*/
Assert(OidIsValid(indexRelation->rd_rel->relam));
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(heapRelation);
/* See whether we're verifying uniqueness/exclusion properties */
checking_uniqueness = (indexInfo->ii_Unique ||
indexInfo->ii_ExclusionOps != NULL);
/*
* "Any visible" mode is not compatible with uniqueness checks; make sure
* only one of those is requested.
*/
Assert(!(anyvisible && checking_uniqueness));
/*
* Need an EState for evaluation of index expressions and partial-index
* predicates. Also a slot to hold the current tuple.
*/
estate = CreateExecutorState();
econtext = GetPerTupleExprContext(estate);
slot = table_slot_create(heapRelation, NULL);
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/* Set up execution state for predicate, if any. */
predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
/*
* Prepare for scan of the base relation. In a normal index build, we use
* SnapshotAny because we must retrieve all tuples and do our own time
* qual checks (because we have to index RECENTLY_DEAD tuples). In a
* concurrent build, or during bootstrap, we take a regular MVCC snapshot
* and index whatever's live according to that.
*/
OldestXmin = InvalidTransactionId;
/* okay to ignore lazy VACUUMs here */
if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
if (!scan)
{
/*
* Serial index build.
*
* Must begin our own heap scan in this case. We may also need to
* register a snapshot whose lifetime is under our direct control.
*/
if (!TransactionIdIsValid(OldestXmin))
{
snapshot = RegisterSnapshot(GetTransactionSnapshot());
need_unregister_snapshot = true;
}
else
snapshot = SnapshotAny;
scan = table_beginscan_strat(heapRelation, /* relation */
snapshot, /* snapshot */
0, /* number of keys */
NULL, /* scan key */
true, /* buffer access strategy OK */
allow_sync); /* syncscan OK? */
}
else
{
/*
* Parallel index build.
*
* Parallel case never registers/unregisters own snapshot. Snapshot
* is taken from parallel heap scan, and is SnapshotAny or an MVCC
* snapshot, based on same criteria as serial case.
*/
Assert(!IsBootstrapProcessingMode());
Assert(allow_sync);
snapshot = scan->rs_snapshot;
}
hscan = (HeapScanDesc) scan;
/*
* Must call GetOldestXmin() with SnapshotAny. Should never call
* GetOldestXmin() with MVCC snapshot. (It's especially worth checking
* this for parallel builds, since ambuild routines that support parallel
* builds must work these details out for themselves.)
*/
Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
!TransactionIdIsValid(OldestXmin));
Assert(snapshot == SnapshotAny || !anyvisible);
/* set our scan endpoints */
if (!allow_sync)
heap_setscanlimits(scan, start_blockno, numblocks);
else
{
/* syncscan can only be requested on whole relation */
Assert(start_blockno == 0);
Assert(numblocks == InvalidBlockNumber);
}
reltuples = 0;
/*
* Scan all tuples in the base relation.
*/
while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
bool tupleIsAlive;
CHECK_FOR_INTERRUPTS();
/*
* When dealing with a HOT-chain of updated tuples, we want to index
* the values of the live tuple (if any), but index it under the TID
* of the chain's root tuple. This approach is necessary to preserve
* the HOT-chain structure in the heap. So we need to be able to find
* the root item offset for every tuple that's in a HOT-chain. When
* first reaching a new page of the relation, call
* heap_get_root_tuples() to build a map of root item offsets on the
* page.
*
* It might look unsafe to use this information across buffer
* lock/unlock. However, we hold ShareLock on the table so no
* ordinary insert/update/delete should occur; and we hold pin on the
* buffer continuously while visiting the page, so no pruning
* operation can occur either.
*
* Also, although our opinions about tuple liveness could change while
* we scan the page (due to concurrent transaction commits/aborts),
* the chain root locations won't, so this info doesn't need to be
* rebuilt after waiting for another transaction.
*
* Note the implied assumption that there is no more than one live
* tuple per HOT-chain --- else we could create more than one index
* entry pointing to the same root tuple.
*/
if (hscan->rs_cblock != root_blkno)
{
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
heap_get_root_tuples(page, root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
root_blkno = hscan->rs_cblock;
}
if (snapshot == SnapshotAny)
{
/* do our own time qual check */
bool indexIt;
TransactionId xwait;
recheck:
/*
* We could possibly get away with not locking the buffer here,
* since caller should hold ShareLock on the relation, but let's
* be conservative about it. (This remark is still correct even
* with HOT-pruning: our pin on the buffer prevents pruning.)
*/
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
/*
* The criteria for counting a tuple as live in this block need to
* match what analyze.c's acquire_sample_rows() does, otherwise
* CREATE INDEX and ANALYZE may produce wildly different reltuples
* values, e.g. when there are many recently-dead tuples.
*/
switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
hscan->rs_cbuf))
{
case HEAPTUPLE_DEAD:
/* Definitely dead, we can ignore it */
indexIt = false;
tupleIsAlive = false;
break;
case HEAPTUPLE_LIVE:
/* Normal case, index and unique-check it */
indexIt = true;
tupleIsAlive = true;
/* Count it as live, too */
reltuples += 1;
break;
case HEAPTUPLE_RECENTLY_DEAD:
/*
* If tuple is recently deleted then we must index it
* anyway to preserve MVCC semantics. (Pre-existing
* transactions could try to use the index after we finish
* building it, and may need to see such tuples.)
*
* However, if it was HOT-updated then we must only index
* the live tuple at the end of the HOT-chain. Since this
* breaks semantics for pre-existing snapshots, mark the
* index as unusable for them.
*
* We don't count recently-dead tuples in reltuples, even
* if we index them; see acquire_sample_rows().
*/
if (HeapTupleIsHotUpdated(heapTuple))
{
indexIt = false;
/* mark the index as unsafe for old snapshots */
indexInfo->ii_BrokenHotChain = true;
}
else
indexIt = true;
/* In any case, exclude the tuple from unique-checking */
tupleIsAlive = false;
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
/*
* In "anyvisible" mode, this tuple is visible and we
* don't need any further checks.
*/
if (anyvisible)
{
indexIt = true;
tupleIsAlive = true;
reltuples += 1;
break;
}
/*
* Since caller should hold ShareLock or better, normally
* the only way to see this is if it was inserted earlier
* in our own transaction. However, it can happen in
* system catalogs, since we tend to release write lock
* before commit there. Give a warning if neither case
* applies.
*/
xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
elog(WARNING, "concurrent insert in progress within table \"%s\"",
RelationGetRelationName(heapRelation));
/*
* If we are performing uniqueness checks, indexing
* such a tuple could lead to a bogus uniqueness
* failure. In that case we wait for the inserting
* transaction to finish and check again.
*/
if (checking_uniqueness)
{
/*
* Must drop the lock on the buffer before we wait
*/
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
XactLockTableWait(xwait, heapRelation,
&heapTuple->t_self,
XLTW_InsertIndexUnique);
CHECK_FOR_INTERRUPTS();
goto recheck;
}
}
else
{
/*
* For consistency with acquire_sample_rows(), count
* HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
* when inserted by our own transaction.
*/
reltuples += 1;
}
/*
* We must index such tuples, since if the index build
* commits then they're good.
*/
indexIt = true;
tupleIsAlive = true;
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
/*
* As with INSERT_IN_PROGRESS case, this is unexpected
* unless it's our own deletion or a system catalog; but
* in anyvisible mode, this tuple is visible.
*/
if (anyvisible)
{
indexIt = true;
tupleIsAlive = false;
reltuples += 1;
break;
}
xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
elog(WARNING, "concurrent delete in progress within table \"%s\"",
RelationGetRelationName(heapRelation));
/*
* If we are performing uniqueness checks, assuming
* the tuple is dead could lead to missing a
* uniqueness violation. In that case we wait for the
* deleting transaction to finish and check again.
*
* Also, if it's a HOT-updated tuple, we should not
* index it but rather the live tuple at the end of
* the HOT-chain. However, the deleting transaction
* could abort, possibly leaving this tuple as live
* after all, in which case it has to be indexed. The
* only way to know what to do is to wait for the
* deleting transaction to finish and check again.
*/
if (checking_uniqueness ||
HeapTupleIsHotUpdated(heapTuple))
{
/*
* Must drop the lock on the buffer before we wait
*/
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
XactLockTableWait(xwait, heapRelation,
&heapTuple->t_self,
XLTW_InsertIndexUnique);
CHECK_FOR_INTERRUPTS();
goto recheck;
}
/*
* Otherwise index it but don't check for uniqueness,
* the same as a RECENTLY_DEAD tuple.
*/
indexIt = true;
/*
* Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
* if they were not deleted by the current
* transaction. That's what acquire_sample_rows()
* does, and we want the behavior to be consistent.
*/
reltuples += 1;
}
else if (HeapTupleIsHotUpdated(heapTuple))
{
/*
* It's a HOT-updated tuple deleted by our own xact.
* We can assume the deletion will commit (else the
* index contents don't matter), so treat the same as
* RECENTLY_DEAD HOT-updated tuples.
*/
indexIt = false;
/* mark the index as unsafe for old snapshots */
indexInfo->ii_BrokenHotChain = true;
}
else
{
/*
* It's a regular tuple deleted by our own xact. Index
* it, but don't check for uniqueness nor count in
* reltuples, the same as a RECENTLY_DEAD tuple.
*/
indexIt = true;
}
/* In any case, exclude the tuple from unique-checking */
tupleIsAlive = false;
break;
default:
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
indexIt = tupleIsAlive = false; /* keep compiler quiet */
break;
}
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
if (!indexIt)
continue;
}
else
{
/* heap_getnext did the time qual check */
tupleIsAlive = true;
reltuples += 1;
}
MemoryContextReset(econtext->ecxt_per_tuple_memory);
/* Set up for predicate or expression evaluation */
ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
/*
* In a partial index, discard tuples that don't satisfy the
* predicate.
*/
if (predicate != NULL)
{
if (!ExecQual(predicate, econtext))
continue;
}
/*
* For the current heap tuple, extract all the attributes we use in
* this index, and note which are null. This also performs evaluation
* of any expressions needed.
*/
FormIndexDatum(indexInfo,
slot,
estate,
values,
isnull);
/*
* You'd think we should go ahead and build the index tuple here, but
* some index AMs want to do further processing on the data first. So
* pass the values[] and isnull[] arrays, instead.
*/
if (HeapTupleIsHeapOnly(heapTuple))
{
/*
* For a heap-only tuple, pretend its TID is that of the root. See
* src/backend/access/heap/README.HOT for discussion.
*/
HeapTupleData rootTuple;
OffsetNumber offnum;
rootTuple = *heapTuple;
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
ItemPointerGetBlockNumber(&heapTuple->t_self),
offnum,
RelationGetRelationName(heapRelation))));
ItemPointerSetOffsetNumber(&rootTuple.t_self,
root_offsets[offnum - 1]);
/* Call the AM's callback routine to process the tuple */
callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
callback_state);
}
else
{
/* Call the AM's callback routine to process the tuple */
callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
callback_state);
}
}
table_endscan(scan);
/* we can now forget our snapshot, if set and registered by us */
if (need_unregister_snapshot)
UnregisterSnapshot(snapshot);
ExecDropSingleTupleTableSlot(slot);
FreeExecutorState(estate);
/* These may have been pointing to the now-gone estate */
indexInfo->ii_ExpressionsState = NIL;
indexInfo->ii_PredicateState = NULL;
return reltuples;
}
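/*
* Hedged example: an index AM's ambuild routine reaches this through the
* tableam wrapper, which (in this tree) passes start_blockno = 0 and
* numblocks = InvalidBlockNumber to scan the whole heap; compare btbuild()
* in nbtsort.c.  Wrapper and callback argument lists per this era's
* headers:
*
*		static void
*		mybuild_callback(Relation index, HeapTuple htup, Datum *values,
*						 bool *isnull, bool tupleIsAlive, void *state)
*		{
*			... form and queue an index entry for htup->t_self ...
*		}
*
*		reltuples = table_index_build_scan(heapRel, indexRel, indexInfo,
*										   true, mybuild_callback,
*										   (void *) &buildstate, NULL);
*/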
static void
heapam_index_validate_scan(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo,
Snapshot snapshot,
ValidateIndexState *state)
{
TableScanDesc scan;
HeapScanDesc hscan;
HeapTuple heapTuple;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
ExprState *predicate;
TupleTableSlot *slot;
EState *estate;
ExprContext *econtext;
BlockNumber root_blkno = InvalidBlockNumber;
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
bool in_index[MaxHeapTuplesPerPage];
/* state variables for the merge */
ItemPointer indexcursor = NULL;
ItemPointerData decoded;
bool tuplesort_empty = false;
/*
* sanity checks
*/
Assert(OidIsValid(indexRelation->rd_rel->relam));
/*
* Need an EState for evaluation of index expressions and partial-index
* predicates. Also a slot to hold the current tuple.
*/
estate = CreateExecutorState();
econtext = GetPerTupleExprContext(estate);
slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
&TTSOpsHeapTuple);
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/* Set up execution state for predicate, if any. */
predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
/*
* Prepare for scan of the base relation. We need just those tuples
* satisfying the passed-in reference snapshot. We must disable syncscan
* here, because it's critical that we read from block zero forward to
* match the sorted TIDs.
*/
scan = table_beginscan_strat(heapRelation, /* relation */
snapshot, /* snapshot */
0, /* number of keys */
NULL, /* scan key */
true, /* buffer access strategy OK */
false); /* syncscan not OK */
hscan = (HeapScanDesc) scan;
/*
* Scan all tuples matching the snapshot.
*/
while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
ItemPointer heapcursor = &heapTuple->t_self;
ItemPointerData rootTuple;
OffsetNumber root_offnum;
CHECK_FOR_INTERRUPTS();
state->htups += 1;
/*
* As commented in table_index_build_scan, we should index heap-only
* tuples under the TIDs of their root tuples; so when we advance onto
* a new heap page, build a map of root item offsets on the page.
*
* This complicates merging against the tuplesort output: we will
* visit the live tuples in order by their offsets, but the root
* offsets that we need to compare against the index contents might be
* ordered differently. So we might have to "look back" within the
* tuplesort output, but only within the current page. We handle that
* by keeping a bool array in_index[] showing all the
* already-passed-over tuplesort output TIDs of the current page. We
* clear that array here, when advancing onto a new heap page.
*/
if (hscan->rs_cblock != root_blkno)
{
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
heap_get_root_tuples(page, root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
memset(in_index, 0, sizeof(in_index));
root_blkno = hscan->rs_cblock;
}
/* Convert actual tuple TID to root TID */
rootTuple = *heapcursor;
root_offnum = ItemPointerGetOffsetNumber(heapcursor);
if (HeapTupleIsHeapOnly(heapTuple))
{
root_offnum = root_offsets[root_offnum - 1];
if (!OffsetNumberIsValid(root_offnum))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
ItemPointerGetBlockNumber(heapcursor),
ItemPointerGetOffsetNumber(heapcursor),
RelationGetRelationName(heapRelation))));
ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
}
/*
* "merge" by skipping through the index tuples until we find or pass
* the current root tuple.
*/
while (!tuplesort_empty &&
(!indexcursor ||
ItemPointerCompare(indexcursor, &rootTuple) < 0))
{
Datum ts_val;
bool ts_isnull;
if (indexcursor)
{
/*
* Remember index items seen earlier on the current heap page
*/
if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
}
tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
&ts_val, &ts_isnull, NULL);
Assert(tuplesort_empty || !ts_isnull);
if (!tuplesort_empty)
{
itemptr_decode(&decoded, DatumGetInt64(ts_val));
indexcursor = &decoded;
/* If int8 is pass-by-ref, free (encoded) TID Datum memory */
#ifndef USE_FLOAT8_BYVAL
pfree(DatumGetPointer(ts_val));
#endif
}
else
{
/* Be tidy */
indexcursor = NULL;
}
}
/*
* If the tuplesort has overshot *and* we didn't see a match earlier,
* then this tuple is missing from the index, so insert it.
*/
if ((tuplesort_empty ||
ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
!in_index[root_offnum - 1])
{
MemoryContextReset(econtext->ecxt_per_tuple_memory);
/* Set up for predicate or expression evaluation */
ExecStoreHeapTuple(heapTuple, slot, false);
/*
* In a partial index, discard tuples that don't satisfy the
* predicate.
*/
if (predicate != NULL)
{
if (!ExecQual(predicate, econtext))
continue;
}
/*
* For the current heap tuple, extract all the attributes we use
* in this index, and note which are null. This also performs
* evaluation of any expressions needed.
*/
FormIndexDatum(indexInfo,
slot,
estate,
values,
isnull);
/*
* You'd think we should go ahead and build the index tuple here,
* but some index AMs want to do further processing on the data
* first. So pass the values[] and isnull[] arrays, instead.
*/
/*
* If the tuple is already committed dead, you might think we
* could suppress uniqueness checking, but this is no longer true
* in the presence of HOT, because the insert is actually a proxy
* for a uniqueness check on the whole HOT-chain. That is, the
* tuple we have here could be dead because it was already
* HOT-updated, and if so the updating transaction will not have
* thought it should insert index entries. The index AM will
* check the whole HOT-chain and correctly detect a conflict if
* there is one.
*/
index_insert(indexRelation,
values,
isnull,
&rootTuple,
heapRelation,
indexInfo->ii_Unique ?
UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
indexInfo);
state->tups_inserted += 1;
}
}
table_endscan(scan);
ExecDropSingleTupleTableSlot(slot);
FreeExecutorState(estate);
/* These may have been pointing to the now-gone estate */
indexInfo->ii_ExpressionsState = NIL;
indexInfo->ii_PredicateState = NULL;
}
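/*
* Hedged context: this implements the validation phase of CREATE INDEX
* CONCURRENTLY, driven by validate_index() in index.c.  The overall shape
* there is roughly:
*
*		1. index_bulk_delete() is used to dump every TID currently in the
*		   index into state->tuplesort (TIDs encoded as int8 via
*		   itemptr_encode());
*		2. tuplesort_performsort() puts them in TID order;
*		3. this routine scans the heap in the same order and index_insert()s
*		   whatever the sorted stream turns out to be missing.
*/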
/* ------------------------------------------------------------------------
* Definition of the heap table access method.
* ------------------------------------------------------------------------
*/
static const TableAmRoutine heapam_methods = {
.type = T_TableAmRoutine,
.slot_callbacks = heapam_slot_callbacks,
.scan_begin = heap_beginscan,
.scan_end = heap_endscan,
.scan_rescan = heap_rescan,
.scan_getnextslot = heap_getnextslot,
.parallelscan_estimate = table_block_parallelscan_estimate,
.parallelscan_initialize = table_block_parallelscan_initialize,
.parallelscan_reinitialize = table_block_parallelscan_reinitialize,
.index_fetch_begin = heapam_index_fetch_begin,
.index_fetch_reset = heapam_index_fetch_reset,
.index_fetch_end = heapam_index_fetch_end,
.index_fetch_tuple = heapam_index_fetch_tuple,
.tuple_insert = heapam_tuple_insert,
.tuple_insert_speculative = heapam_tuple_insert_speculative,
.tuple_complete_speculative = heapam_tuple_complete_speculative,
.tuple_delete = heapam_tuple_delete,
.tuple_update = heapam_tuple_update,
.tuple_lock = heapam_tuple_lock,
.tuple_fetch_row_version = heapam_fetch_row_version,
.tuple_get_latest_tid = heap_get_latest_tid,
.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
.compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
.index_build_range_scan = heapam_index_build_range_scan,
.index_validate_scan = heapam_index_validate_scan,
};
const TableAmRoutine *
GetHeapamTableAmRoutine(void)
{
return &heapam_methods;
}
Datum
heap_tableam_handler(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(&heapam_methods);
}
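/*
* Hedged SQL-level example: "USING heap" resolves to the handler above, and
* the same handler can be rebound under another access method name:
*
*		CREATE ACCESS METHOD heap2 TYPE TABLE HANDLER heap_tableam_handler;
*		CREATE TABLE t (a int) USING heap2;
*/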