Allow locking updated tuples in tuple_update() and tuple_delete()

Currently, in read committed transaction isolation mode (default), we have the
following sequence of actions when tuple_update()/tuple_delete() finds
the tuple updated by the concurrent transaction.

1. Attempt to update/delete tuple with tuple_update()/tuple_delete(), which
   returns TM_Updated.
2. Lock tuple with tuple_lock().
3. Re-evaluate plan qual (recheck if we still need to update/delete and
   calculate the new tuple for update).
4. Second attempt to update/delete tuple with tuple_update()/tuple_delete().
   This attempt should be successful, since the tuple was previously locked.

This commit eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call.  The heap table access method saves some
effort by checking the updated tuple once instead of twice.  Future
undo-based table access methods, which will start from the latest row version,
can immediately place a lock there.

Also, this commit makes tuple_update()/tuple_delete() optionally save the old
tuple into the dedicated slot.  That saves the effort of re-fetching tuples in
certain cases.

The code in nodeModifyTable.c is simplified by removing the nested switch/case.

Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
This commit is contained in:
Alexander Korotkov 2024-03-26 01:27:56 +02:00
parent c7076ba6ad
commit 87985cc925
9 changed files with 502 additions and 346 deletions

View File

@ -2496,10 +2496,11 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
} }
/* /*
* heap_delete - delete a tuple * heap_delete - delete a tuple, optionally fetching it into a slot
* *
* See table_tuple_delete() for an explanation of the parameters, except that * See table_tuple_delete() for an explanation of the parameters, except that
* this routine directly takes a tuple rather than a slot. * this routine directly takes a tuple rather than a slot. Also, we don't
* place a lock on the tuple in this function, just fetch the existing version.
* *
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid, * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@ -2508,8 +2509,9 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
*/ */
TM_Result TM_Result
heap_delete(Relation relation, ItemPointer tid, heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait, CommandId cid, Snapshot crosscheck, int options,
TM_FailureData *tmfd, bool changingPart) TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot)
{ {
TM_Result result; TM_Result result;
TransactionId xid = GetCurrentTransactionId(); TransactionId xid = GetCurrentTransactionId();
@ -2587,7 +2589,7 @@ l1:
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("attempted to delete invisible tuple"))); errmsg("attempted to delete invisible tuple")));
} }
else if (result == TM_BeingModified && wait) else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
{ {
TransactionId xwait; TransactionId xwait;
uint16 infomask; uint16 infomask;
@ -2728,7 +2730,30 @@ l1:
tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data); tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
else else
tmfd->cmax = InvalidCommandId; tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
/*
* If we're asked to lock the updated tuple, we just fetch the
* existing tuple. That lets the caller save some resources on
* placing the lock.
*/
if (result == TM_Updated &&
(options & TABLE_MODIFY_LOCK_UPDATED))
{
BufferHeapTupleTableSlot *bslot;
Assert(TTS_IS_BUFFERTUPLE(oldSlot));
bslot = (BufferHeapTupleTableSlot *) oldSlot;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
bslot->base.tupdata = tp;
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
oldSlot,
buffer);
}
else
{
UnlockReleaseBuffer(buffer);
}
if (have_tuple_lock) if (have_tuple_lock)
UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive); UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer) if (vmbuffer != InvalidBuffer)
@ -2902,8 +2927,24 @@ l1:
*/ */
CacheInvalidateHeapTuple(relation, &tp, NULL); CacheInvalidateHeapTuple(relation, &tp, NULL);
/* Now we can release the buffer */ /* Fetch the old tuple version if we're asked for that. */
ReleaseBuffer(buffer); if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
{
BufferHeapTupleTableSlot *bslot;
Assert(TTS_IS_BUFFERTUPLE(oldSlot));
bslot = (BufferHeapTupleTableSlot *) oldSlot;
bslot->base.tupdata = tp;
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
oldSlot,
buffer);
}
else
{
/* Now we can release the buffer */
ReleaseBuffer(buffer);
}
/* /*
* Release the lmgr tuple lock, if we had it. * Release the lmgr tuple lock, if we had it.
@ -2935,8 +2976,8 @@ simple_heap_delete(Relation relation, ItemPointer tid)
result = heap_delete(relation, tid, result = heap_delete(relation, tid,
GetCurrentCommandId(true), InvalidSnapshot, GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */ , TABLE_MODIFY_WAIT /* wait for commit */ ,
&tmfd, false /* changingPart */ ); &tmfd, false /* changingPart */ , NULL);
switch (result) switch (result)
{ {
case TM_SelfModified: case TM_SelfModified:
@ -2963,10 +3004,11 @@ simple_heap_delete(Relation relation, ItemPointer tid)
} }
/* /*
* heap_update - replace a tuple * heap_update - replace a tuple, optionally fetching it into a slot
* *
* See table_tuple_update() for an explanation of the parameters, except that * See table_tuple_update() for an explanation of the parameters, except that
* this routine directly takes a tuple rather than a slot. * this routine directly takes a tuple rather than a slot. Also, we don't
* place a lock on the tuple in this function, just fetch the existing version.
* *
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid, * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@ -2975,9 +3017,9 @@ simple_heap_delete(Relation relation, ItemPointer tid)
*/ */
TM_Result TM_Result
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait, CommandId cid, Snapshot crosscheck, int options,
TM_FailureData *tmfd, LockTupleMode *lockmode, TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes) TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot)
{ {
TM_Result result; TM_Result result;
TransactionId xid = GetCurrentTransactionId(); TransactionId xid = GetCurrentTransactionId();
@ -3154,7 +3196,7 @@ l2:
result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer); result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
/* see below about the "no wait" case */ /* see below about the "no wait" case */
Assert(result != TM_BeingModified || wait); Assert(result != TM_BeingModified || (options & TABLE_MODIFY_WAIT));
if (result == TM_Invisible) if (result == TM_Invisible)
{ {
@ -3163,7 +3205,7 @@ l2:
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("attempted to update invisible tuple"))); errmsg("attempted to update invisible tuple")));
} }
else if (result == TM_BeingModified && wait) else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
{ {
TransactionId xwait; TransactionId xwait;
uint16 infomask; uint16 infomask;
@ -3367,7 +3409,30 @@ l2:
tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
else else
tmfd->cmax = InvalidCommandId; tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
/*
* If we're asked to lock the updated tuple, we just fetch the
* existing tuple. That lets the caller save some resources on
* placing the lock.
*/
if (result == TM_Updated &&
(options & TABLE_MODIFY_LOCK_UPDATED))
{
BufferHeapTupleTableSlot *bslot;
Assert(TTS_IS_BUFFERTUPLE(oldSlot));
bslot = (BufferHeapTupleTableSlot *) oldSlot;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
bslot->base.tupdata = oldtup;
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
oldSlot,
buffer);
}
else
{
UnlockReleaseBuffer(buffer);
}
if (have_tuple_lock) if (have_tuple_lock)
UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode); UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
if (vmbuffer != InvalidBuffer) if (vmbuffer != InvalidBuffer)
@ -3846,7 +3911,26 @@ l2:
/* Now we can release the buffer(s) */ /* Now we can release the buffer(s) */
if (newbuf != buffer) if (newbuf != buffer)
ReleaseBuffer(newbuf); ReleaseBuffer(newbuf);
ReleaseBuffer(buffer);
/* Fetch the old tuple version if we're asked for that. */
if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
{
BufferHeapTupleTableSlot *bslot;
Assert(TTS_IS_BUFFERTUPLE(oldSlot));
bslot = (BufferHeapTupleTableSlot *) oldSlot;
bslot->base.tupdata = oldtup;
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
oldSlot,
buffer);
}
else
{
/* Now we can release the buffer */
ReleaseBuffer(buffer);
}
if (BufferIsValid(vmbuffer_new)) if (BufferIsValid(vmbuffer_new))
ReleaseBuffer(vmbuffer_new); ReleaseBuffer(vmbuffer_new);
if (BufferIsValid(vmbuffer)) if (BufferIsValid(vmbuffer))
@ -4054,8 +4138,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup,
result = heap_update(relation, otid, tup, result = heap_update(relation, otid, tup,
GetCurrentCommandId(true), InvalidSnapshot, GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */ , TABLE_MODIFY_WAIT /* wait for commit */ ,
&tmfd, &lockmode, update_indexes); &tmfd, &lockmode, update_indexes, NULL);
switch (result) switch (result)
{ {
case TM_SelfModified: case TM_SelfModified:
@ -4118,12 +4202,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
* tuples. * tuples.
* *
* Output parameters: * Output parameters:
* *tuple: all fields filled in * *slot: BufferHeapTupleTableSlot filled with tuple
* *buffer: set to buffer holding tuple (pinned but not locked at exit)
* *tmfd: filled in failure cases (see below) * *tmfd: filled in failure cases (see below)
* *
* Function results are the same as the ones for table_tuple_lock(). * Function results are the same as the ones for table_tuple_lock().
* *
* If *slot already contains the target tuple, it takes advantage of that by
* skipping the ReadBuffer() call.
*
* In the failure cases other than TM_Invisible, the routine fills * In the failure cases other than TM_Invisible, the routine fills
* *tmfd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact, * *tmfd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
* if necessary), and t_cmax (the last only for TM_SelfModified, * if necessary), and t_cmax (the last only for TM_SelfModified,
@ -4134,15 +4220,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
* See README.tuplock for a thorough explanation of this mechanism. * See README.tuplock for a thorough explanation of this mechanism.
*/ */
TM_Result TM_Result
heap_lock_tuple(Relation relation, HeapTuple tuple, heap_lock_tuple(Relation relation, ItemPointer tid, TupleTableSlot *slot,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
bool follow_updates, bool follow_updates, TM_FailureData *tmfd)
Buffer *buffer, TM_FailureData *tmfd)
{ {
TM_Result result; TM_Result result;
ItemPointer tid = &(tuple->t_self);
ItemId lp; ItemId lp;
Page page; Page page;
Buffer buffer;
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
BlockNumber block; BlockNumber block;
TransactionId xid, TransactionId xid,
@ -4154,8 +4239,24 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
bool skip_tuple_lock = false; bool skip_tuple_lock = false;
bool have_tuple_lock = false; bool have_tuple_lock = false;
bool cleared_all_frozen = false; bool cleared_all_frozen = false;
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
HeapTuple tuple = &bslot->base.tupdata;
*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); Assert(TTS_IS_BUFFERTUPLE(slot));
/* Take advantage if slot already contains the relevant tuple */
if (!TTS_EMPTY(slot) &&
slot->tts_tableOid == relation->rd_id &&
ItemPointerCompare(&slot->tts_tid, tid) == 0 &&
BufferIsValid(bslot->buffer))
{
buffer = bslot->buffer;
IncrBufferRefCount(buffer);
}
else
{
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
}
block = ItemPointerGetBlockNumber(tid); block = ItemPointerGetBlockNumber(tid);
/* /*
@ -4164,21 +4265,22 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
* in the middle of changing this, so we'll need to recheck after we have * in the middle of changing this, so we'll need to recheck after we have
* the lock. * the lock.
*/ */
if (PageIsAllVisible(BufferGetPage(*buffer))) if (PageIsAllVisible(BufferGetPage(buffer)))
visibilitymap_pin(relation, block, &vmbuffer); visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = BufferGetPage(*buffer); page = BufferGetPage(buffer);
lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid)); lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
Assert(ItemIdIsNormal(lp)); Assert(ItemIdIsNormal(lp));
tuple->t_self = *tid;
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp); tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation); tuple->t_tableOid = RelationGetRelid(relation);
l3: l3:
result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer); result = HeapTupleSatisfiesUpdate(tuple, cid, buffer);
if (result == TM_Invisible) if (result == TM_Invisible)
{ {
@ -4207,7 +4309,7 @@ l3:
infomask2 = tuple->t_data->t_infomask2; infomask2 = tuple->t_data->t_infomask2;
ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
/* /*
* If any subtransaction of the current top transaction already holds * If any subtransaction of the current top transaction already holds
@ -4359,12 +4461,12 @@ l3:
{ {
result = res; result = res;
/* recovery code expects to have buffer lock held */ /* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed; goto failed;
} }
} }
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* /*
* Make sure it's still an appropriate lock, else start over. * Make sure it's still an appropriate lock, else start over.
@ -4399,7 +4501,7 @@ l3:
if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) && if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
!HEAP_XMAX_IS_EXCL_LOCKED(infomask)) !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
{ {
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* /*
* Make sure it's still an appropriate lock, else start over. * Make sure it's still an appropriate lock, else start over.
@ -4427,7 +4529,7 @@ l3:
* No conflict, but if the xmax changed under us in the * No conflict, but if the xmax changed under us in the
* meantime, start over. * meantime, start over.
*/ */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
xwait)) xwait))
@ -4439,7 +4541,7 @@ l3:
} }
else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)) else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
{ {
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* if the xmax changed in the meantime, start over */ /* if the xmax changed in the meantime, start over */
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
@ -4467,7 +4569,7 @@ l3:
TransactionIdIsCurrentTransactionId(xwait)) TransactionIdIsCurrentTransactionId(xwait))
{ {
/* ... but if the xmax changed in the meantime, start over */ /* ... but if the xmax changed in the meantime, start over */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
xwait)) xwait))
@ -4489,7 +4591,7 @@ l3:
*/ */
if (require_sleep && (result == TM_Updated || result == TM_Deleted)) if (require_sleep && (result == TM_Updated || result == TM_Deleted))
{ {
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed; goto failed;
} }
else if (require_sleep) else if (require_sleep)
@ -4514,7 +4616,7 @@ l3:
*/ */
result = TM_WouldBlock; result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */ /* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed; goto failed;
} }
@ -4540,7 +4642,7 @@ l3:
{ {
result = TM_WouldBlock; result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */ /* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed; goto failed;
} }
break; break;
@ -4580,7 +4682,7 @@ l3:
{ {
result = TM_WouldBlock; result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */ /* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed; goto failed;
} }
break; break;
@ -4606,12 +4708,12 @@ l3:
{ {
result = res; result = res;
/* recovery code expects to have buffer lock held */ /* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed; goto failed;
} }
} }
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* /*
* xwait is done, but if xwait had just locked the tuple then some * xwait is done, but if xwait had just locked the tuple then some
@ -4633,7 +4735,7 @@ l3:
* don't check for this in the multixact case, because some * don't check for this in the multixact case, because some
* locker transactions might still be running. * locker transactions might still be running.
*/ */
UpdateXmaxHintBits(tuple->t_data, *buffer, xwait); UpdateXmaxHintBits(tuple->t_data, buffer, xwait);
} }
} }
@ -4692,9 +4794,9 @@ failed:
*/ */
if (vmbuffer == InvalidBuffer && PageIsAllVisible(page)) if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{ {
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
visibilitymap_pin(relation, block, &vmbuffer); visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto l3; goto l3;
} }
@ -4757,7 +4859,7 @@ failed:
cleared_all_frozen = true; cleared_all_frozen = true;
MarkBufferDirty(*buffer); MarkBufferDirty(buffer);
/* /*
* XLOG stuff. You might think that we don't need an XLOG record because * XLOG stuff. You might think that we don't need an XLOG record because
@ -4777,7 +4879,7 @@ failed:
XLogRecPtr recptr; XLogRecPtr recptr;
XLogBeginInsert(); XLogBeginInsert();
XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self); xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
xlrec.xmax = xid; xlrec.xmax = xid;
@ -4798,7 +4900,7 @@ failed:
result = TM_Ok; result = TM_Ok;
out_locked: out_locked:
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
out_unlocked: out_unlocked:
if (BufferIsValid(vmbuffer)) if (BufferIsValid(vmbuffer))
@ -4816,6 +4918,9 @@ out_unlocked:
if (have_tuple_lock) if (have_tuple_lock)
UnlockTupleTuplock(relation, tid, mode); UnlockTupleTuplock(relation, tid, mode);
/* Put the target tuple to the slot */
ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
return result; return result;
} }

View File

@ -45,6 +45,12 @@
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/rel.h" #include "utils/rel.h"
static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid,
Snapshot snapshot, TupleTableSlot *slot,
CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd);
static void reform_and_rewrite_tuple(HeapTuple tuple, static void reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap, Relation OldHeap, Relation NewHeap,
Datum *values, bool *isnull, RewriteState rwstate); Datum *values, bool *isnull, RewriteState rwstate);
@ -300,23 +306,55 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
static TM_Result static TM_Result
heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
Snapshot snapshot, Snapshot crosscheck, bool wait, Snapshot snapshot, Snapshot crosscheck, int options,
TM_FailureData *tmfd, bool changingPart) TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot)
{ {
TM_Result result;
/* /*
* Currently Deleting of index tuples are handled at vacuum, in case if * Currently Deleting of index tuples are handled at vacuum, in case if
* the storage itself is cleaning the dead tuples by itself, it is the * the storage itself is cleaning the dead tuples by itself, it is the
* time to call the index tuple deletion also. * time to call the index tuple deletion also.
*/ */
return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart); result = heap_delete(relation, tid, cid, crosscheck, options,
tmfd, changingPart, oldSlot);
/*
* If the tuple has been concurrently updated, then get the lock on it.
* (Do this only if the caller asked for it by setting the
* TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
* delete should succeed even if there are more concurrent update
* attempts.
*/
if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
{
/*
* heapam_tuple_lock() will take advantage of tuple loaded into
* oldSlot by heap_delete().
*/
result = heapam_tuple_lock(relation, tid, snapshot,
oldSlot, cid, LockTupleExclusive,
(options & TABLE_MODIFY_WAIT) ?
LockWaitBlock :
LockWaitSkip,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
tmfd);
if (result == TM_Ok)
return TM_Updated;
}
return result;
} }
static TM_Result static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck, CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd, int options, TM_FailureData *tmfd,
LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes) LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot)
{ {
bool shouldFree = true; bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
@ -326,8 +364,8 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
slot->tts_tableOid = RelationGetRelid(relation); slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid; tuple->t_tableOid = slot->tts_tableOid;
result = heap_update(relation, otid, tuple, cid, crosscheck, wait, result = heap_update(relation, otid, tuple, cid, crosscheck, options,
tmfd, lockmode, update_indexes); tmfd, lockmode, update_indexes, oldSlot);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid); ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
/* /*
@ -354,6 +392,31 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
if (shouldFree) if (shouldFree)
pfree(tuple); pfree(tuple);
/*
* If the tuple has been concurrently updated, then get the lock on it.
* (Do this only if the caller asked for it by setting the
* TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
* update should succeed even if there are more concurrent update
* attempts.
*/
if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
{
/*
* heapam_tuple_lock() will take advantage of tuple loaded into
* oldSlot by heap_update().
*/
result = heapam_tuple_lock(relation, otid, snapshot,
oldSlot, cid, *lockmode,
(options & TABLE_MODIFY_WAIT) ?
LockWaitBlock :
LockWaitSkip,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
tmfd);
if (result == TM_Ok)
return TM_Updated;
}
return result; return result;
} }
@ -365,7 +428,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
{ {
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
TM_Result result; TM_Result result;
Buffer buffer;
HeapTuple tuple = &bslot->base.tupdata; HeapTuple tuple = &bslot->base.tupdata;
bool follow_updates; bool follow_updates;
@ -375,9 +437,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
Assert(TTS_IS_BUFFERTUPLE(slot)); Assert(TTS_IS_BUFFERTUPLE(slot));
tuple_lock_retry: tuple_lock_retry:
tuple->t_self = *tid; result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy,
result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy, follow_updates, tmfd);
follow_updates, &buffer, tmfd);
if (result == TM_Updated && if (result == TM_Updated &&
(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION)) (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
@ -385,8 +446,6 @@ tuple_lock_retry:
/* Should not encounter speculative tuple on recheck */ /* Should not encounter speculative tuple on recheck */
Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data)); Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
ReleaseBuffer(buffer);
if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self)) if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
{ {
SnapshotData SnapshotDirty; SnapshotData SnapshotDirty;
@ -408,6 +467,8 @@ tuple_lock_retry:
InitDirtySnapshot(SnapshotDirty); InitDirtySnapshot(SnapshotDirty);
for (;;) for (;;)
{ {
Buffer buffer = InvalidBuffer;
if (ItemPointerIndicatesMovedPartitions(tid)) if (ItemPointerIndicatesMovedPartitions(tid))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
@ -502,7 +563,7 @@ tuple_lock_retry:
/* /*
* This is a live tuple, so try to lock it again. * This is a live tuple, so try to lock it again.
*/ */
ReleaseBuffer(buffer); ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
goto tuple_lock_retry; goto tuple_lock_retry;
} }
@ -513,7 +574,7 @@ tuple_lock_retry:
*/ */
if (tuple->t_data == NULL) if (tuple->t_data == NULL)
{ {
Assert(!BufferIsValid(buffer)); ReleaseBuffer(buffer);
return TM_Deleted; return TM_Deleted;
} }
@ -566,9 +627,6 @@ tuple_lock_retry:
slot->tts_tableOid = RelationGetRelid(relation); slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid; tuple->t_tableOid = slot->tts_tableOid;
/* store in slot, transferring existing pin */
ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
return result; return result;
} }

View File

@ -287,16 +287,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
* via ereport(). * via ereport().
*/ */
void void
simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot) simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot,
TupleTableSlot *oldSlot)
{ {
TM_Result result; TM_Result result;
TM_FailureData tmfd; TM_FailureData tmfd;
int options = TABLE_MODIFY_WAIT; /* wait for commit */
/* Fetch old tuple if the relevant slot is provided */
if (oldSlot)
options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
result = table_tuple_delete(rel, tid, result = table_tuple_delete(rel, tid,
GetCurrentCommandId(true), GetCurrentCommandId(true),
snapshot, InvalidSnapshot, snapshot, InvalidSnapshot,
true /* wait for commit */ , options,
&tmfd, false /* changingPart */ ); &tmfd, false /* changingPart */ ,
oldSlot);
switch (result) switch (result)
{ {
@ -335,17 +342,24 @@ void
simple_table_tuple_update(Relation rel, ItemPointer otid, simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot, TupleTableSlot *slot,
Snapshot snapshot, Snapshot snapshot,
TU_UpdateIndexes *update_indexes) TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot)
{ {
TM_Result result; TM_Result result;
TM_FailureData tmfd; TM_FailureData tmfd;
LockTupleMode lockmode; LockTupleMode lockmode;
int options = TABLE_MODIFY_WAIT; /* wait for commit */
/* Fetch old tuple if the relevant slot is provided */
if (oldSlot)
options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
result = table_tuple_update(rel, otid, slot, result = table_tuple_update(rel, otid, slot,
GetCurrentCommandId(true), GetCurrentCommandId(true),
snapshot, InvalidSnapshot, snapshot, InvalidSnapshot,
true /* wait for commit */ , options,
&tmfd, &lockmode, update_indexes); &tmfd, &lockmode, update_indexes,
oldSlot);
switch (result) switch (result)
{ {

View File

@ -2773,8 +2773,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
void void
ExecARDeleteTriggers(EState *estate, ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo, ResultRelInfo *relinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple, HeapTuple fdw_trigtuple,
TupleTableSlot *slot,
TransitionCaptureState *transition_capture, TransitionCaptureState *transition_capture,
bool is_crosspart_update) bool is_crosspart_update)
{ {
@ -2783,20 +2783,11 @@ ExecARDeleteTriggers(EState *estate,
if ((trigdesc && trigdesc->trig_delete_after_row) || if ((trigdesc && trigdesc->trig_delete_after_row) ||
(transition_capture && transition_capture->tcs_delete_old_table)) (transition_capture && transition_capture->tcs_delete_old_table))
{ {
TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); /*
* Put the FDW old tuple into the slot. Otherwise, the caller is expected
Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); * to have the old tuple already fetched into the slot.
if (fdw_trigtuple == NULL) */
GetTupleForTrigger(estate, if (fdw_trigtuple != NULL)
NULL,
relinfo,
tupleid,
LockTupleExclusive,
slot,
NULL,
NULL,
NULL);
else
ExecForceStoreHeapTuple(fdw_trigtuple, slot, false); ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@ -3087,18 +3078,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
* Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
* and destination partitions, respectively, of a cross-partition update of * and destination partitions, respectively, of a cross-partition update of
* the root partitioned table mentioned in the query, given by 'relinfo'. * the root partitioned table mentioned in the query, given by 'relinfo'.
* 'tupleid' in that case refers to the ctid of the "old" tuple in the source * 'oldslot' contains the "old" tuple in the source partition, and 'newslot'
* partition, and 'newslot' contains the "new" tuple in the destination * contains the "new" tuple in the destination partition. This interface
* partition. This interface allows to support the requirements of * allows to support the requirements of ExecCrossPartitionUpdateForeignKey();
* ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in * is_crosspart_update must be true in that case.
* that case.
*/ */
void void
ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo, ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo, ResultRelInfo *dst_partinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple, HeapTuple fdw_trigtuple,
TupleTableSlot *oldslot,
TupleTableSlot *newslot, TupleTableSlot *newslot,
List *recheckIndexes, List *recheckIndexes,
TransitionCaptureState *transition_capture, TransitionCaptureState *transition_capture,
@ -3117,29 +3107,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
* separately for DELETE and INSERT to capture transition table rows. * separately for DELETE and INSERT to capture transition table rows.
* In such case, either old tuple or new tuple can be NULL. * In such case, either old tuple or new tuple can be NULL.
*/ */
TupleTableSlot *oldslot;
ResultRelInfo *tupsrc;
Assert((src_partinfo != NULL && dst_partinfo != NULL) || Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
!is_crosspart_update); !is_crosspart_update);
tupsrc = src_partinfo ? src_partinfo : relinfo; if (fdw_trigtuple != NULL)
oldslot = ExecGetTriggerOldSlot(estate, tupsrc); {
Assert(oldslot);
if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
GetTupleForTrigger(estate,
NULL,
tupsrc,
tupleid,
LockTupleExclusive,
oldslot,
NULL,
NULL,
NULL);
else if (fdw_trigtuple != NULL)
ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false); ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
else }
ExecClearTuple(oldslot);
AfterTriggerSaveEvent(estate, relinfo, AfterTriggerSaveEvent(estate, relinfo,
src_partinfo, dst_partinfo, src_partinfo, dst_partinfo,

View File

@ -577,6 +577,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
{ {
List *recheckIndexes = NIL; List *recheckIndexes = NIL;
TU_UpdateIndexes update_indexes; TU_UpdateIndexes update_indexes;
TupleTableSlot *oldSlot = NULL;
/* Compute stored generated columns */ /* Compute stored generated columns */
if (rel->rd_att->constr && if (rel->rd_att->constr &&
@ -590,8 +591,12 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
if (rel->rd_rel->relispartition) if (rel->rd_rel->relispartition)
ExecPartitionCheck(resultRelInfo, slot, estate, true); ExecPartitionCheck(resultRelInfo, slot, estate, true);
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_after_row)
oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
simple_table_tuple_update(rel, tid, slot, estate->es_snapshot, simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
&update_indexes); &update_indexes, oldSlot);
if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None)) if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
recheckIndexes = ExecInsertIndexTuples(resultRelInfo, recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
@ -602,7 +607,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
/* AFTER ROW UPDATE Triggers */ /* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(estate, resultRelInfo, ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL, NULL, NULL,
tid, NULL, slot, NULL, oldSlot, slot,
recheckIndexes, NULL, false); recheckIndexes, NULL, false);
list_free(recheckIndexes); list_free(recheckIndexes);
@ -636,12 +641,18 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
if (!skip_tuple) if (!skip_tuple)
{ {
TupleTableSlot *oldSlot = NULL;
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_after_row)
oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
/* OK, delete the tuple */ /* OK, delete the tuple */
simple_table_tuple_delete(rel, tid, estate->es_snapshot); simple_table_tuple_delete(rel, tid, estate->es_snapshot, oldSlot);
/* AFTER ROW DELETE Triggers */ /* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo, ExecARDeleteTriggers(estate, resultRelInfo,
tid, NULL, NULL, false); NULL, oldSlot, NULL, false);
} }
} }

View File

@ -566,6 +566,15 @@ ExecInitInsertProjection(ModifyTableState *mtstate,
table_slot_create(resultRelInfo->ri_RelationDesc, table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable); &estate->es_tupleTable);
/*
* In the ON CONFLICT UPDATE case, we will also need a slot for the old
* tuple, which serves as the base for calculating the updated tuple.
*/
if (node->onConflictAction == ONCONFLICT_UPDATE)
resultRelInfo->ri_oldTupleSlot =
table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable);
/* Build ProjectionInfo if needed (it probably isn't). */ /* Build ProjectionInfo if needed (it probably isn't). */
if (need_projection) if (need_projection)
{ {
@ -1154,7 +1163,7 @@ ExecInsert(ModifyTableContext *context,
ExecARUpdateTriggers(estate, resultRelInfo, ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL, NULL, NULL,
NULL, NULL,
NULL, resultRelInfo->ri_oldTupleSlot,
slot, slot,
NULL, NULL,
mtstate->mt_transition_capture, mtstate->mt_transition_capture,
@ -1334,7 +1343,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/ */
static TM_Result static TM_Result
ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, bool changingPart) ItemPointer tupleid, bool changingPart, int options,
TupleTableSlot *oldSlot)
{ {
EState *estate = context->estate; EState *estate = context->estate;
@ -1342,9 +1352,10 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
estate->es_output_cid, estate->es_output_cid,
estate->es_snapshot, estate->es_snapshot,
estate->es_crosscheck_snapshot, estate->es_crosscheck_snapshot,
true /* wait for commit */ , options,
&context->tmfd, &context->tmfd,
changingPart); changingPart,
oldSlot);
} }
/* /*
@ -1353,10 +1364,15 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* Closing steps of tuple deletion; this invokes AFTER FOR EACH ROW triggers, * Closing steps of tuple deletion; this invokes AFTER FOR EACH ROW triggers,
* including the UPDATE triggers if the deletion is being done as part of a * including the UPDATE triggers if the deletion is being done as part of a
* cross-partition tuple move. * cross-partition tuple move.
*
* The old tuple is already fetched into slot for regular tables. For FDW,
* the old tuple is given as 'oldtuple' and is to be stored in 'slot' when
* needed.
*/ */
static void static void
ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, bool changingPart) ItemPointer tupleid, HeapTuple oldtuple,
TupleTableSlot *slot, bool changingPart)
{ {
ModifyTableState *mtstate = context->mtstate; ModifyTableState *mtstate = context->mtstate;
EState *estate = context->estate; EState *estate = context->estate;
@ -1374,8 +1390,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
{ {
ExecARUpdateTriggers(estate, resultRelInfo, ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL, NULL, NULL,
tupleid, oldtuple, oldtuple,
NULL, NULL, mtstate->mt_transition_capture, slot, NULL, NULL, mtstate->mt_transition_capture,
false); false);
/* /*
@ -1386,10 +1402,30 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
} }
/* AFTER ROW DELETE Triggers */ /* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple, ExecARDeleteTriggers(estate, resultRelInfo, oldtuple, slot,
ar_delete_trig_tcs, changingPart); ar_delete_trig_tcs, changingPart);
} }
/*
* Initializes the tuple slot in a ResultRelInfo for DELETE action.
*
* We mark 'projectNewInfoValid' even though the projections themselves
* are not initialized here.
*/
static void
ExecInitDeleteTupleSlot(ModifyTableState *mtstate,
ResultRelInfo *resultRelInfo)
{
EState *estate = mtstate->ps.state;
Assert(!resultRelInfo->ri_projectNewInfoValid);
resultRelInfo->ri_oldTupleSlot =
table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable);
resultRelInfo->ri_projectNewInfoValid = true;
}
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* ExecDelete * ExecDelete
* *
@ -1409,7 +1445,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* part of an UPDATE of partition-key, then the slot returned by * part of an UPDATE of partition-key, then the slot returned by
* EvalPlanQual() is passed back using output parameter epqreturnslot. * EvalPlanQual() is passed back using output parameter epqreturnslot.
* *
* Returns RETURNING result if any, otherwise NULL. * Returns RETURNING result if any, otherwise NULL. The deleted tuple
* is stored into oldslot independently of that.
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
static TupleTableSlot * static TupleTableSlot *
@ -1417,6 +1454,7 @@ ExecDelete(ModifyTableContext *context,
ResultRelInfo *resultRelInfo, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, ItemPointer tupleid,
HeapTuple oldtuple, HeapTuple oldtuple,
TupleTableSlot *oldslot,
bool processReturning, bool processReturning,
bool changingPart, bool changingPart,
bool canSetTag, bool canSetTag,
@ -1480,6 +1518,15 @@ ExecDelete(ModifyTableContext *context,
} }
else else
{ {
int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
/*
* Specify that we need to lock and fetch the last tuple version for
* EPQ on appropriate transaction isolation levels.
*/
if (!IsolationUsesXactSnapshot())
options |= TABLE_MODIFY_LOCK_UPDATED;
/* /*
* delete the tuple * delete the tuple
* *
@ -1490,7 +1537,8 @@ ExecDelete(ModifyTableContext *context,
* transaction-snapshot mode transactions. * transaction-snapshot mode transactions.
*/ */
ldelete: ldelete:
result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart); result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
options, oldslot);
if (tmresult) if (tmresult)
*tmresult = result; *tmresult = result;
@ -1537,7 +1585,6 @@ ldelete:
case TM_Updated: case TM_Updated:
{ {
TupleTableSlot *inputslot;
TupleTableSlot *epqslot; TupleTableSlot *epqslot;
if (IsolationUsesXactSnapshot()) if (IsolationUsesXactSnapshot())
@ -1546,87 +1593,29 @@ ldelete:
errmsg("could not serialize access due to concurrent update"))); errmsg("could not serialize access due to concurrent update")));
/* /*
* Already know that we're going to need to do EPQ, so * We need to do EPQ. The latest tuple is already found
* fetch tuple directly into the right slot. * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/ */
EvalPlanQualBegin(context->epqstate); Assert(context->tmfd.traversed);
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc, epqslot = EvalPlanQual(context->epqstate,
resultRelInfo->ri_RangeTableIndex); resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
oldslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
result = table_tuple_lock(resultRelationDesc, tupleid, /*
estate->es_snapshot, * If requested, skip delete and pass back the updated
inputslot, estate->es_output_cid, * row.
LockTupleExclusive, LockWaitBlock, */
TUPLE_LOCK_FLAG_FIND_LAST_VERSION, if (epqreturnslot)
&context->tmfd);
switch (result)
{ {
case TM_Ok: *epqreturnslot = epqslot;
Assert(context->tmfd.traversed); return NULL;
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/*
* If requested, skip delete and pass back the
* updated row.
*/
if (epqreturnslot)
{
*epqreturnslot = epqslot;
return NULL;
}
else
goto ldelete;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously updated by this
* command, ignore the delete, otherwise error
* out.
*
* See also TM_SelfModified response to
* table_tuple_delete() above.
*/
if (context->tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
default:
/*
* TM_Invisible should be impossible because we're
* waiting for updated row versions, and would
* already have errored out if the first version
* is invisible.
*
* TM_Updated should be impossible, because we're
* locking the latest version via
* TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
*/
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
} }
else
Assert(false); goto ldelete;
break;
} }
case TM_Deleted: case TM_Deleted:
@ -1660,7 +1649,8 @@ ldelete:
if (tupleDeleted) if (tupleDeleted)
*tupleDeleted = true; *tupleDeleted = true;
ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart); ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple,
oldslot, changingPart);
/* Process RETURNING if present and if requested */ /* Process RETURNING if present and if requested */
if (processReturning && resultRelInfo->ri_projectReturning) if (processReturning && resultRelInfo->ri_projectReturning)
@ -1678,17 +1668,13 @@ ldelete:
} }
else else
{ {
/* Copy old tuple to the returning slot */
slot = ExecGetReturningSlot(estate, resultRelInfo); slot = ExecGetReturningSlot(estate, resultRelInfo);
if (oldtuple != NULL) if (oldtuple != NULL)
{
ExecForceStoreHeapTuple(oldtuple, slot, false); ExecForceStoreHeapTuple(oldtuple, slot, false);
}
else else
{ ExecCopySlot(slot, oldslot);
if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid, Assert(!TupIsNull(slot));
SnapshotAny, slot))
elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
}
} }
rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot); rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot);
@ -1788,12 +1774,19 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
MemoryContextSwitchTo(oldcxt); MemoryContextSwitchTo(oldcxt);
} }
/*
* Make sure ri_oldTupleSlot is initialized. The old tuple is to be saved
* there by ExecDelete() to save effort on further re-fetching.
*/
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(mtstate, resultRelInfo);
/* /*
* Row movement, part 1. Delete the tuple, but skip RETURNING processing. * Row movement, part 1. Delete the tuple, but skip RETURNING processing.
* We want to return rows from INSERT. * We want to return rows from INSERT.
*/ */
ExecDelete(context, resultRelInfo, ExecDelete(context, resultRelInfo,
tupleid, oldtuple, tupleid, oldtuple, resultRelInfo->ri_oldTupleSlot,
false, /* processReturning */ false, /* processReturning */
true, /* changingPart */ true, /* changingPart */
false, /* canSetTag */ false, /* canSetTag */
@ -1834,21 +1827,13 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
return true; return true;
else else
{ {
/* Fetch the most recent version of old tuple. */ /*
TupleTableSlot *oldSlot; * ExecDelete already fetches the most recent version of old tuple
* to resultRelInfo->ri_oldTupleSlot. So, just project the new
/* ... but first, make sure ri_oldTupleSlot is initialized. */ * tuple to retry the UPDATE with.
if (unlikely(!resultRelInfo->ri_projectNewInfoValid)) */
ExecInitUpdateProjection(mtstate, resultRelInfo);
oldSlot = resultRelInfo->ri_oldTupleSlot;
if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
tupleid,
SnapshotAny,
oldSlot))
elog(ERROR, "failed to fetch tuple being updated");
/* and project the new tuple to retry the UPDATE with */
*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot, *retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
oldSlot); resultRelInfo->ri_oldTupleSlot);
return false; return false;
} }
} }
@ -1967,7 +1952,8 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
static TM_Result static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
bool canSetTag, UpdateContext *updateCxt) bool canSetTag, int options, TupleTableSlot *oldSlot,
UpdateContext *updateCxt)
{ {
EState *estate = context->estate; EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@ -2059,7 +2045,8 @@ lreplace:
ExecCrossPartitionUpdateForeignKey(context, ExecCrossPartitionUpdateForeignKey(context,
resultRelInfo, resultRelInfo,
insert_destrel, insert_destrel,
tupleid, slot, tupleid,
resultRelInfo->ri_oldTupleSlot,
inserted_tuple); inserted_tuple);
return TM_Ok; return TM_Ok;
@ -2102,9 +2089,10 @@ lreplace:
estate->es_output_cid, estate->es_output_cid,
estate->es_snapshot, estate->es_snapshot,
estate->es_crosscheck_snapshot, estate->es_crosscheck_snapshot,
true /* wait for commit */ , options /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode, &context->tmfd, &updateCxt->lockmode,
&updateCxt->updateIndexes); &updateCxt->updateIndexes,
oldSlot);
return result; return result;
} }
@ -2118,7 +2106,8 @@ lreplace:
static void static void
ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
ResultRelInfo *resultRelInfo, ItemPointer tupleid, ResultRelInfo *resultRelInfo, ItemPointer tupleid,
HeapTuple oldtuple, TupleTableSlot *slot) HeapTuple oldtuple, TupleTableSlot *slot,
TupleTableSlot *oldslot)
{ {
ModifyTableState *mtstate = context->mtstate; ModifyTableState *mtstate = context->mtstate;
List *recheckIndexes = NIL; List *recheckIndexes = NIL;
@ -2134,7 +2123,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
/* AFTER ROW UPDATE Triggers */ /* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(context->estate, resultRelInfo, ExecARUpdateTriggers(context->estate, resultRelInfo,
NULL, NULL, NULL, NULL,
tupleid, oldtuple, slot, oldtuple, oldslot, slot,
recheckIndexes, recheckIndexes,
mtstate->operation == CMD_INSERT ? mtstate->operation == CMD_INSERT ?
mtstate->mt_oc_transition_capture : mtstate->mt_oc_transition_capture :
@ -2223,7 +2212,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
/* Perform the root table's triggers. */ /* Perform the root table's triggers. */
ExecARUpdateTriggers(context->estate, ExecARUpdateTriggers(context->estate,
rootRelInfo, sourcePartInfo, destPartInfo, rootRelInfo, sourcePartInfo, destPartInfo,
tupleid, NULL, newslot, NIL, NULL, true); NULL, oldslot, newslot, NIL, NULL, true);
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
@ -2246,6 +2235,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
* no relevant triggers. * no relevant triggers.
* *
* slot contains the new tuple value to be stored. * slot contains the new tuple value to be stored.
* oldslot is the slot to store the old tuple.
* planSlot is the output of the ModifyTable's subplan; we use it * planSlot is the output of the ModifyTable's subplan; we use it
* to access values from other input tables (for RETURNING), * to access values from other input tables (for RETURNING),
* row-ID junk columns, etc. * row-ID junk columns, etc.
@ -2256,7 +2246,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
static TupleTableSlot * static TupleTableSlot *
ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
bool canSetTag) TupleTableSlot *oldslot, bool canSetTag, bool locked)
{ {
EState *estate = context->estate; EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@ -2309,6 +2299,16 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
} }
else else
{ {
int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
/*
* Specify that we need to lock and fetch the last tuple version for
* EPQ on appropriate transaction isolation levels if the tuple isn't
* locked already.
*/
if (!locked && !IsolationUsesXactSnapshot())
options |= TABLE_MODIFY_LOCK_UPDATED;
/* /*
* If we generate a new candidate tuple after EvalPlanQual testing, we * If we generate a new candidate tuple after EvalPlanQual testing, we
* must loop back here to try again. (We don't need to redo triggers, * must loop back here to try again. (We don't need to redo triggers,
@ -2318,7 +2318,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/ */
redo_act: redo_act:
result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot, result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
canSetTag, &updateCxt); canSetTag, options, oldslot, &updateCxt);
/* /*
* If ExecUpdateAct reports that a cross-partition update was done, * If ExecUpdateAct reports that a cross-partition update was done,
@ -2369,88 +2369,32 @@ redo_act:
case TM_Updated: case TM_Updated:
{ {
TupleTableSlot *inputslot;
TupleTableSlot *epqslot; TupleTableSlot *epqslot;
TupleTableSlot *oldSlot;
if (IsolationUsesXactSnapshot()) if (IsolationUsesXactSnapshot())
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update"))); errmsg("could not serialize access due to concurrent update")));
/* Shouldn't get here if the tuple was previously locked */
Assert(!locked);
/* /*
* Already know that we're going to need to do EPQ, so * We need to do EPQ. The latest tuple is already found
* fetch tuple directly into the right slot. * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/ */
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc, Assert(context->tmfd.traversed);
resultRelInfo->ri_RangeTableIndex); epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
result = table_tuple_lock(resultRelationDesc, tupleid, resultRelInfo->ri_RangeTableIndex,
estate->es_snapshot, oldslot);
inputslot, estate->es_output_cid, if (TupIsNull(epqslot))
updateCxt.lockmode, LockWaitBlock, /* Tuple not passing quals anymore, exiting... */
TUPLE_LOCK_FLAG_FIND_LAST_VERSION, return NULL;
&context->tmfd); slot = ExecGetUpdateNewTuple(resultRelInfo,
epqslot,
switch (result) oldslot);
{ goto redo_act;
case TM_Ok:
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/* Make sure ri_oldTupleSlot is initialized. */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(context->mtstate,
resultRelInfo);
/* Fetch the most recent version of old tuple. */
oldSlot = resultRelInfo->ri_oldTupleSlot;
if (!table_tuple_fetch_row_version(resultRelationDesc,
tupleid,
SnapshotAny,
oldSlot))
elog(ERROR, "failed to fetch tuple being updated");
slot = ExecGetUpdateNewTuple(resultRelInfo,
epqslot, oldSlot);
goto redo_act;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously modified by
* this command, ignore the redundant update,
* otherwise error out.
*
* See also TM_SelfModified response to
* table_tuple_update() above.
*/
if (context->tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
default:
/* see table_tuple_lock call in ExecDelete() */
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
}
} }
break; break;
@ -2474,7 +2418,7 @@ redo_act:
(estate->es_processed)++; (estate->es_processed)++;
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple, ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple,
slot); slot, oldslot);
/* Process RETURNING if present */ /* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning) if (resultRelInfo->ri_projectReturning)
@ -2692,7 +2636,8 @@ ExecOnConflictUpdate(ModifyTableContext *context,
*returning = ExecUpdate(context, resultRelInfo, *returning = ExecUpdate(context, resultRelInfo,
conflictTid, NULL, conflictTid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot, resultRelInfo->ri_onConflict->oc_ProjSlot,
canSetTag); existing,
canSetTag, true);
/* /*
* Clear out existing tuple, as there might not be another conflict among * Clear out existing tuple, as there might not be another conflict among
@ -2934,6 +2879,7 @@ lmerge_matched:
{ {
result = ExecUpdateAct(context, resultRelInfo, tupleid, result = ExecUpdateAct(context, resultRelInfo, tupleid,
NULL, newslot, canSetTag, NULL, newslot, canSetTag,
TABLE_MODIFY_WAIT, NULL,
&updateCxt); &updateCxt);
/* /*
@ -2956,7 +2902,8 @@ lmerge_matched:
if (result == TM_Ok) if (result == TM_Ok)
{ {
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
tupleid, NULL, newslot); tupleid, NULL, newslot,
resultRelInfo->ri_oldTupleSlot);
mtstate->mt_merge_updated += 1; mtstate->mt_merge_updated += 1;
} }
break; break;
@ -2987,12 +2934,12 @@ lmerge_matched:
} }
else else
result = ExecDeleteAct(context, resultRelInfo, tupleid, result = ExecDeleteAct(context, resultRelInfo, tupleid,
false); false, TABLE_MODIFY_WAIT, NULL);
if (result == TM_Ok) if (result == TM_Ok)
{ {
ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL, ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
false); resultRelInfo->ri_oldTupleSlot, false);
mtstate->mt_merge_deleted += 1; mtstate->mt_merge_deleted += 1;
} }
break; break;
@ -4006,12 +3953,18 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */ /* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple, slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
slot, node->canSetTag); slot, resultRelInfo->ri_oldTupleSlot,
node->canSetTag, false);
break; break;
case CMD_DELETE: case CMD_DELETE:
/* Initialize slot for DELETE to fetch the old tuple */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitDeleteTupleSlot(node, resultRelInfo);
slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple, slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple,
true, false, node->canSetTag, NULL, NULL, NULL); resultRelInfo->ri_oldTupleSlot, true, false,
node->canSetTag, NULL, NULL, NULL);
break; break;
case CMD_MERGE: case CMD_MERGE:

View File

@ -284,19 +284,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
int ntuples, CommandId cid, int options, int ntuples, CommandId cid, int options,
BulkInsertState bistate); BulkInsertState bistate);
extern TM_Result heap_delete(Relation relation, ItemPointer tid, extern TM_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait, CommandId cid, Snapshot crosscheck, int options,
struct TM_FailureData *tmfd, bool changingPart); struct TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot);
extern void heap_finish_speculative(Relation relation, ItemPointer tid); extern void heap_finish_speculative(Relation relation, ItemPointer tid);
extern void heap_abort_speculative(Relation relation, ItemPointer tid); extern void heap_abort_speculative(Relation relation, ItemPointer tid);
extern TM_Result heap_update(Relation relation, ItemPointer otid, extern TM_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup, HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait, CommandId cid, Snapshot crosscheck, int options,
struct TM_FailureData *tmfd, LockTupleMode *lockmode, struct TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes); TU_UpdateIndexes *update_indexes,
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, TupleTableSlot *oldSlot);
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid,
bool follow_updates, TupleTableSlot *slot,
Buffer *buffer, struct TM_FailureData *tmfd); CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, bool follow_updates,
struct TM_FailureData *tmfd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple); extern void heap_inplace_update(Relation relation, HeapTuple tuple);
extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,

View File

@ -259,6 +259,15 @@ typedef struct TM_IndexDeleteOp
/* Follow update chain and lock latest version of tuple */ /* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1) #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
/*
* "options" flag bits for table_tuple_update and table_tuple_delete.
*/
/* Wait for any conflicting update to commit/abort */
#define TABLE_MODIFY_WAIT 0x0001
/* Fetch the existing tuple into a dedicated slot */
#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002
/* On concurrent update, follow the update chain and lock latest version of tuple */
#define TABLE_MODIFY_LOCK_UPDATED 0x0004
/* Typedef for callback function for table_index_build_scan */ /* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index, typedef void (*IndexBuildCallback) (Relation index,
@ -528,9 +537,10 @@ typedef struct TableAmRoutine
CommandId cid, CommandId cid,
Snapshot snapshot, Snapshot snapshot,
Snapshot crosscheck, Snapshot crosscheck,
bool wait, int options,
TM_FailureData *tmfd, TM_FailureData *tmfd,
bool changingPart); bool changingPart,
TupleTableSlot *oldSlot);
/* see table_tuple_update() for reference about parameters */ /* see table_tuple_update() for reference about parameters */
TM_Result (*tuple_update) (Relation rel, TM_Result (*tuple_update) (Relation rel,
@ -539,10 +549,11 @@ typedef struct TableAmRoutine
CommandId cid, CommandId cid,
Snapshot snapshot, Snapshot snapshot,
Snapshot crosscheck, Snapshot crosscheck,
bool wait, int options,
TM_FailureData *tmfd, TM_FailureData *tmfd,
LockTupleMode *lockmode, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes); TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot);
/* see table_tuple_lock() for reference about parameters */ /* see table_tuple_lock() for reference about parameters */
TM_Result (*tuple_lock) (Relation rel, TM_Result (*tuple_lock) (Relation rel,
@ -1463,7 +1474,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
} }
/* /*
* Delete a tuple. * Delete a tuple (and optionally lock the last tuple version).
* *
* NB: do not call this directly unless prepared to deal with * NB: do not call this directly unless prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_delete instead. * concurrent-update conditions. Use simple_table_tuple_delete instead.
@ -1474,11 +1485,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* cid - delete command ID (used for visibility test, and stored into * cid - delete command ID (used for visibility test, and stored into
* cmax if successful) * cmax if successful)
* crosscheck - if not InvalidSnapshot, also check tuple against this * crosscheck - if not InvalidSnapshot, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort * options:
* If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
* If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
*	fetched into oldSlot when the delete is successful.
* If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
* concurrently updated, then the last tuple version is locked and fetched
* into oldSlot.
*
* Output parameters: * Output parameters:
* tmfd - filled in failure cases (see below) * tmfd - filled in failure cases (see below)
* changingPart - true iff the tuple is being moved to another partition * changingPart - true iff the tuple is being moved to another partition
* table due to an update of the partition key. Otherwise, false. * table due to an update of the partition key. Otherwise, false.
*	oldSlot - slot to save the deleted or locked tuple. Can be NULL if
*	neither TABLE_MODIFY_FETCH_OLD_TUPLE nor TABLE_MODIFY_LOCK_UPDATED
*	is specified.
* *
* Normal, successful return value is TM_Ok, which means we did actually * Normal, successful return value is TM_Ok, which means we did actually
* delete it. Failure return codes are TM_SelfModified, TM_Updated, and * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
@ -1490,16 +1511,18 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
*/ */
static inline TM_Result static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
Snapshot snapshot, Snapshot crosscheck, bool wait, Snapshot snapshot, Snapshot crosscheck, int options,
TM_FailureData *tmfd, bool changingPart) TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot)
{ {
return rel->rd_tableam->tuple_delete(rel, tid, cid, return rel->rd_tableam->tuple_delete(rel, tid, cid,
snapshot, crosscheck, snapshot, crosscheck,
wait, tmfd, changingPart); options, tmfd, changingPart,
oldSlot);
} }
/* /*
* Update a tuple. * Update a tuple (and optionally lock the last tuple version).
* *
* NB: do not call this directly unless you are prepared to deal with * NB: do not call this directly unless you are prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_update instead. * concurrent-update conditions. Use simple_table_tuple_update instead.
@ -1511,13 +1534,23 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* cid - update command ID (used for visibility test, and stored into * cid - update command ID (used for visibility test, and stored into
* cmax/cmin if successful) * cmax/cmin if successful)
* crosscheck - if not InvalidSnapshot, also check old tuple against this * crosscheck - if not InvalidSnapshot, also check old tuple against this
* wait - true if should wait for any conflicting update to commit/abort * options:
* If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
* If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
* fetched into oldSlot when the update is successful.
* If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
* concurrently updated, then the last tuple version is locked and fetched
* into oldSlot.
*
* Output parameters: * Output parameters:
* tmfd - filled in failure cases (see below) * tmfd - filled in failure cases (see below)
* lockmode - filled with lock mode acquired on tuple * lockmode - filled with lock mode acquired on tuple
* update_indexes - in success cases this is set to true if new index entries * update_indexes - in success cases this is set to true if new index entries
* are required for this tuple * are required for this tuple
* * oldSlot - slot to save the old (pre-update) or locked tuple. Can be NULL if
* neither TABLE_MODIFY_FETCH_OLD_TUPLE nor TABLE_MODIFY_LOCK_UPDATED
* is specified.
*
* Normal, successful return value is TM_Ok, which means we did actually * Normal, successful return value is TM_Ok, which means we did actually
* update it. Failure return codes are TM_SelfModified, TM_Updated, and * update it. Failure return codes are TM_SelfModified, TM_Updated, and
* TM_BeingModified (the last only possible if wait == false). * TM_BeingModified (the last only possible if TABLE_MODIFY_WAIT is not set).
@ -1535,13 +1568,15 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
static inline TM_Result static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck, CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, int options, TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes) TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot)
{ {
return rel->rd_tableam->tuple_update(rel, otid, slot, return rel->rd_tableam->tuple_update(rel, otid, slot,
cid, snapshot, crosscheck, cid, snapshot, crosscheck,
wait, tmfd, options, tmfd,
lockmode, update_indexes); lockmode, update_indexes,
oldSlot);
} }
/* /*
@ -2083,10 +2118,12 @@ table_scan_sample_next_tuple(TableScanDesc scan,
extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot); extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
extern void simple_table_tuple_delete(Relation rel, ItemPointer tid, extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
Snapshot snapshot); Snapshot snapshot,
TupleTableSlot *oldSlot);
extern void simple_table_tuple_update(Relation rel, ItemPointer otid, extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot, Snapshot snapshot, TupleTableSlot *slot, Snapshot snapshot,
TU_UpdateIndexes *update_indexes); TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot);
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------

View File

@ -216,8 +216,8 @@ extern bool ExecBRDeleteTriggers(EState *estate,
TM_FailureData *tmfd); TM_FailureData *tmfd);
extern void ExecARDeleteTriggers(EState *estate, extern void ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo, ResultRelInfo *relinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple, HeapTuple fdw_trigtuple,
TupleTableSlot *slot,
TransitionCaptureState *transition_capture, TransitionCaptureState *transition_capture,
bool is_crosspart_update); bool is_crosspart_update);
extern bool ExecIRDeleteTriggers(EState *estate, extern bool ExecIRDeleteTriggers(EState *estate,
@ -240,8 +240,8 @@ extern void ExecARUpdateTriggers(EState *estate,
ResultRelInfo *relinfo, ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo, ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo, ResultRelInfo *dst_partinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple, HeapTuple fdw_trigtuple,
TupleTableSlot *oldslot,
TupleTableSlot *newslot, TupleTableSlot *newslot,
List *recheckIndexes, List *recheckIndexes,
TransitionCaptureState *transition_capture, TransitionCaptureState *transition_capture,