From 459c64d3227f878c72ef0145a67e80e17728c556 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Wed, 26 Jul 2017 17:24:16 -0400 Subject: [PATCH] Fix concurrent locking of tuple update chain If several sessions are concurrently locking a tuple update chain with nonconflicting lock modes using an old snapshot, and they all succeed, it may happen that some of them fail because of restarting the loop (due to a concurrent Xmax change) and getting an error in the subsequent pass while trying to obtain a tuple lock that they already have in some tuple version. This can only happen with very high concurrency (where a row is being both updated and FK-checked by multiple transactions concurrently), but it's been observed in the field and can have unpleasant consequences such as an FK check failing to see a tuple that definitely exists: ERROR: insert or update on table "child_table" violates foreign key constraint "fk_constraint_name" DETAIL: Key (keyid)=(123456) is not present in table "parent_table". (where the key is observably present in the table). Discussion: https://postgr.es/m/20170714210011.r25mrff4nxjhmf3g@alvherre.pgsql --- src/backend/access/heap/heapam.c | 41 ++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 011f2b9c54..479c3843f0 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -5524,8 +5524,10 @@ l5: * with the given xid, does the current transaction need to wait, fail, or can * it continue if it wanted to acquire a lock of the given mode? "needwait" * is set to true if waiting is necessary; if it can continue, then - * HeapTupleMayBeUpdated is returned. In case of a conflict, a different - * HeapTupleSatisfiesUpdate return code is returned. + * HeapTupleMayBeUpdated is returned. If the lock is already held by the + * current transaction, return HeapTupleSelfUpdated. In case of a conflict + * with another transaction, a different HeapTupleSatisfiesUpdate return code + * is returned. * * The held status is said to be hypothetical because it might correspond to a * lock held by a single Xid, i.e. not a real MultiXactId; we express it this @@ -5548,8 +5550,9 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid, if (TransactionIdIsCurrentTransactionId(xid)) { /* - * Updated by our own transaction? Just return failure. This - * shouldn't normally happen. + * The tuple has already been locked by our own transaction. This is + * very rare but can happen if multiple transactions are trying to + * lock an ancient version of the same tuple. */ return HeapTupleSelfUpdated; } @@ -5748,6 +5751,22 @@ l4: members[i].xid, mode, &needwait); + /* + * If the tuple was already locked by ourselves in a + * previous iteration of this (say heap_lock_tuple was + * forced to restart the locking loop because of a change + * in xmax), then we hold the lock already on this tuple + * version and we don't need to do anything; and this is + * not an error condition either. We just need to skip + * this tuple and continue locking the next version in the + * update chain. + */ + if (result == HeapTupleSelfUpdated) + { + pfree(members); + goto next; + } + if (needwait) { LockBuffer(buf, BUFFER_LOCK_UNLOCK); @@ -5808,6 +5827,19 @@ l4: result = test_lockmode_for_conflict(status, rawxmax, mode, &needwait); + + /* + * If the tuple was already locked by ourselves in a previous + * iteration of this (say heap_lock_tuple was forced to + * restart the locking loop because of a change in xmax), then + * we hold the lock already on this tuple version and we don't + * need to do anything; and this is not an error condition + * either. We just need to skip this tuple and continue + * locking the next version in the update chain. + */ + if (result == HeapTupleSelfUpdated) + goto next; + if (needwait) { LockBuffer(buf, BUFFER_LOCK_UNLOCK); @@ -5868,6 +5900,7 @@ l4: END_CRIT_SECTION(); +next: /* if we find the end of update chain, we're done. */ if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID || ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||