diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index bcf987124f..0424a96058 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2163,18 +2163,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
      */
     heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
 
-    /*
-     * We're about to do the actual insert -- but check for conflict first, to
-     * avoid possibly having to roll back work we've just done.
-     *
-     * For a heap insert, we only need to check for table-level SSI locks. Our
-     * new tuple can't possibly conflict with existing tuple locks, and heap
-     * page locks are only consolidated versions of tuple locks; they do not
-     * lock "gaps" as index page locks do. So we don't need to identify a
-     * buffer before making the call.
-     */
-    CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
-
     /*
      * Find buffer to insert this tuple into. If the page is all visible,
      * this will also pin the requisite visibility map page.
@@ -2183,6 +2171,23 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
                                        InvalidBuffer, options, bistate,
                                        &vmbuffer, NULL);
 
+    /*
+     * We're about to do the actual insert -- but check for conflict first, to
+     * avoid possibly having to roll back work we've just done.
+     *
+     * This is safe without a recheck as long as there is no possibility of
+     * another process scanning the page between this check and the insert
+     * being visible to the scan (i.e., an exclusive buffer content lock is
+     * continuously held from this point until the tuple insert is visible).
+     *
+     * For a heap insert, we only need to check for table-level SSI locks. Our
+     * new tuple can't possibly conflict with existing tuple locks, and heap
+     * page locks are only consolidated versions of tuple locks; they do not
+     * lock "gaps" as index page locks do. So we don't need to specify a
+     * buffer when making the call, which makes for a faster check.
+     */
+    CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
+
     /* NO EREPORT(ERROR) from here till changes are logged */
     START_CRIT_SECTION();
 
@@ -2436,13 +2441,26 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
 
     /*
      * We're about to do the actual inserts -- but check for conflict first,
-     * to avoid possibly having to roll back work we've just done.
+     * to minimize the possibility of having to roll back work we've just
+     * done.
      *
-     * For a heap insert, we only need to check for table-level SSI locks. Our
-     * new tuple can't possibly conflict with existing tuple locks, and heap
+     * A check here does not definitively prevent a serialization anomaly;
+     * that check MUST be done at least past the point of acquiring an
+     * exclusive buffer content lock on every buffer that will be affected,
+     * and MAY be done after all inserts are reflected in the buffers and
+     * those locks are released; otherwise there is a race condition. Since
+     * multiple buffers can be locked and unlocked in the loop below, and it
+     * would not be feasible to identify and lock all of those buffers before
+     * the loop, we must do a final check at the end.
+     *
+     * The check here could be omitted with no loss of correctness; it is
+     * present strictly as an optimization.
+     *
+     * For heap inserts, we only need to check for table-level SSI locks. Our
+     * new tuples can't possibly conflict with existing tuple locks, and heap
      * page locks are only consolidated versions of tuple locks; they do not
-     * lock "gaps" as index page locks do. So we don't need to identify a
-     * buffer before making the call.
+     * lock "gaps" as index page locks do. So we don't need to specify a
+     * buffer when making the call, which makes for a faster check.
      */
     CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
 
@@ -2621,6 +2639,22 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
         ndone += nthispage;
     }
 
+    /*
+     * We're done with the actual inserts. Check for conflicts again, to
+     * ensure that all rw-conflicts in to these inserts are detected. Without
+     * this final check, a sequential scan of the heap may have locked the
+     * table after the "before" check, missing one opportunity to detect the
+     * conflict, and then scanned the table before the new tuples were there,
+     * missing the other chance to detect the conflict.
+     *
+     * For heap inserts, we only need to check for table-level SSI locks. Our
+     * new tuples can't possibly conflict with existing tuple locks, and heap
+     * page locks are only consolidated versions of tuple locks; they do not
+     * lock "gaps" as index page locks do. So we don't need to specify a
+     * buffer when making the call.
+     */
+    CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
+
     /*
      * If tuples are cachable, mark them for invalidation from the caches in
      * case we abort. Note it is OK to do this after releasing the buffer,
@@ -2934,6 +2968,11 @@ l1:
     /*
      * We're about to do the actual delete -- check for conflict first, to
      * avoid possibly having to roll back work we've just done.
+     *
+     * This is safe without a recheck as long as there is no possibility of
+     * another process scanning the page between this check and the delete
+     * being visible to the scan (i.e., an exclusive buffer content lock is
+     * continuously held from this point until the tuple delete is visible).
      */
     CheckForSerializableConflictIn(relation, &tp, buffer);
 
@@ -3561,12 +3600,6 @@ l2:
         goto l2;
     }
 
-    /*
-     * We're about to do the actual update -- check for conflict first, to
-     * avoid possibly having to roll back work we've just done.
-     */
-    CheckForSerializableConflictIn(relation, &oldtup, buffer);
-
     /* Fill in transaction status data */
 
     /*
@@ -3755,14 +3788,20 @@ l2:
     }
 
     /*
-     * We're about to create the new tuple -- check for conflict first, to
+     * We're about to do the actual update -- check for conflict first, to
      * avoid possibly having to roll back work we've just done.
      *
-     * NOTE: For a tuple insert, we only need to check for table locks, since
-     * predicate locking at the index level will cover ranges for anything
-     * except a table scan. Therefore, only provide the relation.
+     * This is safe without a recheck as long as there is no possibility of
+     * another process scanning the pages between this check and the update
+     * being visible to the scan (i.e., exclusive buffer content lock(s) are
+     * continuously held from this point until the tuple update is visible).
+     *
+     * For the new tuple the only check needed is at the relation level, but
+     * since both tuples are in the same relation and the check for oldtup
+     * will include checking the relation level, there is no benefit to a
+     * separate check for the new tuple.
      */
-    CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
+    CheckForSerializableConflictIn(relation, &oldtup, buffer);
 
     /*
      * At this point newbuf and buffer are both pinned and locked, and newbuf
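
Every heapam.c hunk above enforces the same invariant: CheckForSerializableConflictIn() must not run until the exclusive buffer content lock is held, and that lock must then be held continuously until the change is visible. As a rough illustration, here is a condensed sketch of the resulting ordering in heap_insert() -- our paraphrase, not code from the patch; the function name is invented, and WAL logging, error paths, and visibility-map bookkeeping are elided:

/*
 * Condensed sketch (ours) of the check ordering established above for
 * heap_insert(): lock the buffer first, then do the SSI check, then the
 * insert proper, all without releasing the content lock in between.
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/hio.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"

void
ssi_check_ordering_sketch(Relation relation, HeapTuple heaptup,
                          int options, BulkInsertState bistate)
{
    Buffer      buffer;
    Buffer      vmbuffer = InvalidBuffer;

    /* Step 1: pin and exclusive-lock the target page FIRST. */
    buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
                                       InvalidBuffer, options, bistate,
                                       &vmbuffer, NULL);

    /*
     * Step 2: only now run the SSI check.  Because the exclusive content
     * lock is held continuously from here until the insert is visible, no
     * serializable scan can read the page between the check and the insert.
     */
    CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);

    /* Step 3: the insert proper, inside the critical section. */
    START_CRIT_SECTION();
    /* ... RelationPutHeapTuple(), WAL logging, etc. ... */
    END_CRIT_SECTION();

    UnlockReleaseBuffer(buffer);
    if (BufferIsValid(vmbuffer))
        ReleaseBuffer(vmbuffer);
}

heap_multi_insert() cannot follow this shape because its loop locks and unlocks a series of buffers, which is exactly why it instead brackets the loop with an optimistic check before and a mandatory check after, as the comments above spell out.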
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index bad5618341..47b4adaf0e 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -3217,22 +3217,21 @@ ReleasePredicateLocks(bool isCommit)
         return;
     }
 
+    LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
+
     Assert(!isCommit || SxactIsPrepared(MySerializableXact));
     Assert(!isCommit || !SxactIsDoomed(MySerializableXact));
     Assert(!SxactIsCommitted(MySerializableXact));
     Assert(!SxactIsRolledBack(MySerializableXact));
 
     /* may not be serializable during COMMIT/ROLLBACK PREPARED */
-    if (MySerializableXact->pid != 0)
-        Assert(IsolationIsSerializable());
+    Assert(MySerializableXact->pid == 0 || IsolationIsSerializable());
 
     /* We'd better not already be on the cleanup list. */
     Assert(!SxactIsOnFinishedList(MySerializableXact));
 
     topLevelIsDeclaredReadOnly = SxactIsReadOnly(MySerializableXact);
 
-    LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
-
     /*
      * We don't hold XidGenLock lock here, assuming that TransactionId is
      * atomic!
@@ -4369,7 +4368,7 @@ CheckTableForSerializableConflictIn(Relation relation)
     LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
     for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
         LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
-    LWLockAcquire(SerializableXactHashLock, LW_SHARED);
+    LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
 
     /* Scan through target list */
     hash_seq_init(&seqstat, PredicateLockTargetHash);
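
The predicate.c hunks apply the same discipline to SSI's own shared state. Moving LWLockAcquire() to the top of ReleasePredicateLocks() means the assertions and the reads of MySerializableXact's flags now execute under SerializableXactHashLock rather than before it, closing the window in which a concurrent backend could change those flags (e.g., by dooming the transaction). The following self-contained analogue -- our illustration using pthreads; SharedXact and both function names are invented and are not PostgreSQL code -- shows the shape of the fix:

/*
 * Self-contained analogue (ours, not PostgreSQL code) of the
 * ReleasePredicateLocks() change: shared transaction state must not be
 * examined until the lock protecting it is held.
 */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

typedef struct SharedXact
{
    pthread_mutex_t lock;       /* stands in for SerializableXactHashLock */
    bool            doomed;     /* set by other threads under the lock */
} SharedXact;

/* Old shape: the flag is read before the lock is taken -- racy. */
void
release_locks_old(SharedXact *sx)
{
    assert(!sx->doomed);        /* can race with a concurrent writer */
    pthread_mutex_lock(&sx->lock);
    /* ... release predicate locks, update conflict lists ... */
    pthread_mutex_unlock(&sx->lock);
}

/* New shape: acquire first, then examine -- the flag is stable. */
void
release_locks_new(SharedXact *sx)
{
    pthread_mutex_lock(&sx->lock);
    assert(!sx->doomed);        /* cannot change while we hold the lock */
    /* ... release predicate locks, update conflict lists ... */
    pthread_mutex_unlock(&sx->lock);
}

The final hunk upgrades CheckTableForSerializableConflictIn()'s hold on SerializableXactHashLock from LW_SHARED to LW_EXCLUSIVE on the same reasoning: flagging rw-conflicts while scanning the target list modifies the shared conflict lists, which a shared lock does not permit.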