Raise error when affecting tuple moved into different partition.

When an update moves a row between partitions (supported since
2f17844104), our normal logic for following update chains in READ
COMMITTED mode doesn't work anymore. Cross partition updates are
modeled as a delete from the old and an insert into the new
partition. No ctid chain exists across partitions, and there's no
convenient space to introduce that link.

Not throwing an error in a partitioned context when one would have
been thrown without partitioning is obviously problematic. This commit
introduces infrastructure to detect when a tuple has been moved, not
just plainly deleted. That allows throwing an error when encountering
a deletion that's actually a move, while attempting to follow a
ctid chain.

The row deleted as part of a cross partition update is marked by
pointing its t_ctid to an invalid block, instead of self as a normal
update would.  That was deemed to be the least invasive and most
future proof way to represent the knowledge, given how few infomask
bits are there to be recycled (there are also some locking issues with
using infomask bits).

External code following ctid chains should be updated to check for
moved tuples. The most likely consequence of not doing so is a missed
error.

Author: Amul Sul, editorialized by me
Reviewed-By: Amit Kapila, Pavan Deolasee, Andres Freund, Robert Haas
Discussion: http://postgr.es/m/CAAJ_b95PkwojoYfz0bzXU8OokcTVGzN6vYGCNVUukeUDrnF3dw@mail.gmail.com
This commit is contained in:
Andres Freund 2018-04-07 13:24:10 -07:00
parent 8224de4f42
commit f16241bef7
23 changed files with 624 additions and 22 deletions

View File

@ -2308,6 +2308,7 @@ heap_get_latest_tid(Relation relation,
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
{
UnlockReleaseBuffer(buffer);
@ -3041,6 +3042,8 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
* crosscheck - if not InvalidSnapshot, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* hufd - output parameter, filled in failure cases (see below)
* changingPart - true iff the tuple is being moved to another partition
* table due to an update of the partition key. Otherwise, false.
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we did delete it. Failure return codes are
@ -3056,7 +3059,7 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
HTSU_Result
heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
HeapUpdateFailureData *hufd)
HeapUpdateFailureData *hufd, bool changingPart)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
@ -3325,6 +3328,10 @@ l1:
/* Make sure there is no forward chain link in t_ctid */
tp.t_data->t_ctid = tp.t_self;
/* Signal that this is actually a move into another partition */
if (changingPart)
HeapTupleHeaderSetMovedPartitions(tp.t_data);
MarkBufferDirty(buffer);
/*
@ -3342,7 +3349,11 @@ l1:
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, &tp);
xlrec.flags = all_visible_cleared ? XLH_DELETE_ALL_VISIBLE_CLEARED : 0;
xlrec.flags = 0;
if (all_visible_cleared)
xlrec.flags |= XLH_DELETE_ALL_VISIBLE_CLEARED;
if (changingPart)
xlrec.flags |= XLH_DELETE_IS_PARTITION_MOVE;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
@ -3450,7 +3461,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
result = heap_delete(relation, tid,
GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */ ,
&hufd);
&hufd, false /* changingPart */);
switch (result)
{
case HeapTupleSelfUpdated:
@ -6051,6 +6062,7 @@ l4:
next:
/* if we find the end of update chain, we're done. */
if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) ||
ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||
HeapTupleHeaderIsOnlyLocked(mytup.t_data))
{
@ -6102,7 +6114,12 @@ static HTSU_Result
heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
TransactionId xid, LockTupleMode mode)
{
if (!ItemPointerEquals(&tuple->t_self, ctid))
/*
* If the tuple has not been updated, or has moved into another partition
* (effectively a delete) stop here.
*/
if (!HeapTupleHeaderIndicatesMovedPartitions(tuple->t_data) &&
!ItemPointerEquals(&tuple->t_self, ctid))
{
/*
* If this is the first possibly-multixact-able operation in the
@ -8493,8 +8510,11 @@ heap_xlog_delete(XLogReaderState *record)
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = target_tid;
/* Make sure t_ctid is set correctly */
if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
HeapTupleHeaderSetMovedPartitions(htup);
else
htup->t_ctid = target_tid;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
@ -9422,6 +9442,13 @@ heap_mask(char *pagedata, BlockNumber blkno)
*/
if (HeapTupleHeaderIsSpeculative(page_htup))
ItemPointerSet(&page_htup->t_ctid, blkno, off);
/*
* NB: Not ignoring ctid changes due to the tuple having moved
* (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
* important information that needs to be in-sync between primary
* and standby, and thus is WAL logged.
*/
}
/*

View File

@ -552,6 +552,9 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
if (!HeapTupleHeaderIsHotUpdated(htup))
break;
/* HOT implies it can't have moved to different partition */
Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
/*
* Advance to next chain member.
*/
@ -823,6 +826,9 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
if (!HeapTupleHeaderIsHotUpdated(htup))
break;
/* HOT implies it can't have moved to different partition */
Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}

View File

@ -424,6 +424,7 @@ rewrite_heap_tuple(RewriteState state,
*/
if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
!HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) &&
!(ItemPointerEquals(&(old_tuple->t_self),
&(old_tuple->t_data->t_ctid))))
{

View File

@ -3315,6 +3315,11 @@ ltrmark:;
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
{
/* it was updated, so look at the updated version */

View File

@ -2733,6 +2733,10 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
/* Should not encounter speculative tuple on recheck */
Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
@ -2801,6 +2805,14 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
* As above, it should be safe to examine xmax and t_ctid without the
* buffer content lock, because they can't be changing.
*/
/* check whether next version would be in a different partition */
if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
/* check whether tuple has been deleted */
if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
{
/* deleted, so forget about it */

View File

@ -324,7 +324,8 @@ lmerge_matched:;
slot = ExecDelete(mtstate, tupleid, NULL,
slot, epqstate, estate,
&tuple_deleted, false, &hufd, action,
mtstate->canSetTag);
mtstate->canSetTag,
false /* changingPart */);
break;

View File

@ -191,9 +191,14 @@ retry:
break;
case HeapTupleUpdated:
/* XXX: Improve handling here */
ereport(LOG,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("concurrent update, retrying")));
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
ereport(LOG,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
else
ereport(LOG,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("concurrent update, retrying")));
goto retry;
case HeapTupleInvisible:
elog(ERROR, "attempted to lock invisible tuple");
@ -349,9 +354,14 @@ retry:
break;
case HeapTupleUpdated:
/* XXX: Improve handling here */
ereport(LOG,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("concurrent update, retrying")));
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
ereport(LOG,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
else
ereport(LOG,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("concurrent update, retrying")));
goto retry;
case HeapTupleInvisible:
elog(ERROR, "attempted to lock invisible tuple");

View File

@ -218,6 +218,11 @@ lnext:
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
if (ItemPointerEquals(&hufd.ctid, &tuple.t_self))
{
/* Tuple was deleted, so don't return it */

View File

@ -645,7 +645,8 @@ ExecDelete(ModifyTableState *mtstate,
bool processReturning,
HeapUpdateFailureData *hufdp,
MergeActionState *actionState,
bool canSetTag)
bool canSetTag,
bool changingPart)
{
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
@ -744,7 +745,8 @@ ldelete:;
estate->es_output_cid,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&hufd);
&hufd,
changingPart);
/*
* Copy the necessary information, if the caller has asked for it. We
@ -803,6 +805,10 @@ ldelete:;
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be deleted was already moved to another partition due to concurrent update")));
if (!ItemPointerEquals(tupleid, &hufd.ctid))
{
@ -1157,7 +1163,7 @@ lreplace:;
*/
ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
estate, &tuple_deleted, false, hufdp, NULL,
false);
false /* canSetTag */, true /* changingPart */);
/*
* For some reason if DELETE didn't happen (e.g. trigger prevented
@ -1333,6 +1339,10 @@ lreplace:;
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
if (!ItemPointerEquals(tupleid, &hufd.ctid))
{
@ -1522,6 +1532,14 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
/*
* As long as we don't support an UPDATE of INSERT ON CONFLICT for
* a partitioned table, we shouldn't reach a case where the tuple to
* be locked has been moved to another partition due to a concurrent
* update of the partition key.
*/
Assert(!ItemPointerIndicatesMovedPartitions(&hufd.ctid));
/*
* Tell caller to try again from the very start.
*
@ -2274,7 +2292,8 @@ ExecModifyTable(PlanState *pstate)
case CMD_DELETE:
slot = ExecDelete(node, tupleid, oldtuple, planSlot,
&node->mt_epqstate, estate,
NULL, true, NULL, NULL, node->canSetTag);
NULL, true, NULL, NULL, node->canSetTag,
false /* changingPart */);
break;
default:
elog(ERROR, "unknown operation");

View File

@ -167,7 +167,7 @@ extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
CommandId cid, int options, BulkInsertState bistate);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
HeapUpdateFailureData *hufd);
HeapUpdateFailureData *hufd, bool changingPart);
extern void heap_finish_speculative(Relation relation, HeapTuple tuple);
extern void heap_abort_speculative(Relation relation, HeapTuple tuple);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,

View File

@ -93,6 +93,7 @@
#define XLH_DELETE_CONTAINS_OLD_TUPLE (1<<1)
#define XLH_DELETE_CONTAINS_OLD_KEY (1<<2)
#define XLH_DELETE_IS_SUPER (1<<3)
#define XLH_DELETE_IS_PARTITION_MOVE (1<<4)
/* convenience macro for checking whether any form of old tuple was logged */
#define XLH_DELETE_CONTAINS_OLD \

View File

@ -83,8 +83,10 @@
*
* A word about t_ctid: whenever a new tuple is stored on disk, its t_ctid
* is initialized with its own TID (location). If the tuple is ever updated,
* its t_ctid is changed to point to the replacement version of the tuple.
* Thus, a tuple is the latest version of its row iff XMAX is invalid or
* its t_ctid is changed to point to the replacement version of the tuple or
* the block number (ip_blkid) is invalidated if the tuple is moved from one
* partition to another partition relation due to an update of the partition
* key. Thus, a tuple is the latest version of its row iff XMAX is invalid or
* t_ctid points to itself (in which case, if XMAX is valid, the tuple is
* either locked or deleted). One can follow the chain of t_ctid links
* to find the newest version of the row. Beware however that VACUUM might
@ -445,6 +447,12 @@ do { \
ItemPointerSet(&(tup)->t_ctid, token, SpecTokenOffsetNumber) \
)
/*
 * HeapTupleHeaderSetMovedPartitions
 *		Flag a tuple header as "moved to another partition" by passing the
 *		address of its t_ctid field to ItemPointerSetMovedPartitions.
 */
#define HeapTupleHeaderSetMovedPartitions(tup) \
ItemPointerSetMovedPartitions(&(tup)->t_ctid)
/*
 * HeapTupleHeaderIndicatesMovedPartitions
 *		Evaluate ItemPointerIndicatesMovedPartitions on the t_ctid field of
 *		the given tuple header.
 *
 * The "tup" argument is parenthesized in the expansion so that any
 * pointer-valued expression (for example "base + 1"), not only a plain
 * identifier, can be passed without the "->" binding to just part of it.
 */
#define HeapTupleHeaderIndicatesMovedPartitions(tup) \
ItemPointerIndicatesMovedPartitions(&(tup)->t_ctid)
#define HeapTupleHeaderGetDatumLength(tup) \
VARSIZE(tup)

View File

@ -27,7 +27,8 @@ extern TupleTableSlot *ExecDelete(ModifyTableState *mtstate,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *planSlot,
EPQState *epqstate, EState *estate, bool *tupleDeleted,
bool processReturning, HeapUpdateFailureData *hufdp,
MergeActionState *actionState, bool canSetTag);
MergeActionState *actionState, bool canSetTag,
bool changingPart);
extern TupleTableSlot *ExecUpdate(ModifyTableState *mtstate,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
TupleTableSlot *planSlot, EPQState *epqstate, EState *estate,

View File

@ -154,6 +154,22 @@ typedef ItemPointerData *ItemPointer;
(pointer)->ip_posid = InvalidOffsetNumber \
)
/*
 * ItemPointerIndicatesMovedPartitions
 *		True iff the block number indicates the tuple has moved to another
 *		partition, i.e. the block number read from the item pointer does not
 *		satisfy BlockNumberIsValid.
 *
 * The whole expansion is wrapped in parentheses so the macro behaves as a
 * single operand wherever it is used inside a larger expression.
 */
#define ItemPointerIndicatesMovedPartitions(pointer) \
( \
	!BlockNumberIsValid(ItemPointerGetBlockNumberNoCheck(pointer)) \
)
/*
 * ItemPointerSetMovedPartitions
 *		Indicate that the item referenced by the itempointer has moved into a
 *		different partition.
 *
 * The marker is written by passing InvalidBlockNumber to
 * ItemPointerSetBlockNumber for this pointer; the block number is the field
 * that ItemPointerIndicatesMovedPartitions inspects.
 */
#define ItemPointerSetMovedPartitions(pointer) \
ItemPointerSetBlockNumber((pointer), InvalidBlockNumber)
/* ----------------
* externs
* ----------------

View File

@ -204,6 +204,31 @@ step pa_merge2a:
<waiting ...>
step c1: COMMIT;
step pa_merge2a: <... completed>
error in steps c1 pa_merge2a: ERROR: tuple to be deleted was already moved to another partition due to concurrent update
step pa_select2: SELECT * FROM pa_target;
ERROR: current transaction is aborted, commands ignored until end of transaction block
step c2: COMMIT;
starting permutation: pa_merge2 c1 pa_merge2a pa_select2 c2
step pa_merge2:
MERGE INTO pa_target t
USING (SELECT 1 as key, 'pa_merge1' as val) s
ON s.key = t.key
WHEN NOT MATCHED THEN
INSERT VALUES (s.key, s.val)
WHEN MATCHED THEN
UPDATE set key = t.key + 1, val = t.val || ' updated by ' || s.val;
step c1: COMMIT;
step pa_merge2a:
MERGE INTO pa_target t
USING (SELECT 1 as key, 'pa_merge2a' as val) s
ON s.key = t.key
WHEN NOT MATCHED THEN
INSERT VALUES (s.key, s.val)
WHEN MATCHED THEN
UPDATE set key = t.key + 1, val = t.val || ' updated by ' || s.val;
step pa_select2: SELECT * FROM pa_target;
key val

View File

@ -0,0 +1,119 @@
Parsed test spec with 2 sessions
starting permutation: s1b s2b s1u s1c s2d s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u: UPDATE foo SET a=2 WHERE a=1;
step s1c: COMMIT;
step s2d: DELETE FROM foo WHERE a=1;
step s2c: COMMIT;
starting permutation: s1b s2b s1u s2d s1c s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u: UPDATE foo SET a=2 WHERE a=1;
step s2d: DELETE FROM foo WHERE a=1; <waiting ...>
step s1c: COMMIT;
step s2d: <... completed>
error in steps s1c s2d: ERROR: tuple to be deleted was already moved to another partition due to concurrent update
step s2c: COMMIT;
starting permutation: s1b s2b s2d s1u s2c s1c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2d: DELETE FROM foo WHERE a=1;
step s1u: UPDATE foo SET a=2 WHERE a=1; <waiting ...>
step s2c: COMMIT;
step s1u: <... completed>
step s1c: COMMIT;
starting permutation: s1b s2b s1u2 s1c s2u2 s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u2: UPDATE footrg SET b='EFG' WHERE a=1;
step s1c: COMMIT;
step s2u2: UPDATE footrg SET b='XYZ' WHERE a=1;
step s2c: COMMIT;
starting permutation: s1b s2b s1u2 s2u2 s1c s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u2: UPDATE footrg SET b='EFG' WHERE a=1;
step s2u2: UPDATE footrg SET b='XYZ' WHERE a=1; <waiting ...>
step s1c: COMMIT;
step s2u2: <... completed>
error in steps s1c s2u2: ERROR: tuple to be locked was already moved to another partition due to concurrent update
step s2c: COMMIT;
starting permutation: s1b s2b s2u2 s1u2 s2c s1c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2u2: UPDATE footrg SET b='XYZ' WHERE a=1;
step s1u2: UPDATE footrg SET b='EFG' WHERE a=1; <waiting ...>
step s2c: COMMIT;
step s1u2: <... completed>
error in steps s2c s1u2: ERROR: tuple to be locked was already moved to another partition due to concurrent update
step s1c: COMMIT;
starting permutation: s1b s2b s1u3pc s2i s1c s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
step s2i: INSERT INTO bar VALUES(7); <waiting ...>
step s1c: COMMIT;
step s2i: <... completed>
error in steps s1c s2i: ERROR: tuple to be locked was already moved to another partition due to concurrent update
step s2c: COMMIT;
starting permutation: s1b s2b s1u3pc s2i s1r s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
step s2i: INSERT INTO bar VALUES(7); <waiting ...>
step s1r: ROLLBACK;
step s2i: <... completed>
step s2c: COMMIT;
starting permutation: s1b s2b s1u3npc s1u3pc s2i s1c s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u3npc: UPDATE foo_range_parted SET b='XYZ' WHERE a=7;
step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
step s2i: INSERT INTO bar VALUES(7); <waiting ...>
step s1c: COMMIT;
step s2i: <... completed>
error in steps s1c s2i: ERROR: tuple to be locked was already moved to another partition due to concurrent update
step s2c: COMMIT;
starting permutation: s1b s2b s1u3npc s1u3pc s2i s1r s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u3npc: UPDATE foo_range_parted SET b='XYZ' WHERE a=7;
step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
step s2i: INSERT INTO bar VALUES(7); <waiting ...>
step s1r: ROLLBACK;
step s2i: <... completed>
step s2c: COMMIT;
starting permutation: s1b s2b s1u3npc s1u3pc s1u3pc s2i s1c s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u3npc: UPDATE foo_range_parted SET b='XYZ' WHERE a=7;
step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
step s2i: INSERT INTO bar VALUES(7); <waiting ...>
step s1c: COMMIT;
step s2i: <... completed>
error in steps s1c s2i: ERROR: tuple to be locked was already moved to another partition due to concurrent update
step s2c: COMMIT;
starting permutation: s1b s2b s1u3npc s1u3pc s1u3pc s2i s1r s2c
step s1b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s2b: BEGIN ISOLATION LEVEL READ COMMITTED;
step s1u3npc: UPDATE foo_range_parted SET b='XYZ' WHERE a=7;
step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
step s1u3pc: UPDATE foo_range_parted SET a=11 WHERE a=7;
step s2i: INSERT INTO bar VALUES(7); <waiting ...>
step s1r: ROLLBACK;
step s2i: <... completed>
step s2c: COMMIT;

View File

@ -0,0 +1,29 @@
Parsed test spec with 3 sessions
starting permutation: s1u s2donothing s3donothing s1c s2c s3select s3c
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s2donothing: <... completed>
step s3donothing: <... completed>
step s2c: COMMIT;
step s3select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1
step s3c: COMMIT;
starting permutation: s2donothing s1u s3donothing s1c s2c s3select s3c
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s3donothing: <... completed>
step s2c: COMMIT;
step s3select: SELECT * FROM foo ORDER BY a;
a b
2 initial tuple -> moved by session-1
step s3c: COMMIT;

View File

@ -0,0 +1,139 @@
Parsed test spec with 3 sessions
starting permutation: s2beginrr s3beginrr s1u s2donothing s1c s2c s3donothing s3c s2select
step s2beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s3beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s2donothing: <... completed>
step s2c: COMMIT;
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING;
step s3c: COMMIT;
step s2select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1
starting permutation: s2beginrr s3beginrr s1u s3donothing s1c s3c s2donothing s2c s2select
step s2beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s3beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s3donothing: <... completed>
error in steps s1c s3donothing: ERROR: could not serialize access due to concurrent update
step s3c: COMMIT;
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING;
step s2c: COMMIT;
step s2select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1
starting permutation: s2beginrr s3beginrr s1u s2donothing s3donothing s1c s2c s3c s2select
step s2beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s3beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s2donothing: <... completed>
step s3donothing: <... completed>
error in steps s1c s2donothing s3donothing: ERROR: could not serialize access due to concurrent update
step s2c: COMMIT;
step s3c: COMMIT;
step s2select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1
starting permutation: s2beginrr s3beginrr s1u s3donothing s2donothing s1c s3c s2c s2select
step s2beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s3beginrr: BEGIN ISOLATION LEVEL REPEATABLE READ;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s3donothing: <... completed>
step s2donothing: <... completed>
error in steps s1c s3donothing s2donothing: ERROR: could not serialize access due to concurrent update
step s3c: COMMIT;
step s2c: COMMIT;
step s2select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1
starting permutation: s2begins s3begins s1u s2donothing s1c s2c s3donothing s3c s2select
step s2begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s3begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s2donothing: <... completed>
step s2c: COMMIT;
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING;
step s3c: COMMIT;
step s2select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1
starting permutation: s2begins s3begins s1u s3donothing s1c s3c s2donothing s2c s2select
step s2begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s3begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s3donothing: <... completed>
error in steps s1c s3donothing: ERROR: could not serialize access due to concurrent update
step s3c: COMMIT;
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING;
step s2c: COMMIT;
step s2select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1
starting permutation: s2begins s3begins s1u s2donothing s3donothing s1c s2c s3c s2select
step s2begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s3begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s2donothing: <... completed>
step s3donothing: <... completed>
error in steps s1c s2donothing s3donothing: ERROR: could not serialize access due to concurrent update
step s2c: COMMIT;
step s3c: COMMIT;
step s2select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1
starting permutation: s2begins s3begins s1u s3donothing s2donothing s1c s3c s2c s2select
step s2begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s3begins: BEGIN ISOLATION LEVEL SERIALIZABLE;
step s1u: UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1;
step s3donothing: INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; <waiting ...>
step s2donothing: INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; <waiting ...>
step s1c: COMMIT;
step s3donothing: <... completed>
step s2donothing: <... completed>
error in steps s1c s3donothing s2donothing: ERROR: could not serialize access due to concurrent update
step s3c: COMMIT;
step s2c: COMMIT;
step s2select: SELECT * FROM foo ORDER BY a;
a b
1 session-2 donothing
2 initial tuple -> moved by session-1

View File

@ -73,6 +73,9 @@ test: vacuum-concurrent-drop
test: predicate-hash
test: predicate-gist
test: predicate-gin
test: partition-key-update-1
test: partition-key-update-2
test: partition-key-update-3
# The checksum_enable suite will enable checksums for the cluster so should
# not run before anything expecting the cluster to have checksums turned off
# test: checksum_cancel

View File

@ -129,4 +129,5 @@ permutation "merge1" "merge2a" "a1" "select2" "c2"
permutation "merge1" "merge2b" "c1" "select2" "c2"
permutation "merge1" "merge2c" "c1" "select2" "c2"
permutation "pa_merge1" "pa_merge2a" "c1" "pa_select2" "c2"
permutation "pa_merge2" "pa_merge2a" "c1" "pa_select2" "c2"
permutation "pa_merge2" "pa_merge2a" "c1" "pa_select2" "c2" # fails
permutation "pa_merge2" "c1" "pa_merge2a" "pa_select2" "c2" # succeeds

View File

@ -0,0 +1,85 @@
# Test that an error is thrown if the target row has been moved to a
# different partition by a concurrent session.
setup
{
--
-- Setup to test an error from ExecUpdate and ExecDelete.
--
CREATE TABLE foo (a int, b text) PARTITION BY LIST(a);
CREATE TABLE foo1 PARTITION OF foo FOR VALUES IN (1);
CREATE TABLE foo2 PARTITION OF foo FOR VALUES IN (2);
INSERT INTO foo VALUES (1, 'ABC');
--
-- Setup to test an error from GetTupleForTrigger
--
CREATE TABLE footrg (a int, b text) PARTITION BY LIST(a);
CREATE TABLE footrg1 PARTITION OF footrg FOR VALUES IN (1);
CREATE TABLE footrg2 PARTITION OF footrg FOR VALUES IN (2);
INSERT INTO footrg VALUES (1, 'ABC');
CREATE FUNCTION func_footrg_mod_a() RETURNS TRIGGER AS $$
BEGIN
NEW.a = 2; -- This is changing partition key column.
RETURN NEW;
END $$ LANGUAGE PLPGSQL;
CREATE TRIGGER footrg_mod_a BEFORE UPDATE ON footrg1
FOR EACH ROW EXECUTE PROCEDURE func_footrg_mod_a();
--
-- Setup to test an error from ExecLockRows
--
CREATE TABLE foo_range_parted (a int, b text) PARTITION BY RANGE(a);
CREATE TABLE foo_range_parted1 PARTITION OF foo_range_parted FOR VALUES FROM (1) TO (10);
CREATE TABLE foo_range_parted2 PARTITION OF foo_range_parted FOR VALUES FROM (10) TO (20);
INSERT INTO foo_range_parted VALUES(7, 'ABC');
CREATE UNIQUE INDEX foo_range_parted1_a_unique ON foo_range_parted1 (a);
CREATE TABLE bar (a int REFERENCES foo_range_parted1(a));
}
teardown
{
DROP TABLE foo;
DROP TRIGGER footrg_mod_a ON footrg1;
DROP FUNCTION func_footrg_mod_a();
DROP TABLE footrg;
DROP TABLE bar, foo_range_parted;
}
session "s1"
step "s1b" { BEGIN ISOLATION LEVEL READ COMMITTED; }
step "s1u" { UPDATE foo SET a=2 WHERE a=1; }
step "s1u2" { UPDATE footrg SET b='EFG' WHERE a=1; }
step "s1u3pc" { UPDATE foo_range_parted SET a=11 WHERE a=7; }
step "s1u3npc" { UPDATE foo_range_parted SET b='XYZ' WHERE a=7; }
step "s1c" { COMMIT; }
step "s1r" { ROLLBACK; }
session "s2"
step "s2b" { BEGIN ISOLATION LEVEL READ COMMITTED; }
step "s2u" { UPDATE foo SET b='EFG' WHERE a=1; }
step "s2u2" { UPDATE footrg SET b='XYZ' WHERE a=1; }
step "s2i" { INSERT INTO bar VALUES(7); }
step "s2d" { DELETE FROM foo WHERE a=1; }
step "s2c" { COMMIT; }
# Concurrency error from ExecUpdate and ExecDelete.
permutation "s1b" "s2b" "s1u" "s1c" "s2d" "s2c"
permutation "s1b" "s2b" "s1u" "s2d" "s1c" "s2c"
permutation "s1b" "s2b" "s2d" "s1u" "s2c" "s1c"
# Concurrency error from GetTupleForTrigger
permutation "s1b" "s2b" "s1u2" "s1c" "s2u2" "s2c"
permutation "s1b" "s2b" "s1u2" "s2u2" "s1c" "s2c"
permutation "s1b" "s2b" "s2u2" "s1u2" "s2c" "s1c"
# Concurrency error from ExecLockRows
# test waiting for moved row itself
permutation "s1b" "s2b" "s1u3pc" "s2i" "s1c" "s2c"
permutation "s1b" "s2b" "s1u3pc" "s2i" "s1r" "s2c"
# test waiting for in-partition update, followed by cross-partition move
permutation "s1b" "s2b" "s1u3npc" "s1u3pc" "s2i" "s1c" "s2c"
permutation "s1b" "s2b" "s1u3npc" "s1u3pc" "s2i" "s1r" "s2c"
# test waiting for in-partition update, followed by the cross-partition move issued twice
permutation "s1b" "s2b" "s1u3npc" "s1u3pc" "s1u3pc" "s2i" "s1c" "s2c"
permutation "s1b" "s2b" "s1u3npc" "s1u3pc" "s1u3pc" "s2i" "s1r" "s2c"

View File

@ -0,0 +1,45 @@
# Concurrent update of a partition key and INSERT...ON CONFLICT DO NOTHING test
#
# This test tries to expose problems with the interaction between concurrent
# sessions during an update of the partition key and INSERT...ON CONFLICT DO
# NOTHING on a partitioned table.
#
# The convention here is that session 1 moves a row from one partition to
# another due to an update of the partition key, session 2 always ends up
# inserting, and session 3 always ends up doing nothing.
#
# Note: This test slightly resembles the insert-conflict-do-nothing test.
# foo is list-partitioned on its primary key a, with one partition per key
# value (foo1 for 1, foo2 for 2).  The single initial row has a=1 and so
# starts out in foo1.
setup
{
CREATE TABLE foo (a int primary key, b text) PARTITION BY LIST(a);
CREATE TABLE foo1 PARTITION OF foo FOR VALUES IN (1);
CREATE TABLE foo2 PARTITION OF foo FOR VALUES IN (2);
INSERT INTO foo VALUES (1, 'initial tuple');
}
# Remove the partitioned table created in setup.
teardown
{
DROP TABLE foo;
}
# Session 1: changes the key of the only row from 1 (partition foo1) to 2
# (partition foo2) inside a READ COMMITTED transaction.
session "s1"
setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
# Partition-key update; also appends a marker to b.
step "s1u" { UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1; }
step "s1c" { COMMIT; }
# Session 2: inserts with key 1, the key that session 1's update moves the
# row away from.
session "s2"
setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
step "s2donothing" { INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; }
step "s2c" { COMMIT; }
# Session 3: inserts with key 2, the key that session 1's update moves the
# row to, and reports the table contents.
session "s3"
setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
step "s3donothing" { INSERT INTO foo VALUES(2, 'session-3 donothing') ON CONFLICT DO NOTHING; }
# Shows the contents of foo; both permutations run it after s1c and s2c.
step "s3select" { SELECT * FROM foo ORDER BY a; }
step "s3c" { COMMIT; }
# Regular case where one session block-waits on another to determine if it
# should proceed with an insert or do nothing.
permutation "s1u" "s2donothing" "s3donothing" "s1c" "s2c" "s3select" "s3c"
# Same steps, but session 2's insert is issued before session 1's update.
permutation "s2donothing" "s1u" "s3donothing" "s1c" "s2c" "s3select" "s3c"

View File

@ -0,0 +1,44 @@
# Concurrent update of a partition key and INSERT...ON CONFLICT DO NOTHING
# test on partitioned table with multiple rows in higher isolation levels.
#
# Note: This test resembles the insert-conflict-do-nothing-2 test.
# foo is list-partitioned on its primary key a, with one partition per key
# value (foo1 for 1, foo2 for 2).  The single initial row has a=1 and so
# starts out in foo1.
setup
{
CREATE TABLE foo (a int primary key, b text) PARTITION BY LIST(a);
CREATE TABLE foo1 PARTITION OF foo FOR VALUES IN (1);
CREATE TABLE foo2 PARTITION OF foo FOR VALUES IN (2);
INSERT INTO foo VALUES (1, 'initial tuple');
}
# Remove the partitioned table created in setup.
teardown
{
DROP TABLE foo;
}
# Session 1: changes the key of the only row from 1 (partition foo1) to 2
# (partition foo2) inside a READ COMMITTED transaction.
session "s1"
setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
# Partition-key update; also appends a marker to b.
step "s1u" { UPDATE foo SET a=2, b=b || ' -> moved by session-1' WHERE a=1; }
step "s1c" { COMMIT; }
# Session 2: has no session-level setup; each permutation opens its
# transaction with either s2beginrr or s2begins.  It inserts with key 1, the
# key that session 1's update moves the row away from.
session "s2"
step "s2beginrr" { BEGIN ISOLATION LEVEL REPEATABLE READ; }
step "s2begins" { BEGIN ISOLATION LEVEL SERIALIZABLE; }
step "s2donothing" { INSERT INTO foo VALUES(1, 'session-2 donothing') ON CONFLICT DO NOTHING; }
step "s2c" { COMMIT; }
# Shows the contents of foo; every permutation runs it last, after s2c.
step "s2select" { SELECT * FROM foo ORDER BY a; }
# Session 3: has no session-level setup; each permutation opens its
# transaction with either s3beginrr or s3begins.
session "s3"
step "s3beginrr" { BEGIN ISOLATION LEVEL REPEATABLE READ; }
step "s3begins" { BEGIN ISOLATION LEVEL SERIALIZABLE; }
# A single statement that supplies two rows, both with key 2 (the key that
# session 1's update moves the row to).
step "s3donothing" { INSERT INTO foo VALUES(2, 'session-3 donothing'), (2, 'session-3 donothing2') ON CONFLICT DO NOTHING; }
step "s3c" { COMMIT; }
# Sessions 2 and 3 in REPEATABLE READ.
permutation "s2beginrr" "s3beginrr" "s1u" "s2donothing" "s1c" "s2c" "s3donothing" "s3c" "s2select"
permutation "s2beginrr" "s3beginrr" "s1u" "s3donothing" "s1c" "s3c" "s2donothing" "s2c" "s2select"
permutation "s2beginrr" "s3beginrr" "s1u" "s2donothing" "s3donothing" "s1c" "s2c" "s3c" "s2select"
permutation "s2beginrr" "s3beginrr" "s1u" "s3donothing" "s2donothing" "s1c" "s3c" "s2c" "s2select"
# The same four step orders with sessions 2 and 3 in SERIALIZABLE.
permutation "s2begins" "s3begins" "s1u" "s2donothing" "s1c" "s2c" "s3donothing" "s3c" "s2select"
permutation "s2begins" "s3begins" "s1u" "s3donothing" "s1c" "s3c" "s2donothing" "s2c" "s2select"
permutation "s2begins" "s3begins" "s1u" "s2donothing" "s3donothing" "s1c" "s2c" "s3c" "s2select"
permutation "s2begins" "s3begins" "s1u" "s3donothing" "s2donothing" "s1c" "s3c" "s2c" "s2select"