From b5c645d2a2652070ac128a42cf0d7b34403a6cfe Mon Sep 17 00:00:00 2001
From: Dean Rasheed <dean.a.rasheed@gmail.com>
Date: Thu, 7 Mar 2024 09:53:31 +0000
Subject: [PATCH] Fix handling of self-modified tuples in MERGE.

When an UPDATE or DELETE action in MERGE returns TM_SelfModified,
there are 2 possible causes:

1). The target tuple was already updated or deleted by the current
    command. This can happen if the target row joins to more than one
    source row, and the SQL standard explicitly says that this must be
    an error.

2). The target tuple was already updated or deleted by a later command
    in the current transaction. This can happen if the tuple is
    modified by a BEFORE trigger or a volatile function used in the
    query, and should be an error for the same reason that it is in a
    plain UPDATE or DELETE command.

In MERGE's primary error handling block, it failed to check for (2),
causing it to return a misleading error message in such cases.

In the secondary error handling block, following a concurrent update
from another session, it failed to check for (1), causing it to
silently ignore target rows joined to more than one source row,
instead of reporting an error.

Fix this, and add tests for both of these cases.

Per report from Wenjiang Zhang. Back-patch to v15, where MERGE was
introduced.

Discussion: https://postgr.es/m/tencent_41DE0FF443FE14B94A5898D373792109E408%40qq.com
---
 src/backend/executor/nodeModifyTable.c       | 46 ++++++++++++++++----
 src/test/isolation/expected/merge-update.out | 43 ++++++++++++++++++
 src/test/isolation/specs/merge-update.spec   | 13 ++++++
 src/test/regress/expected/triggers.out       |  8 ++++
 src/test/regress/sql/triggers.sql            |  4 ++
 5 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 6fea82e0d4..1ad5dcb406 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -2947,8 +2947,29 @@ lmerge_matched:;
 			case TM_SelfModified:
 
 				/*
-				 * The SQL standard disallows this for MERGE.
+				 * The target tuple was already updated or deleted by the
+				 * current command, or by a later command in the current
+				 * transaction.  The former case is explicitly disallowed by
+				 * the SQL standard for MERGE, which insists that the MERGE
+				 * join condition should not join a target row to more than
+				 * one source row.
+				 *
+				 * The latter case arises if the tuple is modified by a
+				 * command in a BEFORE trigger, or perhaps by a command in a
+				 * volatile function used in the query.  In such situations we
+				 * should not ignore the MERGE action, but it is equally
+				 * unsafe to proceed.  We don't want to discard the original
+				 * MERGE action while keeping the triggered actions based on
+				 * it; and it would be no better to allow the original MERGE
+				 * action while discarding the updates that it triggered.  So
+				 * throwing an error is the only safe course.
 				 */
+				if (context->tmfd.cmax != estate->es_output_cid)
+					ereport(ERROR,
+							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
+							 errmsg("tuple to be updated or deleted was already modified by an operation triggered by the current command"),
+							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
+
 				if (TransactionIdIsCurrentTransactionId(context->tmfd.xmax))
 					ereport(ERROR,
 							(errcode(ERRCODE_CARDINALITY_VIOLATION),
@@ -2956,6 +2977,7 @@ lmerge_matched:;
 							 errmsg("%s command cannot affect row a second time",
 									"MERGE"),
 							 errhint("Ensure that not more than one source row matches any one target row.")));
+
 				/* This shouldn't happen */
 				elog(ERROR, "attempted to update or delete invisible tuple");
 				break;
@@ -3064,19 +3086,27 @@ lmerge_matched:;
 							/*
 							 * This can be reached when following an update
 							 * chain from a tuple updated by another session,
-							 * reaching a tuple that was already updated in
-							 * this transaction. If previously modified by
-							 * this command, ignore the redundant update,
-							 * otherwise error out.
-							 *
-							 * See also response to TM_SelfModified in
-							 * ExecUpdate().
+							 * reaching a tuple that was already updated or
+							 * deleted by the current command, or by a later
+							 * command in the current transaction. As above,
+							 * this should always be treated as an error.
 							 */
 							if (context->tmfd.cmax != estate->es_output_cid)
 								ereport(ERROR,
 										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
 										 errmsg("tuple to be updated or deleted was already modified by an operation triggered by the current command"),
 										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
+
+							if (TransactionIdIsCurrentTransactionId(context->tmfd.xmax))
+								ereport(ERROR,
+										(errcode(ERRCODE_CARDINALITY_VIOLATION),
+								/* translator: %s is a SQL command name */
+										 errmsg("%s command cannot affect row a second time",
+												"MERGE"),
+										 errhint("Ensure that not more than one source row matches any one target row.")));
+
+							/* This shouldn't happen */
+							elog(ERROR, "attempted to update or delete invisible tuple");
 							return false;
 
 						default:
diff --git a/src/test/isolation/expected/merge-update.out b/src/test/isolation/expected/merge-update.out
index 55b1f908fd..f5f7e3ba19 100644
--- a/src/test/isolation/expected/merge-update.out
+++ b/src/test/isolation/expected/merge-update.out
@@ -48,6 +48,27 @@ key|val
 
 step c2: COMMIT;
 
+starting permutation: pa_merge1 c1 pa_merge2c_dup a2
+step pa_merge1: 
+  MERGE INTO pa_target t
+  USING (SELECT 1 as key, 'pa_merge1' as val) s
+  ON s.key = t.key
+  WHEN NOT MATCHED THEN
+	INSERT VALUES (s.key, s.val)
+  WHEN MATCHED THEN
+    UPDATE set val = t.val || ' updated by ' || s.val;
+
+step c1: COMMIT;
+step pa_merge2c_dup: 
+  MERGE INTO pa_target t
+  USING (VALUES (1), (1)) v(a)
+  ON t.key = v.a
+  WHEN MATCHED THEN
+	UPDATE set val = t.val || ' updated by pa_merge2c_dup';  -- should fail
+
+ERROR:  MERGE command cannot affect row a second time
+step a2: ABORT;
+
 starting permutation: merge1 merge2a c1 select2 c2
 step merge1: 
   MERGE INTO target t
@@ -312,3 +333,25 @@ key|val
 (2 rows)
 
 step c2: COMMIT;
+
+starting permutation: pa_merge1 pa_merge2c_dup c1 a2
+step pa_merge1: 
+  MERGE INTO pa_target t
+  USING (SELECT 1 as key, 'pa_merge1' as val) s
+  ON s.key = t.key
+  WHEN NOT MATCHED THEN
+	INSERT VALUES (s.key, s.val)
+  WHEN MATCHED THEN
+    UPDATE set val = t.val || ' updated by ' || s.val;
+
+step pa_merge2c_dup: 
+  MERGE INTO pa_target t
+  USING (VALUES (1), (1)) v(a)
+  ON t.key = v.a
+  WHEN MATCHED THEN
+	UPDATE set val = t.val || ' updated by pa_merge2c_dup';  -- should fail
+ <waiting ...>
+step c1: COMMIT;
+step pa_merge2c_dup: <... completed>
+ERROR:  MERGE command cannot affect row a second time
+step a2: ABORT;
diff --git a/src/test/isolation/specs/merge-update.spec b/src/test/isolation/specs/merge-update.spec
index e8d01666fe..3ccd466449 100644
--- a/src/test/isolation/specs/merge-update.spec
+++ b/src/test/isolation/specs/merge-update.spec
@@ -4,6 +4,7 @@
 # 1. UPDATEs of PKs that change the join in the ON clause
 # 2. UPDATEs with WHEN conditions that would fail after concurrent update
 # 3. UPDATEs with extra ON conditions that would fail after concurrent update
+# 4. UPDATEs with duplicate source rows
 
 setup
 {
@@ -134,15 +135,26 @@ step "pa_merge2b_when"
   WHEN MATCHED AND t.val like 'initial%' THEN
 	UPDATE set key = t.key + 1, val = t.val || ' updated by ' || s.val;
 }
+# Duplicate source row should fail
+step "pa_merge2c_dup"
+{
+  MERGE INTO pa_target t
+  USING (VALUES (1), (1)) v(a)
+  ON t.key = v.a
+  WHEN MATCHED THEN
+	UPDATE set val = t.val || ' updated by pa_merge2c_dup';  -- should fail
+}
 step "select2" { SELECT * FROM target; }
 step "pa_select2" { SELECT * FROM pa_target; }
 step "c2" { COMMIT; }
+step "a2" { ABORT; }
 
 # Basic effects
 permutation "merge1" "c1" "select2" "c2"
 
 # One after the other, no concurrency
 permutation "merge1" "c1" "merge2a" "select2" "c2"
+permutation "pa_merge1" "c1" "pa_merge2c_dup" "a2"
 
 # Now with concurrency
 permutation "merge1" "merge2a" "c1" "select2" "c2"
@@ -154,3 +166,4 @@ permutation "pa_merge2" "pa_merge2a" "c1" "pa_select2" "c2" # fails
 permutation "pa_merge2" "c1" "pa_merge2a" "pa_select2" "c2" # succeeds
 permutation "pa_merge3" "pa_merge2b_when" "c1" "pa_select2" "c2" # WHEN not satisfied by updated tuple
 permutation "pa_merge1" "pa_merge2b_when" "c1" "pa_select2" "c2" # WHEN satisfied by updated tuple
+permutation "pa_merge1" "pa_merge2c_dup" "c1" "a2"
diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out
index 723e8b7166..4dd95e2680 100644
--- a/src/test/regress/expected/triggers.out
+++ b/src/test/regress/expected/triggers.out
@@ -1745,6 +1745,10 @@ select * from parent; select * from child;
 update parent set val1 = 'b' where aid = 1; -- should fail
 ERROR:  tuple to be updated was already modified by an operation triggered by the current command
 HINT:  Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.
+merge into parent p using (values (1)) as v(id) on p.aid = v.id
+  when matched then update set val1 = 'b'; -- should fail
+ERROR:  tuple to be updated or deleted was already modified by an operation triggered by the current command
+HINT:  Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.
 select * from parent; select * from child;
  aid | val1 | val2 | val3 | val4 | bcnt 
 -----+------+------+------+------+------
@@ -1759,6 +1763,10 @@ select * from parent; select * from child;
 delete from parent where aid = 1; -- should fail
 ERROR:  tuple to be deleted was already modified by an operation triggered by the current command
 HINT:  Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.
+merge into parent p using (values (1)) as v(id) on p.aid = v.id
+  when matched then delete; -- should fail
+ERROR:  tuple to be updated or deleted was already modified by an operation triggered by the current command
+HINT:  Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.
 select * from parent; select * from child;
  aid | val1 | val2 | val3 | val4 | bcnt 
 -----+------+------+------+------+------
diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql
index 46795a9c78..6c9e066397 100644
--- a/src/test/regress/sql/triggers.sql
+++ b/src/test/regress/sql/triggers.sql
@@ -1186,9 +1186,13 @@ insert into child values (10, 1, 'b');
 select * from parent; select * from child;
 
 update parent set val1 = 'b' where aid = 1; -- should fail
+merge into parent p using (values (1)) as v(id) on p.aid = v.id
+  when matched then update set val1 = 'b'; -- should fail
 select * from parent; select * from child;
 
 delete from parent where aid = 1; -- should fail
+merge into parent p using (values (1)) as v(id) on p.aid = v.id
+  when matched then delete; -- should fail
 select * from parent; select * from child;
 
 -- replace the trigger function with one that restarts the deletion after