Fix misbehavior of EvalPlanQual checks with multiple result relations.

The idea of EvalPlanQual is that we replace the query's scan of the
result relation with a single injected tuple, and see if we get a
tuple out, thereby implying that the injected tuple still passes the
query quals.  (In join cases, other relations in the query are still
scanned normally.)  This logic was not updated when commit 86dc90056
made it possible for a single DML query plan to have multiple result
relations, when the query target relation has inheritance or partition
children.  We replaced the output for the current result relation
successfully, but other result relations were still scanned normally;
thus, if any other result relation contained a tuple satisfying the
quals, we'd think the EPQ check passed, even if it did not pass for
the injected tuple itself.  This would lead to update or delete
actions getting performed when they should have been skipped due to
a conflicting concurrent update in READ COMMITTED isolation mode.

Fix by blocking all sibling result relations from emitting tuples
during an EvalPlanQual recheck.  In the back branches, the fix is
complicated a bit by the need to not change the size of struct
EPQState (else we'd have ABI-breaking changes in offsets in
struct ModifyTableState).  Like the back-patches of 3f7836ff6
and 4b3e37993, add a separately palloc'd struct to avoid that.
The logic is the same as in HEAD otherwise.

This is only a live bug back to v14 where 86dc90056 came in.
However, I chose to back-patch the test cases further, on the
grounds that this whole area is none too well tested.  I skipped
doing so in v11 though because none of the test applied cleanly,
and it didn't quite seem worth extra work for a branch with only
six months to live.

Per report from Ante Krešić (via Aleksander Alekseev)

Discussion: https://postgr.es/m/CAJ7c6TMBTN3rcz4=AjYhLPD_w3FFT0Wq_C15jxCDn8U4tZnH1g@mail.gmail.com
This commit is contained in:
Tom Lane 2023-05-19 14:26:34 -04:00
parent ed7e686a03
commit 70b42f2790
9 changed files with 221 additions and 48 deletions

View File

@ -2469,7 +2469,7 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
* relation - table containing tuple
* rti - rangetable index of table containing tuple
* inputslot - tuple for processing - this can be the slot from
* EvalPlanQualSlot(), for the increased efficiency.
* EvalPlanQualSlot() for this rel, for increased efficiency.
*
* This tests whether the tuple in inputslot still matches the relevant
* quals. For that result to be useful, typically the input tuple has to be
@ -2503,6 +2503,14 @@ EvalPlanQual(EPQState *epqstate, Relation relation,
if (testslot != inputslot)
ExecCopySlot(testslot, inputslot);
/*
* Mark that an EPQ tuple is available for this relation. (If there is
* more than one result relation, the others remain marked as having no
* tuple available.)
*/
epqstate->relsubs_done[rti - 1] = false;
epqstate->relsubs_blocked[rti - 1] = false;
/*
* Run the EPQ query. We assume it will return at most one tuple.
*/
@ -2519,11 +2527,12 @@ EvalPlanQual(EPQState *epqstate, Relation relation,
ExecMaterializeSlot(slot);
/*
* Clear out the test tuple. This is needed in case the EPQ query is
* re-used to test a tuple for a different relation. (Not clear that can
* really happen, but let's be safe.)
* Clear out the test tuple, and mark that no tuple is available here.
* This is needed in case the EPQ state is re-used to test a tuple for a
* different target relation.
*/
ExecClearTuple(testslot);
epqstate->relsubs_blocked[rti - 1] = true;
return slot;
}
@ -2532,18 +2541,26 @@ EvalPlanQual(EPQState *epqstate, Relation relation,
* EvalPlanQualInit -- initialize during creation of a plan state node
* that might need to invoke EPQ processing.
*
* If the caller intends to use EvalPlanQual(), resultRelations should be
* a list of RT indexes of potential target relations for EvalPlanQual(),
* and we will arrange that the other listed relations don't return any
* tuple during an EvalPlanQual() call. Otherwise resultRelations
* should be NIL.
*
* Note: subplan/auxrowmarks can be NULL/NIL if they will be set later
* with EvalPlanQualSetPlan.
*/
void
EvalPlanQualInit(EPQState *epqstate, EState *parentestate,
Plan *subplan, List *auxrowmarks, int epqParam)
Plan *subplan, List *auxrowmarks,
int epqParam, List *resultRelations)
{
Index rtsize = parentestate->es_range_table_size;
/* initialize data not changing over EPQState's lifetime */
epqstate->parentestate = parentestate;
epqstate->epqParam = epqParam;
epqstate->resultRelations = resultRelations;
/*
* Allocate space to reference a slot for each potential rti - do so now
@ -2566,6 +2583,7 @@ EvalPlanQualInit(EPQState *epqstate, EState *parentestate,
epqstate->recheckplanstate = NULL;
epqstate->relsubs_rowmark = NULL;
epqstate->relsubs_done = NULL;
epqstate->relsubs_blocked = NULL;
}
/*
@ -2763,7 +2781,13 @@ EvalPlanQualBegin(EPQState *epqstate)
Index rtsize = parentestate->es_range_table_size;
PlanState *rcplanstate = epqstate->recheckplanstate;
MemSet(epqstate->relsubs_done, 0, rtsize * sizeof(bool));
/*
* Reset the relsubs_done[] flags to equal relsubs_blocked[], so that
* the EPQ run will never attempt to fetch tuples from blocked target
* relations.
*/
memcpy(epqstate->relsubs_done, epqstate->relsubs_blocked,
rtsize * sizeof(bool));
/* Recopy current values of parent parameters */
if (parentestate->es_plannedstmt->paramExecTypes != NIL)
@ -2931,10 +2955,22 @@ EvalPlanQualStart(EPQState *epqstate, Plan *planTree)
}
/*
* Initialize per-relation EPQ tuple states to not-fetched.
* Initialize per-relation EPQ tuple states. Result relations, if any,
* get marked as blocked; others as not-fetched.
*/
epqstate->relsubs_done = (bool *)
palloc0(rtsize * sizeof(bool));
epqstate->relsubs_done = palloc_array(bool, rtsize);
epqstate->relsubs_blocked = palloc0_array(bool, rtsize);
foreach(l, epqstate->resultRelations)
{
int rtindex = lfirst_int(l);
Assert(rtindex > 0 && rtindex <= rtsize);
epqstate->relsubs_blocked[rtindex - 1] = true;
}
memcpy(epqstate->relsubs_done, epqstate->relsubs_blocked,
rtsize * sizeof(bool));
/*
* Initialize the private state information for all the nodes in the part
@ -3010,4 +3046,5 @@ EvalPlanQualEnd(EPQState *epqstate)
epqstate->recheckplanstate = NULL;
epqstate->relsubs_rowmark = NULL;
epqstate->relsubs_done = NULL;
epqstate->relsubs_blocked = NULL;
}

View File

@ -69,13 +69,12 @@ ExecScanFetch(ScanState *node,
else if (epqstate->relsubs_done[scanrelid - 1])
{
/*
* Return empty slot, as we already performed an EPQ substitution
* for this relation.
* Return empty slot, as either there is no EPQ tuple for this rel
* or we already returned it.
*/
TupleTableSlot *slot = node->ss_ScanTupleSlot;
/* Return empty slot, as we already returned a tuple */
return ExecClearTuple(slot);
}
else if (epqstate->relsubs_slot[scanrelid - 1] != NULL)
@ -88,7 +87,7 @@ ExecScanFetch(ScanState *node,
Assert(epqstate->relsubs_rowmark[scanrelid - 1] == NULL);
/* Mark to remember that we shouldn't return more */
/* Mark to remember that we shouldn't return it again */
epqstate->relsubs_done[scanrelid - 1] = true;
/* Return empty slot if we haven't got a test tuple */
@ -306,14 +305,18 @@ ExecScanReScan(ScanState *node)
*/
ExecClearTuple(node->ss_ScanTupleSlot);
/* Rescan EvalPlanQual tuple if we're inside an EvalPlanQual recheck */
/*
* Rescan EvalPlanQual tuple(s) if we're inside an EvalPlanQual recheck.
* But don't lose the "blocked" status of blocked target relations.
*/
if (estate->es_epq_active != NULL)
{
EPQState *epqstate = estate->es_epq_active;
Index scanrelid = ((Scan *) node->ps.plan)->scanrelid;
if (scanrelid > 0)
epqstate->relsubs_done[scanrelid - 1] = false;
epqstate->relsubs_done[scanrelid - 1] =
epqstate->relsubs_blocked[scanrelid - 1];
else
{
Bitmapset *relids;
@ -335,7 +338,8 @@ ExecScanReScan(ScanState *node)
while ((rtindex = bms_next_member(relids, rtindex)) >= 0)
{
Assert(rtindex > 0);
epqstate->relsubs_done[rtindex - 1] = false;
epqstate->relsubs_done[rtindex - 1] =
epqstate->relsubs_blocked[rtindex - 1];
}
}
}

View File

@ -108,7 +108,6 @@ lnext:
/* this child is inactive right now */
erm->ermActive = false;
ItemPointerSetInvalid(&(erm->curCtid));
ExecClearTuple(markSlot);
continue;
}
}
@ -370,7 +369,7 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
/* Now we have the info needed to set up EPQ state */
EvalPlanQualInit(&lrstate->lr_epqstate, estate,
outerPlan, epq_arowmarks, node->epqParam);
outerPlan, epq_arowmarks, node->epqParam, NIL);
return lrstate;
}

View File

@ -3985,7 +3985,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
}
/* set up epqstate with dummy subplan data for the moment */
EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL,
node->epqParam, node->resultRelations);
mtstate->fireBSTriggers = true;
/*

View File

@ -2674,7 +2674,7 @@ apply_handle_update_internal(ApplyExecutionData *edata,
bool found;
MemoryContext oldctx;
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
ExecOpenIndices(relinfo, false);
found = FindReplTupleInLocalRel(estate, localrel,
@ -2827,7 +2827,7 @@ apply_handle_delete_internal(ApplyExecutionData *edata,
TupleTableSlot *localslot;
bool found;
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
ExecOpenIndices(relinfo, false);
found = FindReplTupleInLocalRel(estate, localrel, remoterel, localindexoid,
@ -3054,7 +3054,7 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
*/
EPQState epqstate;
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
ExecOpenIndices(partrelinfo, false);
EvalPlanQualSetSlot(&epqstate, remoteslot_part);

View File

@ -233,7 +233,8 @@ extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
extern TupleTableSlot *EvalPlanQual(EPQState *epqstate, Relation relation,
Index rti, TupleTableSlot *inputslot);
extern void EvalPlanQualInit(EPQState *epqstate, EState *parentestate,
Plan *subplan, List *auxrowmarks, int epqParam);
Plan *subplan, List *auxrowmarks,
int epqParam, List *resultRelations);
extern void EvalPlanQualSetPlan(EPQState *epqstate,
Plan *subplan, List *auxrowmarks);
extern TupleTableSlot *EvalPlanQualSlot(EPQState *epqstate,

View File

@ -1169,15 +1169,16 @@ typedef struct PlanState
*/
typedef struct EPQState
{
/* Initialized at EvalPlanQualInit() time: */
/* These are initialized by EvalPlanQualInit() and do not change later: */
EState *parentestate; /* main query's EState */
int epqParam; /* ID of Param to force scan node re-eval */
List *resultRelations; /* integer list of RT indexes, or NIL */
/*
* Tuples to be substituted by scan nodes. They need to set up, before
* calling EvalPlanQual()/EvalPlanQualNext(), into the slot returned by
* EvalPlanQualSlot(scanrelid). The array is indexed by scanrelid - 1.
* relsubs_slot[scanrelid - 1] holds the EPQ test tuple to be returned by
* the scan node for the scanrelid'th RT index, in place of performing an
* actual table scan. Callers should use EvalPlanQualSlot() to fetch
* these slots.
*/
List *tuple_table; /* tuple table for relsubs_slot */
TupleTableSlot **relsubs_slot;
@ -1211,11 +1212,21 @@ typedef struct EPQState
ExecAuxRowMark **relsubs_rowmark;
/*
* True if a relation's EPQ tuple has been fetched for relation, indexed
* by scanrelid - 1.
* relsubs_done[scanrelid - 1] is true if there is no EPQ tuple for this
* target relation or it has already been fetched in the current scan of
* this target relation within the current EvalPlanQual test.
*/
bool *relsubs_done;
/*
* relsubs_blocked[scanrelid - 1] is true if there is no EPQ tuple for
* this target relation during the current EvalPlanQual test. We keep
* these flags set for all relids listed in resultRelations, but
* transiently clear the one for the relation whose tuple is actually
* passed to EvalPlanQual().
*/
bool *relsubs_blocked;
PlanState *recheckplanstate; /* EPQ specific exec nodes, for ->plan */
} EPQState;

View File

@ -842,6 +842,90 @@ step c1: COMMIT;
step writep3b: <... completed>
step c2: COMMIT;
starting permutation: writep4a writep4b c1 c2 readp
step writep4a: UPDATE p SET c = 4 WHERE c = 0;
step writep4b: UPDATE p SET b = -4 WHERE c = 0; <waiting ...>
step c1: COMMIT;
step writep4b: <... completed>
step c2: COMMIT;
step readp: SELECT tableoid::regclass, ctid, * FROM p;
tableoid|ctid |a|b|c
--------+------+-+-+-
c1 |(0,2) |0|0|1
c1 |(0,3) |0|0|2
c1 |(0,5) |0|1|1
c1 |(0,6) |0|1|2
c1 |(0,8) |0|2|1
c1 |(0,9) |0|2|2
c1 |(0,11)|0|0|4
c1 |(0,12)|0|1|4
c1 |(0,13)|0|2|4
c1 |(0,14)|0|3|4
c2 |(0,2) |1|0|1
c2 |(0,3) |1|0|2
c2 |(0,5) |1|1|1
c2 |(0,6) |1|1|2
c2 |(0,8) |1|2|1
c2 |(0,9) |1|2|2
c2 |(0,11)|1|0|4
c2 |(0,12)|1|1|4
c2 |(0,13)|1|2|4
c2 |(0,14)|1|3|4
c3 |(0,2) |2|0|1
c3 |(0,3) |2|0|2
c3 |(0,5) |2|1|1
c3 |(0,6) |2|1|2
c3 |(0,8) |2|2|1
c3 |(0,9) |2|2|2
c3 |(0,11)|2|0|4
c3 |(0,12)|2|1|4
c3 |(0,13)|2|2|4
c3 |(0,14)|2|3|4
(30 rows)
starting permutation: writep4a deletep4 c1 c2 readp
step writep4a: UPDATE p SET c = 4 WHERE c = 0;
step deletep4: DELETE FROM p WHERE c = 0; <waiting ...>
step c1: COMMIT;
step deletep4: <... completed>
step c2: COMMIT;
step readp: SELECT tableoid::regclass, ctid, * FROM p;
tableoid|ctid |a|b|c
--------+------+-+-+-
c1 |(0,2) |0|0|1
c1 |(0,3) |0|0|2
c1 |(0,5) |0|1|1
c1 |(0,6) |0|1|2
c1 |(0,8) |0|2|1
c1 |(0,9) |0|2|2
c1 |(0,11)|0|0|4
c1 |(0,12)|0|1|4
c1 |(0,13)|0|2|4
c1 |(0,14)|0|3|4
c2 |(0,2) |1|0|1
c2 |(0,3) |1|0|2
c2 |(0,5) |1|1|1
c2 |(0,6) |1|1|2
c2 |(0,8) |1|2|1
c2 |(0,9) |1|2|2
c2 |(0,11)|1|0|4
c2 |(0,12)|1|1|4
c2 |(0,13)|1|2|4
c2 |(0,14)|1|3|4
c3 |(0,2) |2|0|1
c3 |(0,3) |2|0|2
c3 |(0,5) |2|1|1
c3 |(0,6) |2|1|2
c3 |(0,8) |2|2|1
c3 |(0,9) |2|2|2
c3 |(0,11)|2|0|4
c3 |(0,12)|2|1|4
c3 |(0,13)|2|2|4
c3 |(0,14)|2|3|4
(30 rows)
starting permutation: wx2 partiallock c2 c1 read
step wx2: UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance;
balance
@ -1104,22 +1188,41 @@ subid|id
(1 row)
starting permutation: simplepartupdate conditionalpartupdate c1 c2 read_part
step simplepartupdate:
update parttbl set b = b + 10;
step conditionalpartupdate:
update parttbl set c = -c where b < 10;
<waiting ...>
step c1: COMMIT;
step conditionalpartupdate: <... completed>
step c2: COMMIT;
step read_part: SELECT * FROM parttbl ORDER BY a, c;
a| b|c| d
-+--+-+----
1|11|1| 12
2|12|2|1014
(2 rows)
starting permutation: simplepartupdate complexpartupdate c1 c2 read_part
step simplepartupdate:
update parttbl set b = b + 10;
step complexpartupdate:
with u as (update parttbl set b = b + 1 returning parttbl.*)
update parttbl set b = u.b + 100 from u;
update parttbl p set b = u.b + 100 from u where p.a = u.a;
<waiting ...>
step c1: COMMIT;
step complexpartupdate: <... completed>
step c2: COMMIT;
step read_part: SELECT * FROM parttbl ORDER BY a;
a| b|c| d
-+--+-+--
1|12|1|13
(1 row)
step read_part: SELECT * FROM parttbl ORDER BY a, c;
a| b|c| d
-+--+-+----
1|12|1| 13
2|13|2|1015
(2 rows)
starting permutation: simplepartupdate_route1to2 complexpartupdate_route_err1 c1 c2 read_part
@ -1139,11 +1242,12 @@ step c1: COMMIT;
step complexpartupdate_route_err1: <... completed>
ERROR: tuple to be locked was already moved to another partition due to concurrent update
step c2: COMMIT;
step read_part: SELECT * FROM parttbl ORDER BY a;
step read_part: SELECT * FROM parttbl ORDER BY a, c;
a|b|c| d
-+-+-+----
2|1|1|1003
(1 row)
2|2|2|1004
(2 rows)
starting permutation: simplepartupdate_noroute complexpartupdate_route c1 c2 read_part
@ -1167,11 +1271,12 @@ a|b|c| d
(1 row)
step c2: COMMIT;
step read_part: SELECT * FROM parttbl ORDER BY a;
step read_part: SELECT * FROM parttbl ORDER BY a, c;
a|b|c| d
-+-+-+----
2|2|1|1004
(1 row)
2|2|2|1004
(2 rows)
starting permutation: simplepartupdate_noroute complexpartupdate_doesnt_route c1 c2 read_part
@ -1195,9 +1300,10 @@ a|b|c|d
(1 row)
step c2: COMMIT;
step read_part: SELECT * FROM parttbl ORDER BY a;
a|b|c|d
-+-+-+-
1|2|1|3
(1 row)
step read_part: SELECT * FROM parttbl ORDER BY a, c;
a|b|c| d
-+-+-+----
1|2|1| 3
2|2|2|1004
(2 rows)

View File

@ -40,7 +40,7 @@ setup
CREATE TABLE parttbl2 PARTITION OF parttbl
(d WITH OPTIONS GENERATED ALWAYS AS (a + b + 1000) STORED)
FOR VALUES IN (2);
INSERT INTO parttbl VALUES (1, 1, 1);
INSERT INTO parttbl VALUES (1, 1, 1), (2, 2, 2);
CREATE TABLE another_parttbl (a int, b int, c int) PARTITION BY LIST (a);
CREATE TABLE another_parttbl1 PARTITION OF another_parttbl FOR VALUES IN (1);
@ -102,11 +102,15 @@ step upsert1 {
# when the first updated tuple was in a non-first child table.
# writep2/returningp1 tests a memory allocation issue
# writep3a/writep3b tests updates touching more than one table
# writep4a/writep4b tests a case where matches in another table confused EPQ
# writep4a/deletep4 tests the same case in the DELETE path
step readp { SELECT tableoid::regclass, ctid, * FROM p; }
step readp1 { SELECT tableoid::regclass, ctid, * FROM p WHERE b IN (0, 1) AND c = 0 FOR UPDATE; }
step writep1 { UPDATE p SET b = -1 WHERE a = 1 AND b = 1 AND c = 0; }
step writep2 { UPDATE p SET b = -b WHERE a = 1 AND c = 0; }
step writep3a { UPDATE p SET b = -b WHERE c = 0; }
step writep4a { UPDATE p SET c = 4 WHERE c = 0; }
step c1 { COMMIT; }
step r1 { ROLLBACK; }
@ -210,6 +214,8 @@ step returningp1 {
SELECT * FROM u;
}
step writep3b { UPDATE p SET b = -b WHERE c = 0; }
step writep4b { UPDATE p SET b = -4 WHERE c = 0; }
step deletep4 { DELETE FROM p WHERE c = 0; }
step readforss {
SELECT ta.id AS ta_id, ta.value AS ta_value,
(SELECT ROW(tb.id, tb.value)
@ -226,9 +232,14 @@ step updateforcip3 {
}
step wrtwcte { UPDATE table_a SET value = 'tableAValue2' WHERE id = 1; }
step wrjt { UPDATE jointest SET data = 42 WHERE id = 7; }
step conditionalpartupdate {
update parttbl set c = -c where b < 10;
}
step complexpartupdate {
with u as (update parttbl set b = b + 1 returning parttbl.*)
update parttbl set b = u.b + 100 from u;
update parttbl p set b = u.b + 100 from u where p.a = u.a;
}
step complexpartupdate_route_err1 {
@ -277,7 +288,7 @@ setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
step read { SELECT * FROM accounts ORDER BY accountid; }
step read_ext { SELECT * FROM accounts_ext ORDER BY accountid; }
step read_a { SELECT * FROM table_a ORDER BY id; }
step read_part { SELECT * FROM parttbl ORDER BY a; }
step read_part { SELECT * FROM parttbl ORDER BY a, c; }
# this test exercises EvalPlanQual with a CTE, cf bug #14328
step readwcte {
@ -347,6 +358,8 @@ permutation upsert1 upsert2 c1 c2 read
permutation readp1 writep1 readp2 c1 c2
permutation writep2 returningp1 c1 c2
permutation writep3a writep3b c1 c2
permutation writep4a writep4b c1 c2 readp
permutation writep4a deletep4 c1 c2 readp
permutation wx2 partiallock c2 c1 read
permutation wx2 lockwithvalues c2 c1 read
permutation wx2_ext partiallock_ext c2 c1 read_ext
@ -358,6 +371,7 @@ permutation wrjt selectjoinforupdate c2 c1
permutation wrjt selectresultforupdate c2 c1
permutation wrtwcte multireadwcte c1 c2
permutation simplepartupdate conditionalpartupdate c1 c2 read_part
permutation simplepartupdate complexpartupdate c1 c2 read_part
permutation simplepartupdate_route1to2 complexpartupdate_route_err1 c1 c2 read_part
permutation simplepartupdate_noroute complexpartupdate_route c1 c2 read_part