postgresql/src/backend/executor/nodeModifyTable.c


/*-------------------------------------------------------------------------
*
* nodeModifyTable.c
* routines to handle ModifyTable nodes.
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/executor/nodeModifyTable.c
*
*-------------------------------------------------------------------------
*/
/* INTERFACE ROUTINES
* ExecInitModifyTable - initialize the ModifyTable node
* ExecModifyTable - retrieve the next tuple from the node
* ExecEndModifyTable - shut down the ModifyTable node
* ExecReScanModifyTable - rescan the ModifyTable node
*
* NOTES
* Each ModifyTable node contains a list of one or more subplans,
* much like an Append node. There is one subplan per result relation.
* The key reason for this is that in an inherited UPDATE command, each
* result relation could have a different schema (more or different
* columns) requiring a different plan tree to produce it. In an
* inherited DELETE, all the subplans should produce the same output
* rowtype, but we might still find that different plans are appropriate
* for different child relations.
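* (For instance, an UPDATE on an inheritance parent whose children have
* added or dropped columns of their own gets one subplan per child
* table, each producing that child's rowtype.)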
*
* If the query specifies RETURNING, then the ModifyTable returns a
* RETURNING tuple after completing each row insert, update, or delete.
* It must be called again to continue the operation. Without RETURNING,
* we just loop within the node until all the work is done, then
* return NULL. This avoids useless call/return overhead.
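*
* For example (illustrative): given "INSERT INTO foo SELECT ... RETURNING *",
* each ExecModifyTable call inserts one row and returns its RETURNING
* projection; the same statement without RETURNING performs every insert
* in a single call and then returns NULL.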
*/
#include "postgres.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "storage/bufmgr.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/tqual.h"
/*
* Verify that the tuples to be produced by INSERT or UPDATE match the
* target relation's rowtype
*
* We do this to guard against stale plans. If plan invalidation is
* functioning properly then we should never get a failure here, but better
* safe than sorry. Note that this is called after we have obtained lock
* on the target rel, so the rowtype can't change underneath us.
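* (A stale plan could arise, for instance, if a cached plan outlived an
* ALTER TABLE ... ADD/DROP COLUMN on the target table; invalidation
* should force a re-plan, but we verify the rowtype here anyway.)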
*
* The plan output is represented by its targetlist, because that makes
* handling the dropped-column case easier.
*/
static void
ExecCheckPlanOutput(Relation resultRel, List *targetList)
{
TupleDesc resultDesc = RelationGetDescr(resultRel);
int attno = 0;
ListCell *lc;
foreach(lc, targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Form_pg_attribute attr;
if (tle->resjunk)
continue; /* ignore junk tlist items */
if (attno >= resultDesc->natts)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("table row type and query-specified row type do not match"),
errdetail("Query has too many columns.")));
attr = resultDesc->attrs[attno++];
if (!attr->attisdropped)
{
/* Normal case: demand type match */
if (exprType((Node *) tle->expr) != attr->atttypid)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("table row type and query-specified row type do not match"),
errdetail("Table has type %s at ordinal position %d, but query expects %s.",
format_type_be(attr->atttypid),
attno,
format_type_be(exprType((Node *) tle->expr)))));
}
else
{
/*
* For a dropped column, we can't check atttypid (it's likely 0).
* In any case the planner has most likely inserted an INT4 null.
* What we insist on is just *some* NULL constant.
*/
if (!IsA(tle->expr, Const) ||
!((Const *) tle->expr)->constisnull)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("table row type and query-specified row type do not match"),
errdetail("Query provides a value for a dropped column at ordinal position %d.",
attno)));
}
}
if (attno != resultDesc->natts)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("table row type and query-specified row type do not match"),
errdetail("Query has too few columns.")));
}
/*
* ExecProcessReturning --- evaluate a RETURNING list
*
* projectReturning: RETURNING projection info for current result rel
* tupleSlot: slot holding tuple actually inserted/updated/deleted
* planSlot: slot holding tuple returned by top subplan node
*
* Returns a slot holding the result tuple
*/
static TupleTableSlot *
ExecProcessReturning(ProjectionInfo *projectReturning,
TupleTableSlot *tupleSlot,
TupleTableSlot *planSlot)
{
ExprContext *econtext = projectReturning->pi_exprContext;
/*
* Reset per-tuple memory context to free any expression evaluation
* storage allocated in the previous cycle.
*/
ResetExprContext(econtext);
/* Make tuple and any needed join variables available to ExecProject */
econtext->ecxt_scantuple = tupleSlot;
econtext->ecxt_outertuple = planSlot;
/* Compute the RETURNING expressions */
return ExecProject(projectReturning, NULL);
}
/* ----------------------------------------------------------------
* ExecInsert
*
* For INSERT, we have to insert the tuple into the target relation
* and insert appropriate tuples into the index relations.
*
* Returns RETURNING result if any, otherwise NULL.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecInsert(TupleTableSlot *slot,
TupleTableSlot *planSlot,
EState *estate,
bool canSetTag)
{
HeapTuple tuple;
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
Oid newId;
List *recheckIndexes = NIL;
/*
* get the heap tuple out of the tuple table slot, making sure we have a
* writable copy
*/
tuple = ExecMaterializeSlot(slot);
/*
* get information on the (current) result relation
*/
resultRelInfo = estate->es_result_relation_info;
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/*
* If the result relation has OIDs, force the tuple's OID to zero so that
* heap_insert will assign a fresh OID. Usually the OID already will be
* zero at this point, but there are corner cases where the plan tree can
* return a tuple extracted literally from some table with the same
* rowtype.
*
* XXX if we ever wanted to allow users to assign their own OIDs to new
* rows, this'd be the place to do it. For the moment, we make a point of
* doing this before calling triggers, so that a user-supplied trigger
* could hack the OID if desired.
*/
if (resultRelationDesc->rd_rel->relhasoids)
HeapTupleSetOid(tuple, InvalidOid);
/* BEFORE ROW INSERT Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row)
{
slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
if (slot == NULL) /* "do nothing" */
return NULL;
/* trigger might have changed tuple */
tuple = ExecMaterializeSlot(slot);
}
/* INSTEAD OF ROW INSERT Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
{
slot = ExecIRInsertTriggers(estate, resultRelInfo, slot);
if (slot == NULL) /* "do nothing" */
return NULL;
/* trigger might have changed tuple */
tuple = ExecMaterializeSlot(slot);
newId = InvalidOid;
}
else
{
/*
* Check the constraints of the tuple
*/
if (resultRelationDesc->rd_att->constr)
ExecConstraints(resultRelInfo, slot, estate);
/*
* insert the tuple
*
* Note: heap_insert returns the tid (location) of the new tuple in
* the t_self field.
*/
newId = heap_insert(resultRelationDesc, tuple,
estate->es_output_cid, 0, NULL);
/*
* insert index entries for tuple
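*
* (ExecInsertIndexTuples returns the OIDs of any indexes with deferred
* uniqueness or exclusion checks still to be re-verified; that list is
* handed to the AFTER ROW INSERT trigger code below so the deferred
* rechecks can be queued.)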
*/
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
estate);
}
if (canSetTag)
{
(estate->es_processed)++;
estate->es_lastoid = newId;
setLastTid(&(tuple->t_self));
}
/* AFTER ROW INSERT Triggers */
ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes);
list_free(recheckIndexes);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
return ExecProcessReturning(resultRelInfo->ri_projectReturning,
slot, planSlot);
return NULL;
}
/* ----------------------------------------------------------------
* ExecDelete
*
* DELETE is like UPDATE, except that we delete the tuple and no
* index modifications are needed.
*
* When deleting from a table, tupleid identifies the tuple to
* delete and oldtuple is NULL. When deleting from a view,
* oldtuple is passed to the INSTEAD OF triggers and identifies
* what to delete, and tupleid is invalid.
*
* Returns RETURNING result if any, otherwise NULL.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecDelete(ItemPointer tupleid,
HeapTupleHeader oldtuple,
TupleTableSlot *planSlot,
EPQState *epqstate,
EState *estate,
bool canSetTag)
{
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
HTSU_Result result;
HeapUpdateFailureData hufd;
/*
* get information on the (current) result relation
*/
resultRelInfo = estate->es_result_relation_info;
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/* BEFORE ROW DELETE Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_before_row)
{
bool dodelete;
dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
tupleid);
if (!dodelete) /* "do nothing" */
return NULL;
}
/* INSTEAD OF ROW DELETE Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
{
HeapTupleData tuple;
bool dodelete;
Assert(oldtuple != NULL);
tuple.t_data = oldtuple;
tuple.t_len = HeapTupleHeaderGetDatumLength(oldtuple);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, &tuple);
if (!dodelete) /* "do nothing" */
return NULL;
}
else
{
/*
* delete the tuple
*
* Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
* that the row to be deleted is visible to that snapshot, and throw a
* can't-serialize error if not. This is a special-case behavior
* needed for referential integrity updates in transaction-snapshot
* mode transactions.
*/
ldelete:;
result = heap_delete(resultRelationDesc, tupleid,
estate->es_output_cid,
estate->es_crosscheck_snapshot,
true /* wait for commit */,
&hufd);
switch (result)
{
case HeapTupleSelfUpdated:
/*
* The target tuple was already updated or deleted by the
* current command, or by a later command in the current
* transaction. The former case is possible in a join DELETE
* where multiple tuples join to the same target tuple.
* This is somewhat questionable, but Postgres has always
* allowed it: we just ignore additional deletion attempts.
*
* The latter case arises if the tuple is modified by a
* command in a BEFORE trigger, or perhaps by a command in a
* volatile function used in the query. In such situations we
* should not ignore the deletion, but it is equally unsafe to
* proceed. We don't want to discard the original DELETE
* while keeping the triggered actions based on its deletion;
* and it would be no better to allow the original DELETE
* while discarding updates that it triggered. The row update
* carries some information that might be important according
* to business rules; so throwing an error is the only safe
* course.
*
* If a trigger actually intends this type of interaction,
* it can re-execute the DELETE and then return NULL to
* cancel the outer delete.
*/
if (hufd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
/* Else, already deleted by self; nothing to do */
return NULL;
case HeapTupleMayBeUpdated:
break;
case HeapTupleUpdated:
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
if (!ItemPointerEquals(tupleid, &hufd.ctid))
{
TupleTableSlot *epqslot;
epqslot = EvalPlanQual(estate,
epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
LockTupleExclusive,
&hufd.ctid,
hufd.xmax);
if (!TupIsNull(epqslot))
{
*tupleid = hufd.ctid;
goto ldelete;
}
}
/* tuple already deleted; nothing to do */
return NULL;
default:
elog(ERROR, "unrecognized heap_delete status: %u", result);
return NULL;
}
/*
* Note: Normally one would think that we have to delete index tuples
* associated with the heap tuple now...
*
* ... but in POSTGRES, we have no need to do this because VACUUM will
* take care of it later. We can't delete index tuples immediately
* anyway, since the tuple is still visible to other transactions.
*/
}
if (canSetTag)
(estate->es_processed)++;
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
{
/*
* We have to put the target tuple into a slot, which means first we
* gotta fetch it. We can use the trigger tuple slot.
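*
* (The just-deleted tuple is no longer visible to the query snapshot,
* so when we only have its TID we fetch it with SnapshotAny.)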
*/
TupleTableSlot *slot = estate->es_trig_tuple_slot;
TupleTableSlot *rslot;
HeapTupleData deltuple;
Buffer delbuffer;
if (oldtuple != NULL)
{
deltuple.t_data = oldtuple;
deltuple.t_len = HeapTupleHeaderGetDatumLength(oldtuple);
ItemPointerSetInvalid(&(deltuple.t_self));
deltuple.t_tableOid = InvalidOid;
delbuffer = InvalidBuffer;
}
else
{
deltuple.t_self = *tupleid;
if (!heap_fetch(resultRelationDesc, SnapshotAny,
&deltuple, &delbuffer, false, NULL))
elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
}
if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
ExecStoreTuple(&deltuple, slot, InvalidBuffer, false);
rslot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
slot, planSlot);
ExecClearTuple(slot);
if (BufferIsValid(delbuffer))
ReleaseBuffer(delbuffer);
return rslot;
}
return NULL;
}
/* ----------------------------------------------------------------
* ExecUpdate
*
* note: we can't run UPDATE queries with transactions
* off because UPDATEs are actually INSERTs and our
* scan will mistakenly loop forever, updating the tuple
* it just inserted. This should be fixed but until it
* is, we don't want to get stuck in an infinite loop
* which corrupts your database.
*
* When updating a table, tupleid identifies the tuple to
* update and oldtuple is NULL. When updating a view, oldtuple
* is passed to the INSTEAD OF triggers and identifies what to
* update, and tupleid is invalid.
*
* Returns RETURNING result if any, otherwise NULL.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecUpdate(ItemPointer tupleid,
HeapTupleHeader oldtuple,
TupleTableSlot *slot,
TupleTableSlot *planSlot,
EPQState *epqstate,
EState *estate,
bool canSetTag)
{
HeapTuple tuple;
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
HTSU_Result result;
HeapUpdateFailureData hufd;
List *recheckIndexes = NIL;
/*
* abort the operation if not running transactions
*/
if (IsBootstrapProcessingMode())
elog(ERROR, "cannot UPDATE during bootstrap");
/*
* get the heap tuple out of the tuple table slot, making sure we have a
* writable copy
*/
tuple = ExecMaterializeSlot(slot);
/*
* get information on the (current) result relation
*/
resultRelInfo = estate->es_result_relation_info;
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/* BEFORE ROW UPDATE Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_before_row)
{
slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
tupleid, slot);
if (slot == NULL) /* "do nothing" */
return NULL;
/* trigger might have changed tuple */
tuple = ExecMaterializeSlot(slot);
}
/* INSTEAD OF ROW UPDATE Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_instead_row)
{
HeapTupleData oldtup;
Assert(oldtuple != NULL);
oldtup.t_data = oldtuple;
oldtup.t_len = HeapTupleHeaderGetDatumLength(oldtuple);
ItemPointerSetInvalid(&(oldtup.t_self));
oldtup.t_tableOid = InvalidOid;
slot = ExecIRUpdateTriggers(estate, resultRelInfo,
&oldtup, slot);
if (slot == NULL) /* "do nothing" */
return NULL;
/* trigger might have changed tuple */
tuple = ExecMaterializeSlot(slot);
}
else
{
LockTupleMode lockmode;
/*
* Check the constraints of the tuple
*
* If we generate a new candidate tuple after EvalPlanQual testing, we
* must loop back here and recheck constraints. (We don't need to
* redo triggers, however. If there are any BEFORE triggers then
* trigger.c will have done heap_lock_tuple to lock the correct tuple,
* so there's no need to do them again.)
*/
lreplace:;
if (resultRelationDesc->rd_att->constr)
ExecConstraints(resultRelInfo, slot, estate);
/*
* replace the heap tuple
*
* Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
* that the row to be updated is visible to that snapshot, and throw a
* can't-serialize error if not. This is a special-case behavior
* needed for referential integrity updates in transaction-snapshot
* mode transactions.
*/
result = heap_update(resultRelationDesc, tupleid, tuple,
estate->es_output_cid,
estate->es_crosscheck_snapshot,
true /* wait for commit */,
&hufd, &lockmode);
switch (result)
{
case HeapTupleSelfUpdated:
/*
* The target tuple was already updated or deleted by the
* current command, or by a later command in the current
* transaction. The former case is possible in a join UPDATE
* where multiple tuples join to the same target tuple.
* This is pretty questionable, but Postgres has always
* allowed it: we just execute the first update action and
* ignore additional update attempts.
*
* The latter case arises if the tuple is modified by a
* command in a BEFORE trigger, or perhaps by a command in a
* volatile function used in the query. In such situations we
* should not ignore the update, but it is equally unsafe to
* proceed. We don't want to discard the original UPDATE
* while keeping the triggered actions based on it; and we
* have no principled way to merge this update with the
* previous ones. So throwing an error is the only safe
* course.
*
* If a trigger actually intends this type of interaction,
* it can re-execute the UPDATE (assuming it can figure out
* how) and then return NULL to cancel the outer update.
*/
if (hufd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
/* Else, already updated by self; nothing to do */
return NULL;
case HeapTupleMayBeUpdated:
break;
case HeapTupleUpdated:
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
if (!ItemPointerEquals(tupleid, &hufd.ctid))
{
TupleTableSlot *epqslot;
epqslot = EvalPlanQual(estate,
epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
lockmode,
&hufd.ctid,
hufd.xmax);
if (!TupIsNull(epqslot))
{
*tupleid = hufd.ctid;
slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
tuple = ExecMaterializeSlot(slot);
goto lreplace;
}
}
/* tuple already deleted; nothing to do */
return NULL;
default:
elog(ERROR, "unrecognized heap_update status: %u", result);
return NULL;
}
/*
* Note: instead of having to update the old index tuples associated
* with the heap tuple, all we do is form and insert new index tuples.
* This is because UPDATEs are actually DELETEs and INSERTs, and index
* tuple deletion is done later by VACUUM (see notes in ExecDelete).
* All we do here is insert new index tuples. -cim 9/27/89
*/
/*
* insert index entries for tuple
*
* Note: heap_update returns the tid (location) of the new tuple in
* the t_self field.
*
* If it's a HOT update, we mustn't insert new index entries.
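* (A heap-only tuple lives on the same page as the row version it
* replaces and has identical values in all indexed columns, so the
* existing index entries still reach it via the update chain.)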
*/
if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
estate);
}
if (canSetTag)
(estate->es_processed)++;
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple,
recheckIndexes);
list_free(recheckIndexes);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
return ExecProcessReturning(resultRelInfo->ri_projectReturning,
slot, planSlot);
return NULL;
}
/*
* Process BEFORE EACH STATEMENT triggers
*/
static void
fireBSTriggers(ModifyTableState *node)
{
switch (node->operation)
{
case CMD_INSERT:
ExecBSInsertTriggers(node->ps.state, node->resultRelInfo);
break;
case CMD_UPDATE:
ExecBSUpdateTriggers(node->ps.state, node->resultRelInfo);
break;
case CMD_DELETE:
ExecBSDeleteTriggers(node->ps.state, node->resultRelInfo);
break;
default:
elog(ERROR, "unknown operation");
break;
}
}
/*
* Process AFTER EACH STATEMENT triggers
*/
static void
fireASTriggers(ModifyTableState *node)
{
switch (node->operation)
{
case CMD_INSERT:
ExecASInsertTriggers(node->ps.state, node->resultRelInfo);
break;
case CMD_UPDATE:
ExecASUpdateTriggers(node->ps.state, node->resultRelInfo);
break;
case CMD_DELETE:
ExecASDeleteTriggers(node->ps.state, node->resultRelInfo);
break;
default:
elog(ERROR, "unknown operation");
break;
}
}
/* ----------------------------------------------------------------
* ExecModifyTable
*
* Perform table modifications as required, and return RETURNING results
* if needed.
* ----------------------------------------------------------------
*/
TupleTableSlot *
ExecModifyTable(ModifyTableState *node)
{
EState *estate = node->ps.state;
CmdType operation = node->operation;
ResultRelInfo *saved_resultRelInfo;
ResultRelInfo *resultRelInfo;
PlanState *subplanstate;
JunkFilter *junkfilter;
TupleTableSlot *slot;
TupleTableSlot *planSlot;
ItemPointer tupleid = NULL;
ItemPointerData tuple_ctid;
HeapTupleHeader oldtuple = NULL;
/*
* This should NOT get called during EvalPlanQual; we should have passed a
* subplan tree to EvalPlanQual, instead. Use a runtime test not just
* Assert because this condition is easy to miss in testing. (Note:
* although ModifyTable should not get executed within an EvalPlanQual
* operation, we do have to allow it to be initialized and shut down in
* case it is within a CTE subplan. Hence this test must be here, not in
* ExecInitModifyTable.)
*/
if (estate->es_epqTuple != NULL)
elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
/*
* If we've already completed processing, don't try to do more. We need
* this test because ExecPostprocessPlan might call us an extra time, and
* our subplan's nodes aren't necessarily robust against being called
* extra times.
*/
if (node->mt_done)
return NULL;
/*
* On first call, fire BEFORE STATEMENT triggers before proceeding.
*/
if (node->fireBSTriggers)
{
fireBSTriggers(node);
node->fireBSTriggers = false;
}
/* Preload local variables */
resultRelInfo = node->resultRelInfo + node->mt_whichplan;
subplanstate = node->mt_plans[node->mt_whichplan];
junkfilter = resultRelInfo->ri_junkFilter;
/*
* es_result_relation_info must point to the currently active result
* relation while we are within this ModifyTable node. Even though
* ModifyTable nodes can't be nested statically, they can be nested
* dynamically (since our subplan could include a reference to a modifying
* CTE). So we have to save and restore the caller's value.
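*
* For example (illustrative):
*   WITH w AS (INSERT INTO a VALUES (1) RETURNING *)
*   INSERT INTO b SELECT * FROM w;
* Here the inner INSERT's ModifyTable node runs while the outer one is
* active, so it would clobber es_result_relation_info if we did not
* restore it.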
*/
saved_resultRelInfo = estate->es_result_relation_info;
estate->es_result_relation_info = resultRelInfo;
/*
* Fetch rows from subplan(s), and execute the required table modification
* for each row.
*/
for (;;)
{
/*
* Reset the per-output-tuple exprcontext. This is needed because
* triggers expect to use that context as workspace. It's a bit ugly
* to do this below the top level of the plan, however. We might need
* to rethink this later.
*/
ResetPerTupleExprContext(estate);
planSlot = ExecProcNode(subplanstate);
if (TupIsNull(planSlot))
{
/* advance to next subplan if any */
node->mt_whichplan++;
if (node->mt_whichplan < node->mt_nplans)
{
resultRelInfo++;
subplanstate = node->mt_plans[node->mt_whichplan];
junkfilter = resultRelInfo->ri_junkFilter;
estate->es_result_relation_info = resultRelInfo;
EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan,
node->mt_arowmarks[node->mt_whichplan]);
continue;
}
else
break;
}
EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
slot = planSlot;
if (junkfilter != NULL)
{
/*
* extract the 'ctid' or 'wholerow' junk attribute.
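*
* For a plain table the planner adds a resjunk "ctid" column to the
* subplan targetlist identifying the physical row to update or delete;
* for a view fed through INSTEAD OF triggers it adds a "wholerow"
* column carrying the old row image instead.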
*/
if (operation == CMD_UPDATE || operation == CMD_DELETE)
{
Datum datum;
bool isNull;
if (resultRelInfo->ri_RelationDesc->rd_rel->relkind == RELKIND_RELATION)
{
datum = ExecGetJunkAttribute(slot,
junkfilter->jf_junkAttNo,
&isNull);
/* shouldn't ever get a null result... */
if (isNull)
elog(ERROR, "ctid is NULL");
tupleid = (ItemPointer) DatumGetPointer(datum);
tuple_ctid = *tupleid; /* be sure we don't free
* ctid!! */
tupleid = &tuple_ctid;
}
else
{
datum = ExecGetJunkAttribute(slot,
junkfilter->jf_junkAttNo,
&isNull);
/* shouldn't ever get a null result... */
if (isNull)
elog(ERROR, "wholerow is NULL");
oldtuple = DatumGetHeapTupleHeader(datum);
}
}
/*
* apply the junkfilter if needed.
*/
if (operation != CMD_DELETE)
slot = ExecFilterJunk(junkfilter, slot);
}
switch (operation)
{
case CMD_INSERT:
slot = ExecInsert(slot, planSlot, estate, node->canSetTag);
break;
case CMD_UPDATE:
slot = ExecUpdate(tupleid, oldtuple, slot, planSlot,
&node->mt_epqstate, estate, node->canSetTag);
break;
case CMD_DELETE:
slot = ExecDelete(tupleid, oldtuple, planSlot,
&node->mt_epqstate, estate, node->canSetTag);
break;
default:
elog(ERROR, "unknown operation");
break;
}
/*
* If we got a RETURNING result, return it to caller. We'll continue
* the work on next call.
*/
if (slot)
{
estate->es_result_relation_info = saved_resultRelInfo;
return slot;
}
}
/* Restore es_result_relation_info before exiting */
estate->es_result_relation_info = saved_resultRelInfo;
/*
* We're done, but fire AFTER STATEMENT triggers before exiting.
*/
fireASTriggers(node);
node->mt_done = true;
return NULL;
}
/* ----------------------------------------------------------------
* ExecInitModifyTable
* ----------------------------------------------------------------
*/
ModifyTableState *
ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
{
ModifyTableState *mtstate;
CmdType operation = node->operation;
int nplans = list_length(node->plans);
ResultRelInfo *saved_resultRelInfo;
ResultRelInfo *resultRelInfo;
TupleDesc tupDesc;
Plan *subplan;
ListCell *l;
int i;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
/*
* create state structure
*/
mtstate = makeNode(ModifyTableState);
mtstate->ps.plan = (Plan *) node;
mtstate->ps.state = estate;
mtstate->ps.targetlist = NIL; /* not actually used */
mtstate->operation = operation;
mtstate->canSetTag = node->canSetTag;
mtstate->mt_done = false;
mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);
mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
mtstate->mt_nplans = nplans;
/* set up epqstate with dummy subplan data for the moment */
EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
mtstate->fireBSTriggers = true;
/*
* call ExecInitNode on each of the plans to be executed and save the
* results into the array "mt_plans". This is also a convenient place to
* verify that the proposed target relations are valid and open their
* indexes for insertion of new index entries. Note we *must* set
* estate->es_result_relation_info correctly while we initialize each
* sub-plan; ExecContextForcesOids depends on that!
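*
* (ExecContextForcesOids consults the current result relation to decide
* whether the subplan must build its result tuples with space for an OID.)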
*/
saved_resultRelInfo = estate->es_result_relation_info;
resultRelInfo = mtstate->resultRelInfo;
i = 0;
foreach(l, node->plans)
{
subplan = (Plan *) lfirst(l);
/*
* Verify result relation is a valid target for the current operation
*/
CheckValidResultRel(resultRelInfo->ri_RelationDesc, operation);
/*
* If there are indices on the result relation, open them and save
* descriptors in the result relation info, so that we can add new
* index entries for the tuples we add/update. We need not do this
* for a DELETE, however, since deletion doesn't affect indexes. Also,
* inside an EvalPlanQual operation, the indexes might be open
* already, since we share the resultrel state with the original
* query.
*/
if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
operation != CMD_DELETE &&
resultRelInfo->ri_IndexRelationDescs == NULL)
ExecOpenIndices(resultRelInfo);
/* Now init the plan for this result rel */
estate->es_result_relation_info = resultRelInfo;
mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
resultRelInfo++;
i++;
}
estate->es_result_relation_info = saved_resultRelInfo;
/*
* Initialize RETURNING projections if needed.
*/
if (node->returningLists)
{
TupleTableSlot *slot;
ExprContext *econtext;
/*
* Initialize result tuple slot and assign its rowtype using the first
* RETURNING list. We assume the rest will look the same.
*/
tupDesc = ExecTypeFromTL((List *) linitial(node->returningLists),
false);
/* Set up a slot for the output of the RETURNING projection(s) */
ExecInitResultTupleSlot(estate, &mtstate->ps);
ExecAssignResultType(&mtstate->ps, tupDesc);
slot = mtstate->ps.ps_ResultTupleSlot;
/* Need an econtext too */
econtext = CreateExprContext(estate);
mtstate->ps.ps_ExprContext = econtext;
/*
* Build a projection for each result rel.
*/
resultRelInfo = mtstate->resultRelInfo;
foreach(l, node->returningLists)
{
List *rlist = (List *) lfirst(l);
List *rliststate;
rliststate = (List *) ExecInitExpr((Expr *) rlist, &mtstate->ps);
resultRelInfo->ri_projectReturning =
ExecBuildProjectionInfo(rliststate, econtext, slot,
resultRelInfo->ri_RelationDesc->rd_att);
resultRelInfo++;
}
}
else
{
/*
* We still must construct a dummy result tuple type, because InitPlan
* expects one (maybe should change that?).
*/
tupDesc = ExecTypeFromTL(NIL, false);
ExecInitResultTupleSlot(estate, &mtstate->ps);
ExecAssignResultType(&mtstate->ps, tupDesc);
mtstate->ps.ps_ExprContext = NULL;
}
/*
* If we have any secondary relations in an UPDATE or DELETE, they need to
* be treated like non-locked relations in SELECT FOR UPDATE, ie, the
* EvalPlanQual mechanism needs to be told about them. Locate the
* relevant ExecRowMarks.
*/
foreach(l, node->rowMarks)
{
PlanRowMark *rc = (PlanRowMark *) lfirst(l);
ExecRowMark *erm;
Assert(IsA(rc, PlanRowMark));
/* ignore "parent" rowmarks; they are irrelevant at runtime */
if (rc->isParent)
continue;
/* find ExecRowMark (same for all subplans) */
erm = ExecFindRowMark(estate, rc->rti);
/* build ExecAuxRowMark for each subplan */
for (i = 0; i < nplans; i++)
{
ExecAuxRowMark *aerm;
subplan = mtstate->mt_plans[i]->plan;
aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm);
}
}
/* select first subplan */
mtstate->mt_whichplan = 0;
subplan = (Plan *) linitial(node->plans);
EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan,
mtstate->mt_arowmarks[0]);
/*
* Initialize the junk filter(s) if needed. INSERT queries need a filter
* if there are any junk attrs in the tlist. UPDATE and DELETE always
* need a filter, since there's always a junk 'ctid' or 'wholerow'
* attribute present --- no need to look first.
*
* If there are multiple result relations, each one needs its own junk
* filter. Note multiple rels are only possible for UPDATE/DELETE, so we
* can't be fooled by some needing a filter and some not.
*
* This section of code is also a convenient place to verify that the
* output of an INSERT or UPDATE matches the target table(s).
*/
{
bool junk_filter_needed = false;
switch (operation)
{
case CMD_INSERT:
foreach(l, subplan->targetlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->resjunk)
{
junk_filter_needed = true;
break;
}
}
break;
case CMD_UPDATE:
case CMD_DELETE:
junk_filter_needed = true;
break;
default:
elog(ERROR, "unknown operation");
break;
}
if (junk_filter_needed)
{
resultRelInfo = mtstate->resultRelInfo;
for (i = 0; i < nplans; i++)
{
JunkFilter *j;
subplan = mtstate->mt_plans[i]->plan;
if (operation == CMD_INSERT || operation == CMD_UPDATE)
ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
subplan->targetlist);
j = ExecInitJunkFilter(subplan->targetlist,
resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
ExecInitExtraTupleSlot(estate));
if (operation == CMD_UPDATE || operation == CMD_DELETE)
{
/* For UPDATE/DELETE, find the appropriate junk attr now */
if (resultRelInfo->ri_RelationDesc->rd_rel->relkind == RELKIND_RELATION)
{
j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
if (!AttributeNumberIsValid(j->jf_junkAttNo))
elog(ERROR, "could not find junk ctid column");
}
else
{
j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
if (!AttributeNumberIsValid(j->jf_junkAttNo))
elog(ERROR, "could not find junk wholerow column");
}
}
resultRelInfo->ri_junkFilter = j;
resultRelInfo++;
}
}
else
{
if (operation == CMD_INSERT)
ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc,
subplan->targetlist);
}
}
/*
* Set up a tuple table slot for use for trigger output tuples. In a plan
* containing multiple ModifyTable nodes, all can share one such slot, so
* we keep it in the estate.
*/
if (estate->es_trig_tuple_slot == NULL)
estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
/*
* Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
* to estate->es_auxmodifytables so that it will be run to completion by
* ExecPostprocessPlan. (It'd actually work fine to add the primary
* ModifyTable node too, but there's no need.) Note the use of lcons not
* lappend: we need later-initialized ModifyTable nodes to be shut down
* before earlier ones. This ensures that we don't throw away RETURNING
* rows that need to be seen by a later CTE subplan.
*/
if (!mtstate->canSetTag)
estate->es_auxmodifytables = lcons(mtstate,
estate->es_auxmodifytables);
return mtstate;
}
/* ----------------------------------------------------------------
* ExecEndModifyTable
*
* Shuts down the plan.
*
* Returns nothing of interest.
* ----------------------------------------------------------------
*/
void
ExecEndModifyTable(ModifyTableState *node)
{
int i;
/*
* Free the exprcontext
*/
ExecFreeExprContext(&node->ps);
/*
* clean out the tuple table
*/
ExecClearTuple(node->ps.ps_ResultTupleSlot);
/*
* Terminate EPQ execution if active
*/
EvalPlanQualEnd(&node->mt_epqstate);
/*
* shut down subplans
*/
for (i = 0; i < node->mt_nplans; i++)
ExecEndNode(node->mt_plans[i]);
}
void
ExecReScanModifyTable(ModifyTableState *node)
{
/*
* Currently, we don't need to support rescan on ModifyTable nodes. The
* semantics of that would be a bit debatable anyway.
*/
elog(ERROR, "ExecReScanModifyTable is not implemented");
}