/*------------------------------------------------------------------------- * * nodeModifyTable.c * routines to handle ModifyTable nodes. * * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/executor/nodeModifyTable.c * *------------------------------------------------------------------------- */ /* INTERFACE ROUTINES * ExecInitModifyTable - initialize the ModifyTable node * ExecModifyTable - retrieve the next tuple from the node * ExecEndModifyTable - shut down the ModifyTable node * ExecReScanModifyTable - rescan the ModifyTable node * * NOTES * Each ModifyTable node contains a list of one or more subplans, * much like an Append node. There is one subplan per result relation. * The key reason for this is that in an inherited UPDATE command, each * result relation could have a different schema (more or different * columns) requiring a different plan tree to produce it. In an * inherited DELETE, all the subplans should produce the same output * rowtype, but we might still find that different plans are appropriate * for different child relations. * * If the query specifies RETURNING, then the ModifyTable returns a * RETURNING tuple after completing each row insert, update, or delete. * It must be called again to continue the operation. Without RETURNING, * we just loop within the node until all the work is done, then * return NULL. This avoids useless call/return overhead. */ #include "postgres.h" #include "access/htup_details.h" #include "access/xact.h" #include "commands/trigger.h" #include "executor/executor.h" #include "executor/nodeModifyTable.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "parser/parsetree.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/tqual.h" static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, ItemPointer conflictTid, TupleTableSlot *planSlot, TupleTableSlot *excludedSlot, EState *estate, bool canSetTag, TupleTableSlot **returning); /* * Verify that the tuples to be produced by INSERT or UPDATE match the * target relation's rowtype * * We do this to guard against stale plans. If plan invalidation is * functioning properly then we should never get a failure here, but better * safe than sorry. Note that this is called after we have obtained lock * on the target rel, so the rowtype can't change underneath us. * * The plan output is represented by its targetlist, because that makes * handling the dropped-column case easier. 
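 *
 * A hypothetical illustration: if a cached INSERT plan somehow survived an
 * ALTER TABLE ... ADD COLUMN on the target table, its targetlist would be
 * one entry short and we would fail here with "Query has too few columns."
 * instead of silently storing a malformed row.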
*/ static void ExecCheckPlanOutput(Relation resultRel, List *targetList) { TupleDesc resultDesc = RelationGetDescr(resultRel); int attno = 0; ListCell *lc; foreach(lc, targetList) { TargetEntry *tle = (TargetEntry *) lfirst(lc); Form_pg_attribute attr; if (tle->resjunk) continue; /* ignore junk tlist items */ if (attno >= resultDesc->natts) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("table row type and query-specified row type do not match"), errdetail("Query has too many columns."))); attr = TupleDescAttr(resultDesc, attno); attno++; if (!attr->attisdropped) { /* Normal case: demand type match */ if (exprType((Node *) tle->expr) != attr->atttypid) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("table row type and query-specified row type do not match"), errdetail("Table has type %s at ordinal position %d, but query expects %s.", format_type_be(attr->atttypid), attno, format_type_be(exprType((Node *) tle->expr))))); } else { /* * For a dropped column, we can't check atttypid (it's likely 0). * In any case the planner has most likely inserted an INT4 null. * What we insist on is just *some* NULL constant. */ if (!IsA(tle->expr, Const) || !((Const *) tle->expr)->constisnull) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("table row type and query-specified row type do not match"), errdetail("Query provides a value for a dropped column at ordinal position %d.", attno))); } } if (attno != resultDesc->natts) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("table row type and query-specified row type do not match"), errdetail("Query has too few columns."))); } /* * ExecProcessReturning --- evaluate a RETURNING list * * projectReturning: RETURNING projection info for current result rel * tupleSlot: slot holding tuple actually inserted/updated/deleted * planSlot: slot holding tuple returned by top subplan node * * Note: If tupleSlot is NULL, the FDW should have already provided econtext's * scan tuple. * * Returns a slot holding the result tuple */ static TupleTableSlot * ExecProcessReturning(ResultRelInfo *resultRelInfo, TupleTableSlot *tupleSlot, TupleTableSlot *planSlot) { ProjectionInfo *projectReturning = resultRelInfo->ri_projectReturning; ExprContext *econtext = projectReturning->pi_exprContext; /* * Reset per-tuple memory context to free any expression evaluation * storage allocated in the previous cycle. */ ResetExprContext(econtext); /* Make tuple and any needed join variables available to ExecProject */ if (tupleSlot) econtext->ecxt_scantuple = tupleSlot; else { HeapTuple tuple; /* * RETURNING expressions might reference the tableoid column, so * initialize t_tableOid before evaluating them. */ Assert(!TupIsNull(econtext->ecxt_scantuple)); tuple = ExecMaterializeSlot(econtext->ecxt_scantuple); tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); } econtext->ecxt_outertuple = planSlot; /* Compute the RETURNING expressions */ return ExecProject(projectReturning); } /* * ExecCheckHeapTupleVisible -- verify heap tuple is visible * * It would not be consistent with guarantees of the higher isolation levels to * proceed with avoiding insertion (taking speculative insertion's alternative * path) on the basis of another tuple that is not visible to MVCC snapshot. * Check for the need to raise a serialization failure, and do so as necessary. 
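 *
 * For instance, at REPEATABLE READ an INSERT ... ON CONFLICT DO NOTHING
 * that would skip a row only because of a conflicting tuple committed
 * after our snapshot was taken must raise a serialization failure here,
 * rather than silently doing nothing on the strength of a row our
 * snapshot cannot see.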
*/ static void ExecCheckHeapTupleVisible(EState *estate, HeapTuple tuple, Buffer buffer) { if (!IsolationUsesXactSnapshot()) return; /* * We need buffer pin and lock to call HeapTupleSatisfiesVisibility. * Caller should be holding pin, but not lock. */ LockBuffer(buffer, BUFFER_LOCK_SHARE); if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer)) { /* * We should not raise a serialization failure if the conflict is * against a tuple inserted by our own transaction, even if it's not * visible to our snapshot. (This would happen, for example, if * conflicting keys are proposed for insertion in a single command.) */ if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); } LockBuffer(buffer, BUFFER_LOCK_UNLOCK); } /* * ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible() */ static void ExecCheckTIDVisible(EState *estate, ResultRelInfo *relinfo, ItemPointer tid) { Relation rel = relinfo->ri_RelationDesc; Buffer buffer; HeapTupleData tuple; /* Redundantly check isolation level */ if (!IsolationUsesXactSnapshot()) return; tuple.t_self = *tid; if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL)) elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); ExecCheckHeapTupleVisible(estate, &tuple, buffer); ReleaseBuffer(buffer); } /* ---------------------------------------------------------------- * ExecInsert * * For INSERT, we have to insert the tuple into the target relation * and insert appropriate tuples into the index relations. * * Returns RETURNING result if any, otherwise NULL. * ---------------------------------------------------------------- */ static TupleTableSlot * ExecInsert(ModifyTableState *mtstate, TupleTableSlot *slot, TupleTableSlot *planSlot, List *arbiterIndexes, OnConflictAction onconflict, EState *estate, bool canSetTag) { HeapTuple tuple; ResultRelInfo *resultRelInfo; ResultRelInfo *saved_resultRelInfo = NULL; Relation resultRelationDesc; Oid newId; List *recheckIndexes = NIL; TupleTableSlot *result = NULL; /* * get the heap tuple out of the tuple table slot, making sure we have a * writable copy */ tuple = ExecMaterializeSlot(slot); /* * get information on the (current) result relation */ resultRelInfo = estate->es_result_relation_info; /* Determine the partition to heap_insert the tuple into */ if (mtstate->mt_partition_dispatch_info) { int leaf_part_index; TupleConversionMap *map; /* * Away we go ... If we end up not finding a partition after all, * ExecFindPartition() does not return and errors out instead. * Otherwise, the returned value is to be used as an index into arrays * mt_partitions[] and mt_partition_tupconv_maps[] that will get us * the ResultRelInfo and TupleConversionMap for the partition, * respectively. */ leaf_part_index = ExecFindPartition(resultRelInfo, mtstate->mt_partition_dispatch_info, slot, estate); Assert(leaf_part_index >= 0 && leaf_part_index < mtstate->mt_num_partitions); /* * Save the old ResultRelInfo and switch to the one corresponding to * the selected partition. 
*/ saved_resultRelInfo = resultRelInfo; resultRelInfo = mtstate->mt_partitions + leaf_part_index; /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot route inserted tuples to a foreign table"))); /* For ExecInsertIndexTuples() to work on the partition's indexes */ estate->es_result_relation_info = resultRelInfo; /* * If we're capturing transition tuples, we might need to convert from * the partition rowtype to parent rowtype. */ if (mtstate->mt_transition_capture != NULL) { if (resultRelInfo->ri_TrigDesc && (resultRelInfo->ri_TrigDesc->trig_insert_before_row || resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) { /* * If there are any BEFORE or INSTEAD triggers on the * partition, we'll have to be ready to convert their result * back to tuplestore format. */ mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL; mtstate->mt_transition_capture->tcs_map = mtstate->mt_transition_tupconv_maps[leaf_part_index]; } else { /* * Otherwise, just remember the original unconverted tuple, to * avoid a needless round trip conversion. */ mtstate->mt_transition_capture->tcs_original_insert_tuple = tuple; mtstate->mt_transition_capture->tcs_map = NULL; } } if (mtstate->mt_oc_transition_capture != NULL) mtstate->mt_oc_transition_capture->tcs_map = mtstate->mt_transition_tupconv_maps[leaf_part_index]; /* * We might need to convert from the parent rowtype to the partition * rowtype. */ map = mtstate->mt_partition_tupconv_maps[leaf_part_index]; if (map) { Relation partrel = resultRelInfo->ri_RelationDesc; tuple = do_convert_tuple(tuple, map); /* * We must use the partition's tuple descriptor from this point * on, until we're finished dealing with the partition. Use the * dedicated slot for that. */ slot = mtstate->mt_partition_tuple_slot; Assert(slot != NULL); ExecSetSlotDescriptor(slot, RelationGetDescr(partrel)); ExecStoreTuple(tuple, slot, InvalidBuffer, true); } } resultRelationDesc = resultRelInfo->ri_RelationDesc; /* * If the result relation has OIDs, force the tuple's OID to zero so that * heap_insert will assign a fresh OID. Usually the OID already will be * zero at this point, but there are corner cases where the plan tree can * return a tuple extracted literally from some table with the same * rowtype. * * XXX if we ever wanted to allow users to assign their own OIDs to new * rows, this'd be the place to do it. For the moment, we make a point of * doing this before calling triggers, so that a user-supplied trigger * could hack the OID if desired. */ if (resultRelationDesc->rd_rel->relhasoids) HeapTupleSetOid(tuple, InvalidOid); /* * BEFORE ROW INSERT Triggers. * * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an * INSERT ... ON CONFLICT statement. We cannot check for constraint * violations before firing these triggers, because they can change the * values to insert. Also, they can run arbitrary user-defined code with * side-effects that we can't cancel by just not inserting the tuple. 
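 *
 * For example, a BEFORE trigger may rewrite the proposed key value, so a
 * row that looked conflict-free when it left the planner can still collide
 * with an existing row (or vice versa); the arbiter-index checks below
 * must therefore look at the post-trigger tuple.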
*/ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_before_row) { slot = ExecBRInsertTriggers(estate, resultRelInfo, slot); if (slot == NULL) /* "do nothing" */ return NULL; /* trigger might have changed tuple */ tuple = ExecMaterializeSlot(slot); } /* INSTEAD OF ROW INSERT Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_instead_row) { slot = ExecIRInsertTriggers(estate, resultRelInfo, slot); if (slot == NULL) /* "do nothing" */ return NULL; /* trigger might have changed tuple */ tuple = ExecMaterializeSlot(slot); newId = InvalidOid; } else if (resultRelInfo->ri_FdwRoutine) { /* * insert into foreign table: let the FDW do it */ slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate, resultRelInfo, slot, planSlot); if (slot == NULL) /* "do nothing" */ return NULL; /* FDW might have changed tuple */ tuple = ExecMaterializeSlot(slot); /* * AFTER ROW Triggers or RETURNING expressions might reference the * tableoid column, so initialize t_tableOid before evaluating them. */ tuple->t_tableOid = RelationGetRelid(resultRelationDesc); newId = InvalidOid; } else { /* * We always check the partition constraint, including when the tuple * got here via tuple-routing. However we don't need to in the latter * case if no BR trigger is defined on the partition. Note that a BR * trigger might modify the tuple such that the partition constraint * is no longer satisfied, so we need to check in that case. */ bool check_partition_constr = (resultRelInfo->ri_PartitionCheck != NIL); /* * Constraints might reference the tableoid column, so initialize * t_tableOid before evaluating them. */ tuple->t_tableOid = RelationGetRelid(resultRelationDesc); /* * Check any RLS INSERT WITH CHECK policies * * ExecWithCheckOptions() will skip any WCOs which are not of the kind * we are looking for at this point. */ if (resultRelInfo->ri_WithCheckOptions != NIL) ExecWithCheckOptions(WCO_RLS_INSERT_CHECK, resultRelInfo, slot, estate); /* * No need though if the tuple has been routed, and a BR trigger * doesn't exist. */ if (saved_resultRelInfo != NULL && !(resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_before_row)) check_partition_constr = false; /* Check the constraints of the tuple */ if (resultRelationDesc->rd_att->constr || check_partition_constr) ExecConstraints(resultRelInfo, slot, estate); if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0) { /* Perform a speculative insertion. */ uint32 specToken; ItemPointerData conflictTid; bool specConflict; /* * Do a non-conclusive check for conflicts first. * * We're not holding any locks yet, so this doesn't guarantee that * the later insert won't conflict. But it avoids leaving behind * a lot of canceled speculative insertions, if you run a lot of * INSERT ON CONFLICT statements that do conflict. * * We loop back here if we find a conflict below, either during * the pre-check, or when we re-check after inserting the tuple * speculatively. */ vlock: specConflict = false; if (!ExecCheckIndexConstraints(slot, estate, &conflictTid, arbiterIndexes)) { /* committed conflict tuple found */ if (onconflict == ONCONFLICT_UPDATE) { /* * In case of ON CONFLICT DO UPDATE, execute the UPDATE * part. Be prepared to retry if the UPDATE fails because * of another concurrent UPDATE/DELETE to the conflict * tuple. 
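 *
 * (If ExecOnConflictUpdate() reports failure, we "goto vlock" below and
 * redo the whole speculative-insertion dance, starting with the arbiter
 * index pre-check.)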
*/ TupleTableSlot *returning = NULL; if (ExecOnConflictUpdate(mtstate, resultRelInfo, &conflictTid, planSlot, slot, estate, canSetTag, &returning)) { InstrCountFiltered2(&mtstate->ps, 1); return returning; } else goto vlock; } else { /* * In case of ON CONFLICT DO NOTHING, do nothing. However, * verify that the tuple is visible to the executor's MVCC * snapshot at higher isolation levels. */ Assert(onconflict == ONCONFLICT_NOTHING); ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid); InstrCountFiltered2(&mtstate->ps, 1); return NULL; } } /* * Before we start insertion proper, acquire our "speculative * insertion lock". Others can use that to wait for us to decide * if we're going to go ahead with the insertion, instead of * waiting for the whole transaction to complete. */ specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId()); HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken); /* insert the tuple, with the speculative token */ newId = heap_insert(resultRelationDesc, tuple, estate->es_output_cid, HEAP_INSERT_SPECULATIVE, NULL); /* insert index entries for tuple */ recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), estate, true, &specConflict, arbiterIndexes); /* adjust the tuple's state accordingly */ if (!specConflict) heap_finish_speculative(resultRelationDesc, tuple); else heap_abort_speculative(resultRelationDesc, tuple); /* * Wake up anyone waiting for our decision. They will re-check * the tuple, see that it's no longer speculative, and wait on our * XID as if this was a regularly inserted tuple all along. Or if * we killed the tuple, they will see it's dead, and proceed as if * the tuple never existed. */ SpeculativeInsertionLockRelease(GetCurrentTransactionId()); /* * If there was a conflict, start from the beginning. We'll do * the pre-check again, which will now find the conflicting tuple * (unless it aborts before we get there). */ if (specConflict) { list_free(recheckIndexes); goto vlock; } /* Since there was no insertion conflict, we're done */ } else { /* * insert the tuple normally. * * Note: heap_insert returns the tid (location) of the new tuple * in the t_self field. */ newId = heap_insert(resultRelationDesc, tuple, estate->es_output_cid, 0, NULL); /* insert index entries for tuple */ if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false, NULL, arbiterIndexes); } } if (canSetTag) { (estate->es_processed)++; estate->es_lastoid = newId; setLastTid(&(tuple->t_self)); } /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes, mtstate->mt_transition_capture); list_free(recheckIndexes); /* * Check any WITH CHECK OPTION constraints from parent views. We are * required to do this after testing all constraints and uniqueness * violations per the SQL spec, so we do it after actually inserting the * record into the heap and all indexes. * * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the * tuple will never be seen, if it violates the WITH CHECK OPTION. * * ExecWithCheckOptions() will skip any WCOs which are not of the kind we * are looking for at this point. 
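 *
 * For example, an INSERT through an auto-updatable view declared WITH
 * CHECK OPTION fails here if the new row does not satisfy the view's
 * WHERE clause; since the error aborts the statement, the already-inserted
 * heap and index entries are never seen.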
*/ if (resultRelInfo->ri_WithCheckOptions != NIL) ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate); /* Process RETURNING if present */ if (resultRelInfo->ri_projectReturning) result = ExecProcessReturning(resultRelInfo, slot, planSlot); if (saved_resultRelInfo) estate->es_result_relation_info = saved_resultRelInfo; return result; } /* ---------------------------------------------------------------- * ExecDelete * * DELETE is like UPDATE, except that we delete the tuple and no * index modifications are needed. * * When deleting from a table, tupleid identifies the tuple to * delete and oldtuple is NULL. When deleting from a view, * oldtuple is passed to the INSTEAD OF triggers and identifies * what to delete, and tupleid is invalid. When deleting from a * foreign table, tupleid is invalid; the FDW has to figure out * which row to delete using data from the planSlot. oldtuple is * passed to foreign table triggers; it is NULL when the foreign * table has no relevant triggers. * * Returns RETURNING result if any, otherwise NULL. * ---------------------------------------------------------------- */ static TupleTableSlot * ExecDelete(ModifyTableState *mtstate, ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *planSlot, EPQState *epqstate, EState *estate, bool canSetTag) { ResultRelInfo *resultRelInfo; Relation resultRelationDesc; HTSU_Result result; HeapUpdateFailureData hufd; TupleTableSlot *slot = NULL; /* * get information on the (current) result relation */ resultRelInfo = estate->es_result_relation_info; resultRelationDesc = resultRelInfo->ri_RelationDesc; /* BEFORE ROW DELETE Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_delete_before_row) { bool dodelete; dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, tupleid, oldtuple); if (!dodelete) /* "do nothing" */ return NULL; } /* INSTEAD OF ROW DELETE Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_delete_instead_row) { bool dodelete; Assert(oldtuple != NULL); dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple); if (!dodelete) /* "do nothing" */ return NULL; } else if (resultRelInfo->ri_FdwRoutine) { HeapTuple tuple; /* * delete from foreign table: let the FDW do it * * We offer the trigger tuple slot as a place to store RETURNING data, * although the FDW can return some other slot if it wants. Set up * the slot's tupdesc so the FDW doesn't need to do that for itself. */ slot = estate->es_trig_tuple_slot; if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc)) ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc)); slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate, resultRelInfo, slot, planSlot); if (slot == NULL) /* "do nothing" */ return NULL; /* * RETURNING expressions might reference the tableoid column, so * initialize t_tableOid before evaluating them. */ if (slot->tts_isempty) ExecStoreAllNullTuple(slot); tuple = ExecMaterializeSlot(slot); tuple->t_tableOid = RelationGetRelid(resultRelationDesc); } else { /* * delete the tuple * * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check * that the row to be deleted is visible to that snapshot, and throw a * can't-serialize error if not. This is a special-case behavior * needed for referential integrity updates in transaction-snapshot * mode transactions. 
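 *
 * (The crosscheck snapshot is normally InvalidSnapshot; the RI trigger
 * machinery supplies one, via SPI, when it re-checks rows on behalf of a
 * transaction-snapshot-mode transaction.)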
*/ ldelete:; result = heap_delete(resultRelationDesc, tupleid, estate->es_output_cid, estate->es_crosscheck_snapshot, true /* wait for commit */ , &hufd); switch (result) { case HeapTupleSelfUpdated: /* * The target tuple was already updated or deleted by the * current command, or by a later command in the current * transaction. The former case is possible in a join DELETE * where multiple tuples join to the same target tuple. This * is somewhat questionable, but Postgres has always allowed * it: we just ignore additional deletion attempts. * * The latter case arises if the tuple is modified by a * command in a BEFORE trigger, or perhaps by a command in a * volatile function used in the query. In such situations we * should not ignore the deletion, but it is equally unsafe to * proceed. We don't want to discard the original DELETE * while keeping the triggered actions based on its deletion; * and it would be no better to allow the original DELETE * while discarding updates that it triggered. The row update * carries some information that might be important according * to business rules; so throwing an error is the only safe * course. * * If a trigger actually intends this type of interaction, it * can re-execute the DELETE and then return NULL to cancel * the outer delete. */ if (hufd.cmax != estate->es_output_cid) ereport(ERROR, (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION), errmsg("tuple to be deleted was already modified by an operation triggered by the current command"), errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows."))); /* Else, already deleted by self; nothing to do */ return NULL; case HeapTupleMayBeUpdated: break; case HeapTupleUpdated: if (IsolationUsesXactSnapshot()) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); if (!ItemPointerEquals(tupleid, &hufd.ctid)) { TupleTableSlot *epqslot; epqslot = EvalPlanQual(estate, epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex, LockTupleExclusive, &hufd.ctid, hufd.xmax); if (!TupIsNull(epqslot)) { *tupleid = hufd.ctid; goto ldelete; } } /* tuple already deleted; nothing to do */ return NULL; default: elog(ERROR, "unrecognized heap_delete status: %u", result); return NULL; } /* * Note: Normally one would think that we have to delete index tuples * associated with the heap tuple now... * * ... but in POSTGRES, we have no need to do this because VACUUM will * take care of it later. We can't delete index tuples immediately * anyway, since the tuple is still visible to other transactions. */ } if (canSetTag) (estate->es_processed)++; /* AFTER ROW DELETE Triggers */ ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple, mtstate->mt_transition_capture); /* Process RETURNING if present */ if (resultRelInfo->ri_projectReturning) { /* * We have to put the target tuple into a slot, which means first we * gotta fetch it. We can use the trigger tuple slot.
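 *
 * (For a plain table the subplan only supplied the row's ctid as a junk
 * column, so the deleted row's contents must be re-read from the heap,
 * using SnapshotAny, before the RETURNING expressions can be evaluated.)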
*/ TupleTableSlot *rslot; HeapTupleData deltuple; Buffer delbuffer; if (resultRelInfo->ri_FdwRoutine) { /* FDW must have provided a slot containing the deleted row */ Assert(!TupIsNull(slot)); delbuffer = InvalidBuffer; } else { slot = estate->es_trig_tuple_slot; if (oldtuple != NULL) { deltuple = *oldtuple; delbuffer = InvalidBuffer; } else { deltuple.t_self = *tupleid; if (!heap_fetch(resultRelationDesc, SnapshotAny, &deltuple, &delbuffer, false, NULL)) elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING"); } if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc)) ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc)); ExecStoreTuple(&deltuple, slot, InvalidBuffer, false); } rslot = ExecProcessReturning(resultRelInfo, slot, planSlot); /* * Before releasing the target tuple again, make sure rslot has a * local copy of any pass-by-reference values. */ ExecMaterializeSlot(rslot); ExecClearTuple(slot); if (BufferIsValid(delbuffer)) ReleaseBuffer(delbuffer); return rslot; } return NULL; } /* ---------------------------------------------------------------- * ExecUpdate * * note: we can't run UPDATE queries with transactions * off because UPDATEs are actually INSERTs and our * scan will mistakenly loop forever, updating the tuple * it just inserted.. This should be fixed but until it * is, we don't want to get stuck in an infinite loop * which corrupts your database.. * * When updating a table, tupleid identifies the tuple to * update and oldtuple is NULL. When updating a view, oldtuple * is passed to the INSTEAD OF triggers and identifies what to * update, and tupleid is invalid. When updating a foreign table, * tupleid is invalid; the FDW has to figure out which row to * update using data from the planSlot. oldtuple is passed to * foreign table triggers; it is NULL when the foreign table has * no relevant triggers. * * Returns RETURNING result if any, otherwise NULL. 
* ---------------------------------------------------------------- */ static TupleTableSlot * ExecUpdate(ModifyTableState *mtstate, ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, TupleTableSlot *planSlot, EPQState *epqstate, EState *estate, bool canSetTag) { HeapTuple tuple; ResultRelInfo *resultRelInfo; Relation resultRelationDesc; HTSU_Result result; HeapUpdateFailureData hufd; List *recheckIndexes = NIL; /* * abort the operation if not running transactions */ if (IsBootstrapProcessingMode()) elog(ERROR, "cannot UPDATE during bootstrap"); /* * get the heap tuple out of the tuple table slot, making sure we have a * writable copy */ tuple = ExecMaterializeSlot(slot); /* * get information on the (current) result relation */ resultRelInfo = estate->es_result_relation_info; resultRelationDesc = resultRelInfo->ri_RelationDesc; /* BEFORE ROW UPDATE Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_update_before_row) { slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, tupleid, oldtuple, slot); if (slot == NULL) /* "do nothing" */ return NULL; /* trigger might have changed tuple */ tuple = ExecMaterializeSlot(slot); } /* INSTEAD OF ROW UPDATE Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_update_instead_row) { slot = ExecIRUpdateTriggers(estate, resultRelInfo, oldtuple, slot); if (slot == NULL) /* "do nothing" */ return NULL; /* trigger might have changed tuple */ tuple = ExecMaterializeSlot(slot); } else if (resultRelInfo->ri_FdwRoutine) { /* * update in foreign table: let the FDW do it */ slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate, resultRelInfo, slot, planSlot); if (slot == NULL) /* "do nothing" */ return NULL; /* FDW might have changed tuple */ tuple = ExecMaterializeSlot(slot); /* * AFTER ROW Triggers or RETURNING expressions might reference the * tableoid column, so initialize t_tableOid before evaluating them. */ tuple->t_tableOid = RelationGetRelid(resultRelationDesc); } else { LockTupleMode lockmode; /* * Constraints might reference the tableoid column, so initialize * t_tableOid before evaluating them. */ tuple->t_tableOid = RelationGetRelid(resultRelationDesc); /* * Check any RLS UPDATE WITH CHECK policies * * If we generate a new candidate tuple after EvalPlanQual testing, we * must loop back here and recheck any RLS policies and constraints. * (We don't need to redo triggers, however. If there are any BEFORE * triggers then trigger.c will have done heap_lock_tuple to lock the * correct tuple, so there's no need to do them again.) * * ExecWithCheckOptions() will skip any WCOs which are not of the kind * we are looking for at this point. */ lreplace:; if (resultRelInfo->ri_WithCheckOptions != NIL) ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK, resultRelInfo, slot, estate); /* * Check the constraints of the tuple. Note that we pass the same * slot for the orig_slot argument, because unlike ExecInsert(), no * tuple-routing is performed here, hence the slot remains unchanged. */ if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck) ExecConstraints(resultRelInfo, slot, estate); /* * replace the heap tuple * * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check * that the row to be updated is visible to that snapshot, and throw a * can't-serialize error if not. This is a special-case behavior * needed for referential integrity updates in transaction-snapshot * mode transactions. 
*/ result = heap_update(resultRelationDesc, tupleid, tuple, estate->es_output_cid, estate->es_crosscheck_snapshot, true /* wait for commit */ , &hufd, &lockmode); switch (result) { case HeapTupleSelfUpdated: /* * The target tuple was already updated or deleted by the * current command, or by a later command in the current * transaction. The former case is possible in a join UPDATE * where multiple tuples join to the same target tuple. This * is pretty questionable, but Postgres has always allowed it: * we just execute the first update action and ignore * additional update attempts. * * The latter case arises if the tuple is modified by a * command in a BEFORE trigger, or perhaps by a command in a * volatile function used in the query. In such situations we * should not ignore the update, but it is equally unsafe to * proceed. We don't want to discard the original UPDATE * while keeping the triggered actions based on it; and we * have no principled way to merge this update with the * previous ones. So throwing an error is the only safe * course. * * If a trigger actually intends this type of interaction, it * can re-execute the UPDATE (assuming it can figure out how) * and then return NULL to cancel the outer update. */ if (hufd.cmax != estate->es_output_cid) ereport(ERROR, (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION), errmsg("tuple to be updated was already modified by an operation triggered by the current command"), errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows."))); /* Else, already updated by self; nothing to do */ return NULL; case HeapTupleMayBeUpdated: break; case HeapTupleUpdated: if (IsolationUsesXactSnapshot()) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); if (!ItemPointerEquals(tupleid, &hufd.ctid)) { TupleTableSlot *epqslot; epqslot = EvalPlanQual(estate, epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex, lockmode, &hufd.ctid, hufd.xmax); if (!TupIsNull(epqslot)) { *tupleid = hufd.ctid; slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot); tuple = ExecMaterializeSlot(slot); goto lreplace; } } /* tuple already deleted; nothing to do */ return NULL; default: elog(ERROR, "unrecognized heap_update status: %u", result); return NULL; } /* * Note: instead of having to update the old index tuples associated * with the heap tuple, all we do is form and insert new index tuples. * This is because UPDATEs are actually DELETEs and INSERTs, and index * tuple deletion is done later by VACUUM (see notes in ExecDelete). * All we do here is insert new index tuples. -cim 9/27/89 */ /* * insert index entries for tuple * * Note: heap_update returns the tid (location) of the new tuple in * the t_self field. * * If it's a HOT update, we mustn't insert new index entries. */ if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple)) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false, NULL, NIL); } if (canSetTag) (estate->es_processed)++; /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, tuple, recheckIndexes, mtstate->operation == CMD_INSERT ? mtstate->mt_oc_transition_capture : mtstate->mt_transition_capture); list_free(recheckIndexes); /* * Check any WITH CHECK OPTION constraints from parent views. 
We are * required to do this after testing all constraints and uniqueness * violations per the SQL spec, so we do it after actually updating the * record in the heap and all indexes. * * ExecWithCheckOptions() will skip any WCOs which are not of the kind we * are looking for at this point. */ if (resultRelInfo->ri_WithCheckOptions != NIL) ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate); /* Process RETURNING if present */ if (resultRelInfo->ri_projectReturning) return ExecProcessReturning(resultRelInfo, slot, planSlot); return NULL; } /* * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE * * Try to lock tuple for update as part of speculative insertion. If * a qual originating from ON CONFLICT DO UPDATE is satisfied, update * (but still lock row, even though it may not satisfy estate's * snapshot). * * Returns true if we're done (with or without an update), or false if * the caller must retry the INSERT from scratch. */ static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, ItemPointer conflictTid, TupleTableSlot *planSlot, TupleTableSlot *excludedSlot, EState *estate, bool canSetTag, TupleTableSlot **returning) { ExprContext *econtext = mtstate->ps.ps_ExprContext; Relation relation = resultRelInfo->ri_RelationDesc; ExprState *onConflictSetWhere = resultRelInfo->ri_onConflictSetWhere; HeapTupleData tuple; HeapUpdateFailureData hufd; LockTupleMode lockmode; HTSU_Result test; Buffer buffer; /* Determine lock mode to use */ lockmode = ExecUpdateLockMode(estate, resultRelInfo); /* * Lock tuple for update. Don't follow updates when tuple cannot be * locked without doing so. A row locking conflict here means our * previous conclusion that the tuple is conclusively committed is not * true anymore. */ tuple.t_self = *conflictTid; test = heap_lock_tuple(relation, &tuple, estate->es_output_cid, lockmode, LockWaitBlock, false, &buffer, &hufd); switch (test) { case HeapTupleMayBeUpdated: /* success! */ break; case HeapTupleInvisible: /* * This can occur when a just inserted tuple is updated again in * the same command. E.g. because multiple rows with the same * conflicting key values are inserted. * * This is somewhat similar to the ExecUpdate() * HeapTupleSelfUpdated case. We do not want to proceed because * it would lead to the same row being updated a second time in * some unspecified order, and in contrast to plain UPDATEs * there's no historical behavior to break. * * It is the user's responsibility to prevent this situation from * occurring. These problems are why SQL-2003 similarly specifies * that for SQL MERGE, an exception must be raised in the event of * an attempt to update the same row twice. */ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data))) ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"), errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values."))); /* This shouldn't happen */ elog(ERROR, "attempted to lock invisible tuple"); case HeapTupleSelfUpdated: /* * This state should never be reached. As a dirty snapshot is used * to find conflicting tuples, speculative insertion wouldn't have * seen this row to conflict with.
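 *
 * (A conflict with a row inserted earlier in the same command surfaces as
 * HeapTupleInvisible above and is reported as a cardinality violation
 * there, not here.)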
*/ elog(ERROR, "unexpected self-updated tuple"); case HeapTupleUpdated: if (IsolationUsesXactSnapshot()) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); /* * Tell caller to try again from the very start. * * It does not make sense to use the usual EvalPlanQual() style * loop here, as the new version of the row might not conflict * anymore, or the conflicting tuple has actually been deleted. */ ReleaseBuffer(buffer); return false; default: elog(ERROR, "unrecognized heap_lock_tuple status: %u", test); } /* * Success, the tuple is locked. * * Reset per-tuple memory context to free any expression evaluation * storage allocated in the previous cycle. */ ResetExprContext(econtext); /* * Verify that the tuple is visible to our MVCC snapshot if the current * isolation level mandates that. * * It's not sufficient to rely on the check within ExecUpdate() as e.g. * CONFLICT ... WHERE clause may prevent us from reaching that. * * This means we only ever continue when a new command in the current * transaction could see the row, even though in READ COMMITTED mode the * tuple will not be visible according to the current statement's * snapshot. This is in line with the way UPDATE deals with newer tuple * versions. */ ExecCheckHeapTupleVisible(estate, &tuple, buffer); /* Store target's existing tuple in the state's dedicated slot */ ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false); /* * Make tuple and any needed join variables available to ExecQual and * ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while * the target's existing tuple is installed in the scantuple. EXCLUDED * has been made to reference INNER_VAR in setrefs.c, but there is no * other redirection. */ econtext->ecxt_scantuple = mtstate->mt_existing; econtext->ecxt_innertuple = excludedSlot; econtext->ecxt_outertuple = NULL; if (!ExecQual(onConflictSetWhere, econtext)) { ReleaseBuffer(buffer); InstrCountFiltered1(&mtstate->ps, 1); return true; /* done with the tuple */ } if (resultRelInfo->ri_WithCheckOptions != NIL) { /* * Check target's existing tuple against UPDATE-applicable USING * security barrier quals (if any), enforced here as RLS checks/WCOs. * * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK, * but that's almost the extent of its special handling for ON * CONFLICT DO UPDATE. * * The rewriter will also have associated UPDATE applicable straight * RLS checks/WCOs for the benefit of the ExecUpdate() call that * follows. INSERTs and UPDATEs naturally have mutually exclusive WCO * kinds, so there is no danger of spurious over-enforcement in the * INSERT or UPDATE path. */ ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo, mtstate->mt_existing, mtstate->ps.state); } /* Project the new tuple version */ ExecProject(resultRelInfo->ri_onConflictSetProj); /* * Note that it is possible that the target tuple has been modified in * this session, after the above heap_lock_tuple. We choose to not error * out in that case, in line with ExecUpdate's treatment of similar cases. * This can happen if an UPDATE is triggered from within ExecQual(), * ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a * wCTE in the ON CONFLICT's SET. 
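 *
 * A hypothetical statement of that shape: WITH w AS (UPDATE t SET ...
 * RETURNING x) INSERT INTO t ... ON CONFLICT (k) DO UPDATE SET v =
 * (SELECT x FROM w); evaluating the SET expression can force the wCTE,
 * and hence its UPDATE of the target table, to run at this point.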
*/ /* Execute UPDATE with projection */ *returning = ExecUpdate(mtstate, &tuple.t_self, NULL, mtstate->mt_conflproj, planSlot, &mtstate->mt_epqstate, mtstate->ps.state, canSetTag); ReleaseBuffer(buffer); return true; } /* * Process BEFORE EACH STATEMENT triggers */ static void fireBSTriggers(ModifyTableState *node) { ResultRelInfo *resultRelInfo = node->resultRelInfo; /* * If the node modifies a partitioned table, we must fire its triggers. * Note that in that case, node->resultRelInfo points to the first leaf * partition, not the root table. */ if (node->rootResultRelInfo != NULL) resultRelInfo = node->rootResultRelInfo; switch (node->operation) { case CMD_INSERT: ExecBSInsertTriggers(node->ps.state, resultRelInfo); if (node->mt_onconflict == ONCONFLICT_UPDATE) ExecBSUpdateTriggers(node->ps.state, resultRelInfo); break; case CMD_UPDATE: ExecBSUpdateTriggers(node->ps.state, resultRelInfo); break; case CMD_DELETE: ExecBSDeleteTriggers(node->ps.state, resultRelInfo); break; default: elog(ERROR, "unknown operation"); break; } } /* * Return the ResultRelInfo for which we will fire AFTER STATEMENT triggers. * This is also the relation into whose tuple format all captured transition * tuples must be converted. */ static ResultRelInfo * getASTriggerResultRelInfo(ModifyTableState *node) { /* * If the node modifies a partitioned table, we must fire its triggers. * Note that in that case, node->resultRelInfo points to the first leaf * partition, not the root table. */ if (node->rootResultRelInfo != NULL) return node->rootResultRelInfo; else return node->resultRelInfo; } /* * Process AFTER EACH STATEMENT triggers */ static void fireASTriggers(ModifyTableState *node) { ResultRelInfo *resultRelInfo = getASTriggerResultRelInfo(node); switch (node->operation) { case CMD_INSERT: if (node->mt_onconflict == ONCONFLICT_UPDATE) ExecASUpdateTriggers(node->ps.state, resultRelInfo, node->mt_oc_transition_capture); ExecASInsertTriggers(node->ps.state, resultRelInfo, node->mt_transition_capture); break; case CMD_UPDATE: ExecASUpdateTriggers(node->ps.state, resultRelInfo, node->mt_transition_capture); break; case CMD_DELETE: ExecASDeleteTriggers(node->ps.state, resultRelInfo, node->mt_transition_capture); break; default: elog(ERROR, "unknown operation"); break; } } /* * Set up the state needed for collecting transition tuples for AFTER * triggers. */ static void ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate) { ResultRelInfo *targetRelInfo = getASTriggerResultRelInfo(mtstate); int i; /* Check for transition tables on the directly targeted relation. */ mtstate->mt_transition_capture = MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc, RelationGetRelid(targetRelInfo->ri_RelationDesc), mtstate->operation); if (mtstate->operation == CMD_INSERT && mtstate->mt_onconflict == ONCONFLICT_UPDATE) mtstate->mt_oc_transition_capture = MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc, RelationGetRelid(targetRelInfo->ri_RelationDesc), CMD_UPDATE); /* * If we found that we need to collect transition tuples then we may also * need tuple conversion maps for any children that have TupleDescs that * aren't compatible with the tuplestores. (We can share these maps * between the regular and ON CONFLICT cases.) */ if (mtstate->mt_transition_capture != NULL || mtstate->mt_oc_transition_capture != NULL) { ResultRelInfo *resultRelInfos; int numResultRelInfos; /* Find the set of partitions so that we can find their TupleDescs. 
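 *
 * (We need those TupleDescs because a leaf partition may have its own
 * attribute numbering, e.g. after dropped columns or with a different
 * column order, so transition tuples must be converted to the target
 * table's rowtype before being added to the transition tuplestore.)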
*/ if (mtstate->mt_partition_dispatch_info != NULL) { /* * For INSERT via partitioned table, so we need TupleDescs based * on the partition routing table. */ resultRelInfos = mtstate->mt_partitions; numResultRelInfos = mtstate->mt_num_partitions; } else { /* Otherwise we need the ResultRelInfo for each subplan. */ resultRelInfos = mtstate->resultRelInfo; numResultRelInfos = mtstate->mt_nplans; } /* * Build array of conversion maps from each child's TupleDesc to the * one used in the tuplestore. The map pointers may be NULL when no * conversion is necessary, which is hopefully a common case for * partitions. */ mtstate->mt_transition_tupconv_maps = (TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) * numResultRelInfos); for (i = 0; i < numResultRelInfos; ++i) { mtstate->mt_transition_tupconv_maps[i] = convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc), RelationGetDescr(targetRelInfo->ri_RelationDesc), gettext_noop("could not convert row type")); } /* * Install the conversion map for the first plan for UPDATE and DELETE * operations. It will be advanced each time we switch to the next * plan. (INSERT operations set it every time, so we need not update * mtstate->mt_oc_transition_capture here.) */ if (mtstate->mt_transition_capture) mtstate->mt_transition_capture->tcs_map = mtstate->mt_transition_tupconv_maps[0]; } } /* ---------------------------------------------------------------- * ExecModifyTable * * Perform table modifications as required, and return RETURNING results * if needed. * ---------------------------------------------------------------- */ static TupleTableSlot * ExecModifyTable(PlanState *pstate) { ModifyTableState *node = castNode(ModifyTableState, pstate); EState *estate = node->ps.state; CmdType operation = node->operation; ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo; PlanState *subplanstate; JunkFilter *junkfilter; TupleTableSlot *slot; TupleTableSlot *planSlot; ItemPointer tupleid = NULL; ItemPointerData tuple_ctid; HeapTupleData oldtupdata; HeapTuple oldtuple; CHECK_FOR_INTERRUPTS(); /* * This should NOT get called during EvalPlanQual; we should have passed a * subplan tree to EvalPlanQual, instead. Use a runtime test not just * Assert because this condition is easy to miss in testing. (Note: * although ModifyTable should not get executed within an EvalPlanQual * operation, we do have to allow it to be initialized and shut down in * case it is within a CTE subplan. Hence this test must be here, not in * ExecInitModifyTable.) */ if (estate->es_epqTuple != NULL) elog(ERROR, "ModifyTable should not be called during EvalPlanQual"); /* * If we've already completed processing, don't try to do more. We need * this test because ExecPostprocessPlan might call us an extra time, and * our subplan's nodes aren't necessarily robust against being called * extra times. */ if (node->mt_done) return NULL; /* * On first call, fire BEFORE STATEMENT triggers before proceeding. */ if (node->fireBSTriggers) { fireBSTriggers(node); node->fireBSTriggers = false; } /* Preload local variables */ resultRelInfo = node->resultRelInfo + node->mt_whichplan; subplanstate = node->mt_plans[node->mt_whichplan]; junkfilter = resultRelInfo->ri_junkFilter; /* * es_result_relation_info must point to the currently active result * relation while we are within this ModifyTable node. Even though * ModifyTable nodes can't be nested statically, they can be nested * dynamically (since our subplan could include a reference to a modifying * CTE). 
So we have to save and restore the caller's value. */ saved_resultRelInfo = estate->es_result_relation_info; estate->es_result_relation_info = resultRelInfo; /* * Fetch rows from subplan(s), and execute the required table modification * for each row. */ for (;;) { /* * Reset the per-output-tuple exprcontext. This is needed because * triggers expect to use that context as workspace. It's a bit ugly * to do this below the top level of the plan, however. We might need * to rethink this later. */ ResetPerTupleExprContext(estate); planSlot = ExecProcNode(subplanstate); if (TupIsNull(planSlot)) { /* advance to next subplan if any */ node->mt_whichplan++; if (node->mt_whichplan < node->mt_nplans) { resultRelInfo++; subplanstate = node->mt_plans[node->mt_whichplan]; junkfilter = resultRelInfo->ri_junkFilter; estate->es_result_relation_info = resultRelInfo; EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan, node->mt_arowmarks[node->mt_whichplan]); /* Prepare to convert transition tuples from this child. */ if (node->mt_transition_capture != NULL) { Assert(node->mt_transition_tupconv_maps != NULL); node->mt_transition_capture->tcs_map = node->mt_transition_tupconv_maps[node->mt_whichplan]; } if (node->mt_oc_transition_capture != NULL) { Assert(node->mt_transition_tupconv_maps != NULL); node->mt_oc_transition_capture->tcs_map = node->mt_transition_tupconv_maps[node->mt_whichplan]; } continue; } else break; } /* * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do * here is compute the RETURNING expressions. */ if (resultRelInfo->ri_usesFdwDirectModify) { Assert(resultRelInfo->ri_projectReturning); /* * A scan slot containing the data that was actually inserted, * updated or deleted has already been made available to * ExecProcessReturning by IterateDirectModify, so no need to * provide it here. */ slot = ExecProcessReturning(resultRelInfo, NULL, planSlot); estate->es_result_relation_info = saved_resultRelInfo; return slot; } EvalPlanQualSetSlot(&node->mt_epqstate, planSlot); slot = planSlot; oldtuple = NULL; if (junkfilter != NULL) { /* * extract the 'ctid' or 'wholerow' junk attribute. */ if (operation == CMD_UPDATE || operation == CMD_DELETE) { char relkind; Datum datum; bool isNull; relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind; if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW) { datum = ExecGetJunkAttribute(slot, junkfilter->jf_junkAttNo, &isNull); /* shouldn't ever get a null result... */ if (isNull) elog(ERROR, "ctid is NULL"); tupleid = (ItemPointer) DatumGetPointer(datum); tuple_ctid = *tupleid; /* be sure we don't free ctid!! */ tupleid = &tuple_ctid; } /* * Use the wholerow attribute, when available, to reconstruct * the old relation tuple. * * Foreign table updates have a wholerow attribute when the * relation has a row-level trigger. Note that the wholerow * attribute does not carry system columns. Foreign table * triggers miss seeing those, except that we know enough here * to set t_tableOid. Quite separately from this, the FDW may * fetch its own junk attrs to identify the row. * * Other relevant relkinds, currently limited to views, always * have a wholerow attribute. */ else if (AttributeNumberIsValid(junkfilter->jf_junkAttNo)) { datum = ExecGetJunkAttribute(slot, junkfilter->jf_junkAttNo, &isNull); /* shouldn't ever get a null result... 
*/ if (isNull) elog(ERROR, "wholerow is NULL"); oldtupdata.t_data = DatumGetHeapTupleHeader(datum); oldtupdata.t_len = HeapTupleHeaderGetDatumLength(oldtupdata.t_data); ItemPointerSetInvalid(&(oldtupdata.t_self)); /* Historically, view triggers see invalid t_tableOid. */ oldtupdata.t_tableOid = (relkind == RELKIND_VIEW) ? InvalidOid : RelationGetRelid(resultRelInfo->ri_RelationDesc); oldtuple = &oldtupdata; } else Assert(relkind == RELKIND_FOREIGN_TABLE); } /* * apply the junkfilter if needed. */ if (operation != CMD_DELETE) slot = ExecFilterJunk(junkfilter, slot); } switch (operation) { case CMD_INSERT: slot = ExecInsert(node, slot, planSlot, node->mt_arbiterindexes, node->mt_onconflict, estate, node->canSetTag); break; case CMD_UPDATE: slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot, &node->mt_epqstate, estate, node->canSetTag); break; case CMD_DELETE: slot = ExecDelete(node, tupleid, oldtuple, planSlot, &node->mt_epqstate, estate, node->canSetTag); break; default: elog(ERROR, "unknown operation"); break; } /* * If we got a RETURNING result, return it to caller. We'll continue * the work on next call. */ if (slot) { estate->es_result_relation_info = saved_resultRelInfo; return slot; } } /* Restore es_result_relation_info before exiting */ estate->es_result_relation_info = saved_resultRelInfo; /* * We're done, but fire AFTER STATEMENT triggers before exiting. */ fireASTriggers(node); node->mt_done = true; return NULL; } /* ---------------------------------------------------------------- * ExecInitModifyTable * ---------------------------------------------------------------- */ ModifyTableState * ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) { ModifyTableState *mtstate; CmdType operation = node->operation; int nplans = list_length(node->plans); ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo; TupleDesc tupDesc; Plan *subplan; ListCell *l; int i; Relation rel; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ mtstate = makeNode(ModifyTableState); mtstate->ps.plan = (Plan *) node; mtstate->ps.state = estate; mtstate->ps.ExecProcNode = ExecModifyTable; mtstate->operation = operation; mtstate->canSetTag = node->canSetTag; mtstate->mt_done = false; mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans); mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex; /* If modifying a partitioned table, initialize the root table info */ if (node->rootResultRelIndex >= 0) mtstate->rootResultRelInfo = estate->es_root_result_relations + node->rootResultRelIndex; mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans); mtstate->mt_nplans = nplans; mtstate->mt_onconflict = node->onConflictAction; mtstate->mt_arbiterindexes = node->arbiterIndexes; /* set up epqstate with dummy subplan data for the moment */ EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam); mtstate->fireBSTriggers = true; /* * call ExecInitNode on each of the plans to be executed and save the * results into the array "mt_plans". This is also a convenient place to * verify that the proposed target relations are valid and open their * indexes for insertion of new index entries. Note we *must* set * estate->es_result_relation_info correctly while we initialize each * sub-plan; ExecContextForcesOids depends on that! 
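 *
 * (ExecContextForcesOids() consults es_result_relation_info to decide
 * whether a subplan must produce tuples with OIDs, so it has to see the
 * relation that the subplan being initialized will feed.)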
*/ saved_resultRelInfo = estate->es_result_relation_info; resultRelInfo = mtstate->resultRelInfo; i = 0; foreach(l, node->plans) { subplan = (Plan *) lfirst(l); /* Initialize the usesFdwDirectModify flag */ resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, node->fdwDirectModifyPlans); /* * Verify result relation is a valid target for the current operation */ CheckValidResultRel(resultRelInfo, operation); /* * If there are indices on the result relation, open them and save * descriptors in the result relation info, so that we can add new * index entries for the tuples we add/update. We need not do this * for a DELETE, however, since deletion doesn't affect indexes. Also, * inside an EvalPlanQual operation, the indexes might be open * already, since we share the resultrel state with the original * query. */ if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex && operation != CMD_DELETE && resultRelInfo->ri_IndexRelationDescs == NULL) ExecOpenIndices(resultRelInfo, mtstate->mt_onconflict != ONCONFLICT_NONE); /* Now init the plan for this result rel */ estate->es_result_relation_info = resultRelInfo; mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags); /* Also let FDWs init themselves for foreign-table result rels */ if (!resultRelInfo->ri_usesFdwDirectModify && resultRelInfo->ri_FdwRoutine != NULL && resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL) { List *fdw_private = (List *) list_nth(node->fdwPrivLists, i); resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate, resultRelInfo, fdw_private, i, eflags); } resultRelInfo++; i++; } estate->es_result_relation_info = saved_resultRelInfo; /* The root table RT index is at the head of the partitioned_rels list */ if (node->partitioned_rels) { Index root_rti; Oid root_oid; root_rti = linitial_int(node->partitioned_rels); root_oid = getrelid(root_rti, estate->es_range_table); rel = heap_open(root_oid, NoLock); /* locked by InitPlan */ } else rel = mtstate->resultRelInfo->ri_RelationDesc; /* Build state for INSERT tuple routing */ if (operation == CMD_INSERT && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { PartitionDispatch *partition_dispatch_info; ResultRelInfo *partitions; TupleConversionMap **partition_tupconv_maps; TupleTableSlot *partition_tuple_slot; int num_parted, num_partitions; ExecSetupPartitionTupleRouting(rel, node->nominalRelation, estate, &partition_dispatch_info, &partitions, &partition_tupconv_maps, &partition_tuple_slot, &num_parted, &num_partitions); mtstate->mt_partition_dispatch_info = partition_dispatch_info; mtstate->mt_num_dispatch = num_parted; mtstate->mt_partitions = partitions; mtstate->mt_num_partitions = num_partitions; mtstate->mt_partition_tupconv_maps = partition_tupconv_maps; mtstate->mt_partition_tuple_slot = partition_tuple_slot; } /* * Build state for collecting transition tuples. This requires having a * valid trigger query context, so skip it in explain-only mode. */ if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) ExecSetupTransitionCaptureState(mtstate, estate); /* * Initialize any WITH CHECK OPTION constraints if needed. 
	 */
	resultRelInfo = mtstate->resultRelInfo;
	i = 0;
	foreach(l, node->withCheckOptionLists)
	{
		List	   *wcoList = (List *) lfirst(l);
		List	   *wcoExprs = NIL;
		ListCell   *ll;

		foreach(ll, wcoList)
		{
			WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
			ExprState  *wcoExpr = ExecInitQual((List *) wco->qual,
											   mtstate->mt_plans[i]);

			wcoExprs = lappend(wcoExprs, wcoExpr);
		}

		resultRelInfo->ri_WithCheckOptions = wcoList;
		resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
		resultRelInfo++;
		i++;
	}

	/*
	 * Build WITH CHECK OPTION constraints for each leaf partition rel. Note
	 * that we didn't build the withCheckOptionList for each partition within
	 * the planner, but simple translation of the varattnos for each partition
	 * will suffice.  This only occurs for the INSERT case; UPDATE/DELETE
	 * cases are handled above.
	 */
	if (node->withCheckOptionLists != NIL && mtstate->mt_num_partitions > 0)
	{
		List	   *wcoList;
		PlanState  *plan;

		/*
		 * In case of INSERT on partitioned tables, there is only one plan.
		 * Likewise, there is only one WITH CHECK OPTIONS list, not one per
		 * partition.  We make a copy of the WCO qual for each partition; note
		 * that, if there are SubPlans in there, they all end up attached to
		 * the one parent Plan node.
		 */
		Assert(operation == CMD_INSERT &&
			   list_length(node->withCheckOptionLists) == 1 &&
			   mtstate->mt_nplans == 1);

		wcoList = linitial(node->withCheckOptionLists);
		plan = mtstate->mt_plans[0];
		resultRelInfo = mtstate->mt_partitions;
		for (i = 0; i < mtstate->mt_num_partitions; i++)
		{
			Relation	partrel = resultRelInfo->ri_RelationDesc;
			List	   *mapped_wcoList;
			List	   *wcoExprs = NIL;
			ListCell   *ll;

			/* varno = node->nominalRelation */
			mapped_wcoList = map_partition_varattnos(wcoList,
													 node->nominalRelation,
													 partrel, rel, NULL);
			foreach(ll, mapped_wcoList)
			{
				WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
				ExprState  *wcoExpr = ExecInitQual(castNode(List, wco->qual),
												   plan);

				wcoExprs = lappend(wcoExprs, wcoExpr);
			}

			resultRelInfo->ri_WithCheckOptions = mapped_wcoList;
			resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
			resultRelInfo++;
		}
	}

	/*
	 * Initialize RETURNING projections if needed.
	 */
	if (node->returningLists)
	{
		TupleTableSlot *slot;
		ExprContext *econtext;
		List	   *returningList;

		/*
		 * Initialize result tuple slot and assign its rowtype using the first
		 * RETURNING list.  We assume the rest will look the same.
		 */
		tupDesc = ExecTypeFromTL((List *) linitial(node->returningLists),
								 false);

		/* Set up a slot for the output of the RETURNING projection(s) */
		ExecInitResultTupleSlot(estate, &mtstate->ps);
		ExecAssignResultType(&mtstate->ps, tupDesc);
		slot = mtstate->ps.ps_ResultTupleSlot;

		/* Need an econtext too */
		if (mtstate->ps.ps_ExprContext == NULL)
			ExecAssignExprContext(estate, &mtstate->ps);
		econtext = mtstate->ps.ps_ExprContext;

		/*
		 * Build a projection for each result rel.
		 */
		resultRelInfo = mtstate->resultRelInfo;
		foreach(l, node->returningLists)
		{
			List	   *rlist = (List *) lfirst(l);

			resultRelInfo->ri_projectReturning =
				ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
										resultRelInfo->ri_RelationDesc->rd_att);
			resultRelInfo++;
		}

		/*
		 * Build a projection for each leaf partition rel.  Note that we
		 * didn't build the returningList for each partition within the
		 * planner, but simple translation of the varattnos for each partition
		 * will suffice.  This only occurs for the INSERT case; UPDATE/DELETE
		 * are handled above.
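		 *
		 * (For instance, if a partition's physical column numbering differs
		 * from the parent's because of dropped or reordered columns,
		 * map_partition_varattnos rewrites the Vars in the RETURNING list to
		 * use the partition's own attribute numbers.)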
		 */
		resultRelInfo = mtstate->mt_partitions;
		returningList = linitial(node->returningLists);
		for (i = 0; i < mtstate->mt_num_partitions; i++)
		{
			Relation	partrel = resultRelInfo->ri_RelationDesc;
			List	   *rlist;

			/* varno = node->nominalRelation */
			rlist = map_partition_varattnos(returningList,
											node->nominalRelation,
											partrel, rel, NULL);
			resultRelInfo->ri_projectReturning =
				ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
										resultRelInfo->ri_RelationDesc->rd_att);
			resultRelInfo++;
		}
	}
	else
	{
		/*
		 * We still must construct a dummy result tuple type, because InitPlan
		 * expects one (maybe should change that?).
		 */
		tupDesc = ExecTypeFromTL(NIL, false);
		ExecInitResultTupleSlot(estate, &mtstate->ps);
		ExecAssignResultType(&mtstate->ps, tupDesc);

		mtstate->ps.ps_ExprContext = NULL;
	}

	/* Close the root partitioned rel if we opened it above. */
	if (rel != mtstate->resultRelInfo->ri_RelationDesc)
		heap_close(rel, NoLock);

	/*
	 * If needed, initialize the target list, projection and qual for ON
	 * CONFLICT DO UPDATE.
	 */
	resultRelInfo = mtstate->resultRelInfo;
	if (node->onConflictAction == ONCONFLICT_UPDATE)
	{
		ExprContext *econtext;
		TupleDesc	tupDesc;

		/* insert may only have one plan, since inheritance is not expanded */
		Assert(nplans == 1);

		/* already exists if created by RETURNING processing above */
		if (mtstate->ps.ps_ExprContext == NULL)
			ExecAssignExprContext(estate, &mtstate->ps);

		econtext = mtstate->ps.ps_ExprContext;

		/* initialize slot for the existing tuple */
		mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state);
		ExecSetSlotDescriptor(mtstate->mt_existing,
							  resultRelInfo->ri_RelationDesc->rd_att);

		/* carried forward solely for the benefit of explain */
		mtstate->mt_excludedtlist = node->exclRelTlist;

		/* create target slot for UPDATE SET projection */
		tupDesc = ExecTypeFromTL((List *) node->onConflictSet,
								 resultRelInfo->ri_RelationDesc->rd_rel->relhasoids);
		mtstate->mt_conflproj = ExecInitExtraTupleSlot(mtstate->ps.state);
		ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc);

		/* build UPDATE SET projection state */
		resultRelInfo->ri_onConflictSetProj =
			ExecBuildProjectionInfo(node->onConflictSet, econtext,
									mtstate->mt_conflproj, &mtstate->ps,
									resultRelInfo->ri_RelationDesc->rd_att);

		/* build DO UPDATE WHERE clause expression */
		if (node->onConflictWhere)
		{
			ExprState  *qualexpr;

			qualexpr = ExecInitQual((List *) node->onConflictWhere,
									&mtstate->ps);

			resultRelInfo->ri_onConflictSetWhere = qualexpr;
		}
	}

	/*
	 * If we have any secondary relations in an UPDATE or DELETE, they need to
	 * be treated like non-locked relations in SELECT FOR UPDATE, ie, the
	 * EvalPlanQual mechanism needs to be told about them.  Locate the
	 * relevant ExecRowMarks.
	 */
	foreach(l, node->rowMarks)
	{
		PlanRowMark *rc = lfirst_node(PlanRowMark, l);
		ExecRowMark *erm;

		/* ignore "parent" rowmarks; they are irrelevant at runtime */
		if (rc->isParent)
			continue;

		/* find ExecRowMark (same for all subplans) */
		erm = ExecFindRowMark(estate, rc->rti, false);

		/* build ExecAuxRowMark for each subplan */
		for (i = 0; i < nplans; i++)
		{
			ExecAuxRowMark *aerm;

			subplan = mtstate->mt_plans[i]->plan;
			aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
			mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm);
		}
	}

	/* select first subplan */
	mtstate->mt_whichplan = 0;
	subplan = (Plan *) linitial(node->plans);
	EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan,
						mtstate->mt_arowmarks[0]);

	/*
	 * Initialize the junk filter(s) if needed.  INSERT queries need a filter
	 * if there are any junk attrs in the tlist.
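	 * (A plain INSERT usually has none, in which case the filter is skipped
	 * and the plan output is checked directly below.)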
	 * UPDATE and DELETE always need a filter, since there's always at least
	 * one junk attribute present --- no need to look first.  Typically, this
	 * will be a 'ctid' or 'wholerow' attribute, but in the case of a foreign
	 * data wrapper it might be a set of junk attributes sufficient to
	 * identify the remote row.
	 *
	 * If there are multiple result relations, each one needs its own junk
	 * filter.  Note multiple rels are only possible for UPDATE/DELETE, so we
	 * can't be fooled by some needing a filter and some not.
	 *
	 * This section of code is also a convenient place to verify that the
	 * output of an INSERT or UPDATE matches the target table(s).
	 */
	{
		bool		junk_filter_needed = false;

		switch (operation)
		{
			case CMD_INSERT:
				foreach(l, subplan->targetlist)
				{
					TargetEntry *tle = (TargetEntry *) lfirst(l);

					if (tle->resjunk)
					{
						junk_filter_needed = true;
						break;
					}
				}
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				elog(ERROR, "unknown operation");
				break;
		}

		if (junk_filter_needed)
		{
			resultRelInfo = mtstate->resultRelInfo;
			for (i = 0; i < nplans; i++)
			{
				JunkFilter *j;

				subplan = mtstate->mt_plans[i]->plan;
				if (operation == CMD_INSERT || operation == CMD_UPDATE)
					ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
										subplan->targetlist);

				j = ExecInitJunkFilter(subplan->targetlist,
									   resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
									   ExecInitExtraTupleSlot(estate));

				if (operation == CMD_UPDATE || operation == CMD_DELETE)
				{
					/* For UPDATE/DELETE, find the appropriate junk attr now */
					char		relkind;

					relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
					if (relkind == RELKIND_RELATION ||
						relkind == RELKIND_MATVIEW ||
						relkind == RELKIND_PARTITIONED_TABLE)
					{
						j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
						if (!AttributeNumberIsValid(j->jf_junkAttNo))
							elog(ERROR, "could not find junk ctid column");
					}
					else if (relkind == RELKIND_FOREIGN_TABLE)
					{
						/*
						 * When there is a row-level trigger, there should be
						 * a wholerow attribute.
						 */
						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
					}
					else
					{
						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
						if (!AttributeNumberIsValid(j->jf_junkAttNo))
							elog(ERROR, "could not find junk wholerow column");
					}
				}

				resultRelInfo->ri_junkFilter = j;
				resultRelInfo++;
			}
		}
		else
		{
			if (operation == CMD_INSERT)
				ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc,
									subplan->targetlist);
		}
	}

	/*
	 * Set up a tuple table slot for use for trigger output tuples.  In a plan
	 * containing multiple ModifyTable nodes, all can share one such slot, so
	 * we keep it in the estate.
	 */
	if (estate->es_trig_tuple_slot == NULL)
		estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);

	/*
	 * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
	 * to estate->es_auxmodifytables so that it will be run to completion by
	 * ExecPostprocessPlan.  (It'd actually work fine to add the primary
	 * ModifyTable node too, but there's no need.)  Note the use of lcons not
	 * lappend: we need later-initialized ModifyTable nodes to be shut down
	 * before earlier ones.  This ensures that we don't throw away RETURNING
	 * rows that need to be seen by a later CTE subplan.
	 */
	if (!mtstate->canSetTag)
		estate->es_auxmodifytables = lcons(mtstate,
										   estate->es_auxmodifytables);

	return mtstate;
}

/* ----------------------------------------------------------------
 *		ExecEndModifyTable
 *
 *		Shuts down the plan.
 *
 *		Returns nothing of interest.
 * ----------------------------------------------------------------
 */
void
ExecEndModifyTable(ModifyTableState *node)
{
	int			i;

	/*
	 * Allow any FDWs to shut down
	 */
	for (i = 0; i < node->mt_nplans; i++)
	{
		ResultRelInfo *resultRelInfo = node->resultRelInfo + i;

		if (!resultRelInfo->ri_usesFdwDirectModify &&
			resultRelInfo->ri_FdwRoutine != NULL &&
			resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
			resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
														   resultRelInfo);
	}

	/*
	 * Close all the partitioned tables, leaf partitions, and their indices
	 *
	 * Remember node->mt_partition_dispatch_info[0] corresponds to the root
	 * partitioned table, which we must not try to close, because it is the
	 * main target table of the query that will be closed by ExecEndPlan().
	 * Also, tupslot is NULL for the root partitioned table.
	 */
	for (i = 1; i < node->mt_num_dispatch; i++)
	{
		PartitionDispatch pd = node->mt_partition_dispatch_info[i];

		heap_close(pd->reldesc, NoLock);
		ExecDropSingleTupleTableSlot(pd->tupslot);
	}
	for (i = 0; i < node->mt_num_partitions; i++)
	{
		ResultRelInfo *resultRelInfo = node->mt_partitions + i;

		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

	/* Release the standalone partition tuple descriptor, if any */
	if (node->mt_partition_tuple_slot)
		ExecDropSingleTupleTableSlot(node->mt_partition_tuple_slot);

	/*
	 * Free the exprcontext
	 */
	ExecFreeExprContext(&node->ps);

	/*
	 * clean out the tuple table
	 */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/*
	 * Terminate EPQ execution if active
	 */
	EvalPlanQualEnd(&node->mt_epqstate);

	/*
	 * shut down subplans
	 */
	for (i = 0; i < node->mt_nplans; i++)
		ExecEndNode(node->mt_plans[i]);
}

void
ExecReScanModifyTable(ModifyTableState *node)
{
	/*
	 * Currently, we don't need to support rescan on ModifyTable nodes. The
	 * semantics of that would be a bit debatable anyway.
	 */
	elog(ERROR, "ExecReScanModifyTable is not implemented");
}