Enforce foreign key correctly during cross-partition updates

When an update on a partitioned table referenced in foreign key
constraints causes a row to move from one partition to another,
the fact that the move is implemented as a delete followed by an insert
on the target partition causes the foreign key triggers to have
surprising behavior.  For example, a given foreign key's delete trigger
which implements the ON DELETE CASCADE clause of that key will delete
any referencing rows when triggered for that internal DELETE, although
it should not, because the referenced row is simply being moved from one
partition of the referenced root partitioned table into another, not
being deleted from it.

This commit teaches trigger.c to skip queuing such delete trigger events
on the leaf partitions in favor of an UPDATE event fired on the root
target relation.  Doing so is sensible because both the old and the new
tuple "logically" belong to the root relation.

The after trigger event queuing interface now allows passing the source
and the target partitions of a particular cross-partition update when
registering the update event for the root partitioned table.  Along with
the two ctids of the old and the new tuple, the after trigger event now
also stores the OIDs of those partitions. The tuples fetched from the
source and the target partitions are converted into the root table
format, if necessary, before they are passed to the trigger function.

The implementation currently has a limitation that only the foreign keys
pointing into the query's target relation are considered, not those of
its sub-partitioned partitions.  That seems like a reasonable
limitation, because it sounds rare to have distinct foreign keys
pointing to sub-partitioned partitions instead of to the root table.

This misbehavior stems from commit f56f8f8da6 (which added support for
foreign keys to reference partitioned tables) not paying sufficient
attention to commit 2f17844104 (which had introduced cross-partition
updates a year earlier).  Even though the former commit goes back to
Postgres 12, we're not backpatching this fix at this time for fear of
destabilizing things too much, and because there are a few ABI breaks in
it that we'd have to work around in older branches.  It also depends on
commit f4566345cf, which had its own share of backpatchability issues
as well.

Author: Amit Langote <amitlangote09@gmail.com>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Reported-by: Eduard Català <eduard.catala@gmail.com>
Discussion: https://postgr.es/m/CA+HiwqFvkBCmfwkQX_yBqv2Wz8ugUGiBDxum8=WvVbfU1TXaNg@mail.gmail.com
Discussion: https://postgr.es/m/CAL54xNZsLwEM1XCk5yW9EqaRzsZYHuWsHQkA2L5MOSKXAwviCQ@mail.gmail.com
This commit is contained in:
Alvaro Herrera 2022-03-20 18:43:40 +01:00
parent 3f513ac793
commit ba9a7e3921
No known key found for this signature in database
GPG Key ID: 1C20ACB9D5C564AE
11 changed files with 926 additions and 80 deletions

View File

@ -316,6 +316,13 @@ UPDATE <replaceable class="parameter">count</replaceable>
partition (provided the foreign data wrapper supports tuple routing), they
cannot be moved from a foreign-table partition to another partition.
</para>
<para>
An attempt to move a row from one partition to another will fail if a
foreign key is found to directly reference an ancestor of the source
partition that is not the same as the ancestor that's mentioned in the
<command>UPDATE</command> query.
</para>
</refsect1>
<refsect1>

View File

@ -95,10 +95,13 @@ static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata,
Instrumentation *instr,
MemoryContext per_tuple_context);
static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
int event, bool row_trigger,
TupleTableSlot *oldtup, TupleTableSlot *newtup,
List *recheckIndexes, Bitmapset *modifiedCols,
TransitionCaptureState *transition_capture);
TransitionCaptureState *transition_capture,
bool is_crosspart_update);
static void AfterTriggerEnlargeQueryState(void);
static bool before_stmt_triggers_fired(Oid relid, CmdType cmdType);
@ -2458,8 +2461,10 @@ ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if (trigdesc && trigdesc->trig_insert_after_statement)
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT,
false, NULL, NULL, NIL, NULL, transition_capture);
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
TRIGGER_EVENT_INSERT,
false, NULL, NULL, NIL, NULL, transition_capture,
false);
}
bool
@ -2547,10 +2552,12 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
if ((trigdesc && trigdesc->trig_insert_after_row) ||
(transition_capture && transition_capture->tcs_insert_new_table))
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT,
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
TRIGGER_EVENT_INSERT,
true, NULL, slot,
recheckIndexes, NULL,
transition_capture);
transition_capture,
false);
}
bool
@ -2672,8 +2679,10 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if (trigdesc && trigdesc->trig_delete_after_statement)
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE,
false, NULL, NULL, NIL, NULL, transition_capture);
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
TRIGGER_EVENT_DELETE,
false, NULL, NULL, NIL, NULL, transition_capture,
false);
}
/*
@ -2768,11 +2777,17 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
return result;
}
/*
* Note: is_crosspart_update must be true if the DELETE is being performed
* as part of a cross-partition update.
*/
void
ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TransitionCaptureState *transition_capture)
TransitionCaptureState *transition_capture,
bool is_crosspart_update)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
@ -2793,9 +2808,11 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
else
ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE,
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
TRIGGER_EVENT_DELETE,
true, slot, NULL, NIL, NULL,
transition_capture);
transition_capture,
is_crosspart_update);
}
}
@ -2914,10 +2931,12 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
Assert(relinfo->ri_RootResultRelInfo == NULL);
if (trigdesc && trigdesc->trig_update_after_statement)
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
TRIGGER_EVENT_UPDATE,
false, NULL, NULL, NIL,
ExecGetAllUpdatedCols(relinfo, estate),
transition_capture);
transition_capture,
false);
}
bool
@ -3052,13 +3071,26 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
return true;
}
/*
* Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
* and destination partitions, respectively, of a cross-partition update of
* the root partitioned table mentioned in the query, given by 'relinfo'.
* 'tupleid' in that case refers to the ctid of the "old" tuple in the source
* partition, and 'newslot' contains the "new" tuple in the destination
* partition. This interface exists to support the requirements of
* ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
* that case.
*/
void
ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *newslot,
List *recheckIndexes,
TransitionCaptureState *transition_capture)
TransitionCaptureState *transition_capture,
bool is_crosspart_update)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
@ -3073,12 +3105,19 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
* separately for DELETE and INSERT to capture transition table rows.
* In such case, either old tuple or new tuple can be NULL.
*/
TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo);
TupleTableSlot *oldslot;
ResultRelInfo *tupsrc;
Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
!is_crosspart_update);
tupsrc = src_partinfo ? src_partinfo : relinfo;
oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
GetTupleForTrigger(estate,
NULL,
relinfo,
tupsrc,
tupleid,
LockTupleExclusive,
oldslot,
@ -3088,10 +3127,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
else
ExecClearTuple(oldslot);
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
true, oldslot, newslot, recheckIndexes,
AfterTriggerSaveEvent(estate, relinfo,
src_partinfo, dst_partinfo,
TRIGGER_EVENT_UPDATE,
true,
oldslot, newslot, recheckIndexes,
ExecGetAllUpdatedCols(relinfo, estate),
transition_capture);
transition_capture,
is_crosspart_update);
}
}
@ -3214,8 +3257,11 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if (trigdesc && trigdesc->trig_truncate_after_statement)
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_TRUNCATE,
false, NULL, NULL, NIL, NULL, NULL);
AfterTriggerSaveEvent(estate, relinfo,
NULL, NULL,
TRIGGER_EVENT_TRUNCATE,
false, NULL, NULL, NIL, NULL, NULL,
false);
}
@ -3496,9 +3542,9 @@ typedef SetConstraintStateData *SetConstraintState;
* Per-trigger-event data
*
* The actual per-event data, AfterTriggerEventData, includes DONE/IN_PROGRESS
* status bits and up to two tuple CTIDs. Each event record also has an
* associated AfterTriggerSharedData that is shared across all instances of
* similar events within a "chunk".
* status bits, up to two tuple CTIDs, and optionally two OIDs of partitions.
* Each event record also has an associated AfterTriggerSharedData that is
* shared across all instances of similar events within a "chunk".
*
* For row-level triggers, we arrange not to waste storage on unneeded ctid
* fields. Updates of regular tables use two; inserts and deletes of regular
@ -3509,6 +3555,11 @@ typedef SetConstraintStateData *SetConstraintState;
* tuple(s). This permits storing tuples once regardless of the number of
* row-level triggers on a foreign table.
*
* When updates on partitioned tables cause rows to move between partitions,
* the OIDs of both partitions are stored too, so that the tuples can be
* fetched; such entries are marked AFTER_TRIGGER_CP_UPDATE (for "cross-
* partition update").
*
* Note that we need triggers on foreign tables to be fired in exactly the
* order they were queued, so that the tuples come out of the tuplestore in
* the right order. To ensure that, we forbid deferrable (constraint)
@ -3531,16 +3582,16 @@ typedef SetConstraintStateData *SetConstraintState;
*/
typedef uint32 TriggerFlags;
#define AFTER_TRIGGER_OFFSET 0x0FFFFFFF /* must be low-order bits */
#define AFTER_TRIGGER_DONE 0x10000000
#define AFTER_TRIGGER_IN_PROGRESS 0x20000000
#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */
#define AFTER_TRIGGER_DONE 0x80000000
#define AFTER_TRIGGER_IN_PROGRESS 0x40000000
/* bits describing the size and tuple sources of this event */
#define AFTER_TRIGGER_FDW_REUSE 0x00000000
#define AFTER_TRIGGER_FDW_FETCH 0x80000000
#define AFTER_TRIGGER_1CTID 0x40000000
#define AFTER_TRIGGER_2CTID 0xC0000000
#define AFTER_TRIGGER_TUP_BITS 0xC0000000
#define AFTER_TRIGGER_FDW_FETCH 0x20000000
#define AFTER_TRIGGER_1CTID 0x10000000
#define AFTER_TRIGGER_2CTID 0x30000000
#define AFTER_TRIGGER_CP_UPDATE 0x08000000
#define AFTER_TRIGGER_TUP_BITS 0x38000000
typedef struct AfterTriggerSharedData *AfterTriggerShared;
typedef struct AfterTriggerSharedData
@ -3560,27 +3611,45 @@ typedef struct AfterTriggerEventData
TriggerFlags ate_flags; /* status bits and offset to shared data */
ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
ItemPointerData ate_ctid2; /* new updated tuple */
/*
* During a cross-partition update of a partitioned table, we also store
* the OIDs of source and destination partitions that are needed to fetch
* the old (ctid1) and the new tuple (ctid2) from, respectively.
*/
Oid ate_src_part;
Oid ate_dst_part;
} AfterTriggerEventData;
/* AfterTriggerEventData, minus ate_ctid2 */
/* AfterTriggerEventData, minus ate_src_part, ate_dst_part */
typedef struct AfterTriggerEventDataNoOids
{
/*
* Fields mirror the leading fields of AfterTriggerEventData, in the same
* order; only the two partition OIDs are left out. SizeofTriggerEvent()
* uses sizeof this struct for AFTER_TRIGGER_2CTID events.
*/
TriggerFlags ate_flags; /* status bits and offset to shared data */
ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
ItemPointerData ate_ctid2; /* new updated tuple */
} AfterTriggerEventDataNoOids;
/* AfterTriggerEventData, minus ate_*_part and ate_ctid2 */
typedef struct AfterTriggerEventDataOneCtid
{
TriggerFlags ate_flags; /* status bits and offset to shared data */
ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
} AfterTriggerEventDataOneCtid;
/* AfterTriggerEventData, minus ate_ctid1 and ate_ctid2 */
/* AfterTriggerEventData, minus ate_*_part, ate_ctid1 and ate_ctid2 */
typedef struct AfterTriggerEventDataZeroCtids
{
TriggerFlags ate_flags; /* status bits and offset to shared data */
} AfterTriggerEventDataZeroCtids;
#define SizeofTriggerEvent(evt) \
(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \
sizeof(AfterTriggerEventData) : \
((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \
sizeof(AfterTriggerEventDataOneCtid) : \
sizeof(AfterTriggerEventDataZeroCtids))
(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
sizeof(AfterTriggerEventDataNoOids) : \
(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \
sizeof(AfterTriggerEventDataOneCtid) : \
sizeof(AfterTriggerEventDataZeroCtids))))
#define GetTriggerSharedData(evt) \
((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET)))
@ -3762,6 +3831,8 @@ static AfterTriggersData afterTriggers;
static void AfterTriggerExecute(EState *estate,
AfterTriggerEvent event,
ResultRelInfo *relInfo,
ResultRelInfo *src_relInfo,
ResultRelInfo *dst_relInfo,
TriggerDesc *trigdesc,
FmgrInfo *finfo,
Instrumentation *instr,
@ -4096,8 +4167,16 @@ afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs)
* fmgr lookup cache space at the caller level. (For triggers fired at
* the end of a query, we can even piggyback on the executor's state.)
*
* When fired for a cross-partition update of a partitioned table, the old
* tuple is fetched using 'src_relInfo' (the source leaf partition) and
* the new tuple using 'dst_relInfo' (the destination leaf partition), though
* both are converted into the root partitioned table's format before passing
* to the trigger function.
*
* event: event currently being fired.
* rel: open relation for event.
* relInfo: result relation for event.
* src_relInfo: source partition of a cross-partition update
* dst_relInfo: its destination partition
* trigdesc: working copy of rel's trigger info.
* finfo: array of fmgr lookup cache entries (one per trigger in trigdesc).
* instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger),
@ -4111,6 +4190,8 @@ static void
AfterTriggerExecute(EState *estate,
AfterTriggerEvent event,
ResultRelInfo *relInfo,
ResultRelInfo *src_relInfo,
ResultRelInfo *dst_relInfo,
TriggerDesc *trigdesc,
FmgrInfo *finfo, Instrumentation *instr,
MemoryContext per_tuple_context,
@ -4118,6 +4199,8 @@ AfterTriggerExecute(EState *estate,
TupleTableSlot *trig_tuple_slot2)
{
Relation rel = relInfo->ri_RelationDesc;
Relation src_rel = src_relInfo->ri_RelationDesc;
Relation dst_rel = dst_relInfo->ri_RelationDesc;
AfterTriggerShared evtshared = GetTriggerSharedData(event);
Oid tgoid = evtshared->ats_tgoid;
TriggerData LocTriggerData = {0};
@ -4198,12 +4281,35 @@ AfterTriggerExecute(EState *estate,
default:
if (ItemPointerIsValid(&(event->ate_ctid1)))
{
LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo);
TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate,
src_relInfo);
if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid1),
if (!table_tuple_fetch_row_version(src_rel,
&(event->ate_ctid1),
SnapshotAny,
LocTriggerData.tg_trigslot))
src_slot))
elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
/*
* Store the tuple fetched from the source partition into the
* target (root partitioned) table slot, converting if needed.
*/
if (src_relInfo != relInfo)
{
TupleConversionMap *map = ExecGetChildToRootMap(src_relInfo);
LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo);
if (map)
{
execute_attr_map_slot(map->attrMap,
src_slot,
LocTriggerData.tg_trigslot);
}
else
ExecCopySlot(LocTriggerData.tg_trigslot, src_slot);
}
else
LocTriggerData.tg_trigslot = src_slot;
LocTriggerData.tg_trigtuple =
ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, &should_free_trig);
}
@ -4213,16 +4319,40 @@ AfterTriggerExecute(EState *estate,
}
/* don't touch ctid2 if not there */
if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS) ==
AFTER_TRIGGER_2CTID &&
if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ||
(event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) &&
ItemPointerIsValid(&(event->ate_ctid2)))
{
LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo);
TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate,
dst_relInfo);
if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid2),
if (!table_tuple_fetch_row_version(dst_rel,
&(event->ate_ctid2),
SnapshotAny,
LocTriggerData.tg_newslot))
dst_slot))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
/*
* Store the tuple fetched from the destination partition into
* the target (root partitioned) table slot, converting if
* needed.
*/
if (dst_relInfo != relInfo)
{
TupleConversionMap *map = ExecGetChildToRootMap(dst_relInfo);
LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo);
if (map)
{
execute_attr_map_slot(map->attrMap,
dst_slot,
LocTriggerData.tg_newslot);
}
else
ExecCopySlot(LocTriggerData.tg_newslot, dst_slot);
}
else
LocTriggerData.tg_newslot = dst_slot;
LocTriggerData.tg_newtuple =
ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, &should_free_new);
}
@ -4451,13 +4581,17 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
if ((event->ate_flags & AFTER_TRIGGER_IN_PROGRESS) &&
evtshared->ats_firing_id == firing_id)
{
ResultRelInfo *src_rInfo,
*dst_rInfo;
/*
* So let's fire it... but first, find the correct relation if
* this is not the same relation as before.
*/
if (rel == NULL || RelationGetRelid(rel) != evtshared->ats_relid)
{
rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid);
rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid,
NULL);
rel = rInfo->ri_RelationDesc;
/* Catch calls with insufficient relcache refcounting */
Assert(!RelationHasReferenceCountZero(rel));
@ -4482,12 +4616,33 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
evtshared->ats_relid);
}
/*
* Look up source and destination partition result rels of a
* cross-partition update event.
*/
if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS) ==
AFTER_TRIGGER_CP_UPDATE)
{
Assert(OidIsValid(event->ate_src_part) &&
OidIsValid(event->ate_dst_part));
src_rInfo = ExecGetTriggerResultRel(estate,
event->ate_src_part,
rInfo);
dst_rInfo = ExecGetTriggerResultRel(estate,
event->ate_dst_part,
rInfo);
}
else
src_rInfo = dst_rInfo = rInfo;
/*
* Fire it. Note that the AFTER_TRIGGER_IN_PROGRESS flag is
* still set, so recursive examinations of the event list
* won't try to re-fire it.
*/
AfterTriggerExecute(estate, event, rInfo, trigdesc, finfo, instr,
AfterTriggerExecute(estate, event, rInfo,
src_rInfo, dst_rInfo,
trigdesc, finfo, instr,
per_tuple_context, slot1, slot2);
/*
@ -5767,14 +5922,35 @@ AfterTriggerPendingOnRel(Oid relid)
* Transition tuplestores are built now, rather than when events are pulled
* off of the queue because AFTER ROW triggers are allowed to select from the
* transition tables for the statement.
*
* This contains special support to queue the update events for the case where
* a partitioned table undergoing a cross-partition update may have foreign
* keys pointing into it. Normally, a partitioned table's row triggers are
* not fired because the leaf partition(s) which are modified as a result of
* the operation on the partitioned table contain the same triggers which are
* fired instead. But that general scheme can cause problematic behavior with
* foreign key triggers during cross-partition updates, which are implemented
* as DELETE on the source partition followed by INSERT into the destination
* partition. Specifically, firing DELETE triggers would lead to the wrong
* foreign key action being enforced, considering that the original command is
* UPDATE; in this case, this function is called with relinfo as the
* partitioned table, and src_partinfo and dst_partinfo referring to the
* source and target leaf partitions, respectively.
*
* is_crosspart_update is true either when a DELETE event is fired on the
* source partition (which is to be ignored) or an UPDATE event is fired on
* the root partitioned table.
* ----------
*/
static void
AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
int event, bool row_trigger,
TupleTableSlot *oldslot, TupleTableSlot *newslot,
List *recheckIndexes, Bitmapset *modifiedCols,
TransitionCaptureState *transition_capture)
TransitionCaptureState *transition_capture,
bool is_crosspart_update)
{
Relation rel = relinfo->ri_RelationDesc;
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
@ -5854,6 +6030,19 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
return;
}
/*
* We normally don't see partitioned tables here for row level triggers
* except in the special case of a cross-partition update. In that case,
* nodeModifyTable.c:ExecCrossPartitionUpdateForeignKey() calls here to
* queue an update event on the root target partitioned table, also
* passing the source and destination partitions and their tuples.
*/
Assert(!row_trigger ||
rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE ||
(is_crosspart_update &&
TRIGGER_FIRED_BY_UPDATE(event) &&
src_partinfo != NULL && dst_partinfo != NULL));
/*
* Validate the event code and collect the associated tuple CTIDs.
*
@ -5914,6 +6103,19 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newslot != NULL);
ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
/*
* Also remember the OIDs of partitions to fetch these tuples
* out of later in AfterTriggerExecute().
*/
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
Assert(src_partinfo != NULL && dst_partinfo != NULL);
new_event.ate_src_part =
RelationGetRelid(src_partinfo->ri_RelationDesc);
new_event.ate_dst_part =
RelationGetRelid(dst_partinfo->ri_RelationDesc);
}
}
else
{
@ -5938,13 +6140,53 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
break;
}
/* Determine flags */
if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
new_event.ate_flags = (row_trigger && event == TRIGGER_EVENT_UPDATE) ?
AFTER_TRIGGER_2CTID : AFTER_TRIGGER_1CTID;
{
if (row_trigger && event == TRIGGER_EVENT_UPDATE)
{
if (relkind == RELKIND_PARTITIONED_TABLE)
new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
else
new_event.ate_flags = AFTER_TRIGGER_2CTID;
}
else
new_event.ate_flags = AFTER_TRIGGER_1CTID;
}
/* else, we'll initialize ate_flags for each trigger */
tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT);
/*
* Must convert/copy the source and destination partition tuples into the
* root partitioned table's format/slot, because the processing in the
* loop below expects both oldslot and newslot tuples to be in that form.
*/
if (row_trigger && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
TupleTableSlot *rootslot;
TupleConversionMap *map;
rootslot = ExecGetTriggerOldSlot(estate, relinfo);
map = ExecGetChildToRootMap(src_partinfo);
if (map)
oldslot = execute_attr_map_slot(map->attrMap,
oldslot,
rootslot);
else
oldslot = ExecCopySlot(rootslot, oldslot);
rootslot = ExecGetTriggerNewSlot(estate, relinfo);
map = ExecGetChildToRootMap(dst_partinfo);
if (map)
newslot = execute_attr_map_slot(map->attrMap,
newslot,
rootslot);
else
newslot = ExecCopySlot(rootslot, newslot);
}
for (i = 0; i < trigdesc->numtriggers; i++)
{
Trigger *trigger = &trigdesc->triggers[i];
@ -5973,13 +6215,30 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
/*
* If the trigger is a foreign key enforcement trigger, there are
* certain cases where we can skip queueing the event because we can
* tell by inspection that the FK constraint will still pass.
* tell by inspection that the FK constraint will still pass. There
* are also some cases during cross-partition updates of a partitioned
* table where queuing the event can be skipped.
*/
if (TRIGGER_FIRED_BY_UPDATE(event) || TRIGGER_FIRED_BY_DELETE(event))
{
switch (RI_FKey_trigger_type(trigger->tgfoid))
{
case RI_TRIGGER_PK:
/*
* For cross-partition updates of a partitioned PK table,
* skip the event fired by the component delete on the
* source leaf partition unless the constraint originates
* in the partition itself (!tgisclone), because the
* update event that will be fired on the root
* (partitioned) target table will be used to perform the
* necessary foreign key enforcement action.
*/
if (is_crosspart_update &&
TRIGGER_FIRED_BY_DELETE(event) &&
trigger->tgisclone)
continue;
/* Update or delete on trigger's PK table */
if (!RI_FKey_pk_upd_check_required(trigger, rel,
oldslot, newslot))
@ -5990,8 +6249,20 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
break;
case RI_TRIGGER_FK:
/* Update on trigger's FK table */
if (!RI_FKey_fk_upd_check_required(trigger, rel,
/*
* Update on trigger's FK table. We can skip the update
* event fired on a partitioned table during a
* cross-partition of that table, because the insert event
* that is fired on the destination leaf partition would
* suffice to perform the necessary foreign key check.
* Moreover, RI_FKey_fk_upd_check_required() expects to be
* passed a tuple that contains system attributes, most of
* which are not present in the virtual slot belonging to
* a partitioned table.
*/
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
!RI_FKey_fk_upd_check_required(trigger, rel,
oldslot, newslot))
{
/* skip queuing this event */
@ -6000,7 +6271,18 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
break;
case RI_TRIGGER_NONE:
/* Not an FK trigger */
/*
* Not an FK trigger. No need to queue the update event
* fired during a cross-partition update of a
* partitioned table, because the same row trigger must be
* present in the leaf partition(s) that are affected as
* part of this update and the events fired on them are
* queued instead.
*/
if (row_trigger &&
rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
continue;
break;
}
}

View File

@ -44,6 +44,7 @@
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/partition.h"
#include "catalog/pg_publication.h"
#include "commands/matview.h"
#include "commands/trigger.h"
@ -1279,7 +1280,8 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
* in es_trig_target_relations.
*/
ResultRelInfo *
ExecGetTriggerResultRel(EState *estate, Oid relid)
ExecGetTriggerResultRel(EState *estate, Oid relid,
ResultRelInfo *rootRelInfo)
{
ResultRelInfo *rInfo;
ListCell *l;
@ -1330,7 +1332,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid)
InitResultRelInfo(rInfo,
rel,
0, /* dummy rangetable index */
NULL,
rootRelInfo,
estate->es_instrument);
estate->es_trig_target_relations =
lappend(estate->es_trig_target_relations, rInfo);
@ -1344,6 +1346,69 @@ ExecGetTriggerResultRel(EState *estate, Oid relid)
return rInfo;
}
/*
* Return the ancestor relations of a given leaf partition result relation
* up to and including the query's root target relation.
*
* These work much like the ones opened by ExecGetTriggerResultRel, except
* that we need to keep them in a separate list.
*
* The list is built on the first call for a given resultRelInfo and cached
* in resultRelInfo->ri_ancestorResultRels; later calls return the cached
* list unchanged. Ancestors below the root target get a freshly made
* ResultRelInfo with a dummy rangetable index of 0, while the root target
* itself is represented by the existing ri_RootResultRelInfo, which is
* always appended as the last element.
*
* 'estate' is only consulted for es_instrument when initializing the new
* ResultRelInfos.
*
* Raises an error if resultRelInfo's relation is not a partition.
*
* These are closed by ExecCloseResultRelations.
*/
List *
ExecGetAncestorResultRels(EState *estate, ResultRelInfo *resultRelInfo)
{
ResultRelInfo *rootRelInfo = resultRelInfo->ri_RootResultRelInfo;
Relation partRel = resultRelInfo->ri_RelationDesc;
Oid rootRelOid;
/* Only partitions have ancestors to report. */
if (!partRel->rd_rel->relispartition)
elog(ERROR, "cannot find ancestors of a non-partition result relation");
Assert(rootRelInfo != NULL);
rootRelOid = RelationGetRelid(rootRelInfo->ri_RelationDesc);
/* Build the list only once; NIL means it has not been computed yet. */
if (resultRelInfo->ri_ancestorResultRels == NIL)
{
ListCell *lc;
List *oids = get_partition_ancestors(RelationGetRelid(partRel));
List *ancResultRels = NIL;
foreach(lc, oids)
{
Oid ancOid = lfirst_oid(lc);
Relation ancRel;
ResultRelInfo *rInfo;
/*
* Ignore the root ancestor here, and use ri_RootResultRelInfo
* (below) for it instead. Also, we stop climbing up the
* hierarchy when we find the table that was mentioned in the
* query.
*/
if (ancOid == rootRelOid)
break;
/*
* All ancestors up to the root target relation must have been
* locked by the planner or AcquireExecutorLocks().
*/
ancRel = table_open(ancOid, NoLock);
rInfo = makeNode(ResultRelInfo);
/* dummy rangetable index */
InitResultRelInfo(rInfo, ancRel, 0, NULL,
estate->es_instrument);
ancResultRels = lappend(ancResultRels, rInfo);
}
/* The root target's own ResultRelInfo always terminates the list. */
ancResultRels = lappend(ancResultRels, rootRelInfo);
/* Cache the result for subsequent calls. */
resultRelInfo->ri_ancestorResultRels = ancResultRels;
}
/* We must have found some ancestor */
Assert(resultRelInfo->ri_ancestorResultRels != NIL);
return resultRelInfo->ri_ancestorResultRels;
}
/* ----------------------------------------------------------------
* ExecPostprocessPlan
*
@ -1443,12 +1508,29 @@ ExecCloseResultRelations(EState *estate)
/*
* close indexes of result relation(s) if any. (Rels themselves are
* closed in ExecCloseRangeTableRelations())
*
* In addition, close the relations of the stub ResultRelInfos that may
* be in each resultrel's ri_ancestorResultRels.
*/
foreach(l, estate->es_opened_result_relations)
{
ResultRelInfo *resultRelInfo = lfirst(l);
ListCell *lc;
ExecCloseIndices(resultRelInfo);
foreach(lc, resultRelInfo->ri_ancestorResultRels)
{
ResultRelInfo *rInfo = lfirst(lc);
/*
* Ancestors with RTI > 0 (should only be the root ancestor) are
* closed by ExecCloseRangeTableRelations.
*/
if (rInfo->ri_RangeTableIndex > 0)
continue;
table_close(rInfo->ri_RelationDesc, NoLock);
}
}
/* Close any relations that have been opened by ExecGetTriggerResultRel(). */

View File

@ -517,8 +517,9 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
tid, NULL, slot,
recheckIndexes, NULL);
recheckIndexes, NULL, false);
list_free(recheckIndexes);
}
@ -557,7 +558,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo,
tid, NULL, NULL);
tid, NULL, NULL, false);
}
}

View File

@ -122,6 +122,12 @@ static void ExecBatchInsert(ModifyTableState *mtstate,
int numSlots,
EState *estate,
bool canSetTag);
static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
ResultRelInfo *sourcePartInfo,
ResultRelInfo *destPartInfo,
ItemPointer tupleid,
TupleTableSlot *oldslot,
TupleTableSlot *newslot);
static bool ExecOnConflictUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
ItemPointer conflictTid,
@ -635,6 +641,9 @@ ExecGetUpdateNewTuple(ResultRelInfo *relinfo,
* slot contains the new tuple value to be stored.
*
* Returns RETURNING result if any, otherwise NULL.
* *inserted_tuple is the tuple that's effectively inserted;
* *insert_destrel is the relation where it was inserted.
* These are only set on success.
*
* This may change the currently active tuple conversion map in
* mtstate->mt_transition_capture, so the callers must take care to
@ -645,7 +654,9 @@ static TupleTableSlot *
ExecInsert(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
TupleTableSlot *slot,
bool canSetTag)
bool canSetTag,
TupleTableSlot **inserted_tuple,
ResultRelInfo **insert_destrel)
{
ModifyTableState *mtstate = context->mtstate;
EState *estate = context->estate;
@ -1008,11 +1019,14 @@ ExecInsert(ModifyTableContext *context,
if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
&& mtstate->mt_transition_capture->tcs_update_new_table)
{
ExecARUpdateTriggers(estate, resultRelInfo, NULL,
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
NULL,
NULL,
slot,
NULL,
mtstate->mt_transition_capture);
mtstate->mt_transition_capture,
false);
/*
* We've already captured the NEW TABLE row, so make sure any AR
@ -1046,6 +1060,11 @@ ExecInsert(ModifyTableContext *context,
if (resultRelInfo->ri_projectReturning)
result = ExecProcessReturning(resultRelInfo, slot, planSlot);
if (inserted_tuple)
*inserted_tuple = slot;
if (insert_destrel)
*insert_destrel = resultRelInfo;
return result;
}
@ -1160,7 +1179,7 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static void
ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple)
ItemPointer tupleid, HeapTuple oldtuple, bool changingPart)
{
ModifyTableState *mtstate = context->mtstate;
EState *estate = context->estate;
@ -1176,8 +1195,11 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture &&
mtstate->mt_transition_capture->tcs_update_old_table)
{
ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple,
NULL, NULL, mtstate->mt_transition_capture);
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
tupleid, oldtuple,
NULL, NULL, mtstate->mt_transition_capture,
false);
/*
* We've already captured the OLD TABLE row, so make sure any AR
@ -1188,7 +1210,7 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
ar_delete_trig_tcs);
ar_delete_trig_tcs, changingPart);
}
/* ----------------------------------------------------------------
@ -1457,7 +1479,7 @@ ldelete:;
if (tupleDeleted)
*tupleDeleted = true;
ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple);
ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart);
/* Process RETURNING if present and if requested */
if (processReturning && resultRelInfo->ri_projectReturning)
@ -1526,7 +1548,10 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple,
TupleTableSlot *slot,
bool canSetTag, UpdateContext *updateCxt)
bool canSetTag,
UpdateContext *updateCxt,
TupleTableSlot **inserted_tuple,
ResultRelInfo **insert_destrel)
{
ModifyTableState *mtstate = context->mtstate;
EState *estate = mtstate->ps.state;
@ -1652,7 +1677,8 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
/* Tuple routing starts from the root table. */
context->cpUpdateReturningSlot =
ExecInsert(context, mtstate->rootResultRelInfo, slot, canSetTag);
ExecInsert(context, mtstate->rootResultRelInfo, slot, canSetTag,
inserted_tuple, insert_destrel);
/*
* Reset the transition state that may possibly have been written by
@ -1793,6 +1819,9 @@ lreplace:;
*/
if (partition_constraint_failed)
{
/*
 * Start both out-parameters as NULL: ExecInsert() assigns them only on
 * success, and insert_destrel is tested against NULL below to detect
 * that the move did not happen.
 */
TupleTableSlot *inserted_tuple = NULL;
ResultRelInfo *insert_destrel = NULL;
/*
* ExecCrossPartitionUpdate will first DELETE the row from the
* partition it's currently in and then insert it back into the root
@ -1801,11 +1830,37 @@ lreplace:;
*/
if (ExecCrossPartitionUpdate(context, resultRelInfo,
tupleid, oldtuple, slot,
canSetTag, updateCxt))
canSetTag, updateCxt,
&inserted_tuple,
&insert_destrel))
{
/* success! */
updateCxt->updated = true;
updateCxt->crossPartUpdate = true;
/*
* If the partitioned table being updated is referenced in foreign
* keys, queue up trigger events to check that none of them were
* violated. No special treatment is needed in
* non-cross-partition update situations, because the leaf
* partition's AR update triggers will take care of that. During
* cross-partition updates implemented as delete on the source
* partition followed by insert on the destination partition,
* AR-UPDATE triggers of the root table (that is, the table
* mentioned in the query) must be fired.
*
* NULL insert_destrel means that the move failed to occur, that
* is, the update failed, so no need to do anything in that case.
*/
if (insert_destrel &&
resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_after_row)
ExecCrossPartitionUpdateForeignKey(context,
resultRelInfo,
insert_destrel,
tupleid, slot,
inserted_tuple);
return TM_Ok;
}
@ -1871,11 +1926,13 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(context->estate, resultRelInfo,
NULL, NULL,
tupleid, oldtuple, slot,
recheckIndexes,
mtstate->operation == CMD_INSERT ?
mtstate->mt_oc_transition_capture :
mtstate->mt_transition_capture);
mtstate->mt_transition_capture,
false);
/*
* Check any WITH CHECK OPTION constraints from parent views. We are
@ -1891,6 +1948,74 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
slot, context->estate);
}
/*
 * ExecCrossPartitionUpdateForeignKey
 *
 * Called after a cross-partition update has moved a row out of
 * sourcePartInfo and into destPartInfo.  Queues an AFTER ROW UPDATE event
 * on the root partitioned table (sourcePartInfo->ri_RootResultRelInfo), so
 * that foreign keys pointing into the root are checked against the move.
 *
 * tupleid and newslot are handed to ExecARUpdateTriggers() together with
 * the source and destination partitions.  oldslot is accepted but is not
 * referenced here.
 *
 * Reports ERRCODE_FEATURE_NOT_SUPPORTED if any ancestor of the source
 * partition other than the root carries a non-cloned RI_TRIGGER_PK
 * trigger, i.e. a foreign key that points directly at that ancestor.
 */
static void
ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
								   ResultRelInfo *sourcePartInfo,
								   ResultRelInfo *destPartInfo,
								   ItemPointer tupleid,
								   TupleTableSlot *oldslot,
								   TupleTableSlot *newslot)
{
	EState	   *estate = context->estate;
	ResultRelInfo *root = sourcePartInfo->ri_RootResultRelInfo;
	List	   *ancestors = ExecGetAncestorResultRels(estate, sourcePartInfo);
	ListCell   *cell;

	/*
	 * A foreign key that points directly into a non-root ancestor of the
	 * source partition could in theory be enforced by firing an update
	 * event on that ancestor, provided both the source and the destination
	 * partitions are known to be under it.  That is not implemented; such
	 * foreign keys are rejected with an error instead.
	 */
	foreach(cell, ancestors)
	{
		ResultRelInfo *ancestor = lfirst(cell);
		TriggerDesc *td;

		/* The root's own triggers are what we queue further down. */
		if (ancestor == root)
			continue;

		/* Nothing to look at without AFTER ROW UPDATE triggers. */
		td = ancestor->ri_TrigDesc;
		if (td == NULL || !td->trig_update_after_row)
			continue;

		for (int idx = 0; idx < td->numtriggers; idx++)
		{
			Trigger    *t = &td->triggers[idx];

			/* Only triggers that are not clones are of interest. */
			if (t->tgisclone)
				continue;
			if (RI_FKey_trigger_type(t->tgfoid) != RI_TRIGGER_PK)
				continue;

			/* First offending trigger is enough; this does not return. */
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot move tuple across partitions when a non-root ancestor of the source partition is directly referenced in a foreign key"),
					 errdetail("A foreign key points to ancestor \"%s\", but not the root ancestor \"%s\".",
							   RelationGetRelationName(ancestor->ri_RelationDesc),
							   RelationGetRelationName(root->ri_RelationDesc)),
					 errhint("Consider defining the foreign key on \"%s\".",
							 RelationGetRelationName(root->ri_RelationDesc))));
		}
	}

	/* Queue the root table's AFTER ROW UPDATE triggers for this move. */
	ExecARUpdateTriggers(estate,
						 root, sourcePartInfo, destPartInfo,
						 tupleid, NULL, newslot, NIL, NULL, true);
}
/* ----------------------------------------------------------------
* ExecUpdate
@ -2745,7 +2870,7 @@ ExecModifyTable(PlanState *pstate)
ExecInitInsertProjection(node, resultRelInfo);
slot = ExecGetInsertNewTuple(resultRelInfo, planSlot);
slot = ExecInsert(&context, resultRelInfo, slot,
node->canSetTag);
node->canSetTag, NULL, NULL);
break;
case CMD_UPDATE:

View File

@ -1261,6 +1261,12 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
TransactionId xmin;
bool isnull;
/*
* AfterTriggerSaveEvent() handles things such that this function is never
* called for partitioned tables.
*/
Assert(fk_rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE);
riinfo = ri_FetchConstraintInfo(trigger, fk_rel, false);
ri_nullcheck = ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false);

View File

@ -214,7 +214,8 @@ extern void ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TransitionCaptureState *transition_capture);
TransitionCaptureState *transition_capture,
bool is_crosspart_update);
extern bool ExecIRDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
HeapTuple trigtuple);
@ -231,11 +232,14 @@ extern bool ExecBRUpdateTriggers(EState *estate,
TupleTableSlot *slot);
extern void ExecARUpdateTriggers(EState *estate,
ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *slot,
List *recheckIndexes,
TransitionCaptureState *transition_capture);
TransitionCaptureState *transition_capture,
bool is_crosspart_update);
extern bool ExecIRUpdateTriggers(EState *estate,
ResultRelInfo *relinfo,
HeapTuple trigtuple,

View File

@ -203,7 +203,9 @@ extern void InitResultRelInfo(ResultRelInfo *resultRelInfo,
Index resultRelationIndex,
ResultRelInfo *partition_root_rri,
int instrument_options);
extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid);
extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid,
ResultRelInfo *rootRelInfo);
extern List *ExecGetAncestorResultRels(EState *estate, ResultRelInfo *resultRelInfo);
extern void ExecConstraints(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate);
extern bool ExecPartitionCheck(ResultRelInfo *resultRelInfo,

View File

@ -530,6 +530,12 @@ typedef struct ResultRelInfo
/* for use by copyfrom.c when performing multi-inserts */
struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer;
/*
* Used when a leaf partition is involved in a cross-partition update of
* one of its ancestors; see ExecCrossPartitionUpdateForeignKey().
*/
List *ri_ancestorResultRels;
} ResultRelInfo;
/* ----------------

View File

@ -2556,7 +2556,7 @@ DELETE FROM pk WHERE a = 20;
ERROR: update or delete on table "pk11" violates foreign key constraint "fk_a_fkey2" on table "fk"
DETAIL: Key (a)=(20) is still referenced from table "fk".
UPDATE pk SET a = 90 WHERE a = 30;
ERROR: update or delete on table "pk11" violates foreign key constraint "fk_a_fkey2" on table "fk"
ERROR: update or delete on table "pk" violates foreign key constraint "fk_a_fkey" on table "fk"
DETAIL: Key (a)=(30) is still referenced from table "fk".
SELECT tableoid::regclass, * FROM fk;
tableoid | a
@ -2625,15 +2625,213 @@ CREATE SCHEMA fkpart10
CREATE TABLE tbl1(f1 int PRIMARY KEY) PARTITION BY RANGE(f1)
CREATE TABLE tbl1_p1 PARTITION OF tbl1 FOR VALUES FROM (minvalue) TO (1)
CREATE TABLE tbl1_p2 PARTITION OF tbl1 FOR VALUES FROM (1) TO (maxvalue)
CREATE TABLE tbl2(f1 int REFERENCES tbl1 DEFERRABLE INITIALLY DEFERRED);
CREATE TABLE tbl2(f1 int REFERENCES tbl1 DEFERRABLE INITIALLY DEFERRED)
CREATE TABLE tbl3(f1 int PRIMARY KEY) PARTITION BY RANGE(f1)
CREATE TABLE tbl3_p1 PARTITION OF tbl3 FOR VALUES FROM (minvalue) TO (1)
CREATE TABLE tbl3_p2 PARTITION OF tbl3 FOR VALUES FROM (1) TO (maxvalue)
CREATE TABLE tbl4(f1 int REFERENCES tbl3 DEFERRABLE INITIALLY DEFERRED);
INSERT INTO fkpart10.tbl1 VALUES (0), (1);
INSERT INTO fkpart10.tbl2 VALUES (0), (1);
INSERT INTO fkpart10.tbl3 VALUES (-2), (-1), (0);
INSERT INTO fkpart10.tbl4 VALUES (-2), (-1);
BEGIN;
DELETE FROM fkpart10.tbl1 WHERE f1 = 0;
UPDATE fkpart10.tbl1 SET f1 = 2 WHERE f1 = 1;
INSERT INTO fkpart10.tbl1 VALUES (0), (1);
COMMIT;
-- test that cross-partition updates correctly enforce the foreign key
-- restriction (specifically testing INITIALLY DEFERRED)
BEGIN;
UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0;
UPDATE fkpart10.tbl3 SET f1 = f1 * -1;
INSERT INTO fkpart10.tbl1 VALUES (4);
COMMIT;
ERROR: update or delete on table "tbl1" violates foreign key constraint "tbl2_f1_fkey" on table "tbl2"
DETAIL: Key (f1)=(0) is still referenced from table "tbl2".
BEGIN;
UPDATE fkpart10.tbl3 SET f1 = f1 * -1;
UPDATE fkpart10.tbl3 SET f1 = f1 + 3;
UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0;
INSERT INTO fkpart10.tbl1 VALUES (0);
COMMIT;
ERROR: update or delete on table "tbl3" violates foreign key constraint "tbl4_f1_fkey" on table "tbl4"
DETAIL: Key (f1)=(-2) is still referenced from table "tbl4".
BEGIN;
UPDATE fkpart10.tbl3 SET f1 = f1 * -1;
UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0;
INSERT INTO fkpart10.tbl1 VALUES (0);
INSERT INTO fkpart10.tbl3 VALUES (-2), (-1);
COMMIT;
-- test where the updated table now has both an IMMEDIATE and a DEFERRED
-- constraint pointing into it
CREATE TABLE fkpart10.tbl5(f1 int REFERENCES fkpart10.tbl3);
INSERT INTO fkpart10.tbl5 VALUES (-2), (-1);
BEGIN;
UPDATE fkpart10.tbl3 SET f1 = f1 * -3;
ERROR: update or delete on table "tbl3" violates foreign key constraint "tbl5_f1_fkey" on table "tbl5"
DETAIL: Key (f1)=(-2) is still referenced from table "tbl5".
COMMIT;
-- Now test where the row referenced from the table with an IMMEDIATE
-- constraint stays in place, while those referenced from the table with a
-- DEFERRED constraint don't.
DELETE FROM fkpart10.tbl5;
INSERT INTO fkpart10.tbl5 VALUES (0);
BEGIN;
UPDATE fkpart10.tbl3 SET f1 = f1 * -3;
COMMIT;
ERROR: update or delete on table "tbl3" violates foreign key constraint "tbl4_f1_fkey" on table "tbl4"
DETAIL: Key (f1)=(-2) is still referenced from table "tbl4".
DROP SCHEMA fkpart10 CASCADE;
NOTICE: drop cascades to 2 other objects
NOTICE: drop cascades to 5 other objects
DETAIL: drop cascades to table fkpart10.tbl1
drop cascades to table fkpart10.tbl2
drop cascades to table fkpart10.tbl3
drop cascades to table fkpart10.tbl4
drop cascades to table fkpart10.tbl5
-- verify foreign keys are enforced during cross-partition updates,
-- especially on the PK side
CREATE SCHEMA fkpart11
CREATE TABLE pk (a INT PRIMARY KEY, b text) PARTITION BY LIST (a)
CREATE TABLE fk (
a INT,
CONSTRAINT fkey FOREIGN KEY (a) REFERENCES pk(a) ON UPDATE CASCADE ON DELETE CASCADE
)
CREATE TABLE fk_parted (
a INT PRIMARY KEY,
CONSTRAINT fkey FOREIGN KEY (a) REFERENCES pk(a) ON UPDATE CASCADE ON DELETE CASCADE
) PARTITION BY LIST (a)
CREATE TABLE fk_another (
a INT,
CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fk_parted (a) ON UPDATE CASCADE ON DELETE CASCADE
)
CREATE TABLE pk1 PARTITION OF pk FOR VALUES IN (1, 2) PARTITION BY LIST (a)
CREATE TABLE pk2 PARTITION OF pk FOR VALUES IN (3)
CREATE TABLE pk3 PARTITION OF pk FOR VALUES IN (4)
CREATE TABLE fk1 PARTITION OF fk_parted FOR VALUES IN (1, 2)
CREATE TABLE fk2 PARTITION OF fk_parted FOR VALUES IN (3)
CREATE TABLE fk3 PARTITION OF fk_parted FOR VALUES IN (4);
CREATE TABLE fkpart11.pk11 (b text, a int NOT NULL);
ALTER TABLE fkpart11.pk1 ATTACH PARTITION fkpart11.pk11 FOR VALUES IN (1);
CREATE TABLE fkpart11.pk12 (b text, c int, a int NOT NULL);
ALTER TABLE fkpart11.pk12 DROP c;
ALTER TABLE fkpart11.pk1 ATTACH PARTITION fkpart11.pk12 FOR VALUES IN (2);
INSERT INTO fkpart11.pk VALUES (1, 'xxx'), (3, 'yyy');
INSERT INTO fkpart11.fk VALUES (1), (3);
INSERT INTO fkpart11.fk_parted VALUES (1), (3);
INSERT INTO fkpart11.fk_another VALUES (1), (3);
-- moves 2 rows from one leaf partition to another, with both updates being
-- cascaded to fk and fk_parted. Updates of fk_parted, of which one is
-- cross-partition (3 -> 4), are further cascaded to fk_another.
UPDATE fkpart11.pk SET a = a + 1 RETURNING tableoid::pg_catalog.regclass, *;
tableoid | a | b
---------------+---+-----
fkpart11.pk12 | 2 | xxx
fkpart11.pk3 | 4 | yyy
(2 rows)
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk;
tableoid | a
-------------+---
fkpart11.fk | 2
fkpart11.fk | 4
(2 rows)
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_parted;
tableoid | a
--------------+---
fkpart11.fk1 | 2
fkpart11.fk3 | 4
(2 rows)
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_another;
tableoid | a
---------------------+---
fkpart11.fk_another | 2
fkpart11.fk_another | 4
(2 rows)
-- let's try with the foreign key pointing at tables in the partition tree
-- that are not the same as the query's target table
-- 1. foreign key pointing into a non-root ancestor
--
-- A cross-partition update on the root table will fail, because we currently
-- can't enforce the foreign keys pointing into a non-leaf partition
ALTER TABLE fkpart11.fk DROP CONSTRAINT fkey;
DELETE FROM fkpart11.fk WHERE a = 4;
ALTER TABLE fkpart11.fk ADD CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fkpart11.pk1 (a) ON UPDATE CASCADE ON DELETE CASCADE;
UPDATE fkpart11.pk SET a = a - 1;
ERROR: cannot move tuple across partitions when a non-root ancestor of the source partition is directly referenced in a foreign key
DETAIL: A foreign key points to ancestor "pk1", but not the root ancestor "pk".
HINT: Consider defining the foreign key on "pk".
-- it's okay though if the non-leaf partition is updated directly
UPDATE fkpart11.pk1 SET a = a - 1;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.pk;
tableoid | a | b
---------------+---+-----
fkpart11.pk11 | 1 | xxx
fkpart11.pk3 | 4 | yyy
(2 rows)
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk;
tableoid | a
-------------+---
fkpart11.fk | 1
(1 row)
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_parted;
tableoid | a
--------------+---
fkpart11.fk1 | 1
fkpart11.fk3 | 4
(2 rows)
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_another;
tableoid | a
---------------------+---
fkpart11.fk_another | 4
fkpart11.fk_another | 1
(2 rows)
-- 2. foreign key pointing into a single leaf partition
--
-- A cross-partition update that deletes from the pointed-to leaf partition
-- is allowed to succeed
ALTER TABLE fkpart11.fk DROP CONSTRAINT fkey;
ALTER TABLE fkpart11.fk ADD CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fkpart11.pk11 (a) ON UPDATE CASCADE ON DELETE CASCADE;
-- will delete (1) from pk11 which is cascaded to fk
UPDATE fkpart11.pk SET a = a + 1 WHERE a = 1;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk;
tableoid | a
----------+---
(0 rows)
DROP TABLE fkpart11.fk;
-- check that regular and deferrable AR triggers on the PK tables
-- still work as expected
CREATE FUNCTION fkpart11.print_row () RETURNS TRIGGER LANGUAGE plpgsql AS $$
BEGIN
RAISE NOTICE 'TABLE: %, OP: %, OLD: %, NEW: %', TG_RELNAME, TG_OP, OLD, NEW;
RETURN NULL;
END;
$$;
CREATE TRIGGER trig_upd_pk AFTER UPDATE ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE TRIGGER trig_del_pk AFTER DELETE ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE TRIGGER trig_ins_pk AFTER INSERT ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE CONSTRAINT TRIGGER trig_upd_fk_parted AFTER UPDATE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE CONSTRAINT TRIGGER trig_del_fk_parted AFTER DELETE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE CONSTRAINT TRIGGER trig_ins_fk_parted AFTER INSERT ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
UPDATE fkpart11.pk SET a = 3 WHERE a = 4;
NOTICE: TABLE: pk3, OP: DELETE, OLD: (4,yyy), NEW: <NULL>
NOTICE: TABLE: pk2, OP: INSERT, OLD: <NULL>, NEW: (3,yyy)
NOTICE: TABLE: fk3, OP: DELETE, OLD: (4), NEW: <NULL>
NOTICE: TABLE: fk2, OP: INSERT, OLD: <NULL>, NEW: (3)
UPDATE fkpart11.pk SET a = 1 WHERE a = 2;
NOTICE: TABLE: pk12, OP: DELETE, OLD: (xxx,2), NEW: <NULL>
NOTICE: TABLE: pk11, OP: INSERT, OLD: <NULL>, NEW: (xxx,1)
NOTICE: TABLE: fk1, OP: UPDATE, OLD: (2), NEW: (1)
DROP SCHEMA fkpart11 CASCADE;
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to table fkpart11.pk
drop cascades to table fkpart11.fk_parted
drop cascades to table fkpart11.fk_another
drop cascades to function fkpart11.print_row()

View File

@ -1871,12 +1871,145 @@ CREATE SCHEMA fkpart10
CREATE TABLE tbl1(f1 int PRIMARY KEY) PARTITION BY RANGE(f1)
CREATE TABLE tbl1_p1 PARTITION OF tbl1 FOR VALUES FROM (minvalue) TO (1)
CREATE TABLE tbl1_p2 PARTITION OF tbl1 FOR VALUES FROM (1) TO (maxvalue)
CREATE TABLE tbl2(f1 int REFERENCES tbl1 DEFERRABLE INITIALLY DEFERRED);
CREATE TABLE tbl2(f1 int REFERENCES tbl1 DEFERRABLE INITIALLY DEFERRED)
CREATE TABLE tbl3(f1 int PRIMARY KEY) PARTITION BY RANGE(f1)
CREATE TABLE tbl3_p1 PARTITION OF tbl3 FOR VALUES FROM (minvalue) TO (1)
CREATE TABLE tbl3_p2 PARTITION OF tbl3 FOR VALUES FROM (1) TO (maxvalue)
CREATE TABLE tbl4(f1 int REFERENCES tbl3 DEFERRABLE INITIALLY DEFERRED);
INSERT INTO fkpart10.tbl1 VALUES (0), (1);
INSERT INTO fkpart10.tbl2 VALUES (0), (1);
INSERT INTO fkpart10.tbl3 VALUES (-2), (-1), (0);
INSERT INTO fkpart10.tbl4 VALUES (-2), (-1);
BEGIN;
DELETE FROM fkpart10.tbl1 WHERE f1 = 0;
UPDATE fkpart10.tbl1 SET f1 = 2 WHERE f1 = 1;
INSERT INTO fkpart10.tbl1 VALUES (0), (1);
COMMIT;
-- test that cross-partition updates correctly enforce the foreign key
-- restriction (specifically testing INITIALLY DEFERRED)
BEGIN;
UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0;
UPDATE fkpart10.tbl3 SET f1 = f1 * -1;
INSERT INTO fkpart10.tbl1 VALUES (4);
COMMIT;
BEGIN;
UPDATE fkpart10.tbl3 SET f1 = f1 * -1;
UPDATE fkpart10.tbl3 SET f1 = f1 + 3;
UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0;
INSERT INTO fkpart10.tbl1 VALUES (0);
COMMIT;
BEGIN;
UPDATE fkpart10.tbl3 SET f1 = f1 * -1;
UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0;
INSERT INTO fkpart10.tbl1 VALUES (0);
INSERT INTO fkpart10.tbl3 VALUES (-2), (-1);
COMMIT;
-- test where the updated table now has both an IMMEDIATE and a DEFERRED
-- constraint pointing into it
CREATE TABLE fkpart10.tbl5(f1 int REFERENCES fkpart10.tbl3);
INSERT INTO fkpart10.tbl5 VALUES (-2), (-1);
BEGIN;
UPDATE fkpart10.tbl3 SET f1 = f1 * -3;
COMMIT;
-- Now test where the row referenced from the table with an IMMEDIATE
-- constraint stays in place, while those referenced from the table with a
-- DEFERRED constraint don't.
DELETE FROM fkpart10.tbl5;
INSERT INTO fkpart10.tbl5 VALUES (0);
BEGIN;
UPDATE fkpart10.tbl3 SET f1 = f1 * -3;
COMMIT;
DROP SCHEMA fkpart10 CASCADE;
-- verify foreign keys are enforced during cross-partition updates,
-- especially on the PK side
CREATE SCHEMA fkpart11
CREATE TABLE pk (a INT PRIMARY KEY, b text) PARTITION BY LIST (a)
CREATE TABLE fk (
a INT,
CONSTRAINT fkey FOREIGN KEY (a) REFERENCES pk(a) ON UPDATE CASCADE ON DELETE CASCADE
)
CREATE TABLE fk_parted (
a INT PRIMARY KEY,
CONSTRAINT fkey FOREIGN KEY (a) REFERENCES pk(a) ON UPDATE CASCADE ON DELETE CASCADE
) PARTITION BY LIST (a)
CREATE TABLE fk_another (
a INT,
CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fk_parted (a) ON UPDATE CASCADE ON DELETE CASCADE
)
CREATE TABLE pk1 PARTITION OF pk FOR VALUES IN (1, 2) PARTITION BY LIST (a)
CREATE TABLE pk2 PARTITION OF pk FOR VALUES IN (3)
CREATE TABLE pk3 PARTITION OF pk FOR VALUES IN (4)
CREATE TABLE fk1 PARTITION OF fk_parted FOR VALUES IN (1, 2)
CREATE TABLE fk2 PARTITION OF fk_parted FOR VALUES IN (3)
CREATE TABLE fk3 PARTITION OF fk_parted FOR VALUES IN (4);
CREATE TABLE fkpart11.pk11 (b text, a int NOT NULL);
ALTER TABLE fkpart11.pk1 ATTACH PARTITION fkpart11.pk11 FOR VALUES IN (1);
CREATE TABLE fkpart11.pk12 (b text, c int, a int NOT NULL);
ALTER TABLE fkpart11.pk12 DROP c;
ALTER TABLE fkpart11.pk1 ATTACH PARTITION fkpart11.pk12 FOR VALUES IN (2);
INSERT INTO fkpart11.pk VALUES (1, 'xxx'), (3, 'yyy');
INSERT INTO fkpart11.fk VALUES (1), (3);
INSERT INTO fkpart11.fk_parted VALUES (1), (3);
INSERT INTO fkpart11.fk_another VALUES (1), (3);
-- moves 2 rows from one leaf partition to another, with both updates being
-- cascaded to fk and fk_parted. Updates of fk_parted, of which one is
-- cross-partition (3 -> 4), are further cascaded to fk_another.
UPDATE fkpart11.pk SET a = a + 1 RETURNING tableoid::pg_catalog.regclass, *;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_parted;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_another;
-- let's try with the foreign key pointing at tables in the partition tree
-- that are not the same as the query's target table
-- 1. foreign key pointing into a non-root ancestor
--
-- A cross-partition update on the root table will fail, because we currently
-- can't enforce the foreign keys pointing into a non-leaf partition
ALTER TABLE fkpart11.fk DROP CONSTRAINT fkey;
DELETE FROM fkpart11.fk WHERE a = 4;
ALTER TABLE fkpart11.fk ADD CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fkpart11.pk1 (a) ON UPDATE CASCADE ON DELETE CASCADE;
UPDATE fkpart11.pk SET a = a - 1;
-- it's okay though if the non-leaf partition is updated directly
UPDATE fkpart11.pk1 SET a = a - 1;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.pk;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_parted;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_another;
-- 2. foreign key pointing into a single leaf partition
--
-- A cross-partition update that deletes from the pointed-to leaf partition
-- is allowed to succeed
ALTER TABLE fkpart11.fk DROP CONSTRAINT fkey;
ALTER TABLE fkpart11.fk ADD CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fkpart11.pk11 (a) ON UPDATE CASCADE ON DELETE CASCADE;
-- will delete (1) from pk11 which is cascaded to fk
UPDATE fkpart11.pk SET a = a + 1 WHERE a = 1;
SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk;
DROP TABLE fkpart11.fk;
-- check that regular and deferrable AR triggers on the PK tables
-- still work as expected
CREATE FUNCTION fkpart11.print_row () RETURNS TRIGGER LANGUAGE plpgsql AS $$
BEGIN
RAISE NOTICE 'TABLE: %, OP: %, OLD: %, NEW: %', TG_RELNAME, TG_OP, OLD, NEW;
RETURN NULL;
END;
$$;
CREATE TRIGGER trig_upd_pk AFTER UPDATE ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE TRIGGER trig_del_pk AFTER DELETE ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE TRIGGER trig_ins_pk AFTER INSERT ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE CONSTRAINT TRIGGER trig_upd_fk_parted AFTER UPDATE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE CONSTRAINT TRIGGER trig_del_fk_parted AFTER DELETE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
CREATE CONSTRAINT TRIGGER trig_ins_fk_parted AFTER INSERT ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row();
UPDATE fkpart11.pk SET a = 3 WHERE a = 4;
UPDATE fkpart11.pk SET a = 1 WHERE a = 2;
DROP SCHEMA fkpart11 CASCADE;