Don't reuse slots between root and partition in ON CONFLICT ... UPDATE.

Until now the slot used to store the conflicting tuple, and the slot
receiving the result of the ON CONFLICT ... SET projection, were reused
across all partitions. That necessitated changing the slots' tuple
descriptors whenever the targeted partition changed.

Besides the overhead of switching a slot's descriptor (which requires
memory allocations and prevents JITing), doing so is also problematic
for tableam: individual partitions might belong to different table
access methods, and therefore need different kinds of slots.
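
As a condensed sketch (identifiers as in the diff below), the old
approach re-described one shared slot on every switch of the target
partition, while the new approach creates a dedicated slot per
partition once, at initialization time:

    /* old: one shared slot, its descriptor swapped per routed partition */
    ExecSetSlotDescriptor(mtstate->mt_existing,
                          RelationGetDescr(partrel->ri_RelationDesc));

    /* new: each partition's ResultRelInfo gets its own slot at init time */
    leaf_part_rri->ri_onConflict->oc_Existing =
        ExecInitExtraTupleSlot(mtstate->ps.state,
                               RelationGetDescr(leaf_part_rri->ri_RelationDesc),
                               &TTSOpsBufferHeapTuple);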

In passing, also fix ExecOnConflictUpdate to clear the "existing" slot
on exit. Otherwise that slot could keep holding a buffer pin until the
query ends, which can be far too long if the input data set is large
and there are no further conflicts. While that was problematic before
as well, it matters more now that there is one such slot per partition.
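
Condensed from the ExecOnConflictUpdate hunks below: with
ExecStorePinnedBufferHeapTuple() the slot takes over ownership of the
buffer pin, so clearing the slot on each exit path is what releases it:

    /* the slot now owns the buffer pin for the conflicting tuple */
    ExecStorePinnedBufferHeapTuple(&tuple, existing, buffer);

    /* ... evaluate the DO UPDATE WHERE clause, project, ExecUpdate ... */

    /*
     * Clear the slot before returning, dropping the pin; a long,
     * conflict-free tail of input rows can no longer keep the buffer
     * pinned until end of query.
     */
    ExecClearTuple(existing);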

Author: Andres Freund
Reviewed-By: Robert Haas, David Rowley
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
Andres Freund 2019-03-06 15:43:33 -08:00
parent d16a74c20c
commit 277cb78983
3 changed files with 64 additions and 63 deletions

src/backend/executor/execPartition.c

@@ -723,28 +723,55 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
if (node->onConflictAction == ONCONFLICT_UPDATE)
{
TupleConversionMap *map;
TupleDesc leaf_desc;
map = leaf_part_rri->ri_PartitionInfo->pi_RootToPartitionMap;
leaf_desc = RelationGetDescr(leaf_part_rri->ri_RelationDesc);
Assert(node->onConflictSet != NIL);
Assert(rootResultRelInfo->ri_onConflict != NULL);
leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);
/*
* Need a separate existing slot for each partition, as the
* partition could be of a different AM, even if the tuple
* descriptors match.
*/
leaf_part_rri->ri_onConflict->oc_Existing =
ExecInitExtraTupleSlot(mtstate->ps.state,
leaf_desc,
&TTSOpsBufferHeapTuple);
/*
* If the partition's tuple descriptor matches exactly the root
* parent (the common case), we can simply re-use the parent's ON
* parent (the common case), we can re-use most of the parent's ON
* CONFLICT SET state, skipping a bunch of work. Otherwise, we
* need to create state specific to this partition.
*/
if (map == NULL)
leaf_part_rri->ri_onConflict = rootResultRelInfo->ri_onConflict;
{
/*
* It's safe to reuse these from the partition root, as we
* only process one tuple at a time (therefore we won't
* overwrite needed data in slots), and the results of
* projections are independent of the underlying
* storage. Projections and where clauses themselves don't
* store state / are independent of the underlying storage.
*/
leaf_part_rri->ri_onConflict->oc_ProjSlot =
rootResultRelInfo->ri_onConflict->oc_ProjSlot;
leaf_part_rri->ri_onConflict->oc_ProjInfo =
rootResultRelInfo->ri_onConflict->oc_ProjInfo;
leaf_part_rri->ri_onConflict->oc_WhereClause =
rootResultRelInfo->ri_onConflict->oc_WhereClause;
}
else
{
List *onconflset;
TupleDesc tupDesc;
bool found_whole_row;
leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);
/*
* Translate expressions in onConflictSet to account for
* different attribute numbers. For that, map partition
@@ -778,20 +805,17 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
/* Finally, adjust this tlist to match the partition. */
onconflset = adjust_partition_tlist(onconflset, map);
/*
* Build UPDATE SET's projection info. The user of this
* projection is responsible for setting the slot's tupdesc!
* We set aside a tupdesc that's good for the common case of a
* partition that's tupdesc-equal to the partitioned table;
* partitions of different tupdescs must generate their own.
*/
/* create the tuple slot for the UPDATE SET projection */
tupDesc = ExecTypeFromTL(onconflset);
ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc);
leaf_part_rri->ri_onConflict->oc_ProjSlot =
ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc,
&TTSOpsVirtual);
/* build UPDATE SET projection state */
leaf_part_rri->ri_onConflict->oc_ProjInfo =
ExecBuildProjectionInfo(onconflset, econtext,
mtstate->mt_conflproj,
leaf_part_rri->ri_onConflict->oc_ProjSlot,
&mtstate->ps, partrelDesc);
leaf_part_rri->ri_onConflict->oc_ProjTupdesc = tupDesc;
/*
* If there is a WHERE clause, initialize state where it will

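To summarize the execPartition.c hunk above: each partition now gets its
own slot for the conflicting tuple, and either shares the root's
projection state (when the tuple descriptors match) or builds its own.
A condensed sketch; onconfl, rootonconfl, partrel and estate are local
shorthand here, not names from the patch:

    /* every partition gets its own slot for the existing tuple */
    onconfl->oc_Existing =
        ExecInitExtraTupleSlot(estate, RelationGetDescr(partrel),
                               &TTSOpsBufferHeapTuple);

    if (map == NULL)
    {
        /* descriptors match: share the root's projection slot and state */
        onconfl->oc_ProjSlot = rootonconfl->oc_ProjSlot;
        onconfl->oc_ProjInfo = rootonconfl->oc_ProjInfo;
        onconfl->oc_WhereClause = rootonconfl->oc_WhereClause;
    }
    else
    {
        /* attribute numbers differ: build partition-specific slot/projection */
        onconfl->oc_ProjSlot =
            ExecInitExtraTupleSlot(estate, ExecTypeFromTL(onconflset),
                                   &TTSOpsVirtual);
        onconfl->oc_ProjInfo =
            ExecBuildProjectionInfo(onconflset, econtext,
                                    onconfl->oc_ProjSlot,
                                    &mtstate->ps, partrelDesc);
    }
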
src/backend/executor/nodeModifyTable.c

@@ -1304,6 +1304,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
ExprContext *econtext = mtstate->ps.ps_ExprContext;
Relation relation = resultRelInfo->ri_RelationDesc;
ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
HeapTupleData tuple;
HeapUpdateFailureData hufd;
LockTupleMode lockmode;
@@ -1413,7 +1414,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
ExecCheckHeapTupleVisible(estate, &tuple, buffer);
/* Store target's existing tuple in the state's dedicated slot */
ExecStoreBufferHeapTuple(&tuple, mtstate->mt_existing, buffer);
ExecStorePinnedBufferHeapTuple(&tuple, existing, buffer);
/*
* Make tuple and any needed join variables available to ExecQual and
@@ -1422,13 +1423,13 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
* has been made to reference INNER_VAR in setrefs.c, but there is no
* other redirection.
*/
econtext->ecxt_scantuple = mtstate->mt_existing;
econtext->ecxt_scantuple = existing;
econtext->ecxt_innertuple = excludedSlot;
econtext->ecxt_outertuple = NULL;
if (!ExecQual(onConflictSetWhere, econtext))
{
ReleaseBuffer(buffer);
ExecClearTuple(existing); /* see return below */
InstrCountFiltered1(&mtstate->ps, 1);
return true; /* done with the tuple */
}
@@ -1451,7 +1452,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
* INSERT or UPDATE path.
*/
ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
mtstate->mt_existing,
existing,
mtstate->ps.state);
}
@@ -1469,11 +1470,17 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
/* Execute UPDATE with projection */
*returning = ExecUpdate(mtstate, &tuple.t_self, NULL,
mtstate->mt_conflproj, planSlot,
resultRelInfo->ri_onConflict->oc_ProjSlot,
planSlot,
&mtstate->mt_epqstate, mtstate->ps.state,
canSetTag);
ReleaseBuffer(buffer);
/*
* Clear out existing tuple, as there might not be another conflict among
* the next input rows. Don't want to hold resources till the end of the
* query.
*/
ExecClearTuple(existing);
return true;
}
@@ -1633,7 +1640,6 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
ResultRelInfo *targetRelInfo,
TupleTableSlot *slot)
{
ModifyTable *node;
ResultRelInfo *partrel;
PartitionRoutingInfo *partrouteinfo;
TupleConversionMap *map;
@@ -1698,19 +1704,6 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
}
/* Initialize information needed to handle ON CONFLICT DO UPDATE. */
Assert(mtstate != NULL);
node = (ModifyTable *) mtstate->ps.plan;
if (node->onConflictAction == ONCONFLICT_UPDATE)
{
Assert(mtstate->mt_existing != NULL);
ExecSetSlotDescriptor(mtstate->mt_existing,
RelationGetDescr(partrel->ri_RelationDesc));
Assert(mtstate->mt_conflproj != NULL);
ExecSetSlotDescriptor(mtstate->mt_conflproj,
partrel->ri_onConflict->oc_ProjTupdesc);
}
return slot;
}
@@ -2319,43 +2312,28 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
econtext = mtstate->ps.ps_ExprContext;
relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
/*
* Initialize slot for the existing tuple. If we'll be performing
* tuple routing, the tuple descriptor to use for this will be
* determined based on which relation the update is actually applied
* to, so we don't set its tuple descriptor here.
*/
mtstate->mt_existing =
ExecInitExtraTupleSlot(mtstate->ps.state,
mtstate->mt_partition_tuple_routing ?
NULL : relationDesc, &TTSOpsBufferHeapTuple);
/* carried forward solely for the benefit of explain */
mtstate->mt_excludedtlist = node->exclRelTlist;
/* create state for DO UPDATE SET operation */
resultRelInfo->ri_onConflict = makeNode(OnConflictSetState);
/*
* Create the tuple slot for the UPDATE SET projection.
*
* Just like mt_existing above, we leave it without a tuple descriptor
* in the case of partitioning tuple routing, so that it can be
* changed by ExecPrepareTupleRouting. In that case, we still save
* the tupdesc in the parent's state: it can be reused by partitions
* with an identical descriptor to the parent.
*/
/* initialize slot for the existing tuple */
resultRelInfo->ri_onConflict->oc_Existing =
ExecInitExtraTupleSlot(mtstate->ps.state, relationDesc,
&TTSOpsBufferHeapTuple);
/* create the tuple slot for the UPDATE SET projection */
tupDesc = ExecTypeFromTL((List *) node->onConflictSet);
mtstate->mt_conflproj =
ExecInitExtraTupleSlot(mtstate->ps.state,
mtstate->mt_partition_tuple_routing ?
NULL : tupDesc, &TTSOpsHeapTuple);
resultRelInfo->ri_onConflict->oc_ProjTupdesc = tupDesc;
resultRelInfo->ri_onConflict->oc_ProjSlot =
ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc,
&TTSOpsVirtual);
/* build UPDATE SET projection state */
resultRelInfo->ri_onConflict->oc_ProjInfo =
ExecBuildProjectionInfo(node->onConflictSet, econtext,
mtstate->mt_conflproj, &mtstate->ps,
resultRelInfo->ri_onConflict->oc_ProjSlot,
&mtstate->ps,
relationDesc);
/* initialize state to evaluate the WHERE clause, if any */

src/include/nodes/execnodes.h

@@ -377,8 +377,9 @@ typedef struct OnConflictSetState
{
NodeTag type;
TupleTableSlot *oc_Existing; /* slot to store existing target tuple in */
TupleTableSlot *oc_ProjSlot; /* CONFLICT ... SET ... projection target */
ProjectionInfo *oc_ProjInfo; /* for ON CONFLICT DO UPDATE SET */
TupleDesc oc_ProjTupdesc; /* TupleDesc for the above projection */
ExprState *oc_WhereClause; /* state for the WHERE clause */
} OnConflictSetState;
@@ -1109,9 +1110,7 @@ typedef struct ModifyTableState
List **mt_arowmarks; /* per-subplan ExecAuxRowMark lists */
EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */
bool fireBSTriggers; /* do we need to fire stmt triggers? */
TupleTableSlot *mt_existing; /* slot to store existing target tuple in */
List *mt_excludedtlist; /* the excluded pseudo relation's tlist */
TupleTableSlot *mt_conflproj; /* CONFLICT ... SET ... projection target */
/*
* Slot for storing tuples in the root partitioned table's rowtype during