Refactor addition of PlaceHolderVars to joinrel targetlists.

Make build_joinrel_tlist() responsible for adding PHVs that were already computed in one or the other input relation, and therefore change add_placeholders_to_joinrel() to only add PHVs that will be newly computed in this joinrel's output. This makes the handling of PHVs in build_joinrel_tlist() more like its handling of plain Vars, which seems like a good thing on intelligibility grounds and will simplify planned future changes. There is a purely cosmetic side-effect that the order of entries in the joinrel's tlist may change; but since it becomes more like the order of entries in the input tlists, that's not bad. The reason it wasn't done like this originally was the potential cost of looking up PlaceHolderInfo entries to consult ph_needed. Now that that's O(1) it shouldn't hurt. Discussion: https://postgr.es/m/1405792.1660677844@sss.pgh.pa.us
2022-08-17 16:12:23 -04:00 · 2022-08-17 16:12:23 -04:00 · afa0ec30bf
parent b3ff6c742f
commit afa0ec30bf
3 changed files with 36 additions and 19 deletions
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@ -427,14 +427,16 @@ add_placeholders_to_base_rels(PlannerInfo *root)

 /*
 * add_placeholders_to_joinrel
- *		Add any required PlaceHolderVars to a join rel's targetlist;
- *		and if they contain lateral references, add those references to the
- *		joinrel's direct_lateral_relids.
+ *		Add any newly-computable PlaceHolderVars to a join rel's targetlist;
+ *		and if computable PHVs contain lateral references, add those
+ *		references to the joinrel's direct_lateral_relids.
 *
 * A join rel should emit a PlaceHolderVar if (a) the PHV can be computed
 * at or below this join level and (b) the PHV is needed above this level.
- * However, condition (a) is sufficient to add to direct_lateral_relids,
- * as explained below.
+ * Our caller build_join_rel() has already added any PHVs that were computed
+ * in either join input rel, so we need add only newly-computable ones to
+ * the targetlist.  However, direct_lateral_relids must be updated for every
+ * PHV computable at or below this join, as explained below.
 */
 void
 add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
@ -453,13 +455,10 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
 			/* Is it still needed above this joinrel? */
 			if (bms_nonempty_difference(phinfo->ph_needed, relids))
 			{
-				/* Yup, add it to the output */
-				joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
-													phinfo->ph_var);
-				joinrel->reltarget->width += phinfo->ph_width;
-
 				/*
-				 * Charge the cost of evaluating the contained expression if
+				 * Yes, but only add to tlist if it wasn't computed in either
+				 * input; otherwise it should be there already.  Also, we
+				 * charge the cost of evaluating the contained expression if
 				 * the PHV can be computed here but not in either input.  This
 				 * is a bit bogus because we make the decision based on the
 				 * first pair of possible input relations considered for the
@ -472,12 +471,15 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
 				if (!bms_is_subset(phinfo->ph_eval_at, outer_rel->relids) &&
 					!bms_is_subset(phinfo->ph_eval_at, inner_rel->relids))
 				{
+					PlaceHolderVar *phv = phinfo->ph_var;
 					QualCost	cost;

-					cost_qual_eval_node(&cost, (Node *) phinfo->ph_var->phexpr,
-										root);
+					joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
+														phv);
+					cost_qual_eval_node(&cost, (Node *) phv->phexpr, root);
 					joinrel->reltarget->cost.startup += cost.startup;
 					joinrel->reltarget->cost.per_tuple += cost.per_tuple;
+					joinrel->reltarget->width += phinfo->ph_width;
 				}
 			}

--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@ -679,8 +679,9 @@ build_join_rel(PlannerInfo *root,
 	set_foreign_rel_properties(joinrel, outer_rel, inner_rel);

 	/*
-	 * Create a new tlist containing just the vars that need to be output from
-	 * this join (ie, are needed for higher joinclauses or final output).
+	 * Fill the joinrel's tlist with just the Vars and PHVs that need to be
+	 * output from this join (ie, are needed for higher joinclauses or final
+	 * output).
 	 *
 	 * NOTE: the tlist order for a join rel will depend on which pair of outer
 	 * and inner rels we first try to build it from.  But the contents should
@ -966,6 +967,7 @@ min_join_parameterization(PlannerInfo *root,
 * The join's targetlist includes all Vars of its member relations that
 * will still be needed above the join.  This subroutine adds all such
 * Vars from the specified input rel's tlist to the join rel's tlist.
+ * Likewise for any PlaceHolderVars emitted by the input rel.
 *
 * We also compute the expected width of the join's output, making use
 * of data that was cached at the baserel level by set_rel_width().
@ -982,11 +984,24 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
 		Var		   *var = (Var *) lfirst(vars);

 		/*
-		 * Ignore PlaceHolderVars in the input tlists; we'll make our own
-		 * decisions about whether to copy them.
+		 * For a PlaceHolderVar, we have to look up the PlaceHolderInfo.
 		 */
 		if (IsA(var, PlaceHolderVar))
+		{
+			PlaceHolderVar *phv = (PlaceHolderVar *) var;
+			PlaceHolderInfo *phinfo = find_placeholder_info(root, phv);
+
+			/* Is it still needed above this joinrel? */
+			if (bms_nonempty_difference(phinfo->ph_needed, relids))
+			{
+				/* Yup, add it to the output */
+				joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
+													phv);
+				/* Bubbling up the precomputed result has cost zero */
+				joinrel->reltarget->width += phinfo->ph_width;
+			}
 			continue;
+		}

 		/*
 		 * Otherwise, anything in a baserel or joinrel targetlist ought to be
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@ -5793,10 +5793,10 @@ select * from
 Nested Loop
   Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)))
   ->  Hash Right Join
-         Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
+         Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
         Hash Cond: (d.q1 = c.q2)
         ->  Nested Loop
-               Output: a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
+               Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
               ->  Hash Left Join
                     Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint))
                     Hash Cond: (a.q2 = b.q1)