Refactor merge path generation code.

This shouldn't change the set of paths that get generated in any way, but it is preparatory work for further changes to allow a partial path to be merge-joined witih a non-partial path to produce a partial join path. Dilip Kumar, with cosmetic adjustments by me.
2016-12-21 09:44:33 -05:00 · 2016-12-21 09:44:33 -05:00 · 59649c3f1c
parent f3b421da5f
commit 59649c3f1c
1 changed files with 250 additions and 211 deletions
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@ -50,6 +50,15 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
 						 List *restrictlist,
 						 JoinType jointype,
 						 bool *mergejoin_allowed);
+static void generate_mergejoin_paths(PlannerInfo *root,
+						 RelOptInfo *joinrel,
+						 RelOptInfo *innerrel,
+						 Path *outerpath,
+						 JoinType jointype,
+						 JoinPathExtraData *extra,
+						 bool useallclauses,
+						 Path *inner_cheapest_total,
+						 List *merge_pathkeys);


 /*
@ -776,6 +785,241 @@ sort_inner_and_outer(PlannerInfo *root,
 	}
 }

+/*
+ * generate_mergejoin_paths
+ *	Creates possible mergejoin paths for input outerpath.
+ *
+ * We generate mergejoins if mergejoin clauses are available.  We have
+ * two ways to generate the inner path for a mergejoin: sort the cheapest
+ * inner path, or use an inner path that is already suitably ordered for the
+ * merge.  If we have several mergeclauses, it could be that there is no inner
+ * path (or only a very expensive one) for the full list of mergeclauses, but
+ * better paths exist if we truncate the mergeclause list (thereby discarding
+ * some sort key requirements).  So, we consider truncations of the
+ * mergeclause list as well as the full list.  (Ideally we'd consider all
+ * subsets of the mergeclause list, but that seems way too expensive.)
+ */
+static void
+generate_mergejoin_paths(PlannerInfo *root,
+						 RelOptInfo *joinrel,
+						 RelOptInfo *innerrel,
+						 Path *outerpath,
+						 JoinType jointype,
+						 JoinPathExtraData *extra,
+						 bool useallclauses,
+						 Path *inner_cheapest_total,
+						 List *merge_pathkeys)
+{
+	List	   *mergeclauses;
+	List	   *innersortkeys;
+	List	   *trialsortkeys;
+	Path	   *cheapest_startup_inner;
+	Path	   *cheapest_total_inner;
+	JoinType	save_jointype = jointype;
+	int			num_sortkeys;
+	int			sortkeycnt;
+
+	if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+		jointype = JOIN_INNER;
+
+	/* Look for useful mergeclauses (if any) */
+	mergeclauses = find_mergeclauses_for_pathkeys(root,
+												  outerpath->pathkeys,
+												  true,
+												  extra->mergeclause_list);
+
+	/*
+	 * Done with this outer path if no chance for a mergejoin.
+	 *
+	 * Special corner case: for "x FULL JOIN y ON true", there will be no join
+	 * clauses at all.  Ordinarily we'd generate a clauseless nestloop path,
+	 * but since mergejoin is our only join type that supports FULL JOIN
+	 * without any join clauses, it's necessary to generate a clauseless
+	 * mergejoin path instead.
+	 */
+	if (mergeclauses == NIL)
+	{
+		if (jointype == JOIN_FULL)
+			 /* okay to try for mergejoin */ ;
+		else
+			return;
+	}
+	if (useallclauses &&
+		list_length(mergeclauses) != list_length(extra->mergeclause_list))
+		return;
+
+	/* Compute the required ordering of the inner path */
+	innersortkeys = make_inner_pathkeys_for_merge(root,
+												  mergeclauses,
+												  outerpath->pathkeys);
+
+	/*
+	 * Generate a mergejoin on the basis of sorting the cheapest inner. Since
+	 * a sort will be needed, only cheapest total cost matters. (But
+	 * try_mergejoin_path will do the right thing if inner_cheapest_total is
+	 * already correctly sorted.)
+	 */
+	try_mergejoin_path(root,
+					   joinrel,
+					   outerpath,
+					   inner_cheapest_total,
+					   merge_pathkeys,
+					   mergeclauses,
+					   NIL,
+					   innersortkeys,
+					   jointype,
+					   extra);
+
+	/* Can't do anything else if inner path needs to be unique'd */
+	if (save_jointype == JOIN_UNIQUE_INNER)
+		return;
+
+	/*
+	 * Look for presorted inner paths that satisfy the innersortkey list ---
+	 * or any truncation thereof, if we are allowed to build a mergejoin using
+	 * a subset of the merge clauses.  Here, we consider both cheap startup
+	 * cost and cheap total cost.
+	 *
+	 * Currently we do not consider parameterized inner paths here. This
+	 * interacts with decisions elsewhere that also discriminate against
+	 * mergejoins with parameterized inputs; see comments in
+	 * src/backend/optimizer/README.
+	 *
+	 * As we shorten the sortkey list, we should consider only paths that are
+	 * strictly cheaper than (in particular, not the same as) any path found
+	 * in an earlier iteration.  Otherwise we'd be intentionally using fewer
+	 * merge keys than a given path allows (treating the rest as plain
+	 * joinquals), which is unlikely to be a good idea.  Also, eliminating
+	 * paths here on the basis of compare_path_costs is a lot cheaper than
+	 * building the mergejoin path only to throw it away.
+	 *
+	 * If inner_cheapest_total is well enough sorted to have not required a
+	 * sort in the path made above, we shouldn't make a duplicate path with
+	 * it, either.  We handle that case with the same logic that handles the
+	 * previous consideration, by initializing the variables that track
+	 * cheapest-so-far properly.  Note that we do NOT reject
+	 * inner_cheapest_total if we find it matches some shorter set of
+	 * pathkeys.  That case corresponds to using fewer mergekeys to avoid
+	 * sorting inner_cheapest_total, whereas we did sort it above, so the
+	 * plans being considered are different.
+	 */
+	if (pathkeys_contained_in(innersortkeys,
+							  inner_cheapest_total->pathkeys))
+	{
+		/* inner_cheapest_total didn't require a sort */
+		cheapest_startup_inner = inner_cheapest_total;
+		cheapest_total_inner = inner_cheapest_total;
+	}
+	else
+	{
+		/* it did require a sort, at least for the full set of keys */
+		cheapest_startup_inner = NULL;
+		cheapest_total_inner = NULL;
+	}
+	num_sortkeys = list_length(innersortkeys);
+	if (num_sortkeys > 1 && !useallclauses)
+		trialsortkeys = list_copy(innersortkeys);		/* need modifiable copy */
+	else
+		trialsortkeys = innersortkeys;	/* won't really truncate */
+
+	for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
+	{
+		Path	   *innerpath;
+		List	   *newclauses = NIL;
+
+		/*
+		 * Look for an inner path ordered well enough for the first
+		 * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
+		 * destructively, which is why we made a copy...
+		 */
+		trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
+		innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+												   trialsortkeys,
+												   NULL,
+												   TOTAL_COST);
+		if (innerpath != NULL &&
+			(cheapest_total_inner == NULL ||
+			 compare_path_costs(innerpath, cheapest_total_inner,
+								TOTAL_COST) < 0))
+		{
+			/* Found a cheap (or even-cheaper) sorted path */
+			/* Select the right mergeclauses, if we didn't already */
+			if (sortkeycnt < num_sortkeys)
+			{
+				newclauses =
+					find_mergeclauses_for_pathkeys(root,
+												   trialsortkeys,
+												   false,
+												   mergeclauses);
+				Assert(newclauses != NIL);
+			}
+			else
+				newclauses = mergeclauses;
+			try_mergejoin_path(root,
+							   joinrel,
+							   outerpath,
+							   innerpath,
+							   merge_pathkeys,
+							   newclauses,
+							   NIL,
+							   NIL,
+							   jointype,
+							   extra);
+			cheapest_total_inner = innerpath;
+		}
+		/* Same on the basis of cheapest startup cost ... */
+		innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+												   trialsortkeys,
+												   NULL,
+												   STARTUP_COST);
+		if (innerpath != NULL &&
+			(cheapest_startup_inner == NULL ||
+			 compare_path_costs(innerpath, cheapest_startup_inner,
+								STARTUP_COST) < 0))
+		{
+			/* Found a cheap (or even-cheaper) sorted path */
+			if (innerpath != cheapest_total_inner)
+			{
+				/*
+				 * Avoid rebuilding clause list if we already made one; saves
+				 * memory in big join trees...
+				 */
+				if (newclauses == NIL)
+				{
+					if (sortkeycnt < num_sortkeys)
+					{
+						newclauses =
+							find_mergeclauses_for_pathkeys(root,
+														   trialsortkeys,
+														   false,
+														   mergeclauses);
+						Assert(newclauses != NIL);
+					}
+					else
+						newclauses = mergeclauses;
+				}
+				try_mergejoin_path(root,
+								   joinrel,
+								   outerpath,
+								   innerpath,
+								   merge_pathkeys,
+								   newclauses,
+								   NIL,
+								   NIL,
+								   jointype,
+								   extra);
+			}
+			cheapest_startup_inner = innerpath;
+		}
+
+		/*
+		 * Don't consider truncated sortkeys if we need all clauses.
+		 */
+		if (useallclauses)
+			break;
+	}
+}
+
 /*
 * match_unsorted_outer
 *	  Creates possible join paths for processing a single join relation
@ -790,15 +1034,8 @@ sort_inner_and_outer(PlannerInfo *root,
 * cheapest-total inner-indexscan path (if any), and one on the
 * cheapest-startup inner-indexscan path (if different).
 *
- * We also consider mergejoins if mergejoin clauses are available.  We have
- * two ways to generate the inner path for a mergejoin: sort the cheapest
- * inner path, or use an inner path that is already suitably ordered for the
- * merge.  If we have several mergeclauses, it could be that there is no inner
- * path (or only a very expensive one) for the full list of mergeclauses, but
- * better paths exist if we truncate the mergeclause list (thereby discarding
- * some sort key requirements).  So, we consider truncations of the
- * mergeclause list as well as the full list.  (Ideally we'd consider all
- * subsets of the mergeclause list, but that seems way too expensive.)
+ * We also consider mergejoins if mergejoin clauses are available.  See
+ * detailed comments in generate_mergejoin_paths.
 *
 * 'joinrel' is the join relation
 * 'outerrel' is the outer join relation
@ -894,13 +1131,6 @@ match_unsorted_outer(PlannerInfo *root,
 	{
 		Path	   *outerpath = (Path *) lfirst(lc1);
 		List	   *merge_pathkeys;
-		List	   *mergeclauses;
-		List	   *innersortkeys;
-		List	   *trialsortkeys;
-		Path	   *cheapest_startup_inner;
-		Path	   *cheapest_total_inner;
-		int			num_sortkeys;
-		int			sortkeycnt;

 		/*
 		 * We cannot use an outer path that is parameterized by the inner rel.
@ -986,201 +1216,10 @@ match_unsorted_outer(PlannerInfo *root,
 		if (inner_cheapest_total == NULL)
 			continue;

-		/* Look for useful mergeclauses (if any) */
-		mergeclauses = find_mergeclauses_for_pathkeys(root,
-													  outerpath->pathkeys,
-													  true,
-													extra->mergeclause_list);
-
-		/*
-		 * Done with this outer path if no chance for a mergejoin.
-		 *
-		 * Special corner case: for "x FULL JOIN y ON true", there will be no
-		 * join clauses at all.  Ordinarily we'd generate a clauseless
-		 * nestloop path, but since mergejoin is our only join type that
-		 * supports FULL JOIN without any join clauses, it's necessary to
-		 * generate a clauseless mergejoin path instead.
-		 */
-		if (mergeclauses == NIL)
-		{
-			if (jointype == JOIN_FULL)
-				 /* okay to try for mergejoin */ ;
-			else
-				continue;
-		}
-		if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
-			continue;
-
-		/* Compute the required ordering of the inner path */
-		innersortkeys = make_inner_pathkeys_for_merge(root,
-													  mergeclauses,
-													  outerpath->pathkeys);
-
-		/*
-		 * Generate a mergejoin on the basis of sorting the cheapest inner.
-		 * Since a sort will be needed, only cheapest total cost matters. (But
-		 * try_mergejoin_path will do the right thing if inner_cheapest_total
-		 * is already correctly sorted.)
-		 */
-		try_mergejoin_path(root,
-						   joinrel,
-						   outerpath,
-						   inner_cheapest_total,
-						   merge_pathkeys,
-						   mergeclauses,
-						   NIL,
-						   innersortkeys,
-						   jointype,
-						   extra);
-
-		/* Can't do anything else if inner path needs to be unique'd */
-		if (save_jointype == JOIN_UNIQUE_INNER)
-			continue;
-
-		/*
-		 * Look for presorted inner paths that satisfy the innersortkey list
-		 * --- or any truncation thereof, if we are allowed to build a
-		 * mergejoin using a subset of the merge clauses.  Here, we consider
-		 * both cheap startup cost and cheap total cost.
-		 *
-		 * Currently we do not consider parameterized inner paths here. This
-		 * interacts with decisions elsewhere that also discriminate against
-		 * mergejoins with parameterized inputs; see comments in
-		 * src/backend/optimizer/README.
-		 *
-		 * As we shorten the sortkey list, we should consider only paths that
-		 * are strictly cheaper than (in particular, not the same as) any path
-		 * found in an earlier iteration.  Otherwise we'd be intentionally
-		 * using fewer merge keys than a given path allows (treating the rest
-		 * as plain joinquals), which is unlikely to be a good idea.  Also,
-		 * eliminating paths here on the basis of compare_path_costs is a lot
-		 * cheaper than building the mergejoin path only to throw it away.
-		 *
-		 * If inner_cheapest_total is well enough sorted to have not required
-		 * a sort in the path made above, we shouldn't make a duplicate path
-		 * with it, either.  We handle that case with the same logic that
-		 * handles the previous consideration, by initializing the variables
-		 * that track cheapest-so-far properly.  Note that we do NOT reject
-		 * inner_cheapest_total if we find it matches some shorter set of
-		 * pathkeys.  That case corresponds to using fewer mergekeys to avoid
-		 * sorting inner_cheapest_total, whereas we did sort it above, so the
-		 * plans being considered are different.
-		 */
-		if (pathkeys_contained_in(innersortkeys,
-								  inner_cheapest_total->pathkeys))
-		{
-			/* inner_cheapest_total didn't require a sort */
-			cheapest_startup_inner = inner_cheapest_total;
-			cheapest_total_inner = inner_cheapest_total;
-		}
-		else
-		{
-			/* it did require a sort, at least for the full set of keys */
-			cheapest_startup_inner = NULL;
-			cheapest_total_inner = NULL;
-		}
-		num_sortkeys = list_length(innersortkeys);
-		if (num_sortkeys > 1 && !useallclauses)
-			trialsortkeys = list_copy(innersortkeys);	/* need modifiable copy */
-		else
-			trialsortkeys = innersortkeys;		/* won't really truncate */
-
-		for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
-		{
-			Path	   *innerpath;
-			List	   *newclauses = NIL;
-
-			/*
-			 * Look for an inner path ordered well enough for the first
-			 * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
-			 * destructively, which is why we made a copy...
-			 */
-			trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
-			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
-													   trialsortkeys,
-													   NULL,
-													   TOTAL_COST);
-			if (innerpath != NULL &&
-				(cheapest_total_inner == NULL ||
-				 compare_path_costs(innerpath, cheapest_total_inner,
-									TOTAL_COST) < 0))
-			{
-				/* Found a cheap (or even-cheaper) sorted path */
-				/* Select the right mergeclauses, if we didn't already */
-				if (sortkeycnt < num_sortkeys)
-				{
-					newclauses =
-						find_mergeclauses_for_pathkeys(root,
-													   trialsortkeys,
-													   false,
-													   mergeclauses);
-					Assert(newclauses != NIL);
-				}
-				else
-					newclauses = mergeclauses;
-				try_mergejoin_path(root,
-								   joinrel,
-								   outerpath,
-								   innerpath,
-								   merge_pathkeys,
-								   newclauses,
-								   NIL,
-								   NIL,
-								   jointype,
-								   extra);
-				cheapest_total_inner = innerpath;
-			}
-			/* Same on the basis of cheapest startup cost ... */
-			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
-													   trialsortkeys,
-													   NULL,
-													   STARTUP_COST);
-			if (innerpath != NULL &&
-				(cheapest_startup_inner == NULL ||
-				 compare_path_costs(innerpath, cheapest_startup_inner,
-									STARTUP_COST) < 0))
-			{
-				/* Found a cheap (or even-cheaper) sorted path */
-				if (innerpath != cheapest_total_inner)
-				{
-					/*
-					 * Avoid rebuilding clause list if we already made one;
-					 * saves memory in big join trees...
-					 */
-					if (newclauses == NIL)
-					{
-						if (sortkeycnt < num_sortkeys)
-						{
-							newclauses =
-								find_mergeclauses_for_pathkeys(root,
-															   trialsortkeys,
-															   false,
-															   mergeclauses);
-							Assert(newclauses != NIL);
-						}
-						else
-							newclauses = mergeclauses;
-					}
-					try_mergejoin_path(root,
-									   joinrel,
-									   outerpath,
-									   innerpath,
-									   merge_pathkeys,
-									   newclauses,
-									   NIL,
-									   NIL,
-									   jointype,
-									   extra);
-				}
-				cheapest_startup_inner = innerpath;
-			}
-
-			/*
-			 * Don't consider truncated sortkeys if we need all clauses.
-			 */
-			if (useallclauses)
-				break;
-		}
+		/* Generate merge join paths */
+		generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
+								 save_jointype, extra, useallclauses,
+								 inner_cheapest_total, merge_pathkeys);
 	}

 	/*