Use fuzzy not exact cost comparison for the final tie-breaker in add_path.

Instead of an exact cost comparison, use a fuzzy comparison with 1e-10 delta after all other path metrics have proved equal. This is to avoid having platform-specific roundoff behaviors determine the choice when two paths are really the same to our cost estimators. Adjust the recently-added test case that made it obvious we had a problem here.
2012-04-21 00:51:14 -04:00 · 2012-04-21 00:51:14 -04:00 · 33e99153e9
parent 09ff76fcdb
commit 33e99153e9
2 changed files with 38 additions and 22 deletions
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@ -125,8 +125,11 @@ compare_fractional_path_costs(Path *path1, Path *path2,
 *
 * We use fuzzy comparisons so that add_path() can avoid keeping both of
 * a pair of paths that really have insignificantly different cost.
- * The fuzz factor is 1% of the smaller cost.  (XXX does this percentage
+ *
- * need to be user-configurable?)
+ * The fuzz_factor argument must be 1.0 plus delta, where delta is the
 * fraction of the smaller cost that is considered to be a significant
 * difference.  For example, fuzz_factor = 1.01 makes the fuzziness limit
 * be 1% of the smaller cost.
 *
 * The two paths are said to have "equal" costs if both startup and total
 * costs are fuzzily the same.  Path1 is said to be better than path2 if
@ -138,16 +141,16 @@ compare_fractional_path_costs(Path *path1, Path *path2,
 * dominates the other across the whole performance spectrum.
 */
 static PathCostComparison
-compare_path_costs_fuzzily(Path *path1, Path *path2)
+compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
 {
 	/*
 	 * Check total cost first since it's more likely to be different; many
 	 * paths have zero startup cost.
 	 */
-	if (path1->total_cost > path2->total_cost * 1.01)
+	if (path1->total_cost > path2->total_cost * fuzz_factor)
 	{
 		/* path1 fuzzily worse on total cost */
-		if (path2->startup_cost > path1->startup_cost * 1.01)
+		if (path2->startup_cost > path1->startup_cost * fuzz_factor)
 		{
 			/* ... but path2 fuzzily worse on startup, so DIFFERENT */
 			return COSTS_DIFFERENT;
@ -155,10 +158,10 @@ compare_path_costs_fuzzily(Path *path1, Path *path2)
 		/* else path2 dominates */
 		return COSTS_BETTER2;
 	}
-	if (path2->total_cost > path1->total_cost * 1.01)
+	if (path2->total_cost > path1->total_cost * fuzz_factor)
 	{
 		/* path2 fuzzily worse on total cost */
-		if (path1->startup_cost > path2->startup_cost * 1.01)
+		if (path1->startup_cost > path2->startup_cost * fuzz_factor)
 		{
 			/* ... but path1 fuzzily worse on startup, so DIFFERENT */
 			return COSTS_DIFFERENT;
@ -167,12 +170,12 @@ compare_path_costs_fuzzily(Path *path1, Path *path2)
 		return COSTS_BETTER1;
 	}
 	/* fuzzily the same on total cost */
-	if (path1->startup_cost > path2->startup_cost * 1.01)
+	if (path1->startup_cost > path2->startup_cost * fuzz_factor)
 	{
 		/* ... but path1 fuzzily worse on startup, so path2 wins */
 		return COSTS_BETTER2;
 	}
-	if (path2->startup_cost > path1->startup_cost * 1.01)
+	if (path2->startup_cost > path1->startup_cost * fuzz_factor)
 	{
 		/* ... but path2 fuzzily worse on startup, so path1 wins */
 		return COSTS_BETTER1;
@ -354,7 +357,11 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
 		p1_next = lnext(p1);
-		costcmp = compare_path_costs_fuzzily(new_path, old_path);
+		/*
 		 * Do a fuzzy cost comparison with 1% fuzziness limit.  (XXX does this
 		 * percentage need to be user-configurable?)
 		 */
 		costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01);
 		/*
 		 * If the two paths compare differently for startup and total cost,
@ -403,15 +410,24 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
 								/*
 								 * Same pathkeys and outer rels, and fuzzily
 								 * the same cost, so keep just one; to decide
-								 * which, first check rows and then do an
+								 * which, first check rows and then do a fuzzy
-								 * exact cost comparison.
+								 * cost comparison with very small fuzz limit.
 								 * (We used to do an exact cost comparison,
 								 * but that results in annoying
 								 * platform-specific plan variations due to
 								 * roundoff in the cost estimates.)  If things
 								 * are still tied, arbitrarily keep only the
 								 * old path.  Notice that we will keep only
 								 * the old path even if the less-fuzzy
 								 * comparison decides the startup and total
 								 * costs compare differently.
 								 */
 								if (new_path->rows < old_path->rows)
 									remove_old = true;	/* new dominates old */
 								else if (new_path->rows > old_path->rows)
 									accept_new = false;	/* old dominates new */
-								else if (compare_path_costs(new_path, old_path,
+								else if (compare_path_costs_fuzzily(new_path, old_path,
-													   TOTAL_COST) < 0)
+																	1.0000000001) == COSTS_BETTER1)
 									remove_old = true;	/* new dominates old */
 								else
 									accept_new = false; /* old equals or dominates new */
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@ -2798,17 +2798,17 @@ select b.unique1 from
   Sort Key: b.unique1
   ->  Nested Loop Left Join
         ->  Seq Scan on int4_tbl i2
-         ->  Nested Loop Left Join
+         ->  Nested Loop
-               Join Filter: (b.unique1 = 42)
+               ->  Seq Scan on int4_tbl i1
-               ->  Nested Loop
+               ->  Nested Loop Left Join
                     Join Filter: (b.unique1 = 42)
                     ->  Nested Loop
                           ->  Seq Scan on int4_tbl i1
                           ->  Index Scan using tenk1_thous_tenthous on tenk1 b
                                 Index Cond: ((thousand = i1.f1) AND (i2.f1 = tenthous))
-                     ->  Index Scan using tenk1_unique1 on tenk1 a
+                           ->  Index Scan using tenk1_unique1 on tenk1 a
-                           Index Cond: (unique1 = b.unique2)
+                                 Index Cond: (unique1 = b.unique2)
-               ->  Index Only Scan using tenk1_thous_tenthous on tenk1 c
+                     ->  Index Only Scan using tenk1_thous_tenthous on tenk1 c
-                     Index Cond: (thousand = a.thousand)
+                           Index Cond: (thousand = a.thousand)
 (15 rows)
 select b.unique1 from