From e540b9724822eeacf1033e9fede2162324474e5b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 5 Sep 2008 21:07:29 +0000 Subject: [PATCH] Fix an oversight in the 8.2 patch that improved mergejoin performance by inserting a materialize node above an inner-side sort node, when the sort is expected to spill to disk. (The materialize protects the sort from having to support mark/restore, allowing it to do its final merge pass on-the-fly.) We neglected to teach cost_mergejoin about that hack, so it was failing to include the materialize's costs in the estimated cost of the mergejoin. The materialize's costs are generally going to be pretty negligible in comparison to the sort's, so this is only a small error and probably not worth back-patching; but it's still wrong. In the similar case where a materialize is inserted to protect an inner-side node that can't do mark/restore at all, it's still true that the materialize should not spill to disk, and so we should cost it cheaply rather than expensively. Noted while thinking about a question from Tom Raney. --- src/backend/optimizer/path/costsize.c | 15 ++++++++- src/backend/optimizer/plan/createplan.c | 5 +-- src/backend/optimizer/util/pathnode.c | 42 +++++++++++++++++++------ 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 49f81845c9..bbc77db4e2 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.196 2008/08/25 22:42:32 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.197 2008/09/05 21:07:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1584,6 +1584,19 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) * innerstartsel * rescanratio; run_cost += (sort_path.total_cost - sort_path.startup_cost) * (innerendsel - innerstartsel) * rescanratio; + + /* + * If the inner sort is expected to spill to disk, we want to add a + * materialize node to shield it from the need to handle mark/restore. + * This will allow it to perform the last merge pass on-the-fly, while + * in most cases not requiring the materialize to spill to disk. + * Charge an extra cpu_tuple_cost per tuple to account for the + * materialize node. (Keep this estimate in sync with similar ones in + * create_mergejoin_path and create_mergejoin_plan.) + */ + if (relation_byte_size(inner_path_rows, inner_path->parent->width) > + (work_mem * 1024L)) + run_cost += cpu_tuple_cost * inner_path_rows; } else { diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 1a79c96dba..1195024b5f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.247 2008/08/28 23:09:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.248 2008/09/05 21:07:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1545,7 +1545,8 @@ create_mergejoin_plan(PlannerInfo *root, /* * We assume the materialize will not spill to disk, and therefore - * charge just cpu_tuple_cost per tuple. + * charge just cpu_tuple_cost per tuple. (Keep this estimate in sync + * with similar ones in cost_mergejoin and create_mergejoin_path.) */ copy_plan_costsize(matplan, inner_plan); matplan->total_cost += cpu_tuple_cost * matplan->plan_rows; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 5996af2b5d..8e32895fb2 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.146 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.147 2008/09/05 21:07:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1304,19 +1304,43 @@ create_mergejoin_path(PlannerInfo *root, /* * If we are not sorting the inner path, we may need a materialize node to - * ensure it can be marked/restored. (Sort does support mark/restore, so - * no materialize is needed in that case.) + * ensure it can be marked/restored. * * Since the inner side must be ordered, and only Sorts and IndexScans can - * create order to begin with, you might think there's no problem --- but - * you'd be wrong. Nestloop and merge joins can *preserve* the order of - * their inputs, so they can be selected as the input of a mergejoin, and - * they don't support mark/restore at present. + * create order to begin with, and they both support mark/restore, you + * might think there's no problem --- but you'd be wrong. Nestloop and + * merge joins can *preserve* the order of their inputs, so they can be + * selected as the input of a mergejoin, and they don't support + * mark/restore at present. + * + * Note: Sort supports mark/restore, so no materialize is really needed + * in that case; but one may be desirable anyway to optimize the sort. + * However, since we aren't representing the sort step separately in + * the Path tree, we can't explicitly represent the materialize either. + * So that case is not handled here. Instead, cost_mergejoin has to + * factor in the cost and create_mergejoin_plan has to add the plan node. */ if (innersortkeys == NIL && !ExecSupportsMarkRestore(inner_path->pathtype)) - inner_path = (Path *) - create_material_path(inner_path->parent, inner_path); + { + Path *mpath; + + mpath = (Path *) create_material_path(inner_path->parent, inner_path); + + /* + * We expect the materialize won't spill to disk (it could only do + * so if there were a whole lot of duplicate tuples, which is a case + * cost_mergejoin will avoid choosing anyway). Therefore + * cost_material's cost estimate is bogus and we should charge + * just cpu_tuple_cost per tuple. (Keep this estimate in sync with + * similar ones in cost_mergejoin and create_mergejoin_plan.) + */ + mpath->startup_cost = inner_path->startup_cost; + mpath->total_cost = inner_path->total_cost; + mpath->total_cost += cpu_tuple_cost * inner_path->parent->rows; + + inner_path = mpath; + } pathnode->jpath.path.pathtype = T_MergeJoin; pathnode->jpath.path.parent = joinrel;