Improve my initial, rather hacky implementation of joins to append
relations: fix the executor so that we can have an Append plan on the
inside of a nestloop and still pass down outer index keys to index scans
within the Append, then generate such plans as if they were regular
inner indexscans.  This avoids the need to evaluate the outer relation
multiple times.
This commit is contained in:
Tom Lane 2006-02-05 02:59:17 +00:00
parent 354213c7f4
commit 336a6491aa
8 changed files with 114 additions and 273 deletions

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994-5, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.142 2005/11/29 01:25:49 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.143 2006/02/05 02:59:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -904,9 +904,15 @@ explain_outNode(StringInfo str,
appendStringInfo(str, " ");
appendStringInfo(str, " -> ");
/*
* Ordinarily we don't pass down our own outer_plan value to our
* child nodes, but in an Append we must, since we might be
* looking at an appendrel indexscan with outer references
* from the member scans.
*/
explain_outNode(str, subnode,
appendstate->appendplans[j],
NULL,
outer_plan,
indent + 3, es);
j++;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeAppend.c,v 1.65 2005/10/15 02:49:17 momjian Exp $
* $PostgreSQL: pgsql/src/backend/executor/nodeAppend.c,v 1.66 2006/02/05 02:59:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -345,10 +345,12 @@ ExecReScanAppend(AppendState *node, ExprContext *exprCtxt)
UpdateChangedParamSet(subnode, node->ps.chgParam);
/*
* if chgParam of subnode is not null then plan will be re-scanned by
* first ExecProcNode.
* If chgParam of subnode is not null then plan will be re-scanned by
* first ExecProcNode. However, if caller is passing us an exprCtxt
* then forcibly rescan all the subnodes now, so that we can pass
* the exprCtxt down to the subnodes (needed for appendrel indexscan).
*/
if (subnode->chgParam == NULL)
if (subnode->chgParam == NULL || exprCtxt != NULL)
{
/* make sure estate is correct for this subnode (needed??) */
node->as_whichplan = i;

View File

@ -49,7 +49,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.152 2005/12/28 01:29:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.153 2006/02/05 02:59:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -991,6 +991,38 @@ cost_group(Path *path, PlannerInfo *root,
path->total_cost = total_cost;
}
/*
 * nestloop_inner_path_rows
 *	  Row-count estimate to use for the inner path of a nestloop.
 *
 * When the inner path is an indexscan (plain or bitmap heap), use the
 * path's own estimated output row count, which may be lower than the
 * restriction-clause-only row count of its parent rel.  This case is
 * deliberately kept out of the PATH_ROWS macro because it applies *only*
 * to a nestloop's inner relation.  For an appendrel the inner path is an
 * Append, so we recurse through it and add up the member estimates.
 * Every other path type simply yields PATH_ROWS.
 */
static double
nestloop_inner_path_rows(Path *path)
{
	if (IsA(path, IndexPath))
		return ((IndexPath *) path)->rows;

	if (IsA(path, BitmapHeapPath))
		return ((BitmapHeapPath *) path)->rows;

	if (IsA(path, AppendPath))
	{
		AppendPath *apath = (AppendPath *) path;
		double		total = 0;
		ListCell   *cell;

		/* sum the per-member estimates, recursing into each subpath */
		foreach(cell, apath->subpaths)
			total += nestloop_inner_path_rows((Path *) lfirst(cell));

		return total;
	}

	return PATH_ROWS(path);
}
/*
* cost_nestloop
* Determines and returns the cost of joining two relations using the
@ -1008,21 +1040,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
Cost cpu_per_tuple;
QualCost restrict_qual_cost;
double outer_path_rows = PATH_ROWS(outer_path);
double inner_path_rows = PATH_ROWS(inner_path);
double inner_path_rows = nestloop_inner_path_rows(inner_path);
double ntuples;
Selectivity joininfactor;
/*
* If inner path is an indexscan, be sure to use its estimated output row
* count, which may be lower than the restriction-clause-only row count of
* its parent. (We don't include this case in the PATH_ROWS macro because
* it applies *only* to a nestloop's inner relation.)
*/
if (IsA(inner_path, IndexPath))
inner_path_rows = ((IndexPath *) inner_path)->rows;
else if (IsA(inner_path, BitmapHeapPath))
inner_path_rows = ((BitmapHeapPath *) inner_path)->rows;
if (!enable_nestloop)
startup_cost += disable_cost;

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.200 2006/01/29 17:40:00 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.201 2006/02/05 02:59:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -129,7 +129,7 @@ static Const *string_to_const(const char *str, Oid datatype);
*
* 'rel' is the relation for which we want to generate index paths
*
* Note: check_partial_indexes() must have been run previously.
* Note: check_partial_indexes() must have been run previously for this rel.
*/
void
create_index_paths(PlannerInfo *root, RelOptInfo *rel)
@ -1290,6 +1290,9 @@ matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids)
* negligible startup cost. (True today, but someday we might have to think
* harder.) Therefore, there is only one dimension of comparison and so it's
* sufficient to return a single "best" path.
*
* Note: create_index_paths() must have been run previously for this rel,
* else the result will always be NULL.
*/
Path *
best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.101 2006/02/04 23:03:20 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.102 2006/02/05 02:59:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -35,9 +35,8 @@ static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, JoinType jointype);
static void join_before_append(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype);
static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype);
static List *select_mergejoin_clauses(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
@ -118,13 +117,6 @@ add_paths_to_joinrel(PlannerInfo *root,
if (enable_hashjoin)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, jointype);
/*
* 5. If the inner relation is an append relation, consider joining
* the outer rel to each append member and then appending the results.
*/
if (innerrel->cheapest_total_path->pathtype == T_Append)
join_before_append(root, joinrel, outerrel, innerrel, jointype);
}
/*
@ -405,8 +397,17 @@ match_unsorted_outer(PlannerInfo *root,
* Get the best innerjoin indexpath (if any) for this outer rel. It's
* the same for all outer paths.
*/
bestinnerjoin = best_inner_indexscan(root, innerrel,
outerrel->relids, jointype);
if (innerrel->reloptkind != RELOPT_JOINREL)
{
if (IsA(inner_cheapest_total, AppendPath))
bestinnerjoin = best_appendrel_indexscan(root, innerrel,
outerrel->relids,
jointype);
else if (innerrel->rtekind == RTE_RELATION)
bestinnerjoin = best_inner_indexscan(root, innerrel,
outerrel->relids,
jointype);
}
}
foreach(l, outerrel->pathlist)
@ -788,75 +789,27 @@ hash_inner_and_outer(PlannerInfo *root,
}
/*
* join_before_append
* Creates possible join paths for processing a single join relation
* 'joinrel' when the inner input is an append relation.
*
* The idea here is to swap the order of the APPEND and JOIN operators.
* This is only really helpful if it allows us to reduce the cost of
* scanning the members of the append relation, and so we only consider
* plans involving nestloops with inner indexscans. Also, since the APPEND
* will certainly yield an unsorted result, there's no point in considering
* any but the cheapest-total outer path.
*
* XXX this is a bit of a kluge, because the resulting plan has to evaluate
* the outer relation multiple times. Would be better to allow
* best_inner_indexscan to generate an AppendPath and not have this routine
* at all. But we can't do that without some executor changes (need a way
* to pass outer keys down through Append). FIXME later.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'jointype' is the type of join to do
* best_appendrel_indexscan
* Finds the best available set of inner indexscans for a nestloop join
* with the given append relation on the inside and the given outer_relids
* outside. Returns an AppendPath comprising the best inner scans, or
* NULL if there are no possible inner indexscans.
*/
static void
join_before_append(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
JoinType jointype)
static Path *
best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype)
{
Path *outer_cheapest_total = outerrel->cheapest_total_path;
int parentRTindex = innerrel->relid;
int parentRTindex = rel->relid;
List *append_paths = NIL;
bool found_indexscan = false;
ListCell *l;
/*
* Swapping JOIN with APPEND only works for inner joins, not outer joins.
* However, we can also handle a unique-ified outer path.
*/
switch (jointype)
{
case JOIN_INNER:
break;
case JOIN_UNIQUE_OUTER:
outer_cheapest_total = (Path *)
create_unique_path(root, outerrel, outer_cheapest_total);
break;
case JOIN_LEFT:
case JOIN_RIGHT:
case JOIN_FULL:
case JOIN_IN:
case JOIN_UNIQUE_INNER:
return; /* can't join this way */
default:
elog(ERROR, "unrecognized join type: %d",
(int) jointype);
break;
}
/*
* Generate suitable access paths for each member relation.
*/
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
int childRTindex;
RelOptInfo *childrel;
Path *bestinnerjoin;
RelOptInfo *this_joinrel;
List *this_restrictlist;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != parentRTindex)
@ -876,42 +829,30 @@ join_before_append(PlannerInfo *root,
continue; /* OK, we can ignore it */
/*
* Get the best innerjoin indexpath (if any) for this outer rel.
* Get the best innerjoin indexpath (if any) for this child rel.
*/
bestinnerjoin = best_inner_indexscan(root, childrel,
outerrel->relids, JOIN_INNER);
outer_relids, jointype);
/*
* If no luck on an indexpath for this rel, we'll still consider
* an Append substituting the cheapest-total inner path. This
* is only likely to win if there's at least one member rel for
* which an indexscan path does exist.
* an Append substituting the cheapest-total inner path. However
* we must find at least one indexpath, else there's not going to
* be any improvement over the base path for the appendrel.
*/
if (!bestinnerjoin)
if (bestinnerjoin)
found_indexscan = true;
else
bestinnerjoin = childrel->cheapest_total_path;
/*
* We need a joinrel that describes this join accurately. Although
* the joinrel won't ever be used by the join path search algorithm
* in joinrels.c, it provides necessary context for the Path,
* such as properly-translated target and quals lists.
*/
this_joinrel = translate_join_rel(root, joinrel, appinfo,
outerrel, childrel, jointype,
&this_restrictlist);
/* Build Path for join and add to result list */
append_paths = lappend(append_paths,
create_nestloop_path(root,
this_joinrel,
JOIN_INNER,
outer_cheapest_total,
bestinnerjoin,
this_restrictlist,
NIL));
append_paths = lappend(append_paths, bestinnerjoin);
}
/* Form the completed Append path and add it to the join relation. */
add_path(joinrel, (Path *) create_append_path(joinrel, append_paths));
if (!found_indexscan)
return NULL;
/* Form and return the completed Append path. */
return (Path *) create_append_path(rel, append_paths);
}
/*

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.119 2005/11/26 22:14:57 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.120 2006/02/05 02:59:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -769,8 +769,9 @@ set_join_references(Join *join, List *rtable)
* Handle join references appearing in an inner indexscan's quals
*
* To handle bitmap-scan plan trees, we have to be able to recurse down
* to the bottom BitmapIndexScan nodes, so this is split out as a separate
* function.
* to the bottom BitmapIndexScan nodes; likewise, appendrel indexscans
* require recursing through Append nodes. This is split out as a separate
* function so that it can recurse.
*/
static void
set_inner_join_references(Plan *inner_plan,
@ -910,6 +911,22 @@ set_inner_join_references(Plan *inner_plan,
outer_itlist);
}
}
else if (IsA(inner_plan, Append))
{
/*
* The inner side is an append plan. Recurse to see if it contains
* indexscans that need to be fixed.
*/
Append *appendplan = (Append *) inner_plan;
ListCell *l;
foreach(l, appendplan->appendplans)
{
set_inner_join_references((Plan *) lfirst(l),
rtable,
outer_itlist);
}
}
else if (IsA(inner_plan, TidScan))
{
TidScan *innerscan = (TidScan *) inner_plan;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.76 2006/02/03 21:08:49 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.77 2006/02/05 02:59:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -18,7 +18,6 @@
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/plancat.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "parser/parsetree.h"
@ -571,144 +570,3 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
}
}
}
/*
* translate_join_rel
* Returns relation entry corresponding to the union of two given rels,
* creating a new relation entry if none already exists. This is used
* when one of the inputs is an append child relation. In addition to
* data about the input rels themselves, the corresponding joinrel for
* the append parent relation must be provided, plus the AppendRelInfo
* showing the parent-to-child translation.
*
* The reason for having this code, instead of just applying build_join_rel,
* is that we must have corresponding tlist orderings for all joinrels that
* are involved in an Append plan. So we generate the tlist for joinrels
* involving append child relations by translating the parent joinrel's tlist,
* rather than examining the input relations directly. (Another reason for
* doing it this way is that the base relation attr_needed info in relations
* being joined to the appendrel doesn't refer to the append child rel, but
* the append parent, and so couldn't be used directly anyway.) Otherwise
* this is exactly like build_join_rel.
*
* 'oldjoinrel' is the joinrel for the append parent; its relids, target
*		list and width are used as the template for the translated joinrel
* 'appinfo' supplies the parent_relid/child_relid pair to swap in the
*		relids set, and is passed to adjust_appendrel_attrs for the tlist
* 'outer_rel' and 'inner_rel' are the two input rels being joined
* 'jointype' is the type of join, passed through to the restrictlist
*		and size-estimation routines
* 'restrictlist_ptr', if not NULL, receives the restrictlist built for
*		this particular pair of input rels
*
* The result is the existing or newly made joinrel.
*/
RelOptInfo *
translate_join_rel(PlannerInfo *root,
RelOptInfo *oldjoinrel,
AppendRelInfo *appinfo,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
JoinType jointype,
List **restrictlist_ptr)
{
RelOptInfo *joinrel;
Relids joinrelids;
List *restrictlist;
/*
* Construct the Relids set for the translated joinrel, and see if
* we've already built it. The set is the parent joinrel's relids with
* the append parent's relid replaced by the child's.
*/
joinrelids = bms_copy(oldjoinrel->relids);
joinrelids = bms_del_member(joinrelids, appinfo->parent_relid);
joinrelids = bms_add_member(joinrelids, appinfo->child_relid);
joinrel = find_join_rel(root, joinrelids);
if (joinrel)
{
/*
* Yes, so we only need to figure the restrictlist for this particular
* pair of component relations. Our working copy of the relids set is
* not needed in this case, so release it.
*/
bms_free(joinrelids);
if (restrictlist_ptr)
*restrictlist_ptr = build_joinrel_restrictlist(root,
joinrel,
outer_rel,
inner_rel,
jointype);
return joinrel;
}
/*
* Nope, so make one. Note that the new node takes over joinrelids
* (it is stored as joinrel->relids), so it is not freed on this path.
*/
joinrel = makeNode(RelOptInfo);
joinrel->reloptkind = RELOPT_JOINREL;
joinrel->relids = joinrelids;
joinrel->rows = 0;
joinrel->width = 0;
joinrel->reltargetlist = NIL;
joinrel->pathlist = NIL;
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
joinrel->cheapest_unique_path = NULL;
joinrel->relid = 0; /* indicates not a baserel */
joinrel->rtekind = RTE_JOIN;
joinrel->min_attr = 0;
joinrel->max_attr = 0;
joinrel->attr_needed = NULL;
joinrel->attr_widths = NULL;
joinrel->indexlist = NIL;
joinrel->pages = 0;
joinrel->tuples = 0;
joinrel->subplan = NULL;
joinrel->baserestrictinfo = NIL;
joinrel->baserestrictcost.startup = 0;
joinrel->baserestrictcost.per_tuple = 0;
joinrel->joininfo = NIL;
joinrel->index_outer_relids = NULL;
joinrel->index_inner_paths = NIL;
/*
* Make the tlist by translating oldjoinrel's tlist, to ensure they
* are in compatible orders. Since we don't call build_joinrel_tlist,
* we need another way to set the rel width; for the moment, just
* assume it is the same as oldjoinrel. (The correct value may well be
* less, but it's not clear it's worth the trouble to get it right.)
*/
joinrel->reltargetlist = (List *)
adjust_appendrel_attrs((Node *) oldjoinrel->reltargetlist,
appinfo);
joinrel->width = oldjoinrel->width;
/*
* Construct restrict and join clause lists for the new joinrel. (The
* caller might or might not need the restrictlist, but I need it anyway
* for set_joinrel_size_estimates().)
*/
restrictlist = build_joinrel_restrictlist(root,
joinrel,
outer_rel,
inner_rel,
jointype);
if (restrictlist_ptr)
*restrictlist_ptr = restrictlist;
build_joinrel_joinlist(joinrel, outer_rel, inner_rel);
/*
* Set estimates of the joinrel's size. This must come after the
* restrictlist has been built, since the restrictlist is an input here.
*/
set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
jointype, restrictlist);
/*
* Add the joinrel to the query's joinrel list, and store it into the
* auxiliary hashtable if there is one. NB: GEQO requires us to append
* the new joinrel to the end of the list!
*/
root->join_rel_list = lappend(root->join_rel_list, joinrel);
if (root->join_rel_hash)
{
JoinHashEntry *hentry;
bool found;
hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
&(joinrel->relids),
HASH_ENTER,
&found);
/* the find_join_rel lookup above came up empty, so no entry is expected */
Assert(!found);
hentry->join_rel = joinrel;
}
return joinrel;
}

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.65 2006/02/03 21:08:49 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.66 2006/02/05 02:59:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -95,12 +95,5 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
RelOptInfo *inner_rel,
JoinType jointype,
List **restrictlist_ptr);
extern RelOptInfo *translate_join_rel(PlannerInfo *root,
RelOptInfo *oldjoinrel,
AppendRelInfo *appinfo,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel,
JoinType jointype,
List **restrictlist_ptr);
#endif /* PATHNODE_H */