Eliminate a lot of list-management overhead within join_search_one_level
by adding a requirement that build_join_rel add new join RelOptInfos to the
appropriate list immediately at creation.  Per report from Robert Haas,
the list_concat_unique_ptr() calls that this change eliminates were taking
the lion's share of the runtime in larger join problems.  This doesn't do
anything to fix the fundamental combinatorial explosion in large join
problems, but it should push out the threshold of pain a bit further.

Note: because this changes the order in which joinrel lists are built,
it might result in changes in selected plans in cases where different
alternatives have exactly the same costs.  There is one example in the
regression tests.
This commit is contained in:
Tom Lane 2009-11-28 00:46:19 +00:00
parent fe83b975b2
commit 1a95f12702
9 changed files with 964 additions and 954 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.372 2009/11/15 02:45:34 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.373 2009/11/28 00:46:18 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@ -1544,6 +1544,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
WRITE_NODE_FIELD(glob);
WRITE_UINT_FIELD(query_level);
WRITE_NODE_FIELD(join_rel_list);
WRITE_INT_FIELD(join_cur_level);
WRITE_NODE_FIELD(resultRelations);
WRITE_NODE_FIELD(init_plans);
WRITE_NODE_FIELD(cte_plan_ids);

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.90 2009/07/19 21:00:43 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.91 2009/11/28 00:46:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -88,9 +88,12 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
* is one. We can do this by just temporarily setting the link to NULL.
* (If we are dealing with enough join rels, which we very likely are, a
* new hash table will get built and used locally.)
*
* join_rel_level[] shouldn't be in use, so just Assert it isn't.
*/
savelength = list_length(root->join_rel_list);
savehash = root->join_rel_hash;
Assert(root->join_rel_level == NULL);
root->join_rel_hash = NULL;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.190 2009/11/22 14:54:31 heikki Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.191 2009/11/28 00:46:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -898,10 +898,15 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
RelOptInfo *
standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
{
List **joinitems;
int lev;
RelOptInfo *rel;
/*
* This function cannot be invoked recursively within any one planning
* problem, so join_rel_level[] can't be in use already.
*/
Assert(root->join_rel_level == NULL);
/*
* We employ a simple "dynamic programming" algorithm: we first find all
* ways to build joins of two jointree items, then all ways to build joins
@ -909,30 +914,31 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
* joins, and so on until we have considered all ways to join all the
* items into one rel.
*
* joinitems[j] is a list of all the j-item rels. Initially we set
* joinitems[1] to represent all the single-jointree-item relations.
* root->join_rel_level[j] is a list of all the j-item rels. Initially we
* set root->join_rel_level[1] to represent all the single-jointree-item
* relations.
*/
joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
joinitems[1] = initial_rels;
root->join_rel_level[1] = initial_rels;
for (lev = 2; lev <= levels_needed; lev++)
{
ListCell *x;
ListCell *lc;
/*
* Determine all possible pairs of relations to be joined at this
* level, and build paths for making each one from every available
* pair of lower-level relations.
*/
joinitems[lev] = join_search_one_level(root, lev, joinitems);
join_search_one_level(root, lev);
/*
* Do cleanup work on each just-processed rel.
*/
foreach(x, joinitems[lev])
foreach(lc, root->join_rel_level[lev])
{
rel = (RelOptInfo *) lfirst(x);
rel = (RelOptInfo *) lfirst(lc);
/* Find and save the cheapest paths for this rel */
set_cheapest(rel);
@ -946,11 +952,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
/*
* We should have a single rel at the final level.
*/
if (joinitems[levels_needed] == NIL)
if (root->join_rel_level[levels_needed] == NIL)
elog(ERROR, "failed to build any %d-way joins", levels_needed);
Assert(list_length(joinitems[levels_needed]) == 1);
Assert(list_length(root->join_rel_level[levels_needed]) == 1);
rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
root->join_rel_level = NULL;
return rel;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.102 2009/07/23 17:42:06 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.103 2009/11/28 00:46:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -19,10 +19,10 @@
#include "optimizer/paths.h"
static List *make_rels_by_clause_joins(PlannerInfo *root,
static void make_rels_by_clause_joins(PlannerInfo *root,
RelOptInfo *old_rel,
ListCell *other_rels);
static List *make_rels_by_clauseless_joins(PlannerInfo *root,
static void make_rels_by_clauseless_joins(PlannerInfo *root,
RelOptInfo *old_rel,
ListCell *other_rels);
static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
@ -40,17 +40,23 @@ static bool restriction_is_constant_false(List *restrictlist);
* combination of lower-level rels are created and returned in a list.
* Implementation paths are created for each such joinrel, too.
*
* level: level of rels we want to make this time.
* joinrels[j], 1 <= j < level, is a list of rels containing j items.
* level: level of rels we want to make this time
* root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items
*
* The result is returned in root->join_rel_level[level].
*/
List *
join_search_one_level(PlannerInfo *root, int level, List **joinrels)
void
join_search_one_level(PlannerInfo *root, int level)
{
List *result_rels = NIL;
List *new_rels;
List **joinrels = root->join_rel_level;
ListCell *r;
int k;
Assert(joinrels[level] == NIL);
/* Set join_cur_level so that new joinrels are added to proper list */
root->join_cur_level = level;
/*
* First, consider left-sided and right-sided plans, in which rels of
* exactly level-1 member relations are joined against initial relations.
@ -88,9 +94,9 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
*
* See also the last-ditch case below.
*/
new_rels = make_rels_by_clause_joins(root,
old_rel,
other_rels);
make_rels_by_clause_joins(root,
old_rel,
other_rels);
}
else
{
@ -99,20 +105,10 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
* relation, either directly or by join-order restrictions.
* Cartesian product time.
*/
new_rels = make_rels_by_clauseless_joins(root,
old_rel,
other_rels);
make_rels_by_clauseless_joins(root,
old_rel,
other_rels);
}
/*
* At levels above 2 we will generate the same joined relation in
* multiple ways --- for example (a join b) join c is the same
* RelOptInfo as (b join c) join a, though the second case will add a
* different set of Paths to it. To avoid making extra work for
* subsequent passes, do not enter the same RelOptInfo into our output
* list multiple times.
*/
result_rels = list_concat_unique_ptr(result_rels, new_rels);
}
/*
@ -168,13 +164,7 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
if (have_relevant_joinclause(root, old_rel, new_rel) ||
have_join_order_restriction(root, old_rel, new_rel))
{
RelOptInfo *jrel;
jrel = make_join_rel(root, old_rel, new_rel);
/* Avoid making duplicate entries ... */
if (jrel)
result_rels = list_append_unique_ptr(result_rels,
jrel);
(void) make_join_rel(root, old_rel, new_rel);
}
}
}
@ -193,7 +183,7 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
* choice but to make cartesian joins. We consider only left-sided and
* right-sided cartesian joins in this case (no bushy).
*/
if (result_rels == NIL)
if (joinrels[level] == NIL)
{
/*
* This loop is just like the first one, except we always call
@ -211,11 +201,9 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
other_rels = list_head(joinrels[1]); /* consider all initial
* rels */
new_rels = make_rels_by_clauseless_joins(root,
old_rel,
other_rels);
result_rels = list_concat_unique_ptr(result_rels, new_rels);
make_rels_by_clauseless_joins(root,
old_rel,
other_rels);
}
/*----------
@ -235,11 +223,9 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
* never fail, and so the following sanity check is useful.
*----------
*/
if (result_rels == NIL && root->join_info_list == NIL)
if (joinrels[level] == NIL && root->join_info_list == NIL)
elog(ERROR, "failed to build any %d-way joins", level);
}
return result_rels;
}
/*
@ -247,7 +233,13 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
* Build joins between the given relation 'old_rel' and other relations
* that participate in join clauses that 'old_rel' also participates in
* (or participate in join-order restrictions with it).
* The join rel nodes are returned in a list.
* The join rels are returned in root->join_rel_level[join_cur_level].
*
* Note: at levels above 2 we will generate the same joined relation in
* multiple ways --- for example (a join b) join c is the same RelOptInfo as
* (b join c) join a, though the second case will add a different set of Paths
* to it. This is the reason for using the join_rel_level mechanism, which
* automatically ensures that each new joinrel is only added to the list once.
*
* 'old_rel' is the relation entry for the relation to be joined
* 'other_rels': the first cell in a linked list containing the other
@ -256,12 +248,11 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
* Currently, this is only used with initial rels in other_rels, but it
* will work for joining to joinrels too.
*/
static List *
static void
make_rels_by_clause_joins(PlannerInfo *root,
RelOptInfo *old_rel,
ListCell *other_rels)
{
List *result = NIL;
ListCell *l;
for_each_cell(l, other_rels)
@ -272,15 +263,9 @@ make_rels_by_clause_joins(PlannerInfo *root,
(have_relevant_joinclause(root, old_rel, other_rel) ||
have_join_order_restriction(root, old_rel, other_rel)))
{
RelOptInfo *jrel;
jrel = make_join_rel(root, old_rel, other_rel);
if (jrel)
result = lcons(jrel, result);
(void) make_join_rel(root, old_rel, other_rel);
}
}
return result;
}
/*
@ -288,7 +273,7 @@ make_rels_by_clause_joins(PlannerInfo *root,
* Given a relation 'old_rel' and a list of other relations
* 'other_rels', create a join relation between 'old_rel' and each
* member of 'other_rels' that isn't already included in 'old_rel'.
* The join rel nodes are returned in a list.
* The join rels are returned in root->join_rel_level[join_cur_level].
*
* 'old_rel' is the relation entry for the relation to be joined
* 'other_rels': the first cell of a linked list containing the
@ -297,34 +282,22 @@ make_rels_by_clause_joins(PlannerInfo *root,
* Currently, this is only used with initial rels in other_rels, but it would
* work for joining to joinrels too.
*/
static List *
static void
make_rels_by_clauseless_joins(PlannerInfo *root,
RelOptInfo *old_rel,
ListCell *other_rels)
{
List *result = NIL;
ListCell *i;
ListCell *l;
for_each_cell(i, other_rels)
for_each_cell(l, other_rels)
{
RelOptInfo *other_rel = (RelOptInfo *) lfirst(i);
RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
if (!bms_overlap(other_rel->relids, old_rel->relids))
{
RelOptInfo *jrel;
jrel = make_join_rel(root, old_rel, other_rel);
/*
* As long as given other_rels are distinct, don't need to test to
* see if jrel is already part of output list.
*/
if (jrel)
result = lcons(jrel, result);
(void) make_join_rel(root, old_rel, other_rel);
}
}
return result;
}

View File

@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.115 2009/06/11 14:48:59 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.116 2009/11/28 00:46:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -142,6 +142,8 @@ query_planner(PlannerInfo *root, List *tlist,
palloc0(root->simple_rel_array_size * sizeof(RelOptInfo *));
root->join_rel_list = NIL;
root->join_rel_hash = NULL;
root->join_rel_level = NULL;
root->join_cur_level = 0;
root->canon_pathkeys = NIL;
root->left_join_clauses = NIL;
root->right_join_clauses = NIL;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.95 2009/10/12 18:10:48 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.96 2009/11/28 00:46:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -402,6 +402,20 @@ build_join_rel(PlannerInfo *root,
hentry->join_rel = joinrel;
}
/*
* Also, if dynamic-programming join search is active, add the new joinrel
* to the appropriate sublist. Note: you might think the Assert on
* number of members should be for equality, but some of the level 1
* rels might have been joinrels already, so we can only assert <=.
*/
if (root->join_rel_level)
{
Assert(root->join_cur_level > 0);
Assert(root->join_cur_level <= bms_num_members(joinrel->relids));
root->join_rel_level[root->join_cur_level] =
lappend(root->join_rel_level[root->join_cur_level], joinrel);
}
return joinrel;
}

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.179 2009/11/15 02:45:35 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.180 2009/11/28 00:46:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -142,6 +142,16 @@ typedef struct PlannerInfo
List *join_rel_list; /* list of join-relation RelOptInfos */
struct HTAB *join_rel_hash; /* optional hashtable for join relations */
/*
* When doing a dynamic-programming-style join search, join_rel_level[k]
* is a list of all join-relation RelOptInfos of level k, and
* join_cur_level is the current level. New join-relation RelOptInfos
* are automatically added to the join_rel_level[join_cur_level] list.
* join_rel_level is NULL if not in use.
*/
List **join_rel_level; /* lists of join-relation RelOptInfos */
int join_cur_level; /* index of list being extended */
List *resultRelations; /* integer list of RT indexes, or NIL */
List *init_plans; /* init SubPlans for query */

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.108 2009/09/17 20:49:29 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.109 2009/11/28 00:46:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -99,8 +99,7 @@ extern void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
* joinrels.c
* routines to determine which relations to join
*/
extern List *join_search_one_level(PlannerInfo *root, int level,
List **joinrels);
extern void join_search_one_level(PlannerInfo *root, int level);
extern RelOptInfo *make_join_rel(PlannerInfo *root,
RelOptInfo *rel1, RelOptInfo *rel2);
extern bool have_join_order_restriction(PlannerInfo *root,

File diff suppressed because it is too large Load Diff