postgresql/src/backend/optimizer/geqo/geqo_eval.c

267 lines
8.7 KiB
C

/*------------------------------------------------------------------------
*
* geqo_eval.c
* Routines to evaluate query trees
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.85 2007/02/16 00:14:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* contributed by:
=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
* Martin Utesch * Institute of Automatic Control *
= = University of Mining and Technology =
* utesch@aut.tu-freiberg.de * Freiberg, Germany *
=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
*/
#include "postgres.h"
#include <float.h>
#include <limits.h>
#include <math.h>
#include "optimizer/geqo.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "utils/memutils.h"
static bool desirable_join(PlannerInfo *root,
RelOptInfo *outer_rel, RelOptInfo *inner_rel);
/*
* geqo_eval
*
* Returns cost of a query tree as an individual of the population.
*/
Cost
geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
{
MemoryContext mycontext;
MemoryContext oldcxt;
RelOptInfo *joinrel;
Cost fitness;
int savelength;
struct HTAB *savehash;
/*
* Because gimme_tree considers both left- and right-sided trees, there is
* no difference between a tour (a,b,c,d,...) and a tour (b,a,c,d,...) ---
* the same join orders will be considered. To avoid redundant cost
* calculations, we simply reject tours where tour[0] > tour[1], assigning
* them an artificially bad fitness.
*
* init_tour() is aware of this rule and so we should never reject a tour
* during the initial filling of the pool. It seems difficult to persuade
* the recombination logic never to break the rule, however.
*/
if (num_gene >= 2 && tour[0] > tour[1])
return DBL_MAX;
/*
* Create a private memory context that will hold all temp storage
* allocated inside gimme_tree().
*
* Since geqo_eval() will be called many times, we can't afford to let all
* that memory go unreclaimed until end of statement. Note we make the
* temp context a child of the planner's normal context, so that it will
* be freed even if we abort via ereport(ERROR).
*/
mycontext = AllocSetContextCreate(CurrentMemoryContext,
"GEQO",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldcxt = MemoryContextSwitchTo(mycontext);
/*
* gimme_tree will add entries to root->join_rel_list, which may or may
* not already contain some entries. The newly added entries will be
* recycled by the MemoryContextDelete below, so we must ensure that the
* list is restored to its former state before exiting. We can do this by
* truncating the list to its original length. NOTE this assumes that any
* added entries are appended at the end!
*
* We also must take care not to mess up the outer join_rel_hash, if there
* is one. We can do this by just temporarily setting the link to NULL.
* (If we are dealing with enough join rels, which we very likely are, a
* new hash table will get built and used locally.)
*/
savelength = list_length(evaldata->root->join_rel_list);
savehash = evaldata->root->join_rel_hash;
evaldata->root->join_rel_hash = NULL;
/* construct the best path for the given combination of relations */
joinrel = gimme_tree(tour, num_gene, evaldata);
/*
* compute fitness
*
* XXX geqo does not currently support optimization for partial result
* retrieval --- how to fix?
*/
if (joinrel)
fitness = joinrel->cheapest_total_path->total_cost;
else
fitness = DBL_MAX;
/*
* Restore join_rel_list to its former state, and put back original
* hashtable if any.
*/
evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list,
savelength);
evaldata->root->join_rel_hash = savehash;
/* release all the memory acquired within gimme_tree */
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(mycontext);
return fitness;
}
/*
* gimme_tree
* Form planner estimates for a join tree constructed in the specified
* order.
*
* 'tour' is the proposed join order, of length 'num_gene'
* 'evaldata' contains the context we need
*
* Returns a new join relation whose cheapest path is the best plan for
* this join order. NB: will return NULL if join order is invalid.
*
* The original implementation of this routine always joined in the specified
* order, and so could only build left-sided plans (and right-sided and
* mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
* It could never produce a "bushy" plan. This had a couple of big problems,
* of which the worst was that as of 7.4, there are situations involving IN
* subqueries where the only valid plans are bushy.
*
* The present implementation takes the given tour as a guideline, but
* postpones joins that seem unsuitable according to some heuristic rules.
* This allows correct bushy plans to be generated at need, and as a nice
* side-effect it seems to materially improve the quality of the generated
* plans.
*/
RelOptInfo *
gimme_tree(Gene *tour, int num_gene, GeqoEvalData *evaldata)
{
RelOptInfo **stack;
int stack_depth;
RelOptInfo *joinrel;
int rel_count;
/*
* Create a stack to hold not-yet-joined relations.
*/
stack = (RelOptInfo **) palloc(num_gene * sizeof(RelOptInfo *));
stack_depth = 0;
/*
* Push each relation onto the stack in the specified order. After
* pushing each relation, see whether the top two stack entries are
* joinable according to the desirable_join() heuristics. If so, join
* them into one stack entry, and try again to combine with the next stack
* entry down (if any). When the stack top is no longer joinable,
* continue to the next input relation. After we have pushed the last
* input relation, the heuristics are disabled and we force joining all
* the remaining stack entries.
*
* If desirable_join() always returns true, this produces a straight
* left-to-right join just like the old code. Otherwise we may produce a
* bushy plan or a left/right-sided plan that really corresponds to some
* tour other than the one given. To the extent that the heuristics are
* helpful, however, this will be a better plan than the raw tour.
*
* Also, when a join attempt fails (because of OJ or IN constraints), we
* may be able to recover and produce a workable plan, where the old code
* just had to give up. This case acts the same as a false result from
* desirable_join().
*/
for (rel_count = 0; rel_count < num_gene; rel_count++)
{
int cur_rel_index;
/* Get the next input relation and push it */
cur_rel_index = (int) tour[rel_count];
stack[stack_depth] = (RelOptInfo *) list_nth(evaldata->initial_rels,
cur_rel_index - 1);
stack_depth++;
/*
* While it's feasible, pop the top two stack entries and replace with
* their join.
*/
while (stack_depth >= 2)
{
RelOptInfo *outer_rel = stack[stack_depth - 2];
RelOptInfo *inner_rel = stack[stack_depth - 1];
/*
* Don't pop if heuristics say not to join now. However, once we
* have exhausted the input, the heuristics can't prevent popping.
*/
if (rel_count < num_gene - 1 &&
!desirable_join(evaldata->root, outer_rel, inner_rel))
break;
/*
* Construct a RelOptInfo representing the join of these two input
* relations. Note that we expect the joinrel not to exist in
* root->join_rel_list yet, and so the paths constructed for it
* will only include the ones we want.
*/
joinrel = make_join_rel(evaldata->root, outer_rel, inner_rel);
/* Can't pop stack here if join order is not valid */
if (!joinrel)
break;
/* Find and save the cheapest paths for this rel */
set_cheapest(joinrel);
/* Pop the stack and replace the inputs with their join */
stack_depth--;
stack[stack_depth - 1] = joinrel;
}
}
/* Did we succeed in forming a single join relation? */
if (stack_depth == 1)
joinrel = stack[0];
else
joinrel = NULL;
pfree(stack);
return joinrel;
}
/*
* Heuristics for gimme_tree: do we want to join these two relations?
*/
static bool
desirable_join(PlannerInfo *root,
RelOptInfo *outer_rel, RelOptInfo *inner_rel)
{
/*
* Join if there is an applicable join clause, or if there is a join
* order restriction forcing these rels to be joined.
*/
if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
have_join_order_restriction(root, outer_rel, inner_rel))
return true;
/* Otherwise postpone the join till later. */
return false;
}