267 lines
8.7 KiB
C
267 lines
8.7 KiB
C
/*------------------------------------------------------------------------
 *
 * geqo_eval.c
 *	  Routines to evaluate query trees
 *
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.85 2007/02/16 00:14:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
/* contributed by:
   =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
   *  Martin Utesch              * Institute of Automatic Control      *
   =                             = University of Mining and Technology =
   *  utesch@aut.tu-freiberg.de  * Freiberg, Germany                   *
   =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
#include <float.h>
|
|
#include <limits.h>
|
|
#include <math.h>
|
|
|
|
#include "optimizer/geqo.h"
|
|
#include "optimizer/joininfo.h"
|
|
#include "optimizer/pathnode.h"
|
|
#include "optimizer/paths.h"
|
|
#include "utils/memutils.h"
|
|
|
|
|
|
static bool desirable_join(PlannerInfo *root,
|
|
RelOptInfo *outer_rel, RelOptInfo *inner_rel);
|
|
|
|
|
|
/*
|
|
* geqo_eval
|
|
*
|
|
* Returns cost of a query tree as an individual of the population.
|
|
*/
|
|
Cost
|
|
geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
|
|
{
|
|
MemoryContext mycontext;
|
|
MemoryContext oldcxt;
|
|
RelOptInfo *joinrel;
|
|
Cost fitness;
|
|
int savelength;
|
|
struct HTAB *savehash;
|
|
|
|
/*
|
|
* Because gimme_tree considers both left- and right-sided trees, there is
|
|
* no difference between a tour (a,b,c,d,...) and a tour (b,a,c,d,...) ---
|
|
* the same join orders will be considered. To avoid redundant cost
|
|
* calculations, we simply reject tours where tour[0] > tour[1], assigning
|
|
* them an artificially bad fitness.
|
|
*
|
|
* init_tour() is aware of this rule and so we should never reject a tour
|
|
* during the initial filling of the pool. It seems difficult to persuade
|
|
* the recombination logic never to break the rule, however.
|
|
*/
|
|
if (num_gene >= 2 && tour[0] > tour[1])
|
|
return DBL_MAX;
|
|
|
|
/*
|
|
* Create a private memory context that will hold all temp storage
|
|
* allocated inside gimme_tree().
|
|
*
|
|
* Since geqo_eval() will be called many times, we can't afford to let all
|
|
* that memory go unreclaimed until end of statement. Note we make the
|
|
* temp context a child of the planner's normal context, so that it will
|
|
* be freed even if we abort via ereport(ERROR).
|
|
*/
|
|
mycontext = AllocSetContextCreate(CurrentMemoryContext,
|
|
"GEQO",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
|
oldcxt = MemoryContextSwitchTo(mycontext);
|
|
|
|
/*
|
|
* gimme_tree will add entries to root->join_rel_list, which may or may
|
|
* not already contain some entries. The newly added entries will be
|
|
* recycled by the MemoryContextDelete below, so we must ensure that the
|
|
* list is restored to its former state before exiting. We can do this by
|
|
* truncating the list to its original length. NOTE this assumes that any
|
|
* added entries are appended at the end!
|
|
*
|
|
* We also must take care not to mess up the outer join_rel_hash, if there
|
|
* is one. We can do this by just temporarily setting the link to NULL.
|
|
* (If we are dealing with enough join rels, which we very likely are, a
|
|
* new hash table will get built and used locally.)
|
|
*/
|
|
savelength = list_length(evaldata->root->join_rel_list);
|
|
savehash = evaldata->root->join_rel_hash;
|
|
|
|
evaldata->root->join_rel_hash = NULL;
|
|
|
|
/* construct the best path for the given combination of relations */
|
|
joinrel = gimme_tree(tour, num_gene, evaldata);
|
|
|
|
/*
|
|
* compute fitness
|
|
*
|
|
* XXX geqo does not currently support optimization for partial result
|
|
* retrieval --- how to fix?
|
|
*/
|
|
if (joinrel)
|
|
fitness = joinrel->cheapest_total_path->total_cost;
|
|
else
|
|
fitness = DBL_MAX;
|
|
|
|
/*
|
|
* Restore join_rel_list to its former state, and put back original
|
|
* hashtable if any.
|
|
*/
|
|
evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list,
|
|
savelength);
|
|
evaldata->root->join_rel_hash = savehash;
|
|
|
|
/* release all the memory acquired within gimme_tree */
|
|
MemoryContextSwitchTo(oldcxt);
|
|
MemoryContextDelete(mycontext);
|
|
|
|
return fitness;
|
|
}
|
|
|
|
/*
|
|
* gimme_tree
|
|
* Form planner estimates for a join tree constructed in the specified
|
|
* order.
|
|
*
|
|
* 'tour' is the proposed join order, of length 'num_gene'
|
|
* 'evaldata' contains the context we need
|
|
*
|
|
* Returns a new join relation whose cheapest path is the best plan for
|
|
* this join order. NB: will return NULL if join order is invalid.
|
|
*
|
|
* The original implementation of this routine always joined in the specified
|
|
* order, and so could only build left-sided plans (and right-sided and
|
|
* mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
|
|
* It could never produce a "bushy" plan. This had a couple of big problems,
|
|
* of which the worst was that as of 7.4, there are situations involving IN
|
|
* subqueries where the only valid plans are bushy.
|
|
*
|
|
* The present implementation takes the given tour as a guideline, but
|
|
* postpones joins that seem unsuitable according to some heuristic rules.
|
|
* This allows correct bushy plans to be generated at need, and as a nice
|
|
* side-effect it seems to materially improve the quality of the generated
|
|
* plans.
|
|
*/
|
|
RelOptInfo *
|
|
gimme_tree(Gene *tour, int num_gene, GeqoEvalData *evaldata)
|
|
{
|
|
RelOptInfo **stack;
|
|
int stack_depth;
|
|
RelOptInfo *joinrel;
|
|
int rel_count;
|
|
|
|
/*
|
|
* Create a stack to hold not-yet-joined relations.
|
|
*/
|
|
stack = (RelOptInfo **) palloc(num_gene * sizeof(RelOptInfo *));
|
|
stack_depth = 0;
|
|
|
|
/*
|
|
* Push each relation onto the stack in the specified order. After
|
|
* pushing each relation, see whether the top two stack entries are
|
|
* joinable according to the desirable_join() heuristics. If so, join
|
|
* them into one stack entry, and try again to combine with the next stack
|
|
* entry down (if any). When the stack top is no longer joinable,
|
|
* continue to the next input relation. After we have pushed the last
|
|
* input relation, the heuristics are disabled and we force joining all
|
|
* the remaining stack entries.
|
|
*
|
|
* If desirable_join() always returns true, this produces a straight
|
|
* left-to-right join just like the old code. Otherwise we may produce a
|
|
* bushy plan or a left/right-sided plan that really corresponds to some
|
|
* tour other than the one given. To the extent that the heuristics are
|
|
* helpful, however, this will be a better plan than the raw tour.
|
|
*
|
|
* Also, when a join attempt fails (because of OJ or IN constraints), we
|
|
* may be able to recover and produce a workable plan, where the old code
|
|
* just had to give up. This case acts the same as a false result from
|
|
* desirable_join().
|
|
*/
|
|
for (rel_count = 0; rel_count < num_gene; rel_count++)
|
|
{
|
|
int cur_rel_index;
|
|
|
|
/* Get the next input relation and push it */
|
|
cur_rel_index = (int) tour[rel_count];
|
|
stack[stack_depth] = (RelOptInfo *) list_nth(evaldata->initial_rels,
|
|
cur_rel_index - 1);
|
|
stack_depth++;
|
|
|
|
/*
|
|
* While it's feasible, pop the top two stack entries and replace with
|
|
* their join.
|
|
*/
|
|
while (stack_depth >= 2)
|
|
{
|
|
RelOptInfo *outer_rel = stack[stack_depth - 2];
|
|
RelOptInfo *inner_rel = stack[stack_depth - 1];
|
|
|
|
/*
|
|
* Don't pop if heuristics say not to join now. However, once we
|
|
* have exhausted the input, the heuristics can't prevent popping.
|
|
*/
|
|
if (rel_count < num_gene - 1 &&
|
|
!desirable_join(evaldata->root, outer_rel, inner_rel))
|
|
break;
|
|
|
|
/*
|
|
* Construct a RelOptInfo representing the join of these two input
|
|
* relations. Note that we expect the joinrel not to exist in
|
|
* root->join_rel_list yet, and so the paths constructed for it
|
|
* will only include the ones we want.
|
|
*/
|
|
joinrel = make_join_rel(evaldata->root, outer_rel, inner_rel);
|
|
|
|
/* Can't pop stack here if join order is not valid */
|
|
if (!joinrel)
|
|
break;
|
|
|
|
/* Find and save the cheapest paths for this rel */
|
|
set_cheapest(joinrel);
|
|
|
|
/* Pop the stack and replace the inputs with their join */
|
|
stack_depth--;
|
|
stack[stack_depth - 1] = joinrel;
|
|
}
|
|
}
|
|
|
|
/* Did we succeed in forming a single join relation? */
|
|
if (stack_depth == 1)
|
|
joinrel = stack[0];
|
|
else
|
|
joinrel = NULL;
|
|
|
|
pfree(stack);
|
|
|
|
return joinrel;
|
|
}
|
|
|
|
/*
|
|
* Heuristics for gimme_tree: do we want to join these two relations?
|
|
*/
|
|
static bool
|
|
desirable_join(PlannerInfo *root,
|
|
RelOptInfo *outer_rel, RelOptInfo *inner_rel)
|
|
{
|
|
/*
|
|
* Join if there is an applicable join clause, or if there is a join
|
|
* order restriction forcing these rels to be joined.
|
|
*/
|
|
if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
|
|
have_join_order_restriction(root, outer_rel, inner_rel))
|
|
return true;
|
|
|
|
/* Otherwise postpone the join till later. */
|
|
return false;
|
|
}
|