mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-28 21:01:48 +02:00
Revise generation of hashjoin paths: generate one path per
hashjoinable clause, not one path for a randomly-chosen element of each set of clauses with the same join operator. That is, if you wrote SELECT ... WHERE t1.f1 = t2.f2 and t1.f3 = t2.f4, and both '=' ops were the same opcode (say, all four fields are int4), then the system would either consider hashing on f1=f2 or on f3=f4, but it would *not* consider both possibilities. Boo hiss. Also, revise estimation of hashjoin costs to include a penalty when the inner join var has a high disbursion --- ie, the most common value is pretty common. This tends to lead to badly skewed hash bucket occupancy and way more comparisons than you'd expect on average. I imagine that the cost calculation still needs tweaking, but at least it generates a more reasonable plan than before on George Young's example.
This commit is contained in:
parent
b7883d7e3a
commit
e1fad50a5d
@ -3,11 +3,22 @@
|
|||||||
* costsize.c
|
* costsize.c
|
||||||
* Routines to compute (and set) relation sizes and path costs
|
* Routines to compute (and set) relation sizes and path costs
|
||||||
*
|
*
|
||||||
|
* Path costs are measured in units of disk accesses: one page fetch
|
||||||
|
* has cost 1. The other primitive unit is the CPU time required to
|
||||||
|
* process one tuple, which we set at "_cpu_page_weight_" of a page
|
||||||
|
* fetch. Obviously, the CPU time per tuple depends on the query
|
||||||
|
* involved, but the relative CPU and disk speeds of a given platform
|
||||||
|
* are so variable that we are lucky if we can get useful numbers
|
||||||
|
* at all. _cpu_page_weight_ is user-settable, in case a particular
|
||||||
|
* user is clueful enough to have a better-than-default estimate
|
||||||
|
* of the ratio for his platform. There is also _cpu_index_page_weight_,
|
||||||
|
* the cost to process a tuple of an index during an index scan.
|
||||||
|
*
|
||||||
|
*
|
||||||
* Copyright (c) 1994, Regents of the University of California
|
* Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
*
|
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.43 1999/07/16 04:59:14 momjian Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.44 1999/08/06 04:00:15 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -15,6 +26,7 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#ifdef HAVE_LIMITS_H
|
#ifdef HAVE_LIMITS_H
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#ifndef MAXINT
|
#ifndef MAXINT
|
||||||
@ -26,25 +38,24 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "miscadmin.h"
|
||||||
#include "optimizer/cost.h"
|
#include "optimizer/cost.h"
|
||||||
#include "optimizer/internal.h"
|
#include "optimizer/internal.h"
|
||||||
#include "optimizer/tlist.h"
|
#include "optimizer/tlist.h"
|
||||||
#include "utils/lsyscache.h"
|
#include "utils/lsyscache.h"
|
||||||
|
|
||||||
extern int NBuffers;
|
|
||||||
|
|
||||||
|
static int compute_targetlist_width(List *targetlist);
|
||||||
static int compute_attribute_width(TargetEntry *tlistentry);
|
static int compute_attribute_width(TargetEntry *tlistentry);
|
||||||
static double relation_byte_size(int tuples, int width);
|
static double relation_byte_size(int tuples, int width);
|
||||||
static double base_log(double x, double b);
|
static double base_log(double x, double b);
|
||||||
static int compute_targetlist_width(List *targetlist);
|
|
||||||
|
|
||||||
int _disable_cost_ = 30000000;
|
int _disable_cost_ = 30000000;
|
||||||
|
|
||||||
bool _enable_seqscan_ = true;
|
bool _enable_seqscan_ = true;
|
||||||
bool _enable_indexscan_ = true;
|
bool _enable_indexscan_ = true;
|
||||||
bool _enable_sort_ = true;
|
bool _enable_sort_ = true;
|
||||||
bool _enable_hash_ = true;
|
|
||||||
bool _enable_nestloop_ = true;
|
bool _enable_nestloop_ = true;
|
||||||
bool _enable_mergejoin_ = true;
|
bool _enable_mergejoin_ = true;
|
||||||
bool _enable_hashjoin_ = true;
|
bool _enable_hashjoin_ = true;
|
||||||
@ -316,61 +327,68 @@ cost_mergejoin(Cost outercost,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* cost_hashjoin-- XXX HASH
|
* cost_hashjoin
|
||||||
|
*
|
||||||
* 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
|
* 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
|
||||||
* outer and inner relations
|
* outer and inner relations
|
||||||
* 'outerkeys' and 'innerkeys' are lists of the keys to be used
|
|
||||||
* to hash the outer and inner relations
|
|
||||||
* 'outersize' and 'innersize' are the number of tuples in the outer
|
* 'outersize' and 'innersize' are the number of tuples in the outer
|
||||||
* and inner relations
|
* and inner relations
|
||||||
* 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
|
* 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
|
||||||
* of the tuples of the outer and inner relations
|
* of the tuples of the outer and inner relations
|
||||||
|
* 'innerdisbursion' is an estimate of the disbursion statistic
|
||||||
|
* for the inner hash key.
|
||||||
*
|
*
|
||||||
* Returns a flonum.
|
* Returns a flonum.
|
||||||
*/
|
*/
|
||||||
Cost
|
Cost
|
||||||
cost_hashjoin(Cost outercost,
|
cost_hashjoin(Cost outercost,
|
||||||
Cost innercost,
|
Cost innercost,
|
||||||
List *outerkeys,
|
|
||||||
List *innerkeys,
|
|
||||||
int outersize,
|
int outersize,
|
||||||
int innersize,
|
int innersize,
|
||||||
int outerwidth,
|
int outerwidth,
|
||||||
int innerwidth)
|
int innerwidth,
|
||||||
|
Cost innerdisbursion)
|
||||||
{
|
{
|
||||||
Cost temp = 0;
|
Cost temp = 0;
|
||||||
int outerpages = page_size(outersize, outerwidth);
|
double outerbytes = relation_byte_size(outersize, outerwidth);
|
||||||
int innerpages = page_size(innersize, innerwidth);
|
double innerbytes = relation_byte_size(innersize, innerwidth);
|
||||||
|
long hashtablebytes = SortMem * 1024L;
|
||||||
|
|
||||||
if (!_enable_hashjoin_)
|
if (!_enable_hashjoin_)
|
||||||
temp += _disable_cost_;
|
temp += _disable_cost_;
|
||||||
|
|
||||||
/*
|
|
||||||
* Bias against putting larger relation on inside.
|
|
||||||
*
|
|
||||||
* Code used to use "outerpages < innerpages" but that has poor
|
|
||||||
* resolution when both relations are small.
|
|
||||||
*/
|
|
||||||
if (relation_byte_size(outersize, outerwidth) <
|
|
||||||
relation_byte_size(innersize, innerwidth))
|
|
||||||
temp += _disable_cost_;
|
|
||||||
|
|
||||||
/* cost of source data */
|
/* cost of source data */
|
||||||
temp += outercost + innercost;
|
temp += outercost + innercost;
|
||||||
|
|
||||||
/* cost of computing hash function: must do it once per tuple */
|
/* cost of computing hash function: must do it once per tuple */
|
||||||
temp += _cpu_page_weight_ * (outersize + innersize);
|
temp += _cpu_page_weight_ * (outersize + innersize);
|
||||||
|
|
||||||
/* cost of main-memory hashtable */
|
/* the number of tuple comparisons needed is the number of outer
|
||||||
temp += (innerpages < NBuffers) ? innerpages : NBuffers;
|
* tuples times the typical hash bucket size, which we estimate
|
||||||
|
* conservatively as the inner disbursion times the inner tuple
|
||||||
|
* count. The cost per comparison is set at _cpu_index_page_weight_;
|
||||||
|
* is that reasonable, or do we need another basic parameter?
|
||||||
|
*/
|
||||||
|
temp += _cpu_index_page_weight_ * outersize *
|
||||||
|
(innersize * innerdisbursion);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if inner relation is too big then we will need to "batch" the join,
|
* if inner relation is too big then we will need to "batch" the join,
|
||||||
* which implies writing and reading most of the tuples to disk an
|
* which implies writing and reading most of the tuples to disk an
|
||||||
* extra time.
|
* extra time. Charge one cost unit per page of I/O.
|
||||||
*/
|
*/
|
||||||
if (innerpages > NBuffers)
|
if (innerbytes > hashtablebytes)
|
||||||
temp += 2 * (outerpages + innerpages);
|
temp += 2 * (page_size(outersize, outerwidth) +
|
||||||
|
page_size(innersize, innerwidth));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bias against putting larger relation on inside. We don't want
|
||||||
|
* an absolute prohibition, though, since larger relation might have
|
||||||
|
* better disbursion --- and we can't trust the size estimates
|
||||||
|
* unreservedly, anyway.
|
||||||
|
*/
|
||||||
|
if (innerbytes > outerbytes)
|
||||||
|
temp *= 1.1; /* is this an OK fudge factor? */
|
||||||
|
|
||||||
Assert(temp >= 0);
|
Assert(temp >= 0);
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.42 1999/07/27 06:23:12 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.43 1999/08/06 04:00:15 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -16,11 +16,14 @@
|
|||||||
|
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/htup.h"
|
||||||
|
#include "catalog/pg_attribute.h"
|
||||||
|
#include "optimizer/clauses.h"
|
||||||
#include "optimizer/cost.h"
|
#include "optimizer/cost.h"
|
||||||
#include "optimizer/pathnode.h"
|
#include "optimizer/pathnode.h"
|
||||||
#include "optimizer/paths.h"
|
#include "optimizer/paths.h"
|
||||||
|
#include "parser/parsetree.h"
|
||||||
|
#include "utils/syscache.h"
|
||||||
|
|
||||||
static Path *best_innerjoin(List *join_paths, List *outer_relid);
|
static Path *best_innerjoin(List *join_paths, List *outer_relid);
|
||||||
static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
|
static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
|
||||||
@ -30,8 +33,9 @@ static List *match_unsorted_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, Rel
|
|||||||
List *mergeinfo_list);
|
List *mergeinfo_list);
|
||||||
static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
|
static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
|
||||||
List *innerpath_list, List *mergeinfo_list);
|
List *innerpath_list, List *mergeinfo_list);
|
||||||
static List *hash_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
|
static List *hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
|
||||||
List *hashinfo_list);
|
RelOptInfo *outerrel, RelOptInfo *innerrel);
|
||||||
|
static Cost estimate_disbursion(Query *root, Var *var);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* update_rels_pathlist_for_joins
|
* update_rels_pathlist_for_joins
|
||||||
@ -46,7 +50,7 @@ static List *hash_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, Rel
|
|||||||
*
|
*
|
||||||
* 'joinrels' is the list of relation entries to be joined
|
* 'joinrels' is the list of relation entries to be joined
|
||||||
*
|
*
|
||||||
* Modifies the pathlist field of the appropriate rel node to contain
|
* Modifies the pathlist field of each joinrel node to contain
|
||||||
* the unique join paths.
|
* the unique join paths.
|
||||||
* If bushy trees are considered, may modify the relid field of the
|
* If bushy trees are considered, may modify the relid field of the
|
||||||
* join rel nodes to flatten the lists.
|
* join rel nodes to flatten the lists.
|
||||||
@ -56,8 +60,6 @@ static List *hash_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, Rel
|
|||||||
void
|
void
|
||||||
update_rels_pathlist_for_joins(Query *root, List *joinrels)
|
update_rels_pathlist_for_joins(Query *root, List *joinrels)
|
||||||
{
|
{
|
||||||
List *mergeinfo_list = NIL;
|
|
||||||
List *hashinfo_list = NIL;
|
|
||||||
List *j;
|
List *j;
|
||||||
|
|
||||||
foreach(j, joinrels)
|
foreach(j, joinrels)
|
||||||
@ -69,13 +71,23 @@ update_rels_pathlist_for_joins(Query *root, List *joinrels)
|
|||||||
RelOptInfo *outerrel;
|
RelOptInfo *outerrel;
|
||||||
Path *bestinnerjoin;
|
Path *bestinnerjoin;
|
||||||
List *pathlist = NIL;
|
List *pathlist = NIL;
|
||||||
|
List *mergeinfo_list = NIL;
|
||||||
|
|
||||||
/* flatten out relids later in this function */
|
/*
|
||||||
|
* On entry, joinrel->relids is a list of two sublists of relids,
|
||||||
|
* namely the outer and inner member relids. Extract these sublists
|
||||||
|
* and change joinrel->relids to a flattened single list.
|
||||||
|
* (Use listCopy so as not to damage the member lists...)
|
||||||
|
*/
|
||||||
outerrelids = lfirst(joinrel->relids);
|
outerrelids = lfirst(joinrel->relids);
|
||||||
innerrelids = lsecond(joinrel->relids);
|
innerrelids = lsecond(joinrel->relids);
|
||||||
|
|
||||||
|
joinrel->relids = nconc(listCopy(outerrelids),
|
||||||
|
listCopy(innerrelids));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* base relation id is an integer and join relation relid is a
|
* Get the corresponding RelOptInfos for the outer and inner sides.
|
||||||
|
* Base relation id is an integer and join relation relid is a
|
||||||
* list of integers.
|
* list of integers.
|
||||||
*/
|
*/
|
||||||
innerrel = (length(innerrelids) == 1) ?
|
innerrel = (length(innerrelids) == 1) ?
|
||||||
@ -85,20 +97,15 @@ update_rels_pathlist_for_joins(Query *root, List *joinrels)
|
|||||||
get_base_rel(root, lfirsti(outerrelids)) :
|
get_base_rel(root, lfirsti(outerrelids)) :
|
||||||
get_join_rel(root, outerrelids);
|
get_join_rel(root, outerrelids);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the best inner join for match_unsorted_outer.
|
||||||
|
*/
|
||||||
bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
|
bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
|
||||||
|
|
||||||
if (_enable_mergejoin_)
|
if (_enable_mergejoin_)
|
||||||
mergeinfo_list = group_clauses_by_order(joinrel->restrictinfo,
|
mergeinfo_list = group_clauses_by_order(joinrel->restrictinfo,
|
||||||
innerrel->relids);
|
innerrel->relids);
|
||||||
|
|
||||||
if (_enable_hashjoin_)
|
|
||||||
hashinfo_list = group_clauses_by_hashop(joinrel->restrictinfo,
|
|
||||||
innerrel->relids);
|
|
||||||
|
|
||||||
/* need to flatten the relids list */
|
|
||||||
joinrel->relids = nconc(listCopy(outerrelids),
|
|
||||||
listCopy(innerrelids));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 1. Consider mergejoin paths where both relations must be
|
* 1. Consider mergejoin paths where both relations must be
|
||||||
* explicitly sorted.
|
* explicitly sorted.
|
||||||
@ -136,10 +143,13 @@ update_rels_pathlist_for_joins(Query *root, List *joinrels)
|
|||||||
* 4. Consider paths where both outer and inner relations must be
|
* 4. Consider paths where both outer and inner relations must be
|
||||||
* hashed before being joined.
|
* hashed before being joined.
|
||||||
*/
|
*/
|
||||||
pathlist = add_pathlist(joinrel, pathlist,
|
if (_enable_hashjoin_)
|
||||||
hash_inner_and_outer(joinrel, outerrel,
|
pathlist = add_pathlist(joinrel, pathlist,
|
||||||
innerrel, hashinfo_list));
|
hash_inner_and_outer(root, joinrel,
|
||||||
|
outerrel,
|
||||||
|
innerrel));
|
||||||
|
|
||||||
|
/* Save the completed pathlist in the join rel */
|
||||||
joinrel->pathlist = pathlist;
|
joinrel->pathlist = pathlist;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -488,70 +498,124 @@ match_unsorted_inner(RelOptInfo *joinrel,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* hash_inner_and_outer-- XXX HASH
|
* hash_inner_and_outer
|
||||||
* Create hashjoin join paths by explicitly hashing both the outer and
|
* Create hashjoin join paths by explicitly hashing both the outer and
|
||||||
* inner join relations on each available hash op.
|
* inner join relations of each available hash clause.
|
||||||
*
|
*
|
||||||
* 'joinrel' is the join relation
|
* 'joinrel' is the join relation
|
||||||
* 'outerrel' is the outer join relation
|
* 'outerrel' is the outer join relation
|
||||||
* 'innerrel' is the inner join relation
|
* 'innerrel' is the inner join relation
|
||||||
* 'hashinfo_list' is a list of nodes containing info on(hashjoinable)
|
|
||||||
* clauses for joining the relations
|
|
||||||
*
|
*
|
||||||
* Returns a list of hashjoin paths.
|
* Returns a list of hashjoin paths.
|
||||||
*/
|
*/
|
||||||
static List *
|
static List *
|
||||||
hash_inner_and_outer(RelOptInfo *joinrel,
|
hash_inner_and_outer(Query *root,
|
||||||
|
RelOptInfo *joinrel,
|
||||||
RelOptInfo *outerrel,
|
RelOptInfo *outerrel,
|
||||||
RelOptInfo *innerrel,
|
RelOptInfo *innerrel)
|
||||||
List *hashinfo_list)
|
|
||||||
{
|
{
|
||||||
List *hjoin_list = NIL;
|
List *hpath_list = NIL;
|
||||||
List *i;
|
List *i;
|
||||||
|
|
||||||
foreach(i, hashinfo_list)
|
foreach(i, joinrel->restrictinfo)
|
||||||
{
|
{
|
||||||
HashInfo *xhashinfo = (HashInfo *) lfirst(i);
|
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
|
||||||
List *outerkeys;
|
Oid hashjoinop = restrictinfo->hashjoinoperator;
|
||||||
List *innerkeys;
|
|
||||||
List *hash_pathkeys;
|
|
||||||
HashPath *temp_node;
|
|
||||||
|
|
||||||
outerkeys = make_pathkeys_from_joinkeys(
|
/* we consider only clauses previously marked hashjoinable */
|
||||||
((JoinMethod *) xhashinfo)->jmkeys,
|
if (hashjoinop)
|
||||||
outerrel->targetlist,
|
{
|
||||||
OUTER);
|
Expr *clause = restrictinfo->clause;
|
||||||
innerkeys = make_pathkeys_from_joinkeys(
|
Var *leftop = get_leftop(clause);
|
||||||
((JoinMethod *) xhashinfo)->jmkeys,
|
Var *rightop = get_rightop(clause);
|
||||||
innerrel->targetlist,
|
JoinKey *joinkey = makeNode(JoinKey);
|
||||||
INNER);
|
List *joinkey_list;
|
||||||
|
List *outerkeys;
|
||||||
|
List *innerkeys;
|
||||||
|
Cost innerdisbursion;
|
||||||
|
List *hash_pathkeys;
|
||||||
|
HashPath *hash_path;
|
||||||
|
|
||||||
/*
|
/* construct joinkey and pathkeys for this clause */
|
||||||
* We cannot assume that the output of the hashjoin appears in any
|
if (intMember(leftop->varno, innerrel->relids))
|
||||||
* particular order, so it should have NIL pathkeys.
|
{
|
||||||
*/
|
joinkey->outer = rightop;
|
||||||
#ifdef NOT_USED
|
joinkey->inner = leftop;
|
||||||
hash_pathkeys = new_join_pathkeys(outerkeys,
|
}
|
||||||
joinrel->targetlist,
|
else
|
||||||
((JoinMethod *) xhashinfo)->clauses);
|
{
|
||||||
#else
|
joinkey->outer = leftop;
|
||||||
hash_pathkeys = NIL;
|
joinkey->inner = rightop;
|
||||||
#endif
|
}
|
||||||
|
joinkey_list = lcons(joinkey, NIL);
|
||||||
|
|
||||||
temp_node = create_hashjoin_path(joinrel,
|
outerkeys = make_pathkeys_from_joinkeys(joinkey_list,
|
||||||
outerrel->size,
|
outerrel->targetlist,
|
||||||
innerrel->size,
|
OUTER);
|
||||||
outerrel->width,
|
innerkeys = make_pathkeys_from_joinkeys(joinkey_list,
|
||||||
innerrel->width,
|
innerrel->targetlist,
|
||||||
(Path *) outerrel->cheapestpath,
|
INNER);
|
||||||
(Path *) innerrel->cheapestpath,
|
|
||||||
hash_pathkeys,
|
innerdisbursion = estimate_disbursion(root, joinkey->inner);
|
||||||
xhashinfo->hashop,
|
|
||||||
((JoinMethod *) xhashinfo)->clauses,
|
/*
|
||||||
outerkeys,
|
* We cannot assume that the output of the hashjoin appears in
|
||||||
innerkeys);
|
* any particular order, so it should have NIL pathkeys.
|
||||||
hjoin_list = lappend(hjoin_list, temp_node);
|
*/
|
||||||
|
hash_pathkeys = NIL;
|
||||||
|
|
||||||
|
hash_path = create_hashjoin_path(joinrel,
|
||||||
|
outerrel->size,
|
||||||
|
innerrel->size,
|
||||||
|
outerrel->width,
|
||||||
|
innerrel->width,
|
||||||
|
(Path *) outerrel->cheapestpath,
|
||||||
|
(Path *) innerrel->cheapestpath,
|
||||||
|
hash_pathkeys,
|
||||||
|
hashjoinop,
|
||||||
|
lcons(clause, NIL),
|
||||||
|
outerkeys,
|
||||||
|
innerkeys,
|
||||||
|
innerdisbursion);
|
||||||
|
hpath_list = lappend(hpath_list, hash_path);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return hjoin_list;
|
return hpath_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Estimate disbursion of the specified Var
|
||||||
|
* Generate some kind of estimate, no matter what...
|
||||||
|
*
|
||||||
|
* We use a default of 0.1 if we can't figure out anything better.
|
||||||
|
* This will typically discourage use of a hash rather strongly,
|
||||||
|
* if the inner relation is large. We do not want to hash unless
|
||||||
|
* we know that the inner rel is well-dispersed (or the alternatives
|
||||||
|
* seem much worse).
|
||||||
|
*/
|
||||||
|
static Cost
|
||||||
|
estimate_disbursion(Query *root, Var *var)
|
||||||
|
{
|
||||||
|
Oid relid;
|
||||||
|
HeapTuple atp;
|
||||||
|
double disbursion;
|
||||||
|
|
||||||
|
if (! IsA(var, Var))
|
||||||
|
return 0.1;
|
||||||
|
|
||||||
|
relid = getrelid(var->varno, root->rtable);
|
||||||
|
|
||||||
|
atp = SearchSysCacheTuple(ATTNUM,
|
||||||
|
ObjectIdGetDatum(relid),
|
||||||
|
Int16GetDatum(var->varattno),
|
||||||
|
0, 0);
|
||||||
|
if (! HeapTupleIsValid(atp))
|
||||||
|
return 0.1;
|
||||||
|
|
||||||
|
disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
|
||||||
|
if (disbursion > 0.0)
|
||||||
|
return disbursion;
|
||||||
|
|
||||||
|
return 0.1;
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.52 1999/07/30 22:34:19 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.53 1999/08/06 04:00:17 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -550,7 +550,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* create_hashjoin_path-- XXX HASH
|
* create_hashjoin_path
|
||||||
* Creates a pathnode corresponding to a hash join between two relations.
|
* Creates a pathnode corresponding to a hash join between two relations.
|
||||||
*
|
*
|
||||||
* 'joinrel' is the join relation
|
* 'joinrel' is the join relation
|
||||||
@ -558,13 +558,14 @@ create_mergejoin_path(RelOptInfo *joinrel,
|
|||||||
* 'innersize' is the number of tuples in the inner relation
|
* 'innersize' is the number of tuples in the inner relation
|
||||||
* 'outerwidth' is the number of bytes per tuple in the outer relation
|
* 'outerwidth' is the number of bytes per tuple in the outer relation
|
||||||
* 'innerwidth' is the number of bytes per tuple in the inner relation
|
* 'innerwidth' is the number of bytes per tuple in the inner relation
|
||||||
* 'outer_path' is the outer path
|
* 'outer_path' is the cheapest outer path
|
||||||
* 'inner_path' is the inner path
|
* 'inner_path' is the cheapest inner path
|
||||||
* 'pathkeys' are the new keys of the join relation
|
* 'pathkeys' are the path keys of the new join path
|
||||||
* 'operator' is the hashjoin operator
|
* 'operator' is the hashjoin operator
|
||||||
* 'hashclauses' are the applicable join/restriction clauses
|
* 'hashclauses' is a list of the hash join clause (always a 1-element list)
|
||||||
* 'outerkeys' are the sort varkeys for the outer relation
|
* 'outerkeys' are the sort varkeys for the outer relation
|
||||||
* 'innerkeys' are the sort varkeys for the inner relation
|
* 'innerkeys' are the sort varkeys for the inner relation
|
||||||
|
* 'innerdisbursion' is an estimate of the disbursion of the inner hash key
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
HashPath *
|
HashPath *
|
||||||
@ -579,7 +580,8 @@ create_hashjoin_path(RelOptInfo *joinrel,
|
|||||||
Oid operator,
|
Oid operator,
|
||||||
List *hashclauses,
|
List *hashclauses,
|
||||||
List *outerkeys,
|
List *outerkeys,
|
||||||
List *innerkeys)
|
List *innerkeys,
|
||||||
|
Cost innerdisbursion)
|
||||||
{
|
{
|
||||||
HashPath *pathnode = makeNode(HashPath);
|
HashPath *pathnode = makeNode(HashPath);
|
||||||
|
|
||||||
@ -600,10 +602,9 @@ create_hashjoin_path(RelOptInfo *joinrel,
|
|||||||
pathnode->innerhashkeys = innerkeys;
|
pathnode->innerhashkeys = innerkeys;
|
||||||
pathnode->jpath.path.path_cost = cost_hashjoin(outer_path->path_cost,
|
pathnode->jpath.path.path_cost = cost_hashjoin(outer_path->path_cost,
|
||||||
inner_path->path_cost,
|
inner_path->path_cost,
|
||||||
outerkeys,
|
|
||||||
innerkeys,
|
|
||||||
outersize, innersize,
|
outersize, innersize,
|
||||||
outerwidth, innerwidth);
|
outerwidth, innerwidth,
|
||||||
|
innerdisbursion);
|
||||||
|
|
||||||
return pathnode;
|
return pathnode;
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 1994, Regents of the University of California
|
* Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $Id: cost.h,v 1.22 1999/07/24 23:21:05 tgl Exp $
|
* $Id: cost.h,v 1.23 1999/08/06 04:00:13 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -28,7 +28,6 @@
|
|||||||
extern bool _enable_seqscan_;
|
extern bool _enable_seqscan_;
|
||||||
extern bool _enable_indexscan_;
|
extern bool _enable_indexscan_;
|
||||||
extern bool _enable_sort_;
|
extern bool _enable_sort_;
|
||||||
extern bool _enable_hash_;
|
|
||||||
extern bool _enable_nestloop_;
|
extern bool _enable_nestloop_;
|
||||||
extern bool _enable_mergejoin_;
|
extern bool _enable_mergejoin_;
|
||||||
extern bool _enable_hashjoin_;
|
extern bool _enable_hashjoin_;
|
||||||
@ -43,9 +42,10 @@ extern Cost cost_nestloop(Cost outercost, Cost innercost, int outertuples,
|
|||||||
extern Cost cost_mergejoin(Cost outercost, Cost innercost,
|
extern Cost cost_mergejoin(Cost outercost, Cost innercost,
|
||||||
List *outersortkeys, List *innersortkeys,
|
List *outersortkeys, List *innersortkeys,
|
||||||
int outersize, int innersize, int outerwidth, int innerwidth);
|
int outersize, int innersize, int outerwidth, int innerwidth);
|
||||||
extern Cost cost_hashjoin(Cost outercost, Cost innercost, List *outerkeys,
|
extern Cost cost_hashjoin(Cost outercost, Cost innercost,
|
||||||
List *innerkeys, int outersize, int innersize,
|
int outersize, int innersize,
|
||||||
int outerwidth, int innerwidth);
|
int outerwidth, int innerwidth,
|
||||||
|
Cost innerdisbursion);
|
||||||
extern int compute_rel_size(RelOptInfo *rel);
|
extern int compute_rel_size(RelOptInfo *rel);
|
||||||
extern int compute_rel_width(RelOptInfo *rel);
|
extern int compute_rel_width(RelOptInfo *rel);
|
||||||
extern int compute_joinrel_size(JoinPath *joinpath);
|
extern int compute_joinrel_size(JoinPath *joinpath);
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 1994, Regents of the University of California
|
* Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $Id: pathnode.h,v 1.19 1999/07/30 04:07:22 tgl Exp $
|
* $Id: pathnode.h,v 1.20 1999/08/06 04:00:13 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -33,9 +33,9 @@ extern MergePath *create_mergejoin_path(RelOptInfo *joinrel, int outersize,
|
|||||||
List *mergeclauses, List *outersortkeys, List *innersortkeys);
|
List *mergeclauses, List *outersortkeys, List *innersortkeys);
|
||||||
|
|
||||||
extern HashPath *create_hashjoin_path(RelOptInfo *joinrel, int outersize,
|
extern HashPath *create_hashjoin_path(RelOptInfo *joinrel, int outersize,
|
||||||
int innersize, int outerwidth, int innerwidth, Path *outer_path,
|
int innersize, int outerwidth, int innerwidth, Path *outer_path,
|
||||||
Path *inner_path, List *pathkeys, Oid operator, List *hashclauses,
|
Path *inner_path, List *pathkeys, Oid operator, List *hashclauses,
|
||||||
List *outerkeys, List *innerkeys);
|
List *outerkeys, List *innerkeys, Cost innerdisbursion);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* prototypes for rel.c
|
* prototypes for rel.c
|
||||||
|
Loading…
Reference in New Issue
Block a user