mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-28 21:01:48 +02:00
Make planner compute the number of hash buckets the same way that
nodeHash.c will compute it (by sharing code).
This commit is contained in:
parent
ccda1a672c
commit
01a819abe3
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* $Id: nodeHash.c,v 1.57 2001/05/27 20:42:18 tgl Exp $
|
||||
* $Id: nodeHash.c,v 1.58 2001/06/11 00:17:07 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -16,14 +16,12 @@
|
||||
* ExecHash - generate an in-memory hash table of the relation
|
||||
* ExecInitHash - initialize node and subnodes
|
||||
* ExecEndHash - shutdown node and subnodes
|
||||
*
|
||||
*/
|
||||
#include "postgres.h"

#include <sys/types.h>
#include <limits.h>
#include <math.h>

#include "executor/execdebug.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
|
||||
@ -209,111 +207,27 @@ ExecEndHash(Hash *node)
|
||||
* create a hashtable in shared memory for hashjoin.
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
#define FUDGE_FAC 2.0
|
||||
|
||||
HashJoinTable
|
||||
ExecHashTableCreate(Hash *node)
|
||||
{
|
||||
Plan *outerNode;
|
||||
double ntuples;
|
||||
int tupsize;
|
||||
double inner_rel_bytes;
|
||||
double hash_table_bytes;
|
||||
int nbatch;
|
||||
HashJoinTable hashtable;
|
||||
int nbuckets;
|
||||
Plan *outerNode;
|
||||
int totalbuckets;
|
||||
int bucketsize;
|
||||
int nbuckets;
|
||||
int nbatch;
|
||||
int i;
|
||||
MemoryContext oldcxt;
|
||||
|
||||
/*
|
||||
* Get information about the size of the relation to be hashed (it's
|
||||
* the "outer" subtree of this node, but the inner relation of the
|
||||
* hashjoin).
|
||||
*
|
||||
* Caution: this is only the planner's estimates, and so can't be trusted
|
||||
* too far. Apply a healthy fudge factor.
|
||||
* hashjoin). Compute the appropriate size of the hash table.
|
||||
*/
|
||||
outerNode = outerPlan(node);
|
||||
ntuples = outerNode->plan_rows;
|
||||
if (ntuples <= 0.0) /* force a plausible size if no info */
|
||||
ntuples = 1000.0;
|
||||
|
||||
/*
|
||||
* estimate tupsize based on footprint of tuple in hashtable... but
|
||||
* what about palloc overhead?
|
||||
*/
|
||||
tupsize = MAXALIGN(outerNode->plan_width) +
|
||||
MAXALIGN(sizeof(HashJoinTupleData));
|
||||
inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
|
||||
ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
|
||||
&totalbuckets, &nbuckets, &nbatch);
|
||||
|
||||
/*
|
||||
* Target hashtable size is SortMem kilobytes, but not less than
|
||||
* sqrt(estimated inner rel size), so as to avoid horrible
|
||||
* performance.
|
||||
*/
|
||||
hash_table_bytes = sqrt(inner_rel_bytes);
|
||||
if (hash_table_bytes < (SortMem * 1024L))
|
||||
hash_table_bytes = SortMem * 1024L;
|
||||
|
||||
/*
|
||||
* Count the number of hash buckets we want for the whole relation,
|
||||
* for an average bucket load of NTUP_PER_BUCKET (per virtual
|
||||
* bucket!).
|
||||
*/
|
||||
totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
|
||||
|
||||
/*
|
||||
* Count the number of buckets we think will actually fit in the
|
||||
* target memory size, at a loading of NTUP_PER_BUCKET (physical
|
||||
* buckets). NOTE: FUDGE_FAC here determines the fraction of the
|
||||
* hashtable space reserved to allow for nonuniform distribution of
|
||||
* hash values. Perhaps this should be a different number from the
|
||||
* other uses of FUDGE_FAC, but since we have no real good way to pick
|
||||
* either one...
|
||||
*/
|
||||
bucketsize = NTUP_PER_BUCKET * tupsize;
|
||||
nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
|
||||
if (nbuckets <= 0)
|
||||
nbuckets = 1;
|
||||
|
||||
if (totalbuckets <= nbuckets)
|
||||
{
|
||||
|
||||
/*
|
||||
* We have enough space, so no batching. In theory we could even
|
||||
* reduce nbuckets, but since that could lead to poor behavior if
|
||||
* estimated ntuples is much less than reality, it seems better to
|
||||
* make more buckets instead of fewer.
|
||||
*/
|
||||
totalbuckets = nbuckets;
|
||||
nbatch = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/*
|
||||
* Need to batch; compute how many batches we want to use. Note
|
||||
* that nbatch doesn't have to have anything to do with the ratio
|
||||
* totalbuckets/nbuckets; in fact, it is the number of groups we
|
||||
* will use for the part of the data that doesn't fall into the
|
||||
* first nbuckets hash buckets.
|
||||
*/
|
||||
nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
|
||||
hash_table_bytes);
|
||||
if (nbatch <= 0)
|
||||
nbatch = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now, totalbuckets is the number of (virtual) hashbuckets for the
|
||||
* whole relation, and nbuckets is the number of physical hashbuckets
|
||||
* we will use in the first pass. Data falling into the first
|
||||
* nbuckets virtual hashbuckets gets handled in the first pass;
|
||||
* everything else gets divided into nbatch batches to be processed in
|
||||
* additional passes.
|
||||
*/
|
||||
#ifdef HJDEBUG
|
||||
printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",
|
||||
nbatch, totalbuckets, nbuckets);
|
||||
@ -407,6 +321,117 @@ ExecHashTableCreate(Hash *node)
|
||||
return hashtable;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Compute appropriate size for hashtable given the estimated size of the
|
||||
* relation to be hashed (number of rows and average row width).
|
||||
*
|
||||
* Caution: the input is only the planner's estimates, and so can't be
|
||||
* trusted too far. Apply a healthy fudge factor.
|
||||
*
|
||||
* This is exported so that the planner's costsize.c can use it.
|
||||
*/
|
||||
|
||||
/* Target bucket loading (tuples per bucket) */
|
||||
#define NTUP_PER_BUCKET 10
|
||||
/* Fudge factor to allow for inaccuracy of input estimates */
|
||||
#define FUDGE_FAC 2.0
|
||||
|
||||
void
|
||||
ExecChooseHashTableSize(double ntuples, int tupwidth,
|
||||
int *virtualbuckets,
|
||||
int *physicalbuckets,
|
||||
int *numbatches)
|
||||
{
|
||||
int tupsize;
|
||||
double inner_rel_bytes;
|
||||
double hash_table_bytes;
|
||||
int nbatch;
|
||||
int nbuckets;
|
||||
int totalbuckets;
|
||||
int bucketsize;
|
||||
|
||||
/* Force a plausible relation size if no info */
|
||||
if (ntuples <= 0.0)
|
||||
ntuples = 1000.0;
|
||||
|
||||
/*
|
||||
* Estimate tupsize based on footprint of tuple in hashtable... but
|
||||
* what about palloc overhead?
|
||||
*/
|
||||
tupsize = MAXALIGN(tupwidth) + MAXALIGN(sizeof(HashJoinTupleData));
|
||||
inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
|
||||
|
||||
/*
|
||||
* Target hashtable size is SortMem kilobytes, but not less than
|
||||
* sqrt(estimated inner rel size), so as to avoid horrible
|
||||
* performance.
|
||||
*/
|
||||
hash_table_bytes = sqrt(inner_rel_bytes);
|
||||
if (hash_table_bytes < (SortMem * 1024L))
|
||||
hash_table_bytes = SortMem * 1024L;
|
||||
|
||||
/*
|
||||
* Count the number of hash buckets we want for the whole relation,
|
||||
* for an average bucket load of NTUP_PER_BUCKET (per virtual
|
||||
* bucket!).
|
||||
*/
|
||||
totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
|
||||
|
||||
/*
|
||||
* Count the number of buckets we think will actually fit in the
|
||||
* target memory size, at a loading of NTUP_PER_BUCKET (physical
|
||||
* buckets). NOTE: FUDGE_FAC here determines the fraction of the
|
||||
* hashtable space reserved to allow for nonuniform distribution of
|
||||
* hash values. Perhaps this should be a different number from the
|
||||
* other uses of FUDGE_FAC, but since we have no real good way to pick
|
||||
* either one...
|
||||
*/
|
||||
bucketsize = NTUP_PER_BUCKET * tupsize;
|
||||
nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
|
||||
if (nbuckets <= 0)
|
||||
nbuckets = 1;
|
||||
|
||||
if (totalbuckets <= nbuckets)
|
||||
{
|
||||
/*
|
||||
* We have enough space, so no batching. In theory we could even
|
||||
* reduce nbuckets, but since that could lead to poor behavior if
|
||||
* estimated ntuples is much less than reality, it seems better to
|
||||
* make more buckets instead of fewer.
|
||||
*/
|
||||
totalbuckets = nbuckets;
|
||||
nbatch = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Need to batch; compute how many batches we want to use. Note
|
||||
* that nbatch doesn't have to have anything to do with the ratio
|
||||
* totalbuckets/nbuckets; in fact, it is the number of groups we
|
||||
* will use for the part of the data that doesn't fall into the
|
||||
* first nbuckets hash buckets.
|
||||
*/
|
||||
nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
|
||||
hash_table_bytes);
|
||||
if (nbatch <= 0)
|
||||
nbatch = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now, totalbuckets is the number of (virtual) hashbuckets for the
|
||||
* whole relation, and nbuckets is the number of physical hashbuckets
|
||||
* we will use in the first pass. Data falling into the first
|
||||
* nbuckets virtual hashbuckets gets handled in the first pass;
|
||||
* everything else gets divided into nbatch batches to be processed in
|
||||
* additional passes.
|
||||
*/
|
||||
*virtualbuckets = totalbuckets;
|
||||
*physicalbuckets = nbuckets;
|
||||
*numbatches = nbatch;
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecHashTableDestroy
|
||||
*
|
||||
|
@ -42,7 +42,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.76 2001/06/10 02:59:35 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.77 2001/06/11 00:17:08 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -791,19 +791,19 @@ cost_hashjoin(Path *path, Query *root,
|
||||
* smart enough to figure out how the restrict clauses might change the
|
||||
* distribution, so this will have to do for now.
|
||||
*
|
||||
* The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
|
||||
* number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
|
||||
* a bucketsize fraction of NTUP_PER_BUCKET / ntuples. But that goal will
|
||||
* be reached only if the data values are uniformly distributed among the
|
||||
* buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
|
||||
* data values, and (b) a not-too-skewed data distribution. Otherwise the
|
||||
* buckets will be nonuniformly occupied. If the other relation in the join
|
||||
* has a similar distribution, the most-loaded buckets are exactly those
|
||||
* that will be probed most often. Therefore, the "average" bucket size for
|
||||
* costing purposes should really be taken as something close to the "worst
|
||||
* case" bucket size. We try to estimate this by first scaling up if there
|
||||
* are too few distinct data values, and then scaling up again by the
|
||||
* ratio of the most common value's frequency to the average frequency.
|
||||
* We can get the number of buckets the executor will use for the given
|
||||
* input relation. If the data were perfectly distributed, with the same
|
||||
* number of tuples going into each available bucket, then the bucketsize
|
||||
* fraction would be 1/nbuckets. But this happy state of affairs will occur
|
||||
* only if (a) there are at least nbuckets distinct data values, and (b)
|
||||
* we have a not-too-skewed data distribution. Otherwise the buckets will
|
||||
* be nonuniformly occupied. If the other relation in the join has a key
|
||||
* distribution similar to this one's, then the most-loaded buckets are
|
||||
* exactly those that will be probed most often. Therefore, the "average"
|
||||
* bucket size for costing purposes should really be taken as something close
|
||||
* to the "worst case" bucket size. We try to estimate this by adjusting the
|
||||
* fraction if there are too few distinct data values, and then scaling up
|
||||
* by the ratio of the most common value's frequency to the average frequency.
|
||||
*
|
||||
* If no statistics are available, use a default estimate of 0.1. This will
|
||||
* discourage use of a hash rather strongly if the inner relation is large,
|
||||
@ -815,11 +815,13 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
||||
{
|
||||
Oid relid;
|
||||
RelOptInfo *rel;
|
||||
int virtualbuckets;
|
||||
int physicalbuckets;
|
||||
int numbatches;
|
||||
HeapTuple tuple;
|
||||
Form_pg_statistic stats;
|
||||
double estfract,
|
||||
ndistinct,
|
||||
needdistinct,
|
||||
mcvfreq,
|
||||
avgfreq;
|
||||
float4 *numbers;
|
||||
@ -841,6 +843,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
||||
if (rel->tuples <= 0.0 || rel->rows <= 0.0)
|
||||
return 0.1; /* ensure we can divide below */
|
||||
|
||||
/* Get hash table size that executor would use for this relation */
|
||||
ExecChooseHashTableSize(rel->rows, rel->width,
|
||||
&virtualbuckets,
|
||||
&physicalbuckets,
|
||||
&numbatches);
|
||||
|
||||
tuple = SearchSysCache(STATRELATT,
|
||||
ObjectIdGetDatum(relid),
|
||||
Int16GetDatum(var->varattno),
|
||||
@ -857,7 +865,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
||||
case ObjectIdAttributeNumber:
|
||||
case SelfItemPointerAttributeNumber:
|
||||
/* these are unique, so buckets should be well-distributed */
|
||||
return (double) NTUP_PER_BUCKET / rel->rows;
|
||||
return 1.0 / (double) virtualbuckets;
|
||||
case TableOidAttributeNumber:
|
||||
/* hashing this is a terrible idea... */
|
||||
return 1.0;
|
||||
@ -873,6 +881,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
||||
if (ndistinct < 0.0)
|
||||
ndistinct = -ndistinct * rel->tuples;
|
||||
|
||||
if (ndistinct <= 0.0) /* ensure we can divide */
|
||||
{
|
||||
ReleaseSysCache(tuple);
|
||||
return 0.1;
|
||||
}
|
||||
|
||||
/* Also compute avg freq of all distinct data values in raw relation */
|
||||
avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
|
||||
|
||||
@ -887,20 +901,14 @@ estimate_hash_bucketsize(Query *root, Var *var)
|
||||
ndistinct *= rel->rows / rel->tuples;
|
||||
|
||||
/*
|
||||
* Form initial estimate of bucketsize fraction. Here we use rel->rows,
|
||||
* ie the number of rows after applying restriction clauses, because
|
||||
* that's what the fraction will eventually be multiplied by in
|
||||
* cost_heapjoin.
|
||||
* Initial estimate of bucketsize fraction is 1/nbuckets as long as
|
||||
* the number of buckets is less than the expected number of distinct
|
||||
* values; otherwise it is 1/ndistinct.
|
||||
*/
|
||||
estfract = (double) NTUP_PER_BUCKET / rel->rows;
|
||||
|
||||
/*
|
||||
* Adjust estimated bucketsize if too few distinct values (after
|
||||
* restriction clauses) to fill all the buckets.
|
||||
*/
|
||||
needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
|
||||
if (ndistinct < needdistinct)
|
||||
estfract *= needdistinct / ndistinct;
|
||||
if (ndistinct > (double) virtualbuckets)
|
||||
estfract = 1.0 / (double) virtualbuckets;
|
||||
else
|
||||
estfract = 1.0 / ndistinct;
|
||||
|
||||
/*
|
||||
* Look up the frequency of the most common value, if available.
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: nodeHash.h,v 1.19 2001/03/22 04:00:44 momjian Exp $
|
||||
* $Id: nodeHash.h,v 1.20 2001/06/11 00:17:07 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -16,9 +16,6 @@
|
||||
|
||||
#include "nodes/plannodes.h"
|
||||
|
||||
/* NTUP_PER_BUCKET is exported because planner wants to see it */
|
||||
#define NTUP_PER_BUCKET 10
|
||||
|
||||
extern TupleTableSlot *ExecHash(Hash *node);
|
||||
extern bool ExecInitHash(Hash *node, EState *estate, Plan *parent);
|
||||
extern int ExecCountSlotsHash(Hash *node);
|
||||
@ -35,5 +32,9 @@ extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
|
||||
ExprContext *econtext);
|
||||
extern void ExecHashTableReset(HashJoinTable hashtable, long ntuples);
|
||||
extern void ExecReScanHash(Hash *node, ExprContext *exprCtxt, Plan *parent);
|
||||
extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
|
||||
int *virtualbuckets,
|
||||
int *physicalbuckets,
|
||||
int *numbatches);
|
||||
|
||||
#endif /* NODEHASH_H */
|
||||
|
Loading…
Reference in New Issue
Block a user