Fix potential overflow problems when relation size exceeds 2 GB.

Fix failure to reliably put the smaller relation on the inside of
a hashjoin.
This commit is contained in:
Tom Lane 1999-04-05 02:07:07 +00:00
parent 2e7ef7477c
commit e91f43a122
1 changed files with 39 additions and 18 deletions

View File

@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.33 1999/02/15 03:22:04 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.34 1999/04/05 02:07:07 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -37,6 +37,7 @@
extern int NBuffers; extern int NBuffers;
static int compute_attribute_width(TargetEntry *tlistentry); static int compute_attribute_width(TargetEntry *tlistentry);
static double relation_byte_size (int tuples, int width);
static double base_log(double x, double b); static double base_log(double x, double b);
static int compute_targetlist_width(List *targetlist); static int compute_targetlist_width(List *targetlist);
@ -323,27 +324,35 @@ cost_hashjoin(Cost outercost,
Cost temp = 0; Cost temp = 0;
int outerpages = page_size(outersize, outerwidth); int outerpages = page_size(outersize, outerwidth);
int innerpages = page_size(innersize, innerwidth); int innerpages = page_size(innersize, innerwidth);
int nrun = ceil((double) outerpages / (double) NBuffers);
if (outerpages < innerpages)
return _disable_cost_;
if (!_enable_hashjoin_) if (!_enable_hashjoin_)
temp += _disable_cost_; temp += _disable_cost_;
/* /* Bias against putting larger relation on inside.
* temp += outercost + (nrun + 1) * innercost;
* *
* the innercost shouldn't be used it. Instead the cost of hashing the * Code used to use "outerpages < innerpages" but that has
* innerpath should be used * poor resolution when both relations are small.
*
* ASSUME innercost is 1 for now -- a horrible hack - jolly temp +=
* outercost + (nrun + 1);
*
* But we must add innercost to result. - vadim 04/24/97
*/ */
temp += outercost + innercost + (nrun + 1); if (relation_byte_size(outersize, outerwidth) <
relation_byte_size(innersize, innerwidth))
temp += _disable_cost_;
/* cost of source data */
temp += outercost + innercost;
/* cost of computing hash function: must do it once per tuple */
temp += _cpu_page_wight_ * (outersize + innersize);
/* cost of main-memory hashtable */
temp += (innerpages < NBuffers) ? innerpages : NBuffers;
/* if inner relation is too big then we will need to "batch" the join,
* which implies writing and reading most of the tuples to disk an
* extra time.
*/
if (innerpages > NBuffers)
temp += 2 * (outerpages + innerpages);
temp += _cpu_page_wight_ * (outersize + nrun * innersize);
Assert(temp >= 0); Assert(temp >= 0);
return temp; return temp;
@ -458,6 +467,19 @@ compute_joinrel_size(JoinPath *joinpath)
return temp1; return temp1;
} }
/*
 * relation_byte_size
 *	  Estimated number of bytes needed to store 'tuples' tuples, each
 *	  carrying 'width' bytes of data plus one HeapTupleData of overhead.
 *
 *	  The product is formed in double arithmetic and returned as a
 *	  double, so a very large relation does not overflow an int.
 */
static double
relation_byte_size (int tuples, int width)
{
	/* per-tuple footprint: data width plus sizeof(HeapTupleData) */
	double		bytes_per_tuple = (double) (width + sizeof(HeapTupleData));
	double		ntuples = (double) tuples;

	return ntuples * bytes_per_tuple;
}
/* /*
* page_size * page_size
* Returns an estimate of the number of pages covered by a given * Returns an estimate of the number of pages covered by a given
@ -466,10 +488,9 @@ compute_joinrel_size(JoinPath *joinpath)
int int
page_size(int tuples, int width) page_size(int tuples, int width)
{ {
int temp = 0; int temp;
temp = ceil((double) (tuples * (width + sizeof(HeapTupleData))) temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
/ BLCKSZ);
Assert(temp >= 0); Assert(temp >= 0);
return temp; return temp;
} }