1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* costsize.c
|
1997-09-07 07:04:48 +02:00
|
|
|
* Routines to compute (and set) relation sizes and path costs
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1999-08-06 06:00:17 +02:00
|
|
|
* Path costs are measured in units of disk accesses: one page fetch
|
|
|
|
* has cost 1. The other primitive unit is the CPU time required to
|
|
|
|
* process one tuple, which we set at "_cpu_page_weight_" of a page
|
|
|
|
* fetch. Obviously, the CPU time per tuple depends on the query
|
|
|
|
* involved, but the relative CPU and disk speeds of a given platform
|
|
|
|
* are so variable that we are lucky if we can get useful numbers
|
|
|
|
* at all. _cpu_page_weight_ is user-settable, in case a particular
|
|
|
|
* user is clueful enough to have a better-than-default estimate
|
|
|
|
* of the ratio for his platform. There is also _cpu_index_page_weight_,
|
|
|
|
* the cost to process a tuple of an index during an index scan.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1999-08-06 06:00:17 +02:00
|
|
|
*
|
|
|
|
* Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
1999-11-23 21:07:06 +01:00
|
|
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.46 1999/11/23 20:06:54 momjian Exp $
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
1997-01-08 11:33:46 +01:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
#include <math.h>
|
1998-01-13 05:05:12 +01:00
|
|
|
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "postgres.h"
|
1999-08-06 06:00:17 +02:00
|
|
|
|
1997-02-14 05:19:07 +01:00
|
|
|
#ifdef HAVE_LIMITS_H
|
1997-09-07 07:04:48 +02:00
|
|
|
#include <limits.h>
|
|
|
|
#ifndef MAXINT
|
|
|
|
#define MAXINT INT_MAX
|
|
|
|
#endif
|
1996-07-09 08:22:35 +02:00
|
|
|
#else
|
1997-09-07 07:04:48 +02:00
|
|
|
#ifdef HAVE_VALUES_H
|
|
|
|
#include <values.h>
|
|
|
|
#endif
|
|
|
|
#endif
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-08-06 06:00:17 +02:00
|
|
|
#include "miscadmin.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "optimizer/cost.h"
|
|
|
|
#include "optimizer/internal.h"
|
|
|
|
#include "optimizer/tlist.h"
|
1998-01-13 05:05:12 +01:00
|
|
|
#include "utils/lsyscache.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1996-10-31 06:58:01 +01:00
|
|
|
|
1999-08-06 06:00:17 +02:00
|
|
|
static int compute_targetlist_width(List *targetlist);
|
1997-09-08 23:56:23 +02:00
|
|
|
static int compute_attribute_width(TargetEntry *tlistentry);
|
1999-05-25 18:15:34 +02:00
|
|
|
static double relation_byte_size(int tuples, int width);
|
1997-09-08 04:41:22 +02:00
|
|
|
static double base_log(double x, double b);
|
1999-08-06 06:00:17 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-08 04:41:22 +02:00
|
|
|
int _disable_cost_ = 30000000;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-08 04:41:22 +02:00
|
|
|
bool _enable_seqscan_ = true;
|
|
|
|
bool _enable_indexscan_ = true;
|
|
|
|
bool _enable_sort_ = true;
|
|
|
|
bool _enable_nestloop_ = true;
|
1998-08-04 18:44:31 +02:00
|
|
|
bool _enable_mergejoin_ = true;
|
1997-09-08 04:41:22 +02:00
|
|
|
bool _enable_hashjoin_ = true;
|
1999-11-23 21:07:06 +01:00
|
|
|
bool _enable_tidscan_ = true;
|
1997-04-09 04:13:41 +02:00
|
|
|
|
1999-07-07 11:27:28 +02:00
|
|
|
Cost _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
|
1999-07-07 11:36:45 +02:00
|
|
|
Cost _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_seqscan
|
1997-09-07 07:04:48 +02:00
|
|
|
* Determines and returns the cost of scanning a relation sequentially.
|
|
|
|
* If the relation is a temporary to be materialized from a query
|
|
|
|
* embedded within a data field (determined by 'relid' containing an
|
|
|
|
* attribute reference), then a predetermined constant is returned (we
|
|
|
|
* have NO IDEA how big the result of a POSTQUEL procedure is going to
|
|
|
|
* be).
|
|
|
|
*
|
|
|
|
* disk = p
|
|
|
|
* cpu = *CPU-PAGE-WEIGHT* * t
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'relid' is the relid of the relation to be scanned
|
|
|
|
* 'relpages' is the number of pages in the relation to be scanned
|
1997-09-07 07:04:48 +02:00
|
|
|
* (as determined from the system catalogs)
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'reltuples' is the number of tuples in the relation to be scanned
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns a flonum.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
|
|
|
cost_seqscan(int relid, int relpages, int reltuples)
|
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
if (!_enable_seqscan_)
|
|
|
|
temp += _disable_cost_;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
if (relid < 0)
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cost of sequentially scanning a materialized temporary relation
|
|
|
|
*/
|
1999-02-09 18:03:14 +01:00
|
|
|
temp += _NONAME_SCAN_COST_;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
temp += relpages;
|
1999-07-07 11:11:15 +02:00
|
|
|
temp += _cpu_page_weight_ * reltuples;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
Assert(temp >= 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_index
|
1997-09-07 07:04:48 +02:00
|
|
|
* Determines and returns the cost of scanning a relation using an index.
|
|
|
|
*
|
|
|
|
* disk = expected-index-pages + expected-data-pages
|
|
|
|
* cpu = *CPU-PAGE-WEIGHT* *
|
|
|
|
* (expected-index-tuples + expected-data-tuples)
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'indexid' is the index OID
|
|
|
|
* 'expected-indexpages' is the number of index pages examined in the scan
|
|
|
|
* 'selec' is the selectivity of the index
|
|
|
|
* 'relpages' is the number of pages in the main relation
|
|
|
|
* 'reltuples' is the number of tuples in the main relation
|
|
|
|
* 'indexpages' is the number of pages in the index relation
|
|
|
|
* 'indextuples' is the number of tuples in the index relation
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns a flonum.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
|
|
|
cost_index(Oid indexid,
|
1997-09-07 07:04:48 +02:00
|
|
|
int expected_indexpages,
|
|
|
|
Cost selec,
|
|
|
|
int relpages,
|
|
|
|
int reltuples,
|
|
|
|
int indexpages,
|
|
|
|
int indextuples,
|
|
|
|
bool is_injoin)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-04-30 06:01:44 +02:00
|
|
|
Cost temp = 0;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
if (!_enable_indexscan_ && !is_injoin)
|
|
|
|
temp += _disable_cost_;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
|
|
|
* We want to be sure we estimate the cost of an index scan as more
|
|
|
|
* than the cost of a sequential scan (when selec == 1.0), even if we
|
|
|
|
* don't have good stats. So, disbelieve zero index size.
|
1999-04-30 06:01:44 +02:00
|
|
|
*/
|
|
|
|
if (expected_indexpages <= 0)
|
|
|
|
expected_indexpages = 1;
|
|
|
|
if (indextuples <= 0)
|
|
|
|
indextuples = 1;
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/* expected index relation pages */
|
|
|
|
temp += expected_indexpages;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
|
|
|
* expected base relation pages XXX this isn't really right, since we
|
|
|
|
* will access the table nonsequentially and might have to fetch the
|
|
|
|
* same page more than once. This calculation assumes the buffer
|
|
|
|
* cache will prevent that from happening...
|
1999-04-30 06:01:44 +02:00
|
|
|
*/
|
|
|
|
temp += ceil(((double) selec) * ((double) relpages));
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/* per index tuples */
|
1999-07-07 11:36:45 +02:00
|
|
|
temp += _cpu_index_page_weight_ * selec * indextuples;
|
1997-04-09 04:13:41 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/* per heap tuples */
|
1999-07-07 11:11:15 +02:00
|
|
|
temp += _cpu_page_weight_ * selec * reltuples;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
Assert(temp >= 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1999-11-23 21:07:06 +01:00
|
|
|
/*
|
|
|
|
* cost_tidscan
|
|
|
|
* Determines and returns the cost of scanning a relation using tid-s.
|
|
|
|
*
|
|
|
|
* disk = number of tids
|
|
|
|
* cpu = *CPU-PAGE-WEIGHT* * number_of_tids
|
|
|
|
*
|
|
|
|
* Returns a flonum.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
Cost
|
|
|
|
cost_tidscan(List *tideval)
|
|
|
|
{
|
|
|
|
Cost temp = 0;
|
|
|
|
|
|
|
|
if (!_enable_tidscan_)
|
|
|
|
temp += _disable_cost_;
|
|
|
|
|
|
|
|
temp += (1.0 + _cpu_page_weight_) * length(tideval);
|
|
|
|
|
|
|
|
return temp;
|
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_sort
|
1997-09-07 07:04:48 +02:00
|
|
|
* Determines and returns the cost of sorting a relation by considering
|
1999-05-01 21:47:42 +02:00
|
|
|
* the cost of doing an external sort: XXX this is probably too low
|
1997-09-07 07:04:48 +02:00
|
|
|
* disk = (p lg p)
|
|
|
|
* cpu = *CPU-PAGE-WEIGHT* * (t lg t)
|
|
|
|
*
|
1999-02-10 04:52:54 +01:00
|
|
|
* 'pathkeys' is a list of sort keys
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'tuples' is the number of tuples in the relation
|
|
|
|
* 'width' is the average tuple width in bytes
|
1999-04-30 06:01:44 +02:00
|
|
|
*
|
|
|
|
* NOTE: some callers currently pass NULL for pathkeys because they
|
1999-05-01 21:47:42 +02:00
|
|
|
* can't conveniently supply the sort keys. Since this routine doesn't
|
1999-04-30 06:01:44 +02:00
|
|
|
* currently do anything with pathkeys anyway, that doesn't matter...
|
1999-05-01 21:47:42 +02:00
|
|
|
* but if it ever does, it should react gracefully to lack of key data.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns a flonum.
|
|
|
|
*/
|
|
|
|
Cost
|
1999-05-01 21:47:42 +02:00
|
|
|
cost_sort(List *pathkeys, int tuples, int width)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
|
|
|
int npages = page_size(tuples, width);
|
1999-04-30 06:01:44 +02:00
|
|
|
double log_npages;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (!_enable_sort_)
|
|
|
|
temp += _disable_cost_;
|
1999-04-30 06:01:44 +02:00
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
|
|
|
* We want to be sure the cost of a sort is never estimated as zero,
|
|
|
|
* even if passed-in tuple count is zero. Besides, mustn't do
|
|
|
|
* log(0)...
|
1999-04-30 06:01:44 +02:00
|
|
|
*/
|
1999-05-01 21:47:42 +02:00
|
|
|
if (tuples <= 0)
|
|
|
|
tuples = 1;
|
1999-04-30 06:01:44 +02:00
|
|
|
if (npages <= 0)
|
|
|
|
npages = 1;
|
|
|
|
|
|
|
|
log_npages = ceil(base_log((double) npages, 2.0));
|
|
|
|
if (log_npages <= 0.0)
|
|
|
|
log_npages = 1.0;
|
|
|
|
|
|
|
|
temp += npages * log_npages;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-05-01 21:47:42 +02:00
|
|
|
* could be base_log(tuples, NBuffers), but we are only doing 2-way
|
1997-09-07 07:04:48 +02:00
|
|
|
* merges
|
|
|
|
*/
|
1999-07-07 11:11:15 +02:00
|
|
|
temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
|
1999-04-30 06:01:44 +02:00
|
|
|
|
|
|
|
Assert(temp > 0);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
 * cost_result
 *	  Estimate the cost of writing 'tuples' tuples of 'width' bytes each
 *	  out to a result relation: one unit per page written plus
 *	  _cpu_page_weight_ per tuple.
 *
 * Compiled only when NOT_USED is defined.
 *
 * Returns the estimated cost.
 */
#ifdef NOT_USED
Cost
cost_result(int tuples, int width)
{
	Cost		total = page_size(tuples, width);

	total += _cpu_page_weight_ * tuples;

	Assert(total >= 0);
	return total;
}

#endif
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_nestloop
|
1997-09-07 07:04:48 +02:00
|
|
|
* Determines and returns the cost of joining two relations using the
|
|
|
|
* nested loop algorithm.
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'outercost' is the (disk+cpu) cost of scanning the outer relation
|
|
|
|
* 'innercost' is the (disk+cpu) cost of scanning the inner relation
|
|
|
|
* 'outertuples' is the number of tuples in the outer relation
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns a flonum.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
Cost
|
|
|
|
cost_nestloop(Cost outercost,
|
1997-09-07 07:04:48 +02:00
|
|
|
Cost innercost,
|
|
|
|
int outertuples,
|
|
|
|
int innertuples,
|
|
|
|
int outerpages,
|
|
|
|
bool is_indexjoin)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
if (!_enable_nestloop_)
|
|
|
|
temp += _disable_cost_;
|
|
|
|
temp += outercost;
|
|
|
|
temp += outertuples * innercost;
|
|
|
|
Assert(temp >= 0);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_mergejoin
|
1997-09-07 07:04:48 +02:00
|
|
|
* 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
|
|
|
|
* outer and inner relations
|
|
|
|
* 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
|
1999-05-01 21:47:42 +02:00
|
|
|
* to sort the outer and inner relations (or NIL if no explicit
|
|
|
|
* sort is needed because the source path is already ordered)
|
1997-09-07 07:04:48 +02:00
|
|
|
* 'outertuples' and 'innertuples' are the number of tuples in the outer
|
|
|
|
* and inner relations
|
|
|
|
* 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
|
|
|
|
* of the tuples of the outer and inner relations
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns a flonum.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
1998-08-04 18:44:31 +02:00
|
|
|
cost_mergejoin(Cost outercost,
|
1997-09-07 07:04:48 +02:00
|
|
|
Cost innercost,
|
1997-09-08 23:56:23 +02:00
|
|
|
List *outersortkeys,
|
|
|
|
List *innersortkeys,
|
1997-09-07 07:04:48 +02:00
|
|
|
int outersize,
|
|
|
|
int innersize,
|
|
|
|
int outerwidth,
|
|
|
|
int innerwidth)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1998-08-04 18:44:31 +02:00
|
|
|
if (!_enable_mergejoin_)
|
1997-09-07 07:04:48 +02:00
|
|
|
temp += _disable_cost_;
|
|
|
|
|
|
|
|
temp += outercost;
|
|
|
|
temp += innercost;
|
1999-04-30 06:01:44 +02:00
|
|
|
if (outersortkeys) /* do we need to sort? */
|
1999-05-01 21:47:42 +02:00
|
|
|
temp += cost_sort(outersortkeys, outersize, outerwidth);
|
1999-04-30 06:01:44 +02:00
|
|
|
if (innersortkeys) /* do we need to sort? */
|
1999-05-01 21:47:42 +02:00
|
|
|
temp += cost_sort(innersortkeys, innersize, innerwidth);
|
1999-07-07 11:11:15 +02:00
|
|
|
temp += _cpu_page_weight_ * (outersize + innersize);
|
1999-04-30 06:01:44 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
Assert(temp >= 0);
|
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-06 06:00:17 +02:00
|
|
|
* cost_hashjoin
|
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
|
|
|
|
* outer and inner relations
|
|
|
|
* 'outersize' and 'innersize' are the number of tuples in the outer
|
|
|
|
* and inner relations
|
|
|
|
* 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
|
|
|
|
* of the tuples of the outer and inner relations
|
1999-08-06 06:00:17 +02:00
|
|
|
* 'innerdisbursion' is an estimate of the disbursion statistic
|
|
|
|
* for the inner hash key.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns a flonum.
|
|
|
|
*/
|
|
|
|
Cost
|
|
|
|
cost_hashjoin(Cost outercost,
|
1997-09-07 07:04:48 +02:00
|
|
|
Cost innercost,
|
|
|
|
int outersize,
|
|
|
|
int innersize,
|
|
|
|
int outerwidth,
|
1999-08-06 06:00:17 +02:00
|
|
|
int innerwidth,
|
|
|
|
Cost innerdisbursion)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
1999-08-06 06:00:17 +02:00
|
|
|
double outerbytes = relation_byte_size(outersize, outerwidth);
|
|
|
|
double innerbytes = relation_byte_size(innersize, innerwidth);
|
|
|
|
long hashtablebytes = SortMem * 1024L;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (!_enable_hashjoin_)
|
|
|
|
temp += _disable_cost_;
|
|
|
|
|
1999-04-05 04:07:07 +02:00
|
|
|
/* cost of source data */
|
|
|
|
temp += outercost + innercost;
|
|
|
|
|
|
|
|
/* cost of computing hash function: must do it once per tuple */
|
1999-07-07 11:11:15 +02:00
|
|
|
temp += _cpu_page_weight_ * (outersize + innersize);
|
1999-04-05 04:07:07 +02:00
|
|
|
|
1999-08-06 06:00:17 +02:00
|
|
|
/* the number of tuple comparisons needed is the number of outer
|
|
|
|
* tuples times the typical hash bucket size, which we estimate
|
|
|
|
* conservatively as the inner disbursion times the inner tuple
|
|
|
|
* count. The cost per comparison is set at _cpu_index_page_weight_;
|
|
|
|
* is that reasonable, or do we need another basic parameter?
|
|
|
|
*/
|
|
|
|
temp += _cpu_index_page_weight_ * outersize *
|
|
|
|
(innersize * innerdisbursion);
|
1999-04-05 04:07:07 +02:00
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
|
|
|
* if inner relation is too big then we will need to "batch" the join,
|
1999-04-05 04:07:07 +02:00
|
|
|
* which implies writing and reading most of the tuples to disk an
|
1999-08-06 06:00:17 +02:00
|
|
|
* extra time. Charge one cost unit per page of I/O.
|
|
|
|
*/
|
|
|
|
if (innerbytes > hashtablebytes)
|
|
|
|
temp += 2 * (page_size(outersize, outerwidth) +
|
|
|
|
page_size(innersize, innerwidth));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Bias against putting larger relation on inside. We don't want
|
|
|
|
* an absolute prohibition, though, since larger relation might have
|
|
|
|
* better disbursion --- and we can't trust the size estimates
|
|
|
|
* unreservedly, anyway.
|
1999-04-05 04:07:07 +02:00
|
|
|
*/
|
1999-08-06 06:00:17 +02:00
|
|
|
if (innerbytes > outerbytes)
|
|
|
|
temp *= 1.1; /* is this an OK fudge factor? */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
Assert(temp >= 0);
|
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* compute_rel_size
|
|
|
|
* Computes the size of each relation in 'rel_list' (after applying
|
1997-09-07 07:04:48 +02:00
|
|
|
* restrictions), by multiplying the selectivity of each restriction
|
|
|
|
* by the original size of the relation.
|
|
|
|
*
|
|
|
|
* Sets the 'size' field for each relation entry with this computed size.
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns the size.
|
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
int
|
1999-05-26 00:43:53 +02:00
|
|
|
compute_rel_size(RelOptInfo *rel)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp;
|
|
|
|
int temp1;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-03 21:15:53 +01:00
|
|
|
temp = rel->tuples * product_selec(rel->restrictinfo);
|
1997-09-07 07:04:48 +02:00
|
|
|
Assert(temp >= 0);
|
|
|
|
if (temp >= (MAXINT - 1))
|
|
|
|
temp1 = MAXINT;
|
|
|
|
else
|
|
|
|
temp1 = ceil((double) temp);
|
|
|
|
Assert(temp1 >= 0);
|
|
|
|
Assert(temp1 <= MAXINT);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp1;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* compute_rel_width
|
1997-09-07 07:04:48 +02:00
|
|
|
* Computes the width in bytes of a tuple from 'rel'.
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns the width of the tuple as a fixnum.
|
|
|
|
*/
|
|
|
|
int
|
1999-05-26 00:43:53 +02:00
|
|
|
compute_rel_width(RelOptInfo *rel)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-08-22 22:15:04 +02:00
|
|
|
return compute_targetlist_width(rel->targetlist);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* compute_targetlist_width
|
1997-09-07 07:04:48 +02:00
|
|
|
* Computes the width in bytes of a tuple made from 'targetlist'.
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns the width of the tuple as a fixnum.
|
|
|
|
*/
|
1997-08-19 23:40:56 +02:00
|
|
|
static int
|
1997-09-08 23:56:23 +02:00
|
|
|
compute_targetlist_width(List *targetlist)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
List *temp_tl;
|
|
|
|
int tuple_width = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
foreach(temp_tl, targetlist)
|
|
|
|
{
|
1999-08-22 22:15:04 +02:00
|
|
|
tuple_width += compute_attribute_width(lfirst(temp_tl));
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1998-09-01 05:29:17 +02:00
|
|
|
return tuple_width;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* compute_attribute_width
|
1997-09-07 07:04:48 +02:00
|
|
|
* Given a target list entry, find the size in bytes of the attribute.
|
|
|
|
*
|
|
|
|
* If a field is variable-length, it is assumed to be at least the size
|
|
|
|
* of a TID field.
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns the width of the attribute as a fixnum.
|
|
|
|
*/
|
|
|
|
static int
|
1997-09-08 23:56:23 +02:00
|
|
|
compute_attribute_width(TargetEntry *tlistentry)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
int width = get_typlen(tlistentry->resdom->restype);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (width < 0)
|
1998-09-01 05:29:17 +02:00
|
|
|
return _DEFAULT_ATTRIBUTE_WIDTH_;
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
1998-09-01 05:29:17 +02:00
|
|
|
return width;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* compute_joinrel_size
|
1997-09-07 07:04:48 +02:00
|
|
|
* Computes the size of the join relation 'joinrel'.
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* Returns a fixnum.
|
|
|
|
*/
|
|
|
|
int
|
1999-02-12 18:25:05 +01:00
|
|
|
compute_joinrel_size(JoinPath *joinpath)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 1.0;
|
|
|
|
int temp1 = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-02-15 04:22:37 +01:00
|
|
|
/* cartesian product */
|
1997-09-07 07:04:48 +02:00
|
|
|
temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
|
|
|
|
temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
|
|
|
|
|
1999-02-03 21:15:53 +01:00
|
|
|
temp = temp * product_selec(joinpath->pathinfo);
|
1999-05-25 18:15:34 +02:00
|
|
|
if (temp >= (MAXINT - 1) / 2)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1999-02-15 04:22:37 +01:00
|
|
|
/* if we exceed (MAXINT-1)/2, we switch to log scale */
|
|
|
|
/* +1 prevents log(0) */
|
1999-05-25 18:15:34 +02:00
|
|
|
temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1999-02-15 04:22:37 +01:00
|
|
|
else
|
|
|
|
temp1 = ceil((double) temp);
|
1997-09-07 07:04:48 +02:00
|
|
|
Assert(temp1 >= 0);
|
|
|
|
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp1;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1999-04-05 04:07:07 +02:00
|
|
|
/*
|
|
|
|
* relation_byte_size
|
1999-05-25 18:15:34 +02:00
|
|
|
* Estimate the storage space in bytes for a given number of tuples
|
|
|
|
* of a given width (size in bytes).
|
|
|
|
* To avoid overflow with big relations, result is a double.
|
1999-04-05 04:07:07 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
static double
|
1999-05-25 18:15:34 +02:00
|
|
|
relation_byte_size(int tuples, int width)
|
1999-04-05 04:07:07 +02:00
|
|
|
{
|
|
|
|
return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
|
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* page_size
|
1997-09-07 07:04:48 +02:00
|
|
|
* Returns an estimate of the number of pages covered by a given
|
|
|
|
* number of tuples of a given width (size in bytes).
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
int
|
|
|
|
page_size(int tuples, int width)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-04-05 04:07:07 +02:00
|
|
|
int temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-04-05 04:07:07 +02:00
|
|
|
temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
|
1997-09-07 07:04:48 +02:00
|
|
|
Assert(temp >= 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static double
|
|
|
|
base_log(double x, double b)
|
|
|
|
{
|
1998-09-01 05:29:17 +02:00
|
|
|
return log(x) / log(b);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|