1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* costsize.c
|
1997-09-07 07:04:48 +02:00
|
|
|
* Routines to compute (and set) relation sizes and path costs
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1999-08-06 06:00:17 +02:00
|
|
|
* Path costs are measured in units of disk accesses: one page fetch
|
|
|
|
* has cost 1. The other primitive unit is the CPU time required to
|
2000-01-23 00:50:30 +01:00
|
|
|
* process one tuple, which we set at "cpu_page_weight" of a page
|
1999-08-06 06:00:17 +02:00
|
|
|
* fetch. Obviously, the CPU time per tuple depends on the query
|
|
|
|
* involved, but the relative CPU and disk speeds of a given platform
|
|
|
|
* are so variable that we are lucky if we can get useful numbers
|
2000-01-23 00:50:30 +01:00
|
|
|
* at all. cpu_page_weight is user-settable, in case a particular
|
1999-08-06 06:00:17 +02:00
|
|
|
* user is clueful enough to have a better-than-default estimate
|
2000-01-23 00:50:30 +01:00
|
|
|
* of the ratio for his platform. There is also cpu_index_page_weight,
|
1999-08-06 06:00:17 +02:00
|
|
|
* the cost to process a tuple of an index during an index scan.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1999-08-06 06:00:17 +02:00
|
|
|
*
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
|
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2000-02-07 05:41:04 +01:00
|
|
|
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.51 2000/02/07 04:40:59 tgl Exp $
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
1997-01-08 11:33:46 +01:00
|
|
|
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "postgres.h"
|
1999-08-06 06:00:17 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
#include <math.h>
|
1997-02-14 05:19:07 +01:00
|
|
|
#ifdef HAVE_LIMITS_H
|
1997-09-07 07:04:48 +02:00
|
|
|
#include <limits.h>
|
|
|
|
#ifndef MAXINT
|
|
|
|
#define MAXINT INT_MAX
|
|
|
|
#endif
|
1996-07-09 08:22:35 +02:00
|
|
|
#else
|
1997-09-07 07:04:48 +02:00
|
|
|
#ifdef HAVE_VALUES_H
|
|
|
|
#include <values.h>
|
|
|
|
#endif
|
|
|
|
#endif
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-08-06 06:00:17 +02:00
|
|
|
#include "miscadmin.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "optimizer/cost.h"
|
|
|
|
#include "optimizer/internal.h"
|
|
|
|
#include "optimizer/tlist.h"
|
1998-01-13 05:05:12 +01:00
|
|
|
#include "utils/lsyscache.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1996-10-31 06:58:01 +01:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
Cost cpu_page_weight = CPU_PAGE_WEIGHT;
|
|
|
|
Cost cpu_index_page_weight = CPU_INDEX_PAGE_WEIGHT;
|
|
|
|
|
|
|
|
Cost disable_cost = 100000000.0;
|
|
|
|
|
|
|
|
bool enable_seqscan = true;
|
|
|
|
bool enable_indexscan = true;
|
|
|
|
bool enable_tidscan = true;
|
|
|
|
bool enable_sort = true;
|
|
|
|
bool enable_nestloop = true;
|
|
|
|
bool enable_mergejoin = true;
|
|
|
|
bool enable_hashjoin = true;
|
|
|
|
|
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
static void set_rel_width(Query *root, RelOptInfo *rel);
|
1997-09-08 23:56:23 +02:00
|
|
|
static int compute_attribute_width(TargetEntry *tlistentry);
|
2000-01-09 01:26:47 +01:00
|
|
|
static double relation_byte_size(double tuples, int width);
|
|
|
|
static double page_size(double tuples, int width);
|
1997-09-08 04:41:22 +02:00
|
|
|
static double base_log(double x, double b);
|
1999-08-06 06:00:17 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_seqscan
|
1997-09-07 07:04:48 +02:00
|
|
|
* Determines and returns the cost of scanning a relation sequentially.
|
|
|
|
* If the relation is a temporary to be materialized from a query
|
|
|
|
* embedded within a data field (determined by 'relid' containing an
|
|
|
|
* attribute reference), then a predetermined constant is returned (we
|
|
|
|
* have NO IDEA how big the result of a POSTQUEL procedure is going to
|
|
|
|
* be).
|
|
|
|
*
|
|
|
|
* disk = p
|
2000-01-09 01:26:47 +01:00
|
|
|
* cpu = CPU-PAGE-WEIGHT * t
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
2000-01-09 01:26:47 +01:00
|
|
|
cost_seqscan(RelOptInfo *baserel)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
/* Should only be applied to base relations */
|
|
|
|
Assert(length(baserel->relids) == 1);
|
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
if (!enable_seqscan)
|
|
|
|
temp += disable_cost;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
if (lfirsti(baserel->relids) < 0)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* cost of sequentially scanning a materialized temporary relation
|
|
|
|
*/
|
1999-02-09 18:03:14 +01:00
|
|
|
temp += _NONAME_SCAN_COST_;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2000-01-09 01:26:47 +01:00
|
|
|
temp += baserel->pages;
|
2000-01-23 00:50:30 +01:00
|
|
|
temp += cpu_page_weight * baserel->tuples;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2000-01-09 01:26:47 +01:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
Assert(temp >= 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_index
|
1997-09-07 07:04:48 +02:00
|
|
|
* Determines and returns the cost of scanning a relation using an index.
|
|
|
|
*
|
2000-01-23 00:50:30 +01:00
|
|
|
* NOTE: an indexscan plan node can actually represent several passes,
|
|
|
|
* but here we consider the cost of just one pass.
|
2000-01-09 01:26:47 +01:00
|
|
|
*
|
2000-01-23 00:50:30 +01:00
|
|
|
* 'root' is the query root
|
2000-01-09 01:26:47 +01:00
|
|
|
* 'baserel' is the base relation the index is for
|
|
|
|
* 'index' is the index to be used
|
2000-01-23 00:50:30 +01:00
|
|
|
* 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
|
2000-01-09 01:26:47 +01:00
|
|
|
* 'is_injoin' is T if we are considering using the index scan as the inside
|
|
|
|
* of a nestloop join.
|
|
|
|
*
|
2000-01-23 00:50:30 +01:00
|
|
|
* NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
|
|
|
|
* Any additional quals evaluated as qpquals may reduce the number of returned
|
|
|
|
* tuples, but they won't reduce the number of tuples we have to fetch from
|
|
|
|
* the table, so they don't reduce the scan cost.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
2000-01-23 00:50:30 +01:00
|
|
|
cost_index(Query *root,
|
|
|
|
RelOptInfo *baserel,
|
2000-01-09 01:26:47 +01:00
|
|
|
IndexOptInfo *index,
|
2000-01-23 00:50:30 +01:00
|
|
|
List *indexQuals,
|
1997-09-07 07:04:48 +02:00
|
|
|
bool is_injoin)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-04-30 06:01:44 +02:00
|
|
|
Cost temp = 0;
|
2000-01-23 00:50:30 +01:00
|
|
|
Cost indexAccessCost;
|
|
|
|
Selectivity indexSelectivity;
|
|
|
|
double reltuples;
|
2000-01-09 01:26:47 +01:00
|
|
|
double relpages;
|
|
|
|
|
|
|
|
/* Should only be applied to base relations */
|
|
|
|
Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
|
|
|
|
Assert(length(baserel->relids) == 1);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
if (!enable_indexscan && !is_injoin)
|
|
|
|
temp += disable_cost;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
2000-01-23 00:50:30 +01:00
|
|
|
* Call index-access-method-specific code to estimate the processing
|
|
|
|
* cost for scanning the index, as well as the selectivity of the index
|
|
|
|
* (ie, the fraction of main-table tuples we will have to retrieve).
|
1999-04-30 06:01:44 +02:00
|
|
|
*/
|
2000-01-23 00:50:30 +01:00
|
|
|
fmgr(index->amcostestimate, root, baserel, index, indexQuals,
|
|
|
|
&indexAccessCost, &indexSelectivity);
|
1999-04-30 06:01:44 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
/* all costs for touching index itself included here */
|
|
|
|
temp += indexAccessCost;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
/*--------------------
|
2000-01-23 00:50:30 +01:00
|
|
|
* Estimate number of main-table tuples and pages touched.
|
2000-01-09 01:26:47 +01:00
|
|
|
*
|
|
|
|
* Worst case is that each tuple the index tells us to fetch comes
|
|
|
|
* from a different base-rel page, in which case the I/O cost would be
|
|
|
|
* 'reltuples' pages. In practice we can expect the number of page
|
|
|
|
* fetches to be reduced by the buffer cache, because more than one
|
|
|
|
* tuple can be retrieved per page fetched. Currently, we estimate
|
|
|
|
* the number of pages to be retrieved as
|
|
|
|
* MIN(reltuples, relpages)
|
|
|
|
* This amounts to assuming that the buffer cache is perfectly efficient
|
|
|
|
* and never ends up reading the same page twice within one scan, which
|
|
|
|
* of course is too optimistic. On the other hand, we are assuming that
|
|
|
|
* the target tuples are perfectly uniformly distributed across the
|
|
|
|
* relation's pages, which is too pessimistic --- any nonuniformity of
|
|
|
|
* distribution will reduce the number of pages we have to fetch.
|
|
|
|
* So, we guess-and-hope that these sources of error will more or less
|
|
|
|
* balance out.
|
|
|
|
*
|
2000-01-23 00:50:30 +01:00
|
|
|
* XXX need to add a penalty for nonsequential page fetches.
|
|
|
|
*
|
2000-01-09 01:26:47 +01:00
|
|
|
* XXX if the relation has recently been "clustered" using this index,
|
|
|
|
* then in fact the target tuples will be highly nonuniformly distributed,
|
|
|
|
* and we will be seriously overestimating the scan cost! Currently we
|
|
|
|
* have no way to know whether the relation has been clustered, nor how
|
|
|
|
* much it's been modified since the last clustering, so we ignore this
|
|
|
|
* effect. Would be nice to do better someday.
|
|
|
|
*--------------------
|
1999-04-30 06:01:44 +02:00
|
|
|
*/
|
2000-01-23 00:50:30 +01:00
|
|
|
|
|
|
|
reltuples = indexSelectivity * baserel->tuples;
|
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
relpages = reltuples;
|
|
|
|
if (baserel->pages > 0 && baserel->pages < relpages)
|
|
|
|
relpages = baserel->pages;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
/* disk costs for main table */
|
|
|
|
temp += relpages;
|
1997-04-09 04:13:41 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
/* CPU costs for heap tuples */
|
|
|
|
temp += cpu_page_weight * reltuples;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
Assert(temp >= 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1999-11-23 21:07:06 +01:00
|
|
|
/*
|
|
|
|
* cost_tidscan
|
|
|
|
* Determines and returns the cost of scanning a relation using tid-s.
|
|
|
|
*
|
|
|
|
* disk = number of tids
|
2000-01-09 01:26:47 +01:00
|
|
|
* cpu = CPU-PAGE-WEIGHT * number_of_tids
|
1999-11-23 21:07:06 +01:00
|
|
|
*/
|
|
|
|
Cost
|
2000-01-09 01:26:47 +01:00
|
|
|
cost_tidscan(RelOptInfo *baserel, List *tideval)
|
1999-11-23 21:07:06 +01:00
|
|
|
{
|
|
|
|
Cost temp = 0;
|
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
if (!enable_tidscan)
|
|
|
|
temp += disable_cost;
|
1999-11-23 21:07:06 +01:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
temp += (1.0 + cpu_page_weight) * length(tideval);
|
1999-11-23 21:07:06 +01:00
|
|
|
|
|
|
|
return temp;
|
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_sort
|
2000-01-09 01:26:47 +01:00
|
|
|
* Determines and returns the cost of sorting a relation.
|
|
|
|
*
|
|
|
|
* If the total volume of data to sort is less than SortMem, we will do
|
|
|
|
* an in-memory sort, which requires no I/O and about t*log2(t) tuple
|
2000-01-23 00:50:30 +01:00
|
|
|
* comparisons for t tuples. We use cpu_index_page_weight as the cost
|
2000-01-09 01:26:47 +01:00
|
|
|
* of a tuple comparison (is this reasonable, or do we need another
|
|
|
|
* basic parameter?).
|
|
|
|
*
|
|
|
|
* If the total volume exceeds SortMem, we switch to a tape-style merge
|
|
|
|
* algorithm. There will still be about t*log2(t) tuple comparisons in
|
|
|
|
* total, but we will also need to write and read each tuple once per
|
|
|
|
* merge pass. We expect about ceil(log6(r)) merge passes where r is the
|
|
|
|
* number of initial runs formed (log6 because tuplesort.c uses six-tape
|
|
|
|
* merging). Since the average initial run should be about twice SortMem,
|
|
|
|
* we have
|
|
|
|
* disk = 2 * p * ceil(log6(p / (2*SortMem)))
|
|
|
|
* cpu = CPU-INDEX-PAGE-WEIGHT * t * log2(t)
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1999-02-10 04:52:54 +01:00
|
|
|
* 'pathkeys' is a list of sort keys
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'tuples' is the number of tuples in the relation
|
|
|
|
* 'width' is the average tuple width in bytes
|
1999-04-30 06:01:44 +02:00
|
|
|
*
|
2000-01-09 01:26:47 +01:00
|
|
|
* NOTE: some callers currently pass NIL for pathkeys because they
|
1999-05-01 21:47:42 +02:00
|
|
|
* can't conveniently supply the sort keys. Since this routine doesn't
|
1999-04-30 06:01:44 +02:00
|
|
|
* currently do anything with pathkeys anyway, that doesn't matter...
|
1999-05-01 21:47:42 +02:00
|
|
|
* but if it ever does, it should react gracefully to lack of key data.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
2000-01-09 01:26:47 +01:00
|
|
|
cost_sort(List *pathkeys, double tuples, int width)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
2000-01-09 01:26:47 +01:00
|
|
|
double nbytes = relation_byte_size(tuples, width);
|
|
|
|
long sortmembytes = SortMem * 1024L;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
if (!enable_sort)
|
|
|
|
temp += disable_cost;
|
1999-04-30 06:01:44 +02:00
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
|
|
|
* We want to be sure the cost of a sort is never estimated as zero,
|
|
|
|
* even if passed-in tuple count is zero. Besides, mustn't do
|
|
|
|
* log(0)...
|
1999-04-30 06:01:44 +02:00
|
|
|
*/
|
2000-01-09 01:26:47 +01:00
|
|
|
if (tuples < 2.0)
|
|
|
|
tuples = 2.0;
|
1999-04-30 06:01:44 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
temp += cpu_index_page_weight * tuples * base_log(tuples, 2.0);
|
1999-04-30 06:01:44 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
if (nbytes > sortmembytes)
|
|
|
|
{
|
|
|
|
double npages = ceil(nbytes / BLCKSZ);
|
|
|
|
double nruns = nbytes / (sortmembytes * 2);
|
|
|
|
double log_runs = ceil(base_log(nruns, 6.0));
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
if (log_runs < 1.0)
|
|
|
|
log_runs = 1.0;
|
|
|
|
temp += 2 * npages * log_runs;
|
|
|
|
}
|
1999-04-30 06:01:44 +02:00
|
|
|
|
|
|
|
Assert(temp > 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
 * cost_result
 *	  Estimate the cost of writing 'tuples' tuples of 'width' bytes each
 *	  out to a result relation: one unit per page written plus the
 *	  per-tuple CPU charge.
 *
 * Compiled only when NOT_USED is defined.
 */
#ifdef NOT_USED
Cost
cost_result(double tuples, int width)
{
	Cost		total = 0;
	double		cpu_charge;

	/* disk: pages needed to hold the result */
	total += page_size(tuples, width);

	/* cpu: one tuple's worth of processing per tuple written */
	cpu_charge = cpu_page_weight * tuples;
	total += cpu_charge;

	Assert(total >= 0);
	return total;
}

#endif
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_nestloop
|
1997-09-07 07:04:48 +02:00
|
|
|
* Determines and returns the cost of joining two relations using the
|
|
|
|
* nested loop algorithm.
|
|
|
|
*
|
2000-01-09 01:26:47 +01:00
|
|
|
* 'outer_path' is the path for the outer relation
|
|
|
|
* 'inner_path' is the path for the inner relation
|
|
|
|
* 'is_indexjoin' is true if we are using an indexscan for the inner relation
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
2000-01-09 01:26:47 +01:00
|
|
|
cost_nestloop(Path *outer_path,
|
|
|
|
Path *inner_path,
|
1997-09-07 07:04:48 +02:00
|
|
|
bool is_indexjoin)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
if (!enable_nestloop)
|
|
|
|
temp += disable_cost;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
temp += outer_path->path_cost;
|
|
|
|
temp += outer_path->parent->rows * inner_path->path_cost;
|
|
|
|
|
|
|
|
Assert(temp >= 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* cost_mergejoin
|
2000-01-09 01:26:47 +01:00
|
|
|
* Determines and returns the cost of joining two relations using the
|
|
|
|
* merge join algorithm.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2000-01-09 01:26:47 +01:00
|
|
|
* 'outer_path' is the path for the outer relation
|
|
|
|
* 'inner_path' is the path for the inner relation
|
|
|
|
* 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
|
|
|
|
* to sort the outer and inner relations, or NIL if no explicit
|
|
|
|
* sort is needed because the source path is already ordered
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
2000-01-09 01:26:47 +01:00
|
|
|
cost_mergejoin(Path *outer_path,
|
|
|
|
Path *inner_path,
|
1997-09-08 23:56:23 +02:00
|
|
|
List *outersortkeys,
|
2000-01-09 01:26:47 +01:00
|
|
|
List *innersortkeys)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
if (!enable_mergejoin)
|
|
|
|
temp += disable_cost;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
/* cost of source data */
|
|
|
|
temp += outer_path->path_cost + inner_path->path_cost;
|
|
|
|
|
1999-04-30 06:01:44 +02:00
|
|
|
if (outersortkeys) /* do we need to sort? */
|
2000-01-09 01:26:47 +01:00
|
|
|
temp += cost_sort(outersortkeys,
|
|
|
|
outer_path->parent->rows,
|
|
|
|
outer_path->parent->width);
|
|
|
|
|
1999-04-30 06:01:44 +02:00
|
|
|
if (innersortkeys) /* do we need to sort? */
|
2000-01-09 01:26:47 +01:00
|
|
|
temp += cost_sort(innersortkeys,
|
|
|
|
inner_path->parent->rows,
|
|
|
|
inner_path->parent->width);
|
1999-04-30 06:01:44 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
/*
|
|
|
|
* Estimate the number of tuples to be processed in the mergejoin itself
|
|
|
|
* as one per tuple in the two source relations. This could be a drastic
|
|
|
|
* underestimate if there are many equal-keyed tuples in either relation,
|
|
|
|
* but we have no good way of estimating that...
|
|
|
|
*/
|
2000-01-23 00:50:30 +01:00
|
|
|
temp += cpu_page_weight * (outer_path->parent->rows +
|
|
|
|
inner_path->parent->rows);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
Assert(temp >= 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-06 06:00:17 +02:00
|
|
|
* cost_hashjoin
|
2000-01-09 01:26:47 +01:00
|
|
|
* Determines and returns the cost of joining two relations using the
|
|
|
|
* hash join algorithm.
|
1999-08-06 06:00:17 +02:00
|
|
|
*
|
2000-01-09 01:26:47 +01:00
|
|
|
* 'outer_path' is the path for the outer relation
|
|
|
|
* 'inner_path' is the path for the inner relation
|
|
|
|
* 'innerdisbursion' is an estimate of the disbursion statistic
|
1999-08-06 06:00:17 +02:00
|
|
|
* for the inner hash key.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
Cost
|
2000-01-09 01:26:47 +01:00
|
|
|
cost_hashjoin(Path *outer_path,
|
|
|
|
Path *inner_path,
|
|
|
|
Selectivity innerdisbursion)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
Cost temp = 0;
|
2000-01-09 01:26:47 +01:00
|
|
|
double outerbytes = relation_byte_size(outer_path->parent->rows,
|
|
|
|
outer_path->parent->width);
|
|
|
|
double innerbytes = relation_byte_size(inner_path->parent->rows,
|
|
|
|
inner_path->parent->width);
|
1999-08-06 06:00:17 +02:00
|
|
|
long hashtablebytes = SortMem * 1024L;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-01-23 00:50:30 +01:00
|
|
|
if (!enable_hashjoin)
|
|
|
|
temp += disable_cost;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-04-05 04:07:07 +02:00
|
|
|
/* cost of source data */
|
2000-01-09 01:26:47 +01:00
|
|
|
temp += outer_path->path_cost + inner_path->path_cost;
|
1999-04-05 04:07:07 +02:00
|
|
|
|
|
|
|
/* cost of computing hash function: must do it once per tuple */
|
2000-01-23 00:50:30 +01:00
|
|
|
temp += cpu_page_weight * (outer_path->parent->rows +
|
|
|
|
inner_path->parent->rows);
|
1999-04-05 04:07:07 +02:00
|
|
|
|
1999-08-06 06:00:17 +02:00
|
|
|
/* the number of tuple comparisons needed is the number of outer
|
|
|
|
* tuples times the typical hash bucket size, which we estimate
|
|
|
|
* conservatively as the inner disbursion times the inner tuple
|
2000-01-23 00:50:30 +01:00
|
|
|
* count. The cost per comparison is set at cpu_index_page_weight;
|
1999-08-06 06:00:17 +02:00
|
|
|
* is that reasonable, or do we need another basic parameter?
|
|
|
|
*/
|
2000-01-23 00:50:30 +01:00
|
|
|
temp += cpu_index_page_weight * outer_path->parent->rows *
|
2000-01-09 01:26:47 +01:00
|
|
|
(inner_path->parent->rows * innerdisbursion);
|
1999-04-05 04:07:07 +02:00
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
|
|
|
* if inner relation is too big then we will need to "batch" the join,
|
1999-04-05 04:07:07 +02:00
|
|
|
* which implies writing and reading most of the tuples to disk an
|
1999-08-06 06:00:17 +02:00
|
|
|
* extra time. Charge one cost unit per page of I/O.
|
|
|
|
*/
|
|
|
|
if (innerbytes > hashtablebytes)
|
2000-01-09 01:26:47 +01:00
|
|
|
temp += 2 * (page_size(outer_path->parent->rows,
|
|
|
|
outer_path->parent->width) +
|
|
|
|
page_size(inner_path->parent->rows,
|
|
|
|
inner_path->parent->width));
|
1999-08-06 06:00:17 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Bias against putting larger relation on inside. We don't want
|
|
|
|
* an absolute prohibition, though, since larger relation might have
|
|
|
|
* better disbursion --- and we can't trust the size estimates
|
|
|
|
* unreservedly, anyway.
|
1999-04-05 04:07:07 +02:00
|
|
|
*/
|
1999-08-06 06:00:17 +02:00
|
|
|
if (innerbytes > outerbytes)
|
|
|
|
temp *= 1.1; /* is this an OK fudge factor? */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
Assert(temp >= 0);
|
1998-09-01 05:29:17 +02:00
|
|
|
return temp;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
2000-02-07 05:41:04 +01:00
|
|
|
* set_baserel_size_estimates
|
|
|
|
* Set the size estimates for the given base relation.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2000-02-07 05:41:04 +01:00
|
|
|
* The rel's targetlist and restrictinfo list must have been constructed
|
|
|
|
* already.
|
|
|
|
*
|
|
|
|
* We set the following fields of the rel node:
|
|
|
|
* rows: the estimated number of output tuples (after applying
|
|
|
|
* restriction clauses).
|
|
|
|
* width: the estimated average output tuple width in bytes.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-01-09 01:26:47 +01:00
|
|
|
void
|
2000-02-07 05:41:04 +01:00
|
|
|
set_baserel_size_estimates(Query *root, RelOptInfo *rel)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-01-09 01:26:47 +01:00
|
|
|
/* Should only be applied to base relations */
|
|
|
|
Assert(length(rel->relids) == 1);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-01-23 03:07:00 +01:00
|
|
|
rel->rows = rel->tuples *
|
|
|
|
restrictlist_selectivity(root,
|
2000-02-07 05:41:04 +01:00
|
|
|
rel->baserestrictinfo,
|
2000-01-23 03:07:00 +01:00
|
|
|
lfirsti(rel->relids));
|
2000-01-09 01:26:47 +01:00
|
|
|
Assert(rel->rows >= 0);
|
|
|
|
|
|
|
|
set_rel_width(root, rel);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
2000-02-07 05:41:04 +01:00
|
|
|
* set_joinrel_size_estimates
|
|
|
|
* Set the size estimates for the given join relation.
|
|
|
|
*
|
|
|
|
* The rel's targetlist must have been constructed already, and a
|
|
|
|
* restriction clause list that matches the given component rels must
|
|
|
|
* be provided.
|
|
|
|
*
|
|
|
|
* Since there is more than one way to make a joinrel for more than two
|
|
|
|
* base relations, the results we get here could depend on which component
|
|
|
|
* rel pair is provided. In theory we should get the same answers no matter
|
|
|
|
* which pair is provided; in practice, since the selectivity estimation
|
|
|
|
* routines don't handle all cases equally well, we might not. But there's
|
|
|
|
* not much to be done about it. (Would it make sense to repeat the
|
|
|
|
* calculations for each pair of input rels that's encountered, and somehow
|
|
|
|
* average the results? Probably way more trouble than it's worth.)
|
|
|
|
*
|
|
|
|
* We set the same relnode fields as set_baserel_size_estimates() does.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-01-09 01:26:47 +01:00
|
|
|
void
|
2000-02-07 05:41:04 +01:00
|
|
|
set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
|
|
|
|
RelOptInfo *outer_rel,
|
|
|
|
RelOptInfo *inner_rel,
|
|
|
|
List *restrictlist)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-01-09 01:26:47 +01:00
|
|
|
double temp;
|
|
|
|
|
|
|
|
/* cartesian product */
|
2000-02-07 05:41:04 +01:00
|
|
|
temp = outer_rel->rows * inner_rel->rows;
|
2000-01-09 01:26:47 +01:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
|
|
|
* Apply join restrictivity. Note that we are only considering clauses
|
|
|
|
* that become restriction clauses at this join level; we are not
|
|
|
|
* double-counting them because they were not considered in estimating
|
|
|
|
* the sizes of the component rels.
|
|
|
|
*/
|
2000-01-23 03:07:00 +01:00
|
|
|
temp *= restrictlist_selectivity(root,
|
2000-02-07 05:41:04 +01:00
|
|
|
restrictlist,
|
2000-01-23 03:07:00 +01:00
|
|
|
0);
|
2000-01-09 01:26:47 +01:00
|
|
|
|
|
|
|
Assert(temp >= 0);
|
|
|
|
rel->rows = temp;
|
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
|
|
|
* We could apply set_rel_width() to compute the output tuple width
|
|
|
|
* from scratch, but at present it's always just the sum of the input
|
|
|
|
* widths, so why work harder than necessary? If relnode.c is ever
|
|
|
|
* taught to remove unneeded columns from join targetlists, go back
|
|
|
|
* to using set_rel_width here.
|
|
|
|
*/
|
|
|
|
rel->width = outer_rel->width + inner_rel->width;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
2000-01-09 01:26:47 +01:00
|
|
|
* set_rel_width
|
|
|
|
* Set the estimated output width of the relation.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-01-09 01:26:47 +01:00
|
|
|
static void
|
|
|
|
set_rel_width(Query *root, RelOptInfo *rel)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
int tuple_width = 0;
|
2000-01-09 01:26:47 +01:00
|
|
|
List *tle;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-01-09 01:26:47 +01:00
|
|
|
foreach(tle, rel->targetlist)
|
|
|
|
tuple_width += compute_attribute_width((TargetEntry *) lfirst(tle));
|
|
|
|
Assert(tuple_width >= 0);
|
|
|
|
rel->width = tuple_width;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* compute_attribute_width
|
1997-09-07 07:04:48 +02:00
|
|
|
* Given a target list entry, find the size in bytes of the attribute.
|
|
|
|
*
|
2000-01-09 01:26:47 +01:00
|
|
|
* If a field is variable-length, we make a default assumption. Would be
|
|
|
|
* better if VACUUM recorded some stats about the average field width...
|
2000-02-07 05:41:04 +01:00
|
|
|
* also, we have access to the atttypmod, but fail to use it...
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
static int
|
1997-09-08 23:56:23 +02:00
|
|
|
compute_attribute_width(TargetEntry *tlistentry)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1997-09-08 04:41:22 +02:00
|
|
|
int width = get_typlen(tlistentry->resdom->restype);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
if (width < 0)
|
1998-09-01 05:29:17 +02:00
|
|
|
return _DEFAULT_ATTRIBUTE_WIDTH_;
|
1997-09-07 07:04:48 +02:00
|
|
|
else
|
1998-09-01 05:29:17 +02:00
|
|
|
return width;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1999-04-05 04:07:07 +02:00
|
|
|
/*
|
|
|
|
* relation_byte_size
|
1999-05-25 18:15:34 +02:00
|
|
|
* Estimate the storage space in bytes for a given number of tuples
|
|
|
|
* of a given width (size in bytes).
|
1999-04-05 04:07:07 +02:00
|
|
|
*/
|
|
|
|
static double
|
2000-01-09 01:26:47 +01:00
|
|
|
relation_byte_size(double tuples, int width)
|
1999-04-05 04:07:07 +02:00
|
|
|
{
|
2000-01-09 01:26:47 +01:00
|
|
|
return tuples * ((double) (width + sizeof(HeapTupleData)));
|
1999-04-05 04:07:07 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* page_size
|
1997-09-07 07:04:48 +02:00
|
|
|
* Returns an estimate of the number of pages covered by a given
|
|
|
|
* number of tuples of a given width (size in bytes).
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-01-09 01:26:47 +01:00
|
|
|
static double
|
|
|
|
page_size(double tuples, int width)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-01-09 01:26:47 +01:00
|
|
|
return ceil(relation_byte_size(tuples, width) / BLCKSZ);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static double
|
|
|
|
base_log(double x, double b)
|
|
|
|
{
|
1998-09-01 05:29:17 +02:00
|
|
|
return log(x) / log(b);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|