While making the seq_page_cost changes, I was struck by the fact that
cost_nonsequential_access() is entirely inappropriate for its only remaining use, namely estimating I/O costs in cost_sort(). The routine was designed on the assumption that disk caching might eliminate the need for some re-reads on a random basis, but there is nothing random in that sense about sort's access pattern --- it will always be picking up the oldest outputs. If we had a good estimate of the effective cache size, we might consider charging zero for I/O unless the sort temp file size exceeds it, but that would put far too much faith in the parameter. Instead, just drop the logic in favor of a fixed compromise between seq_page_cost and random_page_cost per page of sort I/O.
This commit is contained in:
parent
b7af62e4a9
commit
7868590c61
|
@ -54,7 +54,7 @@
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.156 2006/06/05 02:49:58 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.157 2006/06/05 20:56:33 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
@ -175,55 +175,6 @@ cost_seqscan(Path *path, PlannerInfo *root,
|
||||||
path->total_cost = startup_cost + run_cost;
|
path->total_cost = startup_cost + run_cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* cost_nonsequential_access
|
|
||||||
* Estimate the cost of accessing one page at random from a relation
|
|
||||||
* (or sort temp file) of the given size in pages.
|
|
||||||
*
|
|
||||||
* The simplistic model that the cost is random_page_cost is what we want
|
|
||||||
* to use for large relations; but for small ones that is a serious
|
|
||||||
* overestimate because of the effects of caching. This routine tries to
|
|
||||||
* account for that.
|
|
||||||
*
|
|
||||||
* Unfortunately we don't have any good way of estimating the effective cache
|
|
||||||
* size we are working with --- we know that Postgres itself has NBuffers
|
|
||||||
* internal buffers, but the size of the kernel's disk cache is uncertain,
|
|
||||||
* and how much of it we get to use is even less certain. We punt the problem
|
|
||||||
* for now by assuming we are given an effective_cache_size parameter.
|
|
||||||
*
|
|
||||||
* Given a guesstimated cache size, we estimate the actual I/O cost per page
|
|
||||||
* with the entirely ad-hoc equations (writing relsize for
|
|
||||||
* relpages/effective_cache_size):
|
|
||||||
* if relsize >= 1:
|
|
||||||
* random_page_cost - (random_page_cost-seq_page_cost)/2 * (1/relsize)
|
|
||||||
* if relsize < 1:
|
|
||||||
* seq_page_cost + ((random_page_cost-seq_page_cost)/2) * relsize ** 2
|
|
||||||
* These give the right asymptotic behavior (=> seq_page_cost as relpages
|
|
||||||
* becomes small, => random_page_cost as it becomes large) and meet in the
|
|
||||||
* middle with the estimate that the cache is about 50% effective for a
|
|
||||||
* relation of the same size as effective_cache_size. (XXX this is probably
|
|
||||||
* all wrong, but I haven't been able to find any theory about how effective
|
|
||||||
* a disk cache should be presumed to be.)
|
|
||||||
*/
|
|
||||||
static Cost
|
|
||||||
cost_nonsequential_access(double relpages)
|
|
||||||
{
|
|
||||||
double relsize;
|
|
||||||
double random_delta;
|
|
||||||
|
|
||||||
/* don't crash on bad input data */
|
|
||||||
if (relpages <= 0.0 || effective_cache_size <= 0.0)
|
|
||||||
return random_page_cost;
|
|
||||||
|
|
||||||
relsize = relpages / effective_cache_size;
|
|
||||||
|
|
||||||
random_delta = (random_page_cost - seq_page_cost) * 0.5;
|
|
||||||
if (relsize >= 1.0)
|
|
||||||
return random_page_cost - random_delta / relsize;
|
|
||||||
else
|
|
||||||
return seq_page_cost + random_delta * relsize * relsize;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* cost_index
|
* cost_index
|
||||||
* Determines and returns the cost of scanning a relation using an index.
|
* Determines and returns the cost of scanning a relation using an index.
|
||||||
|
@ -371,10 +322,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* min_IO_cost corresponds to the perfectly correlated case (csquared=1),
|
* min_IO_cost corresponds to the perfectly correlated case (csquared=1),
|
||||||
* max_IO_cost to the perfectly uncorrelated case (csquared=0). Note that
|
* max_IO_cost to the perfectly uncorrelated case (csquared=0).
|
||||||
* we just charge random_page_cost per page in the uncorrelated case,
|
|
||||||
* rather than using cost_nonsequential_access, since we've already
|
|
||||||
* accounted for caching effects by using the Mackert model.
|
|
||||||
*/
|
*/
|
||||||
min_IO_cost = ceil(indexSelectivity * T) * seq_page_cost;
|
min_IO_cost = ceil(indexSelectivity * T) * seq_page_cost;
|
||||||
max_IO_cost = pages_fetched * random_page_cost;
|
max_IO_cost = pages_fetched * random_page_cost;
|
||||||
|
@ -778,7 +726,7 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
|
||||||
* disk traffic = 2 * relsize * ceil(logM(p / (2*work_mem)))
|
* disk traffic = 2 * relsize * ceil(logM(p / (2*work_mem)))
|
||||||
* cpu = comparison_cost * t * log2(t)
|
* cpu = comparison_cost * t * log2(t)
|
||||||
*
|
*
|
||||||
* The disk traffic is assumed to be half sequential and half random
|
* The disk traffic is assumed to be 3/4ths sequential and 1/4th random
|
||||||
* accesses (XXX can't we refine that guess?)
|
* accesses (XXX can't we refine that guess?)
|
||||||
*
|
*
|
||||||
* We charge two operator evals per tuple comparison, which should be in
|
* We charge two operator evals per tuple comparison, which should be in
|
||||||
|
@ -838,9 +786,9 @@ cost_sort(Path *path, PlannerInfo *root,
|
||||||
else
|
else
|
||||||
log_runs = 1.0;
|
log_runs = 1.0;
|
||||||
npageaccesses = 2.0 * npages * log_runs;
|
npageaccesses = 2.0 * npages * log_runs;
|
||||||
/* Assume half are sequential, half are not */
|
/* Assume 3/4ths of accesses are sequential, 1/4th are not */
|
||||||
startup_cost += npageaccesses *
|
startup_cost += npageaccesses *
|
||||||
(seq_page_cost + cost_nonsequential_access(npages)) * 0.5;
|
(seq_page_cost * 0.75 + random_page_cost * 0.25);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue