Have the planner account for the Memoize cache key memory

The Memoize executor node stores the cache key values along with the
tuple(s) found in the outer node which match each key value.  However,
when the planner estimated how many entries could be stored in the
cache, it didn't take into account that the cache key must also be
stored.  In many cases this won't make a large difference, as the key is
likely small in comparison to the tuple(s) being stored; it is possible,
however, to craft cases where the key takes more memory than the
tuple(s) stored for it, as in the sketch below.
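
For illustration (a hypothetical schema, not taken from the commit), one
such case is a join keyed on a long text column whose memoized inner
scan returns only a narrow integer, so each cache entry stores a wide
key next to a small tuple:

    -- t_outer.k and t_inner.k hold long text values (say ~1kB each),
    -- while the memoized inner scan returns only t_inner.id, a 4-byte
    -- integer.  If the planner chooses a nested loop with Memoize here,
    -- each cache entry keeps the ~1kB key alongside the narrow tuple(s).
    CREATE TABLE t_outer (id int, k text);
    CREATE TABLE t_inner (id int, k text);
    CREATE INDEX ON t_inner (k);

    SELECT o.id, i.id
    FROM t_outer o
    JOIN t_inner i ON i.k = o.k;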

Here we adjust the planner so that it takes into account the estimated
amount of memory needed to store the cache key.  Effectively, this
change reduces the estimated cache hit ratio when it's thought that not
all items will fit in the cache, making Memoize more expensive in such
cases.
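
As a rough worked example (every number below is an assumption for
illustration, not a value from the commit): with 4MB of hash memory, one
16-byte tuple per entry, about 100 bytes of per-entry overhead, and a
1kB cache key, counting the key shrinks the estimated entry limit
roughly tenfold, and with it the expected hit ratio:

    -- est_cache_entries = floor(hash_mem_bytes / est_entry_bytes)
    SELECT floor(4194304 / (16 + 100))        AS entries_ignoring_key,  -- 36157
           floor(4194304 / (16 + 100 + 1024)) AS entries_counting_key;  -- 3679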

The executor already takes into account the memory consumed by the cache
key, so here we only need to adjust the planner.

Discussion: https://postgr.es/m/CAApHDvqGErGuyBfQvBQrTCHDbzLTqoiW=_G9sOzeFxWEc_7auA@mail.gmail.com
Author: David Rowley
Date:   2023-03-20 16:26:04 +13:00
Commit: 785f709576
Parent: 579ee5df14
1 changed file with 59 additions and 47 deletions

src/backend/optimizer/path/costsize.c

@@ -189,6 +189,7 @@ static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
 static Cost append_nonpartial_cost(List *subpaths, int numpaths,
                                    int parallel_workers);
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
+static int32 get_expr_width(PlannerInfo *root, const Node *expr);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
 static double get_parallel_divisor(Path *path);
@@ -2481,6 +2482,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
                     Cost *rescan_startup_cost, Cost *rescan_total_cost)
 {
     EstimationInfo estinfo;
+    ListCell   *lc;
     Cost        input_startup_cost = mpath->subpath->startup_cost;
     Cost        input_total_cost = mpath->subpath->total_cost;
     double      tuples = mpath->subpath->rows;
@@ -2504,12 +2506,14 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
      * To provide us with better estimations on how many cache entries we can
      * store at once, we make a call to the executor here to ask it what
      * memory overheads there are for a single cache entry.
-     *
-     * XXX we also store the cache key, but that's not accounted for here.
      */
     est_entry_bytes = relation_byte_size(tuples, width) +
         ExecEstimateCacheEntryOverheadBytes(tuples);

+    /* include the estimated width for the cache keys */
+    foreach(lc, mpath->param_exprs)
+        est_entry_bytes += get_expr_width(root, (Node *) lfirst(lc));
+
     /* estimate on the upper limit of cache entries we can hold at once */
     est_cache_entries = floor(hash_mem_bytes / est_entry_bytes);
@@ -6021,54 +6025,13 @@ set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
     {
         Node       *node = (Node *) lfirst(lc);

-        if (IsA(node, Var))
+        tuple_width += get_expr_width(root, node);
+
+        /* For non-Vars, account for evaluation cost */
+        if (!IsA(node, Var))
         {
-            Var        *var = (Var *) node;
-            int32       item_width;
-
-            /* We should not see any upper-level Vars here */
-            Assert(var->varlevelsup == 0);
-
-            /* Try to get data from RelOptInfo cache */
-            if (!IS_SPECIAL_VARNO(var->varno) &&
-                var->varno < root->simple_rel_array_size)
-            {
-                RelOptInfo *rel = root->simple_rel_array[var->varno];
-
-                if (rel != NULL &&
-                    var->varattno >= rel->min_attr &&
-                    var->varattno <= rel->max_attr)
-                {
-                    int         ndx = var->varattno - rel->min_attr;
-
-                    if (rel->attr_widths[ndx] > 0)
-                    {
-                        tuple_width += rel->attr_widths[ndx];
-                        continue;
-                    }
-                }
-            }
-
-            /*
-             * No cached data available, so estimate using just the type info.
-             */
-            item_width = get_typavgwidth(var->vartype, var->vartypmod);
-            Assert(item_width > 0);
-            tuple_width += item_width;
-        }
-        else
-        {
-            /*
-             * Handle general expressions using type info.
-             */
-            int32       item_width;
             QualCost    cost;

-            item_width = get_typavgwidth(exprType(node), exprTypmod(node));
-            Assert(item_width > 0);
-            tuple_width += item_width;
-
-            /* Account for cost, too */
             cost_qual_eval_node(&cost, node, root);
             target->cost.startup += cost.startup;
             target->cost.per_tuple += cost.per_tuple;
@@ -6081,6 +6044,55 @@ set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
     return target;
 }

+/*
+ * get_expr_width
+ *      Estimate the width of the given expr attempting to use the width
+ *      cached in a Var's owning RelOptInfo, else fallback on the type's
+ *      average width when unable to or when the given Node is not a Var.
+ */
+static int32
+get_expr_width(PlannerInfo *root, const Node *expr)
+{
+    int32       width;
+
+    if (IsA(expr, Var))
+    {
+        const Var  *var = (const Var *) expr;
+
+        /* We should not see any upper-level Vars here */
+        Assert(var->varlevelsup == 0);
+
+        /* Try to get data from RelOptInfo cache */
+        if (!IS_SPECIAL_VARNO(var->varno) &&
+            var->varno < root->simple_rel_array_size)
+        {
+            RelOptInfo *rel = root->simple_rel_array[var->varno];
+
+            if (rel != NULL &&
+                var->varattno >= rel->min_attr &&
+                var->varattno <= rel->max_attr)
+            {
+                int         ndx = var->varattno - rel->min_attr;
+
+                if (rel->attr_widths[ndx] > 0)
+                    return rel->attr_widths[ndx];
+            }
+        }
+
+        /*
+         * No cached data available, so estimate using just the type info.
+         */
+        width = get_typavgwidth(var->vartype, var->vartypmod);
+        Assert(width > 0);
+        return width;
+    }
+
+    width = get_typavgwidth(exprType(expr), exprTypmod(expr));
+    Assert(width > 0);
+    return width;
+}
+
 /*
  * relation_byte_size
  *      Estimate the storage space in bytes for a given number of tuples