Flush Memoize cache when non-key parameters change, take 2

It's possible that a subplan below a Memoize node contains a parameter
from above the Memoize node.  If this parameter changes then cache entries
may become out-dated due to the new parameter value.

Previously Memoize was mistakenly not aware of this.  We fix this here by
flushing the cache whenever a parameter that's not part of the cache
key changes.

Bug: #17213
Reported by: Elvis Pranskevichus
Author: David Rowley
Discussion: https://postgr.es/m/17213-988ed34b225a2862@postgresql.org
Backpatch-through: 14, where Memoize was added
This commit is contained in:
David Rowley 2021-11-24 23:29:14 +13:00
parent fb5961fd13
commit 411137a429
12 changed files with 145 additions and 3 deletions

View File

@ -367,6 +367,37 @@ remove_cache_entry(MemoizeState *mstate, MemoizeEntry *entry)
pfree(key);
}
/*
* cache_purge_all
* Remove all items from the cache
*/
static void
cache_purge_all(MemoizeState *mstate)
{
uint64 evictions = mstate->hashtable->members;
PlanState *pstate = (PlanState *) mstate;
/*
* Likely the most efficient way to remove all items is to just reset the
* memory context for the cache and then rebuild a fresh hash table. This
* saves having to remove each item one by one and pfree each cached tuple
*/
MemoryContextReset(mstate->tableContext);
/* Make the hash table the same size as the original size */
build_hash_table(mstate, ((Memoize *) pstate->plan)->est_entries);
/* reset the LRU list */
dlist_init(&mstate->lru_list);
mstate->last_tuple = NULL;
mstate->entry = NULL;
mstate->mem_used = 0;
/* XXX should we add something new to track these purges? */
mstate->stats.cache_evictions += evictions; /* Update Stats */
}
/*
* cache_reduce_memory
* Evict older and less recently used items from the cache in order to
@ -979,6 +1010,7 @@ ExecInitMemoize(Memoize *node, EState *estate, int eflags)
* getting the first tuple. This allows us to mark it as so.
*/
mstate->singlerow = node->singlerow;
mstate->keyparamids = node->keyparamids;
/*
* Record if the cache keys should be compared bit by bit, or logically
@ -1082,6 +1114,12 @@ ExecReScanMemoize(MemoizeState *node)
if (outerPlan->chgParam == NULL)
ExecReScan(outerPlan);
/*
* Purge the entire cache if a parameter changed that is not part of the
* cache key.
*/
if (bms_nonempty_difference(outerPlan->chgParam, node->keyparamids))
cache_purge_all(node);
}
/*

View File

@ -540,6 +540,8 @@ bms_overlap_list(const Bitmapset *a, const List *b)
/*
* bms_nonempty_difference - do sets have a nonempty difference?
*
* i.e., are any members set in 'a' that are not also set in 'b'.
*/
bool
bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)

View File

@ -973,6 +973,7 @@ _copyMemoize(const Memoize *from)
COPY_SCALAR_FIELD(singlerow);
COPY_SCALAR_FIELD(binary_mode);
COPY_SCALAR_FIELD(est_entries);
COPY_BITMAPSET_FIELD(keyparamids);
return newnode;
}

View File

@ -868,6 +868,7 @@ _outMemoize(StringInfo str, const Memoize *node)
WRITE_BOOL_FIELD(singlerow);
WRITE_BOOL_FIELD(binary_mode);
WRITE_UINT_FIELD(est_entries);
WRITE_BITMAPSET_FIELD(keyparamids);
}
static void

View File

@ -2232,6 +2232,7 @@ _readMemoize(void)
READ_BOOL_FIELD(singlerow);
READ_BOOL_FIELD(binary_mode);
READ_UINT_FIELD(est_entries);
READ_BITMAPSET_FIELD(keyparamids);
READ_DONE();
}

View File

@ -280,7 +280,7 @@ static Material *make_material(Plan *lefttree);
static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
Oid *collations, List *param_exprs,
bool singlerow, bool binary_mode,
uint32 est_entries);
uint32 est_entries, Bitmapset *keyparamids);
static WindowAgg *make_windowagg(List *tlist, Index winref,
int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@ -1586,6 +1586,7 @@ static Memoize *
create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
{
Memoize *plan;
Bitmapset *keyparamids;
Plan *subplan;
Oid *operators;
Oid *collations;
@ -1617,9 +1618,11 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
i++;
}
keyparamids = pull_paramids((Expr *) param_exprs);
plan = make_memoize(subplan, operators, collations, param_exprs,
best_path->singlerow, best_path->binary_mode,
best_path->est_entries);
best_path->est_entries, keyparamids);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@ -6420,7 +6423,7 @@ materialize_finished_plan(Plan *subplan)
static Memoize *
make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
List *param_exprs, bool singlerow, bool binary_mode,
uint32 est_entries)
uint32 est_entries, Bitmapset *keyparamids)
{
Memoize *node = makeNode(Memoize);
Plan *plan = &node->plan;
@ -6437,6 +6440,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
node->singlerow = singlerow;
node->binary_mode = binary_mode;
node->est_entries = est_entries;
node->keyparamids = keyparamids;
return node;
}

View File

@ -152,6 +152,7 @@ static Query *substitute_actual_srf_parameters(Query *expr,
int nargs, List *args);
static Node *substitute_actual_srf_parameters_mutator(Node *node,
substitute_actual_srf_parameters_context *context);
static bool pull_paramids_walker(Node *node, Bitmapset **context);
/*****************************************************************************
@ -5214,3 +5215,33 @@ substitute_actual_srf_parameters_mutator(Node *node,
substitute_actual_srf_parameters_mutator,
(void *) context);
}
/*
* pull_paramids
* Returns a Bitmapset containing the paramids of all Params in 'expr'.
*/
Bitmapset *
pull_paramids(Expr *expr)
{
Bitmapset *result = NULL;
(void) pull_paramids_walker((Node *) expr, &result);
return result;
}
static bool
pull_paramids_walker(Node *node, Bitmapset **context)
{
if (node == NULL)
return false;
if (IsA(node, Param))
{
Param *param = (Param *)node;
*context = bms_add_member(*context, param->paramid);
return false;
}
return expression_tree_walker(node, pull_paramids_walker,
(void *) context);
}

View File

@ -2113,6 +2113,8 @@ typedef struct MemoizeState
* by bit, false when using hash equality ops */
MemoizeInstrumentation stats; /* execution statistics */
SharedMemoizeInfo *shared_info; /* statistics for parallel workers */
Bitmapset *keyparamids; /* Param->paramids of expressions belonging to
* param_exprs */
} MemoizeState;
/* ----------------

View File

@ -804,6 +804,7 @@ typedef struct Memoize
uint32 est_entries; /* The maximum number of entries that the
* planner expects will fit in the cache, or 0
* if unknown */
Bitmapset *keyparamids; /* paramids from param_exprs */
} Memoize;
/* ----------------

View File

@ -53,4 +53,6 @@ extern void CommuteOpExpr(OpExpr *clause);
extern Query *inline_set_returning_function(PlannerInfo *root,
RangeTblEntry *rte);
extern Bitmapset *pull_paramids(Expr *expr);
#endif /* CLAUSES_H */

View File

@ -196,6 +196,45 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
(8 rows)
DROP TABLE strtest;
-- Exercise Memoize code that flushes the cache when a parameter changes which
-- is not part of the cache key.
-- Ensure we get a Memoize plan
EXPLAIN (COSTS OFF)
SELECT unique1 FROM tenk1 t0
WHERE unique1 < 3
AND EXISTS (
SELECT 1 FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.hundred
WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
QUERY PLAN
----------------------------------------------------------------
Index Scan using tenk1_unique1 on tenk1 t0
Index Cond: (unique1 < 3)
Filter: (SubPlan 1)
SubPlan 1
-> Nested Loop
-> Index Scan using tenk1_hundred on tenk1 t2
Filter: (t0.two <> four)
-> Memoize
Cache Key: t2.hundred
Cache Mode: logical
-> Index Scan using tenk1_unique1 on tenk1 t1
Index Cond: (unique1 = t2.hundred)
Filter: (t0.ten = twenty)
(13 rows)
-- Ensure the above query returns the correct result
SELECT unique1 FROM tenk1 t0
WHERE unique1 < 3
AND EXISTS (
SELECT 1 FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.hundred
WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
unique1
---------
2
(1 row)
RESET enable_seqscan;
RESET enable_mergejoin;
RESET work_mem;

View File

@ -103,6 +103,26 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
DROP TABLE strtest;
-- Exercise Memoize code that flushes the cache when a parameter changes which
-- is not part of the cache key.
-- Ensure we get a Memoize plan
EXPLAIN (COSTS OFF)
SELECT unique1 FROM tenk1 t0
WHERE unique1 < 3
AND EXISTS (
SELECT 1 FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.hundred
WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
-- Ensure the above query returns the correct result
SELECT unique1 FROM tenk1 t0
WHERE unique1 < 3
AND EXISTS (
SELECT 1 FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.hundred
WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
RESET enable_seqscan;
RESET enable_mergejoin;
RESET work_mem;