From 2cca95e175463a7af95164498b889b1ea118583d Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 26 Jan 2024 16:18:58 +1300 Subject: [PATCH] Improve NestLoopParam generation for lateral subqueries It was possible in cases where we had a LATERAL joined subquery that when the same Var is mentioned in both the lateral references and in the outer Vars of the scan clauses that the given Var wouldn't be assigned to the same NestLoopParam. This could cause issues in Memoize as the cache key would reference the Var for the scan clauses but when the parameter for the lateral references changed some code in Memoize would see that some other parameter had changed that's not part of the cache key and end up purging the entire cache as a result, thinking the cache had become stale. This could result in a Nested Loop -> Memoize plan being quite inefficient as, in the worst case, the cache purging could result in never getting a cache hit. In no cases could this problem lead to incorrect query results. Here we switch the order of operations so that we create NestLoopParam for the lateral references first before doing replace_nestloop_params(). replace_nestloop_params() will find and reuse the existing NestLoopParam in cases where the Var exists in both locations. Author: Richard Guo Reviewed-by: Tom Lane, David Rowley Discussion: https://postgr.es/m/CAMbWs48XHJEK1Q1CzAQ7L9sTANTs9W1cepXu8%3DKc0quUL%2Btg4Q%40mail.gmail.com --- src/backend/optimizer/plan/createplan.c | 15 ++++++++-- src/test/regress/expected/memoize.out | 37 +++++++++++++++++++++++++ src/test/regress/sql/memoize.sql | 17 ++++++++++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ca619eab94..610f4a56d6 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -3720,13 +3720,22 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ scan_clauses = extract_actual_clauses(scan_clauses, false); - /* Replace any outer-relation variables with nestloop params */ + /* + * Replace any outer-relation variables with nestloop params. + * + * We must provide nestloop params for both lateral references of the + * subquery and outer vars in the scan_clauses. It's better to assign the + * former first, because that code path requires specific param IDs, while + * replace_nestloop_params can adapt to the IDs assigned by + * process_subquery_nestloop_params. This avoids possibly duplicating + * nestloop params when the same Var is needed for both reasons. + */ if (best_path->path.param_info) { - scan_clauses = (List *) - replace_nestloop_params(root, (Node *) scan_clauses); process_subquery_nestloop_params(root, rel->subplan_params); + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); } scan_plan = make_subqueryscan(tlist, diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out index f5202430f8..ca198ec3b8 100644 --- a/src/test/regress/expected/memoize.out +++ b/src/test/regress/expected/memoize.out @@ -92,6 +92,43 @@ WHERE t1.unique1 < 1000; 1000 | 9.5000000000000000 (1 row) +-- Try with LATERAL joins +SELECT explain_memoize(' +SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN +LATERAL ( + SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0 +) t2 +ON t1.two = t2.two +WHERE t1.unique1 < 10;', false); + explain_memoize +---------------------------------------------------------------------------------------------- + Aggregate (actual rows=1 loops=N) + -> Nested Loop Left Join (actual rows=25 loops=N) + -> Index Scan using tenk1_unique1 on tenk1 t1 (actual rows=10 loops=N) + Index Cond: (unique1 < 10) + -> Memoize (actual rows=2 loops=N) + Cache Key: t1.two, t1.two + Cache Mode: binary + Hits: 8 Misses: 2 Evictions: Zero Overflows: 0 Memory Usage: NkB + -> Subquery Scan on t2 (actual rows=2 loops=N) + Filter: (t1.two = t2.two) + Rows Removed by Filter: 2 + -> Index Scan using tenk1_unique1 on tenk1 t2_1 (actual rows=5 loops=N) + Index Cond: (unique1 < 5) +(13 rows) + +-- And check we get the expected results. +SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN +LATERAL ( + SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0 +) t2 +ON t1.two = t2.two +WHERE t1.unique1 < 10; + count | avg +-------+------------------------ + 25 | 0.40000000000000000000 +(1 row) + -- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions SET work_mem TO '64kB'; SET hash_mem_multiplier TO 1.0; diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql index 29ab1ea62d..3793c28593 100644 --- a/src/test/regress/sql/memoize.sql +++ b/src/test/regress/sql/memoize.sql @@ -57,6 +57,23 @@ LATERAL (SELECT t2.unique1 FROM tenk1 t2 WHERE t1.twenty = t2.unique1 OFFSET 0) t2 WHERE t1.unique1 < 1000; +-- Try with LATERAL joins +SELECT explain_memoize(' +SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN +LATERAL ( + SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0 +) t2 +ON t1.two = t2.two +WHERE t1.unique1 < 10;', false); + +-- And check we get the expected results. +SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN +LATERAL ( + SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0 +) t2 +ON t1.two = t2.two +WHERE t1.unique1 < 10; + -- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions SET work_mem TO '64kB'; SET hash_mem_multiplier TO 1.0;