From 2f97105e93c286a1377ddc0e2c9c7a5af4a2eee3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 20 Jun 2023 17:47:36 -0400 Subject: [PATCH] Fix hash join when inner hashkey expressions contain Params. If the inner-side expressions contain PARAM_EXEC Params, we must re-hash whenever the values of those Params change. The executor mechanism for that exists already, but we failed to invoke it because finalize_plan() neglected to search the Hash.hashkeys field for Params. This allowed a previous scan's hash table to be re-used when it should not be, leading to rows missing from the join's output. (I believe incorrectly-included join rows are impossible however, since checking the real hashclauses would reject false matches.) This bug is very ancient, dating probably to d24d75ff1 of 7.4. Sadly, this simple fix depends on the plan representational changes made by 2abd7ae9b, so it will only work back to v12. I thought about trying to make some kind of hack for v11, but I'm leery of putting code significantly different from what is used in the newer branches into a nearly-EOL branch. Seeing that the bug escaped detection for a full twenty years, problematic cases must be rare; so I don't feel too awful about leaving v11 as-is. Per bug #17985 from Zuming Jiang. Back-patch to v12. Discussion: https://postgr.es/m/17985-748b66607acd432e@postgresql.org --- src/backend/optimizer/plan/subselect.c | 6 ++++- src/test/regress/expected/join_hash.out | 36 +++++++++++++++++++++++++ src/test/regress/sql/join_hash.sql | 19 +++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 8f86f7c714..f912c6b5eb 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2632,6 +2632,11 @@ finalize_plan(PlannerInfo *root, Plan *plan, &context); break; + case T_Hash: + finalize_primnode((Node *) ((Hash *) plan)->hashkeys, + &context); + break; + case T_Limit: finalize_primnode(((Limit *) plan)->limitOffset, &context); @@ -2727,7 +2732,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, break; case T_ProjectSet: - case T_Hash: case T_Material: case T_Sort: case T_IncrementalSort: diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out index 3a91c144a2..e1ec01afdc 100644 --- a/src/test/regress/expected/join_hash.out +++ b/src/test/regress/expected/join_hash.out @@ -1013,3 +1013,39 @@ WHERE (1 row) ROLLBACK; +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. +begin; +set local enable_hashjoin = on; +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> Seq Scan on int8_tbl i8 + -> Sort + Sort Key: t1.fivethous, i4.f1 + -> Hash Join + Hash Cond: (t1.fivethous = (i4.f1 + i8.q2)) + -> Seq Scan on tenk1 t1 + -> Hash + -> Seq Scan on int4_tbl i4 +(9 rows) + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + q2 | fivethous | f1 +-----+-----------+---- + 456 | 456 | 0 + 456 | 456 | 0 + 123 | 123 | 0 + 123 | 123 | 0 +(4 rows) + +rollback; diff --git a/src/test/regress/sql/join_hash.sql b/src/test/regress/sql/join_hash.sql index 68c1a8c7b6..7163422a03 100644 --- a/src/test/regress/sql/join_hash.sql +++ b/src/test/regress/sql/join_hash.sql @@ -538,3 +538,22 @@ WHERE AND hjtest_1.a <> hjtest_2.b; ROLLBACK; + +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. +begin; +set local enable_hashjoin = on; + +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +rollback;