diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index d1d6029f84..5e9ce229b8 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -472,7 +472,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) { SubPlan *subplan = node->subplan; PlanState *planstate = node->planstate; - int ncols = list_length(subplan->paramIds); + int ncols = node->numCols; ExprContext *innerecontext = node->innerecontext; MemoryContext oldcontext; long nbuckets; @@ -879,11 +879,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ALLOCSET_SMALL_SIZES); /* and a short-lived exprcontext for function evaluation */ sstate->innerecontext = CreateExprContext(estate); - /* Silly little array of column numbers 1..n */ - ncols = list_length(subplan->paramIds); - sstate->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber)); - for (i = 0; i < ncols; i++) - sstate->keyColIdx[i] = i + 1; /* * We use ExecProject to evaluate the lefthand and righthand @@ -915,13 +910,15 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) (int) nodeTag(subplan->testexpr)); oplist = NIL; /* keep compiler quiet */ } - Assert(list_length(oplist) == ncols); + ncols = list_length(oplist); lefttlist = righttlist = NIL; + sstate->numCols = ncols; + sstate->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber)); sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); + sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid)); sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); - sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid)); sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); /* we'll need the cross-type equality fns below, but not in sstate */ @@ -980,6 +977,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) /* Set collation */ sstate->tab_collations[i - 1] = opexpr->inputcollid; + /* keyColIdx is just column numbers 1..n */ + sstate->keyColIdx[i - 1] = i; + i++; } diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index efd0fbc21c..b23e9492f3 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -69,7 +69,7 @@ typedef struct inline_cte_walker_context static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, - Node *testexpr, bool adjust_testexpr, + Node *testexpr, List *testexpr_paramids, bool unknownEqFalse); static List *generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds); @@ -81,7 +81,8 @@ static Node *convert_testexpr(PlannerInfo *root, static Node *convert_testexpr_mutator(Node *node, convert_testexpr_context *context); static bool subplan_is_hashable(Plan *plan); -static bool testexpr_is_hashable(Node *testexpr); +static bool testexpr_is_hashable(Node *testexpr, List *param_ids); +static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids); static bool hash_ok_operator(OpExpr *expr); static bool contain_dml(Node *node); static bool contain_dml_walker(Node *node, void *context); @@ -237,7 +238,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, /* And convert to SubPlan or InitPlan format. */ result = build_subplan(root, plan, subroot, plan_params, subLinkType, subLinkId, - testexpr, true, isTopQual); + testexpr, NIL, isTopQual); /* * If it's a correlated EXISTS with an unimportant targetlist, we might be @@ -291,12 +292,11 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, plan_params, ANY_SUBLINK, 0, newtestexpr, - false, true)); + paramIds, + true)); /* Check we got what we expected */ Assert(hashplan->parParam == NIL); Assert(hashplan->useHashTable); - /* build_subplan won't have filled in paramIds */ - hashplan->paramIds = paramIds; /* Leave it to the executor to decide which plan to use */ asplan = makeNode(AlternativeSubPlan); @@ -319,7 +319,7 @@ static Node * build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, - Node *testexpr, bool adjust_testexpr, + Node *testexpr, List *testexpr_paramids, bool unknownEqFalse) { Node *result; @@ -484,10 +484,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, else { /* - * Adjust the Params in the testexpr, unless caller said it's not - * needed. + * Adjust the Params in the testexpr, unless caller already took care + * of it (as indicated by passing a list of Param IDs). */ - if (testexpr && adjust_testexpr) + if (testexpr && testexpr_paramids == NIL) { List *params; @@ -499,7 +499,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, params); } else + { splan->testexpr = testexpr; + splan->paramIds = testexpr_paramids; + } /* * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to @@ -511,7 +514,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, if (subLinkType == ANY_SUBLINK && splan->parParam == NIL && subplan_is_hashable(plan) && - testexpr_is_hashable(splan->testexpr)) + testexpr_is_hashable(splan->testexpr, splan->paramIds)) splan->useHashTable = true; /* @@ -733,24 +736,20 @@ subplan_is_hashable(Plan *plan) /* * testexpr_is_hashable: is an ANY SubLink's test expression hashable? + * + * To identify LHS vs RHS of the hash expression, we must be given the + * list of output Param IDs of the SubLink's subquery. */ static bool -testexpr_is_hashable(Node *testexpr) +testexpr_is_hashable(Node *testexpr, List *param_ids) { /* * The testexpr must be a single OpExpr, or an AND-clause containing only - * OpExprs. - * - * The combining operators must be hashable and strict. The need for - * hashability is obvious, since we want to use hashing. Without - * strictness, behavior in the presence of nulls is too unpredictable. We - * actually must assume even more than plain strictness: they can't yield - * NULL for non-null inputs, either (see nodeSubplan.c). However, hash - * indexes and hash joins assume that too. + * OpExprs, each of which satisfy test_opexpr_is_hashable(). */ if (testexpr && IsA(testexpr, OpExpr)) { - if (hash_ok_operator((OpExpr *) testexpr)) + if (test_opexpr_is_hashable((OpExpr *) testexpr, param_ids)) return true; } else if (is_andclause(testexpr)) @@ -763,7 +762,7 @@ testexpr_is_hashable(Node *testexpr) if (!IsA(andarg, OpExpr)) return false; - if (!hash_ok_operator((OpExpr *) andarg)) + if (!test_opexpr_is_hashable((OpExpr *) andarg, param_ids)) return false; } return true; @@ -772,6 +771,40 @@ testexpr_is_hashable(Node *testexpr) return false; } +static bool +test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids) +{ + /* + * The combining operator must be hashable and strict. The need for + * hashability is obvious, since we want to use hashing. Without + * strictness, behavior in the presence of nulls is too unpredictable. We + * actually must assume even more than plain strictness: it can't yield + * NULL for non-null inputs, either (see nodeSubplan.c). However, hash + * indexes and hash joins assume that too. + */ + if (!hash_ok_operator(testexpr)) + return false; + + /* + * The left and right inputs must belong to the outer and inner queries + * respectively; hence Params that will be supplied by the subquery must + * not appear in the LHS, and Vars of the outer query must not appear in + * the RHS. (Ordinarily, this must be true because of the way that the + * parser builds an ANY SubLink's testexpr ... but inlining of functions + * could have changed the expression's structure, so we have to check. + * Such cases do not occur often enough to be worth trying to optimize, so + * we don't worry about trying to commute the clause or anything like + * that; we just need to be sure not to build an invalid plan.) + */ + if (list_length(testexpr->args) != 2) + return false; + if (contain_exec_param((Node *) linitial(testexpr->args), param_ids)) + return false; + if (contain_var_clause((Node *) lsecond(testexpr->args))) + return false; + return true; +} + /* * Check expression is hashable + strict * diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 39ca47adf5..928752e6a9 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -108,6 +108,7 @@ static bool contain_volatile_functions_not_nextval_walker(Node *node, void *cont static bool max_parallel_hazard_walker(Node *node, max_parallel_hazard_context *context); static bool contain_nonstrict_functions_walker(Node *node, void *context); +static bool contain_exec_param_walker(Node *node, List *param_ids); static bool contain_context_dependent_node(Node *clause); static bool contain_context_dependent_node_walker(Node *node, int *flags); static bool contain_leaked_vars_walker(Node *node, void *context); @@ -1223,6 +1224,40 @@ contain_nonstrict_functions_walker(Node *node, void *context) context); } +/***************************************************************************** + * Check clauses for Params + *****************************************************************************/ + +/* + * contain_exec_param + * Recursively search for PARAM_EXEC Params within a clause. + * + * Returns true if the clause contains any PARAM_EXEC Param with a paramid + * appearing in the given list of Param IDs. Does not descend into + * subqueries! + */ +bool +contain_exec_param(Node *clause, List *param_ids) +{ + return contain_exec_param_walker(clause, param_ids); +} + +static bool +contain_exec_param_walker(Node *node, List *param_ids) +{ + if (node == NULL) + return false; + if (IsA(node, Param)) + { + Param *p = (Param *) node; + + if (p->paramkind == PARAM_EXEC && + list_member_int(param_ids, p->paramid)) + return true; + } + return expression_tree_walker(node, contain_exec_param_walker, param_ids); +} + /***************************************************************************** * Check clauses for context-dependent nodes *****************************************************************************/ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3d7e770851..a214a7bf34 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -862,6 +862,7 @@ typedef struct SubPlanState MemoryContext hashtablecxt; /* memory context containing hash tables */ MemoryContext hashtempcxt; /* temp memory context for hash tables */ ExprContext *innerecontext; /* econtext for computing inner tuples */ + /* each of the following fields is an array of length numCols: */ AttrNumber *keyColIdx; /* control data for hash tables */ Oid *tab_eq_funcoids; /* equality func oids for table * datatype(s) */ @@ -871,6 +872,7 @@ typedef struct SubPlanState FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */ FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */ ExprState *cur_eq_comp; /* equality comparator for LHS vs. table */ + int numCols; /* number of columns being hashed */ } SubPlanState; /* ---------------- diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 2f9aeec4a7..c87c194bcd 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -38,6 +38,7 @@ extern bool contain_subplans(Node *clause); extern char max_parallel_hazard(Query *parse); extern bool is_parallel_safe(PlannerInfo *root, Node *node); extern bool contain_nonstrict_functions(Node *clause); +extern bool contain_exec_param(Node *clause, List *param_ids); extern bool contain_leaked_vars(Node *clause); extern Relids find_nonnullable_rels(Node *clause); diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index f095809184..5996a69b0f 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -757,6 +757,7 @@ insert into outer_text values ('a', null); insert into outer_text values ('b', null); create temp table inner_text (c1 text, c2 text); insert into inner_text values ('a', null); +insert into inner_text values ('123', '456'); select * from outer_text where (f1, f2) not in (select * from inner_text); f1 | f2 ----+---- @@ -797,6 +798,82 @@ select '1'::text in (select '1'::name union all select '1'::name); t (1 row) +-- +-- Test that we don't try to use a hashed subplan if the simplified +-- testexpr isn't of the right shape +-- +-- this fails by default, of course +select * from int8_tbl where q1 in (select c1 from inner_text); +ERROR: operator does not exist: bigint = text +LINE 1: select * from int8_tbl where q1 in (select c1 from inner_tex... + ^ +HINT: No operator matches the given name and argument types. You might need to add explicit type casts. +begin; +-- make an operator to allow it to succeed +create function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $1::text = $2'; +create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text); +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); + QUERY PLAN +-------------------------------- + Seq Scan on int8_tbl + Filter: (hashed SubPlan 1) + SubPlan 1 + -> Seq Scan on inner_text +(4 rows) + +select * from int8_tbl where q1 in (select c1 from inner_text); + q1 | q2 +-----+------------------ + 123 | 456 + 123 | 4567890123456789 +(2 rows) + +-- inlining of this function results in unusual number of hash clauses, +-- which we can still cope with +create or replace function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $1::text = $2 and $1::text = $2'; +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); + QUERY PLAN +-------------------------------- + Seq Scan on int8_tbl + Filter: (hashed SubPlan 1) + SubPlan 1 + -> Seq Scan on inner_text +(4 rows) + +select * from int8_tbl where q1 in (select c1 from inner_text); + q1 | q2 +-----+------------------ + 123 | 456 + 123 | 4567890123456789 +(2 rows) + +-- inlining of this function causes LHS and RHS to be switched, +-- which we can't cope with, so hashing should be abandoned +create or replace function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $2 = $1::text'; +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); + QUERY PLAN +-------------------------------------- + Seq Scan on int8_tbl + Filter: (SubPlan 1) + SubPlan 1 + -> Materialize + -> Seq Scan on inner_text +(5 rows) + +select * from int8_tbl where q1 in (select c1 from inner_text); + q1 | q2 +-----+------------------ + 123 | 456 + 123 | 4567890123456789 +(2 rows) + +rollback; -- to get rid of the bogus operator -- -- Test case for planner bug with nested EXISTS handling -- diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 74ae1fefbe..b5da31ed4a 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -449,6 +449,7 @@ insert into outer_text values ('b', null); create temp table inner_text (c1 text, c2 text); insert into inner_text values ('a', null); +insert into inner_text values ('123', '456'); select * from outer_text where (f1, f2) not in (select * from inner_text); @@ -468,6 +469,46 @@ select 'foo'::text in (select 'bar'::name union all select 'bar'::name); select '1'::text in (select '1'::name union all select '1'::name); +-- +-- Test that we don't try to use a hashed subplan if the simplified +-- testexpr isn't of the right shape +-- + +-- this fails by default, of course +select * from int8_tbl where q1 in (select c1 from inner_text); + +begin; + +-- make an operator to allow it to succeed +create function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $1::text = $2'; + +create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text); + +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); +select * from int8_tbl where q1 in (select c1 from inner_text); + +-- inlining of this function results in unusual number of hash clauses, +-- which we can still cope with +create or replace function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $1::text = $2 and $1::text = $2'; + +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); +select * from int8_tbl where q1 in (select c1 from inner_text); + +-- inlining of this function causes LHS and RHS to be switched, +-- which we can't cope with, so hashing should be abandoned +create or replace function bogus_int8_text_eq(int8, text) returns boolean +language sql as 'select $2 = $1::text'; + +explain (costs off) +select * from int8_tbl where q1 in (select c1 from inner_text); +select * from int8_tbl where q1 in (select c1 from inner_text); + +rollback; -- to get rid of the bogus operator + -- -- Test case for planner bug with nested EXISTS handling --