From 41c6f9db25b5e3a8bb8afbb7d6715cff541fd41e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 17 Jan 2020 16:17:17 -0500 Subject: [PATCH] Repair more failures with SubPlans in multi-row VALUES lists. Commit 9b63c13f0 turns out to have been fundamentally misguided: the parent node's subPlan list is by no means the only way in which a child SubPlan node can be hooked into the outer execution state. As shown in bug #16213 from Matt Jibson, we can also get short-lived tuple table slots added to the outer es_tupleTable list. At this point I have little faith that there aren't other possible connections as well; the long time it took to notice this problem shows that this isn't a heavily-exercised situation. Therefore, revert that fix, returning to the coding that passed a NULL parent plan pointer down to the transiently-built subexpressions. That gives us a pretty good guarantee that they won't hook into the outer executor state in any way. But then we need some other solution to make SubPlans work. Adopt the solution speculated about in the previous commit's log message: do expression initialization at plan startup for just those VALUES rows containing SubPlans, abandoning the goal of reclaiming memory intra-query for those rows. In practice it seems unlikely that queries containing a vast number of VALUES rows would be using SubPlans in them, so this should not give up much. (BTW, this test case also refutes my claim in connection with the prior commit that the issue only arises with use of LATERAL. That was just wrong: some variants of SubLink always produce SubPlans.) As with previous patch, back-patch to all supported branches. Discussion: https://postgr.es/m/16213-871ac3bc208ecf23@postgresql.org --- src/backend/executor/nodeValuesscan.c | 110 +++++++++++++++--------- src/include/nodes/execnodes.h | 10 ++- src/test/regress/expected/subselect.out | 27 ++++++ src/test/regress/sql/subselect.sql | 14 +++ 4 files changed, 118 insertions(+), 43 deletions(-) diff --git a/src/backend/executor/nodeValuesscan.c b/src/backend/executor/nodeValuesscan.c index 8d916a0303..88661217e9 100644 --- a/src/backend/executor/nodeValuesscan.c +++ b/src/backend/executor/nodeValuesscan.c @@ -26,6 +26,7 @@ #include "executor/executor.h" #include "executor/nodeValuesscan.h" #include "jit/jit.h" +#include "optimizer/clauses.h" #include "utils/expandeddatum.h" @@ -50,7 +51,7 @@ ValuesNext(ValuesScanState *node) EState *estate; ExprContext *econtext; ScanDirection direction; - List *exprlist; + int curr_idx; /* * get information from the estate and scan state @@ -67,19 +68,11 @@ ValuesNext(ValuesScanState *node) { if (node->curr_idx < node->array_len) node->curr_idx++; - if (node->curr_idx < node->array_len) - exprlist = node->exprlists[node->curr_idx]; - else - exprlist = NIL; } else { if (node->curr_idx >= 0) node->curr_idx--; - if (node->curr_idx >= 0) - exprlist = node->exprlists[node->curr_idx]; - else - exprlist = NIL; } /* @@ -90,16 +83,16 @@ ValuesNext(ValuesScanState *node) */ ExecClearTuple(slot); - if (exprlist) + curr_idx = node->curr_idx; + if (curr_idx >= 0 && curr_idx < node->array_len) { + List *exprlist = node->exprlists[curr_idx]; + List *exprstatelist = node->exprstatelists[curr_idx]; MemoryContext oldContext; - List *oldsubplans; - List *exprstatelist; Datum *values; bool *isnull; ListCell *lc; int resind; - int saved_jit_flags; /* * Get rid of any prior cycle's leftovers. We use ReScanExprContext @@ -109,38 +102,32 @@ ValuesNext(ValuesScanState *node) ReScanExprContext(econtext); /* - * Build the expression eval state in the econtext's per-tuple memory. - * This is a tad unusual, but we want to delete the eval state again - * when we move to the next row, to avoid growth of memory - * requirements over a long values list. + * Do per-VALUES-row work in the per-tuple context. */ oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); /* - * The expressions might contain SubPlans (this is currently only - * possible if there's a sub-select containing a LATERAL reference, - * otherwise sub-selects in a VALUES list should be InitPlans). Those - * subplans will want to hook themselves into our subPlan list, which - * would result in a corrupted list after we delete the eval state. We - * can work around this by saving and restoring the subPlan list. - * (There's no need for the functionality that would be enabled by - * having the list entries, since the SubPlans aren't going to be - * re-executed anyway.) + * Unless we already made the expression eval state for this row, + * build it in the econtext's per-tuple memory. This is a tad + * unusual, but we want to delete the eval state again when we move to + * the next row, to avoid growth of memory requirements over a long + * values list. For rows in which that won't work, we already built + * the eval state at plan startup. */ - oldsubplans = node->ss.ps.subPlan; - node->ss.ps.subPlan = NIL; - - /* - * As the expressions are only ever used once, disable JIT for them. - * This is worthwhile because it's common to insert significant - * amounts of data via VALUES(). - */ - saved_jit_flags = econtext->ecxt_estate->es_jit_flags; - econtext->ecxt_estate->es_jit_flags = PGJIT_NONE; - exprstatelist = ExecInitExprList(exprlist, &node->ss.ps); - econtext->ecxt_estate->es_jit_flags = saved_jit_flags; - - node->ss.ps.subPlan = oldsubplans; + if (exprstatelist == NIL) + { + /* + * Pass parent as NULL, not my plan node, because we don't want + * anything in this transient state linking into permanent state. + * The only expression type that might wish to do so is a SubPlan, + * and we already checked that there aren't any. + * + * Note that passing parent = NULL also disables JIT compilation + * of the expressions, which is a win, because they're only going + * to be used once under normal circumstances. + */ + exprstatelist = ExecInitExprList(exprlist, NULL); + } /* parser should have checked all sublists are the same length */ Assert(list_length(exprstatelist) == slot->tts_tupleDescriptor->natts); @@ -281,13 +268,52 @@ ExecInitValuesScan(ValuesScan *node, EState *estate, int eflags) scanstate->curr_idx = -1; scanstate->array_len = list_length(node->values_lists); - /* convert list of sublists into array of sublists for easy addressing */ + /* + * Convert the list of expression sublists into an array for easier + * addressing at runtime. Also, detect whether any sublists contain + * SubPlans; for just those sublists, go ahead and do expression + * initialization. (This avoids problems with SubPlans wanting to connect + * themselves up to the outer plan tree. Notably, EXPLAIN won't see the + * subplans otherwise; also we will have troubles with dangling pointers + * and/or leaked resources if we try to handle SubPlans the same as + * simpler expressions.) + */ scanstate->exprlists = (List **) palloc(scanstate->array_len * sizeof(List *)); + scanstate->exprstatelists = (List **) + palloc0(scanstate->array_len * sizeof(List *)); i = 0; foreach(vtl, node->values_lists) { - scanstate->exprlists[i++] = (List *) lfirst(vtl); + List *exprs = castNode(List, lfirst(vtl)); + + scanstate->exprlists[i] = exprs; + + /* + * We can avoid the cost of a contain_subplans() scan in the simple + * case where there are no SubPlans anywhere. + */ + if (estate->es_subplanstates && + contain_subplans((Node *) exprs)) + { + int saved_jit_flags; + + /* + * As these expressions are only used once, disable JIT for them. + * This is worthwhile because it's common to insert significant + * amounts of data via VALUES(). Note that this doesn't prevent + * use of JIT *within* a subplan, since that's initialized + * separately; this just affects the upper-level subexpressions. + */ + saved_jit_flags = estate->es_jit_flags; + estate->es_jit_flags = PGJIT_NONE; + + scanstate->exprstatelists[i] = ExecInitExprList(exprs, + &scanstate->ss.ps); + + estate->es_jit_flags = saved_jit_flags; + } + i++; } return scanstate; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index eaea1f3b0c..1f6f5bbc20 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1670,7 +1670,8 @@ typedef struct FunctionScanState * * rowcontext per-expression-list context * exprlists array of expression lists being evaluated - * array_len size of array + * exprstatelists array of expression state lists, for SubPlans only + * array_len size of above arrays * curr_idx current array index (0-based) * * Note: ss.ps.ps_ExprContext is used to evaluate any qual or projection @@ -1678,6 +1679,12 @@ typedef struct FunctionScanState * rowcontext, in which to build the executor expression state for each * Values sublist. Resetting this context lets us get rid of expression * state for each row, avoiding major memory leakage over a long values list. + * However, that doesn't work for sublists containing SubPlans, because a + * SubPlan has to be connected up to the outer plan tree to work properly. + * Therefore, for only those sublists containing SubPlans, we do expression + * state construction at executor start, and store those pointers in + * exprstatelists[]. NULL entries in that array correspond to simple + * subexpressions that are handled as described above. * ---------------- */ typedef struct ValuesScanState @@ -1685,6 +1692,7 @@ typedef struct ValuesScanState ScanState ss; /* its first field is NodeTag */ ExprContext *rowcontext; List **exprlists; + List **exprstatelists; int array_len; int curr_idx; } ValuesScanState; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 547686166a..71a677b768 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -926,6 +926,33 @@ where s.i < 10 and (select val.x) < 110; 10 (17 rows) +-- another variant of that (bug #16213) +explain (verbose, costs off) +select * from +(values + (3 not in (select * from (values (1), (2)) ss1)), + (false) +) ss; + QUERY PLAN +---------------------------------------- + Values Scan on "*VALUES*" + Output: "*VALUES*".column1 + SubPlan 1 + -> Values Scan on "*VALUES*_1" + Output: "*VALUES*_1".column1 +(5 rows) + +select * from +(values + (3 not in (select * from (values (1), (2)) ss1)), + (false) +) ss; + column1 +--------- + t + f +(2 rows) + -- -- Check sane behavior with nested IN SubLinks -- diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 3e119ce127..bd8d2f63d8 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -518,6 +518,20 @@ select val.x ) as val(x) where s.i < 10 and (select val.x) < 110; +-- another variant of that (bug #16213) +explain (verbose, costs off) +select * from +(values + (3 not in (select * from (values (1), (2)) ss1)), + (false) +) ss; + +select * from +(values + (3 not in (select * from (values (1), (2)) ss1)), + (false) +) ss; + -- -- Check sane behavior with nested IN SubLinks --