diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 5f5c91366f..0c85ac785f 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.78 2008/08/07 01:11:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.79 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1464,7 +1464,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - else if (is_subplan(node)) + else if (IsA(node, SubPlan)) { /* Extra work needed here if we ever need this case */ elog(ERROR, "already-planned subqueries not supported"); diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 7761485d7c..57b2df9c00 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.231 2008/05/15 00:17:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.232 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3957,6 +3957,19 @@ ExecInitExpr(Expr *node, PlanState *parent) state = (ExprState *) sstate; } break; + case T_AlternativeSubPlan: + { + AlternativeSubPlan *asplan = (AlternativeSubPlan *) node; + AlternativeSubPlanState *asstate; + + if (!parent) + elog(ERROR, "AlternativeSubPlan found with no parent plan"); + + asstate = ExecInitAlternativeSubPlan(asplan, parent); + + state = (ExprState *) asstate; + } + break; case T_FieldSelect: { FieldSelect *fselect = (FieldSelect *) node; diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 0c9ec82b01..3ebb045f92 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.93 2008/05/12 00:00:49 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.94 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,6 +29,14 @@ #include "utils/memutils.h" +static Datum ExecSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull, + ExprDoneCond *isDone); +static Datum ExecAlternativeSubPlan(AlternativeSubPlanState *node, + ExprContext *econtext, + bool *isNull, + ExprDoneCond *isDone); static Datum ExecHashSubPlan(SubPlanState *node, ExprContext *econtext, bool *isNull); @@ -45,7 +53,7 @@ static bool slotNoNulls(TupleTableSlot *slot); * ExecSubPlan * ---------------------------------------------------------------- */ -Datum +static Datum ExecSubPlan(SubPlanState *node, ExprContext *econtext, bool *isNull, @@ -1066,3 +1074,83 @@ ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent) parent->chgParam = bms_add_member(parent->chgParam, paramid); } } + + +/* + * ExecInitAlternativeSubPlan + * + * Initialize for execution of one of a set of alternative subplans. + */ +AlternativeSubPlanState * +ExecInitAlternativeSubPlan(AlternativeSubPlan *asplan, PlanState *parent) +{ + AlternativeSubPlanState *asstate = makeNode(AlternativeSubPlanState); + double num_calls; + SubPlan *subplan1; + SubPlan *subplan2; + Cost cost1; + Cost cost2; + + asstate->xprstate.evalfunc = (ExprStateEvalFunc) ExecAlternativeSubPlan; + asstate->xprstate.expr = (Expr *) asplan; + + /* + * Initialize subplans. (Can we get away with only initializing the + * one we're going to use?) + */ + asstate->subplans = (List *) ExecInitExpr((Expr *) asplan->subplans, + parent); + + /* + * Select the one to be used. For this, we need an estimate of the + * number of executions of the subplan. We use the number of output + * rows expected from the parent plan node. This is a good estimate + * if we are in the parent's targetlist, and an underestimate (but + * probably not by more than a factor of 2) if we are in the qual. + */ + num_calls = parent->plan->plan_rows; + + /* + * The planner saved enough info so that we don't have to work very hard + * to estimate the total cost, given the number-of-calls estimate. + */ + Assert(list_length(asplan->subplans) == 2); + subplan1 = (SubPlan *) linitial(asplan->subplans); + subplan2 = (SubPlan *) lsecond(asplan->subplans); + + cost1 = subplan1->startup_cost + num_calls * subplan1->per_call_cost; + cost2 = subplan2->startup_cost + num_calls * subplan2->per_call_cost; + + if (cost1 < cost2) + asstate->active = 0; + else + asstate->active = 1; + + return asstate; +} + +/* + * ExecAlternativeSubPlan + * + * Execute one of a set of alternative subplans. + * + * Note: in future we might consider changing to different subplans on the + * fly, in case the original rowcount estimate turns out to be way off. + */ +static Datum +ExecAlternativeSubPlan(AlternativeSubPlanState *node, + ExprContext *econtext, + bool *isNull, + ExprDoneCond *isDone) +{ + /* Just pass control to the active subplan */ + SubPlanState *activesp = (SubPlanState *) list_nth(node->subplans, + node->active); + + Assert(IsA(activesp, SubPlanState)); + + return ExecSubPlan(activesp, + econtext, + isNull, + isDone); +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 83322320b0..4a30a3d822 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.400 2008/08/14 18:47:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.401 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -969,6 +969,21 @@ _copySubPlan(SubPlan *from) COPY_NODE_FIELD(setParam); COPY_NODE_FIELD(parParam); COPY_NODE_FIELD(args); + COPY_SCALAR_FIELD(startup_cost); + COPY_SCALAR_FIELD(per_call_cost); + + return newnode; +} + +/* + * _copyAlternativeSubPlan + */ +static AlternativeSubPlan * +_copyAlternativeSubPlan(AlternativeSubPlan *from) +{ + AlternativeSubPlan *newnode = makeNode(AlternativeSubPlan); + + COPY_NODE_FIELD(subplans); return newnode; } @@ -3146,6 +3161,9 @@ copyObject(void *from) case T_SubPlan: retval = _copySubPlan(from); break; + case T_AlternativeSubPlan: + retval = _copyAlternativeSubPlan(from); + break; case T_FieldSelect: retval = _copyFieldSelect(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 831a5fe19b..3a25111c60 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.327 2008/08/14 18:47:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.328 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -314,6 +314,16 @@ _equalSubPlan(SubPlan *a, SubPlan *b) COMPARE_NODE_FIELD(setParam); COMPARE_NODE_FIELD(parParam); COMPARE_NODE_FIELD(args); + COMPARE_SCALAR_FIELD(startup_cost); + COMPARE_SCALAR_FIELD(per_call_cost); + + return true; +} + +static bool +_equalAlternativeSubPlan(AlternativeSubPlan *a, AlternativeSubPlan *b) +{ + COMPARE_NODE_FIELD(subplans); return true; } @@ -2098,6 +2108,9 @@ equal(void *a, void *b) case T_SubPlan: retval = _equalSubPlan(a, b); break; + case T_AlternativeSubPlan: + retval = _equalAlternativeSubPlan(a, b); + break; case T_FieldSelect: retval = _equalFieldSelect(a, b); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 13c824389e..76f8da1bab 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.334 2008/08/14 18:47:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.335 2008/08/22 00:16:03 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -846,6 +846,16 @@ _outSubPlan(StringInfo str, SubPlan *node) WRITE_NODE_FIELD(setParam); WRITE_NODE_FIELD(parParam); WRITE_NODE_FIELD(args); + WRITE_FLOAT_FIELD(startup_cost, "%.2f"); + WRITE_FLOAT_FIELD(per_call_cost, "%.2f"); +} + +static void +_outAlternativeSubPlan(StringInfo str, AlternativeSubPlan *node) +{ + WRITE_NODE_TYPE("ALTERNATIVESUBPLAN"); + + WRITE_NODE_FIELD(subplans); } static void @@ -2208,6 +2218,9 @@ _outNode(StringInfo str, void *obj) case T_SubPlan: _outSubPlan(str, obj); break; + case T_AlternativeSubPlan: + _outAlternativeSubPlan(str, obj); + break; case T_FieldSelect: _outFieldSelect(str, obj); break; diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 6db42c5f58..9ffdc32e77 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.93 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -680,7 +680,8 @@ clause_selectivity(PlannerInfo *root, s1 = (Selectivity) 0.3333333; } #ifdef NOT_USED - else if (is_subplan(clause)) + else if (IsA(clause, SubPlan) || + IsA(clause, AlternativeSubPlan)) { /* * Just for the moment! FIX ME! - vadim 02/04/98 diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 14c3cf1f21..487de9ee93 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.195 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1868,6 +1868,93 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) } +/* + * cost_subplan + * Figure the costs for a SubPlan (or initplan). + * + * Note: we could dig the subplan's Plan out of the root list, but in practice + * all callers have it handy already, so we make them pass it. + */ +void +cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan) +{ + QualCost sp_cost; + + /* Figure any cost for evaluating the testexpr */ + cost_qual_eval(&sp_cost, + make_ands_implicit((Expr *) subplan->testexpr), + root); + + if (subplan->useHashTable) + { + /* + * If we are using a hash table for the subquery outputs, then the + * cost of evaluating the query is a one-time cost. We charge one + * cpu_operator_cost per tuple for the work of loading the hashtable, + * too. + */ + sp_cost.startup += plan->total_cost + + cpu_operator_cost * plan->plan_rows; + + /* + * The per-tuple costs include the cost of evaluating the lefthand + * expressions, plus the cost of probing the hashtable. We already + * accounted for the lefthand expressions as part of the testexpr, + * and will also have counted one cpu_operator_cost for each + * comparison operator. That is probably too low for the probing + * cost, but it's hard to make a better estimate, so live with it for + * now. + */ + } + else + { + /* + * Otherwise we will be rescanning the subplan output on each + * evaluation. We need to estimate how much of the output we will + * actually need to scan. NOTE: this logic should agree with the + * tuple_fraction estimates used by make_subplan() in + * plan/subselect.c. + */ + Cost plan_run_cost = plan->total_cost - plan->startup_cost; + + if (subplan->subLinkType == EXISTS_SUBLINK) + { + /* we only need to fetch 1 tuple */ + sp_cost.per_tuple += plan_run_cost / plan->plan_rows; + } + else if (subplan->subLinkType == ALL_SUBLINK || + subplan->subLinkType == ANY_SUBLINK) + { + /* assume we need 50% of the tuples */ + sp_cost.per_tuple += 0.50 * plan_run_cost; + /* also charge a cpu_operator_cost per row examined */ + sp_cost.per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost; + } + else + { + /* assume we need all tuples */ + sp_cost.per_tuple += plan_run_cost; + } + + /* + * Also account for subplan's startup cost. If the subplan is + * uncorrelated or undirect correlated, AND its topmost node is a Sort + * or Material node, assume that we'll only need to pay its startup + * cost once; otherwise assume we pay the startup cost every time. + */ + if (subplan->parParam == NIL && + (IsA(plan, Sort) || + IsA(plan, Material))) + sp_cost.startup += plan->startup_cost; + else + sp_cost.per_tuple += plan->startup_cost; + } + + subplan->startup_cost = sp_cost.startup; + subplan->per_call_cost = sp_cost.per_tuple; +} + + /* * cost_qual_eval * Estimate the CPU costs of evaluating a WHERE clause. @@ -2066,79 +2153,30 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) * subplan will be executed on each evaluation, so charge accordingly. * (Sub-selects that can be executed as InitPlans have already been * removed from the expression.) - * - * An exception occurs when we have decided we can implement the - * subplan by hashing. */ SubPlan *subplan = (SubPlan *) node; - Plan *plan = planner_subplan_get_plan(context->root, subplan); - if (subplan->useHashTable) - { - /* - * If we are using a hash table for the subquery outputs, then the - * cost of evaluating the query is a one-time cost. We charge one - * cpu_operator_cost per tuple for the work of loading the - * hashtable, too. - */ - context->total.startup += plan->total_cost + - cpu_operator_cost * plan->plan_rows; + context->total.startup += subplan->startup_cost; + context->total.per_tuple += subplan->per_call_cost; - /* - * The per-tuple costs include the cost of evaluating the lefthand - * expressions, plus the cost of probing the hashtable. Recursion - * into the testexpr will handle the lefthand expressions - * properly, and will count one cpu_operator_cost for each - * comparison operator. That is probably too low for the probing - * cost, but it's hard to make a better estimate, so live with it - * for now. - */ - } - else - { - /* - * Otherwise we will be rescanning the subplan output on each - * evaluation. We need to estimate how much of the output we will - * actually need to scan. NOTE: this logic should agree with - * get_initplan_cost, below, and with the estimates used by - * make_subplan() in plan/subselect.c. - */ - Cost plan_run_cost = plan->total_cost - plan->startup_cost; + /* + * We don't want to recurse into the testexpr, because it was already + * counted in the SubPlan node's costs. So we're done. + */ + return false; + } + else if (IsA(node, AlternativeSubPlan)) + { + /* + * Arbitrarily use the first alternative plan for costing. (We should + * certainly only include one alternative, and we don't yet have + * enough information to know which one the executor is most likely + * to use.) + */ + AlternativeSubPlan *asplan = (AlternativeSubPlan *) node; - if (subplan->subLinkType == EXISTS_SUBLINK) - { - /* we only need to fetch 1 tuple */ - context->total.per_tuple += plan_run_cost / plan->plan_rows; - } - else if (subplan->subLinkType == ALL_SUBLINK || - subplan->subLinkType == ANY_SUBLINK) - { - /* assume we need 50% of the tuples */ - context->total.per_tuple += 0.50 * plan_run_cost; - /* also charge a cpu_operator_cost per row examined */ - context->total.per_tuple += - 0.50 * plan->plan_rows * cpu_operator_cost; - } - else - { - /* assume we need all tuples */ - context->total.per_tuple += plan_run_cost; - } - - /* - * Also account for subplan's startup cost. If the subplan is - * uncorrelated or undirect correlated, AND its topmost node is a - * Sort or Material node, assume that we'll only need to pay its - * startup cost once; otherwise assume we pay the startup cost - * every time. - */ - if (subplan->parParam == NIL && - (IsA(plan, Sort) || - IsA(plan, Material))) - context->total.startup += plan->startup_cost; - else - context->total.per_tuple += plan->startup_cost; - } + return cost_qual_eval_walker((Node *) linitial(asplan->subplans), + context); } /* recurse into children */ @@ -2147,43 +2185,6 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) } -/* - * get_initplan_cost - * Get the expected cost of evaluating an initPlan. - * - * Keep this in sync with cost_qual_eval_walker's handling of subplans, above, - * and with the estimates used by make_subplan() in plan/subselect.c. - */ -Cost -get_initplan_cost(PlannerInfo *root, SubPlan *subplan) -{ - Cost result; - Plan *plan = planner_subplan_get_plan(root, subplan); - - /* initPlans never use hashtables */ - Assert(!subplan->useHashTable); - /* they are never ALL or ANY, either */ - Assert(!(subplan->subLinkType == ALL_SUBLINK || - subplan->subLinkType == ANY_SUBLINK)); - - if (subplan->subLinkType == EXISTS_SUBLINK) - { - /* we only need to fetch 1 tuple */ - Cost plan_run_cost = plan->total_cost - plan->startup_cost; - - result = plan->startup_cost; - result += plan_run_cost / plan->plan_rows; - } - else - { - /* assume we need all tuples */ - result = plan->total_cost; - } - - return result; -} - - /* * approx_tuple_count * Quick-and-dirty estimation of the number of join rows passing diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 6e6e340759..ee2d936b34 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.137 2008/08/20 19:58:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.138 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -52,6 +52,9 @@ typedef struct finalize_primnode_context } finalize_primnode_context; +static Node *build_subplan(PlannerInfo *root, Plan *plan, List *rtable, + SubLinkType subLinkType, Node *testexpr, + bool adjust_testexpr, bool unknownEqFalse); static List *generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds); static List *generate_subquery_vars(PlannerInfo *root, List *tlist, @@ -61,9 +64,12 @@ static Node *convert_testexpr(PlannerInfo *root, List *subst_nodes); static Node *convert_testexpr_mutator(Node *node, convert_testexpr_context *context); -static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan); +static bool subplan_is_hashable(Plan *plan); +static bool testexpr_is_hashable(Node *testexpr); static bool hash_ok_operator(OpExpr *expr); static bool simplify_EXISTS_query(Query *query); +static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, + Node **testexpr, List **paramIds); static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root); static Node *process_sublinks_mutator(Node *node, process_sublinks_context *context); @@ -229,9 +235,13 @@ get_first_col_type(Plan *plan) /* * Convert a SubLink (as created by the parser) into a SubPlan. * - * We are given the original SubLink and the already-processed testexpr - * (use this instead of the SubLink's own field). We are also told if - * this expression appears at top level of a WHERE/HAVING qual. + * We are given the SubLink's contained query, type, and testexpr. We are + * also told if this expression appears at top level of a WHERE/HAVING qual. + * + * Note: we assume that the testexpr has been AND/OR flattened (actually, + * it's been through eval_const_expressions), but not converted to + * implicit-AND form; and any SubLinks in it should already have been + * converted to SubPlans. The subquery is as yet untouched, however. * * The result is whatever we need to substitute in place of the SubLink * node in the executable expression. This will be either the SubPlan @@ -240,16 +250,14 @@ get_first_col_type(Plan *plan) * tree containing InitPlan Param nodes. */ static Node * -make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) +make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, + Node *testexpr, bool isTopQual) { - Query *subquery = (Query *) (slink->subselect); + Query *subquery; + bool simple_exists = false; double tuple_fraction; - SubPlan *splan; Plan *plan; PlannerInfo *subroot; - bool isInitPlan; - Bitmapset *tmpset; - int paramid; Node *result; /* @@ -258,38 +266,37 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) * same sub-Query node, but the planner wants to scribble on the Query. * Try to clean this up when we do querytree redesign... */ - subquery = (Query *) copyObject(subquery); + subquery = (Query *) copyObject(orig_subquery); /* * If it's an EXISTS subplan, we might be able to simplify it. */ - if (slink->subLinkType == EXISTS_SUBLINK) - (void) simplify_EXISTS_query(subquery); + if (subLinkType == EXISTS_SUBLINK) + simple_exists = simplify_EXISTS_query(subquery); /* * For an EXISTS subplan, tell lower-level planner to expect that only the * first tuple will be retrieved. For ALL and ANY subplans, we will be - * able to stop evaluating if the test condition fails, so very often not - * all the tuples will be retrieved; for lack of a better idea, specify - * 50% retrieval. For EXPR and ROWCOMPARE subplans, use default behavior - * (we're only expecting one row out, anyway). + * able to stop evaluating if the test condition fails or matches, so very + * often not all the tuples will be retrieved; for lack of a better idea, + * specify 50% retrieval. For EXPR and ROWCOMPARE subplans, use default + * behavior (we're only expecting one row out, anyway). * - * NOTE: if you change these numbers, also change cost_qual_eval_walker() - * and get_initplan_cost() in path/costsize.c. + * NOTE: if you change these numbers, also change cost_subplan() in + * path/costsize.c. * - * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or - * materialize its result below. In that case it would've been better to - * specify full retrieval. At present, however, we can only detect - * correlation or lack of it after we've made the subplan :-(. Perhaps - * detection of correlation should be done as a separate step. Meanwhile, - * we don't want to be too optimistic about the percentage of tuples - * retrieved, for fear of selecting a plan that's bad for the - * materialization case. + * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash + * its output. In that case it would've been better to specify full + * retrieval. At present, however, we can only check hashability after + * we've made the subplan :-(. (Determining whether it'll fit in work_mem + * is the really hard part.) Therefore, we don't want to be too + * optimistic about the percentage of tuples retrieved, for fear of + * selecting a plan that's bad for the materialization case. */ - if (slink->subLinkType == EXISTS_SUBLINK) + if (subLinkType == EXISTS_SUBLINK) tuple_fraction = 1.0; /* just like a LIMIT 1 */ - else if (slink->subLinkType == ALL_SUBLINK || - slink->subLinkType == ANY_SUBLINK) + else if (subLinkType == ALL_SUBLINK || + subLinkType == ANY_SUBLINK) tuple_fraction = 0.5; /* 50% */ else tuple_fraction = 0.0; /* default behavior */ @@ -302,24 +309,104 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) tuple_fraction, &subroot); + /* And convert to SubPlan or InitPlan format. */ + result = build_subplan(root, plan, subroot->parse->rtable, + subLinkType, testexpr, true, isTopQual); + /* - * Initialize the SubPlan node. Note plan_id isn't set yet. + * If it's a correlated EXISTS with an unimportant targetlist, we might be + * able to transform it to the equivalent of an IN and then implement it + * by hashing. We don't have enough information yet to tell which way + * is likely to be better (it depends on the expected number of executions + * of the EXISTS qual, and we are much too early in planning the outer + * query to be able to guess that). So we generate both plans, if + * possible, and leave it to the executor to decide which to use. + */ + if (simple_exists && IsA(result, SubPlan)) + { + Node *newtestexpr; + List *paramIds; + + /* Make a second copy of the original subquery */ + subquery = (Query *) copyObject(orig_subquery); + /* and re-simplify */ + simple_exists = simplify_EXISTS_query(subquery); + Assert(simple_exists); + /* See if it can be converted to an ANY query */ + subquery = convert_EXISTS_to_ANY(root, subquery, + &newtestexpr, ¶mIds); + if (subquery) + { + /* Generate the plan for the ANY subquery; we'll need all rows */ + plan = subquery_planner(root->glob, subquery, + root->query_level + 1, + 0.0, + &subroot); + + /* Now we can check if it'll fit in work_mem */ + if (subplan_is_hashable(plan)) + { + SubPlan *hashplan; + AlternativeSubPlan *asplan; + + /* OK, convert to SubPlan format. */ + hashplan = (SubPlan *) build_subplan(root, plan, + subroot->parse->rtable, + ANY_SUBLINK, newtestexpr, + false, true); + /* Check we got what we expected */ + Assert(IsA(hashplan, SubPlan)); + Assert(hashplan->parParam == NIL); + Assert(hashplan->useHashTable); + /* build_subplan won't have filled in paramIds */ + hashplan->paramIds = paramIds; + + /* Leave it to the executor to decide which plan to use */ + asplan = makeNode(AlternativeSubPlan); + asplan->subplans = list_make2(result, hashplan); + result = (Node *) asplan; + } + } + } + + return result; +} + +/* + * Build a SubPlan node given the raw inputs --- subroutine for make_subplan + * + * Returns either the SubPlan, or an expression using initplan output Params, + * as explained in the comments for make_subplan. + */ +static Node * +build_subplan(PlannerInfo *root, Plan *plan, List *rtable, + SubLinkType subLinkType, Node *testexpr, + bool adjust_testexpr, bool unknownEqFalse) +{ + Node *result; + SubPlan *splan; + bool isInitPlan; + Bitmapset *tmpset; + int paramid; + + /* + * Initialize the SubPlan node. Note plan_id isn't set till further down, + * likewise the cost fields. */ splan = makeNode(SubPlan); - splan->subLinkType = slink->subLinkType; + splan->subLinkType = subLinkType; splan->testexpr = NULL; splan->paramIds = NIL; splan->firstColType = get_first_col_type(plan); splan->useHashTable = false; - /* At top level of a qual, can treat UNKNOWN the same as FALSE */ - splan->unknownEqFalse = isTopQual; + splan->unknownEqFalse = unknownEqFalse; splan->setParam = NIL; splan->parParam = NIL; splan->args = NIL; /* - * Make parParam list of params that current query level will pass to this - * child plan. + * Make parParam and args lists of param IDs and expressions that current + * query level will pass to this child plan. */ tmpset = bms_copy(plan->extParam); while ((paramid = bms_first_member(tmpset)) >= 0) @@ -327,7 +414,15 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) PlannerParamItem *pitem = list_nth(root->glob->paramlist, paramid); if (pitem->abslevel == root->query_level) + { splan->parParam = lappend_int(splan->parParam, paramid); + /* + * The Var or Aggref has already been adjusted to have the correct + * varlevelsup or agglevelsup. We probably don't even need to + * copy it again, but be safe. + */ + splan->args = lappend(splan->args, copyObject(pitem->item)); + } } bms_free(tmpset); @@ -339,21 +434,23 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) * PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted by the * parser. */ - if (splan->parParam == NIL && slink->subLinkType == EXISTS_SUBLINK) + if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK) { Param *prm; + Assert(testexpr == NULL); prm = generate_new_param(root, BOOLOID, -1); splan->setParam = list_make1_int(prm->paramid); isInitPlan = true; result = (Node *) prm; } - else if (splan->parParam == NIL && slink->subLinkType == EXPR_SUBLINK) + else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK) { TargetEntry *te = linitial(plan->targetlist); Param *prm; Assert(!te->resjunk); + Assert(testexpr == NULL); prm = generate_new_param(root, exprType((Node *) te->expr), exprTypmod((Node *) te->expr)); @@ -361,13 +458,14 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) isInitPlan = true; result = (Node *) prm; } - else if (splan->parParam == NIL && slink->subLinkType == ARRAY_SUBLINK) + else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK) { TargetEntry *te = linitial(plan->targetlist); Oid arraytype; Param *prm; Assert(!te->resjunk); + Assert(testexpr == NULL); arraytype = get_array_type(exprType((Node *) te->expr)); if (!OidIsValid(arraytype)) elog(ERROR, "could not find array type for datatype %s", @@ -379,11 +477,12 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) isInitPlan = true; result = (Node *) prm; } - else if (splan->parParam == NIL && slink->subLinkType == ROWCOMPARE_SUBLINK) + else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK) { /* Adjust the Params */ List *params; + Assert(testexpr != NULL); params = generate_subquery_params(root, plan->targetlist, &splan->paramIds); @@ -400,14 +499,14 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) } else { - List *args; - ListCell *l; - - if (testexpr) + /* + * Adjust the Params in the testexpr, unless caller said it's not + * needed. + */ + if (testexpr && adjust_testexpr) { List *params; - /* Adjust the Params in the testexpr */ params = generate_subquery_params(root, plan->targetlist, &splan->paramIds); @@ -415,15 +514,20 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) testexpr, params); } + else + splan->testexpr = testexpr; /* * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to * initPlans, even when they are uncorrelated or undirect correlated, * because we need to scan the output of the subplan for each outer - * tuple. But if it's an IN (= ANY) test, we might be able to use a - * hashtable to avoid comparing all the tuples. + * tuple. But if it's a not-direct-correlated IN (= ANY) test, we + * might be able to use a hashtable to avoid comparing all the tuples. */ - if (subplan_is_hashable(slink, splan, plan)) + if (subLinkType == ANY_SUBLINK && + splan->parParam == NIL && + subplan_is_hashable(plan) && + testexpr_is_hashable(splan->testexpr)) splan->useHashTable = true; /* @@ -453,24 +557,6 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) plan = materialize_finished_plan(plan); } - /* - * Make splan->args from parParam. - */ - args = NIL; - foreach(l, splan->parParam) - { - PlannerParamItem *pitem = list_nth(root->glob->paramlist, - lfirst_int(l)); - - /* - * The Var or Aggref has already been adjusted to have the correct - * varlevelsup or agglevelsup. We probably don't even need to - * copy it again, but be safe. - */ - args = lappend(args, copyObject(pitem->item)); - } - splan->args = args; - result = (Node *) splan; isInitPlan = false; } @@ -478,10 +564,8 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) /* * Add the subplan and its rtable to the global lists. */ - root->glob->subplans = lappend(root->glob->subplans, - plan); - root->glob->subrtables = lappend(root->glob->subrtables, - subroot->parse->rtable); + root->glob->subplans = lappend(root->glob->subplans, plan); + root->glob->subrtables = lappend(root->glob->subrtables, rtable); splan->plan_id = list_length(root->glob->subplans); if (isInitPlan) @@ -498,6 +582,9 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs, splan->plan_id); + /* Lastly, fill in the cost estimates for use later */ + cost_subplan(root, splan, plan); + return result; } @@ -620,40 +707,12 @@ convert_testexpr_mutator(Node *node, } /* - * subplan_is_hashable: decide whether we can implement a subplan by hashing - * - * Caution: the SubPlan node is not completely filled in yet. We can rely - * on its plan and parParam fields, however. + * subplan_is_hashable: can we implement an ANY subplan by hashing? */ static bool -subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan) +subplan_is_hashable(Plan *plan) { double subquery_size; - ListCell *l; - - /* - * The sublink type must be "= ANY" --- that is, an IN operator. We - * expect that the test expression will be either a single OpExpr, or an - * AND-clause containing OpExprs. (If it's anything else then the parser - * must have determined that the operators have non-equality-like - * semantics. In the OpExpr case we can't be sure what the operator's - * semantics are like, but the test below for hashability will reject - * anything that's not equality.) - */ - if (slink->subLinkType != ANY_SUBLINK) - return false; - if (slink->testexpr == NULL || - (!IsA(slink->testexpr, OpExpr) && - !and_clause(slink->testexpr))) - return false; - - /* - * The subplan must not have any direct correlation vars --- else we'd - * have to recompute its output each time, so that the hashtable wouldn't - * gain anything. - */ - if (node->parParam != NIL) - return false; /* * The estimated size of the subquery result must fit in work_mem. (Note: @@ -666,7 +725,19 @@ subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan) if (subquery_size > work_mem * 1024L) return false; + return true; +} + +/* + * testexpr_is_hashable: is an ANY SubLink's test expression hashable? + */ +static bool +testexpr_is_hashable(Node *testexpr) +{ /* + * The testexpr must be a single OpExpr, or an AND-clause containing + * only OpExprs. + * * The combining operators must be hashable and strict. The need for * hashability is obvious, since we want to use hashing. Without * strictness, behavior in the presence of nulls is too unpredictable. We @@ -674,25 +745,28 @@ subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan) * NULL for non-null inputs, either (see nodeSubplan.c). However, hash * indexes and hash joins assume that too. */ - if (IsA(slink->testexpr, OpExpr)) + if (testexpr && IsA(testexpr, OpExpr)) { - if (!hash_ok_operator((OpExpr *) slink->testexpr)) - return false; + if (hash_ok_operator((OpExpr *) testexpr)) + return true; } - else + else if (and_clause(testexpr)) { - foreach(l, ((BoolExpr *) slink->testexpr)->args) + ListCell *l; + + foreach(l, ((BoolExpr *) testexpr)->args) { Node *andarg = (Node *) lfirst(l); if (!IsA(andarg, OpExpr)) - return false; /* probably can't happen */ + return false; if (!hash_ok_operator((OpExpr *) andarg)) return false; } + return true; } - return true; + return false; } static bool @@ -702,6 +776,10 @@ hash_ok_operator(OpExpr *expr) HeapTuple tup; Form_pg_operator optup; + /* quick out if not a binary operator */ + if (list_length(expr->args) != 2) + return false; + /* else must look up the operator properties */ tup = SearchSysCache(OPEROID, ObjectIdGetDatum(opid), 0, 0, 0); @@ -838,63 +916,6 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, return true; } -/* - * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery - * - * The only thing that matters about an EXISTS query is whether it returns - * zero or more than zero rows. Therefore, we can remove certain SQL features - * that won't affect that. The only part that is really likely to matter in - * typical usage is simplifying the targetlist: it's a common habit to write - * "SELECT * FROM" even though there is no need to evaluate any columns. - * - * Note: by suppressing the targetlist we could cause an observable behavioral - * change, namely that any errors that might occur in evaluating the tlist - * won't occur, nor will other side-effects of volatile functions. This seems - * unlikely to bother anyone in practice. - * - * Returns TRUE if was able to discard the targetlist, else FALSE. - */ -static bool -simplify_EXISTS_query(Query *query) -{ - /* - * We don't try to simplify at all if the query uses set operations, - * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these - * seem likely in normal usage and their possible effects are complex. - */ - if (query->commandType != CMD_SELECT || - query->intoClause || - query->setOperations || - query->hasAggs || - query->havingQual || - query->limitOffset || - query->limitCount || - query->rowMarks) - return false; - - /* - * Mustn't throw away the targetlist if it contains set-returning - * functions; those could affect whether zero rows are returned! - */ - if (expression_returns_set((Node *) query->targetList)) - return false; - - /* - * Otherwise, we can throw away the targetlist, as well as any GROUP, - * DISTINCT, and ORDER BY clauses; none of those clauses will change - * a nonzero-rows result to zero rows or vice versa. (Furthermore, - * since our parsetree representation of these clauses depends on the - * targetlist, we'd better throw them away if we drop the targetlist.) - */ - query->targetList = NIL; - query->groupClause = NIL; - query->distinctClause = NIL; - query->sortClause = NIL; - query->hasDistinctOn = false; - - return true; -} - /* * convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join? * @@ -1044,6 +1065,288 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, return true; } +/* + * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery + * + * The only thing that matters about an EXISTS query is whether it returns + * zero or more than zero rows. Therefore, we can remove certain SQL features + * that won't affect that. The only part that is really likely to matter in + * typical usage is simplifying the targetlist: it's a common habit to write + * "SELECT * FROM" even though there is no need to evaluate any columns. + * + * Note: by suppressing the targetlist we could cause an observable behavioral + * change, namely that any errors that might occur in evaluating the tlist + * won't occur, nor will other side-effects of volatile functions. This seems + * unlikely to bother anyone in practice. + * + * Returns TRUE if was able to discard the targetlist, else FALSE. + */ +static bool +simplify_EXISTS_query(Query *query) +{ + /* + * We don't try to simplify at all if the query uses set operations, + * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these + * seem likely in normal usage and their possible effects are complex. + */ + if (query->commandType != CMD_SELECT || + query->intoClause || + query->setOperations || + query->hasAggs || + query->havingQual || + query->limitOffset || + query->limitCount || + query->rowMarks) + return false; + + /* + * Mustn't throw away the targetlist if it contains set-returning + * functions; those could affect whether zero rows are returned! + */ + if (expression_returns_set((Node *) query->targetList)) + return false; + + /* + * Otherwise, we can throw away the targetlist, as well as any GROUP, + * DISTINCT, and ORDER BY clauses; none of those clauses will change + * a nonzero-rows result to zero rows or vice versa. (Furthermore, + * since our parsetree representation of these clauses depends on the + * targetlist, we'd better throw them away if we drop the targetlist.) + */ + query->targetList = NIL; + query->groupClause = NIL; + query->distinctClause = NIL; + query->sortClause = NIL; + query->hasDistinctOn = false; + + return true; +} + +/* + * convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink + * + * The subselect is expected to be a fresh copy that we can munge up, + * and to have been successfully passed through simplify_EXISTS_query. + * + * On success, the modified subselect is returned, and we store a suitable + * upper-level test expression at *testexpr, plus a list of the subselect's + * output Params at *paramIds. (The test expression is already Param-ified + * and hence need not go through convert_testexpr, which is why we have to + * deal with the Param IDs specially.) + * + * On failure, returns NULL. + */ +static Query * +convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, + Node **testexpr, List **paramIds) +{ + Node *whereClause; + List *leftargs, + *rightargs, + *opids, + *newWhere, + *tlist, + *testlist, + *paramids; + ListCell *lc, + *rc, + *oc; + AttrNumber resno; + + /* + * Query must not require a targetlist, since we have to insert a new one. + * Caller should have dealt with the case already. + */ + Assert(subselect->targetList == NIL); + + /* + * Separate out the WHERE clause. (We could theoretically also remove + * top-level plain JOIN/ON clauses, but it's probably not worth the + * trouble.) + */ + whereClause = subselect->jointree->quals; + subselect->jointree->quals = NULL; + + /* + * The rest of the sub-select must not refer to any Vars of the parent + * query. (Vars of higher levels should be okay, though.) + * + * Note: we need not check for Aggs separately because we know the + * sub-select is as yet unoptimized; any uplevel Agg must therefore + * contain an uplevel Var reference. This is not the case below ... + */ + if (contain_vars_of_level((Node *) subselect, 1)) + return NULL; + + /* + * We don't risk optimizing if the WHERE clause is volatile, either. + */ + if (contain_volatile_functions(whereClause)) + return NULL; + + /* + * Clean up the WHERE clause by doing const-simplification etc on it. + * Aside from simplifying the processing we're about to do, this is + * important for being able to pull chunks of the WHERE clause up into + * the parent query. Since we are invoked partway through the parent's + * preprocess_expression() work, earlier steps of preprocess_expression() + * wouldn't get applied to the pulled-up stuff unless we do them here. + * For the parts of the WHERE clause that get put back into the child + * query, this work is partially duplicative, but it shouldn't hurt. + * + * Note: we do not run flatten_join_alias_vars. This is OK because + * any parent aliases were flattened already, and we're not going to + * pull any child Vars (of any description) into the parent. + * + * Note: passing the parent's root to eval_const_expressions is technically + * wrong, but we can get away with it since only the boundParams (if any) + * are used, and those would be the same in a subroot. + */ + whereClause = eval_const_expressions(root, whereClause); + whereClause = (Node *) canonicalize_qual((Expr *) whereClause); + whereClause = (Node *) make_ands_implicit((Expr *) whereClause); + + /* + * We now have a flattened implicit-AND list of clauses, which we + * try to break apart into "outervar = innervar" hash clauses. + * Anything that can't be broken apart just goes back into the + * newWhere list. Note that we aren't trying hard yet to ensure + * that we have only outer or only inner on each side; we'll check + * that if we get to the end. + */ + leftargs = rightargs = opids = newWhere = NIL; + foreach(lc, (List *) whereClause) + { + OpExpr *expr = (OpExpr *) lfirst(lc); + + if (IsA(expr, OpExpr) && + hash_ok_operator(expr)) + { + Node *leftarg = (Node *) linitial(expr->args); + Node *rightarg = (Node *) lsecond(expr->args); + + if (contain_vars_of_level(leftarg, 1)) + { + leftargs = lappend(leftargs, leftarg); + rightargs = lappend(rightargs, rightarg); + opids = lappend_oid(opids, expr->opno); + continue; + } + if (contain_vars_of_level(rightarg, 1)) + { + /* + * We must commute the clause to put the outer var on the + * left, because the hashing code in nodeSubplan.c expects + * that. This probably shouldn't ever fail, since hashable + * operators ought to have commutators, but be paranoid. + */ + expr->opno = get_commutator(expr->opno); + if (OidIsValid(expr->opno) && hash_ok_operator(expr)) + { + leftargs = lappend(leftargs, rightarg); + rightargs = lappend(rightargs, leftarg); + opids = lappend_oid(opids, expr->opno); + continue; + } + /* If no commutator, no chance to optimize the WHERE clause */ + return NULL; + } + } + /* Couldn't handle it as a hash clause */ + newWhere = lappend(newWhere, expr); + } + + /* + * If we didn't find anything we could convert, fail. + */ + if (leftargs == NIL) + return NULL; + + /* + * There mustn't be any parent Vars or Aggs in the stuff that we intend to + * put back into the child query. Note: you might think we don't need to + * check for Aggs separately, because an uplevel Agg must contain an + * uplevel Var in its argument. But it is possible that the uplevel Var + * got optimized away by eval_const_expressions. Consider + * + * SUM(CASE WHEN false THEN uplevelvar ELSE 0 END) + */ + if (contain_vars_of_level((Node *) newWhere, 1) || + contain_vars_of_level((Node *) rightargs, 1)) + return NULL; + if (root->parse->hasAggs && + (contain_aggs_of_level((Node *) newWhere, 1) || + contain_aggs_of_level((Node *) rightargs, 1))) + return NULL; + + /* + * And there can't be any child Vars in the stuff we intend to pull up. + * (Note: we'd need to check for child Aggs too, except we know the + * child has no aggs at all because of simplify_EXISTS_query's check.) + */ + if (contain_vars_of_level((Node *) leftargs, 0)) + return NULL; + + /* + * Also reject sublinks in the stuff we intend to pull up. (It might be + * possible to support this, but doesn't seem worth the complication.) + */ + if (contain_subplans((Node *) leftargs)) + return NULL; + + /* + * Okay, adjust the sublevelsup in the stuff we're pulling up. + */ + IncrementVarSublevelsUp((Node *) leftargs, -1, 1); + + /* + * Put back any child-level-only WHERE clauses. + */ + if (newWhere) + subselect->jointree->quals = (Node *) make_ands_explicit(newWhere); + + /* + * Build a new targetlist for the child that emits the expressions + * we need. Concurrently, build a testexpr for the parent using + * Params to reference the child outputs. (Since we generate Params + * directly here, there will be no need to convert the testexpr in + * build_subplan.) + */ + tlist = testlist = paramids = NIL; + resno = 1; + /* there's no "for3" so we have to chase one of the lists manually */ + oc = list_head(opids); + forboth(lc, leftargs, rc, rightargs) + { + Node *leftarg = (Node *) lfirst(lc); + Node *rightarg = (Node *) lfirst(rc); + Oid opid = lfirst_oid(oc); + Param *param; + + oc = lnext(oc); + param = generate_new_param(root, + exprType(rightarg), + exprTypmod(rightarg)); + tlist = lappend(tlist, + makeTargetEntry((Expr *) rightarg, + resno++, + NULL, + false)); + testlist = lappend(testlist, + make_opclause(opid, BOOLOID, false, + (Expr *) leftarg, (Expr *) param)); + paramids = lappend_int(paramids, param->paramid); + } + + /* Put everything where it should go, and we're done */ + subselect->targetList = tlist; + *testexpr = (Node *) make_ands_explicit(testlist); + *paramIds = paramids; + + return subselect; +} + + /* * Replace correlation vars (uplevel vars) with Params. * @@ -1130,7 +1433,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context) * Now build the SubPlan node and make the expr to return. */ return make_subplan(context->root, - sublink, + (Query *) sublink->subselect, + sublink->subLinkType, testexpr, context->isTopQual); } @@ -1140,7 +1444,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context) * the very routine that creates 'em to begin with). We shouldn't find * ourselves invoked directly on a Query, either. */ - Assert(!is_subplan(node)); + Assert(!IsA(node, SubPlan)); + Assert(!IsA(node, AlternativeSubPlan)); Assert(!IsA(node, Query)); /* @@ -1251,7 +1556,7 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans) { initSetParam = bms_add_member(initSetParam, lfirst_int(l2)); } - initplan_cost += get_initplan_cost(root, initsubplan); + initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost; } /* @@ -1555,7 +1860,7 @@ finalize_primnode(Node *node, finalize_primnode_context *context) } return false; /* no more to do here */ } - if (is_subplan(node)) + if (IsA(node, SubPlan)) { SubPlan *subplan = (SubPlan *) node; Plan *plan = planner_subplan_get_plan(context->root, subplan); @@ -1655,6 +1960,8 @@ SS_make_initplan_from_plan(PlannerInfo *root, Plan *plan, * parParam and args lists remain empty. */ + cost_subplan(root, node, plan); + /* * Make a Param that will be the subplan's output. */ diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index ffe31bdff2..6a06b0806c 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.262 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.263 2008/08/22 00:16:04 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -360,7 +360,7 @@ make_ands_implicit(Expr *clause) * are no subqueries. There mustn't be outer-aggregate references either. * * (If you want something like this but able to deal with subqueries, - * see rewriteManip.c's checkExprHasAggs().) + * see rewriteManip.c's contain_aggs_of_level().) */ bool contain_agg_clause(Node *clause) @@ -566,6 +566,8 @@ expression_returns_set_walker(Node *node, void *context) return false; if (IsA(node, SubPlan)) return false; + if (IsA(node, AlternativeSubPlan)) + return false; if (IsA(node, ArrayExpr)) return false; if (IsA(node, RowExpr)) @@ -637,6 +639,8 @@ expression_returns_set_rows_walker(Node *node, double *count) return false; if (IsA(node, SubPlan)) return false; + if (IsA(node, AlternativeSubPlan)) + return false; if (IsA(node, ArrayExpr)) return false; if (IsA(node, RowExpr)) @@ -684,6 +688,7 @@ contain_subplans_walker(Node *node, void *context) if (node == NULL) return false; if (IsA(node, SubPlan) || + IsA(node, AlternativeSubPlan) || IsA(node, SubLink)) return true; /* abort the tree traversal and return true */ return expression_tree_walker(node, contain_subplans_walker, context); @@ -1012,6 +1017,8 @@ contain_nonstrict_functions_walker(Node *node, void *context) } if (IsA(node, SubPlan)) return true; + if (IsA(node, AlternativeSubPlan)) + return true; /* ArrayCoerceExpr is strict at the array level, regardless of elemfunc */ if (IsA(node, FieldStore)) return true; @@ -2308,7 +2315,8 @@ eval_const_expressions_mutator(Node *node, break; } } - if (IsA(node, SubPlan)) + if (IsA(node, SubPlan) || + IsA(node, AlternativeSubPlan)) { /* * Return a SubPlan unchanged --- too late to do anything with it. @@ -4156,6 +4164,8 @@ expression_tree_walker(Node *node, return true; } break; + case T_AlternativeSubPlan: + return walker(((AlternativeSubPlan *) node)->subplans, context); case T_FieldSelect: return walker(((FieldSelect *) node)->arg, context); case T_FieldStore: @@ -4630,6 +4640,16 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_AlternativeSubPlan: + { + AlternativeSubPlan *asplan = (AlternativeSubPlan *) node; + AlternativeSubPlan *newnode; + + FLATCOPY(newnode, asplan, AlternativeSubPlan); + MUTATE(newnode->subplans, asplan->subplans, List *); + return (Node *) newnode; + } + break; case T_FieldSelect: { FieldSelect *fselect = (FieldSelect *) node; diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 290f9bb64b..f9bd59c799 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.75 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.76 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -171,7 +171,7 @@ pull_varattnos_walker(Node *node, Bitmapset **varattnos) } /* Should not find a subquery or subplan */ Assert(!IsA(node, Query)); - Assert(!is_subplan(node)); + Assert(!IsA(node, SubPlan)); return expression_tree_walker(node, pull_varattnos_walker, (void *) varattnos); @@ -677,7 +677,7 @@ flatten_join_alias_vars_mutator(Node *node, return (Node *) newnode; } /* Already-planned tree not supported */ - Assert(!is_subplan(node)); + Assert(!IsA(node, SubPlan)); return expression_tree_mutator(node, flatten_join_alias_vars_mutator, (void *) context); diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 8addb53e51..6309525cab 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.229 2008/07/16 01:30:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.230 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1980,6 +1980,15 @@ exprType(Node *expr) } } break; + case T_AlternativeSubPlan: + { + /* As above, supported for the convenience of ruleutils.c */ + AlternativeSubPlan *asplan = (AlternativeSubPlan *) expr; + + /* subplans should all return the same thing */ + type = exprType((Node *) linitial(asplan->subplans)); + } + break; case T_FieldSelect: type = ((FieldSelect *) expr)->resulttype; break; diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 1202c31314..baa9330016 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.109 2008/08/14 20:31:29 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.110 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,10 +25,10 @@ typedef struct { int sublevels_up; -} checkExprHasAggs_context; +} contain_aggs_of_level_context; -static bool checkExprHasAggs_walker(Node *node, - checkExprHasAggs_context *context); +static bool contain_aggs_of_level_walker(Node *node, + contain_aggs_of_level_context *context); static bool checkExprHasSubLink_walker(Node *node, void *context); static Relids offset_relid_set(Relids relids, int offset); static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid); @@ -37,32 +37,44 @@ static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid); /* * checkExprHasAggs - * Check if an expression contains an aggregate function call. - * - * The objective of this routine is to detect whether there are aggregates - * belonging to the initial query level. Aggregates belonging to subqueries - * or outer queries do NOT cause a true result. We must recurse into - * subqueries to detect outer-reference aggregates that logically belong to - * the initial query level. */ bool checkExprHasAggs(Node *node) { - checkExprHasAggs_context context; + return contain_aggs_of_level(node, 0); +} - context.sublevels_up = 0; +/* + * contain_aggs_of_level - + * Check if an expression contains an aggregate function call of a + * specified query level. + * + * The objective of this routine is to detect whether there are aggregates + * belonging to the given query level. Aggregates belonging to subqueries + * or outer queries do NOT cause a true result. We must recurse into + * subqueries to detect outer-reference aggregates that logically belong to + * the specified query level. + */ +bool +contain_aggs_of_level(Node *node, int levelsup) +{ + contain_aggs_of_level_context context; + + context.sublevels_up = levelsup; /* * Must be prepared to start with a Query or a bare expression tree; if * it's a Query, we don't want to increment sublevels_up. */ return query_or_expression_tree_walker(node, - checkExprHasAggs_walker, + contain_aggs_of_level_walker, (void *) &context, 0); } static bool -checkExprHasAggs_walker(Node *node, checkExprHasAggs_context *context) +contain_aggs_of_level_walker(Node *node, + contain_aggs_of_level_context *context) { if (node == NULL) return false; @@ -79,12 +91,12 @@ checkExprHasAggs_walker(Node *node, checkExprHasAggs_context *context) context->sublevels_up++; result = query_tree_walker((Query *) node, - checkExprHasAggs_walker, + contain_aggs_of_level_walker, (void *) context, 0); context->sublevels_up--; return result; } - return expression_tree_walker(node, checkExprHasAggs_walker, + return expression_tree_walker(node, contain_aggs_of_level_walker, (void *) context); } diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 0120353344..3c62e7a972 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.279 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.280 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3732,6 +3732,11 @@ get_rule_expr(Node *node, deparse_context *context, } break; + case T_AlternativeSubPlan: + /* As above, just punt */ + appendStringInfo(buf, "(alternative subplans)"); + break; + case T_FieldSelect: { FieldSelect *fselect = (FieldSelect *) node; diff --git a/src/include/executor/nodeSubplan.h b/src/include/executor/nodeSubplan.h index c7c9f4e2dc..7d12a54b61 100644 --- a/src/include/executor/nodeSubplan.h +++ b/src/include/executor/nodeSubplan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/nodeSubplan.h,v 1.27 2008/01/01 19:45:57 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/nodeSubplan.h,v 1.28 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,10 +17,8 @@ #include "nodes/execnodes.h" extern SubPlanState *ExecInitSubPlan(SubPlan *subplan, PlanState *parent); -extern Datum ExecSubPlan(SubPlanState *node, - ExprContext *econtext, - bool *isNull, - ExprDoneCond *isDone); + +extern AlternativeSubPlanState *ExecInitAlternativeSubPlan(AlternativeSubPlan *asplan, PlanState *parent); extern void ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index ce41c750af..ee8c4a2bef 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.186 2008/08/07 03:04:03 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.187 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -627,6 +627,17 @@ typedef struct SubPlanState FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */ } SubPlanState; +/* ---------------- + * AlternativeSubPlanState node + * ---------------- + */ +typedef struct AlternativeSubPlanState +{ + ExprState xprstate; + List *subplans; /* states of alternative subplans */ + int active; /* list index of the one we're using */ +} AlternativeSubPlanState; + /* ---------------- * FieldSelectState node * ---------------- diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 5fdd23c5a9..b9e00bfdde 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.208 2008/08/14 18:48:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.209 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -118,6 +118,7 @@ typedef enum NodeTag T_BoolExpr, T_SubLink, T_SubPlan, + T_AlternativeSubPlan, T_FieldSelect, T_FieldStore, T_RelabelType, @@ -160,6 +161,7 @@ typedef enum NodeTag T_ScalarArrayOpExprState, T_BoolExprState, T_SubPlanState, + T_AlternativeSubPlanState, T_FieldSelectState, T_FieldStoreState, T_CoerceViaIOState, diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index f0c55112cb..f6da1adbfa 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -10,7 +10,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.138 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.139 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -445,7 +445,8 @@ typedef struct SubLink * * If the sub-select becomes an initplan rather than a subplan, the executable * expression is part of the outer plan's expression tree (and the SubPlan - * node itself is not). In this case testexpr is NULL to avoid duplication. + * node itself is not, but rather is found in the outer plan's initPlan + * list). In this case testexpr is NULL to avoid duplication. * * The planner also derives lists of the values that need to be passed into * and out of the subplan. Input values are represented as a list "args" of @@ -457,6 +458,10 @@ typedef struct SubLink * is an initplan; they are listed in order by sub-select output column * position. (parParam and setParam are integer Lists, not Bitmapsets, * because their ordering is significant.) + * + * Also, the planner computes startup and per-call costs for use of the + * SubPlan. Note that these include the cost of the subquery proper, + * evaluation of the testexpr if any, and any hashtable management overhead. */ typedef struct SubPlan { @@ -482,8 +487,25 @@ typedef struct SubPlan * Params for parent plan */ List *parParam; /* indices of input Params from parent plan */ List *args; /* exprs to pass as parParam values */ + /* Estimated execution costs: */ + Cost startup_cost; /* one-time setup cost */ + Cost per_call_cost; /* cost for each subplan evaluation */ } SubPlan; +/* + * AlternativeSubPlan - expression node for a choice among SubPlans + * + * The subplans are given as a List so that the node definition need not + * change if there's ever more than two alternatives. For the moment, + * though, there are always exactly two; and the first one is the fast-start + * plan. + */ +typedef struct AlternativeSubPlan +{ + Expr xpr; + List *subplans; /* SubPlan(s) with equivalent results */ +} AlternativeSubPlan; + /* ---------------- * FieldSelect * diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 41e52d909a..1ea3d701fe 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.92 2008/08/14 18:48:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.93 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,7 +19,6 @@ #define is_opclause(clause) ((clause) != NULL && IsA(clause, OpExpr)) #define is_funcclause(clause) ((clause) != NULL && IsA(clause, FuncExpr)) -#define is_subplan(clause) ((clause) != NULL && IsA(clause, SubPlan)) typedef struct { diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 17caeb4ee4..8ad8f7f062 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.91 2008/08/14 18:48:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.92 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -93,9 +93,9 @@ extern void cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo); extern void cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo); +extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan); extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root); extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root); -extern Cost get_initplan_cost(PlannerInfo *root, SubPlan *subplan); extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h index 110d8ac0d1..a870ce889b 100644 --- a/src/include/rewrite/rewriteManip.h +++ b/src/include/rewrite/rewriteManip.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.45 2008/08/14 20:31:29 heikki Exp $ + * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.46 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -35,6 +35,7 @@ extern Query *getInsertSelectQuery(Query *parsetree, Query ***subquery_ptr); extern void AddQual(Query *parsetree, Node *qual); extern void AddInvertedQual(Query *parsetree, Node *qual); +extern bool contain_aggs_of_level(Node *node, int levelsup); extern bool checkExprHasAggs(Node *node); extern bool checkExprHasSubLink(Node *node);