diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 994cec50ce..6b96e7de0a 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -2779,6 +2779,7 @@ estimate_path_cost_size(PlannerInfo *root, startup_cost = ofpinfo->rel_startup_cost; startup_cost += aggcosts.transCost.startup; startup_cost += aggcosts.transCost.per_tuple * input_rows; + startup_cost += aggcosts.finalCost.startup; startup_cost += (cpu_operator_cost * numGroupCols) * input_rows; /*----- @@ -2788,7 +2789,7 @@ estimate_path_cost_size(PlannerInfo *root, *----- */ run_cost = ofpinfo->rel_total_cost - ofpinfo->rel_startup_cost; - run_cost += aggcosts.finalCost * numGroups; + run_cost += aggcosts.finalCost.per_tuple * numGroups; run_cost += cpu_tuple_cost * numGroups; /* Account for the eval cost of HAVING quals, if any */ diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index d70aa6eed7..b486ef391e 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -3439,4 +3439,25 @@ supportfn(internal) returns internal simplify. Ensure rigorous equivalence between the simplified expression and an actual execution of the target function. + + + For target functions that return boolean, it is often useful to estimate + the fraction of rows that will be selected by a WHERE clause using that + function. This can be done by a support function that implements + the SupportRequestSelectivity request type. + + + + If the target function's runtime is highly dependent on its inputs, + it may be useful to provide a non-constant cost estimate for it. + This can be done by a support function that implements + the SupportRequestCost request type. + + + + For target functions that return sets, it is often useful to provide + a non-constant estimate for the number of rows that will be returned. + This can be done by a support function that implements + the SupportRequestRows request type. + diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index abca03b805..e8142bddf0 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -762,6 +762,21 @@ clause_selectivity(PlannerInfo *root, if (IsA(clause, DistinctExpr)) s1 = 1.0 - s1; } + else if (is_funcclause(clause)) + { + FuncExpr *funcclause = (FuncExpr *) clause; + + /* Try to get an estimate from the support function, if any */ + s1 = function_selectivity(root, + funcclause->funcid, + funcclause->args, + funcclause->inputcollid, + treat_as_join_clause(clause, rinfo, + varRelid, sjinfo), + varRelid, + jointype, + sjinfo); + } else if (IsA(clause, ScalarArrayOpExpr)) { /* Use node specific selectivity calculation function */ diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1057ddaa3e..beee50ec13 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2112,9 +2112,9 @@ cost_agg(Path *path, PlannerInfo *root, /* * The transCost.per_tuple component of aggcosts should be charged once * per input tuple, corresponding to the costs of evaluating the aggregate - * transfns and their input expressions (with any startup cost of course - * charged but once). The finalCost component is charged once per output - * tuple, corresponding to the costs of evaluating the finalfns. + * transfns and their input expressions. The finalCost.per_tuple component + * is charged once per output tuple, corresponding to the costs of + * evaluating the finalfns. Startup costs are of course charged but once. * * If we are grouping, we charge an additional cpu_operator_cost per * grouping column per input tuple for grouping comparisons. @@ -2136,7 +2136,8 @@ cost_agg(Path *path, PlannerInfo *root, startup_cost = input_total_cost; startup_cost += aggcosts->transCost.startup; startup_cost += aggcosts->transCost.per_tuple * input_tuples; - startup_cost += aggcosts->finalCost; + startup_cost += aggcosts->finalCost.startup; + startup_cost += aggcosts->finalCost.per_tuple; /* we aren't grouping */ total_cost = startup_cost + cpu_tuple_cost; output_tuples = 1; @@ -2155,7 +2156,8 @@ cost_agg(Path *path, PlannerInfo *root, total_cost += aggcosts->transCost.startup; total_cost += aggcosts->transCost.per_tuple * input_tuples; total_cost += (cpu_operator_cost * numGroupCols) * input_tuples; - total_cost += aggcosts->finalCost * numGroups; + total_cost += aggcosts->finalCost.startup; + total_cost += aggcosts->finalCost.per_tuple * numGroups; total_cost += cpu_tuple_cost * numGroups; output_tuples = numGroups; } @@ -2168,8 +2170,9 @@ cost_agg(Path *path, PlannerInfo *root, startup_cost += aggcosts->transCost.startup; startup_cost += aggcosts->transCost.per_tuple * input_tuples; startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples; + startup_cost += aggcosts->finalCost.startup; total_cost = startup_cost; - total_cost += aggcosts->finalCost * numGroups; + total_cost += aggcosts->finalCost.per_tuple * numGroups; total_cost += cpu_tuple_cost * numGroups; output_tuples = numGroups; } @@ -2234,7 +2237,11 @@ cost_windowagg(Path *path, PlannerInfo *root, Cost wfunccost; QualCost argcosts; - wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost; + argcosts.startup = argcosts.per_tuple = 0; + add_function_cost(root, wfunc->winfnoid, (Node *) wfunc, + &argcosts); + startup_cost += argcosts.startup; + wfunccost = argcosts.per_tuple; /* also add the input expressions' cost to per-input-row costs */ cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root); @@ -3864,8 +3871,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) */ if (IsA(node, FuncExpr)) { - context->total.per_tuple += - get_func_cost(((FuncExpr *) node)->funcid) * cpu_operator_cost; + add_function_cost(context->root, ((FuncExpr *) node)->funcid, node, + &context->total); } else if (IsA(node, OpExpr) || IsA(node, DistinctExpr) || @@ -3873,8 +3880,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) { /* rely on struct equivalence to treat these all alike */ set_opfuncid((OpExpr *) node); - context->total.per_tuple += - get_func_cost(((OpExpr *) node)->opfuncid) * cpu_operator_cost; + add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node, + &context->total); } else if (IsA(node, ScalarArrayOpExpr)) { @@ -3884,10 +3891,15 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) */ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; Node *arraynode = (Node *) lsecond(saop->args); + QualCost sacosts; set_sa_opfuncid(saop); - context->total.per_tuple += get_func_cost(saop->opfuncid) * - cpu_operator_cost * estimate_array_length(arraynode) * 0.5; + sacosts.startup = sacosts.per_tuple = 0; + add_function_cost(context->root, saop->opfuncid, NULL, + &sacosts); + context->total.startup += sacosts.startup; + context->total.per_tuple += sacosts.per_tuple * + estimate_array_length(arraynode) * 0.5; } else if (IsA(node, Aggref) || IsA(node, WindowFunc)) @@ -3913,11 +3925,13 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) /* check the result type's input function */ getTypeInputInfo(iocoerce->resulttype, &iofunc, &typioparam); - context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost; + add_function_cost(context->root, iofunc, NULL, + &context->total); /* check the input type's output function */ getTypeOutputInfo(exprType((Node *) iocoerce->arg), &iofunc, &typisvarlena); - context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost; + add_function_cost(context->root, iofunc, NULL, + &context->total); } else if (IsA(node, ArrayCoerceExpr)) { @@ -3941,8 +3955,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) { Oid opid = lfirst_oid(lc); - context->total.per_tuple += get_func_cost(get_opcode(opid)) * - cpu_operator_cost; + add_function_cost(context->root, get_opcode(opid), NULL, + &context->total); } } else if (IsA(node, MinMaxExpr) || @@ -4941,7 +4955,7 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel) foreach(lc, rte->functions) { RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); - double ntup = expression_returns_set_rows(rtfunc->funcexpr); + double ntup = expression_returns_set_rows(root, rtfunc->funcexpr); if (ntup > rel->tuples) rel->tuples = ntup; diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 002c29a5f5..86e4753a5b 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -36,8 +36,8 @@ #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" +#include "optimizer/plancat.h" #include "optimizer/planmain.h" -#include "optimizer/prep.h" #include "parser/analyze.h" #include "parser/parse_agg.h" #include "parser/parse_coerce.h" @@ -343,19 +343,24 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context) if (DO_AGGSPLIT_COMBINE(context->aggsplit)) { /* charge for combining previously aggregated states */ - costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost; + add_function_cost(context->root, aggcombinefn, NULL, + &costs->transCost); } else - costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost; + add_function_cost(context->root, aggtransfn, NULL, + &costs->transCost); if (DO_AGGSPLIT_DESERIALIZE(context->aggsplit) && OidIsValid(aggdeserialfn)) - costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost; + add_function_cost(context->root, aggdeserialfn, NULL, + &costs->transCost); if (DO_AGGSPLIT_SERIALIZE(context->aggsplit) && OidIsValid(aggserialfn)) - costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost; + add_function_cost(context->root, aggserialfn, NULL, + &costs->finalCost); if (!DO_AGGSPLIT_SKIPFINAL(context->aggsplit) && OidIsValid(aggfinalfn)) - costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost; + add_function_cost(context->root, aggfinalfn, NULL, + &costs->finalCost); /* * These costs are incurred only by the initial aggregate node, so we @@ -392,8 +397,8 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context) { cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs, context->root); - costs->transCost.startup += argcosts.startup; - costs->finalCost += argcosts.per_tuple; + costs->finalCost.startup += argcosts.startup; + costs->finalCost.per_tuple += argcosts.per_tuple; } /* @@ -561,7 +566,7 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists) * Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c. */ double -expression_returns_set_rows(Node *clause) +expression_returns_set_rows(PlannerInfo *root, Node *clause) { if (clause == NULL) return 1.0; @@ -570,7 +575,7 @@ expression_returns_set_rows(Node *clause) FuncExpr *expr = (FuncExpr *) clause; if (expr->funcretset) - return clamp_row_est(get_func_rows(expr->funcid)); + return clamp_row_est(get_function_rows(root, expr->funcid, clause)); } if (IsA(clause, OpExpr)) { @@ -579,7 +584,7 @@ expression_returns_set_rows(Node *clause) if (expr->opretset) { set_opfuncid(expr); - return clamp_row_est(get_func_rows(expr->opfuncid)); + return clamp_row_est(get_function_rows(root, expr->opfuncid, clause)); } } return 1.0; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index a3e64110d3..169e51e792 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2711,7 +2711,7 @@ create_set_projection_path(PlannerInfo *root, Node *node = (Node *) lfirst(lc); double itemrows; - itemrows = expression_returns_set_rows(node); + itemrows = expression_returns_set_rows(root, node); if (tlist_rows < itemrows) tlist_rows = itemrows; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 3efa1bdc1a..d6dc83ca80 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -29,10 +29,12 @@ #include "catalog/heap.h" #include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_proc.h" #include "catalog/pg_statistic_ext.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/supportnodes.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" @@ -1772,6 +1774,8 @@ restriction_selectivity(PlannerInfo *root, * Returns the selectivity of a specified join operator clause. * This code executes registered procedures stored in the * operator relation, by calling the function manager. + * + * See clause_selectivity() for the meaning of the additional parameters. */ Selectivity join_selectivity(PlannerInfo *root, @@ -1805,6 +1809,184 @@ join_selectivity(PlannerInfo *root, return (Selectivity) result; } +/* + * function_selectivity + * + * Returns the selectivity of a specified boolean function clause. + * This code executes registered procedures stored in the + * pg_proc relation, by calling the function manager. + * + * See clause_selectivity() for the meaning of the additional parameters. + */ +Selectivity +function_selectivity(PlannerInfo *root, + Oid funcid, + List *args, + Oid inputcollid, + bool is_join, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo) +{ + RegProcedure prosupport = get_func_support(funcid); + SupportRequestSelectivity req; + SupportRequestSelectivity *sresult; + + /* + * If no support function is provided, use our historical default + * estimate, 0.3333333. This seems a pretty unprincipled choice, but + * Postgres has been using that estimate for function calls since 1992. + * The hoariness of this behavior suggests that we should not be in too + * much hurry to use another value. + */ + if (!prosupport) + return (Selectivity) 0.3333333; + + req.type = T_SupportRequestSelectivity; + req.root = root; + req.funcid = funcid; + req.args = args; + req.inputcollid = inputcollid; + req.is_join = is_join; + req.varRelid = varRelid; + req.jointype = jointype; + req.sjinfo = sjinfo; + req.selectivity = -1; /* to catch failure to set the value */ + + sresult = (SupportRequestSelectivity *) + DatumGetPointer(OidFunctionCall1(prosupport, + PointerGetDatum(&req))); + + /* If support function fails, use default */ + if (sresult != &req) + return (Selectivity) 0.3333333; + + if (req.selectivity < 0.0 || req.selectivity > 1.0) + elog(ERROR, "invalid function selectivity: %f", req.selectivity); + + return (Selectivity) req.selectivity; +} + +/* + * add_function_cost + * + * Get an estimate of the execution cost of a function, and *add* it to + * the contents of *cost. The estimate may include both one-time and + * per-tuple components, since QualCost does. + * + * The funcid must always be supplied. If it is being called as the + * implementation of a specific parsetree node (FuncExpr, OpExpr, + * WindowFunc, etc), pass that as "node", else pass NULL. + * + * In some usages root might be NULL, too. + */ +void +add_function_cost(PlannerInfo *root, Oid funcid, Node *node, + QualCost *cost) +{ + HeapTuple proctup; + Form_pg_proc procform; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", funcid); + procform = (Form_pg_proc) GETSTRUCT(proctup); + + if (OidIsValid(procform->prosupport)) + { + SupportRequestCost req; + SupportRequestCost *sresult; + + req.type = T_SupportRequestCost; + req.root = root; + req.funcid = funcid; + req.node = node; + + /* Initialize cost fields so that support function doesn't have to */ + req.startup = 0; + req.per_tuple = 0; + + sresult = (SupportRequestCost *) + DatumGetPointer(OidFunctionCall1(procform->prosupport, + PointerGetDatum(&req))); + + if (sresult == &req) + { + /* Success, so accumulate support function's estimate into *cost */ + cost->startup += req.startup; + cost->per_tuple += req.per_tuple; + ReleaseSysCache(proctup); + return; + } + } + + /* No support function, or it failed, so rely on procost */ + cost->per_tuple += procform->procost * cpu_operator_cost; + + ReleaseSysCache(proctup); +} + +/* + * get_function_rows + * + * Get an estimate of the number of rows returned by a set-returning function. + * + * The funcid must always be supplied. In current usage, the calling node + * will always be supplied, and will be either a FuncExpr or OpExpr. + * But it's a good idea to not fail if it's NULL. + * + * In some usages root might be NULL, too. + * + * Note: this returns the unfiltered result of the support function, if any. + * It's usually a good idea to apply clamp_row_est() to the result, but we + * leave it to the caller to do so. + */ +double +get_function_rows(PlannerInfo *root, Oid funcid, Node *node) +{ + HeapTuple proctup; + Form_pg_proc procform; + double result; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", funcid); + procform = (Form_pg_proc) GETSTRUCT(proctup); + + Assert(procform->proretset); /* else caller error */ + + if (OidIsValid(procform->prosupport)) + { + SupportRequestRows req; + SupportRequestRows *sresult; + + req.type = T_SupportRequestRows; + req.root = root; + req.funcid = funcid; + req.node = node; + + req.rows = 0; /* just for sanity */ + + sresult = (SupportRequestRows *) + DatumGetPointer(OidFunctionCall1(procform->prosupport, + PointerGetDatum(&req))); + + if (sresult == &req) + { + /* Success */ + ReleaseSysCache(proctup); + return req.rows; + } + } + + /* No support function, or it failed, so rely on prorows */ + result = procform->prorows; + + ReleaseSysCache(proctup); + + return result; +} + /* * has_unique_index * diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index a785361fd0..5b2917d159 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -22,12 +22,16 @@ #include "catalog/pg_type.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" #include "utils/array.h" #include "utils/arrayaccess.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/selfuncs.h" #include "utils/typcache.h" @@ -6025,6 +6029,36 @@ array_unnest(PG_FUNCTION_ARGS) } } +/* + * Planner support function for array_unnest(anyarray) + */ +Datum +array_unnest_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + + req->rows = estimate_array_length(arg1); + ret = (Node *) req; + } + } + + PG_RETURN_POINTER(ret); +} + /* * array_replace/array_remove support diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index ad8e6d02ee..04825fc77d 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -30,11 +30,15 @@ #include #include +#include #include "catalog/pg_type.h" #include "common/int.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" #include "utils/array.h" #include "utils/builtins.h" @@ -1427,3 +1431,73 @@ generate_series_step_int4(PG_FUNCTION_ARGS) /* do when there is no more left */ SRF_RETURN_DONE(funcctx); } + +/* + * Planner support function for generate_series(int4, int4 [, int4]) + */ +Datum +generate_series_int4_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. Use double arithmetic to avoid overflow hazards. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + double start, + finish, + step; + + start = DatumGetInt32(((Const *) arg1)->constvalue); + finish = DatumGetInt32(((Const *) arg2)->constvalue); + step = arg3 ? DatumGetInt32(((Const *) arg3)->constvalue) : 1; + + /* This equation works for either sign of step */ + if (step != 0) + { + req->rows = floor((finish - start + step) / step); + ret = (Node *) req; + } + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index d16cc9e574..0ff9394a2f 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -20,6 +20,9 @@ #include "common/int.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" #include "utils/int8.h" #include "utils/builtins.h" @@ -1373,3 +1376,73 @@ generate_series_step_int8(PG_FUNCTION_ARGS) /* do when there is no more left */ SRF_RETURN_DONE(funcctx); } + +/* + * Planner support function for generate_series(int8, int8 [, int8]) + */ +Datum +generate_series_int8_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. Use double arithmetic to avoid overflow hazards. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + double start, + finish, + step; + + start = DatumGetInt64(((Const *) arg1)->constvalue); + finish = DatumGetInt64(((Const *) arg2)->constvalue); + step = arg3 ? DatumGetInt64(((Const *) arg3)->constvalue) : 1; + + /* This equation works for either sign of step */ + if (step != 0) + { + req->rows = floor((finish - start + step) / step); + ret = (Node *) req; + } + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 74fafc64f3..1ef6faecd1 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -1577,17 +1577,6 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid) selec = var_eq_const(&vardata, BooleanEqualOperator, BoolGetDatum(true), false, true, false); } - else if (is_funcclause(arg)) - { - /* - * If we have no stats and it's a function call, estimate 0.3333333. - * This seems a pretty unprincipled choice, but Postgres has been - * using that estimate for function calls since 1992. The hoariness - * of this behavior suggests that we should not be in too much hurry - * to use another value. - */ - selec = 0.3333333; - } else { /* Otherwise, the default estimate is 0.5 */ @@ -3502,7 +3491,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, * pointless to worry too much about this without much better * estimates for SRF output rowcounts than we have today.) */ - this_srf_multiplier = expression_returns_set_rows(groupexpr); + this_srf_multiplier = expression_returns_set_rows(root, groupexpr); if (srf_multiplier < this_srf_multiplier) srf_multiplier = this_srf_multiplier; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index fba0ee8b84..e88c45d268 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -1605,41 +1605,28 @@ get_func_leakproof(Oid funcid) } /* - * get_func_cost - * Given procedure id, return the function's procost field. + * get_func_support + * + * Returns the support function OID associated with a given function, + * or InvalidOid if there is none. */ -float4 -get_func_cost(Oid funcid) +RegProcedure +get_func_support(Oid funcid) { HeapTuple tp; - float4 result; tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", funcid); + if (HeapTupleIsValid(tp)) + { + Form_pg_proc functup = (Form_pg_proc) GETSTRUCT(tp); + RegProcedure result; - result = ((Form_pg_proc) GETSTRUCT(tp))->procost; - ReleaseSysCache(tp); - return result; -} - -/* - * get_func_rows - * Given procedure id, return the function's prorows field. - */ -float4 -get_func_rows(Oid funcid) -{ - HeapTuple tp; - float4 result; - - tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", funcid); - - result = ((Form_pg_proc) GETSTRUCT(tp))->prorows; - ReleaseSysCache(tp); - return result; + result = functup->prosupport; + ReleaseSysCache(tp); + return result; + } + else + return (RegProcedure) InvalidOid; } /* ---------- RELATION CACHE ---------- */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 9233fb934e..968a6800dd 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201902091 +#define CATALOG_VERSION_NO 201902092 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 1f5352ce54..50b742c06e 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1530,9 +1530,12 @@ proargtypes => 'anyelement _int4 _int4', prosrc => 'array_fill_with_lower_bounds' }, { oid => '2331', descr => 'expand array to set of rows', - proname => 'unnest', prorows => '100', proretset => 't', - prorettype => 'anyelement', proargtypes => 'anyarray', + proname => 'unnest', prorows => '100', prosupport => 'array_unnest_support', + proretset => 't', prorettype => 'anyelement', proargtypes => 'anyarray', prosrc => 'array_unnest' }, +{ oid => '3996', descr => 'planner support for array_unnest', + proname => 'array_unnest_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'array_unnest_support' }, { oid => '3167', descr => 'remove any occurrences of an element from an array', proname => 'array_remove', proisstrict => 'f', prorettype => 'anyarray', @@ -7536,21 +7539,31 @@ # non-persistent series generator { oid => '1066', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_int4_support', proretset => 't', prorettype => 'int4', proargtypes => 'int4 int4 int4', prosrc => 'generate_series_step_int4' }, { oid => '1067', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_int4_support', proretset => 't', prorettype => 'int4', proargtypes => 'int4 int4', prosrc => 'generate_series_int4' }, +{ oid => '3994', descr => 'planner support for generate_series', + proname => 'generate_series_int4_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'generate_series_int4_support' }, { oid => '1068', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_int8_support', proretset => 't', prorettype => 'int8', proargtypes => 'int8 int8 int8', prosrc => 'generate_series_step_int8' }, { oid => '1069', descr => 'non-persistent series generator', - proname => 'generate_series', prorows => '1000', proretset => 't', + proname => 'generate_series', prorows => '1000', + prosupport => 'generate_series_int8_support', proretset => 't', prorettype => 'int8', proargtypes => 'int8 int8', prosrc => 'generate_series_int8' }, +{ oid => '3995', descr => 'planner support for generate_series', + proname => 'generate_series_int8_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'generate_series_int8_support' }, { oid => '3259', descr => 'non-persistent series generator', proname => 'generate_series', prorows => '1000', proretset => 't', prorettype => 'numeric', proargtypes => 'numeric numeric numeric', diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 0d2d1889e9..453079a9e2 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -507,7 +507,10 @@ typedef enum NodeTag T_TsmRoutine, /* in access/tsmapi.h */ T_ForeignKeyCacheInfo, /* in utils/rel.h */ T_CallContext, /* in nodes/parsenodes.h */ - T_SupportRequestSimplify /* in nodes/supportnodes.h */ + T_SupportRequestSimplify, /* in nodes/supportnodes.h */ + T_SupportRequestSelectivity, /* in nodes/supportnodes.h */ + T_SupportRequestCost, /* in nodes/supportnodes.h */ + T_SupportRequestRows /* in nodes/supportnodes.h */ } NodeTag; /* diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 0b780b07c1..c23c4304f3 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -61,7 +61,7 @@ typedef struct AggClauseCosts bool hasNonPartial; /* does any agg not support partial mode? */ bool hasNonSerial; /* is any partial agg non-serializable? */ QualCost transCost; /* total per-input-row execution costs */ - Cost finalCost; /* total per-aggregated-row costs */ + QualCost finalCost; /* total per-aggregated-row costs */ Size transitionSpace; /* space for pass-by-ref transition data */ } AggClauseCosts; diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h index 1f7d02b5ee..1a3a36ba99 100644 --- a/src/include/nodes/supportnodes.h +++ b/src/include/nodes/supportnodes.h @@ -36,6 +36,7 @@ #include "nodes/primnodes.h" struct PlannerInfo; /* avoid including relation.h here */ +struct SpecialJoinInfo; /* @@ -67,4 +68,103 @@ typedef struct SupportRequestSimplify FuncExpr *fcall; /* Function call to be simplified */ } SupportRequestSimplify; +/* + * The Selectivity request allows the support function to provide a + * selectivity estimate for a function appearing at top level of a WHERE + * clause (so it applies only to functions returning boolean). + * + * The input arguments are the same as are supplied to operator restriction + * and join estimators, except that we unify those two APIs into just one + * request type. See clause_selectivity() for the details. + * + * If an estimate can be made, store it into the "selectivity" field and + * return the address of the SupportRequestSelectivity node; the estimate + * must be between 0 and 1 inclusive. Return NULL if no estimate can be + * made (in which case the planner will fall back to a default estimate, + * traditionally 1/3). + * + * If the target function is being used as the implementation of an operator, + * the support function will not be used for this purpose; the operator's + * restriction or join estimator is consulted instead. + */ +typedef struct SupportRequestSelectivity +{ + NodeTag type; + + /* Input fields: */ + struct PlannerInfo *root; /* Planner's infrastructure */ + Oid funcid; /* function we are inquiring about */ + List *args; /* pre-simplified arguments to function */ + Oid inputcollid; /* function's input collation */ + bool is_join; /* is this a join or restriction case? */ + int varRelid; /* if restriction, RTI of target relation */ + JoinType jointype; /* if join, outer join type */ + struct SpecialJoinInfo *sjinfo; /* if outer join, info about join */ + + /* Output fields: */ + Selectivity selectivity; /* returned selectivity estimate */ +} SupportRequestSelectivity; + +/* + * The Cost request allows the support function to provide an execution + * cost estimate for its target function. The cost estimate can include + * both a one-time (query startup) component and a per-execution component. + * The estimate should *not* include the costs of evaluating the target + * function's arguments, only the target function itself. + * + * The "node" argument is normally the parse node that is invoking the + * target function. This is a FuncExpr in the simplest case, but it could + * also be an OpExpr, DistinctExpr, NullIfExpr, or WindowFunc, or possibly + * other cases in future. NULL is passed if the function cannot presume + * its arguments to be equivalent to what the calling node presents as + * arguments; that happens for, e.g., aggregate support functions and + * per-column comparison operators used by RowExprs. + * + * If an estimate can be made, store it into the cost fields and return the + * address of the SupportRequestCost node. Return NULL if no estimate can be + * made, in which case the planner will rely on the target function's procost + * field. (Note: while procost is automatically scaled by cpu_operator_cost, + * this is not the case for the outputs of the Cost request; the support + * function must scale its results appropriately on its own.) + */ +typedef struct SupportRequestCost +{ + NodeTag type; + + /* Input fields: */ + struct PlannerInfo *root; /* Planner's infrastructure (could be NULL) */ + Oid funcid; /* function we are inquiring about */ + Node *node; /* parse node invoking function, or NULL */ + + /* Output fields: */ + Cost startup; /* one-time cost */ + Cost per_tuple; /* per-evaluation cost */ +} SupportRequestCost; + +/* + * The Rows request allows the support function to provide an output rowcount + * estimate for its target function (so it applies only to set-returning + * functions). + * + * The "node" argument is the parse node that is invoking the target function; + * currently this will always be a FuncExpr or OpExpr. + * + * If an estimate can be made, store it into the rows field and return the + * address of the SupportRequestRows node. Return NULL if no estimate can be + * made, in which case the planner will rely on the target function's prorows + * field. + */ +typedef struct SupportRequestRows +{ + NodeTag type; + + /* Input fields: */ + struct PlannerInfo *root; /* Planner's infrastructure (could be NULL) */ + Oid funcid; /* function we are inquiring about */ + Node *node; /* parse node invoking function */ + + /* Output fields: */ + double rows; /* number of rows expected to be returned */ +} SupportRequestRows; + #endif /* SUPPORTNODES_H */ diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 95a78cfa39..5e10fb1d50 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -31,7 +31,7 @@ extern void get_agg_clause_costs(PlannerInfo *root, Node *clause, extern bool contain_window_function(Node *clause); extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef); -extern double expression_returns_set_rows(Node *clause); +extern double expression_returns_set_rows(PlannerInfo *root, Node *clause); extern bool contain_subplans(Node *clause); diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index 40f70f9f2b..c337f047cb 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -55,6 +55,20 @@ extern Selectivity join_selectivity(PlannerInfo *root, JoinType jointype, SpecialJoinInfo *sjinfo); +extern Selectivity function_selectivity(PlannerInfo *root, + Oid funcid, + List *args, + Oid inputcollid, + bool is_join, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo); + +extern void add_function_cost(PlannerInfo *root, Oid funcid, Node *node, + QualCost *cost); + +extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node); + extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event); #endif /* PLANCAT_H */ diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index ceec85db92..16b0b1d2dc 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -120,8 +120,7 @@ extern char func_volatile(Oid funcid); extern char func_parallel(Oid funcid); extern char get_func_prokind(Oid funcid); extern bool get_func_leakproof(Oid funcid); -extern float4 get_func_cost(Oid funcid); -extern float4 get_func_rows(Oid funcid); +extern RegProcedure get_func_support(Oid funcid); extern Oid get_relname_relid(const char *relname, Oid relnamespace); extern char *get_rel_name(Oid relid); extern Oid get_rel_namespace(Oid relid); diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index 130a0e4be3..0879c885eb 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -133,3 +133,63 @@ ERROR: function num_nulls() does not exist LINE 1: SELECT num_nulls(); ^ HINT: No function matches the given name and argument types. You might need to add explicit type casts. +-- +-- Test adding a support function to a subject function +-- +CREATE FUNCTION my_int_eq(int, int) RETURNS bool + LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE + AS $$int4eq$$; +-- By default, planner does not think that's selective +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1 +WHERE my_int_eq(a.unique2, 42); + QUERY PLAN +---------------------------------------------- + Hash Join + Hash Cond: (b.unique1 = a.unique1) + -> Seq Scan on tenk1 b + -> Hash + -> Seq Scan on tenk1 a + Filter: my_int_eq(unique2, 42) +(6 rows) + +-- With support function that knows it's int4eq, we get a different plan +ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func; +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1 +WHERE my_int_eq(a.unique2, 42); + QUERY PLAN +------------------------------------------------- + Nested Loop + -> Seq Scan on tenk1 a + Filter: my_int_eq(unique2, 42) + -> Index Scan using tenk1_unique1 on tenk1 b + Index Cond: (unique1 = a.unique1) +(5 rows) + +-- Also test non-default rowcount estimate +CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer + LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE + AS $$generate_series_int4$$ + SUPPORT test_support_func; +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g; + QUERY PLAN +---------------------------------------- + Hash Join + Hash Cond: (g.g = a.unique1) + -> Function Scan on my_gen_series g + -> Hash + -> Seq Scan on tenk1 a +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; + QUERY PLAN +------------------------------------------------- + Nested Loop + -> Function Scan on my_gen_series g + -> Index Scan using tenk1_unique1 on tenk1 a + Index Cond: (unique1 = g.g) +(4 rows) + diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index a54b4a5a7c..6e238e88b3 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -904,7 +904,7 @@ select * from int4_tbl where -- explain (verbose, costs off) select * from int4_tbl o where (f1, f1) in - (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1); QUERY PLAN ------------------------------------------------------------------- Nested Loop Semi Join @@ -918,9 +918,9 @@ select * from int4_tbl o where (f1, f1) in Output: "ANY_subquery".f1, "ANY_subquery".g Filter: ("ANY_subquery".f1 = "ANY_subquery".g) -> Result - Output: i.f1, ((generate_series(1, 2)) / 10) + Output: i.f1, ((generate_series(1, 50)) / 10) -> ProjectSet - Output: generate_series(1, 2), i.f1 + Output: generate_series(1, 50), i.f1 -> HashAggregate Output: i.f1 Group Key: i.f1 @@ -929,7 +929,7 @@ select * from int4_tbl o where (f1, f1) in (19 rows) select * from int4_tbl o where (f1, f1) in - (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1); f1 ---- 0 diff --git a/src/test/regress/input/create_function_1.source b/src/test/regress/input/create_function_1.source index 26e2227d3a..223454a5ea 100644 --- a/src/test/regress/input/create_function_1.source +++ b/src/test/regress/input/create_function_1.source @@ -68,6 +68,11 @@ CREATE FUNCTION test_fdw_handler() AS '@libdir@/regress@DLSUFFIX@', 'test_fdw_handler' LANGUAGE C; +CREATE FUNCTION test_support_func(internal) + RETURNS internal + AS '@libdir@/regress@DLSUFFIX@', 'test_support_func' + LANGUAGE C STRICT; + -- Things that shouldn't work: CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL diff --git a/src/test/regress/output/create_function_1.source b/src/test/regress/output/create_function_1.source index 8c50d9b309..5f43e8de81 100644 --- a/src/test/regress/output/create_function_1.source +++ b/src/test/regress/output/create_function_1.source @@ -60,6 +60,10 @@ CREATE FUNCTION test_fdw_handler() RETURNS fdw_handler AS '@libdir@/regress@DLSUFFIX@', 'test_fdw_handler' LANGUAGE C; +CREATE FUNCTION test_support_func(internal) + RETURNS internal + AS '@libdir@/regress@DLSUFFIX@', 'test_support_func' + LANGUAGE C STRICT; -- Things that shouldn't work: CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL AS 'SELECT ''not an integer'';'; diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 70727286ca..ad3e803899 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -23,12 +23,16 @@ #include "access/transam.h" #include "access/tuptoaster.h" #include "access/xact.h" +#include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "commands/sequence.h" #include "commands/trigger.h" #include "executor/executor.h" #include "executor/spi.h" #include "miscadmin.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" +#include "optimizer/plancat.h" #include "port/atomics.h" #include "utils/builtins.h" #include "utils/geo_decls.h" @@ -863,3 +867,76 @@ test_fdw_handler(PG_FUNCTION_ARGS) elog(ERROR, "test_fdw_handler is not implemented"); PG_RETURN_NULL(); } + +PG_FUNCTION_INFO_V1(test_support_func); +Datum +test_support_func(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSelectivity)) + { + /* + * Assume that the target is int4eq; that's safe as long as we don't + * attach this to any other boolean-returning function. + */ + SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq; + Selectivity s1; + + if (req->is_join) + s1 = join_selectivity(req->root, Int4EqualOperator, + req->args, + req->inputcollid, + req->jointype, + req->sjinfo); + else + s1 = restriction_selectivity(req->root, Int4EqualOperator, + req->args, + req->inputcollid, + req->varRelid); + + req->selectivity = s1; + ret = (Node *) req; + } + + if (IsA(rawreq, SupportRequestCost)) + { + /* Provide some generic estimate */ + SupportRequestCost *req = (SupportRequestCost *) rawreq; + + req->startup = 0; + req->per_tuple = 2 * cpu_operator_cost; + ret = (Node *) req; + } + + if (IsA(rawreq, SupportRequestRows)) + { + /* + * Assume that the target is generate_series_int4; that's safe as long + * as we don't attach this to any other set-returning function. + */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (req->node && IsA(req->node, FuncExpr)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1 = linitial(args); + Node *arg2 = lsecond(args); + + if (IsA(arg1, Const) && + !((Const *) arg1)->constisnull && + IsA(arg2, Const) && + !((Const *) arg2)->constisnull) + { + int32 val1 = DatumGetInt32(((Const *) arg1)->constvalue); + int32 val2 = DatumGetInt32(((Const *) arg2)->constvalue); + + req->rows = val2 - val1 + 1; + ret = (Node *) req; + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index 1a20c1f765..7a71f7659c 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -29,3 +29,35 @@ SELECT num_nulls(VARIADIC '{}'::int[]); -- should fail, one or more arguments is required SELECT num_nonnulls(); SELECT num_nulls(); + +-- +-- Test adding a support function to a subject function +-- + +CREATE FUNCTION my_int_eq(int, int) RETURNS bool + LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE + AS $$int4eq$$; + +-- By default, planner does not think that's selective +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1 +WHERE my_int_eq(a.unique2, 42); + +-- With support function that knows it's int4eq, we get a different plan +ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func; + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1 +WHERE my_int_eq(a.unique2, 42); + +-- Also test non-default rowcount estimate +CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer + LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE + AS $$generate_series_int4$$ + SUPPORT test_support_func; + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g; + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 843f511b3d..ccbe8a1df5 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -498,9 +498,9 @@ select * from int4_tbl where -- explain (verbose, costs off) select * from int4_tbl o where (f1, f1) in - (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1); select * from int4_tbl o where (f1, f1) in - (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + (select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1); -- -- check for over-optimization of whole-row Var referencing an Append plan