diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7cfebc95d6..8b76e98529 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1256,7 +1256,7 @@ cost_tidscan(Path *path, PlannerInfo *root, QualCost qpqual_cost; Cost cpu_per_tuple; QualCost tid_qual_cost; - int ntuples; + double ntuples; ListCell *l; double spc_random_page_cost; @@ -1283,7 +1283,7 @@ cost_tidscan(Path *path, PlannerInfo *root, ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual; Node *arraynode = (Node *) lsecond(saop->args); - ntuples += estimate_array_length(arraynode); + ntuples += estimate_array_length(root, arraynode); } else if (IsA(qual, CurrentOfExpr)) { @@ -4770,7 +4770,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) Node *arraynode = (Node *) lsecond(saop->args); QualCost sacosts; QualCost hcosts; - int estarraylen = estimate_array_length(arraynode); + double estarraylen = estimate_array_length(context->root, arraynode); set_sa_opfuncid(saop); sacosts.startup = sacosts.per_tuple = 0; @@ -4808,7 +4808,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) */ context->total.startup += sacosts.startup; context->total.per_tuple += sacosts.per_tuple * - estimate_array_length(arraynode) * 0.5; + estimate_array_length(context->root, arraynode) * 0.5; } } else if (IsA(node, Aggref) || @@ -4859,7 +4859,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) context->total.startup += perelemcost.startup; if (perelemcost.per_tuple > 0) context->total.per_tuple += perelemcost.per_tuple * - estimate_array_length((Node *) acoerce->arg); + estimate_array_length(context->root, (Node *) acoerce->arg); } else if (IsA(node, RowCompareExpr)) { diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 957d21a0a0..f3fee54e37 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -6340,7 +6340,7 @@ array_unnest_support(PG_FUNCTION_ARGS) /* We can use estimated argument values here */ arg1 = estimate_expression_value(req->root, linitial(args)); - req->rows = estimate_array_length(arg1); + req->rows = estimate_array_length(req->root, arg1); ret = (Node *) req; } } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7a3f69f2d9..dbcd98d985 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2128,10 +2128,11 @@ scalararraysel(PlannerInfo *root, /* * Estimate number of elements in the array yielded by an expression. * - * It's important that this agree with scalararraysel. + * Note: the result is integral, but we use "double" to avoid overflow + * concerns. Most callers will use it in double-type expressions anyway. */ -int -estimate_array_length(Node *arrayexpr) +double +estimate_array_length(PlannerInfo *root, Node *arrayexpr) { /* look through any binary-compatible relabeling of arrayexpr */ arrayexpr = strip_array_coercion(arrayexpr); @@ -2152,11 +2153,39 @@ estimate_array_length(Node *arrayexpr) { return list_length(((ArrayExpr *) arrayexpr)->elements); } - else + else if (arrayexpr) { - /* default guess --- see also scalararraysel */ - return 10; + /* See if we can find any statistics about it */ + VariableStatData vardata; + AttStatsSlot sslot; + double nelem = 0; + + examine_variable(root, arrayexpr, 0, &vardata); + if (HeapTupleIsValid(vardata.statsTuple)) + { + /* + * Found stats, so use the average element count, which is stored + * in the last stanumbers element of the DECHIST statistics. + * Actually that is the average count of *distinct* elements; + * perhaps we should scale it up somewhat? + */ + if (get_attstatsslot(&sslot, vardata.statsTuple, + STATISTIC_KIND_DECHIST, InvalidOid, + ATTSTATSSLOT_NUMBERS)) + { + if (sslot.nnumbers > 0) + nelem = clamp_row_est(sslot.numbers[sslot.nnumbers - 1]); + free_attstatsslot(&sslot); + } + } + ReleaseVariableStats(vardata); + + if (nelem > 0) + return nelem; } + + /* Else use a default guess --- this should match scalararraysel */ + return 10; } /* @@ -6540,7 +6569,7 @@ genericcostestimate(PlannerInfo *root, if (IsA(rinfo->clause, ScalarArrayOpExpr)) { ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause; - int alength = estimate_array_length(lsecond(saop->args)); + double alength = estimate_array_length(root, lsecond(saop->args)); if (alength > 1) num_sa_scans *= alength; @@ -6820,7 +6849,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, { ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; Node *other_operand = (Node *) lsecond(saop->args); - int alength = estimate_array_length(other_operand); + double alength = estimate_array_length(root, other_operand); clause_op = saop->opno; found_saop = true; @@ -7414,7 +7443,7 @@ gincost_scalararrayopexpr(PlannerInfo *root, { counts->exactEntries++; counts->searchEntries++; - counts->arrayScans *= estimate_array_length(rightop); + counts->arrayScans *= estimate_array_length(root, rightop); return true; } diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 6dd5171d54..2fa4c4fc1b 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -200,7 +200,7 @@ extern Selectivity scalararraysel(PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern int estimate_array_length(Node *arrayexpr); +extern double estimate_array_length(PlannerInfo *root, Node *arrayexpr); extern Selectivity rowcomparesel(PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);