diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index aa9a90cbfa..ce893a77be 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -758,11 +758,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	rel->subrtable = subroot->parse->rtable;
 	rel->subrowmark = subroot->rowMarks;
 
-	/* Copy number of output rows from subplan */
-	rel->tuples = rel->subplan->plan_rows;
-
 	/* Mark rel with estimated output rows, width, etc */
-	set_baserel_size_estimates(root, rel);
+	set_subquery_size_estimates(root, rel, subroot);
 
 	/* Convert subquery pathkeys to outer representation */
 	pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 16a5d0a3ca..0724f9a6c9 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -76,6 +76,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/placeholder.h"
+#include "optimizer/plancat.h"
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
 #include "parser/parsetree.h"
@@ -2986,7 +2987,7 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
  *		Set the size estimates for the given base relation.
  *
  * The rel's targetlist and restrictinfo list must have been constructed
- * already.
+ * already, and rel->tuples must be set.
  *
  * We set the following fields of the rel node:
  *	rows: the estimated number of output tuples (after applying
@@ -3151,6 +3152,76 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 	rel->rows = clamp_row_est(nrows);
 }
 
+/*
+ * set_subquery_size_estimates
+ *		Set the size estimates for a base relation that is a subquery.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already, and the plan for the subquery must have been completed.
+ * We look at the subquery's plan and PlannerInfo to extract data.
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
+							PlannerInfo *subroot)
+{
+	RangeTblEntry *rte;
+	ListCell   *lc;
+
+	/* Should only be applied to base relations that are subqueries */
+	Assert(rel->relid > 0);
+	rte = planner_rt_fetch(rel->relid, root);
+	Assert(rte->rtekind == RTE_SUBQUERY);
+
+	/* Copy raw number of output rows from subplan */
+	rel->tuples = rel->subplan->plan_rows;
+
+	/*
+	 * Compute per-output-column width estimates by examining the subquery's
+	 * targetlist.  For any output that is a plain Var, get the width estimate
+	 * made while planning the subquery.  Otherwise, or if no width was cached
+	 * for that Var, fall back on a datatype-based estimate.
+	 */
+	foreach(lc, subroot->parse->targetList)
+	{
+		TargetEntry *te = (TargetEntry *) lfirst(lc);
+		Node	   *texpr = (Node *) te->expr;
+		int32		item_width = 0;
+
+		Assert(IsA(te, TargetEntry));
+		/* junk columns aren't visible to upper query */
+		if (te->resjunk)
+			continue;
+
+		/*
+		 * XXX This currently doesn't work for subqueries containing set
+		 * operations, because the Vars in their tlists are bogus references
+		 * to the first leaf subquery, which wouldn't give the right answer
+		 * even if we could still get to its PlannerInfo.  So fall back on
+		 * datatype in that case.
+		 */
+		if (IsA(texpr, Var) &&
+			subroot->parse->setOperations == NULL)
+		{
+			Var		   *var = (Var *) texpr;
+			RelOptInfo *subrel = find_base_rel(subroot, var->varno);
+
+			item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
+		}
+		/* Still non-positive (no Var lookup, or nothing cached)?  Use type */
+		if (item_width <= 0)
+			item_width = get_typavgwidth(exprType(texpr), exprTypmod(texpr));
+
+		/* Store only when resno has an attr_widths[] slot in the outer rel */
+		if (te->resno >= rel->min_attr && te->resno <= rel->max_attr)
+			rel->attr_widths[te->resno - rel->min_attr] = item_width;
+	}
+
+	/* Now estimate number of output rows, etc */
+	set_baserel_size_estimates(root, rel);
+}
+
 /*
  * set_function_size_estimates
  *		Set the size estimates for a base relation that is a function call.
@@ -3251,11 +3322,17 @@ set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, Plan *cteplan)
  * set_rel_width
  *		Set the estimated output width of a base relation.
  *
+ * The estimated output width is the sum of the per-attribute width estimates
+ * for the actually-referenced columns, plus any PHVs or other expressions
+ * that have to be calculated at this relation.  This is the amount of data
+ * we'd need to pass upwards in case of a sort, hash, etc.
+ *
  * NB: this works best on plain relations because it prefers to look at
- * real Vars.  It will fail to make use of pg_statistic info when applied
- * to a subquery relation, even if the subquery outputs are simple vars
- * that we could have gotten info for.  Is it worth trying to be smarter
- * about subqueries?
+ * real Vars.  For subqueries, set_subquery_size_estimates will already have
+ * copied up whatever per-column estimates were made within the subquery,
+ * and for other types of rels there isn't much we can do anyway.  We fall
+ * back on (fairly stupid) datatype-based width estimates if we can't get
+ * any better number.
  *
  * The per-attribute width estimates are cached for possible re-use while
  * building join relations.
@@ -3265,6 +3342,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel) { Oid reloid = planner_rt_fetch(rel->relid, root)->relid; int32 tuple_width = 0; + bool have_wholerow_var = false; ListCell *lc; foreach(lc, rel->reltargetlist) @@ -3284,8 +3362,18 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel) ndx = var->varattno - rel->min_attr; /* - * The width probably hasn't been cached yet, but may as well - * check + * If it's a whole-row Var, we'll deal with it below after we + * have already cached as many attr widths as possible. + */ + if (var->varattno == 0) + { + have_wholerow_var = true; + continue; + } + + /* + * The width may have been cached already (especially if it's + * a subquery), so don't duplicate effort. */ if (rel->attr_widths[ndx] > 0) { @@ -3294,7 +3382,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel) } /* Try to get column width from statistics */ - if (reloid != InvalidOid) + if (reloid != InvalidOid && var->varattno > 0) { item_width = get_attavgwidth(reloid, var->varattno); if (item_width > 0) @@ -3335,6 +3423,39 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel) tuple_width += item_width; } } + + /* + * If we have a whole-row reference, estimate its width as the sum of + * per-column widths plus sizeof(HeapTupleHeaderData). + */ + if (have_wholerow_var) + { + int32 wholerow_width = sizeof(HeapTupleHeaderData); + + if (reloid != InvalidOid) + { + /* Real relation, so estimate true tuple width */ + wholerow_width += get_relation_data_width(reloid, + rel->attr_widths - rel->min_attr); + } + else + { + /* Do what we can with info for a phony rel */ + AttrNumber i; + + for (i = 1; i <= rel->max_attr; i++) + wholerow_width += rel->attr_widths[i - rel->min_attr]; + } + + rel->attr_widths[0 - rel->min_attr] = wholerow_width; + + /* + * Include the whole-row Var as part of the output tuple. Yes, + * that really is what happens at runtime. 
+ */ + tuple_width += wholerow_width; + } + Assert(tuple_width >= 0); rel->width = tuple_width; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 6324bce240..a1e5900592 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3102,7 +3102,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) * set_baserel_size_estimates, just do a quick hack for rows and width. */ rel->rows = rel->tuples; - rel->width = get_relation_data_width(tableOid); + rel->width = get_relation_data_width(tableOid, NULL); root->total_table_pages = rel->pages; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 7ffa11588d..aafaf843fc 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -322,7 +322,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * estimate_rel_size - estimate # pages and # tuples in a table or index * * If attr_widths isn't NULL, it points to the zero-index entry of the - * relation's attr_width[] cache; we fill this in if we have need to compute + * relation's attr_widths[] cache; we fill this in if we have need to compute * the attribute widths for estimation purposes. */ void @@ -435,8 +435,9 @@ estimate_rel_size(Relation rel, int32 *attr_widths, * get_rel_data_width * * Estimate the average width of (the data part of) the relation's tuples. - * If attr_widths isn't NULL, also store per-column width estimates into - * that array. + * + * If attr_widths isn't NULL, it points to the zero-index entry of the + * relation's attr_widths[] cache; use and update that cache as appropriate. * * Currently we ignore dropped columns. 
Ideally those should be included * in the result, but we haven't got any way to get info about them; and @@ -456,6 +457,14 @@ get_rel_data_width(Relation rel, int32 *attr_widths) if (att->attisdropped) continue; + + /* use previously cached data, if any */ + if (attr_widths != NULL && attr_widths[i] > 0) + { + tuple_width += attr_widths[i]; + continue; + } + /* This should match set_rel_width() in costsize.c */ item_width = get_attavgwidth(RelationGetRelid(rel), i); if (item_width <= 0) @@ -474,10 +483,11 @@ get_rel_data_width(Relation rel, int32 *attr_widths) /* * get_relation_data_width * - * External API for get_rel_data_width + * External API for get_rel_data_width: same behavior except we have to + * open the relcache entry. */ int32 -get_relation_data_width(Oid relid) +get_relation_data_width(Oid relid, int32 *attr_widths) { int32 result; Relation relation; @@ -485,7 +495,7 @@ get_relation_data_width(Oid relid) /* As above, assume relation is already locked */ relation = heap_open(relid, NoLock); - result = get_rel_data_width(relation, NULL); + result = get_rel_data_width(relation, attr_widths); heap_close(relation, NoLock); diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index e1dcd6df14..8df1b95abe 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -121,6 +121,8 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List *restrictlist); +extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel, + PlannerInfo *subroot); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index de7de84cb3..ca7b2c6469 100644 --- a/src/include/optimizer/plancat.h +++ 
b/src/include/optimizer/plancat.h @@ -31,7 +31,7 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, extern void estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples); -extern int32 get_relation_data_width(Oid relid); +extern int32 get_relation_data_width(Oid relid, int32 *attr_widths); extern bool relation_excluded_by_constraints(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte);