Teach planner how to propagate pathkeys from sub-SELECTs in FROM up to

the outer query.  (The implementation is a bit kludgy, but it would take
nontrivial restructuring to make it nicer, which is probably not worth
the effort.)  This avoids unnecessary sort steps in examples like
SELECT foo,count(*) FROM (SELECT ... ORDER BY foo,bar) sub GROUP BY foo
which means there is now a reasonable technique for controlling the
order of inputs to custom aggregates, even in the grouping case.
This commit is contained in:
Tom Lane 2003-02-15 20:12:41 +00:00
parent 50c4190e37
commit 056467ec6b
9 changed files with 172 additions and 29 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.96 2003/02/08 20:20:54 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.97 2003/02/15 20:12:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -291,6 +291,7 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
Query *subquery = rte->subquery;
List *pathkeys;
/*
* If there are any restriction clauses that have been attached to the
@ -351,8 +352,11 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,
/* Mark rel with estimated output rows, width, etc */
set_baserel_size_estimates(root, rel);
/* Convert subquery pathkeys to outer representation */
pathkeys = build_subquery_pathkeys(root, rel, subquery);
/* Generate appropriate path */
add_path(rel, create_subqueryscan_path(rel));
add_path(rel, create_subqueryscan_path(rel, pathkeys));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);

View File

@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.46 2003/02/08 20:20:54 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.47 2003/02/15 20:12:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -366,6 +366,31 @@ canonicalize_pathkeys(Query *root, List *pathkeys)
return new_pathkeys;
}
/*
 * count_canonical_peers
 *	  Look through root->equi_key_list for a set that contains the given
 *	  PathKeyItem (as judged by member()).  When one is found, report how
 *	  many *other* items that set holds; when none is found, report 0.
 *	  The equi_key_list is only read here, never extended.
 *
 * This exists solely to feed the admittedly bogus scoring heuristic in
 * build_subquery_pathkeys.
 */
static int
count_canonical_peers(Query *root, PathKeyItem *item)
{
	int			npeers = 0;
	List	   *setcell;

	foreach(setcell, root->equi_key_list)
	{
		List	   *equiset = lfirst(setcell);

		if (!member(item, equiset))
			continue;

		/* first set containing the item decides the answer */
		npeers = length(equiset) - 1;
		break;
	}

	return npeers;
}
/****************************************************************************
* PATHKEY COMPARISONS
****************************************************************************/
@ -597,6 +622,9 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
*
* If 'scandir' is BackwardScanDirection, attempt to build pathkeys
* representing a backwards scan of the index. Return NIL if can't do it.
*
* We generate the full pathkeys list whether or not all are useful for the
* current query. Caller should do truncate_useless_pathkeys().
*/
List *
build_index_pathkeys(Query *root,
@ -699,9 +727,10 @@ find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno)
foreach(temp, rel->targetlist)
{
Var *tle_var = get_expr(lfirst(temp));
Var *tle_var = (Var *) ((TargetEntry *) lfirst(temp))->expr;
if (IsA(tle_var, Var) &&tle_var->varattno == varattno)
if (IsA(tle_var, Var) &&
tle_var->varattno == varattno)
return tle_var;
}
@ -714,6 +743,112 @@ find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno)
return makeVar(relid, varattno, vartypeid, type_mod, 0);
}
/*
 * build_subquery_pathkeys
 *	  Produce a pathkeys list, expressed in the outer query's terms, that
 *	  describes how the output of a subquery-in-FROM is ordered.  The
 *	  subquery must already have been planned, so that its query_pathkeys
 *	  field has been set.
 *
 * 'root' is the outer query, 'rel' is the outer-query relation standing
 * for the subquery (its relid is used as the varno of the Vars we build),
 * and 'subquery' is the planned sub-Query.
 *
 * The caller need not apply truncate_useless_pathkeys() to the result:
 * keys are chosen by a score that already reflects their usefulness.
 */
List *
build_subquery_pathkeys(Query *root, RelOptInfo *rel, Query *subquery)
{
	List	   *result = NIL;
	int			nresult = 0;
	int			n_outer_keys = length(root->query_pathkeys);
	List	   *pkcell;

	foreach(pkcell, subquery->query_pathkeys)
	{
		List	   *sub_pathkey = (List *) lfirst(pkcell);
		PathKeyItem *winner = NULL;
		int			winner_score = 0;
		List	   *itemcell;
		List	   *canon;

		/*
		 * A sub_pathkey may hold several items (all known to be equal),
		 * and each item may correspond to zero, one, or several of the
		 * subquery output columns that the outer query can see.  So there
		 * can be several ways to express this sub_pathkey at the outer
		 * level.  Ideally all of them would be emitted into one outer
		 * pathkey, carrying the equality knowledge upward; that is not
		 * possible at present because the outer query's canonical pathkey
		 * sets are already frozen by the time we get here.  Instead, keep
		 * only the candidate with the highest "score": its number of
		 * canonical pathkey peers, plus one if it matches the outer
		 * query_pathkeys entry at the current position.  That candidate
		 * is the one most likely to be useful to the outer query.
		 */
		foreach(itemcell, sub_pathkey)
		{
			PathKeyItem *sub_item = (PathKeyItem *) lfirst(itemcell);
			List	   *tlcell;

			foreach(tlcell, subquery->targetList)
			{
				TargetEntry *tle = (TargetEntry *) lfirst(tlcell);
				PathKeyItem *candidate;
				int			score;

				/* junk columns are skipped */
				if (tle->resdom->resjunk)
					continue;
				/* so are columns that do not compute this sort key */
				if (!equal(tle->expr, sub_item->key))
					continue;

				/* This output column is a representation of the sub key */
				candidate = makePathKeyItem((Node *) makeVar(rel->relid,
														 tle->resdom->resno,
														 tle->resdom->restype,
														 tle->resdom->restypmod,
														 0),
											sub_item->sortop);

				/* base score: number of mergejoin peers */
				score = count_canonical_peers(root, candidate);

				/* bonus point for matching the next outer query_pathkeys item */
				if (nresult < n_outer_keys &&
					member(candidate,
						   nth(nresult, root->query_pathkeys)))
					score++;

				/* strictly-greater test: ties keep the earlier candidate */
				if (score > winner_score)
				{
					winner = candidate;
					winner_score = score;
				}
			}
		}

		/*
		 * No candidate selected for this sub_pathkey: stop here, since
		 * the keys to its right cannot be used without it.
		 */
		if (winner == NULL)
			break;

		/* Only now canonicalize the item we settled on */
		canon = make_canonical_pathkey(root, winner);

		/*
		 * Skip ordering info we already have; this could arise if the
		 * outer query equijoins subquery keys...
		 */
		if (ptrMember(canon, result))
			continue;

		result = lappend(result, canon);
		nresult++;
	}

	return result;
}
/*
* build_join_pathkeys
* Build the path keys for a join relation constructed by mergejoin or

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.146 2003/02/09 23:57:19 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.147 2003/02/15 20:12:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -469,6 +469,9 @@ inheritance_planner(Query *parse, List *inheritlist)
/* Save the target-relations list for the executor, too */
parse->resultRelations = inheritlist;
/* Mark result as unordered (probably unnecessary) */
parse->query_pathkeys = NIL;
return (Plan *) make_append(subplans, true, tlist);
}
@ -491,7 +494,8 @@ inheritance_planner(Query *parse, List *inheritlist)
* The normal case is to pass -1, but some callers pass values >= 0 to
* override this routine's determination of the appropriate fraction.
*
* Returns a query plan.
* Returns a query plan. Also, parse->query_pathkeys is returned as the
* actual output ordering of the plan (in pathkey format).
*--------------------
*/
static Plan *
@ -1191,10 +1195,13 @@ grouping_planner(Query *parse, double tuple_fraction)
if (parse->sortClause)
{
if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
{
result_plan = (Plan *) make_sort_from_sortclauses(parse,
tlist,
result_plan,
parse->sortClause);
current_pathkeys = sort_pathkeys;
}
}
/*
@ -1232,6 +1239,12 @@ grouping_planner(Query *parse, double tuple_fraction)
parse->limitCount);
}
/*
* Return the actual output ordering in query_pathkeys for possible
* use by an outer query level.
*/
parse->query_pathkeys = current_pathkeys;
return result_plan;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.87 2003/02/08 20:20:55 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.88 2003/02/15 20:12:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -676,13 +676,13 @@ hash_safe_tlist(List *tlist)
* returning the pathnode.
*/
Path *
create_subqueryscan_path(RelOptInfo *rel)
create_subqueryscan_path(RelOptInfo *rel, List *pathkeys)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_SubqueryScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* for now, assume unordered result */
pathnode->pathkeys = pathkeys;
/* just copy the subplan's cost estimates */
pathnode->startup_cost = rel->subplan->startup_cost;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.47 2003/02/08 20:20:55 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.48 2003/02/15 20:12:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -381,11 +381,11 @@ new_join_tlist(List *tlist,
foreach(i, tlist)
{
TargetEntry *xtl = lfirst(i);
TargetEntry *tle = lfirst(i);
resdomno += 1;
t_list = lappend(t_list,
create_tl_element(get_expr(xtl), resdomno));
create_tl_element((Var *) tle->expr, resdomno));
}
return t_list;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.54 2003/01/20 18:54:57 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.55 2003/02/15 20:12:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -204,15 +204,6 @@ add_to_flat_tlist(List *tlist, List *vars)
return tlist;
}
/*
 * get_expr
 *	  Return the expression held in a targetlist entry, cast to Var *.
 *
 * Both the entry and its expr field are asserted to be non-NULL.  The cast
 * is unconditional: nothing here verifies that the expression node really
 * is a Var, so callers must check the node type themselves if it matters.
 */
Var *
get_expr(TargetEntry *tle)
{
Assert(tle != NULL);
Assert(tle->expr != NULL);
/* plain cast, no IsA() check on the node type */
return (Var *) tle->expr;
}
/*
* get_sortgroupclause_tle
* Find the targetlist entry matching the given SortClause

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: pathnode.h,v 1.49 2003/02/08 20:20:55 tgl Exp $
* $Id: pathnode.h,v 1.50 2003/02/15 20:12:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -41,7 +41,7 @@ extern ResultPath *create_result_path(RelOptInfo *rel, Path *subpath,
extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
extern UniquePath *create_unique_path(Query *root, RelOptInfo *rel,
Path *subpath);
extern Path *create_subqueryscan_path(RelOptInfo *rel);
extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys);
extern Path *create_functionscan_path(Query *root, RelOptInfo *rel);
extern NestPath *create_nestloop_path(Query *root,

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: paths.h,v 1.65 2003/01/25 23:10:30 tgl Exp $
* $Id: paths.h,v 1.66 2003/02/15 20:12:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -104,6 +104,8 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
extern List *build_index_pathkeys(Query *root, RelOptInfo *rel,
IndexOptInfo *index,
ScanDirection scandir);
extern List *build_subquery_pathkeys(Query *root, RelOptInfo *rel,
Query *subquery);
extern List *build_join_pathkeys(Query *root,
RelOptInfo *joinrel,
List *outer_pathkeys);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: tlist.h,v 1.33 2003/01/20 18:55:06 tgl Exp $
* $Id: tlist.h,v 1.34 2003/02/15 20:12:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -26,8 +26,6 @@ extern List *new_unsorted_tlist(List *targetlist);
extern List *flatten_tlist(List *tlist);
extern List *add_to_flat_tlist(List *tlist, List *vars);
extern Var *get_expr(TargetEntry *tle);
extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause,
List *targetList);
extern Node *get_sortgroupclause_expr(SortClause *sortClause,