Reimplement planner's handling of MIN/MAX aggregate optimization.

Per my recent proposal, get rid of all the direct inspection of indexes
and manual generation of paths in planagg.c.  Instead, set up
EquivalenceClasses for the aggregate argument expressions, and let the
regular path generation logic deal with creating paths that can satisfy
those sort orders.  This makes planagg.c a bit more visible to the rest
of the planner than it was originally, but the approach is basically a lot
cleaner than before.  A major advantage of doing it this way is that we get
MIN/MAX optimization on inheritance trees (using MergeAppend of indexscans)
practically for free, whereas in the old way we'd have had to add a whole
lot more duplicative logic.

One small disadvantage of this approach is that MIN/MAX aggregates can no
longer exploit partial indexes having an "x IS NOT NULL" predicate, unless
that restriction or something that implies it is specified in the query.
The previous implementation was able to use the added "x IS NOT NULL"
condition as an extra predicate proof condition, but in this version we
rely entirely on indexes that are considered usable by the main planning
process.  That seems a fair tradeoff for the simplicity and functionality
gained.
This commit is contained in:
Tom Lane 2010-11-04 12:01:17 -04:00
parent 0abc8fdd4d
commit 034967bdcb
18 changed files with 883 additions and 481 deletions

View File

@ -1837,6 +1837,22 @@ _copyPlaceHolderInfo(PlaceHolderInfo *from)
return newnode;
}
/*
 * _copyMinMaxAggInfo
 *		Build a fresh MinMaxAggInfo node and fill it from "from".
 *
 * Returns the newly made node. The four fields copied here are the same
 * four that _equalMinMaxAggInfo compares and _outMinMaxAggInfo writes.
 */
static MinMaxAggInfo *
_copyMinMaxAggInfo(MinMaxAggInfo *from)
{
MinMaxAggInfo *newnode = makeNode(MinMaxAggInfo);
/* aggregate function OID and its sort operator OID */
COPY_SCALAR_FIELD(aggfnoid);
COPY_SCALAR_FIELD(aggsortop);
/*
 * The aggregated expression and its pathkeys list go through the
 * node-field macro (presumably a recursive copy -- confirm against the
 * macro definition earlier in this file).
 */
COPY_NODE_FIELD(target);
COPY_NODE_FIELD(pathkeys);
return newnode;
}
/* ****************************************************************
* parsenodes.h copy functions
* ****************************************************************
@ -3921,6 +3937,9 @@ copyObject(void *from)
case T_PlaceHolderInfo:
retval = _copyPlaceHolderInfo(from);
break;
case T_MinMaxAggInfo:
retval = _copyMinMaxAggInfo(from);
break;
/*
* VALUE NODES

View File

@ -844,6 +844,17 @@ _equalPlaceHolderInfo(PlaceHolderInfo *a, PlaceHolderInfo *b)
return true;
}
/*
 * _equalMinMaxAggInfo
 *		Compare two MinMaxAggInfo nodes field by field.
 *
 * Falls through to "return true" only when none of the COMPARE_* macros
 * has bailed out first (presumably each returns false from this function
 * on a mismatch -- confirm against the macro definitions in this file).
 */
static bool
_equalMinMaxAggInfo(MinMaxAggInfo *a, MinMaxAggInfo *b)
{
/* aggregate function OID and its sort operator OID */
COMPARE_SCALAR_FIELD(aggfnoid);
COMPARE_SCALAR_FIELD(aggsortop);
/* the aggregated expression and the pathkeys list */
COMPARE_NODE_FIELD(target);
COMPARE_NODE_FIELD(pathkeys);
return true;
}
/*
* Stuff from parsenodes.h
@ -2568,6 +2579,9 @@ equal(void *a, void *b)
case T_PlaceHolderInfo:
retval = _equalPlaceHolderInfo(a, b);
break;
case T_MinMaxAggInfo:
retval = _equalMinMaxAggInfo(a, b);
break;
case T_List:
case T_IntList:

View File

@ -1608,6 +1608,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
WRITE_NODE_FIELD(window_pathkeys);
WRITE_NODE_FIELD(distinct_pathkeys);
WRITE_NODE_FIELD(sort_pathkeys);
WRITE_NODE_FIELD(minmax_aggs);
WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
WRITE_BOOL_FIELD(hasInheritedTarget);
@ -1808,6 +1809,17 @@ _outPlaceHolderInfo(StringInfo str, PlaceHolderInfo *node)
WRITE_INT_FIELD(ph_width);
}
/*
 * _outMinMaxAggInfo
 *		Emit a MinMaxAggInfo node using the WRITE_* macros.
 *
 * The node is labelled "MINMAXAGGINFO", followed by its two OID fields and
 * its two node-valued fields. Output presumably goes to "str", which the
 * macros reference implicitly -- confirm against the macro definitions.
 */
static void
_outMinMaxAggInfo(StringInfo str, MinMaxAggInfo *node)
{
WRITE_NODE_TYPE("MINMAXAGGINFO");
/* aggregate function OID and its sort operator OID */
WRITE_OID_FIELD(aggfnoid);
WRITE_OID_FIELD(aggsortop);
/* the aggregated expression and the pathkeys list */
WRITE_NODE_FIELD(target);
WRITE_NODE_FIELD(pathkeys);
}
static void
_outPlannerParamItem(StringInfo str, PlannerParamItem *node)
{
@ -2845,6 +2857,9 @@ _outNode(StringInfo str, void *obj)
case T_PlaceHolderInfo:
_outPlaceHolderInfo(str, obj);
break;
case T_MinMaxAggInfo:
_outMinMaxAggInfo(str, obj);
break;
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;

View File

@ -912,6 +912,39 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
return pathkeys;
}
/****************************************************************************
* PATHKEYS AND AGGREGATES
****************************************************************************/
/*
 * make_pathkeys_for_aggregate
 *	  Build the pathkeys list describing the ordering a MIN/MAX aggregate
 *	  wants: sort "aggtarget" using "aggsortop".
 *
 * The result always holds exactly one PathKey.
 *
 * Callers only use this before EquivalenceClass merging has happened, so
 * the pathkey is deliberately not canonicalized here.
 */
List *
make_pathkeys_for_aggregate(PlannerInfo *root,
                            Expr *aggtarget,
                            Oid aggsortop)
{
    /*
     * nulls_first is passed as false, which is an arbitrary choice: a
     * MIN/MAX aggregate is satisfied by either nulls ordering, and the code
     * that matches paths against these pathkeys takes care of that.
     *
     * NOTE(review): the trailing "0, true, false" arguments are passed
     * through unchanged; the final false presumably is the "canonicalize"
     * flag mentioned above -- confirm against make_pathkey_from_sortinfo().
     */
    return list_make1(make_pathkey_from_sortinfo(root,
                                                 aggtarget,
                                                 aggsortop,
                                                 false, /* nulls_first */
                                                 0,
                                                 true,
                                                 false));
}
/****************************************************************************
* PATHKEYS AND MERGECLAUSES
****************************************************************************/
@ -1379,10 +1412,11 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
* PATHKEY USEFULNESS CHECKS
*
* We only want to remember as many of the pathkeys of a path as have some
* potential use, either for subsequent mergejoins or for meeting the query's
* requested output ordering. This ensures that add_path() won't consider
* a path to have a usefully different ordering unless it really is useful.
* These routines check for usefulness of given pathkeys.
* potential use, which can include subsequent mergejoins, meeting the query's
* requested output ordering, or implementing MIN/MAX aggregates. This
* ensures that add_path() won't consider a path to have a usefully different
* ordering unless it really is useful. These routines check for usefulness
* of given pathkeys.
****************************************************************************/
/*
@ -1403,7 +1437,7 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
* that direction should be preferred, in hopes of avoiding a final sort step.
* right_merge_direction() implements this heuristic.
*/
int
static int
pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
{
int useful = 0;
@ -1506,7 +1540,7 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
* no good to order by just the first key(s) of the requested ordering.
* So the result is always either 0 or list_length(root->query_pathkeys).
*/
int
static int
pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
{
if (root->query_pathkeys == NIL)
@ -1524,6 +1558,50 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
return 0; /* path ordering not useful */
}
/*
 * pathkeys_useful_for_minmax
 *	  Report how many leading pathkeys of "pathkeys" help implement one of
 *	  the query's MIN/MAX aggregates (root->minmax_aggs).
 *
 * The answer is all-or-nothing: 1 if the first pathkey matches the single
 * pathkey of any listed aggregate, otherwise 0. With several aggregates
 * present, a match against any one of them is good enough.
 *
 * pathkeys_contained_in() is not used because the comparison must ignore
 * the nulls_first setting. Since each aggregate carries at most one
 * pathkey, comparing the remaining fields by hand is cheap.
 */
static int
pathkeys_useful_for_minmax(PlannerInfo *root, List *pathkeys)
{
    PathKey    *leading;
    ListCell   *cell;

    /* An unordered path cannot serve any MIN/MAX aggregate */
    if (pathkeys == NIL)
        return 0;

    leading = (PathKey *) linitial(pathkeys);

    foreach(cell, root->minmax_aggs)
    {
        MinMaxAggInfo *agg = (MinMaxAggInfo *) lfirst(cell);
        PathKey    *wanted;

        /* Only aggregates whose pathkey was not found redundant count */
        if (agg->pathkeys != NIL)
        {
            Assert(list_length(agg->pathkeys) == 1);
            wanted = (PathKey *) linitial(agg->pathkeys);

            /* Compare everything except nulls_first */
            if (wanted->pk_eclass != leading->pk_eclass)
                continue;
            if (wanted->pk_opfamily != leading->pk_opfamily)
                continue;
            if (wanted->pk_strategy != leading->pk_strategy)
                continue;

            return 1;           /* ordering matches this aggregate */
        }
    }

    return 0;                   /* path ordering not useful */
}
/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
@ -1535,11 +1613,15 @@ truncate_useless_pathkeys(PlannerInfo *root,
{
int nuseful;
int nuseful2;
int nuseful3;
nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
nuseful3 = pathkeys_useful_for_minmax(root, pathkeys);
if (nuseful3 > nuseful)
nuseful = nuseful3;
/*
* Note: not safe to modify input list destructively, but we can avoid
@ -1565,8 +1647,8 @@ truncate_useless_pathkeys(PlannerInfo *root,
*
* We could make the test more complex, for example checking to see if any of
* the joinclauses are really mergejoinable, but that likely wouldn't win
* often enough to repay the extra cycles. Queries with neither a join nor
* a sort are reasonably common, though, so this much work seems worthwhile.
* often enough to repay the extra cycles. Queries with no join, sort, or
* aggregate at all are reasonably common, so this much work seems worthwhile.
*/
bool
has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
@ -1575,5 +1657,7 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
return true; /* might be able to use pathkeys for merging */
if (root->query_pathkeys != NIL)
return true; /* might be able to use them for ordering */
if (root->minmax_aggs != NIL)
return true; /* might be able to use them for MIN/MAX */
return false; /* definitely useless */
}

View File

@ -81,6 +81,7 @@ static Node *replace_nestloop_params(PlannerInfo *root, Node *expr);
static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root);
static List *fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
List *indexquals);
static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index);
static List *get_switched_clauses(List *clauses, Relids outerrelids);
static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_path_costsize(Plan *dest, Path *src);
@ -2396,10 +2397,8 @@ fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
/*
* fix_indexqual_operand
* Convert an indexqual expression to a Var referencing the index column.
*
* This is exported because planagg.c needs it.
*/
Node *
static Node *
fix_indexqual_operand(Node *node, IndexOptInfo *index)
{
/*

View File

@ -3,6 +3,17 @@
* planagg.c
* Special planning for aggregate queries.
*
* This module tries to replace MIN/MAX aggregate functions by subqueries
* of the form
* (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
* Given a suitable index on tab.col, this can be much faster than the
* generic scan-all-the-rows aggregation plan. We can handle multiple
* MIN/MAX aggregates by generating multiple subqueries, and their
* orderings can be different. However, if the query contains any
* non-optimizable aggregates, there's no point since we'll have to
* scan all the rows anyway.
*
*
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@ -24,71 +35,62 @@
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/predtest.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/subselect.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
/* Per-aggregate info during optimize_minmax_aggregates() */
typedef struct
{
Oid aggfnoid; /* pg_proc Oid of the aggregate */
Oid aggsortop; /* Oid of its sort operator */
Expr *target; /* expression we are aggregating on */
NullTest *notnulltest; /* expression for "target IS NOT NULL" */
IndexPath *path; /* access path for index scan */
MinMaxAggInfo *mminfo; /* info gathered by preprocessing */
Path *path; /* access path for ordered scan */
Cost pathcost; /* estimated cost to fetch first row */
bool nulls_first; /* null ordering direction matching index */
Param *param; /* param for subplan's output */
} MinMaxAggInfo;
} PrivateMMAggInfo;
static bool find_minmax_aggs_walker(Node *node, List **context);
static bool build_minmax_path(PlannerInfo *root, RelOptInfo *rel,
MinMaxAggInfo *info);
static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
IndexOptInfo *index, int indexcol);
static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info);
static void attach_notnull_index_qual(MinMaxAggInfo *info, IndexScan *iplan);
static PrivateMMAggInfo *find_minmax_path(PlannerInfo *root, RelOptInfo *rel,
MinMaxAggInfo *mminfo);
static bool path_usable_for_agg(Path *path);
static void make_agg_subplan(PlannerInfo *root, RelOptInfo *rel,
PrivateMMAggInfo *info);
static void add_notnull_qual(PlannerInfo *root, RelOptInfo *rel,
PrivateMMAggInfo *info, Path *path);
static Node *replace_aggs_with_params_mutator(Node *node, List **context);
static Oid fetch_agg_sort_op(Oid aggfnoid);
/*
* optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
* preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
*
* This checks to see if we can replace MIN/MAX aggregate functions by
* subqueries of the form
* (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
* Given a suitable index on tab.col, this can be much faster than the
* generic scan-all-the-rows plan.
* Check to see whether the query contains MIN/MAX aggregate functions that
* might be optimizable via indexscans. If it does, and all the aggregates
* are potentially optimizable, then set up root->minmax_aggs with a list of
* these aggregates.
*
* We are passed the preprocessed tlist, and the best path
* devised for computing the input of a standard Agg node. If we are able
* to optimize all the aggregates, and the result is estimated to be cheaper
* than the generic aggregate method, then generate and return a Plan that
* does it that way. Otherwise, return NULL.
* Note: we are passed the preprocessed targetlist separately, because it's
* not necessarily equal to root->parse->targetList.
*/
Plan *
optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
void
preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
{
Query *parse = root->parse;
FromExpr *jtnode;
RangeTblRef *rtr;
RangeTblEntry *rte;
RelOptInfo *rel;
List *aggs_list;
ListCell *l;
Cost total_cost;
Path agg_p;
Plan *plan;
Node *hqual;
QualCost tlist_cost;
ListCell *lc;
/* minmax_aggs list should be empty at this point */
Assert(root->minmax_aggs == NIL);
/* Nothing to do if query has no aggregates */
if (!parse->hasAggs)
return NULL;
return;
Assert(!parse->setOperations); /* shouldn't get here if a setop */
Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
@ -101,63 +103,126 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
* so there's not much point in optimizing MIN/MAX.
*/
if (parse->groupClause || parse->hasWindowFuncs)
return NULL;
return;
/*
* We also restrict the query to reference exactly one table, since join
* conditions can't be handled reasonably. (We could perhaps handle a
* query containing cartesian-product joins, but it hardly seems worth the
* trouble.) However, the single real table could be buried in several
* levels of FromExpr.
* levels of FromExpr due to subqueries. Note the single table could be
* an inheritance parent, too.
*/
jtnode = parse->jointree;
while (IsA(jtnode, FromExpr))
{
if (list_length(jtnode->fromlist) != 1)
return NULL;
return;
jtnode = linitial(jtnode->fromlist);
}
if (!IsA(jtnode, RangeTblRef))
return NULL;
return;
rtr = (RangeTblRef *) jtnode;
rte = planner_rt_fetch(rtr->rtindex, root);
if (rte->rtekind != RTE_RELATION || rte->inh)
if (rte->rtekind != RTE_RELATION)
return;
/*
* Scan the tlist and HAVING qual to find all the aggregates and verify
* all are MIN/MAX aggregates. Stop as soon as we find one that isn't.
*/
aggs_list = NIL;
if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
return;
if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
return;
/*
* OK, there is at least the possibility of performing the optimization.
* Build pathkeys (and thereby EquivalenceClasses) for each aggregate.
* The existence of the EquivalenceClasses will prompt the path generation
* logic to try to build paths matching the desired sort ordering(s).
*
* Note: the pathkeys are non-canonical at this point. They'll be fixed
* later by canonicalize_all_pathkeys().
*/
foreach(lc, aggs_list)
{
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
mminfo->pathkeys = make_pathkeys_for_aggregate(root,
mminfo->target,
mminfo->aggsortop);
}
/*
* We're done until path generation is complete. Save info for later.
*/
root->minmax_aggs = aggs_list;
}
/*
* optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
*
* Check to see whether all the aggregates are in fact optimizable into
* indexscans. If so, and the result is estimated to be cheaper than the
* generic aggregate method, then generate and return a Plan that does it
* that way. Otherwise, return NULL.
*
* We are passed the preprocessed tlist, as well as the best path devised for
* computing the input of a standard Agg node.
*/
Plan *
optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
{
Query *parse = root->parse;
FromExpr *jtnode;
RangeTblRef *rtr;
RelOptInfo *rel;
List *aggs_list;
ListCell *lc;
Cost total_cost;
Path agg_p;
Plan *plan;
Node *hqual;
QualCost tlist_cost;
/* Nothing to do if preprocess_minmax_aggregates rejected the query */
if (root->minmax_aggs == NIL)
return NULL;
/* Re-locate the one real table identified by preprocess_minmax_aggregates */
jtnode = parse->jointree;
while (IsA(jtnode, FromExpr))
{
Assert(list_length(jtnode->fromlist) == 1);
jtnode = linitial(jtnode->fromlist);
}
Assert(IsA(jtnode, RangeTblRef));
rtr = (RangeTblRef *) jtnode;
rel = find_base_rel(root, rtr->rtindex);
/*
* Since this optimization is not applicable all that often, we want to
* fall out before doing very much work if possible. Therefore we do the
* work in several passes. The first pass scans the tlist and HAVING qual
* to find all the aggregates and verify that each of them is a MIN/MAX
* aggregate. If that succeeds, the second pass looks at each aggregate
* to see if it is optimizable; if so we make an IndexPath describing how
* we would scan it. (We do not try to optimize if only some aggs are
* optimizable, since that means we'll have to scan all the rows anyway.)
* If that succeeds, we have enough info to compare costs against the
* generic implementation. Only if that test passes do we build a Plan.
* Examine each agg to see if we can find a suitable ordered path for it.
* Give up if any agg isn't indexable.
*/
/* Pass 1: find all the aggregates */
aggs_list = NIL;
if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
return NULL;
if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
return NULL;
/* Pass 2: see if each one is optimizable */
total_cost = 0;
foreach(l, aggs_list)
foreach(lc, root->minmax_aggs)
{
MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
PrivateMMAggInfo *info;
if (!build_minmax_path(root, rel, info))
info = find_minmax_path(root, rel, mminfo);
if (!info)
return NULL;
aggs_list = lappend(aggs_list, info);
total_cost += info->pathcost;
}
/*
* Make the cost comparison.
* Now we have enough info to compare costs against the generic aggregate
* implementation.
*
* Note that we don't include evaluation cost of the tlist here; this is
* OK since it isn't included in best_path's cost either, and should be
@ -173,12 +238,12 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
/*
* OK, we are going to generate an optimized plan.
*
* First, generate a subplan and output Param node for each agg.
*/
/* Pass 3: generate subplans and output Param nodes */
foreach(l, aggs_list)
foreach(lc, aggs_list)
{
make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l));
make_agg_subplan(root, rel, (PrivateMMAggInfo *) lfirst(lc));
}
/*
@ -241,36 +306,43 @@ find_minmax_aggs_walker(Node *node, List **context)
Aggref *aggref = (Aggref *) node;
Oid aggsortop;
TargetEntry *curTarget;
MinMaxAggInfo *info;
MinMaxAggInfo *mminfo;
ListCell *l;
Assert(aggref->agglevelsup == 0);
if (list_length(aggref->args) != 1 || aggref->aggorder != NIL)
return true; /* it couldn't be MIN/MAX */
/* note: we do not care if DISTINCT is mentioned ... */
curTarget = (TargetEntry *) linitial(aggref->args);
aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
if (!OidIsValid(aggsortop))
return true; /* not a MIN/MAX aggregate */
if (contain_mutable_functions((Node *) curTarget->expr))
return true; /* not potentially indexable */
if (type_is_rowtype(exprType((Node *) curTarget->expr)))
return true; /* IS NOT NULL would have weird semantics */
/*
* Check whether it's already in the list, and add it if not.
*/
curTarget = (TargetEntry *) linitial(aggref->args);
foreach(l, *context)
{
info = (MinMaxAggInfo *) lfirst(l);
if (info->aggfnoid == aggref->aggfnoid &&
equal(info->target, curTarget->expr))
mminfo = (MinMaxAggInfo *) lfirst(l);
if (mminfo->aggfnoid == aggref->aggfnoid &&
equal(mminfo->target, curTarget->expr))
return false;
}
info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
info->aggfnoid = aggref->aggfnoid;
info->aggsortop = aggsortop;
info->target = curTarget->expr;
mminfo = makeNode(MinMaxAggInfo);
mminfo->aggfnoid = aggref->aggfnoid;
mminfo->aggsortop = aggsortop;
mminfo->target = curTarget->expr;
mminfo->pathkeys = NIL; /* don't compute pathkeys yet */
*context = lappend(*context, info);
*context = lappend(*context, mminfo);
/*
* We need not recurse into the argument, since it can't contain any
@ -284,204 +356,151 @@ find_minmax_aggs_walker(Node *node, List **context)
}
/*
* build_minmax_path
* Given a MIN/MAX aggregate, try to find an index it can be optimized
* with. Build a Path describing the best such index path.
* find_minmax_path
* Given a MIN/MAX aggregate, try to find an ordered Path it can be
* optimized with.
*
* Returns TRUE if successful, FALSE if not. In the TRUE case, info->path
* is filled in.
*
* XXX look at sharing more code with indxpath.c.
*
* Note: check_partial_indexes() must have been run previously.
* If successful, build and return a PrivateMMAggInfo struct. Otherwise,
* return NULL.
*/
static bool
build_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *info)
static PrivateMMAggInfo *
find_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *mminfo)
{
IndexPath *best_path = NULL;
PrivateMMAggInfo *info;
Path *best_path = NULL;
Cost best_cost = 0;
bool best_nulls_first = false;
NullTest *ntest;
List *allquals;
ListCell *l;
/* Build "target IS NOT NULL" expression for use below */
ntest = makeNode(NullTest);
ntest->nulltesttype = IS_NOT_NULL;
ntest->arg = copyObject(info->target);
ntest->argisrow = type_is_rowtype(exprType((Node *) ntest->arg));
if (ntest->argisrow)
return false; /* punt on composites */
info->notnulltest = ntest;
double path_fraction;
PathKey *mmpathkey;
ListCell *lc;
/*
* Build list of existing restriction clauses plus the notnull test. We
* cheat a bit by not bothering with a RestrictInfo node for the notnull
* test --- predicate_implied_by() won't care.
* Punt if the aggregate's pathkey turned out to be redundant, ie its
* pathkeys list is now empty. This would happen with something like
* "SELECT max(x) ... WHERE x = constant". There's no need to try to
* optimize such a case, because if there is an index that would help,
* it should already have been used with the WHERE clause.
*/
allquals = list_concat(list_make1(ntest), rel->baserestrictinfo);
if (mminfo->pathkeys == NIL)
return NULL;
foreach(l, rel->indexlist)
/*
* Search the paths that were generated for the rel to see if there are
* any with the desired ordering. There could be multiple such paths,
* in which case take the cheapest (as measured according to how fast it
* will be to fetch the first row).
*
* We can't use pathkeys_contained_in() to check the ordering, because we
* would like to match pathkeys regardless of the nulls_first setting.
* However, we know that MIN/MAX aggregates will have at most one item in
* their pathkeys, so it's not too complicated to match by brute force.
*
* Note: this test ignores the possible costs associated with skipping
* NULL tuples. We assume that adding the not-null criterion to the
* indexqual doesn't really cost anything.
*/
if (rel->rows > 1.0)
path_fraction = 1.0 / rel->rows;
else
path_fraction = 1.0;
Assert(list_length(mminfo->pathkeys) == 1);
mmpathkey = (PathKey *) linitial(mminfo->pathkeys);
foreach(lc, rel->pathlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
ScanDirection indexscandir = NoMovementScanDirection;
int indexcol;
int prevcol;
List *restrictclauses;
IndexPath *new_path;
Cost new_cost;
bool found_clause;
Path *path = (Path *) lfirst(lc);
PathKey *pathkey;
Cost path_cost;
/* Ignore non-btree indexes */
if (index->relam != BTREE_AM_OID)
continue;
if (path->pathkeys == NIL)
continue; /* unordered path */
pathkey = (PathKey *) linitial(path->pathkeys);
/*
* Ignore partial indexes that do not match the query --- unless their
* predicates can be proven from the baserestrict list plus the IS NOT
* NULL test. In that case we can use them.
*/
if (index->indpred != NIL && !index->predOK &&
!predicate_implied_by(index->indpred, allquals))
continue;
/*
* Look for a match to one of the index columns. (In a stupidly
* designed index, there could be multiple matches, but we only care
* about the first one.)
*/
for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
if (mmpathkey->pk_eclass == pathkey->pk_eclass &&
mmpathkey->pk_opfamily == pathkey->pk_opfamily &&
mmpathkey->pk_strategy == pathkey->pk_strategy)
{
indexscandir = match_agg_to_index_col(info, index, indexcol);
if (!ScanDirectionIsNoMovement(indexscandir))
break;
}
if (ScanDirectionIsNoMovement(indexscandir))
continue;
/*
* If the match is not at the first index column, we have to verify
* that there are "x = something" restrictions on all the earlier
* index columns. Since we'll need the restrictclauses list anyway to
* build the path, it's convenient to extract that first and then look
* through it for the equality restrictions.
*/
restrictclauses = group_clauses_by_indexkey(index,
index->rel->baserestrictinfo,
NIL,
NULL,
SAOP_FORBID,
&found_clause);
if (list_length(restrictclauses) < indexcol)
continue; /* definitely haven't got enough */
for (prevcol = 0; prevcol < indexcol; prevcol++)
{
List *rinfos = (List *) list_nth(restrictclauses, prevcol);
ListCell *ll;
foreach(ll, rinfos)
/*
* OK, it has the right ordering; is it acceptable otherwise?
* (We test in this order because the pathkey check is cheap.)
*/
if (path_usable_for_agg(path))
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
int strategy;
/*
* It'll work; but is it the cheapest?
*
* Note: cost calculation here should match
* compare_fractional_path_costs().
*/
path_cost = path->startup_cost +
path_fraction * (path->total_cost - path->startup_cost);
/* Could be an IS_NULL test, if so ignore */
if (!is_opclause(rinfo->clause))
continue;
strategy =
get_op_opfamily_strategy(((OpExpr *) rinfo->clause)->opno,
index->opfamily[prevcol]);
if (strategy == BTEqualStrategyNumber)
break;
if (best_path == NULL || path_cost < best_cost)
{
best_path = path;
best_cost = path_cost;
}
}
if (ll == NULL)
break; /* none are Equal for this index col */
}
if (prevcol < indexcol)
continue; /* didn't find all Equal clauses */
/*
* Build the access path. We don't bother marking it with pathkeys.
*/
new_path = create_index_path(root, index,
restrictclauses,
NIL,
indexscandir,
NULL);
/*
* Estimate actual cost of fetching just one row.
*/
if (new_path->rows > 1.0)
new_cost = new_path->path.startup_cost +
(new_path->path.total_cost - new_path->path.startup_cost)
* 1.0 / new_path->rows;
else
new_cost = new_path->path.total_cost;
/*
* Keep if first or if cheaper than previous best.
*/
if (best_path == NULL || new_cost < best_cost)
{
best_path = new_path;
best_cost = new_cost;
if (ScanDirectionIsForward(indexscandir))
best_nulls_first = index->nulls_first[indexcol];
else
best_nulls_first = !index->nulls_first[indexcol];
}
}
/* Fail if no suitable path */
if (best_path == NULL)
return NULL;
/* Construct private state for further processing */
info = (PrivateMMAggInfo *) palloc(sizeof(PrivateMMAggInfo));
info->mminfo = mminfo;
info->path = best_path;
info->pathcost = best_cost;
info->nulls_first = best_nulls_first;
return (best_path != NULL);
info->param = NULL; /* will be set later */
return info;
}
/*
* match_agg_to_index_col
* Does an aggregate match an index column?
*
* It matches if its argument is equal to the index column's data and its
* sortop is either the forward or reverse sort operator for the column.
*
* We return ForwardScanDirection if match the forward sort operator,
* BackwardScanDirection if match the reverse sort operator,
* and NoMovementScanDirection if there's no match.
* To be usable, a Path needs to be an IndexPath on a btree index, or be a
* MergeAppendPath of such IndexPaths. This restriction is mainly because
* we need to be sure the index can handle an added NOT NULL constraint at
* minimal additional cost. If you wish to relax it, you'll need to improve
* add_notnull_qual() too.
*/
static ScanDirection
match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
static bool
path_usable_for_agg(Path *path)
{
ScanDirection result;
if (IsA(path, IndexPath))
{
IndexPath *ipath = (IndexPath *) path;
/* Check for operator match first (cheaper) */
if (info->aggsortop == index->fwdsortop[indexcol])
result = ForwardScanDirection;
else if (info->aggsortop == index->revsortop[indexcol])
result = BackwardScanDirection;
else
return NoMovementScanDirection;
/* OK if it's a btree index */
if (ipath->indexinfo->relam == BTREE_AM_OID)
return true;
}
else if (IsA(path, MergeAppendPath))
{
MergeAppendPath *mpath = (MergeAppendPath *) path;
ListCell *lc;
/* Check for data match */
if (!match_index_to_operand((Node *) info->target, indexcol, index))
return NoMovementScanDirection;
return result;
foreach(lc, mpath->subpaths)
{
if (!path_usable_for_agg((Path *) lfirst(lc)))
return false;
}
return true;
}
return false;
}
/*
* Construct a suitable plan for a converted aggregate query
*/
static void
make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info)
{
PlannerInfo subroot;
Query *subparse;
Plan *plan;
IndexScan *iplan;
TargetEntry *tle;
SortGroupClause *sortcl;
/*
* Generate a suitably modified query. Much of the work here is probably
@ -500,58 +519,37 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
subparse->groupClause = NIL;
subparse->havingQual = NULL;
subparse->distinctClause = NIL;
subparse->sortClause = NIL;
subroot.hasHavingQual = false;
/* single tlist entry that is the aggregate target */
tle = makeTargetEntry(copyObject(info->target),
tle = makeTargetEntry(copyObject(info->mminfo->target),
1,
pstrdup("agg_target"),
false);
subparse->targetList = list_make1(tle);
/* set up the appropriate ORDER BY entry */
sortcl = makeNode(SortGroupClause);
sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL);
if (!OidIsValid(sortcl->eqop)) /* shouldn't happen */
elog(ERROR, "could not find equality operator for ordering operator %u",
info->aggsortop);
sortcl->sortop = info->aggsortop;
sortcl->nulls_first = info->nulls_first;
sortcl->hashable = false; /* no need to make this accurate */
subparse->sortClause = list_make1(sortcl);
/* set up LIMIT 1 */
/* set up expressions for LIMIT 1 */
subparse->limitOffset = NULL;
subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
Int64GetDatum(1), false,
FLOAT8PASSBYVAL);
/*
* Generate the plan for the subquery. We already have a Path for the
* basic indexscan, but we have to convert it to a Plan and attach a LIMIT
* node above it.
*
* Also we must add a "WHERE target IS NOT NULL" restriction to the
* indexscan, to be sure we don't return a NULL, which'd be contrary to
* the standard behavior of MIN/MAX.
*
* The NOT NULL qual has to go on the actual indexscan; create_plan might
* have stuck a gating Result atop that, if there were any pseudoconstant
* quals.
* Modify the ordered Path to add an indexed "target IS NOT NULL"
* condition to each scan. We need this to ensure we don't return a NULL,
* which'd be contrary to the standard behavior of MIN/MAX. We insist on
* it being indexed, else the Path might not be as cheap as we thought.
*/
plan = create_plan(&subroot, (Path *) info->path);
add_notnull_qual(root, rel, info, info->path);
plan->targetlist = copyObject(subparse->targetList);
/*
* Generate the plan for the subquery. We already have a Path, but we have
* to convert it to a Plan and attach a LIMIT node above it.
*/
plan = create_plan(&subroot, info->path);
if (IsA(plan, Result))
iplan = (IndexScan *) plan->lefttree;
else
iplan = (IndexScan *) plan;
if (!IsA(iplan, IndexScan))
elog(ERROR, "result of create_plan(IndexPath) isn't an IndexScan");
attach_notnull_index_qual(info, iplan);
plan->targetlist = subparse->targetList;
plan = (Plan *) make_limit(plan,
subparse->limitOffset,
@ -572,166 +570,118 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
}
/*
* Add "target IS NOT NULL" to the quals of the given indexscan.
*
* This is trickier than it sounds because the new qual has to be added at an
* appropriate place in the qual list, to preserve the list's ordering by
* index column position.
* Attach a suitable NOT NULL qual to the IndexPath, or each of the member
* IndexPaths. Note we assume we can modify the paths in-place.
*/
static void
attach_notnull_index_qual(MinMaxAggInfo *info, IndexScan *iplan)
add_notnull_qual(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info,
Path *path)
{
NullTest *ntest;
List *newindexqual;
List *newindexqualorig;
bool done;
ListCell *lc1;
ListCell *lc2;
Expr *leftop;
AttrNumber targetattno;
/*
* We can skip adding the NOT NULL qual if it duplicates either an
* already-given WHERE condition, or a clause of the index predicate.
*/
if (list_member(iplan->indexqualorig, info->notnulltest) ||
list_member(info->path->indexinfo->indpred, info->notnulltest))
return;
/* Need a "fixed" copy as well as the original */
ntest = copyObject(info->notnulltest);
ntest->arg = (Expr *) fix_indexqual_operand((Node *) ntest->arg,
info->path->indexinfo);
/* Identify the target index column from the "fixed" copy */
leftop = ntest->arg;
if (leftop && IsA(leftop, RelabelType))
leftop = ((RelabelType *) leftop)->arg;
Assert(leftop != NULL);
if (!IsA(leftop, Var))
elog(ERROR, "NullTest indexqual has wrong key");
targetattno = ((Var *) leftop)->varattno;
/*
* list.c doesn't expose a primitive to insert a list cell at an arbitrary
* position, so our strategy is to copy the lists and insert the null test
* when we reach an appropriate spot.
*/
newindexqual = newindexqualorig = NIL;
done = false;
forboth(lc1, iplan->indexqual, lc2, iplan->indexqualorig)
if (IsA(path, IndexPath))
{
Expr *qual = (Expr *) lfirst(lc1);
Expr *qualorig = (Expr *) lfirst(lc2);
AttrNumber varattno;
IndexPath *ipath = (IndexPath *) path;
Expr *target;
NullTest *ntest;
RestrictInfo *rinfo;
List *newquals;
bool found_clause;
/*
* Identify which index column this qual is for. This code should
* match the qual disassembly code in ExecIndexBuildScanKeys.
* If we are looking at a child of the original rel, we have to adjust
* the agg target expression to match the child.
*/
if (IsA(qual, OpExpr))
if (ipath->path.parent != rel)
{
/* indexkey op expression */
leftop = (Expr *) get_leftop(qual);
AppendRelInfo *appinfo = NULL;
ListCell *lc;
if (leftop && IsA(leftop, RelabelType))
leftop = ((RelabelType *) leftop)->arg;
Assert(leftop != NULL);
if (!IsA(leftop, Var))
elog(ERROR, "indexqual doesn't have key on left side");
varattno = ((Var *) leftop)->varattno;
}
else if (IsA(qual, RowCompareExpr))
{
/* (indexkey, indexkey, ...) op (expression, expression, ...) */
RowCompareExpr *rc = (RowCompareExpr *) qual;
/*
* Examine just the first column of the rowcompare, which is what
* determines its placement in the overall qual list.
*/
leftop = (Expr *) linitial(rc->largs);
if (leftop && IsA(leftop, RelabelType))
leftop = ((RelabelType *) leftop)->arg;
Assert(leftop != NULL);
if (!IsA(leftop, Var))
elog(ERROR, "indexqual doesn't have key on left side");
varattno = ((Var *) leftop)->varattno;
}
else if (IsA(qual, ScalarArrayOpExpr))
{
/* indexkey op ANY (array-expression) */
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
leftop = (Expr *) linitial(saop->args);
if (leftop && IsA(leftop, RelabelType))
leftop = ((RelabelType *) leftop)->arg;
Assert(leftop != NULL);
if (!IsA(leftop, Var))
elog(ERROR, "indexqual doesn't have key on left side");
varattno = ((Var *) leftop)->varattno;
}
else if (IsA(qual, NullTest))
{
/* indexkey IS NULL or indexkey IS NOT NULL */
NullTest *ntest = (NullTest *) qual;
leftop = ntest->arg;
if (leftop && IsA(leftop, RelabelType))
leftop = ((RelabelType *) leftop)->arg;
Assert(leftop != NULL);
if (!IsA(leftop, Var))
elog(ERROR, "NullTest indexqual has wrong key");
varattno = ((Var *) leftop)->varattno;
/* Search for the appropriate AppendRelInfo */
foreach(lc, root->append_rel_list)
{
appinfo = (AppendRelInfo *) lfirst(lc);
if (appinfo->parent_relid == rel->relid &&
appinfo->child_relid == ipath->path.parent->relid)
break;
appinfo = NULL;
}
if (!appinfo)
elog(ERROR, "failed to find AppendRelInfo for child rel");
target = (Expr *)
adjust_appendrel_attrs((Node *) info->mminfo->target,
appinfo);
}
else
{
elog(ERROR, "unsupported indexqual type: %d",
(int) nodeTag(qual));
varattno = 0; /* keep compiler quiet */
/* Otherwise, just make a copy (may not be necessary) */
target = copyObject(info->mminfo->target);
}
/* Insert the null test at the first place it can legally go */
if (!done && targetattno <= varattno)
{
newindexqual = lappend(newindexqual, ntest);
newindexqualorig = lappend(newindexqualorig, info->notnulltest);
done = true;
}
/* Build "target IS NOT NULL" expression */
ntest = makeNode(NullTest);
ntest->nulltesttype = IS_NOT_NULL;
ntest->arg = target;
/* we checked it wasn't a rowtype in find_minmax_aggs_walker */
ntest->argisrow = false;
newindexqual = lappend(newindexqual, qual);
newindexqualorig = lappend(newindexqualorig, qualorig);
/*
* We can skip adding the NOT NULL qual if it duplicates either an
* already-given index condition, or a clause of the index predicate.
*/
if (list_member(get_actual_clauses(ipath->indexquals), ntest) ||
list_member(ipath->indexinfo->indpred, ntest))
return;
/* Wrap it in a RestrictInfo and prepend to existing indexquals */
rinfo = make_restrictinfo((Expr *) ntest,
true,
false,
false,
NULL,
NULL);
newquals = list_concat(list_make1(rinfo), ipath->indexquals);
/*
* We can't just stick the IS NOT NULL at the front of the list,
* though. It has to go in the right position corresponding to its
* index column, which might not be the first one. Easiest way to fix
* this is to run the quals through group_clauses_by_indexkey again.
*/
newquals = group_clauses_by_indexkey(ipath->indexinfo,
newquals,
NIL,
NULL,
SAOP_FORBID,
&found_clause);
newquals = flatten_clausegroups_list(newquals);
/* Trouble if we lost any quals */
if (list_length(newquals) != list_length(ipath->indexquals) + 1)
elog(ERROR, "add_notnull_qual failed to add NOT NULL qual");
/*
* And update the path's indexquals. Note we don't bother adding
* to indexclauses, which is OK since this is like a generated
* index qual.
*/
ipath->indexquals = newquals;
}
/* Add the null test at the end if it must follow all existing quals */
if (!done)
else if (IsA(path, MergeAppendPath))
{
newindexqual = lappend(newindexqual, ntest);
newindexqualorig = lappend(newindexqualorig, info->notnulltest);
}
MergeAppendPath *mpath = (MergeAppendPath *) path;
ListCell *lc;
iplan->indexqual = newindexqual;
iplan->indexqualorig = newindexqualorig;
foreach(lc, mpath->subpaths)
{
add_notnull_qual(root, rel, info, (Path *) lfirst(lc));
}
}
else
{
/* shouldn't get here, because of path_usable_for_agg checks */
elog(ERROR, "add_notnull_qual failed");
}
}
/*
@ -750,13 +700,13 @@ replace_aggs_with_params_mutator(Node *node, List **context)
foreach(l, *context)
{
MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
PrivateMMAggInfo *info = (PrivateMMAggInfo *) lfirst(l);
if (info->aggfnoid == aggref->aggfnoid &&
equal(info->target, curTarget->expr))
if (info->mminfo->aggfnoid == aggref->aggfnoid &&
equal(info->mminfo->target, curTarget->expr))
return (Node *) info->param;
}
elog(ERROR, "failed to re-find aggregate info record");
elog(ERROR, "failed to re-find PrivateMMAggInfo record");
}
Assert(!IsA(node, SubLink));
return expression_tree_mutator(node, replace_aggs_with_params_mutator,

View File

@ -30,6 +30,10 @@
#include "utils/selfuncs.h"
/* Local functions */
static void canonicalize_all_pathkeys(PlannerInfo *root);
/*
* query_planner
* Generate a path (that is, a simplified plan) for a basic query,
@ -68,9 +72,9 @@
* PlannerInfo field and not a passed parameter is that the low-level routines
* in indxpath.c need to see it.)
*
* Note: the PlannerInfo node also includes group_pathkeys, window_pathkeys,
* distinct_pathkeys, and sort_pathkeys, which like query_pathkeys need to be
* canonicalized once the info is available.
* Note: the PlannerInfo node includes other pathkeys fields besides
* query_pathkeys, all of which need to be canonicalized once the info is
* available. See canonicalize_all_pathkeys.
*
* tuple_fraction is interpreted as follows:
* 0: expect all tuples to be retrieved (normal case)
@ -118,16 +122,7 @@ query_planner(PlannerInfo *root, List *tlist,
* something like "SELECT 2+2 ORDER BY 1".
*/
root->canon_pathkeys = NIL;
root->query_pathkeys = canonicalize_pathkeys(root,
root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root,
root->group_pathkeys);
root->window_pathkeys = canonicalize_pathkeys(root,
root->window_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root,
root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root,
root->sort_pathkeys);
canonicalize_all_pathkeys(root);
return;
}
@ -136,7 +131,7 @@ query_planner(PlannerInfo *root, List *tlist,
* for "simple" rels.
*
* NOTE: append_rel_list was set up by subquery_planner, so do not touch
* here; eq_classes may contain data already, too.
* here; eq_classes and minmax_aggs may contain data already, too.
*/
root->simple_rel_array_size = list_length(parse->rtable) + 1;
root->simple_rel_array = (RelOptInfo **)
@ -212,15 +207,10 @@ query_planner(PlannerInfo *root, List *tlist,
/*
* We have completed merging equivalence sets, so it's now possible to
* convert the requested query_pathkeys to canonical form. Also
* canonicalize the groupClause, windowClause, distinctClause and
* sortClause pathkeys for use later.
* convert previously generated pathkeys (in particular, the requested
* query_pathkeys) to canonical form.
*/
root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
canonicalize_all_pathkeys(root);
/*
* Examine any "placeholder" expressions generated during subquery pullup.
@ -430,3 +420,28 @@ query_planner(PlannerInfo *root, List *tlist,
*cheapest_path = cheapestpath;
*sorted_path = sortedpath;
}
/*
 * canonicalize_all_pathkeys
 *	  Convert to canonical form every pathkeys list that was built before
 *	  query_planner was entered and parked in PlannerInfo.
 *
 * This covers the five per-query lists (query, group, window, distinct and
 * sort pathkeys) and then the pathkeys of each MinMaxAggInfo found in
 * root->minmax_aggs.  Each list is replaced in place by whatever
 * canonicalize_pathkeys returns for it.
 */
static void
canonicalize_all_pathkeys(PlannerInfo *root)
{
	List	  **stashed[5];
	int			nstashed = (int) (sizeof(stashed) / sizeof(stashed[0]));
	int			i;
	ListCell   *cell;

	/* Same processing order as the fields have always been handled in */
	stashed[0] = &root->query_pathkeys;
	stashed[1] = &root->group_pathkeys;
	stashed[2] = &root->window_pathkeys;
	stashed[3] = &root->distinct_pathkeys;
	stashed[4] = &root->sort_pathkeys;

	for (i = 0; i < nstashed; i++)
		*stashed[i] = canonicalize_pathkeys(root, *stashed[i]);

	/* Likewise for the sort order wanted by each MIN/MAX aggregate */
	foreach(cell, root->minmax_aggs)
	{
		MinMaxAggInfo *agginfo = (MinMaxAggInfo *) lfirst(cell);
		List	   *canonical;

		canonical = canonicalize_pathkeys(root, agginfo->pathkeys);
		agginfo->pathkeys = canonical;
	}
}

View File

@ -1010,6 +1010,30 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
sub_tlist = make_subplanTargetList(root, tlist,
&groupColIdx, &need_tlist_eval);
/*
* Do aggregate preprocessing, if the query has any aggs.
*
* Note: think not that we can turn off hasAggs if we find no aggs. It
* is possible for constant-expression simplification to remove all
* explicit references to aggs, but we still have to follow the
* aggregate semantics (eg, producing only one output row).
*/
if (parse->hasAggs)
{
/*
* Will need actual number of aggregates for estimating costs.
* Note: we do not attempt to detect duplicate aggregates here; a
* somewhat-overestimated count is okay for our present purposes.
*/
count_agg_clauses((Node *) tlist, &agg_counts);
count_agg_clauses(parse->havingQual, &agg_counts);
/*
* Preprocess MIN/MAX aggregates, if any.
*/
preprocess_minmax_aggregates(root, tlist);
}
/*
* Calculate pathkeys that represent grouping/ordering requirements.
* Stash them in PlannerInfo so that query_planner can canonicalize
@ -1056,23 +1080,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
tlist,
false);
/*
* Will need actual number of aggregates for estimating costs.
*
* Note: we do not attempt to detect duplicate aggregates here; a
* somewhat-overestimated count is okay for our present purposes.
*
* Note: think not that we can turn off hasAggs if we find no aggs. It
* is possible for constant-expression simplification to remove all
* explicit references to aggs, but we still have to follow the
* aggregate semantics (eg, producing only one output row).
*/
if (parse->hasAggs)
{
count_agg_clauses((Node *) tlist, &agg_counts);
count_agg_clauses(parse->havingQual, &agg_counts);
}
/*
* Figure out whether we want a sorted result from query_planner.
*

View File

@ -1870,6 +1870,7 @@ substitute_multiple_relids_walker(Node *node,
Assert(!IsA(node, SpecialJoinInfo));
Assert(!IsA(node, AppendRelInfo));
Assert(!IsA(node, PlaceHolderInfo));
Assert(!IsA(node, MinMaxAggInfo));
return expression_tree_walker(node, substitute_multiple_relids_walker,
(void *) context);

View File

@ -1641,6 +1641,7 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context)
Assert(!IsA(node, SpecialJoinInfo));
Assert(!IsA(node, AppendRelInfo));
Assert(!IsA(node, PlaceHolderInfo));
Assert(!IsA(node, MinMaxAggInfo));
/*
* We have to process RestrictInfo nodes specially. (Note: although

View File

@ -838,6 +838,7 @@ flatten_join_alias_vars_mutator(Node *node,
/* Shouldn't need to handle these planner auxiliary nodes here */
Assert(!IsA(node, SpecialJoinInfo));
Assert(!IsA(node, PlaceHolderInfo));
Assert(!IsA(node, MinMaxAggInfo));
return expression_tree_mutator(node, flatten_join_alias_vars_mutator,
(void *) context);

View File

@ -377,6 +377,7 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context)
/* Shouldn't need to handle other planner auxiliary nodes here */
Assert(!IsA(node, SpecialJoinInfo));
Assert(!IsA(node, PlaceHolderInfo));
Assert(!IsA(node, MinMaxAggInfo));
if (IsA(node, Query))
{
@ -544,6 +545,7 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
/* Shouldn't need to handle other planner auxiliary nodes here */
Assert(!IsA(node, SpecialJoinInfo));
Assert(!IsA(node, PlaceHolderInfo));
Assert(!IsA(node, MinMaxAggInfo));
if (IsA(node, Query))
{
@ -811,6 +813,7 @@ rangeTableEntry_used_walker(Node *node,
Assert(!IsA(node, SpecialJoinInfo));
Assert(!IsA(node, AppendRelInfo));
Assert(!IsA(node, PlaceHolderInfo));
Assert(!IsA(node, MinMaxAggInfo));
if (IsA(node, Query))
{

View File

@ -230,6 +230,7 @@ typedef enum NodeTag
T_SpecialJoinInfo,
T_AppendRelInfo,
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
/*

View File

@ -189,6 +189,8 @@ typedef struct PlannerInfo
List *distinct_pathkeys; /* distinctClause pathkeys, if any */
List *sort_pathkeys; /* sortClause pathkeys, if any */
List *minmax_aggs; /* List of MinMaxAggInfos */
List *initial_rels; /* RelOptInfos we are now trying to join */
MemoryContext planner_cxt; /* context holding PlannerInfo */
@ -1357,6 +1359,23 @@ typedef struct PlaceHolderInfo
int32 ph_width; /* estimated attribute width */
} PlaceHolderInfo;
/*
 * For each potentially index-optimizable MIN/MAX aggregate function,
 * root->minmax_aggs stores a MinMaxAggInfo describing it.
 *
 * The pathkeys list describes the sort order a path must deliver for the
 * aggregate to be computed by fetching just the first row.  Like the other
 * pathkeys lists kept in PlannerInfo, it is converted to canonical form by
 * query_planner (see canonicalize_all_pathkeys).
 *
 * Note: a MIN/MAX agg doesn't really care about the nulls_first property,
 * so the pathkey's nulls_first flag should be ignored.
 */
typedef struct MinMaxAggInfo
{
NodeTag type;
Oid aggfnoid; /* pg_proc Oid of the aggregate */
Oid aggsortop; /* Oid of its sort operator */
Expr *target; /* expression we are aggregating on */
List *pathkeys; /* pathkeys representing needed sort order;
 * canonicalized by canonicalize_all_pathkeys */
} MinMaxAggInfo;
/*
* glob->paramlist keeps track of the PARAM_EXEC slots that we have decided
* we need for the query. At runtime these slots are used to pass values

View File

@ -173,6 +173,9 @@ extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
List *sortclauses,
List *tlist,
bool canonicalize);
extern List *make_pathkeys_for_aggregate(PlannerInfo *root,
Expr *aggtarget,
Oid aggsortop);
extern void initialize_mergeclause_eclasses(PlannerInfo *root,
RestrictInfo *restrictinfo);
extern void update_mergeclause_eclasses(PlannerInfo *root,
@ -187,10 +190,6 @@ extern List *select_outer_pathkeys_for_merge(PlannerInfo *root,
extern List *make_inner_pathkeys_for_merge(PlannerInfo *root,
List *mergeclauses,
List *outer_pathkeys);
extern int pathkeys_useful_for_merging(PlannerInfo *root,
RelOptInfo *rel,
List *pathkeys);
extern int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys);
extern List *truncate_useless_pathkeys(PlannerInfo *root,
RelOptInfo *rel,
List *pathkeys);

View File

@ -32,6 +32,7 @@ extern void query_planner(PlannerInfo *root, List *tlist,
/*
* prototypes for plan/planagg.c
*/
extern void preprocess_minmax_aggregates(PlannerInfo *root, List *tlist);
extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist,
Path *best_path);
@ -39,7 +40,6 @@ extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist,
* prototypes for plan/createplan.c
*/
extern Plan *create_plan(PlannerInfo *root, Path *best_path);
extern Node *fix_indexqual_operand(Node *node, IndexOptInfo *index);
extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
Index scanrelid, Plan *subplan,
List *subrtable, List *subrowmark);

View File

@ -442,29 +442,90 @@ FROM bool_test;
(1 row)
--
-- Test several cases that should be optimized into indexscans instead of
-- the generic aggregate implementation. We can't actually verify that they
-- are done as indexscans, but we can check that the results are correct.
-- Test cases that should be optimized into indexscans instead of
-- the generic aggregate implementation.
--
analyze tenk1; -- ensure we get consistent plans here
-- Basic cases
explain (costs off)
select min(unique1) from tenk1;
QUERY PLAN
-------------------------------------------------------
Result
InitPlan 1 (returns $0)
-> Limit
-> Index Scan using tenk1_unique1 on tenk1
Index Cond: (unique1 IS NOT NULL)
(5 rows)
select min(unique1) from tenk1;
min
-----
0
(1 row)
explain (costs off)
select max(unique1) from tenk1;
QUERY PLAN
----------------------------------------------------------------
Result
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique1 on tenk1
Index Cond: (unique1 IS NOT NULL)
(5 rows)
select max(unique1) from tenk1;
max
------
9999
(1 row)
explain (costs off)
select max(unique1) from tenk1 where unique1 < 42;
QUERY PLAN
------------------------------------------------------------------------
Result
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique1 on tenk1
Index Cond: ((unique1 IS NOT NULL) AND (unique1 < 42))
(5 rows)
select max(unique1) from tenk1 where unique1 < 42;
max
-----
41
(1 row)
explain (costs off)
select max(unique1) from tenk1 where unique1 > 42;
QUERY PLAN
------------------------------------------------------------------------
Result
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique1 on tenk1
Index Cond: ((unique1 IS NOT NULL) AND (unique1 > 42))
(5 rows)
select max(unique1) from tenk1 where unique1 > 42;
max
------
9999
(1 row)
explain (costs off)
select max(unique1) from tenk1 where unique1 > 42000;
QUERY PLAN
---------------------------------------------------------------------------
Result
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique1 on tenk1
Index Cond: ((unique1 IS NOT NULL) AND (unique1 > 42000))
(5 rows)
select max(unique1) from tenk1 where unique1 > 42000;
max
-----
@ -472,12 +533,34 @@ select max(unique1) from tenk1 where unique1 > 42000;
(1 row)
-- multi-column index (uses tenk1_thous_tenthous)
explain (costs off)
select max(tenthous) from tenk1 where thousand = 33;
QUERY PLAN
--------------------------------------------------------------------------
Result
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_thous_tenthous on tenk1
Index Cond: ((thousand = 33) AND (tenthous IS NOT NULL))
(5 rows)
select max(tenthous) from tenk1 where thousand = 33;
max
------
9033
(1 row)
explain (costs off)
select min(tenthous) from tenk1 where thousand = 33;
QUERY PLAN
--------------------------------------------------------------------------
Result
InitPlan 1 (returns $0)
-> Limit
-> Index Scan using tenk1_thous_tenthous on tenk1
Index Cond: ((thousand = 33) AND (tenthous IS NOT NULL))
(5 rows)
select min(tenthous) from tenk1 where thousand = 33;
min
-----
@ -485,8 +568,22 @@ select min(tenthous) from tenk1 where thousand = 33;
(1 row)
-- check parameter propagation into an indexscan subquery
explain (costs off)
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
QUERY PLAN
-----------------------------------------------------------------------------------------
Seq Scan on int4_tbl
SubPlan 2
-> Result
InitPlan 1 (returns $1)
-> Limit
-> Index Scan using tenk1_unique1 on tenk1
Index Cond: ((unique1 IS NOT NULL) AND (unique1 > int4_tbl.f1))
(7 rows)
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
from int4_tbl;
f1 | gt
-------------+----
0 | 1
@ -497,30 +594,94 @@ from int4_tbl;
(5 rows)
-- check some cases that were handled incorrectly in 8.3.0
explain (costs off)
select distinct max(unique2) from tenk1;
QUERY PLAN
----------------------------------------------------------------
HashAggregate
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
(6 rows)
select distinct max(unique2) from tenk1;
max
------
9999
(1 row)
explain (costs off)
select max(unique2) from tenk1 order by 1;
QUERY PLAN
----------------------------------------------------------------
Sort
Sort Key: ($0)
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
(7 rows)
select max(unique2) from tenk1 order by 1;
max
------
9999
(1 row)
explain (costs off)
select max(unique2) from tenk1 order by max(unique2);
QUERY PLAN
----------------------------------------------------------------
Sort
Sort Key: ($0)
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
(7 rows)
select max(unique2) from tenk1 order by max(unique2);
max
------
9999
(1 row)
explain (costs off)
select max(unique2) from tenk1 order by max(unique2)+1;
QUERY PLAN
----------------------------------------------------------------
Sort
Sort Key: (($0 + 1))
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
(7 rows)
select max(unique2) from tenk1 order by max(unique2)+1;
max
------
9999
(1 row)
explain (costs off)
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
QUERY PLAN
----------------------------------------------------------------
Sort
Sort Key: (generate_series(1, 3))
InitPlan 1 (returns $0)
-> Limit
-> Index Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
-> Result
(7 rows)
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
max | g
------+---
@ -529,6 +690,69 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
9999 | 1
(3 rows)
-- this is an interesting special case as of 9.1
explain (costs off)
select min(unique2) from tenk1 where unique2 = 42;
QUERY PLAN
-----------------------------------------------
Aggregate
-> Index Scan using tenk1_unique2 on tenk1
Index Cond: (unique2 = 42)
(3 rows)
select min(unique2) from tenk1 where unique2 = 42;
min
-----
42
(1 row)
-- try it on an inheritance tree
create table minmaxtest(f1 int);
create table minmaxtest1() inherits (minmaxtest);
create table minmaxtest2() inherits (minmaxtest);
create index minmaxtesti on minmaxtest(f1);
create index minmaxtest1i on minmaxtest1(f1);
create index minmaxtest2i on minmaxtest2(f1 desc);
insert into minmaxtest values(11), (12);
insert into minmaxtest1 values(13), (14);
insert into minmaxtest2 values(15), (16);
explain (costs off)
select min(f1), max(f1) from minmaxtest;
QUERY PLAN
--------------------------------------------------------------------------------------
Result
InitPlan 1 (returns $0)
-> Limit
-> Merge Append
Sort Key: public.minmaxtest.f1
-> Index Scan using minmaxtesti on minmaxtest
Index Cond: (f1 IS NOT NULL)
-> Index Scan using minmaxtest1i on minmaxtest1 minmaxtest
Index Cond: (f1 IS NOT NULL)
-> Index Scan Backward using minmaxtest2i on minmaxtest2 minmaxtest
Index Cond: (f1 IS NOT NULL)
InitPlan 2 (returns $1)
-> Limit
-> Merge Append
Sort Key: public.minmaxtest.f1
-> Index Scan Backward using minmaxtesti on minmaxtest
Index Cond: (f1 IS NOT NULL)
-> Index Scan Backward using minmaxtest1i on minmaxtest1 minmaxtest
Index Cond: (f1 IS NOT NULL)
-> Index Scan using minmaxtest2i on minmaxtest2 minmaxtest
Index Cond: (f1 IS NOT NULL)
(21 rows)
select min(f1), max(f1) from minmaxtest;
min | max
-----+-----
11 | 16
(1 row)
drop table minmaxtest cascade;
NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table minmaxtest1
drop cascades to table minmaxtest2
--
-- Test combinations of DISTINCT and/or ORDER BY
--

View File

@ -205,31 +205,81 @@ SELECT
FROM bool_test;
--
-- Test several cases that should be optimized into indexscans instead of
-- the generic aggregate implementation. We can't actually verify that they
-- are done as indexscans, but we can check that the results are correct.
-- Test cases that should be optimized into indexscans instead of
-- the generic aggregate implementation.
--
analyze tenk1; -- ensure we get consistent plans here
-- Basic cases
explain (costs off)
select min(unique1) from tenk1;
select min(unique1) from tenk1;
explain (costs off)
select max(unique1) from tenk1;
select max(unique1) from tenk1;
explain (costs off)
select max(unique1) from tenk1 where unique1 < 42;
select max(unique1) from tenk1 where unique1 < 42;
explain (costs off)
select max(unique1) from tenk1 where unique1 > 42;
select max(unique1) from tenk1 where unique1 > 42;
explain (costs off)
select max(unique1) from tenk1 where unique1 > 42000;
select max(unique1) from tenk1 where unique1 > 42000;
-- multi-column index (uses tenk1_thous_tenthous)
explain (costs off)
select max(tenthous) from tenk1 where thousand = 33;
select max(tenthous) from tenk1 where thousand = 33;
explain (costs off)
select min(tenthous) from tenk1 where thousand = 33;
select min(tenthous) from tenk1 where thousand = 33;
-- check parameter propagation into an indexscan subquery
explain (costs off)
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
from int4_tbl;
-- check some cases that were handled incorrectly in 8.3.0
explain (costs off)
select distinct max(unique2) from tenk1;
select distinct max(unique2) from tenk1;
explain (costs off)
select max(unique2) from tenk1 order by 1;
select max(unique2) from tenk1 order by 1;
explain (costs off)
select max(unique2) from tenk1 order by max(unique2);
select max(unique2) from tenk1 order by max(unique2);
explain (costs off)
select max(unique2) from tenk1 order by max(unique2)+1;
select max(unique2) from tenk1 order by max(unique2)+1;
explain (costs off)
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
-- this is an interesting special case as of 9.1
explain (costs off)
select min(unique2) from tenk1 where unique2 = 42;
select min(unique2) from tenk1 where unique2 = 42;
-- try it on an inheritance tree
create table minmaxtest(f1 int);
create table minmaxtest1() inherits (minmaxtest);
create table minmaxtest2() inherits (minmaxtest);
create index minmaxtesti on minmaxtest(f1);
create index minmaxtest1i on minmaxtest1(f1);
create index minmaxtest2i on minmaxtest2(f1 desc);
insert into minmaxtest values(11), (12);
insert into minmaxtest1 values(13), (14);
insert into minmaxtest2 values(15), (16);
explain (costs off)
select min(f1), max(f1) from minmaxtest;
select min(f1), max(f1) from minmaxtest;
drop table minmaxtest cascade;
--
-- Test combinations of DISTINCT and/or ORDER BY