postgresql/src/backend/optimizer/util/tlist.c

509 lines
12 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* tlist.c
* Target list manipulation routines
*
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/optimizer/util/tlist.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
1999-07-16 07:00:38 +02:00
#include "optimizer/tlist.h"
/*****************************************************************************
* Target list creation and searching utilities
*****************************************************************************/
/*
* tlist_member
* Finds the (first) member of the given tlist whose expression is
* equal() to the given expression. Result is NULL if no such member.
*/
TargetEntry *
tlist_member(Node *node, List *targetlist)
{
ListCell *temp;
foreach(temp, targetlist)
{
TargetEntry *tlentry = (TargetEntry *) lfirst(temp);
if (equal(node, tlentry->expr))
return tlentry;
}
1998-09-01 05:29:17 +02:00
return NULL;
}
/*
* tlist_member_ignore_relabel
* Same as above, except that we ignore top-level RelabelType nodes
* while checking for a match. This is needed for some scenarios
* involving binary-compatible sort operations.
*/
TargetEntry *
tlist_member_ignore_relabel(Node *node, List *targetlist)
{
ListCell *temp;
while (node && IsA(node, RelabelType))
node = (Node *) ((RelabelType *) node)->arg;
foreach(temp, targetlist)
{
TargetEntry *tlentry = (TargetEntry *) lfirst(temp);
2007-11-15 22:14:46 +01:00
Expr *tlexpr = tlentry->expr;
while (tlexpr && IsA(tlexpr, RelabelType))
tlexpr = ((RelabelType *) tlexpr)->arg;
if (equal(node, tlexpr))
return tlentry;
}
return NULL;
}
/*
* tlist_member_match_var
* Same as above, except that we match the provided Var on the basis
* of varno/varattno/varlevelsup only, rather than using full equal().
*
* This is needed in some cases where we can't be sure of an exact typmod
* match. It's probably a good idea to check the vartype anyway, but
* we leave it to the caller to apply any suitable sanity checks.
*/
TargetEntry *
tlist_member_match_var(Var *var, List *targetlist)
{
ListCell *temp;
foreach(temp, targetlist)
{
TargetEntry *tlentry = (TargetEntry *) lfirst(temp);
Var *tlvar = (Var *) tlentry->expr;
if (!tlvar || !IsA(tlvar, Var))
continue;
if (var->varno == tlvar->varno &&
var->varattno == tlvar->varattno &&
var->varlevelsup == tlvar->varlevelsup)
return tlentry;
}
return NULL;
}
/*
* flatten_tlist
* Create a target list that only contains unique variables.
*
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window. Regular aggregate functions in combination with, or within the arguments of, window functions are OK per spec; they have the semantics that the aggregate output rows are computed and then we run the window functions over that row set. (Thus, this combination is not really useful unless there's a GROUP BY so that more than one aggregate output row is possible.) The case without GROUP BY could fail, as recently reported by Jeff Davis, because sloppy construction of the Agg node's targetlist resulted in extra references to possibly-ungrouped Vars appearing outside the aggregate function calls themselves. See the added regression test case for an example. Fixing this requires modifying the API of flatten_tlist and its underlying function pull_var_clause. I chose to make pull_var_clause's API for aggregates identical to what it was already doing for placeholders, since the useful behaviors turn out to be the same (error, report node as-is, or recurse into it). I also tightened the error checking in this area a bit: if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here, that was a long time ago, so complain instead of ignoring them. Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing that it only occurs in a basically-useless corner case, it doesn't seem worth the risks of changing a function API in a minor release. There might be third-party code using pull_var_clause.
2011-07-13 00:23:55 +02:00
* Aggrefs and PlaceHolderVars in the input are treated according to
* aggbehavior and phbehavior, for which see pull_var_clause().
*
* 'tlist' is the current target list
*
* Returns the "flattened" new target list.
*
* The result is entirely new structure sharing no nodes with the original.
* Copying the Var nodes is probably overkill, but be safe for now.
*/
List *
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window. Regular aggregate functions in combination with, or within the arguments of, window functions are OK per spec; they have the semantics that the aggregate output rows are computed and then we run the window functions over that row set. (Thus, this combination is not really useful unless there's a GROUP BY so that more than one aggregate output row is possible.) The case without GROUP BY could fail, as recently reported by Jeff Davis, because sloppy construction of the Agg node's targetlist resulted in extra references to possibly-ungrouped Vars appearing outside the aggregate function calls themselves. See the added regression test case for an example. Fixing this requires modifying the API of flatten_tlist and its underlying function pull_var_clause. I chose to make pull_var_clause's API for aggregates identical to what it was already doing for placeholders, since the useful behaviors turn out to be the same (error, report node as-is, or recurse into it). I also tightened the error checking in this area a bit: if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here, that was a long time ago, so complain instead of ignoring them. Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing that it only occurs in a basically-useless corner case, it doesn't seem worth the risks of changing a function API in a minor release. There might be third-party code using pull_var_clause.
2011-07-13 00:23:55 +02:00
flatten_tlist(List *tlist, PVCAggregateBehavior aggbehavior,
PVCPlaceHolderBehavior phbehavior)
{
List *vlist = pull_var_clause((Node *) tlist,
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window. Regular aggregate functions in combination with, or within the arguments of, window functions are OK per spec; they have the semantics that the aggregate output rows are computed and then we run the window functions over that row set. (Thus, this combination is not really useful unless there's a GROUP BY so that more than one aggregate output row is possible.) The case without GROUP BY could fail, as recently reported by Jeff Davis, because sloppy construction of the Agg node's targetlist resulted in extra references to possibly-ungrouped Vars appearing outside the aggregate function calls themselves. See the added regression test case for an example. Fixing this requires modifying the API of flatten_tlist and its underlying function pull_var_clause. I chose to make pull_var_clause's API for aggregates identical to what it was already doing for placeholders, since the useful behaviors turn out to be the same (error, report node as-is, or recurse into it). I also tightened the error checking in this area a bit: if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here, that was a long time ago, so complain instead of ignoring them. Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing that it only occurs in a basically-useless corner case, it doesn't seem worth the risks of changing a function API in a minor release. There might be third-party code using pull_var_clause.
2011-07-13 00:23:55 +02:00
aggbehavior,
phbehavior);
List *new_tlist;
new_tlist = add_to_flat_tlist(NIL, vlist);
list_free(vlist);
return new_tlist;
}
/*
* add_to_flat_tlist
* Add more items to a flattened tlist (if they're not already in it)
*
* 'tlist' is the flattened tlist
* 'exprs' is a list of expressions (usually, but not necessarily, Vars)
*
* Returns the extended tlist.
*/
List *
add_to_flat_tlist(List *tlist, List *exprs)
{
int next_resno = list_length(tlist) + 1;
ListCell *lc;
foreach(lc, exprs)
{
Node *expr = (Node *) lfirst(lc);
if (!tlist_member(expr, tlist))
{
TargetEntry *tle;
tle = makeTargetEntry(copyObject(expr), /* copy needed?? */
next_resno++,
NULL,
false);
tlist = lappend(tlist, tle);
}
}
return tlist;
}
/*
* get_tlist_exprs
* Get just the expression subtrees of a tlist
*
* Resjunk columns are ignored unless includeJunk is true
*/
List *
get_tlist_exprs(List *tlist, bool includeJunk)
{
List *result = NIL;
ListCell *l;
foreach(l, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->resjunk && !includeJunk)
continue;
result = lappend(result, tle->expr);
}
return result;
}
/*
* count_nonjunk_tlist_entries
* What it says ...
*/
int
count_nonjunk_tlist_entries(List *tlist)
{
int len = 0;
ListCell *l;
foreach(l, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (!tle->resjunk)
len++;
}
return len;
}
/*
* tlist_same_exprs
* Check whether two target lists contain the same expressions
*
* Note: this function is used to decide whether it's safe to jam a new tlist
* into a non-projection-capable plan node. Obviously we can't do that unless
* the node's tlist shows it already returns the column values we want.
* However, we can ignore the TargetEntry attributes resname, ressortgroupref,
* resorigtbl, resorigcol, and resjunk, because those are only labelings that
* don't affect the row values computed by the node. (Moreover, if we didn't
* ignore them, we'd frequently fail to make the desired optimization, since
* the planner tends to not bother to make resname etc. valid in intermediate
* plan nodes.) Note that on success, the caller must still jam the desired
* tlist into the plan node, else it won't have the desired labeling fields.
*/
bool
tlist_same_exprs(List *tlist1, List *tlist2)
{
ListCell *lc1,
*lc2;
if (list_length(tlist1) != list_length(tlist2))
return false; /* not same length, so can't match */
forboth(lc1, tlist1, lc2, tlist2)
{
TargetEntry *tle1 = (TargetEntry *) lfirst(lc1);
TargetEntry *tle2 = (TargetEntry *) lfirst(lc2);
if (!equal(tle1->expr, tle2->expr))
return false;
}
return true;
}
/*
* Does tlist have same output datatypes as listed in colTypes?
*
* Resjunk columns are ignored if junkOK is true; otherwise presence of
* a resjunk column will always cause a 'false' result.
*
* Note: currently no callers care about comparing typmods.
*/
bool
tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK)
{
ListCell *l;
ListCell *curColType = list_head(colTypes);
foreach(l, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->resjunk)
{
if (!junkOK)
return false;
}
else
{
if (curColType == NULL)
return false; /* tlist longer than colTypes */
if (exprType((Node *) tle->expr) != lfirst_oid(curColType))
return false;
curColType = lnext(curColType);
}
}
if (curColType != NULL)
return false; /* tlist shorter than colTypes */
return true;
}
/*
* Does tlist have same exposed collations as listed in colCollations?
*
* Identical logic to the above, but for collations.
*/
bool
tlist_same_collations(List *tlist, List *colCollations, bool junkOK)
{
ListCell *l;
ListCell *curColColl = list_head(colCollations);
foreach(l, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->resjunk)
{
if (!junkOK)
return false;
}
else
{
if (curColColl == NULL)
return false; /* tlist longer than colCollations */
if (exprCollation((Node *) tle->expr) != lfirst_oid(curColColl))
return false;
curColColl = lnext(curColColl);
}
}
if (curColColl != NULL)
return false; /* tlist shorter than colCollations */
return true;
}
/*
* get_sortgroupref_tle
* Find the targetlist entry matching the given SortGroupRef index,
* and return it.
*/
TargetEntry *
get_sortgroupref_tle(Index sortref, List *targetList)
{
ListCell *l;
foreach(l, targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->ressortgroupref == sortref)
return tle;
}
elog(ERROR, "ORDER/GROUP BY expression not found in targetlist");
return NULL; /* keep compiler quiet */
}
/*
* get_sortgroupclause_tle
* Find the targetlist entry matching the given SortGroupClause
* by ressortgroupref, and return it.
*/
TargetEntry *
get_sortgroupclause_tle(SortGroupClause *sgClause,
List *targetList)
{
return get_sortgroupref_tle(sgClause->tleSortGroupRef, targetList);
}
/*
* get_sortgroupclause_expr
* Find the targetlist entry matching the given SortGroupClause
* by ressortgroupref, and return its expression.
*/
Node *
get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
{
TargetEntry *tle = get_sortgroupclause_tle(sgClause, targetList);
return (Node *) tle->expr;
}
/*
* get_sortgrouplist_exprs
* Given a list of SortGroupClauses, build a list
* of the referenced targetlist expressions.
*/
List *
get_sortgrouplist_exprs(List *sgClauses, List *targetList)
{
2003-08-04 02:43:34 +02:00
List *result = NIL;
ListCell *l;
foreach(l, sgClauses)
{
SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
Node *sortexpr;
sortexpr = get_sortgroupclause_expr(sortcl, targetList);
result = lappend(result, sortexpr);
}
return result;
}
/*****************************************************************************
* Functions to extract data from a list of SortGroupClauses
*
* These don't really belong in tlist.c, but they are sort of related to the
* functions just above, and they don't seem to deserve their own file.
*****************************************************************************/
Support GROUPING SETS, CUBE and ROLLUP. This SQL standard functionality allows to aggregate data by different GROUP BY clauses at once. Each grouping set returns rows with columns grouped by in other sets set to NULL. This could previously be achieved by doing each grouping as a separate query, conjoined by UNION ALLs. Besides being considerably more concise, grouping sets will in many cases be faster, requiring only one scan over the underlying data. The current implementation of grouping sets only supports using sorting for input. Individual sets that share a sort order are computed in one pass. If there are sets that don't share a sort order, additional sort & aggregation steps are performed. These additional passes are sourced by the previous sort step; thus avoiding repeated scans of the source data. The code is structured in a way that adding support for purely using hash aggregation or a mix of hashing and sorting is possible. Sorting was chosen to be supported first, as it is the most generic method of implementation. Instead of, as in an earlier versions of the patch, representing the chain of sort and aggregation steps as full blown planner and executor nodes, all but the first sort are performed inside the aggregation node itself. This avoids the need to do some unusual gymnastics to handle having to return aggregated and non-aggregated tuples from underlying nodes, as well as having to shut down underlying nodes early to limit memory usage. The optimizer still builds Sort/Agg node to describe each phase, but they're not part of the plan tree, but instead additional data for the aggregation node. They're a convenient and preexisting way to describe aggregation and sorting. The first (and possibly only) sort step is still performed as a separate execution step. That retains similarity with existing group by plans, makes rescans fairly simple, avoids very deep plans (leading to slow explains) and easily allows to avoid the sorting step if the underlying data is sorted by other means. A somewhat ugly side of this patch is having to deal with a grammar ambiguity between the new CUBE keyword and the cube extension/functions named cube (and rollup). To avoid breaking existing deployments of the cube extension it has not been renamed, neither has cube been made a reserved keyword. Instead precedence hacking is used to make GROUP BY cube(..) refer to the CUBE grouping sets feature, and not the function cube(). To actually group by a function cube(), unlikely as that might be, the function name has to be quoted. Needs a catversion bump because stored rules may change. Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
/*
* get_sortgroupref_clause
* Find the SortGroupClause matching the given SortGroupRef index,
* and return it.
*/
SortGroupClause *
get_sortgroupref_clause(Index sortref, List *clauses)
{
ListCell *l;
foreach(l, clauses)
{
SortGroupClause *cl = (SortGroupClause *) lfirst(l);
if (cl->tleSortGroupRef == sortref)
return cl;
}
elog(ERROR, "ORDER/GROUP BY expression not found in list");
return NULL; /* keep compiler quiet */
}
/*
* extract_grouping_ops - make an array of the equality operator OIDs
* for a SortGroupClause list
*/
Oid *
extract_grouping_ops(List *groupClause)
{
int numCols = list_length(groupClause);
int colno = 0;
Oid *groupOperators;
ListCell *glitem;
groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(glitem, groupClause)
{
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
groupOperators[colno] = groupcl->eqop;
Assert(OidIsValid(groupOperators[colno]));
colno++;
}
return groupOperators;
}
/*
* extract_grouping_cols - make an array of the grouping column resnos
* for a SortGroupClause list
*/
AttrNumber *
extract_grouping_cols(List *groupClause, List *tlist)
{
AttrNumber *grpColIdx;
int numCols = list_length(groupClause);
int colno = 0;
ListCell *glitem;
grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
foreach(glitem, groupClause)
{
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
grpColIdx[colno++] = tle->resno;
}
return grpColIdx;
}
/*
* grouping_is_sortable - is it possible to implement grouping list by sorting?
*
* This is easy since the parser will have included a sortop if one exists.
*/
bool
grouping_is_sortable(List *groupClause)
{
ListCell *glitem;
foreach(glitem, groupClause)
{
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
if (!OidIsValid(groupcl->sortop))
return false;
}
return true;
}
/*
* grouping_is_hashable - is it possible to implement grouping list by hashing?
*
* We rely on the parser to have set the hashable flag correctly.
*/
bool
grouping_is_hashable(List *groupClause)
{
ListCell *glitem;
foreach(glitem, groupClause)
{
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
if (!groupcl->hashable)
return false;
}
return true;
}