Make GROUP BY work properly for datatypes that only support hashing and not
sorting.  The infrastructure for this was all in place already; it's only
necessary to fix the planner to not assume that sorting is always an available
option.
This commit is contained in:
Tom Lane 2008-08-03 19:10:52 +00:00
parent 82a1f09953
commit ec73b56a31
3 changed files with 112 additions and 58 deletions

View File

@ -14,7 +14,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.107 2008/07/31 22:47:56 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -288,8 +288,7 @@ query_planner(PlannerInfo *root, List *tlist,
* levels of sort --- and, therefore, certainly need to read all the * levels of sort --- and, therefore, certainly need to read all the
* tuples --- unless ORDER BY is a subset of GROUP BY. * tuples --- unless ORDER BY is a subset of GROUP BY.
*/ */
if (root->group_pathkeys && root->sort_pathkeys && if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
tuple_fraction = 0.0; tuple_fraction = 0.0;
} }
else if (parse->hasAggs || root->hasHavingQual) else if (parse->hasAggs || root->hasHavingQual)

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -69,11 +69,12 @@ static double preprocess_limit(PlannerInfo *root,
int64 *offset_est, int64 *count_est); int64 *offset_est, int64 *count_est);
static void preprocess_groupclause(PlannerInfo *root); static void preprocess_groupclause(PlannerInfo *root);
static Oid *extract_grouping_ops(List *groupClause); static Oid *extract_grouping_ops(List *groupClause);
static bool grouping_is_sortable(List *groupClause);
static bool grouping_is_hashable(List *groupClause);
static bool choose_hashed_grouping(PlannerInfo *root, static bool choose_hashed_grouping(PlannerInfo *root,
double tuple_fraction, double limit_tuples, double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path, Path *cheapest_path, Path *sorted_path,
Oid *groupOperators, double dNumGroups, double dNumGroups, AggClauseCounts *agg_counts);
AggClauseCounts *agg_counts);
static List *make_subplanTargetList(PlannerInfo *root, List *tlist, static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
AttrNumber **groupColIdx, bool *need_tlist_eval); AttrNumber **groupColIdx, bool *need_tlist_eval);
static void locate_grouping_columns(PlannerInfo *root, static void locate_grouping_columns(PlannerInfo *root,
@ -839,7 +840,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
List *sub_tlist; List *sub_tlist;
List *group_pathkeys; List *group_pathkeys;
AttrNumber *groupColIdx = NULL; AttrNumber *groupColIdx = NULL;
Oid *groupOperators = NULL;
bool need_tlist_eval = true; bool need_tlist_eval = true;
QualCost tlist_cost; QualCost tlist_cost;
Path *cheapest_path; Path *cheapest_path;
@ -877,11 +877,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* DISTINCT and ORDER BY requirements. This should be changed * DISTINCT and ORDER BY requirements. This should be changed
* someday, but DISTINCT ON is a bit of a problem ... * someday, but DISTINCT ON is a bit of a problem ...
*/ */
if (parse->groupClause && grouping_is_sortable(parse->groupClause))
root->group_pathkeys = root->group_pathkeys =
make_pathkeys_for_sortclauses(root, make_pathkeys_for_sortclauses(root,
parse->groupClause, parse->groupClause,
tlist, tlist,
false); false);
else
root->group_pathkeys = NIL;
if (list_length(parse->distinctClause) > list_length(parse->sortClause)) if (list_length(parse->distinctClause) > list_length(parse->sortClause))
root->sort_pathkeys = root->sort_pathkeys =
make_pathkeys_for_sortclauses(root, make_pathkeys_for_sortclauses(root,
@ -915,12 +919,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* /*
* Figure out whether we need a sorted result from query_planner. * Figure out whether we need a sorted result from query_planner.
* *
* If we have a GROUP BY clause, then we want a result sorted properly * If we have a sortable GROUP BY clause, then we want a result sorted
* for grouping. Otherwise, if there is an ORDER BY clause, we want * properly for grouping. Otherwise, if there is an ORDER BY clause,
* to sort by the ORDER BY clause. (Note: if we have both, and ORDER * we want to sort by the ORDER BY clause. (Note: if we have both, and
* BY is a superset of GROUP BY, it would be tempting to request sort * ORDER BY is a superset of GROUP BY, it would be tempting to request
* by ORDER BY --- but that might just leave us failing to exploit an * sort by ORDER BY --- but that might just leave us failing to
* available sort order at all. Needs more thought...) * exploit an available sort order at all. Needs more thought...)
*/ */
if (root->group_pathkeys) if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys; root->query_pathkeys = root->group_pathkeys;
@ -942,17 +946,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
sort_pathkeys = root->sort_pathkeys; sort_pathkeys = root->sort_pathkeys;
/* /*
* If grouping, extract the grouping operators and decide whether we * If grouping, decide whether to use sorted or hashed grouping.
* want to use hashed grouping.
*/ */
if (parse->groupClause) if (parse->groupClause)
{ {
groupOperators = extract_grouping_ops(parse->groupClause); bool can_hash;
bool can_sort;
/*
* Executor doesn't support hashed aggregation with DISTINCT
* aggregates. (Doing so would imply storing *all* the input
* values in the hash table, which seems like a certain loser.)
*/
can_hash = (agg_counts.numDistinctAggs == 0 &&
grouping_is_hashable(parse->groupClause));
can_sort = grouping_is_sortable(parse->groupClause);
if (can_hash && can_sort)
{
/* we have a meaningful choice to make ... */
use_hashed_grouping = use_hashed_grouping =
choose_hashed_grouping(root, tuple_fraction, limit_tuples, choose_hashed_grouping(root,
tuple_fraction, limit_tuples,
cheapest_path, sorted_path, cheapest_path, sorted_path,
groupOperators, dNumGroups, dNumGroups, &agg_counts);
&agg_counts); }
else if (can_hash)
use_hashed_grouping = true;
else if (can_sort)
use_hashed_grouping = false;
else
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement GROUP BY"),
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
/* Also convert # groups to long int --- but 'ware overflow! */ /* Also convert # groups to long int --- but 'ware overflow! */
numGroups = (long) Min(dNumGroups, (double) LONG_MAX); numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
@ -1088,7 +1114,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
AGG_HASHED, AGG_HASHED,
numGroupCols, numGroupCols,
groupColIdx, groupColIdx,
groupOperators, extract_grouping_ops(parse->groupClause),
numGroups, numGroups,
agg_counts.numAggs, agg_counts.numAggs,
result_plan); result_plan);
@ -1131,7 +1157,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
aggstrategy, aggstrategy,
numGroupCols, numGroupCols,
groupColIdx, groupColIdx,
groupOperators, extract_grouping_ops(parse->groupClause),
numGroups, numGroups,
agg_counts.numAggs, agg_counts.numAggs,
result_plan); result_plan);
@ -1160,7 +1186,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
(List *) parse->havingQual, (List *) parse->havingQual,
numGroupCols, numGroupCols,
groupColIdx, groupColIdx,
groupOperators, extract_grouping_ops(parse->groupClause),
dNumGroups, dNumGroups,
result_plan); result_plan);
/* The Group node won't change sort ordering */ /* The Group node won't change sort ordering */
@ -1495,6 +1521,9 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
* GROUP BY elements, which could match the sort ordering of other * GROUP BY elements, which could match the sort ordering of other
* possible plans (eg an indexscan) and thereby reduce cost. We don't * possible plans (eg an indexscan) and thereby reduce cost. We don't
* bother with that, though. Hashed grouping will frequently win anyway. * bother with that, though. Hashed grouping will frequently win anyway.
*
* Note: we need no comparable processing of the distinctClause because
* the parser already enforced that that matches ORDER BY.
*/ */
static void static void
preprocess_groupclause(PlannerInfo *root) preprocess_groupclause(PlannerInfo *root)
@ -1505,7 +1534,7 @@ preprocess_groupclause(PlannerInfo *root)
ListCell *sl; ListCell *sl;
ListCell *gl; ListCell *gl;
/* If no ORDER BY, nothing useful to do here anyway */ /* If no ORDER BY, nothing useful to do here */
if (parse->sortClause == NIL) if (parse->sortClause == NIL)
return; return;
@ -1546,7 +1575,8 @@ preprocess_groupclause(PlannerInfo *root)
* were able to make a complete match. In other words, we only * were able to make a complete match. In other words, we only
* rearrange the GROUP BY list if the result is that one list is a * rearrange the GROUP BY list if the result is that one list is a
* prefix of the other --- otherwise there's no possibility of a * prefix of the other --- otherwise there's no possibility of a
* common sort. * common sort. Also, give up if there are any non-sortable GROUP BY
* items, since then there's no hope anyway.
*/ */
foreach(gl, parse->groupClause) foreach(gl, parse->groupClause)
{ {
@ -1556,6 +1586,8 @@ preprocess_groupclause(PlannerInfo *root)
continue; /* it matched an ORDER BY item */ continue; /* it matched an ORDER BY item */
if (partial_match) if (partial_match)
return; /* give up, no common sort possible */ return; /* give up, no common sort possible */
if (!OidIsValid(gc->sortop))
return; /* give up, GROUP BY can't be sorted */
new_groupclause = lappend(new_groupclause, gc); new_groupclause = lappend(new_groupclause, gc);
} }
@ -1566,7 +1598,7 @@ preprocess_groupclause(PlannerInfo *root)
/* /*
* extract_grouping_ops - make an array of the equality operator OIDs * extract_grouping_ops - make an array of the equality operator OIDs
* for the GROUP BY clause * for a SortGroupClause list
*/ */
static Oid * static Oid *
extract_grouping_ops(List *groupClause) extract_grouping_ops(List *groupClause)
@ -1590,15 +1622,59 @@ extract_grouping_ops(List *groupClause)
return groupOperators; return groupOperators;
} }
/*
 * grouping_is_sortable - can this grouping list be implemented by sorting?
 *
 * Returns true only when every SortGroupClause in the list carries a valid
 * sortop.  No lookup is needed here: the parser will already have filled in
 * a sortop for each item that has one.  An empty list is reported sortable.
 */
static bool
grouping_is_sortable(List *groupClause)
{
	bool		sortable = true;
	ListCell   *lc;

	foreach(lc, groupClause)
	{
		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);

		if (OidIsValid(sgc->sortop))
			continue;

		/* one item without a sort operator rules out sorting altogether */
		sortable = false;
		break;
	}

	return sortable;
}
/*
 * grouping_is_hashable - can this grouping list be implemented by hashing?
 *
 * Returns true only when op_hashjoinable() accepts the equality operator of
 * every SortGroupClause in the list.  We take the operator's oprcanhash
 * marking at face value: if no supporting hash function actually exists,
 * the executor will complain at runtime, and that is a misdeclaration of
 * the operator rather than a system bug.  An empty list is reported hashable.
 */
static bool
grouping_is_hashable(List *groupClause)
{
	bool		hashable = true;
	ListCell   *lc;

	foreach(lc, groupClause)
	{
		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);

		if (op_hashjoinable(sgc->eqop))
			continue;

		/* one non-hashable equality operator rules out hashing altogether */
		hashable = false;
		break;
	}

	return hashable;
}
/* /*
* choose_hashed_grouping - should we use hashed grouping? * choose_hashed_grouping - should we use hashed grouping?
*
* Note: this is only applied when both alternatives are actually feasible.
*/ */
static bool static bool
choose_hashed_grouping(PlannerInfo *root, choose_hashed_grouping(PlannerInfo *root,
double tuple_fraction, double limit_tuples, double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path, Path *cheapest_path, Path *sorted_path,
Oid *groupOperators, double dNumGroups, double dNumGroups, AggClauseCounts *agg_counts)
AggClauseCounts *agg_counts)
{ {
int numGroupCols = list_length(root->parse->groupClause); int numGroupCols = list_length(root->parse->groupClause);
double cheapest_path_rows; double cheapest_path_rows;
@ -1607,27 +1683,10 @@ choose_hashed_grouping(PlannerInfo *root,
List *current_pathkeys; List *current_pathkeys;
Path hashed_p; Path hashed_p;
Path sorted_p; Path sorted_p;
int i;
/* /* Prefer sorting when enable_hashagg is off */
* Check can't-do-it conditions, including whether the grouping operators
* are hashjoinable. (We assume hashing is OK if they are marked
* oprcanhash. If there isn't actually a supporting hash function, the
* executor will complain at runtime.)
*
* Executor doesn't support hashed aggregation with DISTINCT aggregates.
* (Doing so would imply storing *all* the input values in the hash table,
* which seems like a certain loser.)
*/
if (!enable_hashagg) if (!enable_hashagg)
return false; return false;
if (agg_counts->numDistinctAggs != 0)
return false;
for (i = 0; i < numGroupCols; i++)
{
if (!op_hashjoinable(groupOperators[i]))
return false;
}
/* /*
* Don't do it if it doesn't look like the hashtable will fit into * Don't do it if it doesn't look like the hashtable will fit into

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $ * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -1351,15 +1351,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
/* /*
* If no match in ORDER BY, just add it to the result using * If no match in ORDER BY, just add it to the result using
* default sort/group semantics. * default sort/group semantics.
*
* XXX for now, the planner requires groupClause to be sortable,
* so we have to insist on that here.
*/ */
if (!found) if (!found)
result = addTargetToGroupList(pstate, tle, result = addTargetToGroupList(pstate, tle,
result, *targetlist, result, *targetlist,
true, /* XXX for now */ false, true);
true);
} }
return result; return result;