diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 3776cf9b34..d25a5509b4 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.106 2008/01/11 04:02:18 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.107 2008/07/31 22:47:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -288,7 +288,7 @@ query_planner(PlannerInfo *root, List *tlist, * levels of sort --- and, therefore, certainly need to read all the * tuples --- unless ORDER BY is a subset of GROUP BY. */ - if (parse->groupClause && parse->sortClause && + if (root->group_pathkeys && root->sort_pathkeys && !pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys)) tuple_fraction = 0.0; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9248022d8b..6ebbc5b7bc 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.234 2008/07/10 02:14:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -826,6 +826,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * Calculate pathkeys that represent result ordering requirements */ + Assert(parse->distinctClause == NIL); sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, @@ -864,17 +865,29 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize * them after EquivalenceClasses have been formed. + * + * Note: for the moment, DISTINCT is always implemented via sort/uniq, + * and we set the sort_pathkeys to be the more rigorous of the + * DISTINCT and ORDER BY requirements. This should be changed + * someday, but DISTINCT ON is a bit of a problem ... */ root->group_pathkeys = make_pathkeys_for_sortclauses(root, parse->groupClause, tlist, false); - root->sort_pathkeys = - make_pathkeys_for_sortclauses(root, - parse->sortClause, - tlist, - false); + if (list_length(parse->distinctClause) > list_length(parse->sortClause)) + root->sort_pathkeys = + make_pathkeys_for_sortclauses(root, + parse->distinctClause, + tlist, + false); + else + root->sort_pathkeys = + make_pathkeys_for_sortclauses(root, + parse->sortClause, + tlist, + false); /* * Will need actual number of aggregates for estimating costs. @@ -903,9 +916,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * by ORDER BY --- but that might just leave us failing to exploit an * available sort order at all. Needs more thought...) */ - if (parse->groupClause) + if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; - else if (parse->sortClause) + else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; else root->query_pathkeys = NIL; @@ -1172,7 +1185,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * If we were not able to make the plan come out in the right order, add * an explicit sort step. */ - if (parse->sortClause) + if (sort_pathkeys) { if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys)) { diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index ed18cf5e3f..b30cedee9f 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.147 2008/06/19 00:46:04 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.148 2008/07/31 22:47:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -69,6 +69,7 @@ static List *generate_setop_tlist(List *colTypes, int flag, static List *generate_append_tlist(List *colTypes, bool flag, List *input_plans, List *refnames_tlist); +static List *generate_setop_sortlist(List *targetlist); static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti); static void make_inh_translation_lists(Relation oldrelation, @@ -319,7 +320,7 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root, { List *sortList; - sortList = addAllTargetsToSortList(NULL, NIL, tlist, false); + sortList = generate_setop_sortlist(tlist); if (sortList) { plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan); @@ -384,7 +385,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, * Sort the child results, then add a SetOp plan node to generate the * correct output. */ - sortList = addAllTargetsToSortList(NULL, NIL, tlist, false); + sortList = generate_setop_sortlist(tlist); if (sortList == NIL) /* nothing to sort on? */ { @@ -675,6 +676,31 @@ generate_append_tlist(List *colTypes, bool flag, return tlist; } +/* + * generate_setop_sortlist + * Build a SortClause list enumerating all the non-resjunk tlist entries, + * using default ordering properties. + */ +static List * +generate_setop_sortlist(List *targetlist) +{ + List *sortlist = NIL; + ListCell *l; + + foreach(l, targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (!tle->resjunk) + sortlist = addTargetToSortList(NULL, tle, + sortlist, targetlist, + SORTBY_DEFAULT, + SORTBY_NULLS_DEFAULT, + NIL, false); + } + return sortlist; +} + /* * find_all_inheritors - diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 520b503725..329b1bc881 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -17,7 +17,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.373 2008/07/18 20:26:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.374 2008/07/31 22:47:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -711,6 +711,8 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) /* * Transform sorting/grouping stuff. Do ORDER BY first because both * transformGroupClause and transformDistinctClause need the results. + * Note that these functions can also change the targetList, so it's + * passed to them by reference. */ qry->sortClause = transformSortClause(pstate, stmt->sortClause, @@ -725,8 +727,9 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->distinctClause = transformDistinctClause(pstate, stmt->distinctClause, &qry->targetList, - &qry->sortClause); + qry->sortClause); + /* transform LIMIT */ qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset, "OFFSET"); qry->limitCount = transformLimitClause(pstate, stmt->limitCount, diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index f532b26d37..d82573fd03 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.170 2008/06/19 00:46:05 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.171 2008/07/31 22:47:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1454,16 +1454,23 @@ transformSortClause(ParseState *pstate, * transformDistinctClause - * transform a DISTINCT or DISTINCT ON clause * - * Since we may need to add items to the query's sortClause list, that list - * is passed by reference. Likewise for the targetlist. + * Since we may need to add items to the query's targetlist, that list + * is passed by reference. + * + * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as + * possible into the distinctClause. This avoids a possible need to re-sort, + * and allows the user to determine the equality semantics used by DISTINCT, + * should she be working with a datatype that has more than one btree equality + * operator. */ List * transformDistinctClause(ParseState *pstate, List *distinctlist, - List **targetlist, List **sortClause) + List **targetlist, List *sortClause) { List *result = NIL; ListCell *slitem; ListCell *dlitem; + ListCell *tlitem; /* No work if there was no DISTINCT clause */ if (distinctlist == NIL) @@ -1474,25 +1481,21 @@ transformDistinctClause(ParseState *pstate, List *distinctlist, /* We had SELECT DISTINCT */ /* - * All non-resjunk elements from target list that are not already in - * the sort list should be added to it. (We don't really care what - * order the DISTINCT fields are checked in, so we can leave the - * user's ORDER BY spec alone, and just add additional sort keys to it - * to ensure that all targetlist items get sorted.) + * The distinctClause should consist of all ORDER BY items followed + * by all other non-resjunk targetlist items. There must not be any + * resjunk ORDER BY items --- that would imply that we are sorting + * by a value that isn't necessarily unique within a DISTINCT group, + * so the results wouldn't be well-defined. This construction + * ensures we follow the rule that sortClause and distinctClause match; + * in fact the sortClause will always be a prefix of distinctClause. + * + * Note a corner case: the same TLE could be in the ORDER BY list + * multiple times with different sortops. We have to include it in + * the distinctClause the same way to preserve the prefix property. + * The net effect will be that the TLE value will be made unique + * according to both sortops. */ - *sortClause = addAllTargetsToSortList(pstate, - *sortClause, - *targetlist, - true); - - /* - * Now, DISTINCT list consists of all non-resjunk sortlist items. - * Actually, all the sortlist items had better be non-resjunk! - * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item that - * does not appear anywhere in the SELECT targetlist, and we can't - * implement that with only one sorting pass... - */ - foreach(slitem, *sortClause) + foreach(slitem, sortClause) { SortClause *scl = (SortClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist); @@ -1504,110 +1507,112 @@ transformDistinctClause(ParseState *pstate, List *distinctlist, else result = lappend(result, copyObject(scl)); } + + /* + * Now add any remaining non-resjunk tlist items, using default + * sorting semantics for their data types. + */ + foreach(tlitem, *targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(tlitem); + + if (tle->resjunk) + continue; /* ignore junk */ + if (targetIsInSortList(tle, InvalidOid, result)) + continue; /* already in list (with some semantics) */ + result = addTargetToSortList(pstate, tle, + result, *targetlist, + SORTBY_DEFAULT, + SORTBY_NULLS_DEFAULT, + NIL, true); + } } else { /* We had SELECT DISTINCT ON (expr, ...) */ + Bitmapset *refnos = NULL; + int sortgroupref; + bool skipped_sortitem; /* - * If the user writes both DISTINCT ON and ORDER BY, then the two - * expression lists must match (until one or the other runs out). - * Otherwise the ORDER BY requires a different sort order than the - * DISTINCT does, and we can't implement that with only one sort pass - * (and if we do two passes, the results will be rather - * unpredictable). However, it's OK to have more DISTINCT ON - * expressions than ORDER BY expressions; we can just add the extra - * DISTINCT values to the sort list, much as we did above for ordinary - * DISTINCT fields. - * - * Actually, it'd be OK for the common prefixes of the two lists to - * match in any order, but implementing that check seems like more - * trouble than it's worth. + * Add all the DISTINCT ON expressions to the tlist (if not already + * present, they are added as resjunk items). Assign sortgroupref + * numbers to them, and form a bitmapset of these numbers. (A + * bitmapset is convenient here because we don't care about order + * and we can discard duplicates.) */ - ListCell *nextsortlist = list_head(*sortClause); - foreach(dlitem, distinctlist) { + Node *dexpr = (Node *) lfirst(dlitem); TargetEntry *tle; - tle = findTargetlistEntry(pstate, lfirst(dlitem), + tle = findTargetlistEntry(pstate, dexpr, targetlist, DISTINCT_ON_CLAUSE); + sortgroupref = assignSortGroupRef(tle, *targetlist); + refnos = bms_add_member(refnos, sortgroupref); + } - if (nextsortlist != NULL) + /* + * If the user writes both DISTINCT ON and ORDER BY, adopt the + * sorting semantics from ORDER BY items that match DISTINCT ON + * items, and also adopt their column sort order. We insist that + * the distinctClause and sortClause match, so throw error if we + * find the need to add any more distinctClause items after we've + * skipped an ORDER BY item that wasn't in DISTINCT ON. + */ + skipped_sortitem = false; + foreach(slitem, sortClause) + { + SortClause *scl = (SortClause *) lfirst(slitem); + + if (bms_is_member(scl->tleSortGroupRef, refnos)) { - SortClause *scl = (SortClause *) lfirst(nextsortlist); - - if (tle->ressortgroupref != scl->tleSortGroupRef) + if (skipped_sortitem) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); - result = lappend(result, copyObject(scl)); - nextsortlist = lnext(nextsortlist); + else + result = lappend(result, copyObject(scl)); } else { - *sortClause = addTargetToSortList(pstate, tle, - *sortClause, *targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, true); - - /* - * Probably, the tle should always have been added at the end - * of the sort list ... but search to be safe. - */ - foreach(slitem, *sortClause) - { - SortClause *scl = (SortClause *) lfirst(slitem); - - if (tle->ressortgroupref == scl->tleSortGroupRef) - { - result = lappend(result, copyObject(scl)); - break; - } - } - if (slitem == NULL) /* should not happen */ - elog(ERROR, "failed to add DISTINCT ON clause to target list"); + skipped_sortitem = true; } } + + /* + * Now add any remaining DISTINCT ON items, using default sorting + * semantics for their data types. (Note: this is pretty + * questionable; if the ORDER BY list doesn't include all the DISTINCT + * ON items and more besides, you certainly aren't using DISTINCT ON + * in the intended way, and you probably aren't going to get + * consistent results. It might be better to throw an error or warning + * here. But historically we've allowed it, so keep doing so.) + */ + while ((sortgroupref = bms_first_member(refnos)) >= 0) + { + TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist); + + if (targetIsInSortList(tle, InvalidOid, result)) + continue; /* already in list (with some semantics) */ + if (skipped_sortitem) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); + result = addTargetToSortList(pstate, tle, + result, *targetlist, + SORTBY_DEFAULT, + SORTBY_NULLS_DEFAULT, + NIL, true); + } } return result; } -/* - * addAllTargetsToSortList - * Make sure all non-resjunk targets in the targetlist are in the - * ORDER BY list, adding the not-yet-sorted ones to the end of the list. - * This is typically used to help implement SELECT DISTINCT. - * - * See addTargetToSortList for info about pstate and resolveUnknown inputs. - * - * Returns the updated ORDER BY list. - */ -List * -addAllTargetsToSortList(ParseState *pstate, List *sortlist, - List *targetlist, bool resolveUnknown) -{ - ListCell *l; - - foreach(l, targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (!tle->resjunk) - sortlist = addTargetToSortList(pstate, tle, - sortlist, targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, resolveUnknown); - } - return sortlist; -} - /* * addTargetToSortList - * If the given targetlist entry isn't already in the ORDER BY list, + * If the given targetlist entry isn't already in the SortClause list, * add it to the end of the list, using the given sort ordering info. * * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not, @@ -1615,7 +1620,7 @@ addAllTargetsToSortList(ParseState *pstate, List *sortlist, * pstate should be provided if resolveUnknown is TRUE, but can be NULL * otherwise. * - * Returns the updated ORDER BY list. + * Returns the updated SortClause list. */ List * addTargetToSortList(ParseState *pstate, TargetEntry *tle, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index e6de4b49c8..fa78b4b188 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.368 2008/07/18 03:32:53 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.369 2008/07/31 22:47:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -613,14 +613,19 @@ typedef struct RangeTblEntry * tleSortGroupRef must match ressortgroupref of exactly one entry of the * associated targetlist; that is the expression to be sorted (or grouped) by. * sortop is the OID of the ordering operator (a "<" or ">" operator). - * nulls_first does about what you'd expect. + * nulls_first means about what you'd expect. * * SortClauses are also used to identify targets that we will do a "Unique" * filter step on (for SELECT DISTINCT and SELECT DISTINCT ON). The - * distinctClause list is simply a copy of the relevant members of the - * sortClause list. Note that distinctClause can be a subset of sortClause, - * but cannot have members not present in sortClause; and the members that - * do appear must be in the same order as in sortClause. + * distinctClause list is a list of SortClauses for the expressions to be + * unique-ified. (As per comment for GroupClause, this overspecifies the + * semantics.) In SELECT DISTINCT, the distinctClause list is typically + * longer than the ORDER BY list, while in SELECT DISTINCT ON it's typically + * shorter. The two lists must match up to the end of the shorter one --- + * the parser rearranges the distinctClause if necessary to make this true. + * (This restriction ensures that only one sort step is needed to both + * satisfy the ORDER BY and set up for the Unique step. This is semantically + * necessary for DISTINCT ON, and offers no real drawback for DISTINCT.) */ typedef struct SortClause { diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index f2a6bf01ba..6fc357522e 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.49 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.50 2008/07/31 22:47:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,11 +31,8 @@ extern List *transformGroupClause(ParseState *pstate, List *grouplist, extern List *transformSortClause(ParseState *pstate, List *orderlist, List **targetlist, bool resolveUnknown); extern List *transformDistinctClause(ParseState *pstate, List *distinctlist, - List **targetlist, List **sortClause); + List **targetlist, List *sortClause); -extern List *addAllTargetsToSortList(ParseState *pstate, - List *sortlist, List *targetlist, - bool resolveUnknown); extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, List *sortlist, List *targetlist, SortByDir sortby_dir, SortByNulls sortby_nulls,