diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index d58b9d4a2f..5f5c91366f 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.77 2008/08/02 21:31:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.78 2008/08/07 01:11:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1597,6 +1597,15 @@ find_expr_references_walker(Node *node, context->rtables = list_delete_first(context->rtables); return result; } + else if (IsA(node, SetOperationStmt)) + { + SetOperationStmt *setop = (SetOperationStmt *) node; + + /* we need to look at the groupClauses for operator references */ + find_expr_references_walker((Node *) setop->groupClauses, context); + /* fall through to examine child nodes */ + } + return expression_tree_walker(node, find_expr_references_walker, (void *) context); } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 652c726a67..e562949d93 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.396 2008/08/02 21:31:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.397 2008/08/07 01:11:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1943,6 +1943,7 @@ _copySetOperationStmt(SetOperationStmt *from) COPY_NODE_FIELD(rarg); COPY_NODE_FIELD(colTypes); COPY_NODE_FIELD(colTypmods); + COPY_NODE_FIELD(groupClauses); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 4e011947ef..dc0b2cca37 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.325 2008/08/02 21:31:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.326 2008/08/07 01:11:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -839,6 +839,7 @@ _equalSetOperationStmt(SetOperationStmt *a, SetOperationStmt *b) COMPARE_NODE_FIELD(rarg); COMPARE_NODE_FIELD(colTypes); COMPARE_NODE_FIELD(colTypmods); + COMPARE_NODE_FIELD(groupClauses); return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 408b9b2a75..26f8fb3810 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.331 2008/08/07 01:11:48 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1780,6 +1780,7 @@ _outSetOperationStmt(StringInfo str, SetOperationStmt *node) WRITE_NODE_FIELD(rarg); WRITE_NODE_FIELD(colTypes); WRITE_NODE_FIELD(colTypmods); + WRITE_NODE_FIELD(groupClauses); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index e7de4898c0..c46593d22c 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.211 2008/08/02 21:31:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.212 2008/08/07 01:11:49 tgl Exp $ * * NOTES * Path and Plan nodes do not have any readfuncs support, because we @@ -232,6 +232,7 @@ _readSetOperationStmt(void) READ_NODE_FIELD(rarg); READ_NODE_FIELD(colTypes); READ_NODE_FIELD(colTypmods); + READ_NODE_FIELD(groupClauses); READ_DONE(); } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5b7fde2533..c818f0ddf1 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.239 2008/08/05 16:03:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -68,10 +68,6 @@ static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); static void preprocess_groupclause(PlannerInfo *root); -static Oid *extract_grouping_ops(List *groupClause); -static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); -static bool grouping_is_sortable(List *groupClause); -static bool grouping_is_hashable(List *groupClause); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, double limit_tuples, Path *cheapest_path, Path *sorted_path, @@ -784,10 +780,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * If there's a top-level ORDER BY, assume we have to fetch all the - * tuples. This might seem too simplistic given all the hackery below - * to possibly avoid the sort ... but a nonzero tuple_fraction is only - * of use to plan_set_operations() when the setop is UNION ALL, and - * the result of UNION ALL is always unsorted. + * tuples. This might be too simplistic given all the hackery below + * to possibly avoid the sort; but the odds of accurate estimates + * here are pretty low anyway. */ if (parse->sortClause) tuple_fraction = 0.0; @@ -818,7 +813,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ Assert(parse->commandType == CMD_SELECT); - tlist = postprocess_setop_tlist(result_plan->targetlist, tlist); + tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist), + tlist); /* * Can't handle FOR UPDATE/SHARE here (parser should have checked @@ -1714,100 +1710,6 @@ preprocess_groupclause(PlannerInfo *root) parse->groupClause = new_groupclause; } -/* - * extract_grouping_ops - make an array of the equality operator OIDs - * for a SortGroupClause list - */ -static Oid * -extract_grouping_ops(List *groupClause) -{ - int numCols = list_length(groupClause); - int colno = 0; - Oid *groupOperators; - ListCell *glitem; - - groupOperators = (Oid *) palloc(sizeof(Oid) * numCols); - - foreach(glitem, groupClause) - { - SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); - - groupOperators[colno] = groupcl->eqop; - Assert(OidIsValid(groupOperators[colno])); - colno++; - } - - return groupOperators; -} - -/* - * extract_grouping_cols - make an array of the grouping column resnos - * for a SortGroupClause list - */ -static AttrNumber * -extract_grouping_cols(List *groupClause, List *tlist) -{ - AttrNumber *grpColIdx; - int numCols = list_length(groupClause); - int colno = 0; - ListCell *glitem; - - grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); - - foreach(glitem, groupClause) - { - SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); - TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); - - grpColIdx[colno++] = tle->resno; - } - - return grpColIdx; -} - -/* - * grouping_is_sortable - is it possible to implement grouping list by sorting? - * - * This is easy since the parser will have included a sortop if one exists. - */ -static bool -grouping_is_sortable(List *groupClause) -{ - ListCell *glitem; - - foreach(glitem, groupClause) - { - SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); - - if (!OidIsValid(groupcl->sortop)) - return false; - } - return true; -} - -/* - * grouping_is_hashable - is it possible to implement grouping list by hashing? - * - * We assume hashing is OK if the equality operators are marked oprcanhash. - * (If there isn't actually a supporting hash function, the executor will - * complain at runtime; but this is a misdeclaration of the operator, not - * a system bug.) - */ -static bool -grouping_is_hashable(List *groupClause) -{ - ListCell *glitem; - - foreach(glitem, groupClause) - { - SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); - - if (!op_hashjoinable(groupcl->eqop)) - return false; - } - return true; -} - /* * choose_hashed_grouping - should we use hashed grouping? * diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 31ba005edb..750d59a951 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,8 +32,12 @@ #include "access/heapam.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" +#include "miscadmin.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/planmain.h" #include "optimizer/planner.h" @@ -61,6 +65,13 @@ static List *recurse_union_children(Node *setOp, PlannerInfo *root, double tuple_fraction, SetOperationStmt *top_union, List *refnames_tlist); +static Plan *make_union_unique(SetOperationStmt *op, Plan *plan, + PlannerInfo *root, double tuple_fraction, + List **sortClauses); +static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, + Plan *input_plan, + double tuple_fraction, double dNumDistinctRows, + const char *construct); static List *generate_setop_tlist(List *colTypes, int flag, Index varno, bool hack_constants, @@ -69,7 +80,7 @@ static List *generate_setop_tlist(List *colTypes, int flag, static List *generate_append_tlist(List *colTypes, bool flag, List *input_plans, List *refnames_tlist); -static List *generate_setop_sortlist(List *targetlist); +static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist); static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti); static void make_inh_translation_lists(Relation oldrelation, @@ -99,7 +110,8 @@ static List *adjust_inherited_tlist(List *tlist, * top level has already been factored into tuple_fraction. * * *sortClauses is an output argument: it is set to a list of SortGroupClauses - * representing the result ordering of the topmost set operation. + * representing the result ordering of the topmost set operation. (This will + * be NIL if the output isn't ordered.) */ Plan * plan_set_operations(PlannerInfo *root, double tuple_fraction, @@ -287,8 +299,8 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root, /* * If any of my children are identical UNION nodes (same op, all-flag, and * colTypes) then they can be merged into this node so that we generate - * only one Append and Sort for the lot. Recurse to find such nodes and - * compute their children's plans. + * only one Append and unique-ification for the lot. Recurse to find such + * nodes and compute their children's plans. */ planlist = list_concat(recurse_union_children(op->larg, root, tuple_fraction, @@ -314,22 +326,12 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root, /* * For UNION ALL, we just need the Append plan. For UNION, need to add - * Sort and Unique nodes to produce unique output. + * node(s) to remove duplicates. */ - if (!op->all) - { - List *sortList; - - sortList = generate_setop_sortlist(tlist); - if (sortList) - { - plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan); - plan = (Plan *) make_unique(plan, sortList); - } - *sortClauses = sortList; - } + if (op->all) + *sortClauses = NIL; /* result of UNION ALL is always unsorted */ else - *sortClauses = NIL; + plan = make_union_unique(op, plan, root, tuple_fraction, sortClauses); return plan; } @@ -346,7 +348,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, *rplan, *plan; List *tlist, - *sortList, + *groupList, *planlist, *child_sortclauses; SetOpCmd cmd; @@ -381,19 +383,24 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, */ plan = (Plan *) make_append(planlist, false, tlist); - /* - * Sort the child results, then add a SetOp plan node to generate the - * correct output. - */ - sortList = generate_setop_sortlist(tlist); + /* Identify the grouping semantics */ + groupList = generate_setop_grouplist(op, tlist); - if (sortList == NIL) /* nothing to sort on? */ + /* punt if nothing to group on (can this happen?) */ + if (groupList == NIL) { *sortClauses = NIL; return plan; } - plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan); + /* + * Decide whether to hash or sort, and add a sort node if needed. + */ + plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan); + + /* + * Finally, add a SetOp plan node to generate the correct output. + */ switch (op->op) { case SETOP_INTERSECT: @@ -403,14 +410,13 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT; break; default: - elog(ERROR, "unrecognized set op: %d", - (int) op->op); + elog(ERROR, "unrecognized set op: %d", (int) op->op); cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */ break; } - plan = (Plan *) make_setop(cmd, plan, sortList, list_length(op->colTypes) + 1); + plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1); - *sortClauses = sortList; + *sortClauses = groupList; return plan; } @@ -465,6 +471,171 @@ recurse_union_children(Node *setOp, PlannerInfo *root, &child_sortclauses)); } +/* + * Add nodes to the given plan tree to unique-ify the result of a UNION. + */ +static Plan * +make_union_unique(SetOperationStmt *op, Plan *plan, + PlannerInfo *root, double tuple_fraction, + List **sortClauses) +{ + List *groupList; + double dNumDistinctRows; + long numDistinctRows; + + /* Identify the grouping semantics */ + groupList = generate_setop_grouplist(op, plan->targetlist); + + /* punt if nothing to group on (can this happen?) */ + if (groupList == NIL) + { + *sortClauses = NIL; + return plan; + } + + /* + * XXX for the moment, take the number of distinct groups as being the + * total input size, ie, the worst case. This is too conservative, but + * we don't want to risk having the hashtable overrun memory; also, + * it's not clear how to get a decent estimate of the true size. One + * should note as well the propensity of novices to write UNION rather + * than UNION ALL even when they don't expect any duplicates... + */ + dNumDistinctRows = plan->plan_rows; + + /* Also convert to long int --- but 'ware overflow! */ + numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); + + /* Decide whether to hash or sort */ + if (choose_hashed_setop(root, groupList, plan, + tuple_fraction, dNumDistinctRows, + "UNION")) + { + /* Hashed aggregate plan --- no sort needed */ + plan = (Plan *) make_agg(root, + plan->targetlist, + NIL, + AGG_HASHED, + list_length(groupList), + extract_grouping_cols(groupList, + plan->targetlist), + extract_grouping_ops(groupList), + numDistinctRows, + 0, + plan); + /* Hashed aggregation produces randomly-ordered results */ + *sortClauses = NIL; + } + else + { + /* Sort and Unique */ + plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan); + plan = (Plan *) make_unique(plan, groupList); + plan->plan_rows = dNumDistinctRows; + /* We know the sort order of the result */ + *sortClauses = groupList; + } + + return plan; +} + +/* + * choose_hashed_setop - should we use hashing for a set operation? + */ +static bool +choose_hashed_setop(PlannerInfo *root, List *groupClauses, + Plan *input_plan, + double tuple_fraction, double dNumDistinctRows, + const char *construct) +{ + int numDistinctCols = list_length(groupClauses); + bool can_sort; + bool can_hash; + Size hashentrysize; + List *needed_pathkeys; + Path hashed_p; + Path sorted_p; + + /* Check whether the operators support sorting or hashing */ + can_sort = grouping_is_sortable(groupClauses); + can_hash = grouping_is_hashable(groupClauses); + if (can_hash && can_sort) + { + /* we have a meaningful choice to make, continue ... */ + } + else if (can_hash) + return true; + else if (can_sort) + return false; + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /* translator: %s is UNION, INTERSECT, or EXCEPT */ + errmsg("could not implement %s", construct), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + + /* Prefer sorting when enable_hashagg is off */ + if (!enable_hashagg) + return false; + + /* + * Don't do it if it doesn't look like the hashtable will fit into + * work_mem. + */ + hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData)); + + if (hashentrysize * dNumDistinctRows > work_mem * 1024L) + return false; + + /* + * See if the estimated cost is no more than doing it the other way. + * + * We need to consider input_plan + hashagg versus input_plan + sort + + * group. Note that the actual result plan might involve a SetOp or + * Unique node, not Agg or Group, but the cost estimates for Agg and Group + * should be close enough for our purposes here. + * + * These path variables are dummies that just hold cost fields; we don't + * make actual Paths for these steps. + */ + cost_agg(&hashed_p, root, AGG_HASHED, 0, + numDistinctCols, dNumDistinctRows, + input_plan->startup_cost, input_plan->total_cost, + input_plan->plan_rows); + + /* + * Now for the sorted case. Note that the input is *always* unsorted, + * since it was made by appending unrelated sub-relations together. + */ + sorted_p.startup_cost = input_plan->startup_cost; + sorted_p.total_cost = input_plan->total_cost; + /* XXX this is more expensive than cost_sort really needs: */ + needed_pathkeys = make_pathkeys_for_sortclauses(root, + groupClauses, + input_plan->targetlist, + true); + cost_sort(&sorted_p, root, needed_pathkeys, sorted_p.total_cost, + input_plan->plan_rows, input_plan->plan_width, -1.0); + cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows, + sorted_p.startup_cost, sorted_p.total_cost, + input_plan->plan_rows); + + /* + * Now make the decision using the top-level tuple fraction. First we + * have to convert an absolute count (LIMIT) into fractional form. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= dNumDistinctRows; + + if (compare_fractional_path_costs(&hashed_p, &sorted_p, + tuple_fraction) < 0) + { + /* Hashed is cheaper, so use it */ + return true; + } + return false; +} + /* * Generate targetlist for a set-operation plan node * @@ -677,30 +848,47 @@ generate_append_tlist(List *colTypes, bool flag, } /* - * generate_setop_sortlist - * Build a SortGroupClause list enumerating all the non-resjunk - * tlist entries, using default ordering properties. + * generate_setop_grouplist + * Build a SortGroupClause list defining the sort/grouping properties + * of the setop's output columns. * - * For now, we require all the items to be sortable. Eventually we - * should implement hashing setops and allow hash-only datatypes. + * Parse analysis already determined the properties and built a suitable + * list, except that the entries do not have sortgrouprefs set because + * the parser output representation doesn't include a tlist for each + * setop. So what we need to do here is copy that list and install + * proper sortgrouprefs into it and into the targetlist. */ static List * -generate_setop_sortlist(List *targetlist) +generate_setop_grouplist(SetOperationStmt *op, List *targetlist) { - List *sortlist = NIL; - ListCell *l; + List *grouplist = (List *) copyObject(op->groupClauses); + ListCell *lg; + ListCell *lt; + Index refno = 1; - foreach(l, targetlist) + lg = list_head(grouplist); + foreach(lt, targetlist) { - TargetEntry *tle = (TargetEntry *) lfirst(l); + TargetEntry *tle = (TargetEntry *) lfirst(lt); + SortGroupClause *sgc; - if (!tle->resjunk) - sortlist = addTargetToGroupList(NULL, tle, - sortlist, targetlist, - true, /* XXX fixme someday */ - false); + /* tlist shouldn't have any sortgrouprefs yet */ + Assert(tle->ressortgroupref == 0); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + /* non-resjunk columns should have grouping clauses */ + Assert(lg != NULL); + sgc = (SortGroupClause *) lfirst(lg); + lg = lnext(lg); + Assert(sgc->tleSortGroupRef == 0); + + /* we could use assignSortGroupRef here, but seems a bit silly */ + sgc->tleSortGroupRef = tle->ressortgroupref = refno++; } - return sortlist; + Assert(lg == NULL); + return grouplist; } diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 04ef2224c9..858d4abcbd 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.261 2008/08/07 01:11:50 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -3933,6 +3933,8 @@ expression_tree_walker(Node *node, return true; if (walker(setop->rarg, context)) return true; + + /* groupClauses are deemed uninteresting */ } break; case T_InClauseInfo: @@ -4535,6 +4537,7 @@ expression_tree_mutator(Node *node, FLATCOPY(newnode, setop, SetOperationStmt); MUTATE(newnode->larg, setop->larg, Node *); MUTATE(newnode->rarg, setop->rarg, Node *); + /* We do not mutate groupClauses by default */ return (Node *) newnode; } break; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ad3c10ac27..655443e9ef 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.145 2008/08/07 01:11:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,7 +24,6 @@ #include "optimizer/paths.h" #include "optimizer/tlist.h" #include "parser/parse_expr.h" -#include "parser/parse_oper.h" #include "parser/parsetree.h" #include "utils/selfuncs.h" #include "utils/lsyscache.h" @@ -1003,12 +1002,6 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) /* * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, * except with ALL. - * - * XXX this code knows that prepunion.c will adopt the default sort/group - * operators for each column datatype to determine uniqueness. It'd - * probably be better if these operators were chosen at parse time and - * stored into the parsetree, instead of leaving bits of the planner to - * decide semantics. */ if (query->setOperations) { @@ -1019,24 +1012,26 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) if (!topop->all) { + ListCell *lg; + /* We're good if all the nonjunk output columns are in colnos */ + lg = list_head(topop->groupClauses); foreach(l, query->targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); - Oid tle_eq_opr; + SortGroupClause *sgc; if (tle->resjunk) continue; /* ignore resjunk columns */ + /* non-resjunk columns should have grouping clauses */ + Assert(lg != NULL); + sgc = (SortGroupClause *) lfirst(lg); + lg = lnext(lg); + opid = distinct_col_search(tle->resno, colnos, opids); - if (!OidIsValid(opid)) - break; /* exit early if no match */ - /* check for compatible semantics */ - get_sort_group_operators(exprType((Node *) tle->expr), - false, false, false, - NULL, &tle_eq_opr, NULL); - if (!OidIsValid(tle_eq_opr) || - !equality_ops_are_compatible(opid, tle_eq_opr)) + if (!OidIsValid(opid) || + !equality_ops_are_compatible(opid, sgc->eqop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index fc0880ad56..a2c627fd4d 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.80 2008/08/07 01:11:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,6 +18,7 @@ #include "optimizer/tlist.h" #include "optimizer/var.h" #include "parser/parse_expr.h" +#include "utils/lsyscache.h" /***************************************************************************** @@ -202,6 +203,109 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList) } +/***************************************************************************** + * Functions to extract data from a list of SortGroupClauses + * + * These don't really belong in tlist.c, but they are sort of related to the + * functions just above, and they don't seem to deserve their own file. + *****************************************************************************/ + +/* + * extract_grouping_ops - make an array of the equality operator OIDs + * for a SortGroupClause list + */ +Oid * +extract_grouping_ops(List *groupClause) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *groupOperators; + ListCell *glitem; + + groupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + groupOperators[colno] = groupcl->eqop; + Assert(OidIsValid(groupOperators[colno])); + colno++; + } + + return groupOperators; +} + +/* + * extract_grouping_cols - make an array of the grouping column resnos + * for a SortGroupClause list + */ +AttrNumber * +extract_grouping_cols(List *groupClause, List *tlist) +{ + AttrNumber *grpColIdx; + int numCols = list_length(groupClause); + int colno = 0; + ListCell *glitem; + + grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpColIdx[colno++] = tle->resno; + } + + return grpColIdx; +} + +/* + * grouping_is_sortable - is it possible to implement grouping list by sorting? + * + * This is easy since the parser will have included a sortop if one exists. + */ +bool +grouping_is_sortable(List *groupClause) +{ + ListCell *glitem; + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + if (!OidIsValid(groupcl->sortop)) + return false; + } + return true; +} + +/* + * grouping_is_hashable - is it possible to implement grouping list by hashing? + * + * We assume hashing is OK if the equality operators are marked oprcanhash. + * (If there isn't actually a supporting hash function, the executor will + * complain at runtime; but this is a misdeclaration of the operator, not + * a system bug.) + */ +bool +grouping_is_hashable(List *groupClause) +{ + ListCell *glitem; + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + + if (!op_hashjoinable(groupcl->eqop)) + return false; + } + return true; +} + + + /* * Does tlist have same output datatypes as listed in colTypes? * diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 2cc3f28d20..3de232ba71 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -17,7 +17,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.375 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.376 2008/08/07 01:11:51 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parse_expr.h" +#include "parser/parse_oper.h" #include "parser/parse_relation.h" #include "parser/parse_target.h" #include "parser/parsetree.h" @@ -1326,6 +1327,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt) op->colTypes = NIL; op->colTypmods = NIL; + op->groupClauses = NIL; /* don't have a "foreach4", so chase two of the lists by hand */ lcm = list_head(lcoltypmods); rcm = list_head(rcoltypmods); @@ -1349,6 +1351,31 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt) op->colTypes = lappend_oid(op->colTypes, rescoltype); op->colTypmods = lappend_int(op->colTypmods, rescoltypmod); + /* + * For all cases except UNION ALL, identify the grouping operators + * (and, if available, sorting operators) that will be used to + * eliminate duplicates. + */ + if (op->op != SETOP_UNION || !op->all) + { + SortGroupClause *grpcl = makeNode(SortGroupClause); + Oid sortop; + Oid eqop; + + /* determine the eqop and optional sortop */ + get_sort_group_operators(rescoltype, + false, true, false, + &sortop, &eqop, NULL); + + /* we don't have a tlist yet, so can't assign sortgrouprefs */ + grpcl->tleSortGroupRef = 0; + grpcl->eqop = eqop; + grpcl->sortop = sortop; + grpcl->nulls_first = false; /* OK with or without sortop */ + + op->groupClauses = lappend(op->groupClauses, grpcl); + } + lcm = lnext(lcm); rcm = lnext(rcm); } diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 2b04ee5e33..79b45414d4 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.174 2008/08/05 02:43:17 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.175 2008/08/07 01:11:51 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -70,6 +70,9 @@ static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, List *sortlist, List *targetlist, SortByDir sortby_dir, SortByNulls sortby_nulls, List *sortby_opname, bool resolveUnknown); +static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, + List *grouplist, List *targetlist, + bool resolveUnknown); /* @@ -1355,7 +1358,7 @@ transformGroupClause(ParseState *pstate, List *grouplist, if (!found) result = addTargetToGroupList(pstate, tle, result, *targetlist, - false, true); + true); } return result; @@ -1456,7 +1459,7 @@ transformDistinctClause(ParseState *pstate, continue; /* ignore junk */ result = addTargetToGroupList(pstate, tle, result, *targetlist, - false, true); + true); } return result; @@ -1551,7 +1554,7 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist, errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); result = addTargetToGroupList(pstate, tle, result, *targetlist, - false, true); + true); } return result; @@ -1679,10 +1682,6 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, * the TLE is considered "already in the list" if it appears there with any * sorting semantics. * - * If requireSortOp is TRUE, we require a sorting operator to be found too. - * XXX this argument should eventually be obsolete, but for now there are - * parts of the system that can't support non-sortable grouping lists. - * * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not, * do nothing (which implies the search for an equality operator will fail). * pstate should be provided if resolveUnknown is TRUE, but can be NULL @@ -1690,10 +1689,10 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, * * Returns the updated SortGroupClause list. */ -List * +static List * addTargetToGroupList(ParseState *pstate, TargetEntry *tle, List *grouplist, List *targetlist, - bool requireSortOp, bool resolveUnknown) + bool resolveUnknown) { Oid restype = exprType((Node *) tle->expr); Oid sortop; @@ -1716,7 +1715,7 @@ addTargetToGroupList(ParseState *pstate, TargetEntry *tle, /* determine the eqop and optional sortop */ get_sort_group_operators(restype, - requireSortOp, true, false, + false, true, false, &sortop, &eqop, NULL); grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 6be25b05f9..b844577c88 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.475 2008/08/05 12:09:30 mha Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.476 2008/08/07 01:11:51 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200808051 +#define CATALOG_VERSION_NO 200808061 #endif diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index f1a1e828fc..65c3698a84 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.370 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.371 2008/08/07 01:11:51 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -797,7 +797,12 @@ typedef struct SelectStmt * top-level Query node containing the leaf SELECTs as subqueries in its * range table. Its setOperations field shows the tree of set operations, * with leaf SelectStmt nodes replaced by RangeTblRef nodes, and internal - * nodes replaced by SetOperationStmt nodes. + * nodes replaced by SetOperationStmt nodes. Information about the output + * column types is added, too. (Note that the child nodes do not necessarily + * produce these types directly, but we've checked that their output types + * can be coerced to the output column type.) Also, if it's not UNION ALL, + * information about the types' sort/group semantics is provided in the form + * of a SortGroupClause list (same representation as, eg, DISTINCT). * ---------------------- */ typedef struct SetOperationStmt @@ -812,6 +817,8 @@ typedef struct SetOperationStmt /* Fields derived during parse analysis: */ List *colTypes; /* OID list of output column type OIDs */ List *colTypmods; /* integer list of output column typmods */ + List *groupClauses; /* a list of SortGroupClause's */ + /* groupClauses is NIL if UNION ALL, but must be set otherwise */ } SetOperationStmt; diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 8a8966213b..9899b14c82 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.50 2008/08/02 21:32:01 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.51 2008/08/07 01:11:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,6 +32,11 @@ extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause, extern List *get_sortgrouplist_exprs(List *sgClauses, List *targetList); +extern Oid *extract_grouping_ops(List *groupClause); +extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); +extern bool grouping_is_sortable(List *groupClause); +extern bool grouping_is_hashable(List *groupClause); + extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); #endif /* TLIST_H */ diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index 357a2947cb..ffea3466b5 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.51 2008/08/02 21:32:01 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.52 2008/08/07 01:11:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -35,9 +35,6 @@ extern List *transformDistinctClause(ParseState *pstate, extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist, List **targetlist, List *sortClause); -extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, - List *grouplist, List *targetlist, - bool requireSortOp, bool resolveUnknown); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); diff --git a/src/test/regress/expected/copyselect.out b/src/test/regress/expected/copyselect.out index c42bad143e..8a42b0e3d8 100644 --- a/src/test/regress/expected/copyselect.out +++ b/src/test/regress/expected/copyselect.out @@ -73,7 +73,7 @@ copy (select * from test1 join test2 using (id)) to stdout; -- -- Test UNION SELECT -- -copy (select t from test1 where id = 1 UNION select * from v_test1) to stdout; +copy (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) to stdout; a v_a v_b @@ -83,7 +83,7 @@ v_e -- -- Test subselect -- -copy (select * from (select t from test1 where id = 1 UNION select * from v_test1) t1) to stdout; +copy (select * from (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) t1) to stdout; a v_a v_b diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index abcbc9503a..722f9651be 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -102,7 +102,7 @@ SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3; 3 (3 rows) -SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8; +SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8 ORDER BY 1; two ----- 1.1 @@ -129,7 +129,8 @@ SELECT 1.1 AS two UNION (SELECT 2 UNION ALL SELECT 2); -- SELECT f1 AS five FROM FLOAT8_TBL UNION -SELECT f1 FROM FLOAT8_TBL; +SELECT f1 FROM FLOAT8_TBL +ORDER BY 1; five ----------------------- -1.2345678901234e+200 @@ -158,7 +159,8 @@ SELECT f1 FROM FLOAT8_TBL; SELECT f1 AS nine FROM FLOAT8_TBL UNION -SELECT f1 FROM INT4_TBL; +SELECT f1 FROM INT4_TBL +ORDER BY 1; nine ----------------------- -1.2345678901234e+200 @@ -205,7 +207,8 @@ SELECT f1 FROM INT4_TBL SELECT CAST(f1 AS char(4)) AS three FROM VARCHAR_TBL UNION -SELECT f1 FROM CHAR_TBL; +SELECT f1 FROM CHAR_TBL +ORDER BY 1; three ------- a @@ -215,7 +218,8 @@ SELECT f1 FROM CHAR_TBL; SELECT f1 AS three FROM VARCHAR_TBL UNION -SELECT CAST(f1 AS varchar) FROM CHAR_TBL; +SELECT CAST(f1 AS varchar) FROM CHAR_TBL +ORDER BY 1; three ------- a @@ -242,7 +246,8 @@ SELECT f1 AS five FROM TEXT_TBL UNION SELECT f1 FROM VARCHAR_TBL UNION -SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL; +SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL +ORDER BY 1; five ------------------- a diff --git a/src/test/regress/sql/copyselect.sql b/src/test/regress/sql/copyselect.sql index c2526487c8..beca507ae7 100644 --- a/src/test/regress/sql/copyselect.sql +++ b/src/test/regress/sql/copyselect.sql @@ -53,11 +53,11 @@ copy (select * from test1 join test2 using (id)) to stdout; -- -- Test UNION SELECT -- -copy (select t from test1 where id = 1 UNION select * from v_test1) to stdout; +copy (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) to stdout; -- -- Test subselect -- -copy (select * from (select t from test1 where id = 1 UNION select * from v_test1) t1) to stdout; +copy (select * from (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) t1) to stdout; -- -- Test headers, CSV and quotes -- diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index 0f846091cd..0b83d6b185 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -34,7 +34,7 @@ SELECT 1.0::float8 AS two UNION ALL SELECT 1; SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3; -SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8; +SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8 ORDER BY 1; SELECT 1.1 AS three UNION SELECT 2 UNION ALL SELECT 2; @@ -46,7 +46,8 @@ SELECT 1.1 AS two UNION (SELECT 2 UNION ALL SELECT 2); SELECT f1 AS five FROM FLOAT8_TBL UNION -SELECT f1 FROM FLOAT8_TBL; +SELECT f1 FROM FLOAT8_TBL +ORDER BY 1; SELECT f1 AS ten FROM FLOAT8_TBL UNION ALL @@ -54,7 +55,8 @@ SELECT f1 FROM FLOAT8_TBL; SELECT f1 AS nine FROM FLOAT8_TBL UNION -SELECT f1 FROM INT4_TBL; +SELECT f1 FROM INT4_TBL +ORDER BY 1; SELECT f1 AS ten FROM FLOAT8_TBL UNION ALL @@ -68,11 +70,13 @@ SELECT f1 FROM INT4_TBL SELECT CAST(f1 AS char(4)) AS three FROM VARCHAR_TBL UNION -SELECT f1 FROM CHAR_TBL; +SELECT f1 FROM CHAR_TBL +ORDER BY 1; SELECT f1 AS three FROM VARCHAR_TBL UNION -SELECT CAST(f1 AS varchar) FROM CHAR_TBL; +SELECT CAST(f1 AS varchar) FROM CHAR_TBL +ORDER BY 1; SELECT f1 AS eight FROM VARCHAR_TBL UNION ALL @@ -82,7 +86,8 @@ SELECT f1 AS five FROM TEXT_TBL UNION SELECT f1 FROM VARCHAR_TBL UNION -SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL; +SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL +ORDER BY 1; -- -- INTERSECT and EXCEPT