diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 793071b0f0..d58b9d4a2f 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.76 2008/06/14 18:04:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.77 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1335,7 +1335,7 @@ find_expr_references_walker(Node *node, } return false; } - if (IsA(node, Const)) + else if (IsA(node, Const)) { Const *con = (Const *) node; Oid objoid; @@ -1408,7 +1408,7 @@ find_expr_references_walker(Node *node, } return false; } - if (IsA(node, Param)) + else if (IsA(node, Param)) { Param *param = (Param *) node; @@ -1416,7 +1416,7 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, param->paramtype, 0, context->addrs); } - if (IsA(node, FuncExpr)) + else if (IsA(node, FuncExpr)) { FuncExpr *funcexpr = (FuncExpr *) node; @@ -1424,7 +1424,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, OpExpr)) + else if (IsA(node, OpExpr)) { OpExpr *opexpr = (OpExpr *) node; @@ -1432,7 +1432,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, DistinctExpr)) + else if (IsA(node, DistinctExpr)) { DistinctExpr *distinctexpr = (DistinctExpr *) node; @@ -1440,7 +1440,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, ScalarArrayOpExpr)) + else if (IsA(node, ScalarArrayOpExpr)) { ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node; @@ -1448,7 +1448,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, NullIfExpr)) + else if (IsA(node, NullIfExpr)) { NullIfExpr *nullifexpr = (NullIfExpr *) node; @@ -1456,7 +1456,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, Aggref)) + else if (IsA(node, Aggref)) { Aggref *aggref = (Aggref *) node; @@ -1464,12 +1464,12 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (is_subplan(node)) + else if (is_subplan(node)) { /* Extra work needed here if we ever need this case */ elog(ERROR, "already-planned subqueries not supported"); } - if (IsA(node, RelabelType)) + else if (IsA(node, RelabelType)) { RelabelType *relab = (RelabelType *) node; @@ -1477,7 +1477,7 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, relab->resulttype, 0, context->addrs); } - if (IsA(node, CoerceViaIO)) + else if (IsA(node, CoerceViaIO)) { CoerceViaIO *iocoerce = (CoerceViaIO *) node; @@ -1485,7 +1485,7 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, iocoerce->resulttype, 0, context->addrs); } - if (IsA(node, ArrayCoerceExpr)) + else if (IsA(node, ArrayCoerceExpr)) { ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node; @@ -1496,7 +1496,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - if (IsA(node, ConvertRowtypeExpr)) + else if (IsA(node, ConvertRowtypeExpr)) { ConvertRowtypeExpr *cvt = (ConvertRowtypeExpr *) node; @@ -1504,14 +1504,14 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, cvt->resulttype, 0, context->addrs); } - if (IsA(node, RowExpr)) + else if (IsA(node, RowExpr)) { RowExpr *rowexpr = (RowExpr *) node; add_object_address(OCLASS_TYPE, rowexpr->row_typeid, 0, context->addrs); } - if (IsA(node, RowCompareExpr)) + else if (IsA(node, RowCompareExpr)) { RowCompareExpr *rcexpr = (RowCompareExpr *) node; ListCell *l; @@ -1528,14 +1528,25 @@ find_expr_references_walker(Node *node, } /* fall through to examine arguments */ } - if (IsA(node, CoerceToDomain)) + else if (IsA(node, CoerceToDomain)) { CoerceToDomain *cd = (CoerceToDomain *) node; add_object_address(OCLASS_TYPE, cd->resulttype, 0, context->addrs); } - if (IsA(node, Query)) + else if (IsA(node, SortGroupClause)) + { + SortGroupClause *sgc = (SortGroupClause *) node; + + add_object_address(OCLASS_OPERATOR, sgc->eqop, 0, + context->addrs); + if (OidIsValid(sgc->sortop)) + add_object_address(OCLASS_OPERATOR, sgc->sortop, 0, + context->addrs); + return false; + } + else if (IsA(node, Query)) { /* Recurse into RTE subquery or not-yet-planned sublink subquery */ Query *query = (Query *) node; @@ -1572,6 +1583,11 @@ find_expr_references_walker(Node *node, } } + /* query_tree_walker ignores ORDER BY etc, but we need those opers */ + find_expr_references_walker((Node *) query->sortClause, context); + find_expr_references_walker((Node *) query->groupClause, context); + find_expr_references_walker((Node *) query->distinctClause, context); + /* Examine substructure of query */ context->rtables = lcons(query->rtable, context->rtables); result = query_tree_walker(query, diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 4845e6a381..3b8423999a 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.123 2008/07/01 10:33:09 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.124 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1490,10 +1490,8 @@ static bool std_typanalyze(VacAttrStats *stats) { Form_pg_attribute attr = stats->attr; - Operator func_operator; - Oid eqopr = InvalidOid; - Oid eqfunc = InvalidOid; - Oid ltopr = InvalidOid; + Oid ltopr; + Oid eqopr; StdAnalyzeData *mystats; /* If the attstattarget column is negative, use the default value */ @@ -1501,29 +1499,19 @@ std_typanalyze(VacAttrStats *stats) if (attr->attstattarget < 0) attr->attstattarget = default_statistics_target; - /* If column has no "=" operator, we can't do much of anything */ - func_operator = equality_oper(attr->atttypid, true); - if (func_operator != NULL) - { - eqopr = oprid(func_operator); - eqfunc = oprfuncid(func_operator); - ReleaseSysCache(func_operator); - } - if (!OidIsValid(eqfunc)) - return false; + /* Look for default "<" and "=" operators for column's type */ + get_sort_group_operators(attr->atttypid, + false, false, false, + <opr, &eqopr, NULL); - /* Is there a "<" operator with suitable semantics? */ - func_operator = ordering_oper(attr->atttypid, true); - if (func_operator != NULL) - { - ltopr = oprid(func_operator); - ReleaseSysCache(func_operator); - } + /* If column has no "=" operator, we can't do much of anything */ + if (!OidIsValid(eqopr)) + return false; /* Save the operator info for compute_stats routines */ mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData)); mystats->eqopr = eqopr; - mystats->eqfunc = eqfunc; + mystats->eqfunc = get_opcode(eqopr); mystats->ltopr = ltopr; stats->extra_data = mystats; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 00bd21ba6f..9bcff0f8df 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -61,7 +61,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.158 2008/05/12 00:00:49 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.159 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1496,7 +1496,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) if (aggref->aggdistinct) { - Oid eq_function; + Oid lt_opr; + Oid eq_opr; /* We don't implement DISTINCT aggs in the HASHED case */ Assert(node->aggstrategy != AGG_HASHED); @@ -1524,9 +1525,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) * record it in the Aggref node ... or at latest, do it in the * planner. */ - eq_function = equality_oper_funcid(inputTypes[0]); - fmgr_info(eq_function, &(peraggstate->equalfn)); - peraggstate->sortOperator = ordering_oper_opid(inputTypes[0]); + get_sort_group_operators(inputTypes[0], + true, true, false, + <_opr, &eq_opr, NULL); + fmgr_info(get_opcode(eq_opr), &(peraggstate->equalfn)); + peraggstate->sortOperator = lt_opr; peraggstate->sortstate = NULL; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 2e1ce4cb0d..652c726a67 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.395 2008/07/16 01:30:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.396 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1542,24 +1542,13 @@ _copyFkConstraint(FkConstraint *from) return newnode; } -static SortClause * -_copySortClause(SortClause *from) +static SortGroupClause * +_copySortGroupClause(SortGroupClause *from) { - SortClause *newnode = makeNode(SortClause); - - COPY_SCALAR_FIELD(tleSortGroupRef); - COPY_SCALAR_FIELD(sortop); - COPY_SCALAR_FIELD(nulls_first); - - return newnode; -} - -static GroupClause * -_copyGroupClause(GroupClause *from) -{ - GroupClause *newnode = makeNode(GroupClause); + SortGroupClause *newnode = makeNode(SortGroupClause); COPY_SCALAR_FIELD(tleSortGroupRef); + COPY_SCALAR_FIELD(eqop); COPY_SCALAR_FIELD(sortop); COPY_SCALAR_FIELD(nulls_first); @@ -1861,6 +1850,7 @@ _copyQuery(Query *from) COPY_NODE_FIELD(intoClause); COPY_SCALAR_FIELD(hasAggs); COPY_SCALAR_FIELD(hasSubLinks); + COPY_SCALAR_FIELD(hasDistinctOn); COPY_NODE_FIELD(rtable); COPY_NODE_FIELD(jointree); COPY_NODE_FIELD(targetList); @@ -3586,11 +3576,8 @@ copyObject(void *from) case T_RangeTblEntry: retval = _copyRangeTblEntry(from); break; - case T_SortClause: - retval = _copySortClause(from); - break; - case T_GroupClause: - retval = _copyGroupClause(from); + case T_SortGroupClause: + retval = _copySortGroupClause(from); break; case T_RowMarkClause: retval = _copyRowMarkClause(from); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 41999226b6..4e011947ef 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.324 2008/07/16 01:30:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.325 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -756,6 +756,7 @@ _equalQuery(Query *a, Query *b) COMPARE_NODE_FIELD(intoClause); COMPARE_SCALAR_FIELD(hasAggs); COMPARE_SCALAR_FIELD(hasSubLinks); + COMPARE_SCALAR_FIELD(hasDistinctOn); COMPARE_NODE_FIELD(rtable); COMPARE_NODE_FIELD(jointree); COMPARE_NODE_FIELD(targetList); @@ -1885,9 +1886,10 @@ _equalRangeTblEntry(RangeTblEntry *a, RangeTblEntry *b) } static bool -_equalSortClause(SortClause *a, SortClause *b) +_equalSortGroupClause(SortGroupClause *a, SortGroupClause *b) { COMPARE_SCALAR_FIELD(tleSortGroupRef); + COMPARE_SCALAR_FIELD(eqop); COMPARE_SCALAR_FIELD(sortop); COMPARE_SCALAR_FIELD(nulls_first); @@ -2515,12 +2517,8 @@ equal(void *a, void *b) case T_RangeTblEntry: retval = _equalRangeTblEntry(a, b); break; - case T_SortClause: - retval = _equalSortClause(a, b); - break; - case T_GroupClause: - /* GroupClause is equivalent to SortClause */ - retval = _equalSortClause(a, b); + case T_SortGroupClause: + retval = _equalSortGroupClause(a, b); break; case T_RowMarkClause: retval = _equalRowMarkClause(a, b); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9c0726e42e..66630ae661 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.328 2008/07/17 16:02:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.329 2008/08/02 21:31:59 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1732,6 +1732,7 @@ _outQuery(StringInfo str, Query *node) WRITE_NODE_FIELD(intoClause); WRITE_BOOL_FIELD(hasAggs); WRITE_BOOL_FIELD(hasSubLinks); + WRITE_BOOL_FIELD(hasDistinctOn); WRITE_NODE_FIELD(rtable); WRITE_NODE_FIELD(jointree); WRITE_NODE_FIELD(targetList); @@ -1747,21 +1748,12 @@ _outQuery(StringInfo str, Query *node) } static void -_outSortClause(StringInfo str, SortClause *node) +_outSortGroupClause(StringInfo str, SortGroupClause *node) { - WRITE_NODE_TYPE("SORTCLAUSE"); - - WRITE_UINT_FIELD(tleSortGroupRef); - WRITE_OID_FIELD(sortop); - WRITE_BOOL_FIELD(nulls_first); -} - -static void -_outGroupClause(StringInfo str, GroupClause *node) -{ - WRITE_NODE_TYPE("GROUPCLAUSE"); + WRITE_NODE_TYPE("SORTGROUPCLAUSE"); WRITE_UINT_FIELD(tleSortGroupRef); + WRITE_OID_FIELD(eqop); WRITE_OID_FIELD(sortop); WRITE_BOOL_FIELD(nulls_first); } @@ -2398,11 +2390,8 @@ _outNode(StringInfo str, void *obj) case T_Query: _outQuery(str, obj); break; - case T_SortClause: - _outSortClause(str, obj); - break; - case T_GroupClause: - _outGroupClause(str, obj); + case T_SortGroupClause: + _outSortGroupClause(str, obj); break; case T_RowMarkClause: _outRowMarkClause(str, obj); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index de1f1b5348..e7de4898c0 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.210 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.211 2008/08/02 21:31:59 tgl Exp $ * * NOTES * Path and Plan nodes do not have any readfuncs support, because we @@ -142,6 +142,7 @@ _readQuery(void) READ_NODE_FIELD(intoClause); READ_BOOL_FIELD(hasAggs); READ_BOOL_FIELD(hasSubLinks); + READ_BOOL_FIELD(hasDistinctOn); READ_NODE_FIELD(rtable); READ_NODE_FIELD(jointree); READ_NODE_FIELD(targetList); @@ -187,29 +188,15 @@ _readDeclareCursorStmt(void) } /* - * _readSortClause + * _readSortGroupClause */ -static SortClause * -_readSortClause(void) +static SortGroupClause * +_readSortGroupClause(void) { - READ_LOCALS(SortClause); - - READ_UINT_FIELD(tleSortGroupRef); - READ_OID_FIELD(sortop); - READ_BOOL_FIELD(nulls_first); - - READ_DONE(); -} - -/* - * _readGroupClause - */ -static GroupClause * -_readGroupClause(void) -{ - READ_LOCALS(GroupClause); + READ_LOCALS(SortGroupClause); READ_UINT_FIELD(tleSortGroupRef); + READ_OID_FIELD(eqop); READ_OID_FIELD(sortop); READ_BOOL_FIELD(nulls_first); @@ -1030,10 +1017,8 @@ parseNodeString(void) if (MATCH("QUERY", 5)) return_value = _readQuery(); - else if (MATCH("SORTCLAUSE", 10)) - return_value = _readSortClause(); - else if (MATCH("GROUPCLAUSE", 11)) - return_value = _readGroupClause(); + else if (MATCH("SORTGROUPCLAUSE", 15)) + return_value = _readSortGroupClause(); else if (MATCH("ROWMARKCLAUSE", 13)) return_value = _readRowMarkClause(); else if (MATCH("SETOPERATIONSTMT", 16)) diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 9a9aba2a41..ba18db0dd9 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.46 2008/04/09 01:00:46 momjian Exp $ +$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.47 2008/08/02 21:31:59 tgl Exp $ Optimizer ========= @@ -563,8 +563,8 @@ competing Paths are equivalently sorted. Pathkeys are also useful to represent an ordering that we wish to achieve, since they are easily compared to the pathkeys of a potential candidate -path. So, SortClause lists are turned into pathkeys lists for use inside -the optimizer. +path. So, SortGroupClause lists are turned into pathkeys lists for use +inside the optimizer. Because we have to generate pathkeys lists from the sort clauses before we've finished EquivalenceClass merging, we cannot use the pointer-equality diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c833586813..b13371e00e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.171 2008/06/27 03:56:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.172 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -963,11 +963,12 @@ compare_tlist_datatypes(List *tlist, List *colTypes, * * 4. If the subquery uses DISTINCT ON, we must not push down any quals that * refer to non-DISTINCT output columns, because that could change the set - * of rows returned. This condition is vacuous for DISTINCT, because then - * there are no non-DISTINCT output columns, but unfortunately it's fairly - * expensive to tell the difference between DISTINCT and DISTINCT ON in the - * parsetree representation. It's cheaper to just make sure all the Vars - * in the qual refer to DISTINCT columns. + * of rows returned. (This condition is vacuous for DISTINCT, because then + * there are no non-DISTINCT output columns, so we needn't check. But note + * we are assuming that the qual can't distinguish values that the DISTINCT + * operator sees as equal. This is a bit shaky but we have no way to test + * for the case, and it's unlikely enough that we shouldn't refuse the + * optimization just because it could theoretically happen.) * * 5. We must not push down any quals that refer to subselect outputs that * return sets, else we'd introduce functions-returning-sets into the @@ -1030,8 +1031,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, Assert(tle != NULL); Assert(!tle->resjunk); - /* If subquery uses DISTINCT or DISTINCT ON, check point 4 */ - if (subquery->distinctClause != NIL && + /* If subquery uses DISTINCT ON, check point 4 */ + if (subquery->hasDistinctOn && !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) { /* non-DISTINCT column, so fail */ diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index b289ea6e65..c4459e6e3e 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.10 2008/03/31 16:59:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.11 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -355,7 +355,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, * class it is a member of; if none, build a new single-member * EquivalenceClass for it. * - * sortref is the SortGroupRef of the originating SortClause, if any, + * sortref is the SortGroupRef of the originating SortGroupClause, if any, * or zero if not. * * This can be used safely both before and after EquivalenceClass merging; diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index f0141b2366..ccb23834de 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.94 2008/08/02 21:31:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -227,8 +227,8 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) * a PathKey. If canonicalize = true, the result is a "canonical" * PathKey, otherwise not. (But note it might be redundant anyway.) * - * If the PathKey is being generated from a SortClause, sortref should be - * the SortClause's SortGroupRef; otherwise zero. + * If the PathKey is being generated from a SortGroupClause, sortref should be + * the SortGroupClause's SortGroupRef; otherwise zero. * * canonicalize should always be TRUE after EquivalenceClass merging has * been performed, but FALSE if we haven't done EquivalenceClass merging yet. @@ -823,7 +823,7 @@ build_join_pathkeys(PlannerInfo *root, /* * make_pathkeys_for_sortclauses * Generate a pathkeys list that represents the sort order specified - * by a list of SortClauses (GroupClauses will work too!) + * by a list of SortGroupClauses * * If canonicalize is TRUE, the resulting PathKeys are all in canonical form; * otherwise not. canonicalize should always be TRUE after EquivalenceClass @@ -832,7 +832,7 @@ build_join_pathkeys(PlannerInfo *root, * be able to represent requested pathkeys before the equivalence classes have * been created for the query.) * - * 'sortclauses' is a list of SortClause or GroupClause nodes + * 'sortclauses' is a list of SortGroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ List * @@ -846,11 +846,12 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, foreach(l, sortclauses) { - SortClause *sortcl = (SortClause *) lfirst(l); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); Expr *sortkey; PathKey *pathkey; sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + Assert(OidIsValid(sortcl->sortop)); pathkey = make_pathkey_from_sortinfo(root, sortkey, sortcl->sortop, diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 0884cda607..80b6ed2edf 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.241 2008/06/27 03:56:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.242 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -760,7 +760,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) Oid in_oper = lfirst_oid(l); Oid sortop; TargetEntry *tle; - SortClause *sortcl; + SortGroupClause *sortcl; sortop = get_ordering_op_for_equality_op(in_oper, false); if (!OidIsValid(sortop)) /* shouldn't happen */ @@ -769,9 +769,10 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) tle = get_tle_by_resno(subplan->targetlist, groupColIdx[groupColPos]); Assert(tle != NULL); - sortcl = makeNode(SortClause); + sortcl = makeNode(SortGroupClause); sortcl->tleSortGroupRef = assignSortGroupRef(tle, subplan->targetlist); + sortcl->eqop = in_oper; sortcl->sortop = sortop; sortcl->nulls_first = false; sortList = lappend(sortList, sortcl); @@ -2531,6 +2532,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first, { int i; + Assert(OidIsValid(sortOp)); + for (i = 0; i < numCols; i++) { /* @@ -2753,7 +2756,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, * make_sort_from_sortclauses * Create sort plan to sort according to given sortclauses * - * 'sortcls' is a list of SortClauses + * 'sortcls' is a list of SortGroupClauses * 'lefttree' is the node which yields input tuples */ Sort * @@ -2778,7 +2781,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree) foreach(l, sortcls) { - SortClause *sortcl = (SortClause *) lfirst(l); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); TargetEntry *tle = get_sortgroupclause_tle(sortcl, sub_tlist); /* @@ -2802,14 +2805,14 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree) * make_sort_from_groupcols * Create sort plan to sort based on grouping columns * - * 'groupcls' is the list of GroupClauses + * 'groupcls' is the list of SortGroupClauses * 'grpColIdx' gives the column numbers to use * * This might look like it could be merged with make_sort_from_sortclauses, * but presently we *must* use the grpColIdx[] array to locate sort columns, * because the child plan's tlist is not marked with ressortgroupref info * appropriate to the grouping node. So, only the sort ordering info - * is used from the GroupClause entries. + * is used from the SortGroupClause entries. */ Sort * make_sort_from_groupcols(PlannerInfo *root, @@ -2837,7 +2840,7 @@ make_sort_from_groupcols(PlannerInfo *root, foreach(l, groupcls) { - GroupClause *grpcl = (GroupClause *) lfirst(l); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(l); TargetEntry *tle = get_tle_by_resno(sub_tlist, grpColIdx[grpno]); /* @@ -3038,7 +3041,7 @@ make_group(PlannerInfo *root, } /* - * distinctList is a list of SortClauses, identifying the targetlist items + * distinctList is a list of SortGroupClauses, identifying the targetlist items * that should be considered by the Unique filter. The input path must * already be sorted accordingly. */ @@ -3074,7 +3077,7 @@ make_unique(Plan *lefttree, List *distinctList) plan->righttree = NULL; /* - * convert SortClause list into arrays of attr indexes and equality + * convert SortGroupClause list into arrays of attr indexes and equality * operators, as wanted by executor */ Assert(numCols > 0); @@ -3083,14 +3086,12 @@ make_unique(Plan *lefttree, List *distinctList) foreach(slitem, distinctList) { - SortClause *sortcl = (SortClause *) lfirst(slitem); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); uniqColIdx[keyno] = tle->resno; - uniqOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop); - if (!OidIsValid(uniqOperators[keyno])) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - sortcl->sortop); + uniqOperators[keyno] = sortcl->eqop; + Assert(OidIsValid(uniqOperators[keyno])); keyno++; } @@ -3102,8 +3103,8 @@ make_unique(Plan *lefttree, List *distinctList) } /* - * distinctList is a list of SortClauses, identifying the targetlist items - * that should be considered by the SetOp filter. The input path must + * distinctList is a list of SortGroupClauses, identifying the targetlist + * items that should be considered by the SetOp filter. The input path must * already be sorted accordingly. */ SetOp * @@ -3140,7 +3141,7 @@ make_setop(SetOpCmd cmd, Plan *lefttree, plan->righttree = NULL; /* - * convert SortClause list into arrays of attr indexes and equality + * convert SortGroupClause list into arrays of attr indexes and equality * operators, as wanted by executor */ Assert(numCols > 0); @@ -3149,14 +3150,12 @@ make_setop(SetOpCmd cmd, Plan *lefttree, foreach(slitem, distinctList) { - SortClause *sortcl = (SortClause *) lfirst(slitem); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); dupColIdx[keyno] = tle->resno; - dupOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop); - if (!OidIsValid(dupOperators[keyno])) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - sortcl->sortop); + dupOperators[keyno] = sortcl->eqop; + Assert(OidIsValid(dupOperators[keyno])); keyno++; } diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index af140d4acb..a6b4b1df66 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.41 2008/07/10 02:14:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.42 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -477,7 +477,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) Plan *plan; Plan *iplan; TargetEntry *tle; - SortClause *sortcl; + SortGroupClause *sortcl; /* * Generate a suitably modified query. Much of the work here is probably @@ -492,6 +492,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) subparse->utilityStmt = NULL; subparse->intoClause = NULL; subparse->hasAggs = false; + subparse->hasDistinctOn = false; subparse->groupClause = NIL; subparse->havingQual = NULL; subparse->distinctClause = NIL; @@ -505,8 +506,12 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) subparse->targetList = list_make1(tle); /* set up the appropriate ORDER BY entry */ - sortcl = makeNode(SortClause); + sortcl = makeNode(SortGroupClause); sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList); + sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL); + if (!OidIsValid(sortcl->eqop)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + info->aggsortop); sortcl->sortop = info->aggsortop; sortcl->nulls_first = info->nulls_first; subparse->sortClause = list_make1(sortcl); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 6ebbc5b7bc..735a77ac4e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -67,6 +67,7 @@ static bool is_dummy_plan(Plan *plan); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); +static void preprocess_groupclause(PlannerInfo *root); static Oid *extract_grouping_ops(List *groupClause); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, double limit_tuples, @@ -846,11 +847,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) Path *best_path; long numGroups = 0; AggClauseCounts agg_counts; - int numGroupCols = list_length(parse->groupClause); + int numGroupCols; bool use_hashed_grouping = false; MemSet(&agg_counts, 0, sizeof(AggClauseCounts)); + /* Preprocess GROUP BY clause, if any */ + if (parse->groupClause) + preprocess_groupclause(root); + numGroupCols = list_length(parse->groupClause); + /* Preprocess targetlist */ tlist = preprocess_targetlist(root, tlist); @@ -1476,6 +1482,88 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction, return tuple_fraction; } + +/* + * preprocess_groupclause - do preparatory work on GROUP BY clause + * + * The idea here is to adjust the ordering of the GROUP BY elements + * (which in itself is semantically insignificant) to match ORDER BY, + * thereby allowing a single sort operation to both implement the ORDER BY + * requirement and set up for a Unique step that implements GROUP BY. + * + * In principle it might be interesting to consider other orderings of the + * GROUP BY elements, which could match the sort ordering of other + * possible plans (eg an indexscan) and thereby reduce cost. We don't + * bother with that, though. Hashed grouping will frequently win anyway. + */ +static void +preprocess_groupclause(PlannerInfo *root) +{ + Query *parse = root->parse; + List *new_groupclause; + bool partial_match; + ListCell *sl; + ListCell *gl; + + /* If no ORDER BY, nothing useful to do here anyway */ + if (parse->sortClause == NIL) + return; + + /* + * Scan the ORDER BY clause and construct a list of matching GROUP BY + * items, but only as far as we can make a matching prefix. + * + * This code assumes that the sortClause contains no duplicate items. + */ + new_groupclause = NIL; + foreach(sl, parse->sortClause) + { + SortGroupClause *sc = (SortGroupClause *) lfirst(sl); + + foreach(gl, parse->groupClause) + { + SortGroupClause *gc = (SortGroupClause *) lfirst(gl); + + if (equal(gc, sc)) + { + new_groupclause = lappend(new_groupclause, gc); + break; + } + } + if (gl == NULL) + break; /* no match, so stop scanning */ + } + + /* Did we match all of the ORDER BY list, or just some of it? */ + partial_match = (sl != NULL); + + /* If no match at all, no point in reordering GROUP BY */ + if (new_groupclause == NIL) + return; + + /* + * Add any remaining GROUP BY items to the new list, but only if we + * were able to make a complete match. In other words, we only + * rearrange the GROUP BY list if the result is that one list is a + * prefix of the other --- otherwise there's no possibility of a + * common sort. + */ + foreach(gl, parse->groupClause) + { + SortGroupClause *gc = (SortGroupClause *) lfirst(gl); + + if (list_member_ptr(new_groupclause, gc)) + continue; /* it matched an ORDER BY item */ + if (partial_match) + return; /* give up, no common sort possible */ + new_groupclause = lappend(new_groupclause, gc); + } + + /* Success --- install the rearranged GROUP BY list */ + Assert(list_length(parse->groupClause) == list_length(new_groupclause)); + parse->groupClause = new_groupclause; +} + /* * extract_grouping_ops - make an array of the equality operator OIDs * for the GROUP BY clause @@ -1492,12 +1580,10 @@ extract_grouping_ops(List *groupClause) foreach(glitem, groupClause) { - GroupClause *groupcl = (GroupClause *) lfirst(glitem); + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); - groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop); - if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - groupcl->sortop); + groupOperators[colno] = groupcl->eqop; + Assert(OidIsValid(groupOperators[colno])); colno++; } @@ -1738,7 +1824,7 @@ make_subplanTargetList(PlannerInfo *root, foreach(gl, parse->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(gl); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); TargetEntry *te = NULL; ListCell *sl; @@ -1797,7 +1883,7 @@ locate_grouping_columns(PlannerInfo *root, foreach(gl, root->parse->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(gl); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); TargetEntry *te = NULL; ListCell *sl; diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index b30cedee9f..31ba005edb 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.148 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -98,7 +98,7 @@ static List *adjust_inherited_tlist(List *tlist, * zero means "all the tuples will be fetched". Any LIMIT present at the * top level has already been factored into tuple_fraction. * - * *sortClauses is an output argument: it is set to a list of SortClauses + * *sortClauses is an output argument: it is set to a list of SortGroupClauses * representing the result ordering of the topmost set operation. */ Plan * @@ -152,7 +152,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction, * junkOK: if true, child resjunk columns may be left in the result * flag: if >= 0, add a resjunk output column indicating value of flag * refnames_tlist: targetlist to take column names from - * *sortClauses: receives list of SortClauses for result plan, if any + * *sortClauses: receives list of SortGroupClauses for result plan, if any * * We don't have to care about typmods here: the only allowed difference * between set-op input and output typmods is input is a specific typmod @@ -678,8 +678,11 @@ generate_append_tlist(List *colTypes, bool flag, /* * generate_setop_sortlist - * Build a SortClause list enumerating all the non-resjunk tlist entries, - * using default ordering properties. + * Build a SortGroupClause list enumerating all the non-resjunk + * tlist entries, using default ordering properties. + * + * For now, we require all the items to be sortable. Eventually we + * should implement hashing setops and allow hash-only datatypes. */ static List * generate_setop_sortlist(List *targetlist) @@ -692,11 +695,10 @@ generate_setop_sortlist(List *targetlist) TargetEntry *tle = (TargetEntry *) lfirst(l); if (!tle->resjunk) - sortlist = addTargetToSortList(NULL, tle, - sortlist, targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, false); + sortlist = addTargetToGroupList(NULL, tle, + sortlist, targetlist, + true, /* XXX fixme someday */ + false); } return sortlist; } diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index ea92422279..04ef2224c9 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.259 2008/05/15 17:37:49 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -1334,85 +1334,6 @@ is_pseudo_constant_clause_relids(Node *clause, Relids relids) } -/***************************************************************************** - * Tests on clauses of queries - * - * Possibly this code should go someplace else, since this isn't quite the - * same meaning of "clause" as is used elsewhere in this module. But I can't - * think of a better place for it... - *****************************************************************************/ - -/* - * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is - * not the same as the set of output columns. - */ -bool -has_distinct_on_clause(Query *query) -{ - ListCell *l; - - /* Is there a DISTINCT clause at all? */ - if (query->distinctClause == NIL) - return false; - - /* - * If the DISTINCT list contains all the nonjunk targetlist items, and - * nothing else (ie, no junk tlist items), then it's a simple DISTINCT, - * else it's DISTINCT ON. We do not require the lists to be in the same - * order (since the parser may have adjusted the DISTINCT clause ordering - * to agree with ORDER BY). Furthermore, a non-DISTINCT junk tlist item - * that is in the sortClause is also evidence of DISTINCT ON, since we - * don't allow ORDER BY on junk tlist items when plain DISTINCT is used. - * - * This code assumes that the DISTINCT list is valid, ie, all its entries - * match some entry of the tlist. - */ - foreach(l, query->targetList) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (tle->ressortgroupref == 0) - { - if (tle->resjunk) - continue; /* we can ignore unsorted junk cols */ - return true; /* definitely not in DISTINCT list */ - } - if (targetIsInSortList(tle, InvalidOid, query->distinctClause)) - { - if (tle->resjunk) - return true; /* junk TLE in DISTINCT means DISTINCT ON */ - /* else this TLE is okay, keep looking */ - } - else - { - /* This TLE is not in DISTINCT list */ - if (!tle->resjunk) - return true; /* non-junk, non-DISTINCT, so DISTINCT ON */ - if (targetIsInSortList(tle, InvalidOid, query->sortClause)) - return true; /* sorted, non-distinct junk */ - /* unsorted junk is okay, keep looking */ - } - } - /* It's a simple DISTINCT */ - return false; -} - -/* - * Test whether a query uses simple DISTINCT, ie, has a distinct-list that - * is the same as the set of output columns. - */ -bool -has_distinct_clause(Query *query) -{ - /* Is there a DISTINCT clause at all? */ - if (query->distinctClause == NIL) - return false; - - /* It's DISTINCT if it's not DISTINCT ON */ - return !has_distinct_on_clause(query); -} - - /***************************************************************************** * * * General clause-manipulating routines * diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index c5b6c2ed61..ad3c10ac27 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -935,8 +935,10 @@ translate_sub_tlist(List *tlist, int relid) * corresponding upper-level equality operators listed in opids would think * the values are distinct. (Note: the opids entries could be cross-type * operators, and thus not exactly the equality operators that the subquery - * would use itself. We assume that the subquery is compatible if these - * operators appear in the same btree opfamily as the ones the subquery uses.) + * would use itself. We use equality_ops_are_compatible() to check + * compatibility. That looks at btree or hash opfamily membership, and so + * should give trustworthy answers for all operators that we might need + * to deal with here.) */ static bool query_is_distinct_for(Query *query, List *colnos, List *opids) @@ -955,13 +957,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) { foreach(l, query->distinctClause) { - SortClause *scl = (SortClause *) lfirst(l); - TargetEntry *tle = get_sortgroupclause_tle(scl, + SortGroupClause *sgc = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); opid = distinct_col_search(tle->resno, colnos, opids); if (!OidIsValid(opid) || - !ops_in_same_btree_opfamily(opid, scl->sortop)) + !equality_ops_are_compatible(opid, sgc->eqop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -976,13 +978,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) { foreach(l, query->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(l); - TargetEntry *tle = get_sortgroupclause_tle(grpcl, + SortGroupClause *sgc = (SortGroupClause *) lfirst(l); + TargetEntry *tle = get_sortgroupclause_tle(sgc, query->targetList); opid = distinct_col_search(tle->resno, colnos, opids); if (!OidIsValid(opid) || - !ops_in_same_btree_opfamily(opid, grpcl->sortop)) + !equality_ops_are_compatible(opid, sgc->eqop)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ @@ -1002,10 +1004,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row, * except with ALL. * - * XXX this code knows that prepunion.c will adopt the default ordering - * operator for each column datatype as the sortop. It'd probably be - * better if these operators were chosen at parse time and stored into the - * parsetree, instead of leaving bits of the planner to decide semantics. + * XXX this code knows that prepunion.c will adopt the default sort/group + * operators for each column datatype to determine uniqueness. It'd + * probably be better if these operators were chosen at parse time and + * stored into the parsetree, instead of leaving bits of the planner to + * decide semantics. */ if (query->setOperations) { @@ -1020,14 +1023,20 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) foreach(l, query->targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); + Oid tle_eq_opr; if (tle->resjunk) continue; /* ignore resjunk columns */ opid = distinct_col_search(tle->resno, colnos, opids); - if (!OidIsValid(opid) || - !ops_in_same_btree_opfamily(opid, - ordering_oper_opid(exprType((Node *) tle->expr)))) + if (!OidIsValid(opid)) + break; /* exit early if no match */ + /* check for compatible semantics */ + get_sort_group_operators(exprType((Node *) tle->expr), + false, false, false, + NULL, &tle_eq_opr, NULL); + if (!OidIsValid(tle_eq_opr) || + !equality_ops_are_compatible(opid, tle_eq_opr)) break; /* exit early if no match */ } if (l == NULL) /* had matches for all? */ diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 618d65925c..fc0880ad56 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.78 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -156,49 +156,43 @@ get_sortgroupref_tle(Index sortref, List *targetList) /* * get_sortgroupclause_tle - * Find the targetlist entry matching the given SortClause - * (or GroupClause) by ressortgroupref, and return it. - * - * Because GroupClause is typedef'd as SortClause, either kind of - * node can be passed without casting. + * Find the targetlist entry matching the given SortGroupClause + * by ressortgroupref, and return it. */ TargetEntry * -get_sortgroupclause_tle(SortClause *sortClause, +get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList) { - return get_sortgroupref_tle(sortClause->tleSortGroupRef, targetList); + return get_sortgroupref_tle(sgClause->tleSortGroupRef, targetList); } /* * get_sortgroupclause_expr - * Find the targetlist entry matching the given SortClause - * (or GroupClause) by ressortgroupref, and return its expression. - * - * Because GroupClause is typedef'd as SortClause, either kind of - * node can be passed without casting. + * Find the targetlist entry matching the given SortGroupClause + * by ressortgroupref, and return its expression. */ Node * -get_sortgroupclause_expr(SortClause *sortClause, List *targetList) +get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList) { - TargetEntry *tle = get_sortgroupclause_tle(sortClause, targetList); + TargetEntry *tle = get_sortgroupclause_tle(sgClause, targetList); return (Node *) tle->expr; } /* * get_sortgrouplist_exprs - * Given a list of SortClauses (or GroupClauses), build a list + * Given a list of SortGroupClauses, build a list * of the referenced targetlist expressions. */ List * -get_sortgrouplist_exprs(List *sortClauses, List *targetList) +get_sortgrouplist_exprs(List *sgClauses, List *targetList) { List *result = NIL; ListCell *l; - foreach(l, sortClauses) + foreach(l, sgClauses) { - SortClause *sortcl = (SortClause *) lfirst(l); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); Node *sortexpr; sortexpr = get_sortgroupclause_expr(sortcl, targetList); diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 329b1bc881..2cc3f28d20 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -17,7 +17,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.374 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.375 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -724,10 +724,28 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) &qry->targetList, qry->sortClause); - qry->distinctClause = transformDistinctClause(pstate, - stmt->distinctClause, - &qry->targetList, - qry->sortClause); + if (stmt->distinctClause == NIL) + { + qry->distinctClause = NIL; + qry->hasDistinctOn = false; + } + else if (linitial(stmt->distinctClause) == NULL) + { + /* We had SELECT DISTINCT */ + qry->distinctClause = transformDistinctClause(pstate, + &qry->targetList, + qry->sortClause); + qry->hasDistinctOn = false; + } + else + { + /* We had SELECT DISTINCT ON */ + qry->distinctClause = transformDistinctOnClause(pstate, + stmt->distinctClause, + &qry->targetList, + qry->sortClause); + qry->hasDistinctOn = true; + } /* transform LIMIT */ qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset, diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 03a765e96e..b5fbd0f78d 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.79 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.80 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -131,7 +131,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry) */ foreach(l, qry->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(l); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(l); Node *expr; expr = get_sortgroupclause_expr(grpcl, qry->targetList); diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index d82573fd03..0a3b544b10 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.171 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -66,6 +66,10 @@ static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype, Var *l_colvar, Var *r_colvar); static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause); +static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, + List *sortlist, List *targetlist, + SortByDir sortby_dir, SortByNulls sortby_nulls, + List *sortby_opname, bool resolveUnknown); /* @@ -1289,129 +1293,75 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause) return target_result; } -static GroupClause * -make_group_clause(TargetEntry *tle, List *targetlist, - Oid sortop, bool nulls_first) -{ - GroupClause *result; - - result = makeNode(GroupClause); - result->tleSortGroupRef = assignSortGroupRef(tle, targetlist); - result->sortop = sortop; - result->nulls_first = nulls_first; - return result; -} - /* * transformGroupClause - * transform a GROUP BY clause * * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. - * - * The order of the elements of the grouping clause does not affect - * the semantics of the query. However, the optimizer is not currently - * smart enough to reorder the grouping clause, so we try to do some - * primitive reordering here. */ List * transformGroupClause(ParseState *pstate, List *grouplist, List **targetlist, List *sortClause) { List *result = NIL; - List *tle_list = NIL; - ListCell *l; + ListCell *gl; - /* Preprocess the grouping clause, lookup TLEs */ - foreach(l, grouplist) + foreach(gl, grouplist) { + Node *gexpr = (Node *) lfirst(gl); TargetEntry *tle; - Oid restype; - - tle = findTargetlistEntry(pstate, lfirst(l), - targetlist, GROUP_CLAUSE); - - /* if tlist item is an UNKNOWN literal, change it to TEXT */ - restype = exprType((Node *) tle->expr); - - if (restype == UNKNOWNOID) - tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr, - restype, TEXTOID, -1, - COERCION_IMPLICIT, - COERCE_IMPLICIT_CAST); - - tle_list = lappend(tle_list, tle); - } - - /* - * Now iterate through the ORDER BY clause. If we find a grouping element - * that matches the ORDER BY element, append the grouping element to the - * result set immediately. Otherwise, stop iterating. The effect of this - * is to look for a prefix of the ORDER BY list in the grouping clauses, - * and to move that prefix to the front of the GROUP BY. - */ - foreach(l, sortClause) - { - SortClause *sc = (SortClause *) lfirst(l); - ListCell *prev = NULL; - ListCell *tl; bool found = false; - foreach(tl, tle_list) - { - TargetEntry *tle = (TargetEntry *) lfirst(tl); + tle = findTargetlistEntry(pstate, gexpr, + targetlist, GROUP_CLAUSE); - if (sc->tleSortGroupRef == tle->ressortgroupref) - { - GroupClause *gc; - - tle_list = list_delete_cell(tle_list, tl, prev); - - /* Use the sort clause's sorting information */ - gc = make_group_clause(tle, *targetlist, - sc->sortop, sc->nulls_first); - result = lappend(result, gc); - found = true; - break; - } - - prev = tl; - } - - /* As soon as we've failed to match an ORDER BY element, stop */ - if (!found) - break; - } - - /* - * Now add any remaining elements of the GROUP BY list in the order we - * received them. - * - * XXX: are there any additional criteria to consider when ordering - * grouping clauses? - */ - foreach(l, tle_list) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - GroupClause *gc; - Oid sort_op; - - /* - * Avoid making duplicate grouplist entries. Note that we don't - * enforce a particular sortop here. Along with the copying of sort - * information above, this means that if you write something like - * "GROUP BY foo ORDER BY foo USING <<<", the GROUP BY operation - * silently takes on the equality semantics implied by the ORDER BY. - */ + /* Eliminate duplicates (GROUP BY x, x) */ if (targetIsInSortList(tle, InvalidOid, result)) continue; - sort_op = ordering_oper_opid(exprType((Node *) tle->expr)); - gc = make_group_clause(tle, *targetlist, sort_op, false); - result = lappend(result, gc); + /* + * If the GROUP BY tlist entry also appears in ORDER BY, copy operator + * info from the (first) matching ORDER BY item. This means that if + * you write something like "GROUP BY foo ORDER BY foo USING <<<", the + * GROUP BY operation silently takes on the equality semantics implied + * by the ORDER BY. There are two reasons to do this: it improves + * the odds that we can implement both GROUP BY and ORDER BY with a + * single sort step, and it allows the user to choose the equality + * semantics used by GROUP BY, should she be working with a datatype + * that has more than one equality operator. + */ + if (tle->ressortgroupref > 0) + { + ListCell *sl; + + foreach(sl, sortClause) + { + SortGroupClause *sc = (SortGroupClause *) lfirst(sl); + + if (sc->tleSortGroupRef == tle->ressortgroupref) + { + result = lappend(result, copyObject(sc)); + found = true; + break; + } + } + } + + /* + * If no match in ORDER BY, just add it to the result using + * default sort/group semantics. + * + * XXX for now, the planner requires groupClause to be sortable, + * so we have to insist on that here. + */ + if (!found) + result = addTargetToGroupList(pstate, tle, + result, *targetlist, + true, /* XXX for now */ + true); } - list_free(tle_list); return result; } @@ -1433,7 +1383,7 @@ transformSortClause(ParseState *pstate, foreach(olitem, orderlist) { - SortBy *sortby = lfirst(olitem); + SortBy *sortby = (SortBy *) lfirst(olitem); TargetEntry *tle; tle = findTargetlistEntry(pstate, sortby->node, @@ -1452,159 +1402,165 @@ transformSortClause(ParseState *pstate, /* * transformDistinctClause - - * transform a DISTINCT or DISTINCT ON clause + * transform a DISTINCT clause * * Since we may need to add items to the query's targetlist, that list * is passed by reference. * * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as * possible into the distinctClause. This avoids a possible need to re-sort, - * and allows the user to determine the equality semantics used by DISTINCT, - * should she be working with a datatype that has more than one btree equality + * and allows the user to choose the equality semantics used by DISTINCT, + * should she be working with a datatype that has more than one equality * operator. */ List * -transformDistinctClause(ParseState *pstate, List *distinctlist, +transformDistinctClause(ParseState *pstate, List **targetlist, List *sortClause) { List *result = NIL; ListCell *slitem; - ListCell *dlitem; ListCell *tlitem; - /* No work if there was no DISTINCT clause */ - if (distinctlist == NIL) - return NIL; - - if (linitial(distinctlist) == NULL) + /* + * The distinctClause should consist of all ORDER BY items followed + * by all other non-resjunk targetlist items. There must not be any + * resjunk ORDER BY items --- that would imply that we are sorting + * by a value that isn't necessarily unique within a DISTINCT group, + * so the results wouldn't be well-defined. This construction + * ensures we follow the rule that sortClause and distinctClause match; + * in fact the sortClause will always be a prefix of distinctClause. + * + * Note a corner case: the same TLE could be in the ORDER BY list + * multiple times with different sortops. We have to include it in + * the distinctClause the same way to preserve the prefix property. + * The net effect will be that the TLE value will be made unique + * according to both sortops. + */ + foreach(slitem, sortClause) { - /* We had SELECT DISTINCT */ + SortGroupClause *scl = (SortGroupClause *) lfirst(slitem); + TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist); - /* - * The distinctClause should consist of all ORDER BY items followed - * by all other non-resjunk targetlist items. There must not be any - * resjunk ORDER BY items --- that would imply that we are sorting - * by a value that isn't necessarily unique within a DISTINCT group, - * so the results wouldn't be well-defined. This construction - * ensures we follow the rule that sortClause and distinctClause match; - * in fact the sortClause will always be a prefix of distinctClause. - * - * Note a corner case: the same TLE could be in the ORDER BY list - * multiple times with different sortops. We have to include it in - * the distinctClause the same way to preserve the prefix property. - * The net effect will be that the TLE value will be made unique - * according to both sortops. - */ - foreach(slitem, sortClause) - { - SortClause *scl = (SortClause *) lfirst(slitem); - TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist); - - if (tle->resjunk) - ereport(ERROR, - (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list"))); - else - result = lappend(result, copyObject(scl)); - } - - /* - * Now add any remaining non-resjunk tlist items, using default - * sorting semantics for their data types. - */ - foreach(tlitem, *targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(tlitem); - - if (tle->resjunk) - continue; /* ignore junk */ - if (targetIsInSortList(tle, InvalidOid, result)) - continue; /* already in list (with some semantics) */ - result = addTargetToSortList(pstate, tle, - result, *targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, true); - } + if (tle->resjunk) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list"))); + result = lappend(result, copyObject(scl)); } - else + + /* + * Now add any remaining non-resjunk tlist items, using default + * sort/group semantics for their data types. + * + * XXX for now, the planner requires distinctClause to be sortable, + * so we have to insist on that here. + */ + foreach(tlitem, *targetlist) { - /* We had SELECT DISTINCT ON (expr, ...) */ - Bitmapset *refnos = NULL; - int sortgroupref; - bool skipped_sortitem; + TargetEntry *tle = (TargetEntry *) lfirst(tlitem); - /* - * Add all the DISTINCT ON expressions to the tlist (if not already - * present, they are added as resjunk items). Assign sortgroupref - * numbers to them, and form a bitmapset of these numbers. (A - * bitmapset is convenient here because we don't care about order - * and we can discard duplicates.) - */ - foreach(dlitem, distinctlist) + if (tle->resjunk) + continue; /* ignore junk */ + result = addTargetToGroupList(pstate, tle, + result, *targetlist, + true, /* XXX for now */ + true); + } + + return result; +} + +/* + * transformDistinctOnClause - + * transform a DISTINCT ON clause + * + * Since we may need to add items to the query's targetlist, that list + * is passed by reference. + * + * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as + * possible into the distinctClause. This avoids a possible need to re-sort, + * and allows the user to choose the equality semantics used by DISTINCT, + * should she be working with a datatype that has more than one equality + * operator. + */ +List * +transformDistinctOnClause(ParseState *pstate, List *distinctlist, + List **targetlist, List *sortClause) +{ + List *result = NIL; + ListCell *slitem; + ListCell *dlitem; + Bitmapset *refnos = NULL; + int sortgroupref; + bool skipped_sortitem; + + /* + * Add all the DISTINCT ON expressions to the tlist (if not already + * present, they are added as resjunk items). Assign sortgroupref + * numbers to them, and form a bitmapset of these numbers. (A + * bitmapset is convenient here because we don't care about order + * and we can discard duplicates.) + */ + foreach(dlitem, distinctlist) + { + Node *dexpr = (Node *) lfirst(dlitem); + TargetEntry *tle; + + tle = findTargetlistEntry(pstate, dexpr, + targetlist, DISTINCT_ON_CLAUSE); + sortgroupref = assignSortGroupRef(tle, *targetlist); + refnos = bms_add_member(refnos, sortgroupref); + } + + /* + * If the user writes both DISTINCT ON and ORDER BY, adopt the + * sorting semantics from ORDER BY items that match DISTINCT ON + * items, and also adopt their column sort order. We insist that + * the distinctClause and sortClause match, so throw error if we + * find the need to add any more distinctClause items after we've + * skipped an ORDER BY item that wasn't in DISTINCT ON. + */ + skipped_sortitem = false; + foreach(slitem, sortClause) + { + SortGroupClause *scl = (SortGroupClause *) lfirst(slitem); + + if (bms_is_member(scl->tleSortGroupRef, refnos)) { - Node *dexpr = (Node *) lfirst(dlitem); - TargetEntry *tle; - - tle = findTargetlistEntry(pstate, dexpr, - targetlist, DISTINCT_ON_CLAUSE); - sortgroupref = assignSortGroupRef(tle, *targetlist); - refnos = bms_add_member(refnos, sortgroupref); - } - - /* - * If the user writes both DISTINCT ON and ORDER BY, adopt the - * sorting semantics from ORDER BY items that match DISTINCT ON - * items, and also adopt their column sort order. We insist that - * the distinctClause and sortClause match, so throw error if we - * find the need to add any more distinctClause items after we've - * skipped an ORDER BY item that wasn't in DISTINCT ON. - */ - skipped_sortitem = false; - foreach(slitem, sortClause) - { - SortClause *scl = (SortClause *) lfirst(slitem); - - if (bms_is_member(scl->tleSortGroupRef, refnos)) - { - if (skipped_sortitem) - ereport(ERROR, - (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); - else - result = lappend(result, copyObject(scl)); - } - else - { - skipped_sortitem = true; - } - } - - /* - * Now add any remaining DISTINCT ON items, using default sorting - * semantics for their data types. (Note: this is pretty - * questionable; if the ORDER BY list doesn't include all the DISTINCT - * ON items and more besides, you certainly aren't using DISTINCT ON - * in the intended way, and you probably aren't going to get - * consistent results. It might be better to throw an error or warning - * here. But historically we've allowed it, so keep doing so.) - */ - while ((sortgroupref = bms_first_member(refnos)) >= 0) - { - TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist); - - if (targetIsInSortList(tle, InvalidOid, result)) - continue; /* already in list (with some semantics) */ if (skipped_sortitem) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); - result = addTargetToSortList(pstate, tle, - result, *targetlist, - SORTBY_DEFAULT, - SORTBY_NULLS_DEFAULT, - NIL, true); + else + result = lappend(result, copyObject(scl)); } + else + skipped_sortitem = true; + } + + /* + * Now add any remaining DISTINCT ON items, using default sort/group + * semantics for their data types. (Note: this is pretty questionable; + * if the ORDER BY list doesn't include all the DISTINCT ON items and more + * besides, you certainly aren't using DISTINCT ON in the intended way, + * and you probably aren't going to get consistent results. It might be + * better to throw an error or warning here. But historically we've + * allowed it, so keep doing so.) + */ + while ((sortgroupref = bms_first_member(refnos)) >= 0) + { + TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist); + + if (targetIsInSortList(tle, InvalidOid, result)) + continue; /* already in list (with some semantics) */ + if (skipped_sortitem) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); + result = addTargetToGroupList(pstate, tle, + result, *targetlist, + true, /* someday allow hash-only? */ + true); } return result; @@ -1612,17 +1568,18 @@ transformDistinctClause(ParseState *pstate, List *distinctlist, /* * addTargetToSortList - * If the given targetlist entry isn't already in the SortClause list, - * add it to the end of the list, using the given sort ordering info. + * If the given targetlist entry isn't already in the SortGroupClause + * list, add it to the end of the list, using the given sort ordering + * info. * * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not, * do nothing (which implies the search for a sort operator will fail). * pstate should be provided if resolveUnknown is TRUE, but can be NULL * otherwise. * - * Returns the updated SortClause list. + * Returns the updated SortGroupClause list. */ -List * +static List * addTargetToSortList(ParseState *pstate, TargetEntry *tle, List *sortlist, List *targetlist, SortByDir sortby_dir, SortByNulls sortby_nulls, @@ -1630,7 +1587,7 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, { Oid restype = exprType((Node *) tle->expr); Oid sortop; - Oid cmpfunc; + Oid eqop; bool reverse; /* if tlist item is an UNKNOWN literal, change it to TEXT */ @@ -1643,16 +1600,20 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, restype = TEXTOID; } - /* determine the sortop */ + /* determine the sortop, eqop, and directionality */ switch (sortby_dir) { case SORTBY_DEFAULT: case SORTBY_ASC: - sortop = ordering_oper_opid(restype); + get_sort_group_operators(restype, + true, true, false, + &sortop, &eqop, NULL); reverse = false; break; case SORTBY_DESC: - sortop = reverse_ordering_oper_opid(restype); + get_sort_group_operators(restype, + false, true, true, + NULL, &eqop, &sortop); reverse = true; break; case SORTBY_USING: @@ -1663,11 +1624,12 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, false); /* - * Verify it's a valid ordering operator, and determine whether to - * consider it like ASC or DESC. + * Verify it's a valid ordering operator, fetch the corresponding + * equality operator, and determine whether to consider it like + * ASC or DESC. */ - if (!get_compare_function_for_ordering_op(sortop, - &cmpfunc, &reverse)) + eqop = get_equality_op_for_ordering_op(sortop, &reverse); + if (!OidIsValid(eqop)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("operator %s is not a valid ordering operator", @@ -1677,6 +1639,7 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, default: elog(ERROR, "unrecognized sortby_dir: %d", sortby_dir); sortop = InvalidOid; /* keep compiler quiet */ + eqop = InvalidOid; reverse = false; break; } @@ -1684,10 +1647,11 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, /* avoid making duplicate sortlist entries */ if (!targetIsInSortList(tle, sortop, sortlist)) { - SortClause *sortcl = makeNode(SortClause); + SortGroupClause *sortcl = makeNode(SortGroupClause); sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist); + sortcl->eqop = eqop; sortcl->sortop = sortop; switch (sortby_nulls) @@ -1713,6 +1677,68 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle, return sortlist; } +/* + * addTargetToGroupList + * If the given targetlist entry isn't already in the SortGroupClause + * list, add it to the end of the list, using default sort/group + * semantics. + * + * This is very similar to addTargetToSortList, except that we allow the + * case where only a grouping (equality) operator can be found, and that + * the TLE is considered "already in the list" if it appears there with any + * sorting semantics. + * + * If requireSortOp is TRUE, we require a sorting operator to be found too. + * XXX this argument should eventually be obsolete, but for now there are + * parts of the system that can't support non-sortable grouping lists. + * + * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT. If not, + * do nothing (which implies the search for an equality operator will fail). + * pstate should be provided if resolveUnknown is TRUE, but can be NULL + * otherwise. + * + * Returns the updated SortGroupClause list. + */ +List * +addTargetToGroupList(ParseState *pstate, TargetEntry *tle, + List *grouplist, List *targetlist, + bool requireSortOp, bool resolveUnknown) +{ + Oid restype = exprType((Node *) tle->expr); + Oid sortop; + Oid eqop; + + /* if tlist item is an UNKNOWN literal, change it to TEXT */ + if (restype == UNKNOWNOID && resolveUnknown) + { + tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr, + restype, TEXTOID, -1, + COERCION_IMPLICIT, + COERCE_IMPLICIT_CAST); + restype = TEXTOID; + } + + /* avoid making duplicate grouplist entries */ + if (!targetIsInSortList(tle, InvalidOid, grouplist)) + { + SortGroupClause *grpcl = makeNode(SortGroupClause); + + /* determine the eqop and optional sortop */ + get_sort_group_operators(restype, + requireSortOp, true, false, + &sortop, &eqop, NULL); + + grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist); + grpcl->eqop = eqop; + grpcl->sortop = sortop; + grpcl->nulls_first = false; /* OK with or without sortop */ + + grouplist = lappend(grouplist, grpcl); + } + + return grouplist; +} + /* * assignSortGroupRef * Assign the targetentry an unused ressortgroupref, if it doesn't @@ -1756,9 +1782,10 @@ assignSortGroupRef(TargetEntry *tle, List *tlist) * opposite nulls direction is redundant. Also, we can consider * ORDER BY foo ASC, foo DESC redundant, so check for a commutator match. * - * Works for both SortClause and GroupClause lists. Note that the main - * reason we need this routine (and not just a quick test for nonzeroness - * of ressortgroupref) is that a TLE might be in only one of the lists. + * Works for both ordering and grouping lists (sortop would normally be + * InvalidOid when considering grouping). Note that the main reason we need + * this routine (and not just a quick test for nonzeroness of ressortgroupref) + * is that a TLE might be in only one of the lists. */ bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList) @@ -1772,7 +1799,7 @@ targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList) foreach(l, sortList) { - SortClause *scl = (SortClause *) lfirst(l); + SortGroupClause *scl = (SortGroupClause *) lfirst(l); if (scl->tleSortGroupRef == ref && (sortop == InvalidOid || diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c index 54fb63f959..055574ff98 100644 --- a/src/backend/parser/parse_oper.c +++ b/src/backend/parser/parse_oper.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.102 2008/04/22 01:34:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.103 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -160,239 +160,93 @@ LookupOperNameTypeNames(ParseState *pstate, List *opername, } /* - * equality_oper - identify a suitable equality operator for a datatype + * get_sort_group_operators - get default sorting/grouping operators for type * - * On failure, return NULL if noError, else report a standard error + * We fetch the "<", "=", and ">" operators all at once to reduce lookup + * overhead (knowing that most callers will be interested in at least two). + * However, a given datatype might have only an "=" operator, if it is + * hashable but not sortable. (Other combinations of present and missing + * operators shouldn't happen, unless the system catalogs are messed up.) + * + * If an operator is missing and the corresponding needXX flag is true, + * throw a standard error message, else return InvalidOid. + * + * Callers can pass NULL pointers for any results they don't care to get. + * + * Note: the results are guaranteed to be exact or binary-compatible matches, + * since most callers are not prepared to cope with adding any run-time type + * coercion steps. */ -Operator -equality_oper(Oid argtype, bool noError) +void +get_sort_group_operators(Oid argtype, + bool needLT, bool needEQ, bool needGT, + Oid *ltOpr, Oid *eqOpr, Oid *gtOpr) { TypeCacheEntry *typentry; - Oid oproid; - Operator optup; + Oid lt_opr; + Oid eq_opr; + Oid gt_opr; /* - * Look for an "=" operator for the datatype. We require it to be an - * exact or binary-compatible match, since most callers are not prepared - * to cope with adding any run-time type coercion steps. + * Look up the operators using the type cache. + * + * Note: the search algorithm used by typcache.c ensures that the results + * are consistent, ie all from the same opclass. */ - typentry = lookup_type_cache(argtype, TYPECACHE_EQ_OPR); - oproid = typentry->eq_opr; + typentry = lookup_type_cache(argtype, + TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR | TYPECACHE_GT_OPR); + lt_opr = typentry->lt_opr; + eq_opr = typentry->eq_opr; + gt_opr = typentry->gt_opr; /* - * If the datatype is an array, then we can use array_eq ... but only if - * there is a suitable equality operator for the element type. (This check - * is not in the raw typcache.c code ... should it be?) + * If the datatype is an array, then we can use array_lt and friends ... + * but only if there are suitable operators for the element type. (This + * check is not in the raw typcache.c code ... should it be?) Testing + * all three operator IDs here should be redundant. */ - if (oproid == ARRAY_EQ_OP) + if (lt_opr == ARRAY_LT_OP || + eq_opr == ARRAY_EQ_OP || + gt_opr == ARRAY_GT_OP) { Oid elem_type = get_element_type(argtype); if (OidIsValid(elem_type)) { - optup = equality_oper(elem_type, true); - if (optup != NULL) - ReleaseSysCache(optup); - else - oproid = InvalidOid; /* element type has no "=" */ + typentry = lookup_type_cache(elem_type, + TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR | TYPECACHE_GT_OPR); + if (!OidIsValid(typentry->lt_opr)) + lt_opr = InvalidOid; /* element type has no "<" */ + if (!OidIsValid(typentry->eq_opr)) + eq_opr = InvalidOid; /* element type has no "=" */ + if (!OidIsValid(typentry->gt_opr)) + gt_opr = InvalidOid; /* element type has no ">" */ } else - oproid = InvalidOid; /* bogus array type? */ + lt_opr = eq_opr = gt_opr = InvalidOid; /* bogus array type? */ } - if (OidIsValid(oproid)) - { - optup = SearchSysCache(OPEROID, - ObjectIdGetDatum(oproid), - 0, 0, 0); - if (optup == NULL) /* should not fail */ - elog(ERROR, "cache lookup failed for operator %u", oproid); - return optup; - } - - if (!noError) + /* Report errors if needed */ + if ((needLT && !OidIsValid(lt_opr)) || + (needGT && !OidIsValid(gt_opr))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an ordering operator for type %s", + format_type_be(argtype)), + errhint("Use an explicit ordering operator or modify the query."))); + if (needEQ && !OidIsValid(eq_opr)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("could not identify an equality operator for type %s", format_type_be(argtype)))); - return NULL; -} -/* - * ordering_oper - identify a suitable sorting operator ("<") for a datatype - * - * On failure, return NULL if noError, else report a standard error - */ -Operator -ordering_oper(Oid argtype, bool noError) -{ - TypeCacheEntry *typentry; - Oid oproid; - Operator optup; - - /* - * Look for a "<" operator for the datatype. We require it to be an exact - * or binary-compatible match, since most callers are not prepared to cope - * with adding any run-time type coercion steps. - * - * Note: the search algorithm used by typcache.c ensures that if a "<" - * operator is returned, it will be consistent with the "=" operator - * returned by equality_oper. This is critical for sorting and grouping - * purposes. - */ - typentry = lookup_type_cache(argtype, TYPECACHE_LT_OPR); - oproid = typentry->lt_opr; - - /* - * If the datatype is an array, then we can use array_lt ... but only if - * there is a suitable less-than operator for the element type. (This - * check is not in the raw typcache.c code ... should it be?) - */ - if (oproid == ARRAY_LT_OP) - { - Oid elem_type = get_element_type(argtype); - - if (OidIsValid(elem_type)) - { - optup = ordering_oper(elem_type, true); - if (optup != NULL) - ReleaseSysCache(optup); - else - oproid = InvalidOid; /* element type has no "<" */ - } - else - oproid = InvalidOid; /* bogus array type? */ - } - - if (OidIsValid(oproid)) - { - optup = SearchSysCache(OPEROID, - ObjectIdGetDatum(oproid), - 0, 0, 0); - if (optup == NULL) /* should not fail */ - elog(ERROR, "cache lookup failed for operator %u", oproid); - return optup; - } - - if (!noError) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("could not identify an ordering operator for type %s", - format_type_be(argtype)), - errhint("Use an explicit ordering operator or modify the query."))); - return NULL; -} - -/* - * reverse_ordering_oper - identify DESC sort operator (">") for a datatype - * - * On failure, return NULL if noError, else report a standard error - */ -Operator -reverse_ordering_oper(Oid argtype, bool noError) -{ - TypeCacheEntry *typentry; - Oid oproid; - Operator optup; - - /* - * Look for a ">" operator for the datatype. We require it to be an exact - * or binary-compatible match, since most callers are not prepared to cope - * with adding any run-time type coercion steps. - * - * Note: the search algorithm used by typcache.c ensures that if a ">" - * operator is returned, it will be consistent with the "=" operator - * returned by equality_oper. This is critical for sorting and grouping - * purposes. - */ - typentry = lookup_type_cache(argtype, TYPECACHE_GT_OPR); - oproid = typentry->gt_opr; - - /* - * If the datatype is an array, then we can use array_gt ... but only if - * there is a suitable greater-than operator for the element type. (This - * check is not in the raw typcache.c code ... should it be?) - */ - if (oproid == ARRAY_GT_OP) - { - Oid elem_type = get_element_type(argtype); - - if (OidIsValid(elem_type)) - { - optup = reverse_ordering_oper(elem_type, true); - if (optup != NULL) - ReleaseSysCache(optup); - else - oproid = InvalidOid; /* element type has no ">" */ - } - else - oproid = InvalidOid; /* bogus array type? */ - } - - if (OidIsValid(oproid)) - { - optup = SearchSysCache(OPEROID, - ObjectIdGetDatum(oproid), - 0, 0, 0); - if (optup == NULL) /* should not fail */ - elog(ERROR, "cache lookup failed for operator %u", oproid); - return optup; - } - - if (!noError) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("could not identify an ordering operator for type %s", - format_type_be(argtype)), - errhint("Use an explicit ordering operator or modify the query."))); - return NULL; -} - -/* - * equality_oper_funcid - convenience routine for oprfuncid(equality_oper()) - */ -Oid -equality_oper_funcid(Oid argtype) -{ - Operator optup; - Oid result; - - optup = equality_oper(argtype, false); - result = oprfuncid(optup); - ReleaseSysCache(optup); - return result; -} - -/* - * ordering_oper_opid - convenience routine for oprid(ordering_oper()) - * - * This was formerly called any_ordering_op() - */ -Oid -ordering_oper_opid(Oid argtype) -{ - Operator optup; - Oid result; - - optup = ordering_oper(argtype, false); - result = oprid(optup); - ReleaseSysCache(optup); - return result; -} - -/* - * reverse_ordering_oper_opid - convenience routine for oprid(reverse_ordering_oper()) - */ -Oid -reverse_ordering_oper_opid(Oid argtype) -{ - Operator optup; - Oid result; - - optup = reverse_ordering_oper(argtype, false); - result = oprid(optup); - ReleaseSysCache(optup); - return result; + /* Return results as needed */ + if (ltOpr) + *ltOpr = lt_opr; + if (eqOpr) + *eqOpr = eq_opr; + if (gtOpr) + *gtOpr = gt_opr; } diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index b3603c53c1..0120353344 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.278 2008/07/18 03:32:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.279 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -157,7 +157,7 @@ static void get_target_list(List *targetList, deparse_context *context, static void get_setop_query(Node *setOp, Query *query, deparse_context *context, TupleDesc resultDesc); -static Node *get_rule_sortgroupclause(SortClause *srt, List *tlist, +static Node *get_rule_sortgroupclause(SortGroupClause *srt, List *tlist, bool force_colno, deparse_context *context); static char *get_variable(Var *var, int levelsup, bool showstar, @@ -2056,7 +2056,7 @@ get_select_query_def(Query *query, deparse_context *context, sep = ""; foreach(l, query->sortClause) { - SortClause *srt = (SortClause *) lfirst(l); + SortGroupClause *srt = (SortGroupClause *) lfirst(l); Node *sortexpr; Oid sortcoltype; TypeCacheEntry *typentry; @@ -2178,13 +2178,13 @@ get_basic_select_query(Query *query, deparse_context *context, /* Add the DISTINCT clause if given */ if (query->distinctClause != NIL) { - if (has_distinct_on_clause(query)) + if (query->hasDistinctOn) { appendStringInfo(buf, " DISTINCT ON ("); sep = ""; foreach(l, query->distinctClause) { - SortClause *srt = (SortClause *) lfirst(l); + SortGroupClause *srt = (SortGroupClause *) lfirst(l); appendStringInfoString(buf, sep); get_rule_sortgroupclause(srt, query->targetList, @@ -2219,7 +2219,7 @@ get_basic_select_query(Query *query, deparse_context *context, sep = ""; foreach(l, query->groupClause) { - GroupClause *grp = (GroupClause *) lfirst(l); + SortGroupClause *grp = (SortGroupClause *) lfirst(l); appendStringInfoString(buf, sep); get_rule_sortgroupclause(grp, query->targetList, @@ -2398,7 +2398,7 @@ get_setop_query(Node *setOp, Query *query, deparse_context *context, * Also returns the expression tree, so caller need not find it again. */ static Node * -get_rule_sortgroupclause(SortClause *srt, List *tlist, bool force_colno, +get_rule_sortgroupclause(SortGroupClause *srt, List *tlist, bool force_colno, deparse_context *context) { StringInfo buf = context->buf; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 79362441f8..ed94af6d49 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.158 2008/07/30 17:05:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.159 2008/08/02 21:32:00 tgl Exp $ * * NOTES * Eventually, the index information should go through here, too. @@ -254,11 +254,14 @@ get_compare_function_for_ordering_op(Oid opno, Oid *cmpfunc, bool *reverse) * Get the OID of the datatype-specific btree equality operator * associated with an ordering operator (a "<" or ">" operator). * + * If "reverse" isn't NULL, also set *reverse to FALSE if the operator is "<", + * TRUE if it's ">" + * * Returns InvalidOid if no matching equality operator can be found. * (This indicates that the operator is not a valid ordering operator.) */ Oid -get_equality_op_for_ordering_op(Oid opno) +get_equality_op_for_ordering_op(Oid opno, bool *reverse) { Oid result = InvalidOid; Oid opfamily; @@ -274,6 +277,8 @@ get_equality_op_for_ordering_op(Oid opno) opcintype, opcintype, BTEqualStrategyNumber); + if (reverse) + *reverse = (strategy == BTGreaterStrategyNumber); } return result; @@ -666,36 +671,49 @@ get_op_btree_interpretation(Oid opno, List **opfamilies, List **opstrats) } /* - * ops_in_same_btree_opfamily - * Return TRUE if there exists a btree opfamily containing both operators. - * (This implies that they have compatible notions of equality.) + * equality_ops_are_compatible + * Return TRUE if the two given equality operators have compatible + * semantics. + * + * This is trivially true if they are the same operator. Otherwise, + * we look to see if they can be found in the same btree or hash opfamily. + * Either finding allows us to assume that they have compatible notions + * of equality. (The reason we need to do these pushups is that one might + * be a cross-type operator; for instance int24eq vs int4eq.) */ bool -ops_in_same_btree_opfamily(Oid opno1, Oid opno2) +equality_ops_are_compatible(Oid opno1, Oid opno2) { - bool result = false; + bool result; CatCList *catlist; int i; + /* Easy if they're the same operator */ + if (opno1 == opno2) + return true; + /* * We search through all the pg_amop entries for opno1. */ catlist = SearchSysCacheList(AMOPOPID, 1, ObjectIdGetDatum(opno1), 0, 0, 0); + + result = false; for (i = 0; i < catlist->n_members; i++) { HeapTuple op_tuple = &catlist->members[i]->tuple; Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); - /* must be btree */ - if (op_form->amopmethod != BTREE_AM_OID) - continue; - - if (op_in_opfamily(opno2, op_form->amopfamily)) + /* must be btree or hash */ + if (op_form->amopmethod == BTREE_AM_OID || + op_form->amopmethod == HASH_AM_OID) { - result = true; - break; + if (op_in_opfamily(opno2, op_form->amopfamily)) + { + result = true; + break; + } } } diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 264183af56..49d2a32431 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.473 2008/07/30 19:35:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.474 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200807301 +#define CATALOG_VERSION_NO 200808011 #endif diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 79d679b5be..e35356ab55 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.206 2008/03/20 21:42:48 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.207 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -336,8 +336,7 @@ typedef enum NodeTag T_Constraint, T_DefElem, T_RangeTblEntry, - T_SortClause, - T_GroupClause, + T_SortGroupClause, T_FkConstraint, T_PrivGrantee, T_FuncWithArgs, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index fa78b4b188..f1a1e828fc 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.369 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.370 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -108,6 +108,7 @@ typedef struct Query bool hasAggs; /* has aggregates in tlist or havingQual */ bool hasSubLinks; /* has subquery SubLink */ + bool hasDistinctOn; /* distinctClause is from DISTINCT ON */ List *rtable; /* list of range table entries */ FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */ @@ -116,13 +117,13 @@ typedef struct Query List *returningList; /* return-values list (of TargetEntry) */ - List *groupClause; /* a list of GroupClause's */ + List *groupClause; /* a list of SortGroupClause's */ Node *havingQual; /* qualifications applied to groups */ - List *distinctClause; /* a list of SortClause's */ + List *distinctClause; /* a list of SortGroupClause's */ - List *sortClause; /* a list of SortClause's */ + List *sortClause; /* a list of SortGroupClause's */ Node *limitOffset; /* # of result tuples to skip (int8 expr) */ Node *limitCount; /* # of result tuples to return (int8 expr) */ @@ -607,47 +608,58 @@ typedef struct RangeTblEntry } RangeTblEntry; /* - * SortClause - - * representation of ORDER BY clauses + * SortGroupClause - + * representation of ORDER BY, GROUP BY, DISTINCT, DISTINCT ON items + * + * You might think that ORDER BY is only interested in defining ordering, + * and GROUP/DISTINCT are only interested in defining equality. However, + * one way to implement grouping is to sort and then apply a "uniq"-like + * filter. So it's also interesting to keep track of possible sort operators + * for GROUP/DISTINCT, and in particular to try to sort for the grouping + * in a way that will also yield a requested ORDER BY ordering. So we need + * to be able to compare ORDER BY and GROUP/DISTINCT lists, which motivates + * the decision to give them the same representation. * * tleSortGroupRef must match ressortgroupref of exactly one entry of the - * associated targetlist; that is the expression to be sorted (or grouped) by. - * sortop is the OID of the ordering operator (a "<" or ">" operator). - * nulls_first means about what you'd expect. + * query's targetlist; that is the expression to be sorted or grouped by. + * eqop is the OID of the equality operator. + * sortop is the OID of the ordering operator (a "<" or ">" operator), + * or InvalidOid if not available. + * nulls_first means about what you'd expect. If sortop is InvalidOid + * then nulls_first is meaningless and should be set to false. * - * SortClauses are also used to identify targets that we will do a "Unique" - * filter step on (for SELECT DISTINCT and SELECT DISTINCT ON). The - * distinctClause list is a list of SortClauses for the expressions to be - * unique-ified. (As per comment for GroupClause, this overspecifies the - * semantics.) In SELECT DISTINCT, the distinctClause list is typically - * longer than the ORDER BY list, while in SELECT DISTINCT ON it's typically - * shorter. The two lists must match up to the end of the shorter one --- - * the parser rearranges the distinctClause if necessary to make this true. - * (This restriction ensures that only one sort step is needed to both - * satisfy the ORDER BY and set up for the Unique step. This is semantically - * necessary for DISTINCT ON, and offers no real drawback for DISTINCT.) + * In an ORDER BY item, all fields must be valid. (The eqop isn't essential + * here, but it's cheap to get it along with the sortop, and requiring it + * to be valid eases comparisons to grouping items.) + * + * In a grouping item, eqop must be valid. If the eqop is a btree equality + * operator, then sortop should be set to a compatible ordering operator. + * We prefer to set eqop/sortop/nulls_first to match any ORDER BY item that + * the query presents for the same tlist item. If there is none, we just + * use the default ordering op for the datatype. + * + * If the tlist item's type has a hash opclass but no btree opclass, then + * we will set eqop to the hash equality operator, sortop to InvalidOid, + * and nulls_first to false. A grouping item of this kind can only be + * implemented by hashing, and of course it'll never match an ORDER BY item. + * + * A query might have both ORDER BY and DISTINCT (or DISTINCT ON) clauses. + * In SELECT DISTINCT, the distinctClause list is as long or longer than the + * sortClause list, while in SELECT DISTINCT ON it's typically shorter. + * The two lists must match up to the end of the shorter one --- the parser + * rearranges the distinctClause if necessary to make this true. (This + * restriction ensures that only one sort step is needed to both satisfy the + * ORDER BY and set up for the Unique step. This is semantically necessary + * for DISTINCT ON, and presents no real drawback for DISTINCT.) */ -typedef struct SortClause +typedef struct SortGroupClause { NodeTag type; Index tleSortGroupRef; /* reference into targetlist */ - Oid sortop; /* the ordering operator ('<' op) */ - bool nulls_first; /* do NULLs come before normal values? */ -} SortClause; - -/* - * GroupClause - - * representation of GROUP BY clauses - * - * GroupClause is exactly like SortClause except for the nodetag value. - * We have routines that operate interchangeably on both. - * - * XXX SortClause overspecifies the semantics so far as GROUP BY is concerned - * (ditto for DISTINCT). It'd be better to specify an equality operator not - * an ordering operator. However, the two implementations are tightly entwined - * at the moment ... breaking them apart is work for another day. - */ -typedef SortClause GroupClause; + Oid eqop; /* the equality operator ('=' op) */ + Oid sortop; /* the ordering operator ('<' op), or 0 */ + bool nulls_first; /* do NULLs come before normal values? */ +} SortGroupClause; /* * RowMarkClause - diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 59d7af5044..f0c55112cb 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -10,7 +10,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.137 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.138 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -969,14 +969,14 @@ typedef struct CurrentOfExpr * a specific system-generated name (such as "ctid") or NULL; anything else * risks confusing ExecGetJunkAttribute! * - * ressortgroupref is used in the representation of ORDER BY and - * GROUP BY items. Targetlist entries with ressortgroupref=0 are not - * sort/group items. If ressortgroupref>0, then this item is an ORDER BY or - * GROUP BY value. No two entries in a targetlist may have the same nonzero - * ressortgroupref --- but there is no particular meaning to the nonzero - * values, except as tags. (For example, one must not assume that lower - * ressortgroupref means a more significant sort key.) The order of the - * associated SortClause or GroupClause lists determine the semantics. + * ressortgroupref is used in the representation of ORDER BY, GROUP BY, and + * DISTINCT items. Targetlist entries with ressortgroupref=0 are not + * sort/group items. If ressortgroupref>0, then this item is an ORDER BY, + * GROUP BY, and/or DISTINCT target value. No two entries in a targetlist + * may have the same nonzero ressortgroupref --- but there is no particular + * meaning to the nonzero values, except as tags. (For example, one must + * not assume that lower ressortgroupref means a more significant sort key.) + * The order of the associated SortGroupClause lists determine the semantics. * * resorigtbl/resorigcol identify the source of the column, if it is a * simple reference to a column of a base table (or view). If it is not diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index dce10d6506..69a4f4c774 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.90 2008/04/01 00:48:33 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.91 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -63,9 +63,6 @@ extern Relids find_nonnullable_rels(Node *clause); extern bool is_pseudo_constant_clause(Node *clause); extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids); -extern bool has_distinct_clause(Query *query); -extern bool has_distinct_on_clause(Query *query); - extern int NumRelids(Node *clause); extern void CommuteOpExpr(OpExpr *clause); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index e953d66520..8a8966213b 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.49 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.50 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,11 +25,11 @@ extern List *add_to_flat_tlist(List *tlist, List *vars); extern TargetEntry *get_sortgroupref_tle(Index sortref, List *targetList); -extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause, +extern TargetEntry *get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList); -extern Node *get_sortgroupclause_expr(SortClause *sortClause, +extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList); -extern List *get_sortgrouplist_exprs(List *sortClauses, +extern List *get_sortgrouplist_exprs(List *sgClauses, List *targetList); extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index 6fc357522e..357a2947cb 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.50 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.51 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,13 +30,14 @@ extern List *transformGroupClause(ParseState *pstate, List *grouplist, List **targetlist, List *sortClause); extern List *transformSortClause(ParseState *pstate, List *orderlist, List **targetlist, bool resolveUnknown); -extern List *transformDistinctClause(ParseState *pstate, List *distinctlist, +extern List *transformDistinctClause(ParseState *pstate, + List **targetlist, List *sortClause); +extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist, List **targetlist, List *sortClause); -extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, - List *sortlist, List *targetlist, - SortByDir sortby_dir, SortByNulls sortby_nulls, - List *sortby_opname, bool resolveUnknown); +extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, + List *grouplist, List *targetlist, + bool requireSortOp, bool resolveUnknown); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); diff --git a/src/include/parser/parse_oper.h b/src/include/parser/parse_oper.h index f0ec2d1f08..ad0d65b3be 100644 --- a/src/include/parser/parse_oper.h +++ b/src/include/parser/parse_oper.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/parse_oper.h,v 1.42 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/parser/parse_oper.h,v 1.43 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -45,16 +45,13 @@ extern Operator compatible_oper(ParseState *pstate, List *op, /* currently no need for compatible_left_oper/compatible_right_oper */ -/* Routines for identifying "=", "<", ">" operators for a type */ -extern Operator equality_oper(Oid argtype, bool noError); -extern Operator ordering_oper(Oid argtype, bool noError); -extern Operator reverse_ordering_oper(Oid argtype, bool noError); +/* Routines for identifying "<", "=", ">" operators for a type */ +extern void get_sort_group_operators(Oid argtype, + bool needLT, bool needEQ, bool needGT, + Oid *ltOpr, Oid *eqOpr, Oid *gtOpr); /* Convenience routines for common calls on the above */ extern Oid compatible_oper_opid(List *op, Oid arg1, Oid arg2, bool noError); -extern Oid equality_oper_funcid(Oid argtype); -extern Oid ordering_oper_opid(Oid argtype); -extern Oid reverse_ordering_oper_opid(Oid argtype); /* Extract operator OID or underlying-function OID from an Operator tuple */ extern Oid oprid(Operator op); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 66e035cffa..0538e0ff56 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.124 2008/07/30 17:05:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.125 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,7 +38,7 @@ extern bool get_ordering_op_properties(Oid opno, Oid *opfamily, Oid *opcintype, int16 *strategy); extern bool get_compare_function_for_ordering_op(Oid opno, Oid *cmpfunc, bool *reverse); -extern Oid get_equality_op_for_ordering_op(Oid opno); +extern Oid get_equality_op_for_ordering_op(Oid opno, bool *reverse); extern Oid get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type); extern List *get_mergejoin_opfamilies(Oid opno); extern bool get_compatible_hash_operators(Oid opno, @@ -47,7 +47,7 @@ extern bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno); extern void get_op_btree_interpretation(Oid opno, List **opfamilies, List **opstrats); -extern bool ops_in_same_btree_opfamily(Oid opno1, Oid opno2); +extern bool equality_ops_are_compatible(Oid opno1, Oid opno2); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); extern char *get_attname(Oid relid, AttrNumber attnum);