Tweak sorting so that nulls appear at the front of a descending sort

(vs. at the end of a normal sort).  This ensures that explicit sorts
yield the same ordering as a btree index scan.  To be really sure that
that equivalence holds, we use the btree entries in pg_amop to decide
whether we are looking at a '<' or '>' operator.  For a sort operator
that has no btree association, we put the nulls at the front if the
operator is named '>' ... pretty grotty, but it does the right thing in
simple ASC and DESC cases, and at least there's no possibility of getting
a different answer depending on the plan type chosen.
This commit is contained in:
Tom Lane 2001-06-02 19:01:53 +00:00
parent e542036461
commit 5433b48380
3 changed files with 144 additions and 127 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.18 2001/06/02 19:01:53 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -1439,27 +1439,12 @@ compare_scalars(const void *a, const void *b)
int ta = ((ScalarItem *) a)->tupno; int ta = ((ScalarItem *) a)->tupno;
Datum db = ((ScalarItem *) b)->value; Datum db = ((ScalarItem *) b)->value;
int tb = ((ScalarItem *) b)->tupno; int tb = ((ScalarItem *) b)->tupno;
int32 compare;
if (datumCmpFnKind == SORTFUNC_LT) compare = ApplySortFunction(datumCmpFn, datumCmpFnKind,
{ da, false, db, false);
if (DatumGetBool(FunctionCall2(datumCmpFn, da, db))) if (compare != 0)
return -1; /* a < b */ return compare;
if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
return 1; /* a > b */
}
else
{
/* sort function is CMP or REVCMP */
int32 compare;
compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
if (compare != 0)
{
if (datumCmpFnKind == SORTFUNC_REVCMP)
compare = -compare;
return compare;
}
}
/* /*
* The two datums are equal, so update datumCmpTupnoLink[]. * The two datums are equal, so update datumCmpTupnoLink[].

View File

@ -78,7 +78,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.17 2001/06/02 19:01:52 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -90,10 +90,12 @@
#include "catalog/catname.h" #include "catalog/catname.h"
#include "catalog/pg_amop.h" #include "catalog/pg_amop.h"
#include "catalog/pg_amproc.h" #include "catalog/pg_amproc.h"
#include "catalog/pg_operator.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "utils/fmgroids.h" #include "utils/fmgroids.h"
#include "utils/logtape.h" #include "utils/logtape.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/tuplesort.h" #include "utils/tuplesort.h"
/* /*
@ -1732,53 +1734,25 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
for (nkey = 0; nkey < state->nKeys; nkey++) for (nkey = 0; nkey < state->nKeys; nkey++)
{ {
ScanKey scanKey = state->scanKeys + nkey; ScanKey scanKey = state->scanKeys + nkey;
SortFunctionKind fnKind = state->sortFnKinds[nkey];
AttrNumber attno = scanKey->sk_attno; AttrNumber attno = scanKey->sk_attno;
Datum lattr, Datum datum1,
rattr; datum2;
bool isnull1, bool isnull1,
isnull2; isnull2;
int32 compare;
lattr = heap_getattr(ltup, attno, tupDesc, &isnull1); datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
rattr = heap_getattr(rtup, attno, tupDesc, &isnull2); datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
if (isnull1) compare = ApplySortFunction(&scanKey->sk_func,
state->sortFnKinds[nkey],
datum1, isnull1, datum2, isnull2);
if (compare != 0)
{ {
if (!isnull2) /* dead code? SK_COMMUTE can't actually be set here, can it? */
return 1; /* NULL sorts after non-NULL */ if (scanKey->sk_flags & SK_COMMUTE)
} compare = -compare;
else if (isnull2) return compare;
return -1;
else
{
int32 compare;
if (fnKind == SORTFUNC_LT)
{
if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
lattr, rattr)))
compare = -1; /* a < b */
else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
rattr, lattr)))
compare = 1; /* a > b */
else
compare = 0;
}
else
{
/* sort function is CMP or REVCMP */
compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
lattr, rattr));
if (fnKind == SORTFUNC_REVCMP)
compare = -compare;
}
if (compare != 0)
{
if (scanKey->sk_flags & SK_COMMUTE)
compare = -compare;
return compare;
}
} }
} }
@ -1861,7 +1835,6 @@ tuplesize_heap(Tuplesortstate *state, void *tup)
static int static int
comparetup_index(Tuplesortstate *state, const void *a, const void *b) comparetup_index(Tuplesortstate *state, const void *a, const void *b)
{ {
/* /*
* This is almost the same as _bt_tuplecompare(), but we need to keep * This is almost the same as _bt_tuplecompare(), but we need to keep
* track of whether any null fields are present. * track of whether any null fields are present.
@ -1880,41 +1853,28 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
for (i = 1; i <= keysz; i++) for (i = 1; i <= keysz; i++)
{ {
ScanKey entry = &scankey[i - 1]; ScanKey entry = &scankey[i - 1];
Datum attrDatum1, Datum datum1,
attrDatum2; datum2;
bool isFirstNull, bool isnull1,
isSecondNull; isnull2;
int32 compare; int32 compare;
attrDatum1 = index_getattr(tuple1, i, tupDes, &isFirstNull); datum1 = index_getattr(tuple1, i, tupDes, &isnull1);
attrDatum2 = index_getattr(tuple2, i, tupDes, &isSecondNull); datum2 = index_getattr(tuple2, i, tupDes, &isnull2);
/* see comments about NULLs handling in btbuild */ /* see comments about NULLs handling in btbuild */
if (isFirstNull) /* attr in tuple1 is NULL */
{ /* the comparison function is always of CMP type */
if (isSecondNull) /* attr in tuple2 is NULL too */ compare = ApplySortFunction(&entry->sk_func, SORTFUNC_CMP,
{ datum1, isnull1, datum2, isnull2);
compare = 0;
equal_hasnull = true;
}
else
compare = 1; /* NULL ">" not-NULL */
}
else if (isSecondNull) /* attr in tuple1 is NOT_NULL and */
{ /* attr in tuple2 is NULL */
compare = -1; /* not-NULL "<" NULL */
}
else
{
/* the comparison function is always of CMP type */
compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
attrDatum1,
attrDatum2));
}
if (compare != 0) if (compare != 0)
return (int) compare; /* done when we find unequal return (int) compare; /* done when we find unequal
* attributes */ * attributes */
/* they are equal, so we only need to examine one null flag */
if (isnull1)
equal_hasnull = true;
} }
/* /*
@ -2002,35 +1962,9 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
DatumTuple *ltup = (DatumTuple *) a; DatumTuple *ltup = (DatumTuple *) a;
DatumTuple *rtup = (DatumTuple *) b; DatumTuple *rtup = (DatumTuple *) b;
if (ltup->isNull) return ApplySortFunction(&state->sortOpFn, state->sortFnKind,
{ ltup->val, ltup->isNull,
if (!rtup->isNull) rtup->val, rtup->isNull);
return 1; /* NULL sorts after non-NULL */
return 0;
}
else if (rtup->isNull)
return -1;
else if (state->sortFnKind == SORTFUNC_LT)
{
if (DatumGetBool(FunctionCall2(&state->sortOpFn,
ltup->val, rtup->val)))
return -1; /* a < b */
if (DatumGetBool(FunctionCall2(&state->sortOpFn,
rtup->val, ltup->val)))
return 1; /* a > b */
return 0;
}
else
{
/* sort function is CMP or REVCMP */
int32 compare;
compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
ltup->val, rtup->val));
if (state->sortFnKind == SORTFUNC_REVCMP)
compare = -compare;
return compare;
}
} }
static void * static void *
@ -2123,6 +2057,7 @@ SelectSortFunction(Oid sortOperator,
HeapScanDesc scan; HeapScanDesc scan;
ScanKeyData skey[3]; ScanKeyData skey[3];
HeapTuple tuple; HeapTuple tuple;
Form_pg_operator optup;
Oid opclass = InvalidOid; Oid opclass = InvalidOid;
/* /*
@ -2207,11 +2142,97 @@ SelectSortFunction(Oid sortOperator,
return; return;
} }
/* Can't find a comparator, so use the operator as-is */ /*
* Can't find a comparator, so use the operator as-is. Decide whether
*kind = SORTFUNC_LT; * it is forward or reverse sort by looking at its name (grotty, but
*sortFunction = get_opcode(sortOperator); * this only matters for deciding which end NULLs should get sorted to).
if (!RegProcedureIsValid(*sortFunction)) */
elog(ERROR, "SelectSortFunction: operator %u has no implementation", tuple = SearchSysCache(OPEROID,
ObjectIdGetDatum(sortOperator),
0, 0, 0);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "SelectSortFunction: cache lookup failed for operator %u",
sortOperator); sortOperator);
optup = (Form_pg_operator) GETSTRUCT(tuple);
if (strcmp(NameStr(optup->oprname), ">") == 0)
*kind = SORTFUNC_REVLT;
else
*kind = SORTFUNC_LT;
*sortFunction = optup->oprcode;
ReleaseSysCache(tuple);
Assert(RegProcedureIsValid(*sortFunction));
}
/*
* Apply a sort function (by now converted to fmgr lookup form)
* and return a 3-way comparison result. This takes care of handling
* NULLs and sort ordering direction properly.
*/
int32
ApplySortFunction(FmgrInfo *sortFunction, SortFunctionKind kind,
Datum datum1, bool isNull1,
Datum datum2, bool isNull2)
{
switch (kind)
{
case SORTFUNC_LT:
if (isNull1)
{
if (isNull2)
return 0;
return 1; /* NULL sorts after non-NULL */
}
if (isNull2)
return -1;
if (DatumGetBool(FunctionCall2(sortFunction, datum1, datum2)))
return -1; /* a < b */
if (DatumGetBool(FunctionCall2(sortFunction, datum2, datum1)))
return 1; /* a > b */
return 0;
case SORTFUNC_REVLT:
/* We reverse the ordering of NULLs, but not the operator */
if (isNull1)
{
if (isNull2)
return 0;
return -1; /* NULL sorts before non-NULL */
}
if (isNull2)
return 1;
if (DatumGetBool(FunctionCall2(sortFunction, datum1, datum2)))
return -1; /* a < b */
if (DatumGetBool(FunctionCall2(sortFunction, datum2, datum1)))
return 1; /* a > b */
return 0;
case SORTFUNC_CMP:
if (isNull1)
{
if (isNull2)
return 0;
return 1; /* NULL sorts after non-NULL */
}
if (isNull2)
return -1;
return DatumGetInt32(FunctionCall2(sortFunction,
datum1, datum2));
case SORTFUNC_REVCMP:
if (isNull1)
{
if (isNull2)
return 0;
return -1; /* NULL sorts before non-NULL */
}
if (isNull2)
return 1;
return - DatumGetInt32(FunctionCall2(sortFunction,
datum1, datum2));
default:
elog(ERROR, "Invalid SortFunctionKind %d", (int) kind);
return 0; /* can't get here, but keep compiler quiet */
}
} }

View File

@ -13,7 +13,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $ * $Id: tuplesort.h,v 1.8 2001/06/02 19:01:52 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -22,6 +22,7 @@
#include "access/htup.h" #include "access/htup.h"
#include "access/itup.h" #include "access/itup.h"
#include "fmgr.h"
/* Tuplesortstate is an opaque type whose details are not known outside tuplesort.c. */ /* Tuplesortstate is an opaque type whose details are not known outside tuplesort.c. */
@ -83,6 +84,7 @@ extern void tuplesort_restorepos(Tuplesortstate *state);
typedef enum typedef enum
{ {
SORTFUNC_LT, /* raw "<" operator */ SORTFUNC_LT, /* raw "<" operator */
SORTFUNC_REVLT, /* raw "<" operator, but reverse NULLs */
SORTFUNC_CMP, /* -1 / 0 / 1 three-way comparator */ SORTFUNC_CMP, /* -1 / 0 / 1 three-way comparator */
SORTFUNC_REVCMP /* 1 / 0 / -1 (reversed) 3-way comparator */ SORTFUNC_REVCMP /* 1 / 0 / -1 (reversed) 3-way comparator */
} SortFunctionKind; } SortFunctionKind;
@ -91,4 +93,13 @@ extern void SelectSortFunction(Oid sortOperator,
RegProcedure *sortFunction, RegProcedure *sortFunction,
SortFunctionKind *kind); SortFunctionKind *kind);
/*
* Apply a sort function (by now converted to fmgr lookup form)
* and return a 3-way comparison result. This takes care of handling
* NULLs and sort ordering direction properly.
*/
extern int32 ApplySortFunction(FmgrInfo *sortFunction, SortFunctionKind kind,
Datum datum1, bool isNull1,
Datum datum2, bool isNull2);
#endif /* TUPLESORT_H */ #endif /* TUPLESORT_H */