From 120b31dc5406cf004b99150b84b48dc312577eca Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 22 Dec 2015 13:57:18 -0500 Subject: [PATCH] Comment improvements for abbreviated keys. Peter Geoghegan and Robert Haas --- src/backend/utils/sort/tuplesort.c | 17 ++++++++++---- src/include/utils/sortsupport.h | 37 +++++++++++++++--------------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 6572af875a..cf1cdcbeab 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -930,7 +930,14 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, state->sortKeys->ssup_collation = sortCollation; state->sortKeys->ssup_nulls_first = nullsFirstFlag; - /* abbreviation is possible here only for by-reference types */ + /* + * Abbreviation is possible here only for by-reference types. In theory, + * a pass-by-value datatype could have an abbreviated form that is cheaper + * to compare. In a tuple sort, we could support that, because we can + * always extract the original datum from the tuple is needed. Here, we + * can't, because a datum sort only stores a single copy of the datum; + * the "tuple" field of each sortTuple is NULL. + */ state->sortKeys->abbreviate = !typbyval; PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys); @@ -1271,7 +1278,7 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, * Store ordinary Datum representation, or NULL value. If there is a * converter it won't expect NULL values, and cost model is not * required to account for NULL, so in that case we avoid calling - * converter and just set datum1 to "void" representation (to be + * converter and just set datum1 to zeroed representation (to be * consistent). */ stup.datum1 = original; @@ -3155,7 +3162,7 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) * Store ordinary Datum representation, or NULL value. If there is a * converter it won't expect NULL values, and cost model is not * required to account for NULL, so in that case we avoid calling - * converter and just set datum1 to "void" representation (to be + * converter and just set datum1 to zeroed representation (to be * consistent). */ stup->datum1 = original; @@ -3397,7 +3404,7 @@ copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) * Store ordinary Datum representation, or NULL value. If there is a * converter it won't expect NULL values, and cost model is not * required to account for NULL, so in that case we avoid calling - * converter and just set datum1 to "void" representation (to be + * converter and just set datum1 to zeroed representation (to be * consistent). */ stup->datum1 = original; @@ -3701,7 +3708,7 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) * Store ordinary Datum representation, or NULL value. If there is a * converter it won't expect NULL values, and cost model is not * required to account for NULL, so in that case we avoid calling - * converter and just set datum1 to "void" representation (to be + * converter and just set datum1 to zeroed representation (to be * consistent). */ stup->datum1 = original; diff --git a/src/include/utils/sortsupport.h b/src/include/utils/sortsupport.h index 787404ed90..a984edda58 100644 --- a/src/include/utils/sortsupport.h +++ b/src/include/utils/sortsupport.h @@ -116,24 +116,25 @@ typedef struct SortSupportData * * This allows opclass authors to supply a conversion routine, used to * create an alternative representation of the underlying type (an - * "abbreviated key"). Typically, this representation is an ad-hoc, - * pass-by-value Datum format that only the opclass has knowledge of. An - * alternative comparator, used only with this alternative representation - * must also be provided (which is assigned to "comparator"). This - * representation is a simple approximation of the original Datum. It - * must be possible to compare datums of this representation with each - * other using the supplied alternative comparator, and have any non-zero - * return value be a reliable proxy for what a proper comparison would - * indicate. Returning zero from the alternative comparator does not - * indicate equality, as with a conventional support routine 1, though -- - * it indicates that it wasn't possible to determine how the two - * abbreviated values compared. A proper comparison, using - * "abbrev_full_comparator"/ ApplySortAbbrevFullComparator() is therefore - * required. In many cases this results in most or all comparisons only - * using the cheap alternative comparison func, which is typically - * implemented as code that compiles to just a few CPU instructions. CPU - * cache miss penalties are expensive; to get good overall performance, - * sort infrastructure must heavily weigh cache performance. + * "abbreviated key"). This representation must be pass-by-value and + * typically will use some ad-hoc format that only the opclass has + * knowledge of. An alternative comparator, used only with this + * alternative representation must also be provided (which is assigned to + * "comparator"). This representation is a simple approximation of the + * original Datum. It must be possible to compare datums of this + * representation with each other using the supplied alternative + * comparator, and have any non-zero return value be a reliable proxy for + * what a proper comparison would indicate. Returning zero from the + * alternative comparator does not indicate equality, as with a + * conventional support routine 1, though -- it indicates that it wasn't + * possible to determine how the two abbreviated values compared. A + * proper comparison, using "abbrev_full_comparator"/ + * ApplySortAbbrevFullComparator() is therefore required. In many cases + * this results in most or all comparisons only using the cheap + * alternative comparison func, which is typically implemented as code + * that compiles to just a few CPU instructions. CPU cache miss penalties + * are expensive; to get good overall performance, sort infrastructure + * must heavily weigh cache performance. * * Opclass authors must consider the final cardinality of abbreviated keys * when devising an encoding scheme. It's possible for a strategy to work