From bfb54ff15a447fb22e9deae096e0d45b3e4bd56f Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Fri, 9 Oct 2015 15:06:06 -0400
Subject: [PATCH] Make abbreviated key comparisons for text a bit cheaper.

If we do some byte-swapping while abbreviating, we can do comparisons
using integer arithmetic rather than memcmp.

Peter Geoghegan, reviewed and slightly revised by me.
---
 src/backend/utils/adt/varlena.c | 29 ++++++++++++++++++++---------
 src/include/port/pg_bswap.h     | 26 ++++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2fbbf5475e..49a4898987 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -26,6 +26,7 @@
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
 #include "parser/scansup.h"
+#include "port/pg_bswap.h"
 #include "regex/regex.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
@@ -1967,25 +1968,25 @@ done:
 static int
 bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
 {
-	char *a = (char *) &x;
-	char *b = (char *) &y;
-	int result;
-
-	result = memcmp(a, b, sizeof(Datum));
-
 	/*
-	 * When result = 0, the core system will call bttextfastcmp_c() or
+	 * When 0 is returned, the core system will call bttextfastcmp_c() or
 	 * bttextfastcmp_locale(). Even a strcmp() on two non-truncated strxfrm()
 	 * blobs cannot indicate *equality* authoritatively, for the same reason
 	 * that there is a strcoll() tie-breaker call to strcmp() in varstr_cmp().
 	 */
-	return result;
+	if (x > y)
+		return 1;
+	else if (x == y)
+		return 0;
+	else
+		return -1;
 }
 
 /*
  * Conversion routine for sortsupport. Converts original text to abbreviated
  * key representation. Our encoding strategy is simple -- pack the first 8
- * bytes of a strxfrm() blob into a Datum.
+ * bytes of a strxfrm() blob into a Datum (on little-endian machines, the 8
+ * bytes are stored in reverse order), and treat it as an unsigned integer.
  */
 static Datum
 bttext_abbrev_convert(Datum original, SortSupport ssup)
@@ -2104,6 +2105,16 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
 
 	addHyperLogLog(&tss->abbr_card, hash);
 
+	/*
+	 * Byteswap on little-endian machines.
+	 *
+	 * This is needed so that bttextcmp_abbrev() (an unsigned integer 3-way
+	 * comparator) works correctly on all platforms. If we didn't do this,
+	 * the comparator would have to call memcmp() with a pair of pointers to
+	 * the first byte of each abbreviated key, which is slower.
+	 */
+	res = DatumBigEndianToNative(res);
+
 	/* Don't leak memory here */
 	if (PointerGetDatum(authoritative) != original)
 		pfree(authoritative);
diff --git a/src/include/port/pg_bswap.h b/src/include/port/pg_bswap.h
index 6555942c92..e9cf93233f 100644
--- a/src/include/port/pg_bswap.h
+++ b/src/include/port/pg_bswap.h
@@ -28,7 +28,7 @@
 		 ((x << 8) & 0x00ff0000) | \
 		 ((x >> 8) & 0x0000ff00) | \
 		 ((x >> 24) & 0x000000ff))
-#endif /* HAVE__BUILTIN_BSWAP32 */
+#endif /* HAVE__BUILTIN_BSWAP32 */
 
 #ifdef HAVE__BUILTIN_BSWAP64
 #define BSWAP64(x) __builtin_bswap64(x)
@@ -41,6 +41,28 @@
 		 ((x >> 24) & 0x0000000000ff0000UL) | \
 		 ((x >> 40) & 0x000000000000ff00UL) | \
 		 ((x >> 56) & 0x00000000000000ffUL))
-#endif /* HAVE__BUILTIN_BSWAP64 */
+#endif /* HAVE__BUILTIN_BSWAP64 */
+
+/*
+ * Rearrange the bytes of a Datum from big-endian order into the native byte
+ * order. On big-endian machines, this does nothing at all. Note that the C
+ * type Datum is an unsigned integer type on all platforms.
+ *
+ * One possible application of the DatumBigEndianToNative() macro is to make
+ * bitwise comparisons cheaper. A simple 3-way comparison of Datums
+ * transformed by the macro (based on native, unsigned comparisons) will return
+ * the same result as a memcmp() of the corresponding original Datums, but can
+ * be much cheaper. It's generally safe to do this on big-endian systems
+ * without any special transformation occurring first.
+ */
+#ifdef WORDS_BIGENDIAN
+#define DatumBigEndianToNative(x) (x)
+#else /* !WORDS_BIGENDIAN */
+#if SIZEOF_DATUM == 8
+#define DatumBigEndianToNative(x) BSWAP64(x)
+#else /* SIZEOF_DATUM != 8 */
+#define DatumBigEndianToNative(x) BSWAP32(x)
+#endif /* SIZEOF_DATUM == 8 */
+#endif /* WORDS_BIGENDIAN */
 
 #endif /* PG_BSWAP_H */