Make abbreviated key comparisons for text a bit cheaper.

If we do some byte-swapping while abbreviating, we can do comparisons
using integer arithmetic rather than memcmp.

Peter Geoghegan, reviewed and slightly revised by me.
This commit is contained in:
Robert Haas 2015-10-09 15:06:06 -04:00
parent db0f6cad48
commit bfb54ff15a
2 changed files with 44 additions and 11 deletions

View File

@ -26,6 +26,7 @@
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "parser/scansup.h"
#include "port/pg_bswap.h"
#include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
@ -1967,25 +1968,25 @@ done:
static int
bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
{
	/*
	 * Three-way comparison of two abbreviated keys: returns 1 if x sorts
	 * after y, -1 if it sorts before, and 0 if the keys are identical.  ssup
	 * is not consulted.
	 *
	 * The keys are compared as plain unsigned integers.  This gives the same
	 * answer as a bytewise memcmp() of the packed blob prefix only because
	 * bttext_abbrev_convert() passes each key through
	 * DatumBigEndianToNative() before storing it; a memcmp() of those
	 * byte-swapped keys would order them incorrectly on little-endian
	 * machines.
	 *
	 * When 0 is returned, the core system will call bttextfastcmp_c() or
	 * bttextfastcmp_locale().  Even a strcmp() on two non-truncated strxfrm()
	 * blobs cannot indicate *equality* authoritatively, for the same reason
	 * that there is a strcoll() tie-breaker call to strcmp() in varstr_cmp().
	 */
	if (x > y)
		return 1;
	else if (x == y)
		return 0;
	else
		return -1;
}
/*
* Conversion routine for sortsupport. Converts original text to abbreviated
* key representation. Our encoding strategy is simple -- pack the first 8
* bytes of a strxfrm() blob into a Datum (on little-endian machines, the 8
* bytes are stored in reverse order), and treat it as an unsigned integer.
*/
static Datum
bttext_abbrev_convert(Datum original, SortSupport ssup)
@ -2104,6 +2105,16 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
addHyperLogLog(&tss->abbr_card, hash);
/*
* Byteswap on little-endian machines.
*
* This is needed so that bttextcmp_abbrev() (an unsigned integer 3-way
* comparator) works correctly on all platforms. If we didn't do this,
* the comparator would have to call memcmp() with a pair of pointers to
* the first byte of each abbreviated key, which is slower.
*/
res = DatumBigEndianToNative(res);
/* Don't leak memory here */
if (PointerGetDatum(authoritative) != original)
pfree(authoritative);

View File

@ -28,7 +28,7 @@
((x << 8) & 0x00ff0000) | \
((x >> 8) & 0x0000ff00) | \
((x >> 24) & 0x000000ff))
#endif /* HAVE__BUILTIN_BSWAP32 */
#endif /* HAVE__BUILTIN_BSWAP32 */
#ifdef HAVE__BUILTIN_BSWAP64
#define BSWAP64(x) __builtin_bswap64(x)
@ -41,6 +41,28 @@
((x >> 24) & 0x0000000000ff0000UL) | \
((x >> 40) & 0x000000000000ff00UL) | \
((x >> 56) & 0x00000000000000ffUL))
#endif /* HAVE__BUILTIN_BSWAP64 */
#endif /* HAVE__BUILTIN_BSWAP64 */
/*
 * DatumBigEndianToNative(x)
 *		Convert a Datum whose bytes are in big-endian order to the native
 *		byte order of the machine.
 *
 * On a big-endian build the value is passed through untouched; otherwise the
 * bytes are reversed, using the 64-bit swap when SIZEOF_DATUM is 8 and the
 * 32-bit swap in every other case.  Datum is an unsigned integer type on all
 * platforms.
 *
 * The main use is cheap bitwise ordering: once two Datums have been run
 * through this macro, an ordinary unsigned 3-way comparison of them yields
 * the same result as memcmp() on the untransformed Datums, at lower cost.
 * On big-endian systems that comparison is generally safe even without any
 * transformation being applied first.
 */
#if defined(WORDS_BIGENDIAN)
#define DatumBigEndianToNative(x)	(x)
#elif SIZEOF_DATUM == 8
#define DatumBigEndianToNative(x)	BSWAP64(x)
#else							/* little-endian, SIZEOF_DATUM != 8 */
#define DatumBigEndianToNative(x)	BSWAP32(x)
#endif							/* WORDS_BIGENDIAN / SIZEOF_DATUM */
#endif /* PG_BSWAP_H */