Make use of compiler builtins and/or assembly for CLZ, CTZ, POPCNT.
Test for the compiler builtins __builtin_clz, __builtin_ctz, and
__builtin_popcount, and make use of these in preference to
handwritten C code if they're available. Create src/port
infrastructure for "leftmost one", "rightmost one", and "popcount"
so as to centralize these decisions.
On x86_64, __builtin_popcount generally won't make use of the POPCNT
opcode because that's not universally supported yet. Provide code
that checks CPUID and then calls POPCNT via asm() if available.
This requires indirecting through a function pointer, which is
an annoying amount of overhead for a one-instruction operation,
but it's probably not worth working harder than this for our
current use-cases.
I'm not sure we've found all the existing places that could profit
from this new infrastructure; but we at least touched all the
ones that used copied-and-pasted versions of the bitmapset.c code,
and got rid of multiple copies of the associated constant arrays.
While at it, replace c-compiler.m4's one-per-builtin-function
macros with a single one that can handle all the cases we need
to worry about so far. Also, because I'm paranoid, make those
checks into AC_LINK checks rather than just AC_COMPILE; the
former coding failed to verify that libgcc has support for the
builtin, in cases where it's not inline code.
David Rowley, Thomas Munro, Alvaro Herrera, Tom Lane
Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
2019-02-16 05:22:27 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* pg_bitutils.h
|
|
|
|
* Miscellaneous functions for bit-wise operations.
|
|
|
|
*
|
|
|
|
*
|
2020-01-01 18:21:45 +01:00
|
|
|
* Copyright (c) 2019-2020, PostgreSQL Global Development Group
|
Make use of compiler builtins and/or assembly for CLZ, CTZ, POPCNT.
Test for the compiler builtins __builtin_clz, __builtin_ctz, and
__builtin_popcount, and make use of these in preference to
handwritten C code if they're available. Create src/port
infrastructure for "leftmost one", "rightmost one", and "popcount"
so as to centralize these decisions.
On x86_64, __builtin_popcount generally won't make use of the POPCNT
opcode because that's not universally supported yet. Provide code
that checks CPUID and then calls POPCNT via asm() if available.
This requires indirecting through a function pointer, which is
an annoying amount of overhead for a one-instruction operation,
but it's probably not worth working harder than this for our
current use-cases.
I'm not sure we've found all the existing places that could profit
from this new infrastructure; but we at least touched all the
ones that used copied-and-pasted versions of the bitmapset.c code,
and got rid of multiple copies of the associated constant arrays.
While at it, replace c-compiler.m4's one-per-builtin-function
macros with a single one that can handle all the cases we need
to worry about so far. Also, because I'm paranoid, make those
checks into AC_LINK checks rather than just AC_COMPILE; the
former coding failed to verify that libgcc has support for the
builtin, in cases where it's not inline code.
David Rowley, Thomas Munro, Alvaro Herrera, Tom Lane
Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
2019-02-16 05:22:27 +01:00
|
|
|
*
|
|
|
|
* src/include/port/pg_bitutils.h
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#ifndef PG_BITUTILS_H
|
|
|
|
#define PG_BITUTILS_H
|
|
|
|
|
|
|
|
/*
 * Lookup table indexed by a byte value (0..255).  The non-builtin code
 * paths of pg_leftmost_one_pos32/64 add the looked-up entry to a
 * byte-aligned shift count to obtain the position of the most significant
 * set bit.  Only declared here; the definition lives outside this header.
 */
extern PGDLLIMPORT const uint8 pg_leftmost_one_pos[256];
|
|
|
|
/*
 * Lookup table indexed by a byte value (0..255).  The non-builtin code
 * paths of pg_rightmost_one_pos32/64 add the looked-up entry to the number
 * of low-order zero bits already skipped to obtain the position of the
 * least significant set bit.  Only declared here; the definition lives
 * outside this header.
 */
extern PGDLLIMPORT const uint8 pg_rightmost_one_pos[256];
|
|
|
|
/*
 * Lookup table with one entry per byte value (0..255).
 * NOTE(review): presumably each entry is the number of set bits in that
 * byte value; nothing in this header reads the table, so confirm against
 * its definition.
 */
extern PGDLLIMPORT const uint8 pg_number_of_ones[256];
|
|
|
|
|
|
|
|
/*
 * pg_leftmost_one_pos32
 *		Find the highest-order 1 bit in a 32-bit word.
 *
 * The result is that bit's index counted from the least significant bit
 * (so bit 0 is the lowest and bit 31 the highest).  The caller must not
 * pass zero.
 */
static inline int
pg_leftmost_one_pos32(uint32 word)
{
#ifndef HAVE__BUILTIN_CLZ
	int			bitpos;

	Assert(word != 0);

	/* Step down one byte at a time until the topmost nonzero byte is found. */
	for (bitpos = 32 - 8; (word >> bitpos) == 0; bitpos -= 8)
		;

	/* The table resolves the bit position within that byte. */
	return bitpos + pg_leftmost_one_pos[(word >> bitpos) & 0xFF];
#else
	int			leading_zeros;

	Assert(word != 0);

	/* The highest set bit sits just below the run of leading zeros. */
	leading_zeros = __builtin_clz(word);

	return 31 - leading_zeros;
#endif							/* HAVE__BUILTIN_CLZ */
}
|
|
|
|
|
|
|
|
/*
 * pg_leftmost_one_pos64
 *		Find the highest-order 1 bit in a 64-bit word.
 *
 * The result is that bit's index counted from the least significant bit
 * (0..63).  The caller must not pass zero.
 */
static inline int
pg_leftmost_one_pos64(uint64 word)
{
#ifndef HAVE__BUILTIN_CLZ
	int			bitpos;

	Assert(word != 0);

	/* Step down one byte at a time until the topmost nonzero byte is found. */
	for (bitpos = 64 - 8; (word >> bitpos) == 0; bitpos -= 8)
		;

	/* The table resolves the bit position within that byte. */
	return bitpos + pg_leftmost_one_pos[(word >> bitpos) & 0xFF];
#else
	int			leading_zeros;

	Assert(word != 0);

	/* Pick the builtin whose operand type is 64 bits wide on this platform. */
#if defined(HAVE_LONG_INT_64)
	leading_zeros = __builtin_clzl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	leading_zeros = __builtin_clzll(word);
#else
#error must have a working 64-bit integer datatype
#endif

	return 63 - leading_zeros;
#endif							/* HAVE__BUILTIN_CLZ */
}
|
|
|
|
|
|
|
|
/*
 * pg_rightmost_one_pos32
 *		Find the lowest-order 1 bit in a 32-bit word.
 *
 * The result is that bit's index counted from the least significant bit
 * (0..31).  The caller must not pass zero.
 */
static inline int
pg_rightmost_one_pos32(uint32 word)
{
#ifndef HAVE__BUILTIN_CTZ
	int			skipped;

	Assert(word != 0);

	/* Discard all-zero low-order bytes, tallying the bits thrown away. */
	for (skipped = 0; (word & 0xFF) == 0; skipped += 8)
		word >>= 8;

	/* The table resolves the bit position within the first nonzero byte. */
	return skipped + pg_rightmost_one_pos[word & 0xFF];
#else
	int			trailing_zeros;

	Assert(word != 0);

	/* The count of trailing zeros is exactly the index of the lowest 1 bit. */
	trailing_zeros = __builtin_ctz(word);

	return trailing_zeros;
#endif							/* HAVE__BUILTIN_CTZ */
}
|
|
|
|
|
|
|
|
/*
 * pg_rightmost_one_pos64
 *		Find the lowest-order 1 bit in a 64-bit word.
 *
 * The result is that bit's index counted from the least significant bit
 * (0..63).  The caller must not pass zero.
 */
static inline int
pg_rightmost_one_pos64(uint64 word)
{
#ifndef HAVE__BUILTIN_CTZ
	int			skipped;

	Assert(word != 0);

	/* Discard all-zero low-order bytes, tallying the bits thrown away. */
	for (skipped = 0; (word & 0xFF) == 0; skipped += 8)
		word >>= 8;

	/* The table resolves the bit position within the first nonzero byte. */
	return skipped + pg_rightmost_one_pos[word & 0xFF];
#else
	int			trailing_zeros;

	Assert(word != 0);

	/* Pick the builtin whose operand type is 64 bits wide on this platform. */
#if defined(HAVE_LONG_INT_64)
	trailing_zeros = __builtin_ctzl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	trailing_zeros = __builtin_ctzll(word);
#else
#error must have a working 64-bit integer datatype
#endif

	return trailing_zeros;
#endif							/* HAVE__BUILTIN_CTZ */
}
|
|
|
|
|
|
|
|
/* Count the number of one-bits in a uint32 or uint64 */
|
|
|
|
/*
 * pg_popcount32 is a pointer to a function taking a uint32 and returning
 * an int, so callers invoke it as pg_popcount32(word).  The pointer itself
 * is defined outside this header.
 * NOTE(review): presumably it is pointed at the best available popcount
 * implementation at run time -- confirm against the defining source file.
 */
extern int (*pg_popcount32) (uint32 word);
|
|
|
|
/*
 * pg_popcount64 is a pointer to a function taking a uint64 and returning
 * an int, so callers invoke it as pg_popcount64(word).  The pointer itself
 * is defined outside this header.
 * NOTE(review): presumably it is pointed at the best available popcount
 * implementation at run time -- confirm against the defining source file.
 */
extern int (*pg_popcount64) (uint64 word);
|
|
|
|
|
|
|
|
/* Count the number of one-bits in a byte array */
|
|
|
|
/*
 * pg_popcount
 *		Returns the total number of one-bits in the byte array at "buf".
 *
 * NOTE(review): "bytes" is presumably the number of bytes of "buf" to
 * examine -- confirm against the definition, which is outside this header.
 */
extern uint64 pg_popcount(const char *buf, int bytes);
|
|
|
|
|
2019-12-23 23:31:24 +01:00
|
|
|
/*
 * pg_rotate_right32
 *		Rotate the bits of "word" to the right by n bits.
 *
 * Bits shifted out of the low-order end re-enter at the high-order end.
 *
 * The rotation count is reduced modulo the width of the word, so n = 0 (or
 * any multiple of the width) returns "word" unchanged.  Without that
 * reduction, n = 0 would evaluate a left shift by the full width of the
 * type, which C leaves undefined.
 */
static inline uint32
pg_rotate_right32(uint32 word, int n)
{
	const unsigned int mask = sizeof(word) * BITS_PER_BYTE - 1;
	const unsigned int count = (unsigned int) n & mask;

	/*
	 * Mask the complementary shift too: when count is 0 it would otherwise
	 * equal the full word width.
	 */
	return (word >> count) | (word << ((mask + 1 - count) & mask));
}
|
|
|
|
|
Make use of compiler builtins and/or assembly for CLZ, CTZ, POPCNT.
Test for the compiler builtins __builtin_clz, __builtin_ctz, and
__builtin_popcount, and make use of these in preference to
handwritten C code if they're available. Create src/port
infrastructure for "leftmost one", "rightmost one", and "popcount"
so as to centralize these decisions.
On x86_64, __builtin_popcount generally won't make use of the POPCNT
opcode because that's not universally supported yet. Provide code
that checks CPUID and then calls POPCNT via asm() if available.
This requires indirecting through a function pointer, which is
an annoying amount of overhead for a one-instruction operation,
but it's probably not worth working harder than this for our
current use-cases.
I'm not sure we've found all the existing places that could profit
from this new infrastructure; but we at least touched all the
ones that used copied-and-pasted versions of the bitmapset.c code,
and got rid of multiple copies of the associated constant arrays.
While at it, replace c-compiler.m4's one-per-builtin-function
macros with a single one that can handle all the cases we need
to worry about so far. Also, because I'm paranoid, make those
checks into AC_LINK checks rather than just AC_COMPILE; the
former coding failed to verify that libgcc has support for the
builtin, in cases where it's not inline code.
David Rowley, Thomas Munro, Alvaro Herrera, Tom Lane
Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
2019-02-16 05:22:27 +01:00
|
|
|
#endif /* PG_BITUTILS_H */
|