Fix code for probing availability of AVX-512.
This commit fixes a few things: * Instead of checking for CPU support of the "xsave" extension, we need to check for OS support of XGETBV instructions via the "osxsave" flag. * We must check that additional XCR0 bits are set to be sure the ZMM registers are fully enabled. * We should use the recommended ordering of steps. Specifically, we need to check that the ZMM registers are enabled prior to checking for AVX-512 via CPUID. In passing, split this code into separate functions to improve readability. Reported-by: Andrew Kane Reviewed-by: Akash Shankaran, Raghuveer Devulapalli Discussion: https://postgr.es/m/20240418024459.GA3385227%40nathanxps13
This commit is contained in:
parent
bb3ca23239
commit
598e0114a3
|
@ -34,39 +34,13 @@
|
||||||
#ifdef TRY_POPCNT_FAST
|
#ifdef TRY_POPCNT_FAST
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns true if the CPU supports the instructions required for the AVX-512
|
* Does CPUID say there's support for XSAVE instructions?
|
||||||
* pg_popcount() implementation.
|
|
||||||
*/
|
*/
|
||||||
bool
|
static inline bool
|
||||||
pg_popcount_avx512_available(void)
|
xsave_available(void)
|
||||||
{
|
{
|
||||||
unsigned int exx[4] = {0, 0, 0, 0};
|
unsigned int exx[4] = {0, 0, 0, 0};
|
||||||
|
|
||||||
/* Does CPUID say there's support for AVX-512 popcount instructions? */
|
|
||||||
#if defined(HAVE__GET_CPUID_COUNT)
|
|
||||||
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
|
|
||||||
#elif defined(HAVE__CPUIDEX)
|
|
||||||
__cpuidex(exx, 7, 0);
|
|
||||||
#else
|
|
||||||
#error cpuid instruction not available
|
|
||||||
#endif
|
|
||||||
if ((exx[2] & (1 << 14)) == 0) /* avx512-vpopcntdq */
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Does CPUID say there's support for AVX-512 byte and word instructions? */
|
|
||||||
memset(exx, 0, sizeof(exx));
|
|
||||||
#if defined(HAVE__GET_CPUID_COUNT)
|
|
||||||
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
|
|
||||||
#elif defined(HAVE__CPUIDEX)
|
|
||||||
__cpuidex(exx, 7, 0);
|
|
||||||
#else
|
|
||||||
#error cpuid instruction not available
|
|
||||||
#endif
|
|
||||||
if ((exx[1] & (1 << 30)) == 0) /* avx512-bw */
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Does CPUID say there's support for XSAVE instructions? */
|
|
||||||
memset(exx, 0, sizeof(exx));
|
|
||||||
#if defined(HAVE__GET_CPUID)
|
#if defined(HAVE__GET_CPUID)
|
||||||
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
|
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||||
#elif defined(HAVE__CPUID)
|
#elif defined(HAVE__CPUID)
|
||||||
|
@ -74,15 +48,55 @@ pg_popcount_avx512_available(void)
|
||||||
#else
|
#else
|
||||||
#error cpuid instruction not available
|
#error cpuid instruction not available
|
||||||
#endif
|
#endif
|
||||||
if ((exx[2] & (1 << 26)) == 0) /* xsave */
|
return (exx[2] & (1 << 27)) != 0; /* osxsave */
|
||||||
return false;
|
}
|
||||||
|
|
||||||
/* Does XGETBV say the ZMM registers are enabled? */
|
/*
|
||||||
|
* Does XGETBV say the ZMM registers are enabled?
|
||||||
|
*
|
||||||
|
* NB: Caller is responsible for verifying that xsave_available() returns true
|
||||||
|
* before calling this.
|
||||||
|
*/
|
||||||
|
static inline bool
|
||||||
|
zmm_regs_available(void)
|
||||||
|
{
|
||||||
#ifdef HAVE_XSAVE_INTRINSICS
|
#ifdef HAVE_XSAVE_INTRINSICS
|
||||||
return (_xgetbv(0) & 0xe0) != 0;
|
return (_xgetbv(0) & 0xe6) == 0xe6;
|
||||||
#else
|
#else
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Does CPUID say there's support for AVX-512 popcount and byte-and-word
|
||||||
|
* instructions?
|
||||||
|
*/
|
||||||
|
static inline bool
|
||||||
|
avx512_popcnt_available(void)
|
||||||
|
{
|
||||||
|
unsigned int exx[4] = {0, 0, 0, 0};
|
||||||
|
|
||||||
|
#if defined(HAVE__GET_CPUID_COUNT)
|
||||||
|
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||||
|
#elif defined(HAVE__CPUIDEX)
|
||||||
|
__cpuidex(exx, 7, 0);
|
||||||
|
#else
|
||||||
|
#error cpuid instruction not available
|
||||||
|
#endif
|
||||||
|
return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
|
||||||
|
(exx[1] & (1 << 30)) != 0; /* avx512-bw */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns true if the CPU supports the instructions required for the AVX-512
|
||||||
|
* pg_popcount() implementation.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
pg_popcount_avx512_available(void)
|
||||||
|
{
|
||||||
|
return xsave_available() &&
|
||||||
|
zmm_regs_available() &&
|
||||||
|
avx512_popcnt_available();
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* TRY_POPCNT_FAST */
|
#endif /* TRY_POPCNT_FAST */
|
||||||
|
|
Loading…
Reference in New Issue