diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h
index d575e733d3..0625cac6b5 100644
--- a/src/include/port/pg_lfind.h
+++ b/src/include/port/pg_lfind.h
@@ -151,7 +151,7 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
 		result = vector32_or(tmp1, tmp2);
 
 		/* see if there was a match */
-		if (vector8_is_highbit_set((Vector8) result))
+		if (vector32_is_highbit_set(result))
 		{
 			Assert(assert_result == true);
 			return true;
diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index b538ac070f..0ff1549083 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -274,6 +274,28 @@ vector8_is_highbit_set(const Vector8 v)
 #endif
 }
 
+/*
+ * Exactly like vector8_is_highbit_set except for the input type, so it
+ * looks at each byte separately.
+ *
+ * XXX x86 uses the same underlying type for 8-bit, 16-bit, and 32-bit
+ * integer elements, but Arm does not, hence the need for a separate
+ * function. We could instead adopt the behavior of Arm's vmaxvq_u32(), i.e.
+ * check each 32-bit element, but that would require an additional mask
+ * operation on x86.
+ */
+#ifndef USE_NO_SIMD
+static inline bool
+vector32_is_highbit_set(const Vector32 v)
+{
+#if defined(USE_NEON)
+	return vector8_is_highbit_set((Vector8) v);
+#else
+	return vector8_is_highbit_set(v);
+#endif
+}
+#endif /* ! USE_NO_SIMD */
+
 /*
  * Return the bitwise OR of the inputs
  */