diff --git a/src/common/wchar.c b/src/common/wchar.c index 1e6e198bf2..fa8854d9e9 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -1919,10 +1919,11 @@ pg_utf8_verifystr(const unsigned char *s, int len) uint32 state = BGN; /* - * Sixteen seems to give the best balance of performance across different - * byte distributions. + * With a stride of two vector widths, gcc will unroll the loop. Even if + * the compiler can unroll a longer loop, it's not worth it because we + * must fall back to the byte-wise algorithm if we find any non-ASCII. */ -#define STRIDE_LENGTH 16 +#define STRIDE_LENGTH (2 * sizeof(Vector8)) if (len >= STRIDE_LENGTH) { diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 011b0b3abd..1e8c3af360 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -19,6 +19,8 @@ #ifndef PG_WCHAR_H #define PG_WCHAR_H +#include "port/simd.h" + /* * The pg_wchar type */ @@ -704,25 +706,28 @@ extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len); * Verify a chunk of bytes for valid ASCII. * * Returns false if the input contains any zero bytes or bytes with the - * high-bit set. Input len must be a multiple of 8. + * high-bit set. Input len must be a multiple of the chunk size (8 or 16). */ static inline bool is_valid_ascii(const unsigned char *s, int len) { const unsigned char *const s_end = s + len; - uint64 chunk, - highbit_cum = UINT64CONST(0), - zero_cum = UINT64CONST(0x8080808080808080); + Vector8 chunk; + Vector8 highbit_cum = vector8_broadcast(0); +#ifdef USE_NO_SIMD + Vector8 zero_cum = vector8_broadcast(0x80); +#endif Assert(len % sizeof(chunk) == 0); while (s < s_end) { - memcpy(&chunk, s, sizeof(chunk)); + vector8_load(&chunk, s); + + /* Capture any zero bytes in this chunk. */ +#ifdef USE_NO_SIMD /* - * Capture any zero bytes in this chunk. - * * First, add 0x7f to each byte. This sets the high bit in each byte, * unless it was a zero. 
If any resulting high bits are zero, the * corresponding high bits in the zero accumulator will be cleared. @@ -733,21 +738,32 @@ is_valid_ascii(const unsigned char *s, int len) * any input bytes did have the high bit set, it doesn't matter * because we check for those separately. */ - zero_cum &= (chunk + UINT64CONST(0x7f7f7f7f7f7f7f7f)); + zero_cum &= (chunk + vector8_broadcast(0x7F)); +#else + + /* + * Set all bits in each lane of the highbit accumulator where input + * bytes are zero. + */ + highbit_cum = vector8_or(highbit_cum, + vector8_eq(chunk, vector8_broadcast(0))); +#endif /* Capture all set bits in this chunk. */ - highbit_cum |= chunk; + highbit_cum = vector8_or(highbit_cum, chunk); s += sizeof(chunk); } /* Check if any high bits in the high bit accumulator got set. */ - if (highbit_cum & UINT64CONST(0x8080808080808080)) + if (vector8_is_highbit_set(highbit_cum)) return false; +#ifdef USE_NO_SIMD /* Check if any high bits in the zero accumulator got cleared. */ - if (zero_cum != UINT64CONST(0x8080808080808080)) + if (zero_cum != vector8_broadcast(0x80)) return false; +#endif return true; } diff --git a/src/include/port/simd.h b/src/include/port/simd.h index 61e4362258..a425cd887b 100644 --- a/src/include/port/simd.h +++ b/src/include/port/simd.h @@ -52,7 +52,18 @@ static inline Vector8 vector8_broadcast(const uint8 c); static inline bool vector8_has(const Vector8 v, const uint8 c); static inline bool vector8_has_zero(const Vector8 v); static inline bool vector8_has_le(const Vector8 v, const uint8 c); +static inline bool vector8_is_highbit_set(const Vector8 v); +/* arithmetic operations */ +static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2); + +/* Different semantics for SIMD architectures. */ +#ifndef USE_NO_SIMD + +/* comparisons between vectors */ +static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2); + +#endif /* ! USE_NO_SIMD */ /* * Load a chunk of memory into the given vector. 
@@ -193,4 +204,48 @@ vector8_has_le(const Vector8 v, const uint8 c) return result; } +/* + * Return true if the high bit of any element is set + */ +static inline bool +vector8_is_highbit_set(const Vector8 v) +{ +#ifdef USE_SSE2 + return _mm_movemask_epi8(v) != 0; +#else + return v & vector8_broadcast(0x80); +#endif +} + +/* + * Return the bitwise OR of the inputs + */ +static inline Vector8 +vector8_or(const Vector8 v1, const Vector8 v2) +{ +#ifdef USE_SSE2 + return _mm_or_si128(v1, v2); +#else + return v1 | v2; +#endif +} + + +/* Different semantics for SIMD architectures. */ +#ifndef USE_NO_SIMD + +/* + * Return a vector with all bits set in each lane where the corresponding + * lanes in the inputs are equal. + */ +static inline Vector8 +vector8_eq(const Vector8 v1, const Vector8 v2) +{ +#ifdef USE_SSE2 + return _mm_cmpeq_epi8(v1, v2); +#endif +} + +#endif /* ! USE_NO_SIMD */ + #endif /* SIMD_H */