From 598954436ffd4da2d6cf8c6737b4c675afcb8447 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 11 Apr 2019 20:15:44 -0400 Subject: [PATCH 1/4] common/swap: Remove 32-bit ARM path We don't plan to support host 32-bit ARM execution environments, so this is essentially dead code. --- src/common/swap.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/common/swap.h b/src/common/swap.h index b3eab1324d..cb723cb8a1 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -71,19 +71,6 @@ inline u32 swap32(u32 _data) { inline u64 swap64(u64 _data) { return _byteswap_uint64(_data); } -#elif defined(ARCHITECTURE_ARM) && (__ARM_ARCH >= 6) -inline u16 swap16(u16 _data) { - u32 data = _data; - __asm__("rev16 %0, %1\n" : "=l"(data) : "l"(data)); - return (u16)data; -} -inline u32 swap32(u32 _data) { - __asm__("rev %0, %1\n" : "=l"(_data) : "l"(_data)); - return _data; -} -inline u64 swap64(u64 _data) { - return ((u64)swap32(_data) << 32) | swap32(_data >> 32); -} #elif __linux__ inline u16 swap16(u16 _data) { return bswap_16(_data); From 9cb4b7be40dd93d4982d78a5f420326fd9c628a8 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 11 Apr 2019 20:26:37 -0400 Subject: [PATCH 2/4] common/swap: Simplify swap function ifdefs Including every OS' own built-in byte swapping functions is kind of undesirable, since it adds yet another build path to ensure compilation succeeds on. Given we only support clang, GCC, and MSVC for the time being, we can utilize their built-in functions directly instead of going through the OS's API functions. This shrinks the overall code down to just if (msvc) use msvc's functions else if (clang or gcc) use clang/gcc's builtins else use the slow path --- src/common/swap.h | 63 +++++++++++------------------------------------ 1 file changed, 15 insertions(+), 48 deletions(-) diff --git a/src/common/swap.h b/src/common/swap.h index cb723cb8a1..fad0f35a76 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -21,11 +21,6 @@ #if defined(_MSC_VER) #include -#elif defined(__linux__) -#include -#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || \ - defined(__NetBSD__) || defined(__OpenBSD__) -#include #endif #include #include "common/common_types.h" @@ -62,58 +57,30 @@ namespace Common { #ifdef _MSC_VER -inline u16 swap16(u16 _data) { - return _byteswap_ushort(_data); +inline u16 swap16(u16 data) { + return _byteswap_ushort(data); } -inline u32 swap32(u32 _data) { - return _byteswap_ulong(_data); +inline u32 swap32(u32 data) { + return _byteswap_ulong(data); } -inline u64 swap64(u64 _data) { - return _byteswap_uint64(_data); +inline u64 swap64(u64 data) { + return _byteswap_uint64(data); } -#elif __linux__ -inline u16 swap16(u16 _data) { - return bswap_16(_data); -} -inline u32 swap32(u32 _data) { - return bswap_32(_data); -} -inline u64 swap64(u64 _data) { - return bswap_64(_data); -} -#elif __APPLE__ -inline __attribute__((always_inline)) u16 swap16(u16 _data) { - return (_data >> 8) | (_data << 8); -} -inline __attribute__((always_inline)) u32 swap32(u32 _data) { - return __builtin_bswap32(_data); -} -inline __attribute__((always_inline)) u64 swap64(u64 _data) { - return __builtin_bswap64(_data); -} -#elif defined(__Bitrig__) || defined(__OpenBSD__) +#elif defined(__clang__) || defined(__GNUC__) +#if defined(__Bitrig__) || defined(__OpenBSD__) // redefine swap16, swap32, swap64 as inline functions #undef swap16 #undef swap32 #undef swap64 -inline u16 swap16(u16 _data) { - return __swap16(_data); +#endif +inline u16 swap16(u16 data) { + return __builtin_bswap16(data); } -inline u32 swap32(u32 _data) { - return __swap32(_data); +inline u32 swap32(u32 data) { + return __builtin_bswap32(data); } -inline u64 swap64(u64 _data) { - return __swap64(_data); -} -#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) -inline u16 swap16(u16 _data) { - return bswap16(_data); -} -inline u32 swap32(u32 _data) { - return bswap32(_data); -} -inline u64 swap64(u64 _data) { - return bswap64(_data); +inline u64 swap64(u64 data) { + return __builtin_bswap64(data); } #else // Slow generic implementation. From 66b73fd39957f9d40dc2d2b05468bbd4d2a1347e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 11 Apr 2019 20:42:40 -0400 Subject: [PATCH 3/4] common/swap: Mark byte swapping free functions with [[nodiscard]] and noexcept Allows the compiler to inform when the result of a swap function is being ignored (which is 100% a bug in all usage scenarios). We also mark them noexcept to allow other functions using them to be able to be marked as noexcept and play nicely with things that potentially inspect "nothrowability". --- src/common/swap.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/common/swap.h b/src/common/swap.h index fad0f35a76..086449028d 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -57,13 +57,13 @@ namespace Common { #ifdef _MSC_VER -inline u16 swap16(u16 data) { +[[nodiscard]] inline u16 swap16(u16 data) noexcept { return _byteswap_ushort(data); } -inline u32 swap32(u32 data) { +[[nodiscard]] inline u32 swap32(u32 data) noexcept { return _byteswap_ulong(data); } -inline u64 swap64(u64 data) { +[[nodiscard]] inline u64 swap64(u64 data) noexcept { return _byteswap_uint64(data); } #elif defined(__clang__) || defined(__GNUC__) @@ -73,29 +73,29 @@ inline u64 swap64(u64 data) { #undef swap32 #undef swap64 #endif -inline u16 swap16(u16 data) { +[[nodiscard]] inline u16 swap16(u16 data) noexcept { return __builtin_bswap16(data); } -inline u32 swap32(u32 data) { +[[nodiscard]] inline u32 swap32(u32 data) noexcept { return __builtin_bswap32(data); } -inline u64 swap64(u64 data) { +[[nodiscard]] inline u64 swap64(u64 data) noexcept { return __builtin_bswap64(data); } #else // Slow generic implementation. -inline u16 swap16(u16 data) { +[[nodiscard]] inline u16 swap16(u16 data) noexcept { return (data >> 8) | (data << 8); } -inline u32 swap32(u32 data) { +[[nodiscard]] inline u32 swap32(u32 data) noexcept { return (swap16(data) << 16) | swap16(data >> 16); } -inline u64 swap64(u64 data) { +[[nodiscard]] inline u64 swap64(u64 data) noexcept { return ((u64)swap32(data) << 32) | swap32(data >> 32); } #endif -inline float swapf(float f) { +[[nodiscard]] inline float swapf(float f) noexcept { static_assert(sizeof(u32) == sizeof(float), "float must be the same size as uint32_t."); u32 value; @@ -107,7 +107,7 @@ inline float swapf(float f) { return f; } -inline double swapd(double f) { +[[nodiscard]] inline double swapd(double f) noexcept { static_assert(sizeof(u64) == sizeof(double), "double must be the same size as uint64_t."); u64 value; From 0d8ef2d3b9cea849d83b4d96529576f28bae5c7c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 11 Apr 2019 21:20:22 -0400 Subject: [PATCH 4/4] common/swap: Improve codegen of the default swap fallbacks Uses arithmetic that can be identified more trivially by compilers for optimizations. e.g. Rather than shifting the halves of the value and then swapping and combining them, we can swap them in place. e.g. for the original swap32 code on x86-64, clang 8.0 would generate: mov ecx, edi rol cx, 8 shl ecx, 16 shr edi, 16 rol di, 8 movzx eax, di or eax, ecx ret while GCC 8.3 would generate the ideal: mov eax, edi bswap eax ret now both generate the same optimal output. MSVC used to generate the following with the old code: mov eax, ecx rol cx, 8 shr eax, 16 rol ax, 8 movzx ecx, cx movzx eax, ax shl ecx, 16 or eax, ecx ret 0 Now MSVC also generates a similar, but equally optimal result as clang/GCC: bswap ecx mov eax, ecx ret 0 ==== In the swap64 case, for the original code, clang 8.0 would generate: mov eax, edi bswap eax shl rax, 32 shr rdi, 32 bswap edi or rax, rdi ret (almost there, but still missing the mark) while, again, GCC 8.3 would generate the more ideal: mov rax, rdi bswap rax ret now clang also generates the optimal sequence for this fallback as well. This is a case where MSVC unfortunately falls short, despite the new code, this one still generates a doozy of an output. mov r8, rcx mov r9, rcx mov rax, 71776119061217280 mov rdx, r8 and r9, rax and edx, 65280 mov rax, rcx shr rax, 16 or r9, rax mov rax, rcx shr r9, 16 mov rcx, 280375465082880 and rax, rcx mov rcx, 1095216660480 or r9, rax mov rax, r8 and rax, rcx shr r9, 16 or r9, rax mov rcx, r8 mov rax, r8 shr r9, 8 shl rax, 16 and ecx, 16711680 or rdx, rax mov eax, -16777216 and rax, r8 shl rdx, 16 or rdx, rcx shl rdx, 16 or rax, rdx shl rax, 8 or rax, r9 ret 0 which is pretty unfortunate. --- src/common/swap.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/common/swap.h b/src/common/swap.h index 086449028d..71932c2bbd 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -83,15 +83,19 @@ namespace Common { return __builtin_bswap64(data); } #else -// Slow generic implementation. +// Generic implementation. [[nodiscard]] inline u16 swap16(u16 data) noexcept { return (data >> 8) | (data << 8); } [[nodiscard]] inline u32 swap32(u32 data) noexcept { - return (swap16(data) << 16) | swap16(data >> 16); + return ((data & 0xFF000000U) >> 24) | ((data & 0x00FF0000U) >> 8) | + ((data & 0x0000FF00U) << 8) | ((data & 0x000000FFU) << 24); } [[nodiscard]] inline u64 swap64(u64 data) noexcept { - return ((u64)swap32(data) << 32) | swap32(data >> 32); + return ((data & 0xFF00000000000000ULL) >> 56) | ((data & 0x00FF000000000000ULL) >> 40) | + ((data & 0x0000FF0000000000ULL) >> 24) | ((data & 0x000000FF00000000ULL) >> 8) | + ((data & 0x00000000FF000000ULL) << 8) | ((data & 0x0000000000FF0000ULL) << 24) | + ((data & 0x000000000000FF00ULL) << 40) | ((data & 0x00000000000000FFULL) << 56); } #endif