From: Christopher Dykes Date: Thu, 10 Nov 2016 20:46:19 +0000 (-0800) Subject: Always use inline-asm version on GCC/Clang X-Git-Tag: v2016.11.14.00~11 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=7213df65aa8d4d6fca18b1a6cfe76a77eed6a797;p=folly.git Always use inline-asm version on GCC/Clang Summary: Because the intrinsic version requires explicitly adding `__target__` attributes, which results in things not being inlined. Although the code generated with the `__target__` attribute is strictly better, ensuring it's applied on all the relevant functions is error-prone, so just use the inline assembly version for GCC/Clang so that it can be inlined elsewhere. MSVC will inline the intrinsic version without any issue. This also marks the functions as `ALWAYS_INLINE`, as the diff that is getting reverted made that change as well. Reviewed By: yfeldblum, philippv, ot Differential Revision: D3963935 fbshipit-source-id: 47175d64e7be351eb455a4d053b91ce9392bf152 --- diff --git a/folly/experimental/Instructions.h b/folly/experimental/Instructions.h index ffc4fa1b..f8368942 100644 --- a/folly/experimental/Instructions.h +++ b/folly/experimental/Instructions.h @@ -18,17 +18,14 @@ #include +#ifdef _MSC_VER +#include +#endif + #include #include #include -#if defined(__GNUC__) || defined(__clang__) -// For compilers supporting AT&T assembly syntax. -#define FOLLY_INSTRUCTIONS_SUPPORTED 1 -#else -#define FOLLY_INSTRUCTIONS_SUPPORTED 0 -#endif - namespace folly { namespace compression { namespace instructions { // NOTE: It's recommended to compile EF coding with -msse4.2, starting @@ -59,8 +56,6 @@ struct Default { } }; -#if FOLLY_INSTRUCTIONS_SUPPORTED - struct Nehalem : public Default { static bool supported(const folly::CpuId& cpuId = {}) { return cpuId.popcnt(); @@ -68,9 +63,14 @@ struct Nehalem : public Default { static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value) { // POPCNT is supported starting with Intel Nehalem, AMD K10. +#if defined(__GNUC__) || defined(__clang__) + // GCC and Clang won't inline the intrinsics. uint64_t result; asm ("popcntq %1, %0" : "=r" (result) : "r" (value)); return result; +#else + return _mm_popcnt_u64(value); +#endif } }; @@ -82,17 +82,15 @@ struct Haswell : public Nehalem { static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) { // BMI1 is supported starting with Intel Haswell, AMD Piledriver. // BLSR combines two instuctions into one and reduces register pressure. +#if defined(__GNUC__) || defined(__clang__) + // GCC and Clang won't inline the intrinsics. uint64_t result; asm ("blsrq %1, %0" : "=r" (result) : "r" (value)); return result; +#else + return _blsr_u64(value); +#endif } }; -#else // FOLLY_INSTRUCTIONS_SUPPORTED - -struct Nehalem : public Default {}; -struct Haswell : public Nehalem {}; - -#endif // FOLLY_INSTRUCTIONS_SUPPORTED - }}} // namespaces diff --git a/folly/experimental/Select64.h b/folly/experimental/Select64.h index e79b489c..1a2ed35f 100644 --- a/folly/experimental/Select64.h +++ b/folly/experimental/Select64.h @@ -63,11 +63,11 @@ inline uint64_t select64(uint64_t x, uint64_t k) { return place + detail::kSelectInByte[((x >> place) & 0xFF) | (byteRank << 8)]; } -#if FOLLY_INSTRUCTIONS_SUPPORTED - template <> FOLLY_ALWAYS_INLINE uint64_t select64(uint64_t x, uint64_t k) { +#if defined(__GNUC__) || defined(__clang__) + // GCC and Clang won't inline the intrinsics. uint64_t result = uint64_t(1) << k; asm("pdep %1, %0, %0\n\t" @@ -76,8 +76,9 @@ select64(uint64_t x, uint64_t k) { : "r"(x)); return result; +#else + return _tzcnt_u64(_pdep_u64(1ULL << k, x)); +#endif } -#endif // FOLLY_INSTRUCTIONS_SUPPORTED - } // namespace folly