From: Elizabeth Smith Date: Thu, 10 Jul 2014 22:15:26 +0000 (-0700) Subject: MSVC intrinsics for bits and cpuid X-Git-Tag: v0.22.0~460 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5ed20b59910fcb9e7f7f866c5279b0075f5887eb;p=folly.git MSVC intrinsics for bits and cpuid Summary: Use msvc intrinsics for cpuid, popcount, byteswap, and bit scan functionality Test Plan: fbconfig -r folly && fbmake runtests Reviewed By: delong.j@fb.com FB internal diff: D1413254 --- diff --git a/folly/Bits.cpp b/folly/Bits.cpp index 2c6694f2..a2715a0b 100644 --- a/folly/Bits.cpp +++ b/folly/Bits.cpp @@ -20,9 +20,8 @@ #include // None of this is necessary if we're compiling for a target that supports -// popcnt -#ifndef __POPCNT__ - +// popcnt, which includes MSVC +#if !defined(__POPCNT__) && !defined(_MSC_VER) namespace { int popcount_builtin(unsigned int x) { diff --git a/folly/Bits.h b/folly/Bits.h index ade126d0..c915111e 100644 --- a/folly/Bits.h +++ b/folly/Bits.h @@ -57,14 +57,10 @@ #include -#ifndef __GNUC__ -#error GCC required -#endif - -#ifndef __clang__ +#if !defined(__clang__) && !defined(_MSC_VER) #define FOLLY_INTRINSIC_CONSTEXPR constexpr #else -// Unlike GCC, in Clang (as of 3.2) intrinsics aren't constexpr. +// GCC is the only compiler with intrinsics constexpr. #define FOLLY_INTRINSIC_CONSTEXPR const #endif @@ -78,6 +74,14 @@ # include #endif +#ifdef _MSC_VER +# include +# pragma intrinsic(_BitScanForward) +# pragma intrinsic(_BitScanForward64) +# pragma intrinsic(_BitScanReverse) +# pragma intrinsic(_BitScanReverse64) +#endif + #include #include #include @@ -98,7 +102,12 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), unsigned int>::type findFirstSet(T x) { +#ifdef _MSC_VER + unsigned long index; + return _BitScanForward(&index, x) ? index : 0; +#else return __builtin_ffs(x); +#endif } template @@ -110,7 +119,12 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long)), unsigned int>::type findFirstSet(T x) { +#ifdef _MSC_VER + unsigned long index; + return _BitScanForward(&index, x) ? index : 0; +#else return __builtin_ffsl(x); +#endif } template @@ -122,7 +136,12 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), unsigned int>::type findFirstSet(T x) { +#ifdef _MSC_VER + unsigned long index; + return _BitScanForward64(&index, x) ? index : 0; +#else return __builtin_ffsll(x); +#endif } template @@ -147,7 +166,18 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), unsigned int>::type findLastSet(T x) { +#ifdef _MSC_VER + unsigned long index; + int clz; + if (_BitScanReverse(&index, x)) { + clz = static_cast(31 - index); + } else { + clz = 32; + } + return x ? 8 * sizeof(unsigned int) - clz : 0; +#else return x ? 8 * sizeof(unsigned int) - __builtin_clz(x) : 0; +#endif } template @@ -159,7 +189,18 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long)), unsigned int>::type findLastSet(T x) { +#ifdef _MSC_VER + unsigned long index; + int clz; + if (_BitScanReverse(&index, x)) { + clz = static_cast(31 - index); + } else { + clz = 32; + } + return x ? 8 * sizeof(unsigned int) - clz : 0; +#else return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0; +#endif } template @@ -171,7 +212,18 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), unsigned int>::type findLastSet(T x) { +#ifdef _MSC_VER + unsigned long index; + unsigned long long clz; + if (_BitScanReverse(&index, x)) { + clz = static_cast(63 - index); + } else { + clz = 64; + } + return x ? 8 * sizeof(unsigned long long) - clz : 0; +#else return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0; +#endif } template @@ -237,6 +289,8 @@ struct EndianIntBase { static T swap(T x); }; +#ifndef _MSC_VER + /** * If we have the bswap_16 macro from byteswap.h, use it; otherwise, provide our * own definition. @@ -254,6 +308,8 @@ our_bswap16(Int16 x) { } #endif +#endif + #define FB_GEN(t, fn) \ template<> inline t EndianIntBase::swap(t x) { return fn(x); } @@ -262,12 +318,21 @@ template<> inline t EndianIntBase::swap(t x) { return fn(x); } // __builtin_bswap16 for some reason, so we have to provide our own. FB_GEN( int8_t,) FB_GEN(uint8_t,) +#ifdef _MSC_VER +FB_GEN( int64_t, _byteswap_uint64) +FB_GEN(uint64_t, _byteswap_uint64) +FB_GEN( int32_t, _byteswap_ulong) +FB_GEN(uint32_t, _byteswap_ulong) +FB_GEN( int16_t, _byteswap_ushort) +FB_GEN(uint16_t, _byteswap_ushort) +#else FB_GEN( int64_t, __builtin_bswap64) FB_GEN(uint64_t, __builtin_bswap64) FB_GEN( int32_t, __builtin_bswap32) FB_GEN(uint32_t, __builtin_bswap32) FB_GEN( int16_t, our_bswap16) FB_GEN(uint16_t, our_bswap16) +#endif #undef FB_GEN diff --git a/folly/CpuId.h b/folly/CpuId.h index 966bfc1f..4a70e146 100644 --- a/folly/CpuId.h +++ b/folly/CpuId.h @@ -30,7 +30,14 @@ namespace folly { class CpuId { public: CpuId() { -#if FOLLY_X64 || defined(__i386__) +#ifdef _MSC_VER + int reg[4]; + + __cpuid((int *)reg, 1); + c_ = reg[2]; + d_ = reg[3]; + +#elif FOLLY_X64 || defined(__i386__) __asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx"); #else // On non-Intel, none of these features exist; at least not in the same form diff --git a/folly/detail/BitsDetail.h b/folly/detail/BitsDetail.h index df3acd77..96887081 100644 --- a/folly/detail/BitsDetail.h +++ b/folly/detail/BitsDetail.h @@ -24,7 +24,14 @@ namespace detail { // __builtin_popcount directly, as it's presumably inlined. // If not, use runtime detection using __attribute__((ifunc)) // (see Bits.cpp) -#ifdef __POPCNT__ +#ifdef _MSC_VER +inline int popcount(unsigned int x) { + return __popcnt(x); +} +inline int popcountll(unsigned long long x) { + return __popcnt64(x); +} +#elif defined(__POPCNT__) inline int popcount(unsigned int x) { return __builtin_popcount(x);