From: Elizabeth Smith <elizabeths@fb.com>
Date: Thu, 10 Jul 2014 22:15:26 +0000 (-0700)
Subject: MSVC intrinsics for bits and cpuid
X-Git-Tag: v0.22.0~460
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5ed20b59910fcb9e7f7f866c5279b0075f5887eb;p=folly.git

MSVC intrinsics for bits and cpuid

Summary: Use MSVC intrinsics for cpuid, popcount, byte swap, and bit scan functionality.

Test Plan: fbconfig -r folly && fbmake runtests

Reviewed By: delong.j@fb.com

FB internal diff: D1413254
---

diff --git a/folly/Bits.cpp b/folly/Bits.cpp
index 2c6694f2..a2715a0b 100644
--- a/folly/Bits.cpp
+++ b/folly/Bits.cpp
@@ -20,9 +20,8 @@
 #include <folly/Portability.h>
 
 // None of this is necessary if we're compiling for a target that supports
-// popcnt
-#ifndef __POPCNT__
-
+// popcnt, or with MSVC (which always uses its popcnt intrinsics)
+#if !defined(__POPCNT__) && !defined(_MSC_VER)
 namespace {
 
 int popcount_builtin(unsigned int x) {
diff --git a/folly/Bits.h b/folly/Bits.h
index ade126d0..c915111e 100644
--- a/folly/Bits.h
+++ b/folly/Bits.h
@@ -57,14 +57,10 @@
 
 #include <folly/Portability.h>
 
-#ifndef __GNUC__
-#error GCC required
-#endif
-
-#ifndef __clang__
+#if !defined(__clang__) && !defined(_MSC_VER)
 #define FOLLY_INTRINSIC_CONSTEXPR constexpr
 #else
-// Unlike GCC, in Clang (as of 3.2) intrinsics aren't constexpr.
+// GCC is the only supported compiler whose intrinsics are constexpr.
 #define FOLLY_INTRINSIC_CONSTEXPR const
 #endif
 
@@ -78,6 +74,14 @@
 # include <byteswap.h>
 #endif
 
+#ifdef _MSC_VER
+# include <intrin.h>
+# pragma intrinsic(_BitScanForward)
+# pragma intrinsic(_BitScanForward64)
+# pragma intrinsic(_BitScanReverse)
+# pragma intrinsic(_BitScanReverse64)
+#endif
+
 #include <cassert>
 #include <cinttypes>
 #include <iterator>
@@ -98,7 +102,12 @@ typename std::enable_if<
    sizeof(T) <= sizeof(unsigned int)),
   unsigned int>::type
   findFirstSet(T x) {
+#ifdef _MSC_VER
+  unsigned long index;  // 0-based position of the lowest set bit
+  return _BitScanForward(&index, x) ? index + 1 : 0;  // 1-based, as ffs
+#else
   return __builtin_ffs(x);
+#endif
 }
 
 template <class T>
@@ -110,7 +119,12 @@ typename std::enable_if<
    sizeof(T) <= sizeof(unsigned long)),
   unsigned int>::type
   findFirstSet(T x) {
+#ifdef _MSC_VER
+  unsigned long index;  // 0-based position of the lowest set bit
+  return _BitScanForward(&index, x) ? index + 1 : 0;  // 1-based, as ffsl
+#else
   return __builtin_ffsl(x);
+#endif
 }
 
 template <class T>
@@ -122,7 +136,12 @@ typename std::enable_if<
    sizeof(T) <= sizeof(unsigned long long)),
   unsigned int>::type
   findFirstSet(T x) {
+#ifdef _MSC_VER
+  unsigned long index;  // 0-based position of the lowest set bit
+  return _BitScanForward64(&index, x) ? index + 1 : 0;  // 1-based, as ffsll
+#else
   return __builtin_ffsll(x);
+#endif
 }
 
 template <class T>
@@ -147,7 +166,18 @@ typename std::enable_if<
    sizeof(T) <= sizeof(unsigned int)),
   unsigned int>::type
   findLastSet(T x) {
+#ifdef _MSC_VER
+  unsigned long index;
+  int clz;  // leading-zero count, standing in for __builtin_clz below
+  if (_BitScanReverse(&index, x)) {
+    clz = static_cast<int>(31 - index);  // index: position of highest set bit
+  } else {
+    clz = 32;  // scan found no set bit
+  }
+  return x ? 8 * sizeof(unsigned int) - clz : 0;
+#else
   return x ? 8 * sizeof(unsigned int) - __builtin_clz(x) : 0;
+#endif
 }
 
 template <class T>
@@ -159,7 +189,18 @@ typename std::enable_if<
    sizeof(T) <= sizeof(unsigned long)),
   unsigned int>::type
   findLastSet(T x) {
+#ifdef _MSC_VER
+  unsigned long index;
+  int clz;  // NOTE(review): 31/32 below assume 32-bit unsigned long; confirm
+  if (_BitScanReverse(&index, x)) {
+    clz = static_cast<int>(31 - index);  // index: position of highest set bit
+  } else {
+    clz = 32;  // scan found no set bit
+  }
+  return x ? 8 * sizeof(unsigned int) - clz : 0;
+#else
   return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0;
+#endif
 }
 
 template <class T>
@@ -171,7 +212,18 @@ typename std::enable_if<
    sizeof(T) <= sizeof(unsigned long long)),
   unsigned int>::type
   findLastSet(T x) {
+#ifdef _MSC_VER
+  unsigned long index;
+  unsigned long long clz;  // leading-zero count over all 64 bits
+  if (_BitScanReverse64(&index, x)) {  // 64-bit scan: x may exceed 32 bits
+    clz = static_cast<unsigned long long>(63 - index);
+  } else {
+    clz = 64;  // scan found no set bit
+  }
+  return x ? 8 * sizeof(unsigned long long) - clz : 0;
+#else
   return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0;
+#endif
 }
 
 template <class T>
@@ -237,6 +289,8 @@ struct EndianIntBase {
   static T swap(T x);
 };
 
+#ifndef _MSC_VER
+
 /**
  * If we have the bswap_16 macro from byteswap.h, use it; otherwise, provide our
  * own definition.
@@ -254,6 +308,8 @@ our_bswap16(Int16 x) {
 }
 #endif
 
+#endif
+
 #define FB_GEN(t, fn) \
 template<> inline t EndianIntBase<t>::swap(t x) { return fn(x); }
 
@@ -262,12 +318,21 @@ template<> inline t EndianIntBase<t>::swap(t x) { return fn(x); }
 // __builtin_bswap16 for some reason, so we have to provide our own.
 FB_GEN( int8_t,)
 FB_GEN(uint8_t,)
+#ifdef _MSC_VER
+FB_GEN( int64_t, _byteswap_uint64)
+FB_GEN(uint64_t, _byteswap_uint64)
+FB_GEN( int32_t, _byteswap_ulong)
+FB_GEN(uint32_t, _byteswap_ulong)
+FB_GEN( int16_t, _byteswap_ushort)
+FB_GEN(uint16_t, _byteswap_ushort)
+#else
 FB_GEN( int64_t, __builtin_bswap64)
 FB_GEN(uint64_t, __builtin_bswap64)
 FB_GEN( int32_t, __builtin_bswap32)
 FB_GEN(uint32_t, __builtin_bswap32)
 FB_GEN( int16_t, our_bswap16)
 FB_GEN(uint16_t, our_bswap16)
+#endif
 
 #undef FB_GEN
 
diff --git a/folly/CpuId.h b/folly/CpuId.h
index 966bfc1f..4a70e146 100644
--- a/folly/CpuId.h
+++ b/folly/CpuId.h
@@ -30,7 +30,14 @@ namespace folly {
 class CpuId {
  public:
   CpuId() {
-#if FOLLY_X64 || defined(__i386__)
+#ifdef _MSC_VER
+    int reg[4];
+
+    __cpuid((int *)reg, 1);
+    c_ = reg[2];
+    d_ = reg[3];
+
+#elif FOLLY_X64 || defined(__i386__)
     __asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx");
 #else
     // On non-Intel, none of these features exist; at least not in the same form
diff --git a/folly/detail/BitsDetail.h b/folly/detail/BitsDetail.h
index df3acd77..96887081 100644
--- a/folly/detail/BitsDetail.h
+++ b/folly/detail/BitsDetail.h
@@ -24,7 +24,14 @@ namespace detail {
 // __builtin_popcount directly, as it's presumably inlined.
 // If not, use runtime detection using __attribute__((ifunc))
 // (see Bits.cpp)
-#ifdef __POPCNT__
+#ifdef _MSC_VER
+inline int popcount(unsigned int x) {  // set-bit count via MSVC intrinsic
+  return static_cast<int>(__popcnt(x));
+}
+inline int popcountll(unsigned long long x) {  // 64-bit variant
+  return static_cast<int>(__popcnt64(x));  // count <= 64, so no data loss
+}
+#elif defined(__POPCNT__)
 
 inline int popcount(unsigned int x) {
   return __builtin_popcount(x);