From f9651857d7a364e91ca74d07ad77765caf5fb161 Mon Sep 17 00:00:00 2001
From: Orvid King
Date: Tue, 28 Jul 2015 14:23:14 -0700
Subject: [PATCH] Normalize SSE support detection

Summary: This unifies ways of detecting SSE support into the FOLLY_SSE and
FOLLY_SSE_MINOR defines, which hold the integral major (0 to 4) and minor parts
of the detected level (for example SSE4.2 is FOLLY_SSE 4, FOLLY_SSE_MINOR 2).
Integers are used because preprocessor conditionals cannot evaluate
floating-point constants such as 4.2. SSSE3 is reported separately through
FOLLY_SSSE (0 or 3), because it is not implied by SSE3.
This also adds a `#define` for `__extension__` which is used in folly, but is
GCC specific.
Closes #259

Reviewed By: @yfeldblum

Differential Revision: D2283522

Pulled By: @sgolemon
---
Note: this copy of the patch was re-flowed from a whitespace-collapsed dump.
Leading indentation on context lines is reconstructed. The header names on the
bare "#include" context lines and the template arguments in the hunk headings
were already missing from that dump; restore them from the tree before
applying. The blob ids on the "index" lines predate the FOLLY_SSE_MINOR and
FOLLY_SSSE changes.

 folly/GroupVarint.h |  6 +++---
 folly/Portability.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 folly/RWSpinLock.h  |  2 +-
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/folly/GroupVarint.h b/folly/GroupVarint.h
index 153673ca..4e8657a6 100644
--- a/folly/GroupVarint.h
+++ b/folly/GroupVarint.h
@@ -33,7 +33,7 @@
 #include
 #include
 
-#ifdef __SSSE3__
+#if FOLLY_SSSE >= 3
 #include
 namespace folly {
 namespace detail {
@@ -188,7 +188,7 @@ class GroupVarint : public detail::GroupVarintBase {
     return decode_simple(p, dest, dest+1, dest+2, dest+3);
   }
 
-#ifdef __SSSE3__
+#if FOLLY_SSSE >= 3
   /**
    * Just like the non-SSSE3 decode below, but with the additional constraint
    * that we must be able to read at least 17 bytes from the input pointer, p.
@@ -214,7 +214,7 @@ class GroupVarint : public detail::GroupVarintBase {
     __m128i r = _mm_shuffle_epi8(val, mask);
 
     // Extracting 32 bits at a time out of an XMM register is a SSE4 feature
-#ifdef __SSE4__
+#if FOLLY_SSE >= 4
     *a = _mm_extract_epi32(r, 0);
     *b = _mm_extract_epi32(r, 1);
     *c = _mm_extract_epi32(r, 2);
diff --git a/folly/Portability.h b/folly/Portability.h
index a9c36793..7f7a8a1f 100644
--- a/folly/Portability.h
+++ b/folly/Portability.h
@@ -261,6 +261,51 @@ typedef SSIZE_T ssize_t;
 // compiler specific to compiler specific
 // nolint
 # define __PRETTY_FUNCTION__ __FUNCSIG__
+
+// Hide a GCC specific thing that breaks MSVC if left alone.
+# define __extension__
+
+#ifdef _M_IX86_FP
+# define FOLLY_SSE _M_IX86_FP
+# define FOLLY_SSE_MINOR 0
+#endif
+
+#endif
+
+// FOLLY_SSE is the major SSE level (0-4); FOLLY_SSE_MINOR is the minor level.
+// Both are integers because #if cannot evaluate floating-point constants.
+#ifndef FOLLY_SSE
+# if defined(__SSE4_2__)
+#  define FOLLY_SSE 4
+#  define FOLLY_SSE_MINOR 2
+# elif defined(__SSE4_1__)
+#  define FOLLY_SSE 4
+#  define FOLLY_SSE_MINOR 1
+# elif defined(__SSE4__)
+#  define FOLLY_SSE 4
+#  define FOLLY_SSE_MINOR 0
+# elif defined(__SSE3__)
+#  define FOLLY_SSE 3
+#  define FOLLY_SSE_MINOR 0
+# elif defined(__SSE2__)
+#  define FOLLY_SSE 2
+#  define FOLLY_SSE_MINOR 0
+# elif defined(__SSE__)
+#  define FOLLY_SSE 1
+#  define FOLLY_SSE_MINOR 0
+# else
+#  define FOLLY_SSE 0
+#  define FOLLY_SSE_MINOR 0
+# endif
+#endif
+
+// SSSE3 is a separate extension that SSE3 does not imply, so track it apart.
+#ifndef FOLLY_SSSE
+# if defined(__SSSE3__)
+#  define FOLLY_SSSE 3
+# else
+#  define FOLLY_SSSE 0
+# endif
 #endif
 
 #if FOLLY_UNUSUAL_GFLAGS_NAMESPACE
diff --git a/folly/RWSpinLock.h b/folly/RWSpinLock.h
index 8a7a8410..6cb331cf 100644
--- a/folly/RWSpinLock.h
+++ b/folly/RWSpinLock.h
@@ -130,7 +130,7 @@ pthread_rwlock_t Read 728698 24us 101ns 7.28ms 194us
 #endif
 
 // iOS doesn't define _mm_cvtsi64_si128 and friends
-#if defined(__SSE2__) && !TARGET_OS_IPHONE
+#if (FOLLY_SSE >= 2) && !TARGET_OS_IPHONE
 #define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
 #else
 #undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
-- 
2.34.1