From 69b46104294b1cccaeafc2c88a67e9c7f9543962 Mon Sep 17 00:00:00 2001
From: Peter Griess <pgriess@fb.com>
Date: Sat, 21 Sep 2013 11:40:58 -0500
Subject: [PATCH] Be more careful when using SSE intrinsics.

Summary:
- Check FOLLY_HAVE_EMMINTRIN_H before #including it in Range.cpp.
- The version of Clang that Mac OS X uses for Xcode is based on GCC 4.2.
It turns out that this does not have a full complement of SSE2
builtins (e.g. __builtin_ia32_loaddqu is undefined, but
__builtin_ia32_storedqu is). Work around this omission by not
compiling the specialized code on this platform.

@override-unit-failures
Ignore unit test failures as two tests are just sitting in the postponed
state forever. I investigated all the other unit test failures and they
just look like flaky tests.

Test Plan:
- fbconfig -r folly && fbmake runtests
- ./configure && make check on Ubuntu/FC/Mac

Reviewed By: delong.j@fb.com

FB internal diff: D999130
---
 folly/Range.cpp | 12 ++++++++----
 folly/Range.h   |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/folly/Range.cpp b/folly/Range.cpp
index c343a362..a36f350b 100644
--- a/folly/Range.cpp
+++ b/folly/Range.cpp
@@ -19,7 +19,9 @@
 
 #include "folly/Range.h"
 
+#if FOLLY_HAVE_EMMINTRIN_H
 #include <emmintrin.h>  // __v16qi
+#endif
 #include <iostream>
 
 namespace folly {
@@ -66,7 +68,9 @@ static_assert(kMinPageSize >= 16,
   (reinterpret_cast<uintptr_t>(addr) / kMinPageSize)
 
 
-#if FOLLY_HAVE_EMMINTRIN_H
+// Earlier versions of GCC (for example, Clang on Mac OS X, which is based on
+// GCC 4.2) do not have a full complement of SSE builtins.
+#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6)
 inline size_t nextAlignedIndex(const char* arr) {
    auto firstPossible = reinterpret_cast<uintptr_t>(arr) + 1;
    return 1 +                       // add 1 because the index starts at 'arr'
@@ -119,7 +123,7 @@ size_t qfind_first_byte_of_needles16(const StringPiece& haystack,
   }
   return StringPiece::npos;
 }
-#endif // FOLLY_HAVE_EMMINTRIN_H
+#endif // FOLLY_HAVE_EMMINTRIN_H && GCC 4.6+
 
 // Aho, Hopcroft, and Ullman refer to this trick in "The Design and Analysis
 // of Computer Algorithms" (1974), but the best description is here:
@@ -165,7 +169,7 @@ size_t qfind_first_byte_of_byteset(const StringPiece& haystack,
   return StringPiece::npos;
 }
 
-#if FOLLY_HAVE_EMMINTRIN_H
+#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6)
 
 template <bool HAYSTACK_ALIGNED>
 inline size_t scanHaystackBlock(const StringPiece& haystack,
@@ -254,7 +258,7 @@ size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
 
   return StringPiece::npos;
 }
-#endif // FOLLY_HAVE_EMMINTRIN_H
+#endif // FOLLY_HAVE_EMMINTRIN_H && GCC 4.6+
 
 size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
                                  const StringPiece& needles) {
diff --git a/folly/Range.h b/folly/Range.h
index cabd9078..6925c6d4 100644
--- a/folly/Range.h
+++ b/folly/Range.h
@@ -628,7 +628,7 @@ namespace detail {
 size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
                                  const StringPiece& needles);
 
-#if FOLLY_HAVE_EMMINTRIN_H
+#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6)
 size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
                                  const StringPiece& needles);
 
-- 
2.34.1