Random.h \
Random-inl.h \
Range.h \
+ RangeCommon.h \
+ RangeSse42.h \
ReadMostlySharedPtr.h \
RWSpinLock.h \
ScopeGuard.h \
FormatTables.cpp \
Malloc.cpp \
Range.cpp \
+ RangeCommon.cpp \
+ RangeSse42.cpp \
StringBase.cpp \
String.cpp \
Unicode.cpp
+++ /dev/null
-/*
- * Copyright 2015 Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// @author Mark Rabkin (mrabkin@fb.com)
-// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
-
-#include <folly/Range.h>
-#include <folly/SparseByteSet.h>
-
-#if FOLLY_HAVE_EMMINTRIN_H
-#include <emmintrin.h> // __v16qi
-#endif
-#include <bitset>
-#include <iostream>
-
-namespace folly {
-
-namespace {
-
-// It's okay if pages are bigger than this (as powers of two), but they should
-// not be smaller.
-constexpr size_t kMinPageSize = 4096;
-static_assert(kMinPageSize >= 16,
- "kMinPageSize must be at least SSE register size");
-#define PAGE_FOR(addr) \
- (reinterpret_cast<uintptr_t>(addr) / kMinPageSize)
-
-
-// Earlier versions of GCC (for example, Clang on Mac OS X, which is based on
-// GCC 4.2) do not have a full compliment of SSE builtins.
-#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6)
-inline size_t nextAlignedIndex(const char* arr) {
- auto firstPossible = reinterpret_cast<uintptr_t>(arr) + 1;
- return 1 + // add 1 because the index starts at 'arr'
- ((firstPossible + 15) & ~0xF) // round up to next multiple of 16
- - firstPossible;
-}
-
-// build sse4.2-optimized version even if -msse4.2 is not passed to GCC
-size_t qfind_first_byte_of_needles16(const StringPiece haystack,
- const StringPiece needles)
- __attribute__ ((__target__("sse4.2"), noinline))
- FOLLY_DISABLE_ADDRESS_SANITIZER;
-
-// helper method for case where needles.size() <= 16
-size_t qfind_first_byte_of_needles16(const StringPiece haystack,
- const StringPiece needles) {
- DCHECK(!haystack.empty());
- DCHECK(!needles.empty());
- DCHECK_LE(needles.size(), 16);
- if ((needles.size() <= 2 && haystack.size() >= 256) ||
- // must bail if we can't even SSE-load a single segment of haystack
- (haystack.size() < 16 &&
- PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 15)) ||
- // can't load needles into SSE register if it could cross page boundary
- PAGE_FOR(needles.end() - 1) != PAGE_FOR(needles.data() + 15)) {
- return detail::qfind_first_byte_of_nosse(haystack, needles);
- }
-
- auto arr2 = __builtin_ia32_loaddqu(needles.data());
- // do an unaligned load for first block of haystack
- auto arr1 = __builtin_ia32_loaddqu(haystack.data());
- auto index = __builtin_ia32_pcmpestri128(arr2, needles.size(),
- arr1, haystack.size(), 0);
- if (index < 16) {
- return index;
- }
-
- // Now, we can do aligned loads hereafter...
- size_t i = nextAlignedIndex(haystack.data());
- for (; i < haystack.size(); i+= 16) {
- void* ptr1 = __builtin_assume_aligned(haystack.data() + i, 16);
- auto arr1 = *reinterpret_cast<const __v16qi*>(ptr1);
- auto index = __builtin_ia32_pcmpestri128(arr2, needles.size(),
- arr1, haystack.size() - i, 0);
- if (index < 16) {
- return i + index;
- }
- }
- return StringPiece::npos;
-}
-#endif // FOLLY_HAVE_EMMINTRIN_H && GCC 4.6+
-
-} // namespace
-
-namespace detail {
-
-size_t qfind_first_byte_of_byteset(const StringPiece haystack,
- const StringPiece needles) {
- SparseByteSet s;
- for (auto needle: needles) {
- s.add(needle);
- }
- for (size_t index = 0; index < haystack.size(); ++index) {
- if (s.contains(haystack[index])) {
- return index;
- }
- }
- return StringPiece::npos;
-}
-
-size_t qfind_first_byte_of_bitset(const StringPiece haystack,
- const StringPiece needles) {
- std::bitset<256> s;
- for (auto needle : needles) {
- s[(uint8_t)needle] = true;
- }
- for (size_t index = 0; index < haystack.size(); ++index) {
- if (s[(uint8_t)haystack[index]]) {
- return index;
- }
- }
- return StringPiece::npos;
-}
-
-#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6)
-
-template <bool HAYSTACK_ALIGNED>
-size_t scanHaystackBlock(const StringPiece haystack,
- const StringPiece needles,
- uint64_t idx)
-// inline is okay because it's only called from other sse4.2 functions
- __attribute__ ((__target__("sse4.2")))
-// Turn off ASAN because the "arr2 = ..." assignment in the loop below reads
-// up to 15 bytes beyond end of the buffer in #needles#. That is ok because
-// ptr2 is always 16-byte aligned, so the read can never span a page boundary.
-// Also, the extra data that may be read is never actually used.
- FOLLY_DISABLE_ADDRESS_SANITIZER;
-
-// Scans a 16-byte block of haystack (starting at blockStartIdx) to find first
-// needle. If HAYSTACK_ALIGNED, then haystack must be 16byte aligned.
-// If !HAYSTACK_ALIGNED, then caller must ensure that it is safe to load the
-// block.
-template <bool HAYSTACK_ALIGNED>
-size_t scanHaystackBlock(const StringPiece haystack,
- const StringPiece needles,
- uint64_t blockStartIdx) {
- DCHECK_GT(needles.size(), 16); // should handled by *needles16() method
- DCHECK(blockStartIdx + 16 <= haystack.size() ||
- (PAGE_FOR(haystack.data() + blockStartIdx) ==
- PAGE_FOR(haystack.data() + blockStartIdx + 15)));
-
- __v16qi arr1;
- if (HAYSTACK_ALIGNED) {
- void* ptr1 = __builtin_assume_aligned(haystack.data() + blockStartIdx, 16);
- arr1 = *reinterpret_cast<const __v16qi*>(ptr1);
- } else {
- arr1 = __builtin_ia32_loaddqu(haystack.data() + blockStartIdx);
- }
-
- // This load is safe because needles.size() >= 16
- auto arr2 = __builtin_ia32_loaddqu(needles.data());
- size_t b = __builtin_ia32_pcmpestri128(
- arr2, 16, arr1, haystack.size() - blockStartIdx, 0);
-
- size_t j = nextAlignedIndex(needles.data());
- for (; j < needles.size(); j += 16) {
- void* ptr2 = __builtin_assume_aligned(needles.data() + j, 16);
- arr2 = *reinterpret_cast<const __v16qi*>(ptr2);
-
- auto index = __builtin_ia32_pcmpestri128(
- arr2, needles.size() - j, arr1, haystack.size() - blockStartIdx, 0);
- b = std::min<size_t>(index, b);
- }
-
- if (b < 16) {
- return blockStartIdx + b;
- }
- return StringPiece::npos;
-}
-
-size_t qfind_first_byte_of_sse42(const StringPiece haystack,
- const StringPiece needles)
- __attribute__ ((__target__("sse4.2"), noinline));
-
-size_t qfind_first_byte_of_sse42(const StringPiece haystack,
- const StringPiece needles) {
- if (UNLIKELY(needles.empty() || haystack.empty())) {
- return StringPiece::npos;
- } else if (needles.size() <= 16) {
- // we can save some unnecessary load instructions by optimizing for
- // the common case of needles.size() <= 16
- return qfind_first_byte_of_needles16(haystack, needles);
- }
-
- if (haystack.size() < 16 &&
- PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 16)) {
- // We can't safely SSE-load haystack. Use a different approach.
- if (haystack.size() <= 2) {
- return qfind_first_of(haystack, needles, AsciiCaseSensitive());
- }
- return qfind_first_byte_of_byteset(haystack, needles);
- }
-
- auto ret = scanHaystackBlock<false>(haystack, needles, 0);
- if (ret != StringPiece::npos) {
- return ret;
- }
-
- size_t i = nextAlignedIndex(haystack.data());
- for (; i < haystack.size(); i += 16) {
- auto ret = scanHaystackBlock<true>(haystack, needles, i);
- if (ret != StringPiece::npos) {
- return ret;
- }
- }
-
- return StringPiece::npos;
-}
-#endif // FOLLY_HAVE_EMMINTRIN_H && GCC 4.6+
-
-size_t qfind_first_byte_of_nosse(const StringPiece haystack,
- const StringPiece needles) {
- if (UNLIKELY(needles.empty() || haystack.empty())) {
- return StringPiece::npos;
- }
- // The thresholds below were empirically determined by benchmarking.
- // This is not an exact science since it depends on the CPU, the size of
- // needles, and the size of haystack.
- if ((needles.size() >= 4 && haystack.size() <= 10) ||
- (needles.size() >= 16 && haystack.size() <= 64) ||
- needles.size() >= 32) {
- return qfind_first_byte_of_byteset(haystack, needles);
- }
- return qfind_first_of(haystack, needles, AsciiCaseSensitive());
-}
-
-} // namespace detail
-} // namespace folly
#include <folly/CpuId.h>
#include <folly/Traits.h>
#include <folly/Likely.h>
+#include <folly/detail/RangeCommon.h>
+#include <folly/detail/RangeSse42.h>
// Ignore shadowing warnings within this file, so includers can use -Wshadow.
#pragma GCC diagnostic push
namespace detail {
-size_t qfind_first_byte_of_nosse(const StringPiece haystack,
- const StringPiece needles);
-
-#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6)
-size_t qfind_first_byte_of_sse42(const StringPiece haystack,
- const StringPiece needles);
-
inline size_t qfind_first_byte_of(const StringPiece haystack,
const StringPiece needles) {
static auto const qfind_first_byte_of_fn =
return qfind_first_byte_of_fn(haystack, needles);
}
-#else
-inline size_t qfind_first_byte_of(const StringPiece haystack,
- const StringPiece needles) {
- return qfind_first_byte_of_nosse(haystack, needles);
-}
-#endif // FOLLY_HAVE_EMMINTRIN_H
-
} // namespace detail
template <class T, class Comp>
# Checks for header files.
AC_HEADER_STDC
-AC_CHECK_HEADERS([fcntl.h features.h inttypes.h limits.h stdint.h stdlib.h string.h sys/time.h unistd.h mutex.h malloc.h emmintrin.h byteswap.h bits/functexcept.h bits/c++config.h])
+AC_CHECK_HEADERS([fcntl.h features.h inttypes.h limits.h stdint.h stdlib.h string.h sys/time.h unistd.h mutex.h malloc.h byteswap.h bits/functexcept.h bits/c++config.h])
AC_CHECK_HEADER(double-conversion/double-conversion.h, [], [AC_MSG_ERROR(
[Couldn't find double-conversion.h, please download from \
--- /dev/null
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <folly/detail/RangeCommon.h>
+
+#include <bitset>
+#include <folly/SparseByteSet.h>
+
+namespace folly {
+
+namespace detail {
+
+// Returns the index of the first byte of haystack that also occurs in
+// needles, or std::string::npos if there is none. Needle bytes are recorded
+// in a 256-entry bitmap which is then probed once per haystack byte.
+size_t qfind_first_byte_of_bitset(const StringPieceLite haystack,
+                                  const StringPieceLite needles) {
+  std::bitset<256> present;
+  for (const char* n = needles.begin(); n != needles.end(); ++n) {
+    present.set(static_cast<uint8_t>(*n));
+  }
+
+  size_t pos = 0;
+  for (const char* h = haystack.begin(); h != haystack.end(); ++h, ++pos) {
+    if (present.test(static_cast<uint8_t>(*h))) {
+      return pos;
+    }
+  }
+  return std::string::npos;
+}
+
+// Returns the index of the first byte of haystack that also occurs in
+// needles, or std::string::npos if there is none. Needle bytes are collected
+// in a SparseByteSet which is then queried once per haystack byte.
+size_t qfind_first_byte_of_byteset(const StringPieceLite haystack,
+                                   const StringPieceLite needles) {
+  SparseByteSet seen;
+  for (const char* n = needles.begin(); n != needles.end(); ++n) {
+    seen.add(*n);
+  }
+
+  size_t pos = 0;
+  for (const char* h = haystack.begin(); h != haystack.end(); ++h, ++pos) {
+    if (seen.contains(*h)) {
+      return pos;
+    }
+  }
+  return std::string::npos;
+}
+
+}
+
+}
--- /dev/null
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_DETAIL_RANGE_COMMON_H_
+#define FOLLY_DETAIL_RANGE_COMMON_H_
+
+#include <algorithm>
+#include <string>
+#include <glog/logging.h>
+#include <folly/Likely.h>
+
+namespace folly {
+
+namespace detail {
+
+/***
+ * StringPieceLite is a minimal, non-owning [begin, end) view over chars.
+ *
+ * The qfind_first_byte_of_* functions are declared here, before Range.h, so
+ * their signatures cannot mention StringPiece even though StringPiece values
+ * are what they exist to operate on. StringPieceLite stands in for
+ * StringPiece to break that dependency cycle.
+ */
+class StringPieceLite {
+ public:
+  // Views the characters in [b, e).
+  StringPieceLite(const char* b, const char* e) : begin_(b), end_(e) {}
+
+  // Views the characters of any object exposing data() and size().
+  template <typename Range>
+  /* implicit */ StringPieceLite(const Range& r) :
+      StringPieceLite(r.data(), r.data() + r.size()) {}
+
+  const char* data() const { return begin_; }
+  const char* begin() const { return begin_; }
+  const char* end() const { return end_; }
+  size_t size() const { return static_cast<size_t>(end_ - begin_); }
+  bool empty() const { return begin_ == end_; }
+
+  // Element access; DCHECKs that i is within [0, size()).
+  const char& operator[](size_t i) const {
+    DCHECK_GT(size(), i);
+    return begin_[i];
+  }
+
+  // Explicit conversion to any type constructible from (begin, end).
+  template <typename Range>
+  explicit operator Range() const { return Range(begin(), end()); }
+
+ private:
+  const char* begin_;
+  const char* end_;
+};
+
+// Standard-library baseline: returns the index of the first byte of haystack
+// that equals any byte of needles, or std::string::npos if there is none.
+inline size_t qfind_first_byte_of_std(const StringPieceLite haystack,
+                                      const StringPieceLite needles) {
+  const char* const hit = std::find_first_of(
+      haystack.begin(), haystack.end(), needles.begin(), needles.end());
+  if (hit == haystack.end()) {
+    return std::string::npos;
+  }
+  return static_cast<size_t>(hit - haystack.begin());
+}
+
+
+size_t qfind_first_byte_of_bitset(const StringPieceLite haystack,
+ const StringPieceLite needles);
+
+size_t qfind_first_byte_of_byteset(const StringPieceLite haystack,
+ const StringPieceLite needles);
+
+// Non-SSE search for the first byte of haystack that occurs in needles.
+// Dispatches to the byteset scan or to qfind_first_byte_of_std depending on
+// the input sizes. Returns std::string::npos if either input is empty or no
+// byte matches.
+inline size_t qfind_first_byte_of_nosse(const StringPieceLite haystack,
+                                        const StringPieceLite needles) {
+  if (UNLIKELY(needles.empty() || haystack.empty())) {
+    return std::string::npos;
+  }
+
+  const size_t needleCount = needles.size();
+  const size_t haystackLen = haystack.size();
+
+  // These cut-offs come from benchmarking rather than theory: the best
+  // choice depends on the CPU, the number of needles and the haystack length.
+  const bool preferByteset =
+      needleCount >= 32 ||
+      (needleCount >= 16 && haystackLen <= 64) ||
+      (needleCount >= 4 && haystackLen <= 10);
+
+  return preferByteset ? qfind_first_byte_of_byteset(haystack, needles)
+                       : qfind_first_byte_of_std(haystack, needles);
+}
+
+}
+
+}
+
+#endif
--- /dev/null
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RangeSse42.h"
+
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <glog/logging.h>
+#include <folly/Likely.h>
+#include <folly/Portability.h>
+
+namespace folly {
+
+namespace detail {
+
+// Lower bound on the page size. Actual pages may be bigger than this (by
+// powers of two), but they must never be smaller.
+static constexpr size_t kMinPageSize = 4096;
+static_assert(kMinPageSize >= 16,
+ "kMinPageSize must be at least SSE register size");
+
+// Returns the number of the kMinPageSize-sized page that contains addr, so
+// two addresses can be compared for "same page".
+template <typename T>
+static inline uintptr_t page_for(T* addr) {
+  const auto raw = reinterpret_cast<uintptr_t>(addr);
+  return raw / kMinPageSize;
+}
+
+// Returns the smallest index i >= 1 such that arr + i is 16-byte aligned
+// (so an already-aligned arr yields 16, not 0).
+static inline size_t nextAlignedIndex(const char* arr) {
+  // Start searching one byte past arr, which is why 1 is added back below.
+  const uintptr_t next = reinterpret_cast<uintptr_t>(arr) + 1;
+  // Round up to the next multiple of 16.
+  const uintptr_t aligned = (next + 15) & ~static_cast<uintptr_t>(0xF);
+  return (aligned - next) + 1;
+}
+
+// Declared separately so the function can be tagged with
+// FOLLY_DISABLE_ADDRESS_SANITIZER.
+static size_t qfind_first_byte_of_needles16(const StringPieceLite haystack,
+                                            const StringPieceLite needles)
+  FOLLY_DISABLE_ADDRESS_SANITIZER;
+
+// Helper for the case needles.size() <= 16: the whole needle set fits in one
+// SSE register. Both inputs must be non-empty. Returns the index of the first
+// haystack byte found in needles, or std::string::npos.
+size_t qfind_first_byte_of_needles16(const StringPieceLite haystack,
+                                     const StringPieceLite needles) {
+  DCHECK_GT(haystack.size(), 0);
+  DCHECK_GT(needles.size(), 0);
+  DCHECK_LE(needles.size(), 16);
+
+  if (needles.size() <= 2 && haystack.size() >= 256) {
+    return detail::qfind_first_byte_of_nosse(haystack, needles);
+  }
+  // Bail out if not even a single 16-byte segment of a short haystack can be
+  // loaded without running onto another page.
+  if (haystack.size() < 16 &&
+      page_for(haystack.end() - 1) != page_for(haystack.data() + 15)) {
+    return detail::qfind_first_byte_of_nosse(haystack, needles);
+  }
+  // Likewise, needles cannot be loaded into an SSE register if the 16-byte
+  // read could cross a page boundary.
+  if (page_for(needles.end() - 1) != page_for(needles.data() + 15)) {
+    return detail::qfind_first_byte_of_nosse(haystack, needles);
+  }
+
+  const auto needleBlock = ::_mm_loadu_si128(
+      reinterpret_cast<const __m128i*>(needles.data()));
+  // The first haystack block is read with an unaligned load.
+  const auto firstBlock = ::_mm_loadu_si128(
+      reinterpret_cast<const __m128i*>(haystack.data()));
+  const auto firstHit = __builtin_ia32_pcmpestri128(
+      (__v16qi)needleBlock, needles.size(),
+      (__v16qi)firstBlock, haystack.size(), 0);
+  if (firstHit < 16) {
+    return firstHit;
+  }
+
+  // From here on every block starts on a 16-byte boundary, so aligned loads
+  // can be used.
+  for (size_t offset = nextAlignedIndex(haystack.data());
+       offset < haystack.size();
+       offset += 16) {
+    const auto block = ::_mm_load_si128(
+        reinterpret_cast<const __m128i*>(haystack.data() + offset));
+    const auto hit = __builtin_ia32_pcmpestri128(
+        (__v16qi)needleBlock, needles.size(),
+        (__v16qi)block, haystack.size() - offset, 0);
+    if (hit < 16) {
+      return offset + hit;
+    }
+  }
+  return std::string::npos;
+}
+
+template <bool HAYSTACK_ALIGNED>
+size_t scanHaystackBlock(const StringPieceLite haystack,
+ const StringPieceLite needles,
+ uint64_t idx)
+// ASAN is turned off because the aligned "arr2 = ..." load in the loop below
+// may read up to 15 bytes beyond the end of #needles#. That is ok because the
+// load address is 16-byte aligned, so the read can never span a page boundary,
+// and the extra bytes that may be read are never actually used.
+ FOLLY_DISABLE_ADDRESS_SANITIZER;
+
+// Scans the 16-byte block of haystack starting at blockStartIdx for the first
+// byte that appears in needles; returns its index in haystack, or npos if the
+// block has none. If HAYSTACK_ALIGNED, that block must be 16-byte aligned;
+// if !HAYSTACK_ALIGNED, the caller must ensure it is safe to load the block.
+template <bool HAYSTACK_ALIGNED>
+size_t scanHaystackBlock(const StringPieceLite haystack,
+ const StringPieceLite needles,
+ uint64_t blockStartIdx) {
+ DCHECK_GT(needles.size(), 16); // should be handled by *needles16() method
+ DCHECK(blockStartIdx + 16 <= haystack.size() ||
+ (page_for(haystack.data() + blockStartIdx) ==
+ page_for(haystack.data() + blockStartIdx + 15)));
+
+ __m128i arr1;
+ if (HAYSTACK_ALIGNED) {
+ arr1 = ::_mm_load_si128(
+ reinterpret_cast<const __m128i*>(haystack.data() + blockStartIdx));
+ } else {
+ arr1 = ::_mm_loadu_si128(
+ reinterpret_cast<const __m128i*>(haystack.data() + blockStartIdx));
+ }
+
+ // This load is safe because needles.size() > 16 (DCHECKed above)
+ auto arr2 = ::_mm_loadu_si128(
+ reinterpret_cast<const __m128i*>(needles.data()));
+ size_t b = __builtin_ia32_pcmpestri128(
+ (__v16qi)arr2, 16, (__v16qi)arr1, haystack.size() - blockStartIdx, 0);
+
+ size_t j = nextAlignedIndex(needles.data());
+ for (; j < needles.size(); j += 16) {
+ arr2 = ::_mm_load_si128(
+ reinterpret_cast<const __m128i*>(needles.data() + j));
+
+ auto index = __builtin_ia32_pcmpestri128(
+ (__v16qi)arr2, needles.size() - j,
+ (__v16qi)arr1, haystack.size() - blockStartIdx, 0);
+ b = std::min<size_t>(index, b);
+ }
+
+ if (b < 16) {
+ return blockStartIdx + b;
+ }
+ return std::string::npos;
+}
+
+// Returns the index of the first byte of haystack that also appears in
+// needles, or std::string::npos if either input is empty or nothing matches.
+// Uses 16-byte SSE block scans when the haystack can be loaded safely and
+// falls back to the scalar helpers otherwise.
+size_t qfind_first_byte_of_sse42(const StringPieceLite haystack,
+                                 const StringPieceLite needles) {
+  if (UNLIKELY(needles.empty() || haystack.empty())) {
+    return std::string::npos;
+  } else if (needles.size() <= 16) {
+    // we can save some unnecessary load instructions by optimizing for
+    // the common case of needles.size() <= 16
+    return qfind_first_byte_of_needles16(haystack, needles);
+  }
+
+  // The unaligned load of the first block reads haystack.data()[0..15], so
+  // the last byte touched is data() + 15. A short haystack is only safe to
+  // load if that byte lies on the same page as the haystack's last byte.
+  if (haystack.size() < 16 &&
+      page_for(haystack.end() - 1) != page_for(haystack.data() + 15)) {
+    // We can't safely SSE-load haystack. Use a different approach.
+    if (haystack.size() <= 2) {
+      return qfind_first_byte_of_std(haystack, needles);
+    }
+    return qfind_first_byte_of_byteset(haystack, needles);
+  }
+
+  // First block: haystack.data() has unknown alignment.
+  const auto firstBlockHit = scanHaystackBlock<false>(haystack, needles, 0);
+  if (firstBlockHit != std::string::npos) {
+    return firstBlockHit;
+  }
+
+  // Remaining blocks start at 16-byte-aligned addresses.
+  for (size_t i = nextAlignedIndex(haystack.data());
+       i < haystack.size();
+       i += 16) {
+    const auto blockHit = scanHaystackBlock<true>(haystack, needles, i);
+    if (blockHit != std::string::npos) {
+      return blockHit;
+    }
+  }
+
+  return std::string::npos;
+}
+
+}
+
+}
--- /dev/null
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_DETAIL_RANGE_SSE42_H_
+#define FOLLY_DETAIL_RANGE_SSE42_H_
+
+#include <cstddef>
+#include <folly/detail/RangeCommon.h>
+
+namespace folly {
+
+namespace detail {
+
+size_t qfind_first_byte_of_sse42(const StringPieceLite haystack,
+ const StringPieceLite needles);
+
+}
+
+}
+
+#endif
#include <random>
#include <string>
-namespace folly { namespace detail {
-// declaration of functions in Range.cpp
-size_t qfind_first_byte_of_byteset(const StringPiece haystack,
- const StringPiece needles);
-
-size_t qfind_first_byte_of_bitset(const StringPiece haystack,
- const StringPiece needles);
-
-size_t qfind_first_byte_of_nosse(const StringPiece haystack,
- const StringPiece needles);
-}}
-
using namespace folly;
using namespace std;
BENCHMARK_DRAW_LINE();
-// it's useful to compare our custom implementations vs. the standard library
-inline size_t qfind_first_byte_of_std(const StringPiece haystack,
- const StringPiece needles) {
- return qfind_first_of(haystack, needles, AsciiCaseSensitive());
-}
-
template <class Func>
void countHits(Func func, size_t n) {
StringPiece needles = "\r\n\1";
}
BENCHMARK_RELATIVE(FindFirstOf1NeedlesStd, n) {
- findFirstOfRange(delims1, qfind_first_byte_of_std, n);
+ findFirstOfRange(delims1, detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOf1NeedlesByteSet, n) {
}
BENCHMARK_RELATIVE(FindFirstOf2NeedlesStd, n) {
- findFirstOfRange(delims2, qfind_first_byte_of_std, n);
+ findFirstOfRange(delims2, detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOf2NeedlesByteSet, n) {
}
BENCHMARK_RELATIVE(FindFirstOf4NeedlesStd, n) {
- findFirstOfRange(delims4, qfind_first_byte_of_std, n);
+ findFirstOfRange(delims4, detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOf4NeedlesByteSet, n) {
}
BENCHMARK_RELATIVE(FindFirstOf8NeedlesStd, n) {
- findFirstOfRange(delims8, qfind_first_byte_of_std, n);
+ findFirstOfRange(delims8, detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOf8NeedlesByteSet, n) {
}
BENCHMARK_RELATIVE(FindFirstOf16NeedlesStd, n) {
- findFirstOfRange(delims16, qfind_first_byte_of_std, n);
+ findFirstOfRange(delims16, detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOf16NeedlesByteSet, n) {
}
BENCHMARK_RELATIVE(FindFirstOf32NeedlesStd, n) {
- findFirstOfRange(delims32, qfind_first_byte_of_std, n);
+ findFirstOfRange(delims32, detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOf32NeedlesByteSet, n) {
}
BENCHMARK_RELATIVE(FindFirstOf64NeedlesStd, n) {
- findFirstOfRange(delims64, qfind_first_byte_of_std, n);
+ findFirstOfRange(delims64, detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOf64NeedlesByteSet, n) {
}
BENCHMARK_RELATIVE(FindFirstOfRandomStd, n) {
- findFirstOfRandom(qfind_first_byte_of_std, n);
+ findFirstOfRandom(detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOfRandomByteSet, n) {
}
BENCHMARK_RELATIVE(CountDelimsStd, n) {
- countHits(qfind_first_byte_of_std, n);
+ countHits(detail::qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(CountDelimsByteSet, n) {
#include <boost/range/concepts.hpp>
#include <gtest/gtest.h>
-namespace folly { namespace detail {
-
-// declaration of functions in Range.cpp
-size_t qfind_first_byte_of_byteset(const StringPiece haystack,
- const StringPiece needles);
-
-}} // namespaces
-
using namespace folly;
using namespace std;