return os;
}
-namespace detail {
-
-size_t qfind_first_byte_of_memchr(const StringPiece haystack,
- const StringPiece needles) {
- size_t best = haystack.size();
- for (char needle: needles) {
- const void* ptr = memchr(haystack.data(), needle, best);
- if (ptr) {
- auto found = static_cast<const char*>(ptr) - haystack.data();
- best = std::min<size_t>(best, found);
- }
- }
- if (best == haystack.size()) {
- return StringPiece::npos;
- }
- return best;
-}
-
-} // namespace detail
-
namespace {
// It's okay if pages are bigger than this (as powers of two), but they should
DCHECK(!haystack.empty());
DCHECK(!needles.empty());
DCHECK_LE(needles.size(), 16);
- // benchmarking shows that memchr beats out SSE for small needle-sets
- // with large haystacks.
if ((needles.size() <= 2 && haystack.size() >= 256) ||
// must bail if we can't even SSE-load a single segment of haystack
(haystack.size() < 16 &&
PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 15)) ||
// can't load needles into SSE register if it could cross page boundary
PAGE_FOR(needles.end() - 1) != PAGE_FOR(needles.data() + 15)) {
- return detail::qfind_first_byte_of_memchr(haystack, needles);
+ return detail::qfind_first_byte_of_nosse(haystack, needles);
}
auto arr2 = __builtin_ia32_loaddqu(needles.data());
// The thresholds below were empirically determined by benchmarking.
// This is not an exact science since it depends on the CPU, the size of
// needles, and the size of haystack.
- if (haystack.size() == 1 ||
- (haystack.size() < 4 && needles.size() <= 16)) {
- return qfind_first_of(haystack, needles, asciiCaseSensitive);
- } else if ((needles.size() >= 4 && haystack.size() <= 10) ||
+ if ((needles.size() >= 4 && haystack.size() <= 10) ||
(needles.size() >= 16 && haystack.size() <= 64) ||
needles.size() >= 32) {
return qfind_first_byte_of_byteset(haystack, needles);
}
-
- return qfind_first_byte_of_memchr(haystack, needles);
+ return qfind_first_of(haystack, needles, asciiCaseSensitive);
}
} // namespace detail
namespace folly { namespace detail {
// declaration of functions in Range.cpp
-size_t qfind_first_byte_of_memchr(const StringPiece haystack,
- const StringPiece needles);
-
size_t qfind_first_byte_of_byteset(const StringPiece haystack,
const StringPiece needles);
constexpr int kVstrSize = 16;
std::vector<std::string> vstr;
std::vector<StringPiece> vstrp;
+// Haystack buffer for the delimiter-counting benchmark: filled by initFile()
+// and scanned by countHits().
+std::string file;
void initStr(int len) {
cout << "string length " << len << ':' << endl;
const size_t ffoDelimSize = 128;
vector<string> ffoDelim;
+// Refills the global `file` buffer with `len` characters drawn uniformly from
+// the byte values [1, 64]. Every '\r' that is drawn is stored as '\n'
+// instead, so the buffer never contains '\r'.
+//
+// `rnd` is the random engine defined elsewhere in this file. A non-positive
+// `len` leaves `file` empty.
+void initFile(int len) {
+  std::uniform_int_distribution<uint32_t> validChar(1, 64);
+  file.clear();
+  if (len > 0) {
+    // Final size is known up front; avoid repeated reallocation for the
+    // large (up to 1 MiB) inputs.
+    file.reserve(static_cast<size_t>(len));
+  }
+  // Bounded loop: unlike `while (len--)`, a negative `len` cannot run away.
+  for (int i = 0; i < len; ++i) {
+    char ch = validChar(rnd);
+    if (ch == '\r') {
+      ch = '\n';
+    }
+    file.push_back(ch);
+  }
+}
+
+
string generateString(int len) {
std::uniform_int_distribution<uint32_t> validChar(1, 255); // no null-char
string ret;
return qfind_first_of(haystack, needles, asciiCaseSensitive);
}
+// Benchmark driver: for each of the `n` iterations, walks the whole global
+// `file` buffer and counts the bytes that match one of the needles
+// "\r\n\1", calling `func(haystack, needles)` to locate each successive hit.
+//
+// `func` must return the offset of the first matching byte in `haystack`, or
+// StringPiece::npos when there is none.
+template <class Func>
+void countHits(Func func, size_t n) {
+  StringPiece needles = "\r\n\1";
+  FOR_EACH_RANGE (i, 0, n) {
+    // Named `hits` rather than `n` so it does not shadow the iteration-count
+    // parameter.
+    size_t hits = 0;
+    size_t p;
+    for (StringPiece left = file;
+         (p = func(left, needles)) != StringPiece::npos;
+         left.advance(p + 1)) {  // resume just past the byte that matched
+      ++hits;
+    }
+    // Hand the tally to the benchmark framework so the scan has an
+    // observable result.
+    doNotOptimizeAway(hits);
+  }
+}
+
template <class Func>
void findFirstOfRange(StringPiece needles, Func func, size_t n) {
FOR_EACH_RANGE (i, 0, n) {
}
}
+// Single-character needle set used by the FindFirstOf1Needles* benchmarks.
+const string delims1 = "b";
+
+// Baseline of the one-needle group: drives findFirstOfRange with
+// detail::qfind_first_byte_of.
+BENCHMARK(FindFirstOf1NeedlesBase, n) {
+ findFirstOfRange(delims1, detail::qfind_first_byte_of, n);
+}
+
+// Same workload through detail::qfind_first_byte_of_nosse.
+BENCHMARK_RELATIVE(FindFirstOf1NeedlesNoSSE, n) {
+ findFirstOfRange(delims1, detail::qfind_first_byte_of_nosse, n);
+}
+
+// Same workload through qfind_first_byte_of_std.
+BENCHMARK_RELATIVE(FindFirstOf1NeedlesStd, n) {
+ findFirstOfRange(delims1, qfind_first_byte_of_std, n);
+}
+
+// Same workload through detail::qfind_first_byte_of_byteset.
+BENCHMARK_RELATIVE(FindFirstOf1NeedlesByteSet, n) {
+ findFirstOfRange(delims1, detail::qfind_first_byte_of_byteset, n);
+}
+
+// End of the one-needle group.
+BENCHMARK_DRAW_LINE();
+
+
const string delims2 = "bc";
BENCHMARK(FindFirstOf2NeedlesBase, n) {
findFirstOfRange(delims2, qfind_first_byte_of_std, n);
}
-BENCHMARK_RELATIVE(FindFirstOf2NeedlesMemchr, n) {
- findFirstOfRange(delims2, detail::qfind_first_byte_of_memchr, n);
-}
-
BENCHMARK_RELATIVE(FindFirstOf2NeedlesByteSet, n) {
findFirstOfRange(delims2, detail::qfind_first_byte_of_byteset, n);
}
findFirstOfRange(delims4, qfind_first_byte_of_std, n);
}
-BENCHMARK_RELATIVE(FindFirstOf4NeedlesMemchr, n) {
- findFirstOfRange(delims4, detail::qfind_first_byte_of_memchr, n);
-}
-
BENCHMARK_RELATIVE(FindFirstOf4NeedlesByteSet, n) {
findFirstOfRange(delims4, detail::qfind_first_byte_of_byteset, n);
}
findFirstOfRange(delims8, qfind_first_byte_of_std, n);
}
-BENCHMARK_RELATIVE(FindFirstOf8NeedlesMemchr, n) {
- findFirstOfRange(delims8, detail::qfind_first_byte_of_memchr, n);
-}
-
BENCHMARK_RELATIVE(FindFirstOf8NeedlesByteSet, n) {
findFirstOfRange(delims8, detail::qfind_first_byte_of_byteset, n);
}
findFirstOfRange(delims16, qfind_first_byte_of_std, n);
}
-BENCHMARK_RELATIVE(FindFirstOf16NeedlesMemchr, n) {
- findFirstOfRange(delims16, detail::qfind_first_byte_of_memchr, n);
-}
-
BENCHMARK_RELATIVE(FindFirstOf16NeedlesByteSet, n) {
findFirstOfRange(delims16, detail::qfind_first_byte_of_byteset, n);
}
findFirstOfRange(delims32, qfind_first_byte_of_std, n);
}
-BENCHMARK_RELATIVE(FindFirstOf32NeedlesMemchr, n) {
- findFirstOfRange(delims32, detail::qfind_first_byte_of_memchr, n);
-}
-
BENCHMARK_RELATIVE(FindFirstOf32NeedlesByteSet, n) {
findFirstOfRange(delims32, detail::qfind_first_byte_of_byteset, n);
}
findFirstOfRange(delims64, qfind_first_byte_of_std, n);
}
-BENCHMARK_RELATIVE(FindFirstOf64NeedlesMemchr, n) {
- findFirstOfRange(delims64, detail::qfind_first_byte_of_memchr, n);
-}
-
BENCHMARK_RELATIVE(FindFirstOf64NeedlesByteSet, n) {
findFirstOfRange(delims64, detail::qfind_first_byte_of_byteset, n);
}
findFirstOfRandom(qfind_first_byte_of_std, n);
}
-BENCHMARK_RELATIVE(FindFirstOfRandomMemchr, n) {
- findFirstOfRandom(detail::qfind_first_byte_of_memchr, n);
-}
-
BENCHMARK_RELATIVE(FindFirstOfRandomByteSet, n) {
findFirstOfRandom(detail::qfind_first_byte_of_byteset, n);
}
BENCHMARK_DRAW_LINE();
+// Baseline of the delimiter-counting group: runs countHits over the global
+// `file` buffer with detail::qfind_first_byte_of.
+BENCHMARK(CountDelimsBase, n) {
+ countHits(detail::qfind_first_byte_of, n);
+}
+
+// Same scan through detail::qfind_first_byte_of_nosse.
+BENCHMARK_RELATIVE(CountDelimsNoSSE, n) {
+ countHits(detail::qfind_first_byte_of_nosse, n);
+}
+
+// Same scan through qfind_first_byte_of_std.
+BENCHMARK_RELATIVE(CountDelimsStd, n) {
+ countHits(qfind_first_byte_of_std, n);
+}
+
+// Same scan through detail::qfind_first_byte_of_byteset.
+BENCHMARK_RELATIVE(CountDelimsByteSet, n) {
+ countHits(detail::qfind_first_byte_of_byteset, n);
+}
+
+// End of the delimiter-counting group.
+BENCHMARK_DRAW_LINE();
+
+
BENCHMARK(FindFirstOfOffsetRange, n) {
StringPiece haystack(str);
folly::StringPiece needles("bc");
for (int len : {1, 8, 10, 16, 32, 64, 128, 256, 10*1024, 1024*1024}) {
initStr(len);
initDelims(len);
+ initFile(len);
runBenchmarks();
}
return 0;