From 81d9192f8afdf53fcbc7bf572f8ce638516380f8 Mon Sep 17 00:00:00 2001 From: Nathan Bronson Date: Sun, 30 Oct 2016 18:12:26 -0700 Subject: [PATCH] improvements to folly::doNotOptimizeAway Summary: A common failure mode for folly::doNotOptimizeAway is to pass it a pointer to a complex object that the caller wants to ensure is fully calculated; passing the pointer doesn't actually ensure that. Also, the GCC implementation used inline assembly with an in-out operand and the MSVC implementation used a self-assignment, both of which cause a store. A survey of many usages of doNotOptimizeAway found only one that relies on this make-unpredictable behavior. This diff makes doNotOptimizeAway(&x) equivalent to doNotOptimizeAway(x) for GCC and clang, and makes it a read-only sink. For the rare case that the benchmark wants to disable subexpression elimination, constant propagation, or power reduction (strength reduction), there is a new function makeUnpredictable. It also merges the clang and GCC implementations, removing a potential bias in our microbenchmarks. Reviewed By: davidtgoldblatt Differential Revision: D4074670 fbshipit-source-id: 43f02e7fe149147bb172babe77787dea06e098fa --- folly/Benchmark.h | 68 +++++++++--- folly/test/BenchmarkTest.cpp | 196 +++++++++++++++++++++++++++++++++++ 2 files changed, 251 insertions(+), 13 deletions(-) diff --git a/folly/Benchmark.h b/folly/Benchmark.h index f3c3ada2..d1db800c 100644 --- a/folly/Benchmark.h +++ b/folly/Benchmark.h @@ -19,6 +19,7 @@ #include #include // for FB_ANONYMOUS_VARIABLE #include +#include #include #include @@ -235,32 +236,73 @@ addBenchmark(const char* file, const char* name, Lambda&& lambda) { } /** - * Call doNotOptimizeAway(var) against variables that you use for + * Call doNotOptimizeAway(var) to ensure that var will be computed even + * post-optimization. Use it for variables that are computed during * benchmarking but otherwise are useless. The compiler tends to do a - * good job at eliminating unused variables, and this function fools - * it into thinking var is in fact needed.
+ * good job at eliminating unused variables, and this function fools it + * into thinking var is in fact needed. + * + * Call makeUnpredictable(var) when you don't want the optimizer to use + * its knowledge of var to shape the following code. This is useful + * when constant propagation or power reduction is possible during your + * benchmark but not in real use cases. */ + #ifdef _MSC_VER #pragma optimize("", off) -template -void doNotOptimizeAway(T&& datum) { - datum = datum; -} +inline void doNotOptimizeDependencySink(const void*) {} #pragma optimize("", on) -#elif defined(__clang__) - template -__attribute__((__optnone__)) void doNotOptimizeAway(T&& /* datum */) {} +void doNotOptimizeAway(const T& datum) { + doNotOptimizeDependencySink(&datum); +} + +template +void makeUnpredictable(T& datum) { + doNotOptimizeDependencySink(&datum); +} #else -template -void doNotOptimizeAway(T&& datum) { - asm volatile("" : "+r" (datum)); +namespace detail { +template +struct DoNotOptimizeAwayNeedsIndirect { + using Decayed = typename std::decay::type; + + // First two constraints ensure it can be an "r" operand. + // std::is_pointer check is because callers seem to expect that + // doNotOptimizeAway(&x) is equivalent to doNotOptimizeAway(x). 
+ constexpr static bool value = !folly::IsTriviallyCopyable::value || + sizeof(Decayed) > sizeof(long) || std::is_pointer::value; +}; +} // detail namespace + +template +auto doNotOptimizeAway(const T& datum) -> typename std::enable_if< + !detail::DoNotOptimizeAwayNeedsIndirect::value>::type { + asm volatile("" ::"X"(datum)); +} + +template +auto doNotOptimizeAway(const T& datum) -> typename std::enable_if< + detail::DoNotOptimizeAwayNeedsIndirect::value>::type { + asm volatile("" ::"m"(datum) : "memory"); +} + +template +auto makeUnpredictable(T& datum) -> typename std::enable_if< + !detail::DoNotOptimizeAwayNeedsIndirect::value>::type { + asm volatile("" : "+r"(datum)); +} + +template +auto makeUnpredictable(T& datum) -> typename std::enable_if< + detail::DoNotOptimizeAwayNeedsIndirect::value>::type { + asm volatile("" ::"m"(datum) : "memory"); } #endif diff --git a/folly/test/BenchmarkTest.cpp b/folly/test/BenchmarkTest.cpp index 0e2191f2..e6b36e44 100644 --- a/folly/test/BenchmarkTest.cpp +++ b/folly/test/BenchmarkTest.cpp @@ -46,6 +46,202 @@ BENCHMARK(gun) { BENCHMARK_DRAW_LINE() +BENCHMARK(optimizerCanDiscardTrivial, n) { + long x = 0; + for (long i = 0; i < n; ++i) { + for (long j = 0; j < 10000; ++j) { + x += j; + } + } +} + +BENCHMARK(optimizerCanPowerReduceInner1Trivial, n) { + long x = 0; + for (long i = 0; i < n; ++i) { + for (long j = 0; j < 10000; ++j) { + x += i + j; + } + doNotOptimizeAway(x); + } +} + +BENCHMARK(optimizerCanPowerReduceInner2Trivial, n) { + long x = 0; + for (long i = 0; i < n; ++i) { + makeUnpredictable(i); + for (long j = 0; j < 10000; ++j) { + x += i + j; + } + } + doNotOptimizeAway(x); +} + +BENCHMARK(optimizerDisabled1Trivial, n) { + long x = 0; + for (long i = 0; i < n; ++i) { + for (long j = 0; j < 10000; ++j) { + x += i + j; + doNotOptimizeAway(x); + } + } +} + +BENCHMARK(optimizerDisabled2Trivial, n) { + long x = 0; + for (long i = 0; i < n; ++i) { + makeUnpredictable(i); + for (long j = 0; j < 10000; ++j) { + 
makeUnpredictable(j); + x += i + j; + } + } + doNotOptimizeAway(x); +} + +BENCHMARK(optimizerCanPowerReduceInner1TrivialPtr, n) { + long x = 0; + for (long i = 0; i < n; ++i) { + for (long j = 0; j < 10000; ++j) { + x += i + j; + } + doNotOptimizeAway(&x); + } +} + +BENCHMARK(optimizerCanPowerReduceInner2TrivialPtr, n) { + long x = 0; + for (long i = 0; i < n; ++i) { + makeUnpredictable(i); + for (long j = 0; j < 10000; ++j) { + x += i + j; + } + } + doNotOptimizeAway(&x); +} + +BENCHMARK(optimizerDisabled1TrivialPtr, n) { + long x = 0; + for (long i = 0; i < n; ++i) { + for (long j = 0; j < 10000; ++j) { + x += i + j; + doNotOptimizeAway(&x); + } + } +} + +namespace { +class NonTrivialLong { + public: + explicit NonTrivialLong(long v) : value_(v) {} + virtual ~NonTrivialLong() {} + + void operator++() { + ++value_; + } + void operator+=(long rhs) { + value_ += rhs; + } + void operator+=(const NonTrivialLong& rhs) { + value_ += rhs.value_; + } + bool operator<(long rhs) { + return value_ < rhs; + } + NonTrivialLong operator+(const NonTrivialLong& rhs) { + return NonTrivialLong(value_ + rhs.value_); + } + + private: + long value_; + long otherStuff_[3]; +}; +} + +BENCHMARK(optimizerCanDiscardNonTrivial, n) { + NonTrivialLong x(0); + for (NonTrivialLong i(0); i < n; ++i) { + for (NonTrivialLong j(0); j < 10000; ++j) { + x += j; + } + } +} + +BENCHMARK(optimizerCanPowerReduceInner1NonTrivial, n) { + NonTrivialLong x(0); + for (NonTrivialLong i(0); i < n; ++i) { + for (NonTrivialLong j(0); j < 10000; ++j) { + x += i + j; + } + doNotOptimizeAway(x); + } +} + +BENCHMARK(optimizerCanPowerReduceInner2NonTrivial, n) { + NonTrivialLong x(0); + for (NonTrivialLong i(0); i < n; ++i) { + makeUnpredictable(i); + for (NonTrivialLong j(0); j < 10000; ++j) { + x += i + j; + } + } + doNotOptimizeAway(x); +} + +BENCHMARK(optimizerDisabled1NonTrivial, n) { + NonTrivialLong x(0); + for (NonTrivialLong i(0); i < n; ++i) { + for (NonTrivialLong j(0); j < 10000; ++j) { + x += i + j; + 
doNotOptimizeAway(x); + } + } +} + +BENCHMARK(optimizerDisabled2NonTrivial, n) { + NonTrivialLong x(0); + for (NonTrivialLong i(0); i < n; ++i) { + makeUnpredictable(i); + for (NonTrivialLong j(0); j < 10000; ++j) { + makeUnpredictable(j); + x += i + j; + } + } + doNotOptimizeAway(x); +} + +BENCHMARK(optimizerCanPowerReduceInner1NonTrivialPtr, n) { + NonTrivialLong x(0); + for (NonTrivialLong i(0); i < n; ++i) { + for (NonTrivialLong j(0); j < 10000; ++j) { + x += i + j; + } + doNotOptimizeAway(&x); + } +} + +BENCHMARK(optimizerCanPowerReduceInner2NonTrivialPtr, n) { + NonTrivialLong x(0); + for (NonTrivialLong i(0); i < n; ++i) { + makeUnpredictable(i); + for (NonTrivialLong j(0); j < 10000; ++j) { + x += i + j; + } + } + doNotOptimizeAway(&x); +} + +BENCHMARK(optimizerDisabled1NonTrivialPtr, n) { + NonTrivialLong x(0); + for (NonTrivialLong i(0); i < n; ++i) { + for (NonTrivialLong j(0); j < 10000; ++j) { + x += i + j; + doNotOptimizeAway(&x); + } + } +} + +BENCHMARK_DRAW_LINE() + BENCHMARK(baselinevector) { vector v; -- 2.34.1