#include <folly/Portability.h>
#include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE
#include <folly/ScopeGuard.h>
+#include <folly/Traits.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/Time.h>
}
/**
- * Call doNotOptimizeAway(var) against variables that you use for
+ * Call doNotOptimizeAway(var) to ensure that var will be computed even
+ * post-optimization. Use it for variables that are computed during
* benchmarking but otherwise are useless. The compiler tends to do a
- * good job at eliminating unused variables, and this function fools
- * it into thinking var is in fact needed.
+ * good job at eliminating unused variables, and this function fools it
+ * into thinking var is in fact needed.
+ *
+ * Call makeUnpredictable(var) when you don't want the optimizer to use
+ * its knowledge of var to shape the following code. This is useful
+ * when constant propagation or power reduction (strength reduction) is
+ * possible during your benchmark but not in real use cases.
*/
+
#ifdef _MSC_VER
#pragma optimize("", off)
-template <class T>
-void doNotOptimizeAway(T&& datum) {
- datum = datum;
-}
+inline void doNotOptimizeDependencySink(const void*) {} // empty sink; defined between the optimize off/on pragmas
#pragma optimize("", on)
-#elif defined(__clang__)
-
template <class T>
-__attribute__((__optnone__)) void doNotOptimizeAway(T&& /* datum */) {}
+void doNotOptimizeAway(const T& datum) { // MSVC path: pass datum's address to the sink above
+ doNotOptimizeDependencySink(&datum);
+}
+
+template <typename T>
+void makeUnpredictable(T& datum) { // MSVC path: same sink call, datum taken by non-const reference
+ doNotOptimizeDependencySink(&datum);
+}
#else
-template <class T>
-void doNotOptimizeAway(T&& datum) {
- asm volatile("" : "+r" (datum));
+namespace detail {
+template <typename T>
+struct DoNotOptimizeAwayNeedsIndirect { // value is true when T should go to the asm as a memory operand
+ using Decayed = typename std::decay<T>::type;
+
+ // First two constraints ensure it can be an "r" operand.
+ // std::is_pointer check is because callers seem to expect that
+ // doNotOptimizeAway(&x) is equivalent to doNotOptimizeAway(x).
+ constexpr static bool value = !folly::IsTriviallyCopyable<Decayed>::value ||
+ sizeof(Decayed) > sizeof(long) || std::is_pointer<Decayed>::value;
+};
+} // namespace detail
+
+template <typename T>
+auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
+ !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type { // selected when the trait is false (direct case)
+ asm volatile("" ::"X"(datum)); // empty asm with datum as an input-only operand; emits no instructions itself
+}
+
+template <typename T>
+auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
+ detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type { // selected when the trait is true (indirect case)
+ asm volatile("" ::"m"(datum) : "memory"); // datum as a memory input operand, plus a "memory" clobber
+}
+
+template <typename T>
+auto makeUnpredictable(T& datum) -> typename std::enable_if<
+ !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type { // direct case
+ asm volatile("" : "+r"(datum)); // "+r": datum is a read-write register operand, so its value is treated as possibly changed
+}
+
+template <typename T>
+auto makeUnpredictable(T& datum) -> typename std::enable_if<
+ detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type { // indirect case
+ asm volatile("" ::"m"(datum) : "memory"); // input-only "m" operand; the "memory" clobber is the only thing signalling a possible modification
}
#endif
BENCHMARK_DRAW_LINE()
+BENCHMARK(optimizerCanDiscardTrivial, n) {
+ long x = 0; // accumulated below but never passed to doNotOptimizeAway or read afterwards
+ for (long i = 0; i < n; ++i) {
+ for (long j = 0; j < 10000; ++j) {
+ x += j;
+ }
+ }
+}
+
+BENCHMARK(optimizerCanPowerReduceInner1Trivial, n) {
+ long x = 0;
+ for (long i = 0; i < n; ++i) {
+ for (long j = 0; j < 10000; ++j) {
+ x += i + j;
+ }
+ doNotOptimizeAway(x); // x is pinned once per outer iteration, after the whole inner loop
+ }
+}
+
+BENCHMARK(optimizerCanPowerReduceInner2Trivial, n) {
+ long x = 0;
+ for (long i = 0; i < n; ++i) {
+ makeUnpredictable(i); // hide i's value from the optimizer once per outer iteration
+ for (long j = 0; j < 10000; ++j) {
+ x += i + j;
+ }
+ }
+ doNotOptimizeAway(x); // x is pinned only once, after both loops finish
+}
+
+BENCHMARK(optimizerDisabled1Trivial, n) {
+ long x = 0;
+ for (long i = 0; i < n; ++i) {
+ for (long j = 0; j < 10000; ++j) {
+ x += i + j;
+ doNotOptimizeAway(x); // x is pinned after every single addition, inside the inner loop
+ }
+ }
+}
+
+BENCHMARK(optimizerDisabled2Trivial, n) {
+ long x = 0;
+ for (long i = 0; i < n; ++i) {
+ makeUnpredictable(i); // outer counter hidden from the optimizer
+ for (long j = 0; j < 10000; ++j) {
+ makeUnpredictable(j); // inner counter hidden too, on every inner iteration
+ x += i + j;
+ }
+ }
+ doNotOptimizeAway(x); // x is pinned only once, after both loops finish
+}
+
+BENCHMARK(optimizerCanPowerReduceInner1TrivialPtr, n) {
+ long x = 0;
+ for (long i = 0; i < n; ++i) {
+ for (long j = 0; j < 10000; ++j) {
+ x += i + j;
+ }
+ doNotOptimizeAway(&x); // pointer variant: passes x's address, once per outer iteration
+ }
+}
+
+BENCHMARK(optimizerCanPowerReduceInner2TrivialPtr, n) {
+ long x = 0;
+ for (long i = 0; i < n; ++i) {
+ makeUnpredictable(i); // hide i's value from the optimizer once per outer iteration
+ for (long j = 0; j < 10000; ++j) {
+ x += i + j;
+ }
+ }
+ doNotOptimizeAway(&x); // pointer variant: passes x's address once, after both loops
+}
+
+BENCHMARK(optimizerDisabled1TrivialPtr, n) {
+ long x = 0;
+ for (long i = 0; i < n; ++i) {
+ for (long j = 0; j < 10000; ++j) {
+ x += i + j;
+ doNotOptimizeAway(&x); // pointer variant: passes x's address after every addition
+ }
+ }
+}
+
+namespace {
+// Counter type used by the *NonTrivial benchmarks in place of a plain long.
+// The virtual destructor makes it not trivially copyable, and the extra
+// array member makes it larger than a single long.
+class NonTrivialLong {
+ public:
+ explicit NonTrivialLong(long v) : value_(v) {}
+ virtual ~NonTrivialLong() {}
+
+ // Pre-increment of the wrapped value.
+ void operator++() {
+ ++value_;
+ }
+ // Adds a plain long to the wrapped value.
+ void operator+=(long rhs) {
+ value_ += rhs;
+ }
+ // Adds another NonTrivialLong's wrapped value.
+ void operator+=(const NonTrivialLong& rhs) {
+ value_ += rhs.value_;
+ }
+ // Compares the wrapped value against a plain long; does not mutate.
+ bool operator<(long rhs) const {
+ return value_ < rhs;
+ }
+ // Returns a new object holding the sum; neither operand is modified.
+ NonTrivialLong operator+(const NonTrivialLong& rhs) const {
+ return NonTrivialLong(value_ + rhs.value_);
+ }
+
+ private:
+ long value_;
+ // Never read or written; only enlarges the object.
+ long otherStuff_[3];
+};
+}
+
+BENCHMARK(optimizerCanDiscardNonTrivial, n) {
+ NonTrivialLong x(0); // accumulated below but never passed to doNotOptimizeAway or read afterwards
+ for (NonTrivialLong i(0); i < n; ++i) {
+ for (NonTrivialLong j(0); j < 10000; ++j) {
+ x += j;
+ }
+ }
+}
+
+BENCHMARK(optimizerCanPowerReduceInner1NonTrivial, n) {
+ NonTrivialLong x(0);
+ for (NonTrivialLong i(0); i < n; ++i) {
+ for (NonTrivialLong j(0); j < 10000; ++j) {
+ x += i + j;
+ }
+ doNotOptimizeAway(x); // x is pinned once per outer iteration, after the whole inner loop
+ }
+}
+
+BENCHMARK(optimizerCanPowerReduceInner2NonTrivial, n) {
+ NonTrivialLong x(0);
+ for (NonTrivialLong i(0); i < n; ++i) {
+ makeUnpredictable(i); // hide i's state from the optimizer once per outer iteration
+ for (NonTrivialLong j(0); j < 10000; ++j) {
+ x += i + j;
+ }
+ }
+ doNotOptimizeAway(x); // x is pinned only once, after both loops finish
+}
+
+BENCHMARK(optimizerDisabled1NonTrivial, n) {
+ NonTrivialLong x(0);
+ for (NonTrivialLong i(0); i < n; ++i) {
+ for (NonTrivialLong j(0); j < 10000; ++j) {
+ x += i + j;
+ doNotOptimizeAway(x); // x is pinned after every single addition, inside the inner loop
+ }
+ }
+}
+
+BENCHMARK(optimizerDisabled2NonTrivial, n) {
+ NonTrivialLong x(0);
+ for (NonTrivialLong i(0); i < n; ++i) {
+ makeUnpredictable(i); // outer counter hidden from the optimizer
+ for (NonTrivialLong j(0); j < 10000; ++j) {
+ makeUnpredictable(j); // inner counter hidden too, on every inner iteration
+ x += i + j;
+ }
+ }
+ doNotOptimizeAway(x); // x is pinned only once, after both loops finish
+}
+
+BENCHMARK(optimizerCanPowerReduceInner1NonTrivialPtr, n) {
+ NonTrivialLong x(0);
+ for (NonTrivialLong i(0); i < n; ++i) {
+ for (NonTrivialLong j(0); j < 10000; ++j) {
+ x += i + j;
+ }
+ doNotOptimizeAway(&x); // pointer variant: passes x's address, once per outer iteration
+ }
+}
+
+BENCHMARK(optimizerCanPowerReduceInner2NonTrivialPtr, n) {
+ NonTrivialLong x(0);
+ for (NonTrivialLong i(0); i < n; ++i) {
+ makeUnpredictable(i); // hide i's state from the optimizer once per outer iteration
+ for (NonTrivialLong j(0); j < 10000; ++j) {
+ x += i + j;
+ }
+ }
+ doNotOptimizeAway(&x); // pointer variant: passes x's address once, after both loops
+}
+
+BENCHMARK(optimizerDisabled1NonTrivialPtr, n) {
+ NonTrivialLong x(0);
+ for (NonTrivialLong i(0); i < n; ++i) {
+ for (NonTrivialLong j(0); j < 10000; ++j) {
+ x += i + j;
+ doNotOptimizeAway(&x); // pointer variant: passes x's address after every addition
+ }
+ }
+}
+
+BENCHMARK_DRAW_LINE()
+
BENCHMARK(baselinevector) {
vector<int> v;