From aed6ef6fd357f48e0ebe9025d12021f73b00f812 Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz <mhx@fb.com>
Date: Fri, 6 Jun 2014 15:57:21 -0700
Subject: [PATCH] Add support for returning number of benchmark iterations

Summary:
I'm looping through a large number of test cases in a benchmark and I'm
interested in the average time per test case rather than the total time,
which is what I'm currently seeing. In order to get the average time,
I need to be able to tell the benchmark module how many iterations have
been run.

This change adds _MULTI variants of the different BENCHMARK_ macros that
allow for returning the actual number of iterations that have been run,
e.g.:

BENCHMARK_MULTI(benchmarkSomething) {
std::vector<int> testCases { 0, 1, 1, 2, 3, 5 };
for (int c : testCases) {
doSomething(c);
}
return testCases.size();
}

Test Plan:
* fbconfig -r folly && fbmake runtests
* added new test cases to example benchmark code

Reviewed By: simpkins@fb.com

Subscribers: folly@lists, london_search@

FB internal diff: D1356437
---
 folly/Benchmark.cpp          |   9 +--
 folly/Benchmark.h            | 120 +++++++++++++++++++++++++++++++----
 folly/gen/test/Bench.h       |   2 +
 folly/test/BenchmarkTest.cpp |  61 ++++++++++++++++++
 4 files changed, 177 insertions(+), 15 deletions(-)
diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp
index dc08f506..62599708 100644
--- a/folly/Benchmark.cpp
+++ b/folly/Benchmark.cpp
@@ -51,7 +51,7 @@ namespace folly {
 
 BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
 
-typedef function<uint64_t(unsigned int)> BenchmarkFun;
+typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
 static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks;
 
 // Add the global baseline
@@ -231,13 +231,14 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
 
   for (; actualEpochs < epochs; ++actualEpochs) {
     for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) {
-      auto const nsecs = fun(n);
-      if (nsecs < minNanoseconds) {
+      auto const nsecsAndIter = fun(n);
+      if (nsecsAndIter.first < minNanoseconds) {
         continue;
       }
       // We got an accurate enough timing, done. But only save if
       // smaller than the current result.
-      epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline);
+      epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
+                                       nsecsAndIter.second - globalBaseline);
       // Done with the current epoch, we got a meaningful timing.
       break;
     }
diff --git a/folly/Benchmark.h b/folly/Benchmark.h
index 3465fa1c..0ca2b167 100644
--- a/folly/Benchmark.h
+++ b/folly/Benchmark.h
@@ -56,13 +56,15 @@ namespace detail {
  */
 enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
 
+typedef std::pair<uint64_t, unsigned int> TimeIterPair;
+
 /**
  * Adds a benchmark wrapped in a std::function. Only used
  * internally. Pass by value is intentional.
  */
 void addBenchmarkImpl(const char* file,
                       const char* name,
-                      std::function<uint64_t(unsigned int)>);
+                      std::function<TimeIterPair(unsigned int)>);
 
 /**
  * Takes the difference between two timespec values. end is assumed to
@@ -185,24 +187,27 @@ typename std::enable_if<
   == 2
 >::type
 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
-  auto execute = [=](unsigned int times) -> uint64_t {
+  auto execute = [=](unsigned int times) {
     BenchmarkSuspender::nsSpent = 0;
     timespec start, end;
+    unsigned int niter;
 
     // CORE MEASUREMENT STARTS
     auto const r1 = clock_gettime(detail::DEFAULT_CLOCK_ID, &start);
-    lambda(times);
+    niter = lambda(times);
     auto const r2 = clock_gettime(detail::DEFAULT_CLOCK_ID, &end);
     // CORE MEASUREMENT ENDS
 
     CHECK_EQ(0, r1);
     CHECK_EQ(0, r2);
 
-    return detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent;
+    return detail::TimeIterPair(
+      detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent,
+      niter);
   };
 
   detail::addBenchmarkImpl(file, name,
-                           std::function<uint64_t(unsigned int)>(execute));
+    std::function<detail::TimeIterPair(unsigned int)>(execute));
 }
 
 /**
@@ -218,9 +223,11 @@ typename std::enable_if<
 >::type
 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
   addBenchmark(file, name, [=](unsigned int times) {
+      unsigned int niter = 0;
       while (times-- > 0) {
-        lambda();
+        niter += lambda();
       }
+      return niter;
     });
 }
 
@@ -254,14 +261,28 @@ void doNotOptimizeAway(T&& datum) {
  * Introduces a benchmark function. Used internally, see BENCHMARK and
  * friends below.
  */
-#define BENCHMARK_IMPL(funName, stringName, paramType, paramName)       \
+#define BENCHMARK_IMPL(funName, stringName, rv, paramType, paramName)   \
   static void funName(paramType);                                       \
   static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (           \
     ::folly::addBenchmark(__FILE__, stringName,                         \
-      [](paramType paramName) { funName(paramName); }),                 \
+      [](paramType paramName) -> unsigned { funName(paramName);         \
+                                            return rv; }),              \
     true);                                                              \
   static void funName(paramType paramName)
 
+/**
+ * Introduces a benchmark function with support for returning the actual
+ * number of iterations. Used internally, see BENCHMARK_MULTI and friends
+ * below.
+ */
+#define BENCHMARK_MULTI_IMPL(funName, stringName, paramType, paramName) \
+  static unsigned funName(paramType);                                   \
+  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (           \
+    ::folly::addBenchmark(__FILE__, stringName,                         \
+      [](paramType paramName) { return funName(paramName); }),          \
+    true);                                                              \
+  static unsigned funName(paramType paramName)
+
 /**
  * Introduces a benchmark function. Use with either one one or two
  * arguments. The first is the name of the benchmark. Use something
@@ -283,6 +304,29 @@ void doNotOptimizeAway(T&& datum) {
  */
 #define BENCHMARK(name, ...)                                    \
   BENCHMARK_IMPL(                                               \
+    name,                                                       \
+    FB_STRINGIZE(name),                                         \
+    FB_ARG_2_OR_1(1, ## __VA_ARGS__),                           \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
+    __VA_ARGS__)
+
+/**
+ * Like BENCHMARK above, but allows the user to return the actual
+ * number of iterations executed in the function body. This can be
+ * useful if the benchmark function doesn't know upfront how many
+ * iterations it's going to run or if it runs through a certain
+ * number of test cases, e.g.:
+ *
+ * BENCHMARK_MULTI(benchmarkSomething) {
+ *   std::vector<int> testCases { 0, 1, 1, 2, 3, 5 };
+ *   for (int c : testCases) {
+ *     doSomething(c);
+ *   }
+ *   return testCases.size();
+ * }
+ */
+#define BENCHMARK_MULTI(name, ...)                              \
+  BENCHMARK_MULTI_IMPL(                                         \
     name,                                                       \
     FB_STRINGIZE(name),                                         \
     FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
@@ -313,6 +357,13 @@ void doNotOptimizeAway(T&& datum) {
 #define BENCHMARK_PARAM(name, param)                                    \
   BENCHMARK_NAMED_PARAM(name, param, param)
 
+/**
+ * Same as BENCHMARK_PARAM, but allows to return the actual number of
+ * iterations that have been run.
+ */
+#define BENCHMARK_PARAM_MULTI(name, param)                              \
+  BENCHMARK_NAMED_PARAM_MULTI(name, param, param)
+
 /*
  * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
  * parameter, rather than using the parameter value.
@@ -340,11 +391,25 @@ void doNotOptimizeAway(T&& datum) {
   BENCHMARK_IMPL(                                                       \
       FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
       FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",              \
+      iters,                                                            \
       unsigned,                                                         \
       iters) {                                                          \
     name(iters, ## __VA_ARGS__);                                        \
   }
 
+/**
+ * Same as BENCHMARK_NAMED_PARAM, but allows to return the actual number
+ * of iterations that have been run.
+ */
+#define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...)              \
+  BENCHMARK_MULTI_IMPL(                                                 \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
+      FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",              \
+      unsigned,                                                         \
+      iters) {                                                          \
+    return name(iters, ## __VA_ARGS__);                                 \
+  }
+
 /**
  * Just like BENCHMARK, but prints the time relative to a
  * baseline. The baseline is the most recent BENCHMARK() seen in
@@ -371,6 +436,18 @@ void doNotOptimizeAway(T&& datum) {
  */
 #define BENCHMARK_RELATIVE(name, ...)                           \
   BENCHMARK_IMPL(                                               \
+    name,                                                       \
+    "%" FB_STRINGIZE(name),                                     \
+    FB_ARG_2_OR_1(1, ## __VA_ARGS__),                           \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
+    __VA_ARGS__)
+
+/**
+ * Same as BENCHMARK_RELATIVE, but allows to return the actual number
+ * of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_MULTI(name, ...)                     \
+  BENCHMARK_MULTI_IMPL(                                         \
     name,                                                       \
     "%" FB_STRINGIZE(name),                                     \
     FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
@@ -382,6 +459,13 @@ void doNotOptimizeAway(T&& datum) {
 #define BENCHMARK_RELATIVE_PARAM(name, param)                           \
   BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
 
+/**
+ * Same as BENCHMARK_RELATIVE_PARAM, but allows to return the actual
+ * number of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_PARAM_MULTI(name, param)                     \
+  BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param, param)
+
 /**
  * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
  */
@@ -389,17 +473,31 @@ void doNotOptimizeAway(T&& datum) {
   BENCHMARK_IMPL(                                                       \
       FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
       "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",          \
+      iters,                                                            \
       unsigned,                                                         \
       iters) {                                                          \
     name(iters, ## __VA_ARGS__);                                        \
   }
 
+/**
+ * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows to return the
+ * actual number of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...)     \
+  BENCHMARK_MULTI_IMPL(                                                 \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
+      "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",          \
+      unsigned,                                                         \
+      iters) {                                                          \
+    return name(iters, ## __VA_ARGS__);                                 \
+  }
+
 /**
  * Draws a line of dashes.
  */
-#define BENCHMARK_DRAW_LINE()                                   \
-  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (   \
-    ::folly::addBenchmark(__FILE__, "-", []() { }),             \
+#define BENCHMARK_DRAW_LINE()                                             \
+  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (             \
+    ::folly::addBenchmark(__FILE__, "-", []() -> unsigned { return 0; }), \
     true);
 
 /**
diff --git a/folly/gen/test/Bench.h b/folly/gen/test/Bench.h
index be91357f..31c00258 100644
--- a/folly/gen/test/Bench.h
+++ b/folly/gen/test/Bench.h
@@ -23,9 +23,11 @@
 static bool FB_ANONYMOUS_VARIABLE(benchGen) = (             \
   ::folly::addBenchmark(__FILE__, prefix FB_STRINGIZE(gen), \
     [](unsigned iters){                                     \
+      const unsigned num = iters;                           \
       while (iters--) {                                     \
         folly::doNotOptimizeAway(gen);                      \
       }                                                     \
+      return num;                                           \
     }), true)
 #define BENCH_GEN(gen) BENCH_GEN_IMPL(gen, "")
 #define BENCH_GEN_REL(gen) BENCH_GEN_IMPL(gen, "%")
diff --git a/folly/test/BenchmarkTest.cpp b/folly/test/BenchmarkTest.cpp
index 9bd00159..317df615 100644
--- a/folly/test/BenchmarkTest.cpp
+++ b/folly/test/BenchmarkTest.cpp
@@ -67,6 +67,67 @@ BENCHMARK(superslow) {
   sleep(1);
 }
 
+BENCHMARK_DRAW_LINE()
+
+BENCHMARK(noMulti) {
+  fun();
+}
+
+BENCHMARK_MULTI(multiSimple) {
+  FOR_EACH_RANGE (i, 0, 10) {
+    fun();
+  }
+  return 10;
+}
+
+BENCHMARK_RELATIVE_MULTI(multiSimpleRel) {
+  FOR_EACH_RANGE (i, 0, 10) {
+    fun();
+    fun();
+  }
+  return 10;
+}
+
+BENCHMARK_MULTI(multiIterArgs, iter) {
+  FOR_EACH_RANGE (i, 0, 10 * iter) {
+    fun();
+  }
+  return 10 * iter;
+}
+
+BENCHMARK_RELATIVE_MULTI(multiIterArgsRel, iter) {
+  FOR_EACH_RANGE (i, 0, 10 * iter) {
+    fun();
+    fun();
+  }
+  return 10 * iter;
+}
+
+unsigned paramMulti(unsigned iter, unsigned num) {
+  for (unsigned i = 0; i < iter; ++i) {
+    for (unsigned j = 0; j < num; ++j) {
+      fun();
+    }
+  }
+  return num * iter;
+}
+
+unsigned paramMultiRel(unsigned iter, unsigned num) {
+  for (unsigned i = 0; i < iter; ++i) {
+    for (unsigned j = 0; j < num; ++j) {
+      fun();
+      fun();
+    }
+  }
+  return num * iter;
+}
+
+BENCHMARK_PARAM_MULTI(paramMulti, 1);
+BENCHMARK_RELATIVE_PARAM_MULTI(paramMultiRel, 1);
+
+BENCHMARK_PARAM_MULTI(paramMulti, 5);
+BENCHMARK_RELATIVE_PARAM_MULTI(paramMultiRel, 5);
+
 int main(int argc, char** argv) {
   google::ParseCommandLineFlags(&argc, &argv, true);
   runBenchmarks();
-- 
2.34.1