From aed6ef6fd357f48e0ebe9025d12021f73b00f812 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz <mhx@fb.com> Date: Fri, 6 Jun 2014 15:57:21 -0700 Subject: [PATCH] Add support for returning number of benchmark iterations Summary: I'm looping through a large number of test cases in a benchmark and I'm interested in the average time per test case rather than the total time, which is what I'm currently seeing. In order to get the average time, I need to be able to tell the benchmark module how many iterations have been run. This change adds _MULTI variants of the different BENCHMARK_ macros that allow for returning the actual number of iterations that have been run, e.g.: BENCHMARK_MULTI(benchmarkSomething) { std::vector<int> testCases { 0, 1, 1, 2, 3, 5 }; for (int c : testCases) { doSomething(c); } return testCases.size(); } Test Plan: * fbconfig -r folly && fbmake runtests * added new test cases to example benchmark code Reviewed By: simpkins@fb.com Subscribers: folly@lists, london_search@ FB internal diff: D1356437 --- folly/Benchmark.cpp | 9 +-- folly/Benchmark.h | 120 +++++++++++++++++++++++++++++++---- folly/gen/test/Bench.h | 2 + folly/test/BenchmarkTest.cpp | 61 ++++++++++++++++++ 4 files changed, 177 insertions(+), 15 deletions(-) diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp index dc08f506..62599708 100644 --- a/folly/Benchmark.cpp +++ b/folly/Benchmark.cpp @@ -51,7 +51,7 @@ namespace folly { BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent; -typedef function<uint64_t(unsigned int)> BenchmarkFun; +typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun; static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks; // Add the global baseline @@ -231,13 +231,14 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, for (; actualEpochs < epochs; ++actualEpochs) { for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) { - auto const nsecs = fun(n); - if (nsecs < minNanoseconds) { + auto const nsecsAndIter = fun(n); + if (nsecsAndIter.first < minNanoseconds) { continue; } // We got an accurate enough timing, done. But only save if // smaller than the current result. - epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline); + epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) / + nsecsAndIter.second - globalBaseline); // Done with the current epoch, we got a meaningful timing. break; } diff --git a/folly/Benchmark.h b/folly/Benchmark.h index 3465fa1c..0ca2b167 100644 --- a/folly/Benchmark.h +++ b/folly/Benchmark.h @@ -56,13 +56,15 @@ namespace detail { */ enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME }; +typedef std::pair<uint64_t, unsigned int> TimeIterPair; + /** * Adds a benchmark wrapped in a std::function. Only used * internally. Pass by value is intentional. */ void addBenchmarkImpl(const char* file, const char* name, - std::function<uint64_t(unsigned int)>); + std::function<TimeIterPair(unsigned int)>); /** * Takes the difference between two timespec values. end is assumed to @@ -185,24 +187,27 @@ typename std::enable_if< == 2 >::type addBenchmark(const char* file, const char* name, Lambda&& lambda) { - auto execute = [=](unsigned int times) -> uint64_t { + auto execute = [=](unsigned int times) { BenchmarkSuspender::nsSpent = 0; timespec start, end; + unsigned int niter; // CORE MEASUREMENT STARTS auto const r1 = clock_gettime(detail::DEFAULT_CLOCK_ID, &start); - lambda(times); + niter = lambda(times); auto const r2 = clock_gettime(detail::DEFAULT_CLOCK_ID, &end); // CORE MEASUREMENT ENDS CHECK_EQ(0, r1); CHECK_EQ(0, r2); - return detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent; + return detail::TimeIterPair( + detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent, + niter); }; detail::addBenchmarkImpl(file, name, - std::function<uint64_t(unsigned int)>(execute)); + std::function<detail::TimeIterPair(unsigned int)>(execute)); } /** @@ -218,9 +223,11 @@ typename std::enable_if< >::type addBenchmark(const char* file, const char* name, Lambda&& lambda) { addBenchmark(file, name, [=](unsigned int times) { + unsigned int niter = 0; while (times-- > 0) { - lambda(); + niter += lambda(); } + return niter; }); } @@ -254,14 +261,28 @@ void doNotOptimizeAway(T&& datum) { * Introduces a benchmark function. Used internally, see BENCHMARK and * friends below. */ -#define BENCHMARK_IMPL(funName, stringName, paramType, paramName) \ +#define BENCHMARK_IMPL(funName, stringName, rv, paramType, paramName) \ static void funName(paramType); \ static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \ ::folly::addBenchmark(__FILE__, stringName, \ - [](paramType paramName) { funName(paramName); }), \ + [](paramType paramName) -> unsigned { funName(paramName); \ + return rv; }), \ true); \ static void funName(paramType paramName) +/** + * Introduces a benchmark function with support for returning the actual + * number of iterations. Used internally, see BENCHMARK_MULTI and friends + * below. + */ +#define BENCHMARK_MULTI_IMPL(funName, stringName, paramType, paramName) \ + static unsigned funName(paramType); \ + static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \ + ::folly::addBenchmark(__FILE__, stringName, \ + [](paramType paramName) { return funName(paramName); }), \ + true); \ + static unsigned funName(paramType paramName) + /** * Introduces a benchmark function. Use with either one one or two * arguments. The first is the name of the benchmark. Use something @@ -283,6 +304,29 @@ void doNotOptimizeAway(T&& datum) { */ #define BENCHMARK(name, ...) \ BENCHMARK_IMPL( \ + name, \ + FB_STRINGIZE(name), \ + FB_ARG_2_OR_1(1, ## __VA_ARGS__), \ + FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \ + __VA_ARGS__) + +/** + * Like BENCHMARK above, but allows the user to return the actual + * number of iterations executed in the function body. This can be + * useful if the benchmark function doesn't know upfront how many + * iterations it's going to run or if it runs through a certain + * number of test cases, e.g.: + * + * BENCHMARK_MULTI(benchmarkSomething) { + * std::vector<int> testCases { 0, 1, 1, 2, 3, 5 }; + * for (int c : testCases) { + * doSomething(c); + * } + * return testCases.size(); + * } + */ +#define BENCHMARK_MULTI(name, ...) \ + BENCHMARK_MULTI_IMPL( \ name, \ FB_STRINGIZE(name), \ FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \ @@ -313,6 +357,13 @@ void doNotOptimizeAway(T&& datum) { #define BENCHMARK_PARAM(name, param) \ BENCHMARK_NAMED_PARAM(name, param, param) +/** + * Same as BENCHMARK_PARAM, but allows to return the actual number of + * iterations that have been run. + */ +#define BENCHMARK_PARAM_MULTI(name, param) \ + BENCHMARK_NAMED_PARAM_MULTI(name, param, param) + /* * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each * parameter, rather than using the parameter value. @@ -340,11 +391,25 @@ void doNotOptimizeAway(T&& datum) { BENCHMARK_IMPL( \ FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \ FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \ + iters, \ unsigned, \ iters) { \ name(iters, ## __VA_ARGS__); \ } +/** + * Same as BENCHMARK_NAMED_PARAM, but allows to return the actual number + * of iterations that have been run. + */ +#define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...) \ + BENCHMARK_MULTI_IMPL( \ + FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \ + FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \ + unsigned, \ + iters) { \ + return name(iters, ## __VA_ARGS__); \ + } + /** * Just like BENCHMARK, but prints the time relative to a * baseline. The baseline is the most recent BENCHMARK() seen in @@ -371,6 +436,18 @@ void doNotOptimizeAway(T&& datum) { */ #define BENCHMARK_RELATIVE(name, ...) \ BENCHMARK_IMPL( \ + name, \ + "%" FB_STRINGIZE(name), \ + FB_ARG_2_OR_1(1, ## __VA_ARGS__), \ + FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \ + __VA_ARGS__) + +/** + * Same as BENCHMARK_RELATIVE, but allows to return the actual number + * of iterations that have been run. + */ +#define BENCHMARK_RELATIVE_MULTI(name, ...) \ + BENCHMARK_MULTI_IMPL( \ name, \ "%" FB_STRINGIZE(name), \ FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__), \ @@ -382,6 +459,13 @@ void doNotOptimizeAway(T&& datum) { #define BENCHMARK_RELATIVE_PARAM(name, param) \ BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param) +/** + * Same as BENCHMARK_RELATIVE_PARAM, but allows to return the actual + * number of iterations that have been run. + */ +#define BENCHMARK_RELATIVE_PARAM_MULTI(name, param) \ + BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param, param) + /** * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM. */ @@ -389,17 +473,31 @@ void doNotOptimizeAway(T&& datum) { BENCHMARK_IMPL( \ FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \ "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \ + iters, \ unsigned, \ iters) { \ name(iters, ## __VA_ARGS__); \ } +/** + * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows to return the + * actual number of iterations that have been run. + */ +#define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...) \ + BENCHMARK_MULTI_IMPL( \ + FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \ + "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \ + unsigned, \ + iters) { \ + return name(iters, ## __VA_ARGS__); \ + } + /** * Draws a line of dashes. */ -#define BENCHMARK_DRAW_LINE() \ - static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \ - ::folly::addBenchmark(__FILE__, "-", []() { }), \ +#define BENCHMARK_DRAW_LINE() \ + static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = ( \ + ::folly::addBenchmark(__FILE__, "-", []() -> unsigned { return 0; }), \ true); /** diff --git a/folly/gen/test/Bench.h b/folly/gen/test/Bench.h index be91357f..31c00258 100644 --- a/folly/gen/test/Bench.h +++ b/folly/gen/test/Bench.h @@ -23,9 +23,11 @@ static bool FB_ANONYMOUS_VARIABLE(benchGen) = ( \ ::folly::addBenchmark(__FILE__, prefix FB_STRINGIZE(gen), \ [](unsigned iters){ \ + const unsigned num = iters; \ while (iters--) { \ folly::doNotOptimizeAway(gen); \ } \ + return num; \ }), true) #define BENCH_GEN(gen) BENCH_GEN_IMPL(gen, "") #define BENCH_GEN_REL(gen) BENCH_GEN_IMPL(gen, "%") diff --git a/folly/test/BenchmarkTest.cpp b/folly/test/BenchmarkTest.cpp index 9bd00159..317df615 100644 --- a/folly/test/BenchmarkTest.cpp +++ b/folly/test/BenchmarkTest.cpp @@ -67,6 +67,67 @@ BENCHMARK(superslow) { sleep(1); } +BENCHMARK_DRAW_LINE() + +BENCHMARK(noMulti) { + fun(); +} + +BENCHMARK_MULTI(multiSimple) { + FOR_EACH_RANGE (i, 0, 10) { + fun(); + } + return 10; +} + +BENCHMARK_RELATIVE_MULTI(multiSimpleRel) { + FOR_EACH_RANGE (i, 0, 10) { + fun(); + fun(); + } + return 10; +} + +BENCHMARK_MULTI(multiIterArgs, iter) { + FOR_EACH_RANGE (i, 0, 10 * iter) { + fun(); + } + return 10 * iter; +} + +BENCHMARK_RELATIVE_MULTI(multiIterArgsRel, iter) { + FOR_EACH_RANGE (i, 0, 10 * iter) { + fun(); + fun(); + } + return 10 * iter; +} + +unsigned paramMulti(unsigned iter, unsigned num) { + for (unsigned i = 0; i < iter; ++i) { + for (unsigned j = 0; j < num; ++j) { + fun(); + } + } + return num * iter; +} + +unsigned paramMultiRel(unsigned iter, unsigned num) { + for (unsigned i = 0; i < iter; ++i) { + for (unsigned j = 0; j < num; ++j) { + fun(); + fun(); + } + } + return num * iter; +} + +BENCHMARK_PARAM_MULTI(paramMulti, 1); +BENCHMARK_RELATIVE_PARAM_MULTI(paramMultiRel, 1); + +BENCHMARK_PARAM_MULTI(paramMulti, 5); +BENCHMARK_RELATIVE_PARAM_MULTI(paramMultiRel, 5); + int main(int argc, char** argv) { google::ParseCommandLineFlags(&argc, &argv, true); runBenchmarks(); -- 2.34.1