X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FBenchmark.cpp;h=de379ec598367516f421c3c46aeb6af2c43b7307;hb=1509cebb0407aee1ca0a381af6f737d53aa2325b;hp=545cefccb574a08cc065b132a97980eaf7876647;hpb=efdc68945456c0c71a05d4a58cf8491454af3ce3;p=folly.git diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp index 545cefcc..de379ec5 100644 --- a/folly/Benchmark.cpp +++ b/folly/Benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2012 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) -#include "Benchmark.h" -#include "Foreach.h" -#include "json.h" -#include "String.h" +#include +#include +#include +#include #include #include @@ -28,6 +28,7 @@ #include #include #include +#include using namespace std; @@ -40,6 +41,9 @@ DEFINE_string(bm_regex, "", DEFINE_int64(bm_min_usec, 100, "Minimum # of microseconds we'll accept for each benchmark."); +DEFINE_int64(bm_min_iters, 1, + "Minimum # of iterations we'll try for each benchmark."); + DEFINE_int32(bm_max_secs, 1, "Maximum # of seconds we'll spend on each benchmark."); @@ -48,17 +52,45 @@ namespace folly { BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent; -typedef function BenchmarkFun; -static vector> benchmarks; +typedef function BenchmarkFun; + + +vector>& benchmarks() { + static vector> _benchmarks; + return _benchmarks; +} + +#define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline +#define FB_STRINGIZE_X2(x) FB_STRINGIZE(x) // Add the global baseline -BENCHMARK(globalBenchmarkBaseline) { +BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) { +#ifdef _MSC_VER + _ReadWriteBarrier(); +#else asm volatile(""); +#endif +} + +int getGlobalBenchmarkBaselineIndex() { + const char *global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE); + auto it = std::find_if( + benchmarks().begin(), + benchmarks().end(), + [global](const tuple &v) { + return std::strcmp(get<1>(v), global) == 0; + } + ); + CHECK(it != benchmarks().end()); + return it - benchmarks().begin(); } +#undef FB_STRINGIZE_X2 +#undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE + void detail::addBenchmarkImpl(const char* file, const char* name, BenchmarkFun fun) { - benchmarks.emplace_back(file, name, std::move(fun)); + benchmarks().emplace_back(file, name, std::move(fun)); } /** @@ -207,14 +239,15 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, // the clock resolution is worse than that, it will be larger. In // essence we're aiming at making the quantization noise 0.01%. static const auto minNanoseconds = - max(FLAGS_bm_min_usec * 1000UL, min(resolutionInNs * 100000, 1000000000UL)); + max(FLAGS_bm_min_usec * 1000UL, + min(resolutionInNs * 100000, 1000000000ULL)); // We do measurements in several epochs and take the minimum, to // account for jitter. static const unsigned int epochs = 1000; // We establish a total time budget as we don't want a measurement // to take too long. This will curtail the number of actual epochs. - const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000; + const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL; timespec global; CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global)); @@ -222,14 +255,15 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, size_t actualEpochs = 0; for (; actualEpochs < epochs; ++actualEpochs) { - for (unsigned int n = 1; n < (1UL << 30); n *= 2) { - auto const nsecs = fun(n); - if (nsecs < minNanoseconds) { + for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) { + auto const nsecsAndIter = fun(n); + if (nsecsAndIter.first < minNanoseconds) { continue; } // We got an accurate enough timing, done. But only save if // smaller than the current result. - epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline); + epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) / + nsecsAndIter.second - globalBaseline); // Done with the current epoch, we got a meaningful timing. break; } @@ -263,7 +297,7 @@ static const ScaleInfo kTimeSuffixes[] { { 1E-9, "ns" }, { 1E-12, "ps" }, { 1E-15, "fs" }, - { 0, NULL }, + { 0, nullptr }, }; static const ScaleInfo kMetricSuffixes[] { @@ -285,7 +319,7 @@ static const ScaleInfo kMetricSuffixes[] { { 1E-18, "a" }, // atto { 1E-21, "z" }, // zepto { 1E-24, "y" }, // yocto - { 0, NULL }, + { 0, nullptr }, }; static string humanReadable(double n, unsigned int decimals, @@ -296,7 +330,7 @@ static string humanReadable(double n, unsigned int decimals, const double absValue = fabs(n); const ScaleInfo* scale = scales; - while (absValue < scale[0].boundary && scale[1].suffix != NULL) { + while (absValue < scale[0].boundary && scale[1].suffix != nullptr) { ++scale; } @@ -315,12 +349,12 @@ static string metricReadable(double n, unsigned int decimals) { static void printBenchmarkResultsAsTable( const vector >& data) { // Width available - static const uint columns = 76; + static const unsigned int columns = 76; // Compute the longest benchmark name size_t longestName = 0; - FOR_EACH_RANGE (i, 1, benchmarks.size()) { - longestName = max(longestName, strlen(get<1>(benchmarks[i]))); + FOR_EACH_RANGE (i, 1, benchmarks().size()) { + longestName = max(longestName, strlen(get<1>(benchmarks()[i]))); } // Print a horizontal rule @@ -404,10 +438,10 @@ static void printBenchmarkResults( } void runBenchmarks() { - CHECK(!benchmarks.empty()); + CHECK(!benchmarks().empty()); vector> results; - results.reserve(benchmarks.size() - 1); + results.reserve(benchmarks().size() - 1); std::unique_ptr bmRegex; if (!FLAGS_bm_regex.empty()) { @@ -416,19 +450,24 @@ void runBenchmarks() { // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS. - auto const globalBaseline = runBenchmarkGetNSPerIteration( - get<2>(benchmarks.front()), 0); - FOR_EACH_RANGE (i, 1, benchmarks.size()) { + unsigned int baselineIndex = getGlobalBenchmarkBaselineIndex(); + + auto const globalBaseline = + runBenchmarkGetNSPerIteration(get<2>(benchmarks()[baselineIndex]), 0); + FOR_EACH_RANGE (i, 0, benchmarks().size()) { + if (i == baselineIndex) { + continue; + } double elapsed = 0.0; - if (!strcmp(get<1>(benchmarks[i]), "-") == 0) { // skip separators - if (bmRegex && !boost::regex_search(get<1>(benchmarks[i]), *bmRegex)) { + if (strcmp(get<1>(benchmarks()[i]), "-") != 0) { // skip separators + if (bmRegex && !boost::regex_search(get<1>(benchmarks()[i]), *bmRegex)) { continue; } - elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]), + elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks()[i]), globalBaseline); } - results.emplace_back(get<0>(benchmarks[i]), - get<1>(benchmarks[i]), elapsed); + results.emplace_back(get<0>(benchmarks()[i]), + get<1>(benchmarks()[i]), elapsed); } // PLEASE MAKE NOISE. MEASUREMENTS DONE.