X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=folly%2FBenchmark.cpp;h=940fe91c27e81bdba1fd883fff220ef9b4c77b27;hb=0606460a10e0e41f9be09552d771ea1ee327f8ee;hp=aa83804b98e63960950230c1636764bfc6171f7e;hpb=2e76cb012388630c6e167e9ea50395d0ef597e76;p=folly.git diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp index aa83804b..940fe91c 100644 --- a/folly/Benchmark.cpp +++ b/folly/Benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2012 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,11 +16,13 @@ // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com) -#include "Benchmark.h" -#include "Foreach.h" -#include "json.h" -#include "String.h" +#include +#include +#include +#include + #include +#include #include #include #include @@ -32,16 +34,33 @@ using namespace std; DEFINE_bool(benchmark, false, "Run benchmarks."); DEFINE_bool(json, false, "Output in JSON format."); +DEFINE_string(bm_regex, "", + "Only benchmarks whose names match this regex will be run."); + +DEFINE_int64(bm_min_usec, 100, + "Minimum # of microseconds we'll accept for each benchmark."); + +DEFINE_int64(bm_min_iters, 1, + "Minimum # of iterations we'll try for each benchmark."); + +DEFINE_int32(bm_max_secs, 1, + "Maximum # of seconds we'll spend on each benchmark."); + + namespace folly { BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent; -typedef function BenchmarkFun; +typedef function BenchmarkFun; static vector> benchmarks; // Add the global baseline BENCHMARK(globalBenchmarkBaseline) { +#ifdef _MSC_VER + _ReadWriteBarrier(); +#else asm volatile(""); +#endif } void detail::addBenchmarkImpl(const char* file, const char* name, @@ -191,17 +210,19 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel."; resolutionInNs = ts.tv_nsec; } - // Whe choose a minimum minimum (sic) of 10,000 nanoseconds, but if + // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if // the clock resolution is worse than that, it will be larger. In // essence we're aiming at making the quantization noise 0.01%. - static const auto minNanoseconds = min(resolutionInNs * 100000, 1000000000UL); + static const auto minNanoseconds = + max(FLAGS_bm_min_usec * 1000UL, + min(resolutionInNs * 100000, 1000000000ULL)); // We do measurements in several epochs and take the minimum, to // account for jitter. static const unsigned int epochs = 1000; // We establish a total time budget as we don't want a measurement // to take too long. This will curtail the number of actual epochs. - static const uint64_t timeBudgetInNs = 1000000000; + const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000; timespec global; CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global)); @@ -209,14 +230,15 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, size_t actualEpochs = 0; for (; actualEpochs < epochs; ++actualEpochs) { - for (unsigned int n = 1; n < (1U << 30); n *= 2) { - auto const nsecs = fun(n); - if (nsecs < minNanoseconds) { + for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) { + auto const nsecsAndIter = fun(n); + if (nsecsAndIter.first < minNanoseconds) { continue; } // We got an accurate enough timing, done. But only save if // smaller than the current result. - epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline); + epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) / + nsecsAndIter.second - globalBaseline); // Done with the current epoch, we got a meaningful timing. break; } @@ -234,74 +256,75 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, return max(0.0, estimateTime(epochResults, epochResults + actualEpochs)); } -static string humanReadable(double n, unsigned int decimals) { - auto a = fabs(n); - char suffix = ' '; - - if (a >= 1E21) { - // Too big to be comprehended by the puny human brain - suffix = '!'; - n /= 1E21; - } else if (a >= 1E18) { - // "EXA" written with suffix 'X' so as to not create confusion - // with scientific notation. - suffix = 'X'; - n /= 1E18; - } else if (a >= 1E15) { - // "PETA" - suffix = 'P'; - n /= 1E15; - } else if (a >= 1E12) { - // "TERA" - suffix = 'T'; - n /= 1E12; - } else if (a >= 1E9) { - // "GIGA" - suffix = 'G'; - n /= 1E9; - } else if (a >= 1E6) { - // "MEGA" - suffix = 'M'; - n /= 1E6; - } else if (a >= 1E3) { - // "KILO" - suffix = 'K'; - n /= 1E3; - } else if (a == 0.0) { - suffix = ' '; - } else if (a < 1E-15) { - // too small - suffix = '?'; - n *= 1E18; - } else if (a < 1E-12) { - // "femto" - suffix = 'f'; - n *= 1E15; - } else if (a < 1E-9) { - // "pico" - suffix = 'p'; - n *= 1E12; - } else if (a < 1E-6) { - // "nano" - suffix = 'n'; - n *= 1E9; - } else if (a < 1E-3) { - // "micro" - suffix = 'u'; - n *= 1E6; - } else if (a < 1) { - // "mili" - suffix = 'm'; - n *= 1E3; +struct ScaleInfo { + double boundary; + const char* suffix; +}; + +static const ScaleInfo kTimeSuffixes[] { + { 365.25 * 24 * 3600, "years" }, + { 24 * 3600, "days" }, + { 3600, "hr" }, + { 60, "min" }, + { 1, "s" }, + { 1E-3, "ms" }, + { 1E-6, "us" }, + { 1E-9, "ns" }, + { 1E-12, "ps" }, + { 1E-15, "fs" }, + { 0, nullptr }, +}; + +static const ScaleInfo kMetricSuffixes[] { + { 1E24, "Y" }, // yotta + { 1E21, "Z" }, // zetta + { 1E18, "X" }, // "exa" written with suffix 'X' so as to not create + // confusion with scientific notation + { 1E15, "P" }, // peta + { 1E12, "T" }, // terra + { 1E9, "G" }, // giga + { 1E6, "M" }, // mega + { 1E3, "K" }, // kilo + { 1, "" }, + { 1E-3, "m" }, // milli + { 1E-6, "u" }, // micro + { 1E-9, "n" }, // nano + { 1E-12, "p" }, // pico + { 1E-15, "f" }, // femto + { 1E-18, "a" }, // atto + { 1E-21, "z" }, // zepto + { 1E-24, "y" }, // yocto + { 0, nullptr }, +}; + +static string humanReadable(double n, unsigned int decimals, + const ScaleInfo* scales) { + if (std::isinf(n) || std::isnan(n)) { + return folly::to(n); + } + + const double absValue = fabs(n); + const ScaleInfo* scale = scales; + while (absValue < scale[0].boundary && scale[1].suffix != nullptr) { + ++scale; } - return stringPrintf("%*.*f%c", decimals + 3 + 1, decimals, n, suffix); + const double scaledValue = n / scale->boundary; + return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix); +} + +static string readableTime(double n, unsigned int decimals) { + return humanReadable(n, decimals, kTimeSuffixes); +} + +static string metricReadable(double n, unsigned int decimals) { + return humanReadable(n, decimals, kMetricSuffixes); } static void printBenchmarkResultsAsTable( const vector >& data) { // Width available - static const uint columns = 76; + static const unsigned int columns = 76; // Compute the longest benchmark name size_t longestName = 0; @@ -317,8 +340,8 @@ static void printBenchmarkResultsAsTable( // Print header for a file auto header = [&](const char* file) { separator('='); - printf("%-*srelative ns/iter iters/s\n", - columns - 26, file); + printf("%-*srelative time/iter iters/s\n", + columns - 28, file); separator('='); }; @@ -346,23 +369,24 @@ static void printBenchmarkResultsAsTable( baselineNsPerIter = get<2>(datum); useBaseline = false; } - s.resize(columns - 27, ' '); + s.resize(columns - 29, ' '); auto nsPerIter = get<2>(datum); - auto itersPerSec = 1E9 / nsPerIter; + auto secPerIter = nsPerIter / 1E9; + auto itersPerSec = 1 / secPerIter; if (!useBaseline) { // Print without baseline - printf("%*s %s %s\n", + printf("%*s %9s %7s\n", static_cast(s.size()), s.c_str(), - humanReadable(nsPerIter, 2).c_str(), - humanReadable(itersPerSec, 2).c_str()); + readableTime(secPerIter, 2).c_str(), + metricReadable(itersPerSec, 2).c_str()); } else { // Print with baseline auto rel = baselineNsPerIter / nsPerIter * 100.0; - printf("%*s %7.2f%% %s %s\n", + printf("%*s %7.2f%% %9s %7s\n", static_cast(s.size()), s.c_str(), rel, - humanReadable(nsPerIter, 2).c_str(), - humanReadable(itersPerSec, 2).c_str()); + readableTime(secPerIter, 2).c_str(), + metricReadable(itersPerSec, 2).c_str()); } } separator('='); @@ -394,15 +418,24 @@ void runBenchmarks() { vector> results; results.reserve(benchmarks.size() - 1); + std::unique_ptr bmRegex; + if (!FLAGS_bm_regex.empty()) { + bmRegex.reset(new boost::regex(FLAGS_bm_regex)); + } + // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS. auto const globalBaseline = runBenchmarkGetNSPerIteration( get<2>(benchmarks.front()), 0); FOR_EACH_RANGE (i, 1, benchmarks.size()) { - auto elapsed = strcmp(get<1>(benchmarks[i]), "-") == 0 - ? 0.0 // skip the separators - : runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]), - globalBaseline); + double elapsed = 0.0; + if (strcmp(get<1>(benchmarks[i]), "-") != 0) { // skip separators + if (bmRegex && !boost::regex_search(get<1>(benchmarks[i]), *bmRegex)) { + continue; + } + elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]), + globalBaseline); + } results.emplace_back(get<0>(benchmarks[i]), get<1>(benchmarks[i]), elapsed); }