X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=folly%2FBenchmark.cpp;h=940fe91c27e81bdba1fd883fff220ef9b4c77b27;hb=0606460a10e0e41f9be09552d771ea1ee327f8ee;hp=aa83804b98e63960950230c1636764bfc6171f7e;hpb=2e76cb012388630c6e167e9ea50395d0ef597e76;p=folly.git

diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp
index aa83804b..940fe91c 100644
--- a/folly/Benchmark.cpp
+++ b/folly/Benchmark.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,11 +16,13 @@
 
 // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
 
-#include "Benchmark.h"
-#include "Foreach.h"
-#include "json.h"
-#include "String.h"
+#include <folly/Benchmark.h>
+#include <folly/Foreach.h>
+#include <folly/json.h>
+#include <folly/String.h>
+
 #include <algorithm>
+#include <boost/regex.hpp>
 #include <cmath>
 #include <iostream>
 #include <limits>
@@ -32,16 +34,33 @@ using namespace std;
 DEFINE_bool(benchmark, false, "Run benchmarks.");
 DEFINE_bool(json, false, "Output in JSON format.");
 
+DEFINE_string(bm_regex, "",
+              "Only benchmarks whose names match this regex will be run.");
+
+DEFINE_int64(bm_min_usec, 100,
+             "Minimum # of microseconds we'll accept for each benchmark.");
+
+DEFINE_int64(bm_min_iters, 1,
+             "Minimum # of iterations we'll try for each benchmark.");
+
+DEFINE_int32(bm_max_secs, 1,
+             "Maximum # of seconds we'll spend on each benchmark.");
+
+
 namespace folly {
 
 BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
 
-typedef function<uint64_t(unsigned int)> BenchmarkFun;
+typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
 static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks;
 
 // Add the global baseline
 BENCHMARK(globalBenchmarkBaseline) {
+#ifdef _MSC_VER
+  _ReadWriteBarrier();
+#else
   asm volatile("");
+#endif
 }
 
 void detail::addBenchmarkImpl(const char* file, const char* name,
@@ -191,17 +210,19 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
     CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
     resolutionInNs = ts.tv_nsec;
   }
-  // Whe choose a minimum minimum (sic) of 10,000 nanoseconds, but if
+  // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
   // the clock resolution is worse than that, it will be larger. In
   // essence we're aiming at making the quantization noise 0.01%.
-  static const auto minNanoseconds = min(resolutionInNs * 100000, 1000000000UL);
+  static const auto minNanoseconds =
+    max<uint64_t>(FLAGS_bm_min_usec * 1000UL,
+        min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
 
   // We do measurements in several epochs and take the minimum, to
   // account for jitter.
   static const unsigned int epochs = 1000;
   // We establish a total time budget as we don't want a measurement
   // to take too long. This will curtail the number of actual epochs.
-  static const uint64_t timeBudgetInNs = 1000000000;
+  const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000;
   timespec global;
   CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
 
@@ -209,14 +230,15 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
   size_t actualEpochs = 0;
 
   for (; actualEpochs < epochs; ++actualEpochs) {
-    for (unsigned int n = 1; n < (1U << 30); n *= 2) {
-      auto const nsecs = fun(n);
-      if (nsecs < minNanoseconds) {
+    for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) {
+      auto const nsecsAndIter = fun(n);
+      if (nsecsAndIter.first < minNanoseconds) {
         continue;
       }
       // We got an accurate enough timing, done. But only save if
       // smaller than the current result.
-      epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline);
+      epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
+                                       nsecsAndIter.second - globalBaseline);
       // Done with the current epoch, we got a meaningful timing.
       break;
     }
@@ -234,74 +256,75 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
   return max(0.0, estimateTime(epochResults, epochResults + actualEpochs));
 }
 
-static string humanReadable(double n, unsigned int decimals) {
-  auto a = fabs(n);
-  char suffix = ' ';
-
-  if (a >= 1E21) {
-    // Too big to be comprehended by the puny human brain
-    suffix = '!';
-    n /= 1E21;
-  } else if (a >= 1E18) {
-    // "EXA" written with suffix 'X' so as to not create confusion
-    // with scientific notation.
-    suffix = 'X';
-    n /= 1E18;
-  } else if (a >= 1E15) {
-    // "PETA"
-    suffix = 'P';
-    n /= 1E15;
-  } else if (a >= 1E12) {
-    // "TERA"
-    suffix = 'T';
-    n /= 1E12;
-  } else if (a >= 1E9) {
-    // "GIGA"
-    suffix = 'G';
-    n /= 1E9;
-  } else if (a >= 1E6) {
-    // "MEGA"
-    suffix = 'M';
-    n /= 1E6;
-  } else if (a >= 1E3) {
-    // "KILO"
-    suffix = 'K';
-    n /= 1E3;
-  } else if (a == 0.0) {
-    suffix = ' ';
-  } else if (a < 1E-15) {
-    // too small
-    suffix = '?';
-    n *= 1E18;
-  } else if (a < 1E-12) {
-    // "femto"
-    suffix = 'f';
-    n *= 1E15;
-  } else if (a < 1E-9) {
-    // "pico"
-    suffix = 'p';
-    n *= 1E12;
-  } else if (a < 1E-6) {
-    // "nano"
-    suffix = 'n';
-    n *= 1E9;
-  } else if (a < 1E-3) {
-    // "micro"
-    suffix = 'u';
-    n *= 1E6;
-  } else if (a < 1) {
-    // "mili"
-    suffix = 'm';
-    n *= 1E3;
+struct ScaleInfo {
+  double boundary;
+  const char* suffix;
+};
+
+static const ScaleInfo kTimeSuffixes[] {
+  { 365.25 * 24 * 3600, "years" },
+  { 24 * 3600, "days" },
+  { 3600, "hr" },
+  { 60, "min" },
+  { 1, "s" },
+  { 1E-3, "ms" },
+  { 1E-6, "us" },
+  { 1E-9, "ns" },
+  { 1E-12, "ps" },
+  { 1E-15, "fs" },
+  { 0, nullptr },
+};
+
+static const ScaleInfo kMetricSuffixes[] {
+  { 1E24, "Y" },  // yotta
+  { 1E21, "Z" },  // zetta
+  { 1E18, "X" },  // "exa" written with suffix 'X' so as to not create
+                  //   confusion with scientific notation
+  { 1E15, "P" },  // peta
+  { 1E12, "T" },  // tera
+  { 1E9, "G" },   // giga
+  { 1E6, "M" },   // mega
+  { 1E3, "K" },   // kilo
+  { 1, "" },
+  { 1E-3, "m" },  // milli
+  { 1E-6, "u" },  // micro
+  { 1E-9, "n" },  // nano
+  { 1E-12, "p" }, // pico
+  { 1E-15, "f" }, // femto
+  { 1E-18, "a" }, // atto
+  { 1E-21, "z" }, // zepto
+  { 1E-24, "y" }, // yocto
+  { 0, nullptr },
+};
+
+static string humanReadable(double n, unsigned int decimals,
+                            const ScaleInfo* scales) {
+  if (std::isinf(n) || std::isnan(n)) {
+    return folly::to<string>(n);
+  }
+
+  const double absValue = fabs(n);
+  const ScaleInfo* scale = scales;
+  while (absValue < scale[0].boundary && scale[1].suffix != nullptr) {
+    ++scale;
   }
 
-  return stringPrintf("%*.*f%c", decimals + 3 + 1, decimals, n, suffix);
+  const double scaledValue = n / scale->boundary;
+  return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);
+}
+
+static string readableTime(double n, unsigned int decimals) {
+  return humanReadable(n, decimals, kTimeSuffixes);
+}
+
+static string metricReadable(double n, unsigned int decimals) {
+  return humanReadable(n, decimals, kMetricSuffixes);
 }
 
 static void printBenchmarkResultsAsTable(
   const vector<tuple<const char*, const char*, double> >& data) {
   // Width available
-  static const uint columns = 76;
+  static const unsigned int columns = 76;
 
   // Compute the longest benchmark name
   size_t longestName = 0;
@@ -317,8 +340,8 @@ static void printBenchmarkResultsAsTable(
   // Print header for a file
   auto header = [&](const char* file) {
     separator('=');
-    printf("%-*srelative  ns/iter  iters/s\n",
-           columns - 26, file);
+    printf("%-*srelative  time/iter  iters/s\n",
+           columns - 28, file);
     separator('=');
   };
 
@@ -346,23 +369,24 @@ static void printBenchmarkResultsAsTable(
       baselineNsPerIter = get<2>(datum);
       useBaseline = false;
     }
-    s.resize(columns - 27, ' ');
+    s.resize(columns - 29, ' ');
     auto nsPerIter = get<2>(datum);
-    auto itersPerSec = 1E9 / nsPerIter;
+    auto secPerIter = nsPerIter / 1E9;
+    auto itersPerSec = 1 / secPerIter;
     if (!useBaseline) {
       // Print without baseline
-      printf("%*s           %s  %s\n",
+      printf("%*s           %9s  %7s\n",
              static_cast<int>(s.size()), s.c_str(),
-             humanReadable(nsPerIter, 2).c_str(),
-             humanReadable(itersPerSec, 2).c_str());
+             readableTime(secPerIter, 2).c_str(),
+             metricReadable(itersPerSec, 2).c_str());
     } else {
       // Print with baseline
       auto rel = baselineNsPerIter / nsPerIter * 100.0;
-      printf("%*s %7.2f%%  %s  %s\n",
+      printf("%*s %7.2f%%  %9s  %7s\n",
              static_cast<int>(s.size()), s.c_str(),
              rel,
-             humanReadable(nsPerIter, 2).c_str(),
-             humanReadable(itersPerSec, 2).c_str());
+             readableTime(secPerIter, 2).c_str(),
+             metricReadable(itersPerSec, 2).c_str());
     }
   }
   separator('=');
@@ -394,15 +418,24 @@ void runBenchmarks() {
   vector<tuple<const char*, const char*, double>> results;
   results.reserve(benchmarks.size() - 1);
 
+  std::unique_ptr<boost::regex> bmRegex;
+  if (!FLAGS_bm_regex.empty()) {
+    bmRegex.reset(new boost::regex(FLAGS_bm_regex));
+  }
+
   // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
 
   auto const globalBaseline = runBenchmarkGetNSPerIteration(
     get<2>(benchmarks.front()), 0);
   FOR_EACH_RANGE (i, 1, benchmarks.size()) {
-    auto elapsed = strcmp(get<1>(benchmarks[i]), "-") == 0
-      ? 0.0 // skip the separators
-      : runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]),
-                                      globalBaseline);
+    double elapsed = 0.0;
+    if (strcmp(get<1>(benchmarks[i]), "-") != 0) { // skip separators
+      if (bmRegex && !boost::regex_search(get<1>(benchmarks[i]), *bmRegex)) {
+        continue;
+      }
+      elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]),
+                                              globalBaseline);
+    }
     results.emplace_back(get<0>(benchmarks[i]),
                          get<1>(benchmarks[i]), elapsed);
   }