Use std::chrono::high_resolution_clock for folly::Benchmark

author Christopher Dykes <cdykes@fb.com>

Wed, 1 Feb 2017 00:34:49 +0000 (16:34 -0800)

committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>

Wed, 1 Feb 2017 00:47:59 +0000 (16:47 -0800)
author Christopher Dykes <cdykes@fb.com>
Wed, 1 Feb 2017 00:34:49 +0000 (16:34 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Wed, 1 Feb 2017 00:47:59 +0000 (16:47 -0800)
diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp

index 4ec24c72970e2386831615baf47785b304cc83ea..4a96bf3697356584a1095960c5d98d039d3c1526 100644 (file)
--- a/folly/Benchmark.cpp
+++ b/folly/Benchmark.cpp
@@ -62,7 +62,7 @@ DEFINE_int32(
  
  namespace folly {
  
-BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
+std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
  
  typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
  
@@ -118,33 +118,31 @@ static double estimateTime(double * begin, double * end) {
  
  static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
                                              const double globalBaseline) {
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::microseconds;
+  using std::chrono::nanoseconds;
+  using std::chrono::seconds;
+
    // The key here is accuracy; numbers that are too low mean the accuracy
    // was coarse. We up the ante until we get to at least minNanoseconds
    // timings.
-  static uint64_t resolutionInNs = 0;
-  if (!resolutionInNs) {
-    timespec ts;
-    CHECK_EQ(0, clock_getres(CLOCK_REALTIME, &ts));
-    CHECK_EQ(0, ts.tv_sec) << "Clock sucks.";
-    CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good.";
-    CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
-    resolutionInNs = uint64_t(ts.tv_nsec);
-  }
+  static_assert(
+      std::is_same<high_resolution_clock::duration, nanoseconds>::value,
+      "High resolution clock must be nanosecond resolution.");
    // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
    // the clock resolution is worse than that, it will be larger. In
    // essence we're aiming at making the quantization noise 0.01%.
-  static const auto minNanoseconds = max<uint64_t>(
-      uint64_t(FLAGS_bm_min_usec) * 1000ULL,
-      min<uint64_t>(resolutionInNs * 100000ULL, 1000000000ULL));
+  static const auto minNanoseconds = std::max<nanoseconds>(
+      nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
  
    // We do measurements in several epochs and take the minimum, to
    // account for jitter.
    static const unsigned int epochs = 1000;
    // We establish a total time budget as we don't want a measurement
    // to take too long. This will curtail the number of actual epochs.
-  const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL;
-  timespec global;
-  CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
+  const auto timeBudget = seconds(FLAGS_bm_max_secs);
+  auto global = high_resolution_clock::now();
  
    double epochResults[epochs] = { 0 };
    size_t actualEpochs = 0;
@@ -158,14 +156,14 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
        }
        // We got an accurate enough timing, done. But only save if
        // smaller than the current result.
-      epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
-                                       nsecsAndIter.second - globalBaseline);
+      auto nsecs = duration_cast<nanoseconds>(nsecsAndIter.first).count();
+      epochResults[actualEpochs] =
+          max(0.0, double(nsecs) / nsecsAndIter.second - globalBaseline);
        // Done with the current epoch, we got a meaningful timing.
        break;
      }
-    timespec now;
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &now));
-    if (detail::timespecDiff(now, global) >= timeBudgetInNs) {
+    auto now = high_resolution_clock::now();
+    if (now - global >= timeBudget) {
        // No more time budget available.
        ++actualEpochs;
        break;
diff --git a/folly/Benchmark.h b/folly/Benchmark.h

index e954798790510b26f203f96229464fe5fd518a54..cf11ca75b20b02ca67da3dfbaa4fdf198d3b14de 100644 (file)
--- a/folly/Benchmark.h
+++ b/folly/Benchmark.h
@@ -21,16 +21,16 @@
  #include <folly/ScopeGuard.h>
  #include <folly/Traits.h>
  #include <folly/portability/GFlags.h>
-#include <folly/portability/Time.h>
  
  #include <cassert>
-#include <ctime>
-#include <boost/function_types/function_arity.hpp>
+#include <chrono>
  #include <functional>
-#include <glog/logging.h>
  #include <limits>
  #include <type_traits>
  
+#include <boost/function_types/function_arity.hpp>
+#include <glog/logging.h>
+
  DECLARE_bool(benchmark);
  
  namespace folly {
@@ -53,7 +53,8 @@ inline bool runBenchmarksOnFlag() {
  
  namespace detail {
  
-typedef std::pair<uint64_t, unsigned int> TimeIterPair;
+using TimeIterPair =
+    std::pair<std::chrono::high_resolution_clock::duration, unsigned int>;
  
  /**
   * Adds a benchmark wrapped in a std::function. Only used
@@ -63,85 +64,51 @@ void addBenchmarkImpl(const char* file,
                        const char* name,
                        std::function<TimeIterPair(unsigned int)>);
  
-/**
- * Takes the difference between two timespec values. end is assumed to
- * occur after start.
- */
-inline uint64_t timespecDiff(timespec end, timespec start) {
-  if (end.tv_sec == start.tv_sec) {
-    assert(end.tv_nsec >= start.tv_nsec);
-    return uint64_t(end.tv_nsec - start.tv_nsec);
-  }
-  assert(end.tv_sec > start.tv_sec);
-  auto diff = uint64_t(end.tv_sec - start.tv_sec);
-  assert(diff < std::numeric_limits<uint64_t>::max() / 1000000000ULL);
-  return diff * 1000000000ULL + end.tv_nsec - start.tv_nsec;
-}
-
-/**
- * Takes the difference between two sets of timespec values. The first
- * two come from a high-resolution clock whereas the other two come
- * from a low-resolution clock. The crux of the matter is that
- * high-res values may be bogus as documented in
- * http://linux.die.net/man/3/clock_gettime. The trouble is when the
- * running process migrates from one CPU to another, which is more
- * likely for long-running processes. Therefore we watch for high
- * differences between the two timings.
- *
- * This function is subject to further improvements.
- */
-inline uint64_t timespecDiff(timespec end, timespec start,
-                             timespec endCoarse, timespec startCoarse) {
-  auto fine = timespecDiff(end, start);
-  auto coarse = timespecDiff(endCoarse, startCoarse);
-  if (coarse - fine >= 1000000) {
-    // The fine time is in all likelihood bogus
-    return coarse;
-  }
-  return fine;
-}
-
  } // namespace detail
  
  /**
   * Supporting type for BENCHMARK_SUSPEND defined below.
   */
  struct BenchmarkSuspender {
+  using Clock = std::chrono::high_resolution_clock;
+  using TimePoint = Clock::time_point;
+  using Duration = Clock::duration;
+
    BenchmarkSuspender() {
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &start));
+    start = Clock::now();
    }
  
    BenchmarkSuspender(const BenchmarkSuspender &) = delete;
    BenchmarkSuspender(BenchmarkSuspender && rhs) noexcept {
      start = rhs.start;
-    rhs.start = {0, 0};
+    rhs.start = {};
    }
  
    BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
    BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
-    if (start.tv_nsec > 0 || start.tv_sec > 0) {
+    if (start != TimePoint{}) {
        tally();
      }
      start = rhs.start;
-    rhs.start = {0, 0};
+    rhs.start = {};
      return *this;
    }
  
    ~BenchmarkSuspender() {
-    if (start.tv_nsec > 0 || start.tv_sec > 0) {
+    if (start != TimePoint{}) {
        tally();
      }
    }
  
    void dismiss() {
-    assert(start.tv_nsec > 0 || start.tv_sec > 0);
+    assert(start != TimePoint{});
      tally();
-    start = {0, 0};
+    start = {};
    }
  
    void rehire() {
-    assert(start.tv_nsec == 0 || start.tv_sec == 0);
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &start));
+    assert(start == TimePoint{});
+    start = Clock::now();
    }
  
    template <class F>
@@ -160,20 +127,18 @@ struct BenchmarkSuspender {
    }
  
    /**
-   * Accumulates nanoseconds spent outside benchmark.
+   * Accumulates time spent outside benchmark.
     */
-  typedef uint64_t NanosecondsSpent;
-  static NanosecondsSpent nsSpent;
+  static Duration timeSpent;
  
-private:
+ private:
    void tally() {
-    timespec end;
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &end));
-    nsSpent += detail::timespecDiff(end, start);
+    auto end = Clock::now();
+    timeSpent += end - start;
      start = end;
    }
  
-  timespec start;
+  TimePoint start;
  };
  
  /**
@@ -190,22 +155,17 @@ typename std::enable_if<
  >::type
  addBenchmark(const char* file, const char* name, Lambda&& lambda) {
    auto execute = [=](unsigned int times) {
-    BenchmarkSuspender::nsSpent = 0;
-    timespec start, end;
+    BenchmarkSuspender::timeSpent = {};
      unsigned int niter;
  
      // CORE MEASUREMENT STARTS
-    auto const r1 = clock_gettime(CLOCK_REALTIME, &start);
+    auto start = std::chrono::high_resolution_clock::now();
      niter = lambda(times);
-    auto const r2 = clock_gettime(CLOCK_REALTIME, &end);
+    auto end = std::chrono::high_resolution_clock::now();
      // CORE MEASUREMENT ENDS
  
-    CHECK_EQ(0, r1);
-    CHECK_EQ(0, r2);
-
      return detail::TimeIterPair(
-      detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent,
-      niter);
+        (end - start) - BenchmarkSuspender::timeSpent, niter);
    };
  
    detail::addBenchmarkImpl(file, name,
diff --git a/folly/test/ProducerConsumerQueueBenchmark.cpp b/folly/test/ProducerConsumerQueueBenchmark.cpp

index 9c422277383746841b8aeaf5b4f7f1dcd9d5d9ef..e807968031e0c942a009fdbaaddcfaeae2532c31 100644 (file)
--- a/folly/test/ProducerConsumerQueueBenchmark.cpp
+++ b/folly/test/ProducerConsumerQueueBenchmark.cpp
@@ -97,6 +97,17 @@ struct LatencyTest {
        computeTimeCost();
      }
  
+  static uint64_t timespecDiff(timespec end, timespec start) {
+    if (end.tv_sec == start.tv_sec) {
+      assert(end.tv_nsec >= start.tv_nsec);
+      return uint64_t(end.tv_nsec - start.tv_nsec);
+    }
+    assert(end.tv_sec > start.tv_sec);
+    auto diff = uint64_t(end.tv_sec - start.tv_sec);
+    assert(diff < std::numeric_limits<uint64_t>::max() / 1000000000ULL);
+    return diff * 1000000000ULL + end.tv_nsec - start.tv_nsec;
+  }
+
    void computeTimeCost() {
      timespec start, end;
      clock_gettime(CLOCK_REALTIME, &start);
@@ -105,7 +116,7 @@ struct LatencyTest {
        clock_gettime(CLOCK_REALTIME, &tv);
      }
      clock_gettime(CLOCK_REALTIME, &end);
-    time_cost_ = 2 * detail::timespecDiff(end, start) / iters_;
+    time_cost_ = 2 * timespecDiff(end, start) / iters_;
    }
  
    void producer() {
@@ -120,7 +131,7 @@ struct LatencyTest {
        clock_gettime(CLOCK_REALTIME, &sleepstart);
        do {
          clock_gettime(CLOCK_REALTIME, &sleeptime);
-      } while (detail::timespecDiff(sleeptime, sleepstart) < 1000000);
+      } while (timespecDiff(sleeptime, sleepstart) < 1000000);
  
        timespec tv;
        clock_gettime(CLOCK_REALTIME, &tv);
author	Christopher Dykes <cdykes@fb.com>
	Wed, 1 Feb 2017 00:34:49 +0000 (16:34 -0800)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Wed, 1 Feb 2017 00:47:59 +0000 (16:47 -0800)
folly/Benchmark.cpp		patch \| blob \| history
folly/Benchmark.h		patch \| blob \| history
folly/test/ProducerConsumerQueueBenchmark.cpp		patch \| blob \| history