From e0b75e77cc224d3caffbb717342873c858e893fe Mon Sep 17 00:00:00 2001
From: Christopher Dykes <cdykes@fb.com>
Date: Tue, 31 Jan 2017 16:34:49 -0800
Subject: [PATCH] Use std::chrono::high_resolution_clock for folly::Benchmark

Summary: On Windows, `clock_gettime(CLOCK_REALTIME)` is implemented on top of `std::chrono::system_clock`, whose resolution is only that of `FILETIME` (100-nanosecond ticks). Modernize the benchmark code to use `std::chrono::high_resolution_clock`, which is intended for this purpose.

Reviewed By: yfeldblum

Differential Revision: D4476671

fbshipit-source-id: 3db1debc8f408f689b5c9fe1966a72b8dad4da93
---
 folly/Benchmark.cpp                           | 40 ++++----
 folly/Benchmark.h                             | 98 ++++++-------------
 folly/test/ProducerConsumerQueueBenchmark.cpp | 15 ++-
 3 files changed, 61 insertions(+), 92 deletions(-)
diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp
index 4ec24c72..4a96bf36 100644
--- a/folly/Benchmark.cpp
+++ b/folly/Benchmark.cpp
@@ -62,7 +62,7 @@ DEFINE_int32(
 
 namespace folly {
 
-BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
+std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
 
 typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
 
@@ -118,33 +118,31 @@ static double estimateTime(double * begin, double * end) {
 
 static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
                                             const double globalBaseline) {
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::microseconds;
+  using std::chrono::nanoseconds;
+  using std::chrono::seconds;
+
   // They key here is accuracy; too low numbers means the accuracy was
   // coarse. We up the ante until we get to at least minNanoseconds
   // timings.
-  static uint64_t resolutionInNs = 0;
-  if (!resolutionInNs) {
-    timespec ts;
-    CHECK_EQ(0, clock_getres(CLOCK_REALTIME, &ts));
-    CHECK_EQ(0, ts.tv_sec) << "Clock sucks.";
-    CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good.";
-    CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
-    resolutionInNs = uint64_t(ts.tv_nsec);
-  }
+  static_assert(
+      std::is_same<high_resolution_clock::duration, nanoseconds>::value,
+      "High resolution clock must be nanosecond resolution.");
   // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
   // the clock resolution is worse than that, it will be larger. In
   // essence we're aiming at making the quantization noise 0.01%.
-  static const auto minNanoseconds = max<uint64_t>(
-      uint64_t(FLAGS_bm_min_usec) * 1000ULL,
-      min<uint64_t>(resolutionInNs * 100000ULL, 1000000000ULL));
+  static const auto minNanoseconds = std::max<nanoseconds>(
+      nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
 
   // We do measurements in several epochs and take the minimum, to
   // account for jitter.
   static const unsigned int epochs = 1000;
   // We establish a total time budget as we don't want a measurement
   // to take too long. This will curtail the number of actual epochs.
-  const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL;
-  timespec global;
-  CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
+  const auto timeBudget = seconds(FLAGS_bm_max_secs);
+  auto global = high_resolution_clock::now();
 
   double epochResults[epochs] = { 0 };
   size_t actualEpochs = 0;
@@ -158,14 +156,14 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
       }
       // We got an accurate enough timing, done. But only save if
       // smaller than the current result.
-      epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
-                                       nsecsAndIter.second - globalBaseline);
+      auto nsecs = duration_cast<nanoseconds>(nsecsAndIter.first).count();
+      epochResults[actualEpochs] =
+          max(0.0, double(nsecs) / nsecsAndIter.second - globalBaseline);
       // Done with the current epoch, we got a meaningful timing.
       break;
     }
-    timespec now;
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &now));
-    if (detail::timespecDiff(now, global) >= timeBudgetInNs) {
+    auto now = high_resolution_clock::now();
+    if (now - global >= timeBudget) {
       // No more time budget available.
       ++actualEpochs;
       break;
diff --git a/folly/Benchmark.h b/folly/Benchmark.h
index e9547987..cf11ca75 100644
--- a/folly/Benchmark.h
+++ b/folly/Benchmark.h
@@ -21,16 +21,16 @@
 #include <folly/ScopeGuard.h>
 #include <folly/Traits.h>
 #include <folly/portability/GFlags.h>
-#include <folly/portability/Time.h>
 
 #include <cassert>
-#include <ctime>
-#include <boost/function_types/function_arity.hpp>
+#include <chrono>
 #include <functional>
-#include <glog/logging.h>
 #include <limits>
 #include <type_traits>
 
+#include <boost/function_types/function_arity.hpp>
+#include <glog/logging.h>
+
 DECLARE_bool(benchmark);
 
 namespace folly {
@@ -53,7 +53,8 @@ inline bool runBenchmarksOnFlag() {
 
 namespace detail {
 
-typedef std::pair<uint64_t, unsigned int> TimeIterPair;
+using TimeIterPair =
+    std::pair<std::chrono::high_resolution_clock::duration, unsigned int>;
 
 /**
  * Adds a benchmark wrapped in a std::function. Only used
@@ -63,85 +64,51 @@ void addBenchmarkImpl(const char* file,
                       const char* name,
                       std::function<TimeIterPair(unsigned int)>);
 
-/**
- * Takes the difference between two timespec values. end is assumed to
- * occur after start.
- */
-inline uint64_t timespecDiff(timespec end, timespec start) {
-  if (end.tv_sec == start.tv_sec) {
-    assert(end.tv_nsec >= start.tv_nsec);
-    return uint64_t(end.tv_nsec - start.tv_nsec);
-  }
-  assert(end.tv_sec > start.tv_sec);
-  auto diff = uint64_t(end.tv_sec - start.tv_sec);
-  assert(diff < std::numeric_limits<uint64_t>::max() / 1000000000ULL);
-  return diff * 1000000000ULL + end.tv_nsec - start.tv_nsec;
-}
-
-/**
- * Takes the difference between two sets of timespec values. The first
- * two come from a high-resolution clock whereas the other two come
- * from a low-resolution clock. The crux of the matter is that
- * high-res values may be bogus as documented in
- * http://linux.die.net/man/3/clock_gettime. The trouble is when the
- * running process migrates from one CPU to another, which is more
- * likely for long-running processes. Therefore we watch for high
- * differences between the two timings.
- *
- * This function is subject to further improvements.
- */
-inline uint64_t timespecDiff(timespec end, timespec start,
-                             timespec endCoarse, timespec startCoarse) {
-  auto fine = timespecDiff(end, start);
-  auto coarse = timespecDiff(endCoarse, startCoarse);
-  if (coarse - fine >= 1000000) {
-    // The fine time is in all likelihood bogus
-    return coarse;
-  }
-  return fine;
-}
-
 } // namespace detail
 
 /**
  * Supporting type for BENCHMARK_SUSPEND defined below.
  */
 struct BenchmarkSuspender {
+  using Clock = std::chrono::high_resolution_clock;
+  using TimePoint = Clock::time_point;
+  using Duration = Clock::duration;
+
   BenchmarkSuspender() {
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &start));
+    start = Clock::now();
   }
 
   BenchmarkSuspender(const BenchmarkSuspender &) = delete;
   BenchmarkSuspender(BenchmarkSuspender && rhs) noexcept {
     start = rhs.start;
-    rhs.start = {0, 0};
+    rhs.start = {};
   }
 
   BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
   BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
-    if (start.tv_nsec > 0 || start.tv_sec > 0) {
+    if (start != TimePoint{}) {
       tally();
     }
     start = rhs.start;
-    rhs.start = {0, 0};
+    rhs.start = {};
     return *this;
   }
 
   ~BenchmarkSuspender() {
-    if (start.tv_nsec > 0 || start.tv_sec > 0) {
+    if (start != TimePoint{}) {
       tally();
     }
   }
 
   void dismiss() {
-    assert(start.tv_nsec > 0 || start.tv_sec > 0);
+    assert(start != TimePoint{});
     tally();
-    start = {0, 0};
+    start = {};
   }
 
   void rehire() {
-    assert(start.tv_nsec == 0 || start.tv_sec == 0);
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &start));
+    assert(start == TimePoint{});
+    start = Clock::now();
   }
 
   template <class F>
@@ -160,20 +127,18 @@ struct BenchmarkSuspender {
   }
 
   /**
-   * Accumulates nanoseconds spent outside benchmark.
+   * Accumulates time spent outside benchmark.
    */
-  typedef uint64_t NanosecondsSpent;
-  static NanosecondsSpent nsSpent;
+  static Duration timeSpent;
 
-private:
+ private:
   void tally() {
-    timespec end;
-    CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &end));
-    nsSpent += detail::timespecDiff(end, start);
+    auto end = Clock::now();
+    timeSpent += end - start;
     start = end;
   }
 
-  timespec start;
+  TimePoint start;
 };
 
 /**
@@ -190,22 +155,17 @@ typename std::enable_if<
 >::type
 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
   auto execute = [=](unsigned int times) {
-    BenchmarkSuspender::nsSpent = 0;
-    timespec start, end;
+    BenchmarkSuspender::timeSpent = {};
     unsigned int niter;
 
     // CORE MEASUREMENT STARTS
-    auto const r1 = clock_gettime(CLOCK_REALTIME, &start);
+    auto start = std::chrono::high_resolution_clock::now();
     niter = lambda(times);
-    auto const r2 = clock_gettime(CLOCK_REALTIME, &end);
+    auto end = std::chrono::high_resolution_clock::now();
     // CORE MEASUREMENT ENDS
 
-    CHECK_EQ(0, r1);
-    CHECK_EQ(0, r2);
-
     return detail::TimeIterPair(
-      detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent,
-      niter);
+        (end - start) - BenchmarkSuspender::timeSpent, niter);
   };
 
   detail::addBenchmarkImpl(file, name,
diff --git a/folly/test/ProducerConsumerQueueBenchmark.cpp b/folly/test/ProducerConsumerQueueBenchmark.cpp
index 9c422277..e8079680 100644
--- a/folly/test/ProducerConsumerQueueBenchmark.cpp
+++ b/folly/test/ProducerConsumerQueueBenchmark.cpp
@@ -97,6 +97,17 @@ struct LatencyTest {
       computeTimeCost();
     }
 
+  static uint64_t timespecDiff(timespec end, timespec start) {
+    if (end.tv_sec == start.tv_sec) {
+      assert(end.tv_nsec >= start.tv_nsec);
+      return uint64_t(end.tv_nsec - start.tv_nsec);
+    }
+    assert(end.tv_sec > start.tv_sec);
+    auto diff = uint64_t(end.tv_sec - start.tv_sec);
+    assert(diff < std::numeric_limits<uint64_t>::max() / 1000000000ULL);
+    return diff * 1000000000ULL + end.tv_nsec - start.tv_nsec;
+  }
+
   void computeTimeCost() {
     timespec start, end;
     clock_gettime(CLOCK_REALTIME, &start);
@@ -105,7 +116,7 @@ struct LatencyTest {
       clock_gettime(CLOCK_REALTIME, &tv);
     }
     clock_gettime(CLOCK_REALTIME, &end);
-    time_cost_ = 2 * detail::timespecDiff(end, start) / iters_;
+    time_cost_ = 2 * timespecDiff(end, start) / iters_;
   }
 
   void producer() {
@@ -120,7 +131,7 @@ struct LatencyTest {
       clock_gettime(CLOCK_REALTIME, &sleepstart);
       do {
         clock_gettime(CLOCK_REALTIME, &sleeptime);
-      } while (detail::timespecDiff(sleeptime, sleepstart) < 1000000);
+      } while (timespecDiff(sleeptime, sleepstart) < 1000000);
 
       timespec tv;
       clock_gettime(CLOCK_REALTIME, &tv);
-- 
2.34.1