From e0b75e77cc224d3caffbb717342873c858e893fe Mon Sep 17 00:00:00 2001 From: Christopher Dykes Date: Tue, 31 Jan 2017 16:34:49 -0800 Subject: [PATCH] Use std::chrono::high_resolution_clock for folly::Benchmark Summary: `clock_gettime(CLOCK_REALTIME)` is based on `std::chrono::system_clock` on Windows, but that only has a resolution that's the same as `FILETIME` (100-nanosecond intervals), so modernize things and use `std::chrono::high_resolution_clock`, which is intended for this purpose. Reviewed By: yfeldblum Differential Revision: D4476671 fbshipit-source-id: 3db1debc8f408f689b5c9fe1966a72b8dad4da93 --- folly/Benchmark.cpp | 40 ++++---- folly/Benchmark.h | 98 ++++++------------- folly/test/ProducerConsumerQueueBenchmark.cpp | 15 ++- 3 files changed, 61 insertions(+), 92 deletions(-) diff --git a/folly/Benchmark.cpp b/folly/Benchmark.cpp index 4ec24c72..4a96bf36 100644 --- a/folly/Benchmark.cpp +++ b/folly/Benchmark.cpp @@ -62,7 +62,7 @@ DEFINE_int32( namespace folly { -BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent; +std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent; typedef function BenchmarkFun; @@ -118,33 +118,31 @@ static double estimateTime(double * begin, double * end) { static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, const double globalBaseline) { + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::microseconds; + using std::chrono::nanoseconds; + using std::chrono::seconds; + // They key here is accuracy; too low numbers means the accuracy was // coarse. We up the ante until we get to at least minNanoseconds // timings. 
- static uint64_t resolutionInNs = 0; - if (!resolutionInNs) { - timespec ts; - CHECK_EQ(0, clock_getres(CLOCK_REALTIME, &ts)); - CHECK_EQ(0, ts.tv_sec) << "Clock sucks."; - CHECK_LT(0, ts.tv_nsec) << "Clock too fast for its own good."; - CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel."; - resolutionInNs = uint64_t(ts.tv_nsec); - } + static_assert( + std::is_same::value, + "High resolution clock must be nanosecond resolution."); // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if // the clock resolution is worse than that, it will be larger. In // essence we're aiming at making the quantization noise 0.01%. - static const auto minNanoseconds = max( - uint64_t(FLAGS_bm_min_usec) * 1000ULL, - min(resolutionInNs * 100000ULL, 1000000000ULL)); + static const auto minNanoseconds = std::max( + nanoseconds(100000), microseconds(FLAGS_bm_min_usec)); // We do measurements in several epochs and take the minimum, to // account for jitter. static const unsigned int epochs = 1000; // We establish a total time budget as we don't want a measurement // to take too long. This will curtail the number of actual epochs. - const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000ULL; - timespec global; - CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global)); + const auto timeBudget = seconds(FLAGS_bm_max_secs); + auto global = high_resolution_clock::now(); double epochResults[epochs] = { 0 }; size_t actualEpochs = 0; @@ -158,14 +156,14 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, } // We got an accurate enough timing, done. But only save if // smaller than the current result. - epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) / - nsecsAndIter.second - globalBaseline); + auto nsecs = duration_cast(nsecsAndIter.first).count(); + epochResults[actualEpochs] = + max(0.0, double(nsecs) / nsecsAndIter.second - globalBaseline); // Done with the current epoch, we got a meaningful timing. 
break; } - timespec now; - CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &now)); - if (detail::timespecDiff(now, global) >= timeBudgetInNs) { + auto now = high_resolution_clock::now(); + if (now - global >= timeBudget) { // No more time budget available. ++actualEpochs; break; diff --git a/folly/Benchmark.h b/folly/Benchmark.h index e9547987..cf11ca75 100644 --- a/folly/Benchmark.h +++ b/folly/Benchmark.h @@ -21,16 +21,16 @@ #include #include #include -#include #include -#include -#include +#include #include -#include #include #include +#include +#include + DECLARE_bool(benchmark); namespace folly { @@ -53,7 +53,8 @@ inline bool runBenchmarksOnFlag() { namespace detail { -typedef std::pair TimeIterPair; +using TimeIterPair = + std::pair; /** * Adds a benchmark wrapped in a std::function. Only used @@ -63,85 +64,51 @@ void addBenchmarkImpl(const char* file, const char* name, std::function); -/** - * Takes the difference between two timespec values. end is assumed to - * occur after start. - */ -inline uint64_t timespecDiff(timespec end, timespec start) { - if (end.tv_sec == start.tv_sec) { - assert(end.tv_nsec >= start.tv_nsec); - return uint64_t(end.tv_nsec - start.tv_nsec); - } - assert(end.tv_sec > start.tv_sec); - auto diff = uint64_t(end.tv_sec - start.tv_sec); - assert(diff < std::numeric_limits::max() / 1000000000ULL); - return diff * 1000000000ULL + end.tv_nsec - start.tv_nsec; -} - -/** - * Takes the difference between two sets of timespec values. The first - * two come from a high-resolution clock whereas the other two come - * from a low-resolution clock. The crux of the matter is that - * high-res values may be bogus as documented in - * http://linux.die.net/man/3/clock_gettime. The trouble is when the - * running process migrates from one CPU to another, which is more - * likely for long-running processes. Therefore we watch for high - * differences between the two timings. - * - * This function is subject to further improvements. 
- */ -inline uint64_t timespecDiff(timespec end, timespec start, - timespec endCoarse, timespec startCoarse) { - auto fine = timespecDiff(end, start); - auto coarse = timespecDiff(endCoarse, startCoarse); - if (coarse - fine >= 1000000) { - // The fine time is in all likelihood bogus - return coarse; - } - return fine; -} - } // namespace detail /** * Supporting type for BENCHMARK_SUSPEND defined below. */ struct BenchmarkSuspender { + using Clock = std::chrono::high_resolution_clock; + using TimePoint = Clock::time_point; + using Duration = Clock::duration; + BenchmarkSuspender() { - CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &start)); + start = Clock::now(); } BenchmarkSuspender(const BenchmarkSuspender &) = delete; BenchmarkSuspender(BenchmarkSuspender && rhs) noexcept { start = rhs.start; - rhs.start = {0, 0}; + rhs.start = {}; } BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete; BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) { - if (start.tv_nsec > 0 || start.tv_sec > 0) { + if (start != TimePoint{}) { tally(); } start = rhs.start; - rhs.start = {0, 0}; + rhs.start = {}; return *this; } ~BenchmarkSuspender() { - if (start.tv_nsec > 0 || start.tv_sec > 0) { + if (start != TimePoint{}) { tally(); } } void dismiss() { - assert(start.tv_nsec > 0 || start.tv_sec > 0); + assert(start != TimePoint{}); tally(); - start = {0, 0}; + start = {}; } void rehire() { - assert(start.tv_nsec == 0 || start.tv_sec == 0); - CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &start)); + assert(start == TimePoint{}); + start = Clock::now(); } template @@ -160,20 +127,18 @@ struct BenchmarkSuspender { } /** - * Accumulates nanoseconds spent outside benchmark. + * Accumulates time spent outside benchmark. 
*/ - typedef uint64_t NanosecondsSpent; - static NanosecondsSpent nsSpent; + static Duration timeSpent; -private: + private: void tally() { - timespec end; - CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &end)); - nsSpent += detail::timespecDiff(end, start); + auto end = Clock::now(); + timeSpent += end - start; start = end; } - timespec start; + TimePoint start; }; /** @@ -190,22 +155,17 @@ typename std::enable_if< >::type addBenchmark(const char* file, const char* name, Lambda&& lambda) { auto execute = [=](unsigned int times) { - BenchmarkSuspender::nsSpent = 0; - timespec start, end; + BenchmarkSuspender::timeSpent = {}; unsigned int niter; // CORE MEASUREMENT STARTS - auto const r1 = clock_gettime(CLOCK_REALTIME, &start); + auto start = std::chrono::high_resolution_clock::now(); niter = lambda(times); - auto const r2 = clock_gettime(CLOCK_REALTIME, &end); + auto end = std::chrono::high_resolution_clock::now(); // CORE MEASUREMENT ENDS - CHECK_EQ(0, r1); - CHECK_EQ(0, r2); - return detail::TimeIterPair( - detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent, - niter); + (end - start) - BenchmarkSuspender::timeSpent, niter); }; detail::addBenchmarkImpl(file, name, diff --git a/folly/test/ProducerConsumerQueueBenchmark.cpp b/folly/test/ProducerConsumerQueueBenchmark.cpp index 9c422277..e8079680 100644 --- a/folly/test/ProducerConsumerQueueBenchmark.cpp +++ b/folly/test/ProducerConsumerQueueBenchmark.cpp @@ -97,6 +97,17 @@ struct LatencyTest { computeTimeCost(); } + static uint64_t timespecDiff(timespec end, timespec start) { + if (end.tv_sec == start.tv_sec) { + assert(end.tv_nsec >= start.tv_nsec); + return uint64_t(end.tv_nsec - start.tv_nsec); + } + assert(end.tv_sec > start.tv_sec); + auto diff = uint64_t(end.tv_sec - start.tv_sec); + assert(diff < std::numeric_limits::max() / 1000000000ULL); + return diff * 1000000000ULL + end.tv_nsec - start.tv_nsec; + } + void computeTimeCost() { timespec start, end; clock_gettime(CLOCK_REALTIME, &start); @@ -105,7 
+116,7 @@ struct LatencyTest { clock_gettime(CLOCK_REALTIME, &tv); } clock_gettime(CLOCK_REALTIME, &end); - time_cost_ = 2 * detail::timespecDiff(end, start) / iters_; + time_cost_ = 2 * timespecDiff(end, start) / iters_; } void producer() { @@ -120,7 +131,7 @@ struct LatencyTest { clock_gettime(CLOCK_REALTIME, &sleepstart); do { clock_gettime(CLOCK_REALTIME, &sleeptime); - } while (detail::timespecDiff(sleeptime, sleepstart) < 1000000); + } while (timespecDiff(sleeptime, sleepstart) < 1000000); timespec tv; clock_gettime(CLOCK_REALTIME, &tv); -- 2.34.1