folly/Benchmark.h

   1 /*
   2  * Copyright 2012 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef FOLLY_BENCHMARK_H_
  18 #define FOLLY_BENCHMARK_H_
  19
  20 #include "folly/Preprocessor.h" // for FB_ANONYMOUS_VARIABLE
  21 #include <cassert>
  22 #include <ctime>
  23 #include <boost/function_types/function_arity.hpp>
  24 #include <functional>
  25 #include <glog/logging.h>
  26 #include <gflags/gflags.h>
  27 #include <limits>
  28
  29 DECLARE_bool(benchmark);
  30
  31 namespace folly {
  32
  33 /**
  34  * Runs all benchmarks defined. Usually put in main().
  35  */
  36 void runBenchmarks();
  37
  38 /**
  39  * Runs all benchmarks defined if and only if the --benchmark flag has
  40  * been passed to the program. Usually put in main().
  41  */
  42 inline bool runBenchmarksOnFlag() {
  43   if (FLAGS_benchmark) {
  44     runBenchmarks();
  45   }
  46   return FLAGS_benchmark;
  47 }
  48
  49 namespace detail {
  50
  51 /**
  52  * This is the clock ID used for measuring time. On older kernels, the
  53  * resolution of this clock will be very coarse, which will cause the
  54  * benchmarks to fail.
  55  */
  56 enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
  57
  58 /**
  59  * Adds a benchmark wrapped in a std::function. Only used
  60  * internally. Pass by value is intentional.
  61  */
  62 void addBenchmarkImpl(const char* file,
  63                       const char* name,
  64                       std::function<uint64_t(unsigned int)>);
  65
  66 /**
  67  * Takes the difference between two timespec values. end is assumed to
  68  * occur after start.
  69  */
  70 inline uint64_t timespecDiff(timespec end, timespec start) {
  71   if (end.tv_sec == start.tv_sec) {
  72     assert(end.tv_nsec >= start.tv_nsec);
  73     return end.tv_nsec - start.tv_nsec;
  74   }
  75   assert(end.tv_sec > start.tv_sec &&
  76          end.tv_sec - start.tv_sec <
  77          std::numeric_limits<uint64_t>::max() / 1000000000UL);
  78   return (end.tv_sec - start.tv_sec) * 1000000000UL
  79     + end.tv_nsec - start.tv_nsec;
  80 }
  81
  82 /**
  83  * Takes the difference between two sets of timespec values. The first
  84  * two come from a high-resolution clock whereas the other two come
  85  * from a low-resolution clock. The crux of the matter is that
  86  * high-res values may be bogus as documented in
  87  * http://linux.die.net/man/3/clock_gettime. The trouble is when the
  88  * running process migrates from one CPU to another, which is more
  89  * likely for long-running processes. Therefore we watch for high
  90  * differences between the two timings.
  91  *
  92  * This function is subject to further improvements.
  93  */
  94 inline uint64_t timespecDiff(timespec end, timespec start,
  95                              timespec endCoarse, timespec startCoarse) {
  96   auto fine = timespecDiff(end, start);
  97   auto coarse = timespecDiff(endCoarse, startCoarse);
  98   if (coarse - fine >= 1000000) {
  99     // The fine time is in all likelihood bogus
 100     return coarse;
 101   }
 102   return fine;
 103 }
 104
 105 } // namespace detail
 106
 107 /**
 108  * Supporting type for BENCHMARK_SUSPEND defined below.
 109  */
 110 struct BenchmarkSuspender {
 111   BenchmarkSuspender() {
 112     CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
 113   }
 114
 115   BenchmarkSuspender(const BenchmarkSuspender &) = delete;
 116   BenchmarkSuspender(BenchmarkSuspender && rhs) {
 117     start = rhs.start;
 118     rhs.start.tv_nsec = rhs.start.tv_sec = 0;
 119   }
 120
 121   BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
 122   BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
 123     if (start.tv_nsec > 0 || start.tv_sec > 0) {
 124       tally();
 125     }
 126     start = rhs.start;
 127     rhs.start.tv_nsec = rhs.start.tv_sec = 0;
 128     return *this;
 129   }
 130
 131   ~BenchmarkSuspender() {
 132     if (start.tv_nsec > 0 || start.tv_sec > 0) {
 133       tally();
 134     }
 135   }
 136
 137   void dismiss() {
 138     assert(start.tv_nsec > 0 || start.tv_sec > 0);
 139     tally();
 140     start.tv_nsec = start.tv_sec = 0;
 141   }
 142
 143   void rehire() {
 144     assert(start.tv_nsec == 0 || start.tv_sec == 0);
 145     CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
 146   }
 147
 148   /**
 149    * This helps the macro definition. To get around the dangers of
 150    * operator bool, returns a pointer to member (which allows no
 151    * arithmetic).
 152    */
 153   operator int BenchmarkSuspender::*() const {
 154     return nullptr;
 155   }
 156
 157   /**
 158    * Accumulates nanoseconds spent outside benchmark.
 159    */
 160   typedef uint64_t NanosecondsSpent;
 161   static NanosecondsSpent nsSpent;
 162
 163 private:
 164   void tally() {
 165     timespec end;
 166     CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
 167     nsSpent += detail::timespecDiff(end, start);
 168     start = end;
 169   }
 170
 171   timespec start;
 172 };
 173
 174 /**
 175  * Adds a benchmark. Usually not called directly but instead through
 176  * the macro BENCHMARK defined below. The lambda function involved
 177  * must take exactly one parameter of type unsigned, and the benchmark
 178  * uses it with counter semantics (iteration occurs inside the
 179  * function).
 180  */
 181 template <typename Lambda>
 182 typename std::enable_if<
 183   boost::function_types::function_arity<decltype(&Lambda::operator())>::value
 184   == 2
 185 >::type
 186 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
 187   auto execute = [=](unsigned int times) {
 188     BenchmarkSuspender::nsSpent = 0;
 189     timespec start, end;
 190
 191     // CORE MEASUREMENT STARTS
 192     CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start));
 193     lambda(times);
 194     CHECK_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
 195     // CORE MEASUREMENT ENDS
 196
 197     return detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent;
 198   };
 199
 200   detail::addBenchmarkImpl(file, name,
 201                            std::function<uint64_t(unsigned int)>(execute));
 202 }
 203
 204 /**
 205  * Adds a benchmark. Usually not called directly but instead through
 206  * the macro BENCHMARK defined below. The lambda function involved
 207  * must take zero parameters, and the benchmark calls it repeatedly
 208  * (iteration occurs outside the function).
 209  */
 210 template <typename Lambda>
 211 typename std::enable_if<
 212   boost::function_types::function_arity<decltype(&Lambda::operator())>::value
 213   == 1
 214 >::type
 215 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
 216   addBenchmark(file, name, [=](unsigned int times) {
 217       while (times-- > 0) {
 218         lambda();
 219       }
 220     });
 221 }
 222
 223 /**
 224  * Call doNotOptimizeAway(var) against variables that you use for
 225  * benchmarking but otherwise are useless. The compiler tends to do a
 226  * good job at eliminating unused variables, and this function fools
 227  * it into thinking var is in fact needed.
 228  */
 229 template <class T>
 230 void doNotOptimizeAway(T&& datum) {
 231   asm volatile("" : "+r" (datum));
 232 }
 233
 234 } // namespace folly
 235
 236 /**
 237  * Introduces a benchmark function. Used internally, see BENCHMARK and
 238  * friends below.
 239  */
 240 #define BENCHMARK_IMPL(funName, stringName, paramType, paramName)       \
 241   static void funName(paramType);                                       \
 242   static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (           \
 243     ::folly::addBenchmark(__FILE__, stringName,                         \
 244       [](paramType paramName) { funName(paramName); }),                 \
 245     true);                                                              \
 246   static void funName(paramType paramName)
 247
 248 /**
 249  * Introduces a benchmark function. Use with either one one or two
 250  * arguments. The first is the name of the benchmark. Use something
 251  * descriptive, such as insertVectorBegin. The second argument may be
 252  * missing, or could be a symbolic counter. The counter dictates how
 253  * many internal iteration the benchmark does. Example:
 254  *
 255  * BENCHMARK(vectorPushBack) {
 256  *   vector<int> v;
 257  *   v.push_back(42);
 258  * }
 259  *
 260  * BENCHMARK(insertVectorBegin, n) {
 261  *   vector<int> v;
 262  *   FOR_EACH_RANGE (i, 0, n) {
 263  *     v.insert(v.begin(), 42);
 264  *   }
 265  * }
 266  */
 267 #define BENCHMARK(name, ...)                                    \
 268   BENCHMARK_IMPL(                                               \
 269     name,                                                       \
 270     FB_STRINGIZE(name),                                         \
 271     FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
 272     __VA_ARGS__)
 273
 274 /**
 275  * Defines a benchmark that passes a parameter to another one. This is
 276  * common for benchmarks that need a "problem size" in addition to
 277  * "number of iterations". Consider:
 278  *
 279  * void pushBack(uint n, size_t initialSize) {
 280  *   vector<int> v;
 281  *   BENCHMARK_SUSPEND {
 282  *     v.resize(initialSize);
 283  *   }
 284  *   FOR_EACH_RANGE (i, 0, n) {
 285  *    v.push_back(i);
 286  *   }
 287  * }
 288  * BENCHMARK_PARAM(pushBack, 0)
 289  * BENCHMARK_PARAM(pushBack, 1000)
 290  * BENCHMARK_PARAM(pushBack, 1000000)
 291  *
 292  * The benchmark above estimates the speed of push_back at different
 293  * initial sizes of the vector. The framework will pass 0, 1000, and
 294  * 1000000 for initialSize, and the iteration count for n.
 295  */
 296 #define BENCHMARK_PARAM(name, param)                                    \
 297   BENCHMARK_IMPL(                                                       \
 298       FB_CONCATENATE(name, FB_CONCATENATE(_, param)),                   \
 299       FB_STRINGIZE(name) "(" FB_STRINGIZE(param) ")",                   \
 300       unsigned,                                                         \
 301       iters) {                                                          \
 302     name(iters, param);                                                 \
 303   }
 304
 305 /**
 306  * Just like BENCHMARK, but prints the time relative to a
 307  * baseline. The baseline is the most recent BENCHMARK() seen in
 308  * lexical order. Example:
 309  *
 310  * // This is the baseline
 311  * BENCHMARK(insertVectorBegin, n) {
 312  *   vector<int> v;
 313  *   FOR_EACH_RANGE (i, 0, n) {
 314  *     v.insert(v.begin(), 42);
 315  *   }
 316  * }
 317  *
 318  * BENCHMARK_RELATIVE(insertListBegin, n) {
 319  *   list<int> s;
 320  *   FOR_EACH_RANGE (i, 0, n) {
 321  *     s.insert(s.begin(), 42);
 322  *   }
 323  * }
 324  *
 325  * Any number of relative benchmark can be associated with a
 326  * baseline. Another BENCHMARK() occurrence effectively establishes a
 327  * new baseline.
 328  */
 329 #define BENCHMARK_RELATIVE(name, ...)                           \
 330   BENCHMARK_IMPL(                                               \
 331     name,                                                       \
 332     "%" FB_STRINGIZE(name),                                     \
 333     FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
 334     __VA_ARGS__)
 335
 336 /**
 337  * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM.
 338  */
 339 #define BENCHMARK_RELATIVE_PARAM(name, param)                           \
 340   BENCHMARK_IMPL(                                                       \
 341       FB_CONCATENATE(name, FB_CONCATENATE(_, param)),                   \
 342       "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param) ")",               \
 343       unsigned,                                                         \
 344       iters) {                                                          \
 345     name(iters, param);                                                 \
 346   }
 347
 348 /**
 349  * Draws a line of dashes.
 350  */
 351 #define BENCHMARK_DRAW_LINE()                                   \
 352   static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (   \
 353     ::folly::addBenchmark(__FILE__, "-", []() { }),             \
 354     true);
 355
 356 /**
 357  * Allows execution of code that doesn't count torward the benchmark's
 358  * time budget. Example:
 359  *
 360  * BENCHMARK_START_GROUP(insertVectorBegin, n) {
 361  *   vector<int> v;
 362  *   BENCHMARK_SUSPEND {
 363  *     v.reserve(n);
 364  *   }
 365  *   FOR_EACH_RANGE (i, 0, n) {
 366  *     v.insert(v.begin(), 42);
 367  *   }
 368  * }
 369  */
 370 #define BENCHMARK_SUSPEND                               \
 371   if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) =   \
 372       ::folly::BenchmarkSuspender()) {}                 \
 373   else
 374
 375 #endif // FOLLY_BENCHMARK_H_