folly/stats/TimeseriesHistogram.h

   1 /*
   2  * Copyright 2014 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef FOLLY_TIMESERIES_HISTOGRAM_H_
  18 #define FOLLY_TIMESERIES_HISTOGRAM_H_
  19
  20 #include <string>
  21 #include <boost/static_assert.hpp>
  22 #include <folly/stats/Histogram.h>
  23 #include <folly/stats/MultiLevelTimeSeries.h>
  24
  25 namespace folly {
  26
  27 /*
  28  * TimeseriesHistogram tracks data distributions as they change over time.
  29  *
  30  * Specifically, it is a bucketed histogram with different value ranges assigned
  31  * to each bucket.  Within each bucket is a MultiLevelTimeSeries from
  32  * 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a
  33  * different set of data for different historical time periods, and one can
  34  * query data distributions over different trailing time windows.
  35  *
  36  * For example, this can answer questions: "What is the data distribution over
  37  * the last minute? Over the last 10 minutes?  Since I last cleared this
  38  * histogram?"
  39  *
  40  * The class can also estimate percentiles and answer questions like: "What was
  41  * the 99th percentile data value over the last 10 minutes?"
  42  *
  43  * Note: that depending on the size of your buckets and the smoothness
  44  * of your data distribution, the estimate may be way off from the actual
  45  * value.  In particular, if the given percentile falls outside of the bucket
  46  * range (i.e. your buckets range in 0 - 100,000 but the 99th percentile is
  47  * around 115,000) this estimate may be very wrong.
  48  *
  49  * The memory usage for a typical histogram is roughly 3k * (# of buckets).  All
  50  * insertion operations are amortized O(1), and all queries are O(# of buckets).
  51  */
  52 template <class T, class TT=std::chrono::seconds,
  53           class C=folly::MultiLevelTimeSeries<T, TT>>
  54 class TimeseriesHistogram {
  55  private:
  56    // NOTE: T must be equivalent to _signed_ numeric type for our math.
  57    BOOST_STATIC_ASSERT(std::numeric_limits<T>::is_signed);
  58
  59  public:
  60   // values to be inserted into container
  61   typedef T ValueType;
  62   // the container type we use internally for each bucket
  63   typedef C ContainerType;
  64   // The time type.
  65   typedef TT TimeType;
  66
  67   /*
  68    * Create a TimeSeries histogram and initialize the bucketing and levels.
  69    *
  70    * The buckets are created by chopping the range [min, max) into pieces
  71    * of size bucketSize, with the last bucket being potentially shorter.  Two
  72    * additional buckets are always created -- the "under" bucket for the range
  73    * (-inf, min) and the "over" bucket for the range [max, +inf).
  74    *
  75    * @param bucketSize the width of each bucket
  76    * @param min the smallest value for the bucket range.
  77    * @param max the largest value for the bucket range
  78    * @param defaultContainer a pre-initialized timeseries with the desired
  79    *                         number of levels and their durations.
  80    */
  81   TimeseriesHistogram(ValueType bucketSize, ValueType min, ValueType max,
  82                       const ContainerType& defaultContainer);
  83
  84   /* Return the bucket size of each bucket in the histogram. */
  85   ValueType getBucketSize() const { return buckets_.getBucketSize(); }
  86
  87   /* Return the min value at which bucketing begins. */
  88   ValueType getMin() const { return buckets_.getMin(); }
  89
  90   /* Return the max value at which bucketing ends. */
  91   ValueType getMax() const { return buckets_.getMax(); }
  92
  93   /* Return the number of levels of the Timeseries object in each bucket */
  94   int getNumLevels() const {
  95     return buckets_.getByIndex(0).numLevels();
  96   }
  97
  98   /* Return the number of buckets */
  99   int getNumBuckets() const { return buckets_.getNumBuckets(); }
 100
 101   /* Return the bucket index into which the given value would fall. */
 102   int getBucketIdx(const ValueType& value) const;
 103
 104   /*
 105    * Return the threshold of the bucket for the given index in range
 106    * [0..numBuckets).  The bucket will have range [thresh, thresh + bucketSize)
 107    * or [thresh, max), whichever is shorter.
 108    */
 109   ValueType getBucketMin(int bucketIdx) const {
 110     return buckets_.getBucketMin(bucketIdx);
 111   }
 112
 113   /* Return the actual timeseries in the given bucket (for reading only!) */
 114   const ContainerType& getBucket(int bucketIdx) const {
 115     return buckets_.getByIndex(bucketIdx);
 116   }
 117
 118   /* Total count of values at the given timeseries level (all buckets). */
 119   int64_t count(int level) const {
 120     int64_t total = 0;
 121     for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
 122       total += buckets_.getByIndex(b).count(level);
 123     }
 124     return total;
 125   }
 126
 127   /* Total count of values added during the given interval (all buckets). */
 128   int64_t count(TimeType start, TimeType end) const {
 129     int64_t total = 0;
 130     for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
 131       total += buckets_.getByIndex(b).count(start, end);
 132     }
 133     return total;
 134   }
 135
 136   /* Total sum of values at the given timeseries level (all buckets). */
 137   ValueType sum(int level) const {
 138     ValueType total = ValueType();
 139     for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
 140       total += buckets_.getByIndex(b).sum(level);
 141     }
 142     return total;
 143   }
 144
 145   /* Total sum of values added during the given interval (all buckets). */
 146   ValueType sum(TimeType start, TimeType end) const {
 147     ValueType total = ValueType();
 148     for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
 149       total += buckets_.getByIndex(b).sum(start, end);
 150     }
 151     return total;
 152   }
 153
 154   /* Average of values at the given timeseries level (all buckets). */
 155   template <typename ReturnType=double>
 156   ReturnType avg(int level) const;
 157
 158   /* Average of values added during the given interval (all buckets). */
 159   template <typename ReturnType=double>
 160   ReturnType avg(TimeType start, TimeType end) const;
 161
 162   /*
 163    * Rate at the given timeseries level (all buckets).
 164    * This is the sum of all values divided by the time interval (in seconds).
 165    */
 166   ValueType rate(int level) const;
 167
 168   /*
 169    * Rate for the given interval (all buckets).
 170    * This is the sum of all values divided by the time interval (in seconds).
 171    */
 172   template <typename ReturnType=double>
 173   ReturnType rate(TimeType start, TimeType end) const;
 174
 175   /*
 176    * Update every underlying timeseries object with the given timestamp. You
 177    * must call this directly before querying to ensure that the data in all
 178    * buckets is decayed properly.
 179    */
 180   void update(TimeType now);
 181
 182   /* clear all the data from the histogram. */
 183   void clear();
 184
 185   /* Add a value into the histogram with timestamp 'now' */
 186   void addValue(TimeType now, const ValueType& value);
 187   /* Add a value the given number of times with timestamp 'now' */
 188   void addValue(TimeType now, const ValueType& value, int64_t times);
 189
 190   /*
 191    * Add all of the values from the specified histogram.
 192    *
 193    * All of the values will be added to the current time-slot.
 194    *
 195    * One use of this is for thread-local caching of frequently updated
 196    * histogram data.  For example, each thread can store a thread-local
 197    * Histogram that is updated frequently, and only add it to the global
 198    * TimeseriesHistogram once a second.
 199    */
 200   void addValues(TimeType now, const folly::Histogram<ValueType>& values);
 201
 202   /*
 203    * Return an estimate of the value at the given percentile in the histogram
 204    * in the given timeseries level.  The percentile is estimated as follows:
 205    *
 206    * - We retrieve a count of the values in each bucket (at the given level)
 207    * - We determine via the counts which bucket the given percentile falls in.
 208    * - We assume the average value in the bucket is also its median
 209    * - We then linearly interpolate within the bucket, by assuming that the
 210    *   distribution is uniform in the two value ranges [left, median) and
 211    *   [median, right) where [left, right) is the bucket value range.
 212    *
 213    * Caveats:
 214    * - If the histogram is empty, this always returns ValueType(), usually 0.
 215    * - For the 'under' and 'over' special buckets, their range is unbounded
 216    *   on one side.  In order for the interpolation to work, we assume that
 217    *   the average value in the bucket is equidistant from the two edges of
 218    *   the bucket.  In other words, we assume that the distance between the
 219    *   average and the known bound is equal to the distance between the average
 220    *   and the unknown bound.
 221    */
 222   ValueType getPercentileEstimate(double pct, int level) const;
 223   /*
 224    * Return an estimate of the value at the given percentile in the histogram
 225    * in the given historical interval.  Please see the documentation for
 226    * getPercentileEstimate(int pct, int level) for the explanation of the
 227    * estimation algorithm.
 228    */
 229   ValueType getPercentileEstimate(double pct, TimeType start, TimeType end)
 230     const;
 231
 232   /*
 233    * Return the bucket index that the given percentile falls into (in the
 234    * given timeseries level).  This index can then be used to retrieve either
 235    * the bucket threshold, or other data from inside the bucket.
 236    */
 237   int getPercentileBucketIdx(double pct, int level) const;
 238   /*
 239    * Return the bucket index that the given percentile falls into (in the
 240    * given historical interval).  This index can then be used to retrieve either
 241    * the bucket threshold, or other data from inside the bucket.
 242    */
 243   int getPercentileBucketIdx(double pct, TimeType start, TimeType end) const;
 244
 245   /* Get the bucket threshold for the bucket containing the given pct. */
 246   int getPercentileBucketMin(double pct, int level) const {
 247     return getBucketMin(getPercentileBucketIdx(pct, level));
 248   }
 249   /* Get the bucket threshold for the bucket containing the given pct. */
 250   int getPercentileBucketMin(double pct, TimeType start, TimeType end) const {
 251     return getBucketMin(getPercentileBucketIdx(pct, start, end));
 252   }
 253
 254   /*
 255    * Print out serialized data from all buckets at the given level.
 256    * Format is: BUCKET [',' BUCKET ...]
 257    * Where: BUCKET == bucketMin ':' count ':' avg
 258    */
 259   std::string getString(int level) const;
 260
 261   /*
 262    * Print out serialized data for all buckets in the historical interval.
 263    * For format, please see getString(int level).
 264    */
 265   std::string getString(TimeType start, TimeType end) const;
 266
 267  private:
 268   typedef ContainerType Bucket;
 269   struct CountFromLevel {
 270     explicit CountFromLevel(int level) : level_(level) {}
 271
 272     uint64_t operator()(const ContainerType& bucket) const {
 273       return bucket.count(level_);
 274     }
 275
 276    private:
 277     int level_;
 278   };
 279   struct CountFromInterval {
 280     explicit CountFromInterval(TimeType start, TimeType end)
 281       : start_(start),
 282         end_(end) {}
 283
 284     uint64_t operator()(const ContainerType& bucket) const {
 285       return bucket.count(start_, end_);
 286     }
 287
 288    private:
 289     TimeType start_;
 290     TimeType end_;
 291   };
 292
 293   struct AvgFromLevel {
 294     explicit AvgFromLevel(int level) : level_(level) {}
 295
 296     ValueType operator()(const ContainerType& bucket) const {
 297       return bucket.template avg<ValueType>(level_);
 298     }
 299
 300    private:
 301     int level_;
 302   };
 303
 304   template <typename ReturnType>
 305   struct AvgFromInterval {
 306     explicit AvgFromInterval(TimeType start, TimeType end)
 307       : start_(start),
 308         end_(end) {}
 309
 310     ReturnType operator()(const ContainerType& bucket) const {
 311       return bucket.template avg<ReturnType>(start_, end_);
 312     }
 313
 314    private:
 315     TimeType start_;
 316     TimeType end_;
 317   };
 318
 319   /*
 320    * Special logic for the case of only one unique value registered
 321    * (this can happen when clients don't pick good bucket ranges or have
 322    * other bugs).  It's a lot easier for clients to track down these issues
 323    * if they are getting the correct value.
 324    */
 325   void maybeHandleSingleUniqueValue(const ValueType& value);
 326
 327   folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
 328   bool haveNotSeenValue_;
 329   bool singleUniqueValue_;
 330   ValueType firstValue_;
 331 };
 332
 333 }  // folly
 334
 335 #endif  // FOLLY_TIMESERIES_HISTOGRAM_H_