folly/stats/MultiLevelTimeSeries.h

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <chrono>
  20 #include <stdexcept>
  21 #include <string>
  22 #include <vector>
  23
  24 #include <folly/String.h>
  25 #include <folly/stats/BucketedTimeSeries.h>
  26 #include <glog/logging.h>
  27
  28 namespace folly {
  29
  30 /*
  31  * This class represents a timeseries which keeps several levels of data
  32  * granularity (similar in principle to the loads reported by the UNIX
  33  * 'uptime' command).  It uses several instances (one per level) of
  34  * BucketedTimeSeries as the underlying storage.
  35  *
  36  * This can easily be used to track sums (and thus rates or averages) over
  37  * several predetermined time periods, as well as all-time sums.  For example,
  38  * you would use to it to track query rate or response speed over the last
  39  * 5, 15, 30, and 60 minutes.
  40  *
  41  * The MultiLevelTimeSeries takes a list of level durations as an input; the
  42  * durations must be strictly increasing.  Furthermore a special level can be
  43  * provided with a duration of '0' -- this will be an "all-time" level.  If
  44  * an all-time level is provided, it MUST be the last level present.
  45  *
  46  * The class assumes that time advances forward --  you can't retroactively add
  47  * values for events in the past -- the 'now' argument is provided for better
  48  * efficiency and ease of unittesting.
  49  *
  50  * The class is not thread-safe -- use your own synchronization!
  51  */
  52 template <typename VT, typename TT=std::chrono::seconds>
  53 class MultiLevelTimeSeries {
  54  public:
  55   typedef VT ValueType;
  56   typedef TT TimeType;
  57   typedef folly::BucketedTimeSeries<ValueType, TimeType> Level;
  58
  59   /*
  60    * Create a new MultiLevelTimeSeries.
  61    *
  62    * This creates a new MultiLevelTimeSeries that tracks time series data at the
  63    * specified time durations (level). The time series data tracked at each
  64    * level is then further divided by numBuckets for memory efficiency.
  65    *
  66    * The durations must be strictly increasing. Furthermore a special level can
  67    * be provided with a duration of '0' -- this will be an "all-time" level. If
  68    * an all-time level is provided, it MUST be the last level present.
  69    */
  70   MultiLevelTimeSeries(size_t numBuckets,
  71                        size_t numLevels,
  72                        const TimeType levelDurations[]);
  73
  74   MultiLevelTimeSeries(
  75       size_t numBuckets,
  76       std::initializer_list<TimeType> durations);
  77
  78   /*
  79    * Return the number of buckets used to track time series at each level.
  80    */
  81   size_t numBuckets() const {
  82     // The constructor ensures that levels_ has at least one item
  83     return levels_[0].numBuckets();
  84   }
  85
  86   /*
  87    * Return the number of levels tracked by MultiLevelTimeSeries.
  88    */
  89   size_t numLevels() const { return levels_.size(); }
  90
  91   /*
  92    * Get the BucketedTimeSeries backing the specified level.
  93    *
  94    * Note: you should generally call update() or flush() before accessing the
  95    * data. Otherwise you may be reading stale data if update() or flush() has
  96    * not been called recently.
  97    */
  98   const Level& getLevel(int level) const {
  99     CHECK(level >= 0);
 100     CHECK_LT(level, levels_.size());
 101     return levels_[level];
 102   }
 103
 104   /*
 105    * Get the highest granularity level that is still large enough to contain
 106    * data going back to the specified start time.
 107    *
 108    * Note: you should generally call update() or flush() before accessing the
 109    * data. Otherwise you may be reading stale data if update() or flush() has
 110    * not been called recently.
 111    */
 112   const Level& getLevel(TimeType start) const {
 113     for (const auto& level : levels_) {
 114       if (level.isAllTime()) {
 115         return level;
 116       }
 117       // Note that we use duration() here rather than elapsed().
 118       // If duration is large enough to contain the start time then this level
 119       // is good enough, even if elapsed() indicates that no data was recorded
 120       // before the specified start time.
 121       if (level.getLatestTime() - level.duration() <= start) {
 122         return level;
 123       }
 124     }
 125     // We should always have an all-time level, so this is never reached.
 126     LOG(FATAL) << "No level of timeseries covers internval"
 127                << " from " << start.count() << " to now";
 128     return levels_.back();
 129   }
 130
 131   /*
 132    * Get the BucketedTimeSeries backing the specified level.
 133    *
 134    * Note: you should generally call update() or flush() before accessing the
 135    * data. Otherwise you may be reading stale data if update() or flush() has
 136    * not been called recently.
 137    */
 138   const Level& getLevelByDuration(TimeType duration) const {
 139     // since the number of levels is expected to be small (less than 5 in most
 140     // cases), a simple linear scan would be efficient and is intentionally
 141     // chosen here over other alternatives for lookup.
 142     for (const auto& level : levels_) {
 143       if (level.duration() == duration) {
 144         return level;
 145       }
 146     }
 147     throw std::out_of_range(folly::to<std::string>(
 148         "No level of duration ", duration.count(), " found"));
 149   }
 150
 151   /*
 152    * Return the sum of all the data points currently tracked at this level.
 153    *
 154    * Note: you should generally call update() or flush() before accessing the
 155    * data. Otherwise you may be reading stale data if update() or flush() has
 156    * not been called recently.
 157    */
 158   ValueType sum(int level) const {
 159     return getLevel(level).sum();
 160   }
 161
 162   /*
 163    * Return the average (sum / count) of all the data points currently tracked
 164    * at this level.
 165    *
 166    * The return type may be specified to control whether floating-point or
 167    * integer division should be performed.
 168    *
 169    * Note: you should generally call update() or flush() before accessing the
 170    * data. Otherwise you may be reading stale data if update() or flush() has
 171    * not been called recently.
 172    */
 173   template <typename ReturnType=double>
 174   ReturnType avg(int level) const {
 175     return getLevel(level).template avg<ReturnType>();
 176   }
 177
 178   /*
 179    * Return the rate (sum divided by elaspsed time) of the all data points
 180    * currently tracked at this level.
 181    *
 182    * Note: you should generally call update() or flush() before accessing the
 183    * data. Otherwise you may be reading stale data if update() or flush() has
 184    * not been called recently.
 185    */
 186   template <typename ReturnType=double, typename Interval=TimeType>
 187   ReturnType rate(int level) const {
 188     return getLevel(level).template rate<ReturnType, Interval>();
 189   }
 190
 191   /*
 192    * Return the number of data points currently tracked at this level.
 193    *
 194    * Note: you should generally call update() or flush() before accessing the
 195    * data. Otherwise you may be reading stale data if update() or flush() has
 196    * not been called recently.
 197    */
 198   int64_t count(int level) const {
 199     return getLevel(level).count();
 200   }
 201
 202   /*
 203    * Return the count divided by the elapsed time tracked at this level.
 204    *
 205    * Note: you should generally call update() or flush() before accessing the
 206    * data. Otherwise you may be reading stale data if update() or flush() has
 207    * not been called recently.
 208    */
 209   template <typename ReturnType=double, typename Interval=TimeType>
 210   ReturnType countRate(int level) const {
 211     return getLevel(level).template countRate<ReturnType, Interval>();
 212   }
 213
 214   /*
 215    * Return the sum of all the data points currently tracked at this level.
 216    *
 217    * This method is identical to sum(int level) above but takes in the
 218    * duration that the user is interested in querying as the parameter.
 219    *
 220    * Note: you should generally call update() or flush() before accessing the
 221    * data. Otherwise you may be reading stale data if update() or flush() has
 222    * not been called recently.
 223    */
 224   ValueType sum(TimeType duration) const {
 225     return getLevelByDuration(duration).sum();
 226   }
 227
 228   /*
 229    * Return the average (sum / count) of all the data points currently tracked
 230    * at this level.
 231    *
 232    * This method is identical to avg(int level) above but takes in the
 233    * duration that the user is interested in querying as the parameter.
 234    *
 235    * Note: you should generally call update() or flush() before accessing the
 236    * data. Otherwise you may be reading stale data if update() or flush() has
 237    * not been called recently.
 238    */
 239   template <typename ReturnType = double>
 240   ReturnType avg(TimeType duration) const {
 241     return getLevelByDuration(duration).template avg<ReturnType>();
 242   }
 243
 244   /*
 245    * Return the rate (sum divided by elaspsed time) of the all data points
 246    * currently tracked at this level.
 247    *
 248    * This method is identical to rate(int level) above but takes in the
 249    * duration that the user is interested in querying as the parameter.
 250    *
 251    * Note: you should generally call update() or flush() before accessing the
 252    * data. Otherwise you may be reading stale data if update() or flush() has
 253    * not been called recently.
 254    */
 255   template <typename ReturnType = double, typename Interval = TimeType>
 256   ReturnType rate(TimeType duration) const {
 257     return getLevelByDuration(duration).template rate<ReturnType, Interval>();
 258   }
 259
 260   /*
 261    * Return the number of data points currently tracked at this level.
 262    *
 263    * This method is identical to count(int level) above but takes in the
 264    * duration that the user is interested in querying as the parameter.
 265    *
 266    * Note: you should generally call update() or flush() before accessing the
 267    * data. Otherwise you may be reading stale data if update() or flush() has
 268    * not been called recently.
 269    */
 270   int64_t count(TimeType duration) const {
 271     return getLevelByDuration(duration).count();
 272   }
 273
 274   /*
 275    * Return the count divided by the elapsed time tracked at this level.
 276    *
 277    * This method is identical to countRate(int level) above but takes in the
 278    * duration that the user is interested in querying as the parameter.
 279    *
 280    * Note: you should generally call update() or flush() before accessing the
 281    * data. Otherwise you may be reading stale data if update() or flush() has
 282    * not been called recently.
 283    */
 284   template <typename ReturnType = double, typename Interval = TimeType>
 285   ReturnType countRate(TimeType duration) const {
 286     return getLevelByDuration(duration)
 287         .template countRate<ReturnType, Interval>();
 288   }
 289
 290   /*
 291    * Estimate the sum of the data points that occurred in the specified time
 292    * period at this level.
 293    *
 294    * The range queried is [start, end).
 295    * That is, start is inclusive, and end is exclusive.
 296    *
 297    * Note that data outside of the timeseries duration will no longer be
 298    * available for use in the estimation.  Specifying a start time earlier than
 299    * getEarliestTime() will not have much effect, since only data points after
 300    * that point in time will be counted.
 301    *
 302    * Note that the value returned is an estimate, and may not be precise.
 303    *
 304    * Note: you should generally call update() or flush() before accessing the
 305    * data. Otherwise you may be reading stale data if update() or flush() has
 306    * not been called recently.
 307    */
 308   ValueType sum(TimeType start, TimeType end) const {
 309     return getLevel(start).sum(start, end);
 310   }
 311
 312   /*
 313    * Estimate the average value during the specified time period.
 314    *
 315    * The same caveats documented in the sum(TimeType start, TimeType end)
 316    * comments apply here as well.
 317    *
 318    * Note: you should generally call update() or flush() before accessing the
 319    * data. Otherwise you may be reading stale data if update() or flush() has
 320    * not been called recently.
 321    */
 322   template <typename ReturnType=double>
 323   ReturnType avg(TimeType start, TimeType end) const {
 324     return getLevel(start).template avg<ReturnType>(start, end);
 325   }
 326
 327   /*
 328    * Estimate the rate during the specified time period.
 329    *
 330    * The same caveats documented in the sum(TimeType start, TimeType end)
 331    * comments apply here as well.
 332    *
 333    * Note: you should generally call update() or flush() before accessing the
 334    * data. Otherwise you may be reading stale data if update() or flush() has
 335    * not been called recently.
 336    */
 337   template <typename ReturnType=double>
 338   ReturnType rate(TimeType start, TimeType end) const {
 339     return getLevel(start).template rate<ReturnType>(start, end);
 340   }
 341
 342   /*
 343    * Estimate the count during the specified time period.
 344    *
 345    * The same caveats documented in the sum(TimeType start, TimeType end)
 346    * comments apply here as well.
 347    *
 348    * Note: you should generally call update() or flush() before accessing the
 349    * data. Otherwise you may be reading stale data if update() or flush() has
 350    * not been called recently.
 351    */
 352   int64_t count(TimeType start, TimeType end) const {
 353     return getLevel(start).count(start, end);
 354   }
 355
 356   /*
 357    * Adds the value 'val' at time 'now' to all levels.
 358    *
 359    * Data points added at the same time point is cached internally here and not
 360    * propagated to the underlying levels until either flush() is called or when
 361    * update from a different time comes.
 362    *
 363    * This function expects time to always move forwards: it cannot be used to
 364    * add historical data points that have occurred in the past.  If now is
 365    * older than the another timestamp that has already been passed to
 366    * addValue() or update(), now will be ignored and the latest timestamp will
 367    * be used.
 368    */
 369   void addValue(TimeType now, const ValueType& val);
 370
 371   /*
 372    * Adds the value 'val' at time 'now' to all levels.
 373    */
 374   void addValue(TimeType now, const ValueType& val, int64_t times);
 375
 376   /*
 377    * Adds the value 'val' at time 'now' to all levels as the sum of 'nsamples'
 378    * samples.
 379    */
 380   void addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
 381
 382   /*
 383    * Update all the levels to the specified time, doing all the necessary
 384    * work to rotate the buckets and remove any stale data points.
 385    *
 386    * When reading data from the timeseries, you should make sure to manually
 387    * call update() before accessing the data. Otherwise you may be reading
 388    * stale data if update() has not been called recently.
 389    */
 390   void update(TimeType now);
 391
 392   /*
 393    * Reset all the timeseries to an empty state as if no data points have ever
 394    * been added to it.
 395    */
 396   void clear();
 397
 398   /*
 399    * Flush all cached updates.
 400    */
 401   void flush();
 402
 403  private:
 404   std::vector<Level> levels_;
 405
 406   // Updates within the same time interval are cached
 407   // They are flushed out when updates from a different time comes,
 408   // or flush() is called.
 409   TimeType cachedTime_;
 410   ValueType cachedSum_;
 411   int cachedCount_;
 412 };
 413
 414 } // folly