--- /dev/null
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_TIMESERIES_HISTOGRAM_DEF_H_
+#define FOLLY_TIMESERIES_HISTOGRAM_DEF_H_
+
+#include "folly/Conv.h"
+#include "folly/stats/Histogram-defs.h"
+#include "folly/stats/MultiLevelTimeSeries-defs.h"
+#include "folly/stats/BucketedTimeSeries-defs.h"
+
+namespace folly {
+
+template <class T, class TT, class C>
+template <typename ReturnType>
+ReturnType TimeseriesHistogram<T, TT, C>::avg(int level) const {
+ ValueType total = ValueType();
+ int64_t count = 0;
+ for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+ const auto& levelObj = buckets_.getByIndex(b).getLevel(level);
+ total += levelObj.sum();
+ count += levelObj.count();
+ }
+ return folly::detail::avgHelper<ReturnType>(total, count);
+}
+
+template <class T, class TT, class C>
+template <typename ReturnType>
+ReturnType TimeseriesHistogram<T, TT, C>::avg(TimeType start,
+ TimeType end) const {
+ ValueType total = ValueType();
+ int64_t count = 0;
+ for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+    const auto& levelObj = buckets_.getByIndex(b).getLevel(start);
+ total += levelObj.sum(start, end);
+ count += levelObj.count(start, end);
+ }
+ return folly::detail::avgHelper<ReturnType>(total, count);
+}
+
+template <class T, class TT, class C>
+template <typename ReturnType>
+ReturnType TimeseriesHistogram<T, TT, C>::rate(TimeType start,
+ TimeType end) const {
+ ValueType total = ValueType();
+ TimeType elapsed(0);
+ for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+ const auto& level = buckets_.getByIndex(b).getLevel(start);
+ total += level.sum(start, end);
+ elapsed = std::max(elapsed, level.elapsed(start, end));
+ }
+ return folly::detail::rateHelper<ReturnType, TimeType, TimeType>(
+ total, elapsed);
+}
+
+template <typename T, typename TT, typename C>
+TimeseriesHistogram<T, TT, C>::TimeseriesHistogram(ValueType bucketSize,
+ ValueType min,
+ ValueType max,
+ const ContainerType& copyMe)
+ : buckets_(bucketSize, min, max, copyMe),
+ haveNotSeenValue_(true),
+ singleUniqueValue_(false) {
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::addValue(TimeType now,
+ const ValueType& value) {
+ buckets_.getByValue(value).addValue(now, value);
+ maybeHandleSingleUniqueValue(value);
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::addValue(TimeType now,
+ const ValueType& value,
+ int64_t times) {
+ buckets_.getByValue(value).addValue(now, value, times);
+ maybeHandleSingleUniqueValue(value);
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::addValues(
+ TimeType now, const folly::Histogram<ValueType>& hist) {
+ CHECK_EQ(hist.getMin(), getMin());
+ CHECK_EQ(hist.getMax(), getMax());
+ CHECK_EQ(hist.getBucketSize(), getBucketSize());
+ CHECK_EQ(hist.getNumBuckets(), getNumBuckets());
+
+ for (unsigned int n = 0; n < hist.getNumBuckets(); ++n) {
+ const typename folly::Histogram<ValueType>::Bucket& histBucket =
+ hist.getBucketByIndex(n);
+ Bucket& myBucket = buckets_.getByIndex(n);
+ myBucket.addValueAggregated(now, histBucket.sum, histBucket.count);
+ }
+
+  // We don't bother with the singleUniqueValue_ tracking here; conservatively
+  // assume the merged histogram contained multiple distinct values.
+ haveNotSeenValue_ = false;
+ singleUniqueValue_ = false;
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::maybeHandleSingleUniqueValue(
+ const ValueType& value) {
+ if (haveNotSeenValue_) {
+ firstValue_ = value;
+ singleUniqueValue_ = true;
+ haveNotSeenValue_ = false;
+ } else if (singleUniqueValue_) {
+ if (value != firstValue_) {
+ singleUniqueValue_ = false;
+ }
+ }
+}
+
+template <typename T, typename TT, typename C>
+T TimeseriesHistogram<T, TT, C>::getPercentileEstimate(double pct,
+ int level) const {
+ if (singleUniqueValue_) {
+ return firstValue_;
+ }
+
+ return buckets_.getPercentileEstimate(pct / 100.0, CountFromLevel(level),
+ AvgFromLevel(level));
+}
+
+template <typename T, typename TT, typename C>
+T TimeseriesHistogram<T, TT, C>::getPercentileEstimate(double pct,
+ TimeType start,
+ TimeType end) const {
+ if (singleUniqueValue_) {
+ return firstValue_;
+ }
+
+ return buckets_.getPercentileEstimate(pct / 100.0,
+ CountFromInterval(start, end),
+ AvgFromInterval<T>(start, end));
+}
+
+template <typename T, typename TT, typename C>
+int TimeseriesHistogram<T, TT, C>::getPercentileBucketIdx(
+ double pct,
+ int level
+) const {
+ return buckets_.getPercentileBucketIdx(pct / 100.0, CountFromLevel(level));
+}
+
+template <typename T, typename TT, typename C>
+int TimeseriesHistogram<T, TT, C>::getPercentileBucketIdx(double pct,
+ TimeType start,
+ TimeType end) const {
+ return buckets_.getPercentileBucketIdx(pct / 100.0,
+ CountFromInterval(start, end));
+}
+
+template <typename T, typename TT, typename C>
+T TimeseriesHistogram<T, TT, C>::rate(int level) const {
+ ValueType total = ValueType();
+ TimeType elapsed(0);
+ for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+ const auto& levelObj = buckets_.getByIndex(b).getLevel(level);
+ total += levelObj.sum();
+ elapsed = std::max(elapsed, levelObj.elapsed());
+ }
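+  // Avoid dividing by zero when no time has elapsed at this level yet.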
+ return elapsed == TimeType(0) ? 0 : (total / elapsed.count());
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::clear() {
+ for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ buckets_.getByIndex(i).clear();
+ }
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::update(TimeType now) {
+ for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ buckets_.getByIndex(i).update(now);
+ }
+}
+
+template <typename T, typename TT, typename C>
+std::string TimeseriesHistogram<T, TT, C>::getString(int level) const {
+ std::string result;
+
+ for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ if (i > 0) {
+ toAppend(",", &result);
+ }
+ const ContainerType& cont = buckets_.getByIndex(i);
+ toAppend(buckets_.getBucketMin(i),
+ ":", cont.count(level),
+ ":", cont.avg<ValueType>(level), &result);
+ }
+
+ return result;
+}
+
+template <typename T, typename TT, typename C>
+std::string TimeseriesHistogram<T, TT, C>::getString(TimeType start,
+ TimeType end) const {
+ std::string result;
+
+ for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ if (i > 0) {
+ toAppend(",", &result);
+ }
+ const ContainerType& cont = buckets_.getByIndex(i);
+ toAppend(buckets_.getBucketMin(i),
+ ":", cont.count(start, end),
+ ":", cont.avg(start, end), &result);
+ }
+
+ return result;
+}
+
+} // namespace folly
+
+#endif // FOLLY_TIMESERIES_HISTOGRAM_DEF_H_
--- /dev/null
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_TIMESERIES_HISTOGRAM_H_
+#define FOLLY_TIMESERIES_HISTOGRAM_H_
+
+#include <chrono>
+#include <limits>
+#include <string>
+#include <boost/static_assert.hpp>
+#include "folly/stats/Histogram.h"
+#include "folly/stats/MultiLevelTimeSeries.h"
+
+namespace folly {
+
+/*
+ * TimeseriesHistogram tracks data distributions as they change over time.
+ *
+ * Specifically, it is a bucketed histogram with different value ranges assigned
+ * to each bucket. Within each bucket is a MultiLevelTimeSeries from
+ * 'folly/stats/MultiLevelTimeSeries.h'. This means each bucket tracks its
+ * data separately over several historical time periods, so one can query
+ * data distributions over different trailing time windows.
+ *
+ * For example, this can answer questions like: "What is the data
+ * distribution over the last minute? Over the last 10 minutes? Since I last
+ * cleared this histogram?"
+ *
+ * The class can also estimate percentiles and answer questions like: "What was
+ * the 99th percentile data value over the last 10 minutes?"
+ *
+ * Note that, depending on the size of your buckets and the smoothness of
+ * your data distribution, the estimate may be way off from the actual value.
+ * In particular, if the given percentile falls outside of the bucket range
+ * (e.g. your buckets cover 0 - 100,000 but the 99th percentile is around
+ * 115,000), this estimate may be very wrong.
+ *
+ * The memory usage for a typical histogram is roughly 3k * (# of buckets). All
+ * insertion operations are amortized O(1), and all queries are O(# of buckets).
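+ *
+ * Example usage (a minimal sketch; the bucket layout and level durations are
+ * illustrative choices, not required values):
+ *
+ *   // 100-unit-wide buckets over [0, 10000), each holding a two-level
+ *   // timeseries: a 60-second window and an all-time level (duration 0).
+ *   std::chrono::seconds durations[] = {
+ *     std::chrono::seconds(60), std::chrono::seconds(0)};
+ *   folly::TimeseriesHistogram<int64_t> hist(
+ *     100, 0, 10000,
+ *     folly::MultiLevelTimeSeries<int64_t>(60, 2, durations));
+ *
+ *   hist.addValue(std::chrono::seconds(time(nullptr)), 1234);
+ *   hist.update(std::chrono::seconds(time(nullptr)));   // decay old data
+ *   int64_t p99 = hist.getPercentileEstimate(99.0, 0);  // level 0 = 60s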
+ */
+template <class T, class TT=std::chrono::seconds,
+ class C=folly::MultiLevelTimeSeries<T, TT>>
+class TimeseriesHistogram {
+ private:
+  // NOTE: T must be a _signed_ numeric type for our math.
+ BOOST_STATIC_ASSERT(std::numeric_limits<T>::is_signed);
+
+ public:
+ // values to be inserted into container
+ typedef T ValueType;
+ // the container type we use internally for each bucket
+ typedef C ContainerType;
+ // The time type.
+ typedef TT TimeType;
+
+ /*
+ * Create a TimeSeries histogram and initialize the bucketing and levels.
+ *
+ * The buckets are created by chopping the range [min, max) into pieces
+ * of size bucketSize, with the last bucket being potentially shorter. Two
+ * additional buckets are always created -- the "under" bucket for the range
+ * (-inf, min) and the "over" bucket for the range [max, +inf).
+ *
+ * @param bucketSize the width of each bucket
+ * @param min the smallest value for the bucket range.
+ * @param max the largest value for the bucket range
+ * @param defaultContainer a pre-initialized timeseries with the desired
+ * number of levels and their durations.
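+   *
+   * For example, bucketSize = 10, min = 10, max = 110 produces ten buckets
+   * covering [10, 110) plus the "under" and "over" buckets, twelve in total.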
+ */
+ TimeseriesHistogram(ValueType bucketSize, ValueType min, ValueType max,
+ const ContainerType& defaultContainer);
+
+ /* Return the bucket size of each bucket in the histogram. */
+ ValueType getBucketSize() const { return buckets_.getBucketSize(); }
+
+ /* Return the min value at which bucketing begins. */
+ ValueType getMin() const { return buckets_.getMin(); }
+
+ /* Return the max value at which bucketing ends. */
+ ValueType getMax() const { return buckets_.getMax(); }
+
+ /* Return the number of levels of the Timeseries object in each bucket */
+ int getNumLevels() const {
+ return buckets_.getByIndex(0).numLevels();
+ }
+
+ /* Return the number of buckets */
+ int getNumBuckets() const { return buckets_.getNumBuckets(); }
+
+ /* Return the bucket index into which the given value would fall. */
+ int getBucketIdx(const ValueType& value) const;
+
+ /*
+ * Return the threshold of the bucket for the given index in range
+ * [0..numBuckets). The bucket will have range [thresh, thresh + bucketSize)
+ * or [thresh, max), whichever is shorter.
+ */
+ ValueType getBucketMin(int bucketIdx) const {
+ return buckets_.getBucketMin(bucketIdx);
+ }
+
+ /* Return the actual timeseries in the given bucket (for reading only!) */
+ const ContainerType& getBucket(int bucketIdx) const {
+ return buckets_.getByIndex(bucketIdx);
+ }
+
+ /* Total count of values at the given timeseries level (all buckets). */
+ int64_t count(int level) const {
+ int64_t total = 0;
+ for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+ total += buckets_.getByIndex(b).count(level);
+ }
+ return total;
+ }
+
+ /* Total count of values added during the given interval (all buckets). */
+ int64_t count(TimeType start, TimeType end) const {
+ int64_t total = 0;
+ for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+ total += buckets_.getByIndex(b).count(start, end);
+ }
+ return total;
+ }
+
+ /* Total sum of values at the given timeseries level (all buckets). */
+ ValueType sum(int level) const {
+ ValueType total = ValueType();
+ for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+ total += buckets_.getByIndex(b).sum(level);
+ }
+ return total;
+ }
+
+ /* Total sum of values added during the given interval (all buckets). */
+ ValueType sum(TimeType start, TimeType end) const {
+ ValueType total = ValueType();
+ for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+ total += buckets_.getByIndex(b).sum(start, end);
+ }
+ return total;
+ }
+
+ /* Average of values at the given timeseries level (all buckets). */
+ template <typename ReturnType=double>
+ ReturnType avg(int level) const;
+
+ /* Average of values added during the given interval (all buckets). */
+ template <typename ReturnType=double>
+ ReturnType avg(TimeType start, TimeType end) const;
+
+ /*
+ * Rate at the given timeseries level (all buckets).
+ * This is the sum of all values divided by the time interval (in seconds).
+ */
+ ValueType rate(int level) const;
+
+ /*
+ * Rate for the given interval (all buckets).
+ * This is the sum of all values divided by the time interval (in seconds).
+ */
+ template <typename ReturnType=double>
+ ReturnType rate(TimeType start, TimeType end) const;
+
+ /*
+ * Update every underlying timeseries object with the given timestamp. You
+ * must call this directly before querying to ensure that the data in all
+ * buckets is decayed properly.
+ */
+ void update(TimeType now);
+
+  /* Clear all the data from the histogram. */
+ void clear();
+
+ /* Add a value into the histogram with timestamp 'now' */
+ void addValue(TimeType now, const ValueType& value);
+ /* Add a value the given number of times with timestamp 'now' */
+ void addValue(TimeType now, const ValueType& value, int64_t times);
+
+ /*
+ * Add all of the values from the specified histogram.
+ *
+ * All of the values will be added to the current time-slot.
+ *
+ * One use of this is for thread-local caching of frequently updated
+ * histogram data. For example, each thread can store a thread-local
+ * Histogram that is updated frequently, and only add it to the global
+ * TimeseriesHistogram once a second.
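+   *
+   * A minimal sketch of that pattern (names and locking are illustrative):
+   *
+   *   folly::Histogram<int64_t> local(bucketSize, min, max); // per-thread
+   *   local.addValue(v);              // hot path: no shared state touched
+   *   // Roughly once a second, under whatever lock guards 'global':
+   *   global.addValues(now, local);   // bucket layouts must match exactly
+   *   local.clear();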
+ */
+ void addValues(TimeType now, const folly::Histogram<ValueType>& values);
+
+ /*
+ * Return an estimate of the value at the given percentile in the histogram
+ * in the given timeseries level. The percentile is estimated as follows:
+ *
+ * - We retrieve a count of the values in each bucket (at the given level)
+ * - We determine via the counts which bucket the given percentile falls in.
+ * - We assume the average value in the bucket is also its median
+ * - We then linearly interpolate within the bucket, by assuming that the
+ * distribution is uniform in the two value ranges [left, median) and
+ * [median, right) where [left, right) is the bucket value range.
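+   *
+   * Worked example (illustrative numbers): suppose the target percentile
+   * lands 25% of the way through a bucket spanning [50, 60) whose average
+   * value is 53. The bottom half of the bucket's points is assumed uniform
+   * over [50, 53), so the estimate is 50 + (25 / 50) * (53 - 50) = 51.5.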
+ *
+ * Caveats:
+ * - If the histogram is empty, this always returns ValueType(), usually 0.
+ * - For the 'under' and 'over' special buckets, their range is unbounded
+ * on one side. In order for the interpolation to work, we assume that
+ * the average value in the bucket is equidistant from the two edges of
+ * the bucket. In other words, we assume that the distance between the
+ * average and the known bound is equal to the distance between the average
+ * and the unknown bound.
+ */
+ ValueType getPercentileEstimate(double pct, int level) const;
+ /*
+ * Return an estimate of the value at the given percentile in the histogram
+ * in the given historical interval. Please see the documentation for
+   * getPercentileEstimate(double pct, int level) for an explanation of the
+ * estimation algorithm.
+ */
+ ValueType getPercentileEstimate(double pct, TimeType start, TimeType end)
+ const;
+
+ /*
+ * Return the bucket index that the given percentile falls into (in the
+ * given timeseries level). This index can then be used to retrieve either
+ * the bucket threshold, or other data from inside the bucket.
+ */
+ int getPercentileBucketIdx(double pct, int level) const;
+ /*
+ * Return the bucket index that the given percentile falls into (in the
+ * given historical interval). This index can then be used to retrieve either
+ * the bucket threshold, or other data from inside the bucket.
+ */
+ int getPercentileBucketIdx(double pct, TimeType start, TimeType end) const;
+
+ /* Get the bucket threshold for the bucket containing the given pct. */
+  ValueType getPercentileBucketMin(double pct, int level) const {
+ return getBucketMin(getPercentileBucketIdx(pct, level));
+ }
+ /* Get the bucket threshold for the bucket containing the given pct. */
+  ValueType getPercentileBucketMin(double pct, TimeType start,
+                                   TimeType end) const {
+ return getBucketMin(getPercentileBucketIdx(pct, start, end));
+ }
+
+ /*
+ * Print out serialized data from all buckets at the given level.
+ * Format is: BUCKET [',' BUCKET ...]
+ * Where: BUCKET == bucketMin ':' count ':' avg
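+   * For example, "10:8:13,20:8:24" encodes two buckets: one starting at 10
+   * with count 8 and average 13, and one starting at 20 with count 8 and
+   * average 24.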
+ */
+ std::string getString(int level) const;
+
+ /*
+ * Print out serialized data for all buckets in the historical interval.
+ * For format, please see getString(int level).
+ */
+ std::string getString(TimeType start, TimeType end) const;
+
+ private:
+ typedef ContainerType Bucket;
+ struct CountFromLevel {
+ explicit CountFromLevel(int level) : level_(level) {}
+
+ uint64_t operator()(const ContainerType& bucket) const {
+ return bucket.count(level_);
+ }
+
+ private:
+ int level_;
+ };
+ struct CountFromInterval {
+ explicit CountFromInterval(TimeType start, TimeType end)
+ : start_(start),
+ end_(end) {}
+
+ uint64_t operator()(const ContainerType& bucket) const {
+ return bucket.count(start_, end_);
+ }
+
+ private:
+ TimeType start_;
+ TimeType end_;
+ };
+
+ struct AvgFromLevel {
+ explicit AvgFromLevel(int level) : level_(level) {}
+
+ ValueType operator()(const ContainerType& bucket) const {
+ return bucket.template avg<ValueType>(level_);
+ }
+
+ private:
+ int level_;
+ };
+
+ template <typename ReturnType>
+ struct AvgFromInterval {
+ explicit AvgFromInterval(TimeType start, TimeType end)
+ : start_(start),
+ end_(end) {}
+
+ ReturnType operator()(const ContainerType& bucket) const {
+ return bucket.template avg<ReturnType>(start_, end_);
+ }
+
+ private:
+ TimeType start_;
+ TimeType end_;
+ };
+
+ /*
+ * Special logic for the case of only one unique value registered
+ * (this can happen when clients don't pick good bucket ranges or have
+ * other bugs). It's a lot easier for clients to track down these issues
+ * if they are getting the correct value.
+ */
+ void maybeHandleSingleUniqueValue(const ValueType& value);
+
+ folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
+ bool haveNotSeenValue_;
+ bool singleUniqueValue_;
+ ValueType firstValue_;
+};
+
+} // namespace folly
+
+#endif // FOLLY_TIMESERIES_HISTOGRAM_H_
--- /dev/null
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/stats/TimeseriesHistogram.h"
+#include "folly/stats/TimeseriesHistogram-defs.h"
+
+#include <ctime>
+#include <random>
+
+#include <gtest/gtest.h>
+
+using namespace std;
+using namespace folly;
+using std::chrono::seconds;
+
+namespace IntMTMHTS {
+ enum Levels {
+ MINUTE,
+ TEN_MINUTE,
+ HOUR,
+ ALLTIME,
+ NUM_LEVELS,
+ };
+
+ const seconds kDurations[] = {
+ seconds(60), seconds(600), seconds(3600), seconds(0)
+ };
+} // namespace IntMTMHTS
+
+namespace IntMHTS {
+ enum Levels {
+ MINUTE,
+ HOUR,
+ ALLTIME,
+ NUM_LEVELS,
+ };
+
+ const seconds kDurations[] = {
+ seconds(60), seconds(3600), seconds(0)
+ };
+} // namespace IntMHTS
+
+typedef std::mt19937 RandomInt32;
+
+TEST(TimeseriesHistogram, Percentile) {
+ RandomInt32 random(5);
+ // [10, 109], 12 buckets including above and below
+ {
+ TimeseriesHistogram<int> h(10, 10, 110,
+ MultiLevelTimeSeries<int>(
+ 60, IntMTMHTS::NUM_LEVELS,
+ IntMTMHTS::kDurations));
+
+ EXPECT_EQ(0, h.getPercentileEstimate(0, IntMTMHTS::ALLTIME));
+
+ EXPECT_EQ(12, h.getNumBuckets());
+ EXPECT_EQ(10, h.getBucketSize());
+ EXPECT_EQ(10, h.getMin());
+ EXPECT_EQ(110, h.getMax());
+
+ for (int i = 0; i < h.getNumBuckets(); ++i) {
+ EXPECT_EQ(4, h.getBucket(i).numLevels());
+ }
+
+ int maxVal = 120;
+ h.addValue(seconds(0), 0);
+ h.addValue(seconds(0), maxVal);
+ for (int i = 0; i < 98; i++) {
+ h.addValue(seconds(0), random() % maxVal);
+ }
+
+ h.update(std::chrono::duration_cast<std::chrono::seconds>(
+ std::chrono::system_clock::now().time_since_epoch()));
+ // bucket 0 stores everything below min, so its minimum
+ // is the lowest possible number
+ EXPECT_EQ(std::numeric_limits<int>::min(),
+ h.getPercentileBucketMin(1, IntMTMHTS::ALLTIME));
+ EXPECT_EQ(110, h.getPercentileBucketMin(99, IntMTMHTS::ALLTIME));
+
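+    // These estimates fall in the unbounded "under"/"over" buckets, where the
+    // interpolation assumes the bucket average sits equidistant from both
+    // edges (see getPercentileEstimate() docs), so estimates can fall outside
+    // the [10, 110) bucket range.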
+ EXPECT_EQ(-2, h.getPercentileEstimate(0, IntMTMHTS::ALLTIME));
+ EXPECT_EQ(-1, h.getPercentileEstimate(1, IntMTMHTS::ALLTIME));
+ EXPECT_EQ(119, h.getPercentileEstimate(99, IntMTMHTS::ALLTIME));
+ EXPECT_EQ(120, h.getPercentileEstimate(100, IntMTMHTS::ALLTIME));
+ }
+}
+
+TEST(TimeseriesHistogram, String) {
+ RandomInt32 random(5);
+ // [10, 109], 12 buckets including above and below
+ {
+ TimeseriesHistogram<int> hist(10, 10, 110,
+ MultiLevelTimeSeries<int>(
+ 60, IntMTMHTS::NUM_LEVELS,
+ IntMTMHTS::kDurations));
+
+ int maxVal = 120;
+ hist.addValue(seconds(0), 0);
+ hist.addValue(seconds(0), maxVal);
+ for (int i = 0; i < 98; i++) {
+ hist.addValue(seconds(0), random() % maxVal);
+ }
+
+ hist.update(seconds(0));
+
+ const char* const kStringValues1[IntMTMHTS::NUM_LEVELS] = {
+ "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
+ "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
+ "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
+ "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
+ "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
+ "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
+ "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
+ "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
+ };
+
+ CHECK_EQ(IntMTMHTS::NUM_LEVELS, hist.getNumLevels());
+
+ for (int level = 0; level < hist.getNumLevels(); ++level) {
+ EXPECT_EQ(kStringValues1[level], hist.getString(level));
+ }
+ }
+}
+
+TEST(TimeseriesHistogram, Clear) {
+ {
+ TimeseriesHistogram<int> hist(10, 0, 100,
+ MultiLevelTimeSeries<int>(
+ 60, IntMTMHTS::NUM_LEVELS,
+ IntMTMHTS::kDurations));
+
+ for (int now = 0; now < 3600; now++) {
+ for (int i = 0; i < 100; i++) {
+ hist.addValue(seconds(now), i, 2); // adds each item 2 times
+ }
+ }
+
+ // check clearing
+ hist.clear();
+
+ for (int b = 0; b < hist.getNumBuckets(); ++b) {
+ EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::MINUTE));
+ EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::HOUR));
+ EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
+ }
+
+ for (int pct = 0; pct <= 100; pct++) {
+ EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
+ EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
+ EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
+
+ EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::MINUTE));
+ EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::HOUR));
+ EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::ALLTIME));
+ }
+ }
+}
+
+
+TEST(TimeseriesHistogram, Basic) {
+ {
+ TimeseriesHistogram<int> hist(10, 0, 100,
+ MultiLevelTimeSeries<int>(
+ 60, IntMTMHTS::NUM_LEVELS,
+ IntMTMHTS::kDurations));
+
+ for (int now = 0; now < 3600; now++) {
+ for (int i = 0; i < 100; i++) {
+ hist.addValue(seconds(now), i);
+ }
+ }
+
+ hist.update(seconds(3599));
+ for (int pct = 1; pct <= 100; pct++) {
+ int expected = (pct - 1) / 10 * 10;
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
+ IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
+ }
+
+ for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
+ EXPECT_EQ(600, hist.getBucket(b).count(IntMTMHTS::MINUTE));
+ EXPECT_EQ(6000, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::HOUR));
+ EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
+ }
+ EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
+ EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
+ IntMTMHTS::MINUTE));
+ }
+
+ // -----------------
+
+ {
+ TimeseriesHistogram<int> hist(10, 0, 100,
+ MultiLevelTimeSeries<int>(
+ 60, IntMTMHTS::NUM_LEVELS,
+ IntMTMHTS::kDurations));
+
+ for (int now = 0; now < 3600; now++) {
+ for (int i = 0; i < 100; i++) {
+ hist.addValue(seconds(now), i, 2); // adds each item 2 times
+ }
+ }
+
+ hist.update(seconds(3599));
+ for (int pct = 1; pct <= 100; pct++) {
+ int expected = (pct - 1) / 10 * 10;
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
+ IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
+ }
+
+ for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
+ EXPECT_EQ(600 * 2, hist.getBucket(b).count(IntMTMHTS::MINUTE));
+ EXPECT_EQ(6000 * 2, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(36000 * 2, hist.getBucket(b).count(IntMTMHTS::HOUR));
+ EXPECT_EQ(36000 * 2, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
+ }
+ EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
+ EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
+ IntMTMHTS::MINUTE));
+ }
+
+ // -----------------
+
+ {
+ TimeseriesHistogram<int> hist(10, 0, 100,
+ MultiLevelTimeSeries<int>(
+ 60, IntMTMHTS::NUM_LEVELS,
+ IntMTMHTS::kDurations));
+
+ for (int now = 0; now < 3600; now++) {
+ for (int i = 0; i < 50; i++) {
+ hist.addValue(seconds(now), i * 2, 2); // adds each item 2 times
+ }
+ }
+
+ hist.update(seconds(3599));
+ for (int pct = 1; pct <= 100; pct++) {
+ int expected = (pct - 1) / 10 * 10;
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
+ IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
+ EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
+ }
+
+ EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
+ EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::HOUR));
+ EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::ALLTIME));
+ EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
+ IntMTMHTS::MINUTE));
+ EXPECT_EQ(0,
+ hist.getBucket(hist.getNumBuckets() - 1).
+ count(IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
+ IntMTMHTS::HOUR));
+ EXPECT_EQ(0,
+ hist.getBucket(hist.getNumBuckets() - 1).count(
+ IntMTMHTS::ALLTIME));
+
+ for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
+ EXPECT_EQ(600, hist.getBucket(b).count(IntMTMHTS::MINUTE));
+ EXPECT_EQ(6000, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
+ EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::HOUR));
+ EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
+ }
+
+ for (int i = 0; i < 100; ++i) {
+ hist.addValue(seconds(3599), 200 + i);
+ }
+ hist.update(seconds(3599));
+ EXPECT_EQ(100,
+ hist.getBucket(hist.getNumBuckets() - 1).count(
+ IntMTMHTS::ALLTIME));
+
+ }
+}
+
+TEST(TimeseriesHistogram, QueryByInterval) {
+ TimeseriesHistogram<int> mhts(8, 8, 120,
+ MultiLevelTimeSeries<int>(
+ 60, IntMHTS::NUM_LEVELS,
+ IntMHTS::kDurations));
+
+ mhts.update(seconds(0));
+
+ int curTime;
+ for (curTime = 0; curTime < 7200; curTime++) {
+ mhts.addValue(seconds(curTime), 1);
+ }
+ for (curTime = 7200; curTime < 7200 + 3540; curTime++) {
+ mhts.addValue(seconds(curTime), 10);
+ }
+ for (curTime = 7200 + 3540; curTime < 7200 + 3600; curTime++) {
+ mhts.addValue(seconds(curTime), 100);
+ }
+
+ mhts.update(seconds(7200 + 3600 - 1));
+
+ struct TimeInterval {
+ TimeInterval(int s, int e)
+ : start(s), end(e) {}
+
+ std::chrono::seconds start;
+ std::chrono::seconds end;
+ };
+ TimeInterval intervals[12] = {
+ { curTime - 60, curTime },
+ { curTime - 3600, curTime },
+ { curTime - 7200, curTime },
+ { curTime - 3600, curTime - 60 },
+ { curTime - 7200, curTime - 60 },
+ { curTime - 7200, curTime - 3600 },
+ { curTime - 50, curTime - 20 },
+ { curTime - 3020, curTime - 20 },
+ { curTime - 7200, curTime - 20 },
+ { curTime - 3000, curTime - 1000 },
+ { curTime - 7200, curTime - 1000 },
+ { curTime - 7200, curTime - 3600 },
+ };
+
+ int expectedSums[12] = {
+ 6000, 41400, 32400, 35400, 32129, 16200, 3000, 33600, 32308, 20000, 27899,
+ 16200
+ };
+
+ int expectedCounts[12] = {
+ 60, 3600, 7200, 3540, 7139, 3600, 30, 3000, 7178, 2000, 6199, 3600
+ };
+
+ // The first 7200 values added all fell below the histogram minimum,
+ // and went into the bucket that tracks all of the too-small values.
+ // This bucket reports a minimum value of the smallest possible integer.
+ int belowMinBucket = std::numeric_limits<int>::min();
+
+ int expectedValues[12][3] = {
+ {96, 96, 96},
+ { 8, 8, 96},
+ { belowMinBucket, belowMinBucket, 8}, // alltime
+ { 8, 8, 8},
+ { belowMinBucket, belowMinBucket, 8}, // alltime
+ { belowMinBucket, belowMinBucket, 8}, // alltime
+ {96, 96, 96},
+ { 8, 8, 96},
+ { belowMinBucket, belowMinBucket, 8}, // alltime
+ { 8, 8, 8},
+ { belowMinBucket, belowMinBucket, 8}, // alltime
+ { belowMinBucket, belowMinBucket, 8} // alltime
+ };
+
+ for (int i = 0; i < 12; i++) {
+ const auto& itv = intervals[i];
+ int s = mhts.sum(itv.start, itv.end);
+ EXPECT_EQ(expectedSums[i], s);
+
+ int c = mhts.count(itv.start, itv.end);
+ EXPECT_EQ(expectedCounts[i], c);
+ }
+
+  // Percentile queries by timeseries level and by explicit time interval.
+ for (int i = 1; i <= 100; i++) {
+ EXPECT_EQ(96, mhts.getPercentileBucketMin(i, 0));
+ EXPECT_EQ(96, mhts.getPercentileBucketMin(i, seconds(curTime - 60),
+ seconds(curTime)));
+ EXPECT_EQ(8, mhts.getPercentileBucketMin(i, seconds(curTime - 3540),
+ seconds(curTime - 60)));
+ }
+
+ EXPECT_EQ(8, mhts.getPercentileBucketMin(1, 1));
+ EXPECT_EQ(8, mhts.getPercentileBucketMin(98, 1));
+ EXPECT_EQ(96, mhts.getPercentileBucketMin(99, 1));
+ EXPECT_EQ(96, mhts.getPercentileBucketMin(100, 1));
+
+ EXPECT_EQ(belowMinBucket, mhts.getPercentileBucketMin(1, 2));
+ EXPECT_EQ(belowMinBucket, mhts.getPercentileBucketMin(66, 2));
+ EXPECT_EQ(8, mhts.getPercentileBucketMin(67, 2));
+ EXPECT_EQ(8, mhts.getPercentileBucketMin(99, 2));
+ EXPECT_EQ(96, mhts.getPercentileBucketMin(100, 2));
+
+  // Intervals reaching back into the below-min data report belowMinBucket
+  // (INT_MIN) as the 1st-percentile bucket minimum; see expectedValues above.
+ for (int i = 0; i < 12; i++) {
+ const auto& itv = intervals[i];
+ int v = mhts.getPercentileBucketMin(1, itv.start, itv.end);
+ EXPECT_EQ(expectedValues[i][0], v);
+
+ v = mhts.getPercentileBucketMin(50, itv.start, itv.end);
+ EXPECT_EQ(expectedValues[i][1], v);
+
+ v = mhts.getPercentileBucketMin(99, itv.start, itv.end);
+ EXPECT_EQ(expectedValues[i][2], v);
+ }
+
+ for (int i = 0; i < 12; i++) {
+ const auto& itv = intervals[i];
+ // Some of the older intervals that fall in the alltime bucket
+ // are off by 1 or 2 in their estimated counts.
+ size_t tolerance = 0;
+ if (itv.start <= seconds(curTime - 7200)) {
+ tolerance = 2;
+ } else if (itv.start <= seconds(curTime - 3000)) {
+ tolerance = 1;
+ }
+ size_t actualCount = (itv.end - itv.start).count();
+ size_t estimatedCount = mhts.count(itv.start, itv.end);
+ EXPECT_GE(actualCount, estimatedCount);
+ EXPECT_LE(actualCount - tolerance, estimatedCount);
+ }
+}
+
+TEST(TimeseriesHistogram, SingleUniqueValue) {
+ int values[] = {-1, 0, 500, 1000, 1500};
+ for (int ii = 0; ii < 5; ++ii) {
+ int value = values[ii];
+ TimeseriesHistogram<int> h(10, 0, 1000,
+ MultiLevelTimeSeries<int>(
+ 60, IntMTMHTS::NUM_LEVELS,
+ IntMTMHTS::kDurations));
+
+ const int kNumIters = 1000;
+ for (int jj = 0; jj < kNumIters; ++jj) {
+ h.addValue(seconds(time(nullptr)), value);
+ }
+ h.update(seconds(time(nullptr)));
+ // since we've only added one unique value, all percentiles should
+ // be that value
+ EXPECT_EQ(h.getPercentileEstimate(10, 0), value);
+ EXPECT_EQ(h.getPercentileEstimate(50, 0), value);
+ EXPECT_EQ(h.getPercentileEstimate(99, 0), value);
+
+ // Things get trickier if there are multiple unique values.
+ const int kNewValue = 750;
+ for (int kk = 0; kk < 2*kNumIters; ++kk) {
+ h.addValue(seconds(time(nullptr)), kNewValue);
+ }
+ h.update(seconds(time(nullptr)));
+ EXPECT_NEAR(h.getPercentileEstimate(50, 0), kNewValue+5, 5);
+ if (value >= 0 && value <= 1000) {
+ // only do further testing if value is within our bucket range,
+ // else estimates can be wildly off
+ if (kNewValue > value) {
+ EXPECT_NEAR(h.getPercentileEstimate(10, 0), value+5, 5);
+ EXPECT_NEAR(h.getPercentileEstimate(99, 0), kNewValue+5, 5);
+ } else {
+ EXPECT_NEAR(h.getPercentileEstimate(10, 0), kNewValue+5, 5);
+ EXPECT_NEAR(h.getPercentileEstimate(99, 0), value+5, 5);
+ }
+ }
+ }
+}
+