From 586dd85c31e0ac378bfee598064282c24e4c48a0 Mon Sep 17 00:00:00 2001
From: Stephen Chen
Date: Mon, 11 Nov 2013 13:32:34 -0800
Subject: [PATCH] Port TimeseriesHistogram from common/stats to folly

Summary: Port TimeseriesHistogram from common/stats to folly, similar to the
MultiLevelTimeSeries and Histogram classes we converted before.

Test Plan: Ported the old unit test for TimeseriesHistogram from common/stats
to folly/test. The original unit test is left intact and still passes. Ran
all tests in folly/test and common/stats.

Reviewed By: simpkins@fb.com

FB internal diff: D1054291
---
 folly/stats/Instantiations.cpp         |   4 +
 folly/stats/MultiLevelTimeSeries.h     |   1 +
 folly/stats/TimeseriesHistogram-defs.h | 231 ++++++++++++
 folly/stats/TimeseriesHistogram.h      | 335 +++++++++++++++++
 folly/test/TimeseriesHistogramTest.cpp | 487 +++++++++++++++++++++++++
 5 files changed, 1058 insertions(+)
 create mode 100644 folly/stats/TimeseriesHistogram-defs.h
 create mode 100644 folly/stats/TimeseriesHistogram.h
 create mode 100644 folly/test/TimeseriesHistogramTest.cpp

diff --git a/folly/stats/Instantiations.cpp b/folly/stats/Instantiations.cpp
index e9442162..40aa8020 100644
--- a/folly/stats/Instantiations.cpp
+++ b/folly/stats/Instantiations.cpp
@@ -30,11 +30,15 @@
 #include "folly/stats/MultiLevelTimeSeries.h"
 #include "folly/stats/MultiLevelTimeSeries-defs.h"
 
+#include "folly/stats/TimeseriesHistogram.h"
+#include "folly/stats/TimeseriesHistogram-defs.h"
+
 namespace folly {
 
 template class BucketedTimeSeries<int64_t>;
 template class Histogram<int64_t>;
 template class detail::HistogramBuckets<int64_t, Histogram<int64_t>::Bucket>;
 template class MultiLevelTimeSeries<int64_t>;
+template class TimeseriesHistogram<int64_t>;
 
 }  // folly

diff --git a/folly/stats/MultiLevelTimeSeries.h b/folly/stats/MultiLevelTimeSeries.h
index 32e5d639..849cc922 100644
--- a/folly/stats/MultiLevelTimeSeries.h
+++ b/folly/stats/MultiLevelTimeSeries.h
@@ -21,6 +21,7 @@
 
 #include <chrono>
 #include <vector>
+#include <string>
 #include "folly/stats/BucketedTimeSeries.h"
 
 namespace folly {
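
A note on the structure above: folly/stats splits each stats template into a
declaration header (Foo.h) and a definition header (Foo-defs.h), and
Instantiations.cpp compiles the common int64_t instantiation once so that most
callers can include the plain header only. A minimal usage sketch under that
convention -- the two-level layout below is illustrative, not something this
patch defines:

  #include <chrono>
  #include "folly/stats/TimeseriesHistogram.h"       // declarations
  #include "folly/stats/TimeseriesHistogram-defs.h"  // definitions; only
                                                     // needed for value types
                                                     // not explicitly
                                                     // instantiated above
  using std::chrono::seconds;

  int main() {
    // Two levels: a one-minute window and an all-time (duration 0) window,
    // each sliced into 60 time buckets.
    const seconds kDurations[] = { seconds(60), seconds(0) };
    folly::MultiLevelTimeSeries<int64_t> levels(60, 2, kDurations);

    // Value buckets of width 10 covering [0, 1000), plus the implicit
    // "under" and "over" buckets.
    folly::TimeseriesHistogram<int64_t> hist(10, 0, 1000, levels);
    hist.addValue(seconds(0), 42);
    hist.update(seconds(0));
    return 0;
  }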

diff --git a/folly/stats/TimeseriesHistogram-defs.h b/folly/stats/TimeseriesHistogram-defs.h
new file mode 100644
index 00000000..ddf71d35
--- /dev/null
+++ b/folly/stats/TimeseriesHistogram-defs.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_TIMESERIES_HISTOGRAM_DEF_H_
+#define FOLLY_TIMESERIES_HISTOGRAM_DEF_H_
+
+#include "folly/Conv.h"
+#include "folly/stats/Histogram-defs.h"
+#include "folly/stats/MultiLevelTimeSeries-defs.h"
+#include "folly/stats/BucketedTimeSeries-defs.h"
+
+namespace folly {
+
+template <typename T, typename TT, typename C>
+template <typename ReturnType>
+ReturnType TimeseriesHistogram<T, TT, C>::avg(int level) const {
+  ValueType total = ValueType();
+  int64_t count = 0;
+  for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+    const auto& levelObj = buckets_.getByIndex(b).getLevel(level);
+    total += levelObj.sum();
+    count += levelObj.count();
+  }
+  return folly::detail::avgHelper<ReturnType>(total, count);
+}
+
+template <typename T, typename TT, typename C>
+template <typename ReturnType>
+ReturnType TimeseriesHistogram<T, TT, C>::avg(TimeType start,
+                                              TimeType end) const {
+  ValueType total = ValueType();
+  int64_t count = 0;
+  for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+    const auto& levelObj = buckets_.getByIndex(b).getLevel(start, end);
+    total += levelObj.sum(start, end);
+    count += levelObj.count(start, end);
+  }
+  return folly::detail::avgHelper<ReturnType>(total, count);
+}
+
+template <typename T, typename TT, typename C>
+template <typename ReturnType>
+ReturnType TimeseriesHistogram<T, TT, C>::rate(TimeType start,
+                                               TimeType end) const {
+  ValueType total = ValueType();
+  TimeType elapsed(0);
+  for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+    const auto& level = buckets_.getByIndex(b).getLevel(start);
+    total += level.sum(start, end);
+    elapsed = std::max(elapsed, level.elapsed(start, end));
+  }
+  return folly::detail::rateHelper<ReturnType, TimeType, TimeType>(
+      total, elapsed);
+}
+
+template <typename T, typename TT, typename C>
+TimeseriesHistogram<T, TT, C>::TimeseriesHistogram(ValueType bucketSize,
+                                                   ValueType min,
+                                                   ValueType max,
+                                                   const ContainerType& copyMe)
+  : buckets_(bucketSize, min, max, copyMe),
+    haveNotSeenValue_(true),
+    singleUniqueValue_(false) {
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::addValue(TimeType now,
+                                             const ValueType& value) {
+  buckets_.getByValue(value).addValue(now, value);
+  maybeHandleSingleUniqueValue(value);
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::addValue(TimeType now,
+                                             const ValueType& value,
+                                             int64_t times) {
+  buckets_.getByValue(value).addValue(now, value, times);
+  maybeHandleSingleUniqueValue(value);
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::addValues(
+    TimeType now, const folly::Histogram<ValueType>& hist) {
+  CHECK_EQ(hist.getMin(), getMin());
+  CHECK_EQ(hist.getMax(), getMax());
+  CHECK_EQ(hist.getBucketSize(), getBucketSize());
+  CHECK_EQ(hist.getNumBuckets(), getNumBuckets());
+
+  for (unsigned int n = 0; n < hist.getNumBuckets(); ++n) {
+    const typename folly::Histogram<ValueType>::Bucket& histBucket =
+      hist.getBucketByIndex(n);
+    Bucket& myBucket = buckets_.getByIndex(n);
+    myBucket.addValueAggregated(now, histBucket.sum, histBucket.count);
+  }
+
+  // We don't bother with the singleUniqueValue_ tracking.
+  haveNotSeenValue_ = false;
+  singleUniqueValue_ = false;
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::maybeHandleSingleUniqueValue(
+    const ValueType& value) {
+  if (haveNotSeenValue_) {
+    firstValue_ = value;
+    singleUniqueValue_ = true;
+    haveNotSeenValue_ = false;
+  } else if (singleUniqueValue_) {
+    if (value != firstValue_) {
+      singleUniqueValue_ = false;
+    }
+  }
+}
+
+template <typename T, typename TT, typename C>
+T TimeseriesHistogram<T, TT, C>::getPercentileEstimate(double pct,
+                                                       int level) const {
+  if (singleUniqueValue_) {
+    return firstValue_;
+  }
+
+  return buckets_.getPercentileEstimate(pct / 100.0, CountFromLevel(level),
+                                        AvgFromLevel(level));
+}
+
+template <typename T, typename TT, typename C>
+T TimeseriesHistogram<T, TT, C>::getPercentileEstimate(double pct,
+                                                       TimeType start,
+                                                       TimeType end) const {
+  if (singleUniqueValue_) {
+    return firstValue_;
+  }
+
+  return buckets_.getPercentileEstimate(pct / 100.0,
+                                        CountFromInterval(start, end),
+                                        AvgFromInterval<T>(start, end));
+}
+
+template <typename T, typename TT, typename C>
+int TimeseriesHistogram<T, TT, C>::getPercentileBucketIdx(
+  double pct,
+  int level
+) const {
+  return buckets_.getPercentileBucketIdx(pct / 100.0, CountFromLevel(level));
+}
+
+template <typename T, typename TT, typename C>
+int TimeseriesHistogram<T, TT, C>::getPercentileBucketIdx(double pct,
+                                                          TimeType start,
+                                                          TimeType end) const {
+  return buckets_.getPercentileBucketIdx(pct / 100.0,
+                                         CountFromInterval(start, end));
+}
+
+template <typename T, typename TT, typename C>
+T TimeseriesHistogram<T, TT, C>::rate(int level) const {
+  ValueType total = ValueType();
+  TimeType elapsed(0);
+  for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+    const auto& levelObj = buckets_.getByIndex(b).getLevel(level);
+    total += levelObj.sum();
+    elapsed = std::max(elapsed, levelObj.elapsed());
+  }
+  return elapsed == TimeType(0) ? 0 : (total / elapsed.count());
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::clear() {
+  for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+    buckets_.getByIndex(i).clear();
+  }
+}
+
+template <typename T, typename TT, typename C>
+void TimeseriesHistogram<T, TT, C>::update(TimeType now) {
+  for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+    buckets_.getByIndex(i).update(now);
+  }
+}
+
+template <typename T, typename TT, typename C>
+std::string TimeseriesHistogram<T, TT, C>::getString(int level) const {
+  std::string result;
+
+  for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+    if (i > 0) {
+      toAppend(",", &result);
+    }
+    const ContainerType& cont = buckets_.getByIndex(i);
+    toAppend(buckets_.getBucketMin(i),
+             ":", cont.count(level),
+             ":", cont.avg(level), &result);
+  }
+
+  return result;
+}
+
+template <typename T, typename TT, typename C>
+std::string TimeseriesHistogram<T, TT, C>::getString(TimeType start,
+                                                     TimeType end) const {
+  std::string result;
+
+  for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+    if (i > 0) {
+      toAppend(",", &result);
+    }
+    const ContainerType& cont = buckets_.getByIndex(i);
+    toAppend(buckets_.getBucketMin(i),
+             ":", cont.count(start, end),
+             ":", cont.avg(start, end), &result);
+  }
+
+  return result;
+}
+
+}  // namespace folly
+
+#endif
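
The definitions above supply the aggregate queries: per-level avg() and
rate(), their interval overloads, and the getString() serialization built
with folly::toAppend. A hedged sketch of reading them back, assuming a
histogram built as in the earlier sketch with level 0 being the one-minute
level:

  #include <chrono>
  #include <string>
  #include "folly/stats/TimeseriesHistogram.h"
  #include "folly/stats/TimeseriesHistogram-defs.h"

  void report(folly::TimeseriesHistogram<int64_t>& hist,
              std::chrono::seconds now) {
    hist.update(now);                      // decay stale data before querying

    double average = hist.avg<double>(0);  // level-0 average, as a double
    int64_t perSecond = hist.rate(0);      // sum / elapsed seconds at level 0
    std::string dump = hist.getString(0);  // "bucketMin:count:avg,..."

    // Interval overloads take [start, end) timestamps instead of a level:
    int64_t lastTenMin = hist.sum(now - std::chrono::seconds(600), now);
  }

Note the update(now) call first: as the header documents, the per-bucket time
series only decay old data when updated.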

diff --git a/folly/stats/TimeseriesHistogram.h b/folly/stats/TimeseriesHistogram.h
new file mode 100644
index 00000000..94815423
--- /dev/null
+++ b/folly/stats/TimeseriesHistogram.h
@@ -0,0 +1,335 @@
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_TIMESERIES_HISTOGRAM_H_
+#define FOLLY_TIMESERIES_HISTOGRAM_H_
+
+#include <string>
+#include <boost/static_assert.hpp>
+#include "folly/stats/Histogram.h"
+#include "folly/stats/MultiLevelTimeSeries.h"
+
+namespace folly {
+
+/*
+ * TimeseriesHistogram tracks data distributions as they change over time.
+ *
+ * Specifically, it is a bucketed histogram with a different value range
+ * assigned to each bucket.  Within each bucket is a MultiLevelTimeSeries from
+ * 'folly/stats/MultiLevelTimeSeries.h'.  This means that each bucket contains
+ * a different set of data for different historical time periods, and one can
+ * query data distributions over different trailing time windows.
+ *
+ * For example, this can answer questions like: "What is the data distribution
+ * over the last minute?  Over the last 10 minutes?  Since I last cleared this
+ * histogram?"
+ *
+ * The class can also estimate percentiles and answer questions like: "What
+ * was the 99th percentile data value over the last 10 minutes?"
+ *
+ * Note that depending on the size of your buckets and the smoothness of your
+ * data distribution, the estimate may be way off from the actual value.  In
+ * particular, if the given percentile falls outside of the bucket range
+ * (i.e. your buckets range in 0 - 100,000 but the 99th percentile is around
+ * 115,000) this estimate may be very wrong.
+ *
+ * The memory usage for a typical histogram is roughly 3k * (# of buckets).
+ * All insertion operations are amortized O(1), and all queries are
+ * O(# of buckets).
+ */
+template <class T, class TT=std::chrono::seconds,
+          class C=folly::MultiLevelTimeSeries<T, TT>>
+class TimeseriesHistogram {
+ private:
+  // NOTE: T must be equivalent to a _signed_ numeric type for our math.
+  BOOST_STATIC_ASSERT(std::numeric_limits<T>::is_signed);
+
+ public:
+  // values to be inserted into container
+  typedef T ValueType;
+  // the container type we use internally for each bucket
+  typedef C ContainerType;
+  // The time type.
+  typedef TT TimeType;
+
+  /*
+   * Create a TimeSeries histogram and initialize the bucketing and levels.
+   *
+   * The buckets are created by chopping the range [min, max) into pieces
+   * of size bucketSize, with the last bucket being potentially shorter.  Two
+   * additional buckets are always created -- the "under" bucket for the range
+   * (-inf, min) and the "over" bucket for the range [max, +inf).
+   *
+   * @param bucketSize the width of each bucket
+   * @param min the smallest value for the bucket range
+   * @param max the largest value for the bucket range
+   * @param defaultContainer a pre-initialized timeseries with the desired
+   *                         number of levels and their durations
+   */
+  TimeseriesHistogram(ValueType bucketSize, ValueType min, ValueType max,
+                      const ContainerType& defaultContainer);
+
+  /* Return the bucket size of each bucket in the histogram. */
+  ValueType getBucketSize() const { return buckets_.getBucketSize(); }
+
+  /* Return the min value at which bucketing begins. */
+  ValueType getMin() const { return buckets_.getMin(); }
+
+  /* Return the max value at which bucketing ends. */
+  ValueType getMax() const { return buckets_.getMax(); }
+
+  /* Return the number of levels of the Timeseries object in each bucket */
+  int getNumLevels() const {
+    return buckets_.getByIndex(0).numLevels();
+  }
+
+  /* Return the number of buckets */
+  int getNumBuckets() const { return buckets_.getNumBuckets(); }
+
+  /* Return the bucket index into which the given value would fall. */
+  int getBucketIdx(const ValueType& value) const;
+
+  /*
+   * Return the threshold of the bucket for the given index in range
+   * [0..numBuckets).  The bucket will have range [thresh, thresh + bucketSize)
+   * or [thresh, max), whichever is shorter.
+   */
+  ValueType getBucketMin(int bucketIdx) const {
+    return buckets_.getBucketMin(bucketIdx);
+  }
+
+  /* Return the actual timeseries in the given bucket (for reading only!) */
+  const ContainerType& getBucket(int bucketIdx) const {
+    return buckets_.getByIndex(bucketIdx);
+  }
+
+  /* Total count of values at the given timeseries level (all buckets). */
+  int64_t count(int level) const {
+    int64_t total = 0;
+    for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+      total += buckets_.getByIndex(b).count(level);
+    }
+    return total;
+  }
+
+  /* Total count of values added during the given interval (all buckets). */
+  int64_t count(TimeType start, TimeType end) const {
+    int64_t total = 0;
+    for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+      total += buckets_.getByIndex(b).count(start, end);
+    }
+    return total;
+  }
+
+  /* Total sum of values at the given timeseries level (all buckets). */
+  ValueType sum(int level) const {
+    ValueType total = ValueType();
+    for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+      total += buckets_.getByIndex(b).sum(level);
+    }
+    return total;
+  }
+
+  /* Total sum of values added during the given interval (all buckets). */
+  ValueType sum(TimeType start, TimeType end) const {
+    ValueType total = ValueType();
+    for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
+      total += buckets_.getByIndex(b).sum(start, end);
+    }
+    return total;
+  }
+
+  /* Average of values at the given timeseries level (all buckets). */
+  template <typename ReturnType=double>
+  ReturnType avg(int level) const;
+
+  /* Average of values added during the given interval (all buckets). */
+  template <typename ReturnType=double>
+  ReturnType avg(TimeType start, TimeType end) const;
+
+  /*
+   * Rate at the given timeseries level (all buckets).
+   * This is the sum of all values divided by the time interval (in seconds).
+   */
+  ValueType rate(int level) const;
+
+  /*
+   * Rate for the given interval (all buckets).
+   * This is the sum of all values divided by the time interval (in seconds).
+   */
+  template <typename ReturnType=double>
+  ReturnType rate(TimeType start, TimeType end) const;
+
+  /*
+   * Update every underlying timeseries object with the given timestamp.  You
+   * must call this directly before querying to ensure that the data in all
+   * buckets is decayed properly.
+   */
+  void update(TimeType now);
+
+  /* clear all the data from the histogram. */
+  void clear();
+
+  /* Add a value into the histogram with timestamp 'now' */
+  void addValue(TimeType now, const ValueType& value);
+  /* Add a value the given number of times with timestamp 'now' */
+  void addValue(TimeType now, const ValueType& value, int64_t times);
+
+  /*
+   * Add all of the values from the specified histogram.
+   *
+   * All of the values will be added to the current time-slot.
+   *
+   * One use of this is for thread-local caching of frequently updated
+   * histogram data.  For example, each thread can store a thread-local
+   * Histogram that is updated frequently, and only add it to the global
+   * TimeseriesHistogram once a second.
+   */
+  void addValues(TimeType now, const folly::Histogram<ValueType>& values);
+
+  /*
+   * Return an estimate of the value at the given percentile in the histogram
+   * in the given timeseries level.  The percentile is estimated as follows:
+   *
+   * - We retrieve a count of the values in each bucket (at the given level)
+   * - We determine via the counts which bucket the given percentile falls in.
+   * - We assume the average value in the bucket is also its median
+   * - We then linearly interpolate within the bucket, by assuming that the
+   *   distribution is uniform in the two value ranges [left, median) and
+   *   [median, right) where [left, right) is the bucket value range.
+   *
+   * Caveats:
+   * - If the histogram is empty, this always returns ValueType(), usually 0.
+   * - For the 'under' and 'over' special buckets, their range is unbounded
+   *   on one side.  In order for the interpolation to work, we assume that
+   *   the average value in the bucket is equidistant from the two edges of
+   *   the bucket.  In other words, we assume that the distance between the
+   *   average and the known bound is equal to the distance between the
+   *   average and the unknown bound.
+   */
+  ValueType getPercentileEstimate(double pct, int level) const;
+  /*
+   * Return an estimate of the value at the given percentile in the histogram
+   * in the given historical interval.  Please see the documentation for
+   * getPercentileEstimate(double pct, int level) for the explanation of the
+   * estimation algorithm.
+   */
+  ValueType getPercentileEstimate(double pct, TimeType start, TimeType end)
+    const;
+
+  /*
+   * Return the bucket index that the given percentile falls into (in the
+   * given timeseries level).  This index can then be used to retrieve either
+   * the bucket threshold, or other data from inside the bucket.
+   */
+  int getPercentileBucketIdx(double pct, int level) const;
+  /*
+   * Return the bucket index that the given percentile falls into (in the
+   * given historical interval).  This index can then be used to retrieve
+   * either the bucket threshold, or other data from inside the bucket.
+   */
+  int getPercentileBucketIdx(double pct, TimeType start, TimeType end) const;
+
+  /* Get the bucket threshold for the bucket containing the given pct. */
+  int getPercentileBucketMin(double pct, int level) const {
+    return getBucketMin(getPercentileBucketIdx(pct, level));
+  }
+  /* Get the bucket threshold for the bucket containing the given pct. */
+  int getPercentileBucketMin(double pct, TimeType start, TimeType end) const {
+    return getBucketMin(getPercentileBucketIdx(pct, start, end));
+  }
+
+  /*
+   * Print out serialized data from all buckets at the given level.
+   * Format is: BUCKET [',' BUCKET ...]
+   * Where: BUCKET == bucketMin ':' count ':' avg
+   */
+  std::string getString(int level) const;
+
+  /*
+   * Print out serialized data for all buckets in the historical interval.
+   * For the format, please see getString(int level).
+   */
+ */ + std::string getString(TimeType start, TimeType end) const; + + private: + typedef ContainerType Bucket; + struct CountFromLevel { + explicit CountFromLevel(int level) : level_(level) {} + + uint64_t operator()(const ContainerType& bucket) const { + return bucket.count(level_); + } + + private: + int level_; + }; + struct CountFromInterval { + explicit CountFromInterval(TimeType start, TimeType end) + : start_(start), + end_(end) {} + + uint64_t operator()(const ContainerType& bucket) const { + return bucket.count(start_, end_); + } + + private: + TimeType start_; + TimeType end_; + }; + + struct AvgFromLevel { + explicit AvgFromLevel(int level) : level_(level) {} + + ValueType operator()(const ContainerType& bucket) const { + return bucket.template avg(level_); + } + + private: + int level_; + }; + + template + struct AvgFromInterval { + explicit AvgFromInterval(TimeType start, TimeType end) + : start_(start), + end_(end) {} + + ReturnType operator()(const ContainerType& bucket) const { + return bucket.template avg(start_, end_); + } + + private: + TimeType start_; + TimeType end_; + }; + + /* + * Special logic for the case of only one unique value registered + * (this can happen when clients don't pick good bucket ranges or have + * other bugs). It's a lot easier for clients to track down these issues + * if they are getting the correct value. + */ + void maybeHandleSingleUniqueValue(const ValueType& value); + + folly::detail::HistogramBuckets buckets_; + bool haveNotSeenValue_; + bool singleUniqueValue_; + ValueType firstValue_; +}; + +} // folly + +#endif // FOLLY_TIMESERIES_HISTOGRAM_H_ diff --git a/folly/test/TimeseriesHistogramTest.cpp b/folly/test/TimeseriesHistogramTest.cpp new file mode 100644 index 00000000..196fd822 --- /dev/null +++ b/folly/test/TimeseriesHistogramTest.cpp @@ -0,0 +1,487 @@ +/* + * Copyright 2013 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/stats/TimeseriesHistogram.h" +#include "folly/stats/TimeseriesHistogram-defs.h" + +#include + +using namespace std; +using namespace folly; +using std::chrono::seconds; + +namespace IntMTMHTS { + enum Levels { + MINUTE, + TEN_MINUTE, + HOUR, + ALLTIME, + NUM_LEVELS, + }; + + const seconds kDurations[] = { + seconds(60), seconds(600), seconds(3600), seconds(0) + }; +}; + +namespace IntMHTS { + enum Levels { + MINUTE, + HOUR, + ALLTIME, + NUM_LEVELS, + }; + + const seconds kDurations[] = { + seconds(60), seconds(3600), seconds(0) + }; +}; + +typedef std::mt19937 RandomInt32; + +TEST(TimeseriesHistogram, Percentile) { + RandomInt32 random(5); + // [10, 109], 12 buckets including above and below + { + TimeseriesHistogram h(10, 10, 110, + MultiLevelTimeSeries( + 60, IntMTMHTS::NUM_LEVELS, + IntMTMHTS::kDurations)); + + EXPECT_EQ(0, h.getPercentileEstimate(0, IntMTMHTS::ALLTIME)); + + EXPECT_EQ(12, h.getNumBuckets()); + EXPECT_EQ(10, h.getBucketSize()); + EXPECT_EQ(10, h.getMin()); + EXPECT_EQ(110, h.getMax()); + + for (int i = 0; i < h.getNumBuckets(); ++i) { + EXPECT_EQ(4, h.getBucket(i).numLevels()); + } + + int maxVal = 120; + h.addValue(seconds(0), 0); + h.addValue(seconds(0), maxVal); + for (int i = 0; i < 98; i++) { + h.addValue(seconds(0), random() % maxVal); + } + + h.update(std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch())); + // bucket 0 stores everything below min, so its minimum + // is the lowest possible number + EXPECT_EQ(std::numeric_limits::min(), + h.getPercentileBucketMin(1, IntMTMHTS::ALLTIME)); + EXPECT_EQ(110, h.getPercentileBucketMin(99, IntMTMHTS::ALLTIME)); + + EXPECT_EQ(-2, h.getPercentileEstimate(0, IntMTMHTS::ALLTIME)); + EXPECT_EQ(-1, h.getPercentileEstimate(1, IntMTMHTS::ALLTIME)); + EXPECT_EQ(119, h.getPercentileEstimate(99, IntMTMHTS::ALLTIME)); + EXPECT_EQ(120, h.getPercentileEstimate(100, IntMTMHTS::ALLTIME)); + } +} + +TEST(TimeseriesHistogram, String) { + RandomInt32 random(5); + // [10, 109], 12 buckets including above and below + { + TimeseriesHistogram hist(10, 10, 110, + MultiLevelTimeSeries( + 60, IntMTMHTS::NUM_LEVELS, + IntMTMHTS::kDurations)); + + int maxVal = 120; + hist.addValue(seconds(0), 0); + hist.addValue(seconds(0), maxVal); + for (int i = 0; i < 98; i++) { + hist.addValue(seconds(0), random() % maxVal); + } + + hist.update(seconds(0)); + + const char* const kStringValues1[IntMTMHTS::NUM_LEVELS] = { + "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64," + "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115", + "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64," + "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115", + "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64," + "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115", + "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64," + "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115", + }; + + CHECK_EQ(IntMTMHTS::NUM_LEVELS, hist.getNumLevels()); + + for (int level = 0; level < hist.getNumLevels(); ++level) { + EXPECT_EQ(kStringValues1[level], hist.getString(level)); + } + + const char* const kStringValues2[IntMTMHTS::NUM_LEVELS] = { + "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64," + "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115", + "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64," + "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115", + "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64," + "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115", + 
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64," + "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115", + }; + + CHECK_EQ(IntMTMHTS::NUM_LEVELS, hist.getNumLevels()); + + for (int level = 0; level < hist.getNumLevels(); ++level) { + EXPECT_EQ(kStringValues2[level], hist.getString(level)); + } + } +} + +TEST(TimeseriesHistogram, Clear) { + { + TimeseriesHistogram hist(10, 0, 100, + MultiLevelTimeSeries( + 60, IntMTMHTS::NUM_LEVELS, + IntMTMHTS::kDurations)); + + for (int now = 0; now < 3600; now++) { + for (int i = 0; i < 100; i++) { + hist.addValue(seconds(now), i, 2); // adds each item 2 times + } + } + + // check clearing + hist.clear(); + + for (int b = 0; b < hist.getNumBuckets(); ++b) { + EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::MINUTE)); + EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE)); + EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::HOUR)); + EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::ALLTIME)); + } + + for (int pct = 0; pct <= 100; pct++) { + EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE)); + EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::TEN_MINUTE)); + EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR)); + EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME)); + + EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::MINUTE)); + EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::TEN_MINUTE)); + EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::HOUR)); + EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::ALLTIME)); + } + } +} + + +TEST(TimeseriesHistogram, Basic) { + { + TimeseriesHistogram hist(10, 0, 100, + MultiLevelTimeSeries( + 60, IntMTMHTS::NUM_LEVELS, + IntMTMHTS::kDurations)); + + for (int now = 0; now < 3600; now++) { + for (int i = 0; i < 100; i++) { + hist.addValue(seconds(now), i); + } + } + + hist.update(seconds(3599)); + for (int pct = 1; pct <= 100; pct++) { + int expected = (pct - 1) / 10 * 10; + EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE)); + EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, + IntMTMHTS::TEN_MINUTE)); + EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR)); + EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME)); + } + + for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) { + EXPECT_EQ(600, hist.getBucket(b).count(IntMTMHTS::MINUTE)); + EXPECT_EQ(6000, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE)); + EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::HOUR)); + EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::ALLTIME)); + } + EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE)); + EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count( + IntMTMHTS::MINUTE)); + } + + // ----------------- + + { + TimeseriesHistogram hist(10, 0, 100, + MultiLevelTimeSeries( + 60, IntMTMHTS::NUM_LEVELS, + IntMTMHTS::kDurations)); + + for (int now = 0; now < 3600; now++) { + for (int i = 0; i < 100; i++) { + hist.addValue(seconds(now), i, 2); // adds each item 2 times + } + } + + hist.update(seconds(3599)); + for (int pct = 1; pct <= 100; pct++) { + int expected = (pct - 1) / 10 * 10; + EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE)); + EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, + IntMTMHTS::TEN_MINUTE)); + EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR)); + EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME)); + } + + for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) { + EXPECT_EQ(600 * 2, 
+      EXPECT_EQ(6000 * 2, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
+      EXPECT_EQ(36000 * 2, hist.getBucket(b).count(IntMTMHTS::HOUR));
+      EXPECT_EQ(36000 * 2, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
+    }
+    EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
+    EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
+                IntMTMHTS::MINUTE));
+  }
+
+  // -----------------
+
+  {
+    TimeseriesHistogram<int> hist(10, 0, 100,
+                                  MultiLevelTimeSeries<int>(
+                                    60, IntMTMHTS::NUM_LEVELS,
+                                    IntMTMHTS::kDurations));
+
+    for (int now = 0; now < 3600; now++) {
+      for (int i = 0; i < 50; i++) {
+        hist.addValue(seconds(now), i * 2, 2);  // adds each item 2 times
+      }
+    }
+
+    hist.update(seconds(3599));
+    for (int pct = 1; pct <= 100; pct++) {
+      int expected = (pct - 1) / 10 * 10;
+      EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
+      EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
+                                                      IntMTMHTS::TEN_MINUTE));
+      EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
+      EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
+                                                      IntMTMHTS::ALLTIME));
+    }
+
+    EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
+    EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::TEN_MINUTE));
+    EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::HOUR));
+    EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::ALLTIME));
+    EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
+                IntMTMHTS::MINUTE));
+    EXPECT_EQ(0,
+              hist.getBucket(hist.getNumBuckets() - 1).
+                count(IntMTMHTS::TEN_MINUTE));
+    EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
+                IntMTMHTS::HOUR));
+    EXPECT_EQ(0,
+              hist.getBucket(hist.getNumBuckets() - 1).count(
+                IntMTMHTS::ALLTIME));
+
+    for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
+      EXPECT_EQ(600, hist.getBucket(b).count(IntMTMHTS::MINUTE));
+      EXPECT_EQ(6000, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
+      EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::HOUR));
+      EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
+    }
+
+    for (int i = 0; i < 100; ++i) {
+      hist.addValue(seconds(3599), 200 + i);
+    }
+    hist.update(seconds(3599));
+    EXPECT_EQ(100,
+              hist.getBucket(hist.getNumBuckets() - 1).count(
+                IntMTMHTS::ALLTIME));
+
+  }
+}
+
+TEST(TimeseriesHistogram, QueryByInterval) {
+  TimeseriesHistogram<int> mhts(8, 8, 120,
+                                MultiLevelTimeSeries<int>(
+                                  60, IntMHTS::NUM_LEVELS,
+                                  IntMHTS::kDurations));
+
+  mhts.update(seconds(0));
+
+  int curTime;
+  for (curTime = 0; curTime < 7200; curTime++) {
+    mhts.addValue(seconds(curTime), 1);
+  }
+  for (curTime = 7200; curTime < 7200 + 3540; curTime++) {
+    mhts.addValue(seconds(curTime), 10);
+  }
+  for (curTime = 7200 + 3540; curTime < 7200 + 3600; curTime++) {
+    mhts.addValue(seconds(curTime), 100);
+  }
+
+  mhts.update(seconds(7200 + 3600 - 1));
+
+  struct TimeInterval {
+    TimeInterval(int s, int e)
+      : start(s), end(e) {}
+
+    std::chrono::seconds start;
+    std::chrono::seconds end;
+  };
+  TimeInterval intervals[12] = {
+    { curTime - 60, curTime },
+    { curTime - 3600, curTime },
+    { curTime - 7200, curTime },
+    { curTime - 3600, curTime - 60 },
+    { curTime - 7200, curTime - 60 },
+    { curTime - 7200, curTime - 3600 },
+    { curTime - 50, curTime - 20 },
+    { curTime - 3020, curTime - 20 },
+    { curTime - 7200, curTime - 20 },
+    { curTime - 3000, curTime - 1000 },
+    { curTime - 7200, curTime - 1000 },
+    { curTime - 7200, curTime - 3600 },
+  };
+
+  int expectedSums[12] = {
+    6000, 41400, 32400, 35400, 32129, 16200, 3000, 33600, 32308, 20000, 27899,
+    16200
+  };
+  int expectedCounts[12] = {
+    60, 3600, 7200, 3540, 7139, 3600, 30, 3000, 7178, 2000, 6199, 3600
+  };
+
+  // The first 7200 values added all fell below the histogram minimum,
+  // and went into the bucket that tracks all of the too-small values.
+  // This bucket reports a minimum value of the smallest possible integer.
+  int belowMinBucket = std::numeric_limits<int>::min();
+
+  int expectedValues[12][3] = {
+    {96, 96, 96},
+    { 8,  8, 96},
+    { belowMinBucket, belowMinBucket, 8},  // alltime
+    { 8,  8,  8},
+    { belowMinBucket, belowMinBucket, 8},  // alltime
+    { belowMinBucket, belowMinBucket, 8},  // alltime
+    {96, 96, 96},
+    { 8,  8, 96},
+    { belowMinBucket, belowMinBucket, 8},  // alltime
+    { 8,  8,  8},
+    { belowMinBucket, belowMinBucket, 8},  // alltime
+    { belowMinBucket, belowMinBucket, 8}   // alltime
+  };
+
+  for (int i = 0; i < 12; i++) {
+    const auto& itv = intervals[i];
+    int s = mhts.sum(itv.start, itv.end);
+    EXPECT_EQ(expectedSums[i], s);
+
+    int c = mhts.count(itv.start, itv.end);
+    EXPECT_EQ(expectedCounts[i], c);
+  }
+
+  // 3 levels
+  for (int i = 1; i <= 100; i++) {
+    EXPECT_EQ(96, mhts.getPercentileBucketMin(i, 0));
+    EXPECT_EQ(96, mhts.getPercentileBucketMin(i, seconds(curTime - 60),
+                                              seconds(curTime)));
+    EXPECT_EQ(8, mhts.getPercentileBucketMin(i, seconds(curTime - 3540),
+                                             seconds(curTime - 60)));
+  }
+
+  EXPECT_EQ(8, mhts.getPercentileBucketMin(1, 1));
+  EXPECT_EQ(8, mhts.getPercentileBucketMin(98, 1));
+  EXPECT_EQ(96, mhts.getPercentileBucketMin(99, 1));
+  EXPECT_EQ(96, mhts.getPercentileBucketMin(100, 1));
+
+  EXPECT_EQ(belowMinBucket, mhts.getPercentileBucketMin(1, 2));
+  EXPECT_EQ(belowMinBucket, mhts.getPercentileBucketMin(66, 2));
+  EXPECT_EQ(8, mhts.getPercentileBucketMin(67, 2));
+  EXPECT_EQ(8, mhts.getPercentileBucketMin(99, 2));
+  EXPECT_EQ(96, mhts.getPercentileBucketMin(100, 2));
+
+  // belowMinBucket (INT_MIN) is currently the value reported for bucket 0
+  // (the below-min bucket)
+  for (int i = 0; i < 12; i++) {
+    const auto& itv = intervals[i];
+    int v = mhts.getPercentileBucketMin(1, itv.start, itv.end);
+    EXPECT_EQ(expectedValues[i][0], v);
+
+    v = mhts.getPercentileBucketMin(50, itv.start, itv.end);
+    EXPECT_EQ(expectedValues[i][1], v);
+
+    v = mhts.getPercentileBucketMin(99, itv.start, itv.end);
+    EXPECT_EQ(expectedValues[i][2], v);
+  }
+
+  for (int i = 0; i < 12; i++) {
+    const auto& itv = intervals[i];
+    int c = mhts.count(itv.start, itv.end);
+    // Some of the older intervals that fall in the alltime bucket
+    // are off by 1 or 2 in their estimated counts.
+    size_t tolerance = 0;
+    if (itv.start <= seconds(curTime - 7200)) {
+      tolerance = 2;
+    } else if (itv.start <= seconds(curTime - 3000)) {
+      tolerance = 1;
+    }
+    size_t actualCount = (itv.end - itv.start).count();
+    size_t estimatedCount = mhts.count(itv.start, itv.end);
+    EXPECT_GE(actualCount, estimatedCount);
+    EXPECT_LE(actualCount - tolerance, estimatedCount);
+  }
+}
+
+TEST(TimeseriesHistogram, SingleUniqueValue) {
+  int values[] = {-1, 0, 500, 1000, 1500};
+  for (int ii = 0; ii < 5; ++ii) {
+    int value = values[ii];
+    TimeseriesHistogram<int> h(10, 0, 1000,
+                               MultiLevelTimeSeries<int>(
+                                 60, IntMTMHTS::NUM_LEVELS,
+                                 IntMTMHTS::kDurations));
+
+    const int kNumIters = 1000;
+    for (int jj = 0; jj < kNumIters; ++jj) {
+      h.addValue(seconds(time(nullptr)), value);
+    }
+    h.update(seconds(time(nullptr)));
+    // since we've only added one unique value, all percentiles should
+    // be that value
+    EXPECT_EQ(h.getPercentileEstimate(10, 0), value);
+    EXPECT_EQ(h.getPercentileEstimate(50, 0), value);
+    EXPECT_EQ(h.getPercentileEstimate(99, 0), value);
+
+    // Things get trickier if there are multiple unique values.
+    const int kNewValue = 750;
+    for (int kk = 0; kk < 2*kNumIters; ++kk) {
+      h.addValue(seconds(time(nullptr)), kNewValue);
+    }
+    h.update(seconds(time(nullptr)));
+    EXPECT_NEAR(h.getPercentileEstimate(50, 0), kNewValue+5, 5);
+    if (value >= 0 && value <= 1000) {
+      // only do further testing if value is within our bucket range,
+      // else estimates can be wildly off
+      if (kNewValue > value) {
+        EXPECT_NEAR(h.getPercentileEstimate(10, 0), value+5, 5);
+        EXPECT_NEAR(h.getPercentileEstimate(99, 0), kNewValue+5, 5);
+      } else {
+        EXPECT_NEAR(h.getPercentileEstimate(10, 0), kNewValue+5, 5);
+        EXPECT_NEAR(h.getPercentileEstimate(99, 0), value+5, 5);
+      }
+    }
+  }
+}
-- 
2.34.1
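
As the addValues() documentation in the header suggests, a frequently updated
thread-local folly::Histogram can be folded into a shared TimeseriesHistogram
on a coarse cadence. A minimal sketch of that pattern; the function name and
once-a-second cadence are illustrative, and callers must supply their own
locking around the shared histogram:

  #include <chrono>
  #include "folly/stats/Histogram.h"
  #include "folly/stats/TimeseriesHistogram.h"
  #include "folly/stats/TimeseriesHistogram-defs.h"

  // Hypothetical flush routine, e.g. run once a second per thread. Both
  // histograms must use identical bucketing -- the CHECK_EQ calls in
  // addValues() enforce this -- and every merged value lands in the
  // current time slot.
  void flushLocal(folly::Histogram<int64_t>& local,
                  folly::TimeseriesHistogram<int64_t>& shared,
                  std::chrono::seconds now) {
    shared.addValues(now, local);
    local.clear();  // start the next batch from empty
  }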