2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
27 #include <folly/CPortability.h>
28 #include <folly/stats/detail/Bucket.h>
35 * A helper class to manage a set of histogram buckets.
37 template <typename T, typename BucketT>
38 class HistogramBuckets {
41 typedef BucketT BucketType;
44 * Create a set of histogram buckets.
46 * One bucket will be created for each bucketSize interval of values within
47 * the specified range. Additionally, one bucket will be created to track
48 * all values that fall below the specified minimum, and one bucket will be
49 * created for all values above the specified maximum.
51 * If (max - min) is not a multiple of bucketSize, the last bucket will cover
52 * a smaller range of values than the other buckets.
54 * (max - min) must be larger than or equal to bucketSize.
56 HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max,
57 const BucketType& defaultBucket);
59 /* Returns the bucket size of each bucket in the histogram. */
60 ValueType getBucketSize() const {
64 /* Returns the min value at which bucketing begins. */
65 ValueType getMin() const {
69 /* Returns the max value at which bucketing ends. */
70 ValueType getMax() const {
75 * Returns the number of buckets.
77 * This includes the total number of buckets for the [min, max) range,
78 * plus 2 extra buckets, one for handling values less than min, and one for
79 * values greater than max.
81 size_t getNumBuckets() const {
82 return buckets_.size();
85 /* Returns the bucket index into which the given value would fall. */
86 size_t getBucketIdx(ValueType value) const;
88 /* Returns the bucket for the specified value */
89 BucketType& getByValue(ValueType value) {
90 return buckets_[getBucketIdx(value)];
93 /* Returns the bucket for the specified value */
94 const BucketType& getByValue(ValueType value) const {
95 return buckets_[getBucketIdx(value)];
99 * Returns the bucket at the specified index.
101 * Note that index 0 is the bucket for all values less than the specified
102 * minimum. Index 1 is the first bucket in the specified bucket range.
104 BucketType& getByIndex(size_t idx) {
105 return buckets_[idx];
108 /* Returns the bucket at the specified index. */
109 const BucketType& getByIndex(size_t idx) const {
110 return buckets_[idx];
114 * Returns the minimum threshold for the bucket at the given index.
116 * The bucket at the specified index will store values in the range
117 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
118 * max is smaller than bucketMin + bucketSize.
120 ValueType getBucketMin(size_t idx) const {
122 return std::numeric_limits<ValueType>::min();
124 if (idx == buckets_.size() - 1) {
128 return ValueType(min_ + ((idx - 1) * bucketSize_));
132 * Returns the maximum threshold for the bucket at the given index.
134 * The bucket at the specified index will store values in the range
135 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
136 * max is smaller than bucketMin + bucketSize.
138 ValueType getBucketMax(size_t idx) const {
139 if (idx == buckets_.size() - 1) {
140 return std::numeric_limits<ValueType>::max();
143 return ValueType(min_ + (idx * bucketSize_));
147 * Computes the total number of values stored across all buckets.
149 * Runs in O(numBuckets)
151 * @param countFn A function that takes a const BucketType&, and returns the
152 * number of values in that bucket
153 * @return Returns the total number of values stored across all buckets
155 template <typename CountFn>
156 uint64_t computeTotalCount(CountFn countFromBucket) const;
159 * Determine which bucket the specified percentile falls into.
161 * Looks for the bucket that contains the Nth percentile data point.
163 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
164 * @param countFn A function that takes a const BucketType&, and returns the
165 * number of values in that bucket.
166 * @param lowPct The lowest percentile stored in the selected bucket will be
167 * returned via this parameter.
168 * @param highPct The highest percentile stored in the selected bucket will
169 * be returned via this parameter.
171 * @return Returns the index of the bucket that contains the Nth percentile
174 template <typename CountFn>
175 size_t getPercentileBucketIdx(
177 CountFn countFromBucket,
178 double* lowPct = nullptr,
179 double* highPct = nullptr) const;
182 * Estimate the value at the specified percentile.
184 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
185 * @param countFn A function that takes a const BucketType&, and returns the
186 * number of values in that bucket.
187 * @param avgFn A function that takes a const BucketType&, and returns the
188 * average of all the values in that bucket.
190 * @return Returns an estimate for N, where N is the number where exactly pct
191 * percentage of the data points in the histogram are less than N.
193 template <typename CountFn, typename AvgFn>
194 ValueType getPercentileEstimate(double pct,
195 CountFn countFromBucket,
196 AvgFn avgFromBucket) const;
199 * Iterator access to the buckets.
201 * Note that the first bucket is for all values less than min, and the last
202 * bucket is for all values greater than max. The buckets tracking values in
203 * the [min, max) actually start at the second bucket.
205 typename std::vector<BucketType>::const_iterator begin() const {
206 return buckets_.begin();
208 typename std::vector<BucketType>::iterator begin() {
209 return buckets_.begin();
211 typename std::vector<BucketType>::const_iterator end() const {
212 return buckets_.end();
214 typename std::vector<BucketType>::iterator end() {
215 return buckets_.end();
219 ValueType bucketSize_;
222 std::vector<BucketType> buckets_;
229 * A basic histogram class.
231 * Groups data points into equally-sized buckets, and stores the overall sum of
232 * the data points in each bucket, as well as the number of data points in the
235 * The caller must specify the minimum and maximum data points to expect ahead
236 * of time, as well as the bucket width.
238 template <typename T>
242 typedef detail::Bucket<T> Bucket;
244 Histogram(ValueType bucketSize, ValueType min, ValueType max)
245 : buckets_(bucketSize, min, max, Bucket()) {}
247 /* Add a data point to the histogram */
248 void addValue(ValueType value) FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(
249 "signed-integer-overflow",
250 "unsigned-integer-overflow") {
251 Bucket& bucket = buckets_.getByValue(value);
252 // NOTE: Overflow is handled elsewhere and tests check this
253 // behavior (see HistogramTest.cpp TestOverflow* tests).
254 // TODO: It would be nice to handle overflow here and redesign this class.
259 /* Add multiple same data points to the histogram */
260 void addRepeatedValue(ValueType value, uint64_t nSamples)
261 FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(
262 "signed-integer-overflow",
263 "unsigned-integer-overflow") {
264 Bucket& bucket = buckets_.getByValue(value);
265 // NOTE: Overflow is handled elsewhere and tests check this
266 // behavior (see HistogramTest.cpp TestOverflow* tests).
267 // TODO: It would be nice to handle overflow here and redesign this class.
268 bucket.sum += value * nSamples;
269 bucket.count += nSamples;
273 * Remove a data point to the histogram
275 * Note that this method does not actually verify that this exact data point
276 * had previously been added to the histogram; it merely subtracts the
277 * requested value from the appropriate bucket's sum.
279 void removeValue(ValueType value) FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(
280 "signed-integer-overflow",
281 "unsigned-integer-overflow") {
282 Bucket& bucket = buckets_.getByValue(value);
283 // NOTE: Overflow is handled elsewhere and tests check this
284 // behavior (see HistogramTest.cpp TestOverflow* tests).
285 // TODO: It would be nice to handle overflow here and redesign this class.
286 if (bucket.count > 0) {
290 bucket.sum = ValueType();
295 /* Remove multiple same data points from the histogram */
296 void removeRepeatedValue(ValueType value, uint64_t nSamples) {
297 Bucket& bucket = buckets_.getByValue(value);
298 // TODO: It would be nice to handle overflow here.
299 if (bucket.count >= nSamples) {
300 bucket.sum -= value * nSamples;
301 bucket.count -= nSamples;
303 bucket.sum = ValueType();
308 /* Remove all data points from the histogram */
310 for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
311 buckets_.getByIndex(i).clear();
315 /* Subtract another histogram data from the histogram */
316 void subtract(const Histogram &hist) {
317 // the two histogram bucket definitions must match to support
319 if (getBucketSize() != hist.getBucketSize() ||
320 getMin() != hist.getMin() ||
321 getMax() != hist.getMax() ||
322 getNumBuckets() != hist.getNumBuckets() ) {
323 throw std::invalid_argument("Cannot subtract input histogram.");
326 for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
327 buckets_.getByIndex(i) -= hist.buckets_.getByIndex(i);
331 /* Merge two histogram data together */
332 void merge(const Histogram &hist) {
333 // the two histogram bucket definitions must match to support
335 if (getBucketSize() != hist.getBucketSize() ||
336 getMin() != hist.getMin() ||
337 getMax() != hist.getMax() ||
338 getNumBuckets() != hist.getNumBuckets() ) {
339 throw std::invalid_argument("Cannot merge from input histogram.");
342 for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
343 buckets_.getByIndex(i) += hist.buckets_.getByIndex(i);
347 /* Copy bucket values from another histogram */
348 void copy(const Histogram &hist) {
349 // the two histogram bucket definition must match
350 if (getBucketSize() != hist.getBucketSize() ||
351 getMin() != hist.getMin() ||
352 getMax() != hist.getMax() ||
353 getNumBuckets() != hist.getNumBuckets() ) {
354 throw std::invalid_argument("Cannot copy from input histogram.");
357 for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
358 buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
362 /* Returns the bucket size of each bucket in the histogram. */
363 ValueType getBucketSize() const {
364 return buckets_.getBucketSize();
366 /* Returns the min value at which bucketing begins. */
367 ValueType getMin() const {
368 return buckets_.getMin();
370 /* Returns the max value at which bucketing ends. */
371 ValueType getMax() const {
372 return buckets_.getMax();
374 /* Returns the number of buckets */
375 size_t getNumBuckets() const {
376 return buckets_.getNumBuckets();
379 /* Returns the specified bucket (for reading only!) */
380 const Bucket& getBucketByIndex(size_t idx) const {
381 return buckets_.getByIndex(idx);
385 * Returns the minimum threshold for the bucket at the given index.
387 * The bucket at the specified index will store values in the range
388 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
389 * max is smaller than bucketMin + bucketSize.
391 ValueType getBucketMin(size_t idx) const {
392 return buckets_.getBucketMin(idx);
396 * Returns the maximum threshold for the bucket at the given index.
398 * The bucket at the specified index will store values in the range
399 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
400 * max is smaller than bucketMin + bucketSize.
402 ValueType getBucketMax(size_t idx) const {
403 return buckets_.getBucketMax(idx);
407 * Computes the total number of values stored across all buckets.
409 * Runs in O(numBuckets)
411 uint64_t computeTotalCount() const {
412 CountFromBucket countFn;
413 return buckets_.computeTotalCount(countFn);
417 * Get the bucket that the specified percentile falls into
419 * The lowest and highest percentile data points in returned bucket will be
420 * returned in the lowPct and highPct arguments, if they are not nullptr.
422 size_t getPercentileBucketIdx(
424 double* lowPct = nullptr,
425 double* highPct = nullptr) const {
426 // We unfortunately can't use lambdas here yet;
427 // Some users of this code are still built with gcc-4.4.
428 CountFromBucket countFn;
429 return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
433 * Estimate the value at the specified percentile.
435 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
437 * @return Returns an estimate for N, where N is the number where exactly pct
438 * percentage of the data points in the histogram are less than N.
440 ValueType getPercentileEstimate(double pct) const {
441 CountFromBucket countFn;
443 return buckets_.getPercentileEstimate(pct, countFn, avgFn);
447 * Get a human-readable string describing the histogram contents
449 std::string debugString() const;
452 * Write the histogram contents in tab-separated values (TSV) format.
453 * Format is "min max count sum".
455 void toTSV(std::ostream& out, bool skipEmptyBuckets = true) const;
457 struct CountFromBucket {
458 uint64_t operator()(const Bucket& bucket) const {
462 struct AvgFromBucket {
463 ValueType operator()(const Bucket& bucket) const {
464 if (bucket.count == 0) {
467 // Cast bucket.count to a signed integer type. This ensures that we
468 // perform division properly here: If bucket.sum is a signed integer
469 // type but we divide by an unsigned number, unsigned division will be
470 // performed and bucket.sum will be converted to unsigned first.
471 // If bucket.sum is unsigned, the code will still do unsigned division
474 // The only downside is if bucket.count is large enough to be negative
475 // when treated as signed. That should be extremely unlikely, though.
476 return bucket.sum / static_cast<int64_t>(bucket.count);
481 detail::HistogramBuckets<ValueType, Bucket> buckets_;