BucketedTimeSeries: fix type converison issues computing avg()
authorAdam Simpkins <simpkins@fb.com>
Mon, 20 Aug 2012 21:58:25 +0000 (14:58 -0700)
committerTudor Bosman <tudorb@fb.com>
Sun, 26 Aug 2012 18:13:53 +0000 (11:13 -0700)
Summary:
D527040 had a bug in the code to compute the average: it incorrectly
performed unsigned division when ValueType was a signed integer type.
As a result, the average was reported incorrectly for stats with
negative values.

This makes the average code more intelligent when handling type
conversions: if the caller wants a floating point value, or if the input
type is floating point, floating point division is always returned.
Otherwise, if the input is a signed type, signed integer division is
performed.  Otherwise, unsigned integer division is performed.

Test Plan: beholdunittests

Reviewed By: lars@fb.com

FB internal diff: D553583

folly/detail/Stats.h
folly/stats/BucketedTimeSeries-defs.h
folly/test/TimeseriesTest.cpp

index 1b74aa4d7aad0be3dfb3d92760c5c8308a85c33d..b3fa7e68ea7300d39876871349895f49cef50817 100644 (file)
 
 namespace folly { namespace detail {
 
+/*
+ * Helper functions for how to perform division based on the desired
+ * return type.
+ */
+
+// For floating point input types, do floating point division
+template <typename ReturnType, typename ValueType>
+typename std::enable_if<std::is_floating_point<ValueType>::value,
+                        ReturnType>::type
+avgHelper(ValueType sum, uint64_t count) {
+  if (count == 0) { return ReturnType(0); }
+  return static_cast<ReturnType>(sum / count);
+}
+
+// For floating point return types, do floating point division
+template <typename ReturnType, typename ValueType>
+typename std::enable_if<std::is_floating_point<ReturnType>::value &&
+                        !std::is_floating_point<ValueType>::value,
+                        ReturnType>::type
+avgHelper(ValueType sum, uint64_t count) {
+  if (count == 0) { return ReturnType(0); }
+  return static_cast<ReturnType>(sum) / count;
+}
+
+// For signed integer input types, do signed division
+template <typename ReturnType, typename ValueType>
+typename std::enable_if<!std::is_floating_point<ReturnType>::value &&
+                        !std::is_floating_point<ValueType>::value &&
+                        std::is_signed<ValueType>::value,
+                        ReturnType>::type
+avgHelper(ValueType sum, uint64_t count) {
+  if (count == 0) { return ReturnType(0); }
+  return sum / static_cast<int64_t>(count);
+}
+
+// For unsigned integer input types, do unsigned division
+template <typename ReturnType, typename ValueType>
+typename std::enable_if<!std::is_floating_point<ReturnType>::value &&
+                        !std::is_floating_point<ValueType>::value &&
+                        std::is_unsigned<ValueType>::value,
+                        ReturnType>::type
+avgHelper(ValueType sum, uint64_t count) {
+  if (count == 0) { return ReturnType(0); }
+  return sum / count;
+}
+
+
 template<typename T>
 struct Bucket {
  public:
@@ -55,9 +102,7 @@ struct Bucket {
 
   template <typename ReturnType>
   ReturnType avg() const {
-    return (count ?
-            static_cast<ReturnType>(sum) / count :
-            ReturnType(0));
+    return avgHelper<ReturnType>(sum, count);
   }
 
   ValueType sum;
index e65b197db7e74a6b77f0c5f339cef5e76195491d..61bf7f613bbc46a5e2c2fe6931cd18c106f3b4f0 100644 (file)
@@ -233,7 +233,7 @@ ReturnType BucketedTimeSeries<VT, TT>::avg(TimeType start, TimeType end) const {
     return ReturnType(0);
   }
 
-  return static_cast<ReturnType>(sum) / count;
+  return detail::avgHelper<ReturnType>(sum, count);
 }
 
 /*
index 84b64b1412525efa3e45ab284db2044894a4cfea..c052b7067919be42bfd7949dbdd16914f462fb4e 100644 (file)
@@ -19,6 +19,8 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "folly/Foreach.h"
+
 using std::chrono::seconds;
 using std::string;
 using std::vector;
@@ -319,6 +321,116 @@ TEST(BucketedTimeSeries, rate) {
   EXPECT_NEAR(1.5, ts.countRate(), 0.005);
 }
 
+TEST(BucketedTimeSeries, avgTypeConversion) {
+  // The average code has many different code paths to decide what type of
+  // division to perform (floating point, signed integer, unsigned integer).
+  // Test the various code paths.
+
+  {
+    // Simple sanity tests for small positive integer values
+    BucketedTimeSeries<int64_t> ts(60, seconds(600));
+    ts.addValue(seconds(0), 4, 100);
+    ts.addValue(seconds(0), 10, 200);
+    ts.addValue(seconds(0), 16, 100);
+
+    EXPECT_DOUBLE_EQ(ts.avg(), 10.0);
+    EXPECT_DOUBLE_EQ(ts.avg<float>(), 10.0);
+    EXPECT_EQ(ts.avg<uint64_t>(), 10);
+    EXPECT_EQ(ts.avg<int64_t>(), 10);
+    EXPECT_EQ(ts.avg<int32_t>(), 10);
+    EXPECT_EQ(ts.avg<int16_t>(), 10);
+    EXPECT_EQ(ts.avg<int8_t>(), 10);
+    EXPECT_EQ(ts.avg<uint8_t>(), 10);
+  }
+
+  {
+    // Test signed integer types with negative values
+    BucketedTimeSeries<int64_t> ts(60, seconds(600));
+    ts.addValue(seconds(0), -100);
+    ts.addValue(seconds(0), -200);
+    ts.addValue(seconds(0), -300);
+    ts.addValue(seconds(0), -200, 65535);
+
+    EXPECT_DOUBLE_EQ(ts.avg(), -200.0);
+    EXPECT_DOUBLE_EQ(ts.avg<float>(), -200.0);
+    EXPECT_EQ(ts.avg<int64_t>(), -200);
+    EXPECT_EQ(ts.avg<int32_t>(), -200);
+    EXPECT_EQ(ts.avg<int16_t>(), -200);
+  }
+
+  {
+    // Test uint64_t values that would overflow int64_t
+    BucketedTimeSeries<uint64_t> ts(60, seconds(600));
+    ts.addValueAggregated(seconds(0),
+                          std::numeric_limits<uint64_t>::max(),
+                          std::numeric_limits<uint64_t>::max());
+
+    EXPECT_DOUBLE_EQ(ts.avg(), 1.0);
+    EXPECT_DOUBLE_EQ(ts.avg<float>(), 1.0);
+    EXPECT_EQ(ts.avg<uint64_t>(), 1);
+    EXPECT_EQ(ts.avg<int64_t>(), 1);
+    EXPECT_EQ(ts.avg<int8_t>(), 1);
+  }
+
+  {
+    // Test doubles with small-ish values that will fit in integer types
+    BucketedTimeSeries<double> ts(60, seconds(600));
+    ts.addValue(seconds(0), 4.0, 100);
+    ts.addValue(seconds(0), 10.0, 200);
+    ts.addValue(seconds(0), 16.0, 100);
+
+    EXPECT_DOUBLE_EQ(ts.avg(), 10.0);
+    EXPECT_DOUBLE_EQ(ts.avg<float>(), 10.0);
+    EXPECT_EQ(ts.avg<uint64_t>(), 10);
+    EXPECT_EQ(ts.avg<int64_t>(), 10);
+    EXPECT_EQ(ts.avg<int32_t>(), 10);
+    EXPECT_EQ(ts.avg<int16_t>(), 10);
+    EXPECT_EQ(ts.avg<int8_t>(), 10);
+    EXPECT_EQ(ts.avg<uint8_t>(), 10);
+  }
+
+  {
+    // Test doubles with huge values
+    BucketedTimeSeries<double> ts(60, seconds(600));
+    ts.addValue(seconds(0), 1e19, 100);
+    ts.addValue(seconds(0), 2e19, 200);
+    ts.addValue(seconds(0), 3e19, 100);
+
+    EXPECT_DOUBLE_EQ(ts.avg(), 2e19);
+    EXPECT_NEAR(ts.avg<float>(), 2e19, 1e11);
+  }
+
+  {
+    // Test doubles where the sum adds up larger than a uint64_t,
+    // but the average fits in an int64_t
+    BucketedTimeSeries<double> ts(60, seconds(600));
+    uint64_t value = 0x3fffffffffffffff;
+    FOR_EACH_RANGE(i, 0, 16) {
+      ts.addValue(seconds(0), value);
+    }
+
+    EXPECT_DOUBLE_EQ(ts.avg(), value);
+    EXPECT_DOUBLE_EQ(ts.avg<float>(), value);
+    EXPECT_DOUBLE_EQ(ts.avg<uint64_t>(), value);
+    EXPECT_DOUBLE_EQ(ts.avg<int64_t>(), value);
+  }
+
+  {
+    // Test BucketedTimeSeries with a smaller integer type
+    BucketedTimeSeries<int16_t> ts(60, seconds(600));
+    FOR_EACH_RANGE(i, 0, 101) {
+      ts.addValue(seconds(0), i);
+    }
+
+    EXPECT_DOUBLE_EQ(ts.avg(), 50.0);
+    EXPECT_DOUBLE_EQ(ts.avg<float>(), 50.0);
+    EXPECT_DOUBLE_EQ(ts.avg<uint64_t>(), 50);
+    EXPECT_DOUBLE_EQ(ts.avg<int64_t>(), 50);
+    EXPECT_DOUBLE_EQ(ts.avg<int16_t>(), 50);
+    EXPECT_DOUBLE_EQ(ts.avg<int8_t>(), 50);
+  }
+}
+
 TEST(BucketedTimeSeries, forEachBucket) {
   typedef BucketedTimeSeries<int64_t>::Bucket Bucket;
   struct BucketInfo {