Updating Folly Formatting's use of separators for Decimal Integers 'd' and Numbers 'n'
authorJohn Ehrhardt <ehrhardt@fb.com>
Thu, 26 Feb 2015 04:52:21 +0000 (20:52 -0800)
committerAlecs King <int@fb.com>
Tue, 3 Mar 2015 03:30:10 +0000 (19:30 -0800)
Summary:
Updating Folly Formatting's use of separators for Decimal Integers 'd' and Numbers 'n'.
Updated Decimal Integers 'd' use of separators to directly use commas ',' and grouping of 3 digits.
Updated Numbers use of separators and grouping to follow locale settings.

Test Plan:
Wrote unit tests to validate the insertion of thousandsSeparators works.
Note that the numbers unit test does not modify the locale since the test cases are not thread safe.

Reviewed By: lesha@fb.com

Subscribers: tudorb, trunkagent, folly-diffs@, yfeldblum, aric

FB internal diff: D1821156

Tasks: 6087521

Signature: t1:1821156:1424923837:d0fb383a07fd733375b72b1905e6112afa141265

folly/Format-inl.h
folly/Format.cpp
folly/test/FormatTest.cpp

index 1603423eef1c921016b4656cd23bfde05e755b79..060fce33c25a046dc41f6a9be73590ac97e481ca 100644 (file)
@@ -29,6 +29,9 @@ namespace folly {
 
 namespace detail {
 
+// Updates the end of the buffer after the comma separators have been added.
+void insertThousandsGroupingUnsafe(char* start_buffer, char** end_buffer);
+
 extern const char formatHexUpper[256][2];
 extern const char formatHexLower[256][2];
 extern const char formatOctal[512][3];
@@ -493,27 +496,31 @@ class FormatValue<
     char* valBufBegin = nullptr;
     char* valBufEnd = nullptr;
 
-    // Defer to sprintf
-    auto useSprintf = [&] (const char* format) mutable {
-      valBufBegin = valBuf + 3;  // room for sign and base prefix
-      valBufEnd = valBufBegin + sprintf(valBufBegin, format,
-                                        static_cast<uintmax_t>(uval));
-    };
-
     int prefixLen = 0;
-
     switch (presentation) {
-    case 'n':  // TODO(tudorb): locale awareness?
+    case 'n':
+      arg.enforce(!arg.basePrefix,
+                  "base prefix not allowed with '", presentation,
+                  "' specifier");
+
+      arg.enforce(!arg.thousandsSeparator,
+                  "cannot use ',' with the '", presentation,
+                  "' specifier");
+
+      valBufBegin = valBuf + 3;  // room for sign and base prefix
+      valBufEnd = valBufBegin + sprintf(valBufBegin, "%'ju",
+                                        static_cast<uintmax_t>(uval));
+      break;
     case 'd':
       arg.enforce(!arg.basePrefix,
                   "base prefix not allowed with '", presentation,
                   "' specifier");
+      valBufBegin = valBuf + 3;  // room for sign and base prefix
+
+      // Use uintToBuffer, faster than sprintf
+      valBufEnd = valBufBegin + uint64ToBufferUnsafe(uval, valBufBegin);
       if (arg.thousandsSeparator) {
-        useSprintf("%'ju");
-      } else {
-        // Use uintToBuffer, faster than sprintf
-        valBufBegin = valBuf + 3;
-        valBufEnd = valBufBegin + uint64ToBufferUnsafe(uval, valBufBegin);
+        detail::insertThousandsGroupingUnsafe(valBufBegin, &valBufEnd);
       }
       break;
     case 'c':
index feec8fb255c3f9b4c15807e22f43e9433aad56d8..6bba0aade6aaf7b2ac81db8062804e443c9b9b55 100644 (file)
@@ -142,4 +142,41 @@ void FormatArg::validate(Type type) const {
   }
 }
 
+namespace detail {
+void insertThousandsGroupingUnsafe(char* start_buffer, char** end_buffer) {
+  uint32_t remaining_digits = *end_buffer - start_buffer;
+  uint32_t separator_size = (remaining_digits - 1) / 3;
+  uint32_t result_size = remaining_digits + separator_size;
+  *end_buffer = *end_buffer + separator_size;
+
+  // get the end of the new string with the separators
+  uint32_t buffer_write_index = result_size - 1;
+  uint32_t buffer_read_index = remaining_digits - 1;
+  start_buffer[buffer_write_index + 1] = 0;
+
+  uint32_t count = 0;
+  bool done = false;
+  uint32_t next_group_size = 3;
+
+  while (!done) {
+    uint32_t current_group_size = std::max<uint32_t>(1,
+      std::min<uint32_t>(remaining_digits, next_group_size));
+
+    // write out the current group's digits to the buffer index
+    for (uint32_t i = 0; i < current_group_size; i++) {
+      start_buffer[buffer_write_index--] = start_buffer[buffer_read_index--];
+    }
+
+    // if not finished, write the separator before the next group
+    if (buffer_write_index < buffer_write_index + 1) {
+      start_buffer[buffer_write_index--] = ',';
+    } else {
+      done = true;
+    }
+
+    remaining_digits -= current_group_size;
+  }
+}
+} // detail
+
 }  // namespace folly
index e67c5ece6c17aa91ccb5d9709276921941f7dfb4..170ec1469d5f9f6f990060a5673a08c223a49347 100644 (file)
@@ -287,6 +287,90 @@ TEST(Format, dynamic) {
   EXPECT_EQ("(null)", sformat("{}", dynamic(nullptr)));
 }
 
+TEST(Format, separatorDecimalInteger) {
+  EXPECT_EQ("0", sformat("{:,d}", 0));
+  EXPECT_EQ("1", sformat("{:d}", 1));
+  EXPECT_EQ("1", sformat("{:,d}", 1));
+  EXPECT_EQ("1", sformat("{:,}", 1));
+  EXPECT_EQ("123", sformat("{:d}", 123));
+  EXPECT_EQ("123", sformat("{:,d}", 123));
+  EXPECT_EQ("123", sformat("{:,}", 123));
+  EXPECT_EQ("1234", sformat("{:d}", 1234));
+  EXPECT_EQ("1,234", sformat("{:,d}", 1234));
+  EXPECT_EQ("1,234", sformat("{:,}", 1234));
+  EXPECT_EQ("12345678", sformat("{:d}", 12345678));
+  EXPECT_EQ("12,345,678", sformat("{:,d}", 12345678));
+  EXPECT_EQ("12,345,678", sformat("{:,}", 12345678));
+  EXPECT_EQ("-1234", sformat("{:d}", -1234));
+  EXPECT_EQ("-1,234", sformat("{:,d}", -1234));
+  EXPECT_EQ("-1,234", sformat("{:,}", -1234));
+
+  int64_t max_int64_t = std::numeric_limits<int64_t>::max();
+  int64_t min_int64_t = std::numeric_limits<int64_t>::min();
+  uint64_t max_uint64_t = std::numeric_limits<uint64_t>::max();
+  EXPECT_EQ("9223372036854775807", sformat("{:d}", max_int64_t));
+  EXPECT_EQ("9,223,372,036,854,775,807", sformat("{:,d}", max_int64_t));
+  EXPECT_EQ("9,223,372,036,854,775,807", sformat("{:,}", max_int64_t));
+  EXPECT_EQ("-9223372036854775808", sformat("{:d}", min_int64_t));
+  EXPECT_EQ("-9,223,372,036,854,775,808", sformat("{:,d}", min_int64_t));
+  EXPECT_EQ("-9,223,372,036,854,775,808", sformat("{:,}", min_int64_t));
+  EXPECT_EQ("18446744073709551615", sformat("{:d}", max_uint64_t));
+  EXPECT_EQ("18,446,744,073,709,551,615", sformat("{:,d}", max_uint64_t));
+  EXPECT_EQ("18,446,744,073,709,551,615", sformat("{:,}", max_uint64_t));
+
+  EXPECT_EQ("  -1,234", sformat("{: 8,}", -1234));
+  EXPECT_EQ("-001,234", sformat("{:08,d}", -1234));
+  EXPECT_EQ("-00001,234", sformat("{:010,d}", -1234));
+  EXPECT_EQ(" -1,234 ", sformat("{:^ 8,d}", -1234));
+}
+
+// Note that sformat("{:n}", ...) uses the current locale setting to insert the
+// appropriate number separator characters.
+TEST(Format, separatorNumber) {
+  EXPECT_EQ("0", sformat("{:n}", 0));
+  EXPECT_EQ("1", sformat("{:n}", 1));
+  EXPECT_EQ("123", sformat("{:n}", 123));
+  EXPECT_EQ("1234", sformat("{:n}", 1234));
+  EXPECT_EQ("12345678", sformat("{:n}", 12345678));
+  EXPECT_EQ("-1234", sformat("{:n}", -1234));
+
+  int64_t max_int64_t = std::numeric_limits<int64_t>::max();
+  int64_t min_int64_t = std::numeric_limits<int64_t>::min();
+  uint64_t max_uint64_t = std::numeric_limits<uint64_t>::max();
+  EXPECT_EQ("9223372036854775807", sformat("{:n}", max_int64_t));
+  EXPECT_EQ("-9223372036854775808", sformat("{:n}", min_int64_t));
+  EXPECT_EQ("18446744073709551615", sformat("{:n}", max_uint64_t));
+
+  EXPECT_EQ("   -1234", sformat("{: 8n}", -1234));
+  EXPECT_EQ("-0001234", sformat("{:08n}", -1234));
+  EXPECT_EQ("-000001234", sformat("{:010n}", -1234));
+  EXPECT_EQ(" -1234  ", sformat("{:^ 8n}", -1234));
+}
+
+// insertThousandsGroupingUnsafe requires non-const params
+static void testGrouping(const char* a_str, const char* expected) {
+  char str[256];
+  strcpy(str, a_str);
+  char * end_ptr = str + strlen(str);
+  folly::detail::insertThousandsGroupingUnsafe(str, &end_ptr);
+  ASSERT_STREQ(expected, str);
+}
+
+TEST(Format, separatorUnit) {
+  testGrouping("0", "0");
+  testGrouping("1", "1");
+  testGrouping("12", "12");
+  testGrouping("123", "123");
+  testGrouping("1234", "1,234");
+  testGrouping("12345", "12,345");
+  testGrouping("123456", "123,456");
+  testGrouping("1234567", "1,234,567");
+  testGrouping("1234567890", "1,234,567,890");
+  testGrouping("9223372036854775807", "9,223,372,036,854,775,807");
+  testGrouping("18446744073709551615", "18,446,744,073,709,551,615");
+}
+
+
 namespace {
 
 struct KeyValue {