From 74651ade10a241046ff85206acf77010edffc039 Mon Sep 17 00:00:00 2001 From: John Ehrhardt Date: Wed, 25 Feb 2015 20:52:21 -0800 Subject: [PATCH] Updating Folly Formatting's use of separators for Decimal Integers 'd' and Numbers 'n' Summary: Updating Folly Formatting's use of separators for Decimal Integers 'd' and Numbers 'n'. Updated Decimal Integers 'd' use of separators to directly use commas ',' and grouping of 3 digits. Updated Numbers use of separators and grouping to follow locale settings. Test Plan: Wrote unit tests to validate the insertion of thousandsSeparators works. Note that the numbers unit test does not modify the locale since the test cases are not thread safe. Reviewed By: lesha@fb.com Subscribers: tudorb, trunkagent, folly-diffs@, yfeldblum, aric FB internal diff: D1821156 Tasks: 6087521 Signature: t1:1821156:1424923837:d0fb383a07fd733375b72b1905e6112afa141265 --- folly/Format-inl.h | 35 +++++++++------- folly/Format.cpp | 37 +++++++++++++++++ folly/test/FormatTest.cpp | 84 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 14 deletions(-) diff --git a/folly/Format-inl.h b/folly/Format-inl.h index 1603423e..060fce33 100644 --- a/folly/Format-inl.h +++ b/folly/Format-inl.h @@ -29,6 +29,9 @@ namespace folly { namespace detail { +// Updates the end of the buffer after the comma separators have been added. +void insertThousandsGroupingUnsafe(char* start_buffer, char** end_buffer); + extern const char formatHexUpper[256][2]; extern const char formatHexLower[256][2]; extern const char formatOctal[512][3]; @@ -493,27 +496,31 @@ class FormatValue< char* valBufBegin = nullptr; char* valBufEnd = nullptr; - // Defer to sprintf - auto useSprintf = [&] (const char* format) mutable { - valBufBegin = valBuf + 3; // room for sign and base prefix - valBufEnd = valBufBegin + sprintf(valBufBegin, format, - static_cast(uval)); - }; - int prefixLen = 0; - switch (presentation) { - case 'n': // TODO(tudorb): locale awareness? + case 'n': + arg.enforce(!arg.basePrefix, + "base prefix not allowed with '", presentation, + "' specifier"); + + arg.enforce(!arg.thousandsSeparator, + "cannot use ',' with the '", presentation, + "' specifier"); + + valBufBegin = valBuf + 3; // room for sign and base prefix + valBufEnd = valBufBegin + sprintf(valBufBegin, "%'ju", + static_cast(uval)); + break; case 'd': arg.enforce(!arg.basePrefix, "base prefix not allowed with '", presentation, "' specifier"); + valBufBegin = valBuf + 3; // room for sign and base prefix + + // Use uintToBuffer, faster than sprintf + valBufEnd = valBufBegin + uint64ToBufferUnsafe(uval, valBufBegin); if (arg.thousandsSeparator) { - useSprintf("%'ju"); - } else { - // Use uintToBuffer, faster than sprintf - valBufBegin = valBuf + 3; - valBufEnd = valBufBegin + uint64ToBufferUnsafe(uval, valBufBegin); + detail::insertThousandsGroupingUnsafe(valBufBegin, &valBufEnd); } break; case 'c': diff --git a/folly/Format.cpp b/folly/Format.cpp index feec8fb2..6bba0aad 100644 --- a/folly/Format.cpp +++ b/folly/Format.cpp @@ -142,4 +142,41 @@ void FormatArg::validate(Type type) const { } } +namespace detail { +void insertThousandsGroupingUnsafe(char* start_buffer, char** end_buffer) { + uint32_t remaining_digits = *end_buffer - start_buffer; + uint32_t separator_size = (remaining_digits - 1) / 3; + uint32_t result_size = remaining_digits + separator_size; + *end_buffer = *end_buffer + separator_size; + + // get the end of the new string with the separators + uint32_t buffer_write_index = result_size - 1; + uint32_t buffer_read_index = remaining_digits - 1; + start_buffer[buffer_write_index + 1] = 0; + + uint32_t count = 0; + bool done = false; + uint32_t next_group_size = 3; + + while (!done) { + uint32_t current_group_size = std::max(1, + std::min(remaining_digits, next_group_size)); + + // write out the current group's digits to the buffer index + for (uint32_t i = 0; i < current_group_size; i++) { + start_buffer[buffer_write_index--] = start_buffer[buffer_read_index--]; + } + + // if not finished, write the separator before the next group + if (buffer_write_index < buffer_write_index + 1) { + start_buffer[buffer_write_index--] = ','; + } else { + done = true; + } + + remaining_digits -= current_group_size; + } +} +} // detail + } // namespace folly diff --git a/folly/test/FormatTest.cpp b/folly/test/FormatTest.cpp index e67c5ece..170ec146 100644 --- a/folly/test/FormatTest.cpp +++ b/folly/test/FormatTest.cpp @@ -287,6 +287,90 @@ TEST(Format, dynamic) { EXPECT_EQ("(null)", sformat("{}", dynamic(nullptr))); } +TEST(Format, separatorDecimalInteger) { + EXPECT_EQ("0", sformat("{:,d}", 0)); + EXPECT_EQ("1", sformat("{:d}", 1)); + EXPECT_EQ("1", sformat("{:,d}", 1)); + EXPECT_EQ("1", sformat("{:,}", 1)); + EXPECT_EQ("123", sformat("{:d}", 123)); + EXPECT_EQ("123", sformat("{:,d}", 123)); + EXPECT_EQ("123", sformat("{:,}", 123)); + EXPECT_EQ("1234", sformat("{:d}", 1234)); + EXPECT_EQ("1,234", sformat("{:,d}", 1234)); + EXPECT_EQ("1,234", sformat("{:,}", 1234)); + EXPECT_EQ("12345678", sformat("{:d}", 12345678)); + EXPECT_EQ("12,345,678", sformat("{:,d}", 12345678)); + EXPECT_EQ("12,345,678", sformat("{:,}", 12345678)); + EXPECT_EQ("-1234", sformat("{:d}", -1234)); + EXPECT_EQ("-1,234", sformat("{:,d}", -1234)); + EXPECT_EQ("-1,234", sformat("{:,}", -1234)); + + int64_t max_int64_t = std::numeric_limits::max(); + int64_t min_int64_t = std::numeric_limits::min(); + uint64_t max_uint64_t = std::numeric_limits::max(); + EXPECT_EQ("9223372036854775807", sformat("{:d}", max_int64_t)); + EXPECT_EQ("9,223,372,036,854,775,807", sformat("{:,d}", max_int64_t)); + EXPECT_EQ("9,223,372,036,854,775,807", sformat("{:,}", max_int64_t)); + EXPECT_EQ("-9223372036854775808", sformat("{:d}", min_int64_t)); + EXPECT_EQ("-9,223,372,036,854,775,808", sformat("{:,d}", min_int64_t)); + EXPECT_EQ("-9,223,372,036,854,775,808", sformat("{:,}", min_int64_t)); + EXPECT_EQ("18446744073709551615", sformat("{:d}", max_uint64_t)); + EXPECT_EQ("18,446,744,073,709,551,615", sformat("{:,d}", max_uint64_t)); + EXPECT_EQ("18,446,744,073,709,551,615", sformat("{:,}", max_uint64_t)); + + EXPECT_EQ(" -1,234", sformat("{: 8,}", -1234)); + EXPECT_EQ("-001,234", sformat("{:08,d}", -1234)); + EXPECT_EQ("-00001,234", sformat("{:010,d}", -1234)); + EXPECT_EQ(" -1,234 ", sformat("{:^ 8,d}", -1234)); +} + +// Note that sformat("{:n}", ...) uses the current locale setting to insert the +// appropriate number separator characters. +TEST(Format, separatorNumber) { + EXPECT_EQ("0", sformat("{:n}", 0)); + EXPECT_EQ("1", sformat("{:n}", 1)); + EXPECT_EQ("123", sformat("{:n}", 123)); + EXPECT_EQ("1234", sformat("{:n}", 1234)); + EXPECT_EQ("12345678", sformat("{:n}", 12345678)); + EXPECT_EQ("-1234", sformat("{:n}", -1234)); + + int64_t max_int64_t = std::numeric_limits::max(); + int64_t min_int64_t = std::numeric_limits::min(); + uint64_t max_uint64_t = std::numeric_limits::max(); + EXPECT_EQ("9223372036854775807", sformat("{:n}", max_int64_t)); + EXPECT_EQ("-9223372036854775808", sformat("{:n}", min_int64_t)); + EXPECT_EQ("18446744073709551615", sformat("{:n}", max_uint64_t)); + + EXPECT_EQ(" -1234", sformat("{: 8n}", -1234)); + EXPECT_EQ("-0001234", sformat("{:08n}", -1234)); + EXPECT_EQ("-000001234", sformat("{:010n}", -1234)); + EXPECT_EQ(" -1234 ", sformat("{:^ 8n}", -1234)); +} + +// insertThousandsGroupingUnsafe requires non-const params +static void testGrouping(const char* a_str, const char* expected) { + char str[256]; + strcpy(str, a_str); + char * end_ptr = str + strlen(str); + folly::detail::insertThousandsGroupingUnsafe(str, &end_ptr); + ASSERT_STREQ(expected, str); +} + +TEST(Format, separatorUnit) { + testGrouping("0", "0"); + testGrouping("1", "1"); + testGrouping("12", "12"); + testGrouping("123", "123"); + testGrouping("1234", "1,234"); + testGrouping("12345", "12,345"); + testGrouping("123456", "123,456"); + testGrouping("1234567", "1,234,567"); + testGrouping("1234567890", "1,234,567,890"); + testGrouping("9223372036854775807", "9,223,372,036,854,775,807"); + testGrouping("18446744073709551615", "18,446,744,073,709,551,615"); +} + + namespace { struct KeyValue { -- 2.34.1