From 116f13d81d9816da852999f87f5fa7387d79f842 Mon Sep 17 00:00:00 2001 From: Giuseppe Ottaviano Date: Fri, 29 Jan 2016 07:02:58 -0800 Subject: [PATCH] Optimize getline(istream&, fbstring&) implementation Summary: Current `getline` implementation in `fbstring` always allocates, even if the passed string is already large enough. Furthermore, the growing strategy relies on outdated assumptions about the allocator. This implementation reuses the existing allocation as much as possible, then uses exponential growth. Reviewed By: luciang, philippv Differential Revision: D2871976 fb-gh-sync-id: 8db9512030be3f4953efa8f008747827504c032c --- folly/FBString.h | 109 +++++++++++------------- folly/test/FBStringBenchmark.cpp | 1 + folly/test/FBStringTest.cpp | 21 ++--- folly/test/FBStringTestBenchmarks.cpp.h | 24 ++++++ 4 files changed, 79 insertions(+), 76 deletions(-) diff --git a/folly/FBString.h b/folly/FBString.h index 2c8e5e19..a950a708 100644 --- a/folly/FBString.h +++ b/folly/FBString.h @@ -1333,10 +1333,8 @@ public: } basic_fbstring& append(const value_type* s, size_type n) { -#ifndef NDEBUG Invariant checker(*this); - (void) checker; -#endif + if (FBSTRING_UNLIKELY(!n)) { // Unlikely but must be done return *this; @@ -1411,7 +1409,7 @@ public: basic_fbstring& assign(const value_type* s, const size_type n) { Invariant checker(*this); - (void) checker; + if (size() >= n) { std::copy(s, s + n, begin()); resize(n); @@ -1473,13 +1471,56 @@ public: return begin() + pos; } +#ifndef _LIBSTDCXX_FBSTRING + private: + typedef std::basic_istream istream_type; + + public: + friend inline istream_type& getline(istream_type& is, + basic_fbstring& str, + value_type delim) { + Invariant checker(str); + + str.clear(); + size_t size = 0; + while (true) { + size_t avail = str.capacity() - size; + // fbstring has 1 byte extra capacity for the null terminator, + // and getline null-terminates the read string. + is.getline(str.store_.expand_noinit(avail), avail + 1, delim); + size += is.gcount(); + + if (is.bad() || is.eof() || !is.fail()) { + // Done by either failure, end of file, or normal read. + if (!is.bad() && !is.eof()) { + --size; // gcount() also accounts for the delimiter. + } + str.resize(size); + break; + } + + assert(size == str.size()); + assert(size == str.capacity()); + // Start at minimum allocation 63 + terminator = 64. + str.reserve(std::max(63, 3 * size / 2)); + // Clear the error so we can continue reading. + is.clear(); + } + return is; + } + + friend inline istream_type& getline(istream_type& is, basic_fbstring& str) { + return getline(is, str, '\n'); + } +#endif + private: template class Selector {}; iterator insertImplDiscr(const_iterator p, size_type n, value_type c, Selector<1>) { Invariant checker(*this); - (void) checker; + auto const pos = p - begin(); assert(p >= begin() && p <= end()); if (capacity() - size() < n) { @@ -1517,7 +1558,7 @@ private: iterator insertImpl(const_iterator i, FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { Invariant checker(*this); - (void) checker; + const size_type pos = i - begin(); const typename std::iterator_traits::difference_type n2 = std::distance(s1, s2); @@ -1581,7 +1622,7 @@ public: basic_fbstring& erase(size_type pos = 0, size_type n = npos) { Invariant checker(*this); - (void) checker; + enforce(pos <= length(), std::__throw_out_of_range, ""); procrustes(n, length() - pos); std::copy(begin() + pos + n, end(), begin() + pos); @@ -1635,7 +1676,7 @@ public: basic_fbstring& replace(size_type pos, size_type n1, StrOrLength s_or_n2, NumOrChar n_or_c) { Invariant checker(*this); - (void) checker; + enforce(pos <= size(), std::__throw_out_of_range, ""); procrustes(n1, length() - pos); const iterator b = begin() + pos; @@ -1714,7 +1755,6 @@ private: void replaceImpl(iterator i1, iterator i2, FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) { Invariant checker(*this); - (void) checker; // Handle aliased replace if (replaceAliased(i1, i2, s1, s2, @@ -2406,57 +2446,6 @@ operator<<( return os; } -#ifndef _LIBSTDCXX_FBSTRING - -template -inline -std::basic_istream::value_type, - typename basic_fbstring::traits_type>& -getline( - std::basic_istream::value_type, - typename basic_fbstring::traits_type>& is, - basic_fbstring& str, - typename basic_fbstring::value_type delim) { - // Use the nonstandard getdelim() - char * buf = nullptr; - size_t size = 0; - for (;;) { - // This looks quadratic but it really depends on realloc - auto const newSize = size + 128; - buf = static_cast(checkedRealloc(buf, newSize)); - is.getline(buf + size, newSize - size, delim); - if (is.bad() || is.eof() || !is.fail()) { - // done by either failure, end of file, or normal read - size += std::strlen(buf + size); - break; - } - // Here we have failed due to too short a buffer - // Minus one to discount the terminating '\0' - size = newSize - 1; - assert(buf[size] == 0); - // Clear the error so we can continue reading - is.clear(); - } - basic_fbstring result(buf, size, size + 1, - AcquireMallocatedString()); - result.swap(str); - return is; -} - -template -inline -std::basic_istream::value_type, - typename basic_fbstring::traits_type>& -getline( - std::basic_istream::value_type, - typename basic_fbstring::traits_type>& is, - basic_fbstring& str) { - // Just forward to the version with a delimiter - return getline(is, str, '\n'); -} - -#endif - template const typename basic_fbstring::size_type basic_fbstring::npos = diff --git a/folly/test/FBStringBenchmark.cpp b/folly/test/FBStringBenchmark.cpp index 8cb9b41c..f2eb5e72 100644 --- a/folly/test/FBStringBenchmark.cpp +++ b/folly/test/FBStringBenchmark.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include diff --git a/folly/test/FBStringTest.cpp b/folly/test/FBStringTest.cpp index 67c79cf2..d9b5948f 100644 --- a/folly/test/FBStringTest.cpp +++ b/folly/test/FBStringTest.cpp @@ -22,9 +22,9 @@ #include #include -#include -#include #include +#include +#include #include #include #include @@ -1114,7 +1114,7 @@ TEST(FBString, testAllClauses) { } TEST(FBString, testGetline) { - fbstring s1 = "\ + string s1 = "\ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras accumsan \n\ elit ut urna consectetur in sagittis mi auctor. Nulla facilisi. In nec \n\ dolor leo, vitae imperdiet neque. Donec ut erat mauris, a faucibus \n\ @@ -1132,28 +1132,17 @@ massa, ut accumsan magna. Donec imperdiet tempor nisi et \n\ laoreet. Phasellus lectus quam, ultricies ut tincidunt in, dignissim \n\ id eros. Mauris vulputate tortor nec neque pellentesque sagittis quis \n\ sed nisl. In diam lacus, lobortis ut posuere nec, ornare id quam."; - char f[] = "/tmp/fbstring_testing.XXXXXX"; - int fd = mkstemp(f); - EXPECT_TRUE(fd > 0); - if (fd > 0) { - close(fd); // Yeah - std::ofstream out(f); - if (!(out << s1)) { - EXPECT_TRUE(0) << "Couldn't write to temp file."; - return; - } - } + vector v; boost::split(v, s1, boost::is_any_of("\n")); { - ifstream input(f); + istringstream input(s1); fbstring line; FOR_EACH (i, v) { EXPECT_TRUE(!getline(input, line).fail()); EXPECT_EQ(line, *i); } } - unlink(f); } TEST(FBString, testMoveCtor) { diff --git a/folly/test/FBStringTestBenchmarks.cpp.h b/folly/test/FBStringTestBenchmarks.cpp.h index 356c46d7..e42eaad7 100644 --- a/folly/test/FBStringTestBenchmarks.cpp.h +++ b/folly/test/FBStringTestBenchmarks.cpp.h @@ -250,3 +250,27 @@ void BENCHFUN(short_append)(size_t iters, size_t arg) { } BENCHMARK_PARAM(BENCHFUN(short_append), 23); BENCHMARK_PARAM(BENCHFUN(short_append), 1024); + +void BENCHFUN(getline)(size_t iters, size_t arg) { + string lines; + + BENCHMARK_SUSPEND { + string line; + FOR_EACH_RANGE(i, 0, 512) { + randomString(&line, arg); + lines += line; + lines += '\n'; + } + } + + STRING line; + while (iters) { + std::istringstream is(lines); + while (iters && getline(is, line)) { + folly::doNotOptimizeAway(line.size()); + iters--; + } + } +} +BENCHMARK_PARAM(BENCHFUN(getline), 23); +BENCHMARK_PARAM(BENCHFUN(getline), 1000); -- 2.34.1