From 6b0452df2faa5b6b001fd65127243bf507d2b158 Mon Sep 17 00:00:00 2001 From: Tom Jackson Date: Tue, 30 Apr 2013 13:13:34 -0700 Subject: [PATCH] Fixed-size split() Summary: There are quite a few places where we split strings into a fixed number of fields. This enables this to be done a bit faster by not using any variable-length structures at runtime. Test Plan: Unit tests, Benchmarks Reviewed By: philipp@fb.com FB internal diff: D794523 --- folly/String-inl.h | 47 +++++++++++++++++++++++++++ folly/String.h | 29 +++++++++++++++++ folly/test/StringTest.cpp | 67 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) diff --git a/folly/String-inl.h b/folly/String-inl.h index 8ae2d20a..c608398a 100644 --- a/folly/String-inl.h +++ b/folly/String-inl.h @@ -347,6 +347,39 @@ template StringPiece prepareDelim(const String& s) { } inline char prepareDelim(char c) { return c; } +template +bool splitFixed(const Delim& delimiter, + StringPiece input, + StringPiece& out) { + if (exact && UNLIKELY(std::string::npos != input.find(delimiter))) { + return false; + } + out = input; + return true; +} + +template +bool splitFixed(const Delim& delimiter, + StringPiece input, + StringPiece& outHead, + StringPieces&... outTail) { + size_t cut = input.find(delimiter); + if (UNLIKELY(cut == std::string::npos)) { + return false; + } + StringPiece head(input.begin(), input.begin() + cut); + StringPiece tail(input.begin() + cut + detail::delimSize(delimiter), + input.end()); + if (LIKELY(splitFixed(delimiter, tail, outTail...))) { + outHead = head; + return true; + } + return false; +} + } ////////////////////////////////////////////////////////////////////// @@ -388,6 +421,20 @@ void splitTo(const Delim& delimiter, ignoreEmpty); } +template +bool split(const Delim& delimiter, + StringPiece input, + StringPiece& outHead, + StringPieces&... outTail) { + return detail::splitFixed( + detail::prepareDelim(delimiter), + input, + outHead, + outTail...); +} + namespace detail { template diff --git a/folly/String.h b/folly/String.h index 2edd0da9..93a4c658 100644 --- a/folly/String.h +++ b/folly/String.h @@ -394,6 +394,35 @@ void splitTo(const Delim& delimiter, OutputIterator out, bool ignoreEmpty = false); +/* + * Split a string into a fixed number of pieces by delimiter. Returns 'true' if + * the fields were all successfully populated. + * + * Example: + * + * folly::StringPiece name, key, value; + * if (folly::split('\t', line, name, key, value)) + * ... + * + * The 'exact' template paremeter specifies how the function behaves when too + * many fields are present in the input string. When 'exact' is set to its + * default value of 'true', a call to split will fail if the number of fields in + * the input string does not exactly match the number of output parameters + * passed. If 'exact' is overridden to 'false', all remaining fields will be + * stored, unsplit, in the last field, as shown below: + * + * folly::StringPiece x, y. + * if (folly::split(':', "a:b:c", x, y)) + * assert(x == "a" && y == "b:c"); + */ +template +bool split(const Delim& delimiter, + StringPiece input, + StringPiece& outHead, + StringPieces&... outTail); + /* * Join list of tokens. * diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp index 8c6d6d61..6a9e2e99 100644 --- a/folly/test/StringTest.cpp +++ b/folly/test/StringTest.cpp @@ -758,6 +758,49 @@ TEST(Split, pieces_fbvector) { piecesTest(); } +TEST(Split, fixed) { + StringPiece a, b, c, d; + + EXPECT_TRUE(folly::split('.', "a.b.c.d", a, b, c, d)); + EXPECT_TRUE(folly::split('.', "a.b.c", a, b, c)); + EXPECT_TRUE(folly::split('.', "a.b", a, b)); + EXPECT_TRUE(folly::split('.', "a", a)); + + EXPECT_TRUE(folly::split('.', "a.b.c.d", a, b, c, d)); + EXPECT_TRUE(folly::split('.', "a.b.c", a, b, c)); + EXPECT_TRUE(folly::split('.', "a.b", a, b)); + EXPECT_TRUE(folly::split('.', "a", a)); + + EXPECT_TRUE(folly::split('.', "a.b.c", a, b, c)); + EXPECT_EQ("a", a); + EXPECT_EQ("b", b); + EXPECT_EQ("c", c); + EXPECT_FALSE(folly::split('.', "a.b", a, b, c)); + EXPECT_TRUE(folly::split('.', "a.b.c", a, b)); + EXPECT_EQ("a", a); + EXPECT_EQ("b.c", b); + + EXPECT_TRUE(folly::split('.', "a.b.c", a, b, c)); + EXPECT_EQ("a", a); + EXPECT_EQ("b", b); + EXPECT_EQ("c", c); + EXPECT_FALSE(folly::split('.', "a.b.c", a, b)); + EXPECT_FALSE(folly::split('.', "a.b", a, b, c)); + + EXPECT_TRUE(folly::split('.', "a.b", a, b)); + EXPECT_EQ("a", a); + EXPECT_EQ("b", b); + EXPECT_FALSE(folly::split('.', "a", a, b)); + EXPECT_TRUE(folly::split('.', "a.b", a)); + EXPECT_EQ("a.b", a); + + EXPECT_TRUE(folly::split('.', "a.b", a, b)); + EXPECT_EQ("a", a); + EXPECT_EQ("b", b); + EXPECT_FALSE(folly::split('.', "a", a, b)); + EXPECT_FALSE(folly::split('.', "a.b", a)); +} + TEST(String, join) { string output; @@ -869,6 +912,22 @@ BENCHMARK(splitOnSingleChar, iters) { } } +BENCHMARK(splitOnSingleCharFixed, iters) { + static const std::string line = "one:two:three:four"; + for (int i = 0; i < iters << 4; ++i) { + StringPiece a, b, c, d; + folly::split(':', line, a, b, c, d); + } +} + +BENCHMARK(splitOnSingleCharFixedAllowExtra, iters) { + static const std::string line = "one:two:three:four"; + for (int i = 0; i < iters << 4; ++i) { + StringPiece a, b, c, d; + folly::split(':', line, a, b, c, d); + } +} + BENCHMARK(splitStr, iters) { static const std::string line = "one-*-two-*-three-*-four"; for (int i = 0; i < iters << 4; ++i) { @@ -877,6 +936,14 @@ BENCHMARK(splitStr, iters) { } } +BENCHMARK(splitStrFixed, iters) { + static const std::string line = "one-*-two-*-three-*-four"; + for (int i = 0; i < iters << 4; ++i) { + StringPiece a, b, c, d; + folly::split("-*-", line, a, b, c, d); + } +} + BENCHMARK(boost_splitOnSingleChar, iters) { static const std::string line = "one:two:three:four"; for (int i = 0; i < iters << 4; ++i) { -- 2.34.1