From 5a2591ef0afd71a3fd6998907fbb06c9385de329 Mon Sep 17 00:00:00 2001 From: Haim Grosman Date: Mon, 1 Jun 2015 17:14:49 -0700 Subject: [PATCH] folly: adding folly::ltrimWhitespace / folly::rtrimWhitespace Summary: folly: adding folly::trimWhitespace, extending the idea of folly::skipWhitespace. The reason for adding it is that we want to be able to do: auto s = skipWhitespace(trimWhitespace(" aaaa ")) which is very similar to Python's strip() Test Plan: adding unit tests: # make ; _bin/folly/test/string_test --gtest_filter="String.whitespace" # fbmake --ccache off --distcc off dbg -j 16 fbmake dbg -j 16 Fbmake run id: G62i4cDP42U Note: Google Test filter = String.whitespace [==========] Running 1 test from 1 test case. [----------] Global test environment set-up. [----------] 1 test from String [ RUN ] String.whitespace [ OK ] String.whitespace (0 ms) [----------] 1 test from String (0 ms total) [----------] Global test environment tear-down [==========] 1 test from 1 test case ran. (0 ms total) [ PASSED ] 1 test. Reviewed By: ldbrandy@fb.com Subscribers: ldbrandy, folly-diffs@, yfeldblum, chalfant, azhavnerchik FB internal diff: D2109364 Signature: t1:2109364:1433192444:862e237bba1928fcb94be1f95c57a68d457939e9 --- folly/String.h | 25 ++++++++++++++++++++- folly/StringBase.cpp | 27 +++++++++++++++++----- folly/test/StringTest.cpp | 47 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 6 deletions(-) diff --git a/folly/String.h b/folly/String.h index ef0ef3f6..04004b56 100644 --- a/folly/String.h +++ b/folly/String.h @@ -554,7 +554,30 @@ std::string join(const Delim& delimiter, Iterator begin, Iterator end) { * Returns a subpiece with all whitespace removed from the front of @sp. * Whitespace means any of [' ', '\n', '\r', '\t']. */ -StringPiece skipWhitespace(StringPiece sp); +StringPiece ltrimWhitespace(StringPiece sp); + +/** + * Returns a subpiece with all whitespace removed from the back of @sp. + * Whitespace means any of [' ', '\n', '\r', '\t']. 
+ */ +StringPiece rtrimWhitespace(StringPiece sp); + +/** + * Returns a subpiece with all whitespace removed from the back and front of @sp. + * Whitespace means any of [' ', '\n', '\r', '\t']. + */ +inline StringPiece trimWhitespace(StringPiece sp) { + return ltrimWhitespace(rtrimWhitespace(sp)); +} + +/** + * Returns a subpiece with all whitespace removed from the front of @sp. + * Whitespace means any of [' ', '\n', '\r', '\t']. + * DEPRECATED: use ltrimWhitespace, which this forwards to. @see rtrimWhitespace + */ +inline StringPiece skipWhitespace(StringPiece sp) { + return ltrimWhitespace(sp); +} /** * Fast, in-place lowercasing of ASCII alphabetic characters in strings. diff --git a/folly/StringBase.cpp b/folly/StringBase.cpp index 6c428d68..e51dea99 100644 --- a/folly/StringBase.cpp +++ b/folly/StringBase.cpp @@ -18,18 +18,19 @@ namespace folly { -StringPiece skipWhitespace(StringPiece sp) { +static inline bool is_oddspace(char c) { + return c == '\n' || c == '\t' || c == '\r'; +} + +StringPiece ltrimWhitespace(StringPiece sp) { // Spaces other than ' ' characters are less common but should be // checked. This configuration where we loop on the ' ' // separately from oddspaces was empirically fastest. - auto oddspace = [] (char c) { - return c == '\n' || c == '\t' || c == '\r'; - }; loop: for (; !sp.empty() && sp.front() == ' '; sp.pop_front()) { } - if (!sp.empty() && oddspace(sp.front())) { + if (!sp.empty() && is_oddspace(sp.front())) { sp.pop_front(); goto loop; } @@ -37,4 +38,20 @@ loop: return sp; } +StringPiece rtrimWhitespace(StringPiece sp) { + // Spaces other than ' ' characters are less common but should be + // checked. This configuration where we loop on the ' ' + // separately from oddspaces was empirically fastest. 
+ +loop: + for (; !sp.empty() && sp.back() == ' '; sp.pop_back()) { + } + if (!sp.empty() && is_oddspace(sp.back())) { + sp.pop_back(); + goto loop; + } + + return sp; +} + } diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp index 492534b6..bda025b7 100644 --- a/folly/test/StringTest.cpp +++ b/folly/test/StringTest.cpp @@ -1290,6 +1290,53 @@ BENCHMARK(joinInt, iters) { } } +TEST(String, whitespace) { + // trimWhitespace: + EXPECT_EQ("kavabanga", + trimWhitespace("kavabanga")); + EXPECT_EQ("kavabanga", + trimWhitespace("kavabanga \t \n ")); + EXPECT_EQ("kavabanga", + trimWhitespace(" \t \r \n \n kavabanga")); + EXPECT_EQ("kavabanga", + trimWhitespace("\t \r \n kavabanga \t \n ")); + EXPECT_EQ("kavabanga", + trimWhitespace(" \t \r \n \n kavabanga")); + EXPECT_EQ("kavabanga", + trimWhitespace("\t \r \n kavabanga \t \n ")); + EXPECT_EQ( + ltrimWhitespace(rtrimWhitespace("kavabanga")), + rtrimWhitespace(ltrimWhitespace("kavabanga"))); + EXPECT_EQ( + ltrimWhitespace(rtrimWhitespace("kavabanga \r\t\n")), + rtrimWhitespace(ltrimWhitespace("kavabanga \r\t\n"))); + EXPECT_EQ("", trimWhitespace("\t \r \n \t \n ")); + EXPECT_EQ("", trimWhitespace("")); + EXPECT_EQ("", trimWhitespace("\t")); + EXPECT_EQ("", trimWhitespace("\r")); + EXPECT_EQ("", trimWhitespace("\n")); + EXPECT_EQ("", trimWhitespace("\t ")); + EXPECT_EQ("", trimWhitespace("\r ")); + EXPECT_EQ("", trimWhitespace("\n ")); + EXPECT_EQ("", trimWhitespace(" \t")); + EXPECT_EQ("", trimWhitespace(" \r")); + EXPECT_EQ("", trimWhitespace(" \n")); + + // ltrimWhitespace: + EXPECT_EQ("kavabanga", ltrimWhitespace("\t kavabanga")); + EXPECT_EQ("kavabanga \r\n", ltrimWhitespace("\t kavabanga \r\n")); + EXPECT_EQ("", ltrimWhitespace("\r ")); + EXPECT_EQ("", ltrimWhitespace("\n ")); + EXPECT_EQ("", ltrimWhitespace("\r ")); + + // rtrimWhitespace: + EXPECT_EQ("\t kavabanga", rtrimWhitespace("\t kavabanga")); + EXPECT_EQ("\t kavabanga", rtrimWhitespace("\t kavabanga \r\n")); + EXPECT_EQ("", rtrimWhitespace("\r 
")); + EXPECT_EQ("", rtrimWhitespace("\n ")); + EXPECT_EQ("", rtrimWhitespace("\r ")); +} + int main(int argc, char *argv[]) { testing::InitGoogleTest(&argc, argv); gflags::ParseCommandLineFlags(&argc, &argv, true); -- 2.34.1