From 9cc86052c80864ac55c9c1c3ec4af63b695a4705 Mon Sep 17 00:00:00 2001 From: Vojin Katic Date: Wed, 11 Jun 2014 13:53:16 -0700 Subject: [PATCH] folly::gen::splitByLine Summary: I made it work, but please send your feedback how to improve code quality. splitByLine will split on \r, \n, and \r\n. Test Plan: add new test, arc unit Reviewed By: tjackson@fb.com Subscribers: folly@lists, crawler-diffs@ FB internal diff: D1322212 --- folly/gen/String-inl.h | 45 +++++++++++++++++++++++++++++++---- folly/gen/String.h | 25 +++++++++++++++++-- folly/gen/test/StringTest.cpp | 25 ++++++++++++++++++- 3 files changed, 87 insertions(+), 8 deletions(-) diff --git a/folly/gen/String-inl.h b/folly/gen/String-inl.h index e29f2a57..651252f5 100644 --- a/folly/gen/String-inl.h +++ b/folly/gen/String-inl.h @@ -26,6 +26,39 @@ namespace folly { namespace gen { namespace detail { +inline bool splitPrefix(StringPiece& in, + StringPiece& prefix, + StringPiece delimiter) { + auto p = in.find(delimiter); + if (p != std::string::npos) { + prefix.assign(in.data(), in.data() + p); + in.advance(p + delimiter.size()); + return true; + } + prefix.clear(); + return false; +} + +/** + * Split by any of the EOL terms: \r, \n, or \r\n. + */ +inline bool splitPrefix(StringPiece& in, + StringPiece& prefix, + MixedNewlines) { + auto newline = "\r\n"; + auto p = in.find_first_of(newline); + if (p != std::string::npos) { + prefix.assign(in.data(), in.data() + p); + in.advance(p); + if (!in.removePrefix(newline)) { + in.advance(1); + } + return true; + } + prefix.clear(); + return false; +} + inline bool splitPrefix(StringPiece& in, StringPiece& prefix, char delimiter) { auto p = static_cast(memchr(in.data(), delimiter, in.size())); if (p) { @@ -128,14 +161,16 @@ class StringResplitter : public Operator { } }; -class SplitStringSource : public GenImpl { +template +class SplitStringSource + : public GenImpl> { StringPiece source_; - char delimiter_; + DelimiterType delimiter_; public: SplitStringSource(const StringPiece& source, - char delimiter) + DelimiterType delimiter) : source_(source) - , delimiter_(delimiter) { } + , delimiter_(std::move(delimiter)) { } template bool apply(Body&& body) const { @@ -166,7 +201,7 @@ template> { Delimiter delimiter_; public: - Unsplit(const Delimiter& delimiter) + explicit Unsplit(const Delimiter& delimiter) : delimiter_(delimiter) { } diff --git a/folly/gen/String.h b/folly/gen/String.h index 3f4497b1..56aa774f 100644 --- a/folly/gen/String.h +++ b/folly/gen/String.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,8 @@ namespace gen { namespace detail { class StringResplitter; + +template class SplitStringSource; template @@ -54,11 +56,30 @@ S resplit(char delimiter) { return S(delimiter); } -template +template > S split(const StringPiece& source, char delimiter) { return S(source, delimiter); } +template > +S split(StringPiece source, StringPiece delimiter) { + return S(source, delimiter); +} + +/** + * EOL terms ("\r", "\n", or "\r\n"). + */ +class MixedNewlines {}; + +/** + * Split by EOL ("\r", "\n", or "\r\n"). + * @see split(). + */ +template > +S lines(StringPiece source) { + return S(source, MixedNewlines{}); +} + /* * Joins a sequence of tokens into a string, with the chosen delimiter. * diff --git a/folly/gen/test/StringTest.cpp b/folly/gen/test/StringTest.cpp index 17fb9c09..5a5a1888 100644 --- a/folly/gen/test/StringTest.cpp +++ b/folly/gen/test/StringTest.cpp @@ -82,13 +82,36 @@ TEST(StringGen, Split) { } { - auto pieces = split("hello,, world, goodbye, meow", ',') + auto pieces = split("hello,, world, goodbye, meow", ",") | take(5) | collect; EXPECT_EQ(5, pieces.size()); EXPECT_EQ("hello", pieces[0]); EXPECT_EQ("", pieces[1]); EXPECT_EQ(" world", pieces[2]); } + + { + auto pieces = split("hello,, world, goodbye, meow", ", ") + | collect; + EXPECT_EQ(4, pieces.size()); + EXPECT_EQ("hello,", pieces[0]); + EXPECT_EQ("world", pieces[1]); + EXPECT_EQ("goodbye", pieces[2]); + EXPECT_EQ("meow", pieces[3]); + } +} + +TEST(StringGen, SplitByNewLine) { + auto collect = eachTo() | as(); + { + auto pieces = lines("hello\n\n world\r\n goodbye\r meow") | collect; + EXPECT_EQ(5, pieces.size()); + EXPECT_EQ("hello", pieces[0]); + EXPECT_EQ("", pieces[1]); + EXPECT_EQ(" world", pieces[2]); + EXPECT_EQ(" goodbye", pieces[3]); + EXPECT_EQ(" meow", pieces[4]); + } } TEST(StringGen, EmptyResplit) { -- 2.34.1