From: Song Zhou Date: Sat, 20 May 2017 14:53:57 +0000 (-0700) Subject: Added a new variant of byLine to keep the delimiter X-Git-Tag: v2017.05.22.00~1 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=988893267f284be2133b302d55b9bb9b9ea3e930;p=folly.git Added a new variant of byLine to keep the delimiter Summary: The new method byLineFull does not trim the delimiter, so consumers can check whether the final line ends with the delimiter. Reviewed By: philippv, yfeldblum Differential Revision: D5085371 fbshipit-source-id: 5045127ee11d008e3cd7d13d33bffad280fe0a7e --- diff --git a/folly/gen/File-inl.h b/folly/gen/File-inl.h index 89d770d5..591ead34 100644 --- a/folly/gen/File-inl.h +++ b/folly/gen/File-inl.h @@ -120,6 +120,15 @@ class FileWriter : public Operator { std::unique_ptr buffer_; }; +inline auto byLineImpl(File file, char delim, bool keepDelimiter) + -> decltype(fromFile(std::move(file)) + | eachAs() + | resplit(delim, keepDelimiter)) { + return fromFile(std::move(file)) + | eachAs() + | resplit(delim, keepDelimiter); +} + } // !detail /** @@ -127,13 +136,24 @@ class FileWriter : public Operator { * Note: This produces StringPieces which reference temporary strings which are * only valid during iteration. 
*/ +inline auto byLineFull(File file, char delim = '\n') + -> decltype(detail::byLineImpl(std::move(file), delim, true)) { + return detail::byLineImpl(std::move(file), delim, true); +} + +inline auto byLineFull(int fd, char delim = '\n') + -> decltype(byLineFull(File(fd), delim)) { + return byLineFull(File(fd), delim); +} + +inline auto byLineFull(const char* f, char delim = '\n') + -> decltype(byLineFull(File(f), delim)) { + return byLineFull(File(f), delim); +} + inline auto byLine(File file, char delim = '\n') - -> decltype(fromFile(std::move(file)) - | eachAs() - | resplit(delim)) { - return fromFile(std::move(file)) - | eachAs() - | resplit(delim); + -> decltype(detail::byLineImpl(std::move(file), delim, false)) { + return detail::byLineImpl(std::move(file), delim, false); } inline auto byLine(int fd, char delim = '\n') @@ -141,5 +161,4 @@ inline auto byLine(int fd, char delim = '\n') inline auto byLine(const char* f, char delim = '\n') -> decltype(byLine(File(f), delim)) { return byLine(File(f), delim); } - }} // !folly::gen diff --git a/folly/gen/String-inl.h b/folly/gen/String-inl.h index 4d6061bb..be9f1d54 100644 --- a/folly/gen/String-inl.h +++ b/folly/gen/String-inl.h @@ -213,16 +213,23 @@ namespace detail { class StringResplitter : public Operator { char delimiter_; + bool keepDelimiter_; + public: - explicit StringResplitter(char delimiter) : delimiter_(delimiter) { } + explicit StringResplitter(char delimiter, bool keepDelimiter = false) + : delimiter_(delimiter), keepDelimiter_(keepDelimiter) {} template class Generator : public GenImpl> { Source source_; char delimiter_; + bool keepDelimiter_; + public: - Generator(Source source, char delimiter) - : source_(std::move(source)), delimiter_(delimiter) { } + Generator(Source source, char delimiter, bool keepDelimiter) + : source_(std::move(source)), + delimiter_(delimiter), + keepDelimiter_(keepDelimiter) {} template bool apply(Body&& body) const { @@ -236,7 +243,9 @@ class StringResplitter : public 
Operator { if (s.back() != this->delimiter_) { return body(s); } - s.pop_back(); // Remove the 1-character delimiter + if (!keepDelimiter_) { + s.pop_back(); // Remove the 1-character delimiter + } return body(s); }); if (!source_.apply(splitter)) { @@ -252,14 +261,14 @@ class StringResplitter : public Operator { class Value, class Gen = Generator> Gen compose(GenImpl&& source) const { - return Gen(std::move(source.self()), delimiter_); + return Gen(std::move(source.self()), delimiter_, keepDelimiter_); } template> Gen compose(const GenImpl& source) const { - return Gen(source.self(), delimiter_); + return Gen(source.self(), delimiter_, keepDelimiter_); } }; diff --git a/folly/gen/String.h b/folly/gen/String.h index 47794feb..fde954a0 100644 --- a/folly/gen/String.h +++ b/folly/gen/String.h @@ -54,9 +54,9 @@ class SplitTo; */ // make this a template so we don't require StringResplitter to be complete // until use -template -S resplit(char delimiter) { - return S(delimiter); +template +S resplit(char delimiter, bool keepDelimiter = false) { + return S(delimiter, keepDelimiter); } template > diff --git a/folly/gen/test/FileTest.cpp b/folly/gen/test/FileTest.cpp index cc5bfa19..5e9edbbc 100644 --- a/folly/gen/test/FileTest.cpp +++ b/folly/gen/test/FileTest.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -56,7 +57,34 @@ TEST(FileGen, ByLine) { } } -class FileGenBufferedTest : public ::testing::TestWithParam { }; +TEST(FileGen, ByLineFull) { + auto cases = std::vector { + stripLeftMargin(R"( + Hello world + This is the second line + + + a few empty lines above + incomplete last line)"), + + "complete last line\n", + + "\n", + + ""}; + + for (auto& lines : cases) { + test::TemporaryFile file("ByLineFull"); + EXPECT_EQ(lines.size(), write(file.fd(), lines.data(), lines.size())); + + auto found = + byLineFull(file.path().string().c_str()) | unsplit(""); + + EXPECT_EQ(lines, found); + } +} + +class FileGenBufferedTest : public 
::testing::TestWithParam {}; TEST_P(FileGenBufferedTest, FileWriter) { size_t bufferSize = GetParam(); diff --git a/folly/gen/test/StringTest.cpp b/folly/gen/test/StringTest.cpp index d4e14192..42b166e9 100644 --- a/folly/gen/test/StringTest.cpp +++ b/folly/gen/test/StringTest.cpp @@ -260,6 +260,30 @@ TEST(StringGen, Resplit) { } } +TEST(StringGen, ResplitKeepDelimiter) { + auto collect = eachTo() | as(); + { + auto pieces = + from({"hello,, world, goodbye, meow"}) | resplit(',', true) | collect; + ASSERT_EQ(5, pieces.size()); + EXPECT_EQ("hello,", pieces[0]); + EXPECT_EQ(",", pieces[1]); + EXPECT_EQ(" world,", pieces[2]); + EXPECT_EQ(" goodbye,", pieces[3]); + EXPECT_EQ(" meow", pieces[4]); + } + { + auto pieces = from({"hel", "lo,", ", world", ", goodbye, m", "eow"}) | + resplit(',', true) | collect; + ASSERT_EQ(5, pieces.size()); + EXPECT_EQ("hello,", pieces[0]); + EXPECT_EQ(",", pieces[1]); + EXPECT_EQ(" world,", pieces[2]); + EXPECT_EQ(" goodbye,", pieces[3]); + EXPECT_EQ(" meow", pieces[4]); + } +} + void checkResplitMaxLength(vector ins, char delim, uint64_t maxLength,