From: Tudor Bosman Date: Tue, 13 Nov 2012 03:46:41 +0000 (-0800) Subject: add split operator X-Git-Tag: v0.22.0~1144 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=ed4482475c02ded524019c347d271032d51553d4;p=folly.git add split operator Summary: Also added eachAs (cast each value as a given type) and eachTo (use folly::to on each value) Test Plan: test added Reviewed By: tjackson@fb.com FB internal diff: D628868 --- diff --git a/folly/experimental/File-inl.h b/folly/experimental/File-inl.h new file mode 100644 index 00000000..02240d6a --- /dev/null +++ b/folly/experimental/File-inl.h @@ -0,0 +1,56 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FOLLY_FILE_H_ +#error This file may only be included from folly/experimental/File.h +#endif + +#include + +namespace folly { + +inline File::File(int fd, bool ownsFd) : fd_(fd), ownsFd_(ownsFd) { } + +inline File::~File() { + closeNoThrow(); // ignore error +} + +inline void File::release() { + fd_ = -1; + ownsFd_ = false; +} + +inline void File::swap(File& other) { + using std::swap; + swap(fd_, other.fd_); + swap(ownsFd_, other.ownsFd_); +} + +inline File::File(File&& other) : fd_(other.fd_), ownsFd_(other.ownsFd_) { + other.release(); +} + +inline File& File::operator=(File&& other) { + File(std::move(other)).swap(*this); + return *this; +} + +inline void swap(File& a, File& b) { + a.swap(b); +} + +} // namespace folly + diff --git a/folly/experimental/File.cpp b/folly/experimental/File.cpp new file mode 100644 index 00000000..119ae924 --- /dev/null +++ b/folly/experimental/File.cpp @@ -0,0 +1,47 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/experimental/File.h" + +#include + +#include + +namespace folly { + +File::File(const char* name, int flags, mode_t mode) + : fd_(::open(name, flags, mode)), ownsFd_(false) { + if (fd_ == -1) { + throw std::system_error(errno, std::system_category(), "open() failed"); + } + ownsFd_ = true; +} + +void File::close() { + if (!closeNoThrow()) { + throw std::system_error(errno, std::system_category(), "close() failed"); + } +} + +bool File::closeNoThrow() { + // fd_ may already be -1 (moved-from, released, or closed): no-op then. + int r = ownsFd_ ? ::close(fd_) : 0; + release(); + return r == 0; +} + +} // namespace folly + diff --git a/folly/experimental/File.h b/folly/experimental/File.h new file mode 100644 index 00000000..4fd69d6c --- /dev/null +++ b/folly/experimental/File.h @@ -0,0 +1,90 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_FILE_H_ +#define FOLLY_FILE_H_ + +#include +#include +#include + +namespace folly { + +/** + * A File represents an open file. + */ +class File { + public: + /** + * Create a File object from an existing file descriptor. + * Takes ownership of the file descriptor if ownsFd is true. + */ + /* implicit */ File(int fd, bool ownsFd=false); + + /** + * Open and create a file object. Throws on error. + */ + /* implicit */ File(const char* name, int flags=O_RDONLY, mode_t mode=0644); + + ~File(); + + /** + * Return the file descriptor, or -1 if the file was closed. 
+ */ + int fd() const { return fd_; } + + /** + * If we own the file descriptor, close the file and throw on error. + * Otherwise, do nothing. + */ + void close(); + + /** + * Closes the file (if owned). Returns true on success, false (and sets + * errno) on error. + */ + bool closeNoThrow(); + + /** + * Releases the file descriptor; no longer owned by this File. + */ + void release(); + + /** + * Swap this File with another. + */ + void swap(File& other); + + // movable + File(File&&); + File& operator=(File&&); + private: + // not copyable + File(const File&) = delete; + File& operator=(const File&) = delete; + + int fd_; + bool ownsFd_; +}; + +void swap(File& a, File& b); + +} // namespace folly + +#include "folly/experimental/File-inl.h" + +#endif /* FOLLY_FILE_H_ */ + diff --git a/folly/experimental/FileGen-inl.h b/folly/experimental/FileGen-inl.h new file mode 100644 index 00000000..429cdd7b --- /dev/null +++ b/folly/experimental/FileGen-inl.h @@ -0,0 +1,126 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FOLLY_FILEGEN_H_ +#error This file may only be included from folly/experimental/FileGen.h +#endif + +#include + +#include "folly/experimental/StringGen.h" + +namespace folly { +namespace gen { +namespace detail { + +class FileReader : public GenImpl { + public: + FileReader(File file, std::unique_ptr buffer) + : file_(std::move(file)), + buffer_(std::move(buffer)) { + buffer_->clear(); + } + + template + bool apply(Body&& body) const { + for (;;) { + ssize_t n = ::read(file_.fd(), buffer_->writableTail(), + buffer_->capacity()); + if (n == -1) { + throw std::system_error(errno, std::system_category(), "read failed"); + } + if (n == 0) { + return true; + } + if (!body(ByteRange(buffer_->tail(), n))) { + return false; + } + } + } + private: + File file_; + std::unique_ptr buffer_; +}; + +class FileWriter : public Operator { + public: + FileWriter(File file, std::unique_ptr buffer) + : file_(std::move(file)), + buffer_(std::move(buffer)) { + if (buffer_) { + buffer_->clear(); + } + } + + template + void compose(const GenImpl& source) const { + auto fn = [&](ByteRange v) { + if (!this->buffer_ || v.size() >= this->buffer_->capacity()) { + this->flushBuffer(); + this->write(v); + } else { + if (v.size() > this->buffer_->tailroom()) { + this->flushBuffer(); + } + memcpy(this->buffer_->writableTail(), v.data(), v.size()); + this->buffer_->append(v.size()); + } + }; + + // Iterate + source.foreach(std::move(fn)); + + flushBuffer(); + file_.close(); + } + + private: + void write(ByteRange v) const { + ssize_t n; + while (!v.empty()) { + n = ::write(file_.fd(), v.data(), v.size()); + if (n == -1) { + if (errno == EINTR) { + continue; + } + throw std::system_error(errno, std::system_category(), + "write() failed"); + } + v.advance(n); + } + } + + void flushBuffer() const { + if (buffer_ && buffer_->length() != 0) { + write(ByteRange(buffer_->data(), buffer_->length())); + buffer_->clear(); + } + } + + mutable File file_; + std::unique_ptr buffer_; +}; + +} // 
namespace detail + +inline auto byLine(File file, char delim='\n') -> +decltype(fromFile(std::move(file)) | eachAs() | resplit(delim)) { + return fromFile(std::move(file)) | eachAs() | resplit(delim); +} + +} // namespace gen +} // namespace folly + diff --git a/folly/experimental/FileGen.h b/folly/experimental/FileGen.h new file mode 100644 index 00000000..ab5af127 --- /dev/null +++ b/folly/experimental/FileGen.h @@ -0,0 +1,74 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_FILEGEN_H_ +#define FOLLY_FILEGEN_H_ + +#include "folly/experimental/File.h" +#include "folly/experimental/Gen.h" +#include "folly/experimental/io/IOBuf.h" + +namespace folly { +namespace gen { + +namespace detail { +class FileReader; +class FileWriter; +} // namespace detail + +/** + * Generator that reads from a file with a buffer of the given size. + * Reads must be buffered (the generator interface expects the generator + * to hold each value). + */ +template +S fromFile(File file, size_t bufferSize=4096) { + return S(std::move(file), IOBuf::create(bufferSize)); +} + +/** + * Generator that reads from a file using a given buffer. + */ +template +S fromFile(File file, std::unique_ptr buffer) { + return S(std::move(file), std::move(buffer)); +} + +/** + * Sink that writes to a file with a buffer of the given size. + * If bufferSize is 0, writes will be unbuffered. 
+ */ +template +S toFile(File file, size_t bufferSize=4096) { + return S(std::move(file), bufferSize ? IOBuf::create(bufferSize) : nullptr); +} + +/** + * Sink that writes to a file using a given buffer. + * If the buffer is nullptr, writes will be unbuffered. + */ +template +S toFile(File file, std::unique_ptr buffer) { + return S(std::move(file), std::move(buffer)); +} + +} // namespace gen +} // namespace folly + +#include "folly/experimental/FileGen-inl.h" + +#endif /* FOLLY_FILEGEN_H_ */ + diff --git a/folly/experimental/Gen-inl.h b/folly/experimental/Gen-inl.h index 5079ae22..95f0b76f 100644 --- a/folly/experimental/Gen-inl.h +++ b/folly/experimental/Gen-inl.h @@ -261,7 +261,7 @@ template auto operator|(const GenImpl& gen, const Operator& op) -> -decltype(op.self().compose(gen)) { +decltype(op.self().compose(gen.self())) { return op.self().compose(gen.self()); } @@ -291,7 +291,7 @@ template class ReferencedSource : public GenImpl> { - Container* const container_; + Container* container_; public: explicit ReferencedSource(Container* container) : container_(container) {} @@ -344,7 +344,7 @@ class CopiedSource : static_assert( !std::is_reference::value, "Can't copy into a reference"); - const std::shared_ptr copy_; + std::shared_ptr copy_; public: typedef Container ContainerType; @@ -451,8 +451,8 @@ public: template class Chain : public GenImpl> { - const First first_; - const Second second_; + First first_; + Second second_; public: explicit Chain(First first, Second second) : first_(std::move(first)) @@ -477,7 +477,7 @@ public: **/ template class Yield : public GenImpl> { - const Source source_; + Source source_; public: explicit Yield(Source source) : source_(std::move(source)) { @@ -520,7 +520,7 @@ class Yield : public GenImpl> { */ template class Map : public Operator> { - const Predicate predicate_; + Predicate predicate_; public: explicit Map(const Predicate& predicate = Predicate()) : predicate_(predicate) @@ -533,8 +533,8 @@ class Map : public Operator> 
{ >::type> class Generator : public GenImpl> { - const Source source_; - const Predicate pred_; + Source source_; + Predicate pred_; public: explicit Generator(Source source, const Predicate& pred) : source_(std::move(source)), pred_(pred) {} @@ -582,7 +582,7 @@ class Map : public Operator> { */ template class Filter : public Operator> { - const Predicate predicate_; + Predicate predicate_; public: explicit Filter(const Predicate& predicate) : predicate_(predicate) @@ -591,8 +591,8 @@ class Filter : public Operator> { template class Generator : public GenImpl> { - const Source source_; - const Predicate pred_; + Source source_; + Predicate pred_; public: explicit Generator(Source source, const Predicate& pred) : source_(std::move(source)), pred_(pred) {} @@ -643,7 +643,7 @@ class Filter : public Operator> { */ template class Until : public Operator> { - const Predicate predicate_; + Predicate predicate_; public: explicit Until(const Predicate& predicate) : predicate_(predicate) @@ -654,8 +654,8 @@ class Until : public Operator> { class Result = typename std::result_of::type> class Generator : public GenImpl> { - const Source source_; - const Predicate pred_; + Source source_; + Predicate pred_; public: explicit Generator(Source source, const Predicate& pred) : source_(std::move(source)), pred_(pred) {} @@ -694,7 +694,7 @@ class Until : public Operator> { * | take(10); */ class Take : public Operator { - const size_t count_; + size_t count_; public: explicit Take(size_t count) : count_(count) {} @@ -703,8 +703,8 @@ public: class Source> class Generator : public GenImpl> { - const Source source_; - const size_t count_; + Source source_; + size_t count_; public: explicit Generator(Source source, size_t count) : source_(std::move(source)) , count_(count) {} @@ -747,7 +747,7 @@ public: * | take(10); */ class Skip : public Operator { - const size_t count_; + size_t count_; public: explicit Skip(size_t count) : count_(count) {} @@ -756,8 +756,8 @@ public: class Source> 
class Generator : public GenImpl> { - const Source source_; - const size_t count_; + Source source_; + size_t count_; public: explicit Generator(Source source, size_t count) : source_(std::move(source)) , count_(count) {} @@ -824,8 +824,8 @@ public: */ template class Order : public Operator> { - const Selector selector_; - const Comparer comparer_; + Selector selector_; + Comparer comparer_; public: Order(const Selector& selector = Selector(), const Comparer& comparer = Comparer()) @@ -838,9 +838,9 @@ class Order : public Operator> { class Generator : public GenImpl> { - const Source source_; - const Selector selector_; - const Comparer comparer_; + Source source_; + Selector selector_; + Comparer comparer_; typedef std::vector VectorType; @@ -923,8 +923,8 @@ class Order : public Operator> { template class Composed : public Operator> { - const First first_; - const Second second_; + First first_; + Second second_; public: Composed() {} Composed(First first, Second second) @@ -971,8 +971,8 @@ class Composed : public Operator> { template class FoldLeft : public Operator> { - const Seed seed_; - const Fold fold_; + Seed seed_; + Fold fold_; public: FoldLeft(const Seed& seed, const Fold& fold) : seed_(seed) @@ -1052,7 +1052,7 @@ class Any : public Operator { */ template class Reduce : public Operator> { - const Reducer reducer_; + Reducer reducer_; public: Reduce(const Reducer& reducer) : reducer_(reducer) @@ -1175,7 +1175,7 @@ class Min : public Operator> { */ template class Append : public Operator> { - Collection* const collection_; + Collection* collection_; public: explicit Append(Collection* collection) : collection_(collection) @@ -1271,7 +1271,7 @@ public: class InnerValue = typename std::decay::type::ValueType> class Generator : public GenImpl> { - const Source source_; + Source source_; public: explicit Generator(Source source) : source_(std::move(source)) {} @@ -1325,7 +1325,7 @@ public: class InnerValue = typename ValueTypeOfRange::RefType> class Generator 
: public GenImpl> { - const Source source_; + Source source_; public: explicit Generator(Source source) : source_(std::move(source)) {} @@ -1393,7 +1393,7 @@ class VirtualGen : public GenImpl> { template class WrapperImpl : public WrapperBase { - const Wrapped wrapped_; + Wrapped wrapped_; public: explicit WrapperImpl(Wrapped wrapped) : wrapped_(std::move(wrapped)) { diff --git a/folly/experimental/Gen.h b/folly/experimental/Gen.h index 9126d59b..457c6e5c 100644 --- a/folly/experimental/Gen.h +++ b/folly/experimental/Gen.h @@ -24,6 +24,7 @@ #include "folly/Range.h" #include "folly/Optional.h" +#include "folly/Conv.h" /** * Generator-based Sequence Comprehensions in C++, akin to C#'s LINQ @@ -135,6 +136,24 @@ public: } }; +template +class Cast { + public: + template + Dest operator()(Value&& value) const { + return Dest(std::forward(value)); + } +}; + +template +class To { + public: + template + Dest operator()(Value&& value) const { + return ::folly::to(std::forward(value)); + } +}; + namespace detail { template @@ -343,6 +362,20 @@ Get get() { return Get(); } +// construct Dest from each value +template >> +Cast eachAs() { + return Cast(); +} + +// call folly::to on each value +template >> +To eachTo() { + return To(); +} + /* * Sink Factories */ diff --git a/folly/experimental/StringGen-inl.h b/folly/experimental/StringGen-inl.h new file mode 100644 index 00000000..3330e8b5 --- /dev/null +++ b/folly/experimental/StringGen-inl.h @@ -0,0 +1,130 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_STRINGGEN_H_ +#error This file may only be included from folly/experimental/StringGen.h +#endif + +#include "folly/experimental/io/IOBuf.h" + +namespace folly { +namespace gen { +namespace detail { + +inline bool splitPrefix(StringPiece& in, StringPiece& prefix, char delimiter) { + auto p = static_cast(memchr(in.data(), delimiter, in.size())); + if (p) { + prefix.assign(in.data(), p); + in.assign(p + 1, in.end()); + return true; + } + prefix.clear(); + return false; +} + +inline const char* ch(const unsigned char* p) { + return reinterpret_cast(p); +} + +class StringResplitter : public Operator { + char delimiter_; + public: + explicit StringResplitter(char delimiter) : delimiter_(delimiter) { } + + template + class Generator : public GenImpl> { + Source source_; + char delimiter_; + static constexpr size_t kDefaultLineSize = 256; + public: + Generator(Source source, char delimiter) + : source_(std::move(source)), delimiter_(delimiter) { } + + template + bool apply(Body&& body) const { + std::unique_ptr buffer; + + auto fn = [&](StringPiece in) -> bool { + StringPiece prefix; + bool found = splitPrefix(in, prefix, this->delimiter_); + if (found && buffer && buffer->length() != 0) { + // Append to end of buffer, return line + if (!prefix.empty()) { + buffer->reserve(0, prefix.size()); + memcpy(buffer->writableTail(), prefix.data(), prefix.size()); + buffer->append(prefix.size()); + } + if (!body(StringPiece(ch(buffer->data()), buffer->length()))) { + return false; + } + buffer->clear(); + found = splitPrefix(in, prefix, this->delimiter_); + } + // Buffer is empty, return lines directly from input (no buffer) + while (found) { + if (!body(prefix)) { + return false; + } + found = splitPrefix(in, prefix, this->delimiter_); + } + if (!in.empty()) { + // Incomplete line left, append to buffer + if (!buffer) { + // Arbitrarily assume that we have half 
a line and get enough + // room for twice that. + buffer = IOBuf::create(std::max(kDefaultLineSize, 2 * in.size())); + } + buffer->reserve(0, in.size()); + memcpy(buffer->writableTail(), in.data(), in.size()); + buffer->append(in.size()); + } + return true; + }; + + // Iterate + if (!source_.apply(std::move(fn))) { + return false; + } + + // Incomplete last line + if (buffer && buffer->length() != 0) { + if (!body(StringPiece(ch(buffer->data()), buffer->length()))) { + return false; + } + } + return true; + } + }; + + template> + Gen compose(GenImpl&& source) const { + return Gen(std::move(source.self()), delimiter_); + } + + template> + Gen compose(const GenImpl& source) const { + return Gen(source.self(), delimiter_); + } +}; + +} // namespace detail +} // namespace gen +} // namespace folly + diff --git a/folly/experimental/StringGen.h b/folly/experimental/StringGen.h new file mode 100644 index 00000000..8eba7646 --- /dev/null +++ b/folly/experimental/StringGen.h @@ -0,0 +1,49 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_STRINGGEN_H_ +#define FOLLY_STRINGGEN_H_ + +#include "folly/Range.h" + +namespace folly { +namespace gen { + +namespace detail { +class StringResplitter; +} // namespace detail + +/** + * Split the output from a generator into StringPiece "lines" delimited by + * the given delimiter. Delimiters are NOT included in the output. 
+ * + * resplit() behaves as if the input strings were concatenated into one long + * string and then split. + */ +// make this a template so we don't require StringResplitter to be complete +// until use +template +S resplit(char delimiter) { + return S(delimiter); +} + +} // namespace gen +} // namespace folly + +#include "folly/experimental/StringGen-inl.h" + +#endif /* FOLLY_STRINGGEN_H_ */ + diff --git a/folly/experimental/test/GenBenchmark.cpp b/folly/experimental/test/GenBenchmark.cpp index 18435c26..70db7be7 100644 --- a/folly/experimental/test/GenBenchmark.cpp +++ b/folly/experimental/test/GenBenchmark.cpp @@ -15,9 +15,13 @@ */ #include "folly/experimental/Gen.h" +#include "folly/experimental/StringGen.h" +#include "folly/experimental/FileGen.h" -#include #include +#include + +#include #include "folly/Benchmark.h" @@ -267,6 +271,86 @@ BENCHMARK_RELATIVE(Composed_GenRegular, iters) { folly::doNotOptimizeAway(s); } +BENCHMARK_DRAW_LINE() + +namespace { + +const char* const kLine = "The quick brown fox jumped over the lazy dog.\n"; +const size_t kLineCount = 10000; +std::string bigLines; +const size_t kSmallLineSize = 17; +std::vector smallLines; + +void initStringResplitterBenchmark() { + bigLines.reserve(kLineCount * strlen(kLine)); + for (size_t i = 0; i < kLineCount; ++i) { + bigLines += kLine; + } + size_t remaining = bigLines.size(); + size_t pos = 0; + while (remaining) { + size_t n = std::min(kSmallLineSize, remaining); + smallLines.push_back(bigLines.substr(pos, n)); + pos += n; + remaining -= n; + } +} + +size_t len(folly::StringPiece s) { return s.size(); } + +} // namespace + +BENCHMARK(StringResplitter_Big, iters) { + size_t s = 0; + while (iters--) { + s += from({bigLines}) | resplit('\n') | map(&len) | sum; + } + folly::doNotOptimizeAway(s); +} + +BENCHMARK_RELATIVE(StringResplitter_Small, iters) { + size_t s = 0; + while (iters--) { + s += from(smallLines) | resplit('\n') | map(&len) | sum; + } + folly::doNotOptimizeAway(s); +} + 
+BENCHMARK_DRAW_LINE() + +BENCHMARK(ByLine_Pipes, iters) { + std::thread thread; + int rfd; + int wfd; + BENCHMARK_SUSPEND { + int p[2]; + CHECK_ERR(::pipe(p)); + rfd = p[0]; + wfd = p[1]; + thread = std::thread([wfd, iters] { + char x = 'x'; + PCHECK(::write(wfd, &x, 1) == 1); // signal startup + FILE* f = fdopen(wfd, "w"); + PCHECK(f); + for (int i = 1; i <= iters; ++i) { + fprintf(f, "%d\n", i); + } + fclose(f); + }); + char buf; + PCHECK(::read(rfd, &buf, 1) == 1); // wait for startup + } + + auto s = byLine(rfd) | eachTo() | sum; + folly::doNotOptimizeAway(s); + + BENCHMARK_SUSPEND { + ::close(rfd); + CHECK_EQ(s, int64_t(iters) * (iters + 1) / 2); + thread.join(); + } +} + // Results from a dual core Xeon L5520 @ 2.27GHz: // // ============================================================================ @@ -300,6 +384,7 @@ BENCHMARK_RELATIVE(Composed_GenRegular, iters) { int main(int argc, char *argv[]) { google::ParseCommandLineFlags(&argc, &argv, true); + initStringResplitterBenchmark(); runBenchmarks(); return 0; } diff --git a/folly/experimental/test/GenTest.cpp b/folly/experimental/test/GenTest.cpp index 06499877..c3627831 100644 --- a/folly/experimental/test/GenTest.cpp +++ b/folly/experimental/test/GenTest.cpp @@ -20,7 +20,11 @@ #include #include #include "folly/experimental/Gen.h" +#include "folly/experimental/StringGen.h" +#include "folly/experimental/FileGen.h" +#include "folly/experimental/TestUtil.h" #include "folly/FBVector.h" +#include "folly/Format.h" #include "folly/dynamic.h" using namespace folly::gen; @@ -30,6 +34,7 @@ using std::pair; using std::set; using std::unique_ptr; using std::vector; +using std::string; using std::tuple; using std::make_tuple; //using std::unordered_map; @@ -480,6 +485,34 @@ TEST(Gen, NoNeedlessCopies) { EXPECT_EQ(6, gen | take(3) | sum); } +namespace { +class TestIntSeq : public GenImpl { + public: + TestIntSeq() { } + + template + bool apply(Body&& body) const { + for (int i = 1; i < 6; ++i) { + if (!body(i)) { + 
return false; + } + } + return true; + } + + TestIntSeq(TestIntSeq&&) = default; + TestIntSeq& operator=(TestIntSeq&&) = default; + TestIntSeq(const TestIntSeq&) = delete; + TestIntSeq& operator=(const TestIntSeq&) = delete; +}; +} // namespace + +TEST(Gen, NoGeneratorCopies) { + EXPECT_EQ(15, TestIntSeq() | sum); + auto x = TestIntSeq() | take(3); + EXPECT_EQ(6, std::move(x) | sum); +} + TEST(Gen, FromArray) { int source[] = {2, 3, 5, 7}; auto gen = from(source); @@ -562,6 +595,98 @@ TEST(Gen, Dynamic) { EXPECT_EQ(dynamic(5), from(array3) | rconcat | rconcat | sum); } +TEST(StringGen, EmptySplit) { + auto collect = eachTo() | as(); + { + auto pieces = from({""}) | resplit(',') | collect; + EXPECT_EQ(0, pieces.size()); + } + + // The last delimiter is eaten, just like std::getline + { + auto pieces = from({","}) | resplit(',') | collect; + EXPECT_EQ(1, pieces.size()); + EXPECT_EQ("", pieces[0]); + } + + { + auto pieces = from({",,"}) | resplit(',') | collect; + EXPECT_EQ(2, pieces.size()); + EXPECT_EQ("", pieces[0]); + EXPECT_EQ("", pieces[1]); + } +} + +TEST(StringGen, Split) { + auto collect = eachTo() | as(); + { + auto pieces = from({"hello,, world, goodbye, meow"}) | + resplit(',') | collect; + EXPECT_EQ(5, pieces.size()); + EXPECT_EQ("hello", pieces[0]); + EXPECT_EQ("", pieces[1]); + EXPECT_EQ(" world", pieces[2]); + EXPECT_EQ(" goodbye", pieces[3]); + EXPECT_EQ(" meow", pieces[4]); + } + { + auto pieces = from({"hel", "lo,", ", world", ", goodbye, m", "eow"}) | + resplit(',') | collect; + EXPECT_EQ(5, pieces.size()); + EXPECT_EQ("hello", pieces[0]); + EXPECT_EQ("", pieces[1]); + EXPECT_EQ(" world", pieces[2]); + EXPECT_EQ(" goodbye", pieces[3]); + EXPECT_EQ(" meow", pieces[4]); + } +} + +TEST(FileGen, ByLine) { + auto collect = eachTo() | as(); + test::TemporaryFile file("ByLine"); + static const std::string lines( + "Hello world\n" + "This is the second line\n" + "\n" + "\n" + "a few empty lines above\n" + "incomplete last line"); + EXPECT_EQ(lines.size(), 
write(file.fd(), lines.data(), lines.size())); + + auto expected = from({lines}) | resplit('\n') | collect; + auto found = byLine(file.path().c_str()) | collect; + + EXPECT_TRUE(expected == found); +} + +class FileGenBufferedTest : public ::testing::TestWithParam { }; + +TEST_P(FileGenBufferedTest, FileWriter) { + size_t bufferSize = GetParam(); + test::TemporaryFile file("FileWriter"); + + static const std::string lines( + "Hello world\n" + "This is the second line\n" + "\n" + "\n" + "a few empty lines above\n"); + + auto src = from({lines, lines, lines, lines, lines, lines, lines, lines}); + auto collect = eachTo() | as(); + auto expected = src | resplit('\n') | collect; + + src | eachAs() | toFile(file.fd(), bufferSize); + auto found = byLine(file.path().c_str()) | collect; + + EXPECT_TRUE(expected == found); +} + +INSTANTIATE_TEST_CASE_P( + DifferentBufferSizes, + FileGenBufferedTest, + ::testing::Values(0, 1, 2, 4, 8, 64, 4096)); + int main(int argc, char *argv[]) { testing::InitGoogleTest(&argc, argv); google::ParseCommandLineFlags(&argc, &argv, true);