From 634f30b5350a805027086a79b1641a6f15262242 Mon Sep 17 00:00:00 2001
From: Kittipat Virochsiri
Date: Fri, 2 Aug 2013 13:25:35 -0700
Subject: [PATCH] Batch

Summary: Convert stream of elements to stream of fixed-sized vectors.

Test Plan: unit tests

Reviewed By: tjackson@fb.com

FB internal diff: D912290
---
 folly/experimental/Gen-inl.h        | 81 +++++++++++++++++++++++++++++
 folly/experimental/Gen.h            |  2 +
 folly/experimental/test/GenTest.cpp | 26 +++++++++
 3 files changed, 109 insertions(+)

diff --git a/folly/experimental/Gen-inl.h b/folly/experimental/Gen-inl.h
index 9a7d568f..cc8fd5c5 100644
--- a/folly/experimental/Gen-inl.h
+++ b/folly/experimental/Gen-inl.h
@@ -1153,6 +1153,83 @@ class Distinct : public Operator<Distinct<Selector>> {
   }
 };
 
+/**
+ * Batch - For producing fixed-size batches of each value from a source.
+ *
+ * This type is usually used through the 'batch' helper function:
+ *
+ *   auto batchSums
+ *     = seq(1, 10)
+ *     | batch(3)
+ *     | map([](const std::vector<int>& batch) {
+ *         return from(batch) | sum;
+ *       })
+ *     | as<vector>();
+ */
+class Batch : public Operator<Batch> {
+  size_t batchSize_;
+ public:
+  explicit Batch(size_t batchSize)
+    : batchSize_(batchSize) {
+    if (batchSize_ == 0) {
+      throw std::invalid_argument("Batch size must be non-zero!");
+    }
+  }
+
+  template<class Value,
+           class Source,
+           class StorageType = typename std::decay<Value>::type,
+           class VectorType = std::vector<StorageType>>
+  class Generator :
+      public GenImpl<VectorType&,
+                     Generator<Value, Source, StorageType, VectorType>> {
+    Source source_;
+    size_t batchSize_;
+  public:
+    explicit Generator(Source source, size_t batchSize)
+      : source_(std::move(source))
+      , batchSize_(batchSize) {}
+
+    template<class Handler>
+    bool apply(Handler&& handler) const {
+      VectorType batch_;
+      batch_.reserve(batchSize_);
+      bool shouldContinue = source_.apply([&](Value value) -> bool {
+          batch_.push_back(std::forward<Value>(value));
+          if (batch_.size() == batchSize_) {
+            bool needMore = handler(batch_);
+            batch_.clear();
+            return needMore;
+          }
+          // Always need more if the handler is not called.
+          return true;
+        });
+      // Flush everything, if and only if `handler` hasn't returned false.
+      if (shouldContinue && !batch_.empty()) {
+        shouldContinue = handler(batch_);
+        batch_.clear();
+      }
+      return shouldContinue;
+    }
+
+    static constexpr bool infinite = Source::infinite;
+  };
+
+  template<class Source,
+           class Value,
+           class Gen = Generator<Value, Source>>
+  Gen compose(GenImpl<Value, Source>&& source) const {
+    return Gen(std::move(source.self()), batchSize_);
+  }
+
+  template<class Source,
+           class Value,
+           class Gen = Generator<Value, Source>>
+  Gen compose(const GenImpl<Value, Source>& source) const {
+    return Gen(source.self(), batchSize_);
+  }
+};
+
 /**
  * Composed - For building up a pipeline of operations to perform, absent any
  * particular source generator. Useful for building up custom pipelines.
@@ -1994,6 +2071,10 @@ inline detail::Skip skip(size_t count) {
   return detail::Skip(count);
 }
 
+inline detail::Batch batch(size_t batchSize) {
+  return detail::Batch(batchSize);
+}
+
 }} //folly::gen
 
 #pragma GCC diagnostic pop
diff --git a/folly/experimental/Gen.h b/folly/experimental/Gen.h
index 31c1cedc..fa1ec1b2 100644
--- a/folly/experimental/Gen.h
+++ b/folly/experimental/Gen.h
@@ -310,6 +310,8 @@ class RangeConcat;
 
 class Cycle;
 
+class Batch;
+
 /*
  * Sinks
  */
diff --git a/folly/experimental/test/GenTest.cpp b/folly/experimental/test/GenTest.cpp
index 9ab1aa57..a0726555 100644
--- a/folly/experimental/test/GenTest.cpp
+++ b/folly/experimental/test/GenTest.cpp
@@ -1283,6 +1283,32 @@ TEST(Gen, Guard) {
                runtime_error);
 }
 
+TEST(Gen, Batch) {
+  EXPECT_EQ((vector<vector<int>> { {1} }),
+            seq(1, 1) | batch(5) | as<vector>());
+  EXPECT_EQ((vector<vector<int>> { {1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11} }),
+            seq(1, 11) | batch(3) | as<vector>());
+  EXPECT_THROW(seq(1, 1) | batch(0) | as<vector>(),
+               std::invalid_argument);
+}
+
+TEST(Gen, BatchMove) {
+  auto expected = vector<vector<int>>{ {0, 1}, {2, 3}, {4} };
+  auto actual =
+      seq(0, 4)
+    | mapped([](int i) { return std::unique_ptr<int>(new int(i)); })
+    | batch(2)
+    | mapped([](std::vector<std::unique_ptr<int>>& pVector) {
+        std::vector<int> iVector;
+        for (const auto& p : pVector) {
+          iVector.push_back(*p);
+        };
+        return iVector;
+      })
+    | as<vector>();
+  EXPECT_EQ(expected, actual);
+}
+
 int main(int argc, char *argv[]) {
   testing::InitGoogleTest(&argc, argv);
   google::ParseCommandLineFlags(&argc, &argv, true);
-- 
2.34.1