From: Tom Jackson Date: Thu, 25 Jun 2015 23:39:22 +0000 (-0700) Subject: GroupBy X-Git-Tag: v0.48.0~9 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=df324b0aa2b0ecd637dae9acdb15466ec58dee68;p=folly.git GroupBy Reviewed By: @ot Differential Revision: D2188740 --- diff --git a/folly/gen/Base-inl.h b/folly/gen/Base-inl.h index 43dde36d..ade7b5d8 100644 --- a/folly/gen/Base-inl.h +++ b/folly/gen/Base-inl.h @@ -37,6 +37,56 @@ struct ArgumentReference T&& // int -> int&& >::type> {}; +template +class Group : public GenImpl> { + public: + static_assert(!std::is_reference::value && + !std::is_reference::value, + "Key and Value must be decayed types"); + + typedef std::vector VectorType; + typedef Key KeyType; + typedef Value ValueType; + + Group(Key key, VectorType values) + : key_(std::move(key)), values_(std::move(values)) {} + + const Key& key() const { return key_; } + + size_t size() const { return values_.size(); } + const VectorType& values() const { return values_; } + VectorType& values() { return values_; } + + VectorType operator|(const detail::Collect&) const { + return values(); + } + + VectorType operator|(const detail::CollectTemplate&) const { + return values(); + } + + template + void foreach(Body&& body) const { + for (auto& value : values_) { + body(std::move(value)); + } + } + + template + bool apply(Handler&& handler) const { + for (auto& value : values_) { + if (!handler(std::move(value))) { + return false; + } + } + return true; + } + + private: + Key key_; + mutable VectorType values_; +}; + namespace detail { /* @@ -941,6 +991,79 @@ class Order : public Operator> { } }; +/** + * GroupBy - Group values by a given key selector, producing a sequence of + * groups. + * + * This type is usually used through the 'groupBy' helper function, like: + * + * auto bests + * = from(places) + * | groupBy([](const Place& p) { + * return p.city; + * }) + * | [](Group&& g) { + * cout << g.key() << ": " << (g | first).description; + * }; + */ +template +class GroupBy : public Operator> { + Selector selector_; + + public: + GroupBy() {} + + explicit GroupBy(Selector selector) : selector_(std::move(selector)) {} + + template ::type, + class Key = typename std::result_of::type, + class KeyDecayed = typename std::decay::type> + class Generator + : public GenImpl< + Group&&, + Generator> { + static_assert(!Source::infinite, "Cannot group infinite source!"); + Source source_; + Selector selector_; + + public: + Generator(Source source, Selector selector) + : source_(std::move(source)), selector_(std::move(selector)) {} + + typedef Group GroupType; + + template + bool apply(Handler&& handler) const { + std::unordered_map groups; + source_ | [&](Value value) { + const Value& cv = value; + auto& group = groups[selector_(cv)]; + group.push_back(std::forward(value)); + }; + for (auto& kg : groups) { + GroupType group(kg.first, std::move(kg.second)); + if (!handler(std::move(group))) { + return false; + } + kg.second.clear(); + } + return true; + } + }; + + template > + Gen compose(GenImpl&& source) const { + return Gen(std::move(source.self()), selector_); + } + + template > + Gen compose(const GenImpl& source) const { + return Gen(source.self(), selector_); + } +}; + /* * TypeAssertion - For verifying the exact type of the value produced by a * generator. Useful for testing and debugging, and acts as a no-op at runtime. @@ -1897,7 +2020,7 @@ class Indirect : public Operator { explicit Generator(Source source) : source_(std::move(source)) {} template - void foreach (Body&& body) const { + void foreach(Body&& body) const { source_.foreach([&](Value value) { return body(&value); }); diff --git a/folly/gen/Base.h b/folly/gen/Base.h index 093ee007..8a52eb29 100644 --- a/folly/gen/Base.h +++ b/folly/gen/Base.h @@ -17,18 +17,19 @@ #ifndef FOLLY_GEN_BASE_H #define FOLLY_GEN_BASE_H +#include #include #include +#include #include +#include +#include #include -#include -#include #include -#include -#include -#include #include +#include +#include #include /** @@ -241,6 +242,9 @@ class To { } }; +template +class Group; + namespace detail { template @@ -326,6 +330,9 @@ class Skip; template class Order; +template +class GroupBy; + template class Distinct; @@ -640,6 +647,12 @@ Order orderByDescending(Selector selector = Selector()) { return Order(std::move(selector)); } +template> +GroupBy groupBy(Selector selector = Identity()) { + return GroupBy(std::move(selector)); +} + template> Distinct distinctBy(Selector selector = Selector()) { diff --git a/folly/gen/test/BaseTest.cpp b/folly/gen/test/BaseTest.cpp index 26fba7b1..62818f0d 100644 --- a/folly/gen/test/BaseTest.cpp +++ b/folly/gen/test/BaseTest.cpp @@ -1106,6 +1106,28 @@ TEST(Gen, Just) { } } +TEST(Gen, GroupBy) { + vector strs{"zero", "one", "two", "three", "four", + "five", "six", "seven", "eight", "nine"}; + + auto gb = from(strs) | groupBy([](const string& str) { return str.size(); }); + + EXPECT_EQ(10, gb | mapOp(count) | sum); + EXPECT_EQ(3, gb | count); + + vector mode{"zero", "four", "five", "nine"}; + EXPECT_EQ( + mode, + gb | maxBy([](const Group& g) { return g.size(); }) + | as()); + + vector largest{"three", "seven", "eight"}; + EXPECT_EQ( + largest, + gb | maxBy([](const Group& g) { return g.key(); }) + | as()); +} + int main(int argc, char *argv[]) { testing::InitGoogleTest(&argc, argv); gflags::ParseCommandLineFlags(&argc, &argv, true);