From c5a492c05a7fb386569ebc09c887a7abe6a63e6d Mon Sep 17 00:00:00 2001 From: Tom Jackson Date: Fri, 26 Apr 2013 13:58:08 -0700 Subject: [PATCH] distinctBy() Test Plan: Unit tests Reviewed By: jbrewer@fb.com FB internal diff: D791149 --- folly/experimental/Gen-inl.h | 90 +++++++++++++++++++++++++++-- folly/experimental/Gen.h | 11 ++++ folly/experimental/test/GenTest.cpp | 37 ++++++++++++ 3 files changed, 134 insertions(+), 4 deletions(-) diff --git a/folly/experimental/Gen-inl.h b/folly/experimental/Gen-inl.h index dd742500..73c10816 100644 --- a/folly/experimental/Gen-inl.h +++ b/folly/experimental/Gen-inl.h @@ -901,11 +901,11 @@ class Order : public Operator> { } public: Generator(Source source, - const Selector& selector, - const Comparer& comparer) + Selector selector, + Comparer comparer) : source_(std::move(source)), - selector_(selector), - comparer_(comparer) {} + selector_(std::move(selector)), + comparer_(std::move(comparer)) {} VectorType operator|(const Collect&) const { return asVector(); @@ -956,6 +956,86 @@ class Order : public Operator> { } }; +/** + * Distinct - For filtering duplicates out of a sequence. A selector may be + * provided to generate a key to uniquify for each value. + * + * This type is usually used through the 'distinct' helper function, like: + * + * auto closest = from(results) + * | distinctBy([](Item& i) { + * return i.target; + * }) + * | take(10); + */ +template +class Distinct : public Operator> { + Selector selector_; + public: + Distinct() {} + + explicit Distinct(Selector selector) + : selector_(std::move(selector)) + {} + + template + class Generator : public GenImpl> { + Source source_; + Selector selector_; + + typedef typename std::decay::type StorageType; + + // selector_ cannot be passed an rvalue or it would end up passing the husk + // of a value to the downstream operators. + typedef const StorageType& ParamType; + + typedef typename std::result_of::type KeyType; + typedef typename std::decay::type KeyStorageType; + + public: + Generator(Source source, + Selector selector) + : source_(std::move(source)), + selector_(std::move(selector)) {} + + template + void foreach(Body&& body) const { + std::unordered_set keysSeen; + source_.foreach([&](Value value) { + if (keysSeen.insert(selector_(ParamType(value))).second) { + body(std::forward(value)); + } + }); + } + + template + bool apply(Handler&& handler) const { + std::unordered_set keysSeen; + return source_.apply([&](Value value) -> bool { + if (keysSeen.insert(selector_(ParamType(value))).second) { + return handler(std::forward(value)); + } + return true; + }); + } + }; + + template> + Gen compose(GenImpl&& source) const { + return Gen(std::move(source.self()), selector_); + } + + template> + Gen compose(const GenImpl& source) const { + return Gen(source.self(), selector_); + } +}; + /** * Composed - For building up a pipeline of operations to perform, absent any * particular source generator. Useful for building up custom pipelines. @@ -1614,6 +1694,8 @@ static const detail::Min max; static const detail::Order order; +static const detail::Distinct distinct; + static const detail::Map move; static const detail::Concat concat; diff --git a/folly/experimental/Gen.h b/folly/experimental/Gen.h index da64fde9..9baae833 100644 --- a/folly/experimental/Gen.h +++ b/folly/experimental/Gen.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include "folly/Range.h" #include "folly/Optional.h" @@ -213,6 +215,9 @@ class Skip; template class Order; +template +class Distinct; + template class Composed; @@ -380,6 +385,12 @@ Order orderByDescending(Selector selector = Identity()) { return Order(std::move(selector)); } +template> +Distinct distinctBy(Selector selector = Identity()) { + return Distinct(std::move(selector)); +} + template>> Get get() { diff --git a/folly/experimental/test/GenTest.cpp b/folly/experimental/test/GenTest.cpp index 2bc28467..7b6d22b3 100644 --- a/folly/experimental/test/GenTest.cpp +++ b/folly/experimental/test/GenTest.cpp @@ -268,6 +268,43 @@ TEST(Gen, OrderTake) { EXPECT_EQ(expected, actual); } +TEST(Gen, Distinct) { + auto expected = vector{3, 1, 2}; + auto actual = + from({3, 1, 3, 2, 1, 2, 3}) + | distinct + | as(); + EXPECT_EQ(expected, actual); +} + +TEST(Gen, DistinctBy) { // 0 1 4 9 6 5 6 9 4 1 0 + auto expected = vector{0, 1, 2, 3, 4, 5}; + auto actual = + seq(0, 100) + | distinctBy([](int i) { return i * i % 10; }) + | as(); + EXPECT_EQ(expected, actual); +} + +TEST(Gen, DistinctMove) { // 0 1 4 9 6 5 6 9 4 1 0 + auto expected = vector{0, 1, 2, 3, 4, 5}; + auto actual = + seq(0, 100) + | mapped([](int i) { return std::unique_ptr(new int(i)); }) + // see comment below about selector parameters for Distinct + | distinctBy([](const std::unique_ptr& pi) { return *pi * *pi % 10; }) + | mapped([](std::unique_ptr pi) { return *pi; }) + | as(); + + // NOTE(tjackson): the following line intentionally doesn't work: + // | distinctBy([](std::unique_ptr pi) { return *pi * *pi % 10; }) + // This is because distinctBy because the selector intentionally requires a + // const reference. If it required a move-reference, the value might get + // gutted by the selector before said value could be passed to downstream + // operators. + EXPECT_EQ(expected, actual); +} + TEST(Gen, MinBy) { EXPECT_EQ(7, seq(1, 10) | minBy([](int i) -> double { -- 2.34.1