From: Giuseppe Ottaviano Date: Wed, 10 Jan 2018 02:44:43 +0000 (-0800) Subject: Improve performance of enumerate() with optimization disabled X-Git-Tag: v2018.01.15.00~34 X-Git-Url: http://demsky.eecs.uci.edu/git/?p=folly.git;a=commitdiff_plain;h=66c782bb73c911a99dc7e41f8aa9659515e3a20b Improve performance of enumerate() with optimization disabled Reviewed By: yfeldblum, WillerZ Differential Revision: D6682606 fbshipit-source-id: 5a203a849e96d3020cf9ad2669451122954c2199 --- diff --git a/folly/container/Enumerate.h b/folly/container/Enumerate.h index 94bcecef..79765e51 100644 --- a/folly/container/Enumerate.h +++ b/folly/container/Enumerate.h @@ -1,5 +1,5 @@ /* - * Copyright 2017 Facebook, Inc. + * Copyright 2017-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include #include /** @@ -64,11 +65,13 @@ struct MakeConst { // second overload will be SFINAEd out in that case. Otherwise, the // second is preferred in the partial order for getPointer(_, 0). template -auto getPointer(const Iterator& it, long) -> decltype(std::addressof(*it)) { +FOLLY_ALWAYS_INLINE auto getPointer(const Iterator& it, long) + -> decltype(std::addressof(*it)) { return std::addressof(*it); } template -auto getPointer(const Iterator& it, int) -> decltype(it.operator->()) { +FOLLY_ALWAYS_INLINE auto getPointer(const Iterator& it, int) + -> decltype(it.operator->()) { return it.operator->(); } @@ -85,21 +88,22 @@ class Enumerator { using pointer = typename std::iterator_traits::pointer; using iterator_category = std::input_iterator_tag; - explicit Proxy(const Enumerator* e) : it_(e->it_), index(e->idx_) {} + FOLLY_ALWAYS_INLINE explicit Proxy(const Enumerator& e) + : it_(e.it_), index(e.idx_) {} // Non-const Proxy: Forward constness from Iterator. - reference operator*() { + FOLLY_ALWAYS_INLINE reference operator*() { return *it_; } - pointer operator->() { + FOLLY_ALWAYS_INLINE pointer operator->() { return getPointer(it_, 0); } // Const Proxy: Force const references. - typename MakeConst::type operator*() const { + FOLLY_ALWAYS_INLINE typename MakeConst::type operator*() const { return *it_; } - typename MakeConst::type operator->() const { + FOLLY_ALWAYS_INLINE typename MakeConst::type operator->() const { return getPointer(it_, 0); } @@ -110,24 +114,24 @@ class Enumerator { const size_t index; }; - Proxy operator*() const { - return Proxy(this); + FOLLY_ALWAYS_INLINE Proxy operator*() const { + return Proxy(*this); } - Enumerator& operator++() { + FOLLY_ALWAYS_INLINE Enumerator& operator++() { ++it_; ++idx_; return *this; } template - bool operator==(const Enumerator& rhs) { + FOLLY_ALWAYS_INLINE bool operator==(const Enumerator& rhs) { return it_ == rhs.it_; } template - bool operator!=(const Enumerator& rhs) { - return !(*this == rhs); + FOLLY_ALWAYS_INLINE bool operator!=(const Enumerator& rhs) { + return !(it_ == rhs.it_); } private: diff --git a/folly/container/Foreach.h b/folly/container/Foreach.h index ba744e8f..3d8344f4 100644 --- a/folly/container/Foreach.h +++ b/folly/container/Foreach.h @@ -1,5 +1,5 @@ /* - * Copyright 2017 Facebook, Inc. + * Copyright 2017-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -121,9 +121,9 @@ FOLLY_CPP14_CONSTEXPR decltype(auto) fetch(Sequence&& sequence, Index&& index); } // namespace folly /** - * Everything below this is deprecated. Use the folly::for_each algorithm above - * instead + * Everything below is deprecated. */ + /* * Form a local variable name from "FOR_EACH_" x __LINE__, so that * FOR_EACH can be nested without creating shadowed declarations. @@ -159,9 +159,9 @@ FOLLY_CPP14_CONSTEXPR decltype(auto) fetch(Sequence&& sequence, Index&& index); i != _FE_ANON(s2_).rend(); ++i) /* - * If you just want the element values, please use this (ranges-v3) construct: + * If you just want the element values, please use this construct: * - * for (auto&& element : collection | view::zip(view::ints)) + * for (auto&& element : folly::enumerate(collection)) * * If you need access to the iterators please write an explicit iterator loop * and use a counter variable diff --git a/folly/container/test/ForeachBenchmark.cpp b/folly/container/test/ForeachBenchmark.cpp index 5b741afb..a3e810eb 100644 --- a/folly/container/test/ForeachBenchmark.cpp +++ b/folly/container/test/ForeachBenchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2017 Facebook, Inc. + * Copyright 2017-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,12 +14,14 @@ * limitations under the License. */ -#include +#include +#include #include -#include - -#include +#include +#include +#include +#include using namespace folly; using namespace folly::detail; @@ -31,10 +33,14 @@ using namespace folly::detail; // iter->second as is, without assigning to local variables. // 3. Use FOR_EACH_KV loop to iterate through the map. -std::map bmMap; // For use in benchmarks below. +// For use in benchmarks below. +std::map bmMap; std::vector vec_one; std::vector vec_two; +// Smallest type to isolate iteration overhead. +std::vector vec_char; + void setupBenchmark(size_t iters) { bmMap.clear(); for (size_t i = 0; i < iters; ++i) { @@ -47,6 +53,12 @@ void setupBenchmark(size_t iters) { vec_two.resize(iters); } +void setupCharVecBenchmark(size_t iters) { + vec_char.resize(iters); + std::generate( + vec_char.begin(), vec_char.end(), [] { return Random::rand32(128); }); +} + BENCHMARK(ForEachFunctionNoAssign, iters) { BenchmarkSuspender suspender; @@ -330,13 +342,61 @@ BENCHMARK(ForEachRangeR, iters) { doNotOptimizeAway(sum); } -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - gflags::ParseCommandLineFlags(&argc, &argv, true); - auto r = RUN_ALL_TESTS(); - if (r) { - return r; +BENCHMARK(CharVecForRange, iters) { + BENCHMARK_SUSPEND { + setupCharVecBenchmark(iters); + } + size_t sum = 0; + for (auto& c : vec_char) { + sum += c; + } + doNotOptimizeAway(sum); +} + +BENCHMARK(CharVecForRangeExplicitIndex, iters) { + BENCHMARK_SUSPEND { + setupCharVecBenchmark(iters); + } + size_t sum = 0; + size_t index = 0; + for (auto& c : vec_char) { + sum += c * index; + ++index; + } + doNotOptimizeAway(sum); +} + +BENCHMARK(CharVecForEach, iters) { + BENCHMARK_SUSPEND { + setupCharVecBenchmark(iters); + } + size_t sum = 0; + folly::for_each(vec_char, [&](auto& c) { sum += c; }); + doNotOptimizeAway(sum); +} + +BENCHMARK(CharVecForEachIndex, iters) { + BENCHMARK_SUSPEND { + setupCharVecBenchmark(iters); + } + size_t sum = 0; + folly::for_each(vec_char, [&](auto& c, auto index) { sum += c * index; }); + doNotOptimizeAway(sum); +} + +BENCHMARK(CharVecForRangeEnumerate, iters) { + BENCHMARK_SUSPEND { + setupCharVecBenchmark(iters); + } + size_t sum = 0; + for (auto it : enumerate(vec_char)) { + sum += *it * it.index; } + doNotOptimizeAway(sum); +} + +int main(int argc, char** argv) { + folly::init(&argc, &argv); runBenchmarks(); return 0; }