From b2d3a93b2a2bac05087d4294dc09d59cca64c90e Mon Sep 17 00:00:00 2001 From: Marc Celani Date: Thu, 13 Mar 2014 19:52:09 -0700 Subject: [PATCH] Introduce a merge() function for folly::sorted_vector_map Summary: Feed spends a non trivial amount of time merging two sorted_vector_maps. Writing code for this efficiently is actually a little tricky, and its much easier to maintain the simpler code. This adds merge() to folly so that its easy for feed and fast. Benchmarks with large input sizes show this is a gigantic win (moving from O(n^2) to O(n). Test Plan: unit tests, benchmark Reviewed By: delong.j@fb.com FB internal diff: D1221725 @override-unit-failures --- folly/sorted_vector_types.h | 65 ++++++++++++++++++++++++++ folly/test/SortedVectorBenchmark.cpp | 69 ++++++++++++++++++++++++++++ folly/test/sorted_vector_test.cpp | 26 +++++++++++ 3 files changed, 160 insertions(+) create mode 100644 folly/test/SortedVectorBenchmark.cpp diff --git a/folly/sorted_vector_types.h b/folly/sorted_vector_types.h index 6d29a101..a7273539 100644 --- a/folly/sorted_vector_types.h +++ b/folly/sorted_vector_types.h @@ -617,6 +617,71 @@ inline void swap(sorted_vector_map& a, return a.swap(b); } +/* + * Efficiently moves all elements from b into a by taking advantage of sorted + * inputs. Any keys that belong to both a and b will have the value from b. + * Assumes that C and A can be constructed using the default constructor. + * + * std::merge cannot be used for this use case because in the event of equal + * keys belonging to both a and b, it undefined which element will be inserted + * into the output map last (and therefore be present in the map). + */ +template +inline void merge(sorted_vector_map& a, + sorted_vector_map& b) { + auto size = a.size(); + auto it_a = a.begin(); + auto it_b = b.begin(); + while (it_a != a.end() && it_b != b.end()) { + auto comp = a.key_comp()(it_a->first, it_b->first); + if (!comp) { + if (!a.key_comp()(it_b->first, it_a->first)) { + ++it_a; + ++it_b; + } else { + ++size; + ++it_b; + } + } else { + ++it_a; + } + } + if (it_b != b.end()) { + size += b.end() - it_b; + } + + sorted_vector_map c; + c.reserve(size); + it_a = a.begin(); + it_b = b.begin(); + while (it_a != a.end() && it_b != b.end()) { + auto comp = a.key_comp()(it_a->first, it_b->first); + if (!comp) { + if (!a.key_comp()(it_b->first, it_a->first)) { + c.insert(c.end(), std::move(*it_b)); + ++it_a; + ++it_b; + } else { + c.insert(c.end(), std::move(*it_b)); + ++it_b; + } + } else { + c.insert(c.end(), std::move(*it_a)); + ++it_a; + } + } + while (it_a != a.end()) { + c.insert(c.end(), std::move(*it_a)); + ++it_a; + } + while (it_b != b.end()) { + c.insert(c.end(), std::move(*it_b)); + ++it_b; + } + a.swap(c); + b.clear(); +} + ////////////////////////////////////////////////////////////////////// } diff --git a/folly/test/SortedVectorBenchmark.cpp b/folly/test/SortedVectorBenchmark.cpp new file mode 100644 index 00000000..5a4a7bf9 --- /dev/null +++ b/folly/test/SortedVectorBenchmark.cpp @@ -0,0 +1,69 @@ +/* + * Copyright 2014 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Format.h" + +#include + +#include "folly/sorted_vector_types.h" +#include "folly/Benchmark.h" + +namespace { + +using folly::sorted_vector_map; + +sorted_vector_map a; +sorted_vector_map b; + +BENCHMARK(merge_by_setting, iters) { + while (iters--) { + // copy to match merge benchmark + auto a_cpy = a; + auto b_cpy = b; + for (const auto& kv : b_cpy) { + a_cpy[kv.first] = kv.second; + } + } +} + +BENCHMARK_RELATIVE(merge, iters) { + while (iters--) { + auto a_cpy = a; + auto b_cpy = b; + merge(a_cpy, b_cpy); + } +} +} + +// Benchmark results on my dev server (Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz) +// +// ============================================================================ +// folly/test/SortedVectorBenchmark.cpp relative time/iter iters/s +// ============================================================================ +// merge_by_setting 482.01us 2.07K +// merge 2809.19% 17.16us 58.28K +// ============================================================================ + +int main(int argc, char *argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + for (int i = 0; i < 1000; ++i) { + a[2 * i] = 2 * i; + b[2 * i + 1] = 2 * i + 1; + } + + folly::runBenchmarks(); + return 0; +} diff --git a/folly/test/sorted_vector_test.cpp b/folly/test/sorted_vector_test.cpp index 80038a0a..d03823fe 100644 --- a/folly/test/sorted_vector_test.cpp +++ b/folly/test/sorted_vector_test.cpp @@ -301,3 +301,29 @@ TEST(SortedVectorTest, EmptyTest) { EXPECT_TRUE(emptyMap.lower_bound(10) == emptyMap.end()); EXPECT_TRUE(emptyMap.find(10) == emptyMap.end()); } + +TEST(SortedVectorTest, MergeTest) { + sorted_vector_map a; + a[0] = 0; + a[1] = 1; + a[5] = 5; + a[10] = 10; + + sorted_vector_map b; + b[0] = 10; + b[3] = 13; + b[7] = 17; + b[11] = 111; + + merge(a, b); + + EXPECT_TRUE(b.empty()); + EXPECT_EQ(a.size(), 7); + EXPECT_EQ(a[0], 10); + EXPECT_EQ(a[1], 1); + EXPECT_EQ(a[3], 13); + EXPECT_EQ(a[5], 5); + EXPECT_EQ(a[7], 17); + EXPECT_EQ(a[10], 10); + EXPECT_EQ(a[11], 111); +} -- 2.34.1