/*
 * Copyright 2017 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/AtomicUnorderedMap.h>

#include <memory>
#include <thread>
#include <unordered_map>

#include <folly/Benchmark.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/portability/Semaphore.h>
#include <folly/test/DeterministicSchedule.h>

using namespace folly;
using namespace folly::test;
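
// non_atomic provides the std::atomic<T> interface with none of the
// atomicity or memory-ordering guarantees. Substituting it for the map's
// Atom template parameter lets the single-threaded benchmarks below
// isolate the cost of the atomic operations themselves.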
template <class T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired) : value(desired) {}

  T operator+=(T arg) { value += arg; return load(); }

  T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */ operator T() const { return load(); }

  void store(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  // CAS semantics without atomicity: succeed iff the current value equals
  // expected; on failure, copy the current value back into expected.
  bool compare_exchange_weak(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool compare_exchange_strong(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool is_lock_free() const { return true; }
};
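
// UIM pins down the AtomicUnorderedInsertMap parameters these tests don't
// vary (hash, key equality, trivial-destructor detection), leaving the
// key/value types, index width, atom type, and allocator as the knobs.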
template <
    typename Key,
    typename Value,
    typename IndexType,
    template <typename> class Atom = std::atomic,
    typename Allocator = std::allocator<char>>
using UIM = AtomicUnorderedInsertMap<
    Key,
    Value,
    std::hash<Key>,
    std::equal_to<Key>,
    (boost::has_trivial_destructor<Key>::value &&
     boost::has_trivial_destructor<Value>::value),
    Atom,
    IndexType,
    Allocator>;

template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};

// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);

TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
  UIM<std::string,
      std::string,
      TypeParam,
      std::atomic,
      folly::detail::MMapAlloc>
      m(100);

  m.emplace("abc", "ABC");
  EXPECT_TRUE(m.find("abc") != m.cend());
  EXPECT_EQ(m.find("abc")->first, "abc");
  EXPECT_EQ(m.find("abc")->second, "ABC");
  EXPECT_TRUE(m.find("def") == m.cend());
  auto iter = m.cbegin();
  EXPECT_TRUE(iter != m.cend());
  EXPECT_TRUE(iter == m.find("abc"));
  auto a = iter;
  EXPECT_TRUE(a == iter);
  ++iter;
  EXPECT_TRUE(iter == m.cend());
  EXPECT_TRUE(a != iter);
  ++a;
  EXPECT_TRUE(a == iter);
}

TEST(AtomicUnorderedInsertMap, load_factor) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);

  // we should be able to put in much more than 5000 things because of
  // our load factor request
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, true);
  }
}

TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);

  EXPECT_THROW(
      {
        for (int i = 0; i < 6000; ++i) {
          m.emplace(i, false);
        }
      },
      std::bad_alloc);
}
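
// MutableAtom wraps the mapped value in the map's Atom template so the
// value can still be modified after insertion; MutableData does the same
// with a plain mutable field for the non-atomic variant.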
TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
  UIM<int, MutableAtom<int>, TypeParam> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
}

TEST(UnorderedInsertMap, value_mutation) {
  UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
  EXPECT_EQ(m.find(1)->second.data, 2);
}

// This test is too expensive to run automatically. On my dev server it
// takes about 10 minutes for a dbg build, 2 for opt.
TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
  size_t capacity = 2000000000;
  AtomicUnorderedInsertMap64<size_t, size_t> big(capacity);
  // insert the even numbers below 2 * capacity
  for (size_t i = 0; i < capacity * 2; i += 2) {
    big.emplace(i, i * 10);
  }
  // sample roughly 3000 keys: even keys in range must hit, all others miss
  for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
    auto iter = big.find(i);
    if ((i & 1) == 0 && i < capacity * 2) {
      EXPECT_EQ(iter->second, i * 10);
    } else {
      EXPECT_TRUE(iter == big.cend());
    }
  }
}
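
// Measures the cost of a successful find(). Setup inserts every multiple
// of 3 below 3 * capacity (5641 is coprime to capacity, so the
// multiplicative walk covers the whole range); the timed loop then probes
// pseudo-randomly ordered keys from that same set, so each lookup is a hit.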
BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int, size_t>> ptr = {};

  size_t capacity = 100000;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<AtomicUnorderedInsertMap<int, size_t>>(capacity);
    for (size_t i = 0; i < capacity; ++i) {
      auto k = 3 * ((5641 * i) % capacity);
      ptr->emplace(k, k + 1);
      EXPECT_EQ(ptr->find(k)->second, k + 1);
    }
  }

  for (size_t i = 0; i < iters; ++i) {
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() || iter->second != k + 1) {
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    }
    EXPECT_EQ(iter->second, k + 1);
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}

struct PairHash {
  size_t operator()(const std::pair<uint64_t, uint64_t>& pr) const {
    return pr.first ^ pr.second;
  }
};
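
// Mixed read/write benchmark: numThreads workers each perform
// itersPerThread operations, keeping roughly readsPerWrite reads per write
// and switching to read-only once a thread has used its share of capacity.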
void contendedRW(
    size_t itersPerThread,
    size_t capacity,
    size_t numThreads,
    size_t readsPerWrite) {
  typedef std::pair<uint64_t, uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key, MutableAtom<uint32_t>, PairHash> Map;

  std::unique_ptr<Map> ptr = {};
  std::atomic<bool> go{false};
  std::vector<std::thread> threads;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<Map>(capacity);
    while (threads.size() < numThreads) {
      threads.emplace_back([&]() {
        // spin until every worker has been constructed
        while (!go) {
          std::this_thread::yield();
        }

        size_t reads = 0;
        size_t writes = 0;
        while (reads + writes < itersPerThread) {
          auto r = Random::rand32();
          Key key(reads + writes, r);
          if (reads < writes * readsPerWrite ||
              writes >= capacity / numThreads) {
            ++reads;
            auto iter = ptr->find(key);
            EXPECT_TRUE(
                iter == ptr->cend() ||
                iter->second.data.load(std::memory_order_acquire) >=
                    key.first);
          } else {
            ++writes;
            try {
              auto pr = ptr->emplace(key, key.first);
              if (!pr.second) {
                pr.first->second.data++;
              }
            } catch (std::bad_alloc&) {
              LOG(INFO) << "bad alloc";
            }
          }
        }
      });
    }
  }

  go = true;

  for (auto& thr : threads) {
    thr.join();
  }
}

// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
//
// without MAP_HUGETLB (default)
//
// ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative time/iter
//   iters/s
// ============================================================================
// lookup_int_int_hit                                          20.05ns   49.89M
// contendedRW(small_32thr_99pct)                              70.36ns   14.21M
// contendedRW(large_32thr_99pct)                             164.23ns    6.09M
// contendedRW(large_32thr_99_9pct)                           158.81ns    6.30M
// ============================================================================
//
// with MAP_HUGETLB hacked in
// ============================================================================
// lookup_int_int_hit                                          19.67ns   50.84M
// contendedRW(small_32thr_99pct)                              62.46ns   16.01M
// contendedRW(large_32thr_99pct)                             119.41ns    8.37M
// contendedRW(large_32thr_99_9pct)                           111.23ns    8.99M
// ============================================================================
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)

BENCHMARK_DRAW_LINE();

// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
//
// Single-threaded benchmarks to test how much better we are than
// std::unordered_map, and what the cost of the atomic operations is in
// the uncontended case.
// ============================================================================
// std_map                                                      1.20ms  832.58
// atomic_fast_map                                            511.35us    1.96K
// fast_map                                                   196.28us    5.09K
// ============================================================================
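
// Each benchmark below inserts 10000 entries and then looks every key back
// up; doNotOptimizeAway keeps the compiler from eliding the finds.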
BENCHMARK(std_map) {
  std::unordered_map<long, long> m;
  for (int i = 0; i < 10000; ++i) {
    m[i] = i;
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map) {
  UIM<long, long, uint32_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map) {
  UIM<long, long, uint32_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map_64) {
  UIM<long, long, uint64_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map_64) {
  UIM<long, long, uint64_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  int rv = RUN_ALL_TESTS();
  folly::runBenchmarksOnFlag();
  return rv;
}