2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/AtomicHashMap.h>
19 #include <glog/logging.h>
20 #include <gtest/gtest.h>
25 #include <folly/Benchmark.h>
26 #include <folly/Conv.h>
30 using folly::AtomicHashMap;
31 using folly::AtomicHashArray;
// Command-line knobs controlling load-factor targets, thread counts, and
// benchmark sizing for the AtomicHashMap tests/benchmarks below.
34 DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
35 DEFINE_double(maxLoadFactor, 0.80, "Max before growth.");
36 DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests.");
37 DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks.");
// LF = max/target load factor ratio; benchmark maps are over-sized by this
// factor so that after inserting numBMElements entries they sit near the
// target load factor rather than the growth threshold.
39 const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor;
40 const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
// Returns the current wall-clock time in microseconds since the epoch.
// NOTE(review): lines appear elided in this chunk — the local `timeval tv`
// declaration and the gettimeofday(&tv, nullptr) call that fills it are not
// visible here; confirm against the full file.
42 static int64_t nowInUsec() {
45 return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
// Basic single-threaded sanity test with std::string values: insert/find
// round-trips, duplicate inserts do not overwrite, and values are mutable
// in place through the iterator returned by find().
// NOTE(review): the map's declaration line (e.g. `AHM myMap(...)`) and loop
// closing braces appear elided from this chunk.
48 TEST(Ahm, BasicStrings) {
49 typedef AtomicHashMap<int64_t,string> AHM;
51 EXPECT_TRUE(myMap.begin() == myMap.end());
53 for (int i = 0; i < 100; ++i) {
54 myMap.insert(make_pair(i, folly::to<string>(i)));
56 for (int i = 0; i < 100; ++i) {
57 EXPECT_EQ(myMap.find(i)->second, folly::to<string>(i));
// Second insert with the same key must fail and keep the original value.
60 myMap.insert(std::make_pair(999, "A"));
61 myMap.insert(std::make_pair(999, "B"));
62 EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten
// But direct assignment through the iterator does mutate the stored value.
63 myMap.find(999)->second = "B";
64 myMap.find(999)->second = "C";
65 EXPECT_EQ(myMap.find(999)->second, "C");
66 EXPECT_EQ(myMap.find(999)->first, 999);
// Exercises move-only values (std::unique_ptr<int>) through all insertion
// styles: pair-insert, key/value insert, emplace, and emplace with an extra
// constructor argument (a deleter); then verifies lookups and erase().
// NOTE(review): the map declaration and loop/erase lines appear partially
// elided from this chunk (e.g. the erase() call before the final loop).
70 TEST(Ahm, BasicNoncopyable) {
71 typedef AtomicHashMap<int64_t,std::unique_ptr<int>> AHM;
73 EXPECT_TRUE(myMap.begin() == myMap.end());
75 for (int i = 0; i < 50; ++i) {
76 myMap.insert(make_pair(i, std::unique_ptr<int>(new int(i))));
78 for (int i = 50; i < 100; ++i) {
79 myMap.insert(i, std::unique_ptr<int>(new int (i)));
81 for (int i = 100; i < 150; ++i) {
82 myMap.emplace(i, new int (i));
84 for (int i = 150; i < 200; ++i) {
85 myMap.emplace(i, new int (i), std::default_delete<int>());
87 for (int i = 0; i < 200; ++i) {
88 EXPECT_EQ(*(myMap.find(i)->second), i);
90 for (int i = 0; i < 200; i+=4) {
93 for (int i = 0; i < 200; i+=4) {
94 EXPECT_EQ(myMap.find(i), myMap.end());
// Shared typedefs and globals used by the remaining tests and benchmarks:
// a 32-bit key/value AtomicHashMap/AtomicHashArray pair plus global
// instances that the pthread-style worker functions operate on.
99 typedef int32_t ValueT;
101 typedef AtomicHashMap<KeyT,ValueT> AHMapT;
102 typedef AHMapT::value_type RecordT;
103 typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
105 AHArrayT::Config config;
106 static AHArrayT::SmartPtr globalAHA(nullptr);
107 static std::unique_ptr<AHMapT> globalAHM;
109 // Generate a deterministic value based on an input key
// NOTE(review): genVal's body/return is elided from this chunk; tests below
// rely only on it being a pure deterministic function of `key`.
110 static int genVal(int key) {
// Body of the map-growth test (the enclosing TEST(...) header line is elided
// from this chunk). Deliberately undersizes the map (sizeFactor < 1) so that
// inserting numEntries forces submap growth, then verifies:
//   - every insert succeeds and findAt(index) round-trips the iterator index,
//   - duplicate inserts fail and return the original value,
//   - all entries survive growth and are reachable by find()/findAt(),
//   - values are mutable in place, and erase+reinsert of half works.
115 VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
116 sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
117 uint64_t numEntries = 10000;
118 float sizeFactor = 0.46;
120 std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
122 // load map - make sure we succeed and the index is accurate
124 for (uint64_t i = 0; i < numEntries; i++) {
125 auto ret = m->insert(RecordT(i, genVal(i)));
126 success &= ret.second;
127 success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
129 // Overwrite vals to make sure there are no dups
130 // Every insert should fail because the keys are already in the map.
132 for (uint64_t i = 0; i < numEntries; i++) {
133 auto ret = m->insert(RecordT(i, genVal(i * 2)));
134 success &= (ret.second == false); // fail on collision
135 success &= (ret.first->second == genVal(i)); // return the previous value
136 success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
138 EXPECT_TRUE(success);
// Growth check: more than one submap means the initial array overflowed.
141 size_t cap = m->capacity();
143 EXPECT_GT(m->numSubMaps(), 1); // make sure we grew
145 EXPECT_EQ(m->size(), numEntries);
146 for (size_t i = 0; i < numEntries; i++) {
147 success &= (m->find(i)->second == genVal(i));
149 EXPECT_TRUE(success);
// Verify that iterator indices remain stable/usable across findAt().
154 AHMapT::const_iterator retIt;
155 for (int32_t i = 0; i < int32_t(numEntries); i++) {
157 retIt = m->findAt(retIt.getIndex());
158 success &= (retIt->second == genVal(i));
159 // We use a uint32_t index so that this comparison is between two
160 // variables of the same type.
161 success &= (retIt->first == i);
163 EXPECT_TRUE(success);
165 // Try modifying value
166 m->find(8)->second = 5309;
167 EXPECT_EQ(m->find(8)->second, 5309);
// NOTE(review): an erase/clear step appears elided here before reinsertion.
172 for (uint64_t i = 0; i < numEntries / 2; i++) {
173 success &= m->insert(RecordT(i, genVal(i))).second;
175 EXPECT_TRUE(success);
176 EXPECT_EQ(m->size(), numEntries / 2);
// Iteration test: fill an undersized map (forcing multiple submaps), then
// walk it with the range iterator verifying each value matches genVal(key)
// and that the total visited count equals the number of inserted entries.
// NOTE(review): the iteration loop header and `count` declaration are elided
// from this chunk.
179 TEST(Ahm, iterator) {
180 int numEntries = 10000;
181 float sizeFactor = .46;
182 std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
184 // load map - make sure we succeed and the index is accurate
185 for (int i = 0; i < numEntries; i++) {
186 m->insert(RecordT(i, genVal(i)));
192 success &= (it->second == genVal(it->first));
195 EXPECT_TRUE(success);
196 EXPECT_EQ(count, numEntries);
// Interior of a Counters helper class (the class/struct header line is
// elided from this chunk): a concurrent counter map keyed by object id.
201 // Note: Unfortunately can't currently put a std::atomic<int64_t> in
202 // the value in ahm since it doesn't support types that are both non-copy
203 // and non-move constructible yet.
204 AtomicHashMap<int64_t,int64_t> ahm;
207 explicit Counters(size_t numCounters) : ahm(numCounters) {}
// Atomically bump the counter for obj_id, inserting 1 on first sight.
209 void increment(int64_t obj_id) {
210 auto ret = ahm.insert(std::make_pair(obj_id, 1));
212 // obj_id already exists, increment count
// GCC builtin atomic add on the stored value; safe because AHM never moves
// an entry once inserted.
213 __sync_fetch_and_add(&ret.first->second, 1);
// Returns the current count for obj_id, or 0 if never incremented.
217 int64_t getValue(int64_t obj_id) {
218 auto ret = ahm.find(obj_id);
219 return ret != ahm.end() ? ret->second : 0;
222 // export the counters without blocking increments
// NOTE(review): toString()'s signature and `ret` declaration are elided;
// this loop formats each counter as " [id:count]\n".
225 ret.reserve(ahm.size() * 32);
226 for (const auto& e : ahm) {
227 ret += folly::to<string>(
228 " [", e.first, ":", e.second, "]\n");
// Stress test for Counters: spawns key*mult threads per key, each doing one
// increment, then checks every counter equals key*mult and appears in the
// textual dump. (The TEST(...) header, `mult`/`c` declarations, and the
// thread join body are elided from this chunk.)
235 // If you get an error "terminate called without an active exception", there
236 // might be too many threads getting created - decrease numKeys and/or mult.
238 const int numKeys = 10;
241 vector<int64_t> keys;
242 FOR_EACH_RANGE(i, 1, numKeys) {
245 vector<std::thread> threads;
246 for (auto key : keys) {
247 FOR_EACH_RANGE(i, 0, key * mult) {
// Capture `key` by value: the loop variable changes while threads run.
248 threads.push_back(std::thread([&, key] { c.increment(key); }));
251 for (auto& t : threads) {
254 string str = c.toString();
255 for (auto key : keys) {
256 int val = key * mult;
257 EXPECT_EQ(val, c.getValue(key));
258 EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
// Interior of a throw-on-alternate-assignments value type used by the
// exception-safety test (the class header and member `v_` are elided).
// operator= flips a static flag and throws on every other call, so roughly
// half of the map insertions fail mid-assignment.
265 explicit Integer(KeyT v = 0) : v_(v) {}
267 Integer& operator=(const Integer& a) {
268 static bool throwException_ = false;
269 throwException_ = !throwException_;
270 if (throwException_) {
// NOTE(review): the throw statement and the assignment of v_ are elided
// from this chunk.
277 bool operator==(const Integer& a) const { return v_ == a.v_; }
// Exception-safety test: Integer's copy-assignment throws on alternate
// calls, so roughly half the inserts throw. The map must stay consistent:
// a key is either fully present (find() returns the right value) or fully
// absent (count() == 0), and size() matches the number of successes.
// NOTE(review): the try/catch around insert and the `count` bookkeeping
// lines are elided from this chunk.
283 TEST(Ahm, map_exception_safety) {
284 typedef AtomicHashMap<KeyT,Integer> MyMapT;
286 int numEntries = 10000;
287 float sizeFactor = 0.46;
288 std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
292 for (int i = 0; i < numEntries; i++) {
294 m->insert(i, Integer(genVal(i)));
295 success &= (m->find(i)->second == Integer(genVal(i)));
298 success &= !m->count(i);
301 EXPECT_EQ(count, m->size());
302 EXPECT_TRUE(success);
// Erase test: repeatedly fills and fully drains the map so that erased
// slots and multiple submaps are both exercised. Verifies count()/size()
// bookkeeping after every insert and erase, and that double-erase fails.
305 TEST(Ahm, basicErase) {
306 size_t numEntries = 3000;
308 std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
309 // Iterate filling up the map and deleting all keys a few times
310 // to test more than one subMap.
311 for (int iterations = 0; iterations < 4; ++iterations) {
312 // Testing insertion of keys
314 for (size_t i = 0; i < numEntries; ++i) {
315 success &= !(s->count(i));
316 auto ret = s->insert(RecordT(i, i));
317 success &= s->count(i);
318 success &= ret.second;
320 EXPECT_TRUE(success);
321 EXPECT_EQ(s->size(), numEntries);
323 // Delete every key in the map and verify that the key is gone and the the
326 for (size_t i = 0; i < numEntries; ++i) {
327 success &= s->erase(i);
328 success &= (s->size() == numEntries - 1 - i);
329 success &= !(s->count(i));
330 success &= !(s->erase(i));
332 EXPECT_TRUE(success);
334 VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
// Key mixer plus the two pthread-style insert workers used by the
// concurrency tests and benchmarks below.
339 inline KeyT randomizeKey(int key) {
340 // We deterministically randomize the key to more accurately simulate
341 // real-world usage, and to avoid pathalogical performance patterns (e.g.
342 // those related to __gnu_cxx::hash<int64_t>()(1) == 1).
344 // Use a hash function we don't normally use for ints to avoid interactions.
345 return folly::hash::jenkins_rev_mix32(key);
// Per-thread operation count, set by each test/benchmark before spawning.
348 int numOpsPerThread = 0;
// Inserts a disjoint range of keys into the global AtomicHashMap; the
// thread index is smuggled through the void* argument.
350 void* insertThread(void* jj) {
351 int64_t j = (int64_t) jj;
352 for (int i = 0; i < numOpsPerThread; ++i) {
353 KeyT key = randomizeKey(i + j * numOpsPerThread);
354 globalAHM->insert(key, genVal(key));
// Same as insertThread but targets the global AtomicHashArray.
359 void* insertThreadArr(void* jj) {
360 int64_t j = (int64_t) jj;
361 for (int i = 0; i < numOpsPerThread; ++i) {
362 KeyT key = randomizeKey(i + j * numOpsPerThread);
363 globalAHA->insert(std::make_pair(key, genVal(key)));
// Thread-pool helper: spawns numThreads pthreads running `thread` with their
// index as the argument, then joins them, collecting exit statuses if a
// non-null array is supplied. The atomic flag lets workers spin until all
// threads exist so they start work roughly simultaneously; benchmark timing
// is suspended while threads are created.
368 std::atomic<bool> runThreadsCreatedAllThreads;
369 void runThreads(void *(*thread)(void*), int numThreads, void **statuses) {
370 folly::BenchmarkSuspender susp;
371 runThreadsCreatedAllThreads.store(false);
372 vector<pthread_t> threadIds;
373 for (int64_t j = 0; j < numThreads; j++) {
375 if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
376 LOG(ERROR) << "Could not start thread";
378 threadIds.push_back(tid);
// NOTE(review): a susp.dismiss()/resume appears elided here before the
// flag is flipped and the join loop runs.
383 runThreadsCreatedAllThreads.store(true);
384 for (size_t i = 0; i < threadIds.size(); ++i) {
385 pthread_join(threadIds[i], statuses == nullptr ? nullptr : &statuses[i]);
// Convenience overload: FLAGS_numThreads workers, no status collection.
389 void runThreads(void *(*thread)(void*)) {
390 runThreads(thread, FLAGS_numThreads, nullptr);
// Collision test: every thread inserts the SAME key sequence, so all but
// one insert per key collides. Verifies the final size equals the unique
// key count, all values are correct, and logs timing for colliding inserts
// and colliding finds.
395 TEST(Ahm, collision_test) {
396 const int numInserts = 1000000 / 4;
398 // Doing the same number on each thread so we collide.
399 numOpsPerThread = numInserts;
401 float sizeFactor = 0.46;
402 int entrySize = sizeof(KeyT) + sizeof(ValueT);
403 VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
404 " Byte entries replicated in " << FLAGS_numThreads <<
405 " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor.";
407 globalAHM.reset(new AHMapT(int(numInserts * sizeFactor), config));
409 size_t sizeInit = globalAHM->capacity();
410 VLOG(1) << " Initial capacity: " << sizeInit;
412 double start = nowInUsec();
413 runThreads([](void*) -> void* { // collisionInsertThread
414 for (int i = 0; i < numOpsPerThread; ++i) {
415 KeyT key = randomizeKey(i);
416 globalAHM->insert(key, genVal(key));
420 double elapsed = nowInUsec() - start;
422 size_t finalCap = globalAHM->capacity();
423 size_t sizeAHM = globalAHM->size();
424 VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
425 " duplicate inserts (atomic).";
426 VLOG(1) << " Final capacity: " << finalCap << " in " <<
427 globalAHM->numSubMaps() << " sub maps (" <<
428 sizeAHM * 100 / finalCap << "% load factor, " <<
429 (finalCap - sizeInit) * 100 / sizeInit << "% growth).";
// Duplicate inserts must not create duplicate entries.
432 EXPECT_EQ(sizeAHM, numInserts);
435 for (int i = 0; i < numInserts; ++i) {
436 KeyT key = randomizeKey(i);
437 success &= (globalAHM->find(key)->second == genVal(key));
439 EXPECT_TRUE(success);
441 // check colliding finds
443 runThreads([](void*) -> void* { // collisionFindThread
// NOTE(review): the KeyT key declaration for this find loop is elided
// from this chunk.
445 for (int i = 0; i < numOpsPerThread; ++i) {
446 globalAHM->find(key);
451 elapsed = nowInUsec() - start;
453 VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
454 " duplicate finds (atomic).";
// Workers for the insert-vs-iterate race test: one iterates the global map
// (guarding against runaway iteration), the other inserts random keys.
459 const int kInsertPerThread = 100000;
460 int raceFinalSizeEstimate;
462 void* raceIterateThread(void* jj) {
463 int64_t j = (int64_t) jj;
// NOTE(review): the `count` declaration and outer repeat loop appear
// elided from this chunk.
466 AHMapT::iterator it = globalAHM->begin();
467 AHMapT::iterator end = globalAHM->end();
468 for (; it != end; ++it) {
// If we visit more entries than could possibly exist, the iterator is
// cycling — fail rather than hang the test.
470 if (count > raceFinalSizeEstimate) {
471 EXPECT_FALSE("Infinite loop in iterator.");
478 void* raceInsertRandomThread(void* jj) {
479 int64_t j = (int64_t) jj;
480 for (int i = 0; i < kInsertPerThread; ++i) {
// NOTE(review): the random key generation line is elided here.
482 globalAHM->insert(key, genVal(key));
489 // Test for race conditions when inserting and iterating at the same time and
490 // creating multiple submaps.
// The map is deliberately undersized (estimate / 9) so growth happens while
// iterator threads are walking it. Success criterion is simply not crashing
// or looping forever; sizes are logged for inspection.
491 TEST(Ahm, race_insert_iterate_thread_test) {
492 const int kInsertThreads = 20;
493 const int kIterateThreads = 20;
494 raceFinalSizeEstimate = kInsertThreads * kInsertPerThread;
496 VLOG(1) << "Testing iteration and insertion with " << kInsertThreads
497 << " threads inserting and " << kIterateThreads << " threads iterating.";
499 globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config));
501 vector<pthread_t> threadIds;
502 for (int64_t j = 0; j < kInsertThreads + kIterateThreads; j++) {
// First kInsertThreads indices insert; the remainder iterate.
504 void *(*thread)(void*) =
505 (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
506 if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
507 LOG(ERROR) << "Could not start thread";
509 threadIds.push_back(tid);
512 for (size_t i = 0; i < threadIds.size(); ++i) {
513 pthread_join(threadIds[i], nullptr);
515 VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
516 VLOG(1) << "Final size of map " << globalAHM->size();
// Workers for the erase-vs-insert race: one inserter publishes its progress
// through `insertedLevel`; eraser threads trail behind it and race each
// other to erase each key exactly once.
521 const int kTestEraseInsertions = 200000;
522 std::atomic<int32_t> insertedLevel;
524 void* testEraseInsertThread(void*) {
525 for (int i = 0; i < kTestEraseInsertions; ++i) {
526 KeyT key = randomizeKey(i);
527 globalAHM->insert(key, genVal(key));
// Publish progress so erasers never run ahead of insertion.
528 insertedLevel.store(i, std::memory_order_release);
530 insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
534 void* testEraseEraseThread(void*) {
535 for (int i = 0; i < kTestEraseInsertions; ++i) {
537 * Make sure that we don't get ahead of the insert thread, because
538 * part of the condition for this unit test succeeding is that the
541 * Note, there is a subtle case here when a new submap is
542 * allocated: the erasing thread might get 0 from count(key)
543 * because it hasn't seen numSubMaps_ update yet. To avoid this
544 * race causing problems for the test (it's ok for real usage), we
545 * lag behind the inserter by more than just element.
// NOTE(review): the `lag` constant, the do-loop header, and the
// currentLevel declaration are partially elided from this chunk.
550 currentLevel = insertedLevel.load(std::memory_order_acquire);
551 if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1;
552 } while (currentLevel - lag < i);
554 KeyT key = randomizeKey(i);
// Spin until some eraser thread (possibly this one) removes the key.
555 while (globalAHM->count(key)) {
556 if (globalAHM->erase(key)) {
566 // Here we have a single thread inserting some values, and several threads
567 // racing to delete the values in the order they were inserted.
// After all threads finish, every inserted key must have been erased
// exactly once, leaving the map empty.
568 TEST(Ahm, thread_erase_insert_race) {
569 const int kInsertThreads = 1;
570 const int kEraseThreads = 10;
572 VLOG(1) << "Testing insertion and erase with " << kInsertThreads
573 << " thread inserting and " << kEraseThreads << " threads erasing.";
575 globalAHM.reset(new AHMapT(kTestEraseInsertions / 4, config));
577 vector<pthread_t> threadIds;
578 for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) {
// First index inserts; the remaining threads erase.
580 void *(*thread)(void*) =
581 (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
582 if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
583 LOG(ERROR) << "Could not start thread";
585 threadIds.push_back(tid);
588 for (size_t i = 0; i < threadIds.size(); i++) {
589 pthread_join(threadIds[i], nullptr);
592 EXPECT_TRUE(globalAHM->empty());
593 EXPECT_EQ(globalAHM->size(), 0);
595 VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
598 // Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value.
// Several threads race to insert the same two keys into a tiny (capacity 2)
// AtomicHashArray; each thread counts its apparent successes via the
// returned iterator, and the test asserts every thread observed the same
// array size — i.e. the insert return value never lied about success.
599 typedef AtomicHashArray<int32_t, int32_t> AHA;
600 AHA::Config configRace;
601 auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
602 void* atomicHashArrayInsertRaceThread(void* j) {
603 AHA* arr = atomicHashArrayInsertRaceArray.get();
604 uintptr_t numInserted = 0;
// Spin until all threads exist so the inserts race for real.
605 while (!runThreadsCreatedAllThreads.load());
606 for (int i = 0; i < 2; i++) {
607 if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
// Thread's tally is smuggled out through the pthread exit status.
611 pthread_exit((void *) numInserted);
613 TEST(Ahm, atomic_hash_array_insert_race) {
614 AHA* arr = atomicHashArrayInsertRaceArray.get();
// Local FLAGS_numThreads deliberately shadows the global flag for this test.
615 int numIterations = 5000, FLAGS_numThreads = 4;
616 void* statuses[FLAGS_numThreads];
617 for (int i = 0; i < numIterations; i++) {
// NOTE(review): an arr->clear() between iterations appears elided here.
619 runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses);
620 EXPECT_GE(arr->size(), 1);
621 for (int j = 0; j < FLAGS_numThreads; j++) {
622 EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
627 // Repro for T#5841499. Race between erase() and find() on the same key.
// A writer advances a key window (inserting k+1, then presumably erasing k
// and bumping `key`) while a reader repeatedly finds the current key and
// checks the invariant value == key. Thread bodies are partially elided
// from this chunk (loop bounds, erase call, joins).
628 TEST(Ahm, erase_find_race) {
629 const uint64_t limit = 10000;
630 AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
631 std::atomic<uint64_t> key {1};
633 // Invariant: all values are equal to their keys.
634 // At any moment there is one or two consecutive keys in the map.
636 std::thread write_thread([&]() {
642 map.insert(k + 1, k + 1);
647 std::thread read_thread([&]() {
649 uint64_t k = key.load();
654 auto it = map.find(k);
655 if (it != map.end()) {
656 ASSERT_EQ(k, it->second);
665 // Repro for a bug when iterator didn't skip empty submaps.
// With growthFactor 1 and initial size 1, each insert can land in its own
// submap; erasing key 2 (elided here) leaves an empty middle submap that
// the iterator must skip: advancing from key 1 must reach key 3 directly,
// then end().
666 TEST(Ahm, iterator_skips_empty_submaps) {
667 AtomicHashMap<uint64_t, uint64_t>::Config config;
668 config.growthFactor = 1;
670 AtomicHashMap<uint64_t, uint64_t> map(1, config);
// NOTE(review): inserts of keys 1,2,3 and the erase of key 2 are elided
// from this chunk.
678 auto it = map.find(1);
680 ASSERT_NE(map.end(), it);
681 ASSERT_EQ(1, it->first);
682 ASSERT_EQ(1, it->second);
// NOTE(review): the ++it advancing past the emptied submap is elided here.
686 ASSERT_NE(map.end(), it);
687 ASSERT_EQ(3, it->first);
688 ASSERT_EQ(3, it->second);
691 ASSERT_EQ(map.end(), it);
// Benchmark setup helpers: populate the global AtomicHashArray / AtomicHashMap
// with FLAGS_numBMElements entries using FLAGS_numThreads insert workers,
// logging wall-clock throughput.
696 void loadGlobalAha() {
697 std::cout << "loading global AHA with " << FLAGS_numThreads
699 uint64_t start = nowInUsec();
700 globalAHA = AHArrayT::create(maxBMElements, config);
701 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
// Threads partition the key space evenly, so the division must be exact.
702 CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) <<
703 "kNumThreads must evenly divide kNumInserts.";
704 runThreads(insertThreadArr);
705 uint64_t elapsed = nowInUsec() - start;
706 std::cout << " took " << elapsed / 1000 << " ms (" <<
707 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
708 EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements);
// Same as loadGlobalAha but for the growable AtomicHashMap.
711 void loadGlobalAhm() {
712 std::cout << "loading global AHM with " << FLAGS_numThreads
714 uint64_t start = nowInUsec();
715 globalAHM.reset(new AHMapT(maxBMElements, config));
716 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
717 runThreads(insertThread);
718 uint64_t elapsed = nowInUsec() - start;
719 std::cout << " took " << elapsed / 1000 << " ms (" <<
720 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
721 EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
// Single-threaded find benchmarks against the preloaded global AHA/AHM.
// doNotOptimizeAway keeps the compiler from eliding the lookup.
726 BENCHMARK(st_aha_find, iters) {
727 CHECK_LE(iters, FLAGS_numBMElements);
728 for (size_t i = 0; i < iters; i++) {
729 KeyT key = randomizeKey(i);
730 folly::doNotOptimizeAway(globalAHA->find(key)->second);
734 BENCHMARK(st_ahm_find, iters) {
735 CHECK_LE(iters, FLAGS_numBMElements);
736 for (size_t i = 0; i < iters; i++) {
737 KeyT key = randomizeKey(i);
738 folly::doNotOptimizeAway(globalAHM->find(key)->second);
742 BENCHMARK_DRAW_LINE()
// Miss benchmarks look up keys guaranteed absent (offset far outside the
// inserted range); the mix benchmark interleaves ~1% inserts among finds.
744 BENCHMARK(mt_ahm_miss, iters) {
745 CHECK_LE(iters, FLAGS_numBMElements);
746 numOpsPerThread = iters / FLAGS_numThreads;
747 runThreads([](void* jj) -> void* {
748 int64_t j = (int64_t) jj;
// Spin until all workers exist so timing covers steady-state only.
749 while (!runThreadsCreatedAllThreads.load());
750 for (int i = 0; i < numOpsPerThread; ++i) {
751 KeyT key = i + j * numOpsPerThread * 100;
752 folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
758 BENCHMARK(st_ahm_miss, iters) {
759 CHECK_LE(iters, FLAGS_numBMElements);
760 for (size_t i = 0; i < iters; i++) {
761 KeyT key = randomizeKey(i + iters * 100);
762 folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
766 BENCHMARK(mt_ahm_find_insert_mix, iters) {
767 CHECK_LE(iters, FLAGS_numBMElements);
768 numOpsPerThread = iters / FLAGS_numThreads;
769 runThreads([](void* jj) -> void* {
770 int64_t j = (int64_t) jj;
771 while (!runThreadsCreatedAllThreads.load());
772 for (int i = 0; i < numOpsPerThread; ++i) {
773 if (i % 128) { // ~1% insert mix
774 KeyT key = randomizeKey(i + j * numOpsPerThread);
775 folly::doNotOptimizeAway(globalAHM->find(key)->second);
// NOTE(review): the `} else {` separating the find and insert branches
// is elided from this chunk.
777 KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
778 globalAHM->insert(key, genVal(key));
// Multi-threaded find benchmarks: each worker looks up its own disjoint
// key slice of the preloaded global AHA/AHM.
785 BENCHMARK(mt_aha_find, iters) {
786 CHECK_LE(iters, FLAGS_numBMElements);
787 numOpsPerThread = iters / FLAGS_numThreads;
788 runThreads([](void* jj) -> void* {
789 int64_t j = (int64_t) jj;
790 while (!runThreadsCreatedAllThreads.load());
791 for (int i = 0; i < numOpsPerThread; ++i) {
792 KeyT key = randomizeKey(i + j * numOpsPerThread);
793 folly::doNotOptimizeAway(globalAHA->find(key)->second);
799 BENCHMARK(mt_ahm_find, iters) {
800 CHECK_LE(iters, FLAGS_numBMElements);
801 numOpsPerThread = iters / FLAGS_numThreads;
802 runThreads([](void* jj) -> void* {
803 int64_t j = (int64_t) jj;
804 while (!runThreadsCreatedAllThreads.load());
805 for (int i = 0; i < numOpsPerThread; ++i) {
806 KeyT key = randomizeKey(i + j * numOpsPerThread);
807 folly::doNotOptimizeAway(globalAHM->find(key)->second);
// Baseline measures just the key-mix + modulus cost so find benchmarks can
// be interpreted net of key generation.
814 BENCHMARK(st_baseline_modulus_and_random, iters) {
815 for (size_t i = 0; i < iters; ++i) {
816 k = randomizeKey(i) % iters;
820 // insertions go last because they reset the map
822 BENCHMARK(mt_ahm_insert, iters) {
// NOTE(review): a BenchmarkSuspender around the reset appears elided here.
824 globalAHM.reset(new AHMapT(int(iters * LF), config));
825 numOpsPerThread = iters / FLAGS_numThreads;
827 runThreads(insertThread);
830 BENCHMARK(st_ahm_insert, iters) {
831 folly::BenchmarkSuspender susp;
832 std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
// NOTE(review): the susp.dismiss() re-enabling timing is elided here.
835 for (size_t i = 0; i < iters; i++) {
836 KeyT key = randomizeKey(i);
837 ahm->insert(key, genVal(key));
// One-time benchmark configuration: propagates the load-factor flags into
// the shared configs, caps iteration counts, and defaults the thread count
// to the machine's logical core count (only if the user didn't set it).
841 void benchmarkSetup() {
842 config.maxLoadFactor = FLAGS_maxLoadFactor;
843 configRace.maxLoadFactor = 0.5;
844 int numCores = sysconf(_SC_NPROCESSORS_ONLN);
// NOTE(review): the loadGlobalAha()/loadGlobalAhm() calls appear elided
// from this chunk.
847 string numIters = folly::to<string>(
848 std::min(1000000, int(FLAGS_numBMElements)));
// SET_FLAG_IF_DEFAULT: only override when the user left the flag unset.
850 gflags::SetCommandLineOptionWithMode(
851 "bm_max_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
853 gflags::SetCommandLineOptionWithMode(
854 "bm_min_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
856 string numCoresStr = folly::to<string>(numCores);
857 gflags::SetCommandLineOptionWithMode(
858 "numThreads", numCoresStr.c_str(), gflags::SET_FLAG_IF_DEFAULT
861 std::cout << "\nRunning AHM benchmarks on machine with " << numCores
862 << " logical cores.\n"
863 " num elements per map: " << FLAGS_numBMElements << "\n"
864 << " num threads for mt tests: " << FLAGS_numThreads << "\n"
865 << " AHM load factor: " << FLAGS_targetLoadFactor << "\n\n";
// Entry point: runs the gtest suite, then — only if all tests pass and the
// benchmark flag was given — performs benchmark setup (elided call here)
// and runs the folly benchmarks.
868 int main(int argc, char** argv) {
869 testing::InitGoogleTest(&argc, argv);
870 gflags::ParseCommandLineFlags(&argc, &argv, true);
871 auto ret = RUN_ALL_TESTS();
// ret == 0 means all tests passed; benchmarks only make sense then.
872 if (!ret && FLAGS_benchmark) {
// NOTE(review): the benchmarkSetup() call appears elided before this.
874 folly::runBenchmarks();
880 Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled
881 (12 physical cores, 12 MB cache, 72 GB RAM)
883 Running AHM benchmarks on machine with 24 logical cores.
884 num elements per map: 12000000
885 num threads for mt tests: 24
886 AHM load factor: 0.75
888 Benchmark Iters Total t t/iter iter/sec
889 ------------------------------------------------------------------------------
890 Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
891 * BM_mt_aha_find 1000000 7.767 ms 7.767 ns 122.8 M
892 +0.81% BM_mt_ahm_find 1000000 7.83 ms 7.83 ns 121.8 M
893 ------------------------------------------------------------------------------
894 Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
895 * BM_st_aha_find 1000000 57.83 ms 57.83 ns 16.49 M
896 +77.9% BM_st_ahm_find 1000000 102.9 ms 102.9 ns 9.27 M
897 ------------------------------------------------------------------------------
898 BM_mt_ahm_miss 1000000 2.937 ms 2.937 ns 324.7 M
899 BM_st_ahm_miss 1000000 164.2 ms 164.2 ns 5.807 M
900 BM_mt_ahm_find_insert_mix 1000000 8.797 ms 8.797 ns 108.4 M
901 BM_mt_ahm_insert 1000000 17.39 ms 17.39 ns 54.83 M
902 BM_st_ahm_insert 1000000 106.8 ms 106.8 ns 8.93 M
903 BM_st_baseline_modulus_and_rando 1000000 6.223 ms 6.223 ns 153.2 M