2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/AtomicHashMap.h>
19 #include <glog/logging.h>
20 #include <gtest/gtest.h>
25 #include <folly/Assume.h>
26 #include <folly/Benchmark.h>
27 #include <folly/Conv.h>
28 #include <folly/portability/Atomic.h>
29 #include <folly/portability/SysTime.h>
33 using folly::AtomicHashMap;
34 using folly::AtomicHashArray;
35 using folly::StringPiece;
38 DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
39 DEFINE_double(maxLoadFactor, 0.80, "Max before growth.");
40 DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests.");
41 DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks.");
43 const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor;
44 const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
46 static int64_t nowInUsec() {
49 return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
52 TEST(Ahm, BasicStrings) {
53 typedef AtomicHashMap<int64_t,string> AHM;
55 EXPECT_TRUE(myMap.begin() == myMap.end());
57 for (int i = 0; i < 100; ++i) {
58 myMap.insert(make_pair(i, folly::to<string>(i)));
60 for (int i = 0; i < 100; ++i) {
61 EXPECT_EQ(myMap.find(i)->second, folly::to<string>(i));
64 myMap.insert(std::make_pair(999, "A"));
65 myMap.insert(std::make_pair(999, "B"));
66 EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten
67 myMap.find(999)->second = "B";
68 myMap.find(999)->second = "C";
69 EXPECT_EQ(myMap.find(999)->second, "C");
70 EXPECT_EQ(myMap.find(999)->first, 999);
74 TEST(Ahm, BasicNoncopyable) {
75 typedef AtomicHashMap<int64_t,std::unique_ptr<int>> AHM;
77 EXPECT_TRUE(myMap.begin() == myMap.end());
79 for (int i = 0; i < 50; ++i) {
80 myMap.insert(make_pair(i, std::unique_ptr<int>(new int(i))));
82 for (int i = 50; i < 100; ++i) {
83 myMap.insert(i, std::unique_ptr<int>(new int (i)));
85 for (int i = 100; i < 150; ++i) {
86 myMap.emplace(i, new int (i));
88 for (int i = 150; i < 200; ++i) {
89 myMap.emplace(i, new int (i), std::default_delete<int>());
91 for (int i = 0; i < 200; ++i) {
92 EXPECT_EQ(*(myMap.find(i)->second), i);
94 for (int i = 0; i < 200; i+=4) {
97 for (int i = 0; i < 200; i+=4) {
98 EXPECT_EQ(myMap.find(i), myMap.end());
102 typedef int32_t KeyT;
103 typedef int32_t ValueT;
105 typedef AtomicHashMap<KeyT,ValueT> AHMapT;
106 typedef AHMapT::value_type RecordT;
107 typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
108 AHArrayT::Config config;
109 typedef folly::QuadraticProbingAtomicHashMap<KeyT,ValueT> QPAHMapT;
110 QPAHMapT::Config qpConfig;
111 static AHArrayT::SmartPtr globalAHA(nullptr);
112 static std::unique_ptr<AHMapT> globalAHM;
113 static std::unique_ptr<QPAHMapT> globalQPAHM;
115 // Generate a deterministic value based on an input key
116 static int genVal(int key) {
120 static bool legalKey(const char* a);
123 bool operator()(const char* a, const char* b) {
124 return legalKey(a) && (strcmp(a, b) == 0);
126 bool operator()(const char* a, const char& b) {
127 return legalKey(a) && (a[0] != '\0') && (a[0] == b);
129 bool operator()(const char* a, const StringPiece b) {
130 return legalKey(a) &&
131 (strlen(a) == b.size()) && (strcmp(a, b.begin()) == 0);
136 size_t operator()(const char* a) {
138 while (a[0] != 0) result += static_cast<size_t>(*(a++));
141 size_t operator()(const char& a) {
142 return static_cast<size_t>(a);
144 size_t operator()(const StringPiece a) {
146 for (const auto& ch : a) result += static_cast<size_t>(ch);
151 typedef AtomicHashMap<const char*, int64_t, HashTraits, EqTraits> AHMCstrInt;
152 AHMCstrInt::Config cstrIntCfg;
154 static bool legalKey(const char* a) {
155 return a != cstrIntCfg.emptyKey &&
156 a != cstrIntCfg.lockedKey &&
157 a != cstrIntCfg.erasedKey;
160 TEST(Ahm, BasicLookup) {
161 AHMCstrInt myMap(1024, cstrIntCfg);
162 EXPECT_TRUE(myMap.begin() == myMap.end());
163 myMap.insert(std::make_pair("f", 42));
164 EXPECT_EQ(42, myMap.find("f")->second);
166 // Look up a single char, successfully.
167 auto it = myMap.find<char>('f');
168 EXPECT_EQ(42, it->second);
171 // Look up a single char, unsuccessfully.
172 auto it = myMap.find<char>('g');
173 EXPECT_TRUE(it == myMap.end());
176 // Look up a string piece, successfully.
177 const StringPiece piece("f");
178 auto it = myMap.find(piece);
179 EXPECT_EQ(42, it->second);
184 VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
185 sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
186 uint64_t numEntries = 10000;
187 float sizeFactor = 0.46f;
189 std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
191 // load map - make sure we succeed and the index is accurate
193 for (uint64_t i = 0; i < numEntries; i++) {
194 auto ret = m->insert(RecordT(i, genVal(i)));
195 success &= ret.second;
196 success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
198 // Overwrite vals to make sure there are no dups
199 // Every insert should fail because the keys are already in the map.
201 for (uint64_t i = 0; i < numEntries; i++) {
202 auto ret = m->insert(RecordT(i, genVal(i * 2)));
203 success &= (ret.second == false); // fail on collision
204 success &= (ret.first->second == genVal(i)); // return the previous value
205 success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
207 EXPECT_TRUE(success);
210 EXPECT_GT(m->numSubMaps(), 1); // make sure we grew
212 EXPECT_EQ(m->size(), numEntries);
213 for (size_t i = 0; i < numEntries; i++) {
214 success &= (m->find(i)->second == genVal(i));
216 EXPECT_TRUE(success);
220 AHMapT::const_iterator retIt;
221 for (int32_t i = 0; i < int32_t(numEntries); i++) {
223 retIt = m->findAt(retIt.getIndex());
224 success &= (retIt->second == genVal(i));
225 // We use a uint32_t index so that this comparison is between two
226 // variables of the same type.
227 success &= (retIt->first == i);
229 EXPECT_TRUE(success);
231 // Try modifying value
232 m->find(8)->second = 5309;
233 EXPECT_EQ(m->find(8)->second, 5309);
238 for (uint64_t i = 0; i < numEntries / 2; i++) {
239 success &= m->insert(RecordT(i, genVal(i))).second;
241 EXPECT_TRUE(success);
242 EXPECT_EQ(m->size(), numEntries / 2);
245 TEST(Ahm, iterator) {
246 int numEntries = 10000;
247 float sizeFactor = .46f;
248 std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
250 // load map - make sure we succeed and the index is accurate
251 for (int i = 0; i < numEntries; i++) {
252 m->insert(RecordT(i, genVal(i)));
258 success &= (it->second == genVal(it->first));
261 EXPECT_TRUE(success);
262 EXPECT_EQ(count, numEntries);
267 // Note: Unfortunately can't currently put a std::atomic<int64_t> in
268 // the value in ahm since it doesn't support types that are both non-copy
269 // and non-move constructible yet.
270 AtomicHashMap<int64_t,int64_t> ahm;
273 explicit Counters(size_t numCounters) : ahm(numCounters) {}
275 void increment(int64_t obj_id) {
276 auto ret = ahm.insert(std::make_pair(obj_id, 1));
278 // obj_id already exists, increment count
279 __sync_fetch_and_add(&ret.first->second, 1);
283 int64_t getValue(int64_t obj_id) {
284 auto ret = ahm.find(obj_id);
285 return ret != ahm.end() ? ret->second : 0;
288 // export the counters without blocking increments
291 ret.reserve(ahm.size() * 32);
292 for (const auto& e : ahm) {
293 ret += folly::to<string>(
294 " [", e.first, ":", e.second, "]\n");
301 // If you get an error "terminate called without an active exception", there
302 // might be too many threads getting created - decrease numKeys and/or mult.
304 const int numKeys = 10;
307 vector<int64_t> keys;
308 FOR_EACH_RANGE(i, 1, numKeys) {
311 vector<std::thread> threads;
312 for (auto key : keys) {
313 FOR_EACH_RANGE(i, 0, key * mult) {
314 threads.push_back(std::thread([&, key] { c.increment(key); }));
317 for (auto& t : threads) {
320 string str = c.toString();
321 for (auto key : keys) {
322 int val = key * mult;
323 EXPECT_EQ(val, c.getValue(key));
324 EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
331 explicit Integer(KeyT v = 0) : v_(v) {}
333 Integer& operator=(const Integer& a) {
334 static bool throwException_ = false;
335 throwException_ = !throwException_;
336 if (throwException_) {
343 bool operator==(const Integer& a) const { return v_ == a.v_; }
349 TEST(Ahm, map_exception_safety) {
350 typedef AtomicHashMap<KeyT,Integer> MyMapT;
352 int numEntries = 10000;
353 float sizeFactor = 0.46f;
354 std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
358 for (int i = 0; i < numEntries; i++) {
360 m->insert(i, Integer(genVal(i)));
361 success &= (m->find(i)->second == Integer(genVal(i)));
364 success &= !m->count(i);
367 EXPECT_EQ(count, m->size());
368 EXPECT_TRUE(success);
371 TEST(Ahm, basicErase) {
372 size_t numEntries = 3000;
374 std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
375 // Iterate filling up the map and deleting all keys a few times
376 // to test more than one subMap.
377 for (int iterations = 0; iterations < 4; ++iterations) {
378 // Testing insertion of keys
380 for (size_t i = 0; i < numEntries; ++i) {
381 success &= !(s->count(i));
382 auto ret = s->insert(RecordT(i, i));
383 success &= s->count(i);
384 success &= ret.second;
386 EXPECT_TRUE(success);
387 EXPECT_EQ(s->size(), numEntries);
389 // Delete every key in the map and verify that the key is gone and the the
392 for (size_t i = 0; i < numEntries; ++i) {
393 success &= s->erase(i);
394 success &= (s->size() == numEntries - 1 - i);
395 success &= !(s->count(i));
396 success &= !(s->erase(i));
398 EXPECT_TRUE(success);
400 VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
405 inline KeyT randomizeKey(int key) {
406 // We deterministically randomize the key to more accurately simulate
407 // real-world usage, and to avoid pathalogical performance patterns (e.g.
408 // those related to __gnu_cxx::hash<int64_t>()(1) == 1).
410 // Use a hash function we don't normally use for ints to avoid interactions.
411 return folly::hash::jenkins_rev_mix32(key);
414 int numOpsPerThread = 0;
416 void* insertThread(void* jj) {
417 int64_t j = (int64_t) jj;
418 for (int i = 0; i < numOpsPerThread; ++i) {
419 KeyT key = randomizeKey(i + j * numOpsPerThread);
420 globalAHM->insert(key, genVal(key));
425 void* qpInsertThread(void* jj) {
426 int64_t j = (int64_t) jj;
427 for (int i = 0; i < numOpsPerThread; ++i) {
428 KeyT key = randomizeKey(i + j * numOpsPerThread);
429 globalQPAHM->insert(key, genVal(key));
434 void* insertThreadArr(void* jj) {
435 int64_t j = (int64_t) jj;
436 for (int i = 0; i < numOpsPerThread; ++i) {
437 KeyT key = randomizeKey(i + j * numOpsPerThread);
438 globalAHA->insert(std::make_pair(key, genVal(key)));
443 std::atomic<bool> runThreadsCreatedAllThreads;
444 void runThreads(void *(*mainFunc)(void*), int numThreads, void **statuses) {
445 folly::BenchmarkSuspender susp;
446 runThreadsCreatedAllThreads.store(false);
447 vector<std::thread> threads;
448 for (int64_t j = 0; j < numThreads; j++) {
449 threads.emplace_back([statuses, mainFunc, j]() {
450 auto ret = mainFunc((void*)j);
451 if (statuses != nullptr) {
458 runThreadsCreatedAllThreads.store(true);
459 for (size_t i = 0; i < threads.size(); ++i) {
464 void runThreads(void *(*mainFunc)(void*)) {
465 runThreads(mainFunc, FLAGS_numThreads, nullptr);
470 TEST(Ahm, collision_test) {
471 const int numInserts = 1000000 / 4;
473 // Doing the same number on each thread so we collide.
474 numOpsPerThread = numInserts;
476 float sizeFactor = 0.46f;
477 int entrySize = sizeof(KeyT) + sizeof(ValueT);
478 VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
479 " Byte entries replicated in " << FLAGS_numThreads <<
480 " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor.";
482 globalAHM.reset(new AHMapT(int(numInserts * sizeFactor), config));
484 size_t sizeInit = globalAHM->capacity();
485 VLOG(1) << " Initial capacity: " << sizeInit;
487 double start = nowInUsec();
488 runThreads([](void*) -> void* { // collisionInsertThread
489 for (int i = 0; i < numOpsPerThread; ++i) {
490 KeyT key = randomizeKey(i);
491 globalAHM->insert(key, genVal(key));
495 double elapsed = nowInUsec() - start;
497 size_t finalCap = globalAHM->capacity();
498 size_t sizeAHM = globalAHM->size();
499 VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
500 " duplicate inserts (atomic).";
501 VLOG(1) << " Final capacity: " << finalCap << " in " <<
502 globalAHM->numSubMaps() << " sub maps (" <<
503 sizeAHM * 100 / finalCap << "% load factor, " <<
504 (finalCap - sizeInit) * 100 / sizeInit << "% growth).";
507 EXPECT_EQ(sizeAHM, numInserts);
509 for (int i = 0; i < numInserts; ++i) {
510 KeyT key = randomizeKey(i);
511 success &= (globalAHM->find(key)->second == genVal(key));
513 EXPECT_TRUE(success);
515 // check colliding finds
517 runThreads([](void*) -> void* { // collisionFindThread
519 for (int i = 0; i < numOpsPerThread; ++i) {
520 globalAHM->find(key);
525 elapsed = nowInUsec() - start;
527 VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
528 " duplicate finds (atomic).";
533 const int kInsertPerThread = 100000;
534 int raceFinalSizeEstimate;
536 void* raceIterateThread(void*) {
539 AHMapT::iterator it = globalAHM->begin();
540 AHMapT::iterator end = globalAHM->end();
541 for (; it != end; ++it) {
543 if (count > raceFinalSizeEstimate) {
544 EXPECT_FALSE("Infinite loop in iterator.");
551 void* raceInsertRandomThread(void*) {
552 for (int i = 0; i < kInsertPerThread; ++i) {
554 globalAHM->insert(key, genVal(key));
561 // Test for race conditions when inserting and iterating at the same time and
562 // creating multiple submaps.
563 TEST(Ahm, race_insert_iterate_thread_test) {
564 const int kInsertThreads = 20;
565 const int kIterateThreads = 20;
566 raceFinalSizeEstimate = kInsertThreads * kInsertPerThread;
568 VLOG(1) << "Testing iteration and insertion with " << kInsertThreads
569 << " threads inserting and " << kIterateThreads << " threads iterating.";
571 globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config));
573 vector<pthread_t> threadIds;
574 for (int j = 0; j < kInsertThreads + kIterateThreads; j++) {
576 void *(*thread)(void*) =
577 (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
578 if (pthread_create(&tid, nullptr, thread, nullptr) != 0) {
579 LOG(ERROR) << "Could not start thread";
581 threadIds.push_back(tid);
584 for (size_t i = 0; i < threadIds.size(); ++i) {
585 pthread_join(threadIds[i], nullptr);
587 VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
588 VLOG(1) << "Final size of map " << globalAHM->size();
593 const int kTestEraseInsertions = 200000;
594 std::atomic<int32_t> insertedLevel;
596 void* testEraseInsertThread(void*) {
597 for (int i = 0; i < kTestEraseInsertions; ++i) {
598 KeyT key = randomizeKey(i);
599 globalAHM->insert(key, genVal(key));
600 insertedLevel.store(i, std::memory_order_release);
602 insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
606 void* testEraseEraseThread(void*) {
607 for (int i = 0; i < kTestEraseInsertions; ++i) {
609 * Make sure that we don't get ahead of the insert thread, because
610 * part of the condition for this unit test succeeding is that the
613 * Note, there is a subtle case here when a new submap is
614 * allocated: the erasing thread might get 0 from count(key)
615 * because it hasn't seen numSubMaps_ update yet. To avoid this
616 * race causing problems for the test (it's ok for real usage), we
617 * lag behind the inserter by more than just element.
622 currentLevel = insertedLevel.load(std::memory_order_acquire);
623 if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1;
624 } while (currentLevel - lag < i);
626 KeyT key = randomizeKey(i);
627 while (globalAHM->count(key)) {
628 if (globalAHM->erase(key)) {
638 // Here we have a single thread inserting some values, and several threads
639 // racing to delete the values in the order they were inserted.
640 TEST(Ahm, thread_erase_insert_race) {
641 const int kInsertThreads = 1;
642 const int kEraseThreads = 10;
644 VLOG(1) << "Testing insertion and erase with " << kInsertThreads
645 << " thread inserting and " << kEraseThreads << " threads erasing.";
647 globalAHM.reset(new AHMapT(kTestEraseInsertions / 4, config));
649 vector<pthread_t> threadIds;
650 for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) {
652 void *(*thread)(void*) =
653 (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
654 if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
655 LOG(ERROR) << "Could not start thread";
657 threadIds.push_back(tid);
660 for (size_t i = 0; i < threadIds.size(); i++) {
661 pthread_join(threadIds[i], nullptr);
664 EXPECT_TRUE(globalAHM->empty());
665 EXPECT_EQ(globalAHM->size(), 0);
667 VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
670 // Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value.
671 typedef AtomicHashArray<int32_t, int32_t> AHA;
672 AHA::Config configRace;
673 auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
674 void* atomicHashArrayInsertRaceThread(void* /* j */) {
675 AHA* arr = atomicHashArrayInsertRaceArray.get();
676 uintptr_t numInserted = 0;
677 while (!runThreadsCreatedAllThreads.load());
678 for (int i = 0; i < 2; i++) {
679 if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
683 return (void*)numInserted;
685 TEST(Ahm, atomic_hash_array_insert_race) {
686 AHA* arr = atomicHashArrayInsertRaceArray.get();
687 int numIterations = 5000;
688 constexpr int numThreads = 4;
689 void* statuses[numThreads];
690 for (int i = 0; i < numIterations; i++) {
692 runThreads(atomicHashArrayInsertRaceThread, numThreads, statuses);
693 EXPECT_GE(arr->size(), 1);
694 for (int j = 0; j < numThreads; j++) {
695 EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
700 // Repro for T#5841499. Race between erase() and find() on the same key.
701 TEST(Ahm, erase_find_race) {
702 const uint64_t limit = 10000;
703 AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
704 std::atomic<uint64_t> key {1};
706 // Invariant: all values are equal to their keys.
707 // At any moment there is one or two consecutive keys in the map.
709 std::thread write_thread([&]() {
715 map.insert(k + 1, k + 1);
720 std::thread read_thread([&]() {
722 uint64_t k = key.load();
727 auto it = map.find(k);
728 if (it != map.end()) {
729 ASSERT_EQ(k, it->second);
738 // Erase right after insert race bug repro (t9130653)
739 TEST(Ahm, erase_after_insert_race) {
740 const uint64_t limit = 10000;
741 const size_t num_threads = 100;
742 const size_t num_iters = 500;
743 AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
745 std::atomic<bool> go{false};
746 std::vector<std::thread> ts;
747 for (size_t i = 0; i < num_threads; ++i) {
748 ts.emplace_back([&]() {
752 for (size_t n = 0; n < num_iters; ++n) {
766 // Repro for a bug when iterator didn't skip empty submaps.
767 TEST(Ahm, iterator_skips_empty_submaps) {
768 AtomicHashMap<uint64_t, uint64_t>::Config config;
769 config.growthFactor = 1;
771 AtomicHashMap<uint64_t, uint64_t> map(1, config);
779 auto it = map.find(1);
781 ASSERT_NE(map.end(), it);
782 ASSERT_EQ(1, it->first);
783 ASSERT_EQ(1, it->second);
787 ASSERT_NE(map.end(), it);
788 ASSERT_EQ(3, it->first);
789 ASSERT_EQ(3, it->second);
792 ASSERT_EQ(map.end(), it);
797 void loadGlobalAha() {
798 std::cout << "loading global AHA with " << FLAGS_numThreads
800 uint64_t start = nowInUsec();
801 globalAHA = AHArrayT::create(maxBMElements, config);
802 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
803 CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) <<
804 "kNumThreads must evenly divide kNumInserts.";
805 runThreads(insertThreadArr);
806 uint64_t elapsed = nowInUsec() - start;
807 std::cout << " took " << elapsed / 1000 << " ms (" <<
808 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
809 EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements);
812 void loadGlobalAhm() {
813 std::cout << "loading global AHM with " << FLAGS_numThreads
815 uint64_t start = nowInUsec();
816 globalAHM.reset(new AHMapT(maxBMElements, config));
817 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
818 runThreads(insertThread);
819 uint64_t elapsed = nowInUsec() - start;
820 std::cout << " took " << elapsed / 1000 << " ms (" <<
821 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
822 EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
825 void loadGlobalQPAhm() {
826 std::cout << "loading global QPAHM with " << FLAGS_numThreads
828 uint64_t start = nowInUsec();
829 globalQPAHM.reset(new QPAHMapT(maxBMElements, qpConfig));
830 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
831 runThreads(qpInsertThread);
832 uint64_t elapsed = nowInUsec() - start;
833 std::cout << " took " << elapsed / 1000 << " ms (" <<
834 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
835 EXPECT_EQ(globalQPAHM->size(), FLAGS_numBMElements);
840 BENCHMARK(st_aha_find, iters) {
841 CHECK_LE(iters, FLAGS_numBMElements);
842 for (size_t i = 0; i < iters; i++) {
843 KeyT key = randomizeKey(i);
844 folly::doNotOptimizeAway(globalAHA->find(key)->second);
848 BENCHMARK(st_ahm_find, iters) {
849 CHECK_LE(iters, FLAGS_numBMElements);
850 for (size_t i = 0; i < iters; i++) {
851 KeyT key = randomizeKey(i);
852 folly::doNotOptimizeAway(globalAHM->find(key)->second);
856 BENCHMARK(st_qpahm_find, iters) {
857 CHECK_LE(iters, FLAGS_numBMElements);
858 for (size_t i = 0; i < iters; i++) {
859 KeyT key = randomizeKey(i);
860 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
864 BENCHMARK_DRAW_LINE()
866 BENCHMARK(mt_ahm_miss, iters) {
867 CHECK_LE(iters, FLAGS_numBMElements);
868 numOpsPerThread = iters / FLAGS_numThreads;
869 runThreads([](void* jj) -> void* {
870 int64_t j = (int64_t) jj;
871 while (!runThreadsCreatedAllThreads.load());
872 for (int i = 0; i < numOpsPerThread; ++i) {
873 KeyT key = i + j * numOpsPerThread * 100;
874 folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
880 BENCHMARK(mt_qpahm_miss, iters) {
881 CHECK_LE(iters, FLAGS_numBMElements);
882 numOpsPerThread = iters / FLAGS_numThreads;
883 runThreads([](void* jj) -> void* {
884 int64_t j = (int64_t) jj;
885 while (!runThreadsCreatedAllThreads.load());
886 for (int i = 0; i < numOpsPerThread; ++i) {
887 KeyT key = i + j * numOpsPerThread * 100;
888 folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
894 BENCHMARK(st_ahm_miss, iters) {
895 CHECK_LE(iters, FLAGS_numBMElements);
896 for (size_t i = 0; i < iters; i++) {
897 KeyT key = randomizeKey(i + iters * 100);
898 folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
902 BENCHMARK(st_qpahm_miss, iters) {
903 CHECK_LE(iters, FLAGS_numBMElements);
904 for (size_t i = 0; i < iters; i++) {
905 KeyT key = randomizeKey(i + iters * 100);
906 folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
910 BENCHMARK(mt_ahm_find_insert_mix, iters) {
911 CHECK_LE(iters, FLAGS_numBMElements);
912 numOpsPerThread = iters / FLAGS_numThreads;
913 runThreads([](void* jj) -> void* {
914 int64_t j = (int64_t) jj;
915 while (!runThreadsCreatedAllThreads.load());
916 for (int i = 0; i < numOpsPerThread; ++i) {
917 if (i % 128) { // ~1% insert mix
918 KeyT key = randomizeKey(i + j * numOpsPerThread);
919 folly::doNotOptimizeAway(globalAHM->find(key)->second);
921 KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
922 globalAHM->insert(key, genVal(key));
930 BENCHMARK(mt_qpahm_find_insert_mix, iters) {
931 CHECK_LE(iters, FLAGS_numBMElements);
932 numOpsPerThread = iters / FLAGS_numThreads;
933 runThreads([](void* jj) -> void* {
934 int64_t j = (int64_t) jj;
935 while (!runThreadsCreatedAllThreads.load());
936 for (int i = 0; i < numOpsPerThread; ++i) {
937 if (i % 128) { // ~1% insert mix
938 KeyT key = randomizeKey(i + j * numOpsPerThread);
939 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
941 KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
942 globalQPAHM->insert(key, genVal(key));
949 BENCHMARK(mt_aha_find, iters) {
950 CHECK_LE(iters, FLAGS_numBMElements);
951 numOpsPerThread = iters / FLAGS_numThreads;
952 runThreads([](void* jj) -> void* {
953 int64_t j = (int64_t) jj;
954 while (!runThreadsCreatedAllThreads.load());
955 for (int i = 0; i < numOpsPerThread; ++i) {
956 KeyT key = randomizeKey(i + j * numOpsPerThread);
957 folly::doNotOptimizeAway(globalAHA->find(key)->second);
963 BENCHMARK(mt_ahm_find, iters) {
964 CHECK_LE(iters, FLAGS_numBMElements);
965 numOpsPerThread = iters / FLAGS_numThreads;
966 runThreads([](void* jj) -> void* {
967 int64_t j = (int64_t) jj;
968 while (!runThreadsCreatedAllThreads.load());
969 for (int i = 0; i < numOpsPerThread; ++i) {
970 KeyT key = randomizeKey(i + j * numOpsPerThread);
971 folly::doNotOptimizeAway(globalAHM->find(key)->second);
977 BENCHMARK(mt_qpahm_find, iters) {
978 CHECK_LE(iters, FLAGS_numBMElements);
979 numOpsPerThread = iters / FLAGS_numThreads;
980 runThreads([](void* jj) -> void* {
981 int64_t j = (int64_t) jj;
982 while (!runThreadsCreatedAllThreads.load());
983 for (int i = 0; i < numOpsPerThread; ++i) {
984 KeyT key = randomizeKey(i + j * numOpsPerThread);
985 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
992 BENCHMARK(st_baseline_modulus_and_random, iters) {
993 for (size_t i = 0; i < iters; ++i) {
994 k = randomizeKey(i) % iters;
998 // insertions go last because they reset the map
1000 BENCHMARK(mt_ahm_insert, iters) {
1002 globalAHM.reset(new AHMapT(int(iters * LF), config));
1003 numOpsPerThread = iters / FLAGS_numThreads;
1005 runThreads(insertThread);
1008 BENCHMARK(mt_qpahm_insert, iters) {
1010 globalQPAHM.reset(new QPAHMapT(int(iters * LF), qpConfig));
1011 numOpsPerThread = iters / FLAGS_numThreads;
1013 runThreads(qpInsertThread);
1016 BENCHMARK(st_ahm_insert, iters) {
1017 folly::BenchmarkSuspender susp;
1018 std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
1021 for (size_t i = 0; i < iters; i++) {
1022 KeyT key = randomizeKey(i);
1023 ahm->insert(key, genVal(key));
1027 BENCHMARK(st_qpahm_insert, iters) {
1028 folly::BenchmarkSuspender susp;
1029 std::unique_ptr<QPAHMapT> ahm(new QPAHMapT(int(iters * LF), qpConfig));
1032 for (size_t i = 0; i < iters; i++) {
1033 KeyT key = randomizeKey(i);
1034 ahm->insert(key, genVal(key));
1038 void benchmarkSetup() {
1039 config.maxLoadFactor = FLAGS_maxLoadFactor;
1040 qpConfig.maxLoadFactor = FLAGS_maxLoadFactor;
1041 configRace.maxLoadFactor = 0.5;
1042 int numCores = sysconf(_SC_NPROCESSORS_ONLN);
1046 string numIters = folly::to<string>(
1047 std::min(1000000, int(FLAGS_numBMElements)));
1049 gflags::SetCommandLineOptionWithMode(
1050 "bm_max_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
1052 gflags::SetCommandLineOptionWithMode(
1053 "bm_min_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
1055 string numCoresStr = folly::to<string>(numCores);
1056 gflags::SetCommandLineOptionWithMode(
1057 "numThreads", numCoresStr.c_str(), gflags::SET_FLAG_IF_DEFAULT
1060 std::cout << "\nRunning AHM benchmarks on machine with " << numCores
1061 << " logical cores.\n"
1062 " num elements per map: " << FLAGS_numBMElements << "\n"
1063 << " num threads for mt tests: " << FLAGS_numThreads << "\n"
1064 << " AHM load factor: " << FLAGS_targetLoadFactor << "\n\n";
1067 int main(int argc, char** argv) {
1068 testing::InitGoogleTest(&argc, argv);
1069 gflags::ParseCommandLineFlags(&argc, &argv, true);
1070 auto ret = RUN_ALL_TESTS();
1071 if (!ret && FLAGS_benchmark) {
1073 folly::runBenchmarks();
1079 loading global AHA with 8 threads...
1080 took 487 ms (40 ns/insert).
1081 loading global AHM with 8 threads...
1082 took 478 ms (39 ns/insert).
1083 loading global QPAHM with 8 threads...
1084 took 478 ms (39 ns/insert).
1086 Running AHM benchmarks on machine with 24 logical cores.
1087 num elements per map: 12000000
1088 num threads for mt tests: 24
1089 AHM load factor: 0.75
1091 ============================================================================
1092 folly/test/AtomicHashMapTest.cpp relative time/iter iters/s
1093 ============================================================================
1094 st_aha_find 92.63ns 10.80M
1095 st_ahm_find 107.78ns 9.28M
1096 st_qpahm_find 90.69ns 11.03M
1097 ----------------------------------------------------------------------------
1098 mt_ahm_miss 2.09ns 477.36M
1099 mt_qpahm_miss 1.37ns 728.82M
1100 st_ahm_miss 241.07ns 4.15M
1101 st_qpahm_miss 223.17ns 4.48M
1102 mt_ahm_find_insert_mix 8.05ns 124.24M
1103 mt_qpahm_find_insert_mix 9.10ns 109.85M
1104 mt_aha_find 6.82ns 146.68M
1105 mt_ahm_find 7.95ns 125.77M
1106 mt_qpahm_find 6.81ns 146.83M
1107 st_baseline_modulus_and_random 6.02ns 166.03M
1108 mt_ahm_insert 14.29ns 69.97M
1109 mt_qpahm_insert 11.68ns 85.61M
1110 st_ahm_insert 125.39ns 7.98M
1111 st_qpahm_insert 128.76ns 7.77M
1112 ============================================================================