2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/AtomicHashMap.h>
23 #include <glog/logging.h>
25 #include <folly/Benchmark.h>
26 #include <folly/Conv.h>
27 #include <folly/portability/GTest.h>
28 #include <folly/portability/SysTime.h>
32 using folly::AtomicHashMap;
33 using folly::AtomicHashArray;
34 using folly::StringPiece;
37 DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
38 DEFINE_double(maxLoadFactor, 0.80, "Max before growth.");
39 DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests.");
40 DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks.");
42 const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor;
43 const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
45 static int64_t nowInUsec() {
47 gettimeofday(&tv, nullptr);
48 return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
51 TEST(Ahm, BasicStrings) {
52 typedef AtomicHashMap<int64_t,string> AHM;
54 EXPECT_TRUE(myMap.begin() == myMap.end());
56 for (int i = 0; i < 100; ++i) {
57 myMap.insert(make_pair(i, folly::to<string>(i)));
59 for (int i = 0; i < 100; ++i) {
60 EXPECT_EQ(myMap.find(i)->second, folly::to<string>(i));
63 myMap.insert(std::make_pair(999, "A"));
64 myMap.insert(std::make_pair(999, "B"));
65 EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten
66 myMap.find(999)->second = "B";
67 myMap.find(999)->second = "C";
68 EXPECT_EQ(myMap.find(999)->second, "C");
69 EXPECT_EQ(myMap.find(999)->first, 999);
73 TEST(Ahm, BasicNoncopyable) {
74 typedef AtomicHashMap<int64_t,std::unique_ptr<int>> AHM;
76 EXPECT_TRUE(myMap.begin() == myMap.end());
78 for (int i = 0; i < 50; ++i) {
79 myMap.insert(make_pair(i, std::make_unique<int>(i)));
81 for (int i = 50; i < 100; ++i) {
82 myMap.insert(i, std::make_unique<int>(i));
84 for (int i = 100; i < 150; ++i) {
85 myMap.emplace(i, new int (i));
87 for (int i = 150; i < 200; ++i) {
88 myMap.emplace(i, new int (i), std::default_delete<int>());
90 for (int i = 0; i < 200; ++i) {
91 EXPECT_EQ(*(myMap.find(i)->second), i);
93 for (int i = 0; i < 200; i+=4) {
96 for (int i = 0; i < 200; i+=4) {
97 EXPECT_EQ(myMap.find(i), myMap.end());
101 typedef int32_t KeyT;
102 typedef int32_t ValueT;
104 typedef AtomicHashMap<KeyT,ValueT> AHMapT;
105 typedef AHMapT::value_type RecordT;
106 typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
107 AHArrayT::Config config;
108 typedef folly::QuadraticProbingAtomicHashMap<KeyT,ValueT> QPAHMapT;
109 QPAHMapT::Config qpConfig;
110 static AHArrayT::SmartPtr globalAHA(nullptr);
111 static std::unique_ptr<AHMapT> globalAHM;
112 static std::unique_ptr<QPAHMapT> globalQPAHM;
114 // Generate a deterministic value based on an input key
115 static int genVal(int key) {
119 static bool legalKey(const char* a);
122 bool operator()(const char* a, const char* b) {
123 return legalKey(a) && (strcmp(a, b) == 0);
125 bool operator()(const char* a, const char& b) {
126 return legalKey(a) && (a[0] != '\0') && (a[0] == b);
128 bool operator()(const char* a, const StringPiece b) {
129 return legalKey(a) &&
130 (strlen(a) == b.size()) && (strcmp(a, b.begin()) == 0);
135 size_t operator()(const char* a) {
138 result += static_cast<size_t>(*(a++));
142 size_t operator()(const char& a) {
143 return static_cast<size_t>(a);
145 size_t operator()(const StringPiece a) {
147 for (const auto& ch : a) {
148 result += static_cast<size_t>(ch);
154 typedef AtomicHashMap<const char*, int64_t, HashTraits, EqTraits> AHMCstrInt;
155 AHMCstrInt::Config cstrIntCfg;
157 static bool legalKey(const char* a) {
158 return a != cstrIntCfg.emptyKey &&
159 a != cstrIntCfg.lockedKey &&
160 a != cstrIntCfg.erasedKey;
163 TEST(Ahm, BasicLookup) {
164 AHMCstrInt myMap(1024, cstrIntCfg);
165 EXPECT_TRUE(myMap.begin() == myMap.end());
166 myMap.insert(std::make_pair("f", 42));
167 EXPECT_EQ(42, myMap.find("f")->second);
169 // Look up a single char, successfully.
170 auto it = myMap.find<char>('f');
171 EXPECT_EQ(42, it->second);
174 // Look up a single char, unsuccessfully.
175 auto it = myMap.find<char>('g');
176 EXPECT_TRUE(it == myMap.end());
179 // Look up a string piece, successfully.
180 const StringPiece piece("f");
181 auto it = myMap.find(piece);
182 EXPECT_EQ(42, it->second);
187 VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
188 sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
189 uint64_t numEntries = 10000;
190 float sizeFactor = 0.46f;
192 std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
194 // load map - make sure we succeed and the index is accurate
196 for (uint64_t i = 0; i < numEntries; i++) {
197 auto ret = m->insert(RecordT(i, genVal(i)));
198 success &= ret.second;
199 success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
201 // Overwrite vals to make sure there are no dups
202 // Every insert should fail because the keys are already in the map.
204 for (uint64_t i = 0; i < numEntries; i++) {
205 auto ret = m->insert(RecordT(i, genVal(i * 2)));
206 success &= (ret.second == false); // fail on collision
207 success &= (ret.first->second == genVal(i)); // return the previous value
208 success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
210 EXPECT_TRUE(success);
213 EXPECT_GT(m->numSubMaps(), 1); // make sure we grew
215 EXPECT_EQ(m->size(), numEntries);
216 for (size_t i = 0; i < numEntries; i++) {
217 success &= (m->find(i)->second == genVal(i));
219 EXPECT_TRUE(success);
223 AHMapT::const_iterator retIt;
224 for (int32_t i = 0; i < int32_t(numEntries); i++) {
226 retIt = m->findAt(retIt.getIndex());
227 success &= (retIt->second == genVal(i));
228 // We use a uint32_t index so that this comparison is between two
229 // variables of the same type.
230 success &= (retIt->first == i);
232 EXPECT_TRUE(success);
234 // Try modifying value
235 m->find(8)->second = 5309;
236 EXPECT_EQ(m->find(8)->second, 5309);
241 for (uint64_t i = 0; i < numEntries / 2; i++) {
242 success &= m->insert(RecordT(i, genVal(i))).second;
244 EXPECT_TRUE(success);
245 EXPECT_EQ(m->size(), numEntries / 2);
248 TEST(Ahm, iterator) {
249 int numEntries = 10000;
250 float sizeFactor = .46f;
251 std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
253 // load map - make sure we succeed and the index is accurate
254 for (int i = 0; i < numEntries; i++) {
255 m->insert(RecordT(i, genVal(i)));
261 success &= (it->second == genVal(it->first));
264 EXPECT_TRUE(success);
265 EXPECT_EQ(count, numEntries);
270 // Note: Unfortunately can't currently put a std::atomic<int64_t> in
271 // the value in ahm since it doesn't support types that are both non-copy
272 // and non-move constructible yet.
273 AtomicHashMap<int64_t,int64_t> ahm;
276 explicit Counters(size_t numCounters) : ahm(numCounters) {}
278 void increment(int64_t obj_id) {
279 auto ret = ahm.insert(std::make_pair(obj_id, 1));
281 // obj_id already exists, increment count
282 __sync_fetch_and_add(&ret.first->second, 1);
286 int64_t getValue(int64_t obj_id) {
287 auto ret = ahm.find(obj_id);
288 return ret != ahm.end() ? ret->second : 0;
291 // export the counters without blocking increments
294 ret.reserve(ahm.size() * 32);
295 for (const auto& e : ahm) {
296 ret += folly::to<string>(
297 " [", e.first, ":", e.second, "]\n");
304 // If you get an error "terminate called without an active exception", there
305 // might be too many threads getting created - decrease numKeys and/or mult.
307 const int numKeys = 10;
310 vector<int64_t> keys;
311 FOR_EACH_RANGE(i, 1, numKeys) {
314 vector<std::thread> threads;
315 for (auto key : keys) {
316 FOR_EACH_RANGE(i, 0, key * mult) {
317 threads.push_back(std::thread([&, key] { c.increment(key); }));
320 for (auto& t : threads) {
323 string str = c.toString();
324 for (auto key : keys) {
325 int val = key * mult;
326 EXPECT_EQ(val, c.getValue(key));
327 EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
334 explicit Integer(KeyT v = 0) : v_(v) {}
336 Integer& operator=(const Integer& a) {
337 static bool throwException_ = false;
338 throwException_ = !throwException_;
339 if (throwException_) {
346 bool operator==(const Integer& a) const { return v_ == a.v_; }
352 TEST(Ahm, map_exception_safety) {
353 typedef AtomicHashMap<KeyT,Integer> MyMapT;
355 int numEntries = 10000;
356 float sizeFactor = 0.46f;
357 std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
361 for (int i = 0; i < numEntries; i++) {
363 m->insert(i, Integer(genVal(i)));
364 success &= (m->find(i)->second == Integer(genVal(i)));
367 success &= !m->count(i);
370 EXPECT_EQ(count, m->size());
371 EXPECT_TRUE(success);
374 TEST(Ahm, basicErase) {
375 size_t numEntries = 3000;
377 std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
378 // Iterate filling up the map and deleting all keys a few times
379 // to test more than one subMap.
380 for (int iterations = 0; iterations < 4; ++iterations) {
381 // Testing insertion of keys
383 for (size_t i = 0; i < numEntries; ++i) {
384 success &= !(s->count(i));
385 auto ret = s->insert(RecordT(i, i));
386 success &= s->count(i);
387 success &= ret.second;
389 EXPECT_TRUE(success);
390 EXPECT_EQ(s->size(), numEntries);
392 // Delete every key in the map and verify that the key is gone and the the
395 for (size_t i = 0; i < numEntries; ++i) {
396 success &= s->erase(i);
397 success &= (s->size() == numEntries - 1 - i);
398 success &= !(s->count(i));
399 success &= !(s->erase(i));
401 EXPECT_TRUE(success);
403 VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
408 inline KeyT randomizeKey(int key) {
409 // We deterministically randomize the key to more accurately simulate
410 // real-world usage, and to avoid pathalogical performance patterns (e.g.
411 // those related to std::hash<int64_t>()(1) == 1).
413 // Use a hash function we don't normally use for ints to avoid interactions.
414 return folly::hash::jenkins_rev_mix32(key);
417 int numOpsPerThread = 0;
419 void* insertThread(void* jj) {
420 int64_t j = (int64_t) jj;
421 for (int i = 0; i < numOpsPerThread; ++i) {
422 KeyT key = randomizeKey(i + j * numOpsPerThread);
423 globalAHM->insert(key, genVal(key));
428 void* qpInsertThread(void* jj) {
429 int64_t j = (int64_t) jj;
430 for (int i = 0; i < numOpsPerThread; ++i) {
431 KeyT key = randomizeKey(i + j * numOpsPerThread);
432 globalQPAHM->insert(key, genVal(key));
437 void* insertThreadArr(void* jj) {
438 int64_t j = (int64_t) jj;
439 for (int i = 0; i < numOpsPerThread; ++i) {
440 KeyT key = randomizeKey(i + j * numOpsPerThread);
441 globalAHA->insert(std::make_pair(key, genVal(key)));
446 std::atomic<bool> runThreadsCreatedAllThreads;
447 void runThreads(void *(*mainFunc)(void*), int numThreads, void **statuses) {
448 folly::BenchmarkSuspender susp;
449 runThreadsCreatedAllThreads.store(false);
450 vector<std::thread> threads;
451 for (int64_t j = 0; j < numThreads; j++) {
452 threads.emplace_back([statuses, mainFunc, j]() {
453 auto ret = mainFunc((void*)j);
454 if (statuses != nullptr) {
461 runThreadsCreatedAllThreads.store(true);
462 for (size_t i = 0; i < threads.size(); ++i) {
467 void runThreads(void *(*mainFunc)(void*)) {
468 runThreads(mainFunc, FLAGS_numThreads, nullptr);
473 TEST(Ahm, collision_test) {
474 const int numInserts = 1000000 / 4;
476 // Doing the same number on each thread so we collide.
477 numOpsPerThread = numInserts;
479 float sizeFactor = 0.46f;
480 int entrySize = sizeof(KeyT) + sizeof(ValueT);
481 VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
482 " Byte entries replicated in " << FLAGS_numThreads <<
483 " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor.";
485 globalAHM = std::make_unique<AHMapT>(int(numInserts * sizeFactor), config);
487 size_t sizeInit = globalAHM->capacity();
488 VLOG(1) << " Initial capacity: " << sizeInit;
490 double start = nowInUsec();
491 runThreads([](void*) -> void* { // collisionInsertThread
492 for (int i = 0; i < numOpsPerThread; ++i) {
493 KeyT key = randomizeKey(i);
494 globalAHM->insert(key, genVal(key));
498 double elapsed = nowInUsec() - start;
500 size_t finalCap = globalAHM->capacity();
501 size_t sizeAHM = globalAHM->size();
502 VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
503 " duplicate inserts (atomic).";
504 VLOG(1) << " Final capacity: " << finalCap << " in " <<
505 globalAHM->numSubMaps() << " sub maps (" <<
506 sizeAHM * 100 / finalCap << "% load factor, " <<
507 (finalCap - sizeInit) * 100 / sizeInit << "% growth).";
510 EXPECT_EQ(sizeAHM, numInserts);
512 for (int i = 0; i < numInserts; ++i) {
513 KeyT key = randomizeKey(i);
514 success &= (globalAHM->find(key)->second == genVal(key));
516 EXPECT_TRUE(success);
518 // check colliding finds
520 runThreads([](void*) -> void* { // collisionFindThread
522 for (int i = 0; i < numOpsPerThread; ++i) {
523 globalAHM->find(key);
528 elapsed = nowInUsec() - start;
530 VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
531 " duplicate finds (atomic).";
536 const int kInsertPerThread = 100000;
537 int raceFinalSizeEstimate;
539 void* raceIterateThread(void*) {
542 AHMapT::iterator it = globalAHM->begin();
543 AHMapT::iterator end = globalAHM->end();
544 for (; it != end; ++it) {
546 if (count > raceFinalSizeEstimate) {
547 EXPECT_FALSE("Infinite loop in iterator.");
554 void* raceInsertRandomThread(void*) {
555 for (int i = 0; i < kInsertPerThread; ++i) {
557 globalAHM->insert(key, genVal(key));
564 // Test for race conditions when inserting and iterating at the same time and
565 // creating multiple submaps.
566 TEST(Ahm, race_insert_iterate_thread_test) {
567 const int kInsertThreads = 20;
568 const int kIterateThreads = 20;
569 raceFinalSizeEstimate = kInsertThreads * kInsertPerThread;
571 VLOG(1) << "Testing iteration and insertion with " << kInsertThreads
572 << " threads inserting and " << kIterateThreads << " threads iterating.";
574 globalAHM = std::make_unique<AHMapT>(raceFinalSizeEstimate / 9, config);
576 vector<pthread_t> threadIds;
577 for (int j = 0; j < kInsertThreads + kIterateThreads; j++) {
579 void *(*thread)(void*) =
580 (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
581 if (pthread_create(&tid, nullptr, thread, nullptr) != 0) {
582 LOG(ERROR) << "Could not start thread";
584 threadIds.push_back(tid);
587 for (size_t i = 0; i < threadIds.size(); ++i) {
588 pthread_join(threadIds[i], nullptr);
590 VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
591 VLOG(1) << "Final size of map " << globalAHM->size();
596 const int kTestEraseInsertions = 200000;
597 std::atomic<int32_t> insertedLevel;
599 void* testEraseInsertThread(void*) {
600 for (int i = 0; i < kTestEraseInsertions; ++i) {
601 KeyT key = randomizeKey(i);
602 globalAHM->insert(key, genVal(key));
603 insertedLevel.store(i, std::memory_order_release);
605 insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
609 void* testEraseEraseThread(void*) {
610 for (int i = 0; i < kTestEraseInsertions; ++i) {
612 * Make sure that we don't get ahead of the insert thread, because
613 * part of the condition for this unit test succeeding is that the
616 * Note, there is a subtle case here when a new submap is
617 * allocated: the erasing thread might get 0 from count(key)
618 * because it hasn't seen numSubMaps_ update yet. To avoid this
619 * race causing problems for the test (it's ok for real usage), we
620 * lag behind the inserter by more than just element.
625 currentLevel = insertedLevel.load(std::memory_order_acquire);
626 if (currentLevel == kTestEraseInsertions) {
627 currentLevel += lag + 1;
629 } while (currentLevel - lag < i);
631 KeyT key = randomizeKey(i);
632 while (globalAHM->count(key)) {
633 if (globalAHM->erase(key)) {
643 // Here we have a single thread inserting some values, and several threads
644 // racing to delete the values in the order they were inserted.
645 TEST(Ahm, thread_erase_insert_race) {
646 const int kInsertThreads = 1;
647 const int kEraseThreads = 10;
649 VLOG(1) << "Testing insertion and erase with " << kInsertThreads
650 << " thread inserting and " << kEraseThreads << " threads erasing.";
652 globalAHM = std::make_unique<AHMapT>(kTestEraseInsertions / 4, config);
654 vector<pthread_t> threadIds;
655 for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) {
657 void *(*thread)(void*) =
658 (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
659 if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
660 LOG(ERROR) << "Could not start thread";
662 threadIds.push_back(tid);
665 for (size_t i = 0; i < threadIds.size(); i++) {
666 pthread_join(threadIds[i], nullptr);
669 EXPECT_TRUE(globalAHM->empty());
670 EXPECT_EQ(globalAHM->size(), 0);
672 VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
675 // Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value.
676 typedef AtomicHashArray<int32_t, int32_t> AHA;
677 AHA::Config configRace;
678 auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
679 void* atomicHashArrayInsertRaceThread(void* /* j */) {
680 AHA* arr = atomicHashArrayInsertRaceArray.get();
681 uintptr_t numInserted = 0;
682 while (!runThreadsCreatedAllThreads.load()) {
685 for (int i = 0; i < 2; i++) {
686 if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
690 return (void*)numInserted;
692 TEST(Ahm, atomic_hash_array_insert_race) {
693 AHA* arr = atomicHashArrayInsertRaceArray.get();
694 int numIterations = 5000;
695 constexpr int numThreads = 4;
696 void* statuses[numThreads];
697 for (int i = 0; i < numIterations; i++) {
699 runThreads(atomicHashArrayInsertRaceThread, numThreads, statuses);
700 EXPECT_GE(arr->size(), 1);
701 for (int j = 0; j < numThreads; j++) {
702 EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
707 // Repro for T#5841499. Race between erase() and find() on the same key.
708 TEST(Ahm, erase_find_race) {
709 const uint64_t limit = 10000;
710 AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
711 std::atomic<uint64_t> key {1};
713 // Invariant: all values are equal to their keys.
714 // At any moment there is one or two consecutive keys in the map.
716 std::thread write_thread([&]() {
722 map.insert(k + 1, k + 1);
727 std::thread read_thread([&]() {
729 uint64_t k = key.load();
734 auto it = map.find(k);
735 if (it != map.end()) {
736 ASSERT_EQ(k, it->second);
745 // Erase right after insert race bug repro (t9130653)
746 TEST(Ahm, erase_after_insert_race) {
747 const uint64_t limit = 10000;
748 const size_t num_threads = 100;
749 const size_t num_iters = 500;
750 AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
752 std::atomic<bool> go{false};
753 std::vector<std::thread> ts;
754 for (size_t i = 0; i < num_threads; ++i) {
755 ts.emplace_back([&]() {
759 for (size_t n = 0; n < num_iters; ++n) {
773 // Repro for a bug when iterator didn't skip empty submaps.
774 TEST(Ahm, iterator_skips_empty_submaps) {
775 AtomicHashMap<uint64_t, uint64_t>::Config config;
776 config.growthFactor = 1;
778 AtomicHashMap<uint64_t, uint64_t> map(1, config);
786 auto it = map.find(1);
788 ASSERT_NE(map.end(), it);
789 ASSERT_EQ(1, it->first);
790 ASSERT_EQ(1, it->second);
794 ASSERT_NE(map.end(), it);
795 ASSERT_EQ(3, it->first);
796 ASSERT_EQ(3, it->second);
799 ASSERT_EQ(map.end(), it);
804 void loadGlobalAha() {
805 std::cout << "loading global AHA with " << FLAGS_numThreads
807 uint64_t start = nowInUsec();
808 globalAHA = AHArrayT::create(maxBMElements, config);
809 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
810 CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) <<
811 "kNumThreads must evenly divide kNumInserts.";
812 runThreads(insertThreadArr);
813 uint64_t elapsed = nowInUsec() - start;
814 std::cout << " took " << elapsed / 1000 << " ms (" <<
815 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
816 EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements);
819 void loadGlobalAhm() {
820 std::cout << "loading global AHM with " << FLAGS_numThreads
822 uint64_t start = nowInUsec();
823 globalAHM = std::make_unique<AHMapT>(maxBMElements, config);
824 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
825 runThreads(insertThread);
826 uint64_t elapsed = nowInUsec() - start;
827 std::cout << " took " << elapsed / 1000 << " ms (" <<
828 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
829 EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
832 void loadGlobalQPAhm() {
833 std::cout << "loading global QPAHM with " << FLAGS_numThreads
835 uint64_t start = nowInUsec();
836 globalQPAHM = std::make_unique<QPAHMapT>(maxBMElements, qpConfig);
837 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
838 runThreads(qpInsertThread);
839 uint64_t elapsed = nowInUsec() - start;
840 std::cout << " took " << elapsed / 1000 << " ms (" <<
841 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
842 EXPECT_EQ(globalQPAHM->size(), FLAGS_numBMElements);
847 BENCHMARK(st_aha_find, iters) {
848 CHECK_LE(iters, FLAGS_numBMElements);
849 for (size_t i = 0; i < iters; i++) {
850 KeyT key = randomizeKey(i);
851 folly::doNotOptimizeAway(globalAHA->find(key)->second);
855 BENCHMARK(st_ahm_find, iters) {
856 CHECK_LE(iters, FLAGS_numBMElements);
857 for (size_t i = 0; i < iters; i++) {
858 KeyT key = randomizeKey(i);
859 folly::doNotOptimizeAway(globalAHM->find(key)->second);
863 BENCHMARK(st_qpahm_find, iters) {
864 CHECK_LE(iters, FLAGS_numBMElements);
865 for (size_t i = 0; i < iters; i++) {
866 KeyT key = randomizeKey(i);
867 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
871 BENCHMARK_DRAW_LINE()
873 BENCHMARK(mt_ahm_miss, iters) {
874 CHECK_LE(iters, FLAGS_numBMElements);
875 numOpsPerThread = iters / FLAGS_numThreads;
876 runThreads([](void* jj) -> void* {
877 int64_t j = (int64_t) jj;
878 while (!runThreadsCreatedAllThreads.load()) {
881 for (int i = 0; i < numOpsPerThread; ++i) {
882 KeyT key = i + j * numOpsPerThread * 100;
883 folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
889 BENCHMARK(mt_qpahm_miss, iters) {
890 CHECK_LE(iters, FLAGS_numBMElements);
891 numOpsPerThread = iters / FLAGS_numThreads;
892 runThreads([](void* jj) -> void* {
893 int64_t j = (int64_t) jj;
894 while (!runThreadsCreatedAllThreads.load()) {
897 for (int i = 0; i < numOpsPerThread; ++i) {
898 KeyT key = i + j * numOpsPerThread * 100;
899 folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
905 BENCHMARK(st_ahm_miss, iters) {
906 CHECK_LE(iters, FLAGS_numBMElements);
907 for (size_t i = 0; i < iters; i++) {
908 KeyT key = randomizeKey(i + iters * 100);
909 folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
913 BENCHMARK(st_qpahm_miss, iters) {
914 CHECK_LE(iters, FLAGS_numBMElements);
915 for (size_t i = 0; i < iters; i++) {
916 KeyT key = randomizeKey(i + iters * 100);
917 folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
921 BENCHMARK(mt_ahm_find_insert_mix, iters) {
922 CHECK_LE(iters, FLAGS_numBMElements);
923 numOpsPerThread = iters / FLAGS_numThreads;
924 runThreads([](void* jj) -> void* {
925 int64_t j = (int64_t) jj;
926 while (!runThreadsCreatedAllThreads.load()) {
929 for (int i = 0; i < numOpsPerThread; ++i) {
930 if (i % 128) { // ~1% insert mix
931 KeyT key = randomizeKey(i + j * numOpsPerThread);
932 folly::doNotOptimizeAway(globalAHM->find(key)->second);
934 KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
935 globalAHM->insert(key, genVal(key));
943 BENCHMARK(mt_qpahm_find_insert_mix, iters) {
944 CHECK_LE(iters, FLAGS_numBMElements);
945 numOpsPerThread = iters / FLAGS_numThreads;
946 runThreads([](void* jj) -> void* {
947 int64_t j = (int64_t) jj;
948 while (!runThreadsCreatedAllThreads.load()) {
951 for (int i = 0; i < numOpsPerThread; ++i) {
952 if (i % 128) { // ~1% insert mix
953 KeyT key = randomizeKey(i + j * numOpsPerThread);
954 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
956 KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
957 globalQPAHM->insert(key, genVal(key));
964 BENCHMARK(mt_aha_find, iters) {
965 CHECK_LE(iters, FLAGS_numBMElements);
966 numOpsPerThread = iters / FLAGS_numThreads;
967 runThreads([](void* jj) -> void* {
968 int64_t j = (int64_t) jj;
969 while (!runThreadsCreatedAllThreads.load()) {
972 for (int i = 0; i < numOpsPerThread; ++i) {
973 KeyT key = randomizeKey(i + j * numOpsPerThread);
974 folly::doNotOptimizeAway(globalAHA->find(key)->second);
980 BENCHMARK(mt_ahm_find, iters) {
981 CHECK_LE(iters, FLAGS_numBMElements);
982 numOpsPerThread = iters / FLAGS_numThreads;
983 runThreads([](void* jj) -> void* {
984 int64_t j = (int64_t) jj;
985 while (!runThreadsCreatedAllThreads.load()) {
988 for (int i = 0; i < numOpsPerThread; ++i) {
989 KeyT key = randomizeKey(i + j * numOpsPerThread);
990 folly::doNotOptimizeAway(globalAHM->find(key)->second);
996 BENCHMARK(mt_qpahm_find, iters) {
997 CHECK_LE(iters, FLAGS_numBMElements);
998 numOpsPerThread = iters / FLAGS_numThreads;
999 runThreads([](void* jj) -> void* {
1000 int64_t j = (int64_t) jj;
1001 while (!runThreadsCreatedAllThreads.load()) {
1004 for (int i = 0; i < numOpsPerThread; ++i) {
1005 KeyT key = randomizeKey(i + j * numOpsPerThread);
1006 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
1013 BENCHMARK(st_baseline_modulus_and_random, iters) {
1014 for (size_t i = 0; i < iters; ++i) {
1015 k = randomizeKey(i) % iters;
1019 // insertions go last because they reset the map
1021 BENCHMARK(mt_ahm_insert, iters) {
1023 globalAHM = std::make_unique<AHMapT>(int(iters * LF), config);
1024 numOpsPerThread = iters / FLAGS_numThreads;
1026 runThreads(insertThread);
1029 BENCHMARK(mt_qpahm_insert, iters) {
1031 globalQPAHM = std::make_unique<QPAHMapT>(int(iters * LF), qpConfig);
1032 numOpsPerThread = iters / FLAGS_numThreads;
1034 runThreads(qpInsertThread);
1037 BENCHMARK(st_ahm_insert, iters) {
1038 folly::BenchmarkSuspender susp;
1039 std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
1042 for (size_t i = 0; i < iters; i++) {
1043 KeyT key = randomizeKey(i);
1044 ahm->insert(key, genVal(key));
1048 BENCHMARK(st_qpahm_insert, iters) {
1049 folly::BenchmarkSuspender susp;
1050 std::unique_ptr<QPAHMapT> ahm(new QPAHMapT(int(iters * LF), qpConfig));
1053 for (size_t i = 0; i < iters; i++) {
1054 KeyT key = randomizeKey(i);
1055 ahm->insert(key, genVal(key));
1059 void benchmarkSetup() {
1060 config.maxLoadFactor = FLAGS_maxLoadFactor;
1061 qpConfig.maxLoadFactor = FLAGS_maxLoadFactor;
1062 configRace.maxLoadFactor = 0.5;
1063 int numCores = sysconf(_SC_NPROCESSORS_ONLN);
1067 string numIters = folly::to<string>(
1068 std::min(1000000, int(FLAGS_numBMElements)));
1070 gflags::SetCommandLineOptionWithMode(
1071 "bm_max_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
1073 gflags::SetCommandLineOptionWithMode(
1074 "bm_min_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
1076 string numCoresStr = folly::to<string>(numCores);
1077 gflags::SetCommandLineOptionWithMode(
1078 "numThreads", numCoresStr.c_str(), gflags::SET_FLAG_IF_DEFAULT
1081 std::cout << "\nRunning AHM benchmarks on machine with " << numCores
1082 << " logical cores.\n"
1083 " num elements per map: " << FLAGS_numBMElements << "\n"
1084 << " num threads for mt tests: " << FLAGS_numThreads << "\n"
1085 << " AHM load factor: " << FLAGS_targetLoadFactor << "\n\n";
1088 int main(int argc, char** argv) {
1089 testing::InitGoogleTest(&argc, argv);
1090 gflags::ParseCommandLineFlags(&argc, &argv, true);
1091 auto ret = RUN_ALL_TESTS();
1092 if (!ret && FLAGS_benchmark) {
1094 folly::runBenchmarks();
1100 loading global AHA with 8 threads...
1101 took 487 ms (40 ns/insert).
1102 loading global AHM with 8 threads...
1103 took 478 ms (39 ns/insert).
1104 loading global QPAHM with 8 threads...
1105 took 478 ms (39 ns/insert).
1107 Running AHM benchmarks on machine with 24 logical cores.
1108 num elements per map: 12000000
1109 num threads for mt tests: 24
1110 AHM load factor: 0.75
1112 ============================================================================
1113 folly/test/AtomicHashMapTest.cpp relative time/iter iters/s
1114 ============================================================================
1115 st_aha_find 92.63ns 10.80M
1116 st_ahm_find 107.78ns 9.28M
1117 st_qpahm_find 90.69ns 11.03M
1118 ----------------------------------------------------------------------------
1119 mt_ahm_miss 2.09ns 477.36M
1120 mt_qpahm_miss 1.37ns 728.82M
1121 st_ahm_miss 241.07ns 4.15M
1122 st_qpahm_miss 223.17ns 4.48M
1123 mt_ahm_find_insert_mix 8.05ns 124.24M
1124 mt_qpahm_find_insert_mix 9.10ns 109.85M
1125 mt_aha_find 6.82ns 146.68M
1126 mt_ahm_find 7.95ns 125.77M
1127 mt_qpahm_find 6.81ns 146.83M
1128 st_baseline_modulus_and_random 6.02ns 166.03M
1129 mt_ahm_insert 14.29ns 69.97M
1130 mt_qpahm_insert 11.68ns 85.61M
1131 st_ahm_insert 125.39ns 7.98M
1132 st_qpahm_insert 128.76ns 7.77M
1133 ============================================================================