2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/AtomicHashMap.h>
19 #include <glog/logging.h>
20 #include <gtest/gtest.h>
25 #include <folly/Benchmark.h>
26 #include <folly/Conv.h>
30 using folly::AtomicHashMap;
31 using folly::AtomicHashArray;
32 using folly::StringPiece;
35 DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
36 DEFINE_double(maxLoadFactor, 0.80, "Max before growth.");
37 DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests.");
38 DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks.");
40 const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor;
41 const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
43 static int64_t nowInUsec() {
46 return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
49 TEST(Ahm, BasicStrings) {
50 typedef AtomicHashMap<int64_t,string> AHM;
52 EXPECT_TRUE(myMap.begin() == myMap.end());
54 for (int i = 0; i < 100; ++i) {
55 myMap.insert(make_pair(i, folly::to<string>(i)));
57 for (int i = 0; i < 100; ++i) {
58 EXPECT_EQ(myMap.find(i)->second, folly::to<string>(i));
61 myMap.insert(std::make_pair(999, "A"));
62 myMap.insert(std::make_pair(999, "B"));
63 EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten
64 myMap.find(999)->second = "B";
65 myMap.find(999)->second = "C";
66 EXPECT_EQ(myMap.find(999)->second, "C");
67 EXPECT_EQ(myMap.find(999)->first, 999);
71 TEST(Ahm, BasicNoncopyable) {
72 typedef AtomicHashMap<int64_t,std::unique_ptr<int>> AHM;
74 EXPECT_TRUE(myMap.begin() == myMap.end());
76 for (int i = 0; i < 50; ++i) {
77 myMap.insert(make_pair(i, std::unique_ptr<int>(new int(i))));
79 for (int i = 50; i < 100; ++i) {
80 myMap.insert(i, std::unique_ptr<int>(new int (i)));
82 for (int i = 100; i < 150; ++i) {
83 myMap.emplace(i, new int (i));
85 for (int i = 150; i < 200; ++i) {
86 myMap.emplace(i, new int (i), std::default_delete<int>());
88 for (int i = 0; i < 200; ++i) {
89 EXPECT_EQ(*(myMap.find(i)->second), i);
91 for (int i = 0; i < 200; i+=4) {
94 for (int i = 0; i < 200; i+=4) {
95 EXPECT_EQ(myMap.find(i), myMap.end());
100 typedef int32_t ValueT;
102 typedef AtomicHashMap<KeyT,ValueT> AHMapT;
103 typedef AHMapT::value_type RecordT;
104 typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
105 AHArrayT::Config config;
106 typedef folly::QuadraticProbingAtomicHashMap<KeyT,ValueT> QPAHMapT;
107 QPAHMapT::Config qpConfig;
108 static AHArrayT::SmartPtr globalAHA(nullptr);
109 static std::unique_ptr<AHMapT> globalAHM;
110 static std::unique_ptr<QPAHMapT> globalQPAHM;
112 // Generate a deterministic value based on an input key
113 static int genVal(int key) {
117 static bool legalKey(const char* a);
120 bool operator()(const char* a, const char* b) {
121 return legalKey(a) && (strcmp(a, b) == 0);
123 bool operator()(const char* a, const char& b) {
124 return legalKey(a) && (a[0] != '\0') && (a[0] == b);
126 bool operator()(const char* a, const StringPiece b) {
127 return legalKey(a) &&
128 (strlen(a) == b.size()) && (strcmp(a, b.begin()) == 0);
133 size_t operator()(const char* a) {
135 while (a[0] != 0) result += static_cast<size_t>(*(a++));
138 size_t operator()(const char& a) {
139 return static_cast<size_t>(a);
141 size_t operator()(const StringPiece a) {
143 for (const auto& ch : a) result += static_cast<size_t>(ch);
148 typedef AtomicHashMap<const char*, int64_t, HashTraits, EqTraits> AHMCstrInt;
149 AHMCstrInt::Config cstrIntCfg;
151 static bool legalKey(const char* a) {
152 return a != cstrIntCfg.emptyKey &&
153 a != cstrIntCfg.lockedKey &&
154 a != cstrIntCfg.erasedKey;
157 TEST(Ahm, BasicLookup) {
158 AHMCstrInt myMap(1024, cstrIntCfg);
159 EXPECT_TRUE(myMap.begin() == myMap.end());
160 myMap.insert(std::make_pair("f", 42));
161 EXPECT_EQ(42, myMap.find("f")->second);
163 // Look up a single char, successfully.
164 auto it = myMap.find<char>('f');
165 EXPECT_EQ(42, it->second);
168 // Look up a single char, unsuccessfully.
169 auto it = myMap.find<char>('g');
170 EXPECT_TRUE(it == myMap.end());
173 // Look up a string piece, successfully.
174 const StringPiece piece("f");
175 auto it = myMap.find(piece);
176 EXPECT_EQ(42, it->second);
181 VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
182 sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
183 uint64_t numEntries = 10000;
184 float sizeFactor = 0.46;
186 std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
188 // load map - make sure we succeed and the index is accurate
190 for (uint64_t i = 0; i < numEntries; i++) {
191 auto ret = m->insert(RecordT(i, genVal(i)));
192 success &= ret.second;
193 success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
195 // Overwrite vals to make sure there are no dups
196 // Every insert should fail because the keys are already in the map.
198 for (uint64_t i = 0; i < numEntries; i++) {
199 auto ret = m->insert(RecordT(i, genVal(i * 2)));
200 success &= (ret.second == false); // fail on collision
201 success &= (ret.first->second == genVal(i)); // return the previous value
202 success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
204 EXPECT_TRUE(success);
207 EXPECT_GT(m->numSubMaps(), 1); // make sure we grew
209 EXPECT_EQ(m->size(), numEntries);
210 for (size_t i = 0; i < numEntries; i++) {
211 success &= (m->find(i)->second == genVal(i));
213 EXPECT_TRUE(success);
217 AHMapT::const_iterator retIt;
218 for (int32_t i = 0; i < int32_t(numEntries); i++) {
220 retIt = m->findAt(retIt.getIndex());
221 success &= (retIt->second == genVal(i));
222 // We use a uint32_t index so that this comparison is between two
223 // variables of the same type.
224 success &= (retIt->first == i);
226 EXPECT_TRUE(success);
228 // Try modifying value
229 m->find(8)->second = 5309;
230 EXPECT_EQ(m->find(8)->second, 5309);
235 for (uint64_t i = 0; i < numEntries / 2; i++) {
236 success &= m->insert(RecordT(i, genVal(i))).second;
238 EXPECT_TRUE(success);
239 EXPECT_EQ(m->size(), numEntries / 2);
242 TEST(Ahm, iterator) {
243 int numEntries = 10000;
244 float sizeFactor = .46;
245 std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
247 // load map - make sure we succeed and the index is accurate
248 for (int i = 0; i < numEntries; i++) {
249 m->insert(RecordT(i, genVal(i)));
255 success &= (it->second == genVal(it->first));
258 EXPECT_TRUE(success);
259 EXPECT_EQ(count, numEntries);
264 // Note: Unfortunately can't currently put a std::atomic<int64_t> in
265 // the value in ahm since it doesn't support types that are both non-copy
266 // and non-move constructible yet.
267 AtomicHashMap<int64_t,int64_t> ahm;
270 explicit Counters(size_t numCounters) : ahm(numCounters) {}
272 void increment(int64_t obj_id) {
273 auto ret = ahm.insert(std::make_pair(obj_id, 1));
275 // obj_id already exists, increment count
276 __sync_fetch_and_add(&ret.first->second, 1);
280 int64_t getValue(int64_t obj_id) {
281 auto ret = ahm.find(obj_id);
282 return ret != ahm.end() ? ret->second : 0;
285 // export the counters without blocking increments
288 ret.reserve(ahm.size() * 32);
289 for (const auto& e : ahm) {
290 ret += folly::to<string>(
291 " [", e.first, ":", e.second, "]\n");
298 // If you get an error "terminate called without an active exception", there
299 // might be too many threads getting created - decrease numKeys and/or mult.
301 const int numKeys = 10;
304 vector<int64_t> keys;
305 FOR_EACH_RANGE(i, 1, numKeys) {
308 vector<std::thread> threads;
309 for (auto key : keys) {
310 FOR_EACH_RANGE(i, 0, key * mult) {
311 threads.push_back(std::thread([&, key] { c.increment(key); }));
314 for (auto& t : threads) {
317 string str = c.toString();
318 for (auto key : keys) {
319 int val = key * mult;
320 EXPECT_EQ(val, c.getValue(key));
321 EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
328 explicit Integer(KeyT v = 0) : v_(v) {}
330 Integer& operator=(const Integer& a) {
331 static bool throwException_ = false;
332 throwException_ = !throwException_;
333 if (throwException_) {
340 bool operator==(const Integer& a) const { return v_ == a.v_; }
346 TEST(Ahm, map_exception_safety) {
347 typedef AtomicHashMap<KeyT,Integer> MyMapT;
349 int numEntries = 10000;
350 float sizeFactor = 0.46;
351 std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
355 for (int i = 0; i < numEntries; i++) {
357 m->insert(i, Integer(genVal(i)));
358 success &= (m->find(i)->second == Integer(genVal(i)));
361 success &= !m->count(i);
364 EXPECT_EQ(count, m->size());
365 EXPECT_TRUE(success);
368 TEST(Ahm, basicErase) {
369 size_t numEntries = 3000;
371 std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
372 // Iterate filling up the map and deleting all keys a few times
373 // to test more than one subMap.
374 for (int iterations = 0; iterations < 4; ++iterations) {
375 // Testing insertion of keys
377 for (size_t i = 0; i < numEntries; ++i) {
378 success &= !(s->count(i));
379 auto ret = s->insert(RecordT(i, i));
380 success &= s->count(i);
381 success &= ret.second;
383 EXPECT_TRUE(success);
384 EXPECT_EQ(s->size(), numEntries);
386 // Delete every key in the map and verify that the key is gone and the the
389 for (size_t i = 0; i < numEntries; ++i) {
390 success &= s->erase(i);
391 success &= (s->size() == numEntries - 1 - i);
392 success &= !(s->count(i));
393 success &= !(s->erase(i));
395 EXPECT_TRUE(success);
397 VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
402 inline KeyT randomizeKey(int key) {
403 // We deterministically randomize the key to more accurately simulate
404 // real-world usage, and to avoid pathalogical performance patterns (e.g.
405 // those related to __gnu_cxx::hash<int64_t>()(1) == 1).
407 // Use a hash function we don't normally use for ints to avoid interactions.
408 return folly::hash::jenkins_rev_mix32(key);
411 int numOpsPerThread = 0;
413 void* insertThread(void* jj) {
414 int64_t j = (int64_t) jj;
415 for (int i = 0; i < numOpsPerThread; ++i) {
416 KeyT key = randomizeKey(i + j * numOpsPerThread);
417 globalAHM->insert(key, genVal(key));
422 void* qpInsertThread(void* jj) {
423 int64_t j = (int64_t) jj;
424 for (int i = 0; i < numOpsPerThread; ++i) {
425 KeyT key = randomizeKey(i + j * numOpsPerThread);
426 globalQPAHM->insert(key, genVal(key));
431 void* insertThreadArr(void* jj) {
432 int64_t j = (int64_t) jj;
433 for (int i = 0; i < numOpsPerThread; ++i) {
434 KeyT key = randomizeKey(i + j * numOpsPerThread);
435 globalAHA->insert(std::make_pair(key, genVal(key)));
440 std::atomic<bool> runThreadsCreatedAllThreads;
441 void runThreads(void *(*thread)(void*), int numThreads, void **statuses) {
442 folly::BenchmarkSuspender susp;
443 runThreadsCreatedAllThreads.store(false);
444 vector<pthread_t> threadIds;
445 for (int64_t j = 0; j < numThreads; j++) {
447 if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
448 LOG(ERROR) << "Could not start thread";
450 threadIds.push_back(tid);
455 runThreadsCreatedAllThreads.store(true);
456 for (size_t i = 0; i < threadIds.size(); ++i) {
457 pthread_join(threadIds[i], statuses == nullptr ? nullptr : &statuses[i]);
461 void runThreads(void *(*thread)(void*)) {
462 runThreads(thread, FLAGS_numThreads, nullptr);
467 TEST(Ahm, collision_test) {
468 const int numInserts = 1000000 / 4;
470 // Doing the same number on each thread so we collide.
471 numOpsPerThread = numInserts;
473 float sizeFactor = 0.46;
474 int entrySize = sizeof(KeyT) + sizeof(ValueT);
475 VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
476 " Byte entries replicated in " << FLAGS_numThreads <<
477 " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor.";
479 globalAHM.reset(new AHMapT(int(numInserts * sizeFactor), config));
481 size_t sizeInit = globalAHM->capacity();
482 VLOG(1) << " Initial capacity: " << sizeInit;
484 double start = nowInUsec();
485 runThreads([](void*) -> void* { // collisionInsertThread
486 for (int i = 0; i < numOpsPerThread; ++i) {
487 KeyT key = randomizeKey(i);
488 globalAHM->insert(key, genVal(key));
492 double elapsed = nowInUsec() - start;
494 size_t finalCap = globalAHM->capacity();
495 size_t sizeAHM = globalAHM->size();
496 VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
497 " duplicate inserts (atomic).";
498 VLOG(1) << " Final capacity: " << finalCap << " in " <<
499 globalAHM->numSubMaps() << " sub maps (" <<
500 sizeAHM * 100 / finalCap << "% load factor, " <<
501 (finalCap - sizeInit) * 100 / sizeInit << "% growth).";
504 EXPECT_EQ(sizeAHM, numInserts);
506 for (int i = 0; i < numInserts; ++i) {
507 KeyT key = randomizeKey(i);
508 success &= (globalAHM->find(key)->second == genVal(key));
510 EXPECT_TRUE(success);
512 // check colliding finds
514 runThreads([](void*) -> void* { // collisionFindThread
516 for (int i = 0; i < numOpsPerThread; ++i) {
517 globalAHM->find(key);
522 elapsed = nowInUsec() - start;
524 VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
525 " duplicate finds (atomic).";
530 const int kInsertPerThread = 100000;
531 int raceFinalSizeEstimate;
533 void* raceIterateThread(void*) {
536 AHMapT::iterator it = globalAHM->begin();
537 AHMapT::iterator end = globalAHM->end();
538 for (; it != end; ++it) {
540 if (count > raceFinalSizeEstimate) {
541 EXPECT_FALSE("Infinite loop in iterator.");
548 void* raceInsertRandomThread(void*) {
549 for (int i = 0; i < kInsertPerThread; ++i) {
551 globalAHM->insert(key, genVal(key));
558 // Test for race conditions when inserting and iterating at the same time and
559 // creating multiple submaps.
560 TEST(Ahm, race_insert_iterate_thread_test) {
561 const int kInsertThreads = 20;
562 const int kIterateThreads = 20;
563 raceFinalSizeEstimate = kInsertThreads * kInsertPerThread;
565 VLOG(1) << "Testing iteration and insertion with " << kInsertThreads
566 << " threads inserting and " << kIterateThreads << " threads iterating.";
568 globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config));
570 vector<pthread_t> threadIds;
571 for (int j = 0; j < kInsertThreads + kIterateThreads; j++) {
573 void *(*thread)(void*) =
574 (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
575 if (pthread_create(&tid, nullptr, thread, nullptr) != 0) {
576 LOG(ERROR) << "Could not start thread";
578 threadIds.push_back(tid);
581 for (size_t i = 0; i < threadIds.size(); ++i) {
582 pthread_join(threadIds[i], nullptr);
584 VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
585 VLOG(1) << "Final size of map " << globalAHM->size();
590 const int kTestEraseInsertions = 200000;
591 std::atomic<int32_t> insertedLevel;
593 void* testEraseInsertThread(void*) {
594 for (int i = 0; i < kTestEraseInsertions; ++i) {
595 KeyT key = randomizeKey(i);
596 globalAHM->insert(key, genVal(key));
597 insertedLevel.store(i, std::memory_order_release);
599 insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
603 void* testEraseEraseThread(void*) {
604 for (int i = 0; i < kTestEraseInsertions; ++i) {
606 * Make sure that we don't get ahead of the insert thread, because
607 * part of the condition for this unit test succeeding is that the
610 * Note, there is a subtle case here when a new submap is
611 * allocated: the erasing thread might get 0 from count(key)
612 * because it hasn't seen numSubMaps_ update yet. To avoid this
613 * race causing problems for the test (it's ok for real usage), we
614 * lag behind the inserter by more than just element.
619 currentLevel = insertedLevel.load(std::memory_order_acquire);
620 if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1;
621 } while (currentLevel - lag < i);
623 KeyT key = randomizeKey(i);
624 while (globalAHM->count(key)) {
625 if (globalAHM->erase(key)) {
635 // Here we have a single thread inserting some values, and several threads
636 // racing to delete the values in the order they were inserted.
637 TEST(Ahm, thread_erase_insert_race) {
638 const int kInsertThreads = 1;
639 const int kEraseThreads = 10;
641 VLOG(1) << "Testing insertion and erase with " << kInsertThreads
642 << " thread inserting and " << kEraseThreads << " threads erasing.";
644 globalAHM.reset(new AHMapT(kTestEraseInsertions / 4, config));
646 vector<pthread_t> threadIds;
647 for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) {
649 void *(*thread)(void*) =
650 (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
651 if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
652 LOG(ERROR) << "Could not start thread";
654 threadIds.push_back(tid);
657 for (size_t i = 0; i < threadIds.size(); i++) {
658 pthread_join(threadIds[i], nullptr);
661 EXPECT_TRUE(globalAHM->empty());
662 EXPECT_EQ(globalAHM->size(), 0);
664 VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
667 // Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value.
668 typedef AtomicHashArray<int32_t, int32_t> AHA;
669 AHA::Config configRace;
670 auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
671 void* atomicHashArrayInsertRaceThread(void* /* j */) {
672 AHA* arr = atomicHashArrayInsertRaceArray.get();
673 uintptr_t numInserted = 0;
674 while (!runThreadsCreatedAllThreads.load());
675 for (int i = 0; i < 2; i++) {
676 if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
680 pthread_exit((void *) numInserted);
682 TEST(Ahm, atomic_hash_array_insert_race) {
683 AHA* arr = atomicHashArrayInsertRaceArray.get();
684 int numIterations = 5000, FLAGS_numThreads = 4;
685 void* statuses[FLAGS_numThreads];
686 for (int i = 0; i < numIterations; i++) {
688 runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses);
689 EXPECT_GE(arr->size(), 1);
690 for (int j = 0; j < FLAGS_numThreads; j++) {
691 EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
696 // Repro for T#5841499. Race between erase() and find() on the same key.
697 TEST(Ahm, erase_find_race) {
698 const uint64_t limit = 10000;
699 AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
700 std::atomic<uint64_t> key {1};
702 // Invariant: all values are equal to their keys.
703 // At any moment there is one or two consecutive keys in the map.
705 std::thread write_thread([&]() {
711 map.insert(k + 1, k + 1);
716 std::thread read_thread([&]() {
718 uint64_t k = key.load();
723 auto it = map.find(k);
724 if (it != map.end()) {
725 ASSERT_EQ(k, it->second);
734 // Erase right after insert race bug repro (t9130653)
735 TEST(Ahm, erase_after_insert_race) {
736 const uint64_t limit = 10000;
737 const size_t num_threads = 100;
738 const size_t num_iters = 500;
739 AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
741 std::atomic<bool> go{false};
742 std::vector<std::thread> ts;
743 for (size_t i = 0; i < num_threads; ++i) {
744 ts.emplace_back([&]() {
748 for (size_t n = 0; n < num_iters; ++n) {
762 // Repro for a bug when iterator didn't skip empty submaps.
763 TEST(Ahm, iterator_skips_empty_submaps) {
764 AtomicHashMap<uint64_t, uint64_t>::Config config;
765 config.growthFactor = 1;
767 AtomicHashMap<uint64_t, uint64_t> map(1, config);
775 auto it = map.find(1);
777 ASSERT_NE(map.end(), it);
778 ASSERT_EQ(1, it->first);
779 ASSERT_EQ(1, it->second);
783 ASSERT_NE(map.end(), it);
784 ASSERT_EQ(3, it->first);
785 ASSERT_EQ(3, it->second);
788 ASSERT_EQ(map.end(), it);
793 void loadGlobalAha() {
794 std::cout << "loading global AHA with " << FLAGS_numThreads
796 uint64_t start = nowInUsec();
797 globalAHA = AHArrayT::create(maxBMElements, config);
798 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
799 CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) <<
800 "kNumThreads must evenly divide kNumInserts.";
801 runThreads(insertThreadArr);
802 uint64_t elapsed = nowInUsec() - start;
803 std::cout << " took " << elapsed / 1000 << " ms (" <<
804 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
805 EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements);
808 void loadGlobalAhm() {
809 std::cout << "loading global AHM with " << FLAGS_numThreads
811 uint64_t start = nowInUsec();
812 globalAHM.reset(new AHMapT(maxBMElements, config));
813 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
814 runThreads(insertThread);
815 uint64_t elapsed = nowInUsec() - start;
816 std::cout << " took " << elapsed / 1000 << " ms (" <<
817 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
818 EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
821 void loadGlobalQPAhm() {
822 std::cout << "loading global QPAHM with " << FLAGS_numThreads
824 uint64_t start = nowInUsec();
825 globalQPAHM.reset(new QPAHMapT(maxBMElements, qpConfig));
826 numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
827 runThreads(qpInsertThread);
828 uint64_t elapsed = nowInUsec() - start;
829 std::cout << " took " << elapsed / 1000 << " ms (" <<
830 (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
831 EXPECT_EQ(globalQPAHM->size(), FLAGS_numBMElements);
836 BENCHMARK(st_aha_find, iters) {
837 CHECK_LE(iters, FLAGS_numBMElements);
838 for (size_t i = 0; i < iters; i++) {
839 KeyT key = randomizeKey(i);
840 folly::doNotOptimizeAway(globalAHA->find(key)->second);
844 BENCHMARK(st_ahm_find, iters) {
845 CHECK_LE(iters, FLAGS_numBMElements);
846 for (size_t i = 0; i < iters; i++) {
847 KeyT key = randomizeKey(i);
848 folly::doNotOptimizeAway(globalAHM->find(key)->second);
852 BENCHMARK(st_qpahm_find, iters) {
853 CHECK_LE(iters, FLAGS_numBMElements);
854 for (size_t i = 0; i < iters; i++) {
855 KeyT key = randomizeKey(i);
856 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
860 BENCHMARK_DRAW_LINE()
862 BENCHMARK(mt_ahm_miss, iters) {
863 CHECK_LE(iters, FLAGS_numBMElements);
864 numOpsPerThread = iters / FLAGS_numThreads;
865 runThreads([](void* jj) -> void* {
866 int64_t j = (int64_t) jj;
867 while (!runThreadsCreatedAllThreads.load());
868 for (int i = 0; i < numOpsPerThread; ++i) {
869 KeyT key = i + j * numOpsPerThread * 100;
870 folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
876 BENCHMARK(mt_qpahm_miss, iters) {
877 CHECK_LE(iters, FLAGS_numBMElements);
878 numOpsPerThread = iters / FLAGS_numThreads;
879 runThreads([](void* jj) -> void* {
880 int64_t j = (int64_t) jj;
881 while (!runThreadsCreatedAllThreads.load());
882 for (int i = 0; i < numOpsPerThread; ++i) {
883 KeyT key = i + j * numOpsPerThread * 100;
884 folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
890 BENCHMARK(st_ahm_miss, iters) {
891 CHECK_LE(iters, FLAGS_numBMElements);
892 for (size_t i = 0; i < iters; i++) {
893 KeyT key = randomizeKey(i + iters * 100);
894 folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
898 BENCHMARK(st_qpahm_miss, iters) {
899 CHECK_LE(iters, FLAGS_numBMElements);
900 for (size_t i = 0; i < iters; i++) {
901 KeyT key = randomizeKey(i + iters * 100);
902 folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
906 BENCHMARK(mt_ahm_find_insert_mix, iters) {
907 CHECK_LE(iters, FLAGS_numBMElements);
908 numOpsPerThread = iters / FLAGS_numThreads;
909 runThreads([](void* jj) -> void* {
910 int64_t j = (int64_t) jj;
911 while (!runThreadsCreatedAllThreads.load());
912 for (int i = 0; i < numOpsPerThread; ++i) {
913 if (i % 128) { // ~1% insert mix
914 KeyT key = randomizeKey(i + j * numOpsPerThread);
915 folly::doNotOptimizeAway(globalAHM->find(key)->second);
917 KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
918 globalAHM->insert(key, genVal(key));
926 BENCHMARK(mt_qpahm_find_insert_mix, iters) {
927 CHECK_LE(iters, FLAGS_numBMElements);
928 numOpsPerThread = iters / FLAGS_numThreads;
929 runThreads([](void* jj) -> void* {
930 int64_t j = (int64_t) jj;
931 while (!runThreadsCreatedAllThreads.load());
932 for (int i = 0; i < numOpsPerThread; ++i) {
933 if (i % 128) { // ~1% insert mix
934 KeyT key = randomizeKey(i + j * numOpsPerThread);
935 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
937 KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
938 globalQPAHM->insert(key, genVal(key));
945 BENCHMARK(mt_aha_find, iters) {
946 CHECK_LE(iters, FLAGS_numBMElements);
947 numOpsPerThread = iters / FLAGS_numThreads;
948 runThreads([](void* jj) -> void* {
949 int64_t j = (int64_t) jj;
950 while (!runThreadsCreatedAllThreads.load());
951 for (int i = 0; i < numOpsPerThread; ++i) {
952 KeyT key = randomizeKey(i + j * numOpsPerThread);
953 folly::doNotOptimizeAway(globalAHA->find(key)->second);
959 BENCHMARK(mt_ahm_find, iters) {
960 CHECK_LE(iters, FLAGS_numBMElements);
961 numOpsPerThread = iters / FLAGS_numThreads;
962 runThreads([](void* jj) -> void* {
963 int64_t j = (int64_t) jj;
964 while (!runThreadsCreatedAllThreads.load());
965 for (int i = 0; i < numOpsPerThread; ++i) {
966 KeyT key = randomizeKey(i + j * numOpsPerThread);
967 folly::doNotOptimizeAway(globalAHM->find(key)->second);
973 BENCHMARK(mt_qpahm_find, iters) {
974 CHECK_LE(iters, FLAGS_numBMElements);
975 numOpsPerThread = iters / FLAGS_numThreads;
976 runThreads([](void* jj) -> void* {
977 int64_t j = (int64_t) jj;
978 while (!runThreadsCreatedAllThreads.load());
979 for (int i = 0; i < numOpsPerThread; ++i) {
980 KeyT key = randomizeKey(i + j * numOpsPerThread);
981 folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
988 BENCHMARK(st_baseline_modulus_and_random, iters) {
989 for (size_t i = 0; i < iters; ++i) {
990 k = randomizeKey(i) % iters;
994 // insertions go last because they reset the map
996 BENCHMARK(mt_ahm_insert, iters) {
998 globalAHM.reset(new AHMapT(int(iters * LF), config));
999 numOpsPerThread = iters / FLAGS_numThreads;
1001 runThreads(insertThread);
1004 BENCHMARK(mt_qpahm_insert, iters) {
1006 globalQPAHM.reset(new QPAHMapT(int(iters * LF), qpConfig));
1007 numOpsPerThread = iters / FLAGS_numThreads;
1009 runThreads(qpInsertThread);
1012 BENCHMARK(st_ahm_insert, iters) {
1013 folly::BenchmarkSuspender susp;
1014 std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
1017 for (size_t i = 0; i < iters; i++) {
1018 KeyT key = randomizeKey(i);
1019 ahm->insert(key, genVal(key));
1023 BENCHMARK(st_qpahm_insert, iters) {
1024 folly::BenchmarkSuspender susp;
1025 std::unique_ptr<QPAHMapT> ahm(new QPAHMapT(int(iters * LF), qpConfig));
1028 for (size_t i = 0; i < iters; i++) {
1029 KeyT key = randomizeKey(i);
1030 ahm->insert(key, genVal(key));
1034 void benchmarkSetup() {
1035 config.maxLoadFactor = FLAGS_maxLoadFactor;
1036 qpConfig.maxLoadFactor = FLAGS_maxLoadFactor;
1037 configRace.maxLoadFactor = 0.5;
1038 int numCores = sysconf(_SC_NPROCESSORS_ONLN);
1042 string numIters = folly::to<string>(
1043 std::min(1000000, int(FLAGS_numBMElements)));
1045 gflags::SetCommandLineOptionWithMode(
1046 "bm_max_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
1048 gflags::SetCommandLineOptionWithMode(
1049 "bm_min_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
1051 string numCoresStr = folly::to<string>(numCores);
1052 gflags::SetCommandLineOptionWithMode(
1053 "numThreads", numCoresStr.c_str(), gflags::SET_FLAG_IF_DEFAULT
1056 std::cout << "\nRunning AHM benchmarks on machine with " << numCores
1057 << " logical cores.\n"
1058 " num elements per map: " << FLAGS_numBMElements << "\n"
1059 << " num threads for mt tests: " << FLAGS_numThreads << "\n"
1060 << " AHM load factor: " << FLAGS_targetLoadFactor << "\n\n";
1063 int main(int argc, char** argv) {
1064 testing::InitGoogleTest(&argc, argv);
1065 gflags::ParseCommandLineFlags(&argc, &argv, true);
1066 auto ret = RUN_ALL_TESTS();
1067 if (!ret && FLAGS_benchmark) {
1069 folly::runBenchmarks();
1075 loading global AHA with 8 threads...
1076 took 487 ms (40 ns/insert).
1077 loading global AHM with 8 threads...
1078 took 478 ms (39 ns/insert).
1079 loading global QPAHM with 8 threads...
1080 took 478 ms (39 ns/insert).
1082 Running AHM benchmarks on machine with 24 logical cores.
1083 num elements per map: 12000000
1084 num threads for mt tests: 24
1085 AHM load factor: 0.75
1087 ============================================================================
1088 folly/test/AtomicHashMapTest.cpp relative time/iter iters/s
1089 ============================================================================
1090 st_aha_find 92.63ns 10.80M
1091 st_ahm_find 107.78ns 9.28M
1092 st_qpahm_find 90.69ns 11.03M
1093 ----------------------------------------------------------------------------
1094 mt_ahm_miss 2.09ns 477.36M
1095 mt_qpahm_miss 1.37ns 728.82M
1096 st_ahm_miss 241.07ns 4.15M
1097 st_qpahm_miss 223.17ns 4.48M
1098 mt_ahm_find_insert_mix 8.05ns 124.24M
1099 mt_qpahm_find_insert_mix 9.10ns 109.85M
1100 mt_aha_find 6.82ns 146.68M
1101 mt_ahm_find 7.95ns 125.77M
1102 mt_qpahm_find 6.81ns 146.83M
1103 st_baseline_modulus_and_random 6.02ns 166.03M
1104 mt_ahm_insert 14.29ns 69.97M
1105 mt_qpahm_insert 11.68ns 85.61M
1106 st_ahm_insert 125.39ns 7.98M
1107 st_qpahm_insert 128.76ns 7.77M
1108 ============================================================================