folly/test/AtomicHashMapTest.cpp

   1 /*
   2  * Copyright 2014 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <folly/AtomicHashMap.h>
  18
  19 #include <glog/logging.h>
  20 #include <gtest/gtest.h>
  21 #include <sys/time.h>
  22 #include <thread>
  23 #include <atomic>
  24 #include <memory>
  25 #include <folly/Benchmark.h>
  26 #include <folly/Conv.h>
  27
  28 using std::vector;
  29 using std::string;
  30 using folly::AtomicHashMap;
  31 using folly::AtomicHashArray;
  32
  33 // Tunables:
  34 DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
  35 DEFINE_double(maxLoadFactor, 0.80, "Max before growth.");
  36 DEFINE_int32(numThreads, 8, "Threads to use for concurrency tests.");
  37 DEFINE_int64(numBMElements, 12 * 1000 * 1000, "Size of maps for benchmarks.");
  38
  39 const double LF = FLAGS_maxLoadFactor / FLAGS_targetLoadFactor;
  40 const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
  41
  42 static int64_t nowInUsec() {
  43   timeval tv;
  44   gettimeofday(&tv, 0);
  45   return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
  46 }
  47
  48 TEST(Ahm, BasicStrings) {
  49   typedef AtomicHashMap<int64_t,string> AHM;
  50   AHM myMap(1024);
  51   EXPECT_TRUE(myMap.begin() == myMap.end());
  52
  53   for (int i = 0; i < 100; ++i) {
  54     myMap.insert(make_pair(i, folly::to<string>(i)));
  55   }
  56   for (int i = 0; i < 100; ++i) {
  57     EXPECT_EQ(myMap.find(i)->second, folly::to<string>(i));
  58   }
  59
  60   myMap.insert(std::make_pair(999, "A"));
  61   myMap.insert(std::make_pair(999, "B"));
  62   EXPECT_EQ(myMap.find(999)->second, "A"); // shouldn't have overwritten
  63   myMap.find(999)->second = "B";
  64   myMap.find(999)->second = "C";
  65   EXPECT_EQ(myMap.find(999)->second, "C");
  66   EXPECT_EQ(myMap.find(999)->first, 999);
  67 }
  68
  69
  70 TEST(Ahm, BasicNoncopyable) {
  71   typedef AtomicHashMap<int64_t,std::unique_ptr<int>> AHM;
  72   AHM myMap(1024);
  73   EXPECT_TRUE(myMap.begin() == myMap.end());
  74
  75   for (int i = 0; i < 50; ++i) {
  76     myMap.insert(make_pair(i, std::unique_ptr<int>(new int(i))));
  77   }
  78   for (int i = 50; i < 100; ++i) {
  79     myMap.insert(i, std::unique_ptr<int>(new int (i)));
  80   }
  81   for (int i = 0; i < 100; ++i) {
  82     EXPECT_EQ(*(myMap.find(i)->second), i);
  83   }
  84   for (int i = 0; i < 100; i+=4) {
  85     myMap.erase(i);
  86   }
  87   for (int i = 0; i < 100; i+=4) {
  88     EXPECT_EQ(myMap.find(i), myMap.end());
  89   }
  90 }
  91
  92 typedef int32_t     KeyT;
  93 typedef int64_t     KeyTBig;
  94 typedef int32_t     ValueT;
  95
  96 typedef AtomicHashMap<KeyT,ValueT> AHMapT;
  97 typedef AHMapT::value_type RecordT;
  98 typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
  99
 100 AHArrayT::Config config;
 101 static AHArrayT::SmartPtr globalAHA(nullptr);
 102 static std::unique_ptr<AHMapT> globalAHM;
 103
 104 // Generate a deterministic value based on an input key
 105 static int genVal(int key) {
 106   return key / 3;
 107 }
 108
 109 TEST(Ahm, grow) {
 110   VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
 111     sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
 112   int numEntries = 10000;
 113   float sizeFactor = 0.46;
 114
 115   std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
 116
 117   // load map - make sure we succeed and the index is accurate
 118   bool success = true;
 119   for (uint64_t i = 0; i < numEntries; i++) {
 120     auto ret = m->insert(RecordT(i, genVal(i)));
 121     success &= ret.second;
 122     success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
 123   }
 124   // Overwrite vals to make sure there are no dups
 125   // Every insert should fail because the keys are already in the map.
 126   success = true;
 127   for (uint64_t i = 0; i < numEntries; i++) {
 128     auto ret = m->insert(RecordT(i, genVal(i * 2)));
 129     success &= (ret.second == false);  // fail on collision
 130     success &= (ret.first->second == genVal(i)); // return the previous value
 131     success &= (m->findAt(ret.first.getIndex())->second == genVal(i));
 132   }
 133   EXPECT_TRUE(success);
 134
 135   // check correctness
 136   size_t cap = m->capacity();
 137   ValueT val;
 138   EXPECT_GT(m->numSubMaps(), 1);  // make sure we grew
 139   success = true;
 140   EXPECT_EQ(m->size(), numEntries);
 141   for (int i = 0; i < numEntries; i++) {
 142     success &= (m->find(i)->second == genVal(i));
 143   }
 144   EXPECT_TRUE(success);
 145
 146   // Check findAt
 147   success = true;
 148   KeyT key(0);
 149   AHMapT::const_iterator retIt;
 150   for (uint64_t i = 0; i < numEntries; i++) {
 151     retIt = m->find(i);
 152     retIt = m->findAt(retIt.getIndex());
 153     success &= (retIt->second == genVal(i));
 154     success &= (retIt->first == i);
 155   }
 156   EXPECT_TRUE(success);
 157
 158   // Try modifying value
 159   m->find(8)->second = 5309;
 160   EXPECT_EQ(m->find(8)->second, 5309);
 161
 162   // check clear()
 163   m->clear();
 164   success = true;
 165   for (uint64_t i = 0; i < numEntries / 2; i++) {
 166     success &= m->insert(RecordT(i, genVal(i))).second;
 167   }
 168   EXPECT_TRUE(success);
 169   EXPECT_EQ(m->size(), numEntries / 2);
 170 }
 171
 172 TEST(Ahm, iterator) {
 173   int numEntries = 10000;
 174   float sizeFactor = .46;
 175   std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
 176
 177   // load map - make sure we succeed and the index is accurate
 178   for (uint64_t i = 0; i < numEntries; i++) {
 179     m->insert(RecordT(i, genVal(i)));
 180   }
 181
 182   bool success = true;
 183   int count = 0;
 184   FOR_EACH(it, *m) {
 185     success &= (it->second == genVal(it->first));
 186     ++count;
 187   }
 188   EXPECT_TRUE(success);
 189   EXPECT_EQ(count, numEntries);
 190 }
 191
 192 class Counters {
 193 private:
 194   // Note: Unfortunately can't currently put a std::atomic<int64_t> in
 195   // the value in ahm since it doesn't support types that are both non-copy
 196   // and non-move constructible yet.
 197   AtomicHashMap<int64_t,int64_t> ahm;
 198
 199 public:
 200   explicit Counters(size_t numCounters) : ahm(numCounters) {}
 201
 202   void increment(int64_t obj_id) {
 203     auto ret = ahm.insert(std::make_pair(obj_id, 1));
 204     if (!ret.second) {
 205       // obj_id already exists, increment count
 206       __sync_fetch_and_add(&ret.first->second, 1);
 207     }
 208   }
 209
 210   int64_t getValue(int64_t obj_id) {
 211     auto ret = ahm.find(obj_id);
 212     return ret != ahm.end() ? ret->second : 0;
 213   }
 214
 215   // export the counters without blocking increments
 216   string toString() {
 217     string ret = "{\n";
 218     ret.reserve(ahm.size() * 32);
 219     for (const auto& e : ahm) {
 220       ret += folly::to<string>(
 221         "  [", e.first, ":", e.second, "]\n");
 222     }
 223     ret += "}\n";
 224     return ret;
 225   }
 226 };
 227
 228 // If you get an error "terminate called without an active exception", there
 229 // might be too many threads getting created - decrease numKeys and/or mult.
 230 TEST(Ahm, counter) {
 231   const int numKeys = 10;
 232   const int mult = 10;
 233   Counters c(numKeys);
 234   vector<int64_t> keys;
 235   FOR_EACH_RANGE(i, 1, numKeys) {
 236     keys.push_back(i);
 237   }
 238   vector<std::thread> threads;
 239   for (auto key : keys) {
 240     FOR_EACH_RANGE(i, 0, key * mult) {
 241       threads.push_back(std::thread([&, key] { c.increment(key); }));
 242     }
 243   }
 244   for (auto& t : threads) {
 245     t.join();
 246   }
 247   string str = c.toString();
 248   for (auto key : keys) {
 249     int val = key * mult;
 250     EXPECT_EQ(val, c.getValue(key));
 251     EXPECT_NE(string::npos, str.find(folly::to<string>("[",key,":",val,"]")));
 252   }
 253 }
 254
 255 class Integer {
 256
 257  public:
 258   explicit Integer(KeyT v = 0) : v_(v) {}
 259
 260   Integer& operator=(const Integer& a) {
 261     static bool throwException_ = false;
 262     throwException_ = !throwException_;
 263     if (throwException_) {
 264       throw 1;
 265     }
 266     v_ = a.v_;
 267     return *this;
 268   }
 269
 270   bool operator==(const Integer& a) const { return v_ == a.v_; }
 271
 272  private:
 273   KeyT v_;
 274 };
 275
 276 TEST(Ahm, map_exception_safety) {
 277   typedef AtomicHashMap<KeyT,Integer> MyMapT;
 278
 279   int numEntries = 10000;
 280   float sizeFactor = 0.46;
 281   std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
 282
 283   bool success = true;
 284   int count = 0;
 285   for (int i = 0; i < numEntries; i++) {
 286     try {
 287       m->insert(i, Integer(genVal(i)));
 288       success &= (m->find(i)->second == Integer(genVal(i)));
 289       ++count;
 290     } catch (...) {
 291       success &= !m->count(i);
 292     }
 293   }
 294   EXPECT_EQ(count, m->size());
 295   EXPECT_TRUE(success);
 296 }
 297
 298 TEST(Ahm, basicErase) {
 299   int numEntries = 3000;
 300
 301   std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
 302   // Iterate filling up the map and deleting all keys a few times
 303   // to test more than one subMap.
 304   for (int iterations = 0; iterations < 4; ++iterations) {
 305     // Testing insertion of keys
 306     bool success = true;
 307     for (uint64_t i = 0; i < numEntries; ++i) {
 308       success &= !(s->count(i));
 309       auto ret = s->insert(RecordT(i, i));
 310       success &= s->count(i);
 311       success &= ret.second;
 312     }
 313     EXPECT_TRUE(success);
 314     EXPECT_EQ(s->size(), numEntries);
 315
 316     // Delete every key in the map and verify that the key is gone and the the
 317     // size is correct.
 318     success = true;
 319     for (uint64_t i = 0; i < numEntries; ++i) {
 320       success &= s->erase(i);
 321       success &= (s->size() == numEntries - 1 - i);
 322       success &= !(s->count(i));
 323       success &= !(s->erase(i));
 324     }
 325     EXPECT_TRUE(success);
 326   }
 327   VLOG(1) << "Final number of subMaps = " << s->numSubMaps();
 328 }
 329
 330 namespace {
 331
 332 inline KeyT randomizeKey(int key) {
 333   // We deterministically randomize the key to more accurately simulate
 334   // real-world usage, and to avoid pathalogical performance patterns (e.g.
 335   // those related to __gnu_cxx::hash<int64_t>()(1) == 1).
 336   //
 337   // Use a hash function we don't normally use for ints to avoid interactions.
 338   return folly::hash::jenkins_rev_mix32(key);
 339 }
 340
 341 int numOpsPerThread = 0;
 342
 343 void* insertThread(void* jj) {
 344   int64_t j = (int64_t) jj;
 345   for (int i = 0; i < numOpsPerThread; ++i) {
 346     KeyT key = randomizeKey(i + j * numOpsPerThread);
 347     globalAHM->insert(key, genVal(key));
 348   }
 349   return nullptr;
 350 }
 351
 352 void* insertThreadArr(void* jj) {
 353   int64_t j = (int64_t) jj;
 354   for (int i = 0; i < numOpsPerThread; ++i) {
 355     KeyT key = randomizeKey(i + j * numOpsPerThread);
 356     globalAHA->insert(std::make_pair(key, genVal(key)));
 357   }
 358   return nullptr;
 359 }
 360
 361 std::atomic<bool> runThreadsCreatedAllThreads;
 362 void runThreads(void *(*thread)(void*), int numThreads, void **statuses) {
 363   folly::BenchmarkSuspender susp;
 364   runThreadsCreatedAllThreads.store(false);
 365   vector<pthread_t> threadIds;
 366   for (int64_t j = 0; j < numThreads; j++) {
 367     pthread_t tid;
 368     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 369        LOG(ERROR) << "Could not start thread";
 370     } else {
 371       threadIds.push_back(tid);
 372     }
 373   }
 374   susp.dismiss();
 375
 376   runThreadsCreatedAllThreads.store(true);
 377   for (int i = 0; i < threadIds.size(); ++i) {
 378     pthread_join(threadIds[i], statuses == nullptr ? nullptr : &statuses[i]);
 379   }
 380 }
 381
 382 void runThreads(void *(*thread)(void*)) {
 383   runThreads(thread, FLAGS_numThreads, nullptr);
 384 }
 385
 386 }
 387
 388 TEST(Ahm, collision_test) {
 389   const int numInserts = 1000000 / 4;
 390
 391   // Doing the same number on each thread so we collide.
 392   numOpsPerThread = numInserts;
 393
 394   float sizeFactor = 0.46;
 395   int entrySize = sizeof(KeyT) + sizeof(ValueT);
 396   VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
 397     " Byte entries replicated in " << FLAGS_numThreads <<
 398     " threads with " << FLAGS_maxLoadFactor * 100.0 << "% max load factor.";
 399
 400   globalAHM.reset(new AHMapT(int(numInserts * sizeFactor), config));
 401
 402   size_t sizeInit = globalAHM->capacity();
 403   VLOG(1) << "  Initial capacity: " << sizeInit;
 404
 405   double start = nowInUsec();
 406   runThreads([](void*) -> void* { // collisionInsertThread
 407     for (int i = 0; i < numOpsPerThread; ++i) {
 408       KeyT key = randomizeKey(i);
 409       globalAHM->insert(key, genVal(key));
 410     }
 411     return nullptr;
 412   });
 413   double elapsed = nowInUsec() - start;
 414
 415   size_t finalCap = globalAHM->capacity();
 416   size_t sizeAHM = globalAHM->size();
 417   VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
 418     " duplicate inserts (atomic).";
 419   VLOG(1) << "  Final capacity: " << finalCap << " in " <<
 420     globalAHM->numSubMaps() << " sub maps (" <<
 421     sizeAHM * 100 / finalCap << "% load factor, " <<
 422     (finalCap - sizeInit) * 100 / sizeInit << "% growth).";
 423
 424   // check correctness
 425   EXPECT_EQ(sizeAHM, numInserts);
 426   bool success = true;
 427   ValueT val;
 428   for (int i = 0; i < numInserts; ++i) {
 429     KeyT key = randomizeKey(i);
 430     success &= (globalAHM->find(key)->second == genVal(key));
 431   }
 432   EXPECT_TRUE(success);
 433
 434   // check colliding finds
 435   start = nowInUsec();
 436   runThreads([](void*) -> void* { // collisionFindThread
 437     KeyT key(0);
 438     for (int i = 0; i < numOpsPerThread; ++i) {
 439       globalAHM->find(key);
 440     }
 441     return nullptr;
 442   });
 443
 444   elapsed = nowInUsec() - start;
 445
 446   VLOG(1) << elapsed/sizeAHM << " usec per " << FLAGS_numThreads <<
 447     " duplicate finds (atomic).";
 448 }
 449
 450 namespace {
 451
 452 const int kInsertPerThread = 100000;
 453 int raceFinalSizeEstimate;
 454
 455 void* raceIterateThread(void* jj) {
 456   int64_t j = (int64_t) jj;
 457   int count = 0;
 458
 459   AHMapT::iterator it = globalAHM->begin();
 460   AHMapT::iterator end = globalAHM->end();
 461   for (; it != end; ++it) {
 462     ++count;
 463     if (count > raceFinalSizeEstimate) {
 464       EXPECT_FALSE("Infinite loop in iterator.");
 465       return nullptr;
 466     }
 467   }
 468   return nullptr;
 469 }
 470
 471 void* raceInsertRandomThread(void* jj) {
 472   int64_t j = (int64_t) jj;
 473   for (int i = 0; i < kInsertPerThread; ++i) {
 474     KeyT key = rand();
 475     globalAHM->insert(key, genVal(key));
 476   }
 477   return nullptr;
 478 }
 479
 480 }
 481
 482 // Test for race conditions when inserting and iterating at the same time and
 483 // creating multiple submaps.
 484 TEST(Ahm, race_insert_iterate_thread_test) {
 485   const int kInsertThreads = 20;
 486   const int kIterateThreads = 20;
 487   raceFinalSizeEstimate = kInsertThreads * kInsertPerThread;
 488
 489   VLOG(1) << "Testing iteration and insertion with " << kInsertThreads
 490     << " threads inserting and " << kIterateThreads << " threads iterating.";
 491
 492   globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config));
 493
 494   vector<pthread_t> threadIds;
 495   for (int64_t j = 0; j < kInsertThreads + kIterateThreads; j++) {
 496     pthread_t tid;
 497     void *(*thread)(void*) =
 498       (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
 499     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 500       LOG(ERROR) << "Could not start thread";
 501     } else {
 502       threadIds.push_back(tid);
 503     }
 504   }
 505   for (int i = 0; i < threadIds.size(); ++i) {
 506     pthread_join(threadIds[i], nullptr);
 507   }
 508   VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
 509   VLOG(1) << "Final size of map " << globalAHM->size();
 510 }
 511
 512 namespace {
 513
 514 const int kTestEraseInsertions = 200000;
 515 std::atomic<int32_t> insertedLevel;
 516
 517 void* testEraseInsertThread(void*) {
 518   for (int i = 0; i < kTestEraseInsertions; ++i) {
 519     KeyT key = randomizeKey(i);
 520     globalAHM->insert(key, genVal(key));
 521     insertedLevel.store(i, std::memory_order_release);
 522   }
 523   insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
 524   return nullptr;
 525 }
 526
 527 void* testEraseEraseThread(void*) {
 528   for (int i = 0; i < kTestEraseInsertions; ++i) {
 529     /*
 530      * Make sure that we don't get ahead of the insert thread, because
 531      * part of the condition for this unit test succeeding is that the
 532      * map ends up empty.
 533      *
 534      * Note, there is a subtle case here when a new submap is
 535      * allocated: the erasing thread might get 0 from count(key)
 536      * because it hasn't seen numSubMaps_ update yet.  To avoid this
 537      * race causing problems for the test (it's ok for real usage), we
 538      * lag behind the inserter by more than just element.
 539      */
 540     const int lag = 10;
 541     int currentLevel;
 542     do {
 543       currentLevel = insertedLevel.load(std::memory_order_acquire);
 544       if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1;
 545     } while (currentLevel - lag < i);
 546
 547     KeyT key = randomizeKey(i);
 548     while (globalAHM->count(key)) {
 549       if (globalAHM->erase(key)) {
 550         break;
 551       }
 552     }
 553   }
 554   return nullptr;
 555 }
 556
 557 }
 558
 559 // Here we have a single thread inserting some values, and several threads
 560 // racing to delete the values in the order they were inserted.
 561 TEST(Ahm, thread_erase_insert_race) {
 562   const int kInsertThreads = 1;
 563   const int kEraseThreads = 10;
 564
 565   VLOG(1) << "Testing insertion and erase with " << kInsertThreads
 566     << " thread inserting and " << kEraseThreads << " threads erasing.";
 567
 568   globalAHM.reset(new AHMapT(kTestEraseInsertions / 4, config));
 569
 570   vector<pthread_t> threadIds;
 571   for (int64_t j = 0; j < kInsertThreads + kEraseThreads; j++) {
 572     pthread_t tid;
 573     void *(*thread)(void*) =
 574       (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
 575     if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
 576       LOG(ERROR) << "Could not start thread";
 577     } else {
 578       threadIds.push_back(tid);
 579     }
 580   }
 581   for (int i = 0; i < threadIds.size(); i++) {
 582     pthread_join(threadIds[i], nullptr);
 583   }
 584
 585   EXPECT_TRUE(globalAHM->empty());
 586   EXPECT_EQ(globalAHM->size(), 0);
 587
 588   VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
 589 }
 590
 591 // Repro for T#483734: Duplicate AHM inserts due to incorrect AHA return value.
 592 typedef AtomicHashArray<int32_t, int32_t> AHA;
 593 AHA::Config configRace;
 594 auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
 595 void* atomicHashArrayInsertRaceThread(void* j) {
 596   AHA* arr = atomicHashArrayInsertRaceArray.get();
 597   uintptr_t numInserted = 0;
 598   while (!runThreadsCreatedAllThreads.load());
 599   for (int i = 0; i < 2; i++) {
 600     if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
 601       numInserted++;
 602     }
 603   }
 604   pthread_exit((void *) numInserted);
 605 }
 606 TEST(Ahm, atomic_hash_array_insert_race) {
 607   AHA* arr = atomicHashArrayInsertRaceArray.get();
 608   int numIterations = 50000, FLAGS_numThreads = 4;
 609   void* statuses[FLAGS_numThreads];
 610   for (int i = 0; i < numIterations; i++) {
 611     arr->clear();
 612     runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses);
 613     EXPECT_GE(arr->size(), 1);
 614     for (int j = 0; j < FLAGS_numThreads; j++) {
 615       EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
 616     }
 617   }
 618 }
 619
 620 namespace {
 621
 622 void loadGlobalAha() {
 623   std::cout << "loading global AHA with " << FLAGS_numThreads
 624             << " threads...\n";
 625   uint64_t start = nowInUsec();
 626   globalAHA = AHArrayT::create(maxBMElements, config);
 627   numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
 628   CHECK_EQ(0, FLAGS_numBMElements % FLAGS_numThreads) <<
 629     "kNumThreads must evenly divide kNumInserts.";
 630   runThreads(insertThreadArr);
 631   uint64_t elapsed = nowInUsec() - start;
 632   std::cout << "  took " << elapsed / 1000 << " ms (" <<
 633     (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
 634   EXPECT_EQ(globalAHA->size(), FLAGS_numBMElements);
 635 }
 636
 637 void loadGlobalAhm() {
 638   std::cout << "loading global AHM with " << FLAGS_numThreads
 639             << " threads...\n";
 640   uint64_t start = nowInUsec();
 641   globalAHM.reset(new AHMapT(maxBMElements, config));
 642   numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
 643   runThreads(insertThread);
 644   uint64_t elapsed = nowInUsec() - start;
 645   std::cout << "  took " << elapsed / 1000 << " ms (" <<
 646     (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
 647   EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
 648 }
 649
 650 }
 651
 652 BENCHMARK(st_aha_find, iters) {
 653   CHECK_LE(iters, FLAGS_numBMElements);
 654   for (int i = 0; i < iters; i++) {
 655     KeyT key = randomizeKey(i);
 656     folly::doNotOptimizeAway(globalAHA->find(key)->second);
 657   }
 658 }
 659
 660 BENCHMARK(st_ahm_find, iters) {
 661   CHECK_LE(iters, FLAGS_numBMElements);
 662   for (int i = 0; i < iters; i++) {
 663     KeyT key = randomizeKey(i);
 664     folly::doNotOptimizeAway(globalAHM->find(key)->second);
 665   }
 666 }
 667
 668 BENCHMARK_DRAW_LINE()
 669
 670 BENCHMARK(mt_ahm_miss, iters) {
 671   CHECK_LE(iters, FLAGS_numBMElements);
 672   numOpsPerThread = iters / FLAGS_numThreads;
 673   runThreads([](void* jj) -> void* {
 674     int64_t j = (int64_t) jj;
 675     while (!runThreadsCreatedAllThreads.load());
 676     for (int i = 0; i < numOpsPerThread; ++i) {
 677       KeyT key = i + j * numOpsPerThread * 100;
 678       folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
 679     }
 680     return nullptr;
 681   });
 682 }
 683
 684 BENCHMARK(st_ahm_miss, iters) {
 685   CHECK_LE(iters, FLAGS_numBMElements);
 686   for (int i = 0; i < iters; i++) {
 687     KeyT key = randomizeKey(i + iters * 100);
 688     folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
 689   }
 690 }
 691
 692 BENCHMARK(mt_ahm_find_insert_mix, iters) {
 693   CHECK_LE(iters, FLAGS_numBMElements);
 694   numOpsPerThread = iters / FLAGS_numThreads;
 695   runThreads([](void* jj) -> void* {
 696     int64_t j = (int64_t) jj;
 697     while (!runThreadsCreatedAllThreads.load());
 698     for (int i = 0; i < numOpsPerThread; ++i) {
 699       if (i % 128) {  // ~1% insert mix
 700         KeyT key = randomizeKey(i + j * numOpsPerThread);
 701         folly::doNotOptimizeAway(globalAHM->find(key)->second);
 702       } else {
 703         KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
 704         globalAHM->insert(key, genVal(key));
 705       }
 706     }
 707     return nullptr;
 708   });
 709 }
 710
 711 BENCHMARK(mt_aha_find, iters) {
 712   CHECK_LE(iters, FLAGS_numBMElements);
 713   numOpsPerThread = iters / FLAGS_numThreads;
 714   runThreads([](void* jj) -> void* {
 715       int64_t j = (int64_t) jj;
 716       while (!runThreadsCreatedAllThreads.load());
 717       for (int i = 0; i < numOpsPerThread; ++i) {
 718         KeyT key = randomizeKey(i + j * numOpsPerThread);
 719         folly::doNotOptimizeAway(globalAHA->find(key)->second);
 720       }
 721       return nullptr;
 722     });
 723 }
 724
 725 BENCHMARK(mt_ahm_find, iters) {
 726   CHECK_LE(iters, FLAGS_numBMElements);
 727   numOpsPerThread = iters / FLAGS_numThreads;
 728   runThreads([](void* jj) -> void* {
 729     int64_t j = (int64_t) jj;
 730     while (!runThreadsCreatedAllThreads.load());
 731     for (int i = 0; i < numOpsPerThread; ++i) {
 732       KeyT key = randomizeKey(i + j * numOpsPerThread);
 733       folly::doNotOptimizeAway(globalAHM->find(key)->second);
 734     }
 735     return nullptr;
 736   });
 737 }
 738
 739 KeyT k;
 740 BENCHMARK(st_baseline_modulus_and_random, iters) {
 741   for (int i = 0; i < iters; ++i) {
 742     k = randomizeKey(i) % iters;
 743   }
 744 }
 745
 746 // insertions go last because they reset the map
 747
 748 BENCHMARK(mt_ahm_insert, iters) {
 749   BENCHMARK_SUSPEND {
 750     globalAHM.reset(new AHMapT(int(iters * LF), config));
 751     numOpsPerThread = iters / FLAGS_numThreads;
 752   }
 753   runThreads(insertThread);
 754 }
 755
 756 BENCHMARK(st_ahm_insert, iters) {
 757   folly::BenchmarkSuspender susp;
 758   std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
 759   susp.dismiss();
 760
 761   for (int i = 0; i < iters; i++) {
 762     KeyT key = randomizeKey(i);
 763     ahm->insert(key, genVal(key));
 764   }
 765 }
 766
 767 void benchmarkSetup() {
 768   config.maxLoadFactor = FLAGS_maxLoadFactor;
 769   configRace.maxLoadFactor = 0.5;
 770   int numCores = sysconf(_SC_NPROCESSORS_ONLN);
 771   loadGlobalAha();
 772   loadGlobalAhm();
 773   string numIters = folly::to<string>(
 774     std::min(1000000, int(FLAGS_numBMElements)));
 775
 776   gflags::SetCommandLineOptionWithMode(
 777     "bm_max_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
 778   );
 779   gflags::SetCommandLineOptionWithMode(
 780     "bm_min_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
 781   );
 782   string numCoresStr = folly::to<string>(numCores);
 783   gflags::SetCommandLineOptionWithMode(
 784     "numThreads", numCoresStr.c_str(), gflags::SET_FLAG_IF_DEFAULT
 785   );
 786
 787   std::cout << "\nRunning AHM benchmarks on machine with " << numCores
 788     << " logical cores.\n"
 789        "  num elements per map: " << FLAGS_numBMElements << "\n"
 790     << "  num threads for mt tests: " << FLAGS_numThreads << "\n"
 791     << "  AHM load factor: " << FLAGS_targetLoadFactor << "\n\n";
 792 }
 793
 794 int main(int argc, char** argv) {
 795   testing::InitGoogleTest(&argc, argv);
 796   gflags::ParseCommandLineFlags(&argc, &argv, true);
 797   auto ret = RUN_ALL_TESTS();
 798   if (!ret && FLAGS_benchmark) {
 799     benchmarkSetup();
 800     folly::runBenchmarks();
 801   }
 802   return ret;
 803 }
 804
 805 /*
 806 Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled
 807   (12 physical cores, 12 MB cache, 72 GB RAM)
 808
 809 Running AHM benchmarks on machine with 24 logical cores.
 810   num elements per map: 12000000
 811   num threads for mt tests: 24
 812   AHM load factor: 0.75
 813
 814 Benchmark                               Iters   Total t    t/iter iter/sec
 815 ------------------------------------------------------------------------------
 816 Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
 817 *       BM_mt_aha_find                1000000  7.767 ms  7.767 ns  122.8 M
 818  +0.81% BM_mt_ahm_find                1000000   7.83 ms   7.83 ns  121.8 M
 819 ------------------------------------------------------------------------------
 820 Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
 821 *       BM_st_aha_find                1000000  57.83 ms  57.83 ns  16.49 M
 822  +77.9% BM_st_ahm_find                1000000  102.9 ms  102.9 ns   9.27 M
 823 ------------------------------------------------------------------------------
 824 BM_mt_ahm_miss                        1000000  2.937 ms  2.937 ns  324.7 M
 825 BM_st_ahm_miss                        1000000  164.2 ms  164.2 ns  5.807 M
 826 BM_mt_ahm_find_insert_mix             1000000  8.797 ms  8.797 ns  108.4 M
 827 BM_mt_ahm_insert                      1000000  17.39 ms  17.39 ns  54.83 M
 828 BM_st_ahm_insert                      1000000  106.8 ms  106.8 ns   8.93 M
 829 BM_st_baseline_modulus_and_rando      1000000  6.223 ms  6.223 ns  153.2 M
 830 */