Ensure curly-braces around control-flow

[folly.git] / folly / test / AtomicHashMapTest.cpp
diff --git a/folly/test/AtomicHashMapTest.cpp b/folly/test/AtomicHashMapTest.cpp

index 84105965221e0d0d48640dbfc90b7a61d9e7369c..e2bedea47cd3a57dd7a5760a96adf958496c794e 100644 (file)
--- a/folly/test/AtomicHashMapTest.cpp
+++ b/folly/test/AtomicHashMapTest.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -14,21 +14,24 @@
   * limitations under the License.
   */
  
-#include "folly/AtomicHashMap.h"
+#include <folly/AtomicHashMap.h>
  
-#include <glog/logging.h>
-#include <gtest/gtest.h>
-#include <sys/time.h>
-#include <thread>
  #include <atomic>
  #include <memory>
-#include "folly/Benchmark.h"
-#include "folly/Conv.h"
+#include <thread>
+
+#include <glog/logging.h>
+
+#include <folly/Benchmark.h>
+#include <folly/Conv.h>
+#include <folly/portability/GTest.h>
+#include <folly/portability/SysTime.h>
  
  using std::vector;
  using std::string;
  using folly::AtomicHashMap;
  using folly::AtomicHashArray;
+using folly::StringPiece;
  
  // Tunables:
  DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction.");
@@ -41,7 +44,7 @@ const int maxBMElements = int(FLAGS_numBMElements * LF); // hit our target LF.
  
  static int64_t nowInUsec() {
    timeval tv;
-  gettimeofday(&tv, 0);
+  gettimeofday(&tv, nullptr);
    return int64_t(tv.tv_sec) * 1000 * 1000 + tv.tv_usec;
  }
  
@@ -78,39 +81,113 @@ TEST(Ahm, BasicNoncopyable) {
    for (int i = 50; i < 100; ++i) {
      myMap.insert(i, std::unique_ptr<int>(new int (i)));
    }
-  for (int i = 0; i < 100; ++i) {
+  for (int i = 100; i < 150; ++i) {
+    myMap.emplace(i, new int (i));
+  }
+  for (int i = 150; i < 200; ++i) {
+    myMap.emplace(i, new int (i), std::default_delete<int>());
+  }
+  for (int i = 0; i < 200; ++i) {
      EXPECT_EQ(*(myMap.find(i)->second), i);
    }
-  for (int i = 0; i < 100; i+=4) {
+  for (int i = 0; i < 200; i+=4) {
      myMap.erase(i);
    }
-  for (int i = 0; i < 100; i+=4) {
+  for (int i = 0; i < 200; i+=4) {
      EXPECT_EQ(myMap.find(i), myMap.end());
    }
  }
  
  typedef int32_t     KeyT;
-typedef int64_t     KeyTBig;
  typedef int32_t     ValueT;
  
  typedef AtomicHashMap<KeyT,ValueT> AHMapT;
  typedef AHMapT::value_type RecordT;
  typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
-
  AHArrayT::Config config;
+typedef folly::QuadraticProbingAtomicHashMap<KeyT,ValueT> QPAHMapT;
+QPAHMapT::Config qpConfig;
  static AHArrayT::SmartPtr globalAHA(nullptr);
  static std::unique_ptr<AHMapT> globalAHM;
+static std::unique_ptr<QPAHMapT> globalQPAHM;
  
  // Generate a deterministic value based on an input key
  static int genVal(int key) {
    return key / 3;
  }
  
+static bool legalKey(const char* a);
+
+struct EqTraits {
+  bool operator()(const char* a, const char* b) {
+    return legalKey(a) && (strcmp(a, b) == 0);
+  }
+  bool operator()(const char* a, const char& b) {
+    return legalKey(a) && (a[0] != '\0') && (a[0] == b);
+  }
+  bool operator()(const char* a, const StringPiece b) {
+    return legalKey(a) &&
+      (strlen(a) == b.size()) && (strcmp(a, b.begin()) == 0);
+  }
+};
+
+struct HashTraits {
+  size_t operator()(const char* a) {
+    size_t result = 0;
+    while (a[0] != 0) {
+      result += static_cast<size_t>(*(a++));
+    }
+    return result;
+  }
+  size_t operator()(const char& a) {
+    return static_cast<size_t>(a);
+  }
+  size_t operator()(const StringPiece a) {
+    size_t result = 0;
+    for (const auto& ch : a) {
+      result += static_cast<size_t>(ch);
+    }
+    return result;
+  }
+};
+
+typedef AtomicHashMap<const char*, int64_t, HashTraits, EqTraits> AHMCstrInt;
+AHMCstrInt::Config cstrIntCfg;
+
+static bool legalKey(const char* a) {
+  return a != cstrIntCfg.emptyKey &&
+    a != cstrIntCfg.lockedKey &&
+    a != cstrIntCfg.erasedKey;
+}
+
+TEST(Ahm, BasicLookup) {
+  AHMCstrInt myMap(1024, cstrIntCfg);
+  EXPECT_TRUE(myMap.begin() == myMap.end());
+  myMap.insert(std::make_pair("f", 42));
+  EXPECT_EQ(42, myMap.find("f")->second);
+  {
+    // Look up a single char, successfully.
+    auto it = myMap.find<char>('f');
+    EXPECT_EQ(42, it->second);
+  }
+  {
+    // Look up a single char, unsuccessfully.
+    auto it = myMap.find<char>('g');
+    EXPECT_TRUE(it == myMap.end());
+  }
+  {
+    // Look up a string piece, successfully.
+    const StringPiece piece("f");
+    auto it = myMap.find(piece);
+    EXPECT_EQ(42, it->second);
+  }
+}
+
  TEST(Ahm, grow) {
    VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " <<
      sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes.";
-  int numEntries = 10000;
-  float sizeFactor = 0.46;
+  uint64_t numEntries = 10000;
+  float sizeFactor = 0.46f;
  
    std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
  
@@ -133,24 +210,23 @@ TEST(Ahm, grow) {
    EXPECT_TRUE(success);
  
    // check correctness
-  size_t cap = m->capacity();
-  ValueT val;
    EXPECT_GT(m->numSubMaps(), 1);  // make sure we grew
    success = true;
    EXPECT_EQ(m->size(), numEntries);
-  for (int i = 0; i < numEntries; i++) {
+  for (size_t i = 0; i < numEntries; i++) {
      success &= (m->find(i)->second == genVal(i));
    }
    EXPECT_TRUE(success);
  
    // Check findAt
    success = true;
-  KeyT key(0);
    AHMapT::const_iterator retIt;
-  for (uint64_t i = 0; i < numEntries; i++) {
+  for (int32_t i = 0; i < int32_t(numEntries); i++) {
      retIt = m->find(i);
      retIt = m->findAt(retIt.getIndex());
      success &= (retIt->second == genVal(i));
+    // We use a uint32_t index so that this comparison is between two
+    // variables of the same type.
      success &= (retIt->first == i);
    }
    EXPECT_TRUE(success);
@@ -171,11 +247,11 @@ TEST(Ahm, grow) {
  
  TEST(Ahm, iterator) {
    int numEntries = 10000;
-  float sizeFactor = .46;
+  float sizeFactor = .46f;
    std::unique_ptr<AHMapT> m(new AHMapT(int(numEntries * sizeFactor), config));
  
    // load map - make sure we succeed and the index is accurate
-  for (uint64_t i = 0; i < numEntries; i++) {
+  for (int i = 0; i < numEntries; i++) {
      m->insert(RecordT(i, genVal(i)));
    }
  
@@ -190,13 +266,13 @@ TEST(Ahm, iterator) {
  }
  
  class Counters {
-private:
+ private:
    // Note: Unfortunately can't currently put a std::atomic<int64_t> in
    // the value in ahm since it doesn't support types that are both non-copy
    // and non-move constructible yet.
    AtomicHashMap<int64_t,int64_t> ahm;
  
-public:
+ public:
    explicit Counters(size_t numCounters) : ahm(numCounters) {}
  
    void increment(int64_t obj_id) {
@@ -277,7 +353,7 @@ TEST(Ahm, map_exception_safety) {
    typedef AtomicHashMap<KeyT,Integer> MyMapT;
  
    int numEntries = 10000;
-  float sizeFactor = 0.46;
+  float sizeFactor = 0.46f;
    std::unique_ptr<MyMapT> m(new MyMapT(int(numEntries * sizeFactor)));
  
    bool success = true;
@@ -296,7 +372,7 @@ TEST(Ahm, map_exception_safety) {
  }
  
  TEST(Ahm, basicErase) {
-  int numEntries = 3000;
+  size_t numEntries = 3000;
  
    std::unique_ptr<AHMapT> s(new AHMapT(numEntries, config));
    // Iterate filling up the map and deleting all keys a few times
@@ -304,7 +380,7 @@ TEST(Ahm, basicErase) {
    for (int iterations = 0; iterations < 4; ++iterations) {
      // Testing insertion of keys
      bool success = true;
-    for (uint64_t i = 0; i < numEntries; ++i) {
+    for (size_t i = 0; i < numEntries; ++i) {
        success &= !(s->count(i));
        auto ret = s->insert(RecordT(i, i));
        success &= s->count(i);
@@ -316,7 +392,7 @@ TEST(Ahm, basicErase) {
      // Delete every key in the map and verify that the key is gone and the the
      // size is correct.
      success = true;
-    for (uint64_t i = 0; i < numEntries; ++i) {
+    for (size_t i = 0; i < numEntries; ++i) {
        success &= s->erase(i);
        success &= (s->size() == numEntries - 1 - i);
        success &= !(s->count(i));
@@ -332,7 +408,7 @@ namespace {
  inline KeyT randomizeKey(int key) {
    // We deterministically randomize the key to more accurately simulate
    // real-world usage, and to avoid pathalogical performance patterns (e.g.
-  // those related to __gnu_cxx::hash<int64_t>()(1) == 1).
+  // those related to std::hash<int64_t>()(1) == 1).
    //
    // Use a hash function we don't normally use for ints to avoid interactions.
    return folly::hash::jenkins_rev_mix32(key);
@@ -346,7 +422,16 @@ void* insertThread(void* jj) {
      KeyT key = randomizeKey(i + j * numOpsPerThread);
      globalAHM->insert(key, genVal(key));
    }
-  return NULL;
+  return nullptr;
+}
+
+void* qpInsertThread(void* jj) {
+  int64_t j = (int64_t) jj;
+  for (int i = 0; i < numOpsPerThread; ++i) {
+    KeyT key = randomizeKey(i + j * numOpsPerThread);
+    globalQPAHM->insert(key, genVal(key));
+  }
+  return nullptr;
  }
  
  void* insertThreadArr(void* jj) {
@@ -355,32 +440,32 @@ void* insertThreadArr(void* jj) {
      KeyT key = randomizeKey(i + j * numOpsPerThread);
      globalAHA->insert(std::make_pair(key, genVal(key)));
    }
-  return NULL;
+  return nullptr;
  }
  
  std::atomic<bool> runThreadsCreatedAllThreads;
-void runThreads(void *(*thread)(void*), int numThreads, void **statuses) {
+void runThreads(void *(*mainFunc)(void*), int numThreads, void **statuses) {
    folly::BenchmarkSuspender susp;
    runThreadsCreatedAllThreads.store(false);
-  vector<pthread_t> threadIds;
+  vector<std::thread> threads;
    for (int64_t j = 0; j < numThreads; j++) {
-    pthread_t tid;
-    if (pthread_create(&tid, NULL, thread, (void*) j) != 0) {
-       LOG(ERROR) << "Could not start thread";
-    } else {
-      threadIds.push_back(tid);
-    }
+    threads.emplace_back([statuses, mainFunc, j]() {
+      auto ret = mainFunc((void*)j);
+      if (statuses != nullptr) {
+        statuses[j] = ret;
+      }
+    });
    }
    susp.dismiss();
  
    runThreadsCreatedAllThreads.store(true);
-  for (int i = 0; i < threadIds.size(); ++i) {
-    pthread_join(threadIds[i], statuses == NULL ? NULL : &statuses[i]);
+  for (size_t i = 0; i < threads.size(); ++i) {
+    threads[i].join();
    }
  }
  
-void runThreads(void *(*thread)(void*)) {
-  runThreads(thread, FLAGS_numThreads, NULL);
+void runThreads(void *(*mainFunc)(void*)) {
+  runThreads(mainFunc, FLAGS_numThreads, nullptr);
  }
  
  }
@@ -391,7 +476,7 @@ TEST(Ahm, collision_test) {
    // Doing the same number on each thread so we collide.
    numOpsPerThread = numInserts;
  
-  float sizeFactor = 0.46;
+  float sizeFactor = 0.46f;
    int entrySize = sizeof(KeyT) + sizeof(ValueT);
    VLOG(1) << "Testing " << numInserts << " unique " << entrySize <<
      " Byte entries replicated in " << FLAGS_numThreads <<
@@ -424,7 +509,6 @@ TEST(Ahm, collision_test) {
    // check correctness
    EXPECT_EQ(sizeAHM, numInserts);
    bool success = true;
-  ValueT val;
    for (int i = 0; i < numInserts; ++i) {
      KeyT key = randomizeKey(i);
      success &= (globalAHM->find(key)->second == genVal(key));
@@ -452,8 +536,7 @@ namespace {
  const int kInsertPerThread = 100000;
  int raceFinalSizeEstimate;
  
-void* raceIterateThread(void* jj) {
-  int64_t j = (int64_t) jj;
+void* raceIterateThread(void*) {
    int count = 0;
  
    AHMapT::iterator it = globalAHM->begin();
@@ -462,19 +545,18 @@ void* raceIterateThread(void* jj) {
      ++count;
      if (count > raceFinalSizeEstimate) {
        EXPECT_FALSE("Infinite loop in iterator.");
-      return NULL;
+      return nullptr;
      }
    }
-  return NULL;
+  return nullptr;
  }
  
-void* raceInsertRandomThread(void* jj) {
-  int64_t j = (int64_t) jj;
+void* raceInsertRandomThread(void*) {
    for (int i = 0; i < kInsertPerThread; ++i) {
      KeyT key = rand();
      globalAHM->insert(key, genVal(key));
    }
-  return NULL;
+  return nullptr;
  }
  
  }
@@ -492,18 +574,18 @@ TEST(Ahm, race_insert_iterate_thread_test) {
    globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config));
  
    vector<pthread_t> threadIds;
-  for (int64_t j = 0; j < kInsertThreads + kIterateThreads; j++) {
+  for (int j = 0; j < kInsertThreads + kIterateThreads; j++) {
      pthread_t tid;
      void *(*thread)(void*) =
        (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread);
-    if (pthread_create(&tid, NULL, thread, (void*) j) != 0) {
+    if (pthread_create(&tid, nullptr, thread, nullptr) != 0) {
        LOG(ERROR) << "Could not start thread";
      } else {
        threadIds.push_back(tid);
      }
    }
-  for (int i = 0; i < threadIds.size(); ++i) {
-    pthread_join(threadIds[i], NULL);
+  for (size_t i = 0; i < threadIds.size(); ++i) {
+    pthread_join(threadIds[i], nullptr);
    }
    VLOG(1) << "Ended up with " << globalAHM->numSubMaps() << " submaps";
    VLOG(1) << "Final size of map " << globalAHM->size();
@@ -521,7 +603,7 @@ void* testEraseInsertThread(void*) {
      insertedLevel.store(i, std::memory_order_release);
    }
    insertedLevel.store(kTestEraseInsertions, std::memory_order_release);
-  return NULL;
+  return nullptr;
  }
  
  void* testEraseEraseThread(void*) {
@@ -541,7 +623,9 @@ void* testEraseEraseThread(void*) {
      int currentLevel;
      do {
        currentLevel = insertedLevel.load(std::memory_order_acquire);
-      if (currentLevel == kTestEraseInsertions) currentLevel += lag + 1;
+      if (currentLevel == kTestEraseInsertions) {
+        currentLevel += lag + 1;
+      }
      } while (currentLevel - lag < i);
  
      KeyT key = randomizeKey(i);
@@ -551,7 +635,7 @@ void* testEraseEraseThread(void*) {
        }
      }
    }
-  return NULL;
+  return nullptr;
  }
  
  }
@@ -572,14 +656,14 @@ TEST(Ahm, thread_erase_insert_race) {
      pthread_t tid;
      void *(*thread)(void*) =
        (j < kInsertThreads ? testEraseInsertThread : testEraseEraseThread);
-    if (pthread_create(&tid, NULL, thread, (void*) j) != 0) {
+    if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) {
        LOG(ERROR) << "Could not start thread";
      } else {
        threadIds.push_back(tid);
      }
    }
-  for (int i = 0; i < threadIds.size(); i++) {
-    pthread_join(threadIds[i], NULL);
+  for (size_t i = 0; i < threadIds.size(); i++) {
+    pthread_join(threadIds[i], nullptr);
    }
  
    EXPECT_TRUE(globalAHM->empty());
@@ -592,31 +676,129 @@ TEST(Ahm, thread_erase_insert_race) {
  typedef AtomicHashArray<int32_t, int32_t> AHA;
  AHA::Config configRace;
  auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace);
-void* atomicHashArrayInsertRaceThread(void* j) {
+void* atomicHashArrayInsertRaceThread(void* /* j */) {
    AHA* arr = atomicHashArrayInsertRaceArray.get();
    uintptr_t numInserted = 0;
-  while (!runThreadsCreatedAllThreads.load());
+  while (!runThreadsCreatedAllThreads.load()) {
+    ;
+  }
    for (int i = 0; i < 2; i++) {
      if (arr->insert(RecordT(randomizeKey(i), 0)).first != arr->end()) {
        numInserted++;
      }
    }
-  pthread_exit((void *) numInserted);
+  return (void*)numInserted;
  }
  TEST(Ahm, atomic_hash_array_insert_race) {
    AHA* arr = atomicHashArrayInsertRaceArray.get();
-  int numIterations = 50000, FLAGS_numThreads = 4;
-  void* statuses[FLAGS_numThreads];
+  int numIterations = 5000;
+  constexpr int numThreads = 4;
+  void* statuses[numThreads];
    for (int i = 0; i < numIterations; i++) {
      arr->clear();
-    runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses);
+    runThreads(atomicHashArrayInsertRaceThread, numThreads, statuses);
      EXPECT_GE(arr->size(), 1);
-    for (int j = 0; j < FLAGS_numThreads; j++) {
+    for (int j = 0; j < numThreads; j++) {
        EXPECT_EQ(arr->size(), uintptr_t(statuses[j]));
      }
    }
  }
  
+// Repro for T#5841499. Race between erase() and find() on the same key.
+TEST(Ahm, erase_find_race) {
+  const uint64_t limit = 10000;
+  AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
+  std::atomic<uint64_t> key {1};
+
+  // Invariant: all values are equal to their keys.
+  // At any moment there is one or two consecutive keys in the map.
+
+  std::thread write_thread([&]() {
+    while (true) {
+      uint64_t k = ++key;
+      if (k > limit) {
+        break;
+      }
+      map.insert(k + 1, k + 1);
+      map.erase(k);
+    }
+  });
+
+  std::thread read_thread([&]() {
+    while (true) {
+      uint64_t k = key.load();
+      if (k > limit) {
+        break;
+      }
+
+      auto it = map.find(k);
+      if (it != map.end()) {
+        ASSERT_EQ(k, it->second);
+      }
+    }
+  });
+
+  read_thread.join();
+  write_thread.join();
+}
+
+// Erase right after insert race bug repro (t9130653)
+TEST(Ahm, erase_after_insert_race) {
+  const uint64_t limit = 10000;
+  const size_t num_threads = 100;
+  const size_t num_iters = 500;
+  AtomicHashMap<uint64_t, uint64_t> map(limit + 10);
+
+  std::atomic<bool> go{false};
+  std::vector<std::thread> ts;
+  for (size_t i = 0; i < num_threads; ++i) {
+    ts.emplace_back([&]() {
+        while (!go) {
+          continue;
+        }
+        for (size_t n = 0; n < num_iters; ++n) {
+          map.erase(1);
+          map.insert(1, 1);
+        }
+      });
+  }
+
+  go = true;
+
+  for (auto& t : ts) {
+    t.join();
+  }
+}
+
+// Repro for a bug when iterator didn't skip empty submaps.
+TEST(Ahm, iterator_skips_empty_submaps) {
+  AtomicHashMap<uint64_t, uint64_t>::Config config;
+  config.growthFactor = 1;
+
+  AtomicHashMap<uint64_t, uint64_t> map(1, config);
+
+  map.insert(1, 1);
+  map.insert(2, 2);
+  map.insert(3, 3);
+
+  map.erase(2);
+
+  auto it = map.find(1);
+
+  ASSERT_NE(map.end(), it);
+  ASSERT_EQ(1, it->first);
+  ASSERT_EQ(1, it->second);
+
+  ++it;
+
+  ASSERT_NE(map.end(), it);
+  ASSERT_EQ(3, it->first);
+  ASSERT_EQ(3, it->second);
+
+  ++it;
+  ASSERT_EQ(map.end(), it);
+}
+
  namespace {
  
  void loadGlobalAha() {
@@ -647,11 +829,24 @@ void loadGlobalAhm() {
    EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
  }
  
+void loadGlobalQPAhm() {
+  std::cout << "loading global QPAHM with " << FLAGS_numThreads
+            << " threads...\n";
+  uint64_t start = nowInUsec();
+  globalQPAHM.reset(new QPAHMapT(maxBMElements, qpConfig));
+  numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
+  runThreads(qpInsertThread);
+  uint64_t elapsed = nowInUsec() - start;
+  std::cout << "  took " << elapsed / 1000 << " ms (" <<
+    (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
+  EXPECT_EQ(globalQPAHM->size(), FLAGS_numBMElements);
+}
+
  }
  
  BENCHMARK(st_aha_find, iters) {
    CHECK_LE(iters, FLAGS_numBMElements);
-  for (int i = 0; i < iters; i++) {
+  for (size_t i = 0; i < iters; i++) {
      KeyT key = randomizeKey(i);
      folly::doNotOptimizeAway(globalAHA->find(key)->second);
    }
@@ -659,12 +854,20 @@ BENCHMARK(st_aha_find, iters) {
  
  BENCHMARK(st_ahm_find, iters) {
    CHECK_LE(iters, FLAGS_numBMElements);
-  for (int i = 0; i < iters; i++) {
+  for (size_t i = 0; i < iters; i++) {
      KeyT key = randomizeKey(i);
      folly::doNotOptimizeAway(globalAHM->find(key)->second);
    }
  }
  
+BENCHMARK(st_qpahm_find, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  for (size_t i = 0; i < iters; i++) {
+    KeyT key = randomizeKey(i);
+    folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
+  }
+}
+
  BENCHMARK_DRAW_LINE()
  
  BENCHMARK(mt_ahm_miss, iters) {
@@ -672,7 +875,9 @@ BENCHMARK(mt_ahm_miss, iters) {
    numOpsPerThread = iters / FLAGS_numThreads;
    runThreads([](void* jj) -> void* {
      int64_t j = (int64_t) jj;
-    while (!runThreadsCreatedAllThreads.load());
+    while (!runThreadsCreatedAllThreads.load()) {
+      ;
+    }
      for (int i = 0; i < numOpsPerThread; ++i) {
        KeyT key = i + j * numOpsPerThread * 100;
        folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
@@ -681,20 +886,46 @@ BENCHMARK(mt_ahm_miss, iters) {
    });
  }
  
+BENCHMARK(mt_qpahm_miss, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  numOpsPerThread = iters / FLAGS_numThreads;
+  runThreads([](void* jj) -> void* {
+    int64_t j = (int64_t) jj;
+    while (!runThreadsCreatedAllThreads.load()) {
+      ;
+    }
+    for (int i = 0; i < numOpsPerThread; ++i) {
+      KeyT key = i + j * numOpsPerThread * 100;
+      folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
+    }
+    return nullptr;
+  });
+}
+
  BENCHMARK(st_ahm_miss, iters) {
    CHECK_LE(iters, FLAGS_numBMElements);
-  for (int i = 0; i < iters; i++) {
+  for (size_t i = 0; i < iters; i++) {
      KeyT key = randomizeKey(i + iters * 100);
      folly::doNotOptimizeAway(globalAHM->find(key) == globalAHM->end());
    }
  }
  
+BENCHMARK(st_qpahm_miss, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  for (size_t i = 0; i < iters; i++) {
+    KeyT key = randomizeKey(i + iters * 100);
+    folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
+  }
+}
+
  BENCHMARK(mt_ahm_find_insert_mix, iters) {
    CHECK_LE(iters, FLAGS_numBMElements);
    numOpsPerThread = iters / FLAGS_numThreads;
    runThreads([](void* jj) -> void* {
      int64_t j = (int64_t) jj;
-    while (!runThreadsCreatedAllThreads.load());
+    while (!runThreadsCreatedAllThreads.load()) {
+      ;
+    }
      for (int i = 0; i < numOpsPerThread; ++i) {
        if (i % 128) {  // ~1% insert mix
          KeyT key = randomizeKey(i + j * numOpsPerThread);
@@ -708,12 +939,36 @@ BENCHMARK(mt_ahm_find_insert_mix, iters) {
    });
  }
  
+
+BENCHMARK(mt_qpahm_find_insert_mix, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  numOpsPerThread = iters / FLAGS_numThreads;
+  runThreads([](void* jj) -> void* {
+    int64_t j = (int64_t) jj;
+    while (!runThreadsCreatedAllThreads.load()) {
+      ;
+    }
+    for (int i = 0; i < numOpsPerThread; ++i) {
+      if (i % 128) {  // ~1% insert mix
+        KeyT key = randomizeKey(i + j * numOpsPerThread);
+        folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
+      } else {
+        KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
+        globalQPAHM->insert(key, genVal(key));
+      }
+    }
+    return nullptr;
+  });
+}
+
  BENCHMARK(mt_aha_find, iters) {
    CHECK_LE(iters, FLAGS_numBMElements);
    numOpsPerThread = iters / FLAGS_numThreads;
    runThreads([](void* jj) -> void* {
        int64_t j = (int64_t) jj;
-      while (!runThreadsCreatedAllThreads.load());
+      while (!runThreadsCreatedAllThreads.load()) {
+        ;
+      }
        for (int i = 0; i < numOpsPerThread; ++i) {
          KeyT key = randomizeKey(i + j * numOpsPerThread);
          folly::doNotOptimizeAway(globalAHA->find(key)->second);
@@ -727,7 +982,9 @@ BENCHMARK(mt_ahm_find, iters) {
    numOpsPerThread = iters / FLAGS_numThreads;
    runThreads([](void* jj) -> void* {
      int64_t j = (int64_t) jj;
-    while (!runThreadsCreatedAllThreads.load());
+    while (!runThreadsCreatedAllThreads.load()) {
+      ;
+    }
      for (int i = 0; i < numOpsPerThread; ++i) {
        KeyT key = randomizeKey(i + j * numOpsPerThread);
        folly::doNotOptimizeAway(globalAHM->find(key)->second);
@@ -736,9 +993,25 @@ BENCHMARK(mt_ahm_find, iters) {
    });
  }
  
+BENCHMARK(mt_qpahm_find, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  numOpsPerThread = iters / FLAGS_numThreads;
+  runThreads([](void* jj) -> void* {
+    int64_t j = (int64_t) jj;
+    while (!runThreadsCreatedAllThreads.load()) {
+      ;
+    }
+    for (int i = 0; i < numOpsPerThread; ++i) {
+      KeyT key = randomizeKey(i + j * numOpsPerThread);
+      folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
+    }
+    return nullptr;
+  });
+}
+
  KeyT k;
  BENCHMARK(st_baseline_modulus_and_random, iters) {
-  for (int i = 0; i < iters; ++i) {
+  for (size_t i = 0; i < iters; ++i) {
      k = randomizeKey(i) % iters;
    }
  }
@@ -753,12 +1026,31 @@ BENCHMARK(mt_ahm_insert, iters) {
    runThreads(insertThread);
  }
  
+BENCHMARK(mt_qpahm_insert, iters) {
+  BENCHMARK_SUSPEND {
+    globalQPAHM.reset(new QPAHMapT(int(iters * LF), qpConfig));
+    numOpsPerThread = iters / FLAGS_numThreads;
+  }
+  runThreads(qpInsertThread);
+}
+
  BENCHMARK(st_ahm_insert, iters) {
    folly::BenchmarkSuspender susp;
    std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
    susp.dismiss();
  
-  for (int i = 0; i < iters; i++) {
+  for (size_t i = 0; i < iters; i++) {
+    KeyT key = randomizeKey(i);
+    ahm->insert(key, genVal(key));
+  }
+}
+
+BENCHMARK(st_qpahm_insert, iters) {
+  folly::BenchmarkSuspender susp;
+  std::unique_ptr<QPAHMapT> ahm(new QPAHMapT(int(iters * LF), qpConfig));
+  susp.dismiss();
+
+  for (size_t i = 0; i < iters; i++) {
      KeyT key = randomizeKey(i);
      ahm->insert(key, genVal(key));
    }
@@ -766,22 +1058,24 @@ BENCHMARK(st_ahm_insert, iters) {
  
  void benchmarkSetup() {
    config.maxLoadFactor = FLAGS_maxLoadFactor;
+  qpConfig.maxLoadFactor = FLAGS_maxLoadFactor;
    configRace.maxLoadFactor = 0.5;
    int numCores = sysconf(_SC_NPROCESSORS_ONLN);
    loadGlobalAha();
    loadGlobalAhm();
+  loadGlobalQPAhm();
    string numIters = folly::to<string>(
      std::min(1000000, int(FLAGS_numBMElements)));
  
-  google::SetCommandLineOptionWithMode(
-    "bm_max_iters", numIters.c_str(), google::SET_FLAG_IF_DEFAULT
+  gflags::SetCommandLineOptionWithMode(
+    "bm_max_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
    );
-  google::SetCommandLineOptionWithMode(
-    "bm_min_iters", numIters.c_str(), google::SET_FLAG_IF_DEFAULT
+  gflags::SetCommandLineOptionWithMode(
+    "bm_min_iters", numIters.c_str(), gflags::SET_FLAG_IF_DEFAULT
    );
    string numCoresStr = folly::to<string>(numCores);
-  google::SetCommandLineOptionWithMode(
-    "numThreads", numCoresStr.c_str(), google::SET_FLAG_IF_DEFAULT
+  gflags::SetCommandLineOptionWithMode(
+    "numThreads", numCoresStr.c_str(), gflags::SET_FLAG_IF_DEFAULT
    );
  
    std::cout << "\nRunning AHM benchmarks on machine with " << numCores
@@ -793,7 +1087,7 @@ void benchmarkSetup() {
  
  int main(int argc, char** argv) {
    testing::InitGoogleTest(&argc, argv);
-  google::ParseCommandLineFlags(&argc, &argv, true);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
    auto ret = RUN_ALL_TESTS();
    if (!ret && FLAGS_benchmark) {
      benchmarkSetup();
@@ -803,28 +1097,38 @@ int main(int argc, char** argv) {
  }
  
  /*
-Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled
-  (12 physical cores, 12 MB cache, 72 GB RAM)
+loading global AHA with 8 threads...
+  took 487 ms (40 ns/insert).
+loading global AHM with 8 threads...
+  took 478 ms (39 ns/insert).
+loading global QPAHM with 8 threads...
+  took 478 ms (39 ns/insert).
  
  Running AHM benchmarks on machine with 24 logical cores.
    num elements per map: 12000000
    num threads for mt tests: 24
    AHM load factor: 0.75
  
-Benchmark                               Iters   Total t    t/iter iter/sec
-------------------------------------------------------------------------------
-Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
-*       BM_mt_aha_find                1000000  7.767 ms  7.767 ns  122.8 M
- +0.81% BM_mt_ahm_find                1000000   7.83 ms   7.83 ns  121.8 M
-------------------------------------------------------------------------------
-Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
-*       BM_st_aha_find                1000000  57.83 ms  57.83 ns  16.49 M
- +77.9% BM_st_ahm_find                1000000  102.9 ms  102.9 ns   9.27 M
-------------------------------------------------------------------------------
-BM_mt_ahm_miss                        1000000  2.937 ms  2.937 ns  324.7 M
-BM_st_ahm_miss                        1000000  164.2 ms  164.2 ns  5.807 M
-BM_mt_ahm_find_insert_mix             1000000  8.797 ms  8.797 ns  108.4 M
-BM_mt_ahm_insert                      1000000  17.39 ms  17.39 ns  54.83 M
-BM_st_ahm_insert                      1000000  106.8 ms  106.8 ns   8.93 M
-BM_st_baseline_modulus_and_rando      1000000  6.223 ms  6.223 ns  153.2 M
+============================================================================
+folly/test/AtomicHashMapTest.cpp                relative  time/iter  iters/s
+============================================================================
+st_aha_find                                                 92.63ns   10.80M
+st_ahm_find                                                107.78ns    9.28M
+st_qpahm_find                                               90.69ns   11.03M
+----------------------------------------------------------------------------
+mt_ahm_miss                                                  2.09ns  477.36M
+mt_qpahm_miss                                                1.37ns  728.82M
+st_ahm_miss                                                241.07ns    4.15M
+st_qpahm_miss                                              223.17ns    4.48M
+mt_ahm_find_insert_mix                                       8.05ns  124.24M
+mt_qpahm_find_insert_mix                                     9.10ns  109.85M
+mt_aha_find                                                  6.82ns  146.68M
+mt_ahm_find                                                  7.95ns  125.77M
+mt_qpahm_find                                                6.81ns  146.83M
+st_baseline_modulus_and_random                               6.02ns  166.03M
+mt_ahm_insert                                               14.29ns   69.97M
+mt_qpahm_insert                                             11.68ns   85.61M
+st_ahm_insert                                              125.39ns    7.98M
+st_qpahm_insert                                            128.76ns    7.77M
+============================================================================
  */