From 99de4c5fde6fce17ac9dab77b2b99ceffce999dc Mon Sep 17 00:00:00 2001 From: Xiaofan Yang Date: Thu, 29 Oct 2015 12:27:58 -0700 Subject: [PATCH] add config to allow using quadratic probing Summary: In my use case, 1.5 billion keys with loadFactor 0.8, the linear probing performs really bad. Reviewed By: nbronson Differential Revision: D2579243 fb-gh-sync-id: 5081356de55f770823a4afad55bf7e2114b4e313 --- folly/AtomicHashArray-inl.h | 54 +++++---- folly/AtomicHashArray.h | 42 +++++-- folly/AtomicHashMap-inl.h | 175 +++++++++++++++++++---------- folly/AtomicHashMap.h | 17 ++- folly/test/AtomicHashArrayTest.cpp | 54 ++++++++- folly/test/AtomicHashMapTest.cpp | 157 ++++++++++++++++++++++---- 6 files changed, 386 insertions(+), 113 deletions(-) diff --git a/folly/AtomicHashArray-inl.h b/folly/AtomicHashArray-inl.h index 7d6b13e1..e3b32a04 100644 --- a/folly/AtomicHashArray-inl.h +++ b/folly/AtomicHashArray-inl.h @@ -25,8 +25,8 @@ namespace folly { // AtomicHashArray private constructor -- template -AtomicHashArray:: + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> +AtomicHashArray:: AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey, KeyT erasedKey, double _maxLoadFactor, size_t cacheSize) : capacity_(capacity), @@ -44,17 +44,17 @@ AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey, * ret.index is set to capacity_. */ template + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> typename AtomicHashArray::SimpleRetT -AtomicHashArray:: + HashFcn, EqualFcn, Allocator, ProbeFcn>::SimpleRetT +AtomicHashArray:: findInternal(const KeyT key_in) { DCHECK_NE(key_in, kEmptyKey_); DCHECK_NE(key_in, kLockedKey_); DCHECK_NE(key_in, kErasedKey_); for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0; ; - idx = probeNext(idx, numProbes)) { + idx = ProbeFcn()(idx, numProbes, capacity_)) { const KeyT key = acquireLoadKey(cells_[idx]); if (LIKELY(EqualFcn()(key, key_in))) { return SimpleRetT(idx, true); @@ -63,6 +63,8 @@ findInternal(const KeyT key_in) { // if we hit an empty element, this key does not exist return SimpleRetT(capacity_, false); } + // NOTE: the way we count numProbes must be same in find(), insert(), + // and erase(). Otherwise it may break probing. ++numProbes; if (UNLIKELY(numProbes >= capacity_)) { // probed every cell...fail @@ -82,11 +84,11 @@ findInternal(const KeyT key_in) { * default. */ template + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> template typename AtomicHashArray::SimpleRetT -AtomicHashArray:: + HashFcn, EqualFcn, Allocator, ProbeFcn>::SimpleRetT +AtomicHashArray:: insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) { const short NO_NEW_INSERTS = 1; const short NO_PENDING_INSERTS = 2; @@ -174,13 +176,16 @@ insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) { continue; } + + // NOTE: the way we count numProbes must be same in find(), + // insert(), and erase(). Otherwise it may break probing. ++numProbes; if (UNLIKELY(numProbes >= capacity_)) { // probed every cell...fail return SimpleRetT(capacity_, false); } - idx = probeNext(idx, numProbes); + idx = ProbeFcn()(idx, numProbes, capacity_); } } @@ -196,15 +201,16 @@ insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) { * touch it either. 
*/ template -size_t AtomicHashArray:: + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> +size_t AtomicHashArray:: erase(KeyT key_in) { CHECK_NE(key_in, kEmptyKey_); CHECK_NE(key_in, kLockedKey_); CHECK_NE(key_in, kErasedKey_); + for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0; ; - idx = probeNext(idx, numProbes)) { + idx = ProbeFcn()(idx, numProbes, capacity_)) { DCHECK_LT(idx, capacity_); value_type* cell = &cells_[idx]; KeyT currentKey = acquireLoadKey(*cell); @@ -231,6 +237,9 @@ erase(KeyT key_in) { // If another thread succeeds in erasing our key, we'll stop our search. return 0; } + + // NOTE: the way we count numProbes must be same in find(), insert(), + // and erase(). Otherwise it may break probing. ++numProbes; if (UNLIKELY(numProbes >= capacity_)) { // probed every cell...fail @@ -240,10 +249,10 @@ erase(KeyT key_in) { } template + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> typename AtomicHashArray::SmartPtr -AtomicHashArray:: + HashFcn, EqualFcn, Allocator, ProbeFcn>::SmartPtr +AtomicHashArray:: create(size_t maxSize, const Config& c) { CHECK_LE(c.maxLoadFactor, 1.0); CHECK_GT(c.maxLoadFactor, 0.0); @@ -282,8 +291,8 @@ create(size_t maxSize, const Config& c) { } template -void AtomicHashArray:: + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> +void AtomicHashArray:: destroy(AtomicHashArray* p) { assert(p); @@ -301,8 +310,8 @@ destroy(AtomicHashArray* p) { // clear -- clears all keys and values in the map and resets all counters template -void AtomicHashArray:: + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> +void AtomicHashArray:: clear() { FOR_EACH_RANGE(i, 0, capacity_) { if (cells_[i].first != kEmptyKey_) { @@ -321,9 +330,10 @@ clear() { // Iterator implementation template + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> template -struct AtomicHashArray::aha_iterator +struct AtomicHashArray:: + aha_iterator : boost::iterator_facade, IterVal, boost::forward_traversal_tag> diff --git a/folly/AtomicHashArray.h b/folly/AtomicHashArray.h index fbe976d2..71ef2c76 100644 --- a/folly/AtomicHashArray.h +++ b/folly/AtomicHashArray.h @@ -42,16 +42,38 @@ namespace folly { +struct AtomicHashArrayLinearProbeFcn +{ + inline size_t operator()(size_t idx, size_t numProbes, size_t capacity) const{ + idx += 1; // linear probing + + // Avoid modulus because it's slow + return LIKELY(idx < capacity) ? idx : (idx - capacity); + } +}; + +struct AtomicHashArrayQuadraticProbeFcn +{ + inline size_t operator()(size_t idx, size_t numProbes, size_t capacity) const{ + idx += numProbes; // quadratic probing + + // Avoid modulus because it's slow + return LIKELY(idx < capacity) ? idx : (idx - capacity); + } +}; + template , class EqualFcn = std::equal_to, - class Allocator = std::allocator> + class Allocator = std::allocator, + class ProbeFcn = AtomicHashArrayLinearProbeFcn> class AtomicHashMap; template , class EqualFcn = std::equal_to, - class Allocator = std::allocator> + class Allocator = std::allocator, + class ProbeFcn = AtomicHashArrayLinearProbeFcn> class AtomicHashArray : boost::noncopyable { static_assert((std::is_convertible::value || std::is_convertible::value || @@ -240,13 +262,20 @@ class AtomicHashArray : boost::noncopyable { /* Private data and helper functions... 
*/ private: - friend class AtomicHashMap; +friend class AtomicHashMap; struct SimpleRetT { size_t idx; bool success; SimpleRetT(size_t i, bool s) : idx(i), success(s) {} SimpleRetT() = default; }; + + template SimpleRetT insertInternal(KeyT key, ArgTs&&... vCtorArgs); @@ -307,12 +336,7 @@ class AtomicHashArray : boost::noncopyable { return LIKELY(probe < capacity_) ? probe : hashVal % capacity_; } - inline size_t probeNext(size_t idx, size_t /*numProbes*/) { - //idx += numProbes; // quadratic probing - idx += 1; // linear probing - // Avoid modulus because it's slow - return LIKELY(idx < capacity_) ? idx : (idx - capacity_); - } + }; // AtomicHashArray } // namespace folly diff --git a/folly/AtomicHashMap-inl.h b/folly/AtomicHashMap-inl.h index 6d8bb917..61c23b14 100644 --- a/folly/AtomicHashMap-inl.h +++ b/folly/AtomicHashMap-inl.h @@ -24,9 +24,13 @@ namespace folly { // AtomicHashMap constructor -- Atomic wrapper that allows growth // This class has a lot of overhead (184 Bytes) so only use for big maps -template -AtomicHashMap:: +template +AtomicHashMap:: AtomicHashMap(size_t finalSizeEst, const Config& config) : kGrowthFrac_(config.growthFactor < 0 ? 1.0 - config.maxLoadFactor : config.growthFactor) { @@ -41,12 +45,16 @@ AtomicHashMap(size_t finalSizeEst, const Config& config) } // emplace -- -template +template template std::pair::iterator, bool> -AtomicHashMap:: + EqualFcn, Allocator, ProbeFcn>::iterator, bool> +AtomicHashMap:: emplace(key_type k, ArgTs&&... vCtorArgs) { SimpleRetT ret = insertInternal(k, std::forward(vCtorArgs)...); SubMap* subMap = subMaps_[ret.i].load(std::memory_order_relaxed); @@ -55,11 +63,16 @@ emplace(key_type k, ArgTs&&... vCtorArgs) { } // insertInternal -- Allocates new sub maps as existing ones fill up. -template +template template -typename AtomicHashMap::SimpleRetT -AtomicHashMap:: +typename AtomicHashMap:: + SimpleRetT +AtomicHashMap:: insertInternal(key_type key, ArgTs&&... vCtorArgs) { beginInsertInternal: auto nextMapIdx = // this maintains our state @@ -133,11 +146,16 @@ insertInternal(key_type key, ArgTs&&... vCtorArgs) { } // find -- -template -typename AtomicHashMap::iterator -AtomicHashMap:: -find(KeyT k) { +template +typename AtomicHashMap:: + iterator +AtomicHashMap::find( + KeyT k) { SimpleRetT ret = findInternal(k); if (!ret.success) { return end(); @@ -146,21 +164,30 @@ find(KeyT k) { return iterator(this, ret.i, subMap->makeIter(ret.j)); } -template +template typename AtomicHashMap::const_iterator -AtomicHashMap:: + HashFcn, EqualFcn, Allocator, ProbeFcn>::const_iterator +AtomicHashMap:: find(KeyT k) const { return const_cast(this)->find(k); } // findInternal -- -template -typename AtomicHashMap::SimpleRetT -AtomicHashMap:: -findInternal(const KeyT k) const { +template +typename AtomicHashMap:: + SimpleRetT +AtomicHashMap:: + findInternal(const KeyT k) const { SubMap* const primaryMap = subMaps_[0].load(std::memory_order_relaxed); typename SubMap::SimpleRetT ret = primaryMap->findInternal(k); if (LIKELY(ret.idx != primaryMap->capacity_)) { @@ -180,10 +207,15 @@ findInternal(const KeyT k) const { } // findAtInternal -- see encodeIndex() for details. 
-template -typename AtomicHashMap::SimpleRetT -AtomicHashMap:: +template +typename AtomicHashMap:: + SimpleRetT +AtomicHashMap:: findAtInternal(uint32_t idx) const { uint32_t subMapIdx, subMapOffset; if (idx & kSecondaryMapBit_) { @@ -201,10 +233,15 @@ findAtInternal(uint32_t idx) const { } // erase -- -template -typename AtomicHashMap::size_type -AtomicHashMap:: +template +typename AtomicHashMap:: + size_type +AtomicHashMap:: erase(const KeyT k) { int const numMaps = numMapsAllocated_.load(std::memory_order_acquire); FOR_EACH_RANGE(i, 0, numMaps) { @@ -218,9 +255,13 @@ erase(const KeyT k) { } // capacity -- summation of capacities of all submaps -template -size_t AtomicHashMap:: +template +size_t AtomicHashMap:: capacity() const { size_t totalCap(0); int const numMaps = numMapsAllocated_.load(std::memory_order_acquire); @@ -232,9 +273,13 @@ capacity() const { // spaceRemaining -- // number of new insertions until current submaps are all at max load -template -size_t AtomicHashMap:: +template +size_t AtomicHashMap:: spaceRemaining() const { size_t spaceRem(0); int const numMaps = numMapsAllocated_.load(std::memory_order_acquire); @@ -250,9 +295,13 @@ spaceRemaining() const { // clear -- Wipes all keys and values from primary map and destroys // all secondary maps. Not thread safe. -template -void AtomicHashMap:: +template +void AtomicHashMap:: clear() { subMaps_[0].load(std::memory_order_relaxed)->clear(); int const numMaps = numMapsAllocated_ @@ -267,9 +316,13 @@ clear() { } // size -- -template -size_t AtomicHashMap:: +template +size_t AtomicHashMap:: size() const { size_t totalSize(0); int const numMaps = numMapsAllocated_.load(std::memory_order_acquire); @@ -297,10 +350,15 @@ size() const { // 31 1 // 27-30 which subMap // 0-26 subMap offset (index_ret input) -template -inline uint32_t AtomicHashMap:: -encodeIndex(uint32_t subMap, uint32_t offset) { +template +inline uint32_t +AtomicHashMap:: + encodeIndex(uint32_t subMap, uint32_t offset) { DCHECK_EQ(offset & kSecondaryMapBit_, 0); // offset can't be too big if (subMap == 0) return offset; // Make sure subMap isn't too big @@ -315,14 +373,17 @@ encodeIndex(uint32_t subMap, uint32_t offset) { // Iterator implementation -template -template -struct AtomicHashMap::ahm_iterator - : boost::iterator_facade, - IterVal, - boost::forward_traversal_tag> -{ +template +template +struct AtomicHashMap:: + ahm_iterator : boost::iterator_facade, + IterVal, + boost::forward_traversal_tag> { explicit ahm_iterator() : ahm_(0) {} // Conversion ctor for interoperability between const_iterator and diff --git a/folly/AtomicHashMap.h b/folly/AtomicHashMap.h index 90c59661..c6f06bd8 100644 --- a/folly/AtomicHashMap.h +++ b/folly/AtomicHashMap.h @@ -156,9 +156,10 @@ struct AtomicHashMapFullError : std::runtime_error { }; template + class HashFcn, class EqualFcn, class Allocator, class ProbeFcn> class AtomicHashMap : boost::noncopyable { - typedef AtomicHashArray SubMap; +typedef AtomicHashArray + SubMap; public: typedef KeyT key_type; @@ -422,6 +423,18 @@ class AtomicHashMap : boost::noncopyable { }; // AtomicHashMap +template , + class EqualFcn = std::equal_to, + class Allocator = std::allocator> +using QuadraticProbingAtomicHashMap = + AtomicHashMap; } // namespace folly #include diff --git a/folly/test/AtomicHashArrayTest.cpp b/folly/test/AtomicHashArrayTest.cpp index 3c7c32f2..1f83bc75 100644 --- a/folly/test/AtomicHashArrayTest.cpp +++ b/folly/test/AtomicHashArrayTest.cpp @@ -96,10 +96,13 @@ pair createEntry(int i) { to(i + 3)); } -template> +template , + 
class ProbeFcn = AtomicHashArrayLinearProbeFcn> void testMap() { typedef AtomicHashArray, - std::equal_to, Allocator> MyArr; + std::equal_to, Allocator, ProbeFcn> MyArr; auto arr = MyArr::create(150); map ref; for (int i = 0; i < 100; ++i) { @@ -144,10 +147,13 @@ void testMap() { } } -template> +template, + class ProbeFcn = AtomicHashArrayLinearProbeFcn> void testNoncopyableMap() { typedef AtomicHashArray, std::hash, - std::equal_to, Allocator> MyArr; + std::equal_to, Allocator, ProbeFcn> MyArr; + auto arr = MyArr::create(250); for (int i = 0; i < 100; i++) { arr->insert(make_pair(i,std::unique_ptr(new ValueT(i)))); @@ -168,34 +174,74 @@ void testNoncopyableMap() { TEST(Aha, InsertErase_i32_i32) { testMap(); testMap>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); testNoncopyableMap(); testNoncopyableMap>(); + testNoncopyableMap, AtomicHashArrayQuadraticProbeFcn>(); + testNoncopyableMap, AtomicHashArrayQuadraticProbeFcn>(); } TEST(Aha, InsertErase_i64_i32) { testMap(); testMap>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); testNoncopyableMap(); testNoncopyableMap>(); + testNoncopyableMap, AtomicHashArrayQuadraticProbeFcn>(); + testNoncopyableMap, AtomicHashArrayQuadraticProbeFcn>(); } TEST(Aha, InsertErase_i64_i64) { testMap(); testMap>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); testNoncopyableMap(); testNoncopyableMap>(); + testNoncopyableMap, AtomicHashArrayQuadraticProbeFcn>(); + testNoncopyableMap, AtomicHashArrayQuadraticProbeFcn>(); } TEST(Aha, InsertErase_i32_i64) { testMap(); testMap>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); testNoncopyableMap(); testNoncopyableMap>(); + testNoncopyableMap, AtomicHashArrayQuadraticProbeFcn>(); + testNoncopyableMap, AtomicHashArrayQuadraticProbeFcn>(); } TEST(Aha, InsertErase_i32_str) { testMap(); testMap>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); } TEST(Aha, InsertErase_i64_str) { testMap(); testMap>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); + testMap, AtomicHashArrayQuadraticProbeFcn>(); } TEST(Aha, Create_cstr_i64) { diff --git a/folly/test/AtomicHashMapTest.cpp b/folly/test/AtomicHashMapTest.cpp index e173a4f4..46223107 100644 --- a/folly/test/AtomicHashMapTest.cpp +++ b/folly/test/AtomicHashMapTest.cpp @@ -101,10 +101,12 @@ typedef int32_t ValueT; typedef AtomicHashMap AHMapT; typedef AHMapT::value_type RecordT; typedef AtomicHashArray AHArrayT; - AHArrayT::Config config; +typedef folly::QuadraticProbingAtomicHashMap QPAHMapT; +QPAHMapT::Config qpConfig; static AHArrayT::SmartPtr globalAHA(nullptr); static std::unique_ptr globalAHM; +static std::unique_ptr globalQPAHM; // Generate a deterministic value based on an input key static int genVal(int key) { @@ -353,6 +355,15 @@ void* insertThread(void* jj) { return nullptr; } +void* qpInsertThread(void* jj) { + int64_t j = (int64_t) jj; + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i + j * numOpsPerThread); + globalQPAHM->insert(key, genVal(key)); + } + return nullptr; +} + void* insertThreadArr(void* jj) { int64_t j = (int64_t) jj; for (int i = 0; i < numOpsPerThread; ++i) { @@ -715,6 +726,19 @@ void loadGlobalAhm() { EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements); } +void loadGlobalQPAhm() { + std::cout << "loading global QPAHM with " << FLAGS_numThreads + << " threads...\n"; + uint64_t 
start = nowInUsec(); + globalQPAHM.reset(new QPAHMapT(maxBMElements, qpConfig)); + numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads; + runThreads(qpInsertThread); + uint64_t elapsed = nowInUsec() - start; + std::cout << " took " << elapsed / 1000 << " ms (" << + (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n"; + EXPECT_EQ(globalQPAHM->size(), FLAGS_numBMElements); +} + } BENCHMARK(st_aha_find, iters) { @@ -733,6 +757,14 @@ BENCHMARK(st_ahm_find, iters) { } } +BENCHMARK(st_qpahm_find, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + for (size_t i = 0; i < iters; i++) { + KeyT key = randomizeKey(i); + folly::doNotOptimizeAway(globalQPAHM->find(key)->second); + } +} + BENCHMARK_DRAW_LINE() BENCHMARK(mt_ahm_miss, iters) { @@ -749,6 +781,20 @@ BENCHMARK(mt_ahm_miss, iters) { }); } +BENCHMARK(mt_qpahm_miss, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = i + j * numOpsPerThread * 100; + folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end()); + } + return nullptr; + }); +} + BENCHMARK(st_ahm_miss, iters) { CHECK_LE(iters, FLAGS_numBMElements); for (size_t i = 0; i < iters; i++) { @@ -757,6 +803,14 @@ BENCHMARK(st_ahm_miss, iters) { } } +BENCHMARK(st_qpahm_miss, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + for (size_t i = 0; i < iters; i++) { + KeyT key = randomizeKey(i + iters * 100); + folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end()); + } +} + BENCHMARK(mt_ahm_find_insert_mix, iters) { CHECK_LE(iters, FLAGS_numBMElements); numOpsPerThread = iters / FLAGS_numThreads; @@ -776,6 +830,26 @@ BENCHMARK(mt_ahm_find_insert_mix, iters) { }); } + +BENCHMARK(mt_qpahm_find_insert_mix, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + if (i % 128) { // ~1% insert mix + KeyT key = randomizeKey(i + j * numOpsPerThread); + folly::doNotOptimizeAway(globalQPAHM->find(key)->second); + } else { + KeyT key = randomizeKey(i + j * numOpsPerThread * 100); + globalQPAHM->insert(key, genVal(key)); + } + } + return nullptr; + }); +} + BENCHMARK(mt_aha_find, iters) { CHECK_LE(iters, FLAGS_numBMElements); numOpsPerThread = iters / FLAGS_numThreads; @@ -804,6 +878,20 @@ BENCHMARK(mt_ahm_find, iters) { }); } +BENCHMARK(mt_qpahm_find, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i + j * numOpsPerThread); + folly::doNotOptimizeAway(globalQPAHM->find(key)->second); + } + return nullptr; + }); +} + KeyT k; BENCHMARK(st_baseline_modulus_and_random, iters) { for (size_t i = 0; i < iters; ++i) { @@ -821,6 +909,14 @@ BENCHMARK(mt_ahm_insert, iters) { runThreads(insertThread); } +BENCHMARK(mt_qpahm_insert, iters) { + BENCHMARK_SUSPEND { + globalQPAHM.reset(new QPAHMapT(int(iters * LF), qpConfig)); + numOpsPerThread = iters / FLAGS_numThreads; + } + runThreads(qpInsertThread); +} + BENCHMARK(st_ahm_insert, iters) { folly::BenchmarkSuspender susp; std::unique_ptr ahm(new AHMapT(int(iters * LF), config)); @@ -832,12 +928,25 @@ 
BENCHMARK(st_ahm_insert, iters) {
  }
}

+BENCHMARK(st_qpahm_insert, iters) {
+  folly::BenchmarkSuspender susp;
+  std::unique_ptr<QPAHMapT> ahm(new QPAHMapT(int(iters * LF), qpConfig));
+  susp.dismiss();
+
+  for (size_t i = 0; i < iters; i++) {
+    KeyT key = randomizeKey(i);
+    ahm->insert(key, genVal(key));
+  }
+}
+
void benchmarkSetup() {
  config.maxLoadFactor = FLAGS_maxLoadFactor;
+  qpConfig.maxLoadFactor = FLAGS_maxLoadFactor;
  configRace.maxLoadFactor = 0.5;
  int numCores = sysconf(_SC_NPROCESSORS_ONLN);
  loadGlobalAha();
  loadGlobalAhm();
+  loadGlobalQPAhm();

  string numIters = folly::to<string>(
    std::min(1000000, int(FLAGS_numBMElements)));
@@ -871,28 +980,38 @@ int main(int argc, char** argv) {
}

/*
-Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled
-  (12 physical cores, 12 MB cache, 72 GB RAM)
+loading global AHA with 8 threads...
+  took 487 ms (40 ns/insert).
+loading global AHM with 8 threads...
+  took 478 ms (39 ns/insert).
+loading global QPAHM with 8 threads...
+  took 478 ms (39 ns/insert).

Running AHM benchmarks on machine with 24 logical cores.
  num elements per map: 12000000
  num threads for mt tests: 24
  AHM load factor: 0.75

-Benchmark                            Iters   Total t    t/iter  iter/sec
-------------------------------------------------------------------------------
-Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
-* BM_mt_aha_find                   1000000  7.767 ms  7.767 ns   122.8 M
- +0.81% BM_mt_ahm_find             1000000   7.83 ms   7.83 ns   121.8 M
-------------------------------------------------------------------------------
-Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
-* BM_st_aha_find                   1000000  57.83 ms  57.83 ns   16.49 M
- +77.9% BM_st_ahm_find             1000000  102.9 ms  102.9 ns    9.27 M
-------------------------------------------------------------------------------
-BM_mt_ahm_miss                     1000000  2.937 ms  2.937 ns   324.7 M
-BM_st_ahm_miss                     1000000  164.2 ms  164.2 ns   5.807 M
-BM_mt_ahm_find_insert_mix          1000000  8.797 ms  8.797 ns   108.4 M
-BM_mt_ahm_insert                   1000000  17.39 ms  17.39 ns   54.83 M
-BM_st_ahm_insert                   1000000  106.8 ms  106.8 ns    8.93 M
-BM_st_baseline_modulus_and_rando   1000000  6.223 ms  6.223 ns   153.2 M
+============================================================================
+folly/test/AtomicHashMapTest.cpp              relative  time/iter  iters/s
+============================================================================
+st_aha_find                                               92.63ns   10.80M
+st_ahm_find                                              107.78ns    9.28M
+st_qpahm_find                                             90.69ns   11.03M
+----------------------------------------------------------------------------
+mt_ahm_miss                                                2.09ns  477.36M
+mt_qpahm_miss                                              1.37ns  728.82M
+st_ahm_miss                                              241.07ns    4.15M
+st_qpahm_miss                                            223.17ns    4.48M
+mt_ahm_find_insert_mix                                     8.05ns  124.24M
+mt_qpahm_find_insert_mix                                   9.10ns  109.85M
+mt_aha_find                                                6.82ns  146.68M
+mt_ahm_find                                                7.95ns  125.77M
+mt_qpahm_find                                              6.81ns  146.83M
+st_baseline_modulus_and_random                             6.02ns  166.03M
+mt_ahm_insert                                             14.29ns   69.97M
+mt_qpahm_insert                                           11.68ns   85.61M
+st_ahm_insert                                            125.39ns    7.98M
+st_qpahm_insert                                          128.76ns    7.77M
+============================================================================
*/
--
2.34.1
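
The probing change itself is small but easy to misread in diff form. The standalone sketch below is not part of the patch; it simply mirrors the arithmetic of AtomicHashArrayLinearProbeFcn and AtomicHashArrayQuadraticProbeFcn (with the LIKELY() hint dropped) and prints the cells each strategy visits from the same anchor slot in a small table of capacity 16. It illustrates why quadratic probing helps at high load factors: linear probing walks through a collision cluster one cell at a time, while the quadratic step grows with numProbes, so the probe leaves the cluster after a few hops.

// Standalone sketch (not part of the patch): same arithmetic as the two
// probe functors added to folly/AtomicHashArray.h, minus the LIKELY() macro.
#include <cstddef>
#include <cstdio>

// Linear probing: the step is always 1, so probes scan the next cell.
size_t linearProbe(size_t idx, size_t /*numProbes*/, size_t capacity) {
  idx += 1;
  // Avoid modulus because it's slow; idx never exceeds 2 * capacity - 1.
  return idx < capacity ? idx : idx - capacity;
}

// Quadratic probing: the step grows with the probe count, so from anchor i
// the cells i, i+1, i+3, i+6, ... (triangular offsets) are visited.
size_t quadraticProbe(size_t idx, size_t numProbes, size_t capacity) {
  idx += numProbes;
  return idx < capacity ? idx : idx - capacity;
}

int main() {
  const size_t capacity = 16;
  size_t lin = 3, quad = 3;  // both sequences start at anchor cell 3
  std::printf("probe#  linear  quadratic\n");
  for (size_t numProbes = 1; numProbes <= 6; ++numProbes) {
    lin = linearProbe(lin, numProbes, capacity);
    quad = quadraticProbe(quad, numProbes, capacity);
    std::printf("%6zu  %6zu  %9zu\n", numProbes, lin, quad);
  }
  return 0;
}

Because the next cell is now a function of the probe counter, find(), insert(), and erase() must advance numProbes identically, which is exactly what the repeated NOTE comments in AtomicHashArray-inl.h above are guarding.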
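
For callers, the patch exposes the new behavior through the QuadraticProbingAtomicHashMap alias added to folly/AtomicHashMap.h. Below is a minimal usage sketch, assuming the folly headers from this patch are on the include path; the constructor, Config::maxLoadFactor, and the insert()/find()/erase() calls are the same ones exercised by the tests and benchmarks above, and the 0.8 load factor matches the use case described in the summary.

// Minimal usage sketch (assumes folly with this patch is installed).
#include <folly/AtomicHashMap.h>

#include <cstdint>
#include <cstdio>

int main() {
  // Same AtomicHashMap, but the underlying AtomicHashArray probes
  // quadratically instead of linearly.
  using Map = folly::QuadraticProbingAtomicHashMap<int64_t, int64_t>;

  // Config is the one AtomicHashMap already exposes; only the probe
  // functor differs. 0.8 mirrors the load factor from the summary.
  Map::Config config;
  config.maxLoadFactor = 0.8;

  Map map(1000000 /* estimated final size */, config);

  map.insert(42, 4242);  // two-argument insert, as used in the tests
  auto it = map.find(42);
  if (it != map.end()) {
    std::printf("42 -> %ld\n", static_cast<long>(it->second));
  }
  map.erase(42);
  return 0;
}

Everything else about AtomicHashMap is unchanged; choosing the quadratic functor only alters how the underlying AtomicHashArray walks cells after a collision.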