add config to allow using quadratic probing

author Xiaofan Yang <xiaofan@fb.com>

Thu, 29 Oct 2015 19:27:58 +0000 (12:27 -0700)

committer facebook-github-bot-1 <folly-bot@fb.com>

Thu, 29 Oct 2015 20:20:22 +0000 (13:20 -0700)
author Xiaofan Yang <xiaofan@fb.com>
Thu, 29 Oct 2015 19:27:58 +0000 (12:27 -0700)
committer facebook-github-bot-1 <folly-bot@fb.com>
Thu, 29 Oct 2015 20:20:22 +0000 (13:20 -0700)
diff --git a/folly/AtomicHashArray-inl.h b/folly/AtomicHashArray-inl.h

index 7d6b13e1f9e22b03a5a5800fbac27d0576d063ec..e3b32a049bcaefeb34f8097f04a4bec6fec89eb5 100644 (file)
--- a/folly/AtomicHashArray-inl.h
+++ b/folly/AtomicHashArray-inl.h
@@ -25,8 +25,8 @@ namespace folly {
  
  // AtomicHashArray private constructor --
  template <class KeyT, class ValueT,
-          class HashFcn, class EqualFcn, class Allocator>
-AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+          class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
+AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey,
                  KeyT erasedKey, double _maxLoadFactor, size_t cacheSize)
      : capacity_(capacity),
@@ -44,17 +44,17 @@ AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey,
   *   ret.index is set to capacity_.
   */
  template <class KeyT, class ValueT,
-          class HashFcn, class EqualFcn, class Allocator>
+          class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
  typename AtomicHashArray<KeyT, ValueT,
-         HashFcn, EqualFcn, Allocator>::SimpleRetT
-AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+         HashFcn, EqualFcn, Allocator, ProbeFcn>::SimpleRetT
+AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  findInternal(const KeyT key_in) {
    DCHECK_NE(key_in, kEmptyKey_);
    DCHECK_NE(key_in, kLockedKey_);
    DCHECK_NE(key_in, kErasedKey_);
    for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0;
         ;
-       idx = probeNext(idx, numProbes)) {
+       idx = ProbeFcn()(idx, numProbes, capacity_)) {
      const KeyT key = acquireLoadKey(cells_[idx]);
      if (LIKELY(EqualFcn()(key, key_in))) {
        return SimpleRetT(idx, true);
@@ -63,6 +63,8 @@ findInternal(const KeyT key_in) {
        // if we hit an empty element, this key does not exist
        return SimpleRetT(capacity_, false);
      }
+    // NOTE: the way we count numProbes must be same in find(), insert(),
+    // and erase(). Otherwise it may break probing.
      ++numProbes;
      if (UNLIKELY(numProbes >= capacity_)) {
        // probed every cell...fail
@@ -82,11 +84,11 @@ findInternal(const KeyT key_in) {
   *   default.
   */
  template <class KeyT, class ValueT,
-          class HashFcn, class EqualFcn, class Allocator>
+          class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
  template <typename... ArgTs>
  typename AtomicHashArray<KeyT, ValueT,
-         HashFcn, EqualFcn, Allocator>::SimpleRetT
-AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+         HashFcn, EqualFcn, Allocator, ProbeFcn>::SimpleRetT
+AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) {
    const short NO_NEW_INSERTS = 1;
    const short NO_PENDING_INSERTS = 2;
@@ -174,13 +176,16 @@ insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) {
        continue;
      }
  
+
+    // NOTE: the way we count numProbes must be same in find(),
+    // insert(), and erase(). Otherwise it may break probing.
      ++numProbes;
      if (UNLIKELY(numProbes >= capacity_)) {
        // probed every cell...fail
        return SimpleRetT(capacity_, false);
      }
  
-    idx = probeNext(idx, numProbes);
+    idx = ProbeFcn()(idx, numProbes, capacity_);
    }
  }
  
@@ -196,15 +201,16 @@ insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) {
   *   touch it either.
   */
  template <class KeyT, class ValueT,
-          class HashFcn, class EqualFcn, class Allocator>
-size_t AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+          class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
+size_t AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  erase(KeyT key_in) {
    CHECK_NE(key_in, kEmptyKey_);
    CHECK_NE(key_in, kLockedKey_);
    CHECK_NE(key_in, kErasedKey_);
+
    for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0;
         ;
-       idx = probeNext(idx, numProbes)) {
+       idx = ProbeFcn()(idx, numProbes, capacity_)) {
      DCHECK_LT(idx, capacity_);
      value_type* cell = &cells_[idx];
      KeyT currentKey = acquireLoadKey(*cell);
@@ -231,6 +237,9 @@ erase(KeyT key_in) {
        // If another thread succeeds in erasing our key, we'll stop our search.
        return 0;
      }
+
+    // NOTE: the way we count numProbes must be same in find(), insert(),
+    // and erase(). Otherwise it may break probing.
      ++numProbes;
      if (UNLIKELY(numProbes >= capacity_)) {
        // probed every cell...fail
@@ -240,10 +249,10 @@ erase(KeyT key_in) {
  }
  
  template <class KeyT, class ValueT,
-         class HashFcn, class EqualFcn, class Allocator>
+         class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
  typename AtomicHashArray<KeyT, ValueT,
-         HashFcn, EqualFcn, Allocator>::SmartPtr
-AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+         HashFcn, EqualFcn, Allocator, ProbeFcn>::SmartPtr
+AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  create(size_t maxSize, const Config& c) {
    CHECK_LE(c.maxLoadFactor, 1.0);
    CHECK_GT(c.maxLoadFactor, 0.0);
@@ -282,8 +291,8 @@ create(size_t maxSize, const Config& c) {
  }
  
  template <class KeyT, class ValueT,
-          class HashFcn, class EqualFcn, class Allocator>
-void AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+          class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
+void AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  destroy(AtomicHashArray* p) {
    assert(p);
  
@@ -301,8 +310,8 @@ destroy(AtomicHashArray* p) {
  
  // clear -- clears all keys and values in the map and resets all counters
  template <class KeyT, class ValueT,
-          class HashFcn, class EqualFcn, class Allocator>
-void AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+          class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
+void AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  clear() {
    FOR_EACH_RANGE(i, 0, capacity_) {
      if (cells_[i].first != kEmptyKey_) {
@@ -321,9 +330,10 @@ clear() {
  // Iterator implementation
  
  template <class KeyT, class ValueT,
-          class HashFcn, class EqualFcn, class Allocator>
+          class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
  template <class ContT, class IterVal>
-struct AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::aha_iterator
+struct AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    aha_iterator
      : boost::iterator_facade<aha_iterator<ContT,IterVal>,
                               IterVal,
                               boost::forward_traversal_tag>
diff --git a/folly/AtomicHashArray.h b/folly/AtomicHashArray.h

index fbe976d2c6ce142917b55158fceaf2eadd633ad8..71ef2c76ab5fe87ffba5b5679c7d64c5dc064963 100644 (file)
--- a/folly/AtomicHashArray.h
+++ b/folly/AtomicHashArray.h
@@ -42,16 +42,38 @@
  
  namespace folly {
  
+struct AtomicHashArrayLinearProbeFcn
+{
+  inline size_t operator()(size_t idx, size_t numProbes, size_t capacity) const{
+    idx += 1; // linear probing
+
+    // Avoid modulus because it's slow
+    return LIKELY(idx < capacity) ? idx : (idx - capacity);
+  }
+};
+
+struct AtomicHashArrayQuadraticProbeFcn
+{
+  inline size_t operator()(size_t idx, size_t numProbes, size_t capacity) const{
+    idx += numProbes; // quadratic probing
+
+    // Avoid modulus because it's slow
+    return LIKELY(idx < capacity) ? idx : (idx - capacity);
+  }
+};
+
  template <class KeyT, class ValueT,
            class HashFcn = std::hash<KeyT>,
            class EqualFcn = std::equal_to<KeyT>,
-          class Allocator = std::allocator<char>>
+          class Allocator = std::allocator<char>,
+          class ProbeFcn = AtomicHashArrayLinearProbeFcn>
  class AtomicHashMap;
  
  template <class KeyT, class ValueT,
            class HashFcn = std::hash<KeyT>,
            class EqualFcn = std::equal_to<KeyT>,
-          class Allocator = std::allocator<char>>
+          class Allocator = std::allocator<char>,
+          class ProbeFcn = AtomicHashArrayLinearProbeFcn>
  class AtomicHashArray : boost::noncopyable {
    static_assert((std::is_convertible<KeyT,int32_t>::value ||
                   std::is_convertible<KeyT,int64_t>::value ||
@@ -240,13 +262,20 @@ class AtomicHashArray : boost::noncopyable {
    /* Private data and helper functions... */
  
   private:
-  friend class AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>;
+friend class AtomicHashMap<KeyT,
+                           ValueT,
+                           HashFcn,
+                           EqualFcn,
+                           Allocator,
+                           ProbeFcn>;
  
    struct SimpleRetT { size_t idx; bool success;
      SimpleRetT(size_t i, bool s) : idx(i), success(s) {}
      SimpleRetT() = default;
    };
  
+
+
    template <typename... ArgTs>
    SimpleRetT insertInternal(KeyT key, ArgTs&&... vCtorArgs);
  
@@ -307,12 +336,7 @@ class AtomicHashArray : boost::noncopyable {
      return LIKELY(probe < capacity_) ? probe : hashVal % capacity_;
    }
  
-  inline size_t probeNext(size_t idx, size_t /*numProbes*/) {
-    //idx += numProbes; // quadratic probing
-    idx += 1; // linear probing
-    // Avoid modulus because it's slow
-    return LIKELY(idx < capacity_) ? idx : (idx - capacity_);
-  }
+
  }; // AtomicHashArray
  
  } // namespace folly
diff --git a/folly/AtomicHashMap-inl.h b/folly/AtomicHashMap-inl.h

index 6d8bb917ee2dac266e2db52dd677959cabcccb36..61c23b14cfdd1f24698620e63b13cd75ec8da016 100644 (file)
--- a/folly/AtomicHashMap-inl.h
+++ b/folly/AtomicHashMap-inl.h
@@ -24,9 +24,13 @@ namespace folly {
  
  // AtomicHashMap constructor -- Atomic wrapper that allows growth
  // This class has a lot of overhead (184 Bytes) so only use for big maps
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  AtomicHashMap(size_t finalSizeEst, const Config& config)
    : kGrowthFrac_(config.growthFactor < 0 ?
                   1.0 - config.maxLoadFactor : config.growthFactor) {
@@ -41,12 +45,16 @@ AtomicHashMap(size_t finalSizeEst, const Config& config)
  }
  
  // emplace --
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
  template <typename... ArgTs>
  std::pair<typename AtomicHashMap<KeyT, ValueT, HashFcn,
-                                 EqualFcn, Allocator>::iterator, bool>
-AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+                                 EqualFcn, Allocator, ProbeFcn>::iterator, bool>
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  emplace(key_type k, ArgTs&&... vCtorArgs) {
    SimpleRetT ret = insertInternal(k, std::forward<ArgTs>(vCtorArgs)...);
    SubMap* subMap = subMaps_[ret.i].load(std::memory_order_relaxed);
@@ -55,11 +63,16 @@ emplace(key_type k, ArgTs&&... vCtorArgs) {
  }
  
  // insertInternal -- Allocates new sub maps as existing ones fill up.
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
  template <typename... ArgTs>
-typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::SimpleRetT
-AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    SimpleRetT
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  insertInternal(key_type key, ArgTs&&... vCtorArgs) {
   beginInsertInternal:
    auto nextMapIdx = // this maintains our state
@@ -133,11 +146,16 @@ insertInternal(key_type key, ArgTs&&... vCtorArgs) {
  }
  
  // find --
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::iterator
-AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
-find(KeyT k) {
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    iterator
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::find(
+    KeyT k) {
    SimpleRetT ret = findInternal(k);
    if (!ret.success) {
      return end();
@@ -146,21 +164,30 @@ find(KeyT k) {
    return iterator(this, ret.i, subMap->makeIter(ret.j));
  }
  
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
  typename AtomicHashMap<KeyT, ValueT,
-         HashFcn, EqualFcn, Allocator>::const_iterator
-AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+         HashFcn, EqualFcn, Allocator, ProbeFcn>::const_iterator
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  find(KeyT k) const {
    return const_cast<AtomicHashMap*>(this)->find(k);
  }
  
  // findInternal --
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::SimpleRetT
-AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
-findInternal(const KeyT k) const {
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    SimpleRetT
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    findInternal(const KeyT k) const {
    SubMap* const primaryMap = subMaps_[0].load(std::memory_order_relaxed);
    typename SubMap::SimpleRetT ret = primaryMap->findInternal(k);
    if (LIKELY(ret.idx != primaryMap->capacity_)) {
@@ -180,10 +207,15 @@ findInternal(const KeyT k) const {
  }
  
  // findAtInternal -- see encodeIndex() for details.
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::SimpleRetT
-AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    SimpleRetT
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  findAtInternal(uint32_t idx) const {
    uint32_t subMapIdx, subMapOffset;
    if (idx & kSecondaryMapBit_) {
@@ -201,10 +233,15 @@ findAtInternal(uint32_t idx) const {
  }
  
  // erase --
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::size_type
-AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+typename AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    size_type
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  erase(const KeyT k) {
    int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
    FOR_EACH_RANGE(i, 0, numMaps) {
@@ -218,9 +255,13 @@ erase(const KeyT k) {
  }
  
  // capacity -- summation of capacities of all submaps
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-size_t AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+size_t AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  capacity() const {
    size_t totalCap(0);
    int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
@@ -232,9 +273,13 @@ capacity() const {
  
  // spaceRemaining --
  // number of new insertions until current submaps are all at max load
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-size_t AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+size_t AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  spaceRemaining() const {
    size_t spaceRem(0);
    int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
@@ -250,9 +295,13 @@ spaceRemaining() const {
  
  // clear -- Wipes all keys and values from primary map and destroys
  // all secondary maps.  Not thread safe.
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-void AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+void AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  clear() {
    subMaps_[0].load(std::memory_order_relaxed)->clear();
    int const numMaps = numMapsAllocated_
@@ -267,9 +316,13 @@ clear() {
  }
  
  // size --
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-size_t AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+size_t AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
  size() const {
    size_t totalSize(0);
    int const numMaps = numMapsAllocated_.load(std::memory_order_acquire);
@@ -297,10 +350,15 @@ size() const {
  //         31              1
  //      27-30   which subMap
  //       0-26  subMap offset (index_ret input)
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-inline uint32_t AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::
-encodeIndex(uint32_t subMap, uint32_t offset) {
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+inline uint32_t
+AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    encodeIndex(uint32_t subMap, uint32_t offset) {
    DCHECK_EQ(offset & kSecondaryMapBit_, 0);  // offset can't be too big
    if (subMap == 0) return offset;
    // Make sure subMap isn't too big
@@ -315,14 +373,17 @@ encodeIndex(uint32_t subMap, uint32_t offset) {
  
  // Iterator implementation
  
-template <typename KeyT, typename ValueT,
-          typename HashFcn, typename EqualFcn, typename Allocator>
-template<class ContT, class IterVal, class SubIt>
-struct AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::ahm_iterator
-    : boost::iterator_facade<ahm_iterator<ContT,IterVal,SubIt>,
-                             IterVal,
-                             boost::forward_traversal_tag>
-{
+template <typename KeyT,
+          typename ValueT,
+          typename HashFcn,
+          typename EqualFcn,
+          typename Allocator,
+          typename ProbeFcn>
+template <class ContT, class IterVal, class SubIt>
+struct AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
+    ahm_iterator : boost::iterator_facade<ahm_iterator<ContT, IterVal, SubIt>,
+                                          IterVal,
+                                          boost::forward_traversal_tag> {
    explicit ahm_iterator() : ahm_(0) {}
  
    // Conversion ctor for interoperability between const_iterator and
diff --git a/folly/AtomicHashMap.h b/folly/AtomicHashMap.h

index 90c59661fa81d1de7ca63b0ce3fa72479d2fc10b..c6f06bd8f27016d0dcf5e371e94ece91af2508cd 100644 (file)
--- a/folly/AtomicHashMap.h
+++ b/folly/AtomicHashMap.h
@@ -156,9 +156,10 @@ struct AtomicHashMapFullError : std::runtime_error {
  };
  
  template<class KeyT, class ValueT,
-         class HashFcn, class EqualFcn, class Allocator>
+         class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
  class AtomicHashMap : boost::noncopyable {
-  typedef AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator> SubMap;
+typedef AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>
+    SubMap;
  
   public:
    typedef KeyT                key_type;
@@ -422,6 +423,18 @@ class AtomicHashMap : boost::noncopyable {
  
  }; // AtomicHashMap
  
+template <class KeyT,
+          class ValueT,
+          class HashFcn = std::hash<KeyT>,
+          class EqualFcn = std::equal_to<KeyT>,
+          class Allocator = std::allocator<char>>
+using QuadraticProbingAtomicHashMap =
+    AtomicHashMap<KeyT,
+                  ValueT,
+                  HashFcn,
+                  EqualFcn,
+                  Allocator,
+                  AtomicHashArrayQuadraticProbeFcn>;
  } // namespace folly
  
  #include <folly/AtomicHashMap-inl.h>
diff --git a/folly/test/AtomicHashArrayTest.cpp b/folly/test/AtomicHashArrayTest.cpp

index 3c7c32f29cac26c802df0c0ce93272f8c31898f2..1f83bc754b8e9d17a536502b975ed219716d6498 100644 (file)
--- a/folly/test/AtomicHashArrayTest.cpp
+++ b/folly/test/AtomicHashArrayTest.cpp
@@ -96,10 +96,13 @@ pair<KeyT,ValueT> createEntry(int i) {
                             to<ValueT>(i + 3));
  }
  
-template<class KeyT, class ValueT, class Allocator = std::allocator<char>>
+template <class KeyT,
+          class ValueT,
+          class Allocator = std::allocator<char>,
+          class ProbeFcn = AtomicHashArrayLinearProbeFcn>
  void testMap() {
    typedef AtomicHashArray<KeyT, ValueT, std::hash<KeyT>,
-                          std::equal_to<KeyT>, Allocator> MyArr;
+                          std::equal_to<KeyT>, Allocator, ProbeFcn> MyArr;
    auto arr = MyArr::create(150);
    map<KeyT, ValueT> ref;
    for (int i = 0; i < 100; ++i) {
@@ -144,10 +147,13 @@ void testMap() {
    }
  }
  
-template<class KeyT, class ValueT, class Allocator = std::allocator<char>>
+template<class KeyT, class ValueT,
+    class Allocator = std::allocator<char>,
+    class ProbeFcn = AtomicHashArrayLinearProbeFcn>
  void testNoncopyableMap() {
    typedef AtomicHashArray<KeyT, std::unique_ptr<ValueT>, std::hash<KeyT>,
-                          std::equal_to<KeyT>, Allocator> MyArr;
+                          std::equal_to<KeyT>, Allocator, ProbeFcn> MyArr;
+
    auto arr = MyArr::create(250);
    for (int i = 0; i < 100; i++) {
      arr->insert(make_pair(i,std::unique_ptr<ValueT>(new ValueT(i))));
@@ -168,34 +174,74 @@ void testNoncopyableMap() {
  TEST(Aha, InsertErase_i32_i32) {
    testMap<int32_t, int32_t>();
    testMap<int32_t, int32_t, MmapAllocator<char>>();
+  testMap<int32_t, int32_t,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testMap<int32_t, int32_t,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
    testNoncopyableMap<int32_t, int32_t>();
    testNoncopyableMap<int32_t, int32_t, MmapAllocator<char>>();
+  testNoncopyableMap<int32_t, int32_t,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testNoncopyableMap<int32_t, int32_t,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
  }
  TEST(Aha, InsertErase_i64_i32) {
    testMap<int64_t, int32_t>();
    testMap<int64_t, int32_t, MmapAllocator<char>>();
+  testMap<int64_t, int32_t,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testMap<int64_t, int32_t,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
    testNoncopyableMap<int64_t, int32_t>();
    testNoncopyableMap<int64_t, int32_t, MmapAllocator<char>>();
+  testNoncopyableMap<int64_t, int32_t,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testNoncopyableMap<int64_t, int32_t,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
  }
  TEST(Aha, InsertErase_i64_i64) {
    testMap<int64_t, int64_t>();
    testMap<int64_t, int64_t, MmapAllocator<char>>();
+  testMap<int64_t, int64_t,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testMap<int64_t, int64_t,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
    testNoncopyableMap<int64_t, int64_t>();
    testNoncopyableMap<int64_t, int64_t, MmapAllocator<char>>();
+  testNoncopyableMap<int64_t, int64_t,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testNoncopyableMap<int64_t, int64_t,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
  }
  TEST(Aha, InsertErase_i32_i64) {
    testMap<int32_t, int64_t>();
    testMap<int32_t, int64_t, MmapAllocator<char>>();
+  testMap<int32_t, int64_t,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testMap<int32_t, int64_t,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
    testNoncopyableMap<int32_t, int64_t>();
    testNoncopyableMap<int32_t, int64_t, MmapAllocator<char>>();
+  testNoncopyableMap<int32_t, int64_t,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testNoncopyableMap<int32_t, int64_t,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
  }
  TEST(Aha, InsertErase_i32_str) {
    testMap<int32_t, string>();
    testMap<int32_t, string, MmapAllocator<char>>();
+  testMap<int32_t, string,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testMap<int32_t, string,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
  }
  TEST(Aha, InsertErase_i64_str) {
    testMap<int64_t, string>();
    testMap<int64_t, string, MmapAllocator<char>>();
+  testMap<int64_t, string,
+      std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
+  testMap<int64_t, string,
+      MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
  }
  
  TEST(Aha, Create_cstr_i64) {
diff --git a/folly/test/AtomicHashMapTest.cpp b/folly/test/AtomicHashMapTest.cpp

index e173a4f49909766cbb75c29320b93e5f3dafafb5..462231077270bd7ce6c495086370781ad3ac27b1 100644 (file)
--- a/folly/test/AtomicHashMapTest.cpp
+++ b/folly/test/AtomicHashMapTest.cpp
@@ -101,10 +101,12 @@ typedef int32_t     ValueT;
  typedef AtomicHashMap<KeyT,ValueT> AHMapT;
  typedef AHMapT::value_type RecordT;
  typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
-
  AHArrayT::Config config;
+typedef folly::QuadraticProbingAtomicHashMap<KeyT,ValueT> QPAHMapT;
+QPAHMapT::Config qpConfig;
  static AHArrayT::SmartPtr globalAHA(nullptr);
  static std::unique_ptr<AHMapT> globalAHM;
+static std::unique_ptr<QPAHMapT> globalQPAHM;
  
  // Generate a deterministic value based on an input key
  static int genVal(int key) {
@@ -353,6 +355,15 @@ void* insertThread(void* jj) {
    return nullptr;
  }
  
+void* qpInsertThread(void* jj) {
+  int64_t j = (int64_t) jj;
+  for (int i = 0; i < numOpsPerThread; ++i) {
+    KeyT key = randomizeKey(i + j * numOpsPerThread);
+    globalQPAHM->insert(key, genVal(key));
+  }
+  return nullptr;
+}
+
  void* insertThreadArr(void* jj) {
    int64_t j = (int64_t) jj;
    for (int i = 0; i < numOpsPerThread; ++i) {
@@ -715,6 +726,19 @@ void loadGlobalAhm() {
    EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
  }
  
+void loadGlobalQPAhm() {
+  std::cout << "loading global QPAHM with " << FLAGS_numThreads
+            << " threads...\n";
+  uint64_t start = nowInUsec();
+  globalQPAHM.reset(new QPAHMapT(maxBMElements, qpConfig));
+  numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
+  runThreads(qpInsertThread);
+  uint64_t elapsed = nowInUsec() - start;
+  std::cout << "  took " << elapsed / 1000 << " ms (" <<
+    (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n";
+  EXPECT_EQ(globalQPAHM->size(), FLAGS_numBMElements);
+}
+
  }
  
  BENCHMARK(st_aha_find, iters) {
@@ -733,6 +757,14 @@ BENCHMARK(st_ahm_find, iters) {
    }
  }
  
+BENCHMARK(st_qpahm_find, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  for (size_t i = 0; i < iters; i++) {
+    KeyT key = randomizeKey(i);
+    folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
+  }
+}
+
  BENCHMARK_DRAW_LINE()
  
  BENCHMARK(mt_ahm_miss, iters) {
@@ -749,6 +781,20 @@ BENCHMARK(mt_ahm_miss, iters) {
    });
  }
  
+BENCHMARK(mt_qpahm_miss, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  numOpsPerThread = iters / FLAGS_numThreads;
+  runThreads([](void* jj) -> void* {
+    int64_t j = (int64_t) jj;
+    while (!runThreadsCreatedAllThreads.load());
+    for (int i = 0; i < numOpsPerThread; ++i) {
+      KeyT key = i + j * numOpsPerThread * 100;
+      folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
+    }
+    return nullptr;
+  });
+}
+
  BENCHMARK(st_ahm_miss, iters) {
    CHECK_LE(iters, FLAGS_numBMElements);
    for (size_t i = 0; i < iters; i++) {
@@ -757,6 +803,14 @@ BENCHMARK(st_ahm_miss, iters) {
    }
  }
  
+BENCHMARK(st_qpahm_miss, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  for (size_t i = 0; i < iters; i++) {
+    KeyT key = randomizeKey(i + iters * 100);
+    folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
+  }
+}
+
  BENCHMARK(mt_ahm_find_insert_mix, iters) {
    CHECK_LE(iters, FLAGS_numBMElements);
    numOpsPerThread = iters / FLAGS_numThreads;
@@ -776,6 +830,26 @@ BENCHMARK(mt_ahm_find_insert_mix, iters) {
    });
  }
  
+
+BENCHMARK(mt_qpahm_find_insert_mix, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  numOpsPerThread = iters / FLAGS_numThreads;
+  runThreads([](void* jj) -> void* {
+    int64_t j = (int64_t) jj;
+    while (!runThreadsCreatedAllThreads.load());
+    for (int i = 0; i < numOpsPerThread; ++i) {
+      if (i % 128) {  // ~1% insert mix
+        KeyT key = randomizeKey(i + j * numOpsPerThread);
+        folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
+      } else {
+        KeyT key = randomizeKey(i + j * numOpsPerThread * 100);
+        globalQPAHM->insert(key, genVal(key));
+      }
+    }
+    return nullptr;
+  });
+}
+
  BENCHMARK(mt_aha_find, iters) {
    CHECK_LE(iters, FLAGS_numBMElements);
    numOpsPerThread = iters / FLAGS_numThreads;
@@ -804,6 +878,20 @@ BENCHMARK(mt_ahm_find, iters) {
    });
  }
  
+BENCHMARK(mt_qpahm_find, iters) {
+  CHECK_LE(iters, FLAGS_numBMElements);
+  numOpsPerThread = iters / FLAGS_numThreads;
+  runThreads([](void* jj) -> void* {
+    int64_t j = (int64_t) jj;
+    while (!runThreadsCreatedAllThreads.load());
+    for (int i = 0; i < numOpsPerThread; ++i) {
+      KeyT key = randomizeKey(i + j * numOpsPerThread);
+      folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
+    }
+    return nullptr;
+  });
+}
+
  KeyT k;
  BENCHMARK(st_baseline_modulus_and_random, iters) {
    for (size_t i = 0; i < iters; ++i) {
@@ -821,6 +909,14 @@ BENCHMARK(mt_ahm_insert, iters) {
    runThreads(insertThread);
  }
  
+BENCHMARK(mt_qpahm_insert, iters) {
+  BENCHMARK_SUSPEND {
+    globalQPAHM.reset(new QPAHMapT(int(iters * LF), qpConfig));
+    numOpsPerThread = iters / FLAGS_numThreads;
+  }
+  runThreads(qpInsertThread);
+}
+
  BENCHMARK(st_ahm_insert, iters) {
    folly::BenchmarkSuspender susp;
    std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
@@ -832,12 +928,25 @@ BENCHMARK(st_ahm_insert, iters) {
    }
  }
  
+BENCHMARK(st_qpahm_insert, iters) {
+  folly::BenchmarkSuspender susp;
+  std::unique_ptr<QPAHMapT> ahm(new QPAHMapT(int(iters * LF), qpConfig));
+  susp.dismiss();
+
+  for (size_t i = 0; i < iters; i++) {
+    KeyT key = randomizeKey(i);
+    ahm->insert(key, genVal(key));
+  }
+}
+
  void benchmarkSetup() {
    config.maxLoadFactor = FLAGS_maxLoadFactor;
+  qpConfig.maxLoadFactor = FLAGS_maxLoadFactor;
    configRace.maxLoadFactor = 0.5;
    int numCores = sysconf(_SC_NPROCESSORS_ONLN);
    loadGlobalAha();
    loadGlobalAhm();
+  loadGlobalQPAhm();
    string numIters = folly::to<string>(
      std::min(1000000, int(FLAGS_numBMElements)));
  
@@ -871,28 +980,38 @@ int main(int argc, char** argv) {
  }
  
  /*
-Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled
-  (12 physical cores, 12 MB cache, 72 GB RAM)
+loading global AHA with 8 threads...
+  took 487 ms (40 ns/insert).
+loading global AHM with 8 threads...
+  took 478 ms (39 ns/insert).
+loading global QPAHM with 8 threads...
+  took 478 ms (39 ns/insert).
  
  Running AHM benchmarks on machine with 24 logical cores.
    num elements per map: 12000000
    num threads for mt tests: 24
    AHM load factor: 0.75
  
-Benchmark                               Iters   Total t    t/iter iter/sec
-------------------------------------------------------------------------------
-Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find
-*       BM_mt_aha_find                1000000  7.767 ms  7.767 ns  122.8 M
- +0.81% BM_mt_ahm_find                1000000   7.83 ms   7.83 ns  121.8 M
-------------------------------------------------------------------------------
-Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find
-*       BM_st_aha_find                1000000  57.83 ms  57.83 ns  16.49 M
- +77.9% BM_st_ahm_find                1000000  102.9 ms  102.9 ns   9.27 M
-------------------------------------------------------------------------------
-BM_mt_ahm_miss                        1000000  2.937 ms  2.937 ns  324.7 M
-BM_st_ahm_miss                        1000000  164.2 ms  164.2 ns  5.807 M
-BM_mt_ahm_find_insert_mix             1000000  8.797 ms  8.797 ns  108.4 M
-BM_mt_ahm_insert                      1000000  17.39 ms  17.39 ns  54.83 M
-BM_st_ahm_insert                      1000000  106.8 ms  106.8 ns   8.93 M
-BM_st_baseline_modulus_and_rando      1000000  6.223 ms  6.223 ns  153.2 M
+============================================================================
+folly/test/AtomicHashMapTest.cpp                relative  time/iter  iters/s
+============================================================================
+st_aha_find                                                 92.63ns   10.80M
+st_ahm_find                                                107.78ns    9.28M
+st_qpahm_find                                               90.69ns   11.03M
+----------------------------------------------------------------------------
+mt_ahm_miss                                                  2.09ns  477.36M
+mt_qpahm_miss                                                1.37ns  728.82M
+st_ahm_miss                                                241.07ns    4.15M
+st_qpahm_miss                                              223.17ns    4.48M
+mt_ahm_find_insert_mix                                       8.05ns  124.24M
+mt_qpahm_find_insert_mix                                     9.10ns  109.85M
+mt_aha_find                                                  6.82ns  146.68M
+mt_ahm_find                                                  7.95ns  125.77M
+mt_qpahm_find                                                6.81ns  146.83M
+st_baseline_modulus_and_random                               6.02ns  166.03M
+mt_ahm_insert                                               14.29ns   69.97M
+mt_qpahm_insert                                             11.68ns   85.61M
+st_ahm_insert                                              125.39ns    7.98M
+st_qpahm_insert                                            128.76ns    7.77M
+============================================================================
  */
author	Xiaofan Yang <xiaofan@fb.com>
	Thu, 29 Oct 2015 19:27:58 +0000 (12:27 -0700)
committer	facebook-github-bot-1 <folly-bot@fb.com>
	Thu, 29 Oct 2015 20:20:22 +0000 (13:20 -0700)
folly/AtomicHashArray-inl.h		patch \| blob \| history
folly/AtomicHashArray.h		patch \| blob \| history
folly/AtomicHashMap-inl.h		patch \| blob \| history
folly/AtomicHashMap.h		patch \| blob \| history
folly/test/AtomicHashArrayTest.cpp		patch \| blob \| history
folly/test/AtomicHashMapTest.cpp		patch \| blob \| history