/// O(1/(1-actual_load_factor)). Note that this is a pretty strong
/// limitation, because you can't remove existing keys.
///
-/// * 2^30 maximum capacity - you'll need to use something else if you
-/// have more than a billion entries. If this limit bothers you let it
-/// wouldn't be too hard to parameterize the internal indexes between
-/// uint32_t and uint64_t.
+/// * 2^30 maximum default capacity - by default AtomicUnorderedInsertMap
+/// uses uint32_t internal indexes (and steals 2 bits), limiting you
+/// to about a billion entries. If you need more you can fill in all
+/// of the template params so you change IndexType to uint64_t, or you
+/// can use AtomicUnorderedInsertMap64. 64-bit indexes will increase
+/// the space overhead of the map, of course.
///
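+/// For example, with the default IndexType (an illustrative sketch):
+///
+///   folly::AtomicUnorderedInsertMap<int, int> small(1000);  // fine
+///
+///   // throws std::invalid_argument: the requested capacity needs more
+///   // than 30 bits of slot index
+///   folly::AtomicUnorderedInsertMap<int, int> huge(1ULL << 31);
+///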
/// WHAT YOU GET IN EXCHANGE:
///
(boost::has_trivial_destructor<Key>::value &&
boost::has_trivial_destructor<Value>::value),
template<typename> class Atom = std::atomic,
- typename Allocator = folly::detail::MMapAlloc>
+ typename Allocator = folly::detail::MMapAlloc,
+ typename IndexType = uint32_t>
struct AtomicUnorderedInsertMap {
typedef const value_type& const_reference;
typedef struct ConstIterator {
- ConstIterator(const AtomicUnorderedInsertMap& owner, uint32_t slot)
+ ConstIterator(const AtomicUnorderedInsertMap& owner, IndexType slot)
: owner_(owner)
, slot_(slot)
{}
private:
const AtomicUnorderedInsertMap& owner_;
- uint32_t slot_;
+ IndexType slot_;
} const_iterator;
friend ConstIterator;
- /// Constructs a map that will support the insertion of maxSize
- /// key-value pairs without exceeding the max load factor. Load
- /// factors of greater than 1 are not supported, and once the actual load
- /// factor of the map approaches 1 the insert performance will suffer.
- /// The capacity is limited to 2^30 (about a billion), beyond which
- /// we will throw invalid_argument.
+ /// Constructs a map that will support the insertion of maxSize key-value
+ /// pairs without exceeding the max load factor. Load factors of greater
+ /// than 1 are not supported, and once the actual load factor of the
+ /// map approaches 1 the insert performance will suffer. The capacity
+ /// is limited to 2^30 (about a billion) for the default IndexType,
+ /// beyond which we will throw invalid_argument.
explicit AtomicUnorderedInsertMap(
size_t maxSize,
float maxLoadFactor = 0.8f,
: allocator_(alloc)
{
  size_t capacity = maxSize / std::min(1.0f, maxLoadFactor) + 128;
- if (capacity > (1 << 30) && maxSize < (1 << 30)) {
+ size_t avail = size_t{1} << (8 * sizeof(IndexType) - 2);
+ if (capacity > avail && maxSize < avail) {
// we'll do our best
- capacity = (1 << 30);
+ capacity = avail;
}
- if (capacity < maxSize || capacity > (1 << 30)) {
+ if (capacity < maxSize || capacity > avail) {
throw std::invalid_argument(
- "AtomicUnorderedInsertMap capacity must fit in 30 bits");
+ "AtomicUnorderedInsertMap capacity must fit in IndexType with 2 bits "
+ "left over");
}
numSlots_ = capacity;
}
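+
+  // Worked example of the sizing above (illustrative): to honor a max load
+  // factor of 0.8, maxSize = 100 is padded to 100 / 0.8 + 128 = 253 slots.
+  // Because 2 bits of each slot index are reserved for bucket state,
+  // avail = 1 << (8 * sizeof(IndexType) - 2):
+  //   uint16_t -> 2^14 =        16,384 slots
+  //   uint32_t -> 2^30 = 1,073,741,824 slots (the documented ~1 billion)
+  //   uint64_t -> 2^62 (far more than could ever be allocated)
+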
const_iterator cbegin() const {
- uint32_t slot = numSlots_ - 1;
+ IndexType slot = numSlots_ - 1;
while (slot > 0 && slots_[slot].state() != LINKED) {
--slot;
}
kMaxAllocationTries = 1000, // after this we throw
};
- enum BucketState : uint32_t {
+ enum BucketState : IndexType {
EMPTY = 0,
CONSTRUCTING = 1,
LINKED = 2,
/// of the first bucket for the chain whose keys map to this slot.
/// When things are going well the head usually links to this slot,
/// but that doesn't always have to happen.
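+    /// (Packing sketch, consistent with the `hs >> 2` in find() below:
+    /// the low 2 bits hold this slot's BucketState and the remaining
+    /// bits hold the head index, i.e. roughly
+    ///   head  = headAndState_ >> 2
+    ///   state = BucketState(headAndState_ & 3).)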
- Atom<uint32_t> headAndState_;
+ Atom<IndexType> headAndState_;
/// The next bucket in the chain
- uint32_t next_;
+ IndexType next_;
/// Key and Value
typename std::aligned_storage<sizeof(value_type),
Allocator allocator_;
Slot* slots_;
- uint32_t keyToSlotIdx(const Key& key) const {
+ IndexType keyToSlotIdx(const Key& key) const {
size_t h = hasher()(key);
h &= slotMask_;
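+    // fold the masked hash into [0, numSlots_)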
while (h >= numSlots_) {
return h;
}
- uint32_t find(const Key& key, uint32_t slot) const {
+ IndexType find(const Key& key, IndexType slot) const {
KeyEqual ke = {};
auto hs = slots_[slot].headAndState_.load(std::memory_order_acquire);
for (slot = hs >> 2; slot != 0; slot = slots_[slot].next_) {
/// Allocates a slot and returns its index. Tries to put it near
/// slots_[start].
- uint32_t allocateNear(uint32_t start) {
+ IndexType allocateNear(IndexType start) {
for (auto tries = 0; tries < kMaxAllocationTries; ++tries) {
auto slot = allocationAttempt(start, tries);
auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
/// Returns the slot we should attempt to allocate after tries failed
/// tries, starting from the specified slot. This is pulled out so we
/// can specialize it differently during deterministic testing
- uint32_t allocationAttempt(uint32_t start, uint32_t tries) const {
+ IndexType allocationAttempt(IndexType start, IndexType tries) const {
if (LIKELY(tries < 8 && start + tries < numSlots_)) {
return start + tries;
} else {
- uint32_t rv = folly::Random::rand32(numSlots_);
+ IndexType rv;
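+      // numSlots_ can exceed rand32's range when IndexType is 64 bits,
+      // so choose the generator that covers the whole slot space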
+ if (sizeof(IndexType) <= 4) {
+ rv = folly::Random::rand32(numSlots_);
+ } else {
+ rv = folly::Random::rand64(numSlots_);
+ }
assert(rv < numSlots_);
return rv;
}
}
};
+/// AtomicUnorderedInsertMap64 is just a type alias that makes it easier
+/// to select a 64 bit slot index type. Use this if you need a capacity
+/// bigger than 2^30 (about a billion). This increases memory overheads,
+/// obviously.
+template <typename Key,
+ typename Value,
+ typename Hash = std::hash<Key>,
+ typename KeyEqual = std::equal_to<Key>,
+ bool SkipKeyValueDeletion =
+ (boost::has_trivial_destructor<Key>::value &&
+ boost::has_trivial_destructor<Value>::value),
+ template <typename> class Atom = std::atomic,
+ typename Allocator = folly::detail::MMapAlloc>
+using AtomicUnorderedInsertMap64 =
+ AtomicUnorderedInsertMap<Key,
+ Value,
+ Hash,
+ KeyEqual,
+ SkipKeyValueDeletion,
+ Atom,
+ Allocator,
+ uint64_t>;
+
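+// Usage sketch for the alias above (illustrative):
+//
+//   // sized beyond the ~2^30 limit of the default 32-bit indexes
+//   folly::AtomicUnorderedInsertMap64<size_t, size_t> big(3000000000ULL);
+//   big.emplace(1, 10);
+//   assert(big.find(1)->second == 10);
+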
/// MutableAtom is a tiny wrapper that gives you the option
/// updating values inserted into an AtomicUnorderedInsertMap<K,
bool is_lock_free() const {return true;}
};
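+
+// Sketch of the value-mutation pattern (illustrative; MutableAtom exposes
+// its atomic as a member named `data`, as the tests below read it):
+//
+//   AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
+//   m.emplace(1, 1);
+//   m.find(1)->second.data++;  // values stay mutable even though the map
+//                              // itself is insert-only
+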
-template<
- typename Key, typename Value, template<typename> class Atom = non_atomic>
-using UnorderedInsertMap = AtomicUnorderedInsertMap<
- Key,
- Value,
- std::hash<Key>,
- std::equal_to<Key>,
- (boost::has_trivial_destructor<Key>::value &&
- boost::has_trivial_destructor<Value>::value),
- Atom,
- std::allocator<char>>;
-
-TEST(AtomicUnorderedInsertMap, basic) {
- AtomicUnorderedInsertMap<std::string,std::string> m(100);
+template <typename Key,
+ typename Value,
+ typename IndexType,
+ template <typename> class Atom = std::atomic,
+ typename Allocator = std::allocator<char>>
+using UIM =
+ AtomicUnorderedInsertMap<Key,
+ Value,
+ std::hash<Key>,
+ std::equal_to<Key>,
+ (boost::has_trivial_destructor<Key>::value &&
+ boost::has_trivial_destructor<Value>::value),
+ Atom,
+ Allocator,
+ IndexType>;
+
+namespace {
+template <typename T>
+struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
+}
+
+// uint16_t doesn't make sense for most platforms, but we might as well
+// test it
+using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
+TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
+
+TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
+ UIM<std::string,
+ std::string,
+ TypeParam,
+ std::atomic,
+ folly::detail::MMapAlloc> m(100);
m.emplace("abc", "ABC");
EXPECT_TRUE(m.find("abc") != m.cend());
EXPECT_TRUE(a != b);
}
-TEST(AtomicUnorderedInsertMap, value_mutation) {
- AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
+TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
+ UIM<int, MutableAtom<int>, TypeParam> m(100);
for (int i = 0; i < 50; ++i) {
m.emplace(i, i);
}
TEST(UnorderedInsertMap, value_mutation) {
- UnorderedInsertMap<int, MutableData<int>> m(100);
+ UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);
for (int i = 0; i < 50; ++i) {
m.emplace(i, i);
EXPECT_EQ(m.find(1)->second.data, 2);
}
+// This test is too expensive to run automatically. On my dev server it
+// takes about 10 minutes for dbg build, 2 for opt.
+TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
+ size_t capacity = 2000000000;
+ AtomicUnorderedInsertMap64<size_t,size_t> big(capacity);
+ for (size_t i = 0; i < capacity * 2; i += 2) {
+ big.emplace(i, i * 10);
+ }
+ for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
+ auto iter = big.find(i);
+ if ((i & 1) == 0 && i < capacity * 2) {
+ EXPECT_EQ(iter->second, i * 10);
+ } else {
+ EXPECT_TRUE(iter == big.cend());
+ }
+ }
+}
+
BENCHMARK(lookup_int_int_hit, iters) {
std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
}
BENCHMARK(atomic_fast_map) {
- UnorderedInsertMap<long, long, std::atomic> m(10000);
+ UIM<long, long, uint32_t, std::atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
}
BENCHMARK(fast_map) {
- UnorderedInsertMap<long, long> m(10000);
+ UIM<long, long, uint32_t, non_atomic> m(10000);
+ for (int i=0; i<10000; ++i) {
+ m.emplace(i,i);
+ }
+
+ for (int i=0; i<10000; ++i) {
+ auto a = m.find(i);
+ folly::doNotOptimizeAway(&*a);
+ }
+}
+
+BENCHMARK(atomic_fast_map_64) {
+ UIM<long, long, uint64_t, std::atomic> m(10000);
+ for (int i=0; i<10000; ++i) {
+ m.emplace(i,i);
+ }
+
+ for (int i=0; i<10000; ++i) {
+ auto a = m.find(i);
+ folly::doNotOptimizeAway(&*a);
+ }
+}
+
+BENCHMARK(fast_map_64) {
+ UIM<long, long, uint64_t, non_atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}