From: Nathan Bronson Date: Fri, 23 Oct 2015 14:23:37 +0000 (-0700) Subject: templatize AtomicUnorderedInsertMap's internals to allow big maps X-Git-Tag: deprecate-dynamic-initializer~302 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d0889c8c36f0aa4a2773c0d9e95a2ff3eace6b89;p=folly.git templatize AtomicUnorderedInsertMap's internals to allow big maps Summary: AtomicUnorderedInsertMap used 32 bit index values internally. 2 bits were stolen, limiting the capacity to 2^30. This diff makes the internal index type a template parameter, so you can make really big maps if you want (at the expense of bigger map overhead). The easiest way is to substitute AtomicUnorderedInsertMap64. Reviewed By: yfeldblum Differential Revision: D2574338 fb-gh-sync-id: a74994b6da1046a149c2e7763c3dc19e35d9840b --- diff --git a/folly/AtomicUnorderedMap.h b/folly/AtomicUnorderedMap.h index a2c4ac73..941905ef 100644 --- a/folly/AtomicUnorderedMap.h +++ b/folly/AtomicUnorderedMap.h @@ -55,10 +55,12 @@ namespace folly { /// O(1/(1-actual_load_factor)). Note that this is a pretty strong /// limitation, because you can't remove existing keys. /// -/// * 2^30 maximum capacity - you'll need to use something else if you -/// have more than a billion entries. If this limit bothers you let it -/// wouldn't be too hard to parameterize the internal indexes between -/// uint32_t and uint64_t. +/// * 2^30 maximum default capacity - by default AtomicUnorderedInsertMap +/// uses uint32_t internal indexes (and steals 2 bits), limiting you +/// to about a billion entries. If you need more you can fill in all +/// of the template params so you change IndexType to uint64_t, or you +/// can use AtomicUnorderedInsertMap64. 64-bit indexes will increase +/// the space over of the map, of course. /// /// WHAT YOU GET IN EXCHANGE: /// @@ -133,7 +135,8 @@ template ::value && boost::has_trivial_destructor::value), template class Atom = std::atomic, - typename Allocator = folly::detail::MMapAlloc> + typename Allocator = folly::detail::MMapAlloc, + typename IndexType = uint32_t> struct AtomicUnorderedInsertMap { @@ -147,7 +150,7 @@ struct AtomicUnorderedInsertMap { typedef const value_type& const_reference; typedef struct ConstIterator { - ConstIterator(const AtomicUnorderedInsertMap& owner, uint32_t slot) + ConstIterator(const AtomicUnorderedInsertMap& owner, IndexType slot) : owner_(owner) , slot_(slot) {} @@ -190,17 +193,17 @@ struct AtomicUnorderedInsertMap { private: const AtomicUnorderedInsertMap& owner_; - uint32_t slot_; + IndexType slot_; } const_iterator; friend ConstIterator; - /// Constructs a map that will support the insertion of maxSize - /// key-value pairs without exceeding the max load factor. Load - /// factors of greater than 1 are not supported, and once the actual load - /// factor of the map approaches 1 the insert performance will suffer. - /// The capacity is limited to 2^30 (about a billion), beyond which - /// we will throw invalid_argument. + /// Constructs a map that will support the insertion of maxSize key-value + /// pairs without exceeding the max load factor. Load factors of greater + /// than 1 are not supported, and once the actual load factor of the + /// map approaches 1 the insert performance will suffer. The capacity + /// is limited to 2^30 (about a billion) for the default IndexType, + /// beyond which we will throw invalid_argument. explicit AtomicUnorderedInsertMap( size_t maxSize, float maxLoadFactor = 0.8f, @@ -208,13 +211,15 @@ struct AtomicUnorderedInsertMap { : allocator_(alloc) { size_t capacity = maxSize / std::max(1.0f, maxLoadFactor) + 128; - if (capacity > (1 << 30) && maxSize < (1 << 30)) { + size_t avail = size_t{1} << (8 * sizeof(IndexType) - 2); + if (capacity > avail && maxSize < avail) { // we'll do our best - capacity = (1 << 30); + capacity = avail; } - if (capacity < maxSize || capacity > (1 << 30)) { + if (capacity < maxSize || capacity > avail) { throw std::invalid_argument( - "AtomicUnorderedInsertMap capacity must fit in 30 bits"); + "AtomicUnorderedInsertMap capacity must fit in IndexType with 2 bits " + "left over"); } numSlots_ = capacity; @@ -319,7 +324,7 @@ struct AtomicUnorderedInsertMap { } const_iterator cbegin() const { - uint32_t slot = numSlots_ - 1; + IndexType slot = numSlots_ - 1; while (slot > 0 && slots_[slot].state() != LINKED) { --slot; } @@ -336,7 +341,7 @@ struct AtomicUnorderedInsertMap { kMaxAllocationTries = 1000, // after this we throw }; - enum BucketState : uint32_t { + enum BucketState : IndexType { EMPTY = 0, CONSTRUCTING = 1, LINKED = 2, @@ -354,10 +359,10 @@ struct AtomicUnorderedInsertMap { /// of the first bucket for the chain whose keys map to this slot. /// When things are going well the head usually links to this slot, /// but that doesn't always have to happen. - Atom headAndState_; + Atom headAndState_; /// The next bucket in the chain - uint32_t next_; + IndexType next_; /// Key and Value typename std::aligned_storage= numSlots_) { @@ -416,7 +421,7 @@ struct AtomicUnorderedInsertMap { return h; } - uint32_t find(const Key& key, uint32_t slot) const { + IndexType find(const Key& key, IndexType slot) const { KeyEqual ke = {}; auto hs = slots_[slot].headAndState_.load(std::memory_order_acquire); for (slot = hs >> 2; slot != 0; slot = slots_[slot].next_) { @@ -429,7 +434,7 @@ struct AtomicUnorderedInsertMap { /// Allocates a slot and returns its index. Tries to put it near /// slots_[start]. - uint32_t allocateNear(uint32_t start) { + IndexType allocateNear(IndexType start) { for (auto tries = 0; tries < kMaxAllocationTries; ++tries) { auto slot = allocationAttempt(start, tries); auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire); @@ -445,11 +450,16 @@ struct AtomicUnorderedInsertMap { /// Returns the slot we should attempt to allocate after tries failed /// tries, starting from the specified slot. This is pulled out so we /// can specialize it differently during deterministic testing - uint32_t allocationAttempt(uint32_t start, uint32_t tries) const { + IndexType allocationAttempt(IndexType start, IndexType tries) const { if (LIKELY(tries < 8 && start + tries < numSlots_)) { return start + tries; } else { - uint32_t rv = folly::Random::rand32(numSlots_); + IndexType rv; + if (sizeof(IndexType) <= 4) { + rv = folly::Random::rand32(numSlots_); + } else { + rv = folly::Random::rand64(numSlots_); + } assert(rv < numSlots_); return rv; } @@ -463,6 +473,29 @@ struct AtomicUnorderedInsertMap { } }; +/// AtomicUnorderedInsertMap64 is just a type alias that makes it easier +/// to select a 64 bit slot index type. Use this if you need a capacity +/// bigger than 2^30 (about a billion). This increases memory overheads, +/// obviously. +template , + typename KeyEqual = std::equal_to, + bool SkipKeyValueDeletion = + (boost::has_trivial_destructor::value && + boost::has_trivial_destructor::value), + template class Atom = std::atomic, + typename Allocator = folly::detail::MMapAlloc> +using AtomicUnorderedInsertMap64 = + AtomicUnorderedInsertMap; + /// MutableAtom is a tiny wrapper than gives you the option of atomically /// updating values inserted into an AtomicUnorderedInsertMap class Atom = non_atomic> -using UnorderedInsertMap = AtomicUnorderedInsertMap< - Key, - Value, - std::hash, - std::equal_to, - (boost::has_trivial_destructor::value && - boost::has_trivial_destructor::value), - Atom, - std::allocator>; - -TEST(AtomicUnorderedInsertMap, basic) { - AtomicUnorderedInsertMap m(100); +template class Atom = std::atomic, + typename Allocator = std::allocator> +using UIM = + AtomicUnorderedInsertMap, + std::equal_to, + (boost::has_trivial_destructor::value && + boost::has_trivial_destructor::value), + Atom, + Allocator, + IndexType>; + +namespace { +template +struct AtomicUnorderedInsertMapTest : public ::testing::Test {}; +} + +// uint16_t doesn't make sense for most platforms, but we might as well +// test it +using IndexTypesToTest = ::testing::Types; +TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest); + +TYPED_TEST(AtomicUnorderedInsertMapTest, basic) { + UIM m(100); m.emplace("abc", "ABC"); EXPECT_TRUE(m.find("abc") != m.cend()); @@ -116,8 +134,8 @@ TEST(AtomicUnorderedInsertMap, basic) { EXPECT_TRUE(a != b); } -TEST(AtomicUnorderedInsertMap, value_mutation) { - AtomicUnorderedInsertMap> m(100); +TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) { + UIM, TypeParam> m(100); for (int i = 0; i < 50; ++i) { m.emplace(i, i); @@ -127,7 +145,7 @@ TEST(AtomicUnorderedInsertMap, value_mutation) { } TEST(UnorderedInsertMap, value_mutation) { - UnorderedInsertMap> m(100); + UIM, uint32_t, non_atomic> m(100); for (int i = 0; i < 50; ++i) { m.emplace(i, i); @@ -137,6 +155,24 @@ TEST(UnorderedInsertMap, value_mutation) { EXPECT_EQ(m.find(1)->second.data, 2); } +// This test is too expensive to run automatically. On my dev server it +// takes about 10 minutes for dbg build, 2 for opt. +TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) { + size_t capacity = 2000000000; + AtomicUnorderedInsertMap64 big(capacity); + for (size_t i = 0; i < capacity * 2; i += 2) { + big.emplace(i, i * 10); + } + for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) { + auto iter = big.find(i); + if ((i & 1) == 0 && i < capacity * 2) { + EXPECT_EQ(iter->second, i * 10); + } else { + EXPECT_TRUE(iter == big.cend()); + } + } +} + BENCHMARK(lookup_int_int_hit, iters) { std::unique_ptr> ptr = {}; @@ -283,7 +319,7 @@ BENCHMARK(std_map) { } BENCHMARK(atomic_fast_map) { - UnorderedInsertMap m(10000); + UIM m(10000); for (int i=0; i<10000; ++i) { m.emplace(i,i); } @@ -295,7 +331,31 @@ BENCHMARK(atomic_fast_map) { } BENCHMARK(fast_map) { - UnorderedInsertMap m(10000); + UIM m(10000); + for (int i=0; i<10000; ++i) { + m.emplace(i,i); + } + + for (int i=0; i<10000; ++i) { + auto a = m.find(i); + folly::doNotOptimizeAway(&*a); + } +} + +BENCHMARK(atomic_fast_map_64) { + UIM m(10000); + for (int i=0; i<10000; ++i) { + m.emplace(i,i); + } + + for (int i=0; i<10000; ++i) { + auto a = m.find(i); + folly::doNotOptimizeAway(&*a); + } +} + +BENCHMARK(fast_map_64) { + UIM m(10000); for (int i=0; i<10000; ++i) { m.emplace(i,i); }