/// O(1/(1-actual_load_factor)). Note that this is a pretty strong
/// limitation, because you can't remove existing keys.
///
-/// * 2^30 maximum capacity - you'll need to use something else if you
-/// have more than a billion entries. If this limit bothers you let it
-/// wouldn't be too hard to parameterize the internal indexes between
-/// uint32_t and uint64_t.
+/// * 2^30 maximum default capacity - by default AtomicUnorderedInsertMap
+/// uses uint32_t internal indexes (and steals 2 bits), limiting you
+/// to about a billion entries. If you need more you can fill in all
+/// of the template params so you change IndexType to uint64_t, or you
+/// can use AtomicUnorderedInsertMap64. 64-bit indexes will increase
+/// the space overhead of the map, of course.
///
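+/// For example, with the default IndexType (an illustrative sketch):
+///
+///   folly::AtomicUnorderedInsertMap<int, int> small(1000);  // fine
+///
+///   // throws std::invalid_argument: the requested capacity needs more
+///   // than 30 bits of slot index
+///   folly::AtomicUnorderedInsertMap<int, int> huge(1ULL << 31);
+///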
/// WHAT YOU GET IN EXCHANGE:
///
(boost::has_trivial_destructor<Key>::value &&
boost::has_trivial_destructor<Value>::value),
template<typename> class Atom = std::atomic,
- typename Allocator = folly::detail::MMapAlloc>
+ typename Allocator = folly::detail::MMapAlloc,
+ typename IndexType = uint32_t>
struct AtomicUnorderedInsertMap {
typedef const value_type& const_reference;
typedef struct ConstIterator {
- ConstIterator(const AtomicUnorderedInsertMap& owner, uint32_t slot)
+ ConstIterator(const AtomicUnorderedInsertMap& owner, IndexType slot)
: owner_(owner)
, slot_(slot)
{}
private:
const AtomicUnorderedInsertMap& owner_;
- uint32_t slot_;
+ IndexType slot_;
} const_iterator;
friend ConstIterator;
- /// Constructs a map that will support the insertion of maxSize
- /// key-value pairs without exceeding the max load factor. Load
- /// factors of greater than 1 are not supported, and once the actual load
- /// factor of the map approaches 1 the insert performance will suffer.
- /// The capacity is limited to 2^30 (about a billion), beyond which
- /// we will throw invalid_argument.
+ /// Constructs a map that will support the insertion of maxSize key-value
+ /// pairs without exceeding the max load factor. Load factors of greater
+ /// than 1 are not supported, and once the actual load factor of the
+ /// map approaches 1 the insert performance will suffer. The capacity
+ /// is limited to 2^30 (about a billion) for the default IndexType,
+ /// beyond which we will throw invalid_argument.
explicit AtomicUnorderedInsertMap(
size_t maxSize,
float maxLoadFactor = 0.8f,
: allocator_(alloc)
{
  size_t capacity = maxSize / std::min(1.0f, maxLoadFactor) + 128;
- if (capacity > (1 << 30) && maxSize < (1 << 30)) {
+ size_t avail = size_t{1} << (8 * sizeof(IndexType) - 2);
+ if (capacity > avail && maxSize < avail) {
// we'll do our best
- capacity = (1 << 30);
+ capacity = avail;
}
- if (capacity < maxSize || capacity > (1 << 30)) {
+ if (capacity < maxSize || capacity > avail) {
throw std::invalid_argument(
- "AtomicUnorderedInsertMap capacity must fit in 30 bits");
+ "AtomicUnorderedInsertMap capacity must fit in IndexType with 2 bits "
+ "left over");
}
numSlots_ = capacity;
}
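+
+  // Worked example of the sizing above (illustrative): to honor a max load
+  // factor of 0.8, maxSize = 100 is padded to 100 / 0.8 + 128 = 253 slots.
+  // Because 2 bits of each slot index are reserved for bucket state,
+  // avail = 1 << (8 * sizeof(IndexType) - 2):
+  //   uint16_t -> 2^14 =        16,384 slots
+  //   uint32_t -> 2^30 = 1,073,741,824 slots (the documented ~1 billion)
+  //   uint64_t -> 2^62 (far more than could ever be allocated)
+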
const_iterator cbegin() const {
- uint32_t slot = numSlots_ - 1;
+ IndexType slot = numSlots_ - 1;
while (slot > 0 && slots_[slot].state() != LINKED) {
--slot;
}
kMaxAllocationTries = 1000, // after this we throw
};
- enum BucketState : uint32_t {
+ enum BucketState : IndexType {
EMPTY = 0,
CONSTRUCTING = 1,
LINKED = 2,
/// of the first bucket for the chain whose keys map to this slot.
/// When things are going well the head usually links to this slot,
/// but that doesn't always have to happen.
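+    /// (Packing sketch, consistent with the `hs >> 2` in find() below:
+    /// the low 2 bits hold this slot's BucketState and the remaining
+    /// bits hold the head index, i.e. roughly
+    ///   head  = headAndState_ >> 2
+    ///   state = BucketState(headAndState_ & 3).)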
- Atom<uint32_t> headAndState_;
+ Atom<IndexType> headAndState_;
/// The next bucket in the chain
- uint32_t next_;
+ IndexType next_;
/// Key and Value
typename std::aligned_storage<sizeof(value_type),
Allocator allocator_;
Slot* slots_;
- uint32_t keyToSlotIdx(const Key& key) const {
+ IndexType keyToSlotIdx(const Key& key) const {
size_t h = hasher()(key);
h &= slotMask_;
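+    // fold the masked hash into [0, numSlots_)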
while (h >= numSlots_) {
return h;
}
- uint32_t find(const Key& key, uint32_t slot) const {
+ IndexType find(const Key& key, IndexType slot) const {
KeyEqual ke = {};
auto hs = slots_[slot].headAndState_.load(std::memory_order_acquire);
for (slot = hs >> 2; slot != 0; slot = slots_[slot].next_) {
/// Allocates a slot and returns its index. Tries to put it near
/// slots_[start].
- uint32_t allocateNear(uint32_t start) {
+ IndexType allocateNear(IndexType start) {
for (auto tries = 0; tries < kMaxAllocationTries; ++tries) {
auto slot = allocationAttempt(start, tries);
auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
/// Returns the slot we should attempt to allocate after tries failed
/// tries, starting from the specified slot. This is pulled out so we
/// can specialize it differently during deterministic testing
- uint32_t allocationAttempt(uint32_t start, uint32_t tries) const {
+ IndexType allocationAttempt(IndexType start, IndexType tries) const {
if (LIKELY(tries < 8 && start + tries < numSlots_)) {
return start + tries;
} else {
- uint32_t rv = folly::Random::rand32(numSlots_);
+ IndexType rv;
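+      // numSlots_ can exceed rand32's range when IndexType is 64 bits,
+      // so choose the generator that covers the whole slot space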
+ if (sizeof(IndexType) <= 4) {
+ rv = folly::Random::rand32(numSlots_);
+ } else {
+ rv = folly::Random::rand64(numSlots_);
+ }
assert(rv < numSlots_);
return rv;
}
}
};
+/// AtomicUnorderedInsertMap64 is just a type alias that makes it easier
+/// to select a 64 bit slot index type. Use this if you need a capacity
+/// bigger than 2^30 (about a billion). This increases memory overheads,
+/// obviously.
+template <typename Key,
+ typename Value,
+ typename Hash = std::hash<Key>,
+ typename KeyEqual = std::equal_to<Key>,
+ bool SkipKeyValueDeletion =
+ (boost::has_trivial_destructor<Key>::value &&
+ boost::has_trivial_destructor<Value>::value),
+ template <typename> class Atom = std::atomic,
+ typename Allocator = folly::detail::MMapAlloc>
+using AtomicUnorderedInsertMap64 =
+ AtomicUnorderedInsertMap<Key,
+ Value,
+ Hash,
+ KeyEqual,
+ SkipKeyValueDeletion,
+ Atom,
+ Allocator,
+ uint64_t>;
+
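+// Usage sketch for the alias above (illustrative):
+//
+//   // sized beyond the ~2^30 limit of the default 32-bit indexes
+//   folly::AtomicUnorderedInsertMap64<size_t, size_t> big(3000000000ULL);
+//   big.emplace(1, 10);
+//   assert(big.find(1)->second == 10);
+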
/// MutableAtom is a tiny wrapper that gives you the option
/// updating values inserted into an AtomicUnorderedInsertMap<K,
bool is_lock_free() const {return true;}
};
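+
+// Sketch of the value-mutation pattern (illustrative; MutableAtom exposes
+// its atomic as a member named `data`, as the tests below read it):
+//
+//   AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
+//   m.emplace(1, 1);
+//   m.find(1)->second.data++;  // values stay mutable even though the map
+//                              // itself is insert-only
+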
-template<
- typename Key, typename Value, template<typename> class Atom = non_atomic>
-using UnorderedInsertMap = AtomicUnorderedInsertMap<
- Key,
- Value,
- std::hash<Key>,
- std::equal_to<Key>,
- (boost::has_trivial_destructor<Key>::value &&
- boost::has_trivial_destructor<Value>::value),
- Atom,
- std::allocator<char>>;
-
-TEST(AtomicUnorderedInsertMap, basic) {
- AtomicUnorderedInsertMap<std::string,std::string> m(100);
+template <typename Key,
+ typename Value,
+ typename IndexType,
+ template <typename> class Atom = std::atomic,
+ typename Allocator = std::allocator<char>>
+using UIM =
+ AtomicUnorderedInsertMap<Key,
+ Value,
+ std::hash<Key>,
+ std::equal_to<Key>,
+ (boost::has_trivial_destructor<Key>::value &&
+ boost::has_trivial_destructor<Value>::value),
+ Atom,
+ Allocator,
+ IndexType>;
+
+namespace {
+template <typename T>
+struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
+}
+
+// uint16_t doesn't make sense for most platforms, but we might as well
+// test it
+using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
+TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
+
+TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
+ UIM<std::string,
+ std::string,
+ TypeParam,
+ std::atomic,
+ folly::detail::MMapAlloc> m(100);
m.emplace("abc", "ABC");
EXPECT_TRUE(m.find("abc") != m.cend());
EXPECT_TRUE(a != b);
}
-TEST(AtomicUnorderedInsertMap, value_mutation) {
- AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
+TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
+ UIM<int, MutableAtom<int>, TypeParam> m(100);
for (int i = 0; i < 50; ++i) {
m.emplace(i, i);
}
TEST(UnorderedInsertMap, value_mutation) {
- UnorderedInsertMap<int, MutableData<int>> m(100);
+ UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);
for (int i = 0; i < 50; ++i) {
m.emplace(i, i);
EXPECT_EQ(m.find(1)->second.data, 2);
}
+// This test is too expensive to run automatically. On my dev server it
+// takes about 10 minutes for dbg build, 2 for opt.
+TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
+ size_t capacity = 2000000000;
+ AtomicUnorderedInsertMap64<size_t,size_t> big(capacity);
+ for (size_t i = 0; i < capacity * 2; i += 2) {
+ big.emplace(i, i * 10);
+ }
+ for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
+ auto iter = big.find(i);
+ if ((i & 1) == 0 && i < capacity * 2) {
+ EXPECT_EQ(iter->second, i * 10);
+ } else {
+ EXPECT_TRUE(iter == big.cend());
+ }
+ }
+}
+
BENCHMARK(lookup_int_int_hit, iters) {
std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
}
BENCHMARK(atomic_fast_map) {
- UnorderedInsertMap<long, long, std::atomic> m(10000);
+ UIM<long, long, uint32_t, std::atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
}
BENCHMARK(fast_map) {
- UnorderedInsertMap<long, long> m(10000);
+ UIM<long, long, uint32_t, non_atomic> m(10000);
+ for (int i=0; i<10000; ++i) {
+ m.emplace(i,i);
+ }
+
+ for (int i=0; i<10000; ++i) {
+ auto a = m.find(i);
+ folly::doNotOptimizeAway(&*a);
+ }
+}
+
+BENCHMARK(atomic_fast_map_64) {
+ UIM<long, long, uint64_t, std::atomic> m(10000);
+ for (int i=0; i<10000; ++i) {
+ m.emplace(i,i);
+ }
+
+ for (int i=0; i<10000; ++i) {
+ auto a = m.find(i);
+ folly::doNotOptimizeAway(&*a);
+ }
+}
+
+BENCHMARK(fast_map_64) {
+ UIM<long, long, uint64_t, non_atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}