--- /dev/null
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef FOLLY_ATOMICUNORDEREDMAP_H
+#define FOLLY_ATOMICUNORDEREDMAP_H
+
+#include <atomic>
+#include <functional>
+#include <stdexcept>
+#include <system_error>
+#include <type_traits>
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <folly/Likely.h>
+#include <folly/Bits.h>
+#include <folly/Conv.h>
+#include <folly/Random.h>
+#include <folly/detail/AtomicUnorderedMapUtils.h>
+#include <boost/type_traits/has_trivial_destructor.hpp>
+#include <limits>
+
+namespace folly {
+
+/// You're probably reading this because you are looking for an
+/// AtomicUnorderedMap<K,V> that is fully general, highly concurrent (for
+/// reads, writes, and iteration), and makes no performance compromises.
+/// We haven't figured that one out yet. What you will find here is a
+/// hash table implementation that sacrifices generality so that it can
+/// give you all of the other things.
+///
+/// LIMITATIONS:
+///
+/// * Insert only (*) - the only write operation supported directly by
+/// AtomicUnorderedInsertMap is findOrConstruct. There is a (*) because
+/// values aren't moved, so you can roll your own concurrency control for
+/// in-place updates of values (see MutableData and MutableAtom below),
+/// but the hash table itself doesn't help you.
+///
+/// * No resizing - you must specify the capacity up front, and once
+/// the hash map gets full you won't be able to insert. Insert
+/// performance will degrade once the load factor is high. Insert is
+/// O(1/(1-actual_load_factor)). Note that this is a pretty strong
+/// limitation, because you can't remove existing keys.
+///
+/// * 2^30 maximum capacity - you'll need to use something else if you
+/// have more than a billion entries. If this limit bothers you, it
+/// wouldn't be too hard to parameterize the internal indexes between
+/// uint32_t and uint64_t.
+///
+/// WHAT YOU GET IN EXCHANGE:
+///
+/// * Arbitrary key and value types - any K and V that can be used in a
+/// std::unordered_map can be used here. In fact, the key and value
+/// types don't even have to be copyable or moveable!
+///
+/// * Keys and values in the map won't be moved - it is safe to keep
+/// pointers or references to the keys and values in the map, because
+/// they are never moved or destroyed (until the map itself is destroyed).
+///
+/// * Iterators are never invalidated - writes don't invalidate iterators,
+/// so you can scan and insert in parallel.
+///
+/// * Fast wait-free reads - reads are usually only a single cache miss,
+/// even when the hash table is very large. Wait-freedom means that
+/// you won't see latency outliers even in the face of concurrent writes.
+///
+/// * Lock-free insert - writes proceed in parallel. If a thread in the
+/// middle of a write is unlucky and gets suspended, it doesn't block
+/// anybody else.
+///
+/// COMMENTS ON INSERT-ONLY
+///
+/// This map provides wait-free linearizable reads and lock-free
+/// linearizable inserts. Inserted values won't be moved, but no
+/// concurrency control is provided for safely updating them. To remind
+/// you of that fact they are only provided in const form. This is the
+/// only simple safe thing to do while preserving something like the normal
+/// std::map iteration form, which requires that iteration be exposed
+/// via std::pair (and prevents encapsulation of access to the value).
+///
+/// There are a couple of reasonable policies for doing in-place
+/// concurrency control on the values. I am hoping that the policy can
+/// be injected via the value type or an extra template param, to keep
+/// the core AtomicUnorderedInsertMap insert-only:
+///
+/// CONST: this is the currently implemented strategy, which is simple,
+/// performant, and not that expressive. You can always put in a value
+/// with a mutable field (see MutableAtom below), but that doesn't look
+/// as pretty as it should.
+///
+/// ATOMIC: for integers and integer-size trivially copyable structs
+/// (via an adapter like tao/queues/AtomicStruct) the value can be a
+/// std::atomic and read and written atomically.
+///
+/// SEQ-LOCK: attach a counter incremented before and after write.
+/// Writers serialize by using CAS to make an even->odd transition,
+/// then odd->even after the write. Readers grab the value with memcpy,
+/// checking sequence value before and after. Readers retry until they
+/// see an even sequence number that doesn't change. This works for
+/// larger structs, but still requires memcpy to be equivalent to copy
+/// assignment, and it is no longer lock-free. It scales very well,
+/// because the readers are still invisible (no cache line writes).
+///
+/// LOCK: folly's SharedMutex would be a good choice here.
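+///
+/// For instance, the ATOMIC policy can already be approximated with the
+/// MutableAtom wrapper defined at the bottom of this file (a sketch, not a
+/// separate policy implementation):
+///
+/// AtomicUnorderedInsertMap<int,MutableAtom<int>> counts(1000);
+/// counts.emplace(10, 1);
+/// counts.find(10)->second.data.fetch_add(1); // atomic in-place update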
+///
+/// MEMORY ALLOCATION
+///
+/// Underlying memory is allocated as a big anonymous mmap chunk, which
+/// might be cheaper than calloc() and is certainly not more expensive
+/// for large maps. If the SkipKeyValueDeletion template param is true
+/// then deletion of the map consists of unmapping the backing memory,
+/// which is much faster than destructing all of the keys and values.
+/// Feel free to override SkipKeyValueDeletion if boost::has_trivial_destructor
+/// isn't recognizing the triviality of your destructors.
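+///
+/// For example, to force the skip for hypothetical types whose destructors
+/// you know are effectively trivial (a sketch, with MyKey and MyValue
+/// standing in for your own types):
+///
+/// AtomicUnorderedInsertMap<MyKey,MyValue,std::hash<MyKey>,
+/// std::equal_to<MyKey>,
+/// /* SkipKeyValueDeletion = */ true> m(1000);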
+template <typename Key,
+ typename Value,
+ typename Hash = std::hash<Key>,
+ typename KeyEqual = std::equal_to<Key>,
+ bool SkipKeyValueDeletion =
+ (boost::has_trivial_destructor<Key>::value &&
+ boost::has_trivial_destructor<Value>::value),
+ template<typename> class Atom = std::atomic,
+ typename Allocator = folly::detail::MMapAlloc>
+
+struct AtomicUnorderedInsertMap {
+
+ typedef Key key_type;
+ typedef Value mapped_type;
+ typedef std::pair<Key,Value> value_type;
+ typedef std::size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+ typedef Hash hasher;
+ typedef KeyEqual key_equal;
+ typedef const value_type& const_reference;
+
+ typedef struct ConstIterator {
+ ConstIterator(const AtomicUnorderedInsertMap& owner, uint32_t slot)
+ : owner_(owner)
+ , slot_(slot)
+ {}
+
+ ConstIterator(const ConstIterator&) = default;
+ ConstIterator& operator= (const ConstIterator&) = default;
+
+ const value_type& operator* () const {
+ return owner_.slots_[slot_].keyValue();
+ }
+
+ const value_type* operator-> () const {
+ return &owner_.slots_[slot_].keyValue();
+ }
+
+ // pre-increment
+ const ConstIterator& operator++ () {
+ while (slot_ > 0) {
+ --slot_;
+ if (owner_.slots_[slot_].state() == LINKED) {
+ break;
+ }
+ }
+ return *this;
+ }
+
+ // post-increment
+ ConstIterator operator++ (int dummy) {
+ auto prev = *this;
+ ++*this;
+ return prev;
+ }
+
+ bool operator== (const ConstIterator& rhs) const {
+ return slot_ == rhs.slot_;
+ }
+ bool operator!= (const ConstIterator& rhs) const {
+ return !(*this == rhs);
+ }
+
+ private:
+ const AtomicUnorderedInsertMap& owner_;
+ uint32_t slot_;
+ } const_iterator;
+
+ friend ConstIterator;
+
+ /// Constructs a map that will support the insertion of maxSize
+ /// key-value pairs without exceeding the max load factor. Load
+ /// factors of greater than 1 are not supported, and once the actual load
+ /// factor of the map approaches 1 the insert performance will suffer.
+ /// The capacity is limited to 2^30 (about a billion), beyond which
+ /// we will throw invalid_argument.
+ explicit AtomicUnorderedInsertMap(
+ size_t maxSize,
+ float maxLoadFactor = 0.8f,
+ const Allocator& alloc = Allocator())
+ : allocator_(alloc)
+ {
+ // divide by the load factor (clamped to at most 1) to leave headroom
+ size_t capacity = size_t(maxSize / std::min(1.0f, maxLoadFactor) + 128);
+ if (capacity > (1 << 30) && maxSize < (1 << 30)) {
+ // we'll do our best
+ capacity = (1 << 30);
+ }
+ if (capacity < maxSize || capacity > (1 << 30)) {
+ throw std::invalid_argument(
+ "AtomicUnorderedInsertMap capacity must fit in 30 bits");
+ }
+
+ numSlots_ = capacity;
+ slotMask_ = folly::nextPowTwo(capacity * 4) - 1;
+ mmapRequested_ = sizeof(Slot) * capacity;
+ slots_ = reinterpret_cast<Slot*>(allocator_.allocate(mmapRequested_));
+ zeroFillSlots();
+ // mark the zero-th slot as in-use but not valid, since that happens
+ // to be our nil value
+ slots_[0].stateUpdate(EMPTY, CONSTRUCTING);
+ }
+
+ ~AtomicUnorderedInsertMap() {
+ if (!SkipKeyValueDeletion) {
+ for (size_t i = 1; i < numSlots_; ++i) {
+ slots_[i].~Slot();
+ }
+ }
+ allocator_.deallocate(reinterpret_cast<char*>(slots_), mmapRequested_);
+ }
+
+ /// Searches for the key, returning (iter,false) if it is found.
+ /// If it is not found calls the functor Func with a void* argument
+ /// that is raw storage suitable for placement construction of a Value
+ /// (see raw_value_type), then returns (iter,true). May call Func and
+ /// then return (iter,false) if there are other concurrent writes, in
+ /// which case the newly constructed value will be immediately destroyed.
+ ///
+ /// This function does not block other readers or writers. If there
+ /// are other concurrent writes, many parallel calls to func may happen
+ /// and only the first one to complete will win. The values constructed
+ /// by the other calls to func will be destroyed.
+ ///
+ /// Usage:
+ ///
+ /// AtomicUnorderedInsertMap<std::string,std::string> memo;
+ ///
+ /// auto value = memo.findOrConstruct(key, [=](void* raw) {
+ /// new (raw) std::string(computation(key));
+ /// })->first;
+ template<typename Func>
+ std::pair<const_iterator,bool> findOrConstruct(const Key& key, Func&& func) {
+ auto const slot = keyToSlotIdx(key);
+ auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
+
+ auto existing = find(key, slot);
+ if (existing != 0) {
+ return std::make_pair(ConstIterator(*this, existing), false);
+ }
+
+ auto idx = allocateNear(slot);
+ new (&slots_[idx].keyValue().first) Key(key);
+ func(static_cast<void*>(&slots_[idx].keyValue().second));
+
+ while (true) {
+ slots_[idx].next_ = prev >> 2;
+
+ // we can merge the head update and the CONSTRUCTING -> LINKED update
+ // into a single CAS if slot == idx (which should happen often)
+ auto after = idx << 2;
+ if (slot == idx) {
+ after += LINKED;
+ } else {
+ after += (prev & 3);
+ }
+
+ if (slots_[slot].headAndState_.compare_exchange_strong(prev, after)) {
+ // success
+ if (idx != slot) {
+ slots_[idx].stateUpdate(CONSTRUCTING, LINKED);
+ }
+ return std::make_pair(ConstIterator(*this, idx), true);
+ }
+ // compare_exchange_strong updates its first arg on failure, so
+ // there is no need to reread prev
+
+ existing = find(key, slot);
+ if (existing != 0) {
+ // our allocated key and value are no longer needed
+ slots_[idx].keyValue().first.~Key();
+ slots_[idx].keyValue().second.~Value();
+ slots_[idx].stateUpdate(CONSTRUCTING, EMPTY);
+
+ return std::make_pair(ConstIterator(*this, existing), false);
+ }
+ }
+ }
+
+ /// This isn't really emplace, but it is what we need to test.
+ /// Eventually we can duplicate all of the std::pair constructor
+ /// forms, including a recursive tuple forwarding template
+ /// (see http://functionalcpp.wordpress.com/2013/08/28/tuple-forwarding/).
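+ ///
+ /// A minimal usage sketch (each call forwards a single argument to the
+ /// Value constructor):
+ ///
+ /// AtomicUnorderedInsertMap<int,std::string> m(10);
+ /// auto result = m.emplace(1, std::string("one"));
+ /// // result.second is true iff this call constructed the value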
+ template<class K, class V>
+ std::pair<const_iterator,bool> emplace(const K& key, V&& value) {
+ return findOrConstruct(key, [&](void* raw) {
+ new (raw) Value(std::forward<V>(value));
+ });
+ }
+
+ const_iterator find(const Key& key) const {
+ return ConstIterator(*this, find(key, keyToSlotIdx(key)));
+ }
+
+ const_iterator cbegin() const {
+ uint32_t slot = numSlots_ - 1;
+ while (slot > 0 && slots_[slot].state() != LINKED) {
+ --slot;
+ }
+ return ConstIterator(*this, slot);
+ }
+
+ const_iterator cend() const {
+ return ConstIterator(*this, 0);
+ }
+
+ private:
+
+ enum {
+ kMaxAllocationTries = 1000, // after this we throw
+ };
+
+ enum BucketState : uint32_t {
+ EMPTY = 0,
+ CONSTRUCTING = 1,
+ LINKED = 2,
+ };
+
+ /// Lock-free insertion is easiest by prepending to collision chains.
+ /// A large chaining hash table takes two cache misses instead of
+ /// one, however. Our solution is to colocate the bucket storage and
+ /// the head storage, so that even though we are traversing chains we
+ /// are likely to stay within the same cache line. Just make sure to
+ /// traverse head before looking at any keys. This strategy gives us
+ /// 32 bit pointers and fast iteration.
+ struct Slot {
+ /// The bottom two bits are the BucketState, the rest is the index
+ /// of the first bucket for the chain whose keys map to this slot.
+ /// When things are going well the head usually links to this slot,
+ /// but that doesn't always have to happen.
+ Atom<uint32_t> headAndState_;
+
+ /// The next bucket in the chain
+ uint32_t next_;
+
+ /// Key and Value
+ typename std::aligned_storage<sizeof(value_type),
+ alignof(value_type)>::type raw_;
+
+
+ ~Slot() {
+ auto s = state();
+ assert(s == EMPTY || s == LINKED);
+ if (s == LINKED) {
+ keyValue().first.~Key();
+ keyValue().second.~Value();
+ }
+ }
+
+ BucketState state() const {
+ return BucketState(headAndState_.load(std::memory_order_acquire) & 3);
+ }
+
+ void stateUpdate(BucketState before, BucketState after) {
+ assert(state() == before);
+ headAndState_ += (after - before);
+ }
+
+ value_type& keyValue() {
+ assert(state() != EMPTY);
+ return *static_cast<value_type*>(static_cast<void*>(&raw_));
+ }
+
+ const value_type& keyValue() const {
+ assert(state() != EMPTY);
+ return *static_cast<const value_type*>(static_cast<const void*>(&raw_));
+ }
+
+ };
+
+ // We manually manage the slot memory so we can bypass initialization
+ // (by getting a zero-filled mmap chunk) and optionally destruction of
+ // the slots
+
+ size_t mmapRequested_;
+ size_t numSlots_;
+
+ /// tricky, see keyToSlotIdx
+ size_t slotMask_;
+
+ Allocator allocator_;
+ Slot* slots_;
+
+ uint32_t keyToSlotIdx(const Key& key) const {
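+ // slotMask_ is nextPowTwo(numSlots_ * 4) - 1, so masking the hash yields
+ // a value less than roughly 8 * numSlots_; the subtraction loop below
+ // folds that into [0, numSlots_) in at most a few iterations, avoiding a
+ // hardware division at a slight cost in uniformity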
+ size_t h = hasher()(key);
+ h &= slotMask_;
+ while (h >= numSlots_) {
+ h -= numSlots_;
+ }
+ return h;
+ }
+
+ uint32_t find(const Key& key, uint32_t slot) const {
+ KeyEqual ke = {};
+ auto hs = slots_[slot].headAndState_.load(std::memory_order_acquire);
+ for (slot = hs >> 2; slot != 0; slot = slots_[slot].next_) {
+ if (ke(key, slots_[slot].keyValue().first)) {
+ return slot;
+ }
+ }
+ return 0;
+ }
+
+ /// Allocates a slot and returns its index. Tries to put it near
+ /// slots_[start].
+ uint32_t allocateNear(uint32_t start) {
+ for (auto tries = 0; tries < kMaxAllocationTries; ++tries) {
+ auto slot = allocationAttempt(start, tries);
+ auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
+ if ((prev & 3) == EMPTY &&
+ slots_[slot].headAndState_.compare_exchange_strong(
+ prev, prev + CONSTRUCTING - EMPTY)) {
+ return slot;
+ }
+ }
+ throw std::bad_alloc();
+ }
+
+ /// Returns the slot we should attempt to allocate after `tries` failed
+ /// attempts, starting from the specified slot. This is pulled out so we
+ /// can specialize it differently during deterministic testing.
+ uint32_t allocationAttempt(uint32_t start, uint32_t tries) const {
+ if (LIKELY(tries < 8 && start + tries < numSlots_)) {
+ return start + tries;
+ } else {
+ uint32_t rv = folly::Random::rand32(numSlots_);
+ assert(rv < numSlots_);
+ return rv;
+ }
+ }
+
+ void zeroFillSlots() {
+ using folly::detail::GivesZeroFilledMemory;
+ if (!GivesZeroFilledMemory<Allocator>::value) {
+ memset(slots_, 0, mmapRequested_);
+ }
+ }
+};
+
+
+/// MutableAtom is a tiny wrapper that gives you the option of atomically
+/// updating values inserted into an AtomicUnorderedInsertMap<K,
+/// MutableAtom<V>>. This relies on AtomicUnorderedInsertMap's guarantee
+/// that it doesn't move values.
+template <typename T,
+ template<typename> class Atom = std::atomic>
+struct MutableAtom {
+ mutable Atom<T> data;
+
+ explicit MutableAtom(const T& init) : data(init) {}
+};
+
+/// MutableData is a tiny wrapper that gives you the option of using an
+/// external concurrency control mechanism to update values inserted
+/// into an AtomicUnorderedInsertMap.
+template <typename T>
+struct MutableData {
+ mutable T data;
+ explicit MutableData(const T& init) : data(init) {}
+};
+
+
+} // namespace folly
+#endif
--- /dev/null
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <folly/AtomicUnorderedMap.h>
+#include <folly/test/DeterministicSchedule.h>
+#include <thread>
+#include <semaphore.h>
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include <folly/Benchmark.h>
+#include <unordered_map>
+
+using namespace folly;
+using namespace folly::test;
+
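+// non_atomic mimics just enough of the std::atomic<T> interface to be
+// plugged in as the Atom template parameter of AtomicUnorderedInsertMap,
+// so the single-threaded benchmarks below can measure the cost of the
+// real atomic operations.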
+template<class T>
+struct non_atomic {
+ T value;
+
+ non_atomic() = default;
+ non_atomic(const non_atomic&) = delete;
+ constexpr /* implicit */ non_atomic(T desired): value(desired) {}
+
+ T operator+=(T arg) { value += arg; return load();}
+
+ T load(std::memory_order order = std::memory_order_seq_cst) const {
+ return value;
+ }
+
+ /* implicit */
+ operator T() const {return load();}
+
+ void store(T desired, std::memory_order order = std::memory_order_seq_cst) {
+ value = desired;
+ }
+
+ T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) {
+ T old = load();
+ store(desired);
+ return old;
+ }
+
+ bool compare_exchange_weak(
+ T& expected, T desired,
+ std::memory_order success = std::memory_order_seq_cst,
+ std::memory_order failure = std::memory_order_seq_cst) {
+ if (value == expected) {
+ value = desired;
+ return true;
+ }
+
+ expected = value;
+ return false;
+ }
+
+ bool compare_exchange_strong(
+ T& expected, T desired,
+ std::memory_order success = std::memory_order_seq_cst,
+ std::memory_order failure = std::memory_order_seq_cst) {
+ if (value == expected) {
+ value = desired;
+ return true;
+ }
+
+ expected = value;
+ return false;
+ }
+
+ bool is_lock_free() const {return true;}
+};
+
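+// Alias that exposes the Atom parameter, defaulting to the non_atomic shim
+// above; fast_map and atomic_fast_map below use it to compare the two.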
+template<
+ typename Key, typename Value, template<typename> class Atom = non_atomic>
+using UnorderedInsertMap = AtomicUnorderedInsertMap<
+ Key,
+ Value,
+ std::hash<Key>,
+ std::equal_to<Key>,
+ (boost::has_trivial_destructor<Key>::value &&
+ boost::has_trivial_destructor<Value>::value),
+ Atom,
+ std::allocator<char>>;
+
+TEST(AtomicUnorderedInsertMap, basic) {
+ AtomicUnorderedInsertMap<std::string,std::string> m(100);
+
+ m.emplace("abc", "ABC");
+ EXPECT_TRUE(m.find("abc") != m.cend());
+ EXPECT_EQ(m.find("abc")->first, "abc");
+ EXPECT_EQ(m.find("abc")->second, "ABC");
+ EXPECT_TRUE(m.find("def") == m.cend());
+ auto iter = m.cbegin();
+ EXPECT_TRUE(iter != m.cend());
+ EXPECT_TRUE(iter == m.find("abc"));
+ auto a = iter;
+ EXPECT_TRUE(a == iter);
+ auto b = iter;
+ ++iter;
+ EXPECT_TRUE(iter == m.cend());
+ EXPECT_TRUE(a == b);
+ EXPECT_TRUE(a != iter);
+ a++;
+ EXPECT_TRUE(a == iter);
+ EXPECT_TRUE(a != b);
+}
+
+TEST(AtomicUnorderedInsertMap, value_mutation) {
+ AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
+
+ for (int i = 0; i < 50; ++i) {
+ m.emplace(i, i);
+ }
+
+ m.find(1)->second.data++;
+}
+
+TEST(UnorderedInsertMap, value_mutation) {
+ UnorderedInsertMap<int, MutableData<int>> m(100);
+
+ for (int i = 0; i < 50; ++i) {
+ m.emplace(i, i);
+ }
+
+ m.find(1)->second.data++;
+ EXPECT_EQ(m.find(1)->second.data, 2);
+}
+
+BENCHMARK(lookup_int_int_hit, iters) {
+ std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
+
+ size_t capacity = 100000;
+
+ BENCHMARK_SUSPEND {
+ ptr.reset(new AtomicUnorderedInsertMap<int,size_t>(capacity));
+ for (size_t i = 0; i < capacity; ++i) {
+ auto k = 3 * ((5641 * i) % capacity);
+ ptr->emplace(k, k + 1);
+ EXPECT_EQ(ptr->find(k)->second, k + 1);
+ }
+ }
+
+ for (size_t i = 0; i < iters; ++i) {
+ size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
+ auto iter = ptr->find(k);
+ if (iter == ptr->cend() ||
+ iter->second != k + 1) {
+ auto jter = ptr->find(k);
+ EXPECT_TRUE(iter == jter);
+ }
+ EXPECT_EQ(iter->second, k + 1);
+ }
+
+ BENCHMARK_SUSPEND {
+ ptr.reset(nullptr);
+ }
+}
+
+struct PairHash {
+ size_t operator()(const std::pair<uint64_t,uint64_t>& pr) const {
+ return pr.first ^ pr.second;
+ }
+};
+
+void contendedRW(size_t itersPerThread,
+ size_t capacity,
+ size_t numThreads,
+ size_t readsPerWrite) {
+ typedef std::pair<uint64_t,uint64_t> Key;
+ typedef AtomicUnorderedInsertMap<Key,MutableAtom<uint32_t>,PairHash> Map;
+
+ std::unique_ptr<Map> ptr = {};
+ std::atomic<bool> go(false);
+ std::vector<std::thread> threads;
+
+ BENCHMARK_SUSPEND {
+ ptr.reset(new Map(capacity));
+ while (threads.size() < numThreads) {
+ threads.emplace_back([&](){
+ while (!go) {
+ std::this_thread::yield();
+ }
+
+ size_t reads = 0;
+ size_t writes = 0;
+ while (reads + writes < itersPerThread) {
+ auto r = Random::rand32();
+ Key key(reads + writes, r);
+ if (reads < writes * readsPerWrite ||
+ writes >= capacity / numThreads) {
+ // read needed
+ ++reads;
+ auto iter = ptr->find(key);
+ EXPECT_TRUE(
+ iter == ptr->cend() ||
+ iter->second.data.load(std::memory_order_acquire) >= key.first);
+ } else {
+ ++writes;
+ try {
+ auto pr = ptr->emplace(key, key.first);
+ if (!pr.second) {
+ pr.first->second.data++;
+ }
+ } catch (const std::bad_alloc&) {
+ LOG(INFO) << "bad alloc";
+ }
+ }
+ }
+ });
+ }
+ }
+
+ go = true;
+
+ for (auto& thr : threads) {
+ thr.join();
+ }
+
+ BENCHMARK_SUSPEND {
+ ptr.reset(nullptr);
+ }
+}
+
+// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
+//
+// without MAP_HUGETLB (default)
+//
+// ============================================================================
+// common/concurrency/experimental/AtomicUnorderedMapTest.cpp relative time/iter iters/s
+// ============================================================================
+// lookup_int_int_hit 20.05ns 49.89M
+// contendedRW(small_32thr_99pct) 70.36ns 14.21M
+// contendedRW(large_32thr_99pct) 164.23ns 6.09M
+// contendedRW(large_32thr_99_9pct) 158.81ns 6.30M
+// ============================================================================
+//
+// with MAP_HUGETLB hacked in
+// ============================================================================
+// lookup_int_int_hit 19.67ns 50.84M
+// contendedRW(small_32thr_99pct) 62.46ns 16.01M
+// contendedRW(large_32thr_99pct) 119.41ns 8.37M
+// contendedRW(large_32thr_99_9pct) 111.23ns 8.99M
+// ============================================================================
+BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
+BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
+BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)
+
+BENCHMARK_DRAW_LINE();
+
+// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
+// Single threaded benchmarks to test how much better we are than
+// std::unordered_map and what is the cost of using atomic operations
+// in the uncontended use case
+// ============================================================================
+// std_map 1.20ms 832.58
+// atomic_fast_map 511.35us 1.96K
+// fast_map 196.28us 5.09K
+// ============================================================================
+
+BENCHMARK(std_map) {
+ std::unordered_map<long, long> m;
+ m.reserve(10000);
+ for (int i=0; i<10000; ++i) {
+ m.emplace(i,i);
+ }
+
+ for (int i=0; i<10000; ++i) {
+ auto a = m.find(i);
+ folly::doNotOptimizeAway(&*a);
+ }
+}
+
+BENCHMARK(atomic_fast_map) {
+ UnorderedInsertMap<long, long, std::atomic> m(10000);
+ for (int i=0; i<10000; ++i) {
+ m.emplace(i,i);
+ }
+
+ for (int i=0; i<10000; ++i) {
+ auto a = m.find(i);
+ folly::doNotOptimizeAway(&*a);
+ }
+}
+
+BENCHMARK(fast_map) {
+ UnorderedInsertMap<long, long> m(10000);
+ for (int i=0; i<10000; ++i) {
+ m.emplace(i,i);
+ }
+
+ for (int i=0; i<10000; ++i) {
+ auto a = m.find(i);
+ folly::doNotOptimizeAway(&*a);
+ }
+}
+
+
+int main(int argc, char ** argv) {
+ testing::InitGoogleTest(&argc, argv);
+ google::ParseCommandLineFlags(&argc, &argv, true);
+ int rv = RUN_ALL_TESTS();
+ folly::runBenchmarksOnFlag();
+ return rv;
+}