2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 * A high-performance concurrent hash map with int32 or int64 keys. Supports
21 * insert, find(key), findAt(index), erase(key), size, and more. Memory cannot
22 * be freed or reclaimed by erase. Can grow to a maximum of about 18 times the
23 * initial capacity, but performance degrades linearly with growth. Can also be
24 * used as an object store with unique 32-bit references directly into the
25 * internal storage (retrieved with iterator::getIndex()).
28 * - High-performance (~2-4x tbb::concurrent_hash_map in heavily
29 * multi-threaded environments).
30 * - Efficient memory usage if initial capacity is not over estimated
31 * (especially for small keys and values).
32 * - Good fragmentation properties (only allocates in large slabs which can
33 * be reused with clear() and never move).
34 * - Can generate unique, long-lived 32-bit references for efficient lookup
38 * - Keys must be native int32 or int64, or explicitly converted.
39 * - Must be able to specify unique empty, locked, and erased keys
40 * - Performance degrades linearly as size grows beyond initialization
42 * - Max size limit of ~18x initial size (dependent on max load factor).
43 * - Memory is not freed or reclaimed by erase.
45 * Usage and Operation Details:
46 * Simple performance/memory tradeoff with maxLoadFactor. Higher load factors
47 * give better memory utilization but probe lengths increase, reducing
50 * Implementation and Performance Details:
51 * AHArray is a fixed size contiguous block of value_type cells. When
52 * writing a cell, the key is locked while the rest of the record is
53 * written. Once done, the cell is unlocked by setting the key. find()
54 * is completely wait-free and doesn't require any non-relaxed atomic
55 * operations. AHA cannot grow beyond initialization capacity, but is
56 * faster because of reduced data indirection.
58 * AHMap is a wrapper around AHArray sub-maps that allows growth and provides
59 * an interface closer to the STL UnorderedAssociativeContainer concept. These
60 * sub-maps are allocated on the fly and are processed in series, so the more
61 * there are (from growing past initial capacity), the worse the performance.
63 * Insert returns false if there is a key collision and throws if the max size
64 * of the map is exceeded.
66 * Benchmark performance with 8 simultaneous threads processing 1 million
67 * unique <int64, int64> entries on a 4-core, 2.5 GHz machine:
69 * Load Factor Mem Efficiency usec/Insert usec/Find
75 * See folly/tests/AtomicHashMapTest.cpp for more benchmarks.
77 * @author Spencer Ahrens <sahrens@fb.com>
78 * @author Jordan DeLong <delong.j@fb.com>
83 #define FOLLY_ATOMICHASHMAP_H_
85 #include <boost/iterator/iterator_facade.hpp>
86 #include <boost/noncopyable.hpp>
87 #include <boost/type_traits/is_convertible.hpp>
93 #include <folly/AtomicHashArray.h>
94 #include <folly/Foreach.h>
95 #include <folly/Hash.h>
96 #include <folly/Likely.h>
97 #include <folly/ThreadCachedInt.h>
102 * AtomicHashMap provides an interface somewhat similar to the
103 * UnorderedAssociativeContainer concept in C++. This does not
104 * exactly match this concept (or even the basic Container concept),
105 * because of some restrictions imposed by our datastructure.
107 * Specific differences (there are quite a few):
109 * - Efficiently thread safe for inserts (main point of this stuff),
110 * wait-free for lookups.
112 * - You can erase from this container, but the cell containing the key will
113 * not be freed or reclaimed.
115 * - You can erase everything by calling clear() (and you must guarantee only
116 * one thread can be using the container to do that).
118 * - We aren't DefaultConstructible, CopyConstructible, Assignable, or
119 * EqualityComparable. (Most of these are probably not something
120 * you actually want to do with this anyway.)
122 * - We don't support the various bucket functions, rehash(),
123 * reserve(), or equal_range(). Also no constructors taking
124 * iterators, although this could change.
126 * - Several insertion functions, notably operator[], are not
127 * implemented. It is a little too easy to misuse these functions
128 * with this container, where part of the point is that when an
129 * insertion happens for a new key, it will atomically have the
132 * - The map has no templated insert() taking an iterator range, but
133 * we do provide an insert(key, value). The latter seems more
134 * frequently useful for this container (to avoid sprinkling
135 * make_pair everywhere), and providing both can lead to some gross
136 * template error messages.
138 * - The Allocator must not be stateful (a new instance will be spun up for
139 * each allocation), and its allocate() method must take a raw number of
142 * - KeyT must be a 32 bit or 64 bit atomic integer type, and you must
143 * define special 'locked' and 'empty' key values in the ctor
145 * - We don't take the Hash function object as an instance in the
150 // Thrown when insertion fails due to running out of space for
152 struct AtomicHashMapFullError : std::runtime_error {
// The what() message is fixed; callers should catch by type rather than
// parse the text.
153   explicit AtomicHashMapFullError()
154     : std::runtime_error("AtomicHashMap is full")
// Growable concurrent hash map built from fixed-size AtomicHashArray
// sub-maps. KeyT must be a 32- or 64-bit integral type (see file header);
// the map is deliberately non-copyable.
158 template<class KeyT, class ValueT, class HashFcn, class EqualFcn,
159          class Allocator, class ProbeFcn, class KeyConvertFcn>
160 class AtomicHashMap : boost::noncopyable {
// Fixed-capacity sub-map type; the map grows by chaining additional ones.
161   typedef AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn,
162                           Allocator, ProbeFcn, KeyConvertFcn>
// STL-style member typedefs for the (partial) UnorderedAssociativeContainer
// interface described in the file header.
166   typedef KeyT                key_type;
167   typedef ValueT              mapped_type;
168   typedef std::pair<const KeyT, ValueT> value_type;
169   typedef HashFcn             hasher;
170   typedef EqualFcn            key_equal;
171   typedef KeyConvertFcn       key_convert;
172   typedef value_type*         pointer;
173   typedef value_type&         reference;
174   typedef const value_type&   const_reference;
175   typedef std::ptrdiff_t      difference_type;
176   typedef std::size_t         size_type;
177   typedef typename SubMap::Config Config;
// Iterator template parameterized over (container, value, sub-map iterator);
// instantiated below for const and mutable traversal.
179   template<class ContT, class IterVal, class SubIt>
182   typedef ahm_iterator<const AtomicHashMap,
184                        typename SubMap::const_iterator>
186   typedef ahm_iterator<AtomicHashMap,
188                        typename SubMap::iterator>
192   const float kGrowthFrac_;  // How much to grow when we run out of capacity.
194   // The constructor takes a finalSizeEst which is the optimal
195   // number of elements to maximize space utilization and performance,
196   // and a Config object to specify more advanced options.
197   explicit AtomicHashMap(size_t finalSizeEst, const Config& c = Config());
// Destructor body: destroy every sub-map allocated so far. Relaxed loads are
// sufficient because destruction must not race with any other use of the map
// (see the "Not thread safe" notes elsewhere in this header).
// NOTE(review): the destructor's signature line is missing from this
// extraction; these statements are its body in the upstream file.
200     const int numMaps = numMapsAllocated_.load(std::memory_order_relaxed);
201     FOR_EACH_RANGE (i, 0, numMaps) {
202       SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
204       SubMap::destroy(thisMap);
// The hash/equality functors are stateless (they are not taken as instances,
// per the header notes), so fresh default-constructed copies are returned.
208   key_equal key_eq() const { return key_equal(); }
209   hasher hash_function() const { return hasher(); }
214 * Returns a pair with iterator to the element at r.first and
215 * success. Retrieve the index with ret.first.getIndex().
217 * Does not overwrite on key collision, but returns an iterator to
218 * the existing element (since this could be due to a race with
219 * another thread, it is often important to check this return
222 * Allocates new sub maps as the existing ones become full. If
223 * all sub maps are full, no element is inserted, and
224 * AtomicHashMapFullError is thrown.
// Convenience overloads: all four forward to emplace(), copying or moving
// the value exactly as each signature indicates. See emplace() for the
// full insertion contract (no overwrite on collision, may throw
// AtomicHashMapFullError).
226   std::pair<iterator,bool> insert(const value_type& r) {
227     return emplace(r.first, r.second);
229   std::pair<iterator,bool> insert(key_type k, const mapped_type& v) {
230     return emplace(k, v);
232   std::pair<iterator,bool> insert(value_type&& r) {
233     return emplace(r.first, std::move(r.second));
235   std::pair<iterator,bool> insert(key_type k, mapped_type&& v) {
236     return emplace(k, std::move(v));
242 * Same contract as insert(), but performs in-place construction
243 * of the value type using the specified arguments.
245 * Also, like find(), this method optionally allows 'key_in' to have a type
246 * different from that stored in the table; see find(). If and only if no
247 * equal key is already present, this method converts 'key_in' to a key of
248 * type KeyT using the provided LookupKeyToKeyFcn.
// Defaulted template parameters let callers write plain emplace(k, args...)
// while still permitting heterogeneous-key lookup/conversion (see the
// comment block above).
250   template <typename LookupKeyT = key_type,
251             typename LookupHashFcn = hasher,
252             typename LookupEqualFcn = key_equal,
253             typename LookupKeyToKeyFcn = key_convert,
255   std::pair<iterator,bool> emplace(LookupKeyT k, ArgTs&&... vCtorArg);
260 * Returns the iterator to the element if found, otherwise end().
262 * As an optional feature, the type of the key to look up (LookupKeyT) is
263 * allowed to be different from the type of keys actually stored (KeyT).
265 * This enables use cases where materializing the key is costly and usually
266 * redundant, e.g., canonicalizing/interning a set of strings and being able
267 * to look up by StringPiece. To use this feature, LookupHashFcn must take
268 * a LookupKeyT, and LookupEqualFcn must take KeyT and LookupKeyT as first
269 * and second parameter, respectively.
271 * See folly/test/ArrayHashMapTest.cpp for sample usage.
// Mutable find; heterogeneous-lookup semantics are documented above.
273   template <typename LookupKeyT = key_type,
274             typename LookupHashFcn = hasher,
275             typename LookupEqualFcn = key_equal>
276   iterator find(LookupKeyT k);
// Const overload with identical lookup semantics.
278   template <typename LookupKeyT = key_type,
279             typename LookupHashFcn = hasher,
280             typename LookupEqualFcn = key_equal>
281   const_iterator find(LookupKeyT k) const;
286    * Erases key k from the map
288    * Returns 1 iff the key is found and erased, and 0 otherwise.
// Note: per the header, erase() does not free or reclaim the cell's memory.
290   size_type erase(key_type k);
295 * Wipes all keys and values from primary map and destroys all secondary
296 * maps. Primary map remains allocated and thus the memory can be reused
297 * in place. Not thread safe.
305 * Returns the exact size of the map. Note this is not as cheap as typical
306 * size() implementations because, for each AtomicHashArray in this AHM, we
307 * need to grab a lock and accumulate the values from all the thread local
308 * counters. See folly/ThreadCachedInt.h for more details.
// Not O(1): size() must lock and sum per-submap thread-cached counters
// (see the size() comment above).
312   bool empty() const { return size() == 0; }
// 0 or 1 only — duplicate keys cannot exist in this map.
314   size_type count(key_type k) const {
315     return find(k) == end() ? 0 : 1;
322    * Returns an iterator into the map.
324    * idx should only be an unmodified value returned by calling getIndex() on
325    * a valid iterator returned by find() or insert().  If idx is invalid you
326    * have a bug and the process aborts.
328   iterator findAt(uint32_t idx) {
// Decode the packed index into (submap i, intra-submap j).
329     SimpleRetT ret = findAtInternal(idx);
330     DCHECK_LT(ret.i, numSubMaps());
331     return iterator(this, ret.i,
332       subMaps_[ret.i].load(std::memory_order_relaxed)->makeIter(ret.j));
// Const overload delegates to the mutable version; safe because findAt()
// performs no mutation.
334   const_iterator findAt(uint32_t idx) const {
335     return const_cast<AtomicHashMap*>(this)->findAt(idx);
338   // Total capacity - summation of capacities of all submaps.
339   size_t capacity() const;
341   // Number of new insertions until current submaps are all at max load factor.
342   size_t spaceRemaining() const;
// Propagate the new thread-cache size to every allocated sub-map's entry
// counter (see folly/ThreadCachedInt.h). Acquire load pairs with submap
// publication so all allocated maps are visited.
344   void setEntryCountThreadCacheSize(int32_t newSize) {
345     const int numMaps = numMapsAllocated_.load(std::memory_order_acquire);
346     for (int i = 0; i < numMaps; ++i) {
347       SubMap* map = subMaps_[i].load(std::memory_order_relaxed);
348       map->setEntryCountThreadCacheSize(newSize);
352   // Number of sub maps allocated so far to implement this map.  The more there
353   // are, the worse the performance.
354   int numSubMaps() const {
355     return numMapsAllocated_.load(std::memory_order_acquire);
// begin(): start at sub-map 0; checkAdvanceToNextSubmap() presumably skips
// ahead if that sub-map holds no elements — confirm against ahm_iterator.
// NOTE(review): the non-const begin()'s signature line is missing from this
// extraction; the two lines below are its body in the upstream file.
360                       subMaps_[0].load(std::memory_order_relaxed)->begin());
361     it.checkAdvanceToNextSubmap();
365   const_iterator begin() const {
366     const_iterator it(this, 0,
367       subMaps_[0].load(std::memory_order_relaxed)->begin());
368     it.checkAdvanceToNextSubmap();
// A default-constructed iterator is the universal end() sentinel.
376   const_iterator end() const {
377     return const_iterator();
380   /* Advanced functions for direct access: */
// recToIdx(): map a record to its packed 32-bit index (see encodeIndex).
// With mayInsert=true the record is inserted if absent; otherwise only a
// lookup is performed.
382   inline uint32_t recToIdx(const value_type& r, bool mayInsert = true) {
383     SimpleRetT ret = mayInsert ?
384       insertInternal(r.first, r.second) : findInternal(r.first);
385     return encodeIndex(ret.i, ret.j);
// Rvalue overload: moves the mapped value into the table on insert.
388   inline uint32_t recToIdx(value_type&& r, bool mayInsert = true) {
389     SimpleRetT ret = mayInsert ?
390       insertInternal(r.first, std::move(r.second)) : findInternal(r.first);
391     return encodeIndex(ret.i, ret.j);
394   inline uint32_t recToIdx(key_type k, const mapped_type& v,
395                            bool mayInsert = true) {
396     SimpleRetT ret = mayInsert ? insertInternal(k, v) : findInternal(k);
397     return encodeIndex(ret.i, ret.j);
400   inline uint32_t recToIdx(key_type k, mapped_type&& v, bool mayInsert = true) {
401     SimpleRetT ret = mayInsert ?
402       insertInternal(k, std::move(v)) : findInternal(k);
403     return encodeIndex(ret.i, ret.j);
// Key-only variant; note mayInsert defaults to false here, unlike recToIdx.
// NOTE(review): value_type is std::pair<const KeyT, ValueT>, so
// value_type(k) requires a matching pair constructor — verify this overload
// actually instantiates for the ValueT in use.
406   inline uint32_t keyToIdx(const KeyT k, bool mayInsert = false) {
407     return recToIdx(value_type(k), mayInsert);
// Inverse of recToIdx: packed index back to a reference into the table.
410   inline const value_type& idxToRec(uint32_t idx) const {
411     SimpleRetT ret = findAtInternal(idx);
412     return subMaps_[ret.i].load(std::memory_order_relaxed)->idxToRec(ret.j);
415   /* Private data and helper functions... */
418   // This limits primary submap size to 2^31 ~= 2 billion, secondary submap
419   // size to 2^(32 - kNumSubMapBits_ - 1) = 2^27 ~= 130 million, and num subMaps
420   // to 2^kNumSubMapBits_ = 16.
421   static const uint32_t  kNumSubMapBits_     = 4;
422   static const uint32_t  kSecondaryMapBit_   = 1u << 31; // Highest bit
423   static const uint32_t  kSubMapIndexShift_  = 32 - kNumSubMapBits_ - 1;
424   static const uint32_t  kSubMapIndexMask_   = (1 << kSubMapIndexShift_) - 1;
425   static const uint32_t  kNumSubMaps_        = 1 << kNumSubMapBits_;
426   static const uintptr_t kLockedPtr_         = 0x88ULL << 48; // invalid pointer
// Internal return triple for insert/find helpers:
// i = sub-map index, j = index within that sub-map, success = op outcome.
428   struct SimpleRetT { uint32_t i; size_t j; bool success;
429     SimpleRetT(uint32_t ii, size_t jj, bool s) : i(ii), j(jj), success(s) {}
430     SimpleRetT() = default;
// Core insert helper shared by insert()/emplace(); returns (submap, index,
// success) rather than an iterator.
433   template <typename LookupKeyT = key_type,
434             typename LookupHashFcn = hasher,
435             typename LookupEqualFcn = key_equal,
436             typename LookupKeyToKeyFcn = key_convert,
438   SimpleRetT insertInternal(LookupKeyT key, ArgTs&&... value);
440   template <typename LookupKeyT = key_type,
441             typename LookupHashFcn = hasher,
442             typename LookupEqualFcn = key_equal>
443   SimpleRetT findInternal(const LookupKeyT k) const;
// Decode a packed index produced by encodeIndex().
445   SimpleRetT findAtInternal(uint32_t idx) const;
// Sub-map pointer slots (fixed-size array of kNumSubMaps_) and the count of
// slots allocated so far.
447   std::atomic<SubMap*> subMaps_[kNumSubMaps_];
448   std::atomic<uint32_t> numMapsAllocated_;
// Atomically claim subMaps_[idx] by CASing nullptr -> kLockedPtr_ sentinel.
// Returns true iff this thread won the race (and so presumably goes on to
// allocate the real sub-map — confirm in AtomicHashMap-inl.h).
450   inline bool tryLockMap(int idx) {
451     SubMap* val = nullptr;
452     return subMaps_[idx].compare_exchange_strong(val, (SubMap*)kLockedPtr_,
453       std::memory_order_acquire);
// Pack (subMap, subMapIdx) into the single 32-bit index used by findAt() et al.
456   static inline uint32_t encodeIndex(uint32_t subMap, uint32_t subMapIdx);
// Convenience alias: AtomicHashMap preconfigured with quadratic probing
// (AtomicHashArrayQuadraticProbeFcn) and standard hash/equal/allocator
// defaults.
460 template <class KeyT,
462           class HashFcn = std::hash<KeyT>,
463           class EqualFcn = std::equal_to<KeyT>,
464           class Allocator = std::allocator<char>>
465 using QuadraticProbingAtomicHashMap =
471                   AtomicHashArrayQuadraticProbeFcn>;
474 #include <folly/AtomicHashMap-inl.h>