folly/AtomicHashMap.h

   1 /*
   2  * Copyright 2017 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /*
  18  * AtomicHashMap --
  19  *
  20  * A high-performance concurrent hash map with int32 or int64 keys. Supports
  21  * insert, find(key), findAt(index), erase(key), size, and more.  Memory cannot
  22  * be freed or reclaimed by erase.  Can grow to a maximum of about 18 times the
  23  * initial capacity, but performance degrades linearly with growth. Can also be
  24  * used as an object store with unique 32-bit references directly into the
  25  * internal storage (retrieved with iterator::getIndex()).
  26  *
  27  * Advantages:
  28  *    - High-performance (~2-4x tbb::concurrent_hash_map in heavily
  29  *      multi-threaded environments).
  30  *    - Efficient memory usage if initial capacity is not over estimated
  31  *      (especially for small keys and values).
  32  *    - Good fragmentation properties (only allocates in large slabs which can
  33  *      be reused with clear() and never move).
  34  *    - Can generate unique, long-lived 32-bit references for efficient lookup
  35  *      (see findAt()).
  36  *
  37  * Disadvantages:
  38  *    - Keys must be native int32 or int64, or explicitly converted.
  39  *    - Must be able to specify unique empty, locked, and erased keys
  40  *    - Performance degrades linearly as size grows beyond initialization
  41  *      capacity.
  42  *    - Max size limit of ~18x initial size (dependent on max load factor).
  43  *    - Memory is not freed or reclaimed by erase.
  44  *
  45  * Usage and Operation Details:
  46  *   Simple performance/memory tradeoff with maxLoadFactor.  Higher load factors
  47  *   give better memory utilization but probe lengths increase, reducing
  48  *   performance.
  49  *
  50  * Implementation and Performance Details:
  51  *   AHArray is a fixed size contiguous block of value_type cells.  When
  52  *   writing a cell, the key is locked while the rest of the record is
  53  *   written.  Once done, the cell is unlocked by setting the key.  find()
  54  *   is completely wait-free and doesn't require any non-relaxed atomic
  55  *   operations.  AHA cannot grow beyond initialization capacity, but is
  56  *   faster because of reduced data indirection.
  57  *
  58  *   AHMap is a wrapper around AHArray sub-maps that allows growth and provides
  59  *   an interface closer to the STL UnorderedAssociativeContainer concept. These
  60  *   sub-maps are allocated on the fly and are processed in series, so the more
  61  *   there are (from growing past initial capacity), the worse the performance.
  62  *
  63  *   Insert returns false if there is a key collision and throws if the max size
  64  *   of the map is exceeded.
  65  *
  66  *   Benchmark performance with 8 simultaneous threads processing 1 million
  67  *   unique <int64, int64> entries on a 4-core, 2.5 GHz machine:
  68  *
  69  *     Load Factor   Mem Efficiency   usec/Insert   usec/Find
  70  *         50%             50%           0.19         0.05
  71  *         85%             85%           0.20         0.06
  72  *         90%             90%           0.23         0.08
  73  *         95%             95%           0.27         0.10
  74  *
  75  *   See folly/tests/AtomicHashMapTest.cpp for more benchmarks.
  76  *
  77  * @author Spencer Ahrens <sahrens@fb.com>
  78  * @author Jordan DeLong <delong.j@fb.com>
  79  *
  80  */
  81
  82 #pragma once
  83 #define FOLLY_ATOMICHASHMAP_H_
  84
  85 #include <boost/iterator/iterator_facade.hpp>
  86 #include <boost/noncopyable.hpp>
  87 #include <boost/type_traits/is_convertible.hpp>
  88
  89 #include <atomic>
  90 #include <functional>
  91 #include <stdexcept>
  92
  93 #include <folly/AtomicHashArray.h>
  94 #include <folly/Foreach.h>
  95 #include <folly/Hash.h>
  96 #include <folly/Likely.h>
  97 #include <folly/ThreadCachedInt.h>
  98
  99 namespace folly {
 100
 101 /*
 102  * AtomicHashMap provides an interface somewhat similar to the
 103  * UnorderedAssociativeContainer concept in C++.  This does not
 104  * exactly match this concept (or even the basic Container concept),
 105  * because of some restrictions imposed by our datastructure.
 106  *
 107  * Specific differences (there are quite a few):
 108  *
 109  * - Efficiently thread safe for inserts (main point of this stuff),
 110  *   wait-free for lookups.
 111  *
 112  * - You can erase from this container, but the cell containing the key will
 113  *   not be free or reclaimed.
 114  *
 115  * - You can erase everything by calling clear() (and you must guarantee only
 116  *   one thread can be using the container to do that).
 117  *
 118  * - We aren't DefaultConstructible, CopyConstructible, Assignable, or
 119  *   EqualityComparable.  (Most of these are probably not something
 120  *   you actually want to do with this anyway.)
 121  *
 122  * - We don't support the various bucket functions, rehash(),
 123  *   reserve(), or equal_range().  Also no constructors taking
 124  *   iterators, although this could change.
 125  *
 126  * - Several insertion functions, notably operator[], are not
 127  *   implemented.  It is a little too easy to misuse these functions
 128  *   with this container, where part of the point is that when an
 129  *   insertion happens for a new key, it will atomically have the
 130  *   desired value.
 131  *
 132  * - The map has no templated insert() taking an iterator range, but
 133  *   we do provide an insert(key, value).  The latter seems more
 134  *   frequently useful for this container (to avoid sprinkling
 135  *   make_pair everywhere), and providing both can lead to some gross
 136  *   template error messages.
 137  *
 138  * - The Allocator must not be stateful (a new instance will be spun up for
 139  *   each allocation), and its allocate() method must take a raw number of
 140  *   bytes.
 141  *
 142  * - KeyT must be a 32 bit or 64 bit atomic integer type, and you must
 143  *   define special 'locked' and 'empty' key values in the ctor
 144  *
 145  * - We don't take the Hash function object as an instance in the
 146  *   constructor.
 147  *
 148  */
 149
 150 // Thrown when insertion fails due to running out of space for
 151 // submaps.
 152 struct AtomicHashMapFullError : std::runtime_error {
 153   explicit AtomicHashMapFullError()
 154     : std::runtime_error("AtomicHashMap is full")
 155   {}
 156 };
 157
 158 template <
 159     class KeyT,
 160     class ValueT,
 161     class HashFcn,
 162     class EqualFcn,
 163     class Allocator,
 164     class ProbeFcn,
 165     class KeyConvertFcn>
 166 class AtomicHashMap : boost::noncopyable {
 167 typedef AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn,
 168                         Allocator, ProbeFcn, KeyConvertFcn>
 169     SubMap;
 170
 171  public:
 172   typedef KeyT                key_type;
 173   typedef ValueT              mapped_type;
 174   typedef std::pair<const KeyT, ValueT> value_type;
 175   typedef HashFcn             hasher;
 176   typedef EqualFcn            key_equal;
 177   typedef KeyConvertFcn       key_convert;
 178   typedef value_type*         pointer;
 179   typedef value_type&         reference;
 180   typedef const value_type&   const_reference;
 181   typedef std::ptrdiff_t      difference_type;
 182   typedef std::size_t         size_type;
 183   typedef typename SubMap::Config Config;
 184
 185   template <class ContT, class IterVal, class SubIt>
 186   struct ahm_iterator;
 187
 188   typedef ahm_iterator<const AtomicHashMap,
 189                        const value_type,
 190                        typename SubMap::const_iterator>
 191     const_iterator;
 192   typedef ahm_iterator<AtomicHashMap,
 193                        value_type,
 194                        typename SubMap::iterator>
 195     iterator;
 196
 197  public:
 198   const float kGrowthFrac_;  // How much to grow when we run out of capacity.
 199
 200   // The constructor takes a finalSizeEst which is the optimal
 201   // number of elements to maximize space utilization and performance,
 202   // and a Config object to specify more advanced options.
 203   explicit AtomicHashMap(size_t finalSizeEst, const Config& c = Config());
 204
 205   ~AtomicHashMap() {
 206     const unsigned int numMaps =
 207         numMapsAllocated_.load(std::memory_order_relaxed);
 208     FOR_EACH_RANGE (i, 0, numMaps) {
 209       SubMap* thisMap = subMaps_[i].load(std::memory_order_relaxed);
 210       DCHECK(thisMap);
 211       SubMap::destroy(thisMap);
 212     }
 213   }
 214
 215   key_equal key_eq() const { return key_equal(); }
 216   hasher hash_function() const { return hasher(); }
 217
 218   /*
 219    * insert --
 220    *
 221    *   Returns a pair with iterator to the element at r.first and
 222    *   success.  Retrieve the index with ret.first.getIndex().
 223    *
 224    *   Does not overwrite on key collision, but returns an iterator to
 225    *   the existing element (since this could due to a race with
 226    *   another thread, it is often important to check this return
 227    *   value).
 228    *
 229    *   Allocates new sub maps as the existing ones become full.  If
 230    *   all sub maps are full, no element is inserted, and
 231    *   AtomicHashMapFullError is thrown.
 232    */
 233   std::pair<iterator,bool> insert(const value_type& r) {
 234     return emplace(r.first, r.second);
 235   }
 236   std::pair<iterator,bool> insert(key_type k, const mapped_type& v) {
 237     return emplace(k, v);
 238   }
 239   std::pair<iterator,bool> insert(value_type&& r) {
 240     return emplace(r.first, std::move(r.second));
 241   }
 242   std::pair<iterator,bool> insert(key_type k, mapped_type&& v) {
 243     return emplace(k, std::move(v));
 244   }
 245
 246   /*
 247    * emplace --
 248    *
 249    *   Same contract as insert(), but performs in-place construction
 250    *   of the value type using the specified arguments.
 251    *
 252    *   Also, like find(), this method optionally allows 'key_in' to have a type
 253    *   different from that stored in the table; see find(). If and only if no
 254    *   equal key is already present, this method converts 'key_in' to a key of
 255    *   type KeyT using the provided LookupKeyToKeyFcn.
 256    */
 257   template <
 258       typename LookupKeyT = key_type,
 259       typename LookupHashFcn = hasher,
 260       typename LookupEqualFcn = key_equal,
 261       typename LookupKeyToKeyFcn = key_convert,
 262       typename... ArgTs>
 263   std::pair<iterator,bool> emplace(LookupKeyT k, ArgTs&&... vCtorArg);
 264
 265   /*
 266    * find --
 267    *
 268    *   Returns the iterator to the element if found, otherwise end().
 269    *
 270    *   As an optional feature, the type of the key to look up (LookupKeyT) is
 271    *   allowed to be different from the type of keys actually stored (KeyT).
 272    *
 273    *   This enables use cases where materializing the key is costly and usually
 274    *   redudant, e.g., canonicalizing/interning a set of strings and being able
 275    *   to look up by StringPiece. To use this feature, LookupHashFcn must take
 276    *   a LookupKeyT, and LookupEqualFcn must take KeyT and LookupKeyT as first
 277    *   and second parameter, respectively.
 278    *
 279    *   See folly/test/ArrayHashMapTest.cpp for sample usage.
 280    */
 281   template <
 282       typename LookupKeyT = key_type,
 283       typename LookupHashFcn = hasher,
 284       typename LookupEqualFcn = key_equal>
 285   iterator find(LookupKeyT k);
 286
 287   template <
 288       typename LookupKeyT = key_type,
 289       typename LookupHashFcn = hasher,
 290       typename LookupEqualFcn = key_equal>
 291   const_iterator find(LookupKeyT k) const;
 292
 293   /*
 294    * erase --
 295    *
 296    *   Erases key k from the map
 297    *
 298    *   Returns 1 iff the key is found and erased, and 0 otherwise.
 299    */
 300   size_type erase(key_type k);
 301
 302   /*
 303    * clear --
 304    *
 305    *   Wipes all keys and values from primary map and destroys all secondary
 306    *   maps.  Primary map remains allocated and thus the memory can be reused
 307    *   in place.  Not thread safe.
 308    *
 309    */
 310   void clear();
 311
 312   /*
 313    * size --
 314    *
 315    *  Returns the exact size of the map.  Note this is not as cheap as typical
 316    *  size() implementations because, for each AtomicHashArray in this AHM, we
 317    *  need to grab a lock and accumulate the values from all the thread local
 318    *  counters.  See folly/ThreadCachedInt.h for more details.
 319    */
 320   size_t size() const;
 321
 322   bool empty() const { return size() == 0; }
 323
 324   size_type count(key_type k) const {
 325     return find(k) == end() ? 0 : 1;
 326   }
 327
 328
 329   /*
 330    * findAt --
 331    *
 332    *   Returns an iterator into the map.
 333    *
 334    *   idx should only be an unmodified value returned by calling getIndex() on
 335    *   a valid iterator returned by find() or insert(). If idx is invalid you
 336    *   have a bug and the process aborts.
 337    */
 338   iterator findAt(uint32_t idx) {
 339     SimpleRetT ret = findAtInternal(idx);
 340     DCHECK_LT(ret.i, numSubMaps());
 341     return iterator(this, ret.i,
 342       subMaps_[ret.i].load(std::memory_order_relaxed)->makeIter(ret.j));
 343   }
 344   const_iterator findAt(uint32_t idx) const {
 345     return const_cast<AtomicHashMap*>(this)->findAt(idx);
 346   }
 347
 348   // Total capacity - summation of capacities of all submaps.
 349   size_t capacity() const;
 350
 351   // Number of new insertions until current submaps are all at max load factor.
 352   size_t spaceRemaining() const;
 353
 354   void setEntryCountThreadCacheSize(int32_t newSize) {
 355     const int numMaps = numMapsAllocated_.load(std::memory_order_acquire);
 356     for (int i = 0; i < numMaps; ++i) {
 357       SubMap* map = subMaps_[i].load(std::memory_order_relaxed);
 358       map->setEntryCountThreadCacheSize(newSize);
 359     }
 360   }
 361
 362   // Number of sub maps allocated so far to implement this map.  The more there
 363   // are, the worse the performance.
 364   int numSubMaps() const {
 365     return numMapsAllocated_.load(std::memory_order_acquire);
 366   }
 367
 368   iterator begin() {
 369     iterator it(this, 0,
 370       subMaps_[0].load(std::memory_order_relaxed)->begin());
 371     it.checkAdvanceToNextSubmap();
 372     return it;
 373   }
 374
 375   const_iterator begin() const {
 376     const_iterator it(this, 0,
 377       subMaps_[0].load(std::memory_order_relaxed)->begin());
 378     it.checkAdvanceToNextSubmap();
 379     return it;
 380   }
 381
 382   iterator end() {
 383     return iterator();
 384   }
 385
 386   const_iterator end() const {
 387     return const_iterator();
 388   }
 389
 390   /* Advanced functions for direct access: */
 391
 392   inline uint32_t recToIdx(const value_type& r, bool mayInsert = true) {
 393     SimpleRetT ret = mayInsert ?
 394       insertInternal(r.first, r.second) : findInternal(r.first);
 395     return encodeIndex(ret.i, ret.j);
 396   }
 397
 398   inline uint32_t recToIdx(value_type&& r, bool mayInsert = true) {
 399     SimpleRetT ret = mayInsert ?
 400       insertInternal(r.first, std::move(r.second)) : findInternal(r.first);
 401     return encodeIndex(ret.i, ret.j);
 402   }
 403
 404   inline uint32_t recToIdx(key_type k, const mapped_type& v,
 405     bool mayInsert = true) {
 406     SimpleRetT ret = mayInsert ? insertInternal(k, v) : findInternal(k);
 407     return encodeIndex(ret.i, ret.j);
 408   }
 409
 410   inline uint32_t recToIdx(key_type k, mapped_type&& v, bool mayInsert = true) {
 411     SimpleRetT ret = mayInsert ?
 412       insertInternal(k, std::move(v)) : findInternal(k);
 413     return encodeIndex(ret.i, ret.j);
 414   }
 415
 416   inline uint32_t keyToIdx(const KeyT k, bool mayInsert = false) {
 417     return recToIdx(value_type(k), mayInsert);
 418   }
 419
 420   inline const value_type& idxToRec(uint32_t idx) const {
 421     SimpleRetT ret = findAtInternal(idx);
 422     return subMaps_[ret.i].load(std::memory_order_relaxed)->idxToRec(ret.j);
 423   }
 424
 425   /* Private data and helper functions... */
 426
 427  private:
 428   // This limits primary submap size to 2^31 ~= 2 billion, secondary submap
 429   // size to 2^(32 - kNumSubMapBits_ - 1) = 2^27 ~= 130 million, and num subMaps
 430   // to 2^kNumSubMapBits_ = 16.
 431   static const uint32_t  kNumSubMapBits_     = 4;
 432   static const uint32_t  kSecondaryMapBit_   = 1u << 31; // Highest bit
 433   static const uint32_t  kSubMapIndexShift_  = 32 - kNumSubMapBits_ - 1;
 434   static const uint32_t  kSubMapIndexMask_   = (1 << kSubMapIndexShift_) - 1;
 435   static const uint32_t  kNumSubMaps_        = 1 << kNumSubMapBits_;
 436   static const uintptr_t kLockedPtr_         = 0x88ULL << 48; // invalid pointer
 437
 438   struct SimpleRetT { uint32_t i; size_t j; bool success;
 439     SimpleRetT(uint32_t ii, size_t jj, bool s) : i(ii), j(jj), success(s) {}
 440     SimpleRetT() = default;
 441   };
 442
 443   template <
 444       typename LookupKeyT = key_type,
 445       typename LookupHashFcn = hasher,
 446       typename LookupEqualFcn = key_equal,
 447       typename LookupKeyToKeyFcn = key_convert,
 448       typename... ArgTs>
 449   SimpleRetT insertInternal(LookupKeyT key, ArgTs&&... value);
 450
 451   template <
 452       typename LookupKeyT = key_type,
 453       typename LookupHashFcn = hasher,
 454       typename LookupEqualFcn = key_equal>
 455   SimpleRetT findInternal(const LookupKeyT k) const;
 456
 457   SimpleRetT findAtInternal(uint32_t idx) const;
 458
 459   std::atomic<SubMap*> subMaps_[kNumSubMaps_];
 460   std::atomic<uint32_t> numMapsAllocated_;
 461
 462   inline bool tryLockMap(unsigned int idx) {
 463     SubMap* val = nullptr;
 464     return subMaps_[idx].compare_exchange_strong(val, (SubMap*)kLockedPtr_,
 465       std::memory_order_acquire);
 466   }
 467
 468   static inline uint32_t encodeIndex(uint32_t subMap, uint32_t subMapIdx);
 469
 470 }; // AtomicHashMap
 471
 472 template <
 473     class KeyT,
 474     class ValueT,
 475     class HashFcn = std::hash<KeyT>,
 476     class EqualFcn = std::equal_to<KeyT>,
 477     class Allocator = std::allocator<char>>
 478 using QuadraticProbingAtomicHashMap =
 479     AtomicHashMap<KeyT,
 480                   ValueT,
 481                   HashFcn,
 482                   EqualFcn,
 483                   Allocator,
 484                   AtomicHashArrayQuadraticProbeFcn>;
 485 } // namespace folly
 486
 487 #include <folly/AtomicHashMap-inl.h>