2 * Copyright 2017-present Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include <folly/Optional.h>
19 #include <folly/concurrency/detail/ConcurrentHashMap-detail.h>
20 #include <folly/experimental/hazptr/hazptr.h>
27 * Based on Java's ConcurrentHashMap
29 * Readers are always wait-free.
30 * Writers are sharded, but take a lock.
32 * The interface is as close to std::unordered_map as possible, but there
33 * are a handful of changes:
35 * * Iterators hold hazard pointers to the returned elements. Elements can only
36 * be accessed while Iterators are still valid!
38 * * Therefore operator[] and at() return copies, since they do not
39 * return an iterator. The returned value is const, to remind you
40 * that changes do not affect the value in the map.
42 * * erase() calls the hash function, and may fail if the hash
43 * function throws an exception.
45 * * clear() initializes new segments, and is not noexcept.
47 * * The interface adds assign_if_equal, since find() doesn't take a lock.
49 * * Only const version of find() is supported, and const iterators.
50 * Mutation must use functions provided, like assign().
52 * * iteration iterates over all the buckets in the table, unlike
53 * std::unordered_map which iterates over a linked list of elements.
54 * If the table is sparse, this may be more expensive.
56 * * rehash policy is a power of two, using supplied factor.
58 * * Allocator must be stateless.
60 * * ValueTypes without copy constructors will work, but pessimize the
64 * Single-threaded performance is extremely similar to std::unordered_map.
66 * Multithreaded performance beats anything except the lock-free
67 * atomic maps (AtomicUnorderedMap, AtomicHashMap), BUT only
68 * if you can perfectly size the atomic maps, and you don't
69 * need erase(). If you don't know the size in advance or
70 * your workload frequently calls erase(), this is the
// Tail of the class template parameter list (the head, declaring KeyType and
// ValueType, is on elided lines above): hash and key-equality functors, a
// stateless byte allocator, the shard-count exponent, the atomic wrapper
// template, and the per-shard mutex type.
77     typename HashFn = std::hash<KeyType>,
78     typename KeyEqual = std::equal_to<KeyType>,
79     typename Allocator = std::allocator<uint8_t>,
80     uint8_t ShardBits = 8,
81     template <typename> class Atom = std::atomic,
82     class Mutex = std::mutex>
83 class ConcurrentHashMap {
84   using SegmentT = detail::ConcurrentHashMapSegment<
// Number of shards is a power of two so pickSegment() can select a shard
// with a cheap mask: h & (NumShards - 1).
93   static constexpr uint64_t NumShards = (1 << ShardBits);
94   // Slightly higher than 1.0, in case hashing to shards isn't
95   // perfectly balanced, reserve(size) will still work without
97   float load_factor_ = 1.05;
// Standard unordered_map-style member typedefs. Note value_type carries a
// const key, and only const iteration is exposed (const_iterator).
102   typedef KeyType key_type;
103   typedef ValueType mapped_type;
104   typedef std::pair<const KeyType, ValueType> value_type;
105   typedef std::size_t size_type;
106   typedef HashFn hasher;
107   typedef KeyEqual key_equal;
108   typedef ConstIterator const_iterator;
111    * Construct a ConcurrentHashMap with 1 << ShardBits shards, size
112    * and max_size given. Both size and max_size will be rounded up to
113    * the next power of two, if they are not already a power of two, so
114    * that we can index in to Shards efficiently.
116    * Insertion functions will throw bad_alloc if max_size is exceeded.
118   explicit ConcurrentHashMap(size_t size = 8, size_t max_size = 0) {
119     size_ = folly::nextPowTwo(size);
// NOTE(review): the rounding of max_size below appears to be guarded by an
// elided line (max_size == 0 should mean "unbounded") — confirm against the
// full source.
121     max_size_ = folly::nextPowTwo(max_size);
123     CHECK(max_size_ == 0 || max_size_ >= size_);
// Segments are created lazily by ensureSegment(); start every slot null.
// Relaxed ordering is fine here: no other thread can see the map yet.
124     for (uint64_t i = 0; i < NumShards; i++) {
125       segments_[i].store(nullptr, std::memory_order_relaxed);
// Move construction: steal each segment pointer from the source and null it
// out there, leaving the moved-from map empty. Relaxed ordering assumes no
// concurrent access to either map during the move.
129   ConcurrentHashMap(ConcurrentHashMap&& o) noexcept {
130     for (uint64_t i = 0; i < NumShards; i++) {
132           o.segments_[i].load(std::memory_order_relaxed),
133           std::memory_order_relaxed);
134       o.segments_[i].store(nullptr, std::memory_order_relaxed);
// Move assignment: tear down this map's own segments (deallocation visible
// below; the explicit destructor call is presumably on an elided line —
// confirm), then steal the source's segment pointers as in the move ctor.
// Not noexcept: not safe to call concurrently with other operations.
138   ConcurrentHashMap& operator=(ConcurrentHashMap&& o) {
139     for (uint64_t i = 0; i < NumShards; i++) {
140       auto seg = segments_[i].load(std::memory_order_relaxed);
// Segments were placement-constructed into Allocator-provided storage, so
// storage is returned via the stateless Allocator.
143         Allocator().deallocate((uint8_t*)seg, sizeof(SegmentT));
146           o.segments_[i].load(std::memory_order_relaxed),
147           std::memory_order_relaxed);
148       o.segments_[i].store(nullptr, std::memory_order_relaxed);
// Destructor: release every lazily-created segment. The matching explicit
// SegmentT destructor call is presumably on an elided line before the
// deallocate — confirm against the full source.
153   ~ConcurrentHashMap() {
154     for (uint64_t i = 0; i < NumShards; i++) {
155       auto seg = segments_[i].load(std::memory_order_relaxed);
158         Allocator().deallocate((uint8_t*)seg, sizeof(SegmentT));
// True iff every shard is empty. A null segment slot counts as empty since
// segments are only created on first write. Acquire load pairs with the
// release publication in ensureSegment().
163   bool empty() const noexcept {
164     for (uint64_t i = 0; i < NumShards; i++) {
165       auto seg = segments_[i].load(std::memory_order_acquire);
// Wait-free lookup. If the shard was never created, or the segment lookup
// fails, the iterator's segment_ is set to NumShards, which is exactly how
// cend() is represented, so `find(k) == cend()` signals "not found".
175   ConstIterator find(const KeyType& k) const {
176     auto segment = pickSegment(k);
177     ConstIterator res(this, segment);
178     auto seg = segments_[segment].load(std::memory_order_acquire);
179     if (!seg || !seg->find(res.it_, k)) {
180       res.segment_ = NumShards;
// End sentinel: an iterator whose segment_ equals NumShards (one past the
// last shard index).
185   ConstIterator cend() const noexcept {
186     return ConstIterator(NumShards);
// Begin: delegates to the ConstIterator(parent) ctor, which advances to the
// first element across shards.
189   ConstIterator cbegin() const noexcept {
190     return ConstIterator(this);
// Insert by rvalue pair. Takes pair<key_type, mapped_type> (non-const key)
// rather than value_type so the key can be moved into the node.
// Returns {iterator to the element, whether insertion happened}.
193   std::pair<ConstIterator, bool> insert(
194       std::pair<key_type, mapped_type>&& foo) {
195     auto segment = pickSegment(foo.first);
// Piecewise construction: build the ConstIterator member in place so it is
// bound to (this, segment) before the segment fills in res.first.it_.
196     std::pair<ConstIterator, bool> res(
197         std::piecewise_construct,
198         std::forward_as_tuple(this, segment),
199         std::forward_as_tuple(false));
200     res.second = ensureSegment(segment)->insert(res.first.it_, std::move(foo));
// Insert with separately forwarded key and value; avoids constructing an
// intermediate pair. Returns {iterator, inserted?}.
204   template <typename Key, typename Value>
205   std::pair<ConstIterator, bool> insert(Key&& k, Value&& v) {
206     auto segment = pickSegment(k);
207     std::pair<ConstIterator, bool> res(
208         std::piecewise_construct,
209         std::forward_as_tuple(this, segment),
210         std::forward_as_tuple(false));
211     res.second = ensureSegment(segment)->insert(
212         res.first.it_, std::forward<Key>(k), std::forward<Value>(v));
// try_emplace: forwards args to construct the mapped value only if the key
// is not already present (value construction is deferred to the segment).
// Returns {iterator, inserted?}.
216   template <typename Key, typename... Args>
217   std::pair<ConstIterator, bool> try_emplace(Key&& k, Args&&... args) {
218     auto segment = pickSegment(k);
219     std::pair<ConstIterator, bool> res(
220         std::piecewise_construct,
221         std::forward_as_tuple(this, segment),
222         std::forward_as_tuple(false));
223     res.second = ensureSegment(segment)->try_emplace(
224         res.first.it_, std::forward<Key>(k), std::forward<Args>(args)...);
// emplace: unlike try_emplace, the node must be fully constructed up front,
// because the key (needed for pickSegment) only exists after construction.
228   template <typename... Args>
229   std::pair<ConstIterator, bool> emplace(Args&&... args) {
230     using Node = typename SegmentT::Node;
// Raw allocate + placement-new; ownership passes to the segment on success.
231     auto node = (Node*)Allocator().allocate(sizeof(Node));
232     new (node) Node(std::forward<Args>(args)...);
233     auto segment = pickSegment(node->getItem().first);
234     std::pair<ConstIterator, bool> res(
235         std::piecewise_construct,
236         std::forward_as_tuple(this, segment),
237         std::forward_as_tuple(false));
238     res.second = ensureSegment(segment)->emplace(
239         res.first.it_, node->getItem().first, node);
// On the failed-insert path the node is freed here; its destructor call is
// presumably on an elided line just above — confirm against the full source.
242       Allocator().deallocate((uint8_t*)node, sizeof(Node));
// insert_or_assign: inserts if absent, otherwise overwrites the mapped
// value. Returns {iterator, true if an insert (not an assign) happened}.
247   template <typename Key, typename Value>
248   std::pair<ConstIterator, bool> insert_or_assign(Key&& k, Value&& v) {
249     auto segment = pickSegment(k);
250     std::pair<ConstIterator, bool> res(
251         std::piecewise_construct,
252         std::forward_as_tuple(this, segment),
253         std::forward_as_tuple(false));
254     res.second = ensureSegment(segment)->insert_or_assign(
255         res.first.it_, std::forward<Key>(k), std::forward<Value>(v));
// assign: overwrite the value only if the key already exists; never inserts.
// Returns an empty Optional when the shard was never created (key cannot be
// present) or when the segment-level assign fails; note it does NOT call
// ensureSegment(), so a missing shard is not allocated just to say "no".
259   template <typename Key, typename Value>
260   folly::Optional<ConstIterator> assign(Key&& k, Value&& v) {
261     auto segment = pickSegment(k);
262     ConstIterator res(this, segment);
263     auto seg = segments_[segment].load(std::memory_order_acquire);
265       return folly::Optional<ConstIterator>();
268           seg->assign(res.it_, std::forward<Key>(k), std::forward<Value>(v));
270         return folly::Optional<ConstIterator>();
276   // Assign to desired if and only if key k is equal to expected
// Compare-and-swap-style update at the map level: only assigns `desired`
// when the current mapped value equals `expected`. Returns an empty
// Optional if the shard doesn't exist or the comparison/assignment fails.
277   template <typename Key, typename Value>
278   folly::Optional<ConstIterator>
279   assign_if_equal(Key&& k, const ValueType& expected, Value&& desired) {
280     auto segment = pickSegment(k);
281     ConstIterator res(this, segment);
282     auto seg = segments_[segment].load(std::memory_order_acquire);
284       return folly::Optional<ConstIterator>();
286       auto r = seg->assign_if_equal(
288           std::forward<Key>(k),
290           std::forward<Value>(desired));
292         return folly::Optional<ConstIterator>();
298   // Copying wrappers around insert and find.
299   // Only available for copyable types.
// operator[] default-inserts (like std::unordered_map) but returns a COPY of
// the value, const, since no mutable reference into the map can be handed
// out safely (see the header comment on iterators and hazard pointers).
300   const ValueType operator[](const KeyType& key) {
301     auto item = insert(key, ValueType());
302     return item.first->second;
// at(): copy of the mapped value, or std::out_of_range if the key is absent.
305   const ValueType at(const KeyType& key) const {
306     auto item = find(key);
307     if (item == cend()) {
308       throw std::out_of_range("at(): value out of range");
313   // TODO update assign interface, operator[], at
// erase by key: returns the number of elements removed (0 or 1). Like
// assign(), a never-created shard means the key can't exist, so no segment
// is allocated. Calls the hash function, and therefore may throw.
315   size_type erase(const key_type& k) {
316     auto segment = pickSegment(k);
317     auto seg = segments_[segment].load(std::memory_order_acquire);
321       return seg->erase(k);
325   // Calls the hash function, and therefore may throw.
// erase by iterator: removes *pos and returns an iterator advanced past it;
// next() may step into the following shard if this one is exhausted.
326   ConstIterator erase(ConstIterator& pos) {
327     auto segment = pickSegment(pos->first);
328     ConstIterator res(this, segment);
330     ensureSegment(segment)->erase(res.it_, pos.it_);
331     res.next(); // May point to segment end, and need to advance.
335   // NOT noexcept, initializes new shard segments vs.
// clear(): per-shard reset; acquire load pairs with ensureSegment's publish.
337     for (uint64_t i = 0; i < NumShards; i++) {
338       auto seg = segments_[i].load(std::memory_order_acquire);
// reserve(): split the requested capacity evenly across shards (count is
// divided by the shard count via the shift) and forward to each existing
// segment. Shards that were never created are skipped (handling elided).
345   void reserve(size_t count) {
346     count = count >> ShardBits;
347     for (uint64_t i = 0; i < NumShards; i++) {
348       auto seg = segments_[i].load(std::memory_order_acquire);
355   // This is a rolling size, and is not exact at any moment in time.
// size(): sums per-segment sizes; only approximate under concurrent writes.
356   size_t size() const noexcept {
358     for (uint64_t i = 0; i < NumShards; i++) {
359       auto seg = segments_[i].load(std::memory_order_acquire);
// max_load_factor(): getter returns the stored factor; setter forwards the
// new factor to every already-created segment.
367   float max_load_factor() const {
371   void max_load_factor(float factor) {
372     for (uint64_t i = 0; i < NumShards; i++) {
373       auto seg = segments_[i].load(std::memory_order_acquire);
375         seg->max_load_factor(factor);
// Const iterator over all shards. Holds a SegmentT::Iterator (which pins the
// current element via hazard pointers — see the class header comment) plus
// the current shard index and a pointer back to the parent map.
380   class ConstIterator {
382     friend class ConcurrentHashMap;
384     const value_type& operator*() const {
388     const value_type* operator->() const {
392     ConstIterator& operator++() {
398     ConstIterator operator++(int) {
// Equality compares both the segment-level iterator and the shard index;
// cend() is represented by segment_ == NumShards.
404     bool operator==(const ConstIterator& o) const {
405       return it_ == o.it_ && segment_ == o.segment_;
408     bool operator!=(const ConstIterator& o) const {
409       return !(*this == o);
412     ConstIterator& operator=(const ConstIterator& o) {
414       segment_ = o.segment_;
418     ConstIterator(const ConstIterator& o) {
420       segment_ = o.segment_;
423     ConstIterator(const ConcurrentHashMap* parent, uint64_t segment)
424         : segment_(segment), parent_(parent) {}
// begin-style ctor: start at shard 0's cbegin, then advance to the first
// real element, which may live in any shard.
428     explicit ConstIterator(const ConcurrentHashMap* parent)
429         : it_(parent->ensureSegment(0)->cbegin()),
432       // Always iterate to the first element, could be in any shard.
// end-style ctor: shards == NumShards produces the cend() sentinel.
437     explicit ConstIterator(uint64_t shards) : it_(nullptr), segment_(shards) {}
// Advance across shard boundaries: while the current shard is exhausted,
// move to the next shard (ensureSegment creates it if needed) until an
// element is found or segment_ reaches NumShards (== cend()).
440       while (it_ == parent_->ensureSegment(segment_)->cend() &&
441              segment_ < parent_->NumShards) {
443         auto seg = parent_->segments_[segment_].load(std::memory_order_acquire);
444         if (segment_ < parent_->NumShards) {
453     typename SegmentT::Iterator it_;
455     const ConcurrentHashMap* parent_;
// Map a key to its shard index: hash, then mask off the low ShardBits bits
// (valid because NumShards is a power of two).
459   uint64_t pickSegment(const KeyType& k) const {
460     auto h = HashFn()(k);
461     // Use the lowest bits for our shard bits.
463     // This works well even if the hash function is biased towards the
464     // low bits: The sharding will happen in the segments_ instead of
465     // in the segment buckets, so we'll still get write sharding as
468     // Low-bit bias happens often for std::hash using small numbers,
469     // since the integer hash function is the identity function.
470     return h & (NumShards - 1);
// Lazily create shard i with a lock-free publish: allocate + placement-new
// a SegmentT sized for this shard (size_ and max_size_ divided by the shard
// count), then CAS it into the slot. If another thread won the race, `seg`
// is reloaded with the winner and our copy is freed (its destructor call is
// presumably on an elided line before the deallocate — confirm).
473   SegmentT* ensureSegment(uint64_t i) const {
474     SegmentT* seg = segments_[i].load(std::memory_order_acquire);
476       SegmentT* newseg = (SegmentT*)Allocator().allocate(sizeof(SegmentT));
477       newseg = new (newseg)
478           SegmentT(size_ >> ShardBits, load_factor_, max_size_ >> ShardBits);
479       if (!segments_[i].compare_exchange_strong(seg, newseg)) {
480         // seg is updated with new value, delete ours.
482         Allocator().deallocate((uint8_t*)newseg, sizeof(SegmentT));
// Shard pointer array; mutable + Atom so const readers (find, ConstIterator)
// can lazily create segments via ensureSegment().
490   mutable Atom<SegmentT*> segments_[NumShards];