cds/container/impl/multilevel_hashset.h

   1 //$$CDS-header$$
   2
   3 #ifndef CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHSET_H
   4 #define CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHSET_H
   5
   6 #include <cds/intrusive/impl/multilevel_hashset.h>
   7 #include <cds/container/details/multilevel_hashset_base.h>
   8
   9 namespace cds { namespace container {
  10
  11     /// Hash set based on multi-level array
  12     /** @ingroup cds_nonintrusive_set
  13         @anchor cds_container_MultilevelHashSet_hp
  14
  15         Source:
  16         - [2013] Steven Feldman, Pierre LaBorde, Damian Dechev "Concurrent Multi-level Arrays:
  17                  Wait-free Extensible Hash Maps"
  18
  19         [From the paper] The hardest problem encountered while developing a parallel hash map is how to perform
  20         a global resize, the process of redistributing the elements in a hash map that occurs when adding new
  21         buckets. The negative impact of blocking synchronization is multiplied during a global resize, because all
  22         threads will be forced to wait on the thread that is performing the involved process of resizing the hash map
  23         and redistributing the elements. \p %MultilevelHashSet implementation avoids global resizes through new array
  24         allocation. By allowing concurrent expansion this structure is free from the overhead of an explicit resize,
  25         which facilitates concurrent operations.
  26
  27         The presented design includes dynamic hashing, the use of sub-arrays within the hash map data structure;
  28         which, in combination with <b>perfect hashing</b>, means that each element has a unique final, as well as current, position.
  29         It is important to note that the perfect hash function required by our hash map is trivial to realize as
  30         any hash function that permutes the bits of the key is suitable. This is possible because of our approach
  31         to the hash function; we require that it produces hash values that are equal in size to that of the key.
  32         We know that if we expand the hash map a fixed number of times there can be no collision as duplicate keys
  33         are not provided for in the standard semantics of a hash map.
  34
  35         \p %MultiLevelHashSet is a multi-level array which has an internal structure similar to a tree:
  36         @image html multilevel_hashset.png
  37         The multi-level array differs from a tree in that each position on the tree could hold an array of nodes or a single node.
  38         A position that holds a single node is a \p dataNode which holds the hash value of a key and the value that is associated\r
  39         with that key; it is a simple struct holding two variables. A \p dataNode in the multi-level array could be marked.\r
  40         A \p markedDataNode refers to a pointer to a \p dataNode that has been bitmarked at the least significant bit (LSB)\r
  41         of the pointer to the node. This signifies that this \p dataNode is contended. An expansion must occur at this node;\r
  42         any thread that sees this \p markedDataNode will try to replace it with an \p arrayNode; which is a position that holds\r
  43         an array of nodes. The pointer to an \p arrayNode is differentiated from that of a pointer to a \p dataNode by a bitmark\r
  44         on the second-least significant bit.\r
  45 \r
  46         \p %MultiLevelHashSet multi-level array is similar to a tree in that we keep a pointer to the root, which is a memory array\r
  47         called \p head. The length of the \p head memory array is unique, whereas every other \p arrayNode has a uniform length;\r
  48         a normal \p arrayNode has a fixed power-of-two length equal to the binary logarithm of a variable called \p arrayLength.\r
  49         The maximum depth of the tree, \p maxDepth, is the maximum number of pointers that must be followed to reach any node.\r
  50         We define \p currentDepth as the number of memory arrays that we need to traverse to reach the \p arrayNode on which\r
  51         we need to operate; this is initially one, because of \p head.\r
  52 \r
  53         That approach to the structure of the hash set uses an extensible hashing scheme; <b> the hash value is treated as a bit\r
  54         string</b> and rehash incrementally.\r
  55 \r
  56         @note Two important things you should keep in mind when you're using \p %MultiLevelHashSet:\r
  57         - all keys must be fixed-size. It means that you cannot use \p std::string as a key for \p %MultiLevelHashSet.\r
  58           Instead, for the strings you should use well-known hashing algorithms like <a href="https://en.wikipedia.org/wiki/Secure_Hash_Algorithm">SHA1, SHA2</a>,\r
  59           <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>, <a href="https://en.wikipedia.org/wiki/CityHash">CityHash</a> \r
  60           or its successor <a href="https://code.google.com/p/farmhash/">FarmHash</a> and so on, which\r
  61           converts variable-length strings to fixed-length bit-strings, and use that hash as a key in \p %MultiLevelHashSet.\r
  62         - \p %MultiLevelHashSet uses a perfect hashing. It means that if two different keys, for example, of type \p std::string,\r
  63           have identical hash then you cannot insert both that keys in the set. \p %MultiLevelHashSet does not maintain the key,\r
  64           it maintains its fixed-size hash value.\r
  65 \r
  66         The set supports @ref cds_container_MultilevelHashSet_iterators "bidirectional thread-safe iterators".\r
  67 \r
  68         Template parameters:\r
  69         - \p GC - safe memory reclamation schema. Can be \p gc::HP, \p gc::DHP or one of \ref cds_urcu_type "RCU type"\r
  70         - \p T - a value type to be stored in the set\r
  71         - \p Traits - type traits, the structure based on \p multilevel_hashset::traits or result of \p multilevel_hashset::make_traits metafunction.\r
  72             \p Traits is the mandatory argument because it has one mandatory type - an @ref multilevel_hashset::traits::hash_accessor "accessor" \r
  73             to hash value of \p T. The set algorithm does not calculate that hash value.\r
  74 \r
  75         There are several specializations of \p %MultiLevelHashSet for each \p GC. You should include:
  76         - <tt><cds/container/multilevel_hashset_hp.h></tt> for \p gc::HP garbage collector
  77         - <tt><cds/container/multilevel_hashset_dhp.h></tt> for \p gc::DHP garbage collector
  78         - <tt><cds/container/multilevel_hashset_rcu.h></tt> for \ref cds_intrusive_MultiLevelHashSet_rcu "RCU type". RCU specialization
  79             has a slightly different interface.
  80     */
  81     template <
  82         class GC
  83         , typename T
  84 #ifdef CDS_DOXYGEN_INVOKED
  85         , class Traits = multilevel_hashset::traits
  86 #else
  87         , class Traits
  88 #endif
  89     >
  90     class MultiLevelHashSet
  91 #ifdef CDS_DOXYGEN_INVOKED
  92         : protected cds::intrusive::MultiLevelHashSet< GC, T, Traits >
  93 #else
  94         : protected cds::container::details::make_multilevel_hashset< GC, T, Traits >::type
  95 #endif
  96     {
  97         //@cond
  98         typedef cds::container::details::make_multilevel_hashset< GC, T, Traits > maker;
  99         typedef typename maker::type base_class;
 100         //@endcond
 101
 102     public:
 103         typedef GC      gc;         ///< Garbage collector
 104         typedef T       value_type; ///< type of value stored in the set
 105         typedef Traits  traits;     ///< Traits template parameter, see \p multilevel_hashset::traits
 106
 107         typedef typename base_class::hash_accessor hash_accessor; ///< Hash accessor functor
 108         typedef typename base_class::hash_type hash_type; ///< Hash type deduced from \p hash_accessor return type
 109         typedef typename base_class::hash_comparator hash_comparator; ///< hash compare functor based on \p opt::compare and \p opt::less option setter
 110
 111         typedef typename traits::item_counter   item_counter;   ///< Item counter type
 112         typedef typename traits::allocator      allocator;      ///< Element allocator
 113         typedef typename traits::node_allocator node_allocator; ///< Array node allocator
 114         typedef typename traits::memory_model   memory_model;   ///< Memory model
 115         typedef typename traits::back_off       back_off;       ///< Backoff strategy
 116         typedef typename traits::stat           stat;           ///< Internal statistics type
 117
 118         typedef typename gc::template guarded_ptr< value_type > guarded_ptr; ///< Guarded pointer
 119
 120         /// Count of hazard pointers required
 121         static CDS_CONSTEXPR size_t const c_nHazardPtrCount = base_class::c_nHazardPtrCount;
 122
 123         typedef typename base_class::iterator               iterator;       ///< @ref cds_container_MultilevelHashSet_iterators "bidirectional iterator" type
 124         typedef typename base_class::const_iterator         const_iterator; ///< @ref cds_container_MultilevelHashSet_iterators "bidirectional const iterator" type
 125         typedef typename base_class::reverse_iterator       reverse_iterator;       ///< @ref cds_container_MultilevelHashSet_iterators "bidirectional reverse iterator" type
 126         typedef typename base_class::const_reverse_iterator const_reverse_iterator; ///< @ref cds_container_MultilevelHashSet_iterators "bidirectional reverse const iterator" type
 127
 128     protected:
 129         //@cond
 130         typedef typename maker::cxx_node_allocator cxx_node_allocator;
 131         typedef std::unique_ptr< value_type, typename maker::node_disposer > scoped_node_ptr;
 132         //@endcond
 133
 134     public:
 135         /// Creates empty set
 136         /**
 137             @param head_bits: 2<sup>head_bits</sup> specifies the size of head array, minimum is 4.
 138             @param array_bits: 2<sup>array_bits</sup> specifies the size of array node, minimum is 2.
 139
 140             Equation for \p head_bits and \p array_bits:
 141             \code
 142             sizeof(hash_type) * 8 == head_bits + N * array_bits
 143             \endcode
 144             where \p N is multi-level array depth.
 145         */
 146         MultiLevelHashSet( size_t head_bits = 8, size_t array_bits = 4 )
 147             : base_class( head_bits, array_bits )
 148         {}
 149
 150         /// Destructs the set and frees all data
 151         ~MultiLevelHashSet()
 152         {}
 153
 154         /// Inserts new element
 155         /**
 156             The function creates an element with copy of \p val value and then inserts it into the set.
 157
 158             The type \p Q should contain as minimum the complete key for the element.
 159             The object of \ref value_type should be constructible from a value of type \p Q.
 160             In trivial case, \p Q is equal to \ref value_type.
 161
 162             Returns \p true if \p val is inserted into the set, \p false otherwise.
 163         */
 164         template <typename Q>
 165         bool insert( Q const& val )
 166         {
 167             scoped_node_ptr sp( cxx_node_allocator().New( val ));
 168             if ( base_class::insert( *sp )) {
 169                 sp.release();
 170                 return true;
 171             }
 172             return false;
 173         }
 174
 175         /// Inserts new element
 176         /**
 177             The function allows to split creating of new item into two part:
 178             - create item with key only
 179             - insert new item into the set
 180             - if inserting is success, calls \p f functor to initialize value-fields of \p val.
 181
 182             The functor signature is:
 183             \code
 184                 void func( value_type& val );
 185             \endcode
 186             where \p val is the item inserted. User-defined functor \p f should guarantee that during changing
 187             \p val no any other changes could be made on this set's item by concurrent threads.
 188             The user-defined functor is called only if the inserting is success.
 189         */
 190         template <typename Q, typename Func>
 191         bool insert( Q const& val, Func f )
 192         {
 193             scoped_node_ptr sp( cxx_node_allocator().New( val ));
 194             if ( base_class::insert( *sp, f )) {
 195                 sp.release();
 196                 return true;
 197             }
 198             return false;
 199         }
 200
 201         /// Updates the element
 202         /**
 203             The operation performs inserting or changing data with lock-free manner.
 204
 205             If the \p val key not found in the set, then the new item created from \p val
 206             will be inserted into the set iff \p bInsert is \p true.
 207             Otherwise, if \p val is found, the functor \p func will be called with the item found.
 208
 209             The functor \p Func signature:
 210             \code
 211                 struct my_functor {
 212                     void operator()( bool bNew, value_type& item, const Q& val );
 213                 };
 214             \endcode
 215             where:
 216             - \p bNew - \p true if the item has been inserted, \p false otherwise
 217             - \p item - item of the set
 218             - \p val - argument \p key passed into the \p %update() function
 219
 220             The functor may change non-key fields of the \p item; however, \p func must guarantee
 221             that during changing no any other modifications could be made on this item by concurrent threads.
 222
 223             Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successfull,
 224             i.e. the item has been inserted or updated,
 225             \p second is \p true if the new item has been added or \p false if the item with key equal to \p val
 226             already exists.
 227
 228             @warning See \ref cds_intrusive_item_creating "insert item troubleshooting"
 229         */
 230         template <typename Q, typename Func>
 231         std::pair<bool, bool> update( const Q& val, Func func, bool bInsert = true )
 232         {
 233             scoped_node_ptr sp( cxx_node_allocator().New( val ));
 234             std::pair<bool, bool> bRes = base_class::update( *sp, func, bInsert );
 235             if ( bRes.first && bRes.second )
 236                 sp.release();
 237             return bRes;
 238         }
 239
 240         /// Inserts data of type \p value_type created in-place from <tt>std::forward<Args>(args)...</tt>
 241         /**
 242             Returns \p true if inserting successful, \p false otherwise.
 243         */
 244         template <typename... Args>
 245         bool emplace( Args&&... args )
 246         {
 247             scoped_node_ptr sp( cxx_node_allocator().New( std::forward<Args>(args)... ));
 248             if ( base_class::insert( *sp )) {
 249                 sp.release();
 250                 return true;
 251             }
 252             return false;
 253         }
 254
 255         /// Deletes the item from the set
 256         /**
 257             The function searches \p hash in the set,
 258             deletes the item found, and returns \p true.
 259             If that item is not found the function returns \p false.
 260         */
 261         bool erase( hash_type const& hash )
 262         {
 263             return base_class::erase( hash );
 264         }
 265
 266         /// Deletes the item from the set
 267         /**
 268             The function searches \p hash in the set,
 269             call \p f functor with item found, and deltes the element from the set.
 270
 271             The \p Func interface is
 272             \code
 273             struct functor {
 274                 void operator()( value_type& item );
 275             };
 276             \endcode
 277
 278             If \p hash is not found the function returns \p false.
 279         */
 280         template <typename Func>
 281         bool erase( hash_type const& hash, Func f )
 282         {
 283             return base_class::erase( hash, f );
 284         }
 285
 286         /// Deletes the item pointed by iterator \p iter
 287         /**
 288             Returns \p true if the operation is successful, \p false otherwise.
 289
 290             The function does not invalidate the iterator, it remains valid and can be used for further traversing.
 291         */
 292         bool erase_at( iterator const& iter )
 293         {
 294             return base_class::erase_at( iter );
 295         }
 296
 297         /// Extracts the item with specified \p hash
 298         /**
 299             The function searches \p hash in the set,
 300             unlinks it from the set, and returns an guarded pointer to the item extracted.
 301             If \p hash is not found the function returns an empty guarded pointer.
 302
 303             The item returned is reclaimed by garbage collector \p GC
 304             when returned \ref guarded_ptr object to be destroyed or released.
 305             @note Each \p guarded_ptr object uses the GC's guard that can be limited resource.
 306
 307             Usage:
 308             \code
 309             typedef cds::container::MultiLevelHashSet< your_template_args > my_set;
 310             my_set theSet;
 311             // ...
 312             {
 313                 my_set::guarded_ptr gp( theSet.extract( 5 ));
 314                 if ( gp ) {
 315                     // Deal with gp
 316                     // ...
 317                 }
 318                 // Destructor of gp releases internal HP guard
 319             }
 320             \endcode
 321         */
 322         guarded_ptr extract( hash_type const& hash )
 323         {
 324             return base_class::extract( hash );
 325         }
 326
 327         /// Finds an item by it's \p hash
 328         /**
 329             The function searches the item by \p hash and calls the functor \p f for item found.
 330             The interface of \p Func functor is:
 331             \code
 332             struct functor {
 333                 void operator()( value_type& item );
 334             };
 335             \endcode
 336             where \p item is the item found.
 337
 338             The functor may change non-key fields of \p item. Note that the functor is only guarantee
 339             that \p item cannot be disposed during the functor is executing.
 340             The functor does not serialize simultaneous access to the set's \p item. If such access is
 341             possible you must provide your own synchronization schema on item level to prevent unsafe item modifications.
 342
 343             The function returns \p true if \p hash is found, \p false otherwise.
 344         */
 345         template <typename Func>
 346         bool find( hash_type const& hash, Func f )
 347         {
 348             return base_class::find( hash, f );
 349         }
 350
 351         /// Checks whether the set contains \p hash
 352         /**
 353             The function searches the item by its \p hash
 354             and returns \p true if it is found, or \p false otherwise.
 355         */
 356         bool contains( hash_type const& hash )
 357         {
 358             return base_class::contains( hash );
 359         }
 360
 361         /// Finds an item by it's \p hash and returns the item found
 362         /**
 363             The function searches the item by its \p hash
 364             and returns the guarded pointer to the item found.
 365             If \p hash is not found the function returns an empty \p guarded_ptr.
 366
 367             @note Each \p guarded_ptr object uses one GC's guard which can be limited resource.
 368
 369             Usage:
 370             \code
 371             typedef cds::container::MultiLevelHashSet< your_template_params >  my_set;
 372             my_set theSet;
 373             // ...
 374             {
 375                 my_set::guarded_ptr gp( theSet.get( 5 ));
 376                 if ( theSet.get( 5 )) {
 377                     // Deal with gp
 378                     //...
 379                 }
 380                 // Destructor of guarded_ptr releases internal HP guard
 381             }
 382             \endcode
 383         */
 384         guarded_ptr get( hash_type const& hash )
 385         {
 386             return base_class::get( hash );
 387         }
 388
 389         /// Clears the set (non-atomic)
 390         /**
 391             The function unlink all data node from the set.
 392             The function is not atomic but is thread-safe.
 393             After \p %clear() the set may not be empty because another threads may insert items.
 394         */
 395         void clear()
 396         {
 397             base_class::clear();
 398         }
 399
 400         /// Checks if the set is empty
 401         /**
 402             Emptiness is checked by item counting: if item count is zero then the set is empty.
 403             Thus, the correct item counting feature is an important part of the set implementation.
 404         */
 405         bool empty() const
 406         {
 407             return base_class::empty();
 408         }
 409
 410         /// Returns item count in the set
 411         size_t size() const
 412         {
 413             return base_class::size();
 414         }
 415
 416         /// Returns const reference to internal statistics
 417         stat const& statistics() const
 418         {
 419             return base_class::statistics();
 420         }
 421
 422         /// Returns the size of head node
 423         size_t head_size() const
 424         {
 425             return base_class::head_size();
 426         }
 427
 428         /// Returns the size of the array node
 429         size_t array_node_size() const
 430         {
 431             return base_class::array_node_size();
 432         }
 433
 434     public:
 435     ///@name Thread-safe iterators
 436         /** @anchor cds_container_MultilevelHashSet_iterators
 437             The set supports thread-safe iterators: you may iterate over the set in multi-threaded environment.\r
 438             It is guaranteed that the iterators will remain valid even if another thread deletes the node the iterator points to:\r
 439             Hazard Pointer embedded into the iterator object protects the node from physical reclamation.\r
 440 \r
 441             @note Since the iterator object contains hazard pointer that is a thread-local resource,\r
 442             the iterator should not be passed to another thread.\r
 443 \r
 444             Each iterator object supports the common interface:\r
 445             - dereference operators:\r
 446                 @code\r
 447                 value_type [const] * operator ->() noexcept\r
 448                 value_type [const] & operator *() noexcept\r
 449                 @endcode\r
 450             - pre-increment and pre-decrement. Post-operators is not supported\r
 451             - equality operators <tt>==</tt> and <tt>!=</tt>.\r
 452                 Iterators are equal iff they point to the same cell of the same array node.
 453                 Note that for two iterators \p it1 and \p it2, the conditon <tt> it1 == it2 </tt>
 454                 does not entail <tt> &(*it1) == &(*it2) </tt>
 455             - helper member function \p release() that clears internal hazard pointer.\r
 456                 After \p release() call the iterator points to \p nullptr but it still remain valid: further iterating is possible.
 457         */
 458     ///@{
 459
 460         /// Returns an iterator to the beginning of the set
 461         iterator begin()
 462         {
 463             return base_class::begin();
 464         }
 465
 466         /// Returns an const iterator to the beginning of the set
 467         const_iterator begin() const
 468         {
 469             return base_class::begin();
 470         }
 471
 472         /// Returns an const iterator to the beginning of the set
 473         const_iterator cbegin()
 474         {
 475             return base_class::cbegin();
 476         }
 477
 478         /// Returns an iterator to the element following the last element of the set. This element acts as a placeholder; attempting to access it results in undefined behavior.
 479         iterator end()
 480         {
 481             return base_class::end();
 482         }
 483
 484         /// Returns a const iterator to the element following the last element of the set. This element acts as a placeholder; attempting to access it results in undefined behavior.
 485         const_iterator end() const
 486         {
 487             return base_class::end();
 488         }
 489
 490         /// Returns a const iterator to the element following the last element of the set. This element acts as a placeholder; attempting to access it results in undefined behavior.
 491         const_iterator cend()
 492         {
 493             return base_class::cend();
 494         }
 495
 496         /// Returns a reverse iterator to the first element of the reversed set
 497         reverse_iterator rbegin()
 498         {
 499             return base_class::rbegin();
 500         }
 501
 502         /// Returns a const reverse iterator to the first element of the reversed set
 503         const_reverse_iterator rbegin() const
 504         {
 505             return base_class::rbegin();
 506         }
 507
 508         /// Returns a const reverse iterator to the first element of the reversed set
 509         const_reverse_iterator crbegin()
 510         {
 511             return base_class::crbegin();
 512         }
 513
 514         /// Returns a reverse iterator to the element following the last element of the reversed set
 515         /**
 516             It corresponds to the element preceding the first element of the non-reversed container.
 517             This element acts as a placeholder, attempting to access it results in undefined behavior.
 518         */
 519         reverse_iterator rend()
 520         {
 521             return base_class::rend();
 522         }
 523
 524         /// Returns a const reverse iterator to the element following the last element of the reversed set
 525         /**
 526             It corresponds to the element preceding the first element of the non-reversed container.
 527             This element acts as a placeholder, attempting to access it results in undefined behavior.
 528         */
 529         const_reverse_iterator rend() const
 530         {
 531             return base_class::rend();
 532         }
 533
 534         /// Returns a const reverse iterator to the element following the last element of the reversed set
 535         /**
 536             It corresponds to the element preceding the first element of the non-reversed container.
 537             This element acts as a placeholder, attempting to access it results in undefined behavior.
 538         */
 539         const_reverse_iterator crend()
 540         {
 541             return base_class::crend();
 542         }
 543     ///@}
 544     };
 545
 546 }} // namespace cds::container
 547
 548 #endif // #ifndef CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHSET_H