cds/urcu/details/base.h

   1 //$$CDS-header$$
   2
   3 #ifndef CDSLIB_URCU_DETAILS_BASE_H
   4 #define CDSLIB_URCU_DETAILS_BASE_H
   5
   6 #include <cds/algo/atomic.h>
   7 #include <cds/gc/details/retired_ptr.h>
   8 #include <cds/details/allocator.h>
   9 #include <cds/os/thread.h>
  10 #include <cds/details/marked_ptr.h>
  11
  12 namespace cds {
  13     /// User-space Read-Copy Update (URCU) namespace
  14     /** @ingroup cds_garbage_collector
  15         @anchor cds_urcu_desc
  16
  17         This namespace contains declarations for different types of Read-Copy Update (%RCU)
  18         synchronization primitive and data structures developed for RCU.
  19         In <b>libcds</b> %RCU is used as garbage collector.
  20
  21         <b>Source papers</b>:
  22         - [2009] M.Desnoyers "Low-Impact Operating System Tracing" PhD Thesis,
  23           Chapter 6 "User-Level Implementations of Read-Copy Update"
  24         - [2011] M.Desnoyers, P.McKenney, A.Stern, M.Dagenias, J.Walpole "User-Level
  25           Implementations of Read-Copy Update"
  26
  27         <b>Informal introduction to user-space %RCU</b>
  28
  29         [<i>From Desnoyer's papers</i>] %RCU is a synchronization mechanism that was added to the
  30         Linux kernel in October of 2002. %RCU achieves scalability improvements by allowing
  31         reads to occur concurrently with updates. In contrast to conventional locking
  32         primitives that ensure mutual exclusion among concurrent threads regardless of whether
  33         they be readers or updaters, or with reader-writer locks that allow concurrent reads
  34         but not in the presence of updates, %RCU supports concurrency between multiple updaters
  35         and multiple readers. %RCU ensures that data are not freed up until all pre-existing
  36         critical sections complete. %RCU defines and uses efficient and scalable mechanisms
  37         for deferring reclamation of old data. These mechanisms distribute the work among read and update
  38         paths in such a way as to make read paths extremely fast.
  39
  40         %RCU readers execute within %RCU read-side critical sections. Each such critical section begins with
  41         \p rcu_read_lock(), ends with \p rcu_read_unlock() (in \p libcds these primitives are the methods of
  42         GC class and are usually called \p access_lock and \p access_unlock respectively). Read-side
  43         critical sections can be nested.
  44         The performance benefits of %RCU are due to the fact that \p rcu_read_lock()
  45         and \p rcu_read_unlock() are exceedingly fast.
  46
  47         When a thread is not in an %RCU read-side critical section, it is in a quiescent state.
  48         A quiescent state that persists for a significant time period is an extended quiescent state.
  49         Any time period during which every thread has been in at least one quiescent state
  50         is a grace period; this implies that every %RCU read-side critical section
  51         that starts before a grace period must end before that grace period does.
  52         Distinct grace periods may overlap, either partially or completely. Any time period
  53         that includes a grace period is by definition itself a grace period.
  54         Each grace period is guaranteed to complete as long as all read-side critical sections
  55         are finite in duration; thus even a constant flow of such critical sections is unable to
  56         extend an %RCU grace period indefinitely.
  57
  58         Suppose that readers enclose each of their data-structure traversals in
  59         an %RCU read-side critical section. If an updater first removes an element
  60         from such a data structure and then waits for a grace period, there can be
  61         no more readers accessing that element. The updater can then carry out destructive
  62         operations, for example freeing the element, without disturbing any readers.
  63
  64         The %RCU update is split into two phases, a removal phase and a reclamation phase.
  65         These two phases must be separated by a grace period, for example via the \p synchronize_rcu()
  66         primitive, which initiates a grace period and waits for it to finish.
  67         During the removal phase, the %RCU update removes elements from a shared data structure.
  68         The removed data elements will only be accessible to read-side critical sections
  69         that ran concurrently with the removal phase, which are guaranteed to complete before the
  70         grace period ends. Therefore the reclamation phase can safely free the data elements
  71         removed by the removal phase.
  72
  73         Desnoyers describes several classes of user-space %RCU implementations:
  74         - The Quiescent-State-Based Reclamation (QSBR) %RCU implementation offers
  75           the best possible read-side performance, but requires that each thread periodically
  76           calls a function to announce that it is in a quiescent state, thus strongly
  77           constraining the application design. This type of %RCU is not implemented in \p libcds.
  78         - The general-purpose %RCU implementation places almost no constraints on the application\92s
  79           design, thus being appropriate for use within a general-purpose library, but it has
  80           relatively higher read-side overhead. The \p libcds contains several implementations of general-purpose
  81           %RCU: \ref general_instant, \ref general_buffered, \ref general_threaded.
  82         - The signal-handling %RCU presents an implementation having low read-side overhead and
  83           requiring only that the application give up one POSIX signal to %RCU update processing.
  84           The \p libcds contains several implementations if signal-handling %RCU: \ref signal_buffered,
  85           \ref signal_threaded.
  86
  87     @note The signal-handled %RCU is defined only for UNIX-like systems, not for Windows.
  88
  89     @anchor cds_urcu_type
  90     <b>RCU implementation type</b>
  91
  92         There are several internal implementation of RCU (all declared in \p %cds::urcu namespace):
  93         - \ref general_instant - general purpose RCU with immediate reclamation
  94         - \ref general_buffered - general purpose RCU with deferred (buffered) reclamation
  95         - \ref general_threaded - general purpose RCU with special reclamation thread
  96         - \ref signal_buffered - signal-handling RCU with deferred (buffered) reclamation
  97         - \ref signal_threaded - signal-handling RCU with special reclamation thread
  98
  99         You cannot create an object of any of those classes directly.
 100         Instead, you should use wrapper classes.
 101         The wrapper simplifies creation and usage of RCU singleton objects
 102         and has the reacher interface that combines interfaces of wrapped class i.e. RCU global part like
 103         \p synchronize, and corresponding RCU thread-specific interface like \p access_lock, \p access_unlock and \p retire_ptr.
 104
 105     @anchor cds_urcu_gc
 106     There are several wrapper classes (all declared in \p %cds::urcu namespace)
 107         - \ref cds_urcu_general_instant_gc "gc<general_instant>" - general purpose RCU with immediate reclamation,
 108             include file <tt><cds/urcu/general_instant.h></tt>
 109         - \ref cds_urcu_general_buffered_gc "gc<general_buffered>" - general purpose RCU with deferred (buffered) reclamation,
 110             include file <tt><cds/urcu/general_buffered.h></tt>
 111         - \ref cds_urcu_general_threaded_gc "gc<general_threaded>" - general purpose RCU with special reclamation thread
 112             include file <tt><cds/urcu/general_threaded.h></tt>
 113         - \ref cds_urcu_signal_buffered_gc "gc<signal_buffered>" - signal-handling RCU with deferred (buffered) reclamation
 114             include file <tt><cds/urcu/signal_buffered.h></tt>
 115         - \ref cds_urcu_signal_threaded_gc "gc<signal_threaded>" - signal-handling RCU with special reclamation thread
 116             include file <tt><cds/urcu/signal_threaded.h></tt>
 117
 118         Any RCU-related container in \p libcds expects that its \p RCU template parameter is one of those wrapper.
 119
 120     @anchor cds_urcu_tags
 121         For simplicity, in some algorithms instead of using RCU implementation type
 122         you should specify corresponding RCU tags (all declared in \p %cds::urcu namespace):
 123         - \ref general_instant_tag - for \ref general_instant
 124         - \ref general_buffered_tag - for \ref general_buffered
 125         - \ref general_threaded_tag - for \ref general_threaded
 126         - \ref signal_buffered_tag - for \ref signal_buffered
 127         - \ref signal_threaded_tag - for \ref signal_threaded
 128
 129     @anchor cds_urcu_performance
 130     <b>Performance</b>
 131
 132         As a result of our experiments we can range above %RCU implementation in such order,
 133         from high to low performance:
 134         - <tt>gc<general_buffered></tt> - high
 135         - <tt>gc<general_threaded></tt>
 136         - <tt>gc<signal_buffered></tt>
 137         - <tt>gc<signal_threaded></tt>
 138         - <tt>gc<general_instant></tt> - low
 139
 140         This estimation is very rough and depends on many factors:
 141         type of payload - mostly read-only (seeking) or read-write (inserting and deleting), -
 142         a hardware, your application, and so on.
 143
 144     @anchor cds_urcu_howto
 145     <b>How to use</b>
 146
 147         Usually, in your application you use only one \ref cds_urcu_gc "type of RCU" that is the best for your needs.
 148         However, the library allows to apply several RCU singleton in one application.
 149         The only limitation is that only one object of each RCU type can be instantiated.
 150         Since each RCU type is a template class the creation of two object of one RCU type class
 151         with different template arguments is an error and is not supported.
 152         However, it is correct when your RCU objects relates to different RCU types.
 153
 154         @note If you want to use \p %signal_buffered and \p %signal_threaded RCU in your application simultaneously,
 155         you should specify different signal number for each signal-handled RCU type on construction time,
 156         for example, \p SIGUSR1 and \p SIGUSR2 respectively. By default, both signal-handled RCU implementation
 157         share \p SIGUSR1 signal and cannot be applied together.
 158
 159         In \p libcds, many GC-based ordered list, set and map template classes have %RCU-related specializations
 160         that hide the %RCU specific details.
 161
 162         RCU GC is initialized in usual way: you should declare an object of type \p cds::urcu::gc< RCU_type >,
 163         for example:
 164         \code
 165         #include <cds/urcu/general_buffered.h>
 166
 167         typedef cds::urcu::gc< cds::urcu::general_buffered<> >    rcu_gpb;
 168
 169         int main() {
 170             // Initialize libcds
 171             cds::Initialize();
 172             {
 173                 // Initialize general_buffered RCU
 174                 rcu_gpb   gpbRCU;
 175
 176                 // If main thread uses lock-free containers
 177                 // the main thread should be attached to libcds infrastructure
 178                 cds::threading::Manager::attachThread();
 179
 180                 // Now you can use RCU-based containers in the main thread
 181                 //...
 182             }
 183             // Terminate libcds
 184             cds::Terminate();
 185         }
 186         \endcode
 187
 188         Each thread that deals with RCU-based container should be initialized first:
 189         \code
 190         #include <cds/urcu/general_buffered.h>
 191         int myThreadEntryPoint(void *)
 192         {
 193             // Attach the thread to libcds infrastructure
 194             cds::threading::Manager::attachThread();
 195
 196             // Now you can use RCU-based containers in the thread
 197             //...
 198
 199             // Detach thread when terminating
 200             cds::threading::Manager::detachThread();
 201         }
 202         \endcode
 203     */
 204     namespace urcu {
 205
 206 #   if CDS_OS_INTERFACE == CDS_OSI_UNIX || defined(CDS_DOXYGEN_INVOKED)
 207 #       define CDS_URCU_SIGNAL_HANDLING_ENABLED 1
 208 #   endif
 209
 210         /// General-purpose URCU type
 211         struct general_purpose_rcu {
 212             //@cond
 213             static uint32_t const c_nControlBit = 0x80000000;
 214             static uint32_t const c_nNestMask   = c_nControlBit - 1;
 215             //@endcond
 216         };
 217
 218 #   ifdef CDS_URCU_SIGNAL_HANDLING_ENABLED
 219         /// Signal-handling URCU type
 220         struct signal_handling_rcu {
 221             //@cond
 222             static uint32_t const c_nControlBit = 0x80000000;
 223             static uint32_t const c_nNestMask   = c_nControlBit - 1;
 224             //@endcond
 225         };
 226 #   endif
 227
 228         /// Tag for general_instant URCU
 229         struct general_instant_tag: public general_purpose_rcu {
 230             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 231         };
 232
 233         /// Tag for general_buffered URCU
 234         struct general_buffered_tag: public general_purpose_rcu
 235         {
 236             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 237         };
 238
 239         /// Tag for general_threaded URCU
 240         struct general_threaded_tag: public general_purpose_rcu {
 241             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 242         };
 243
 244 #   ifdef CDS_URCU_SIGNAL_HANDLING_ENABLED
 245         /// Tag for signal_buffered URCU
 246         struct signal_buffered_tag: public signal_handling_rcu {
 247                 typedef signal_handling_rcu     rcu_class ; ///< The URCU type
 248         };
 249
 250         /// Tag for signal_threaded URCU
 251         struct signal_threaded_tag: public signal_handling_rcu {
 252             typedef signal_handling_rcu     rcu_class ; ///< The URCU type
 253         };
 254 #   endif
 255
 256         ///@anchor cds_urcu_retired_ptr Retired pointer, i.e. pointer that ready for reclamation
 257         typedef cds::gc::details::retired_ptr   retired_ptr;
 258
 259         /// Pointer to function to free (destruct and deallocate) retired pointer of specific type
 260         typedef cds::gc::details::free_retired_ptr_func free_retired_ptr_func;
 261
 262         //@cond
 263         /// Implementation-specific URCU details
 264         namespace details {
 265             /// forward declarations
 266             template <typename RCUtag>
 267             struct thread_data;
 268
 269             template <typename RCUtag>
 270             class thread_gc;
 271
 272             template <typename RCUtag >
 273             class singleton;
 274
 275             //@cond
 276             class singleton_vtbl {
 277             protected:
 278                 virtual ~singleton_vtbl()
 279                 {}
 280             public:
 281                 virtual void retire_ptr( retired_ptr& p ) = 0;
 282             };
 283
 284             class gc_common
 285             {
 286             public:
 287                 template <typename MarkedPtr> using atomic_marked_ptr = atomics::atomic<MarkedPtr>;
 288             };
 289             //@endcond
 290
 291             //@cond
 292             template <typename ThreadData>
 293             struct thread_list_record {
 294                 ThreadData *    m_pNext ;  ///< Next item in thread list
 295                 atomics::atomic<OS::ThreadId>    m_idOwner   ; ///< Owner thread id; 0 - the record is free (not owned)
 296
 297                 thread_list_record()
 298                     : m_pNext( nullptr )
 299                     , m_idOwner( cds::OS::c_NullThreadId )
 300                 {}
 301
 302                 ~thread_list_record()
 303                 {}
 304             };
 305             //@endcond
 306
 307             //@cond
 308             template <typename RCUtag, class Alloc = CDS_DEFAULT_ALLOCATOR >
 309             class thread_list {
 310             public:
 311                 typedef thread_data<RCUtag>         thread_record;
 312                 typedef cds::details::Allocator< thread_record, Alloc >   allocator_type;
 313
 314             private:
 315                 atomics::atomic<thread_record *>   m_pHead;
 316
 317             public:
 318                 thread_list()
 319                     : m_pHead( nullptr )
 320                 {}
 321
 322                 ~thread_list()
 323                 {
 324                     destroy();
 325                 }
 326
 327                 thread_record * alloc()
 328                 {
 329                     thread_record * pRec;
 330                     cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId;
 331                     cds::OS::ThreadId const curThreadId  = cds::OS::get_current_thread_id();
 332
 333                     // First try to reuse a retired (non-active) HP record
 334                     for ( pRec = m_pHead.load( atomics::memory_order_acquire ); pRec; pRec = pRec->m_list.m_pNext ) {
 335                         cds::OS::ThreadId thId = nullThreadId;
 336                         if ( !pRec->m_list.m_idOwner.compare_exchange_strong( thId, curThreadId, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
 337                             continue;
 338                         return pRec;
 339                     }
 340
 341                     // No records available for reuse
 342                     // Allocate and push a new record
 343                     pRec = allocator_type().New();
 344                     pRec->m_list.m_idOwner.store( curThreadId, atomics::memory_order_relaxed );
 345
 346                     atomics::atomic_thread_fence( atomics::memory_order_release );
 347
 348                     thread_record * pOldHead = m_pHead.load( atomics::memory_order_acquire );
 349                     do {
 350                         pRec->m_list.m_pNext = pOldHead;
 351                     } while ( !m_pHead.compare_exchange_weak( pOldHead, pRec, atomics::memory_order_release, atomics::memory_order_relaxed ));
 352
 353                     return pRec;
 354                 }
 355
 356                 void retire( thread_record * pRec )
 357                 {
 358                     assert( pRec != nullptr );
 359                     pRec->m_list.m_idOwner.store( cds::OS::c_NullThreadId, atomics::memory_order_release );
 360                 }
 361
 362                 void detach_all()
 363                 {
 364                     thread_record * pNext = nullptr;
 365                     cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId;
 366
 367                     for ( thread_record * pRec = m_pHead.load(atomics::memory_order_acquire); pRec; pRec = pNext ) {
 368                         pNext = pRec->m_list.m_pNext;
 369                         if ( pRec->m_list.m_idOwner.load(atomics::memory_order_relaxed) != nullThreadId ) {
 370                             retire( pRec );
 371                         }
 372                     }
 373                 }
 374
 375                 thread_record * head( atomics::memory_order mo ) const
 376                 {
 377                     return m_pHead.load( mo );
 378                 }
 379
 380             private:
 381                 void destroy()
 382                 {
 383                     allocator_type al;
 384                     CDS_DEBUG_ONLY( cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId; )
 385                     CDS_DEBUG_ONLY( cds::OS::ThreadId const mainThreadId = cds::OS::get_current_thread_id() ;)
 386
 387                     thread_record * p = m_pHead.exchange( nullptr, atomics::memory_order_seq_cst );
 388                     while ( p ) {
 389                         thread_record * pNext = p->m_list.m_pNext;
 390
 391                         assert( p->m_list.m_idOwner.load( atomics::memory_order_relaxed ) == nullThreadId
 392                             || p->m_list.m_idOwner.load( atomics::memory_order_relaxed ) == mainThreadId
 393                             || !cds::OS::is_thread_alive( p->m_list.m_idOwner.load( atomics::memory_order_relaxed ) )
 394                             );
 395
 396                         al.Delete( p );
 397                         p = pNext;
 398                     }
 399                 }
 400             };
 401             //@endcond
 402
 403             //@cond
 404             template <class ThreadGC>
 405             class scoped_lock {
 406             public:
 407                 typedef ThreadGC                    thread_gc;
 408                 typedef typename thread_gc::rcu_tag rcu_tag;
 409
 410                 bool m_bLocked;
 411             public:
 412                 scoped_lock(bool bLock = true)
 413                     : m_bLocked( bLock )
 414                 {
 415                     if ( bLock )
 416                         thread_gc::access_lock();
 417                 }
 418
 419                 ~scoped_lock()
 420                 {
 421                     if ( m_bLocked )
 422                         thread_gc::access_unlock();
 423                 }
 424             };
 425             //@endcond
 426         } // namespace details
 427         //@endcond
 428
 429         // forwards
 430         //@cond
 431         template <typename RCUimpl> class gc;
 432         //@endcond
 433
 434         /// Epoch-based retired ptr
 435         /**
 436             Retired pointer with additional epoch field that prevents early reclamation.
 437             This type of retired pointer is used in buffered RCU implementations.
 438         */
 439         struct epoch_retired_ptr: public retired_ptr
 440         {
 441             uint64_t    m_nEpoch ;  ///< The epoch when the object has been retired
 442
 443             //@cond
 444             epoch_retired_ptr()
 445             {}
 446             //@endcond
 447
 448             /// Constructor creates a copy of \p rp retired pointer and saves \p nEpoch reclamation epoch.
 449             epoch_retired_ptr( retired_ptr const& rp, uint64_t nEpoch )
 450                 : retired_ptr( rp )
 451                 , m_nEpoch( nEpoch )
 452             {}
 453         };
 454
 455     } // namespace urcu
 456 } // namespace cds
 457
 458 #endif // #ifndef CDSLIB_URCU_DETAILS_BASE_H