#ifndef CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
#define CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H

/*
    Michael allocator implementation
    [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"

    2011.09.07 khizmax  Optimization: a small page (about 64K) is allocated by the Heap::alloc call.
                        This allows system memory to be requested more regularly, in blocks of 1M,
                        which leads to less memory fragmentation.
    2011.01.02 khizmax  Created
*/

#include <mutex>  // unique_lock
#include <cds/memory/michael/options.h>
#include <cds/memory/michael/bound_check.h>
#include <cds/memory/michael/procheap_stat.h>
#include <cds/memory/michael/osalloc_stat.h>

#include <cds/os/topology.h>
#include <cds/os/alloc_aligned.h>
#include <cds/sync/spinlock.h>
#include <cds/details/type_padding.h>
#include <cds/details/marked_ptr.h>
#include <cds/container/vyukov_mpmc_cycle_queue.h>
#include <cds/user_setup/cache_line.h>
#include <cds/details/lib.h>

#include <boost/intrusive/list.hpp>
/// Memory-related algorithms: allocators etc.

/// Michael's allocator (class Heap)
\li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"

This namespace declares the main class Heap and a number of helper classes.

unsigned int nBlockSize ;   ///< block size in bytes
unsigned int nSBSize ;      ///< superblock size (64K or 1M)
unsigned int nCapacity ;    ///< superblock capacity (nSBSize / nBlockSize)
unsigned int nSBSizeIdx ;   ///< internal superblock size index (page index)
/// %Heap based on system \p malloc and \p free functions

/// Allocates memory block of \p nSize bytes (\p malloc wrapper)
static void * alloc( size_t nSize )
CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
void * p = ::malloc( nSize );
CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
return p;

/// Returns a memory block to the system (\p free wrapper)
static void free( void * p )
CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
::free( p );
CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
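/*
A note (illustrative, not normative): any class with this static alloc/free
interface can be plugged into the Heap as the large-block allocator via the
opt::system_heap option, e.g.
    cds::memory::michael::Heap< opt::system_heap< malloc_heap >> h;
*/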
/// %Heap based on system-provided aligned \p malloc and \p free functions
struct aligned_malloc_heap

/// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
static void * alloc( size_t nSize, size_t nAlignment )
return cds::OS::aligned_malloc( nSize, nAlignment );

/// Frees aligned memory block \p p that has been previously allocated by \ref alloc call
static void free( void * p )
cds::OS::aligned_free( p );
/// Page heap based on \p Heap

A page heap can allocate memory in page-sized blocks only.
\p Heap may be any heap that provides an interface like \ref malloc_heap.

This class is one of the available implementations of the opt::page_heap option.

template <class Heap = malloc_heap>
class page_allocator: public Heap

typedef Heap base_class;

size_t nPageSize ///< page size in bytes
: m_nPageSize( nPageSize )

/// Allocate new page
return base_class::alloc( m_nPageSize );

/// Free page \p pPage
void free( void * pPage )
base_class::free( pPage );
/// Page cacheable heap

To improve performance this allocator maintains a small list of free pages.
A page heap can allocate memory in page-sized blocks only.

\li \p FreeListCapacity - capacity of the free-list; the default value is 64 pages
\li \p Heap may be any heap that provides an interface like \ref malloc_heap.

This class is one of the available implementations of the opt::page_heap option.

template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
class page_cached_allocator: public page_allocator<Heap>

typedef page_allocator<Heap> base_class;

struct make_null_ptr {
void operator ()(void *& p)
{ p = nullptr; }

struct free_list_traits : public cds::container::vyukov_queue::traits
typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
typedef make_null_ptr value_cleaner;

typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;

free_list m_FreeList;

page_cached_allocator(
size_t nPageSize ///< page size in bytes
)
: base_class( nPageSize )
, m_FreeList( FreeListCapacity )

~page_cached_allocator()
while ( m_FreeList.pop(pPage) )
base_class::free( pPage );

/// Allocate new page
if ( !m_FreeList.pop( pPage ) )
pPage = base_class::alloc();

/// Free page \p pPage
void free( void * pPage )
if ( !m_FreeList.push( pPage ))
base_class::free( pPage );
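/*
Usage sketch (illustrative; the capacity and page size are assumptions):

    typedef page_cached_allocator<16> my_page_heap; // free-list of 16 pages
    my_page_heap heap( 64 * 1024 );  // page size in bytes
    void * pPage = heap.alloc();     // popped from the free-list when possible
    heap.free( pPage );              // pushed back unless the free-list is full
*/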
/// Implementation of the opt::sizeclass_selector option

The default size-class selector can manage memory blocks up to 64K in size.

class CDS_EXPORT_API default_sizeclass_selector

/// Count of different size-classes
static const size_t c_nSizeClassCount = 63;

static const size_t c_nMaxBlockSize = 64 * 1024;

/// Page size of type 0 (64K)
static const unsigned int c_nPage64K = 64 * 1024 - 32;

/// Page size of type 1 (1M)
static const unsigned int c_nPage1M = 1024 * 1024;

static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
static size_class const m_szClass[c_nSizeClassCount];
static unsigned char const m_szClassMap[];

/// Type of size-class index
typedef unsigned int sizeclass_index;

default_sizeclass_selector();

/// "No size class" index
static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);

/// Returns size-class count
static sizeclass_index size()
return c_nSizeClassCount;

/// Returns page size in bytes for the given page type \p nPageType
static size_t page_size(size_t nPageType )
assert(false) ; // anything forgotten?..

/// Returns the count of page size-classes

This class supports pages of two types: a 64K page for small objects and a 1M page for other objects.

static size_t pageTypeCount()

/// Returns size-class index for \p nSize

For large blocks that cannot be allocated by Michael's allocator
the function returns \ref c_nNoSizeClass.

static sizeclass_index find( size_t nSize )
if ( nSize > c_nMaxBlockSize ) {
// Too large a block - allocate from the system
return c_nNoSizeClass;

sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
assert( nSize <= m_szClassBounds[ szClass ] );
assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );

/// Gets the details::size_class struct for size-class index \p nIndex
static const size_class * at( sizeclass_index nIndex )
assert( nIndex < size() );
return m_szClass + nIndex;
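/*
Lookup sketch (illustrative): find() indexes the class map in 16-byte steps,
so a 100-byte request probes slot (100 + 15) / 16 == 7:

    sizeclass_index idx = default_sizeclass_selector::find( 100 );
    const size_class * sc = default_sizeclass_selector::at( idx );
    // per the asserts in find(), sc covers requests of 100 bytes,
    // while the previous class bound (if any) is below 100
*/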
struct free_list_tag;
typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > > free_list_locked_hook;

struct partial_list_tag;
typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > > partial_list_locked_hook;

struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook

/// List of free superblock descriptors

This class is an implementation of the \ref opt::free_list option.

template <class Lock, class T = details::intrusive_superblock_desc>
class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >

typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;

typedef details::free_list_locked_hook item_hook;
typedef Lock lock_type;

typedef std::unique_lock<lock_type> auto_lock;

mutable lock_type m_access;

/// Rebinds to another item type \p T2
typedef free_list_locked<Lock, T2> other ;   ///< rebind result

/// Push a superblock descriptor to the free-list
void push( T * pDesc )
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
auto_lock al(m_access);
base_class::push_back( *pDesc );

/// Pop a superblock descriptor from the free-list
auto_lock al(m_access);
if ( base_class::empty() )
T& rDesc = base_class::front();
base_class::pop_front();
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );

/// Returns the current count of superblocks in the free-list
auto_lock al(m_access);
return base_class::size();
/// List of partially filled superblock descriptors

This class is an implementation of the \ref opt::partial_list option.

template <class Lock, class T = details::intrusive_superblock_desc>
class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >

typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;

typedef details::partial_list_locked_hook item_hook;
typedef Lock lock_type;

typedef std::unique_lock<lock_type> auto_lock;

mutable lock_type m_access;

/// Rebinds to another item type \p T2
typedef partial_list_locked<Lock, T2> other ;   ///< rebind result

/// Push a superblock \p pDesc to the list
void push( T * pDesc )
auto_lock al( m_access );
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
base_class::push_back( *pDesc );

/// Pop a superblock from the list
auto_lock al( m_access );
if ( base_class::empty() )
T& rDesc = base_class::front();
base_class::pop_front();
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );

/// Removes the \p pDesc descriptor from the partial list
bool unlink( T * pDesc )
assert(pDesc != nullptr);
auto_lock al( m_access );
// !inited(pDesc) means that pDesc is currently linked into the partial list
if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
base_class::erase( base_class::iterator_to( *pDesc ) );

/// Count of elements in the list
auto_lock al( m_access );
return base_class::size();
/// Summary processor heap statistics

Summary heap statistics for use with the Heap::summaryStat function.

size_t nAllocFromActive  ;  ///< Count of allocations from the active superblock
size_t nAllocFromPartial ;  ///< Count of allocations from a partial superblock
size_t nAllocFromNew     ;  ///< Count of allocations from a new superblock
size_t nFreeCount        ;  ///< Count of \p free function calls
size_t nPageAllocCount   ;  ///< Count of pages (superblocks) allocated
size_t nPageDeallocCount ;  ///< Count of pages (superblocks) deallocated
size_t nDescAllocCount   ;  ///< Count of superblock descriptors allocated
size_t nDescFull         ;  ///< Count of full superblocks
atomic64u_t nBytesAllocated   ;  ///< Count of allocated bytes (for heap-managed memory blocks)
atomic64u_t nBytesDeallocated ;  ///< Count of deallocated bytes (for heap-managed memory blocks)

size_t nSysAllocCount ;  ///< Count of \p alloc and \p alloc_aligned calls (for large memory blocks allocated directly from the OS)
size_t nSysFreeCount  ;  ///< Count of \p free and \p free_aligned calls (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesAllocated   ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesDeallocated ;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)

// Internal contention indicators
/// CAS failure counter for updating the \p active field in the \p alloc_from_active Heap internal function

A contention indicator: the lower the value, the better.

size_t nActiveDescCASFailureCount;
/// CAS failure counter for updating the anchor of the active superblock in the \p alloc_from_active Heap internal function

A contention indicator: the lower the value, the better.

size_t nActiveAnchorCASFailureCount;
/// CAS failure counter for reserving blocks of a partial superblock in the \p alloc_from_partial Heap internal function

A contention indicator: the lower the value, the better.

size_t nPartialDescCASFailureCount;
/// CAS failure counter for updating the anchor of a partial superblock in the \p alloc_from_partial Heap internal function

A contention indicator: the lower the value, the better.

size_t nPartialAnchorCASFailureCount;

/// Constructs empty statistics. All counters are zero.

/// Difference statistics

This operator computes the difference between \p *this and \p stat and stores the difference in \p *this.

summary_stat& operator -=( const summary_stat& stat )
nAllocFromActive    -= stat.nAllocFromActive;
nAllocFromPartial   -= stat.nAllocFromPartial;
nAllocFromNew       -= stat.nAllocFromNew;
nFreeCount          -= stat.nFreeCount;
nPageAllocCount     -= stat.nPageAllocCount;
nPageDeallocCount   -= stat.nPageDeallocCount;
nDescAllocCount     -= stat.nDescAllocCount;
nDescFull           -= stat.nDescFull;
nBytesAllocated     -= stat.nBytesAllocated;
nBytesDeallocated   -= stat.nBytesDeallocated;

nSysAllocCount      -= stat.nSysAllocCount;
nSysFreeCount       -= stat.nSysFreeCount;
nSysBytesAllocated  -= stat.nSysBytesAllocated;
nSysBytesDeallocated -= stat.nSysBytesDeallocated;

nActiveDescCASFailureCount    -= stat.nActiveDescCASFailureCount;
nActiveAnchorCASFailureCount  -= stat.nActiveAnchorCASFailureCount;
nPartialDescCASFailureCount   -= stat.nPartialDescCASFailureCount;
nPartialAnchorCASFailureCount -= stat.nPartialAnchorCASFailureCount;

/// Clears statistics

All counters are set to zero.

memset( this, 0, sizeof(*this));
template <typename Stat>
summary_stat& add_procheap_stat( const Stat& stat )
nAllocFromActive    += stat.allocFromActive();
nAllocFromPartial   += stat.allocFromPartial();
nAllocFromNew       += stat.allocFromNew();
nFreeCount          += stat.freeCount();
nPageAllocCount     += stat.blockAllocated();
nPageDeallocCount   += stat.blockDeallocated();
nDescAllocCount     += stat.descAllocCount();
nDescFull           += stat.descFull();
nBytesAllocated     += stat.allocatedBytes();
nBytesDeallocated   += stat.deallocatedBytes();

nActiveDescCASFailureCount    += stat.activeDescCASFailureCount();
nActiveAnchorCASFailureCount  += stat.activeAnchorCASFailureCount();
nPartialDescCASFailureCount   += stat.partialDescCASFailureCount();
nPartialAnchorCASFailureCount += stat.partialAnchorCASFailureCount();

template <typename Stat>
summary_stat& add_heap_stat( const Stat& stat )
nSysAllocCount      += stat.allocCount();
nSysFreeCount       += stat.freeCount();

nSysBytesAllocated  += stat.allocatedBytes();
nSysBytesDeallocated += stat.deallocatedBytes();
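/*
Typical use (a sketch): take a snapshot before and after a workload and use
operator-= to get interval statistics; summaryStat is a Heap member, defined
below, and run_workload() is a hypothetical workload function:

    summary_stat before, after;
    myHeap.summaryStat( before );
    run_workload();
    myHeap.summaryStat( after );
    after -= before;   // 'after' now holds the interval delta
*/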
/// Michael's allocator

This class provides the base functionality for Michael's allocator. It does not provide
the interface described by \p std::allocator, so we call it a heap rather than an allocator.
The heap interface is closer to the semantics of the \p malloc / \p free system functions.
The heap supports allocation of aligned and unaligned data.

The algorithm is based on a simplified version of
\li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"

which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
\li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis

This is a powerful, scalable, fully customizable heap with a lock-free fast path,
developed specifically for multi-threaded use.
With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
You can manually bind any of your threads to any arena ("processor"). With the opt::sizeclass_selector option you can manage
allocation granularity. With opt::page_heap you can utilize any OS-provided facility for page allocation,
such as \p mmap or \p VirtualAlloc. The heap can gather internal statistics that help you tune your application.
The opt::check_bounds feature can help you find memory buffer overflows.

A brief algorithm description from Michael's work:

Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
the heap is composed of large superblocks (64K or 1M in size). Each superblock is divided into multiple equal-sized blocks.
Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
structures for the size classes and processor heaps (about 16 KB for a 16-processor machine) are allocated and initialized
in a lock-free manner.

Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
available blocks of its original superblock by atomically updating its descriptor.

<b>Constraint</b>: one superblock may contain up to 2048 blocks. This constraint limits the superblock capacity,
and therefore the maximum superblock size for a given block size.
Available \p Options:
- \ref opt::sys_topology - class that describes the system topology needed by the allocator.
Default is \p cds::OS::topology (see cds::OS::Win32::topology for the interface description)
- \ref opt::system_heap - option setter for an allocator for large blocks that are allocated directly from the OS.
Default is \ref malloc_heap.
- \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
Default is \ref aligned_malloc_heap
- \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
Default is \ref page_cached_allocator
- \ref opt::sizeclass_selector - option setter for a class used to select the appropriate size-class
for an incoming allocation request.
Default is \ref default_sizeclass_selector
- \ref opt::free_list - option setter for a class that manages the list of free superblock descriptors
Default is \ref free_list_locked
- \ref opt::partial_list - option setter for a class that manages the list of partially filled superblocks
Default is \ref partial_list_locked
- \ref opt::procheap_stat - option setter for a class that gathers internal statistics for memory allocation
maintained by the heap.
Default is \ref procheap_empty_stat
- \ref opt::os_allocated_stat - option setter for a class that gathers internal statistics for large-block
allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector)
and it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated from
the OS directly. The \p os_allocated_stat option sets a class that gathers statistics for such large blocks.
Default is \ref os_allocated_empty
- \ref opt::check_bounds - a bound checker.
Default is no bound checker (cds::opt::none)
The heap is the basic building block for your allocator or <tt>operator new</tt> implementation.

\code
#include <cds/memory/michael/allocator.h>

// Heap with explicitly defined options:
cds::memory::michael::Heap<
    opt::aligned_heap< aligned_malloc_heap >,
    opt::page_heap< page_cached_allocator<16, malloc_heap> >
> myHeap;

// Heap with default options:
cds::memory::michael::Heap<> myDefHeap;
\endcode
\par How to make a std-like allocator

There are serious differences between the heap and the <tt>std::allocator</tt> interface:
- The heap is stateful, while \p std::allocator is stateless.
- The heap has many more template parameters than \p std::allocator.
- The heap has a low-level interface for memory allocation only, unlike the allocator
interface, which can construct/destroy objects of any type T.

To convert the heap interface into a \p std::allocator -like interface you should:
- Declare an object of class cds::memory::michael::Heap specifying the necessary
template parameters; this is usually a static object
- Create a class with the \p std::allocator interface that uses the functions of the heap.

\code
#include <cds/memory/michael/allocator.h>

template <typename T>
class MichaelAllocator
{
    typedef std::allocator<T>               std_allocator;
    typedef cds::memory::michael::Heap<>    michael_heap;

    // Michael heap static object
    static michael_heap s_Heap;
public:
    // Declare typedefs from std::allocator
    typedef typename std_allocator::const_pointer   const_pointer;
    typedef typename std_allocator::pointer         pointer;
    typedef typename std_allocator::const_reference const_reference;
    typedef typename std_allocator::reference       reference;
    typedef typename std_allocator::difference_type difference_type;
    typedef typename std_allocator::size_type       size_type;
    typedef typename std_allocator::value_type      value_type;

    // Allocation function
    pointer allocate( size_type _Count, const void* _Hint )
    {
        return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
    }

    // Deallocation function
    void deallocate( pointer _Ptr, size_type _Count )
    {
        s_Heap.free( _Ptr );
    }

    // Other std::allocator specific functions: address, construct, destroy, etc.

    // Rebinding allocator to another type
    template <class _Other>
    struct rebind {
        typedef MichaelAllocator<_Other> other;
    };
};

MichaelAllocator::michael_heap MichaelAllocator::s_Heap;
\endcode
template <typename... Options>
class Heap

static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
static const unsigned int c_nDefaultBlockAlignment = 8;

struct default_options {
typedef cds::OS::topology           sys_topology;
typedef malloc_heap                 system_heap;
typedef page_cached_allocator<>     page_heap;
typedef aligned_malloc_heap         aligned_heap;
typedef default_sizeclass_selector  sizeclass_selector;
typedef free_list_locked<cds::sync::spin>    free_list;
typedef partial_list_locked<cds::sync::spin> partial_list;
typedef procheap_empty_stat         procheap_stat;
typedef os_allocated_empty          os_allocated_stat;
typedef cds::opt::none              check_bounds;

typedef typename opt::make_options<default_options, Options...>::type options;

typedef unsigned char byte;

typedef typename options::sys_topology         sys_topology       ;  ///< effective system topology
typedef typename options::system_heap          system_heap        ;  ///< effective system heap
typedef typename options::aligned_heap         aligned_heap       ;  ///< effective aligned heap
typedef typename options::sizeclass_selector   sizeclass_selector ;  ///< effective sizeclass selector
typedef typename options::page_heap            page_heap          ;  ///< effective page heap
typedef typename options::procheap_stat        procheap_stat      ;  ///< effective processor heap statistics
typedef typename options::os_allocated_stat    os_allocated_stat  ;  ///< effective OS-allocated memory statistics
typedef details::bound_checker_selector< typename options::check_bounds > bound_checker ;  ///< effective bound checker

// forward declarations
struct superblock_desc;
struct processor_heap_base;
struct processor_desc;
/// Superblock states

A superblock can be in one of four states: \p ACTIVE, \p FULL,
\p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
superblock in a heap, or if a thread intends to try to install it
as such. A superblock is \p FULL if all its blocks are either allocated
or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
and contains unreserved available blocks. A superblock is
\p EMPTY if all its blocks are free and it is not \p ACTIVE.

enum superblock_state {
    SBSTATE_ACTIVE  = 0,    ///< superblock is active
    SBSTATE_FULL    = 1,    ///< superblock is full
    SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
    SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
};

static const size_t c_nMaxBlockInSuperBlock = 1024 * 2 ;  ///< Maximum count of blocks in a superblock (2**11)

/// Anchor of the superblock descriptor, updated by CAS
unsigned long long avail:11 ;  ///< index of the first available block in the superblock
unsigned long long count:11 ;  ///< number of unreserved blocks in the superblock
unsigned long long state: 2 ;  ///< state of the superblock (see \ref superblock_state enum)
unsigned long long tag:40   ;  ///< ABA prevention tag
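// Sanity note (illustrative): the four bit-fields pack into exactly one 64-bit
// word (11 + 11 + 2 + 40 == 64), so the whole anchor is read and updated with
// a single CAS; the 11-bit avail/count fields are what limit a superblock to
// c_nMaxBlockInSuperBlock == 2^11 == 2048 blocks.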
/// Superblock descriptor
struct superblock_desc
    : public options::free_list::item_hook
    , public options::partial_list::item_hook

atomics::atomic<anchor_tag> anchor ;  ///< anchor, see \ref anchor_tag
byte *                pSB        ;  ///< ptr to superblock
processor_heap_base * pProcHeap  ;  ///< pointer to owner processor heap
unsigned int          nBlockSize ;  ///< block size in bytes
unsigned int          nCapacity  ;  ///< superblock size / block size

, pProcHeap( nullptr )

typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
#if CDS_BUILD_BITS == 32
/// Allocated block header

Each allocated block has an 8-byte header.
The header contains a pointer to the owner superblock descriptor and a redirection flag.
If the block has been allocated by \ref alloc, the redirection flag is 0 and the block's structure is:
\code
| blockHeader |   [8 byte] pointer to owner superblock (flag=0)
|             |   <- memory allocated
\endcode

If the block has been allocated by \ref alloc_aligned, the returned pointer may need extra alignment.
In this case the redirection flag is 1 and the block's structure is:
\code
+-> | blockHeader |   [8 byte] pointer to owner superblock (flag=0)
|         (padding up to the aligned boundary)
+-- | blockHeader |   [8 byte] pointer to block head (flag=1)
    |             |   <- memory allocated
\endcode
superblock_desc *   pDesc ;  // pointer to superblock descriptor
uint32_t            nSize ;  // block size (allocated from OS)

void set( superblock_desc * pdesc, uint32_t isAligned )
nFlags = isAligned ? bitAligned : 0;

superblock_desc * desc()
assert( (nFlags & bitOSAllocated) == 0 );
return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;

block_header * begin()
return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;

bool isAligned() const
return (nFlags & bitAligned) != 0;

bool isOSAllocated() const
return (nFlags & bitOSAllocated) != 0;

void setOSAllocated( size_t sz )
nFlags = bitOSAllocated;

size_t getOSAllocSize() const
assert( isOSAllocated() );
#elif CDS_BUILD_BITS == 64

typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
// If bitOSAllocated is set, pDesc contains the size of the memory block instead of a pointer
marked_desc_ptr pDesc;

void set( superblock_desc * pdesc, uint32_t isAligned )
pDesc = marked_desc_ptr( pdesc, isAligned );

superblock_desc * desc()
assert( !isOSAllocated() );
return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();

block_header * begin()
return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;

bool isAligned() const
return (pDesc.bits() & bitAligned) != 0;

bool isOSAllocated() const
return (pDesc.bits() & bitOSAllocated) != 0;

void setOSAllocated( size_t nSize )
pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );

size_t getOSAllocSize() const
assert( isOSAllocated() );
return reinterpret_cast<uintptr_t>( pDesc.ptr() ) >> 2;

#else
# error "Unexpected value of CDS_BUILD_BITS"
#endif  // CDS_BUILD_BITS

struct free_block_header: block_header {
unsigned int nNextFree;
};
#if CDS_BUILD_BITS == 32
/// Processor heap's \p active field

The \p active field in the processor heap structure is primarily a pointer to the descriptor
of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
guaranteed that the active superblock has at least one block available for reservation.
Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
of blocks available for reservation in the active superblock less one. That is, if the value
of credits is n, then the active superblock contains n+1 blocks available for reservation
through the \p active field. Note that the number of blocks in a superblock is not limited
to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
(i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
atomically decrements credits while validating that the active superblock is still valid.

superblock_desc * pDesc;

static const unsigned int c_nMaxCredits = 0 - 1;

CDS_CONSTEXPR active_tag() CDS_NOEXCEPT

active_tag( active_tag const& ) CDS_NOEXCEPT = default;
~active_tag() CDS_NOEXCEPT = default;
active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
# if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
active_tag( active_tag&& ) CDS_NOEXCEPT = default;
active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
# endif

/// Returns the pointer to the superblock descriptor
superblock_desc * ptr() const

/// Sets the superblock descriptor
void ptr( superblock_desc * p )

unsigned int credits() const

void credits( unsigned int n )

void set( superblock_desc * pSB, unsigned int n )
#elif CDS_BUILD_BITS == 64

static const unsigned int c_nMaxCredits = c_nAlignment - 1 ;  // 0x003F

typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
marked_desc_ptr pDesc;

active_tag() CDS_NOEXCEPT
// Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
//active_tag() CDS_NOEXCEPT = default;
active_tag( active_tag const& ) CDS_NOEXCEPT = default;
~active_tag() CDS_NOEXCEPT = default;
active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
# if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
active_tag( active_tag&& ) CDS_NOEXCEPT = default;
active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
# endif

superblock_desc * ptr() const

void ptr( superblock_desc * p )
assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
pDesc = marked_desc_ptr( p, pDesc.bits());

unsigned int credits() const
return (unsigned int) pDesc.bits();

void credits( unsigned int n )
assert( n <= c_nMaxCredits );
pDesc = marked_desc_ptr( pDesc.ptr(), n );

pDesc = marked_desc_ptr();

void set( superblock_desc * pSB, unsigned int n )
assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
pDesc = marked_desc_ptr( pSB, n );
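// Illustration: superblock descriptors are allocated with c_nAlignment
// (cache-line, e.g. 64-byte) alignment, so the low bits of the pointer are
// always zero. set( pSB, n ) stores the credits n (0..c_nMaxCredits) in those
// spare bits, letting ptr() and credits() share a single word that can be
// updated with one CAS.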
#else
# error "Unexpected value of CDS_BUILD_BITS"
#endif  // CDS_BUILD_BITS

struct processor_heap_base

CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;  ///< pointer to the descriptor of the active superblock owned by the processor heap
processor_desc *    pProcDesc   ;  ///< pointer to parent processor descriptor
const size_class *  pSizeClass  ;  ///< pointer to size class
atomics::atomic<superblock_desc *> pPartial ;  ///< pointer to a partially filled superblock (may be \p nullptr)
partial_list        partialList ;  ///< list of partially filled superblocks owned by the processor heap
unsigned int        nPageIdx    ;  ///< page size-class index; \ref c_nPageSelfAllocation means "small page"

/// Small page marker

If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
This optimization allows system memory to be requested more regularly, in blocks of 1M,
which leads to less memory fragmentation.

static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;

procheap_stat stat ;  ///< heap statistics
//processor_heap_statistics stat;

processor_heap_base() CDS_NOEXCEPT
: pProcDesc( nullptr )
, pSizeClass( nullptr )
, pPartial( nullptr )

assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
/// Gets a partial superblock owned by the processor heap
superblock_desc * get_partial()
superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
pDesc = partialList.pop();
} while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );

//assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
//assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );

/// Adds the partial superblock \p pDesc to the list
void add_partial( superblock_desc * pDesc )
assert( pPartial != pDesc );
//assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );

superblock_desc * pCur = nullptr;
if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
partialList.push( pDesc );
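// Design note: pPartial is a one-element fast path. If the CAS finds the slot
// already occupied, the descriptor overflows into the lock-protected partialList;
// get_partial() drains the slot first and refills it from the list.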
/// Removes superblock \p pDesc from the list of partial superblocks
bool unlink_partial( superblock_desc * pDesc )
return partialList.unlink( pDesc );

/// Aligned processor heap structure
typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;

/// Processor descriptor
struct processor_desc
processor_heap * arrProcHeap    ;  ///< array of processor heaps
free_list        listSBDescFree ;  ///< list of free superblock descriptors
page_heap *      pageHeaps      ;  ///< array of page heaps (one for each page size)

: arrProcHeap( nullptr )
, pageHeaps( nullptr )

sys_topology        m_Topology          ;  ///< System topology
system_heap         m_LargeHeap         ;  ///< Heap for large blocks
aligned_heap        m_AlignedHeap       ;  ///< Internal aligned heap
sizeclass_selector  m_SizeClassSelector ;  ///< Size-class selector
atomics::atomic<processor_desc *> * m_arrProcDesc ;  ///< array of pointers to the processor descriptors
unsigned int        m_nProcessorCount   ;  ///< Processor count
bound_checker       m_BoundChecker      ;  ///< Bound checker

os_allocated_stat   m_OSAllocStat       ;  ///< OS-allocated memory statistics
/// Allocates a large block from system memory
block_header * alloc_from_OS( size_t nSize )
block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
m_OSAllocStat.incBytesAllocated( nSize );
p->setOSAllocated( nSize );

/// Allocates from the active superblock if possible
block_header * alloc_from_active( processor_heap * pProcHeap )
active_tag oldActive;
int nCollision = -1;

oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
if ( !oldActive.ptr() )
unsigned int nCredits = oldActive.credits();
active_tag newActive ;  // default = 0
if ( nCredits != 0 ) {
newActive = oldActive;
newActive.credits( nCredits - 1 );
if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))

pProcHeap->stat.incActiveDescCASFailureCount( nCollision );

superblock_desc * pDesc = oldActive.ptr();

anchor_tag oldAnchor;
anchor_tag newAnchor;

unsigned int nMoreCredits = 0;

newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);

assert( oldAnchor.avail < pDesc->nCapacity );
CDS_TSAN_ANNOTATE_IGNORE_READS_BEGIN;
pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
CDS_TSAN_ANNOTATE_IGNORE_READS_END;

if ( oldActive.credits() == 0 ) {
// state must be ACTIVE
if ( oldAnchor.count == 0 )
newAnchor.state = SBSTATE_FULL;
nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
newAnchor.count -= nMoreCredits;
} while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));

pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );

assert( newAnchor.state != SBSTATE_EMPTY );

if ( newAnchor.state == SBSTATE_FULL )
pProcHeap->stat.incDescFull();
if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
update_active( pProcHeap, pDesc, nMoreCredits );

pProcHeap->stat.incAllocFromActive();

// The block_header fields need not be set up here:
// they were set in alloc_from_new_superblock
assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );

return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a partially filled superblock if possible
block_header * alloc_from_partial( processor_heap * pProcHeap )
superblock_desc * pDesc = pProcHeap->get_partial();

anchor_tag oldAnchor;
anchor_tag newAnchor;

unsigned int nMoreCredits = 0;

int nCollision = -1;

newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
if ( oldAnchor.state == SBSTATE_EMPTY ) {
free_superblock( pDesc );

nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
newAnchor.count -= nMoreCredits + 1;
newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
} while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );

pProcHeap->stat.incPartialDescCASFailureCount( nCollision );

if ( newAnchor.state == SBSTATE_FULL )
pProcHeap->stat.incDescFull();

// Now the thread is guaranteed to have reserved one or more blocks.
// Pop a reserved block
newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);

assert( oldAnchor.avail < pDesc->nCapacity );
CDS_TSAN_ANNOTATE_IGNORE_READS_BEGIN;
pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
CDS_TSAN_ANNOTATE_IGNORE_READS_END;

} while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );

pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );

assert( newAnchor.state != SBSTATE_EMPTY );

pProcHeap->stat.incAllocFromPartial();

if ( nMoreCredits > 0 )
update_active( pProcHeap, pDesc, nMoreCredits );

// The block_header fields need not be set up here:
// they were set in alloc_from_new_superblock
assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );

return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a new superblock
block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
superblock_desc * pDesc = new_superblock_desc( pProcHeap );
assert( pDesc != nullptr );
pDesc->pSB = new_superblock_buffer( pProcHeap );

anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);

// Build a singly-linked list of free blocks in the superblock
byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
unsigned int nNext = 0;
const unsigned int nBlockSize = pDesc->nBlockSize;
CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
}
reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;

active_tag newActive;
newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );

anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
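// Credits arithmetic (illustrative): one block goes to the caller,
// credits + 1 blocks are reserved through the new 'active' tag, and the
// remaining nCapacity - 1 - (credits + 1) blocks stay available via the anchor.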
anchor.state = SBSTATE_ACTIVE;
pDesc->anchor.store(anchor, atomics::memory_order_relaxed);

active_tag curActive;
if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
pProcHeap->stat.incAllocFromNew();
//reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
return reinterpret_cast<block_header *>( pDesc->pSB );

free_superblock( pDesc );
/// Finds the appropriate processor heap based on the selected size-class
processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
assert( nSizeClassIndex < m_SizeClassSelector.size() );

unsigned int nProcessorId = m_Topology.current_processor();
assert( nProcessorId < m_nProcessorCount );

if ( nProcessorId >= m_nProcessorCount )

processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
processor_desc * pNewDesc = new_processor_desc( nProcessorId );
if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
free_processor_desc( pNewDesc );

return &( pDesc->arrProcHeap[ nSizeClassIndex ] );

/// Updates the \p active field of the processor heap \p pProcHeap
void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
assert( pProcHeap == pDesc->pProcHeap );

active_tag nullActive;
active_tag newActive;
newActive.set( pDesc, nCredits - 1 );

if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )

// Someone has installed another active superblock.
// Return the credits to the superblock and make it partial
anchor_tag oldAnchor;
anchor_tag newAnchor;

newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
newAnchor.count += nCredits;
newAnchor.state = SBSTATE_PARTIAL;
} while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));

pDesc->pProcHeap->add_partial( pDesc );
/// Allocates a new processor descriptor
processor_desc * new_processor_desc( unsigned int nProcessorId )
CDS_UNUSED( nProcessorId );
processor_desc * pDesc;
const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();

/*
Processor descriptor layout

proc_desc -  64-byte alignment
page_heap[0] 64-byte alignment
page_heap[1] 64-byte alignment
...
page_heap[P] 64-byte alignment

proc_heap[0] 64-byte alignment
proc_heap[1] 64-byte alignment
...
proc_heap[N] 64-byte alignment
*/

const size_t szDesc =
( sizeof(processor_desc)
+ sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
+ c_nAlignment - 1
) / c_nAlignment;

const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();

static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );

// TSan false positive: the new descriptor will be linked later with a release fence
CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;

pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;

pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
for ( size_t i = 0; i < nPageHeapCount; ++i )
new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));

// initialize processor heaps
pDesc->arrProcHeap =
reinterpret_cast<processor_heap *>(
reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
& ~(uintptr_t(c_nAlignment) - 1)
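// (addr + c_nAlignment - 1) & ~(c_nAlignment - 1) is the usual align-up idiom:
// it rounds the processor-heap array start up to the next c_nAlignment boundary.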
);

processor_heap * pProcHeap = pDesc->arrProcHeap;
processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
new (pProcHeap) processor_heap();
pProcHeap->pProcDesc = pDesc;
pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
else
pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
}
CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
void free_processor_heap( processor_heap * pProcHeap )
if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
superblock_desc * pDesc;

for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
free( pDesc->pSB );
m_AlignedHeap.free( pDesc );
}

superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
if ( pPartial ) {
free( pPartial->pSB );
m_AlignedHeap.free( pPartial );
}

pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
if ( pDesc ) {
free( pDesc->pSB );
m_AlignedHeap.free( pDesc );
}
}
else {
page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
superblock_desc * pDesc;

for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
pageHeap.free( pDesc->pSB );
m_AlignedHeap.free( pDesc );
}

superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
if ( pPartial ) {
pageHeap.free( pPartial->pSB );
m_AlignedHeap.free( pPartial );
}

pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
if ( pDesc ) {
pageHeap.free( pDesc->pSB );
m_AlignedHeap.free( pDesc );
}
}
pProcHeap->~processor_heap();
/// Frees a processor descriptor
void free_processor_desc( processor_desc * pDesc )
const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();

for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
free_processor_heap( pDesc->arrProcHeap + j );

for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
m_AlignedHeap.free( pSBDesc );

for (size_t i = 0; i < nPageHeapCount; ++i )
(pDesc->pageHeaps + i)->page_heap::~page_heap();

//m_IntHeap.free( pDesc->pageHeaps );
pDesc->pageHeaps = nullptr;

pDesc->processor_desc::~processor_desc();
m_AlignedHeap.free( pDesc );
/// Allocates a new superblock descriptor
superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
if ( pDesc == nullptr ) {
pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );

anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
pDesc->anchor.store( anchor, atomics::memory_order_relaxed );

pProcHeap->stat.incDescAllocCount();
}
pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
pDesc->nCapacity  = pProcHeap->pSizeClass->nCapacity;
assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
pDesc->pProcHeap  = pProcHeap;

anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
/// Allocates a superblock page
byte * new_superblock_buffer( processor_heap * pProcHeap )
pProcHeap->stat.incBlockAllocated();
if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
}
else {
return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
}

/// Frees a superblock descriptor and its page
void free_superblock( superblock_desc * pDesc )
pDesc->pProcHeap->stat.incBlockDeallocated();
processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;

if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
free( pDesc->pSB );
}
else {
pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
}

pProcDesc->listSBDescFree.push( pDesc );
/// Allocates a memory block
block_header * int_alloc(
size_t nSize  ///< size of the memory block to allocate, in bytes
)
typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
return alloc_from_OS( nSize );
}
assert( nSizeClassIndex < m_SizeClassSelector.size() );

block_header * pBlock;
processor_heap * pProcHeap;
pProcHeap = find_heap( nSizeClassIndex );
if ( !pProcHeap )
return alloc_from_OS( nSize );

while ( true ) {
if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
break;
if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
break;
if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
break;
}

pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );

assert( pBlock != nullptr );
/// Heap constructor
Heap()
// Explicit libcds initialization is needed since a static Heap object may be
// constructed before libcds itself is initialized
m_nProcessorCount = m_Topology.processor_count();
m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
atomics::atomic<processor_desc *>[ m_nProcessorCount ];
memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount ) ;  // ?? memset for atomic<>

/// Heap destructor

The destructor frees all memory allocated by the heap.
for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
if ( pDesc )
free_processor_desc( pDesc );
}

m_AlignedHeap.free( m_arrProcDesc );

// Explicit termination of libcds

/// Allocates a memory block
void * alloc(
size_t nSize  ///< size of the memory block to allocate, in bytes
)
block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );

// Bound checking is only for our blocks
if ( !pBlock->isOSAllocated() ) {
// the block is allocated from our heap - the bound checker is applicable
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pBlock + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
/// Frees a previously allocated memory block
void free(
void * pMemory  ///< pointer to the memory block to free
)
block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
block_header * pBlock = pRedirect->begin();

if ( pBlock->isOSAllocated() ) {
// The block has been allocated from the OS
m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
m_LargeHeap.free( pBlock );
return;
}

assert( !pBlock->isAligned() );
superblock_desc * pDesc = pBlock->desc();

m_BoundChecker.check_bounds(
pRedirect + 1,
reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
pDesc->nBlockSize
);

anchor_tag oldAnchor;
anchor_tag newAnchor;
processor_heap_base * pProcHeap = pDesc->pProcHeap;

pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );

oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
do {
newAnchor = oldAnchor;
reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
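// The freed block becomes the new head of the superblock's free list:
// its index is recovered from its byte offset within the superblock.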
assert( oldAnchor.state != SBSTATE_EMPTY );

if ( oldAnchor.state == SBSTATE_FULL )
newAnchor.state = SBSTATE_PARTIAL;

if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
//pProcHeap = pDesc->pProcHeap;
//CDS_COMPILER_RW_BARRIER ;  // is an instruction fence needed?..
newAnchor.state = SBSTATE_EMPTY;
}
else
newAnchor.count += 1;
} while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );

pProcHeap->stat.incFreeCount();

if ( newAnchor.state == SBSTATE_EMPTY ) {
if ( pProcHeap->unlink_partial( pDesc ))
free_superblock( pDesc );
}
else if ( oldAnchor.state == SBSTATE_FULL ) {
assert( pProcHeap != nullptr );
pProcHeap->stat.decDescFull();
pProcHeap->add_partial( pDesc );
}
/// Reallocates a memory block

If \p nNewSize is zero, the block pointed to by \p pMemory is freed;
the return value is \p nullptr, and \p pMemory is left pointing at a freed block.

If there is not enough available memory to expand the block to the given size,
the original block is left unchanged and \p nullptr is returned.

An aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
the return value is \p nullptr and the original block is left unchanged.

void * realloc(
void * pMemory,   ///< pointer to a previously allocated memory block
size_t nNewSize   ///< new size of the memory block, in bytes
)
if ( nNewSize == 0 ) {
free( pMemory );
return nullptr;
}

const size_t nOrigSize = nNewSize;
nNewSize += sizeof(block_header) + bound_checker::trailer_size;

block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;

// Reallocation of an aligned block is not possible
if ( pBlock->isAligned() )
return nullptr;

if ( pBlock->isOSAllocated() ) {
// The block has been allocated from the OS
size_t nCurSize = pBlock->getOSAllocSize();

if ( nCurSize >= nNewSize )
return pMemory;

void * pNewBuf = alloc( nOrigSize );
if ( pNewBuf ) {
memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
free( pMemory );
}
return pNewBuf;
}

superblock_desc * pDesc = pBlock->desc();
if ( pDesc->nBlockSize >= nNewSize ) {
// In-place reallocation: the existing block is already large enough
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pBlock + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
nOrigSize
);
return pMemory;
}

void * pNew = alloc( nNewSize );
if ( pNew ) {
memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
free( pMemory );
}
return pNew;
/// Allocates an aligned memory block
void * alloc_aligned(
size_t nSize,       ///< size of the memory block to allocate, in bytes
size_t nAlignment   ///< alignment
)
if ( nAlignment <= c_nDefaultBlockAlignment ) {
void * p = alloc( nSize );
assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
return p;
}

block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );

block_header * pRedirect;
if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
assert( pRedirect != pBlock );
pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );

assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
}
else
pRedirect = pBlock;

// Bound checking is only for our blocks
if ( !pBlock->isOSAllocated() ) {
// the block is allocated from our heap - the bound checker is applicable
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pRedirect + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
nSize
);
}

return pRedirect + 1;
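/*
Usage sketch (illustrative):

    void * p = myHeap.alloc_aligned( 100, 64 ); // 64-byte aligned block
    assert( (reinterpret_cast<uintptr_t>(p) & 63) == 0 );
    myHeap.free_aligned( p );  // must be paired with alloc_aligned
*/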
/// Frees an aligned memory block previously allocated by \ref alloc_aligned
void free_aligned(
void * pMemory  ///< pointer to the memory block to free
)
free( pMemory );

/// Gets an instant summary of the statistics
void summaryStat( summary_stat& st )
size_t nProcHeapCount = m_SizeClassSelector.size();
for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
if ( pProcDesc ) {
for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
if ( pProcHeap )
st.add_procheap_stat( pProcHeap->stat );
}
}
}

st.add_heap_stat( m_OSAllocStat );

}}}  // namespace cds::memory::michael

#endif  // CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H