3 #ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
7 Michael allocator implementation
9 [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
2011.09.07 khizmax Optimization: small pages (about 64K) are allocated by a Heap::alloc call.
This optimization allows the allocator to request system memory more regularly,
in blocks of 1M, which leads to less memory fragmentation.
15 2011.01.02 khizmax Created
19 #include <mutex> // unique_lock
21 #include <cds/memory/michael/options.h>
22 #include <cds/memory/michael/bound_check.h>
23 #include <cds/memory/michael/procheap_stat.h>
24 #include <cds/memory/michael/osalloc_stat.h>
26 #include <cds/os/topology.h>
27 #include <cds/os/alloc_aligned.h>
28 #include <cds/sync/spinlock.h>
29 #include <cds/details/type_padding.h>
30 #include <cds/details/marked_ptr.h>
31 #include <cds/container/vyukov_mpmc_cycle_queue.h>
32 #include <cds/user_setup/cache_line.h>
33 #include <cds/details/lib.h>
35 #include <boost/intrusive/list.hpp>
38 /// Memory-related algorithms: allocators etc.
40 /// Michael's allocator (class Heap)
43 \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
This namespace declares the main class Heap and a number of helper classes.
51 unsigned int nBlockSize ; ///< block size in bytes
52 unsigned int nSBSize ; ///< superblock size (64K or 1M)
53 unsigned int nCapacity ; ///< superblock capacity (nSBSize / nBlockSize)
54 unsigned int nSBSizeIdx ; ///< internal superblock size index (page index)
57 /// %Heap based on system \p malloc and \p free functions
60 /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
61 static void * alloc( size_t nSize )
63 return ::malloc( nSize );
/// Returns a memory block to the system (\p free wrapper)
66 static void free( void * p )
72 /// %Heap based on system provided aligned \p malloc and \p free functions
73 struct aligned_malloc_heap
75 /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
76 static void * alloc( size_t nSize, size_t nAlignment )
78 return cds::OS::aligned_malloc( nSize, nAlignment );
/// Frees the aligned memory block \p p that has been previously allocated by an \ref alloc call
81 static void free( void * p )
83 cds::OS::aligned_free( p );
87 /// Page heap based on \p Heap
A page heap can allocate memory in page-sized blocks only.
\p Heap may be any heap that provides an interface like \ref malloc_heap.
This class is one of the available implementations of the opt::page_heap option.
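For example, a Heap that uses this plain (non-caching) page allocator can be declared
like this (a sketch in the style of the Heap usage example further below):
\code
cds::memory::michael::Heap<
opt::page_heap< page_allocator< malloc_heap > >
> myHeap;
\endcode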
94 template <class Heap = malloc_heap>
95 class page_allocator: public Heap
98 typedef Heap base_class;
105 size_t nPageSize ///< page size in bytes
107 : m_nPageSize( nPageSize )
110 /// Allocate new page
113 return base_class::alloc( m_nPageSize );
116 /// Free page \p pPage
117 void free( void * pPage )
119 base_class::free( pPage );
123 /// Page cacheable heap
To improve performance this allocator maintains a small list of free pages.
A page heap can allocate memory in page-sized blocks only.
\li \p FreeListCapacity - capacity of the free-list; the default value is 64 pages
\li \p Heap may be any heap that provides an interface like \ref malloc_heap.
This class is one of the available implementations of the opt::page_heap option.
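For example, to cache up to 128 free pages instead of the default 64
(a sketch in the style of the Heap usage example further below):
\code
cds::memory::michael::Heap<
opt::page_heap< page_cached_allocator< 128, malloc_heap > >
> myHeap;
\endcode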
134 template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
135 class page_cached_allocator: public page_allocator<Heap>
138 typedef page_allocator<Heap> base_class;
141 struct make_null_ptr {
142 void operator ()(void *& p)
148 struct free_list_traits : public cds::container::vyukov_queue::traits
150 typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
152 typedef make_null_ptr value_cleaner;
155 typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
157 free_list m_FreeList;
162 page_cached_allocator(
163 size_t nPageSize ///< page size in bytes
165 : base_class( nPageSize )
166 , m_FreeList( FreeListCapacity )
170 ~page_cached_allocator()
173 while ( m_FreeList.pop(pPage) )
174 base_class::free( pPage );
178 /// Allocate new page
182 if ( !m_FreeList.pop( pPage ) )
183 pPage = base_class::alloc();
187 /// Free page \p pPage
188 void free( void * pPage )
190 if ( !m_FreeList.push( pPage ))
191 base_class::free( pPage );
195 /// Implementation of opt::sizeclass_selector option
The default size-class selector can manage memory blocks up to 64K.
199 class CDS_EXPORT_API default_sizeclass_selector
202 /// Count of different size-classes
203 static const size_t c_nSizeClassCount = 63;
206 static const size_t c_nMaxBlockSize = 64 * 1024;
208 /// Page size of type 0 (64K)
209 static const unsigned int c_nPage64K = 64 * 1024 - 32;
211 /// Page size of type 1 (1M)
212 static const unsigned int c_nPage1M = 1024 * 1024;
214 static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
215 static size_class const m_szClass[c_nSizeClassCount];
216 static unsigned char const m_szClassMap[];
219 /// Type of size-class index
220 typedef unsigned int sizeclass_index;
223 default_sizeclass_selector();
226 /// "No size class" index
227 static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
229 /// Returns size-class count
230 static sizeclass_index size()
232 return c_nSizeClassCount;
235 /// Returns page size in bytes for given page type \p nPageType
236 static size_t page_size(size_t nPageType )
assert(false); // unknown page type
/// Returns the number of page types
This class supports pages of two types: a 64K page for small objects and a 1M page for other objects.
253 static size_t pageTypeCount()
258 /// Returns size-class index for \p nSize
For large blocks that cannot be served by Michael's allocator
the function returns \p c_nNoSizeClass.
263 static sizeclass_index find( size_t nSize )
265 if ( nSize > c_nMaxBlockSize ) {
266 // Too large block - allocate from system
267 return c_nNoSizeClass;
269 sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
270 assert( nSize <= m_szClassBounds[ szClass ] );
271 assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
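// Worked example: nSize = 100 -> granule (100 + 15) / 16 = 7.
// Each map entry covers one 16-byte granule of request sizes, so every
// request of 97..112 bytes maps to the same size class.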
276 /// Gets details::size_class struct for size-class index \p nIndex
277 static const size_class * at( sizeclass_index nIndex )
279 assert( nIndex < size() );
280 return m_szClass + nIndex;
286 struct free_list_tag;
287 typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > > free_list_locked_hook;
289 struct partial_list_tag;
290 typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > > partial_list_locked_hook;
292 struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
/// List of free superblock descriptors
This class is an implementation of the \ref opt::free_list option
301 template <class Lock, class T = details::intrusive_superblock_desc>
302 class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
305 typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
307 typedef details::free_list_locked_hook item_hook;
308 typedef Lock lock_type;
310 typedef std::unique_lock<lock_type> auto_lock;
312 mutable lock_type m_access;
316 /// Rebinds to other item type \p T2
319 typedef free_list_locked<Lock, T2> other ; ///< rebind result
323 /// Push superblock descriptor to free-list
324 void push( T * pDesc )
326 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
327 auto_lock al(m_access);
328 base_class::push_back( *pDesc );
331 /// Pop superblock descriptor from free-list
334 auto_lock al(m_access);
335 if ( base_class::empty() )
337 T& rDesc = base_class::front();
338 base_class::pop_front();
339 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
343 /// Returns current count of superblocks in free-list
346 auto_lock al(m_access);
347 return base_class::size();
/// List of partially filled superblock descriptors
This class is an implementation of the \ref opt::partial_list option
355 template <class Lock, class T = details::intrusive_superblock_desc>
356 class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
359 typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
361 typedef details::partial_list_locked_hook item_hook;
362 typedef Lock lock_type;
364 typedef std::unique_lock<lock_type> auto_lock;
366 mutable lock_type m_access;
370 /// Rebinds to other item type \p T2
373 typedef partial_list_locked<Lock, T2> other ; ///< rebind result
377 /// Push a superblock \p pDesc to the list
378 void push( T * pDesc )
380 auto_lock al( m_access );
381 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
382 base_class::push_back( *pDesc );
385 /// Pop superblock from the list
388 auto_lock al( m_access );
389 if ( base_class::empty() )
391 T& rDesc = base_class::front();
392 base_class::pop_front();
393 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
/// Removes the \p pDesc descriptor from the partial-list
398 bool unlink( T * pDesc )
400 assert(pDesc != nullptr);
401 auto_lock al( m_access );
// !inited(pDesc) means that pDesc is currently linked into the partial list
403 if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
404 base_class::erase( base_class::iterator_to( *pDesc ) );
/// Count of elements in the list
413 auto_lock al( m_access );
414 return base_class::size();
418 /// Summary processor heap statistics
Summary heap statistics for use with the Heap::summaryStat function.
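A typical usage sketch (\p myHeap denotes some \ref Heap instance defined elsewhere):
\code
summary_stat stBegin, stEnd;
myHeap.summaryStat( stBegin );
// ... workload ...
myHeap.summaryStat( stEnd );
stEnd -= stBegin; // stEnd now holds the per-interval statistics
\endcode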
424 size_t nAllocFromActive ; ///< Event count of allocation from active superblock
425 size_t nAllocFromPartial ; ///< Event count of allocation from partial superblock
426 size_t nAllocFromNew ; ///< Event count of allocation from new superblock
size_t nFreeCount ; ///< Count of \p free function calls
size_t nPageAllocCount ; ///< Count of pages (superblocks) allocated
size_t nPageDeallocCount ; ///< Count of pages (superblocks) deallocated
size_t nDescAllocCount ; ///< Count of superblock descriptors allocated
size_t nDescFull ; ///< Count of full superblocks
432 atomic64u_t nBytesAllocated ; ///< Count of allocated bytes (for heap managed memory blocks)
433 atomic64u_t nBytesDeallocated ; ///< Count of deallocated bytes (for heap managed memory blocks)
size_t nSysAllocCount ; ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
size_t nSysFreeCount ; ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesAllocated ; ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesDeallocated; ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
440 // Internal contention indicators
/// CAS failure counter for updating the \p active field of the active superblock in the \p alloc_from_active Heap internal function
Contention indicator: the lower the value, the better
size_t nActiveDescCASFailureCount;
/// CAS failure counter for updating the anchor field of the active superblock in the \p alloc_from_active Heap internal function
Contention indicator: the lower the value, the better
size_t nActiveAnchorCASFailureCount;
/// CAS failure counter for updating the anchor field (block reservation) of a partial superblock in the \p alloc_from_partial Heap internal function
Contention indicator: the lower the value, the better
size_t nPartialDescCASFailureCount;
/// CAS failure counter for updating the anchor field (block popping) of a partial superblock in the \p alloc_from_partial Heap internal function
Contention indicator: the lower the value, the better
size_t nPartialAnchorCASFailureCount;
464 /// Constructs empty statistics. All counters are zero.
470 /// Difference statistics
This operator computes the difference between \p *this and \p stat and stores the result in \p *this.
475 summary_stat& operator -=( const summary_stat& stat )
477 nAllocFromActive -= stat.nAllocFromActive;
478 nAllocFromPartial -= stat.nAllocFromPartial;
479 nAllocFromNew -= stat.nAllocFromNew;
480 nFreeCount -= stat.nFreeCount;
481 nPageAllocCount -= stat.nPageAllocCount;
482 nPageDeallocCount -= stat.nPageDeallocCount;
483 nDescAllocCount -= stat.nDescAllocCount;
484 nDescFull -= stat.nDescFull;
485 nBytesAllocated -= stat.nBytesAllocated;
486 nBytesDeallocated -= stat.nBytesDeallocated;
488 nSysAllocCount -= stat.nSysAllocCount;
489 nSysFreeCount -= stat.nSysFreeCount;
490 nSysBytesAllocated -= stat.nSysBytesAllocated;
491 nSysBytesDeallocated -= stat.nSysBytesDeallocated;
493 nActiveDescCASFailureCount -= stat.nActiveDescCASFailureCount;
494 nActiveAnchorCASFailureCount -= stat.nActiveAnchorCASFailureCount;
495 nPartialDescCASFailureCount -= stat.nPartialDescCASFailureCount;
496 nPartialAnchorCASFailureCount -= stat.nPartialAnchorCASFailureCount;
501 /// Clears statistics
503 All counters are set to zero.
507 memset( this, 0, sizeof(*this));
511 template <typename Stat>
512 summary_stat& add_procheap_stat( const Stat& stat )
514 nAllocFromActive += stat.allocFromActive();
515 nAllocFromPartial += stat.allocFromPartial();
516 nAllocFromNew += stat.allocFromNew();
517 nFreeCount += stat.freeCount();
518 nPageAllocCount += stat.blockAllocated();
519 nPageDeallocCount += stat.blockDeallocated();
520 nDescAllocCount += stat.descAllocCount();
521 nDescFull += stat.descFull();
522 nBytesAllocated += stat.allocatedBytes();
523 nBytesDeallocated += stat.deallocatedBytes();
525 nActiveDescCASFailureCount += stat.activeDescCASFailureCount();
526 nActiveAnchorCASFailureCount += stat.activeAnchorCASFailureCount();
527 nPartialDescCASFailureCount += stat.partialDescCASFailureCount();
528 nPartialAnchorCASFailureCount += stat.partialAnchorCASFailureCount();
533 template <typename Stat>
534 summary_stat& add_heap_stat( const Stat& stat )
536 nSysAllocCount += stat.allocCount();
537 nSysFreeCount += stat.freeCount();
539 nSysBytesAllocated += stat.allocatedBytes();
540 nSysBytesDeallocated+= stat.deallocatedBytes();
547 /// Michael's allocator
This class provides the base functionality of Michael's allocator. It does not provide
the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
The heap interface is closer to the semantics of the \p malloc / \p free system functions.
The heap supports allocation of aligned and unaligned data.
The algorithm is based on a simplified version of
\li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"

which is, in turn, a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
\li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis
This is a powerful, scalable, fully customizable heap with a lock-free fast path
that has been developed specifically for multi-threaded applications.
With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
You can manually bind any of your threads to any arena ("processor"). With the opt::sizeclass_selector option you can manage
allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation
like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
The opt::check_bounds feature can help you find memory buffer overflows.
568 Brief algorithm description from Michael's work:
570 Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
571 the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
572 Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
573 heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
574 An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
575 that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
576 block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
577 structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
578 in a lock-free manner.
580 Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
581 the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
582 atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
583 pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
584 available blocks of its original superblock by atomically updating its descriptor.
<b>Constraint</b>: one superblock may contain up to 2048 blocks. This imposes a restriction on the maximum
589 Available \p Options:
590 - \ref opt::sys_topology - class that describes system topology needed for allocator.
591 Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
592 - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
593 Default is \ref malloc_heap.
594 - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
595 Default is \ref aligned_malloc_heap
596 - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
597 Default is \ref page_cached_allocator
598 - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
599 for incoming allocation request.
600 Default is \ref default_sizeclass_selector
601 - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
602 Default is \ref free_list_locked
603 - \ref opt::partial_list - option setter for a class to manage a list of partial filled superblocks
604 Default is \ref partial_list_locked
605 - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
606 that is maintained by the heap.
607 Default is \ref procheap_empty_stat
608 - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large block
allocation. The term "large block" is specified by the size-class selector (see \ref opt::sizeclass_selector)
and it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated from the
OS directly. The \p os_allocated_stat option sets a class to gather statistics for large blocks.
612 Default is \ref os_allocated_empty
613 - \ref opt::check_bounds - a bound checker.
614 Default is no bound checker (cds::opt::none)
617 The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
620 #include <cds/memory/michael/allocator.h>
622 // Heap with explicitly defined options:
623 cds::memory::michael::Heap<
624 opt::aligned_heap< aligned_malloc_heap >,
625 opt::page_heap< page_cached_allocator<16, malloc_heap> >
628 // Heap with default options:
629 cds::memory::michael::Heap<> myDefHeap;
632 \par How to make std-like allocator
There are serious differences between the heap and the <tt>std::allocator</tt> interface:
- Heap is stateful, while \p std::allocator is stateless.
- Heap has many more template parameters than \p std::allocator
- Heap has a low-level interface for memory allocation only, unlike the allocator
interface, which can construct/destroy objects of any type T.
To convert the heap interface into a \p std::allocator -like interface you should:
- Declare an object of class cds::memory::michael::Heap specifying the necessary
template parameters; this is usually a static object
- Create a class with the \p std::allocator interface that uses the functions of the heap.
645 #include <cds/memory/michael/allocator.h>
648 class MichaelAllocator
650 typedef std::allocator<T> std_allocator;
651 typedef cds::memory::michael::Heap<> michael_heap;
653 // Michael heap static object
654 static michael_heap s_Heap;
656 // Declare typedefs from std::allocator
657 typedef typename std_allocator::const_pointer const_pointer;
658 typedef typename std_allocator::pointer pointer;
659 typedef typename std_allocator::const_reference const_reference;
660 typedef typename std_allocator::reference reference;
661 typedef typename std_allocator::difference_type difference_type;
662 typedef typename std_allocator::size_type size_type;
663 typedef typename std_allocator::value_type value_type;
665 // Allocation function
666 pointer allocate( size_type _Count, const void* _Hint )
668 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
671 // Deallocation function
672 void deallocate( pointer _Ptr, size_type _Count )
677 // Other std::allocator specific functions: address, construct, destroy, etc.
680 // Rebinding allocator to other type
681 template <class _Other>
683 typedef MichaelAllocator<_Other> other;
688 MichaelAllocator::michael_heap MichaelAllocator::s_Heap;
692 template <typename... Options>
697 static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
698 static const unsigned int c_nDefaultBlockAlignment = 8;
700 struct default_options {
701 typedef cds::OS::topology sys_topology;
702 typedef malloc_heap system_heap;
703 typedef page_cached_allocator<> page_heap;
704 typedef aligned_malloc_heap aligned_heap;
705 typedef default_sizeclass_selector sizeclass_selector;
706 typedef free_list_locked<cds::sync::spin> free_list;
707 typedef partial_list_locked<cds::sync::spin> partial_list;
708 typedef procheap_empty_stat procheap_stat;
709 typedef os_allocated_empty os_allocated_stat;
710 typedef cds::opt::none check_bounds;
716 typedef typename opt::make_options<default_options, Options...>::type options;
720 typedef unsigned char byte;
723 typedef typename options::sys_topology sys_topology ; ///< effective system topology
724 typedef typename options::system_heap system_heap ; ///< effective system heap
725 typedef typename options::aligned_heap aligned_heap ; ///< effective aligned heap
726 typedef typename options::sizeclass_selector sizeclass_selector ; ///< effective sizeclass selector
727 typedef typename options::page_heap page_heap ; ///< effective page heap
728 typedef typename options::procheap_stat procheap_stat ; ///< effective processor heap statistics
729 typedef typename options::os_allocated_stat os_allocated_stat ; ///< effective OS-allocated memory statistics
730 typedef details::bound_checker_selector< typename options::check_bounds > bound_checker ; ///< effective bound checker
732 // forward declarations
734 struct superblock_desc;
735 struct processor_heap_base;
736 struct processor_desc;
739 /// Superblock states
741 A superblock can be in one of four states: \p ACTIVE, \p FULL,
742 \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
743 superblock in a heap, or if a thread intends to try to install it
744 as such. A superblock is \p FULL if all its blocks are either allocated
745 or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
746 and contains unreserved available blocks. A superblock is
747 \p EMPTY if all its blocks are free and it is not \p ACTIVE.
749 enum superblock_state {
750 SBSTATE_ACTIVE = 0, ///< superblock is active
751 SBSTATE_FULL = 1, ///< superblock is full
752 SBSTATE_PARTIAL = 2, ///< superblock is partially allocated
753 SBSTATE_EMPTY = 3 ///< superblock is empty and may be freed
756 static const size_t c_nMaxBlockInSuperBlock = 1024 * 2 ; ///< Max count of blocks in superblock (2 ** 11)
758 /// Anchor of the superblock descriptor. Updated by CAS
760 unsigned long long avail:11 ; ///< index of first available block in the superblock
761 unsigned long long count:11 ; ///< number of unreserved blocks in the superblock
762 unsigned long long state: 2 ; ///< state of the superblock (see \ref superblock_state enum)
763 unsigned long long tag:40 ; ///< ABA prevention tag
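// 11 + 11 + 2 + 40 = 64 bits: the whole anchor packs into one machine word,
// so it can be updated atomically by a single CAS. The 11-bit avail/count
// fields are the reason a superblock holds at most 2048 blocks.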
766 /// Superblock descriptor
767 struct superblock_desc
768 : public options::free_list::item_hook
769 , public options::partial_list::item_hook
771 atomics::atomic<anchor_tag> anchor ; ///< anchor, see \ref anchor_tag
772 byte * pSB ; ///< ptr to superblock
773 processor_heap_base * pProcHeap ; ///< pointer to owner processor heap
774 unsigned int nBlockSize ; ///< block size in bytes
775 unsigned int nCapacity ; ///< superblock size/block size
780 , pProcHeap( nullptr )
786 typedef typename options::free_list::template rebind<superblock_desc>::other free_list;
787 typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
790 #if CDS_BUILD_BITS == 32
791 /// Allocated block header
Each allocated block has an 8-byte header.
The header contains a pointer to the owner superblock descriptor and a redirection flag.
795 If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
798 | blockHeader | [8 byte] pointer to owner superblock (flag=0)
800 | | <- memory allocated
If the block has been allocated by \ref alloc_aligned, the pointer returned to the caller may have to be
shifted inside the block to satisfy the requested alignment. In this case the redirection flag is 1 and the block's structure is:
809 +-> | blockHeader | [8 byte] pointer to owner superblock (flag=0)
815 +-- | blockHeader | [8 byte] pointer to block head (flag=1)
817 | | <- memory allocated
832 superblock_desc * pDesc ; // pointer to superblock descriptor
uint32_t nSize ; // block size (allocated from the OS)
838 void set( superblock_desc * pdesc, uint32_t isAligned )
841 nFlags = isAligned ? bitAligned : 0;
844 superblock_desc * desc()
846 assert( (nFlags & bitOSAllocated) == 0 );
847 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
850 block_header * begin()
852 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
855 bool isAligned() const
857 return (nFlags & bitAligned) != 0;
860 bool isOSAllocated() const
862 return (nFlags & bitOSAllocated) != 0;
865 void setOSAllocated( size_t sz )
868 nFlags = bitOSAllocated;
871 size_t getOSAllocSize() const
873 assert( isOSAllocated() );
879 #elif CDS_BUILD_BITS == 64
887 typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
// If bitOSAllocated is set, pDesc contains the size of the memory block
890 marked_desc_ptr pDesc;
892 void set( superblock_desc * pdesc, uint32_t isAligned )
894 pDesc = marked_desc_ptr( pdesc, isAligned );
897 superblock_desc * desc()
899 assert( !isOSAllocated() );
900 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
903 block_header * begin()
905 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
908 bool isAligned() const
910 return (pDesc.bits() & bitAligned) != 0;
913 bool isOSAllocated() const
915 return (pDesc.bits() & bitOSAllocated) != 0;
918 void setOSAllocated( size_t nSize )
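// The size is stored in the pointer field shifted left by 2 bits,
// leaving the two low bits free for the aligned/OS-allocated flags;
// getOSAllocSize() reverses the shift.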
921 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
924 size_t getOSAllocSize() const
926 assert( isOSAllocated() );
927 return reinterpret_cast<uintptr_t>( pDesc.ptr() ) >> 2;
933 # error "Unexpected value of CDS_BUILD_BITS"
934 #endif // CDS_BUILD_BITS
937 struct free_block_header: block_header {
938 unsigned int nNextFree;
942 #if CDS_BUILD_BITS == 32
943 /// Processor heap's \p active field
945 The \p active field in the processor heap structure is primarily a pointer to the descriptor
946 of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
947 guaranteed that the active superblock has at least one block available for reservation.
948 Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
949 of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
950 of blocks available for reservation in the active superblock less one. That is, if the value
951 of credits is n, then the active superblock contains n+1 blocks available for reservation
952 through the \p active field. Note that the number of blocks in a superblock is not limited
953 to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
954 (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
955 atomically decrements credits while validating that the active superblock is still valid.
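For example, with 64-byte aligned descriptors the low 6 bits of \p active can hold
credits values 0..63: an active superblock with 10 blocks available for reservation
stores \p credits = 9.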
959 superblock_desc * pDesc;
963 static const unsigned int c_nMaxCredits = 0 - 1;
966 CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
971 active_tag( active_tag const& ) CDS_NOEXCEPT = default;
972 ~active_tag() CDS_NOEXCEPT = default;
973 active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
974 # if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
975 active_tag( active_tag&& ) CDS_NOEXCEPT = default;
976 active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
979 /// Returns pointer to superblock descriptor
980 superblock_desc * ptr() const
985 /// Sets superblock descriptor
986 void ptr( superblock_desc * p )
991 unsigned int credits() const
996 void credits( unsigned int n )
1007 void set( superblock_desc * pSB, unsigned int n )
1014 #elif CDS_BUILD_BITS == 64
1019 static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1021 typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1022 marked_desc_ptr pDesc;
1025 active_tag() CDS_NOEXCEPT
1028 // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1029 //active_tag() CDS_NOEXCEPT = default;
1030 active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1031 ~active_tag() CDS_NOEXCEPT = default;
1032 active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1033 # if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1034 active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1035 active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1037 superblock_desc * ptr() const
1042 void ptr( superblock_desc * p )
1044 assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
1045 pDesc = marked_desc_ptr( p, pDesc.bits());
1048 unsigned int credits()
1050 return (unsigned int) pDesc.bits();
1053 void credits( unsigned int n )
1055 assert( n <= c_nMaxCredits );
1056 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1061 pDesc = marked_desc_ptr();
1064 void set( superblock_desc * pSB, unsigned int n )
1066 assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
1067 pDesc = marked_desc_ptr( pSB, n );
1073 # error "Unexpected value of CDS_BUILD_BITS"
1074 #endif // CDS_BUILD_BITS
1078 struct processor_heap_base
1080 CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active; ///< pointer to the descriptor of active superblock owned by processor heap
1081 processor_desc * pProcDesc ; ///< pointer to parent processor descriptor
1082 const size_class * pSizeClass ; ///< pointer to size class
1083 atomics::atomic<superblock_desc *> pPartial ; ///< pointer to partial filled superblock (may be \p nullptr)
1084 partial_list partialList ; ///< list of partial filled superblocks owned by the processor heap
unsigned int nPageIdx ; ///< page size-class index; equals \ref c_nPageSelfAllocation for a "small page"
1087 /// Small page marker
If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
This optimization allows the allocator to request system memory more regularly, in blocks of 1M, which leads
to less memory fragmentation.
1093 static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1095 procheap_stat stat ; ///< heap statistics
1096 //processor_heap_statistics stat;
1099 processor_heap_base() CDS_NOEXCEPT
1100 : pProcDesc( nullptr )
1101 , pSizeClass( nullptr )
1102 , pPartial( nullptr )
1104 assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
1108 /// Get partial superblock owned by the processor heap
1109 superblock_desc * get_partial()
1111 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1114 pDesc = partialList.pop();
1117 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1119 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1120 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1124 /// Add partial superblock \p pDesc to the list
1125 void add_partial( superblock_desc * pDesc )
1127 assert( pPartial != pDesc );
1128 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1130 superblock_desc * pCur = nullptr;
1131 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1132 partialList.push( pDesc );
/// Removes superblock \p pDesc from the list of partial superblocks
1137 bool unlink_partial( superblock_desc * pDesc )
1139 return partialList.unlink( pDesc );
/// Aligned processor heap structure
1144 typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1146 /// Processor descriptor
1147 struct processor_desc
processor_heap * arrProcHeap ; ///< array of processor heaps
free_list listSBDescFree ; ///< List of free superblock descriptors
page_heap * pageHeaps ; ///< array of page heaps (one for each page size)
1155 : arrProcHeap( nullptr )
1156 , pageHeaps( nullptr )
1163 sys_topology m_Topology ; ///< System topology
system_heap m_LargeHeap ; ///< Heap for large blocks
1165 aligned_heap m_AlignedHeap ; ///< Internal aligned heap
1166 sizeclass_selector m_SizeClassSelector ; ///< Size-class selector
1167 atomics::atomic<processor_desc *> * m_arrProcDesc ; ///< array of pointers to the processor descriptors
1168 unsigned int m_nProcessorCount ; ///< Processor count
1169 bound_checker m_BoundChecker ; ///< Bound checker
1171 os_allocated_stat m_OSAllocStat ; ///< OS-allocated memory statistics
1176 /// Allocates large block from system memory
1177 block_header * alloc_from_OS( size_t nSize )
1179 block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1180 m_OSAllocStat.incBytesAllocated( nSize );
1181 p->setOSAllocated( nSize );
/// Allocates from the active superblock if possible
1186 block_header * alloc_from_active( processor_heap * pProcHeap )
1188 active_tag oldActive;
1189 int nCollision = -1;
1194 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1195 if ( !oldActive.ptr() )
1197 unsigned int nCredits = oldActive.credits();
1198 active_tag newActive ; // default = 0
1199 if ( nCredits != 0 ) {
1200 newActive = oldActive;
1201 newActive.credits( nCredits - 1 );
1203 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1208 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1211 superblock_desc * pDesc = oldActive.ptr();
1213 anchor_tag oldAnchor;
1214 anchor_tag newAnchor;
1216 unsigned int nMoreCredits = 0;
1221 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1223 assert( oldAnchor.avail < pDesc->nCapacity );
1224 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1225 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1228 if ( oldActive.credits() == 0 ) {
1229 // state must be ACTIVE
1230 if ( oldAnchor.count == 0 )
1231 newAnchor.state = SBSTATE_FULL;
1233 nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1234 newAnchor.count -= nMoreCredits;
1237 } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1240 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1242 assert( newAnchor.state != SBSTATE_EMPTY );
1244 if ( newAnchor.state == SBSTATE_FULL )
1245 pProcHeap->stat.incDescFull();
1246 if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1247 update_active( pProcHeap, pDesc, nMoreCredits );
1249 pProcHeap->stat.incAllocFromActive();
// block_header fields do not need to be set up here;
// they were set in alloc_from_new_superblock
1253 assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1254 assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1255 assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1257 return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a partially filled superblock if possible
1261 block_header * alloc_from_partial( processor_heap * pProcHeap )
1264 superblock_desc * pDesc = pProcHeap->get_partial();
1269 anchor_tag oldAnchor;
1270 anchor_tag newAnchor;
1272 unsigned int nMoreCredits = 0;
1274 int nCollision = -1;
1278 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1279 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1280 free_superblock( pDesc );
1284 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1285 newAnchor.count -= nMoreCredits + 1;
1286 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1288 } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1291 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1293 if ( newAnchor.state == SBSTATE_FULL )
1294 pProcHeap->stat.incDescFull();
1296 // Now, the thread is guaranteed to have reserved one or more blocks
1297 // pop reserved block
1303 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1305 assert( oldAnchor.avail < pDesc->nCapacity );
1306 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1307 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1309 } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1312 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1314 assert( newAnchor.state != SBSTATE_EMPTY );
1316 pProcHeap->stat.incAllocFromPartial();
1318 if ( nMoreCredits > 0 )
1319 update_active( pProcHeap, pDesc, nMoreCredits );
// block_header fields do not need to be set up here;
// they were set in alloc_from_new_superblock
1323 assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1324 assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1325 assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1327 return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a new superblock
1331 block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1333 superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1334 assert( pDesc != nullptr );
1335 pDesc->pSB = new_superblock_buffer( pProcHeap );
1337 anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
// Build a singly linked list of free blocks in the superblock
1341 byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1342 unsigned int nNext = 0;
1343 const unsigned int nBlockSize = pDesc->nBlockSize;
1344 for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1345 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1346 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1348 reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
1350 active_tag newActive;
1351 newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1353 anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1354 anchor.state = SBSTATE_ACTIVE;
1355 pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1357 active_tag curActive;
1358 if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1359 pProcHeap->stat.incAllocFromNew();
1360 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1361 return reinterpret_cast<block_header *>( pDesc->pSB );
1364 free_superblock( pDesc );
/// Finds the appropriate processor heap for the given size-class index
1369 processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1371 assert( nSizeClassIndex < m_SizeClassSelector.size() );
1373 unsigned int nProcessorId = m_Topology.current_processor();
1374 assert( nProcessorId < m_nProcessorCount );
1376 if ( nProcessorId >= m_nProcessorCount )
1379 processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1382 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1383 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1387 free_processor_desc( pNewDesc );
1390 return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1393 /// Updates active field of processor heap \p pProcHeap
1394 void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1396 assert( pProcHeap == pDesc->pProcHeap );
1398 active_tag nullActive;
1399 active_tag newActive;
1400 newActive.set( pDesc, nCredits - 1 );
1402 if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1405 // Someone installed another active superblock.
1406 // Return credits to superblock and make it partial
1408 anchor_tag oldAnchor;
1409 anchor_tag newAnchor;
1412 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1413 newAnchor.count += nCredits;
1414 newAnchor.state = SBSTATE_PARTIAL;
1415 } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1417 pDesc->pProcHeap->add_partial( pDesc );
1420 /// Allocates new processor descriptor
1421 processor_desc * new_processor_desc( unsigned int nProcessorId )
1423 CDS_UNUSED( nProcessorId );
1424 processor_desc * pDesc;
1425 const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1428 Processor descriptor layout
1430 proc_desc - 64-byte alignment
1431 page_heap[0] 64-byte alignment
1432 page_heap[1] 64-byte alignment
1434 page_heap[P] 64-byte alignment
1436 proc_heap[0] 64-byte alignment
1437 proc_heap[1] 64-byte alignment
1439 proc_heap[N] 64-byte alignment
1442 const size_t szDesc =
1443 ( sizeof(processor_desc)
1444 + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1449 const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1451 static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1453 pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1455 pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1456 for ( size_t i = 0; i < nPageHeapCount; ++i )
1457 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1459 // initialize processor heaps
1460 pDesc->arrProcHeap =
1461 reinterpret_cast<processor_heap *>(
1462 reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1463 & ~(uintptr_t(c_nAlignment) - 1)
1466 processor_heap * pProcHeap = pDesc->arrProcHeap;
1467 processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1468 for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1469 new (pProcHeap) processor_heap();
1470 pProcHeap->pProcDesc = pDesc;
1471 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1472 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1473 pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1475 pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1481 void free_processor_heap( processor_heap * pProcHeap )
1483 if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1484 superblock_desc * pDesc;
1486 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1488 m_AlignedHeap.free( pDesc );
1491 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1493 free( pPartial->pSB );
1494 m_AlignedHeap.free( pPartial );
1497 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1500 m_AlignedHeap.free( pDesc );
1504 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1505 superblock_desc * pDesc;
1507 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1508 pageHeap.free( pDesc->pSB );
1509 m_AlignedHeap.free( pDesc );
1512 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1514 pageHeap.free( pPartial->pSB );
1515 m_AlignedHeap.free( pPartial );
1518 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1520 pageHeap.free( pDesc->pSB );
1521 m_AlignedHeap.free( pDesc );
1524 pProcHeap->~processor_heap();
1527 /// Frees processor descriptor
1528 void free_processor_desc( processor_desc * pDesc )
1530 const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1532 for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1533 free_processor_heap( pDesc->arrProcHeap + j );
1535 for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1536 m_AlignedHeap.free( pSBDesc );
1538 for (size_t i = 0; i < nPageHeapCount; ++i )
1539 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1541 //m_IntHeap.free( pDesc->pageHeaps );
1542 pDesc->pageHeaps = nullptr;
1544 pDesc->processor_desc::~processor_desc();
1545 m_AlignedHeap.free( pDesc );
1548 /// Allocates new superblock descriptor
1549 superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1552 superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1553 if ( pDesc == nullptr ) {
1554 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1555 assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );
1557 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1559 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1561 pProcHeap->stat.incDescAllocCount();
1563 pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1564 pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1565 assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1566 pDesc->pProcHeap = pProcHeap;
1568 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1570 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1575 /// Allocates superblock page
1576 byte * new_superblock_buffer( processor_heap * pProcHeap )
1578 pProcHeap->stat.incBlockAllocated();
1579 if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1580 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1583 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1587 /// Frees superblock descriptor and its page
1588 void free_superblock( superblock_desc * pDesc )
1590 pDesc->pProcHeap->stat.incBlockDeallocated();
1591 processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1593 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1597 pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1600 pProcDesc->listSBDescFree.push( pDesc );
1603 /// Allocate memory block
1604 block_header * int_alloc(
1605 size_t nSize ///< Size of memory block to allocate in bytes
1608 typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1609 if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1610 return alloc_from_OS( nSize );
1612 assert( nSizeClassIndex < m_SizeClassSelector.size() );
1614 block_header * pBlock;
1615 processor_heap * pProcHeap;
1617 pProcHeap = find_heap( nSizeClassIndex );
1619 return alloc_from_OS( nSize );
1621 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1623 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1625 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1629 pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1631 assert( pBlock != nullptr );
1637 /// Heap constructor
1640 // Explicit libcds initialization is needed since a static object may be constructed
1643 m_nProcessorCount = m_Topology.processor_count();
1644 m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1645 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount ) ; // zero-fill the atomic pointer array; assumes all-zero bytes represent nullptr for atomic<processor_desc *>
1651 The destructor frees all memory allocated by the heap.
1655 for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1656 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1658 free_processor_desc( pDesc );
1661 m_AlignedHeap.free( m_arrProcDesc );
1663 // Explicit termination of libcds
1667 /// Allocate memory block
1669 size_t nSize ///< Size of memory block to allocate in bytes
1672 block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
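// Resulting layout: [block_header][user data, nSize bytes][bound-check trailer]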
1674 // Bound checking is only for our blocks
1675 if ( !pBlock->isOSAllocated() ) {
1676 // the block is allocated from our heap - bound checker is applicable
1677 m_BoundChecker.make_trailer(
1678 reinterpret_cast<byte *>(pBlock + 1),
1679 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1687 /// Free previously allocated memory block
1689 void * pMemory ///< Pointer to memory block to free
1695 block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1696 block_header * pBlock = pRedirect->begin();
1698 if ( pBlock->isOSAllocated() ) {
1699 // Block has been allocated from OS
1700 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1701 m_LargeHeap.free( pBlock );
1705 assert( !pBlock->isAligned() );
1706 superblock_desc * pDesc = pBlock->desc();
1708 m_BoundChecker.check_bounds(
1710 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1715 anchor_tag oldAnchor;
1716 anchor_tag newAnchor;
1717 processor_heap_base * pProcHeap = pDesc->pProcHeap;
1719 pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1721 oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1723 newAnchor = oldAnchor;
1724 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1725 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1728 assert( oldAnchor.state != SBSTATE_EMPTY );
1730 if ( oldAnchor.state == SBSTATE_FULL )
1731 newAnchor.state = SBSTATE_PARTIAL;
1733 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1734 //pProcHeap = pDesc->pProcHeap;
1735 //CDS_COMPILER_RW_BARRIER ; // instruction fence is needed?..
1736 newAnchor.state = SBSTATE_EMPTY;
1739 newAnchor.count += 1;
1740 } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1742 pProcHeap->stat.incFreeCount();
1744 if ( newAnchor.state == SBSTATE_EMPTY ) {
1745 if ( pProcHeap->unlink_partial( pDesc ))
1746 free_superblock( pDesc );
1748 else if (oldAnchor.state == SBSTATE_FULL ) {
1749 assert( pProcHeap != nullptr );
1750 pProcHeap->stat.decDescFull();
1751 pProcHeap->add_partial( pDesc );
1755 /// Reallocate memory block
1757 If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1758 the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1760 If there is not enough available memory to expand the block to the given size,
1761 the original block is left unchanged, and \p nullptr is returned.
1763 Aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
1764 then the return value is \p nullptr and the original block is left unchanged.
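A sketch of the contract (\p myHeap denotes some \ref Heap instance defined elsewhere):
\code
void * p = myHeap.alloc( 100 );
void * q = myHeap.realloc( p, 200 );
if ( !q ) {
// not enough memory: p is still valid and unchanged
}
\endcode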
1767 void * pMemory, ///< Pointer to previously allocated memory block
1768 size_t nNewSize ///< New size of memory block, in bytes
1771 if ( nNewSize == 0 ) {
1776 const size_t nOrigSize = nNewSize;
1777 nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1779 block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
// Reallocation of an aligned block is not possible
1782 if ( pBlock->isAligned() ) {
1787 if ( pBlock->isOSAllocated() ) {
1788 // The block has been allocated from OS
1789 size_t nCurSize = pBlock->getOSAllocSize();
1791 if ( nCurSize >= nNewSize )
1795 void * pNewBuf = alloc( nOrigSize );
1797 memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1803 superblock_desc * pDesc = pBlock->desc();
if ( nNewSize <= pDesc->nBlockSize ) {
1805 // In-place reallocation
1806 m_BoundChecker.make_trailer(
1807 reinterpret_cast<byte *>(pBlock + 1),
1808 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1815 void * pNew = alloc( nNewSize );
1817 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1825 /// Allocate aligned memory block
1826 void * alloc_aligned(
1827 size_t nSize, ///< Size of memory block to allocate in bytes
1828 size_t nAlignment ///< Alignment
1831 if ( nAlignment <= c_nDefaultBlockAlignment ) {
1832 void * p = alloc( nSize );
1833 assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
1837 block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1839 block_header * pRedirect;
1840 if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1841 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1842 assert( pRedirect != pBlock );
1843 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
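// The redirection header sits immediately before the aligned user address;
// flag = 1 marks it as a pointer to the real block head rather than to a descriptor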
1845 assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1851 // Bound checking is only for our blocks
1852 if ( !pBlock->isOSAllocated() ) {
1853 // the block is allocated from our heap - bound checker is applicable
1854 m_BoundChecker.make_trailer(
1855 reinterpret_cast<byte *>(pRedirect + 1),
1856 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1861 return pRedirect + 1;
1864 /// Free aligned memory block previously allocated by \ref alloc_aligned
1866 void * pMemory ///< Pointer to memory block to free
1874 /// Get instant summary statistics
1875 void summaryStat( summary_stat& st )
1877 size_t nProcHeapCount = m_SizeClassSelector.size();
1878 for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1879 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1881 for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1882 processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1884 st.add_procheap_stat( pProcHeap->stat );
1890 st.add_heap_stat( m_OSAllocStat );
1894 }}} // namespace cds::memory::michael
1896 #endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H