#ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
#define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
Michael allocator implementation
[2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
2011.09.07 khizmax Optimization: small page (about 64K) is allocated by Heap::alloc call.
This optimization allows allocating system memory more regularly,
in blocks of 1M, which leads to less memory fragmentation.
2011.01.02 khizmax Created
#include <cds/memory/michael/options.h>
#include <cds/memory/michael/bound_check.h>
#include <cds/memory/michael/procheap_stat.h>
#include <cds/memory/michael/osalloc_stat.h>
#include <cds/os/topology.h>
#include <cds/os/alloc_aligned.h>
#include <cds/lock/spinlock.h>
#include <cds/details/type_padding.h>
#include <cds/details/marked_ptr.h>
#include <cds/container/vyukov_mpmc_cycle_queue.h>
#include <cds/user_setup/cache_line.h>
#include <cds/details/lib.h>
#include <boost/intrusive/list.hpp>
/// Memory-related algorithms: allocators etc.
/// Michael's allocator (class Heap)
\li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
This namespace declares the main class Heap and a number of helper classes.
unsigned int nBlockSize ; ///< block size in bytes
unsigned int nSBSize ; ///< superblock size (64K or 1M)
unsigned int nCapacity ; ///< superblock capacity (nSBSize / nBlockSize)
unsigned int nSBSizeIdx ; ///< internal superblock size index (page index)
/// %Heap based on system \p malloc and \p free functions
/// Allocates a memory block of \p nSize bytes (\p malloc wrapper)
static void * alloc( size_t nSize )
return ::malloc( nSize );
/// Returns a memory block to the system (\p free wrapper)
static void free( void * p )
/// %Heap based on system-provided aligned \p malloc and \p free functions
struct aligned_malloc_heap
/// Allocates an aligned memory block of \p nSize bytes with alignment \p nAlignment
static void * alloc( size_t nSize, size_t nAlignment )
return cds::OS::aligned_malloc( nSize, nAlignment );
/// Frees the aligned memory block \p p that has been previously allocated by an \ref alloc call
static void free( void * p )
cds::OS::aligned_free( p );
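/*
A hypothetical sketch (not part of the original sources): any class exposing this
static alloc/free interface can be used wherever \ref malloc_heap fits, e.g. via
the opt::system_heap option. The counting wrapper below is an illustration only.
\code
struct counting_malloc_heap {
static size_t s_nAllocCount ; // hypothetical allocation counter (not thread-safe)
static void * alloc( size_t nSize )
{
++s_nAllocCount;
return ::malloc( nSize );
}
static void free( void * p )
{
::free( p );
}
};
\endcode
*/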
/// Page heap based on \p Heap
The page heap can allocate memory in page-sized blocks only.
\p Heap may be any heap that provides an interface like \ref malloc_heap.
This class is one of the available implementations of the opt::page_heap option.
template <class Heap = malloc_heap>
class page_allocator: public Heap
typedef Heap base_class;
size_t nPageSize ///< page size in bytes
: m_nPageSize( nPageSize )
/// Allocate new page
return base_class::alloc( m_nPageSize );
/// Free page \p pPage
void free( void * pPage )
base_class::free( pPage );
/// Page cacheable heap
To improve performance this allocator maintains a small list of free pages.
The page heap can allocate memory in page-sized blocks only.
\li \p FreeListCapacity - capacity of the free-list, default value is 64 pages
\li \p Heap may be any heap that provides an interface like \ref malloc_heap.
This class is one of the available implementations of the opt::page_heap option.
template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
class page_cached_allocator: public page_allocator<Heap>
typedef page_allocator<Heap> base_class;
struct make_null_ptr {
void operator ()(void *& p)
p = null_ptr<void *>();
typedef container::VyukovMPMCCycleQueue<
opt::buffer< opt::v::static_buffer<void *, FreeListCapacity> >
, opt::value_cleaner< make_null_ptr >
free_list m_FreeList;
page_cached_allocator(
size_t nPageSize ///< page size in bytes
: base_class( nPageSize )
, m_FreeList( FreeListCapacity )
~page_cached_allocator()
while ( m_FreeList.pop(pPage) )
base_class::free( pPage );
/// Allocate new page
if ( !m_FreeList.pop( pPage ) )
pPage = base_class::alloc();
/// Free page \p pPage
void free( void * pPage )
if ( !m_FreeList.push( pPage ))
base_class::free( pPage );
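/*
A hypothetical usage sketch (illustration only): the page heap is constructed
with a fixed page size and then hands out pages of exactly that size.
\code
cds::memory::michael::page_cached_allocator<> pageHeap( 64 * 1024 );
void * pPage = pageHeap.alloc(); // get a 64K page (possibly from the free-list)
// ... use the page ...
pageHeap.free( pPage ); // return the page; it may be cached in the free-list
\endcode
*/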
/// Implementation of the opt::sizeclass_selector option
The default size-class selector can manage memory blocks of up to 64K.
class CDS_EXPORT_API default_sizeclass_selector
/// Count of different size-classes
static const size_t c_nSizeClassCount = 63;
static const size_t c_nMaxBlockSize = 64 * 1024;
/// Page size of type 0 (64K)
static const unsigned int c_nPage64K = 64 * 1024 - 32;
/// Page size of type 1 (1M)
static const unsigned int c_nPage1M = 1024 * 1024;
static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
static size_class const m_szClass[c_nSizeClassCount];
static unsigned char const m_szClassMap[];
/// Type of size-class index
typedef unsigned int sizeclass_index;
default_sizeclass_selector();
/// "No size class" index
static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
/// Returns size-class count
static sizeclass_index size()
return c_nSizeClassCount;
/// Returns page size in bytes for the given page type \p nPageType
static size_t page_size(size_t nPageType )
assert(false) ; // anything forgotten?..
/// Returns the count of page size-classes
This class supports pages of two types: a 64K page for small objects and a 1M page for other objects.
static size_t pageTypeCount()
/// Returns size-class index for \p nSize
For large blocks that cannot be allocated by Michael's allocator
the function must return \p c_nNoSizeClass.
static sizeclass_index find( size_t nSize )
if ( nSize > c_nMaxBlockSize ) {
// Too large block - allocate from the system
return c_nNoSizeClass;
sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
assert( nSize <= m_szClassBounds[ szClass ] );
assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
/// Gets the details::size_class struct for size-class index \p nIndex
static const size_class * at( sizeclass_index nIndex )
assert( nIndex < size() );
return m_szClass + nIndex;
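/*
A hypothetical illustration (not in the original sources) of how the selector
maps a request size to a size class:
\code
typedef cds::memory::michael::default_sizeclass_selector selector;
selector::sizeclass_index idx = selector::find( 100 );
if ( idx == selector::c_nNoSizeClass ) {
// the request is too large (> 64K) and would be forwarded to the OS
}
else {
// at(idx) describes the size class that will actually serve the request
assert( selector::at( idx )->nBlockSize >= 100 );
}
\endcode
*/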
struct free_list_tag;
typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > > free_list_locked_hook;
struct partial_list_tag;
typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > > partial_list_locked_hook;
struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
/// List of free superblock descriptors
This class is an implementation of the \ref opt::free_list option
template <class Lock, class T = details::intrusive_superblock_desc>
class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
typedef details::free_list_locked_hook item_hook;
typedef Lock lock_type;
typedef cds::lock::scoped_lock<lock_type> auto_lock;
mutable lock_type m_access;
/// Rebinds to another item type \p T2
typedef free_list_locked<Lock, T2> other ; ///< rebind result
/// Push superblock descriptor to the free-list
void push( T * pDesc )
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
auto_lock al(m_access);
base_class::push_back( *pDesc );
/// Pop superblock descriptor from the free-list
auto_lock al(m_access);
if ( base_class::empty() )
return null_ptr<T *>();
T& rDesc = base_class::front();
base_class::pop_front();
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
/// Returns the current count of superblocks in the free-list
auto_lock al(m_access);
return base_class::size();
/// List of partially filled superblock descriptors
This class is an implementation of the \ref opt::partial_list option
template <class Lock, class T = details::intrusive_superblock_desc>
class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
typedef details::partial_list_locked_hook item_hook;
typedef Lock lock_type;
typedef cds::lock::scoped_lock<lock_type> auto_lock;
mutable lock_type m_access;
/// Rebinds to another item type \p T2
typedef partial_list_locked<Lock, T2> other ; ///< rebind result
/// Push a superblock \p pDesc to the list
void push( T * pDesc )
auto_lock al( m_access );
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
base_class::push_back( *pDesc );
/// Pop a superblock from the list
auto_lock al( m_access );
if ( base_class::empty() )
return null_ptr<T *>();
T& rDesc = base_class::front();
base_class::pop_front();
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
/// Removes the \p pDesc descriptor from the list
bool unlink( T * pDesc )
assert( pDesc != null_ptr<T *>() );
auto_lock al( m_access );
// !inited(pDesc) means that pDesc is currently linked into the partial list
if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
base_class::erase( base_class::iterator_to( *pDesc ) );
/// Count of elements in the list
auto_lock al( m_access );
return base_class::size();
/// Summary processor heap statistics
Summary heap statistics for use with the Heap::summaryStat function.
size_t nAllocFromActive ; ///< Event count of allocations from the active superblock
size_t nAllocFromPartial ; ///< Event count of allocations from a partial superblock
size_t nAllocFromNew ; ///< Event count of allocations from a new superblock
size_t nFreeCount ; ///< Count of \p free function calls
size_t nPageAllocCount ; ///< Count of pages (superblocks) allocated
size_t nPageDeallocCount ; ///< Count of pages (superblocks) deallocated
size_t nDescAllocCount ; ///< Count of superblock descriptors
size_t nDescFull ; ///< Count of full superblocks
atomic64u_t nBytesAllocated ; ///< Count of allocated bytes (for heap-managed memory blocks)
atomic64u_t nBytesDeallocated ; ///< Count of deallocated bytes (for heap-managed memory blocks)
size_t nSysAllocCount ; ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
size_t nSysFreeCount ; ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesAllocated ; ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesDeallocated; ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
// Internal contention indicators
/// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active Heap internal function
Contention indicator. The lower the value, the better.
size_t nActiveDescCASFailureCount;
/// CAS failure counter for updating the anchor field of the active superblock in the \p alloc_from_active Heap internal function
Contention indicator. The lower the value, the better.
size_t nActiveAnchorCASFailureCount;
/// CAS failure counter for updating the anchor field of a partial superblock while reserving blocks in the \p alloc_from_partial Heap internal function
Contention indicator. The lower the value, the better.
size_t nPartialDescCASFailureCount;
/// CAS failure counter for updating the anchor field of a partial superblock while popping a reserved block in the \p alloc_from_partial Heap internal function
Contention indicator. The lower the value, the better.
size_t nPartialAnchorCASFailureCount;
/// Constructs empty statistics. All counters are zero.
/// Difference statistics
This operator computes the difference between \p *this and \p stat and stores the result in \p *this.
summary_stat& operator -=( const summary_stat& stat )
nAllocFromActive -= stat.nAllocFromActive;
nAllocFromPartial -= stat.nAllocFromPartial;
nAllocFromNew -= stat.nAllocFromNew;
nFreeCount -= stat.nFreeCount;
nPageAllocCount -= stat.nPageAllocCount;
nPageDeallocCount -= stat.nPageDeallocCount;
nDescAllocCount -= stat.nDescAllocCount;
nDescFull -= stat.nDescFull;
nBytesAllocated -= stat.nBytesAllocated;
nBytesDeallocated -= stat.nBytesDeallocated;
nSysAllocCount -= stat.nSysAllocCount;
nSysFreeCount -= stat.nSysFreeCount;
nSysBytesAllocated -= stat.nSysBytesAllocated;
nSysBytesDeallocated -= stat.nSysBytesDeallocated;
nActiveDescCASFailureCount -= stat.nActiveDescCASFailureCount;
nActiveAnchorCASFailureCount -= stat.nActiveAnchorCASFailureCount;
nPartialDescCASFailureCount -= stat.nPartialDescCASFailureCount;
nPartialAnchorCASFailureCount -= stat.nPartialAnchorCASFailureCount;
/// Clears statistics
All counters are set to zero.
memset( this, 0, sizeof(*this));
template <typename Stat>
summary_stat& add_procheap_stat( const Stat& stat )
nAllocFromActive += stat.allocFromActive();
nAllocFromPartial += stat.allocFromPartial();
nAllocFromNew += stat.allocFromNew();
nFreeCount += stat.freeCount();
nPageAllocCount += stat.blockAllocated();
nPageDeallocCount += stat.blockDeallocated();
nDescAllocCount += stat.descAllocCount();
nDescFull += stat.descFull();
nBytesAllocated += stat.allocatedBytes();
nBytesDeallocated += stat.deallocatedBytes();
nActiveDescCASFailureCount += stat.activeDescCASFailureCount();
nActiveAnchorCASFailureCount += stat.activeAnchorCASFailureCount();
nPartialDescCASFailureCount += stat.partialDescCASFailureCount();
nPartialAnchorCASFailureCount += stat.partialAnchorCASFailureCount();
template <typename Stat>
summary_stat& add_heap_stat( const Stat& stat )
nSysAllocCount += stat.allocCount();
nSysFreeCount += stat.freeCount();
nSysBytesAllocated += stat.allocatedBytes();
nSysBytesDeallocated+= stat.deallocatedBytes();
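/*
A hypothetical usage sketch (illustration only): thanks to operator-=, two
snapshots taken with Heap::summaryStat can be subtracted to get the statistics
of a code region.
\code
cds::memory::michael::Heap<> heap;
cds::memory::michael::summary_stat stBefore, stAfter;
heap.summaryStat( stBefore );
// ... allocations and deallocations on heap ...
heap.summaryStat( stAfter );
stAfter -= stBefore ; // stAfter now holds the per-region difference
\endcode
*/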
/// Michael's allocator
This class provides the base functionality of Michael's allocator. It does not provide
the interface described by \p std::allocator; therefore, we call it a heap, not an allocator.
The heap interface is closer to the semantics of the \p malloc / \p free system functions.
The heap supports allocation of aligned and unaligned data.
The algorithm is based on a simplified version of
\li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
that, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
\li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis
This is a powerful, scalable, fully customizable heap with a lock-free fast path
that has been developed specifically for multi-threading.
With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
You can manually bind any of your threads to any arena ("processor"). With the opt::sizeclass_selector option you can control
allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation
like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
The opt::check_bounds feature can help you find memory buffer overflows.
Brief algorithm description from Michael's work:
Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
the heap is composed of large superblocks (64 KB or 1 MB in size). Each superblock is divided into multiple equal-sized blocks.
Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
in a lock-free manner.
Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
available blocks of its original superblock by atomically updating its descriptor.
<b>Constraint</b>: one superblock may contain up to 2048 blocks. This restriction limits the maximum block size.
Available \p Options:
- \ref opt::sys_topology - class that describes the system topology needed for the allocator.
Default is \p cds::OS::topology (see cds::OS::Win32::topology for the interface description)
- \ref opt::system_heap - option setter for an allocator for large blocks that are allocated directly from the OS.
Default is \ref malloc_heap.
- \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
Default is \ref aligned_malloc_heap
- \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
Default is \ref page_cached_allocator
- \ref opt::sizeclass_selector - option setter for a class used to select the appropriate size-class
for an incoming allocation request.
Default is \ref default_sizeclass_selector
- \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
Default is \ref free_list_locked
- \ref opt::partial_list - option setter for a class to manage a list of partially filled superblocks
Default is \ref partial_list_locked
- \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
that is maintained by the heap.
Default is \ref procheap_empty_stat
- \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large block
allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector)
and is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated from the
OS directly. The \p os_allocated_stat option sets a class to gather statistics for such large blocks.
Default is \ref os_allocated_empty
- \ref opt::check_bounds - a bound checker.
Default is no bound checker (cds::opt::none)
The heap is the basic building block for your allocator or <tt>operator new</tt> implementation.
#include <cds/memory/michael/allocator.h>
// Heap with explicitly defined options:
cds::memory::michael::Heap<
opt::aligned_heap< aligned_malloc_heap >,
opt::page_heap< page_cached_allocator<16, malloc_heap> >
> myHeap;
// Heap with default options:
cds::memory::michael::Heap<> myDefHeap;
\par How to make a std-like allocator
There are serious differences between the heap and the <tt>std::allocator</tt> interface:
- Heap is stateful, while \p std::allocator is stateless.
- Heap has many more template parameters than \p std::allocator
- Heap has a low-level interface for memory allocation only, unlike the allocator
interface, which can construct/destroy objects of any type T.
To convert the heap interface into a \p std::allocator -like interface you should:
- Declare an object of class cds::memory::michael::Heap specifying the necessary
template parameters; this is usually a static object
- Create a class with the \p std::allocator interface that uses the functions of the heap.
#include <cds/memory/michael/allocator.h>
template <typename T>
class MichaelAllocator
typedef std::allocator<T> std_allocator;
typedef cds::memory::michael::Heap<> michael_heap;
// Michael heap static object
static michael_heap s_Heap;
// Declare typedefs from std::allocator
typedef typename std_allocator::const_pointer const_pointer;
typedef typename std_allocator::pointer pointer;
typedef typename std_allocator::const_reference const_reference;
typedef typename std_allocator::reference reference;
typedef typename std_allocator::difference_type difference_type;
typedef typename std_allocator::size_type size_type;
typedef typename std_allocator::value_type value_type;
// Allocation function
pointer allocate( size_type _Count, const void* _Hint )
return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
// Deallocation function
void deallocate( pointer _Ptr, size_type _Count )
// Other std::allocator specific functions: address, construct, destroy, etc.
// Rebinding allocator to another type
template <class _Other>
typedef MichaelAllocator<_Other> other;
template <typename T>
typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
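// A hypothetical usage sketch (illustration only): such an allocator can be
// used with any standard container:
std::vector<int, MichaelAllocator<int> > v;
v.push_back( 42 ); // the memory comes from the shared michael_heap object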
#ifdef CDS_CXX11_VARIADIC_TEMPLATE_SUPPORT
template <typename... Options>
typename O1 = opt::none,
typename O2 = opt::none,
typename O3 = opt::none,
typename O4 = opt::none,
typename O5 = opt::none,
typename O6 = opt::none,
typename O7 = opt::none,
typename O8 = opt::none,
typename O9 = opt::none,
typename O10= opt::none
static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
static const unsigned int c_nDefaultBlockAlignment = 8;
struct default_options {
typedef cds::OS::topology sys_topology;
typedef malloc_heap system_heap;
typedef page_cached_allocator<> page_heap;
typedef aligned_malloc_heap aligned_heap;
typedef default_sizeclass_selector sizeclass_selector;
typedef free_list_locked<cds::lock::Spin> free_list;
typedef partial_list_locked<cds::lock::Spin> partial_list;
typedef procheap_empty_stat procheap_stat;
typedef os_allocated_empty os_allocated_stat;
typedef cds::opt::none check_bounds;
#ifdef CDS_CXX11_VARIADIC_TEMPLATE_SUPPORT
typedef typename opt::make_options<default_options, Options...>::type options;
typedef typename opt::make_options<default_options, O1, O2, O3, O4, O5, O6, O7, O8, O9, O10 >::type options;
typedef unsigned char byte;
typedef typename options::sys_topology sys_topology ; ///< effective system topology
typedef typename options::system_heap system_heap ; ///< effective system heap
typedef typename options::aligned_heap aligned_heap ; ///< effective aligned heap
typedef typename options::sizeclass_selector sizeclass_selector ; ///< effective sizeclass selector
typedef typename options::page_heap page_heap ; ///< effective page heap
typedef typename options::procheap_stat procheap_stat ; ///< effective processor heap statistics
typedef typename options::os_allocated_stat os_allocated_stat ; ///< effective OS-allocated memory statistics
typedef details::bound_checker_selector< typename options::check_bounds > bound_checker ; ///< effective bound checker
// forward declarations
struct superblock_desc;
struct processor_heap_base;
struct processor_desc;
/// Superblock states
A superblock can be in one of four states: \p ACTIVE, \p FULL,
\p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
superblock in a heap, or if a thread intends to try to install it
as such. A superblock is \p FULL if all its blocks are either allocated
or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
and contains unreserved available blocks. A superblock is
\p EMPTY if all its blocks are free and it is not \p ACTIVE.
enum superblock_state {
SBSTATE_ACTIVE = 0, ///< superblock is active
SBSTATE_FULL = 1, ///< superblock is full
SBSTATE_PARTIAL = 2, ///< superblock is partially allocated
SBSTATE_EMPTY = 3 ///< superblock is empty and may be freed
static const size_t c_nMaxBlockInSuperBlock = 1024 * 2 ; ///< Max count of blocks in superblock (2 ** 11)
/// Anchor of the superblock descriptor. Updated by CAS
unsigned long long avail:11 ; ///< index of first available block in the superblock
unsigned long long count:11 ; ///< number of unreserved blocks in the superblock
unsigned long long state: 2 ; ///< state of the superblock (see \ref superblock_state enum)
unsigned long long tag:40 ; ///< ABA prevention tag
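// Note: 11 + 11 + 2 + 40 = 64 bits, so the whole anchor fits into a single
// 64-bit word and can be updated by one CAS; the 11-bit fields match
// c_nMaxBlockInSuperBlock = 2048 blocks per superblock.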
/// Superblock descriptor
struct superblock_desc
: public options::free_list::item_hook
, public options::partial_list::item_hook
CDS_ATOMIC::atomic<anchor_tag> anchor ; ///< anchor, see \ref anchor_tag
byte * pSB ; ///< ptr to superblock
processor_heap_base * pProcHeap ; ///< pointer to owner processor heap
unsigned int nBlockSize ; ///< block size in bytes
unsigned int nCapacity ; ///< superblock size/block size
: pSB( null_ptr<byte *>() )
, pProcHeap( null_ptr<processor_heap_base *>() )
typedef typename options::free_list::template rebind<superblock_desc>::other free_list;
typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
#if CDS_BUILD_BITS == 32
/// Allocated block header
Each allocated block has an 8-byte header.
The header contains a pointer to the owner superblock descriptor and the redirection flag.
If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
| blockHeader | [8 byte] pointer to owner superblock (flag=0)
| | <- memory allocated
If the block has been allocated by \ref alloc_aligned, then the pointer returned to the caller may have to be shifted to satisfy the requested alignment.
In this case the redirection flag is 1 and the block's structure is:
+-> | blockHeader | [8 byte] pointer to owner superblock (flag=0)
+-- | blockHeader | [8 byte] pointer to block head (flag=1)
| | <- memory allocated
superblock_desc * pDesc ; // pointer to superblock descriptor
atomic32u_t nSize ; // block size (allocated from OS)
void set( superblock_desc * pdesc, atomic32u_t isAligned )
nFlags = isAligned ? bitAligned : 0;
superblock_desc * desc()
assert( (nFlags & bitOSAllocated) == 0 );
return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
block_header * begin()
return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
bool isAligned() const
return (nFlags & bitAligned) != 0;
bool isOSAllocated() const
return (nFlags & bitOSAllocated) != 0;
void setOSAllocated( size_t sz )
nFlags = bitOSAllocated;
size_t getOSAllocSize() const
assert( isOSAllocated() );
#elif CDS_BUILD_BITS == 64
typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
// If bitOSAllocated is set, pDesc contains the size of the memory block
marked_desc_ptr pDesc;
void set( superblock_desc * pdesc, atomic32u_t isAligned )
pDesc = marked_desc_ptr( pdesc, isAligned );
superblock_desc * desc()
assert( !isOSAllocated() );
return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
block_header * begin()
return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
bool isAligned() const
return (pDesc.bits() & bitAligned) != 0;
bool isOSAllocated() const
return (pDesc.bits() & bitOSAllocated) != 0;
void setOSAllocated( size_t nSize )
pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
size_t getOSAllocSize() const
assert( isOSAllocated() );
return reinterpret_cast<uptr_atomic_t>( pDesc.ptr() ) >> 2;
# error "Unexpected value of CDS_BUILD_BITS"
#endif // CDS_BUILD_BITS
struct free_block_header: block_header {
unsigned int nNextFree;
#if CDS_BUILD_BITS == 32
/// Processor heap's \p active field
The \p active field in the processor heap structure is primarily a pointer to the descriptor
of the active superblock owned by the processor heap. If the value of \p active is not \p NULL, it is
guaranteed that the active superblock has at least one block available for reservation.
Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
of blocks available for reservation in the active superblock less one. That is, if the value
of credits is n, then the active superblock contains n+1 blocks available for reservation
through the \p active field. Note that the number of blocks in a superblock is not limited
to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
(i.e., when \p active != \p NULL and \p credits > 0), the thread reads \p active and then
atomically decrements credits while validating that the active superblock is still valid.
superblock_desc * pDesc;
atomic32u_t nCredits;
static const unsigned int c_nMaxCredits = 0 - 1;
CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
: pDesc(null_ptr<superblock_desc *>())
# ifdef CDS_CXX11_EXPLICITLY_DEFAULTED_FUNCTION_SUPPORT
active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
active_tag& operator=(active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
# if defined(CDS_MOVE_SEMANTICS_SUPPORT) && !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
/// Returns pointer to superblock descriptor
superblock_desc * ptr() const
/// Sets superblock descriptor
void ptr( superblock_desc * p )
unsigned int credits() const
void credits( unsigned int n )
pDesc = null_ptr<superblock_desc *>();
void set( superblock_desc * pSB, unsigned int n )
#elif CDS_BUILD_BITS == 64
static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F
typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
marked_desc_ptr pDesc;
active_tag() CDS_NOEXCEPT
: pDesc( null_ptr<superblock_desc *>() )
# ifdef CDS_CXX11_EXPLICITLY_DEFAULTED_FUNCTION_SUPPORT
// Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
//active_tag() CDS_NOEXCEPT_DEFAULTED = default;
active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
active_tag& operator=(active_tag const&) CDS_NOEXCEPT_DEFAULTED = default;
# if defined(CDS_MOVE_SEMANTICS_SUPPORT) && !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
superblock_desc * ptr() const
void ptr( superblock_desc * p )
assert( (reinterpret_cast<uptr_atomic_t>(p) & c_nMaxCredits) == 0 );
pDesc = marked_desc_ptr( p, pDesc.bits());
unsigned int credits()
return (unsigned int) pDesc.bits();
void credits( unsigned int n )
assert( n <= c_nMaxCredits );
pDesc = marked_desc_ptr( pDesc.ptr(), n );
pDesc = marked_desc_ptr();
void set( superblock_desc * pSB, unsigned int n )
assert( (reinterpret_cast<uptr_atomic_t>(pSB) & c_nMaxCredits) == 0 );
pDesc = marked_desc_ptr( pSB, n );
# error "Unexpected value of CDS_BUILD_BITS"
#endif // CDS_BUILD_BITS
struct processor_heap_base
CDS_DATA_ALIGNMENT(8) CDS_ATOMIC::atomic<active_tag> active; ///< pointer to the descriptor of the active superblock owned by the processor heap
processor_desc * pProcDesc ; ///< pointer to parent processor descriptor
const size_class * pSizeClass ; ///< pointer to size class
CDS_ATOMIC::atomic<superblock_desc *> pPartial ; ///< pointer to partially filled superblock (may be NULL)
partial_list partialList ; ///< list of partially filled superblocks owned by the processor heap
unsigned int nPageIdx ; ///< page size-class index, \ref c_nPageSelfAllocation - "small page"
/// Small page marker
If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
This optimization allows allocating system memory more regularly, in blocks of 1M, which leads
to less memory fragmentation.
static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
procheap_stat stat ; ///< heap statistics
//processor_heap_statistics stat;
processor_heap_base() CDS_NOEXCEPT
: pProcDesc( null_ptr<processor_desc *>() )
, pSizeClass( null_ptr<size_class *>() )
, pPartial( null_ptr<superblock_desc *>() )
assert( (reinterpret_cast<uptr_atomic_t>(this) & (c_nAlignment - 1)) == 0 );
/// Get partial superblock owned by the processor heap
superblock_desc * get_partial()
superblock_desc * pDesc = pPartial.load(CDS_ATOMIC::memory_order_acquire);
pDesc = partialList.pop();
} while ( !pPartial.compare_exchange_weak( pDesc, null_ptr<superblock_desc *>(), CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed));
//assert( pDesc == NULL || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
//assert( pDesc == NULL || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
/// Add partial superblock \p pDesc to the list
void add_partial( superblock_desc * pDesc )
assert( pPartial != pDesc );
//assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
superblock_desc * pCur = null_ptr<superblock_desc *>();
if ( !pPartial.compare_exchange_strong(pCur, pDesc, CDS_ATOMIC::memory_order_acq_rel, CDS_ATOMIC::memory_order_relaxed) )
partialList.push( pDesc );
/// Remove superblock \p pDesc from the list of partial superblocks
bool unlink_partial( superblock_desc * pDesc )
return partialList.unlink( pDesc );
/// Aligned processor heap structure
typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
/// Processor descriptor
struct processor_desc
processor_heap * arrProcHeap ; ///< array of processor heaps
free_list listSBDescFree ; ///< list of free superblock descriptors
page_heap * pageHeaps ; ///< array of page heaps (one for each page size)
: arrProcHeap( null_ptr<processor_heap *>() )
, pageHeaps( null_ptr<page_heap *>() )
sys_topology m_Topology ; ///< System topology
system_heap m_LargeHeap ; ///< Heap for large blocks
aligned_heap m_AlignedHeap ; ///< Internal aligned heap
sizeclass_selector m_SizeClassSelector ; ///< Size-class selector
CDS_ATOMIC::atomic<processor_desc *> * m_arrProcDesc ; ///< array of pointers to the processor descriptors
unsigned int m_nProcessorCount ; ///< Processor count
bound_checker m_BoundChecker ; ///< Bound checker
os_allocated_stat m_OSAllocStat ; ///< OS-allocated memory statistics
/// Allocates a large block from system memory
block_header * alloc_from_OS( size_t nSize )
block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
m_OSAllocStat.incBytesAllocated( nSize );
p->setOSAllocated( nSize );
/// Allocates from the active superblock if possible
block_header * alloc_from_active( processor_heap * pProcHeap )
active_tag oldActive;
int nCollision = -1;
oldActive = pProcHeap->active.load(CDS_ATOMIC::memory_order_acquire);
if ( !oldActive.ptr() )
return null_ptr<block_header *>();
unsigned int nCredits = oldActive.credits();
active_tag newActive ; // default = 0
if ( nCredits != 0 ) {
newActive = oldActive;
newActive.credits( nCredits - 1 );
if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ))
pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
superblock_desc * pDesc = oldActive.ptr();
anchor_tag oldAnchor;
anchor_tag newAnchor;
unsigned int nMoreCredits = 0;
newAnchor = oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
assert( oldAnchor.avail < pDesc->nCapacity );
pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
if ( oldActive.credits() == 0 ) {
// state must be ACTIVE
if ( oldAnchor.count == 0 )
newAnchor.state = SBSTATE_FULL;
nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
newAnchor.count -= nMoreCredits;
} while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ));
pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
assert( newAnchor.state != SBSTATE_EMPTY );
if ( newAnchor.state == SBSTATE_FULL )
pProcHeap->stat.incDescFull();
if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
update_active( pProcHeap, pDesc, nMoreCredits );
pProcHeap->stat.incAllocFromActive();
// block_header fields need not be set up here;
// they were set in alloc_from_new_superblock
assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a partially filled superblock if possible
block_header * alloc_from_partial( processor_heap * pProcHeap )
superblock_desc * pDesc = pProcHeap->get_partial();
return null_ptr<block_header *>();
anchor_tag oldAnchor;
anchor_tag newAnchor;
unsigned int nMoreCredits = 0;
int nCollision = -1;
newAnchor = oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
if ( oldAnchor.state == SBSTATE_EMPTY ) {
free_superblock( pDesc );
nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
newAnchor.count -= nMoreCredits + 1;
newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
} while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed) );
pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
if ( newAnchor.state == SBSTATE_FULL )
pProcHeap->stat.incDescFull();
// Now the thread is guaranteed to have reserved one or more blocks
// pop a reserved block
newAnchor = oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
assert( oldAnchor.avail < pDesc->nCapacity );
pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
} while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed) );
pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
assert( newAnchor.state != SBSTATE_EMPTY );
pProcHeap->stat.incAllocFromPartial();
if ( nMoreCredits > 0 )
update_active( pProcHeap, pDesc, nMoreCredits );
// block_header fields need not be set up here;
// they were set in alloc_from_new_superblock
assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a new superblock
block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
superblock_desc * pDesc = new_superblock_desc( pProcHeap );
assert( pDesc != null_ptr<superblock_desc *>() );
pDesc->pSB = new_superblock_buffer( pProcHeap );
anchor_tag anchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_relaxed);
// Build a singly linked list of free blocks in the superblock
byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
unsigned int nNext = 0;
const unsigned int nBlockSize = pDesc->nBlockSize;
for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
active_tag newActive;
newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
anchor.state = SBSTATE_ACTIVE;
pDesc->anchor.store(anchor, CDS_ATOMIC::memory_order_relaxed);
active_tag curActive;
if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed )) {
pProcHeap->stat.incAllocFromNew();
//reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
return reinterpret_cast<block_header *>( pDesc->pSB );
free_superblock( pDesc );
return null_ptr<block_header *>();
/// Finds the appropriate processor heap based on the selected size-class
processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
assert( nSizeClassIndex < m_SizeClassSelector.size() );
unsigned int nProcessorId = m_Topology.current_processor();
assert( nProcessorId < m_nProcessorCount );
if ( nProcessorId >= m_nProcessorCount )
processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( CDS_ATOMIC::memory_order_relaxed );
processor_desc * pNewDesc = new_processor_desc( nProcessorId );
if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ) ) {
free_processor_desc( pNewDesc );
return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
/// Updates the \p active field of processor heap \p pProcHeap
void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
assert( pProcHeap == pDesc->pProcHeap );
active_tag nullActive;
active_tag newActive;
newActive.set( pDesc, nCredits - 1 );
if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, CDS_ATOMIC::memory_order_seq_cst, CDS_ATOMIC::memory_order_relaxed ) )
// Someone has installed another active superblock.
// Return the credits to the superblock and make it partial
anchor_tag oldAnchor;
anchor_tag newAnchor;
newAnchor = oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
newAnchor.count += nCredits;
newAnchor.state = SBSTATE_PARTIAL;
} while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ));
pDesc->pProcHeap->add_partial( pDesc );
/// Allocates a new processor descriptor
processor_desc * new_processor_desc( unsigned int nProcessorId )
processor_desc * pDesc;
const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
Processor descriptor layout
proc_desc - 64-byte alignment
page_heap[0] 64-byte alignment
page_heap[1] 64-byte alignment
page_heap[P] 64-byte alignment
proc_heap[0] 64-byte alignment
proc_heap[1] 64-byte alignment
proc_heap[N] 64-byte alignment
const size_t szDesc =
( sizeof(processor_desc)
+ sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
for ( size_t i = 0; i < nPageHeapCount; ++i )
new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
// initialize processor heaps
pDesc->arrProcHeap =
reinterpret_cast<processor_heap *>(
reinterpret_cast<uptr_atomic_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
& ~(uptr_atomic_t(c_nAlignment) - 1)
processor_heap * pProcHeap = pDesc->arrProcHeap;
processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
new (pProcHeap) processor_heap();
pProcHeap->pProcDesc = pDesc;
pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
void free_processor_heap( processor_heap * pProcHeap )
if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
superblock_desc * pDesc;
for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
m_AlignedHeap.free( pDesc );
superblock_desc * pPartial = pProcHeap->pPartial.load(CDS_ATOMIC::memory_order_relaxed);
free( pPartial->pSB );
m_AlignedHeap.free( pPartial );
pDesc = pProcHeap->active.load(CDS_ATOMIC::memory_order_relaxed).ptr();
m_AlignedHeap.free( pDesc );
page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
superblock_desc * pDesc;
for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
pageHeap.free( pDesc->pSB );
m_AlignedHeap.free( pDesc );
superblock_desc * pPartial = pProcHeap->pPartial.load(CDS_ATOMIC::memory_order_relaxed);
pageHeap.free( pPartial->pSB );
m_AlignedHeap.free( pPartial );
pDesc = pProcHeap->active.load(CDS_ATOMIC::memory_order_relaxed).ptr();
pageHeap.free( pDesc->pSB );
m_AlignedHeap.free( pDesc );
pProcHeap->~processor_heap();
/// Frees processor descriptor
void free_processor_desc( processor_desc * pDesc )
const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
free_processor_heap( pDesc->arrProcHeap + j );
for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
m_AlignedHeap.free( pSBDesc );
for (size_t i = 0; i < nPageHeapCount; ++i )
(pDesc->pageHeaps + i)->page_heap::~page_heap();
//m_IntHeap.free( pDesc->pageHeaps );
pDesc->pageHeaps = null_ptr<page_heap *>();
pDesc->processor_desc::~processor_desc();
m_AlignedHeap.free( pDesc );
/// Allocates a new superblock descriptor
superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
if ( pDesc == null_ptr<superblock_desc *>() ) {
pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
assert( (uptr_atomic_t(pDesc) & (c_nAlignment - 1)) == 0 );
anchor = pDesc->anchor.load( CDS_ATOMIC::memory_order_relaxed );
pDesc->anchor.store( anchor, CDS_ATOMIC::memory_order_relaxed );
pProcHeap->stat.incDescAllocCount();
pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
pDesc->pProcHeap = pProcHeap;
anchor = pDesc->anchor.load( CDS_ATOMIC::memory_order_relaxed );
pDesc->anchor.store( anchor, CDS_ATOMIC::memory_order_relaxed );
/// Allocates a superblock page
byte * new_superblock_buffer( processor_heap * pProcHeap )
pProcHeap->stat.incBlockAllocated();
if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
/// Frees a superblock descriptor and its page
void free_superblock( superblock_desc * pDesc )
pDesc->pProcHeap->stat.incBlockDeallocated();
processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
pProcDesc->listSBDescFree.push( pDesc );
/// Allocate memory block
block_header * int_alloc(
size_t nSize ///< Size of memory block to allocate in bytes
typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
return alloc_from_OS( nSize );
assert( nSizeClassIndex < m_SizeClassSelector.size() );
block_header * pBlock;
processor_heap * pProcHeap;
pProcHeap = find_heap( nSizeClassIndex );
return alloc_from_OS( nSize );
if ( (pBlock = alloc_from_active( pProcHeap )) != null_ptr<block_header *>() )
if ( (pBlock = alloc_from_partial( pProcHeap )) != null_ptr<block_header *>() )
if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != null_ptr<block_header *>() )
pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
assert( pBlock != null_ptr<block_header *>() );
/// Heap constructor
// Explicit libcds initialization is needed since a static object may be constructed
m_nProcessorCount = m_Topology.processor_count();
m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
CDS_ATOMIC::atomic<processor_desc *>[ m_nProcessorCount ];
memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount ) ; // ?? memset for atomic<>
The destructor frees all memory allocated by the heap.
for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
processor_desc * pDesc = m_arrProcDesc[i].load(CDS_ATOMIC::memory_order_relaxed);
free_processor_desc( pDesc );
m_AlignedHeap.free( m_arrProcDesc );
// Explicit termination of libcds
/// Allocate memory block
size_t nSize ///< Size of memory block to allocate in bytes
block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
// Bound checking is only for our blocks
if ( !pBlock->isOSAllocated() ) {
// the block is allocated from our heap - bound checker is applicable
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pBlock + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
/// Frees a previously allocated memory block
void * pMemory ///< Pointer to memory block to free
block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
block_header * pBlock = pRedirect->begin();
if ( pBlock->isOSAllocated() ) {
// The block has been allocated from the OS
m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
m_LargeHeap.free( pBlock );
assert( !pBlock->isAligned() );
superblock_desc * pDesc = pBlock->desc();
m_BoundChecker.check_bounds(
reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
anchor_tag oldAnchor;
anchor_tag newAnchor;
processor_heap_base * pProcHeap = pDesc->pProcHeap;
pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
newAnchor = oldAnchor;
reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
assert( oldAnchor.state != SBSTATE_EMPTY );
if ( oldAnchor.state == SBSTATE_FULL )
newAnchor.state = SBSTATE_PARTIAL;
if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
//pProcHeap = pDesc->pProcHeap;
//CDS_COMPILER_RW_BARRIER ; // instruction fence is needed?..
newAnchor.state = SBSTATE_EMPTY;
newAnchor.count += 1;
} while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ) );
pProcHeap->stat.incFreeCount();
if ( newAnchor.state == SBSTATE_EMPTY ) {
if ( pProcHeap->unlink_partial( pDesc ))
free_superblock( pDesc );
else if (oldAnchor.state == SBSTATE_FULL ) {
assert( pProcHeap != null_ptr<processor_heap_base *>() );
pProcHeap->stat.decDescFull();
pProcHeap->add_partial( pDesc );
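/*
A hypothetical usage sketch (illustration only) of the alloc/free pair above:
\code
cds::memory::michael::Heap<> heap;
void * p = heap.alloc( 128 ); // served from a size-class superblock
// ... use up to 128 bytes ...
heap.free( p ); // pushed back to its superblock; the superblock may become partial or empty
\endcode
*/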
/// Reallocate memory block
If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
the return value is \p NULL, and \p pMemory is left pointing at a freed block.
If there is not enough available memory to expand the block to the given size,
the original block is left unchanged, and \p NULL is returned.
An aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
then the return value is \p NULL and the original block is left unchanged.
void * pMemory, ///< Pointer to previously allocated memory block
size_t nNewSize ///< New size of memory block, in bytes
if ( nNewSize == 0 ) {
return null_ptr<void *>();
const size_t nOrigSize = nNewSize;
nNewSize += sizeof(block_header) + bound_checker::trailer_size;
block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
// Reallocation of an aligned block is not possible
if ( pBlock->isAligned() ) {
return null_ptr<void *>();
if ( pBlock->isOSAllocated() ) {
// The block has been allocated from the OS
size_t nCurSize = pBlock->getOSAllocSize();
if ( nCurSize >= nNewSize )
void * pNewBuf = alloc( nOrigSize );
memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
superblock_desc * pDesc = pBlock->desc();
if ( nNewSize <= pDesc->nBlockSize ) {
// In-place reallocation: the new size fits into the current block
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pBlock + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
void * pNew = alloc( nNewSize );
memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
return null_ptr<void *>();
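/*
A hypothetical usage sketch (illustration only) of the realloc semantics documented above:
\code
cds::memory::michael::Heap<> heap;
void * p = heap.alloc( 100 );
void * q = heap.realloc( p, 200 );
if ( q )
p = q ; // the block has been grown (in place or by moving)
// else: not enough memory, p is still valid and unchanged
\endcode
*/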
/// Allocate aligned memory block
void * alloc_aligned(
size_t nSize, ///< Size of memory block to allocate in bytes
size_t nAlignment ///< Alignment
if ( nAlignment <= c_nDefaultBlockAlignment ) {
void * p = alloc( nSize );
assert( (reinterpret_cast<uptr_atomic_t>(p) & (nAlignment - 1)) == 0 );
block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
block_header * pRedirect;
if ( (reinterpret_cast<uptr_atomic_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uptr_atomic_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
assert( pRedirect != pBlock );
pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
assert( (reinterpret_cast<uptr_atomic_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
// Bound checking is only for our blocks
if ( !pBlock->isOSAllocated() ) {
// the block is allocated from our heap - bound checker is applicable
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pRedirect + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
return pRedirect + 1;
/// Frees an aligned memory block previously allocated by \ref alloc_aligned
void * pMemory ///< Pointer to memory block to free
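/*
A hypothetical usage sketch (illustration only):
\code
cds::memory::michael::Heap<> heap;
void * p = heap.alloc_aligned( 256, 64 ); // 256 bytes with 64-byte alignment
assert( ((size_t) p & 63) == 0 );
// ...
heap.free_aligned( p );
\endcode
*/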
/// Get instant summary statistics
void summaryStat( summary_stat& st )
size_t nProcHeapCount = m_SizeClassSelector.size();
for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(CDS_ATOMIC::memory_order_relaxed);
for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
st.add_procheap_stat( pProcHeap->stat );
st.add_heap_stat( m_OSAllocStat );
}}} // namespace cds::memory::michael
#endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H