This file is a part of libcds - Concurrent Data Structures library
(C) Copyright Maxim Khizhinsky (libcds.dev@gmail.com) 2006-2016

Source code repo: http://github.com/khizmax/libcds/
Download: http://sourceforge.net/projects/libcds/files/

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
#define CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H

/*
    Michael allocator implementation
    [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"

    2011.09.07 khizmax  Optimization: a small page (about 64K) is allocated by a Heap::alloc call.
                        This optimization allows the allocator to request system memory more regularly,
                        in 1M blocks, which leads to less memory fragmentation.
    2011.01.02 khizmax  Created
*/
#include <mutex>        // unique_lock

#include <cds/memory/michael/options.h>
#include <cds/memory/michael/bound_check.h>
#include <cds/memory/michael/procheap_stat.h>
#include <cds/memory/michael/osalloc_stat.h>

#include <cds/os/topology.h>
#include <cds/os/alloc_aligned.h>
#include <cds/sync/spinlock.h>
#include <cds/details/type_padding.h>
#include <cds/details/marked_ptr.h>
#include <cds/container/vyukov_mpmc_cycle_queue.h>
#include <cds/user_setup/cache_line.h>
#include <cds/details/lib.h>

#include <boost/intrusive/list.hpp>
namespace cds {
/// Memory-related algorithms: allocators etc.
namespace memory {
/// Michael's allocator (class Heap)
/**
    \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"

    This namespace declares the main class Heap and many helper classes.
*/
namespace michael {

    struct size_class {
        unsigned int    nBlockSize ;    ///< block size in bytes
        unsigned int    nSBSize ;       ///< superblock size (64K or 1M)
        unsigned int    nCapacity ;     ///< superblock capacity (nSBSize / nBlockSize)
        unsigned int    nSBSizeIdx ;    ///< internal superblock size index (page index)
    };
    /// %Heap based on system \p malloc and \p free functions
    struct malloc_heap
    {
        /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
        static void * alloc( size_t nSize )
        {
            void * p = ::malloc( nSize );
            return p;
        }
        /// Returns memory block to the system (\p free wrapper)
        static void free( void * p )
        {
            ::free( p );
        }
    };
    /// %Heap based on system-provided aligned \p malloc and \p free functions
    struct aligned_malloc_heap
    {
        /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
        static void * alloc( size_t nSize, size_t nAlignment )
        {
            return cds::OS::aligned_malloc( nSize, nAlignment );
        }
        /// Frees aligned memory block \p p that has been previously allocated by \ref alloc call
        static void free( void * p )
        {
            cds::OS::aligned_free( p );
        }
    };
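    /*
        Example (a sketch, not part of the library): a user-defined heap must expose
        the same static interface as \ref malloc_heap to be usable with the
        opt::system_heap option. The \p new_delete_heap name and the use of
        \p ::operator \p new are purely illustrative.
        \code
        struct new_delete_heap
        {
            static void * alloc( size_t nSize )
            {
                return ::operator new( nSize );
            }
            static void free( void * p )
            {
                ::operator delete( p );
            }
        };

        typedef cds::memory::michael::Heap<
            opt::system_heap< new_delete_heap >
        > my_heap;
        \endcode
    */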
    /// Page heap based on \p Heap
    /**
        A page heap can allocate memory only in page-sized blocks.
        \p Heap may be any heap that provides an interface like \ref malloc_heap.

        This class is one of the available implementations of the opt::page_heap option.
    */
    template <class Heap = malloc_heap>
    class page_allocator: public Heap
    {
        typedef Heap base_class;
        size_t  m_nPageSize;    ///< page size in bytes
    public:
        page_allocator(
            size_t nPageSize    ///< page size in bytes
        )
            : m_nPageSize( nPageSize )
        {}

        /// Allocate new page
        void * alloc()
        {
            return base_class::alloc( m_nPageSize );
        }

        /// Free page \p pPage
        void free( void * pPage )
        {
            base_class::free( pPage );
        }
    };
    /// Page cacheable heap
    /**
        To improve performance this allocator maintains a small list of free pages.
        A page heap can allocate memory only in page-sized blocks.

        \li \p FreeListCapacity - capacity of the free-list, the default value is 64 pages
        \li \p Heap may be any heap that provides an interface like \ref malloc_heap.

        This class is one of the available implementations of the opt::page_heap option.
    */
    template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
    class page_cached_allocator: public page_allocator<Heap>
    {
        typedef page_allocator<Heap> base_class;

        struct make_null_ptr {
            void operator ()(void *& p)
            {
                p = nullptr;
            }
        };

        struct free_list_traits : public cds::container::vyukov_queue::traits
        {
            typedef opt::v::initialized_static_buffer<void *, FreeListCapacity> buffer;
            typedef make_null_ptr value_cleaner;
        };

        typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;

        free_list   m_FreeList;

    public:
        page_cached_allocator(
            size_t nPageSize    ///< page size in bytes
        )
            : base_class( nPageSize )
            , m_FreeList( FreeListCapacity )
        {}

        ~page_cached_allocator()
        {
            void * pPage;
            while ( m_FreeList.pop(pPage))
                base_class::free( pPage );
        }

        /// Allocate new page
        void * alloc()
        {
            void * pPage;
            if ( !m_FreeList.pop( pPage ))
                pPage = base_class::alloc();
            return pPage;
        }

        /// Free page \p pPage
        void free( void * pPage )
        {
            if ( !m_FreeList.push( pPage ))
                base_class::free( pPage );
        }
    };
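    /*
        Usage sketch (illustrative): up to \p FreeListCapacity freed pages are kept
        in a lock-free Vyukov MPMC queue and reused by \p alloc() before falling
        back to the underlying \p Heap.
        \code
        page_cached_allocator<16> pageHeap( 64 * 1024 ); // 64K pages, 16-page cache
        void * pPage = pageHeap.alloc();  // pops a cached page or allocates a new one
        pageHeap.free( pPage );           // caches the page, or frees it if the cache is full
        \endcode
    */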
    /// Implementation of the opt::sizeclass_selector option
    /**
        The default size-class selector can manage memory blocks up to 64K.
    */
    class CDS_EXPORT_API default_sizeclass_selector
    {
        /// Count of different size-classes
        static const size_t c_nSizeClassCount = 63;

        /// Max block size
        static const size_t c_nMaxBlockSize = 64 * 1024;

        /// Page size of type 0 (64K)
        static const unsigned int c_nPage64K = 64 * 1024 - 32;

        /// Page size of type 1 (1M)
        static const unsigned int c_nPage1M = 1024 * 1024;

        static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
        static size_class const m_szClass[c_nSizeClassCount];
        static unsigned char const m_szClassMap[];

    public:
        /// Type of size-class index
        typedef unsigned int sizeclass_index;

        default_sizeclass_selector();

        /// "No size class" index
        static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);

        /// Returns size-class count
        static sizeclass_index size()
        {
            return c_nSizeClassCount;
        }

        /// Returns page size in bytes for given page type \p nPageType
        static size_t page_size(size_t nPageType )
        {
            switch ( nPageType ) {
            case 0:
                return c_nPage64K;
            case 1:
                return c_nPage1M;
            default:
                assert(false)   ; // anything forgotten?..
                return c_nPage1M;
            }
        }

        /// Returns the count of page types
        /**
            This class supports pages of two types: a 64K page for small objects and a 1M page for other objects.
        */
        static size_t pageTypeCount()
        {
            return 2;
        }

        /// Returns size-class index for \p nSize
        /**
            For large blocks that cannot be allocated by Michael's allocator,
            the function returns \p c_nNoSizeClass.
        */
        static sizeclass_index find( size_t nSize )
        {
            if ( nSize > c_nMaxBlockSize ) {
                // Too large block - allocate from the system
                return c_nNoSizeClass;
            }
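            // The class map is indexed at 16-byte granularity: the request size
            // is rounded up to the next 16-byte boundary before the lookup.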
            sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
            assert( nSize <= m_szClassBounds[ szClass ] );
            assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
            return szClass;
        }

        /// Gets the \p size_class struct for size-class index \p nIndex
        static const size_class * at( sizeclass_index nIndex )
        {
            assert( nIndex < size());
            return m_szClass + nIndex;
        }
    };
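    /*
        Illustration (hypothetical checks, not library code): \p find() returns the
        index of the smallest size-class that fits the request, or \p c_nNoSizeClass
        for requests larger than \p c_nMaxBlockSize; \p at() maps the index back to
        its \p size_class descriptor.
        \code
        typedef default_sizeclass_selector selector;
        selector::sizeclass_index idx = selector::find( 100 );
        assert( idx != selector::c_nNoSizeClass );
        assert( selector::at( idx )->nBlockSize >= 100 );
        assert( selector::find( 128 * 1024 ) == selector::c_nNoSizeClass ); // > 64K: allocated from the OS
        \endcode
    */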
    namespace details {
        struct free_list_tag;
        typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;

        struct partial_list_tag;
        typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;

        struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
        {};
    } // namespace details

    /// List of free superblock descriptors
    /**
        This class is an implementation of the \ref opt::free_list option.
    */
    template <class Lock, class T = details::intrusive_superblock_desc>
    class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
    {
        typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
    public:
        typedef details::free_list_locked_hook item_hook;
        typedef Lock lock_type;
    protected:
        typedef std::unique_lock<lock_type>   auto_lock;

        mutable lock_type   m_access;

    public:
        /// Rebinds to other item type \p T2
        template <class T2>
        struct rebind {
            typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
        };

        /// Push superblock descriptor to free-list
        void push( T * pDesc )
        {
            assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc)));
            auto_lock al(m_access);
            base_class::push_back( *pDesc );
        }

        /// Pop superblock descriptor from free-list
        T * pop()
        {
            auto_lock al(m_access);
            if ( base_class::empty())
                return nullptr;
            T& rDesc = base_class::front();
            base_class::pop_front();
            assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc)));
            return &rDesc;
        }

        /// Returns current count of superblocks in free-list
        size_t size() const
        {
            auto_lock al(m_access);
            return base_class::size();
        }
    };
    /// List of partially filled superblock descriptors
    /**
        This class is an implementation of the \ref opt::partial_list option.
    */
    template <class Lock, class T = details::intrusive_superblock_desc>
    class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
    {
        typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
    public:
        typedef details::partial_list_locked_hook item_hook;
        typedef Lock    lock_type;
    protected:
        typedef std::unique_lock<lock_type>   auto_lock;

        mutable lock_type   m_access;

    public:
        /// Rebinds to other item type \p T2
        template <class T2>
        struct rebind {
            typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
        };

        /// Push a superblock \p pDesc to the list
        void push( T * pDesc )
        {
            auto_lock al( m_access );
            assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc)));
            base_class::push_back( *pDesc );
        }

        /// Pop superblock from the list
        T * pop()
        {
            auto_lock al( m_access );
            if ( base_class::empty())
                return nullptr;
            T& rDesc = base_class::front();
            base_class::pop_front();
            assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc)));
            return &rDesc;
        }

        /// Removes \p pDesc descriptor from the partial list
        bool unlink( T * pDesc )
        {
            assert( pDesc != nullptr );
            auto_lock al( m_access );
            // !inited(pDesc) means that pDesc is linked into the partial list
            if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc))) {
                base_class::erase( base_class::iterator_to( *pDesc ));
                return true;
            }
            return false;
        }

        /// Count of elements in the list
        size_t size() const
        {
            auto_lock al( m_access );
            return base_class::size();
        }
    };
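    /*
        Sketch: both list classes take the lock type as a template parameter, so a
        different lock can be selected through the corresponding Heap options; here
        \p std::mutex replaces the default \p cds::sync::spin (illustrative only):
        \code
        typedef cds::memory::michael::Heap<
            opt::free_list< free_list_locked< std::mutex > >,
            opt::partial_list< partial_list_locked< std::mutex > >
        > mutex_list_heap;
        \endcode
    */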
    /// Summary processor heap statistics
    /**
        Summary heap statistics for use with the Heap::summaryStat function.
    */
    struct summary_stat
    {
        size_t      nAllocFromActive    ;  ///< Event count of allocations from the active superblock
        size_t      nAllocFromPartial   ;  ///< Event count of allocations from a partial superblock
        size_t      nAllocFromNew       ;  ///< Event count of allocations from a new superblock
        size_t      nFreeCount          ;  ///< Count of \p free function calls
        size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
        size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
        size_t      nDescAllocCount     ;  ///< Count of superblock descriptors
        size_t      nDescFull           ;  ///< Count of full superblocks
        uint64_t    nBytesAllocated     ;  ///< Count of allocated bytes (for heap-managed memory blocks)
        uint64_t    nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap-managed memory blocks)

        size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
        size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
        uint64_t    nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
        int64_t     nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)

        // Internal contention indicators
        /// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active internal function
        /**
            Contention indicator: the lower the value, the better.
        */
        size_t      nActiveDescCASFailureCount;
        /// CAS failure counter for updating the anchor field of the active superblock in the \p alloc_from_active internal function
        /**
            Contention indicator: the lower the value, the better.
        */
        size_t      nActiveAnchorCASFailureCount;
        /// CAS failure counter for the block-reservation CAS on the anchor field of a partial superblock in the \p alloc_from_partial internal function
        /**
            Contention indicator: the lower the value, the better.
        */
        size_t      nPartialDescCASFailureCount;
        /// CAS failure counter for the block-pop CAS on the anchor field of a partial superblock in the \p alloc_from_partial internal function
        /**
            Contention indicator: the lower the value, the better.
        */
        size_t      nPartialAnchorCASFailureCount;
        /// Constructs empty statistics. All counters are zero.
        summary_stat()
        {
            clear();
        }

        /// Difference statistics
        /**
            This operator computes the difference between \p *this and \p stat and stores the difference in \p *this.
        */
        summary_stat& operator -=( const summary_stat& stat )
        {
            nAllocFromActive    -= stat.nAllocFromActive;
            nAllocFromPartial   -= stat.nAllocFromPartial;
            nAllocFromNew       -= stat.nAllocFromNew;
            nFreeCount          -= stat.nFreeCount;
            nPageAllocCount     -= stat.nPageAllocCount;
            nPageDeallocCount   -= stat.nPageDeallocCount;
            nDescAllocCount     -= stat.nDescAllocCount;
            nDescFull           -= stat.nDescFull;
            nBytesAllocated     -= stat.nBytesAllocated;
            nBytesDeallocated   -= stat.nBytesDeallocated;

            nSysAllocCount      -= stat.nSysAllocCount;
            nSysFreeCount       -= stat.nSysFreeCount;
            nSysBytesAllocated  -= stat.nSysBytesAllocated;
            nSysBytesDeallocated -= stat.nSysBytesDeallocated;

            nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
            nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
            nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
            nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;

            return *this;
        }

        /// Clears statistics
        /**
            All counters are set to zero.
        */
        void clear()
        {
            memset( this, 0, sizeof(*this));
        }
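        /*
            Usage sketch (\p myHeap is an assumed cds::memory::michael::Heap<> object):
            together with Heap::summaryStat, \p operator-= makes it easy to measure
            heap activity over an interval.
            \code
            summary_stat stBefore, stAfter;
            myHeap.summaryStat( stBefore );
            // ... allocations and deallocations ...
            myHeap.summaryStat( stAfter );
            stAfter -= stBefore;   // stAfter now holds per-interval deltas
            \endcode
        */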
        template <typename Stat>
        summary_stat& add_procheap_stat( const Stat& stat )
        {
            nAllocFromActive    += stat.allocFromActive();
            nAllocFromPartial   += stat.allocFromPartial();
            nAllocFromNew       += stat.allocFromNew();
            nFreeCount          += stat.freeCount();
            nPageAllocCount     += stat.blockAllocated();
            nPageDeallocCount   += stat.blockDeallocated();
            nDescAllocCount     += stat.descAllocCount();
            nDescFull           += stat.descFull();
            nBytesAllocated     += stat.allocatedBytes();
            nBytesDeallocated   += stat.deallocatedBytes();

            nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
            nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
            nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
            nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();

            return *this;
        }

        template <typename Stat>
        summary_stat& add_heap_stat( const Stat& stat )
        {
            nSysAllocCount      += stat.allocCount();
            nSysFreeCount       += stat.freeCount();

            nSysBytesAllocated  += stat.allocatedBytes();
            nSysBytesDeallocated += stat.deallocatedBytes();

            return *this;
        }
    };
    /// Michael's allocator
    /**
        This class provides base functionality for Michael's allocator. It does not provide
        the interface described by \p std::allocator, therefore we call it a heap rather than an allocator.
        The heap interface is closer to the semantics of the \p malloc / \p free system functions.
        The heap supports allocation of aligned and unaligned data.

        The algorithm is based on a simplified version of
        \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"

        that, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
        \li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis

        This is a powerful, scalable, fully customizable heap with a lock-free fast path,
        developed specifically for multi-threaded applications.
        With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
        You can manually bind any of your threads to any arena ("processor"). With the opt::sizeclass_selector option you can manage
        allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation
        like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
        The opt::check_bounds feature can help you find memory buffer overflows.

        Brief algorithm description from Michael's work:

        Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
        the heap is composed of large superblocks (64K or 1M in size). Each superblock is divided into multiple equal-sized blocks.
        Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
        heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
        An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
        that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
        block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
        structures for the size classes and processor heaps (about 16 KB for a 16-processor machine) are allocated and initialized
        in a lock-free manner.

        Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
        the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
        atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
        pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
        available blocks of its original superblock by atomically updating its descriptor.

        <b>Constraint</b>: one superblock may contain up to 2048 blocks. This restriction imposes a limit
        on the superblock capacity (see \p c_nMaxBlockInSuperBlock).

        Available \p Options:
        - \ref opt::sys_topology - class that describes the system topology needed for the allocator.
            Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
        - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from the OS.
            Default is \ref malloc_heap.
        - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
            Default is \ref aligned_malloc_heap
        - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
            Default is \ref page_cached_allocator
        - \ref opt::sizeclass_selector - option setter for a class used to select the appropriate size-class
            for an incoming allocation request.
            Default is \ref default_sizeclass_selector
        - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors.
            Default is \ref free_list_locked
        - \ref opt::partial_list - option setter for a class to manage a list of partially filled superblocks.
            Default is \ref partial_list_locked
        - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
            that is maintained by the heap.
            Default is \ref procheap_empty_stat
        - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
            allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector);
            it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated directly from the OS.
            The \p os_allocated_stat option sets a class to gather statistics for such large blocks.
            Default is \ref os_allocated_empty
        - \ref opt::check_bounds - a bounds checker.
            Default is no bounds checker (cds::opt::none)

        The heap is the basic building block for your allocator or <tt>operator new</tt> implementation.
        \code
        #include <cds/memory/michael/allocator.h>

        // Heap with explicitly defined options:
        cds::memory::michael::Heap<
            opt::aligned_heap< aligned_malloc_heap >,
            opt::page_heap< page_cached_allocator<16, malloc_heap> >
        >   myHeap;

        // Heap with default options:
        cds::memory::michael::Heap<>    myDefHeap;
        \endcode
        \par How to make std-like allocator

        There are serious differences between the heap and the <tt>std::allocator</tt> interface:
        - the heap is stateful, while \p std::allocator is stateless;
        - the heap has many more template parameters than \p std::allocator;
        - the heap has a low-level interface for memory allocation only, unlike the allocator
            interface, which can construct/destroy objects of any type \p T.

        To convert the heap interface into a \p std::allocator -like interface you should:
        - declare an object of class cds::memory::michael::Heap specifying the necessary
            template parameters; this is usually a static object
        - create a class with the \p std::allocator interface that uses the functions of the heap.
        \code
        #include <cds/memory/michael/allocator.h>

        template <typename T>
        class MichaelAllocator
        {
            typedef std::allocator<T>               std_allocator;
            typedef cds::memory::michael::Heap<>    michael_heap;

            // Michael heap static object
            static michael_heap     s_Heap;
        public:
            // Declare typedefs from std::allocator
            typedef typename std_allocator::const_pointer   const_pointer;
            typedef typename std_allocator::pointer         pointer;
            typedef typename std_allocator::const_reference const_reference;
            typedef typename std_allocator::reference       reference;
            typedef typename std_allocator::difference_type difference_type;
            typedef typename std_allocator::size_type       size_type;
            typedef typename std_allocator::value_type      value_type;

            // Allocation function
            pointer allocate( size_type _Count, const void* _Hint )
            {
                return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
            }

            // Deallocation function
            void deallocate( pointer _Ptr, size_type _Count )
            {
                s_Heap.free( _Ptr );
            }

            // Other std::allocator specific functions: address, construct, destroy, etc.
            ...

            // Rebinding allocator to another type
            template <class _Other>
            struct rebind {
                typedef MichaelAllocator<_Other> other;
            };
        };

        template <typename T>
        typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
        \endcode
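
        The allocator can then be used with standard containers, for example:
        \code
        std::vector<int, MichaelAllocator<int>> vec;
        \endcode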
    */
    template <typename... Options>
    class Heap {
    protected:
        static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
        static const unsigned int c_nDefaultBlockAlignment = 8;

        struct default_options {
            typedef cds::OS::topology           sys_topology;
            typedef malloc_heap                 system_heap;
            typedef page_cached_allocator<>     page_heap;
            typedef aligned_malloc_heap         aligned_heap;
            typedef default_sizeclass_selector  sizeclass_selector;
            typedef free_list_locked<cds::sync::spin>    free_list;
            typedef partial_list_locked<cds::sync::spin> partial_list;
            typedef procheap_empty_stat         procheap_stat;
            typedef os_allocated_empty          os_allocated_stat;
            typedef cds::opt::none              check_bounds;
        };

        typedef typename opt::make_options<default_options, Options...>::type  options;

        typedef unsigned char   byte;

    public:
        typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
        typedef typename options::system_heap           system_heap         ;   ///< effective system heap
        typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
        typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
        typedef typename options::page_heap             page_heap           ;   ///< effective page heap
        typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
        typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
        typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;   ///< effective bounds checker
    protected:
        // forward declarations
        struct superblock_desc;
        struct processor_heap_base;
        struct processor_desc;

        /// Superblock states
        /**
            A superblock can be in one of four states: \p ACTIVE, \p FULL,
            \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
            superblock in a heap, or if a thread intends to try to install it
            as such. A superblock is \p FULL if all its blocks are either allocated
            or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
            and contains unreserved available blocks. A superblock is
            \p EMPTY if all its blocks are free and it is not \p ACTIVE.
        */
        enum superblock_state {
            SBSTATE_ACTIVE  = 0,    ///< superblock is active
            SBSTATE_FULL    = 1,    ///< superblock is full
            SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
            SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
        };

        static const size_t c_nMaxBlockInSuperBlock = 1024 * 2 ;   ///< Max count of blocks in a superblock (2^11)
        /// Anchor of the superblock descriptor. Updated by CAS
        struct anchor_tag {
            unsigned long long  avail:11    ;   ///< index of the first available block in the superblock
            unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
            unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
            unsigned long long  tag:40      ;   ///< ABA prevention tag
        };
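        // Note: the four bit-fields of anchor_tag sum to 11 + 11 + 2 + 40 = 64 bits,
        // so the whole anchor packs into a single 64-bit word that is replaced by one
        // CAS. The 40-bit tag serves as the ABA-prevention counter, and the 11-bit
        // avail/count fields are why a superblock is limited to 2048 blocks
        // (see c_nMaxBlockInSuperBlock above).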
        /// Superblock descriptor
        struct superblock_desc
            : public options::free_list::item_hook
            , public options::partial_list::item_hook
        {
            atomics::atomic<anchor_tag>     anchor      ;   ///< anchor, see \ref anchor_tag
            byte *                          pSB         ;   ///< ptr to superblock
            processor_heap_base *           pProcHeap   ;   ///< pointer to owner processor heap
            unsigned int                    nBlockSize  ;   ///< block size in bytes
            unsigned int                    nCapacity   ;   ///< superblock size / block size

            superblock_desc()
                : pSB( nullptr )
                , pProcHeap( nullptr )
            {}
        };

        typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
        typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
#if CDS_BUILD_BITS == 32
        /// Allocated block header
        /**
            Each allocated block has an 8-byte header.
            The header contains a pointer to the owner superblock descriptor and a redirection flag.
            If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
            \code
                | blockHeader |  [8 byte] pointer to owner superblock (flag=0)
                |             |  <- memory allocated
                |   ...       |
            \endcode

            If the block has been allocated by \ref alloc_aligned, the pointer returned to the caller may be
            offset from the block start to satisfy the requested alignment. In this case the redirection
            flag is 1 and the block's structure is:
            \code
            +-> | blockHeader |  [8 byte] pointer to owner superblock (flag=0)
            |   |  (padding)  |
            +-- | blockHeader |  [8 byte] pointer to block head (flag=1)
                |             |  <- memory allocated
                |   ...       |
            \endcode
        */
        class block_header
        {
            enum {
                bitAligned = 1,
                bitOSAllocated = 2
            };

            union {
                superblock_desc *   pDesc   ;   // pointer to superblock descriptor
                uint32_t            nSize   ;   // block size (allocated from OS)
            };
            uint32_t    nFlags;

        public:
            void set( superblock_desc * pdesc, uint32_t isAligned )
            {
                pDesc = pdesc;
                nFlags = isAligned ? bitAligned : 0;
            }

            superblock_desc * desc()
            {
                assert( (nFlags & bitOSAllocated) == 0 );
                return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
            }

            block_header * begin()
            {
                return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
            }

            bool isAligned() const { return (nFlags & bitAligned) != 0; }

            bool isOSAllocated() const { return (nFlags & bitOSAllocated) != 0; }

            void setOSAllocated( size_t sz )
            {
                nSize = sz;
                nFlags = bitOSAllocated;
            }

            size_t getOSAllocSize() const
            {
                assert( isOSAllocated());
                return nSize;
            }
        };
#elif CDS_BUILD_BITS == 64
        class block_header
        {
            enum {
                bitAligned = 1,
                bitOSAllocated = 2
            };
            typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
            // If bitOSAllocated is set, pDesc contains the size of the memory block
            marked_desc_ptr     pDesc;

        public:
            void set( superblock_desc * pdesc, uint32_t isAligned )
            {
                pDesc = marked_desc_ptr( pdesc, isAligned );
            }

            superblock_desc * desc()
            {
                assert( !isOSAllocated());
                return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr())->desc() : pDesc.ptr();
            }

            block_header * begin()
            {
                return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr()) : this;
            }

            bool isAligned() const { return (pDesc.bits() & bitAligned) != 0; }

            bool isOSAllocated() const { return (pDesc.bits() & bitOSAllocated) != 0; }

            void setOSAllocated( size_t nSize )
            {
                pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
            }

            size_t getOSAllocSize() const
            {
                assert( isOSAllocated());
                return reinterpret_cast<uintptr_t>( pDesc.ptr()) >> 2;
            }
        };
#else
#   error "Unexpected value of CDS_BUILD_BITS"
#endif  // CDS_BUILD_BITS
        struct free_block_header: block_header {
            unsigned int    nNextFree;
        };
#if CDS_BUILD_BITS == 32
        /// Processor heap's \p active field
        /**
            The \p active field in the processor heap structure is primarily a pointer to the descriptor
            of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
            guaranteed that the active superblock has at least one block available for reservation.
            Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
            of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
            of blocks available for reservation in the active superblock less one. That is, if the value
            of credits is n, then the active superblock contains n+1 blocks available for reservation
            through the \p active field. Note that the number of blocks in a superblock is not limited
            to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
            (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
            atomically decrements credits while validating that the active superblock is still valid.
        */
        class active_tag {
            superblock_desc *   pDesc;
            uint32_t            nCredits;

        public:
            static const unsigned int c_nMaxCredits = 0 - 1;

        public:
            CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
                : pDesc( nullptr )
                , nCredits( 0 )
            {}

            active_tag( active_tag const& ) CDS_NOEXCEPT = default;
            ~active_tag() CDS_NOEXCEPT = default;
            active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
#       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
            active_tag( active_tag&& ) CDS_NOEXCEPT = default;
            active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
#       endif

            /// Returns pointer to superblock descriptor
            superblock_desc * ptr() const { return pDesc; }

            /// Sets superblock descriptor
            void ptr( superblock_desc * p ) { pDesc = p; }

            unsigned int credits() const { return nCredits; }

            void credits( unsigned int n ) { nCredits = n; }

            void clear()
            {
                pDesc = nullptr;
                nCredits = 0;
            }

            void set( superblock_desc * pSB, unsigned int n )
            {
                pDesc = pSB;
                nCredits = n;
            }
        };
#elif CDS_BUILD_BITS == 64
        class active_tag
        {
        public:
            static const unsigned int c_nMaxCredits = c_nAlignment - 1 ;   // 0x003F;
        protected:
            typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
            marked_desc_ptr pDesc;

        public:
            active_tag() CDS_NOEXCEPT
                : pDesc( nullptr )
            {}
            // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
            //active_tag() CDS_NOEXCEPT = default;
            active_tag( active_tag const& ) CDS_NOEXCEPT = default;
            ~active_tag() CDS_NOEXCEPT = default;
            active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
#       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
            active_tag( active_tag&& ) CDS_NOEXCEPT = default;
            active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
#       endif

            superblock_desc * ptr() const { return pDesc.ptr(); }

            void ptr( superblock_desc * p )
            {
                assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
                pDesc = marked_desc_ptr( p, pDesc.bits());
            }

            unsigned int credits() const { return (unsigned int) pDesc.bits(); }

            void credits( unsigned int n )
            {
                assert( n <= c_nMaxCredits );
                pDesc = marked_desc_ptr( pDesc.ptr(), n );
            }

            void clear() { pDesc = marked_desc_ptr(); }

            void set( superblock_desc * pSB, unsigned int n )
            {
                assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
                pDesc = marked_desc_ptr( pSB, n );
            }
        };
#else
#   error "Unexpected value of CDS_BUILD_BITS"
#endif  // CDS_BUILD_BITS
        struct processor_heap_base
        {
            CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;  ///< pointer to the descriptor of the active superblock owned by the processor heap
            processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
            const size_class *  pSizeClass  ;   ///< pointer to size class
            atomics::atomic<superblock_desc *>  pPartial    ;   ///< pointer to a partially filled superblock (may be \p nullptr)
            partial_list        partialList ;   ///< list of partially filled superblocks owned by the processor heap
            unsigned int        nPageIdx    ;   ///< page size-class index; \ref c_nPageSelfAllocation means "small page"

            /// Small page marker
            /**
                If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
                This optimization allows the allocator to request system memory more regularly, in 1M blocks,
                which leads to less memory fragmentation.
            */
            static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;

            procheap_stat       stat        ;   ///< heap statistics
            //processor_heap_statistics   stat;

            processor_heap_base() CDS_NOEXCEPT
                : pProcDesc( nullptr )
                , pSizeClass( nullptr )
                , pPartial( nullptr )
            {
                assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
            }

            /// Get a partial superblock owned by the processor heap
            superblock_desc * get_partial()
            {
                superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
                do {
                    if ( !pDesc ) {
                        pDesc = partialList.pop();
                        break;
                    }
                } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ));

                //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc)));
                //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc)));
                return pDesc;
            }

            /// Add partial superblock \p pDesc to the list
            void add_partial( superblock_desc * pDesc )
            {
                assert( pPartial != pDesc );
                //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc)));

                superblock_desc * pCur = nullptr;
                if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed))
                    partialList.push( pDesc );
            }

            /// Remove superblock \p pDesc from the list of partial superblocks
            bool unlink_partial( superblock_desc * pDesc )
            {
                return partialList.unlink( pDesc );
            }
        };
        /// Aligned processor heap structure
        typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;

        /// Processor descriptor
        struct processor_desc
        {
            processor_heap *    arrProcHeap     ;   ///< array of processor heaps
            free_list           listSBDescFree  ;   ///< list of free superblock descriptors
            page_heap *         pageHeaps       ;   ///< array of page heaps (one for each page size)

            processor_desc()
                : arrProcHeap( nullptr )
                , pageHeaps( nullptr )
            {}
        };

    protected:
        sys_topology        m_Topology          ;   ///< System topology
        system_heap         m_LargeHeap         ;   ///< Heap for large blocks
        aligned_heap        m_AlignedHeap       ;   ///< Internal aligned heap
        sizeclass_selector  m_SizeClassSelector ;   ///< Size-class selector
        atomics::atomic<processor_desc *> *   m_arrProcDesc ;  ///< array of pointers to the processor descriptors
        unsigned int        m_nProcessorCount   ;   ///< Processor count
        bound_checker       m_BoundChecker      ;   ///< Bounds checker

        os_allocated_stat   m_OSAllocStat       ;   ///< OS-allocated memory statistics
    protected:
        /// Allocates a large block from system memory
        block_header * alloc_from_OS( size_t nSize )
        {
            block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ));
            m_OSAllocStat.incBytesAllocated( nSize );
            p->setOSAllocated( nSize );
            return p;
        }
        /// Allocates from the active superblock if possible
        block_header * alloc_from_active( processor_heap * pProcHeap )
        {
            active_tag  oldActive;
            int nCollision = -1;

            while ( true ) {
                ++nCollision;
                oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
                if ( !oldActive.ptr())
                    return nullptr;
                unsigned int nCredits = oldActive.credits();
                active_tag  newActive   ;   // default = 0
                if ( nCredits != 0 ) {
                    newActive = oldActive;
                    newActive.credits( nCredits - 1 );
                }
                if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
                    break;
            }

            if ( nCollision )
                pProcHeap->stat.incActiveDescCASFailureCount( nCollision );

            superblock_desc * pDesc = oldActive.ptr();

            byte * pAddr;
            anchor_tag  oldAnchor;
            anchor_tag  newAnchor;
            unsigned int nMoreCredits = 0;

            nCollision = -1;
            do {
                ++nCollision;
                newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);

                assert( oldAnchor.avail < pDesc->nCapacity );
                pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;

                // TSan reports a data race if the block contained atomic ops before
                CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
                newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
                CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
                newAnchor.tag += 1;

                if ( oldActive.credits() == 0 ) {
                    // state must be ACTIVE
                    if ( oldAnchor.count == 0 )
                        newAnchor.state = SBSTATE_FULL;
                    else {
                        nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
                        newAnchor.count -= nMoreCredits;
                    }
                }
            } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));

            if ( nCollision )
                pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );

            assert( newAnchor.state != SBSTATE_EMPTY );

            if ( newAnchor.state == SBSTATE_FULL )
                pProcHeap->stat.incDescFull();
            if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
                update_active( pProcHeap, pDesc, nMoreCredits );

            pProcHeap->stat.incAllocFromActive();

            // The block_header fields need not be set up here:
            // they were initialized in alloc_from_new_superblock
            assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
            assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated());
            assert( !reinterpret_cast<block_header *>( pAddr )->isAligned());

            return reinterpret_cast<block_header *>( pAddr );
        }
        /// Allocates from a partially filled superblock if possible
        block_header * alloc_from_partial( processor_heap * pProcHeap )
        {
        retry:
            superblock_desc * pDesc = pProcHeap->get_partial();
            if ( !pDesc )
                return nullptr;

            anchor_tag  oldAnchor;
            anchor_tag  newAnchor;
            unsigned int nMoreCredits = 0;

            int nCollision = -1;
            do {
                ++nCollision;

                newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
                if ( oldAnchor.state == SBSTATE_EMPTY ) {
                    free_superblock( pDesc );
                    goto retry;
                }

                nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
                newAnchor.count -= nMoreCredits + 1;
                newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
                newAnchor.tag += 1;
            } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed));

            if ( nCollision )
                pProcHeap->stat.incPartialDescCASFailureCount( nCollision );

            if ( newAnchor.state == SBSTATE_FULL )
                pProcHeap->stat.incDescFull();

            // Now, the thread is guaranteed to have reserved one or more blocks
            // pop reserved block
            byte * pAddr;
            nCollision = -1;
            do {
                ++nCollision;

                newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);

                assert( oldAnchor.avail < pDesc->nCapacity );
                pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
                newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
                newAnchor.tag += 1;
            } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed));

            if ( nCollision )
                pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );

            assert( newAnchor.state != SBSTATE_EMPTY );

            pProcHeap->stat.incAllocFromPartial();

            if ( nMoreCredits > 0 )
                update_active( pProcHeap, pDesc, nMoreCredits );

            // The block_header fields need not be set up here:
            // they were initialized in alloc_from_new_superblock
            assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
            assert( !reinterpret_cast<block_header *>( pAddr )->isAligned());
            assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated());

            return reinterpret_cast<block_header *>( pAddr );
        }
        /// Allocates from a new superblock
        block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
        {
            superblock_desc * pDesc = new_superblock_desc( pProcHeap );
            assert( pDesc != nullptr );
            pDesc->pSB = new_superblock_buffer( pProcHeap );

            anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
            anchor.tag += 1;

            // Build the singly-linked list of free blocks in the superblock
            byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
            unsigned int nNext = 0;
            const unsigned int nBlockSize = pDesc->nBlockSize;
            for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
                reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
                reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
            }
            reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
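            // The last block's nNextFree wraps to index 0; it is never followed,
            // because the anchor's count/credits bookkeeping stops handing out
            // blocks once the superblock is exhausted.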
            active_tag newActive;
            newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );

            anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
            anchor.state = SBSTATE_ACTIVE;
            pDesc->anchor.store(anchor, atomics::memory_order_relaxed);

            active_tag curActive;
            if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
                pProcHeap->stat.incAllocFromNew();
                //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
                return reinterpret_cast<block_header *>( pDesc->pSB );
            }

            free_superblock( pDesc );
            return nullptr;
        }
        /// Finds the appropriate processor heap for the selected size-class
        processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
        {
            assert( nSizeClassIndex < m_SizeClassSelector.size());

            unsigned int nProcessorId = m_Topology.current_processor();
            assert( nProcessorId < m_nProcessorCount );

            if ( nProcessorId >= m_nProcessorCount )
                nProcessorId = 0;

            processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
            while ( !pDesc ) {
                processor_desc * pNewDesc = new_processor_desc( nProcessorId );
                if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed )) {
                    pDesc = pNewDesc;
                    break;
                }
                free_processor_desc( pNewDesc );
            }

            return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
        }
        /// Updates the \p active field of processor heap \p pProcHeap
        void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
        {
            assert( pProcHeap == pDesc->pProcHeap );

            active_tag  nullActive;
            active_tag  newActive;
            newActive.set( pDesc, nCredits - 1 );

            if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ))
                return;

            // Someone has installed another active superblock.
            // Return credits to the superblock and make it partial.

            anchor_tag  oldAnchor;
            anchor_tag  newAnchor;

            do {
                newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
                newAnchor.count += nCredits;
                newAnchor.state = SBSTATE_PARTIAL;
            } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));

            pDesc->pProcHeap->add_partial( pDesc );
        }
        /// Allocates a new processor descriptor
        processor_desc * new_processor_desc( unsigned int nProcessorId )
        {
            CDS_UNUSED( nProcessorId );
            processor_desc * pDesc;
            const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();

            /*
                Processor descriptor layout

                proc_desc -  64-byte alignment
                page_heap[0] 64-byte alignment
                page_heap[1] 64-byte alignment
                ...
                page_heap[P] 64-byte alignment

                proc_heap[0] 64-byte alignment
                proc_heap[1] 64-byte alignment
                ...
                proc_heap[N] 64-byte alignment
            */

            const size_t szDesc =
                ( sizeof(processor_desc)
                    + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
                    + c_nAlignment - 1
                ) / c_nAlignment;

            const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();

            static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );

            // TSan false positive: the new descriptor will be linked later with a release fence
            CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;

            pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment )) processor_desc;

            pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
            for ( size_t i = 0; i < nPageHeapCount; ++i )
                new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));

            // initialize processor heaps
            pDesc->arrProcHeap =
                reinterpret_cast<processor_heap *>(
                    reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
                    & ~(uintptr_t(c_nAlignment) - 1)
                );

            processor_heap * pProcHeap = pDesc->arrProcHeap;
            processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
            for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
                new (pProcHeap) processor_heap();
                pProcHeap->pProcDesc = pDesc;
                pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
                if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
                    pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
                else
                    pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
            }
            CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
            return pDesc;
        }
        void free_processor_heap( processor_heap * pProcHeap )
        {
            assert( pProcHeap->nPageIdx != processor_heap::c_nPageSelfAllocation );

            page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
            superblock_desc * pDesc;

            for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
                pageHeap.free( pDesc->pSB );
                m_AlignedHeap.free( pDesc );
            }

            superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
            if ( pPartial ) {
                pageHeap.free( pPartial->pSB );
                m_AlignedHeap.free( pPartial );
            }

            pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
            if ( pDesc ) {
                pageHeap.free( pDesc->pSB );
                m_AlignedHeap.free( pDesc );
            }
        }
        /// Frees processor descriptor
        void free_processor_desc( processor_desc * pDesc )
        {
            const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();

            processor_heap * const pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();

            // free large blocks only
            for ( processor_heap * pProcHeap = pDesc->arrProcHeap; pProcHeap < pProcHeapEnd; ++pProcHeap ) {
                if ( pProcHeap->nPageIdx != processor_heap::c_nPageSelfAllocation )
                    free_processor_heap( pProcHeap );

                pProcHeap->~processor_heap();
            }

            for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
                m_AlignedHeap.free( pSBDesc );

            for (size_t i = 0; i < nPageHeapCount; ++i )
                (pDesc->pageHeaps + i)->page_heap::~page_heap();

            pDesc->pageHeaps = nullptr;

            pDesc->processor_desc::~processor_desc();
            m_AlignedHeap.free( pDesc );
        }
        /// Allocates a new superblock descriptor
        superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
        {
            anchor_tag anchor;
            superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
            if ( pDesc == nullptr ) {
                pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment )) superblock_desc;
                assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );

                anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
                anchor.tag = 0;
                pDesc->anchor.store( anchor, atomics::memory_order_relaxed );

                pProcHeap->stat.incDescAllocCount();
            }
            pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
            pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
            assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
            pDesc->pProcHeap = pProcHeap;

            anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
            anchor.avail = 1;
            pDesc->anchor.store( anchor, atomics::memory_order_relaxed );

            return pDesc;
        }
        /// Allocates a superblock page
        byte * new_superblock_buffer( processor_heap * pProcHeap )
        {
            pProcHeap->stat.incBlockAllocated();
            if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
                return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
            }
            else {
                return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
            }
        }

        /// Frees a superblock descriptor and its page
        void free_superblock( superblock_desc * pDesc )
        {
            pDesc->pProcHeap->stat.incBlockDeallocated();
            processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
            if ( pDesc->pSB ) {
                if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation )
                    free( pDesc->pSB );
                else
                    pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
            }
            pProcDesc->listSBDescFree.push( pDesc );
        }
        /// Allocates a memory block
        block_header * int_alloc(
            size_t nSize    ///< Size of memory block to allocate in bytes
        )
        {
            typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
            if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
                return alloc_from_OS( nSize );
            }
            assert( nSizeClassIndex < m_SizeClassSelector.size());

            block_header * pBlock;
            processor_heap * pProcHeap;
            while ( true ) {
                pProcHeap = find_heap( nSizeClassIndex );
                if ( !pProcHeap )
                    return alloc_from_OS( nSize );
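                // Allocation fast path: try the active superblock first, then a
                // partially filled one, and finally allocate a fresh superblock.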
                if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
                    break;
                if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
                    break;
                if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
                    break;
            }

            pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );

            assert( pBlock != nullptr );
            return pBlock;
        }
    public:
        /// Heap constructor
        Heap()
        {
            // Explicit libcds initialization is needed since a static object may be constructed
            cds::Initialize();

            m_nProcessorCount = m_Topology.processor_count();
            m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
                atomics::atomic<processor_desc *>[ m_nProcessorCount ];
            memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount )   ;   // ?? memset for atomic<>
        }

        /// Heap destructor
        /**
            The destructor frees all memory allocated by the heap.
        */
        ~Heap()
        {
            for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
                processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
                if ( pDesc )
                    free_processor_desc( pDesc );
            }

            m_AlignedHeap.free( m_arrProcDesc );

            // Explicit termination of libcds
            cds::Terminate();
        }
        /// Allocates a memory block
        void * alloc(
            size_t nSize    ///< Size of memory block to allocate in bytes
        )
        {
            block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );

            // Bound checking is only for our blocks
            if ( !pBlock->isOSAllocated()) {
                // the block is allocated from our heap - the bound checker is applicable
                m_BoundChecker.make_trailer(
                    reinterpret_cast<byte *>(pBlock + 1),
                    reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
                    nSize
                );
            }

            CDS_TSAN_ANNOTATE_NEW_MEMORY( pBlock + 1, nSize );
            return pBlock + 1;
        }
        /// Frees a previously allocated memory block
        void free(
            void * pMemory  ///< Pointer to memory block to free
        )
        {
            block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
            block_header * pBlock = pRedirect->begin();

            if ( pBlock->isOSAllocated()) {
                // Block has been allocated from the OS
                m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize());
                m_LargeHeap.free( pBlock );
                return;
            }

            assert( !pBlock->isAligned());
            superblock_desc * pDesc = pBlock->desc();

            m_BoundChecker.check_bounds(
                pRedirect + 1,
                reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
                pDesc->nBlockSize
            );

            anchor_tag oldAnchor;
            anchor_tag newAnchor;
            processor_heap_base * pProcHeap = pDesc->pProcHeap;

            pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );

            oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
            do {
                newAnchor = oldAnchor;
                reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
                newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;

                assert( oldAnchor.state != SBSTATE_EMPTY );

                if ( oldAnchor.state == SBSTATE_FULL )
                    newAnchor.state = SBSTATE_PARTIAL;

                if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
                    //pProcHeap = pDesc->pProcHeap;
                    //CDS_COMPILER_RW_BARRIER ;   // instruction fence is needed?..
                    newAnchor.state = SBSTATE_EMPTY;
                }
                else
                    newAnchor.count += 1;
            } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));

            pProcHeap->stat.incFreeCount();

            if ( newAnchor.state == SBSTATE_EMPTY ) {
                if ( pProcHeap->unlink_partial( pDesc ))
                    free_superblock( pDesc );
            }
            else if (oldAnchor.state == SBSTATE_FULL ) {
                assert( pProcHeap != nullptr );
                pProcHeap->stat.decDescFull();
                pProcHeap->add_partial( pDesc );
            }
        }
        /// Reallocates a memory block
        /**
            If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
            the return value is \p nullptr, and \p pMemory is left pointing at a freed block.

            If there is not enough available memory to expand the block to the given size,
            the original block is left unchanged, and \p nullptr is returned.

            An aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
            then the return value is \p nullptr and the original block is left unchanged.
        */
        void * realloc(
            void * pMemory, ///< Pointer to previously allocated memory block
            size_t nNewSize ///< New size of memory block, in bytes
        )
        {
            if ( nNewSize == 0 ) {
                free( pMemory );
                return nullptr;
            }

            const size_t nOrigSize = nNewSize;
            nNewSize += sizeof(block_header) + bound_checker::trailer_size;

            block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;

            // Reallocation of an aligned block is not possible
            if ( pBlock->isAligned()) {
                return nullptr;
            }

            if ( pBlock->isOSAllocated()) {
                // The block has been allocated from the OS
                size_t nCurSize = pBlock->getOSAllocSize();

                if ( nCurSize >= nNewSize )
                    return pMemory;

                void * pNewBuf = alloc( nOrigSize );
                if ( pNewBuf ) {
                    memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header));
                    free( pMemory );
                }
                return pNewBuf;
            }

            superblock_desc * pDesc = pBlock->desc();
            if ( nNewSize <= pDesc->nBlockSize ) {
                // In-place reallocation
                m_BoundChecker.make_trailer(
                    reinterpret_cast<byte *>(pBlock + 1),
                    reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
                    nOrigSize
                );
                return pMemory;
            }

            void * pNew = alloc( nOrigSize );
            if ( pNew ) {
                memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header));
                free( pMemory );
            }
            return pNew;
        }
        /// Allocates an aligned memory block
        void * alloc_aligned(
            size_t nSize,       ///< Size of memory block to allocate in bytes
            size_t nAlignment   ///< Alignment
        )
        {
            if ( nAlignment <= c_nDefaultBlockAlignment ) {
                void * p = alloc( nSize );
                assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
                return p;
            }

            block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );

            block_header * pRedirect;
            if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
                pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
                assert( pRedirect != pBlock );
                pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );

                assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
            }
            else
                pRedirect = pBlock;

            // Bound checking is only for our blocks
            if ( !pBlock->isOSAllocated()) {
                // the block is allocated from our heap - the bound checker is applicable
                m_BoundChecker.make_trailer(
                    reinterpret_cast<byte *>(pRedirect + 1),
                    reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
                    nSize
                );
            }

            return pRedirect + 1;
        }
        /// Frees an aligned memory block previously allocated by \ref alloc_aligned
        void free_aligned(
            void * pMemory  ///< Pointer to memory block to free
        )
        {
            free( pMemory );
        }

        /// Gets instant summary statistics
        void summaryStat( summary_stat& st )
        {
            size_t nProcHeapCount = m_SizeClassSelector.size();
            for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
                processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
                if ( pProcDesc ) {
                    for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
                        processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
                        if ( pProcHeap )
                            st.add_procheap_stat( pProcHeap->stat );
                    }
                }
            }
            st.add_heap_stat( m_OSAllocStat );
        }
    };

}}} // namespace cds::memory::michael
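/*
    Overall usage sketch (illustrative; \p s_Heap is a hypothetical name).
    A Heap is usually a static object that lives for the whole program:
    \code
    static cds::memory::michael::Heap<> s_Heap;

    void * p  = s_Heap.alloc( 100 );               // small block: served from a superblock
    void * pa = s_Heap.alloc_aligned( 256, 64 );   // 64-byte-aligned block
    p = s_Heap.realloc( p, 200 );                  // may allocate a new block and copy
    s_Heap.free( p );
    s_Heap.free_aligned( pa );
    \endcode
*/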
#endif  // CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H