From ea2510ecfad84d7c3d8f063c221e492bf4eafbf4 Mon Sep 17 00:00:00 2001
From: jjenista
Date: Wed, 15 Sep 2010 21:16:05 +0000
Subject: [PATCH] this micro benchmark does not accurately imitate OOOjava,
 just checking in current code for possible reuse later

---
 Robust/src/Tests/memPool/memPool.h            | 161 ++++++++++--------
 Robust/src/Tests/memPool/testMemPool-calloc.c |  81 +++++----
 Robust/src/Tests/memPool/testMemPool-malloc.c |  82 +++++----
 Robust/src/Tests/memPool/testMemPool.c        | 115 +++++++++++++
 4 files changed, 284 insertions(+), 155 deletions(-)
 create mode 100644 Robust/src/Tests/memPool/testMemPool.c

diff --git a/Robust/src/Tests/memPool/memPool.h b/Robust/src/Tests/memPool/memPool.h
index 9a729976..de9d80b2 100644
--- a/Robust/src/Tests/memPool/memPool.h
+++ b/Robust/src/Tests/memPool/memPool.h
@@ -1,34 +1,75 @@
 #ifndef ___MEMPOOL_H__
 #define ___MEMPOOL_H__
 
+//////////////////////////////////////////////////////////
+//
+// A memory pool implements POOLCREATE, POOLALLOC and
+// POOLFREE to improve memory allocation by reusing records.
+//
+// EACH THREAD should have one local pool.
+//
+// This implementation uses a lock-free singly-linked list
+// to store reusable records.  The algorithm is a much-
+// simplified version of the list described in Valois '95
+// because it supports fewer features.
+//
+// poolfree appends newly freed records at the list TAIL.
+//
+// poolalloc takes records from the list HEAD, or falls
+// back to calloc when only one record remains.
+//
+// The list always keeps at least one record as a sentinel;
+// this is the crucial aspect that keeps the CAS logic in
+// poolfree simple.
+//
+// Empty pool (sentinel only):
+//   head --> sentinel --> NULL
+//   tail --> sentinel
+//
+// Append a freed record at the tail:
+//   record->next = NULL;
+//   CAS( &tail,
++//        tailCurrent,
+//        record );
+//   tailCurrent->next = record;
+//
+// Remove a record from the head: only the pool's owner
+// thread dequeues, so plain reads and writes suffice.
+//
+//////////////////////////////////////////////////////////
+
 #include <stdlib.h>
 #include "mlp_lock.h"
 
-// A memory pool implements POOLALLOCATE and POOLFREE
-// to improve memory allocation by reusing records.
-//
+typedef struct MemPoolItem_t {
+  void* next;
+} MemPoolItem;
 
 typedef struct MemPool_t {
   int itemSize;
+  MemPoolItem* head;
 
-  int poolSize;
-  INTPTR* ringBuffer;
+  // avoid cache line contention between producer/consumer...
+  char buffer[CACHELINESIZE - sizeof(void*)];
 
-  int head;
-  int tail;
+  MemPoolItem* tail;
 } MemPool;
 
-
-MemPool* poolcreate( int itemSize, int poolSize ) {
-  MemPool* p  = malloc( sizeof( MemPool ) );
-  p->itemSize = itemSize;
-  p->poolSize = poolSize;
-  p->ringBuffer = malloc( poolSize * sizeof( INTPTR ) );
-  p->head = 0;
-  p->tail = 0;
+// the memory pool must always have at least one
+// item in it
+MemPool* poolcreate( int itemSize ) {
+  MemPool* p  = malloc( sizeof( MemPool ) );
+  p->itemSize = itemSize;
+  p->head     = malloc( itemSize );
+  p->head->next = NULL;
+  p->tail     = p->head;
+  return p;
 }
 
 
@@ -43,33 +84,29 @@ MemPool* poolcreate( int itemSize, int poolSize ) {
 // otherwise someone did CAS before you, so try again (the return
 // value is the old value you will pass next time.)
 
+static inline void poolfree( MemPool* p, void* ptr ) {
 
-static inline void* poolalloc( MemPool* p ) {
-  int headNow;
-  int headNext;
-  int headActual;
+  MemPoolItem* tailCurrent;
+  MemPoolItem* tailActual;
 
-  while( 1 ) {
-    // if there are no free items available, just do regular
-    // allocation and move on--ok to use unnsynched reads
-    // for this test, its conservative
-    if( p->head == p->tail ) {
-      return calloc( 1, p->itemSize );
-    }
+  // set up the now unneeded record as the tail of the
+  // free list by treating its first bytes as a next pointer
+  MemPoolItem* tailNew = (MemPoolItem*) ptr;
+  tailNew->next = NULL;
 
-    // otherwise, attempt to grab a free item from the
-    // front of the free list
-    headNow  = p->head;
-    headNext = headNow + 1;
-    if( headNext == p->poolSize ) {
-      headNext = 0;
-    }
-
-    headActual = CAS( &(p->head), headNow, headNext );
-    if( headActual == headNow ) {
-      // can't some other pool accesses happen during
-      // this time, before return???
-      return (void*) p->ringBuffer[headActual];
+  while( 1 ) {
+    // make sure the NULL write above happens before the
+    // insertion, and reload tailCurrent on every retry
+    BARRIER();
+
+    tailCurrent = p->tail;
+    tailActual = CAS( &(p->tail),   // ptr to set
+                      tailCurrent,  // expected current tail
+                      tailNew );    // try to set our new tail
+    if( tailActual == tailCurrent ) {
+      // success, link the old tail to the new one
+      tailCurrent->next = tailNew;
+      return;
     }
 
     // if CAS failed, retry entire operation
   }
 }
 
-static inline void poolfree( MemPool* p, void* ptr ) {
-  int tailNow;
-  int tailNext;
-  int tailActual;
-
-  while( 1 ) {
-    // if the ring buffer is full, just do regular free, ok to
-    // use unsyhcronized reads for this test, its conservative
-    if( p->tail + 1 == p->head ||
-        ( p->tail == p->poolSize - 1 &&
-          p->head == 0
-        )
-      ) {
-      free( ptr );
-      return;
-    }
-
-    // otherwise, attempt to grab add the free item to the
-    // end of the free list
-    tailNow  = p->tail;
-    tailNext = tailNow + 1;
-    if( tailNext == p->poolSize ) {
-      tailNext = 0;
-    }
+static inline void* poolalloc( MemPool* p ) {
 
-    tailActual = CAS( &(p->tail), tailNow, tailNext );
-    if( tailActual == tailNow ) {
-      // can't some other pool accesses happen during
-      // this time, before return???
-      p->ringBuffer[tailActual] = (INTPTR)ptr;
-      return;
-    }
+  // to protect the CAS in poolfree from dereferencing
+  // NULL, treat the queue as empty when there is only
+  // one item.  The dequeue operation is only executed
+  // by the thread that owns the pool, so it doesn't
+  // require an atomic op
+  MemPoolItem* headCurrent = p->head;
 
-    // if CAS failed, retry entire operation
+  if( headCurrent->next == NULL ) {
+    // only one item, so don't take from pool
+    return calloc( 1, p->itemSize );
   }
+
+  p->head = headCurrent->next;
+  return headCurrent;
 }
 
-
 #endif // ___MEMPOOL_H__
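Note: memPool.h calls CAS() and BARRIER() and uses CACHELINESIZE and INTPTR from mlp_lock.h, which is not part of this patch. A minimal stand-in sketch, assuming GCC's __sync builtins; these definitions are an assumption for illustration, not the real header:

    /* hypothetical mlp_lock.h stand-in, assumed for illustration;
       the real header in Robust/src may differ */
    #ifndef ___MLP_LOCK_H__
    #define ___MLP_LOCK_H__

    #include <stdint.h>

    #define CACHELINESIZE 64
    typedef intptr_t INTPTR;

    /* full fence: orders the NULL write in poolfree before the
       tail insertion, and forces tailCurrent to be reloaded */
    #define BARRIER() __sync_synchronize()

    /* returns the value actually observed at *ptr, so a caller
       knows its swap took effect exactly when the return value
       equals the expected value it passed */
    #define CAS( ptr, oldval, newval ) \
      __sync_val_compare_and_swap( (ptr), (oldval), (newval) )

    #endif

A value-returning CAS matches the retry loop in poolfree: the loop exits only when the returned value equals the tail the caller expected.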
diff --git a/Robust/src/Tests/memPool/testMemPool-calloc.c b/Robust/src/Tests/memPool/testMemPool-calloc.c
index e4a74644..a1e64116 100644
--- a/Robust/src/Tests/memPool/testMemPool-calloc.c
+++ b/Robust/src/Tests/memPool/testMemPool-calloc.c
@@ -2,54 +2,70 @@
 #include <stdlib.h>
 #include <pthread.h>
 
+#include "memPool.h"
+
+#define numThreads          24
+#define numCyclesPerThread  500000
+#define extraBytesInRecords 1000
 
 struct bar {
   int x;
-  char takeSpace[1000];
+  char takeSpace[extraBytesInRecords];
 };
 
 struct baz {
   int y;
-  char takeSpace[1000];
+  char takeSpace[extraBytesInRecords];
 };
 
 struct foo {
   struct bar* br;
   struct baz* bz;
   int z;
-  char takeSpace[1000];
+  char takeSpace[extraBytesInRecords];
 };
 
 
 void* workerMain( void* arg ) {
-  struct foo* f = (struct foo*)arg;
+  INTPTR i = (INTPTR)arg;
+  int j;
+  struct bar* br;
+  struct baz* bz;
+  struct foo* foos = malloc( numCyclesPerThread*sizeof( struct foo ) );
 
-  f->z = f->br->x + f->bz->y;
+  for( j = 0; j < numCyclesPerThread; ++j ) {
+    br = calloc( 1, sizeof( struct bar ) );
+    bz = calloc( 1, sizeof( struct baz ) );
 
-  free( f->br );
-  free( f->bz );
+    br->x = i + j;
+    bz->y = -4321;
+
+    foos[j].br = br;
+    foos[j].bz = bz;
+    foos[j].z  = foos[j].br->x + foos[j].bz->y;
+
+    free( foos[j].br );
+    free( foos[j].bz );
+  }
 
-  pthread_exit( arg );
+  pthread_exit( foos );
 }
 
 
 int main() {
 
-  int i;
+  INTPTR i;
+  int j;
 
-  struct bar* br;
-  struct baz* bz;
-  struct foo* f;
+  struct foo* foos;
 
-  int numThreads = 10000;
   pthread_t threads[numThreads];
   pthread_attr_t attr;
 
   int total = 0;
 
   pthread_attr_init( &attr );
   pthread_attr_setdetachstate( &attr,
                                PTHREAD_CREATE_JOINABLE );
@@ -57,44 +73,27 @@ int main() {
 
   for( i = 0; i < numThreads; ++i ) {
 
-    br = calloc( 1, sizeof( struct bar ) );
-    bz = calloc( 1, sizeof( struct baz ) );
-    f  = calloc( 1, sizeof( struct foo ) );
-
-    br->x = i;
-    bz->y = -4321;
-
-    f->br = br;
-    f->bz = bz;
-
     pthread_create( &(threads[i]),
                     &attr,
                     workerMain,
-                    (void*)f );
-
-    if( i % 1000 == 0 ) {
-      printf( "." );
-    }
-
-    if( i % 100 == 0 ) {
-      sched_yield();
-    }
+                    (void*)i );
+    printf( "." );
   }
 
   printf( "\n" );
 
   for( i = 0; i < numThreads; ++i ) {
 
-    f = NULL;
+    foos = NULL;
     pthread_join( threads[i],
-                  (void**)&f );
-
-    total += f->z;
-    free( f );
+                  (void**)&foos );
 
-    if( i % 1000 == 0 ) {
-      printf( "+" );
+    for( j = 0; j < numCyclesPerThread; ++j ) {
+      total += foos[j].z;
     }
+    free( foos );
+
+    printf( "+" );
   }
 
   printf( "\nTotal=%d\n", total );
diff --git a/Robust/src/Tests/memPool/testMemPool-malloc.c b/Robust/src/Tests/memPool/testMemPool-malloc.c
index 2c08e4a4..b4bba5ca 100644
--- a/Robust/src/Tests/memPool/testMemPool-malloc.c
+++ b/Robust/src/Tests/memPool/testMemPool-malloc.c
@@ -2,99 +2,97 @@
 #include <stdlib.h>
 #include <pthread.h>
 
+#include "memPool.h"
+
+#define numThreads          24
+#define numCyclesPerThread  500000
+#define extraBytesInRecords 1000
 
 struct bar {
   int x;
-  char takeSpace[1000];
+  char takeSpace[extraBytesInRecords];
 };
 
 struct baz {
   int y;
-  char takeSpace[1000];
+  char takeSpace[extraBytesInRecords];
 };
 
 struct foo {
   struct bar* br;
   struct baz* bz;
   int z;
-  char takeSpace[1000];
+  char takeSpace[extraBytesInRecords];
 };
 
 
 void* workerMain( void* arg ) {
-  struct foo* f = (struct foo*)arg;
+  INTPTR i = (INTPTR)arg;
+  int j;
+  struct bar* br;
+  struct baz* bz;
+  struct foo* foos = malloc( numCyclesPerThread*sizeof( struct foo ) );
 
-  f->z = f->br->x + f->bz->y;
+  for( j = 0; j < numCyclesPerThread; ++j ) {
+    br = malloc( sizeof( struct bar ) );
+    bz = malloc( sizeof( struct baz ) );
+
+    br->x = i + j;
+    bz->y = -4321;
+
+    foos[j].br = br;
+    foos[j].bz = bz;
+    foos[j].z  = foos[j].br->x + foos[j].bz->y;
 
-  free( f->br );
-  free( f->bz );
+    free( foos[j].br );
+    free( foos[j].bz );
+  }
 
-  pthread_exit( arg );
+  pthread_exit( foos );
 }
 
 
 int main() {
 
-  int i;
+  INTPTR i;
+  int j;
 
-  struct bar* br;
-  struct baz* bz;
-  struct foo* f;
+  struct foo* foos;
 
-  int numThreads = 10000;
   pthread_t threads[numThreads];
   pthread_attr_t attr;
 
   int total = 0;
 
   pthread_attr_init( &attr );
   pthread_attr_setdetachstate( &attr,
                                PTHREAD_CREATE_JOINABLE );
 
-
   for( i = 0; i < numThreads; ++i ) {
 
-    br = malloc( sizeof( struct bar ) );
-    bz = malloc( sizeof( struct baz ) );
-    f  = malloc( sizeof( struct foo ) );
-
-    br->x = i;
-    bz->y = -4321;
-
-    f->br = br;
-    f->bz = bz;
-
     pthread_create( &(threads[i]),
                     &attr,
                     workerMain,
-                    (void*)f );
-
-    if( i % 1000 == 0 ) {
-      printf( "." );
-    }
-
-    if( i % 100 == 0 ) {
-      sched_yield();
-    }
+                    (void*)i );
+    printf( "." );
  }
 
   printf( "\n" );
 
   for( i = 0; i < numThreads; ++i ) {
 
-    f = NULL;
+    foos = NULL;
     pthread_join( threads[i],
-                  (void**)&f );
-
-    total += f->z;
-    free( f );
+                  (void**)&foos );
 
-    if( i % 1000 == 0 ) {
-      printf( "+" );
+    for( j = 0; j < numCyclesPerThread; ++j ) {
+      total += foos[j].z;
     }
+    free( foos );
+
+    printf( "+" );
  }
 
   printf( "\nTotal=%d\n", total );
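The new file below includes config-test.h, which this patch does not add. Its contents are assumed here for illustration only; presumably it just toggles the pool path on or off:

    /* hypothetical config-test.h, assumed contents; define
       CONFIG_POOLALLOC to exercise the lock-free pool, leave it
       undefined to fall back to plain calloc/free */
    #ifndef ___CONFIG_TEST_H__
    #define ___CONFIG_TEST_H__

    #define CONFIG_POOLALLOC

    #endif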
diff --git a/Robust/src/Tests/memPool/testMemPool.c b/Robust/src/Tests/memPool/testMemPool.c
new file mode 100644
index 00000000..0d8b9794
--- /dev/null
+++ b/Robust/src/Tests/memPool/testMemPool.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <pthread.h>
+#include "memPool.h"
+#include "config-test.h"
+
+
+#define numThreads          24
+#define numCyclesPerThread  500000
+#define extraBytesInRecords 1000
+
+
+#ifdef CONFIG_POOLALLOC
+// each worker thread creates its own local pools,
+// one per record type
+static __thread MemPool* barPool;
+static __thread MemPool* bazPool;
+#endif
+
+
+struct bar {
+  int x;
+  char takeSpace[extraBytesInRecords];
+};
+
+struct baz {
+  int y;
+  char takeSpace[extraBytesInRecords];
+};
+
+struct foo {
+  struct bar* br;
+  struct baz* bz;
+  int z;
+  char takeSpace[extraBytesInRecords];
+};
+
+
+void* workerMain( void* arg ) {
+
+  INTPTR i = (INTPTR)arg;
+  int j;
+  struct bar* br;
+  struct baz* bz;
+  struct foo* foos = malloc( numCyclesPerThread*sizeof( struct foo ) );
+
+#ifdef CONFIG_POOLALLOC
+  barPool = poolcreate( sizeof( struct bar ) );
+  bazPool = poolcreate( sizeof( struct baz ) );
+#endif
+
+  for( j = 0; j < numCyclesPerThread; ++j ) {
+#ifdef CONFIG_POOLALLOC
+    br = poolalloc( barPool );
+    bz = poolalloc( bazPool );
+#else
+    br = calloc( 1, sizeof( struct bar ) );
+    bz = calloc( 1, sizeof( struct baz ) );
+#endif
+
+    br->x = i + j;
+    bz->y = -4321;
+
+    foos[j].br = br;
+    foos[j].bz = bz;
+    foos[j].z  = foos[j].br->x + foos[j].bz->y;
+
+#ifdef CONFIG_POOLALLOC
+    poolfree( barPool, foos[j].br );
+    poolfree( bazPool, foos[j].bz );
+#else
+    free( foos[j].br );
+    free( foos[j].bz );
+#endif
+  }
+
+  pthread_exit( foos );
+}
+
+
+int main() {
+
+  INTPTR i;
+  int j;
+
+  struct foo* foos;
+
+  pthread_t threads[numThreads];
+  pthread_attr_t attr;
+
+  int total = 0;
+
+
+  pthread_attr_init( &attr );
+  pthread_attr_setdetachstate( &attr,
+                               PTHREAD_CREATE_JOINABLE );
+
+
+  for( i = 0; i < numThreads; ++i ) {
+
+    pthread_create( &(threads[i]),
+                    &attr,
+                    workerMain,
+                    (void*)i );
+    printf( "." );
+  }
+
+  printf( "\n" );
+
+  for( i = 0; i < numThreads; ++i ) {
+
+    foos = NULL;
+    pthread_join( threads[i],
+                  (void**)&foos );
+
+    for( j = 0; j < numCyclesPerThread; ++j ) {
+      total += foos[j].z;
+    }
+    free( foos );
+
+    printf( "+" );
+  }
+
+  printf( "\nTotal=%d\n", total );
+
+  return 0;
+}
-- 
2.34.1
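For reference, a minimal single-threaded sketch of the intended pool API usage, matching memPool.h above (illustration only, not part of the commit; struct bar repeats the tests' definition):

    #include <stdlib.h>
    #include "memPool.h"

    struct bar {
      int x;
      char takeSpace[1000];
    };

    int main( void ) {
      MemPool* p = poolcreate( sizeof( struct bar ) );

      /* the pool never hands out its last record, so the
         first alloc falls through to calloc */
      struct bar* b = poolalloc( p );
      b->x = 7;

      /* freeing appends the record at the free list tail... */
      poolfree( p, b );

      /* ...so this alloc is served from the pool: it returns
         the old head record and advances head to b */
      struct bar* c = poolalloc( p );
      c->x = 8;
      poolfree( p, c );

      return 0;
    }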