#ifndef ___MEMPOOL_H__
#define ___MEMPOOL_H__
+//////////////////////////////////////////////////////////
+//
+// A memory pool implements POOLCREATE, POOLALLOC and
+// POOLFREE to improve memory allocation by reusing records.
+//
+// EACH THREAD should have one local pool.
+//
+//
+//
+// This implementation uses a lock-free singly-linked list
+// to store reusable records. The algorithm is a much-
+// simplified version of the list described in Valois '95
+// because it supports less features.
+//
+// poolfree adds newly freed records to the list FRONT
+//
+// poolalloc either takes records from BACK or mallocs
+//
+// Note the use of dummy nodes between every valid list
+// element. This is a crucial aspect in allowing the
+// implementation to have simple CAS logic.
+//
+// Empty list:
+// dummyItem --> 1stPTR --> NULL
+// head --> tail --> 2ndPTR --> 1stPTR
+//
+// Prepare a new record to add at head:
+// Record --> 1stPTR --> currentHead
+// 2ndPTR --> 1stPTR
+// CAS( &head,
+// head,
+// 2ndPtr )
+//
+// Remove a record from tail:
+//
+//
+//
+//
+//
+//////////////////////////////////////////////////////////
+
#include <stdlib.h>
#include "mlp_lock.h"
-// A memory pool implements POOLALLOCATE and POOLFREE
-// to improve memory allocation by reusing records.
-//
+typedef struct MemPoolItem_t {
+ void* next;
+} MemPoolItem;
typedef struct MemPool_t {
int itemSize;
+ MemPoolItem* head;
- int poolSize;
- INTPTR* ringBuffer;
+ // avoid cache line contention between producer/consumer...
+ char buffer[CACHELINESIZE - sizeof(void*)];
- int head;
- int tail;
+ MemPoolItem* tail;
} MemPool;
-
-MemPool* poolcreate( int itemSize, int poolSize ) {
- MemPool* p = malloc( sizeof( MemPool ) );
- p->itemSize = itemSize;
- p->poolSize = poolSize;
- p->ringBuffer = malloc( poolSize * sizeof( INTPTR ) );
- p->head = 0;
- p->tail = 0;
+// the memory pool must always have at least one
+// item in it
+MemPool* poolcreate( int itemSize ) {
+ MemPool* p = malloc( sizeof( MemPool ) );
+ p->itemSize = itemSize;
+ p->head = malloc( itemSize );
+ p->head->next = NULL;
+ p->tail = p->head;
}
// otherwise someone did CAS before you, so try again (the return
// value is the old value you will pass next time.)
+static inline void poolfree( MemPool* p, void* ptr ) {
-static inline void* poolalloc( MemPool* p ) {
- int headNow;
- int headNext;
- int headActual;
+ MemPoolItem* tailCurrent;
+ MemPoolItem* tailActual;
- while( 1 ) {
- // if there are no free items available, just do regular
- // allocation and move on--ok to use unnsynched reads
- // for this test, its conservative
- if( p->head == p->tail ) {
- return calloc( 1, p->itemSize );
- }
+ // set up the now unneeded record to as the tail of the
+ // free list by treating its first bytes as next pointer,
+ MemPoolItem* tailNew = (MemPoolItem*) ptr;
+ newTail->next = NULL;
- // otherwise, attempt to grab a free item from the
- // front of the free list
- headNow = p->head;
- headNext = headNow + 1;
- if( headNext == p->poolSize ) {
- headNext = 0;
- }
-
- headActual = CAS( &(p->head), headNow, headNext );
- if( headActual == headNow ) {
- // can't some other pool accesses happen during
- // this time, before return???
- return (void*) p->ringBuffer[headActual];
+ while( 1 ) {
+ // make sure the null happens before the insertion,
+ // also makes sure that we reload tailCurrent, etc..
+ BARRIER();
+
+ tailCurrent = p->tail;
+ tailActual = CAS( &(p->tail), // ptr to set
+ tailCurrent, // current tail's next should be NULL
+ tailNew ); // try set to our new tail
+ if( tailActual == tailCurrent ) {
+ // success, update tail
+ tailCurrent->next = newTail;
+ return;
}
// if CAS failed, retry entire operation
}
-static inline void poolfree( MemPool* p, void* ptr ) {
- int tailNow;
- int tailNext;
- int tailActual;
-
- while( 1 ) {
- // if the ring buffer is full, just do regular free, ok to
- // use unsyhcronized reads for this test, its conservative
- if( p->tail + 1 == p->head ||
- ( p->tail == p->poolSize - 1 &&
- p->head == 0
- )
- ) {
- free( ptr );
- return;
- }
-
- // otherwise, attempt to grab add the free item to the
- // end of the free list
- tailNow = p->tail;
- tailNext = tailNow + 1;
- if( tailNext == p->poolSize ) {
- tailNext = 0;
- }
+static inline void* poolalloc( MemPool* p ) {
- tailActual = CAS( &(p->tail), tailNow, tailNext );
- if( tailActual == tailNow ) {
- // can't some other pool accesses happen during
- // this time, before return???
- p->ringBuffer[tailActual] = (INTPTR)ptr;
- return;
- }
+ // to protect CAS in poolfree from dereferencing
+ // null, treat the queue as empty when there is
+ // only one item. The dequeue operation is only
+ // executed by the thread that owns the pool, so
+ // it doesn't require an atomic op
+ MemPoolItem* headCurrent = p->head;
- // if CAS failed, retry entire operation
+ if( headCurrent->next == NULL ) {
+ // only one item, so don't take from pool
+ return calloc( 1, p->itemSize );
}
+
+ p->head = headCurrent->next;
+ return headCurrent;
}
-
#endif // ___MEMPOOL_H__
#include <stdio.h>
#include <pthread.h>
+#include "memPool.h"
+
+#define numThreads 24
+#define numCyclesPerThread 500000
+#define extraBytesInRecords 1000
struct bar {
int x;
- char takeSpace[1000];
+ char takeSpace[extraBytesInRecords];
};
struct baz {
int y;
- char takeSpace[1000];
+ char takeSpace[extraBytesInRecords];
};
struct foo {
struct bar* br;
struct baz* bz;
int z;
- char takeSpace[1000];
+ char takeSpace[extraBytesInRecords];
};
void* workerMain( void* arg ) {
- struct foo* f = (struct foo*)arg;
+ INTPTR i = (INTPTR)arg;
+ int j;
+ struct bar* br;
+ struct baz* bz;
+ struct foo* foos = malloc( numCyclesPerThread*sizeof( struct foo ) );
- f->z = f->br->x + f->bz->y;
+ for( j = 0; j < numCyclesPerThread; ++j ) {
+ br = calloc( 1, sizeof( struct bar ) );
+ bz = calloc( 1, sizeof( struct baz ) );
- free( f->br );
- free( f->bz );
+ br->x = i + j;
+ bz->y = -4321;
+
+ foos[j].br = br;
+ foos[j].bz = bz;
+ foos[j].z = foos[j].br->x + foos[j].bz->y;
+
+ free( foos[j].br );
+ free( foos[j].bz );
+ }
- pthread_exit( arg );
+ pthread_exit( foos );
}
int main() {
- int i;
+ INTPTR i;
+ int j;
- struct bar* br;
- struct baz* bz;
- struct foo* f;
+ struct foo* foos;
- int numThreads = 10000;
pthread_t threads[numThreads];
pthread_attr_t attr;
-
+
int total = 0;
-
pthread_attr_init( &attr );
pthread_attr_setdetachstate( &attr,
PTHREAD_CREATE_JOINABLE );
for( i = 0; i < numThreads; ++i ) {
- br = calloc( 1, sizeof( struct bar ) );
- bz = calloc( 1, sizeof( struct baz ) );
- f = calloc( 1, sizeof( struct foo ) );
-
- br->x = i;
- bz->y = -4321;
-
- f->br = br;
- f->bz = bz;
-
pthread_create( &(threads[i]),
&attr,
workerMain,
- (void*)f );
-
- if( i % 1000 == 0 ) {
- printf( "." );
- }
-
- if( i % 100 == 0 ) {
- sched_yield();
- }
+ (void*)i );
+ printf( "." );
}
printf( "\n" );
for( i = 0; i < numThreads; ++i ) {
- f = NULL;
+ foos = NULL;
pthread_join( threads[i],
- (void**)&f );
-
- total += f->z;
- free( f );
+ (void**)&foos );
- if( i % 1000 == 0 ) {
- printf( "+" );
+ for( j = 0; j < numCyclesPerThread; ++j ) {
+ total += foos[j].z;
}
+ free( foos );
+
+ printf( "+" );
}
printf( "\nTotal=%d\n", total );
#include <stdio.h>
#include <pthread.h>
+#include "memPool.h"
+
+#define numThreads 24
+#define numCyclesPerThread 500000
+#define extraBytesInRecords 1000
struct bar {
int x;
- char takeSpace[1000];
+ char takeSpace[extraBytesInRecords];
};
struct baz {
int y;
- char takeSpace[1000];
+ char takeSpace[extraBytesInRecords];
};
struct foo {
struct bar* br;
struct baz* bz;
int z;
- char takeSpace[1000];
+ char takeSpace[extraBytesInRecords];
};
void* workerMain( void* arg ) {
- struct foo* f = (struct foo*)arg;
+ INTPTR i = (INTPTR)arg;
+ int j;
+ struct bar* br;
+ struct baz* bz;
+ struct foo* foos = malloc( numCyclesPerThread*sizeof( struct foo ) );
- f->z = f->br->x + f->bz->y;
+ for( j = 0; j < numCyclesPerThread; ++j ) {
+ br = malloc( sizeof( struct bar ) );
+ bz = malloc( sizeof( struct baz ) );
+
+ br->x = i + j;
+ bz->y = -4321;
+
+ foos[j].br = br;
+ foos[j].bz = bz;
+ foos[j].z = foos[j].br->x + foos[j].bz->y;
- free( f->br );
- free( f->bz );
+ free( foos[j].br );
+ free( foos[j].bz );
+ }
- pthread_exit( arg );
+ pthread_exit( foos );
}
int main() {
- int i;
+ INTPTR i;
+ int j;
- struct bar* br;
- struct baz* bz;
- struct foo* f;
+ struct foo* foos;
- int numThreads = 10000;
pthread_t threads[numThreads];
pthread_attr_t attr;
-
+
int total = 0;
-
pthread_attr_init( &attr );
pthread_attr_setdetachstate( &attr,
PTHREAD_CREATE_JOINABLE );
-
for( i = 0; i < numThreads; ++i ) {
- br = malloc( sizeof( struct bar ) );
- bz = malloc( sizeof( struct baz ) );
- f = malloc( sizeof( struct foo ) );
-
- br->x = i;
- bz->y = -4321;
-
- f->br = br;
- f->bz = bz;
-
pthread_create( &(threads[i]),
&attr,
workerMain,
- (void*)f );
-
- if( i % 1000 == 0 ) {
- printf( "." );
- }
-
- if( i % 100 == 0 ) {
- sched_yield();
- }
+ (void*)i );
+ printf( "." );
}
printf( "\n" );
for( i = 0; i < numThreads; ++i ) {
- f = NULL;
+ foos = NULL;
pthread_join( threads[i],
- (void**)&f );
-
- total += f->z;
- free( f );
+ (void**)&foos );
- if( i % 1000 == 0 ) {
- printf( "+" );
+ for( j = 0; j < numCyclesPerThread; ++j ) {
+ total += foos[j].z;
}
+ free( foos );
+
+ printf( "+" );
}
printf( "\nTotal=%d\n", total );
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <pthread.h>
+#include "memPool.h"
+#include "config-test.h"
+
+
+#define numThreads 24
+#define numCyclesPerThread 500000
+#define extraBytesInRecords 1000
+
+
+#ifdef CONFIG_POOLALLOC
+__thread static MemPool* pool;
+#endif
+
+
+struct bar {
+ int x;
+ char takeSpace[extraBytesInRecords];
+};
+
+struct baz {
+ int y;
+ char takeSpace[extraBytesInRecords];
+};
+
+struct foo {
+ struct bar* br;
+ struct baz* bz;
+ int z;
+ char takeSpace[extraBytesInRecords];
+};
+
+
+void* workerMain( void* arg ) {
+
+ INTPTR i = (INTPTR)arg;
+ int j;
+ struct bar* br;
+ struct baz* bz;
+ struct foo* foos = malloc( numCyclesPerThread*sizeof( struct foo ) );
+
+ for( j = 0; j < numCyclesPerThread; ++j ) {
+ br = poolalloc( barPool );
+ bz = poolalloc( bazPool );
+
+ br->x = i + j;
+ bz->y = -4321;
+
+ foos[j].br = br;
+ foos[j].bz = bz;
+ foos[j].z = foos[j].br->x + foos[j].bz->y;
+
+ poolfree( barPool, foos[j].br );
+ poolfree( bazPool, foos[j].bz );
+ }
+
+ pthread_exit( foos );
+}
+
+
+int main() {
+
+ INTPTR i;
+ int j;
+
+ struct foo* foos;
+
+ pthread_t threads[numThreads];
+ pthread_attr_t attr;
+
+ int total = 0;
+
+
+#ifdef CONFIG_POOLALLOC
+ pool = poolcreate( sizeof( struct bar ) );
+#endif
+
+
+ pthread_attr_init( &attr );
+ pthread_attr_setdetachstate( &attr,
+ PTHREAD_CREATE_JOINABLE );
+
+
+ for( i = 0; i < numThreads; ++i ) {
+
+ pthread_create( &(threads[i]),
+ &attr,
+ workerMain,
+ (void*)i );
+ printf( "." );
+ }
+
+ printf( "\n" );
+
+ for( i = 0; i < numThreads; ++i ) {
+
+ foos = NULL;
+ pthread_join( threads[i],
+ (void**)&foos );
+
+ for( j = 0; j < numCyclesPerThread; ++j ) {
+ total += foos[j].z;
+ }
+ free( foos );
+
+ printf( "+" );
+ }
+
+ printf( "\nTotal=%d\n", total );
+
+ return 0;
+}