-coreprof-enable cpe_taskstallmem
-USEOOO= -ooojava 24 2 -squeue -ooodebug-disable-task-mem-pool #-ooodebug
+USEOOO= -ooojava 24 2 -squeue -mempool-detect-misuse #-ooodebug-disable-task-mem-pool #-ooodebug
BSFLAGS= -64bit -mainclass $(PROGRAM) -heapsize-mb 50 -garbagestats -joptimize -noloop -debug -debug-deque # -optimize src-after-pp
DRELEASEMODE=-disjoint-release-mode -disjoint-dvisit-stack-callees-on-top -disjoint-alias-file aliases.txt tabbed
// don't bother if the task never has children (a leaf task)
output.println( "#ifndef OOO_DISABLE_TASKMEMPOOL" );
if( !fsen.getIsLeafSESE() ) {
- output.println(" runningSESE->taskRecordMemPool = taskpoolcreate( "+
- maxTaskRecSizeStr+" );");
+ output.println(" runningSESE->taskRecordMemPool = poolcreate( "+
+ maxTaskRecSizeStr+", freshTaskRecordInitializer );");
} else {
// make it clear we purposefully did not initialize this
output.println(" runningSESE->taskRecordMemPool = (MemPool*)0x7;");
) {
output.println(" "+
fsen.getSESErecordName()+"* seseToIssue = ("+
- fsen.getSESErecordName()+"*) taskpoolalloc( runningSESE->taskRecordMemPool );");
+ fsen.getSESErecordName()+"*) poolalloc( runningSESE->taskRecordMemPool );");
} else {
output.println(" "+
fsen.getSESErecordName()+"* seseToIssue = ("+
//////////////////////////////////////////////////////////
#include <stdlib.h>
+
+#ifdef MEMPOOL_DETECT_MISUSE
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+static INTPTR pageSize;
+#endif
+
#include "runtime.h"
#include "mem.h"
#include "mlp_lock.h"
#define CACHELINESIZE 64
+
// a free record doubles as its own free-list link while it sits in the pool
typedef struct MemPoolItem_t {
void* next; // next free record; NULL when this is the last one
} MemPoolItem;
+
typedef struct MemPool_t {
int itemSize; // size in bytes of each record handed to clients
- MemPoolItem* head;
- // avoid cache line contention between producer/consumer...
- char buffer[CACHELINESIZE - sizeof(void*)];
+ // only invoke this on items that are
+ // actually new, saves time for reused
+ // items
+ void(*initFreshlyAllocated)(void*);
+#ifdef MEMPOOL_DETECT_MISUSE
+ int allocSize; // itemSize padded so a page-aligned record fits inside
+#else
+ //normal version
+ MemPoolItem* head; // consumer end of the free list
+ // avoid cache line contention between producer/consumer...
+ char buffer[CACHELINESIZE];
+ MemPoolItem* tail; // producer end, advanced by poolfreeinto via CAS
+#endif
} MemPool;
// the memory pool must always have at least one
// item in it
-static MemPool* poolcreate( int itemSize ) {
- MemPool* p = RUNMALLOC( sizeof( MemPool ) );
- p->itemSize = itemSize;
- p->head = RUNMALLOC( itemSize );
+static MemPool* poolcreate( int itemSize,
+ void(*initializer)(void*)
+ ) {
+
+ MemPool* p = RUNMALLOC( sizeof( MemPool ) );
+ p->itemSize = itemSize;
+
+ p->initFreshlyAllocated = initializer;
+
+#ifdef MEMPOOL_DETECT_MISUSE
+ // when detecting misuse, round the item size
+ // up to a page and add a page, so whatever
+ // allocated memory you get, you can use a
+ // page-aligned subset as the record
+ pageSize = sysconf( _SC_PAGESIZE );
+
+ if( itemSize % pageSize == 0 ) {
+ // if the item size is already an exact multiple
+ // of the page size, just increase by one page
+ p->allocSize = itemSize + pageSize;
+ } else {
+ // otherwise, round down to a page size, then add two
+ p->allocSize = (itemSize & ~(pageSize-1)) + 2*pageSize;
+ }
+#else
+
+ // normal version
+ p->head = RUNMALLOC( p->itemSize );
+
+ if( p->initFreshlyAllocated != NULL ) {
+ p->initFreshlyAllocated( p->head );
+ }
+
p->head->next = NULL;
p->tail = p->head;
+#endif
+
return p;
}
-// CAS
-// in: a ptr, expected old, desired new
-// return: actual old
-//
-// Pass in a ptr, what you expect the old value is and
-// what you want the new value to be.
-// The CAS returns what the value is actually: if it matches
-// your proposed old value then you assume the update was successful,
-// otherwise someone did CAS before you, so try again (the return
-// value is the old value you will pass next time.)
+
+#ifdef MEMPOOL_DETECT_MISUSE
+static inline void poolfreeinto( MemPool* p, void* ptr ) {
+ // don't actually return memory to the pool, just lock
+ // it up tight so first code to touch it badly gets caught
+ // also, mprotect automatically protects full pages
+ if( mprotect( ptr, p->itemSize, PROT_NONE ) != 0 ) {
+ printf( "mprotect failed, %s.\n", strerror( errno ) );
+ exit( -1 );
+ }
+}
+
+#else
+
+// normal version
static inline void poolfreeinto( MemPool* p, void* ptr ) {
MemPoolItem* tailCurrent;
// if CAS failed, retry entire operation
}
}
+#endif
+
+
+
+#ifdef MEMPOOL_DETECT_MISUSE
+static inline void* poolalloc( MemPool* p ) {
+ // put the memory we intend to expose to client
+ // on a page-aligned boundary, always return
+ // new memory
+ INTPTR nonAligned = (INTPTR) RUNMALLOC( p->allocSize );
+
+ void* newRec = (void*)((nonAligned + pageSize-1) & ~(pageSize-1));
+
+ if( p->initFreshlyAllocated != NULL ) {
+ p->initFreshlyAllocated( newRec );
+ }
+
+ return newRec;
+}
+
+#else
+
+// normal version
// normal-version allocator: pop the head record from the free list,
// or RUNMALLOC a fresh one when only a single item remains
static inline void* poolalloc( MemPool* p ) {
// to protect CAS in poolfree from dereferencing
int i;
if(next == NULL) {
// only one item, so don't take from pool
- return (void*) RUNMALLOC( p->itemSize );
+ void* newRec = RUNMALLOC( p->itemSize );
+
+ // the initializer runs ONLY for fresh memory; records
+ // recycled through the free list skip it
+ if( p->initFreshlyAllocated != NULL ) {
+ p->initFreshlyAllocated( newRec );
+ }
+
+ return newRec;
}
p->head = next;
- //////////////////////////////////////////////////////////
- //
- //
- // static inline void prefetch(void *x)
- // {
- // asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
- // }
- //
- //
- // but this built-in gcc one seems the most portable:
- //////////////////////////////////////////////////////////
- //__builtin_prefetch( &(p->head->next) );
// warm the first cache lines of the next free record before
// handing the popped record back to the caller
asm volatile( "prefetcht0 (%0)" :: "r" (next));
next=(MemPoolItem*)(((char *)next)+CACHELINESIZE);
asm volatile( "prefetcht0 (%0)" :: "r" (next));
return (void*)headCurrent;
}
+#endif
+
// tear down a pool: in the normal version, walk the free list and
// free each record; the MemPool record itself is always freed.
// NOTE(review): in misuse mode the mprotect'ed records are never
// reclaimed -- presumably intentional for a debugging build; confirm.
static void pooldestroy( MemPool* p ) {
+
+#ifndef MEMPOOL_DETECT_MISUSE
MemPoolItem* i = p->head;
MemPoolItem* n;
free( i );
i = n;
}
+#endif
free( p );
}
+// this is for using a memPool to allocate task records,
+// pass this into the poolcreate so it will run your
+// custom init code ONLY for fresh records, reused records
+// can be returned as is
+void freshTaskRecordInitializer( void* seseRecord ) {
+ SESEcommon* c = (SESEcommon*) seseRecord;
+ pthread_cond_init( &(c->runningChildrenCond), NULL );
+ pthread_mutex_init( &(c->lock), NULL );
+
+ // no need to use return value yet, future maybe
+ //return NULL;
+}
+
+
+
+
void* mlpAllocSESErecord( int size ) {
void* newrec = RUNMALLOC( size );
if( newrec == 0 ) {
queue->head = dummy;
queue->tail = dummy;
#ifndef OOO_DISABLE_TASKMEMPOOL
- queue->rentrypool = poolcreate(sizeof(REntry));
+ queue->rentrypool = poolcreate( sizeof(REntry), NULL );
#endif
return queue;
}
}
}
-static MemPool* taskpoolcreate( int itemSize ) {
- MemPool* p = RUNMALLOC( sizeof( MemPool ) );
- SESEcommon *c = (SESEcommon *) RUNMALLOC(itemSize);
- pthread_cond_init( &(c->runningChildrenCond), NULL );
- pthread_mutex_init( &(c->lock), NULL );
-
- p->itemSize = itemSize;
- p->head = (void *)c;
- p->head->next = NULL;
- p->tail = p->head;
- return p;
-}
-
-static inline void* taskpoolalloc( MemPool* p ) {
-
- // to protect CAS in poolfree from dereferencing
- // null, treat the queue as empty when there is
- // only one item. The dequeue operation is only
- // executed by the thread that owns the pool, so
- // it doesn't require an atomic op
- MemPoolItem* headCurrent = p->head;
- MemPoolItem* next=headCurrent->next;
- int i;
- if(next == NULL) {
- // only one item, so don't take from pool
- SESEcommon *c = (SESEcommon*) RUNMALLOC( p->itemSize );
- pthread_cond_init( &(c->runningChildrenCond), NULL );
- pthread_mutex_init( &(c->lock), NULL );
- return c;
- }
-
- p->head = next;
-
- //////////////////////////////////////////////////////////
- //
- //
- // static inline void prefetch(void *x)
- // {
- // asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
- // }
- //
- //
- // but this built-in gcc one seems the most portable:
- //////////////////////////////////////////////////////////
- //__builtin_prefetch( &(p->head->next) );
- asm volatile( "prefetcht0 (%0)" :: "r" (next));
- next=(MemPoolItem*)(((char *)next)+CACHELINESIZE);
- asm volatile( "prefetcht0 (%0)" :: "r" (next));
- next=(MemPoolItem*)(((char *)next)+CACHELINESIZE);
- asm volatile( "prefetcht0 (%0)" :: "r" (next));
- next=(MemPoolItem*)(((char *)next)+CACHELINESIZE);
- asm volatile( "prefetcht0 (%0)" :: "r" (next));
-
- return (void*)headCurrent;
-}
+// this is for using a memPool to allocate task records,
+// pass this into the poolcreate so it will run your
+// custom init code ONLY for fresh records, reused records
+// can be returned as is
+void freshTaskRecordInitializer( void* seseRecord );
+
#endif /* __MLP_RUNTIME__ */
#include "runtime.h"
#include "mem.h"
#include "mlp_lock.h"
-#include "memPool.h"
+
#define CACHELINESIZE 64
#define DQ_POP_EMPTY NULL
#define DQ_POP_ABORT NULL
+// single-queue (squeue) variant of the memory pool; a free
+// record doubles as its own free-list link
+typedef struct sqMemPoolItem_t {
+ void* next;
+} sqMemPoolItem;
+
+
+typedef struct sqMemPool_t {
+ int itemSize; // size in bytes of each record handed to clients
+
+ sqMemPoolItem* head; // consumer end of the free list
+
+ // avoid cache line contention between producer/consumer...
+ char buffer[CACHELINESIZE];
+
+ sqMemPoolItem* tail; // producer end, advanced by tagpoolfreeinto via CAS
+
+} sqMemPool;
+
+
+
typedef struct dequeItem_t {
void *otherqueue;
struct dequeItem_t * next;
dequeItem* tail;
- MemPool objret;
+ sqMemPool objret;
} deque;
#define EXTRACTPTR(x) (x&0x0000ffffffffffff)
q->objret.tail=q->objret.head;
}
+// return a record to the single-queue pool: link it in as the new
+// free-list tail using a CAS retry loop. ptr appears to carry tag
+// bits while realptr is the raw address (cf. EXTRACTPTR) --
+// NOTE(review): confirm against the deque's tagging scheme.
-static inline void tagpoolfreeinto( MemPool* p, void* ptr, void *realptr ) {
- MemPoolItem* tailCurrent;
- MemPoolItem* tailActual;
+static inline void tagpoolfreeinto( sqMemPool* p, void* ptr, void *realptr ) {
+ sqMemPoolItem* tailCurrent;
+ sqMemPoolItem* tailActual;
// set up the now unneeded record as the tail of the
// free list by treating its first bytes as next pointer
- MemPoolItem* tailNew = (MemPoolItem*) realptr;
+ sqMemPoolItem* tailNew = (sqMemPoolItem*) realptr;
tailNew->next = NULL;
// retry the whole sequence until we win the race to swing
// p->tail onto the new record
while( 1 ) {
BARRIER();
tailCurrent = p->tail;
- tailActual = (MemPoolItem*)
+ tailActual = (sqMemPoolItem*)
CAS( &(p->tail), // ptr to set
(INTPTR) tailCurrent, // current tail's next should be NULL
(INTPTR) realptr); // try set to our new tail
if( tailActual == tailCurrent ) {
// success, update tail
- tailCurrent->next = (MemPoolItem *) ptr;
+ tailCurrent->next = (sqMemPoolItem *) ptr;
return;
}
}
}
+// take a record from the single-queue pool (dequeue runs only on
+// the owning thread, per the comment below); head/next values may
+// carry tag bits, so EXTRACTPTR recovers the raw addresses before
+// they are dereferenced
-static inline void* tagpoolalloc( MemPool* p ) {
+static inline void* tagpoolalloc( sqMemPool* p ) {
// to protect CAS in poolfree from dereferencing
// null, treat the queue as empty when there is
// only one item. The dequeue operation is only
// executed by the thread that owns the pool, so
// it doesn't require an atomic op
- MemPoolItem* headCurrent = p->head;
- MemPoolItem* realHead=(MemPoolItem *) EXTRACTPTR((INTPTR)headCurrent);
- MemPoolItem* next=realHead->next;
+ sqMemPoolItem* headCurrent = p->head;
+ sqMemPoolItem* realHead=(sqMemPoolItem *) EXTRACTPTR((INTPTR)headCurrent);
+ sqMemPoolItem* next=realHead->next;
int i;
if(next == NULL) {
// only one item, so don't take from pool
// but this built-in gcc one seems the most portable:
//////////////////////////////////////////////////////////
//__builtin_prefetch( &(p->head->next) );
// prefetch the next free record's first cache lines for the caller
- MemPoolItem* realNext=(MemPoolItem *) EXTRACTPTR((INTPTR)next);
+ sqMemPoolItem* realNext=(sqMemPoolItem *) EXTRACTPTR((INTPTR)next);
asm volatile( "prefetcht0 (%0)" :: "r" (realNext));
- realNext=(MemPoolItem*)(((char *)realNext)+CACHELINESIZE);
+ realNext=(sqMemPoolItem*)(((char *)realNext)+CACHELINESIZE);
asm volatile( "prefetcht0 (%0)" :: "r" (realNext));
return (void*)headCurrent;
echo "-ooojava <numberofcores> <maxseseage>"
echo -ooodebug general OOOJava debugging messages
echo -ooodebug-disable-task-mem-pool this is a tricky module, disable for simpler runtime
+echo -mempool-detect-misuse turn on to find code misusing pool-allocated records
echo -rcr turn on runtime conflict resolver
echo -squeue use single queue
echo
then
EXTRAOPTIONS="$EXTRAOPTIONS -DOOO_DISABLE_TASKMEMPOOL"
+elif [[ $1 = '-mempool-detect-misuse' ]]
+then
+EXTRAOPTIONS="$EXTRAOPTIONS -DMEMPOOL_DETECT_MISUSE"
+
elif [[ $1 = '-heapsize-mb' ]]
then
EXTRAOPTIONS="$EXTRAOPTIONS -DINITIALHEAPSIZE_MB=($2)"