From faa55897fc05f23266446806400f76c84844103c Mon Sep 17 00:00:00 2001 From: jjenista Date: Thu, 16 Sep 2010 21:54:42 +0000 Subject: [PATCH] modularize core prof correctly, starting mem pool strat for task records --- Robust/src/Benchmarks/oooJava/sor/makefile | 2 +- Robust/src/IR/Flat/BuildCode.java | 112 +++++++++++++----- Robust/src/Runtime/memPool.h | 130 +++++++++++++++++++++ Robust/src/Runtime/mlp_lock.h | 7 ++ 4 files changed, 220 insertions(+), 31 deletions(-) create mode 100644 Robust/src/Runtime/memPool.h diff --git a/Robust/src/Benchmarks/oooJava/sor/makefile b/Robust/src/Benchmarks/oooJava/sor/makefile index d1de74d3..d944c444 100644 --- a/Robust/src/Benchmarks/oooJava/sor/makefile +++ b/Robust/src/Benchmarks/oooJava/sor/makefile @@ -5,7 +5,7 @@ SOURCE_FILES=JGFSORBenchSizeD.java BUILDSCRIPT=../../../buildscript USEOOO= -ooojava 24 2 -ooodebug -BSFLAGS= -64bit -mainclass $(PROGRAM) -heapsize-mb 1024 -garbagestats -debug -joptimize -noloop -optimize -coreprof -coreprof-eventwords 1024*1024*128 -coreprof-checkoverflow +BSFLAGS= -64bit -mainclass $(PROGRAM) -heapsize-mb 1024 -garbagestats -debug -joptimize -noloop -optimize -coreprof -coreprof-eventwords 1024*1024*128 #-coreprof-checkoverflow DISJOINT= -disjoint -disjoint-k 1 -enable-assertions #-disjoint-desire-determinism default: diff --git a/Robust/src/IR/Flat/BuildCode.java b/Robust/src/IR/Flat/BuildCode.java index 6f002a13..0e592230 100644 --- a/Robust/src/IR/Flat/BuildCode.java +++ b/Robust/src/IR/Flat/BuildCode.java @@ -78,6 +78,7 @@ public class BuildCode { PrefetchAnalysis pa; MLPAnalysis mlpa; OoOJavaAnalysis oooa; + String maxTaskRecSizeStr="__maxTaskRecSize___"; String mlperrstr = "if(status != 0) { "+ "sprintf(errmsg, \"MLP error at %s:%d\", __FILE__, __LINE__); "+ "perror(errmsg); exit(-1); }"; @@ -214,6 +215,11 @@ public class BuildCode { outmethodheader.println("#include "); outmethodheader.println("#include \"mlp_runtime.h\""); outmethodheader.println("#include \"psemaphore.h\""); + 
outmethodheader.println("#include \"memPool.h\""); + + // spit out a global to inform all worker threads with + // the maximum size is for any task record + outmethodheader.println("extern int "+maxTaskRecSizeStr+";"); } /* Output Structures */ @@ -252,7 +258,7 @@ public class BuildCode { outputTaskTypes(outtask); } - if( state.MLP || state.OOOJAVA) { + if( state.MLP || state.OOOJAVA) { // have to initialize some SESE compiler data before // analyzing normal methods, which must happen before // generating SESE internal code @@ -297,7 +303,7 @@ public class BuildCode { //get effect set Hashtable> effects=oooa.getDisjointAnalysis().getEffectsAnalysis().get(fsen); - rcr.traverseSESEBlock(fsen, effects, conflicts, rg); + //rcr.traverseSESEBlock(fsen, effects, conflicts, rg); } } } @@ -309,6 +315,10 @@ public class BuildCode { // Output function prototypes and structures for SESE's and code if( state.MLP || state.OOOJAVA ) { + // spit out a global to inform all worker threads with + // the maximum size is for any task record + outmethod.println("int "+maxTaskRecSizeStr+" = 0;"); + // used to differentiate, during code generation, whether we are // passing over SESE body code, or non-SESE code nonSESEpass = false; @@ -376,7 +386,25 @@ public class BuildCode { outmethod.println(" int i;"); if (state.MLP || state.OOOJAVA) { - //outmethod.println(" pthread_once( &mlpOnceObj, mlpInitOncePerThread );"); + + // do a calculation to determine which task record + // is the largest, store that as a global value for + // allocating records + Iterator seseit; + if(state.MLP){ + seseit=mlpa.getAllSESEs().iterator(); + }else{ + seseit=oooa.getAllSESEs().iterator(); + } + while(seseit.hasNext()){ + FlatSESEEnterNode fsen = seseit.next(); + outmethod.println("if( sizeof( "+fsen.getSESErecordName()+ + " ) > "+maxTaskRecSizeStr+ + " ) { "+maxTaskRecSizeStr+ + " = sizeof( "+fsen.getSESErecordName()+ + " ); }" ); + } + outmethod.println(" workScheduleInit( "+state.MLP_NUMCORES+", 
invokeSESEmethod );"); @@ -549,6 +577,11 @@ public class BuildCode { outmethod.println("#include \"methodheaders.h\""); outmethod.println("#include \"virtualtable.h\""); outmethod.println("#include \"runtime.h\""); + + // always include: compiler directives will leave out + // instrumentation when option is not set + outmethod.println("#include \"coreprof/coreprof.h\""); + if (state.SANDBOX) { outmethod.println("#include \"sandboxdefs.c\""); } @@ -582,9 +615,6 @@ public class BuildCode { outmethod.println("#include \"RuntimeConflictResolver.h\""); } } - if (state.COREPROF) { - outmethod.println("#include \"coreprof.h\""); - } //Store the sizes of classes & array elements generateSizeArray(outmethod); @@ -2380,8 +2410,9 @@ public class BuildCode { // initialize thread-local var to a non-zero, invalid address output.println(" seseCaller = (SESEcommon*) 0x2;"); - - output.println(" CP_LOGEVENT( CP_EVENTID_TASKEXECUTE, CP_EVENTTYPE_BEGIN );"); + if( state.COREPROF ) { + output.println(" CP_LOGEVENT( CP_EVENTID_TASKEXECUTE, CP_EVENTTYPE_BEGIN );"); + } HashSet exitset=new HashSet(); exitset.add(seseExit); @@ -2426,10 +2457,8 @@ public class BuildCode { (state.OOOJAVA && fsen.equals( oooa.getMainSESE() )) ) { outmethod.println( " /* work scheduler works forever, explicitly exit */"); - if (state.COREPROF) { - outmethod.println(" CP_EXIT();"); - outmethod.println(" CP_DUMP();"); - } + outmethod.println( " CP_EXIT();"); + outmethod.println( " CP_DUMP();"); outmethod.println( " workScheduleExit();"); outmethod.println( " exit( 0 );"); } @@ -2855,7 +2884,9 @@ public class BuildCode { output.println(" {"); output.println(" SESEcommon* common = (SESEcommon*) "+pair+";"); - //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_BEGIN );"); + if( state.COREPROF ) { + //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_BEGIN );"); + } output.println(" pthread_mutex_lock( &(common->lock) );"); output.println(" while( common->doneExecuting == FALSE 
) {"); output.println(" pthread_cond_wait( &(common->doneCond), &(common->lock) );"); @@ -2878,8 +2909,9 @@ public class BuildCode { output.println(" "+generateTemp( fmContext, td, null )+ " = child->"+vst.getAddrVar().getSafeSymbol()+";"); } - - //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_END );"); + if( state.COREPROF ) { + //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_END );"); + } output.println(" }"); } @@ -2892,7 +2924,9 @@ public class BuildCode { // otherwise the dynamic write nodes will have the local var up-to-date output.println(" {"); output.println(" if( "+dynVar+"_srcSESE != NULL ) {"); - //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_BEGIN );"); + if( state.COREPROF ) { + //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_BEGIN );"); + } output.println(" SESEcommon* common = (SESEcommon*) "+dynVar+"_srcSESE;"); output.println(" psem_take( &(common->stallSem) );"); @@ -2916,7 +2950,9 @@ public class BuildCode { output.println(" "+generateTemp( fmContext, dynVar, null )+ " = *(("+typeStr+"*) ("+ dynVar+"_srcSESE + "+dynVar+"_srcOffset));"); - //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_END );"); + if( state.COREPROF ) { + //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLVAR, CP_EVENTTYPE_END );"); + } output.println(" }"); output.println(" }"); } @@ -2984,9 +3020,13 @@ public class BuildCode { output.println(" rentry->queue=seseCaller->memoryQueueArray["+ waitingElement.getQueueID()+ "];"); output.println(" if(ADDRENTRY(seseCaller->memoryQueueArray["+ waitingElement.getQueueID() + "],rentry)==NOTREADY){"); - //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_BEGIN );"); + if( state.COREPROF ) { + //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_BEGIN );"); + } output.println(" psem_take( &(rentry->parentStallSem) );"); - //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_END 
);"); + if( state.COREPROF ) { + //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_END );"); + } output.println(" } "); if(state.RCR && rcr != null) { @@ -3032,9 +3072,13 @@ public class BuildCode { output .println(" if(ADDRENTRY(seseCaller->memoryQueueArray["+ waitingElement.getQueueID() + "],rentry)==NOTREADY){"); - //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_BEGIN );"); + if( state.COREPROF ) { + //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_BEGIN );"); + } output.println(" psem_take( &(rentry->parentStallSem) );"); - //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_END );"); + if( state.COREPROF ) { + //output.println(" CP_LOGEVENT( CP_EVENTID_TASKSTALLMEM, CP_EVENTTYPE_END );"); + } output.println(" } "); } output.println(" }"); @@ -3573,8 +3617,9 @@ public class BuildCode { output.println(" {"); - - output.println("CP_LOGEVENT( CP_EVENTID_TASKDISPATCH, CP_EVENTTYPE_BEGIN );"); + if( state.COREPROF ) { + output.println("CP_LOGEVENT( CP_EVENTID_TASKDISPATCH, CP_EVENTTYPE_BEGIN );"); + } // set up the parent if( (state.MLP && fsen == mlpa.getMainSESE()) || @@ -4078,8 +4123,9 @@ public class BuildCode { // eventually, for it to mark itself finished // output.println(" pthread_mutex_unlock( &(seseToIssue->common.lock) );"); - - output.println("CP_LOGEVENT( CP_EVENTID_TASKDISPATCH, CP_EVENTTYPE_END );"); + if( state.COREPROF ) { + output.println("CP_LOGEVENT( CP_EVENTID_TASKDISPATCH, CP_EVENTTYPE_END );"); + } output.println(" }"); @@ -4111,11 +4157,16 @@ public class BuildCode { if( fsen.getIsCallerSESEplaceholder() ) { return; } - - output.println(" CP_LOGEVENT( CP_EVENTID_TASKEXECUTE, CP_EVENTTYPE_END );"); + + if( state.COREPROF ) { + output.println(" CP_LOGEVENT( CP_EVENTID_TASKEXECUTE, CP_EVENTTYPE_END );"); + } output.println(" /* SESE exiting */"); - output.println(" CP_LOGEVENT( CP_EVENTID_TASKRETIRE, CP_EVENTTYPE_BEGIN );"); + + if( state.COREPROF ) { + 
output.println(" CP_LOGEVENT( CP_EVENTID_TASKRETIRE, CP_EVENTTYPE_BEGIN );"); + } String com = paramsprefix+"->common"; @@ -4252,8 +4303,9 @@ public class BuildCode { // calls to a non-zero, invalid address output.println(" seseCaller = (SESEcommon*) 0x1;"); - - output.println(" CP_LOGEVENT( CP_EVENTID_TASKRETIRE, CP_EVENTTYPE_END );"); + if( state.COREPROF ) { + output.println(" CP_LOGEVENT( CP_EVENTID_TASKRETIRE, CP_EVENTTYPE_END );"); + } } public void generateFlatWriteDynamicVarNode( FlatMethod fm, diff --git a/Robust/src/Runtime/memPool.h b/Robust/src/Runtime/memPool.h new file mode 100644 index 00000000..ffd665c1 --- /dev/null +++ b/Robust/src/Runtime/memPool.h @@ -0,0 +1,130 @@ +#ifndef ___MEMPOOL_H__ +#define ___MEMPOOL_H__ + +////////////////////////////////////////////////////////// +// +// A memory pool implements POOLCREATE, POOLALLOC and +// POOLFREE to improve memory allocation by reusing records. +// +// This implementation uses a lock-free singly-linked list +// to store reusable records. The list is initialized with +// one valid record, and the list is considered empty when +// it has only one record; this allows the enqueue operation's +// CAS to assume tail can always be dereferenced. +// +// poolfree adds newly freed records to the list BACK +// +// poolalloc either takes records from FRONT or mallocs +// +////////////////////////////////////////////////////////// + +#include +#include "mlp_lock.h" + + +// The cache line size is set for the AMD Opteron 6168 (dc-10) +// that has L1 and L2 cache line sizes of 64 bytes. Source: +// http://www.cs.virginia.edu/~skadron/cs451/opteron/opteron.ppt +#define CACHELINESIZE 64 + + +typedef struct MemPoolItem_t { + void* next; +} MemPoolItem; + + +typedef struct MemPool_t { + int itemSize; + MemPoolItem* head; + + // avoid cache line contention between producer/consumer... 
+  char buffer[CACHELINESIZE - sizeof(void*)];
+
+  MemPoolItem* tail;
+} MemPool;
+
+
+// Create a pool of itemSize-byte records.  The free list must always
+// hold at least one item, so one placeholder record is allocated up
+// front and serves as both head and tail.  itemSize must be at least
+// sizeof(MemPoolItem) because a free record's first bytes store the
+// next pointer.  Returns the new pool.
+static MemPool* poolcreate( int itemSize ) {
+  MemPool* p = malloc( sizeof( MemPool ) );
+  p->itemSize = itemSize;
+  p->head = malloc( itemSize );
+  p->head->next = NULL;
+  p->tail = p->head;
+  return p;
+}
+
+
+// CAS
+//  in: a ptr, expected old, desired new
+//  return: actual old
+//
+// Pass in a ptr, what you expect the old value is and
+// what you want the new value to be.
+// The CAS returns what the value is actually: if it matches
+// your proposed old value then you assume the update was successful,
+// otherwise someone did CAS before you, so try again (the return
+// value is the old value you will pass next time.)
+
+// Return the record ptr to pool p by appending it to the BACK of the
+// free list.  The record's first bytes are overwritten with the
+// list's next pointer.
+static inline void poolfree( MemPool* p, void* ptr ) {
+
+  MemPoolItem* tailCurrent;
+  MemPoolItem* tailActual;
+
+  // set up the now unneeded record to act as the tail of the
+  // free list by treating its first bytes as the next pointer
+  MemPoolItem* tailNew = (MemPoolItem*) ptr;
+  tailNew->next = NULL;
+
+  while( 1 ) {
+    // make sure the null happens before the insertion,
+    // also makes sure that we reload tailCurrent, etc..
+    BARRIER();
+
+    tailCurrent = p->tail;
+    tailActual = (MemPoolItem*)
+      CAS( &(p->tail),         // ptr to set
+           (long) tailCurrent, // expected old value: the current tail
+           (long) tailNew      // try set to our new tail
+           );
+    if( tailActual == tailCurrent ) {
+      // success: p->tail is now our record, link the old tail to it
+      tailCurrent->next = tailNew;
+      return;
+    }
+
+    // if CAS failed, retry entire operation
+  }
+}
+
+
+// Take a record from the FRONT of pool p's free list, or allocate a
+// zeroed one with calloc when the list holds only its one required
+// item.  A record taken from the list is returned as-is, not zeroed.
+static inline void* poolalloc( MemPool* p ) {
+
+  // to protect CAS in poolfree from dereferencing
+  // null, treat the queue as empty when there is
+  // only one item.  The dequeue operation is only
+  // executed by the thread that owns the pool, so
+  // it doesn't require an atomic op
+  MemPoolItem* headCurrent = p->head;
+
+  if( headCurrent->next == NULL ) {
+    // only one item, so don't take from pool
+    return calloc( 1, p->itemSize );
+  }
+
+  p->head = headCurrent->next;
+  return headCurrent;
+}
+
+
+#endif // ___MEMPOOL_H__
diff --git a/Robust/src/Runtime/mlp_lock.h b/Robust/src/Runtime/mlp_lock.h index b1bea4ba..0550a9c3 100644 --- a/Robust/src/Runtime/mlp_lock.h +++ b/Robust/src/Runtime/mlp_lock.h @@ -1,3 +1,7 @@ +#ifndef ____MLP_LOCK_H__ +#define ____MLP_LOCK_H__ + + #include "runtime.h" #include @@ -94,3 +98,6 @@ static inline int BARRIER(){ CFENCE; return 1; } + + +#endif // ____MLP_LOCK_H__ -- 2.34.1