output.println(" ");
output.println(" // code of this task's body should use this to access the running task record");
output.println(" runningSESE = &(___params___->common);");
+ output.println(" childSESE = 0;");
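+    // childSESE locally counts children issued by this task; the
+    // count is published to the parent's numRunningChildren with a
+    // single atomic_add at the stall point, replacing per-child atomic_inc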
output.println(" ");
  // setup the task record memory pool
// don't bother if the task never has children (a leaf task)
output.println( "#ifndef OOO_DISABLE_TASKMEMPOOL" );
if( !fsen.getIsLeafSESE() ) {
- output.println(" runningSESE->taskRecordMemPool = poolcreate( "+
+ output.println(" runningSESE->taskRecordMemPool = taskpoolcreate( "+
maxTaskRecSizeStr+" );");
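+    // assumption: maxTaskRecSizeStr names the largest generated task
+    // record size, so any child's record fits a recycled pool item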
} else {
// make it clear we purposefully did not initialize this
  // before doing anything, count this task as a new running child;
  // the count is flushed to the parent in bulk later
if( (state.MLP && fsen != mlpa.getMainSESE()) ||
(state.OOOJAVA && fsen != oooa.getMainSESE())
- ) {
- output.println(" atomic_inc(&(runningSESE->numRunningChildren));");
+ ) {
+ output.println(" childSESE++;");
}
// allocate the space for this record
    if( (state.MLP && fsen != mlpa.getMainSESE()) ||
        (state.OOOJAVA && fsen != oooa.getMainSESE())
      ) {
output.println(" "+
fsen.getSESErecordName()+"* seseToIssue = ("+
- fsen.getSESErecordName()+"*) poolalloc( runningSESE->taskRecordMemPool );");
+ fsen.getSESErecordName()+"*) taskpoolalloc( runningSESE->taskRecordMemPool );");
} else {
output.println(" "+
                   fsen.getSESErecordName()+"* seseToIssue = ("+
                   fsen.getSESErecordName()+"*) mlpAllocSESErecord( sizeof( "+
                   fsen.getSESErecordName()+" ) );");
  }
// fill in common data
output.println(" int localCount=0;");
output.println(" seseToIssue->common.classID = "+fsen.getIdentifier()+";");
- output.println(" seseToIssue->common.parentsStallSem = NULL;");
- output.println(" seseToIssue->common.forwardList = createQueue();");
output.println(" seseToIssue->common.unresolvedDependencies = 10000;");
+ output.println(" seseToIssue->common.parentsStallSem = NULL;");
+ output.println(" initQueue(&seseToIssue->common.forwardList);");
output.println(" seseToIssue->common.doneExecuting = FALSE;");
- output.println(" pthread_cond_init( &(seseToIssue->common.runningChildrenCond), NULL );");
output.println(" seseToIssue->common.numRunningChildren = 0;");
+ output.println( "#ifdef OOO_DISABLE_TASKMEMPOOL" );
+ output.println(" pthread_cond_init( &(seseToIssue->common.runningChildrenCond), NULL );");
+ output.println("#endif");
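+  // with the task mem pool enabled, records arrive from
+  // taskpoolalloc/taskpoolcreate with these sync objects already
+  // initialized, so the init is only emitted for the fallback path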
output.println(" seseToIssue->common.parent = runningSESE;");
  // start with refCount = 2, one being the count that the child itself
  // will decrement when it retires, to say it is done using its own
  // record, and the other for the parent's reference to this child
+ output.println("   seseToIssue->common.refCount = 2;");
// before potentially adding this SESE to other forwarding lists,
  // create its lock
+ output.println( "#ifdef OOO_DISABLE_TASKMEMPOOL" );
output.println(" pthread_mutex_init( &(seseToIssue->common.lock), NULL );");
-
+ output.println("#endif");
if( (state.MLP && fsen != mlpa.getMainSESE()) ||
      (state.OOOJAVA && fsen != oooa.getMainSESE())
    ) {
output.println(" pthread_mutex_lock( &(src->lock) );");
// FORWARD TODO
output.println(" if( !src->doneExecuting ) {");
- output.println(" addNewItem( src->forwardList, seseToIssue );");
+ output.println(" addNewItem( &src->forwardList, seseToIssue );");
output.println(" ++(localCount);");
output.println(" }");
output.println("#ifndef OOO_DISABLE_TASKMEMPOOL" );
// FORWARD TODO
- output.println(" if( isEmpty( src->forwardList ) ||");
- output.println(" seseToIssue != peekItem( src->forwardList ) ) {");
+ output.println(" if( isEmpty( &src->forwardList ) ||");
+ output.println(" seseToIssue != peekItem( &src->forwardList ) ) {");
output.println(" if( !src->doneExecuting ) {");
- output.println(" addNewItem( src->forwardList, seseToIssue );");
+ output.println(" addNewItem( &src->forwardList, seseToIssue );");
output.println(" ++(localCount);");
output.println(" }");
output.println(" }");
  // this SESE cannot be done until all of its children are done,
  // so grab your own lock and watch the condition variable for
  // the number of your running children dropping to zero
+ output.println(" atomic_add(childSESE, &runningSESE->numRunningChildren);");
output.println(" pthread_mutex_lock( &(runningSESE->lock) );");
output.println(" if( runningSESE->numRunningChildren > 0 ) {");
output.println(" stopforgc( (struct garbagelist *)&___locals___ );");
  // decrement the dependency count for all SESEs on your forwarding list
// FORWARD TODO
- output.println(" while( !isEmpty( runningSESE->forwardList ) ) {");
- output.println(" SESEcommon* consumer = (SESEcommon*) getItem( runningSESE->forwardList );");
+ output.println(" while( !isEmpty( &runningSESE->forwardList ) ) {");
+ output.println(" SESEcommon* consumer = (SESEcommon*) getItem( &runningSESE->forwardList );");
output.println(" if(consumer->rentryIdx>0){");
return queue;
}
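+// initQueue sets up a queue embedded by value inside another
+// structure (e.g. SESEcommon's forwardList); unlike createQueue
+// it allocates nothing, so embedded queues are never freeQueue'd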
+void initQueue(struct Queue * q) {
+ q->head=NULL;
+ q->tail=NULL;
+}
+
void freeQueue(struct Queue * q) {
RUNFREE(q);
}
#define isEmpty(x) ((x)->head==NULL)
+void initQueue(struct Queue *);
struct Queue * createQueue();
void freeQueue(struct Queue * q);
gl=gl->next;
}
// iterate forwarding list of seseRec
- struct Queue* fList=seseRec->forwardList;
+ struct Queue* fList=&seseRec->forwardList;
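+  // forwardList is now embedded in the record, hence the address-of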
updateForwardList(fList,prevUpdate);
fqItem=getNextQueueItem(fqItem);
}
#include "runtime.h"
#include "mem.h"
#include "mlp_lock.h"
+
+
#define CACHELINESIZE 64
void* next;
} MemPoolItem;
-
typedef struct MemPool_t {
int itemSize;
  MemPoolItem* head;
  MemPoolItem* tail;
} MemPool;
}
-
-
static inline void* poolalloc( MemPool* p ) {
  // to protect CAS in poolfree from dereferencing
  // null, treat the queue as empty when there is
  // only one item
return c;
}
+
+static inline void atomic_add(int i, volatile int *v) {
+ __asm__ __volatile__ (LOCK_PREFIX "addl %1,%0"
+ : "+m" (*v)
+ : "ir" (i));
+}
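+// e.g. the task-issue path uses this to flush a locally
+// accumulated child count in one shot:
+//   atomic_add( childSESE, &runningSESE->numRunningChildren );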
+
static inline int LOCKXCHG32(volatile int* ptr, int val){
int retval;
//note: xchgl always implies lock
__thread SESEcommon* runningSESE;
+__thread int childSESE=0;
__thread psemaphore runningSESEstallSem;
#define OBJPTRPTR_2_OBJTYPE( opp ) ((int*)(opp))[0]
#define OBJPTRPTR_2_OBJOID( opp ) ((int*)(opp))[1]
-
-
  // the forwarding list is a linked structure of
  // arrays, which should help task dispatch because
  // the first element is embedded in the task record
INTPTR items[FLIST_ITEMS_PER_ELEMENT];
} ForwardingListElement;
-
+struct MemPool_t;
  // these fields are common to any SESE, and the generated
  // SESE record can be cast to this type for generic access
// IMPORTANT: the class ID must be the first field of
// the task record so task dispatch works correctly!
int classID;
-
+ volatile int unresolvedDependencies;
// a parent waits on this semaphore when stalling on
// this child, the child gives it at its SESE exit
psemaphore* parentsStallSem;
- // the lock guards the following data SESE's
- // use to coordinate with one another
- pthread_mutex_t lock;
-
// NOTE: first element is embedded in the task
// record, so don't free it!
//ForwardingListElement forwardList;
- struct Queue* forwardList;
-
- volatile int unresolvedDependencies;
+ struct Queue forwardList;
- pthread_cond_t doneCond;
volatile int doneExecuting;
-
- pthread_cond_t runningChildrenCond;
- int numRunningChildren;
+ volatile int numRunningChildren;
struct SESEcommon_t* parent;
int numMemoryQueue;
int rentryIdx;
int unresolvedRentryIdx;
+ volatile int refCount;
+ int numDependentSESErecords;
+ int offsetToDepSESErecords;
+ struct MemPool_t * taskRecordMemPool;
+
struct MemoryQueue_t** memoryQueueArray;
struct REntry_t* rentryArray[NUMRENTRY];
struct REntry_t* unresolvedRentryArray[NUMRENTRY];
- int numDependentSESErecords;
- int offsetToDepSESErecords;
+
#ifdef RCR
int offsetToParamRecords;
volatile int rcrstatus;
volatile int retired;
#endif
- // for determining when task records can be returned
- // to the parent record's memory pool
- MemPool* taskRecordMemPool;
- volatile int refCount;
+ // the lock guards the following data that SESEs
+ // use to coordinate with one another
+ pthread_mutex_t lock;
+ pthread_cond_t runningChildrenCond;
} SESEcommon;
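+// note: the scalar dispatch fields now lead the record and the
+// pthread objects sit at the end, presumably to keep the hot
+// fields within the record's first cache lines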
// a thread-local var refers to the currently
// running task
extern __thread SESEcommon* runningSESE;
+extern __thread int childSESE;
// there only needs to be one stall semaphore
// per thread, just give a reference to it to
}
}
+static MemPool* taskpoolcreate( int itemSize ) {
+  MemPool* p    = calloc( 1, sizeof( MemPool ) );
+  p->itemSize   = itemSize;
+  p->head       = calloc( 1, itemSize );
+  p->head->next = NULL;
+  p->tail       = p->head;
+
+  // pre-initialize this first record's sync objects; records
+  // recycled through the pool keep them initialized
+  SESEcommon *c = (SESEcommon *) p->head;
+  pthread_cond_init( &(c->runningChildrenCond), NULL );
+  pthread_mutex_init( &(c->lock), NULL );
+  return p;
+}
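+// sketch of the intended pairing in generated code (the record
+// type and size variable are hypothetical stand-ins):
+//   MemPool* pool = taskpoolcreate( maxTaskRecSize );
+//   MyTaskRec* rec = (MyTaskRec*) taskpoolalloc( pool );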
+
+
+static inline void* taskpoolalloc( MemPool* p ) {
+
+ // to protect CAS in poolfree from dereferencing
+ // null, treat the queue as empty when there is
+ // only one item. The dequeue operation is only
+ // executed by the thread that owns the pool, so
+ // it doesn't require an atomic op
+ MemPoolItem* headCurrent = p->head;
+ MemPoolItem* next=headCurrent->next;
+ if(next == NULL) {
+ // only one item, so don't take from pool
+ SESEcommon *c = (SESEcommon*) RUNMALLOC( p->itemSize );
+ pthread_cond_init( &(c->runningChildrenCond), NULL );
+ pthread_mutex_init( &(c->lock), NULL );
+ return c;
+ }
+
+ p->head = next;
+
+  //////////////////////////////////////////////////////////
+  // prefetch the record that just became the pool's head;
+  // a kernel-style helper for this would be:
+  //
+  //   static inline void prefetch(void *x)
+  //   {
+  //     asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
+  //   }
+  //
+  // the gcc built-in below seems the most portable, but the
+  // explicit prefetcht0 hints are what actually run here:
+  //////////////////////////////////////////////////////////
+ //__builtin_prefetch( &(p->head->next) );
+ asm volatile( "prefetcht0 (%0)" :: "r" (next));
+ next=(MemPoolItem*)(((char *)next)+CACHELINESIZE);
+ asm volatile( "prefetcht0 (%0)" :: "r" (next));
+ next=(MemPoolItem*)(((char *)next)+CACHELINESIZE);
+ asm volatile( "prefetcht0 (%0)" :: "r" (next));
+ next=(MemPoolItem*)(((char *)next)+CACHELINESIZE);
+ asm volatile( "prefetcht0 (%0)" :: "r" (next));
+
+ return (void*)headCurrent;
+}
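+// the four prefetcht0 hints warm the first 4*CACHELINESIZE bytes
+// of the new head record, so the next taskpoolalloc hands back
+// memory that is already in cache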
#endif /* __MLP_RUNTIME__ */
void workScheduleSubmit( void* workUnit ) {
if( myWorkerID == workerID_NOTAWORKER ) {
+ CP_LOGEVENT( CP_EVENTID_DEBUG_A, CP_EVENTTYPE_BEGIN );
dqPushBottom( &(deques[0]), workUnit );
+ CP_LOGEVENT( CP_EVENTID_DEBUG_A, CP_EVENTTYPE_END );
return;
}
+ CP_LOGEVENT( CP_EVENTID_DEBUG_A, CP_EVENTTYPE_BEGIN );
dqPushBottom( &(deques[myWorkerID]), workUnit );
+ CP_LOGEVENT( CP_EVENTID_DEBUG_A, CP_EVENTTYPE_END );
}
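+// the CP_LOGEVENT begin/end pairs appear to bracket dqPushBottom
+// for the profiler, attributing time spent submitting work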