From 14a9a9160ea9ad6d61f139b8b34d7e75cc80e689 Mon Sep 17 00:00:00 2001 From: jzhou Date: Wed, 13 May 2009 23:14:44 +0000 Subject: [PATCH] Updates to support multicore version runtime of Tilera. Also fix a bug in BuildCodeMultiCore.java: it wrongly omitted the code that repeatedly transfers objects to the same core. In the current scheduling, sending 4 objs to 2 cores can be specified as an array [0,0,1,1] indicating the target core for each object. Originally the array was generated as [0,0,1,null], which caused objects to go missing. --- Robust/src/IR/Flat/BuildCodeMultiCore.java | 26 +++++--- Robust/src/Runtime/mem.c | 11 +++- Robust/src/Runtime/mem.h | 3 +- Robust/src/Runtime/multicoreruntime.h | 22 ++++--- Robust/src/Runtime/multicoretask.c | 77 +++++++++++++++++++++- 5 files changed, 115 insertions(+), 24 deletions(-) diff --git a/Robust/src/IR/Flat/BuildCodeMultiCore.java b/Robust/src/IR/Flat/BuildCodeMultiCore.java index c6dabfc5..d058c95f 100644 --- a/Robust/src/IR/Flat/BuildCodeMultiCore.java +++ b/Robust/src/IR/Flat/BuildCodeMultiCore.java @@ -1048,7 +1048,7 @@ public class BuildCodeMultiCore extends BuildCode { output.println("}"); } - Vector sendto = new Vector(); + //Vector sendto = new Vector(); Queue queue = null; if(targetCoreTbl != null) { queue = targetCoreTbl.get(tmpFState); @@ -1106,7 +1106,9 @@ public class BuildCodeMultiCore extends BuildCode { } else { tmpinfo.fs = tmpFState; } - if(!contains(sendto, tmpinfo)) { + // fixed 05/12/09, it's very likely to repeatedly send an object to the same core + // as sheduled + //if(!contains(sendto, tmpinfo)) { qinfo = outputtransqueues(tmpinfo.fs, targetcore, output); output.println("tmpObjInfo = RUNMALLOC(sizeof(struct transObjInfo));"); output.println("tmpObjInfo->objptr = (void *)" + tmpinfo.name + ";"); @@ -1114,8 +1116,8 @@ public class BuildCodeMultiCore extends BuildCode { output.println("tmpObjInfo->queues = " + qinfo.qname + ";"); output.println("tmpObjInfo->length = " + qinfo.length + ";"); 
output.println("addNewItem(totransobjqueue, (void*)tmpObjInfo);"); - sendto.add(tmpinfo); - } + //sendto.add(tmpinfo); + //} output.println("}"); } output.println("break;"); @@ -1142,7 +1144,9 @@ public class BuildCodeMultiCore extends BuildCode { } else { tmpinfo.fs = tmpFState; } - if(!contains(sendto, tmpinfo)) { + // fixed 05/12/09, it's very likely to repeatedly send an object to the same core + // as sheduled + //if(!contains(sendto, tmpinfo)) { qinfo = outputtransqueues(tmpinfo.fs, targetcore, output); output.println("tmpObjInfo = RUNMALLOC(sizeof(struct transObjInfo));"); output.println("tmpObjInfo->objptr = (void *)" + tmpinfo.name + ";"); @@ -1150,8 +1154,8 @@ public class BuildCodeMultiCore extends BuildCode { output.println("tmpObjInfo->queues = " + qinfo.qname + ";"); output.println("tmpObjInfo->length = " + qinfo.length + ";"); output.println("addNewItem(totransobjqueue, (void*)tmpObjInfo);"); - sendto.add(tmpinfo); - } + //sendto.add(tmpinfo); + //} output.println("}"); } output.println("/* increase index*/"); @@ -1196,7 +1200,9 @@ public class BuildCodeMultiCore extends BuildCode { } else { tmpinfo.fs = tmpFState; } - if(!contains(sendto, tmpinfo)) { + // fixed 05/12/09, it's very likely to repeatedly send an object to the same core + // as sheduled + //if(!contains(sendto, tmpinfo)) { qinfo = outputtransqueues(tmpinfo.fs, targetcores.elementAt(i), output); output.println("tmpObjInfo = RUNMALLOC(sizeof(struct transObjInfo));"); output.println("tmpObjInfo->objptr = (void *)" + tmpinfo.name + ";"); @@ -1204,8 +1210,8 @@ public class BuildCodeMultiCore extends BuildCode { output.println("tmpObjInfo->queues = " + qinfo.qname + ";"); output.println("tmpObjInfo->length = " + qinfo.length + ";"); output.println("addNewItem(totransobjqueue, (void*)tmpObjInfo);"); - sendto.add(tmpinfo); - } + //sendto.add(tmpinfo); + //} output.println("}"); //} } diff --git a/Robust/src/Runtime/mem.c b/Robust/src/Runtime/mem.c index 5ef956b0..04c8115a 100644 --- 
a/Robust/src/Runtime/mem.c +++ b/Robust/src/Runtime/mem.c @@ -14,6 +14,15 @@ }*/ void * mycalloc(int m, int size) { + void * p = NULL; + int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); + BAMBOO_START_CRITICAL_SECTION_MEM(); + p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize); + BAMBOO_CLOSE_CRITICAL_SECTION_MEM(); + return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK)); +} + +void * mycalloc_share(int m, int size) { void * p = NULL; int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); BAMBOO_START_CRITICAL_SECTION_MEM(); @@ -25,7 +34,7 @@ void * mycalloc(int m, int size) { void * mycalloc_i(int m, int size) { void * p = NULL; int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); - p = BAMBOO_SHARE_MEM_CALLOC(m, isize); // calloc(m, isize); + p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize); return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK)); } diff --git a/Robust/src/Runtime/mem.h b/Robust/src/Runtime/mem.h index 954019ee..8e033a78 100644 --- a/Robust/src/Runtime/mem.h +++ b/Robust/src/Runtime/mem.h @@ -24,9 +24,10 @@ //#define PTR(x) (x) #else void * mycalloc(int m, int size); +void * mycalloc_share(int m, int size); void * mycalloc_i(int m, int size); void myfree(void * ptr); -#define FREEMALLOC(x) mycalloc(1,x) +#define FREEMALLOC(x) mycalloc_share(1,x) #define RUNMALLOC(x) mycalloc(1,x) // handle interruption inside #define RUNMALLOC_I(x) mycalloc_i(1,x) // with interruption blocked beforehand #define RUNFREE(x) myfree(x); diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h index 164a1b8d..e6bf0c63 100644 --- a/Robust/src/Runtime/multicoreruntime.h +++ b/Robust/src/Runtime/multicoreruntime.h @@ -95,16 +95,16 @@ inline void initCommunication(void) __attribute__((always_inline)); inline void fakeExecution(void) __attribute__((always_inline)); inline void terminate(void) 
__attribute__((always_inline)); -inline void send_msg_1(int targetcore, int n0) __attribute__((always_inline)); -inline void send_msg_2(int targetcore, int n0, int n1) __attribute__((always_inline)); -inline void send_msg_3(int targetcore, int n0, int n1, int n2) __attribute__((always_inline)); -inline void send_msg_4(int targetcore, int n0, int n1, int n2, int n3) __attribute__((always_inline)); -inline void send_msg_5(int targetcore, int n0, int n1, int n2, int n3, int n4) __attribute__((always_inline)); -inline void send_msg_6(int targetcore, int n0, int n1, int n2, int n3, int n4, int n5) __attribute__((always_inline)); -inline void cache_msg_2(int targetcore, int n0, int n1) __attribute__((always_inline)); -inline void cache_msg_3(int targetcore, int n0, int n1, int n2) __attribute__((always_inline)); -inline void cache_msg_4(int targetcore, int n0, int n1, int n2, int n3) __attribute__((always_inline)); -inline void cache_msg_6(int targetcore, int n0, int n1, int n2, int n3, int n4, int n5) __attribute__((always_inline)); +inline void send_msg_1(int targetcore, unsigned long n0) __attribute__((always_inline)); +inline void send_msg_2(int targetcore, unsigned long n0, unsigned long n1) __attribute__((always_inline)); +inline void send_msg_3(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2) __attribute__((always_inline)); +inline void send_msg_4(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3) __attribute__((always_inline)); +inline void send_msg_5(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3, unsigned long n4) __attribute__((always_inline)); +inline void send_msg_6(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3, unsigned long n4, unsigned long n5) __attribute__((always_inline)); +inline void cache_msg_2(int targetcore, unsigned long n0, unsigned long n1) __attribute__((always_inline)); +inline void cache_msg_3(int 
targetcore, unsigned long n0, unsigned long n1, unsigned long n2) __attribute__((always_inline)); +inline void cache_msg_4(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3) __attribute__((always_inline)); +inline void cache_msg_6(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3, unsigned long n4, unsigned long n5) __attribute__((always_inline)); inline void transferObject(struct transObjInfo * transObj); inline int receiveMsg(void) __attribute__((always_inline)); @@ -122,6 +122,8 @@ inline void profileTaskEnd(void) __attribute__((always_inline)); // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core // // BAMBOO_DEBUGPRINT(x): print out integer x // // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x // +// BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of whose // +// size in bytes is y on local memory // // BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of whose // // size in bytes is y on shared memory // // BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE() // diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c index 93a16e92..b47a6203 100644 --- a/Robust/src/Runtime/multicoretask.c +++ b/Robust/src/Runtime/multicoretask.c @@ -214,9 +214,12 @@ inline void run(void * arg) { while(!isEmpty(&objqueue)) { void * obj = NULL; BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif #ifdef PROFILE //isInterrupt = false; -#endif +#endif #ifdef DEBUG BAMBOO_DEBUGPRINT(0xeee1); #endif @@ -260,8 +263,14 @@ inline void run(void * arg) { #ifdef DEBUG BAMBOO_DEBUGPRINT_REG(taskindex); BAMBOO_DEBUGPRINT_REG(paramindex); + struct ___Object___ * tmpptr = (struct ___Object___ *)obj; + tprintf("Process %x(%d): receive obj %x(%lld), ptrflag %x\n", corenum, corenum, (int)obj, (long)obj, tmpptr->flag); #endif + enqueueObject_I(obj, queues, 1); +#ifdef DEBUG + 
BAMBOO_DEBUGPRINT_REG(hashsize(activetasks)); +#endif } removeItem(&objqueue, objitem); releasewritelock_I(obj); @@ -277,9 +286,15 @@ inline void run(void * arg) { //isInterrupt = true; #endif BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif break; } BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif } #ifdef PROFILE if(isChecking) { @@ -304,8 +319,12 @@ inline void run(void * arg) { (waitconfirm && (numconfirm == 0))) { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xee04); + BAMBOO_DEBUGPRINT_REG(waitconfirm); #endif BAMBOO_START_CRITICAL_SECTION_STATUS(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif corestatus[corenum] = 0; numsendobjs[corenum] = self_numsendobjs; numreceiveobjs[corenum] = self_numreceiveobjs; @@ -373,6 +392,9 @@ inline void run(void * arg) { // out progiling data #ifdef PROFILE BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif for(i = 1; i < NUMCORES; ++i) { // send profile request msg to core i send_msg_2(i, 6, totalexetime); @@ -381,6 +403,9 @@ inline void run(void * arg) { outputProfileData(); while(true) { BAMBOO_START_CRITICAL_SECTION_STATUS(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif profilestatus[corenum] = 0; // check the status of all cores allStall = true; @@ -399,6 +424,9 @@ inline void run(void * arg) { if(!allStall) { int halt = 100; BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif while(halt--) { } } else { @@ -426,6 +454,9 @@ inline void run(void * arg) { numconfirm = 0; } BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif } } else { if(!sendStall) { @@ -941,7 +972,7 @@ foundtag: } } } - + /* Check flags */ for(i=0; inumberofterms; i++) { int andmask=parameter->intarray[i*2]; @@ -1778,8 +1809,14 @@ bool getreadlock(void * ptr) { // reside on this core int deny = 0; BAMBOO_START_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + 
BAMBOO_DEBUGPRINT(0xf001); +#endif deny = processlockrequest(0, lock2require, (int)ptr, corenum, corenum, false); BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif if(deny == -1) { // redirected return true; @@ -1820,6 +1857,9 @@ void releasereadlock(void * ptr) { if(targetcore == corenum) { BAMBOO_START_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif // reside on this core if(!RuntimeHashcontainskey(locktbl, reallock)) { // no locks for this object, something is wrong @@ -1832,6 +1872,9 @@ void releasereadlock(void * ptr) { lockvalue->value--; } BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif return; } else { // send lock release msg @@ -1932,8 +1975,14 @@ bool getwritelock(void * ptr) { // reside on this core int deny = 0; BAMBOO_START_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif deny = processlockrequest(1, lock2require, (int)ptr, corenum, corenum, false); BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe555); BAMBOO_DEBUGPRINT_REG(lockresult); @@ -1985,6 +2034,9 @@ void releasewritelock(void * ptr) { if(targetcore == corenum) { BAMBOO_START_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif // reside on this core if(!RuntimeHashcontainskey(locktbl, reallock)) { // no locks for this object, something is wrong @@ -1997,6 +2049,9 @@ void releasewritelock(void * ptr) { lockvalue->value++; } BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif return; } else { // send lock release msg @@ -2019,6 +2074,9 @@ void releasewritelock_r(void * lock, void * redirectlock) { if(targetcore == corenum) { BAMBOO_START_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif // reside on this core if(!RuntimeHashcontainskey(locktbl, reallock)) { // no locks for this object, something is wrong 
@@ -2041,6 +2099,9 @@ void releasewritelock_r(void * lock, void * redirectlock) { #endif } BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif return; } else { // send lock release with redirect info msg @@ -2618,6 +2679,9 @@ newtask: #endif getwritelock(lock); BAMBOO_START_CRITICAL_SECTION(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif #ifdef PROFILE //isInterrupt = false; #endif @@ -2643,6 +2707,9 @@ newtask: //isInterrupt = true; #endif BAMBOO_CLOSE_CRITICAL_SECTION(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif if(grount == 0) { int j = 0; @@ -2682,6 +2749,9 @@ newtask: BAMBOO_CACHE_FLUSH_RANGE((int)parameter, classsize[((struct ___Object___ *)parameter)->type]); /* BAMBOO_START_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf001); +#endif if(RuntimeHashcontainskey(objRedirectLockTbl, (int)parameter)) { int redirectlock_r = 0; RuntimeHashget(objRedirectLockTbl, (int)parameter, &redirectlock_r); @@ -2689,6 +2759,9 @@ newtask: RuntimeHashremovekey(objRedirectLockTbl, (int)parameter); } BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(); +#ifdef DEBUG + BAMBOO_DEBUGPRINT(0xf000); +#endif */ #endif tmpparam = (struct ___Object___ *)parameter; -- 2.34.1