From 4ee5c456c36d34fa6f6ae88750f29eb41f6daa24 Mon Sep 17 00:00:00 2001 From: jzhou Date: Wed, 17 Mar 2010 00:03:17 +0000 Subject: [PATCH] code clean: define TILERA_BME mode and TILERA_ZLINUX mode. TILERA_ZLINUX mode is not tested yet --- Robust/src/IR/Flat/BuildCodeMultiCore.java | 16 +-- Robust/src/Runtime/RAW/runtime_arch.h | 1 - Robust/src/Runtime/RAW/task_arch.c | 20 +-- Robust/src/Runtime/mem.c | 14 +- Robust/src/Runtime/multicoregarbage.c | 143 +++++++++++---------- Robust/src/Runtime/multicoreruntime.h | 78 ++++++----- Robust/src/Runtime/multicoretask.c | 139 ++++++++++---------- Robust/src/Runtime/runtime.h | 2 +- Robust/src/buildscript | 84 ++++++++---- 9 files changed, 265 insertions(+), 232 deletions(-) diff --git a/Robust/src/IR/Flat/BuildCodeMultiCore.java b/Robust/src/IR/Flat/BuildCodeMultiCore.java index 94380ba1..89bc6530 100644 --- a/Robust/src/IR/Flat/BuildCodeMultiCore.java +++ b/Robust/src/IR/Flat/BuildCodeMultiCore.java @@ -676,9 +676,9 @@ public class BuildCodeMultiCore extends BuildCode { output.println("BAMBOO_DEBUGPRINT(0xAAAA);"); output.println("BAMBOO_DEBUGPRINT_REG(tmpsum);"); } else { - output.println("BAMBOO_START_CRITICAL_SECTION();"); + //output.println("BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();"); output.println("tprintf(\"Process %x(%d): task %s\\n\", corenum, corenum, \"" + task.getSymbol() + "\");"); - output.println("BAMBOO_CLOSE_CRITICAL_SECTION();"); + //output.println("BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();"); } //output.println("BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME());"); output.println("#endif"); @@ -687,9 +687,9 @@ public class BuildCodeMultiCore extends BuildCode { output.println("BAMBOO_DEBUGPRINT(0xAAAA);"); output.println("BAMBOO_DEBUGPRINT_REG(tmpsum);"); } else { - output.println("BAMBOO_START_CRITICAL_SECTION();"); + //output.println("BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();"); output.println("tprintf(\"Process %x(%d): task %s\\n\", corenum, corenum, \"" + task.getSymbol() + "\");"); - 
output.println("BAMBOO_CLOSE_CRITICAL_SECTION();"); + //output.println("BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();"); } output.println("#endif"); if(this.state.RAW) { @@ -728,7 +728,7 @@ public class BuildCodeMultiCore extends BuildCode { if (current_node.kind()!=FKind.FlatReturnNode) { //output.println(" flushAll();"); output.println("#ifdef CACHEFLUSH"); - output.println("BAMBOO_START_CRITICAL_SECTION();"); + output.println("BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();"); output.println("#ifdef DEBUG"); output.println("BAMBOO_DEBUGPRINT(0xec00);"); output.println("#endif"); @@ -736,7 +736,7 @@ public class BuildCodeMultiCore extends BuildCode { output.println("#ifdef DEBUG"); output.println("BAMBOO_DEBUGPRINT(0xecff);"); output.println("#endif"); - output.println("BAMBOO_CLOSE_CRITICAL_SECTION();"); + output.println("BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();"); output.println("#endif"); outputTransCode(output); output.println(" return;"); @@ -1619,7 +1619,7 @@ public class BuildCodeMultiCore extends BuildCode { } else { if(fm.getTask() != null) { output.println("#ifdef CACHEFLUSH"); - output.println("BAMBOO_START_CRITICAL_SECTION();"); + output.println("BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();"); output.println("#ifdef DEBUG"); output.println("BAMBOO_DEBUGPRINT(0xec00);"); output.println("#endif"); @@ -1627,7 +1627,7 @@ public class BuildCodeMultiCore extends BuildCode { output.println("#ifdef DEBUG"); output.println("BAMBOO_DEBUGPRINT(0xecff);"); output.println("#endif"); - output.println("BAMBOO_CLOSE_CRITICAL_SECTION();"); + output.println("BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();"); output.println("#endif"); outputTransCode(output); } diff --git a/Robust/src/Runtime/RAW/runtime_arch.h b/Robust/src/Runtime/RAW/runtime_arch.h index 6e33e5cf..19210e6e 100644 --- a/Robust/src/Runtime/RAW/runtime_arch.h +++ b/Robust/src/Runtime/RAW/runtime_arch.h @@ -13,7 +13,6 @@ #define BAMBOO_CACHE_LINE_SIZE (kCacheLineSize) #define BAMBOO_CACHE_LINE_MASK (kCacheLineMask) -#define 
BAMBOO_TOTALCORE (raw_get_num_tiles()) // the total # of cores available in the processor #define BAMBOO_NUM_OF_CORE corenum // the # of current residing core #define BAMBOO_GET_NUM_OF_CORE() (raw_get_abs_pos_x() + raw_get_array_size_x() * raw_get_abs_pos_y()) // compute the # of current residing core #define BAMBOO_DEBUGPRINT(x) (raw_test_pass((x))) diff --git a/Robust/src/Runtime/RAW/task_arch.c b/Robust/src/Runtime/RAW/task_arch.c index 40c42655..4a247a3b 100644 --- a/Robust/src/Runtime/RAW/task_arch.c +++ b/Robust/src/Runtime/RAW/task_arch.c @@ -790,7 +790,7 @@ bool getreadlock(void * ptr) { } else { lock2require = (int)(((struct ___Object___ *)ptr)->lock); } - targetcore = (lock2require >> 5) % BAMBOO_TOTALCORE; + targetcore = (lock2require >> 5) % NUMCORES; lockflag = false; #ifndef INTERRUPT reside = false; @@ -849,7 +849,7 @@ void releasereadlock(void * ptr) { } else { reallock = (int)(((struct ___Object___ *)ptr)->lock); } - targetcore = (reallock >> 5) % BAMBOO_TOTALCORE; + targetcore = (reallock >> 5) % NUMCORES; if(targetcore == BAMBOO_NUM_OF_CORE) { BAMBOO_START_CRITICAL_SECTION_LOCK(); @@ -892,7 +892,7 @@ bool getreadlock_I_r(void * ptr, void * redirectlock, int core, bool cache) { #endif lockresult = 0; } - targetcore = ((int)redirectlock >> 5) % BAMBOO_TOTALCORE; + targetcore = ((int)redirectlock >> 5) % NUMCORES; if(targetcore == BAMBOO_NUM_OF_CORE) { // reside on this core @@ -957,7 +957,7 @@ bool getwritelock(void * ptr) { } else { lock2require = (int)(((struct ___Object___ *)ptr)->lock); } - targetcore = (lock2require >> 5) % BAMBOO_TOTALCORE; + targetcore = (lock2require >> 5) % NUMCORES; lockflag = false; #ifndef INTERRUPT reside = false; @@ -1024,7 +1024,7 @@ void releasewritelock(void * ptr) { } else { reallock = (int)(((struct ___Object___ *)ptr)->lock); } - targetcore = (reallock >> 5) % BAMBOO_TOTALCORE; + targetcore = (reallock >> 5) % NUMCORES; #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe661); @@ -1069,7 +1069,7 @@ bool getwritelock_I(void * ptr) 
{ } else { lock2require = (int)(((struct ___Object___ *)ptr)->lock); } - targetcore = (lock2require >> 5) % BAMBOO_TOTALCORE; + targetcore = (lock2require >> 5) % NUMCORES; lockflag = false; #ifndef INTERRUPT reside = false; @@ -1134,7 +1134,7 @@ bool getwritelock_I_r(void * ptr, void * redirectlock, int core, bool cache) { #endif lockresult = 0; } - targetcore = ((int)redirectlock >> 5) % BAMBOO_TOTALCORE; + targetcore = ((int)redirectlock >> 5) % NUMCORES; #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe571); @@ -1204,7 +1204,7 @@ void releasewritelock_I(void * ptr) { } else { reallock = (int)(((struct ___Object___ *)ptr)->lock); } - targetcore = (reallock >> 5) % BAMBOO_TOTALCORE; + targetcore = (reallock >> 5) % NUMCORES; #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe681); @@ -1236,7 +1236,7 @@ void releasewritelock_I(void * ptr) { void releasewritelock_I_r(void * lock, void * redirectlock) { int targetcore = 0; int reallock = (int)lock; - targetcore = (reallock >> 5) % BAMBOO_TOTALCORE; + targetcore = (reallock >> 5) % NUMCORES; #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe691); @@ -1282,7 +1282,7 @@ void releasewritelock_I_r(void * lock, void * redirectlock) { // 1: the lock request is denied __attribute__((always_inline)) int processlockrequest(int locktype, int lock, int obj, int requestcore, int rootrequestcore, bool cache) { int deny = 0; - if( ((lock >> 5) % BAMBOO_TOTALCORE) != BAMBOO_NUM_OF_CORE ) { + if( ((lock >> 5) % NUMCORES) != BAMBOO_NUM_OF_CORE ) { // the lock should not be on this core #ifndef TILERA BAMBOO_DEBUGPRINT_REG(requestcore); diff --git a/Robust/src/Runtime/mem.c b/Robust/src/Runtime/mem.c index 9cc3da92..1df82bae 100644 --- a/Robust/src/Runtime/mem.c +++ b/Robust/src/Runtime/mem.c @@ -8,12 +8,12 @@ void * mycalloc(int m, int size) { void * p = NULL; int isize = size; - BAMBOO_START_CRITICAL_SECTION_MEM(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize); if(p == NULL) { BAMBOO_EXIT(0xc001); } - 
BAMBOO_CLOSE_CRITICAL_SECTION_MEM(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); return p; } @@ -25,7 +25,7 @@ void * mycalloc_share(struct garbagelist * stackptr, int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); bool hasgc = false; memalloc: - BAMBOO_START_CRITICAL_SECTION_MEM(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef DEBUG tprintf("ask for shared mem: %x \n", isize); #endif @@ -35,7 +35,7 @@ memalloc: #endif if(p == NULL) { // no more global shared memory - BAMBOO_CLOSE_CRITICAL_SECTION_MEM(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); if(!hasgc) { // start gc gc(stackptr); @@ -48,7 +48,7 @@ memalloc: // try to malloc again goto memalloc; } - BAMBOO_CLOSE_CRITICAL_SECTION_MEM(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); void * alignedp = (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK)); BAMBOO_MEMSET_WH(p, -2, (alignedp - p)); @@ -60,13 +60,13 @@ void * mycalloc_share(int m, int size) { void * p = NULL; int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK); - BAMBOO_START_CRITICAL_SECTION_MEM(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize); if(p == NULL) { // no more global shared memory BAMBOO_EXIT(0xc003); } - BAMBOO_CLOSE_CRITICAL_SECTION_MEM(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); return (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK)); } diff --git a/Robust/src/Runtime/multicoregarbage.c b/Robust/src/Runtime/multicoregarbage.c index 3ffe8e54..c7314cd2 100644 --- a/Robust/src/Runtime/multicoregarbage.c +++ b/Robust/src/Runtime/multicoregarbage.c @@ -106,8 +106,7 @@ inline void dumpSMem() { coren = gc_block2core[block%(NUMCORES4GC*2)]; } // compute core coordinate - x = bamboo_cpu2coords[coren*2]; - y = bamboo_cpu2coords[coren*2+1]; + BAMBOO_COORDS(coren, &x, &y); tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", block, sblock++, x, y, (sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA); @@ -416,7 
+415,7 @@ inline void checkMarkStatue() { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xee02); #endif - BAMBOO_START_CRITICAL_SECTION_STATUS(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); gccorestatus[BAMBOO_NUM_OF_CORE] = 0; gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs; gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs; @@ -440,10 +439,11 @@ inline void checkMarkStatue() { gccorestatus[BAMBOO_NUM_OF_CORE] = 1; waitconfirm = true; numconfirm = NUMCORESACTIVE - 1; + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); for(i = 1; i < NUMCORESACTIVE; ++i) { gccorestatus[i] = 1; // send mark phase finish confirm request msg to core i - send_msg_1(i, GCMARKCONFIRM, true); + send_msg_1(i, GCMARKCONFIRM); } // for(i = 1; i < NUMCORESACTIVE; ++i) } else { // check if the sum of send objs and receive obj are the same @@ -481,9 +481,11 @@ inline void checkMarkStatue() { } waitconfirm = false; }// if(0 == sumsendobj) else ... + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); } // if(!gcwaitconfirm) else() - } // if(allStall) - BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(); + } else { + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); + } // if(allStall) } // if((!waitconfirm)... 
#ifdef DEBUG BAMBOO_DEBUGPRINT(0xee0a); @@ -506,7 +508,7 @@ inline bool preGC() { for(i = 1; i < NUMCORESACTIVE; ++i) { corestatus[i] = 1; // send status confirm msg to core i - send_msg_1(i, STATUSCONFIRM, false); + send_msg_1(i, STATUSCONFIRM); } // for(i = 1; i < NUMCORESACTIVE; ++i) #ifdef DEBUG @@ -960,11 +962,11 @@ inline void moveLObjs() { gcheaptop += size; // cache the mapping info anyway //if(ptr != tmpheaptop) { - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); //mgchashInsert_I(ptr, tmpheaptop); RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop); //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); //} #ifdef DEBUG BAMBOO_DEBUGPRINT(0xcdca); @@ -973,7 +975,7 @@ inline void moveLObjs() { #endif if(host != BAMBOO_NUM_OF_CORE) { // send the original host core with the mapping info - send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false); + send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xcdcb); BAMBOO_DEBUGPRINT_REG(ptr); @@ -1044,11 +1046,11 @@ inline void moveLObjs() { cpysize += isize; // cache the mapping info anyway //if(ptr != tmpheaptop) { - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); //mgchashInsert_I(ptr, tmpheaptop); RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop); //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); //} #ifdef DEBUG BAMBOO_DEBUGPRINT(0xcdcc); @@ -1058,7 +1060,7 @@ inline void moveLObjs() { #endif if(host != BAMBOO_NUM_OF_CORE) { // send the original host core with the mapping info - send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false); + send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xcdcd); BAMBOO_DEBUGPRINT_REG(ptr); @@ -1114,14 +1116,14 @@ inline void markObj(void * objptr) { int host = hostcore(objptr); if(BAMBOO_NUM_OF_CORE == host) { // on this core - 
BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); if(((int *)objptr)[6] == INIT) { // this is the first time that this object is discovered, // set the flag as DISCOVERED ((int *)objptr)[6] = DISCOVERED; gc_enqueue_I(objptr); } - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); } else { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xbbbb); @@ -1134,7 +1136,7 @@ inline void markObj(void * objptr) { unsigned long long ttime = BAMBOO_GET_EXE_TIME(); #endif // send a msg to host informing that objptr is active - send_msg_2(host, GCMARKEDOBJ, objptr, false); + send_msg_2(host, GCMARKEDOBJ, objptr); #ifdef GC_PROFILE marktime += BAMBOO_GET_EXE_TIME() - ttime; num_markrequest++; @@ -1144,9 +1146,9 @@ inline void markObj(void * objptr) { } } } else { - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); gc_enqueue_I(objptr); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); } // if(ISSHAREDOBJ(objptr)) } // void markObj(void * objptr) @@ -1289,9 +1291,9 @@ inline void mark(bool isfirst, if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04); #endif while(true) { - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); bool hasItems = gc_moreItems2_I(); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xed05); #endif @@ -1323,10 +1325,10 @@ inline void mark(bool isfirst, BAMBOO_DEBUGPRINT_REG(ptr); BAMBOO_DEBUGPRINT_REG(*((int*)ptr)); #endif - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE); gcnumlobjs++; - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); // mark this obj ((int *)ptr)[6] = MARKED; } else if(isnotmarked) { @@ -1361,7 +1363,7 @@ inline void mark(bool isfirst, // check if this obj has been forwarded if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) { // send a msg to host informing that ptr is 
active - send_msg_2(host, GCMARKEDOBJ, ptr, false); + send_msg_2(host, GCMARKEDOBJ, ptr); gcself_numsendobjs++; MGCHashadd(gcforwardobjtbl, (int)ptr); } @@ -1419,7 +1421,7 @@ inline void mark(bool isfirst, BAMBOO_DEBUGPRINT(0xed09); #endif send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE, - gcself_numsendobjs, gcself_numreceiveobjs, false); + gcself_numsendobjs, gcself_numreceiveobjs); sendStall = true; } } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ... @@ -1448,7 +1450,7 @@ inline void compact2Heaptophelper_I(int coren, gcdstcore = gctopcore; gcblock2fill = *numblocks + 1; } else { - send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, true); + send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1); } #ifdef DEBUG BAMBOO_DEBUGPRINT_REG(coren); @@ -1508,7 +1510,7 @@ inline void compact2Heaptop() { ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L))) :((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE))); // check if the top core finishes - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); if(gccorestatus[gctopcore] != 0) { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xd101); @@ -1516,10 +1518,10 @@ inline void compact2Heaptop() { #endif // let the top core finishes its own work first compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); return; } - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xd102); @@ -1529,7 +1531,7 @@ inline void compact2Heaptop() { BAMBOO_DEBUGPRINT_REG(remain); #endif for(int i = 0; i < NUMCORES4GC; i++) { - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xd103); @@ -1540,12 +1542,12 @@ inline void compact2Heaptop() { BAMBOO_DEBUGPRINT(0xd101); BAMBOO_DEBUGPRINT_REG(gctopcore); #endif - BAMBOO_CLOSE_CRITICAL_SECTION(); + 
BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); // the top core is not free now return; } } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); } // for(i = 0; i < NUMCORES4GC; i++) #ifdef DEBUG BAMBOO_DEBUGPRINT(0xd106); @@ -1604,12 +1606,12 @@ inline void resolvePendingMoveRequest() { // find match int tomove = 0; int startaddr = 0; - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore, gcrequiredmems[dstcore], &tomove, &startaddr); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xeb02); BAMBOO_DEBUGPRINT_REG(sourcecore); @@ -1630,7 +1632,7 @@ inline void resolvePendingMoveRequest() { BAMBOO_DEBUGPRINT(0xeb04); #endif send_msg_4(dstcore, GCMOVESTART, sourcecore, - startaddr, tomove, false); + startaddr, tomove); } gcmovepending--; nosparemem = true; @@ -1911,11 +1913,11 @@ innermoveobj: BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size); } // store mapping info - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); //mgchashInsert_I(orig->ptr, to->ptr); RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr); //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); //} #ifdef DEBUG BAMBOO_DEBUGPRINT(0xcdce); @@ -2061,7 +2063,7 @@ innercompact: #endif // ask for more mem gctomove = false; - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore, gccurr_heaptop, BAMBOO_NUM_OF_CORE)) { #ifdef DEBUG @@ -2069,13 +2071,13 @@ innercompact: #endif gctomove = true; } else { - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe105); #endif return false; } - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); } 
else { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe106); @@ -2092,7 +2094,7 @@ innercompact: // ask for more mem gctomove = false; send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, - *filledblocks, *heaptopptr, gccurr_heaptop, false); + *filledblocks, *heaptopptr, gccurr_heaptop); } else { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe108); @@ -2100,7 +2102,7 @@ innercompact: #endif // finish compacting send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, - *filledblocks, *heaptopptr, 0, false); + *filledblocks, *heaptopptr, 0); } } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) @@ -2164,7 +2166,7 @@ inline void compact() { BAMBOO_DEBUGPRINT_REG(to->base); #endif send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, - 0, to->base, 0, false); + 0, to->base, 0); RUNFREE(orig); RUNFREE(to); return; @@ -2197,7 +2199,7 @@ inline void * flushObj(void * objptr) { BAMBOO_DEBUGPRINT_REG(objptr); #endif // a shared obj ptr, change to new address - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef GC_PROFILE unsigned long long ttime = BAMBOO_GET_EXE_TIME(); #endif @@ -2207,7 +2209,7 @@ inline void * flushObj(void * objptr) { flushstalltime += BAMBOO_GET_EXE_TIME()-ttime; #endif //MGCHashget(gcpointertbl, objptr, &dstptr); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT_REG(dstptr); #endif @@ -2240,7 +2242,7 @@ inline void * flushObj(void * objptr) { // the first time require the mapping, send msg to the hostcore // for the mapping info send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, - BAMBOO_NUM_OF_CORE, false); + BAMBOO_NUM_OF_CORE); while(true) { if(gcismapped) { break; @@ -2253,11 +2255,11 @@ inline void * flushObj(void * objptr) { // TODO //flushstalltime += BAMBOO_GET_EXE_TIME() - ttime; #endif - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); //dstptr = mgchashSearch(objptr); RuntimeHashget(gcpointertbl, objptr, &dstptr); 
//MGCHashget(gcpointertbl, objptr, &dstptr); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ... #ifdef DEBUG BAMBOO_DEBUGPRINT_REG(dstptr); @@ -2388,9 +2390,9 @@ inline void flush(struct garbagelist * stackptr) { #endif while(true) { - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); bool hasItems = gc_moreItems_I(); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); if(!hasItems) { break; } @@ -2398,9 +2400,9 @@ inline void flush(struct garbagelist * stackptr) { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe301); #endif - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); void * ptr = gc_dequeue_I(); - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); if(ISSHAREDOBJ(ptr)) { // should be a local shared obj and should have mapping info ptr = flushObj(ptr); @@ -2479,7 +2481,6 @@ inline void flush(struct garbagelist * stackptr) { } } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) } // while(gc_moreItems()) - BAMBOO_CLOSE_CRITICAL_SECTION(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe308); #endif @@ -2580,7 +2581,7 @@ inline void flush(struct garbagelist * stackptr) { if(STARTUPCORE == BAMBOO_NUM_OF_CORE) { gccorestatus[BAMBOO_NUM_OF_CORE] = 0; } else { - send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false); + send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE); } #ifdef GC_PROFILE if(BAMBOO_NUM_OF_CORE == 0) { @@ -2609,7 +2610,7 @@ inline void gc_collect(struct garbagelist * stackptr) { #endif initGC(); //send init finish msg to core coordinator - send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false); + send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE); while(true) { if(MARKPHASE == gcphase) { break; @@ -2660,7 +2661,7 @@ inline void gc_nocollect(struct garbagelist * stackptr) { #endif initGC(); //send init finish msg to core coordinator - 
send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false); + send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE); while(true) { if(MARKPHASE == gcphase) { break; @@ -2733,7 +2734,7 @@ inline void gc(struct garbagelist * stackptr) { // Note: all cores need to init gc including non-gc cores for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; i++) { // send GC init messages to all cores - send_msg_1(i, GCSTARTINIT, false); + send_msg_1(i, GCSTARTINIT); } bool isfirst = true; bool allStall = false; @@ -2744,12 +2745,12 @@ inline void gc(struct garbagelist * stackptr) { gccorestatus[BAMBOO_NUM_OF_CORE] = 0; while(true) { - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); if(gc_checkAllCoreStatus_I()) { - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); break; } - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); } #ifdef GC_PROFILE gc_profileItem(); @@ -2764,7 +2765,7 @@ inline void gc(struct garbagelist * stackptr) { for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) { gccorestatus[i] = 1; // send GC start messages to all cores - send_msg_1(i, GCSTART, false); + send_msg_1(i, GCSTART); } gcphase = MARKPHASE; @@ -2782,7 +2783,7 @@ inline void gc(struct garbagelist * stackptr) { // Note: only need to ask gc cores, non-gc cores do not host any objs numconfirm = NUMCORES4GC - 1; for(i = 1; i < NUMCORES4GC; ++i) { - send_msg_1(i, GCLOBJREQUEST, false); + send_msg_1(i, GCLOBJREQUEST); } gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop; while(true) { @@ -2838,14 +2839,14 @@ inline void gc(struct garbagelist * stackptr) { if (tmpcoreptr < tmpheaptop/*tmptopptr*/) { gcstopblock[i] = numpbc + 1; if(i != STARTUPCORE) { - send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false); + send_msg_2(i, GCSTARTCOMPACT, numpbc+1); } else { gcblock2fill = numpbc+1; } // if(i != STARTUPCORE) } else { gcstopblock[i] = numpbc; if(i != STARTUPCORE) { - send_msg_2(i, GCSTARTCOMPACT, numpbc, false); + send_msg_2(i, 
GCSTARTCOMPACT, numpbc); } else { gcblock2fill = numpbc; } // if(i != STARTUPCORE) @@ -2897,17 +2898,17 @@ inline void gc(struct garbagelist * stackptr) { #endif } - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); if(gc_checkCoreStatus_I()) { // all cores have finished compacting // restore the gcstatus of all cores for(i = 0; i < NUMCORES4GC; ++i) { gccorestatus[i] = 1; } - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); break; } else { - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); // check if there are spare mem for pending move requires if(COMPACTPHASE == gcphase) { #ifdef DEBUG @@ -2982,7 +2983,7 @@ inline void gc(struct garbagelist * stackptr) { for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) { // send start flush messages to all cores gccorestatus[i] = 1; - send_msg_1(i, GCSTARTFLUSH, false); + send_msg_1(i, GCSTARTFLUSH); } #ifdef GC_PROFILE gc_profileItem(); @@ -2995,12 +2996,12 @@ inline void gc(struct garbagelist * stackptr) { gccorestatus[BAMBOO_NUM_OF_CORE] = 0; while(FLUSHPHASE == gcphase) { // check the status of all cores - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); if(gc_checkAllCoreStatus_I()) { - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); break; } - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); } // while(FLUSHPHASE == gcphase) gcphase = FINISHPHASE; @@ -3019,7 +3020,7 @@ inline void gc(struct garbagelist * stackptr) { gccorestatus[BAMBOO_NUM_OF_CORE] = 1; for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) { // send gc finish messages to all cores - send_msg_1(i, GCFINISH, false); + send_msg_1(i, GCFINISH); gccorestatus[i] = 1; } #ifdef RAWPATH // TODO GC_DEBUG diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h index 63b324a9..dfbd9e88 100644 --- a/Robust/src/Runtime/multicoreruntime.h +++ 
b/Robust/src/Runtime/multicoreruntime.h @@ -62,6 +62,8 @@ volatile bool isMsgSending; outmsgdata[outmsglast] = (n); \ OUTMSG_LASTINDEXINC(); +#define MAX_PACKET_WORDS 5 + /* Message format: * type + Msgbody * type: 1 -- transfer object @@ -401,38 +403,36 @@ INLINE void processlockrelease(int locktype, // msg related functions INLINE void send_hanging_msg(); INLINE void send_msg_1(int targetcore, - unsigned long n0, - bool isinterrupton); + unsigned long n0); INLINE void send_msg_2(int targetcore, unsigned long n0, - unsigned long n1, - bool isinterrupton); + unsigned long n1); INLINE void send_msg_3(int targetcore, unsigned long n0, unsigned long n1, - unsigned long n2, - bool isinterrupton); + unsigned long n2); INLINE void send_msg_4(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, - unsigned long n3, - bool isinterrupton); + unsigned long n3); INLINE void send_msg_5(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3, - unsigned long n4, - bool isinterrupton); + unsigned long n4); INLINE void send_msg_6(int targetcore, unsigned long n0, unsigned long n1, unsigned long n2, unsigned long n3, unsigned long n4, - unsigned long n5, - bool isinterrupton); + unsigned long n5); +INLINE void send_msg_3_I(int targetcore, + unsigned long n0, + unsigned long n1, + unsigned long n2); INLINE void cache_msg_1(int targetcore, unsigned long n0); INLINE void cache_msg_2(int targetcore, @@ -461,7 +461,7 @@ INLINE void cache_msg_6(int targetcore, unsigned long n4, unsigned long n5); INLINE void transferObject(struct transObjInfo * transObj); -INLINE int receiveMsg(void); +INLINE int receiveMsg(uint32_t send_port_pending); #ifdef MULTICORE_GC INLINE void transferMarkResults(); @@ -477,53 +477,47 @@ void outputProfileData(); ///////////////////////////////////////////////////////////////////////////// // For each version of BAMBOO runtime, there should be a header file named // // runtim_arch.h defining following MARCOS: // -// 
BAMBOO_TOTALCORE: the total # of cores in the processor // // BAMBOO_NUM_OF_CORE: the # of current residing core // // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core // +// BAMBOO_COORDS(c, x, y): convert the cpu # to coords (*x, *y) // // BAMBOO_DEBUGPRINT(x): print out integer x // // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x // +// BAMBOO_EXIT(x): exit routine // +// BAMBOO_DIE(x): error exit routine // +// BAMBOO_GET_EXE_TIME(): rountine to get current clock cycle number // +// BAMBOO_MSG_AVAIL(): checking if there are msgs coming in // +// BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in // +// BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(): change to runtime mode from // +// client mode // +// BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(): change to client mode from // +// runtime mode // +// BAMBOO_ENTER_SEND_MODE_FROM_CLIENT(): change to send mode from // +// client mode // +// BAMBOO_ENTER_CLIENT_MODE_FROM_SEND(): change to client mode from // +// send mode // +// BAMBOO_ENTER_RUNTIME_MODE_FROM_SEND(): change to runtime mode from // +// send mode // +// BAMBOO_ENTER_SEND_MODE_FROM_RUNTIME(): change to send mode from // +// runtime mode // +// BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock // +// request response // // BAMBOO_LOCAL_MEM_CALLOC(x, y): allocate an array of x elements each of // // whose size in bytes is y on local memory // // BAMBOO_LOCAL_MEM_FREE(x): free space with ptr x on local memory // -// BAMBOO_SHARE_MEM_CALLOC(x, y): allocate an array of x elements each of // +// BAMBOO_LOCAL_MEM_CLOSE(): close the local heap // +// BAMBOO_SHARE_MEM_CALLOC_I(x, y): allocate an array of x elements each of// // whose size in bytes is y on shared memory// -// BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE() // -// BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(): locks for global data // -// structures related to obj // -// queue // -// BAMBOO_START_CRITICAL_SECTION_STATUS() // -// 
BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(): locks for global data structures// -// related to status data // -// BAMBOO_START_CRITICAL_SECTION_MSG() // -// BAMBOO_CLOSE_CRITICAL_SECTION_MSG(): locks for global data structures // -// related to msg data // -// BAMBOO_START_CRITICAL_SECTION_LOCK() // -// BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(): locks for global data structures // -// related to lock table // -// BAMBOO_START_CRITICAL_SECTION_MEM() // -// BAMBOO_CLOSE_CRITICAL_SECTION_MEM(): locks for allocating memory // -// BAMBOO_START_CRITICAL_SECTION() // -// BAMBOO_CLOSE_CRITICAL_SECTION(): locks for all global data structures // -// BAMBOO_WAITING_FOR_LOCK(): routine executed while waiting for lock // -// request response // +// BAMBOO_SHARE_MEM_CLOSE(): close the shared heap // // BAMBOO_CACHE_LINE_SIZE: the cache line size // // BAMBOO_CACHE_LINE_MASK: mask for a cache line // // BAMBOO_CACHE_FLUSH_RANGE(x, y): flush cache lines started at x with // // length y // // BAMBOO_CACHE_FLUSH_ALL(): flush the whole cache of a core if necessary // -// BAMBOO_EXIT(x): exit routine // -// BAMBOO_MSG_AVAIL(): checking if there are msgs coming in // -// BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in // -// BAMBOO_GET_EXE_TIME(): rountine to get current clock cycle number // // BAMBOO_MEMSET_WH(x, y, z): memset the specified region of memory (start // // address x, size z) to value y with write // // hint, the processor will not fetch the // // current content of the memory and directly // // write // -// // -// runtime_arch.h should also define following global parameters: // -// bamboo_cpu2coords: map the cpu # to (x,y) coordinates // -// bamboo_coords2cpu: map the (x,y) coordinates to cpu # // ///////////////////////////////////////////////////////////////////////////// #endif // #ifdef MULTICORE diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c index cfe56c73..b0c92326 100644 --- a/Robust/src/Runtime/multicoretask.c +++ 
b/Robust/src/Runtime/multicoretask.c @@ -195,6 +195,8 @@ void disruntimedata() { RUNFREE(currtpd); currtpd = NULL; } + BAMBOO_LOCAL_MEM_CLOSE(); + BAMBOO_SHARE_MEM_CLOSE(); } inline __attribute__((always_inline)) @@ -215,7 +217,7 @@ bool checkObjQueue() { while(!isEmpty(&objqueue)) { void * obj = NULL; - BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf001); #endif @@ -235,7 +237,7 @@ bool checkObjQueue() { grount = 0; getwritelock_I(obj); while(!lockflag) { - BAMBOO_WAITING_FOR_LOCK(); + BAMBOO_WAITING_FOR_LOCK(0); } // while(!lockflag) grount = lockresult; #ifdef DEBUG @@ -305,13 +307,13 @@ bool checkObjQueue() { //isInterrupt = true; #endif objqueuebreak: - BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf000); #endif break; } // if(grount == 1) - BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf000); #endif @@ -342,7 +344,7 @@ void checkCoreStatus() { BAMBOO_DEBUGPRINT(0xee04); BAMBOO_DEBUGPRINT_REG(waitconfirm); #endif - BAMBOO_START_CRITICAL_SECTION_STATUS(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf001); #endif @@ -391,7 +393,7 @@ void checkCoreStatus() { for(i = 1; i < NUMCORESACTIVE; ++i) { corestatus[i] = 1; // send status confirm msg to core i - send_msg_1(i, STATUSCONFIRM, false); + send_msg_1(i, STATUSCONFIRM); } // for(i = 1; i < NUMCORESACTIVE; ++i) waitconfirm = true; numconfirm = NUMCORESACTIVE - 1; @@ -414,18 +416,18 @@ void checkCoreStatus() { // profile mode, send msgs to other cores to request pouring // out progiling data #ifdef PROFILE - BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf000); #endif for(i = 1; i < NUMCORESACTIVE; ++i) { // send profile request msg to core i - send_msg_2(i, PROFILEOUTPUT, totalexetime, false); + 
send_msg_2(i, PROFILEOUTPUT, totalexetime); } // for(i = 1; i < NUMCORESACTIVE; ++i) // pour profiling data on startup core outputProfileData(); while(true) { - BAMBOO_START_CRITICAL_SECTION_STATUS(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf001); #endif @@ -446,7 +448,7 @@ void checkCoreStatus() { } // for(i = 0; i < NUMCORESACTIVE; ++i) if(!allStall) { int halt = 100; - BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf000); #endif @@ -484,7 +486,7 @@ void checkCoreStatus() { waitconfirm = false; numconfirm = 0; } // if(allStall) - BAMBOO_CLOSE_CRITICAL_SECTION_STATUS(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf000); #endif @@ -608,7 +610,7 @@ inline void run(void * arg) { #endif // send stall msg send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE, - self_numsendobjs, self_numreceiveobjs, false); + self_numsendobjs, self_numreceiveobjs); sendStall = true; isfirst = true; busystatus = false; @@ -1356,7 +1358,7 @@ void * globalmalloc_I(int coren, if(foundsmem == 1) { // find suitable block mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb 0) { BAMBOO_DEBUGPRINT(msgdata[msgdataindex+i]); @@ -1649,13 +1652,13 @@ INLINE void processmsg_lockrequest_I() { return; } else { // send response msg - // for 32 bit machine, the size is always 4 words + // for 32 bit machine, the size is always 4 words, cache the msg first int tmp = deny==1?LOCKDENY:LOCKGROUNT; - if(isMsgSending) { + //if(isMsgSending) { cache_msg_4(data4, tmp, locktype, data2, data3); - } else { - send_msg_4(data4, tmp, locktype, data2, data3, true); - } + /*} else { + send_msg_4(data4, tmp, locktype, data2, data3); + }*/ } } @@ -1750,14 +1753,14 @@ INLINE void processmsg_redirectlock_I() { return; } else { // send response msg - // for 32 bit machine, the size is always 4 words - if(isMsgSending) { + // for 32 bit machine, the size is always 4 words, cache the msg first 
+ //if(isMsgSending) { cache_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, data1, data2, data3); - } else { + /*} else { send_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, - data1, data2, data3, true); - } + data1, data2, data3); + }*/ } } @@ -1850,11 +1853,12 @@ INLINE void processmsg_profileoutput_I() { totalexetime = msgdata[msgdataindex]; //[1] MSG_INDEXINC_I(); outputProfileData(); - if(isMsgSending) { + // cache the msg first + //if(isMsgSending) { cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE); - } else { - send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE, true); - } + /*} else { + send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE); + }*/ } INLINE void processmsg_profilefinish_I() { @@ -1888,15 +1892,16 @@ INLINE void processmsg_statusconfirm_I() { BAMBOO_DEBUGPRINT(0xe887); #endif #endif - if(isMsgSending) { + // cache the msg first + //if(isMsgSending) { cache_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0, BAMBOO_NUM_OF_CORE, self_numsendobjs, self_numreceiveobjs); - } else { + /*} else { send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0, BAMBOO_NUM_OF_CORE, self_numsendobjs, - self_numreceiveobjs, true); - } + self_numreceiveobjs); + }*/ } } @@ -1965,23 +1970,24 @@ INLINE void processmsg_memrequest_I() { if(gcprocessing) { // is currently doing gc, dump this msg if(INITPHASE == gcphase) { - // if still in the initphase of gc, send a startinit msg again - if(isMsgSending) { + // if still in the initphase of gc, send a startinit msg again, + // cache the msg first + //if(isMsgSending) { cache_msg_1(data2, GCSTARTINIT); - } else { - send_msg_1(data2, GCSTARTINIT, true); - } + /*} else { + send_msg_1(data2, GCSTARTINIT); + }*/ } } else { #endif mem = smemalloc_I(data2, data1, &allocsize); if(mem != NULL) { - // send the start_va to request core - if(isMsgSending) { + // send the start_va to request core, cache the msg first + //if(isMsgSending) { cache_msg_3(data2, MEMRESPONSE, mem, allocsize); - } else { - 
send_msg_3(data2, MEMRESPONSE, mem, allocsize, true); - } + /*} else { + send_msg_3(data2, MEMRESPONSE, mem, allocsize); + }*/ } // if mem == NULL, the gcflag of the startup core has been set // and the gc should be started later, then a GCSTARTINIT msg // will be sent to the requesting core to notice it to start gc @@ -2134,11 +2140,12 @@ INLINE void processmsg_gcfinishcompact_I() { int tomove = 0; int dstcore = 0; if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) { - if(isMsgSending) { + // cache the msg first + //if(isMsgSending) { cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove); - } else { - send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove, true); - } + /*} else { + send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove); + }*/ } } else { gccorestatus[cnum] = 0; @@ -2170,16 +2177,16 @@ INLINE void processmsg_gcmarkconfirm_I() { // wrong core to receive such msg BAMBOO_EXIT(0xb005); } else { - // send response msg - if(isMsgSending) { + // send response msg, cache the msg first + //if(isMsgSending) { cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, gcbusystatus, gcself_numsendobjs, gcself_numreceiveobjs); - } else { + /*} else { send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, gcbusystatus, gcself_numsendobjs, - gcself_numreceiveobjs, true); - } + gcself_numreceiveobjs); + }*/ } } @@ -2265,15 +2272,15 @@ INLINE void processmsg_gcmaprequest_I() { /*if(isMsgSending) { cache_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]); } else { - send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1], true); + send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]); }*/ } else { - // send back the mapping info - if(isMsgSending) { + // send back the mapping info, cache the msg first + //if(isMsgSending) { cache_msg_3(data2, GCMAPINFO, data1, (int)dstptr); - } else { - send_msg_3(data2, GCMAPINFO, data1, (int)dstptr, true); - } + /*} else { + send_msg_3(data2, GCMAPINFO, data1, (int)dstptr); + }*/ } #ifdef
GC_PROFILE flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimei; @@ -2366,17 +2373,17 @@ INLINE void processmsg_gclobjmapping_I() { // 3--received a lock Msg // RAW version: -1 -- received nothing // otherwise -- received msg type -int receiveObject() { +int receiveObject(int send_port_pending) { msg: // get the incoming msgs - if(receiveMsg() == -1) { + if(receiveMsg(send_port_pending) == -1) { return -1; } processmsg: // processing received msgs int size = 0; MSG_REMAINSIZE_I(&size); - if(checkMsgLength_I(size) == -1) { + if((size == 0) || (checkMsgLength_I(size) == -1)) { // not a whole msg // have new coming msg if(BAMBOO_MSG_AVAIL() != 0) { @@ -2840,7 +2847,7 @@ int containstag(struct ___Object___ *ptr, void releasewritelock_r(void * lock, void * redirectlock) { int targetcore = 0; int reallock = (int)lock; - targetcore = (reallock >> 5) % BAMBOO_TOTALCORE; + targetcore = (reallock >> 5) % NUMCORES; #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe671); @@ -2850,7 +2857,7 @@ void releasewritelock_r(void * lock, void * redirectlock) { #endif if(targetcore == BAMBOO_NUM_OF_CORE) { - BAMBOO_START_CRITICAL_SECTION_LOCK(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf001); #endif @@ -2875,7 +2882,7 @@ void releasewritelock_r(void * lock, void * redirectlock) { BAMBOO_DEBUGPRINT_REG(lockvalue->value); #endif } - BAMBOO_CLOSE_CRITICAL_SECTION_LOCK(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf000); #endif @@ -2884,7 +2891,7 @@ void releasewritelock_r(void * lock, void * redirectlock) { // send lock release with redirect info msg // for 32 bit machine, the size is always 4 words send_msg_4(targetcore, REDIRECTRELEASE, 1, (int)lock, - (int)redirectlock, false); + (int)redirectlock); } } #endif @@ -2992,7 +2999,7 @@ newtask: BAMBOO_DEBUGPRINT_REG((int)(runtime_locks[i].value)); #endif getwritelock(lock); - BAMBOO_START_CRITICAL_SECTION(); + BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf001); 
#endif @@ -3000,7 +3007,7 @@ newtask: //isInterrupt = false; #endif while(!lockflag) { - BAMBOO_WAITING_FOR_LOCK(); + BAMBOO_WAITING_FOR_LOCK(0); } #ifndef INTERRUPT if(reside) { @@ -3020,7 +3027,7 @@ newtask: #ifdef PROFILE //isInterrupt = true; #endif - BAMBOO_CLOSE_CRITICAL_SECTION(); + BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xf000); #endif diff --git a/Robust/src/Runtime/runtime.h b/Robust/src/Runtime/runtime.h index 37af2b7e..02ef5503 100644 --- a/Robust/src/Runtime/runtime.h +++ b/Robust/src/Runtime/runtime.h @@ -151,7 +151,7 @@ inline void run(void * arg); #ifdef MULTICORE_GC inline void setupsmemmode(void); #endif -int receiveObject(void); +int receiveObject(int send_port_pending); void flagorand(void * ptr, int ormask, int andmask, struct parameterwrapper ** queues, int length); void flagorandinit(void * ptr, int ormask, int andmask); void enqueueObject(void * ptr, struct parameterwrapper ** queues,int length); diff --git a/Robust/src/buildscript b/Robust/src/buildscript index 49dfc33e..c6621f58 100755 --- a/Robust/src/buildscript +++ b/Robust/src/buildscript @@ -22,6 +22,29 @@ echo -abortreaders abort readers immediately echo -trueprob double - probabiltiy of true branch echo -dsmcaching -enable caching in dsm runtime echo +echo BAMBOO Multicore options +echo -scheduling do task scheduling +echo -multicore generate multi-core version binary +echo "-numcore set the number of cores (should be used together with -multicore), defaultly set as 1" +echo "-cacheflush enable cache flush in raw version binary (should be used togethere with -raw)" +echo "-interrupt generate raw version binary with interruption (should be used togethere with -raw)" +echo "-rawpath print out execute path information for raw version (should be used together with -raw)" +echo "-useprofile use profiling data for scheduling (should be used together with -raw)" +echo -printscheduling print out scheduling graphs +echo -printschedulesim print out scheduling 
simulator result graphs +echo -abcclose close the array boundary check +echo "-tilera_bme generate tilera version binary for Bare Mental Environment (should be used together with -multicore" +echo "-tilera_zlinux generate tilera version binary for Zero-Overhead Linux (should be used together with -multicore" +echo "-tileraconfig config tilera simulator/pci as nxm (should be used together with -tilera)" +echo "-raw generate raw version binary (should be used together with -multicore)" +echo "-rawconfig config raw simulator as 4xn (should be used together with -raw)" +echo -threadsimulate generate multi-thread simulate version binary +echo -multicoregc generate multi-core binary with garbage collection +echo "-numcore4gc set the number of cores for gc (should be used together with -multicoregc), defaultly set as 0" +echo -gcprofile build with gcprofile options +echo -accurateprofile build with accurate profile information including pre/post task processing info +echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version" +echo echo Other options echo -builddir setup different build directory echo -robustroot set up the ROBUSTROOT to directory other than default one @@ -40,23 +63,6 @@ echo -selfloop task - this task cannot self loop forever echo "-excprefetch methoddescriptor - exclude prefetches for this method (specified as class.method)" echo -taskstate do task state analysis echo -tagstate do tag state analysis -echo -scheduling do task scheduling -echo -multicore generate multi-core version binary -echo "-numcore set the number of cores (should be used together with -multicore), defaultly set as 1" -echo "-cacheflush enable cache flush in raw version binary (should be used togethere with -raw)" -echo "-interrupt generate raw version binary with interruption (should be used togethere with -raw)" -echo "-rawpath print out execute path information for raw version (should be used 
together with -raw)" -echo "-useprofile use profiling data for scheduling (should be used together with -raw)" -echo -printscheduling print out scheduling graphs -echo -printschedulesim print out scheduling simulator result graphs -echo -abcclose close the array boundary check -echo "-tilera generate tilera version binary (should be used together with -multicore" -echo "-tileraconfig config tilera simulator/pci as nxm (should be used together with -tilera)" -echo "-raw generate raw version binary (should be used together with -multicore)" -echo "-rawconfig config raw simulator as 4xn (should be used together with -raw)" -echo -threadsimulate generate multi-thread simulate version binary -echo -multicoregc generate multi-core binary with garbage collection -echo "-numcore4gc set the number of cores for gc (should be used together with -multicoregc), defaultly set as 0" echo -optional enable optional echo -debug generate debug symbols echo -prefetch do prefetch analysis @@ -72,9 +78,6 @@ echo -o binary echo -nojava do not run bristlecone compiler echo -instructionfailures inject code for instructionfailures echo -profile build with profile options -echo -gcprofile build with gcprofile options -echo -accurateprofile build with accurate profile information including pre/post task processing info -echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version" echo "-enable-assertions execute assert statements during compilation" echo -justanalyze exit after compiler analyses complete echo "-distributioninfo execute to collect distribution info for simulated annealing in multi-core version" @@ -112,6 +115,8 @@ MLPDEBUG=false MULTICOREFLAG=false RAWFLAG=false TILERAFLAG=false +TILERABMEFLAG=false +TILERAZLINUXFLAG=false TILERACONFIG='' CACHEFLUSHFLAG=false RAWCONFIG='' @@ -314,9 +319,14 @@ elif [[ $1 = '-raw' ]] then RAWFLAG=true JAVAOPTS="$JAVAOPTS -raw" -elif [[ $1 = '-tilera' ]] +elif [[ 
$1 = '-tilera_bme' ]] +then +TILERAFLAG=true +TILERABMEFLAG=true +elif [[ $1 = '-tilera_zlinux' ]] then TILERAFLAG=true +TILERAZLINUXFLAG=true elif [[ $1 = '-tileraconfig' ]] then TILERACONFIG="$2" @@ -644,9 +654,17 @@ make elif $TILERAFLAG then # TILERAFLAG TILERADIR="$CURDIR/tilera" +if $TILERABMEFLAG +then # TILERABMEFLAG +TILERA_INDIR="BME" MAKEFILE="Makefile.tilera.$TILERACONFIG" SIMHVC="sim.hvc.$TILERACONFIG" PCIHVC="pci.hvc.$TILERACONFIG" +elif $TILERAZLINUXFLAG +then # TILERAZLINUXFLAG +TILERA_INDIR="ZLINUX" +MAKEFILE="Makefile.tilera.$TILERACONFIG" +fi mkdir $TILERADIR cd $TILERADIR make clean @@ -654,6 +672,14 @@ rm ./* export TILERACFLAGS="-DTASK -DMULTICORE -DCLOSE_PRINT -DTILERA" +if $TILERABMEFLAG +then # TILERABMEFLAG +TILERACFLAGS="${TILERACFLAGS} -DTILERA_BME" +elif $TILERAZLINUXFLAG +then # TILERAZLINUXFLAG +TILERACFLAGS="${TILERACFLAGS} -DTILERA_ZLINUX" +fi + if $CACHEFLUSHFLAG then # print path TILERACFLAGS="${TILERACFLAGS} -DCACHEFLUSH" @@ -699,10 +725,13 @@ then # GC_PROFILE version TILERACFLAGS="${TILERACFLAGS} -DGC_PROFILE" fi -cp $ROBUSTROOT/Tilera/Runtime/$MAKEFILE ./Makefile -cp $ROBUSTROOT/Tilera/Runtime/$SIMHVC ./sim.hvc -cp $ROBUSTROOT/Tilera/Runtime/$PCIHVC ./pci.hvc -cp $ROBUSTROOT/Tilera/Runtime/bamboo-vmlinux-pci.hvc ./bamboo-vmlinux-pci.hvc +cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/$MAKEFILE ./Makefile +if $TILERABMEFLAG +then # TILERABMEFLAG +cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/$SIMHVC ./sim.hvc +cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/$PCIHVC ./pci.hvc +cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/bamboo-vmlinux-pci.hvc ./bamboo-vmlinux-pci.hvc +fi cp ../Runtime/multicoretask.c ./ cp ../Runtime/multicoreruntime.c ./ cp ../Runtime/Queue.c ./ @@ -730,13 +759,16 @@ cp ../Runtime/multicorehelper.h ./ cp ../Runtime/MGCHash.h ./ cp ../Tilera/Runtime/*.c ./ cp ../Tilera/Runtime/*.h ./ +cp ../Tilera/Runtime/$TILERA_INDIR/*.c ./ +cp ../Tilera/Runtime/$TILERA_INDIR/*.h ./ +cp ../Tilera/Runtime/$TILERA_INDIR/*.S ./ cp 
../Tilera/lib/* ./ cp ../$tmpbuilddirectory/*.c ./ cp ../$tmpbuilddirectory/*.h ./ make -else #!RAWFLAG && !TILERAFLAG +else #!RAWFLAG && !TILERABMEFLAG && ! TILERAZLINUXFLAG cd $CURDIR INCLUDES="$INCLUDES -I$ROBUSTROOT/Runtime -I. -IRuntime/include \ -- 2.34.1