From: jzhou Date: Wed, 10 Feb 2010 17:22:25 +0000 (+0000) Subject: bug fixing in multicore gc and add profiling code for gc X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8581fa3cc30cf51e595c8e3c67a7208c3725bf48;p=IRC.git bug fixing in multicore gc and add profiling code for gc --- diff --git a/Robust/src/Analysis/Scheduling/ScheduleAnalysis.java b/Robust/src/Analysis/Scheduling/ScheduleAnalysis.java index 4334cb2d..0da216fe 100644 --- a/Robust/src/Analysis/Scheduling/ScheduleAnalysis.java +++ b/Robust/src/Analysis/Scheduling/ScheduleAnalysis.java @@ -419,7 +419,8 @@ public class ScheduleAnalysis { (cdname.equals("KMeans")) || (cdname.equals("ZTransform")) || (cdname.equals("TestRunner")) || - (cdname.equals("LinkList"))) { + (cdname.equals("LinkList")) || + (cdname.equals("BHRunner"))) { newRate = this.coreNum; } else if(cdname.equals("SentenceParser")) { newRate = 4; diff --git a/Robust/src/Runtime/MGCHash.c b/Robust/src/Runtime/MGCHash.c index 2155f673..97d8c890 100755 --- a/Robust/src/Runtime/MGCHash.c +++ b/Robust/src/Runtime/MGCHash.c @@ -66,7 +66,7 @@ void mgchashreset() { tmpptr=next; } } else {*/ - memset(mgc_table, '\0', sizeof(mgchashlistnode_t)*mgc_size); + BAMBOO_MEMSET_WH(mgc_table, '\0', sizeof(mgchashlistnode_t)*mgc_size); //} while(mgc_structs->next!=NULL) { mgcliststruct_t *next=mgc_structs->next; @@ -328,7 +328,7 @@ struct MGCHash * allocateMGCHash(int size, thisvar->bucket = (struct MGCNode *) RUNMALLOC(sizeof(struct MGCNode)*size); // zero out all the buckets - memset(thisvar->bucket, '\0', sizeof(struct MGCNode)*size); + BAMBOO_MEMSET_WH(thisvar->bucket, '\0', sizeof(struct MGCNode)*size); //Set data counts thisvar->num4conflicts = conflicts; return thisvar; diff --git a/Robust/src/Runtime/mem.c b/Robust/src/Runtime/mem.c index 3ae21fbe..9cc3da92 100644 --- a/Robust/src/Runtime/mem.c +++ b/Robust/src/Runtime/mem.c @@ -51,8 +51,8 @@ memalloc: BAMBOO_CLOSE_CRITICAL_SECTION_MEM(); void * alignedp = (void *)(BAMBOO_CACHE_LINE_SIZE+((int)p-1)&(~BAMBOO_CACHE_LINE_MASK)); - memset(p, -2, (alignedp - p)); - memset(alignedp + size, -2, p + isize - alignedp - size); + BAMBOO_MEMSET_WH(p, -2, (alignedp - p)); + BAMBOO_MEMSET_WH(alignedp + size, -2, p + isize - alignedp - size); return alignedp; } #else diff --git a/Robust/src/Runtime/multicoregarbage.c b/Robust/src/Runtime/multicoregarbage.c index ce6f6537..20a86eb5 100644 --- a/Robust/src/Runtime/multicoregarbage.c +++ b/Robust/src/Runtime/multicoregarbage.c @@ -99,12 +99,6 @@ inline void dumpSMem() { coren = gc_block2core[block%(NUMCORES4GC*2)]; } // compute core coordinate - /*int tmpcore = coren; - if((NUMCORES4GC==62) && (tmpcore > 5)) { - tmpcore+=2; - } - x = tmpcore/bamboo_width; - y = tmpcore%bamboo_width;*/ x = bamboo_cpu2coords[coren*2]; y = bamboo_cpu2coords[coren*2+1]; tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", @@ -394,6 +388,17 @@ inline bool gc_checkCoreStatus() { return allStall; } +inline bool gc_checkAllCoreStatus() { + bool allStall = true; + for(int i = 0; i < NUMCORESACTIVE; ++i) { + if(gccorestatus[i] != 0) { + allStall = false; + break; + } // if(gccorestatus[i] != 0) + } // for(i = 0; i < NUMCORESACTIVE; ++i) + return allStall; +} + inline void checkMarkStatue() { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xee01); @@ -409,7 +414,7 @@ inline void checkMarkStatue() { gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs; gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs; // check the status of all cores - bool allStall = gc_checkCoreStatus(); + bool allStall = 
gc_checkAllCoreStatus(); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xee03); #endif @@ -427,26 +432,26 @@ inline void checkMarkStatue() { // reset the corestatus array too gccorestatus[BAMBOO_NUM_OF_CORE] = 1; waitconfirm = true; - numconfirm = NUMCORES4GC - 1; - for(i = 1; i < NUMCORES4GC; ++i) { + numconfirm = NUMCORESACTIVE - 1; + for(i = 1; i < NUMCORESACTIVE; ++i) { gccorestatus[i] = 1; // send mark phase finish confirm request msg to core i send_msg_1(i, GCMARKCONFIRM, false); - } // for(i = 1; i < NUMCORES4GC; ++i) + } // for(i = 1; i < NUMCORESACTIVE; ++i) } else { // check if the sum of send objs and receive obj are the same // yes->check if the info is the latest; no->go on executing int sumsendobj = 0; - for(i = 0; i < NUMCORES4GC; ++i) { + for(i = 0; i < NUMCORESACTIVE; ++i) { sumsendobj += gcnumsendobjs[i]; - } // for(i = 0; i < NUMCORES4GC; ++i) + } // for(i = 0; i < NUMCORESACTIVE; ++i) #ifdef DEBUG BAMBOO_DEBUGPRINT(0xee06); BAMBOO_DEBUGPRINT_REG(sumsendobj); #endif - for(i = 0; i < NUMCORES4GC; ++i) { + for(i = 0; i < NUMCORESACTIVE; ++i) { sumsendobj -= gcnumreceiveobjs[i]; - } // for(i = 0; i < NUMCORES4GC; ++i) + } // for(i = 0; i < NUMCORESACTIVE; ++i) #ifdef DEBUG BAMBOO_DEBUGPRINT(0xee07); BAMBOO_DEBUGPRINT_REG(sumsendobj); @@ -459,9 +464,9 @@ inline void checkMarkStatue() { // stop mark phase gcphase = COMPACTPHASE; // restore the gcstatus for all cores - for(i = 0; i < NUMCORES4GC; ++i) { + for(i = 0; i < NUMCORESACTIVE; ++i) { gccorestatus[i] = 1; - } // for(i = 0; i < NUMCORES4GC; ++i) + } // for(i = 0; i < NUMCORESACTIVE; ++i) } // if(0 == sumsendobj) } // if(!gcwaitconfirm) else() } // if(allStall) @@ -558,6 +563,11 @@ inline void initGC() { gcfilledblocks[i] = 0; gcstopblock[i] = 0; } // for(i = 0; i < NUMCORES4GC; ++i) + for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) { + gccorestatus[i] = 1; + gcnumsendobjs[i] = 0; + gcnumreceiveobjs[i] = 0; + } gcheaptop = 0; gctopcore = 0; gctopblock = 0; @@ -606,8 +616,6 @@ inline void initGC() { freeMGCHash(gcforwardobjtbl); gcforwardobjtbl = allocateMGCHash(20, 3); - - memset(gcsmemtbl, '\0', sizeof(int)*gcnumblock); } // void initGC() // compute load balance for all cores @@ -733,6 +741,7 @@ inline bool cacheLObjs() { if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) { memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size); } else { + //BAMBOO_WRITE_HINT_CACHE(dst, size); memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size); } #ifdef DEBUG @@ -750,7 +759,7 @@ inline bool cacheLObjs() { // NOTE: the free mem chunks should be maintained in an ordered linklist // the listtop param always specify current list tail -// update the gcsmemtbl to record current shared mem usage +// update the bmmboo_smemtbl to record current shared mem usage void updateSmemTbl(int coren, int localtop) { int ltopcore = 0; @@ -766,10 +775,10 @@ void updateSmemTbl(int coren, do{ toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j; if(toset < ltopcore) { - gcsmemtbl[toset]= + bamboo_smemtbl[toset]= (tosetnext == NULL) { - if(bamboo_free_mem_list->backuplist != NULL) { - tochange->next = bamboo_free_mem_list->backuplist; - bamboo_free_mem_list->backuplist = NULL; - } else { - tochange->next = - (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem)); - } - } // if(tochange->next == NULL) - tochange = tochange->next; - } else { - *sethead = true; - } // if(sethead) - tochange->ptr = ptr; - tochange->size = size; - BLOCKINDEX(ptr, &(tochange->startblock)); - BLOCKINDEX(ptr+size-1, &(tochange->endblock)); - // zero out all these spare memory - 
// note that, leave the mem starting from heaptop, as it caches large objs - // zero out these cache later when moving large obj - { - INTPTR tmp = tochange->ptr; - unsigned long long int size = tochange->size; - while(size > 0) { - int tsize = size>1024*1024*1024?1024*1024*1024:size; - memset(tmp, '\0', tsize); - size -= tsize; - tmp += tsize; - } - } - return tochange; -} // struct freeMemItem * addFreeMemItem(int,int,struct freeMemItem*,bool*, int) - inline void moveLObjs() { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xea01); #endif + // zero out the smemtbl + BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock); // find current heap top // flush all gcloads to indicate the real heap top on one core // previous it represents the next available ptr on a core @@ -839,7 +811,7 @@ inline void moveLObjs() { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xea02); BAMBOO_DEBUGPRINT_REG(gcloads[0]); - BAMBOO_DEBUGPRINT_REG(gcsmemtbl[0]); + BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]); #endif for(int i = 1; i < NUMCORES4GC; i++) { int tmptop = 0; @@ -871,14 +843,14 @@ inline void moveLObjs() { int bound = 0; int i = 0; for(i = gcnumblock-1; i >= 0; i--) { - if(gcsmemtbl[i] > 0) { + if(bamboo_smemtbl[i] > 0) { break; } } if(i == -1) { tmpheaptop = gcbaseva; } else { - tmpheaptop = gcbaseva+gcsmemtbl[i]+((i 0) { // close current block, fill its header - memset(base, '\0', BAMBOO_CACHE_LINE_SIZE); + BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE); *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE; - gcsmemtbl[b]+=BAMBOO_CACHE_LINE_SIZE; // add the size of the header + bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE; // add the size of the header cpysize = 0; base = tmpheaptop; if(remain == 0) { @@ -948,12 +920,13 @@ inline void moveLObjs() { if((int)gcheaptop < (int)(tmpheaptop)+size) { memmove(tmpheaptop, gcheaptop, size); } else { + //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size); memcpy(tmpheaptop, gcheaptop, size); } // fill the remaining space with -2 padding - memset(tmpheaptop+size, -2, isize-size); + BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size); // zero out original mem caching the lobj - memset(gcheaptop, '\0', size); + BAMBOO_MEMSET_WH(gcheaptop, '\0', size); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xea05); BAMBOO_DEBUGPRINT_REG(gcheaptop); @@ -987,7 +960,7 @@ inline void moveLObjs() { } // if(host == BAMBOO_NUM_OF_CORE) else ... 
tmpheaptop += isize; - // set the gcsbstarttbl and gcsmemtbl + // set the gcsbstarttbl and bamboo_smemtbl int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE; for(int k = 1; k < tmpsbs; k++) { gcsbstarttbl[sb+k] = (INTPTR)(-1); @@ -996,7 +969,7 @@ inline void moveLObjs() { bound = (b 0) { // close current block, fill the header - memset(base, '\0', BAMBOO_CACHE_LINE_SIZE); + BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE); *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE; - gcsmemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header + bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header } else { tmpheaptop -= BAMBOO_CACHE_LINE_SIZE; } @@ -1091,82 +1065,19 @@ inline void moveLObjs() { BAMBOO_DEBUGPRINT(0xea07); BAMBOO_DEBUGPRINT_REG(gcheaptop); #endif - - // update the free mem list - // create new free mem list according to gcsmemtbl - bool sethead = false; - if(bamboo_free_mem_list->head == NULL) { - bamboo_free_mem_list->head = bamboo_free_mem_list->backuplist; - bamboo_free_mem_list->backuplist = NULL; - } - struct freeMemItem * tochange = bamboo_free_mem_list->head; - if(tochange == NULL) { - bamboo_free_mem_list->head = tochange = - (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem)); - tochange->next = NULL; - } - int startptr = 0; - size = 0; - bound = BAMBOO_SMEM_SIZE_L; - for(i = 0; i < gcnumblock-bamboo_reserved_smem; i++) { - if(gcsmemtbl[i] < bound) { - if(gcsmemtbl[i] == 0) { - // blank one - if(startptr == 0) { - // a start of a new free mem chunk - startptr = gcbaseva+((inext != NULL) { - struct freeMemItem * blist = NULL; - if(bamboo_free_mem_list->backuplist != NULL) { - blist = tochange->next; + + bamboo_free_block = 0; + int tbound = 0; + do { + tbound = (bamboo_free_blockbackuplist = tochange->next; - blist = bamboo_free_mem_list->backuplist->next; - bamboo_free_mem_list->backuplist->next = NULL; + // the first non-full partition + break; } - tochange->next = NULL; - while(blist != NULL) { - struct freeMemItem * tmp = blist; - blist = blist->next; - RUNFREE(tmp); - } // if(blist != NULL) - } - + } while(true); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xea08); BAMBOO_DEBUGPRINT_REG(gcheaptop); @@ -1232,9 +1143,6 @@ inline void tomark(struct garbagelist * stackptr) { #endif for(i=0; isize; i++) { if(stackptr->array[i] != NULL) { - //BAMBOO_START_CRITICAL_SECTION(); - //gc_enqueue_I(stackptr->array[i]); - //BAMBOO_CLOSE_CRITICAL_SECTION(); markObj(stackptr->array[i]); } } @@ -1255,9 +1163,6 @@ inline void tomark(struct garbagelist * stackptr) { struct ObjectHash * set=parameter->objectset; struct ObjectNode * ptr=set->listhead; while(ptr!=NULL) { - //BAMBOO_START_CRITICAL_SECTION(); - //gc_enqueue_I((void *)ptr->key); - //BAMBOO_CLOSE_CRITICAL_SECTION(); markObj((void *)ptr->key); ptr=ptr->lnext; } @@ -1271,9 +1176,6 @@ inline void tomark(struct garbagelist * stackptr) { BAMBOO_DEBUGPRINT(0xe504); #endif for(i=0; inumParameters; i++) { - //BAMBOO_START_CRITICAL_SECTION(); - //gc_enqueue_I(currtpd->parameterArray[i]); - //BAMBOO_CLOSE_CRITICAL_SECTION(); markObj(currtpd->parameterArray[i]); } } @@ -1288,9 +1190,6 @@ inline void tomark(struct garbagelist * stackptr) { struct taskparamdescriptor *tpd=ptr->src; int i; for(i=0; inumParameters; i++) { - //BAMBOO_START_CRITICAL_SECTION(); - //gc_enqueue_I(tpd->parameterArray[i]); - //BAMBOO_CLOSE_CRITICAL_SECTION(); markObj(tpd->parameterArray[i]); } ptr=ptr->inext; @@ -1305,9 +1204,6 @@ inline void tomark(struct garbagelist * stackptr) { while(tmpobjptr != NULL) { struct transObjInfo * objInfo = 
(struct transObjInfo *)(tmpobjptr->objectptr); - //BAMBOO_START_CRITICAL_SECTION(); - //gc_enqueue_I(objInfo->objptr); - //BAMBOO_CLOSE_CRITICAL_SECTION(); markObj(objInfo->objptr); tmpobjptr = getNextQueueItem(tmpobjptr); } @@ -1320,9 +1216,6 @@ inline void tomark(struct garbagelist * stackptr) { while(item != NULL) { struct transObjInfo * totransobj = (struct transObjInfo *)(item->objectptr); - //BAMBOO_START_CRITICAL_SECTION(); - //gc_enqueue_I(totransobj->objptr); - //BAMBOO_CLOSE_CRITICAL_SECTION(); markObj(totransobj->objptr); item = getNextQueueItem(item); } // while(item != NULL) @@ -1332,10 +1225,8 @@ inline void tomark(struct garbagelist * stackptr) { #endif // enqueue lock related info for(i = 0; i < runtime_locklen; ++i) { - //gc_enqueue_I((void *)(runtime_locks[i].redirectlock)); markObj((void *)(runtime_locks[i].redirectlock)); if(runtime_locks[i].value != NULL) { - //gc_enqueue_I((void *)(runtime_locks[i].value)); markObj((void *)(runtime_locks[i].value)); } } @@ -1596,10 +1487,6 @@ inline void compact2Heaptop() { BAMBOO_DEBUGPRINT_REG(b); BAMBOO_DEBUGPRINT_REG(remain); #endif - /*if((gctopcore == STARTUPCORE) && (b == 0)) { - remain -= gcreservedsb*BAMBOO_SMEM_SIZE; - p += gcreservedsb*BAMBOO_SMEM_SIZE; - }*/ for(int i = 0; i < NUMCORES4GC; i++) { BAMBOO_START_CRITICAL_SECTION(); if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) { @@ -1771,12 +1658,18 @@ innernextSBlock: orig->blockbase = orig->base; orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE; sbchanged = true; + int blocknum = 0; + BLOCKINDEX(orig->base, &blocknum); + if(bamboo_smemtbl[blocknum] == 0) { + // goto next block + goto innernextSBlock; + } } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) { orig->sblockindex += 1; sbchanged = true; } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)... 
- // check if this sblock should be omitted or have special start point + // check if this sblock should be skipped or have special start point if(gcsbstarttbl[orig->sblockindex] == -1) { // goto next sblock #ifdef DEBUG @@ -1951,10 +1844,10 @@ innermoveobj: // check to see if remaining space is enough if(to->top + isize > to->bound) { // fill 0 indicating the end of this block - memset(to->ptr, '\0', to->bound - to->top); + BAMBOO_MEMSET_WH(to->ptr, '\0', to->bound - to->top); // fill the header of this block and then go to next block to->offset += to->bound - to->top; - memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE); + BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE); (*((int*)(to->base))) = to->offset; nextBlock(to); if(stopblock == to->numblocks) { @@ -1969,10 +1862,11 @@ innermoveobj: if((int)(orig->ptr) < (int)(to->ptr)+size) { memmove(to->ptr, orig->ptr, size); } else { + //BAMBOO_WRITE_HINT_CACHE(to->ptr, size); memcpy(to->ptr, orig->ptr, size); } // fill the remaining space with -2 - memset(to->ptr+size, -2, isize-size); + BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size); } // store mapping info BAMBOO_START_CRITICAL_SECTION(); @@ -1981,7 +1875,6 @@ innermoveobj: //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr); BAMBOO_CLOSE_CRITICAL_SECTION(); //} - #ifdef DEBUG BAMBOO_DEBUGPRINT(0xcdce); BAMBOO_DEBUGPRINT_REG(orig->ptr); @@ -1993,7 +1886,7 @@ innermoveobj: to->top += isize; if(to->top == to->bound) { // fill the header of this block and then go to next block - memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE); + BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE); (*((int*)(to->base))) = to->offset; nextBlock(to); } @@ -2095,7 +1988,7 @@ innercompact: // if no objs have been compact, do nothing, // otherwise, fill the header of this block if(to->offset > BAMBOO_CACHE_LINE_SIZE) { - memset(to->base, '\0', BAMBOO_CACHE_LINE_SIZE); + BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE); (*((int*)(to->base))) = to->offset; } else { to->offset = 0; @@ -2248,6 +2141,9 @@ inline void * flushObj(void * objptr) { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe401); #endif + if(objptr == NULL) { + return NULL; + } void * dstptr = NULL; if(ISSHAREDOBJ(objptr)) { #ifdef DEBUG @@ -2437,8 +2333,10 @@ inline void flush(struct garbagelist * stackptr) { #ifdef DEBUG BAMBOO_DEBUGPRINT_REG(objptr); #endif - ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = - flushObj(objptr); + if(objptr != NULL) { + ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = + flushObj(objptr); + } } } else { #ifdef DEBUG @@ -2452,11 +2350,12 @@ inline void flush(struct garbagelist * stackptr) { #endif unsigned int offset=pointer[i]; void * objptr=*((void **)(((char *)ptr)+offset)); - #ifdef DEBUG BAMBOO_DEBUGPRINT_REG(objptr); #endif - *((void **)(((char *)ptr)+offset)) = flushObj(objptr); + if(objptr != NULL) { + *((void **)(((char *)ptr)+offset)) = flushObj(objptr); + } } // for(i=1; i<=size; i++) } // if (pointer==0) else if (((INTPTR)pointer)==1) else () // restore the mark field, indicating that this obj has been flushed @@ -2477,7 +2376,6 @@ inline void flush(struct garbagelist * stackptr) { BAMBOO_DEBUGPRINT(0xe309); #endif void * ptr = gc_lobjdequeue(NULL, NULL); - //if(ISSHAREDOBJ(ptr)) { void * tptr = flushObj(ptr); #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe30a); @@ -2488,8 +2386,7 @@ inline void flush(struct garbagelist * stackptr) { if(tptr != NULL) { ptr = tptr; } - //} - if(/*(!ISSHAREDOBJ(ptr)) || */(((int *)(ptr))[6] == COMPACTED)) { + if(((int *)(ptr))[6] == COMPACTED) { int type = ((int 
*)(ptr))[0]; // scan all pointers in ptr unsigned INTPTR * pointer; @@ -2518,8 +2415,10 @@ inline void flush(struct garbagelist * stackptr) { #ifdef DEBUG BAMBOO_DEBUGPRINT_REG(objptr); #endif - ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = - flushObj(objptr); + if(objptr != NULL) { + ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = + flushObj(objptr); + } } } else { #ifdef DEBUG @@ -2537,13 +2436,13 @@ inline void flush(struct garbagelist * stackptr) { #ifdef DEBUG BAMBOO_DEBUGPRINT_REG(objptr); #endif - *((void **)(((char *)ptr)+offset)) = flushObj(objptr); + if(objptr != NULL) { + *((void **)(((char *)ptr)+offset)) = flushObj(objptr); + } } // for(i=1; i<=size; i++) } // if (pointer==0) else if (((INTPTR)pointer)==1) else () // restore the mark field, indicating that this obj has been flushed - //if(ISSHAREDOBJ(ptr)) { - ((int *)(ptr))[6] = INIT; - //} + ((int *)(ptr))[6] = INIT; } // if(((int *)(ptr))[6] == COMPACTED) } // while(gc_lobjmoreItems()) #ifdef DEBUG @@ -2564,12 +2463,9 @@ inline void flush(struct garbagelist * stackptr) { inline void gc_collect(struct garbagelist * stackptr) { // core collector routine while(true) { - //BAMBOO_START_CRITICAL_SECTION(); if(INITPHASE == gcphase) { - //BAMBOO_CLOSE_CRITICAL_SECTION(); break; } - //BAMBOO_CLOSE_CRITICAL_SECTION(); } #ifdef RAWPATH // TODO GC_DEBUG tprintf("Do initGC\n"); @@ -2578,12 +2474,9 @@ inline void gc_collect(struct garbagelist * stackptr) { //send init finish msg to core coordinator send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false); while(true) { - //BAMBOO_START_CRITICAL_SECTION(); if(MARKPHASE == gcphase) { - //BAMBOO_CLOSE_CRITICAL_SECTION(); break; } - //BAMBOO_CLOSE_CRITICAL_SECTION(); } #ifdef RAWPATH // TODO GC_DEBUG tprintf("Start mark phase\n"); @@ -2597,12 +2490,9 @@ inline void gc_collect(struct garbagelist * stackptr) { tprintf("Finish compact phase\n"); #endif while(true) { - //BAMBOO_START_CRITICAL_SECTION(); if(FLUSHPHASE == gcphase) { - //BAMBOO_CLOSE_CRITICAL_SECTION(); break; } - //BAMBOO_CLOSE_CRITICAL_SECTION(); } #ifdef RAWPATH // TODO GC_DEBUG tprintf("Start flush phase\n"); @@ -2613,12 +2503,57 @@ inline void gc_collect(struct garbagelist * stackptr) { #endif while(true) { - //BAMBOO_START_CRITICAL_SECTION(); if(FINISHPHASE == gcphase) { - //BAMBOO_CLOSE_CRITICAL_SECTION(); break; } - //BAMBOO_CLOSE_CRITICAL_SECTION(); + } +#ifdef RAWPATH // TODO GC_DEBUG + tprintf("Finish gc!\n"); +#endif +} // void gc_collect(struct garbagelist * stackptr) + +inline void gc_nocollect(struct garbagelist * stackptr) { + while(true) { + if(INITPHASE == gcphase) { + break; + } + } +#ifdef RAWPATH // TODO GC_DEBUG + tprintf("Do initGC\n"); +#endif + initGC(); + //send init finish msg to core coordinator + send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false); + while(true) { + if(MARKPHASE == gcphase) { + break; + } + } +#ifdef RAWPATH // TODO GC_DEBUG + tprintf("Start mark phase\n"); +#endif + mark(true, stackptr); +#ifdef RAWPATH // TODO GC_DEBUG + tprintf("Finish mark phase, wait for flush\n"); +#endif + // non-gc core collector routine + while(true) { + if(FLUSHPHASE == gcphase) { + break; + } + } +#ifdef RAWPATH // TODO GC_DEBUG + tprintf("Start flush phase\n"); +#endif + flush(stackptr); +#ifdef RAWPATH // TODO GC_DEBUG + tprintf("Finish flush phase\n"); +#endif + + while(true) { + if(FINISHPHASE == gcphase) { + break; + } } #ifdef RAWPATH // TODO GC_DEBUG tprintf("Finish gc!\n"); @@ -2643,6 +2578,10 @@ inline void gc(struct garbagelist * stackptr) { return; } 
+#ifdef GC_PROFILE + gc_profileStart(); +#endif + #ifdef RAWPATH // TODO GC_DEBUG tprintf("start gc! \n"); //dumpSMem(); @@ -2652,7 +2591,8 @@ inline void gc(struct garbagelist * stackptr) { waitconfirm = false; waitconfirm = 0; gcphase = INITPHASE; - for(i = 1; i < NUMCORES4GC; i++) { + // Note: all cores need to init gc including non-gc cores + for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; i++) { // send GC init messages to all cores send_msg_1(i, GCSTARTINIT, false); } @@ -2666,20 +2606,21 @@ inline void gc(struct garbagelist * stackptr) { gccorestatus[BAMBOO_NUM_OF_CORE] = 0; while(true) { - BAMBOO_START_CRITICAL_SECTION(); - if(gc_checkCoreStatus()) { - BAMBOO_CLOSE_CRITICAL_SECTION(); + if(gc_checkAllCoreStatus()) { break; } - BAMBOO_CLOSE_CRITICAL_SECTION(); } +#ifdef GC_PROFILE + gc_profileItem(); +#endif #ifdef RAWPATH // TODO GC_DEBUG tprintf("Start mark phase \n"); #endif // all cores have finished compacting // restore the gcstatus of all cores + // Note: all cores have to do mark including non-gc cores gccorestatus[BAMBOO_NUM_OF_CORE] = 1; - for(i = 1; i < NUMCORES4GC; ++i) { + for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) { gccorestatus[i] = 1; // send GC start messages to all cores send_msg_1(i, GCSTART, false); @@ -2697,6 +2638,7 @@ inline void gc(struct garbagelist * stackptr) { checkMarkStatue(); } // while(MARKPHASE == gcphase) // send msgs to all cores requiring large objs info + // Note: only need to ask gc cores, non-gc cores do not host any objs numconfirm = NUMCORES4GC - 1; for(i = 1; i < NUMCORES4GC; ++i) { send_msg_1(i, GCLOBJREQUEST, false); @@ -2711,6 +2653,9 @@ inline void gc(struct garbagelist * stackptr) { if(gcheaptop < gcmarkedptrbound) { gcheaptop = gcmarkedptrbound; } +#ifdef GC_PROFILE + gc_profileItem(); +#endif #ifdef RAWPATH // TODO GC_DEBUG tprintf("prepare to cache large objs \n"); //dumpSMem(); @@ -2769,6 +2714,10 @@ inline void gc(struct garbagelist * stackptr) { gcrequiredmems[i] = 0; } +#ifdef GC_PROFILE + gc_profileItem(); +#endif + // compact phase bool finalcompact = false; // initialize pointers for comapcting @@ -2859,7 +2808,9 @@ inline void gc(struct garbagelist * stackptr) { } // if(gctomove) } // while(COMPACTPHASE == gcphase) - +#ifdef GC_PROFILE + gc_profileItem(); +#endif #ifdef RAWPATH // TODO GC_DEBUG tprintf("prepare to move large objs \n"); //dumpSMem(); @@ -2876,12 +2827,16 @@ inline void gc(struct garbagelist * stackptr) { gcphase = FLUSHPHASE; gccorestatus[BAMBOO_NUM_OF_CORE] = 1; - for(i = 1; i < NUMCORES4GC; ++i) { + // Note: all cores should flush their runtime data including non-gc + // cores + for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) { // send start flush messages to all cores gccorestatus[i] = 1; send_msg_1(i, GCSTARTFLUSH, false); } - +#ifdef GC_PROFILE + gc_profileItem(); +#endif #ifdef RAWPATH // TODO GC_DEBUG tprintf("Start flush phase \n"); #endif @@ -2890,14 +2845,26 @@ inline void gc(struct garbagelist * stackptr) { gccorestatus[BAMBOO_NUM_OF_CORE] = 0; while(FLUSHPHASE == gcphase) { // check the status of all cores - if(gc_checkCoreStatus()) { + if(gc_checkAllCoreStatus()) { break; } } // while(FLUSHPHASE == gcphase) gcphase = FINISHPHASE; + // invalidate all shared mem pointers + // put it here as it takes time to inform all the other cores to + // finish gc and it might cause problem when some core resumes + // mutator earlier than the other cores + bamboo_cur_msp = NULL; + bamboo_smem_size = 0; + gcflag = false; + gcprocessing = false; + +#ifdef GC_PROFILE + gc_profileEnd(); +#endif 
gccorestatus[BAMBOO_NUM_OF_CORE] = 1; - for(i = 1; i < NUMCORES4GC; ++i) { + for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) { // send gc finish messages to all cores send_msg_1(i, GCFINISH, false); gccorestatus[i] = 1; @@ -2906,18 +2873,116 @@ inline void gc(struct garbagelist * stackptr) { tprintf("gc finished \n"); //dumpSMem(); #endif - } else { + } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) { gcprocessing = true; gc_collect(stackptr); - } - // invalidate all shared mem pointers - bamboo_cur_msp = NULL; - bamboo_smem_size = 0; + // invalidate all shared mem pointers + bamboo_cur_msp = NULL; + bamboo_smem_size = 0; - gcflag = false; - gcprocessing = false; + gcflag = false; + gcprocessing = false; + } else { + // not a gc core, should wait for gcfinish msg + gcprocessing = true; + gc_nocollect(stackptr); + // invalidate all shared mem pointers + bamboo_cur_msp = NULL; + bamboo_smem_size = 0; + + gcflag = false; + gcprocessing = false; + } } // void gc(struct garbagelist * stackptr) +#ifdef GC_PROFILE +inline void gc_profileStart(void) { + if(!gc_infoOverflow) { + GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info)); + gc_infoArray[gc_infoIndex] = gcInfo; + gcInfo->index = 1; + gcInfo->time[0] = BAMBOO_GET_EXE_TIME(); + } +} + +inline void gc_profileItem(void) { + if(!gc_infoOverflow) { + GCInfo* gcInfo = gc_infoArray[gc_infoIndex]; + gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME(); + } +} + +inline void gc_profileEnd(void) { + if(!gc_infoOverflow) { + GCInfo* gcInfo = gc_infoArray[gc_infoIndex]; + gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME(); + gc_infoIndex++; + if(gc_infoIndex == GCINFOLENGTH) { + gc_infoOverflow = true; + //taskInfoIndex = 0; + } + } +} + +// output the profiling data +void gc_outputProfileData() { +#ifdef USEIO + int i,j; + unsigned long long totalgc = 0; + + //printf("Start Time, End Time, Duration\n"); + // output task related info + for(i = 0; i < gc_infoIndex; i++) { + GCInfo * gcInfo = gc_infoArray[i]; + unsigned long long tmp = 0; + for(j = 0; j < gcInfo->index; j++) { + printf("%lld(%lld), ", gcInfo->time[j], (gcInfo->time[j]-tmp)); + tmp = gcInfo->time[j]; + } + tmp = (tmp-gcInfo->time[0]); + printf(" ++ %lld \n", tmp); + totalgc += tmp; + } + + if(gc_infoOverflow) { + printf("Caution: gc info overflow!\n"); + } + + printf("\n\n total gc time: %lld \n", totalgc); +#else + int i = 0; + int j = 0; + unsigned long long totalgc = 0; + + BAMBOO_DEBUGPRINT(0xdddd); + // output task related info + for(i= 0; i < gc_infoIndex; i++) { + GCInfo * gcInfo = gc_infoArray[i]; + unsigned long long tmp = 0; + BAMBOO_DEBUGPRINT(0xddda); + for(j = 0; j < gcInfo->index; j++) { + BAMBOO_DEBUGPRINT(gcInfo->time[j]); + BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp); + BAMBOO_DEBUGPRINT(0xdddb); + tmp = gcInfo->time[j]; + } + tmp = (tmp-gcInfo->time[0]); + BAMBOO_DEBUGPRINT_REG(tmp); + BAMBOO_DEBUGPRINT(0xdddc); + totalgc += tmp; + } + BAMBOO_DEBUGPRINT(0xdddd); + BAMBOO_DEBUGPRINT_REG(totalgc); + + if(gc_infoOverflow) { + BAMBOO_DEBUGPRINT(0xefee); + } + + BAMBOO_DEBUGPRINT(0xeeee); +#endif +} +#endif // #ifdef GC_PROFILE + #endif diff --git a/Robust/src/Runtime/multicoregarbage.h b/Robust/src/Runtime/multicoregarbage.h index 95329471..08ba12bc 100644 --- a/Robust/src/Runtime/multicoregarbage.h +++ b/Robust/src/Runtime/multicoregarbage.h @@ -13,12 +13,28 @@ #ifdef GC_DEBUG #define BAMBOO_SMEM_SIZE_L (BAMBOO_SMEM_SIZE * 2) #else -#define BAMBOO_SMEM_SIZE_L (32 * BAMBOO_SMEM_SIZE) +#define BAMBOO_SMEM_SIZE_L (2 * BAMBOO_SMEM_SIZE) #endif -#define BAMBOO_LARGE_SMEM_BOUND 
(BAMBOO_SMEM_SIZE_L*NUMCORES4GC) // NUMCORES=62 +#define BAMBOO_LARGE_SMEM_BOUND (BAMBOO_SMEM_SIZE_L*NUMCORES4GC) + // let each gc core to have one big block, this is very important + // for the computation of NUMBLOCKS(s, n), DO NOT change this! #define NUMPTRS 100 +// for GC profile +#ifdef GC_PROFILE +#define GCINFOLENGTH 100 + +typedef struct gc_info { + unsigned long long time[7]; + int index; +} GCInfo; + +GCInfo * gc_infoArray[GCINFOLENGTH]; +int gc_infoIndex; +bool gc_infoOverflow; +#endif + typedef enum { INIT = 0, // 0 DISCOVERED, // 1 @@ -44,11 +60,11 @@ volatile GCPHASETYPE gcphase; // indicating GC phase int gccurr_heaptop; struct MGCHash * gcforwardobjtbl; // cache forwarded objs in mark phase // for mark phase termination -int gccorestatus[NUMCORES4GC]; // records status of each core - // 1: running gc - // 0: stall -int gcnumsendobjs[NUMCORES4GC]; // records how many objects sent out -int gcnumreceiveobjs[NUMCORES4GC]; // records how many objects received +int gccorestatus[NUMCORESACTIVE]; // records status of each core + // 1: running gc + // 0: stall +int gcnumsendobjs[NUMCORESACTIVE]; // records how many objects sent out +int gcnumreceiveobjs[NUMCORESACTIVE]; // records how many objects received bool gcbusystatus; int gcself_numsendobjs; int gcself_numreceiveobjs; @@ -90,10 +106,6 @@ int gcreservedsb; // number of reserved sblock for sbstarttbl int gcnumblock; // number of total blocks in the shared mem int gcbaseva; // base va for shared memory without reserved sblocks -// table recording the number of used bytes in each block -// Note: this table resides on master core's local heap -int * gcsmemtbl; - #define ISSHAREDOBJ(p) \ ((((int)p)>gcbaseva)&&(((int)p)<(gcbaseva+(BAMBOO_SHARED_MEM_SIZE)))) @@ -163,6 +175,7 @@ int * gcsmemtbl; inline void gc(struct garbagelist * stackptr); // core coordinator routine inline void gc_collect(struct garbagelist* stackptr);//core collector routine +inline void gc_nocollect(struct garbagelist* stackptr);//non-gc core collector routine inline void transferMarkResults_I(); inline void gc_enqueue_I(void *ptr); inline void gc_lobjenqueue_I(void *ptr, int length, int host); @@ -176,5 +189,12 @@ inline void * gc_lobjdequeue4(int * length, int * host); inline int gc_lobjmoreItems4(); inline void gc_lobjqueueinit4(); +#ifdef GC_PROFILE +INLINE void gc_profileStart(void); +INLINE void gc_profileItem(void); +INLINE void gc_profileEnd(void); +void gc_outputProfileData(); +#endif + #endif diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h index 14364342..bdf4ab85 100644 --- a/Robust/src/Runtime/multicoreruntime.h +++ b/Robust/src/Runtime/multicoreruntime.h @@ -236,10 +236,10 @@ struct Queue * totransobjqueue; // queue to hold objs to be transferred #define BAMBOO_SMEM_SIZE (64 * 64) // (BAMBOO_PAGE_SIZE) #define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES)) #else -#define BAMBOO_NUM_PAGES (64 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G +#define BAMBOO_NUM_PAGES (15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G #define BAMBOO_PAGE_SIZE (16 * 1024)// * 1024) // (4096) #define BAMBOO_SMEM_SIZE (16 * 1024) -#define BAMBOO_SHARED_MEM_SIZE (1024 * 1024 * 1024) +#define BAMBOO_SHARED_MEM_SIZE (1024 * 1024 * 240) //(1024 * 1024 * 1024) //(3.0 * 1024 * 1024 * 1024) // 3G// ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES)) #endif @@ -272,7 +272,13 @@ struct freeMemList { // only maintain 1 fremmMemItem }; -struct freeMemList * bamboo_free_mem_list; +// table recording the number of allocated 
bytes on each block +// Note: this table resides on the bottom of the shared heap for all cores +// to access +int * bamboo_smemtbl; +int bamboo_free_block; +//bool bamboo_smem_flushed; +//struct freeMemList * bamboo_free_mem_list; int bamboo_reserved_smem; // reserved blocks on the top of the shared heap // e.g. 20% of the heap and should not be allocated // otherwise gc is invoked @@ -398,6 +404,8 @@ INLINE void send_msg_6(int targetcore, unsigned long n4, unsigned long n5, bool isinterrupton); +INLINE void cache_msg_1(int targetcore, + unsigned long n0); INLINE void cache_msg_2(int targetcore, unsigned long n0, unsigned long n1); @@ -478,6 +486,11 @@ void outputProfileData(); // BAMBOO_MSG_AVAIL(): checking if there are msgs coming in // // BAMBOO_GCMSG_AVAIL(): checking if there are gcmsgs coming in // // BAMBOO_GET_EXE_TIME(): rountine to get current clock cycle number // +// BAMBOO_MEMSET_WH(x, y, z): memset the specified region of memory (start // +// address x, size z) to value y with write // +// hint, the processor will not fetch the // +// current content of the memory and directly // +// write // // // // runtime_arch.h should also define following global parameters: // // bamboo_cpu2coords: map the cpu # to (x,y) coordinates // diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c index 0acb6cab..6d1ee763 100644 --- a/Robust/src/Runtime/multicoretask.c +++ b/Robust/src/Runtime/multicoretask.c @@ -53,17 +53,23 @@ void initruntimedata() { // initialize the profile data arrays profilestatus[i] = 1; #endif - } // for(i = 0; i < NUMCORESACTIVE; ++i) #ifdef MULTICORE_GC - for(i = 0; i < NUMCORES4GC; ++i) { gccorestatus[i] = 1; gcnumsendobjs[i] = 0; gcnumreceiveobjs[i] = 0; +#endif + } // for(i = 0; i < NUMCORESACTIVE; ++i) +#ifdef MULTICORE_GC + for(i = 0; i < NUMCORES4GC; ++i) { gcloads[i] = 0; gcrequiredmems[i] = 0; gcstopblock[i] = 0; gcfilledblocks[i] = 0; } // for(i = 0; i < NUMCORES4GC; ++i) +#ifdef GC_PROFILE + gc_infoIndex = 0; + gc_infoOverflow = false; +#endif #endif numconfirm = 0; waitconfirm = false; @@ -119,7 +125,8 @@ void initruntimedata() { gcmovepending = 0; gcblock2fill = 0; gcsbstarttbl = BAMBOO_BASE_VA; - gcsmemtbl = RUNMALLOC_I(sizeof(int)*gcnumblock); + bamboo_smemtbl = (void *)gcsbstarttbl + + (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE)*sizeof(INTPTR); #else // create the lock table, lockresult table and obj queue locktable.size = 20; @@ -169,9 +176,6 @@ void disruntimedata() { freeRuntimeHash(gcpointertbl); //freeMGCHash(gcpointertbl); freeMGCHash(gcforwardobjtbl); - if(gcsmemtbl != NULL) { - RUNFREE(gcsmemtbl); - } #else freeRuntimeHash(lockRedirectTbl); freeRuntimeHash(objRedirectLockTbl); @@ -446,6 +450,13 @@ void checkCoreStatus() { } // if(!allStall) } // while(true) #endif + + // gc_profile mode, ourput gc prfiling data +#ifdef MULTICORE_GC +#ifdef GC_PROFILE + gc_outputProfileData(); +#endif // #ifdef GC_PROFILE +#endif // #ifdef MULTICORE_GC disruntimedata(); terminate(); // All done. 
} // if(!waitconfirm) @@ -1203,181 +1214,160 @@ inline void addNewObjInfo(void * nobj) { #endif #ifdef MULTICORE_GC -struct freeMemItem * findFreeMemChunk_I(int coren, - int isize, - int * tofindb) { - struct freeMemItem * freemem = bamboo_free_mem_list->head; - struct freeMemItem * prev = NULL; +void * localmalloc_I(int coren, + int isize, + int * allocsize) { + void * mem = NULL; int i = 0; int j = 0; - *tofindb = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j; - // check available shared mem chunks + int tofindb = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j; + int totest = tofindb; + int bound = BAMBOO_SMEM_SIZE_L; + int foundsmem = 0; + int size = 0; do { - int foundsmem = 0; - switch(bamboo_smem_mode) { - case SMEMLOCAL: { - int startb = freemem->startblock; - int endb = freemem->endblock; - while(startb > *tofindb) { - i++; - if(2==i) { - i = 0; - j++; - } - *tofindb = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j; - } // while(startb > tofindb) - if(startb <= *tofindb) { - if((endb >= *tofindb) && (freemem->size >= isize)) { - foundsmem = 1; - } else if(*tofindb > gcnumblock-1) { - // no more local mem - foundsmem = 2; - } // if(endb >= tofindb) - } // if(startb <= tofindb) - break; - } - - case SMEMFIXED: { - int startb = freemem->startblock; - int endb = freemem->endblock; - if(startb <= *tofindb) { - if((endb >= *tofindb) && (freemem->size >= isize)) { - foundsmem = 1; - } - } else { - // use the global mem - if(((startb > NUMCORES4GC-1) && (freemem->size >= isize)) || - ((endb > NUMCORES4GC-1) && ((freemem->size- - (gcbaseva+BAMBOO_LARGE_SMEM_BOUND-freemem->ptr))>=isize))) { - foundsmem = 1; - } - } - break; - } - - case SMEMMIXED: { - // TODO not supported yet - BAMBOO_EXIT(0xe001); - break; - } - - case SMEMGLOBAL: { - foundsmem = (freemem->size >= isize); - break; - } - default: - break; - } - - if(1 == foundsmem) { - // found one - break; - } else if (2 == foundsmem) { - // terminate, no more mem - freemem = NULL; - break; - } - if(freemem->size == 0) { - // an empty item, remove it - struct freeMemItem * toremove = freemem; - freemem = freemem->next; - if(prev == NULL ){ - // the head - bamboo_free_mem_list->head = freemem; + bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE; + int nsize = bamboo_smemtbl[totest]; + bool islocal = true; + if(nsize < bound) { + bool tocheck = true; + // have some space in the block + if(totest == tofindb) { + // the first partition + size = bound - nsize; + } else if(nsize == 0) { + // an empty partition, can be appended + size += bound; } else { - prev->next = freemem; - } - // put it to the tail of the list for reuse - if(bamboo_free_mem_list->backuplist == NULL) { - //toremove->next = bamboo_free_mem_list->backuplist; - bamboo_free_mem_list->backuplist = toremove; - bamboo_free_mem_list->backuplist->next = NULL; - } else { - // free it - RUNFREE(toremove); + // not an empty partition, can not be appended + // the last continuous block is not big enough, go to check the next + // local block + islocal = true; + tocheck = false; + } // if(totest == tofindb) else if(nsize == 0) else ... + if(tocheck) { + if(size >= isize) { + // have enough space in the block, malloc + foundsmem = 1; + break; + } else { + // no enough space yet, try to append next continuous block + islocal = false; + } // if(size > isize) else ... 
+ } // if(tocheck) + } // if(nsize < bound) + if(islocal) { + // no space in the block, go to check the next block + i++; + if(2==i) { + i = 0; + j++; } + tofindb = totest = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j; } else { - prev = freemem; - freemem = freemem->next; + totest += 1; + } // if(islocal) else ... + if(totest > gcnumblock-1-bamboo_reserved_smem) { + // no more local mem, do not find suitable block + foundsmem = 2; + break; + } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... + } while(true); + + if(foundsmem == 1) { + // find suitable block + mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindbstartblock; - int endb = freemem->endblock; - int tmpptr = gcbaseva+((tofindbsize+freemem->ptr-tmpptr)>=isize) { - mem = (tmpptr>freemem->ptr)?((void *)tmpptr):(freemem->ptr); - } else { - mem = (void *)(freemem->size+freemem->ptr-isize); - } - // check the remaining space in this block - int remain = (int)(mem-gcbaseva); - int bound = (BAMBOO_SMEM_SIZE); - if(remain < BAMBOO_LARGE_SMEM_BOUND) { - bound = (BAMBOO_SMEM_SIZE_L); - } - remain = bound - remain%bound; - if(remain < isize) { - // this object acrosses blocks - *allocsize = isize; - } else { - // round the asigned block to the end of the current block - *allocsize = remain; - } - if(freemem->ptr == (int)mem) { - freemem->ptr = ((void*)freemem->ptr) + (*allocsize); - freemem->size -= *allocsize; - BLOCKINDEX(freemem->ptr, &(freemem->startblock)); - } else if((freemem->ptr+freemem->size) == ((int)mem+(*allocsize))) { - freemem->size -= *allocsize; - BLOCKINDEX(((int)mem)-1, &(freemem->endblock)); - } else { - struct freeMemItem * tmp = - (struct freeMemItem *)RUNMALLOC_I(sizeof(struct freeMemItem)); - tmp->ptr = (int)mem+*allocsize; - tmp->size = freemem->ptr+freemem->size-(int)mem-*allocsize; - BLOCKINDEX(tmp->ptr, &(tmp->startblock)); - tmp->endblock = freemem->endblock; - tmp->next = freemem->next; - freemem->next = tmp; - freemem->size = (int)mem - freemem->ptr; - BLOCKINDEX(((int)mem-1), &(freemem->endblock)); + } else if(foundsmem == 2) { + // no suitable block + *allocsize = 0; } + return mem; -} // void * localmalloc_I(int, int, struct freeMemItem *, int *) +} // void * localmalloc_I(int, int, int *) -void * globalmalloc_I(int isize, - struct freeMemItem * freemem, +void * globalmalloc_I(int coren, + int isize, int * allocsize) { - void * mem = (void *)(freemem->ptr); - // check the remaining space in this block - int remain = (int)(mem-gcbaseva); - int bound = (BAMBOO_SMEM_SIZE); - if(remain < BAMBOO_LARGE_SMEM_BOUND) { - bound = (BAMBOO_SMEM_SIZE_L); + void * mem = NULL; + int tofindb = bamboo_free_block; //0; + int totest = tofindb; + int bound = BAMBOO_SMEM_SIZE_L; + int foundsmem = 0; + int size = 0; + if(tofindb > gcnumblock-1-bamboo_reserved_smem) { + *allocsize = 0; + return NULL; } - remain = bound - remain%bound; - if(remain < isize) { - // this object acrosses blocks - *allocsize = isize; - } else { - // round the asigned block to the end of the current block - *allocsize = remain; + do { + bound = (totest < NUMCORES4GC) ? 
BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE; + int nsize = bamboo_smemtbl[totest]; + bool isnext = false; + if(nsize < bound) { + bool tocheck = true; + // have some space in the block + if(totest == tofindb) { + // the first partition + size = bound - nsize; + } else if(nsize == 0) { + // an empty partition, can be appended + size += bound; + } else { + // not an empty partition, can not be appended + // the last continuous block is not big enough, start another block + isnext = true; + tocheck = false; + } // if(totest == tofindb) else if(nsize == 0) else ... + if(tocheck) { + if(size >= isize) { + // have enough space in the block, malloc + foundsmem = 1; + break; + } // if(size > isize) + } // if(tocheck) + } else { + isnext = true; + }// if(nsize < bound) else ... + totest += 1; + if(totest > gcnumblock-1-bamboo_reserved_smem) { + // no more local mem, do not find suitable block + foundsmem = 2; + break; + } // if(totest > gcnumblock-1-bamboo_reserved_smem) ... + if(isnext) { + // start another block + tofindb = totest; + } // if(islocal) + } while(true); + + if(foundsmem == 1) { + // find suitable block + mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindbptr = ((void*)freemem->ptr) + (*allocsize); - freemem->size -= *allocsize; + return mem; -} // void * globalmalloc_I(int, struct freeMemItem *, int *) -#endif +} // void * globalmalloc_I(int, int, int *) +#endif // #ifdef MULTICORE_GC // malloc from the shared memory void * smemalloc_I(int coren, @@ -1386,47 +1376,36 @@ void * smemalloc_I(int coren, void * mem = NULL; #ifdef MULTICORE_GC int isize = size+(BAMBOO_CACHE_LINE_SIZE); - int toallocate = (isize>(BAMBOO_SMEM_SIZE)) ? (isize):(BAMBOO_SMEM_SIZE); - // go through free mem list for suitable chunks - int tofindb = 0; - struct freeMemItem * freemem = findFreeMemChunk_I(coren, isize, &tofindb); - - // allocate shared mem if available - if(freemem != NULL) { - switch(bamboo_smem_mode) { - case SMEMLOCAL: { - mem = localmalloc_I(tofindb, isize, freemem, allocsize); - break; - } - case SMEMFIXED: { - int startb = freemem->startblock; - int endb = freemem->endblock; - if(startb > tofindb) { - // malloc on global mem - mem = globalmalloc_I(isize, freemem, allocsize); - } else { - // malloc on local mem - mem = localmalloc_I(tofindb, isize, freemem, allocsize); - } - break; - } + // go through the bamboo_smemtbl for suitable partitions + switch(bamboo_smem_mode) { + case SMEMLOCAL: { + mem = localmalloc_I(coren, isize, allocsize); + break; + } - case SMEMMIXED: { - // TODO not supported yet - BAMBOO_EXIT(0xe002); - break; - } + case SMEMFIXED: { + // TODO not supported yet + BAMBOO_EXIT(0xe001); + break; + } - case SMEMGLOBAL: { - mem = globalmalloc_I(isize,freemem, allocsize); - break; - } + case SMEMMIXED: { + // TODO not supported yet + BAMBOO_EXIT(0xe002); + break; + } - default: - break; + case SMEMGLOBAL: { + mem = globalmalloc_I(coren, isize, allocsize); + break; } - } else { + + default: + break; + } + + if(mem == NULL) { #else int toallocate = (size>(BAMBOO_SMEM_SIZE)) ? 
(size):(BAMBOO_SMEM_SIZE); mem = mspace_calloc(bamboo_free_msp, 1, toallocate); @@ -1862,15 +1841,28 @@ msg: BAMBOO_DEBUGPRINT(0xe88a); #endif #endif + int allocsize = 0; + void * mem = NULL; #ifdef MULTICORE_GC if(gcprocessing) { // is currently doing gc, dump this msg + if(INITPHASE == gcphase) { + // if still in the initphase of gc, send a startinit msg again + if(isMsgSending) { + cache_msg_1(msgdata[2], GCSTARTINIT); + } else { + send_msg_1(msgdata[2], GCSTARTINIT, true); + } + } break; - } + } #endif - int allocsize = 0; - void * mem = smemalloc_I(msgdata[2], msgdata[1], &allocsize); + mem = smemalloc_I(msgdata[2], msgdata[1], &allocsize); if(mem == NULL) { + // in this case, the gcflag of the startup core has been set + // and the gc should be started later, then a GCSTARTINIT msg + // will be sent to the requesting core to notice it to start gc + // and try malloc again break; } // send the start_va to request core @@ -1902,6 +1894,7 @@ msg: } else { #ifdef MULTICORE_GC // fill header to store the size of this mem block + memset(msgdata[1], 0, BAMBOO_CACHE_LINE_SIZE); (*((int*)msgdata[1])) = msgdata[2]; bamboo_smem_size = msgdata[2] - BAMBOO_CACHE_LINE_SIZE; bamboo_cur_msp = msgdata[1] + BAMBOO_CACHE_LINE_SIZE; @@ -1967,7 +1960,8 @@ msg: BAMBOO_DEBUGPRINT(0xe88c); BAMBOO_DEBUGPRINT_REG(msgdata[1]); #endif - if(msgdata[1] < NUMCORES4GC) { + // All cores should do init GC + if(msgdata[1] < NUMCORESACTIVE) { gccorestatus[msgdata[1]] = 0; } } @@ -1981,7 +1975,8 @@ msg: #endif BAMBOO_EXIT(0xb002); } - if(msgdata[1] < NUMCORES4GC) { + // all cores should do mark + if(msgdata[1] < NUMCORESACTIVE) { gccorestatus[msgdata[1]] = 0; gcnumsendobjs[msgdata[1]] = msgdata[2]; gcnumreceiveobjs[msgdata[1]] = msgdata[3]; @@ -2003,6 +1998,7 @@ msg: int filledblocks = msgdata[2]; int heaptop = msgdata[3]; int data4 = msgdata[4]; + // only gc cores need to do compact if(cnum < NUMCORES4GC) { if(COMPACTPHASE == gcphase) { gcfilledblocks[cnum] = filledblocks; @@ -2022,39 +2018,6 @@ msg: } } else { gccorestatus[cnum] = 0; - // check if there is pending move request - /*if(gcmovepending > 0) { - int j; - for(j = 0; j < NUMCORES4GC; j++) { - if(gcrequiredmems[j]>0) { - break; - } - } - if(j < NUMCORES4GC) { - // find match - int tomove = 0; - int startaddr = 0; - gcrequiredmems[j] = assignSpareMem_I(cnum, - gcrequiredmems[j], - &tomove, - &startaddr); - if(STARTUPCORE == j) { - gcdstcore = cnum; - gctomove = true; - gcmovestartaddr = startaddr; - gcblock2fill = tomove; - } else { - if(isMsgSending) { - cache_msg_4(j, GCMOVESTART, cnum, startaddr, tomove); - } else { - send_msg_4(j, GCMOVESTART, cnum, startaddr, tomove, true); - } - } // if(STARTUPCORE == j) - if(gcrequiredmems[j] == 0) { - gcmovepending--; - } - } // if(j < NUMCORES4GC) - } // if(gcmovepending > 0) */ } // if(data4>0) } // if(cnum < NUMCORES4GC) break; @@ -2070,7 +2033,8 @@ msg: #endif BAMBOO_EXIT(0xb004); } - if(msgdata[1] < NUMCORES4GC) { + // all cores should do flush + if(msgdata[1] < NUMCORESACTIVE) { gccorestatus[msgdata[1]] = 0; } break; @@ -2084,8 +2048,9 @@ msg: case GCMARKCONFIRM: { // received a marked phase finish confirm request msg + // all cores should do mark if((BAMBOO_NUM_OF_CORE == STARTUPCORE) - || (BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1)) { + || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) { // wrong core to receive such msg BAMBOO_EXIT(0xb005); } else { @@ -2238,9 +2203,6 @@ msg: default: break; } - /*for(; msgdataindex > 0; --msgdataindex) { - msgdata[msgdataindex-1] = -1; - }*/ memset(msgdata, '\0', sizeof(int) * 
msgdataindex); msgdataindex = 0; msglength = BAMBOO_MSG_BUF_LENGTH; @@ -2604,22 +2566,6 @@ newtask: //clock2 = BAMBOO_GET_EXE_TIME(); for(i = 0; i < runtime_locklen; i++) { - /*for(i = 0; i < numparams; i++) { - void * param = currtpd->parameterArray[i]; - int * lock = 0; - bool insert = true; - if(((struct ___Object___ *)param)->type == STARTUPTYPE) { - islock = false; - taskpointerarray[i+OFFSET]=param; - goto execute; - } - if(((struct ___Object___ *)param)->lock == NULL) { - lock = (int *)param; - } else { - lock = (int *)(((struct ___Object___ *)param)->lock); - } - */ - int * lock = (int *)(runtime_locks[i].redirectlock); islock = true; // require locks for this parameter if it is not a startup object @@ -2667,18 +2613,9 @@ newtask: BAMBOO_DEBUGPRINT_REG(lock); #endif // check if has the lock already - /*bool giveup = true; - for(j = 0; j < runtime_locklen; j++) { - if(runtime_locks[j].value == lock) { - giveup = false; - break; - } - } - if(giveup) {*/ // can not get the lock, try later // release all grabbed locks for previous parameters for(j = 0; j < i; ++j) { - //for(j = 0; j < runtime_locklen; ++j) { lock = (int*)(runtime_locks[j].redirectlock); releasewritelock(lock); } @@ -2697,12 +2634,7 @@ newtask: #endif goto newtask; //} - }/* else { // line 2794: if(grount == 0) - // TODO - runtime_locks[runtime_locklen].value = (int)lock; - runtime_locks[runtime_locklen].redirectlock = (int)param; - runtime_locklen++; - }*/ + } } // line 2752: for(i = 0; i < runtime_locklen; i++) /*long clock3; @@ -3249,4 +3181,184 @@ void toiNext(struct tagobjectiterator *it, Objnext(&it->it); } } + +#ifdef PROFILE +inline void profileTaskStart(char * taskname) { + if(!taskInfoOverflow) { + TaskInfo* taskInfo = RUNMALLOC(sizeof(struct task_info)); + taskInfoArray[taskInfoIndex] = taskInfo; + taskInfo->taskName = taskname; + taskInfo->startTime = BAMBOO_GET_EXE_TIME(); + taskInfo->endTime = -1; + taskInfo->exitIndex = -1; + taskInfo->newObjs = NULL; + } +} + +inline void profileTaskEnd() { + if(!taskInfoOverflow) { + taskInfoArray[taskInfoIndex]->endTime = BAMBOO_GET_EXE_TIME(); + taskInfoIndex++; + if(taskInfoIndex == TASKINFOLENGTH) { + taskInfoOverflow = true; + //taskInfoIndex = 0; + } + } +} + +// output the profiling data +void outputProfileData() { +#ifdef USEIO + int i; + unsigned long long totaltasktime = 0; + unsigned long long preprocessingtime = 0; + unsigned long long objqueuecheckingtime = 0; + unsigned long long postprocessingtime = 0; + //int interruptiontime = 0; + unsigned long long other = 0; + unsigned long long averagetasktime = 0; + int tasknum = 0; + + printf("Task Name, Start Time, End Time, Duration, Exit Index(, NewObj Name, Num)+\n"); + // output task related info + for(i = 0; i < taskInfoIndex; i++) { + TaskInfo* tmpTInfo = taskInfoArray[i]; + unsigned long long duration = tmpTInfo->endTime - tmpTInfo->startTime; + printf("%s, %lld, %lld, %lld, %lld", + tmpTInfo->taskName, tmpTInfo->startTime, tmpTInfo->endTime, + duration, tmpTInfo->exitIndex); + // summarize new obj info + if(tmpTInfo->newObjs != NULL) { + struct RuntimeHash * nobjtbl = allocateRuntimeHash(5); + struct RuntimeIterator * iter = NULL; + while(0 == isEmpty(tmpTInfo->newObjs)) { + char * objtype = (char *)(getItem(tmpTInfo->newObjs)); + if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) { + int num = 0; + RuntimeHashget(nobjtbl, (int)objtype, &num); + RuntimeHashremovekey(nobjtbl, (int)objtype); + num++; + RuntimeHashadd(nobjtbl, (int)objtype, num); + } else { + RuntimeHashadd(nobjtbl, (int)objtype, 1); + } + 
//printf(stderr, "new obj!\n"); + } + + // output all new obj info + iter = RuntimeHashcreateiterator(nobjtbl); + while(RunhasNext(iter)) { + char * objtype = (char *)Runkey(iter); + int num = Runnext(iter); + printf(", %s, %d", objtype, num); + } + } + printf("\n"); + if(strcmp(tmpTInfo->taskName, "tpd checking") == 0) { + preprocessingtime += duration; + } else if(strcmp(tmpTInfo->taskName, "post task execution") == 0) { + postprocessingtime += duration; + } else if(strcmp(tmpTInfo->taskName, "objqueue checking") == 0) { + objqueuecheckingtime += duration; + } else { + totaltasktime += duration; + averagetasktime += duration; + tasknum++; + } + } + + if(taskInfoOverflow) { + printf("Caution: task info overflow!\n"); + } + + other = totalexetime-totaltasktime-preprocessingtime-postprocessingtime; + averagetasktime /= tasknum; + + printf("\nTotal time: %lld\n", totalexetime); + printf("Total task execution time: %lld (%d%%)\n", totaltasktime, + (int)(((double)totaltasktime/(double)totalexetime)*100)); + printf("Total objqueue checking time: %lld (%d%%)\n", + objqueuecheckingtime, + (int)(((double)objqueuecheckingtime/(double)totalexetime)*100)); + printf("Total pre-processing time: %lld (%d%%)\n", preprocessingtime, + (int)(((double)preprocessingtime/(double)totalexetime)*100)); + printf("Total post-processing time: %lld (%d%%)\n", postprocessingtime, + (int)(((double)postprocessingtime/(double)totalexetime)*100)); + printf("Other time: %lld (%d%%)\n", other, + (int)(((double)other/(double)totalexetime)*100)); + + printf("\nAverage task execution time: %lld\n", averagetasktime); +#else + int i = 0; + int j = 0; + + BAMBOO_DEBUGPRINT(0xdddd); + // output task related info + for(i= 0; i < taskInfoIndex; i++) { + TaskInfo* tmpTInfo = taskInfoArray[i]; + char* tmpName = tmpTInfo->taskName; + int nameLen = strlen(tmpName); + BAMBOO_DEBUGPRINT(0xddda); + for(j = 0; j < nameLen; j++) { + BAMBOO_DEBUGPRINT_REG(tmpName[j]); + } + BAMBOO_DEBUGPRINT(0xdddb); + BAMBOO_DEBUGPRINT_REG(tmpTInfo->startTime); + BAMBOO_DEBUGPRINT_REG(tmpTInfo->endTime); + BAMBOO_DEBUGPRINT_REG(tmpTInfo->exitIndex); + if(tmpTInfo->newObjs != NULL) { + struct RuntimeHash * nobjtbl = allocateRuntimeHash(5); + struct RuntimeIterator * iter = NULL; + while(0 == isEmpty(tmpTInfo->newObjs)) { + char * objtype = (char *)(getItem(tmpTInfo->newObjs)); + if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) { + int num = 0; + RuntimeHashget(nobjtbl, (int)objtype, &num); + RuntimeHashremovekey(nobjtbl, (int)objtype); + num++; + RuntimeHashadd(nobjtbl, (int)objtype, num); + } else { + RuntimeHashadd(nobjtbl, (int)objtype, 1); + } + } + + // ouput all new obj info + iter = RuntimeHashcreateiterator(nobjtbl); + while(RunhasNext(iter)) { + char * objtype = (char *)Runkey(iter); + int num = Runnext(iter); + int nameLen = strlen(objtype); + BAMBOO_DEBUGPRINT(0xddda); + for(j = 0; j < nameLen; j++) { + BAMBOO_DEBUGPRINT_REG(objtype[j]); + } + BAMBOO_DEBUGPRINT(0xdddb); + BAMBOO_DEBUGPRINT_REG(num); + } + } + BAMBOO_DEBUGPRINT(0xdddc); + } + + if(taskInfoOverflow) { + BAMBOO_DEBUGPRINT(0xefee); + } + + // output interrupt related info + /*for(i = 0; i < interruptInfoIndex; i++) { + InterruptInfo* tmpIInfo = interruptInfoArray[i]; + BAMBOO_DEBUGPRINT(0xddde); + BAMBOO_DEBUGPRINT_REG(tmpIInfo->startTime); + BAMBOO_DEBUGPRINT_REG(tmpIInfo->endTime); + BAMBOO_DEBUGPRINT(0xdddf); + } + + if(interruptInfoOverflow) { + BAMBOO_DEBUGPRINT(0xefef); + }*/ + + BAMBOO_DEBUGPRINT(0xeeee); +#endif +} +#endif // #ifdef PROFILE + #endif diff --git 
a/Robust/src/buildscript b/Robust/src/buildscript index 931382bd..04195913 100755 --- a/Robust/src/buildscript +++ b/Robust/src/buildscript @@ -72,6 +72,7 @@ echo -o binary echo -nojava do not run bristlecone compiler echo -instructionfailures inject code for instructionfailures echo -profile build with profile options +echo -gcprofile build with gcprofile options echo -accurateprofile build with accurate profile information including pre/post task processing info echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version" echo "-enable-assertions execute assert statements during compilation" @@ -117,6 +118,7 @@ RAWCONFIG='' DEBUGFLAG=false RAWPATHFLAG=false PROFILEFLAG=false +GCPROFILEFLAG=false ACCURATEPROFILEFLAG=false USEIOFLAG=false INTERRUPTFLAG=false @@ -277,6 +279,9 @@ elif [[ $1 = '-profile' ]] then PROFILEFLAG=true EXTRAOPTIONS="$EXTRAOPTIONS -pg" +elif [[ $1 = '-gcprofile' ]] +then +GCPROFILEFLAG=true elif [[ $1 = '-accurateprofile' ]] then ACCURATEPROFILEFLAG=true @@ -299,11 +304,11 @@ JAVAOPTS="$JAVAOPTS -multicore" elif [[ $1 = '-numcore' ]] then JAVAOPTS="$JAVAOPTS -numcore $2" -GCCORES="GC_$2" shift elif [[ $1 = '-numcore4gc' ]] then JAVAOPTS="$JAVAOPTS -numcore4gc $2" +GCCORES="GC_$2" shift elif [[ $1 = '-raw' ]] then @@ -689,6 +694,11 @@ then #MULTICOREGC version TILERACFLAGS="${TILERACFLAGS} -DMULTICORE_GC -D${GCCORES}" fi +if $GCPROFILEFLAG +then # GC_PROFILE version +TILERACFLAGS="${TILERACFLAGS} -DGC_PROFILE" +fi + cp $ROBUSTROOT/Tilera/Runtime/$MAKEFILE ./Makefile cp $ROBUSTROOT/Tilera/Runtime/$SIMHVC ./sim.hvc cp $ROBUSTROOT/Tilera/Runtime/$PCIHVC ./pci.hvc