From 5819687625bd408e5f4fe6d9d2bacb7e8bcfa33a Mon Sep 17 00:00:00 2001 From: jzhou Date: Wed, 14 Jul 2010 00:20:52 +0000 Subject: [PATCH] Add macros to control the memory allocation/cache/memory controller strategies in multicore gc version --- Robust/src/Runtime/GCSharedHash.c | 8 ----- Robust/src/Runtime/multicoregarbage.c | 49 +++++++++++++++++++------- Robust/src/Runtime/multicoregarbage.h | 16 ++++++--- Robust/src/Runtime/multicoreruntime.c | 8 +++++ Robust/src/Runtime/multicoretask.c | 20 ++++++----- Robust/src/buildscript | 50 +++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 34 deletions(-) diff --git a/Robust/src/Runtime/GCSharedHash.c b/Robust/src/Runtime/GCSharedHash.c index 98187f03..e60729ae 100755 --- a/Robust/src/Runtime/GCSharedHash.c +++ b/Robust/src/Runtime/GCSharedHash.c @@ -21,9 +21,6 @@ #define INLINE inline __attribute__((always_inline)) #endif // #ifndef INLINE -// TODO check the average collision times -//int gc_num_search = 0; -//int gc_num_collision = 0; /* GCSHARED HASH ********************************************************/ @@ -462,14 +459,9 @@ INLINE void * mgcsharedhashSearch(mgcsharedhashtbl_t * tbl, void * key) { &tbl->table[(((unsigned INTPTR)key)&tbl->mask)>>6]; mgcsharedhashlistnode_t *top = &tbl->table[tbl->size]; - //int i = 0; - //gc_num_search++; do { //i++; if(node->key == key) { - // TODO - //printf("%x \n", 0xe000+i); - //gc_num_collision += i; return node->val; } node++; diff --git a/Robust/src/Runtime/multicoregarbage.c b/Robust/src/Runtime/multicoregarbage.c index 36be3a43..a30203a6 100644 --- a/Robust/src/Runtime/multicoregarbage.c +++ b/Robust/src/Runtime/multicoregarbage.c @@ -10,9 +10,9 @@ // TODO for profiling the flush phase #ifdef GC_PROFILE -int num_mapinforequest; +/*int num_mapinforequest; int num_markrequest; -unsigned long long marktime; +unsigned long long marktime;*/ #endif extern int corenum; @@ -687,12 +687,12 @@ inline void initGC() { } #ifdef GC_PROFILE // TODO - num_mapinforequest = 0; 
+ /*num_mapinforequest = 0; num_mapinforequest_i = 0; flushstalltime = 0; flushstalltime_i = 0; num_markrequest = 0; - marktime = 0; + marktime = 0;*/ #endif } // void initGC() @@ -1210,7 +1210,10 @@ inline void markObj(void * objptr) { /* marktime += BAMBOO_GET_EXE_TIME() - ttime; num_markrequest++;*/ -#endif +#ifdef GC_PROFILE_S + gc_num_forwardobj++; +#endif // GC_PROFILE_S +#endif // GC_PROFILE gcself_numsendobjs++; MGCHashadd(gcforwardobjtbl, (int)objptr); } @@ -1962,6 +1965,9 @@ innermoveobj: if((mark & MARKED) != 0) { #ifdef DEBUG BAMBOO_DEBUGPRINT(0xe204); +#endif +#ifdef GC_PROFILE_S + gc_num_liveobj++; #endif // marked obj, copy it to current heap top // check to see if remaining space is enough @@ -2298,7 +2304,7 @@ inline void * flushObj(void * objptr) { // a shared obj ptr, change to new address BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); #ifdef GC_PROFILE - unsigned long long ttime = BAMBOO_GET_EXE_TIME(); + //unsigned long long ttime = BAMBOO_GET_EXE_TIME(); #endif #ifdef LOCALHASHTBL_TEST RuntimeHashget(gcpointertbl, objptr, &dstptr); @@ -2307,7 +2313,7 @@ inline void * flushObj(void * objptr) { #endif //MGCHashget(gcpointertbl, objptr, &dstptr); #ifdef GC_PROFILE - flushstalltime += BAMBOO_GET_EXE_TIME()-ttime; + //flushstalltime += BAMBOO_GET_EXE_TIME()-ttime; #endif BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef DEBUG @@ -2330,7 +2336,7 @@ inline void * flushObj(void * objptr) { } else { int hostc = hostcore(objptr); #ifdef GC_PROFILE - unsigned long long ttimet = BAMBOO_GET_EXE_TIME(); + //unsigned long long ttimet = BAMBOO_GET_EXE_TIME(); #endif // check the corresponsing sharedptbl BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); @@ -2349,7 +2355,7 @@ inline void * flushObj(void * objptr) { } BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); #ifdef GC_PROFILE - flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet; + //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet; #endif if(dstptr == NULL) { @@ -2813,6 +2819,13 @@ inline void gc_collect(struct garbagelist * 
stackptr) { #ifdef RAWPATH // TODO GC_DEBUG printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y()); #endif +#ifdef GC_PROFILE_S + BAMBOO_DEBUGPRINT(0xaaaa); + BAMBOO_DEBUGPRINT_REG(gc_num_obj); + BAMBOO_DEBUGPRINT_REG(gc_num_liveobj); + BAMBOO_DEBUGPRINT_REG(gc_num_forwardobj); + BAMBOO_DEBUGPRINT(0xaaab); +#endif // GC_PROFILE_S } // void gc_collect(struct garbagelist * stackptr) inline void gc_nocollect(struct garbagelist * stackptr) { @@ -2865,6 +2878,13 @@ inline void gc_nocollect(struct garbagelist * stackptr) { #ifdef RAWPATH // TODO GC_DEBUG printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y()); #endif +#ifdef GC_PROFILE_S + BAMBOO_DEBUGPRINT(0xaaaa); + BAMBOO_DEBUGPRINT_REG(gc_num_obj); + BAMBOO_DEBUGPRINT_REG(gc_num_liveobj); + BAMBOO_DEBUGPRINT_REG(gc_num_forwardobj); + BAMBOO_DEBUGPRINT(0xaaab); +#endif // GC_PROFILE_S } // void gc_collect(struct garbagelist * stackptr) inline void gc(struct garbagelist * stackptr) { @@ -3239,10 +3259,13 @@ inline void gc(struct garbagelist * stackptr) { udn_tile_coord_y()); //dumpSMem(); #endif - // TODO - /*extern int gc_num_search; - extern int gc_num_collision; - tprintf("Average collision: %d \n", gc_num_collision/gc_num_search);*/ +#ifdef GC_PROFILE_S + BAMBOO_DEBUGPRINT(0xaaaa); + BAMBOO_DEBUGPRINT_REG(gc_num_obj); + BAMBOO_DEBUGPRINT_REG(gc_num_liveobj); + BAMBOO_DEBUGPRINT_REG(gc_num_forwardobj); + BAMBOO_DEBUGPRINT(0xaaab); +#endif // GC_PROFILE_S } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) { gcprocessing = true; gc_collect(stackptr); diff --git a/Robust/src/Runtime/multicoregarbage.h b/Robust/src/Runtime/multicoregarbage.h index f36f2218..32889e60 100644 --- a/Robust/src/Runtime/multicoregarbage.h +++ b/Robust/src/Runtime/multicoregarbage.h @@ -12,9 +12,9 @@ // data structures for GC #ifdef GC_DEBUG -#define BAMBOO_SMEM_SIZE_L (BAMBOO_SMEM_SIZE) // * 2) +#define BAMBOO_SMEM_SIZE_L (BAMBOO_SMEM_SIZE * 2) #else -#define BAMBOO_SMEM_SIZE_L (BAMBOO_SMEM_SIZE) // * 2) +#define
BAMBOO_SMEM_SIZE_L (BAMBOO_SMEM_SIZE * 2) #endif #define BAMBOO_LARGE_SMEM_BOUND (BAMBOO_SMEM_SIZE_L*NUMCORES4GC) // let each gc core to have one big block, this is very important @@ -36,10 +36,16 @@ int gc_infoIndex; bool gc_infoOverflow; // TODO -unsigned long long flushstalltime; +/*unsigned long long flushstalltime; unsigned long long flushstalltime_i; -int num_mapinforequest_i; -#endif +int num_mapinforequest_i;*/ +#ifdef GC_PROFILE_S +unsigned int gc_num_liveobj; +unsigned int gc_num_obj; +unsigned int gc_num_forwardobj; +#endif // GC_PROFILE_S + +#endif // GC_PROFILE typedef enum { INIT = 0, // 0 diff --git a/Robust/src/Runtime/multicoreruntime.c b/Robust/src/Runtime/multicoreruntime.c index ea36e42d..7d278e2c 100644 --- a/Robust/src/Runtime/multicoreruntime.c +++ b/Robust/src/Runtime/multicoreruntime.c @@ -220,6 +220,10 @@ void * allocate_new(void * ptr, int type) { v->lock = NULL; v->lockcount = 0; initlock(v); +#ifdef GC_PROFILE_S + extern unsigned int gc_num_obj; + gc_num_obj++; +#endif return v; } @@ -239,6 +243,10 @@ struct ArrayObject * allocate_newarray(void * ptr, int type, int length) { } v->___length___=length; initlock(v); +#ifdef GC_PROFILE_S + extern unsigned int gc_num_obj; + gc_num_obj++; +#endif return v; } diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c index 9162e038..8ca658d7 100644 --- a/Robust/src/Runtime/multicoretask.c +++ b/Robust/src/Runtime/multicoretask.c @@ -301,6 +301,11 @@ void initruntimedata() { (unsigned int)((BAMBOO_SHARED_MEM_SIZE-(gcbaseva-BAMBOO_BASE_VA))*0.8); gcmem_mixed_usedmem = 0; #endif +#ifdef GC_PROFILE_S + gc_num_obj = 0; + gc_num_liveobj = 0; + gc_num_forwardobj = 0; +#endif #else // create the lock table, lockresult table and obj queue locktable.size = 20; @@ -622,8 +627,8 @@ void checkCoreStatus() { BAMBOO_DEBUGPRINT(0xe000 + profilestatus[i]); #endif if(profilestatus[i] != 0) { - allStall = false; - break; + allStall = false; + break; } } // for(i = 0; i < NUMCORESACTIVE; 
++i) if(!allStall) { @@ -637,8 +642,8 @@ void checkCoreStatus() { } else { BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); break; - } // if(!allStall) - } // while(true) + } // if(!allStall) + } // while(true) #endif // gc_profile mode, ourput gc prfiling data @@ -1411,9 +1416,10 @@ void * localmalloc_I(int coren, int isize, int * allocsize) { void * mem = NULL; + int gccorenum = (coren < NUMCORES4GC) ? (coren) : (coren % NUMCORES4GC); int i = 0; int j = 0; - int tofindb = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j; + int tofindb = gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j; int totest = tofindb; int bound = BAMBOO_SMEM_SIZE_L; int foundsmem = 0; @@ -1456,7 +1462,7 @@ void * localmalloc_I(int coren, i = 0; j++; } - tofindb = totest = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j; + tofindb = totest = gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j; } else { totest += 1; } // if(islocal) else ... @@ -1601,8 +1607,6 @@ void * mixedmalloc_I(int coren, int j = 0; int k = 0; int gccorenum = (coren < NUMCORES4GC) ? 
(coren) : (coren % NUMCORES4GC); - int coords_x = bamboo_cpu2coords[gccorenum*2]; - int coords_y = bamboo_cpu2coords[gccorenum*2+1]; int ii = 1; int tofindb = gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j; int totest = tofindb; diff --git a/Robust/src/buildscript b/Robust/src/buildscript index d81958a6..76d63517 100755 --- a/Robust/src/buildscript +++ b/Robust/src/buildscript @@ -44,7 +44,12 @@ echo "-gcmem_local set the gc shared memory allocation strategy as local (should echo "-gcmem_fixed set the gc shared memory allocation strategy as fixed (should be used together with -multicoregc)" echo "-gcmem_mixed set the gc shared memory allocation strategy as mixed (should be used together with -multicoregc)" echo "-gcmem_global set the gc shared memory allocation strategy as global (should be used together with -multicoregc)" +echo "-gccache_local set the gc shared memory cache strategy as local (should be used together with -multicoregc)" +echo "-gccache_ran set the gc shared memory cache strategy as random (should be used together with -multicoregc)" +echo "-gccontroller_near set the gc shared memory to use the nearest controller for each core (should be used together with -multicoregc)" +echo "-gccontroller_remote set the gc shared memory to use a remote controller for each core (should be used together with -multicoregc)" echo -gcprofile build with gcprofile options +echo -gcprofile_s build with gcprofile_s options echo -accurateprofile build with accurate profile information including pre/post task processing info echo "-useio use standard io to output profiling data (should be used together with -raw and -profile), it only works with single core version" echo @@ -127,6 +132,7 @@ DEBUGFLAG=false RAWPATHFLAG=false PROFILEFLAG=false GCPROFILEFLAG=false +GCPROFILESFLAG=false ACCURATEPROFILEFLAG=false USEIOFLAG=false INTERRUPTFLAG=false @@ -136,6 +142,10 @@ GCMEMLOCALFLAG=false; GCMEMFIXEDFLAG=false; GCMEMMIXEDFLAG=false; GCMEMGLOBALFLAG=false; 
+GCCACHELOCALFLAG=false; +GCCACHERANFLAG=false; +GCCONTROLLERNEARFLAG=false; +GCCONTROLLERREMOTEFLAG=false; USEDMALLOC=false THREADFLAG=false FASTCHECK=false @@ -296,6 +306,9 @@ EXTRAOPTIONS="$EXTRAOPTIONS -pg" elif [[ $1 = '-gcprofile' ]] then GCPROFILEFLAG=true +elif [[ $1 = '-gcprofile_s' ]] +then +GCPROFILESFLAG=true elif [[ $1 = '-accurateprofile' ]] then ACCURATEPROFILEFLAG=true @@ -381,6 +394,18 @@ GCMEMMIXEDFLAG=true elif [[ $1 = '-gcmem_global' ]] then GCMEMGLOBALFLAG=true +elif [[ $1 = '-gccache_local' ]] +then +GCCACHELOCALFLAG=true +elif [[ $1 = '-gccache_ran' ]] +then +GCCACHERANFLAG=true +elif [[ $1 = '-gccontroller_near' ]] +then +GCCONTROLLERNEARFLAG=true +elif [[ $1 = '-gccontroller_remote' ]] +then +GCCONTROLLERREMOTEFLAG=true elif [[ $1 = '-dmalloc' ]] then USEDMALLOC=true @@ -766,6 +791,11 @@ then # GC_PROFILE version TILERACFLAGS="${TILERACFLAGS} -DGC_PROFILE" fi +if $GCPROFILESFLAG +then # GC_PROFILE_S version +TILERACFLAGS="${TILERACFLAGS} -DGC_PROFILE_S" +fi + if $GCMEMLOCALFLAG then # SMEMLOCAL version TILERACFLAGS="${TILERACFLAGS} -DSMEML" @@ -786,6 +816,26 @@ then # SMEMGLOBAL version TILERACFLAGS="${TILERACFLAGS} -DSMEMG" fi +if $GCCACHELOCALFLAG +then # CACHE_LOCAL version +TILERACFLAGS="${TILERACFLAGS} -DCACHE_LOCAL" +fi + +if $GCCACHERANFLAG +then # CACHE_RAN version +TILERACFLAGS="${TILERACFLAGS} -DCACHE_RAN" +fi + +if $GCCONTROLLERNEARFLAG +then # CONTROLLER_NEAR version +TILERACFLAGS="${TILERACFLAGS} -DCONTROLLER_NEAR" +fi + +if $GCCONTROLLERREMOTEFLAG +then # CONTROLLER_REMOTE version +TILERACFLAGS="${TILERACFLAGS} -DCONTROLLER_REMOTE" +fi + cp $ROBUSTROOT/Tilera/Runtime/$TILERA_INDIR/$MAKEFILE ./Makefile if $TILERABMEFLAG then # TILERABMEFLAG -- 2.34.1