unsigned int block = 0;
BLOCKINDEX(page_sva, &block);
unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
- tprintf("va: %x page_index: %d host: %d\t",(int)page_sva,page_index,coren);
+ printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
for(int i = 0; i < NUMCORESACTIVE; i++) {
int * local_tbl = (int *)((void *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
int freq = local_tbl[page_index];
- if(freq != 0) {
- printf("(%d) %d, ", i, freq);
- }
+ //if(freq != 0) {
+ printf("%d, ", freq);
+ //}
}
printf("\n");
}
void gc_output_cache_sampling_r() {
// TODO summary data
- unsigned int sumdata[4][NUMCORESACTIVE]; // 0 -- single core accessed
- // 1 -- all cores accessed
- // 2 -- less than 5 cores accessed
- // 3 -- multiple cores(5<=n<all) accessed
- memset(sumdata, '0', sizeof(unsigned int)*4*NUMCORESACTIVE);
+ unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
+ for(int i = 0; i < NUMCORESACTIVE; i++) {
+ for(int j = 0; j < NUMCORESACTIVE; j++) {
+ sumdata[i][j] = 0;
+ }
+ }
tprintf("cache sampling_r \n");
unsigned int page_index = 0;
VA page_sva = 0;
unsigned int block = 0;
BLOCKINDEX(page_sva, &block);
unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
- tprintf("va: %x page_index: %d host: %d\t",(int)page_sva,page_index,coren);
+ printf(" %x, %d, %d, ",(int)page_sva,page_index,coren);
int accesscore = 0; // TODO
for(int i = 0; i < NUMCORESACTIVE; i++) {
int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
+ printf("%d, ", freq);
if(freq != 0) {
- printf("(%d) %d, ", i, freq);
accesscore++;// TODO
}
}
- if(accesscore==0) {
- } else if(accesscore==1) {
+ if(accesscore!=0) {
for(int i = 0; i < NUMCORESACTIVE; i++) {
int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
- sumdata[0][i]+=freq;
- }
- } else if(accesscore<5) {
- for(int i = 0; i < NUMCORESACTIVE; i++) {
- int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
- int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
- sumdata[2][i]+=freq;
- }
- } else if(accesscore<NUMCORESACTIVE) {
- for(int i = 0; i < NUMCORESACTIVE; i++) {
- int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
- int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
- sumdata[3][i]+=freq;
- }
- } else {
- for(int i = 0; i < NUMCORESACTIVE; i++) {
- int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
- int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
- sumdata[1][i]+=freq;
+ sumdata[accesscore-1][i]+=freq;
}
}
}
// TODO printout the summary data
for(int i = 0; i < NUMCORESACTIVE; i++) {
- tprintf("core %d: %d, %d, %d, %d \n", i, sumdata[0][i], sumdata[2][i], sumdata[3][i], sumdata[1][i]);
+ printf("%d ", i);
+ for(int j = 0; j < NUMCORESACTIVE; j++) {
+ printf(" %d ", sumdata[j][i]);
+ }
+ printf("\n");
}
printf("=================\n");
}
// set the mark field to COMPACTED, indicating that this obj has been moved
// and need to be flushed
((struct ___Object___ *)(gclobjtail2->lobjs[gclobjtailindex2]))->marked=COMPACTED;
+ BAMBOO_CACHE_FLUSH_LINE(gclobjtail2->lobjs[gclobjtailindex2]);
dst -= size;
if((unsigned int)dst<(unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
// predict number of blocks to fill for each core
unsigned int tmpheaptop = 0;
int numpbc = loadbalance(&tmpheaptop);
+ //tprintf("numpbc: %d \n", numpbc);
numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
GC_PRINTF("mark phase finished \n");
gccorestatus[i] = 1;
//send start compact messages to all cores
//TODO bug here, do not know if the direction is positive or negative?
- if (tmpcoreptr < tmpheaptop) {
- gcstopblock[i] = numpbc + 1;
+ //if (tmpcoreptr < tmpheaptop) {
+ gcstopblock[i] = numpbc+1;
if(i != STARTUPCORE) {
send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
} else {
gcblock2fill = numpbc+1;
}
- } else {
+ /*} else {
gcstopblock[i] = numpbc;
if(i != STARTUPCORE) {
send_msg_2(i, GCSTARTCOMPACT, numpbc);
} else {
gcblock2fill = numpbc;
}
- }
+ }*/
}
BAMBOO_CACHE_MF();
GCPROFILE_ITEM();
}
void gc_master(struct garbagelist * stackptr) {
- //tprintf("start GC !!!!!!!!!!!!! \n");
+ tprintf("start GC !!!!!!!!!!!!! \n");
gc_status_info.gcprocessing = true;
gc_status_info.gcphase = INITPHASE;
master_finish();
GC_PRINTF("gc finished \n");
- //tprintf("finish GC ! %d \n",gcflag);
+ tprintf("finish GC ! %d \n",gcflag);
}
void pregccheck() {
struct ArrayObject *ao=(struct ArrayObject *)(origptr);
unsigned int elementsize=classsize[*type];
unsigned int length=ao->___length___;
- size=(unsigned int)sizeof(struct ArrayObject)
- +(unsigned int)(length*elementsize);
+ size=(unsigned int)sizeof(struct ArrayObject)+(unsigned int)(length*elementsize);
}
return size;
}
if(((struct ___Object___ *)origptr)->marked == MARKED) {
unsigned int totop = (unsigned int)to->top;
unsigned int tobound = (unsigned int)to->bound;
+ BAMBOO_ASSERT(totop<=tobound);
GCPROFILE_RECORD_LIVE_OBJ();
// marked obj, copy it to current heap top
// check to see if remaining space is enough
unsigned int tmp_ptr = to->ptr;
#endif
nextBlock(to);
+ if((to->top+isize)>(to->bound)) tprintf("%x, %x, %d, %d, %d, %d \n", to->ptr, orig->ptr, to->top, to->bound, isize, size);
+ BAMBOO_ASSERT((to->top+isize)<=(to->bound));
#ifdef GC_CACHE_ADAPT
CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, tmp_ptr, true);
#endif
// already fulfilled the block
return true;
}
- }
+ }
+ BAMBOO_ASSERT((to->top+isize)<=(to->bound));
// set the mark field to COMPACTED, indicating that this obj has been moved
// and need to be flushed
((struct ___Object___ *)origptr)->marked = COMPACTED;
to->ptr += isize;
to->offset += isize;
to->top += isize;
+ BAMBOO_ASSERT((to->top)<=(to->bound));
#ifdef GC_CACHE_ADAPT
unsigned int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
return false;
}
-INLINE bool compacthelper(struct moveHelper * orig,struct moveHelper * to,int * filledblocks,unsigned int * heaptopptr,bool * localcompact) {
+INLINE bool compacthelper(struct moveHelper * orig,struct moveHelper * to,int * filledblocks,unsigned int * heaptopptr,bool * localcompact, bool lbmove) {
+ bool loadbalancemove = lbmove;
// scan over all objs in this block, compact the marked objs
// loop stop when finishing either scanning all active objs or
// fulfilled the gcstopblock
*heaptopptr = to->ptr;
*filledblocks = to->numblocks;
}
+ /*if(loadbalancemove) {
+ // write back to the Main Memory and release any DTLB entry for the
+ // last block as someone else might later write into it
+ // flush the shared heap
+ BAMBOO_CACHE_FLUSH_L2();
+ //loadbalancemove = false;
+ }*/
// send msgs to core coordinator indicating that the compact is finishing
// send compact finish message to core coordinator
if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
+ //tprintf("--finish compact: %d, %d, %d, %x, %x \n", BAMBOO_NUM_OF_CORE, loadbalancemove, *filledblocks, *heaptopptr, gccurr_heaptop);
if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
// ask for more mem
gctomove = false;
} else {
gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
gctomove = false;
+ // write back to the Main Memory and release any DTLB entry for the
+ // last block as someone else might later write into it
+ // flush the shared heap
+ //BAMBOO_CACHE_FLUSH_L2();
return true;
}
} else {
if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
// ask for more mem
gctomove = false;
- send_msg_5(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,*filledblocks,*heaptopptr,gccurr_heaptop);
+ //tprintf("finish compact: %d, %d, %d, %x, %x \n", BAMBOO_NUM_OF_CORE, loadbalancemove, *filledblocks, *heaptopptr, gccurr_heaptop);
+ send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,loadbalancemove,*filledblocks,*heaptopptr,gccurr_heaptop);
} else {
+ //tprintf("++ finish compact: %d, %d, %d, %x, %x \n", BAMBOO_NUM_OF_CORE, loadbalancemove, *filledblocks, *heaptopptr, 0);
// finish compacting
- send_msg_5(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,*filledblocks,*heaptopptr, 0);
+ send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,loadbalancemove,*filledblocks,*heaptopptr, 0);
+ // write back to the Main Memory and release any DTLB entry for the
+ // last block as someone else might later write into it.
+ // flush the shared heap
+ //BAMBOO_CACHE_FLUSH_L2();
}
- }
+ }
if(orig->ptr < gcmarkedptrbound) {
// still have unpacked obj
while(!gctomove) ;
+ BAMBOO_CACHE_MF();
+ loadbalancemove = true;
+ //tprintf("move start: %x, %d \n", gcmovestartaddr, gcdstcore);
gctomove = false;
-
to->ptr = gcmovestartaddr;
to->numblocks = gcblock2fill - 1;
to->bound = BLOCKBOUND(to->numblocks);
void compact() {
BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);
+ BAMBOO_CACHE_MF();
// initialize pointers for compacting
struct moveHelper * orig = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
if(!initOrig_Dst(orig, to)) {
// no available data to compact
// send compact finish msg to STARTUP core
- send_msg_5(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,0,to->base,0);
+ send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,false,0,to->base,0);
RUNFREE(orig);
RUNFREE(to);
} else {
unsigned int filledblocks = 0;
unsigned int heaptopptr = 0;
bool localcompact = true;
- compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
+ compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact, false);
RUNFREE(orig);
RUNFREE(to);
}
bool finishcompact = false;
bool iscontinue = true;
bool localcompact = true;
+ bool lbmove = false;
while((COMPACTPHASE == gc_status_info.gcphase) || (SUBTLECOMPACTPHASE == gc_status_info.gcphase)) {
if((!finishcompact) && iscontinue) {
- finishcompact = compacthelper(orig,to,&filledblocks,&heaptopptr,&localcompact);
+ finishcompact = compacthelper(orig,to,&filledblocks,&heaptopptr,&localcompact, lbmove);
}
if(gc_checkCoreStatus()) {
}
if(gctomove) {
+ BAMBOO_CACHE_MF();
to->ptr = gcmovestartaddr;
to->numblocks = gcblock2fill - 1;
to->bound = BLOCKBOUND(to->numblocks);
localcompact = (gcdstcore == BAMBOO_NUM_OF_CORE);
gctomove = false;
iscontinue = true;
+ lbmove = true;
} else if(!finishcompact) {
// still pending
iscontinue = false;
+ lbmove = false;
}
}
}
// ptr is a large object and not marked or enqueued
gc_lobjenqueue(ptr, size, BAMBOO_NUM_OF_CORE);
gcnumlobjs++;
+ // mark this obj
+ //((struct ___Object___ *)ptr)->marked = COMPACTED;
} else {
// ptr is an unmarked active object on this core
ALIGNSIZE(size, &isize);
if((unsigned int)(ptr + size) > (unsigned int)gcmarkedptrbound) {
gcmarkedptrbound = (unsigned int)(ptr + size);
}
+ // mark this obj
+ //((struct ___Object___ *)ptr)->marked = MARKED;
}
// mark this obj
((struct ___Object___ *)ptr)->marked = MARKED;
return mem;
}
+// 06/07/11: added a parameter minremain; it specifies the minimum number of
+// blocks to leave for each core for local allocation.
INLINE void * searchBlock4Mem(int* tofindb,
int* totest,
int gccorenum,
int isize,
- int * allocsize) {
+ int * allocsize,
+ int minremain) {
int i=0;
int j=0;
int size = 0;
int bound = BAMBOO_SMEM_SIZE_L;
- while(*totest<(gcnumblock-bamboo_reserved_smem)) {
+ int freeblocks=(gcnumblock-bamboo_reserved_smem-1)/NUMCORES4GC+1;
+ while((*totest<(gcnumblock-bamboo_reserved_smem))&&(freeblocks>minremain)) {
bound = BLOCKSIZE(*totest<NUMCORES4GC);
int nsize = bamboo_smemtbl[*totest];
if((nsize==bound)||((nsize != 0)&&(*totest != *tofindb))) {
j++;
}
*tofindb=*totest=gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j;
+ freeblocks--;
} else {
// an empty block or a partially occupied block that can be set as the
// first block
void * localmalloc_I(int coren,
int isize,
int * allocsize) {
- void * mem = NULL;
- int gccorenum = (coren<NUMCORES4GC)?(coren):(coren%NUMCORES4GC);
- int tofindb = gc_core2block[2*gccorenum];
- int totest = tofindb;
- mem = searchBlock4Mem(&tofindb, &totest, gccorenum, isize, allocsize);
- if(mem == NULL) {
+ void * mem=NULL;
+ int gccorenum=(coren<NUMCORES4GC)?(coren):(coren%NUMCORES4GC);
+ int tofindb=gc_core2block[2*gccorenum];
+ int totest=tofindb;
+ mem=searchBlock4Mem(&tofindb,&totest,gccorenum,isize,allocsize,0);
+ if(mem==NULL) {
// no more local mem, do not find suitable block
- *allocsize = 0;
+ *allocsize=0;
}
return mem;
}
+#define LOCALMEMRESERVATION 2
+
#ifdef SMEMF
// Allocate the local shared memory to each core with the highest priority,
// if a core has used up its local shared memory, try to allocate the
int * allocsize) {
void * mem;
int k;
- int gccorenum = (coren<NUMCORES4GC)?(coren):(coren%NUMCORES4GC);
- int totest, tofindb;
- int bound = BAMBOO_SMEM_SIZE_L;
- int foundsmem = 0;
- int size = 0;
- for(k=0; k<NUM_CORES2TEST; k++) {
- if(core2test[gccorenum][k] == -1) {
+ int gccorenum=(coren<NUMCORES4GC)?(coren):(coren%NUMCORES4GC);
+ int totest,tofindb;
+ int bound=BAMBOO_SMEM_SIZE_L;
+ int foundsmem=0;
+ int size=0;
+ for(k=0;k<NUM_CORES2TEST;k++) {
+ if(core2test[gccorenum][k]==-1) {
// try next neighbour
continue;
}
tofindb=totest=gc_core2block[2*core2test[gccorenum][k]];
- mem=searchBlock4Mem(&tofindb,&totest,core2test[gccorenum][k],
- isize,allocsize);
- if(mem != NULL) {
+ mem=searchBlock4Mem(&tofindb,&totest,core2test[gccorenum][k],isize,allocsize,(k==0)?0:((gcnumblock/NUMCORES4GC)>>LOCALMEMRESERVATION));
+ if(mem!=NULL) {
return mem;
}
}
// no more memory available on either coren or its neighbour cores
- *allocsize = 0;
+ *allocsize=0;
return NULL;
}
#endif
int * allocsize) {
void * mem;
int k;
- int gccorenum = (coren < NUMCORES4GC) ? (coren) : (coren % NUMCORES4GC);
+ int gccorenum=(coren<NUMCORES4GC)?(coren):(coren%NUMCORES4GC);
int totest,tofindb;
- int bound = BAMBOO_SMEM_SIZE_L;
- int foundsmem = 0;
- int size = 0;
- for(k=0; k<NUM_CORES2TEST; k++) {
- if(core2test[gccorenum][k] == -1) {
+ int bound=BAMBOO_SMEM_SIZE_L;
+ int foundsmem=0;
+ int size=0;
+ for(k=0;k<NUM_CORES2TEST;k++) {
+ if(core2test[gccorenum][k]==-1) {
// try next neighbour
continue;
}
tofindb=totest=gc_core2block[2*core2test[gccorenum][k]];
- mem=searchBlock4Mem(&tofindb,&totest,core2test[gccorenum][k],
- isize,allocsize);
- if(mem != NULL) {
- gcmem_mixed_usedmem += size;
+ mem=searchBlock4Mem(&tofindb,&totest,core2test[gccorenum][k],isize,allocsize,(k==0)?0:((gcnumblock/NUMCORES4GC)>>LOCALMEMRESERVATION));
+ if(mem!=NULL) {
+ gcmem_mixed_usedmem+=size;
return mem;
}
}
- if(gcmem_mixed_usedmem >= gcmem_mixed_threshold) {
+ if(gcmem_mixed_usedmem>=gcmem_mixed_threshold) {
// no more memory available on either coren or its neighbour cores
*allocsize = 0;
return NULL;
} else {
// try allocate globally
- mem = globalmalloc_I(coren, isize, allocsize);
- if(mem != NULL) {
- gcmem_mixed_usedmem += size;
+ mem=globalmalloc_I(coren,isize,allocsize);
+ if(mem!=NULL) {
+ gcmem_mixed_usedmem+=size;
}
return mem;
}
}
return NULL;
}
+ /*if(coren == hostcore(mem)) {
+ tprintf("Allocate host mem: %d, %d, %d \n", coren, hostcore(mem), mem);
+ } else {
+ tprintf("---Allocate non-host mem: %d, %d, %d \n", coren, hostcore(mem), mem);
+ }*/
return mem;
}
#else
4, //GCFINISHPRE, // 0xE7
2, //GCFINISHINIT, // 0xE8
4, //GCFINISHMARK, // 0xE9
- 5, //GCFINISHCOMPACT, // 0xEa
+ 6, //GCFINISHCOMPACT, // 0xEa
2, //GCFINISHFLUSH, // 0xEb
1, //GCFINISH, // 0xEc
1, //GCMARKCONFIRM, // 0xEd
BAMBOO_ASSERT(BAMBOO_NUM_OF_CORE == STARTUPCORE);
int cnum = msgdata[msgdataindex];
- MSG_INDEXINC_I();
+ MSG_INDEXINC_I();
+ bool loadbalancemove = msgdata[msgdataindex];
+ MSG_INDEXINC_I();
int filledblocks = msgdata[msgdataindex];
MSG_INDEXINC_I();
int heaptop = msgdata[msgdataindex];
MSG_INDEXINC_I();
// only gc cores need to do compact
if(cnum < NUMCORES4GC) {
- if(COMPACTPHASE == gc_status_info.gcphase) {
+ if(!loadbalancemove && (COMPACTPHASE == gc_status_info.gcphase)) {
gcfilledblocks[cnum] = filledblocks;
gcloads[cnum] = heaptop;
}
}
#endif
+#ifdef D___System______exit____I
void CALL11(___System______exit____I,
int ___status___,
int ___status___) {
#endif
BAMBOO_EXIT_APP(___status___);
}
+#endif
#ifdef D___Vector______removeElement_____AR_L___Object____I_I
void CALL23(___Vector______removeElement_____AR_L___Object____I_I,
}
#endif
+#ifdef D___System______printI____I
void CALL11(___System______printI____I,
int ___status___,
int ___status___) {
BAMBOO_PRINT(0x1111);
BAMBOO_PRINT_REG(___status___);
}
+#endif
+#ifdef D___System______currentTimeMillis____
long long CALL00(___System______currentTimeMillis____) {
//TilePro64 is 700mHz
return ((unsigned long long)BAMBOO_GET_EXE_TIME())/700000;
}
+#endif
void CALL00(___System______setgcprofileflag____) {
#ifdef GC_PROFILE
#endif
}
+#ifdef D___System______printString____L___String___
void CALL01(___System______printString____L___String___,
struct ___String___ * ___s___) {
#ifdef MGC
#endif // TILERA_BME
#endif // MGC
}
+#endif
/* Object allocation function */
void failedboundschk(int num) {
#ifndef TASK
- printf("Array out of bounds\n");
+ printf("Array out of bounds, %d \n", num);
#ifdef THREADS
threadexit();
#elif defined MGC
cp ../Runtime/Queue.h ./
cp ../Runtime/runtime.h ./
cp ../Runtime/SimpleHash.h ./
+cp ../Runtime/objtypes.h ./
cp $BAMBOORUNTIME/*.c ./
cp $BAMBOORUNTIME/*.h ./
cp ../Tilera/Runtime/*.c ./