// TODO: tags are NOT supported!
#include "multicoregarbage.h"
#include "multicoregcmark.h"
#include "multicoregccompact.h"
#include "multicoregcflush.h"
#include "multicoreruntime.h"
#include "multicoregcprofile.h"

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
// dump the whole shared mem in blocks
  printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  // reserved blocks for the sblock table
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
  for(i=BAMBOO_BASE_VA; (unsigned int)i<(unsigned int)gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  sblock = gcreservedsb;
  bool advanceblock = false;

  for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
    // compute the sblock #, block #, and core coordinates (x,y)
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
          coren = gc_block2core[block%(NUMCORES4GC*2)];
          // compute core coordinate
          x = BAMBOO_COORDS_X(coren);
          y = BAMBOO_COORDS_Y(coren);
          printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
                 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
                 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
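
// Shared-heap block geometry assumed by the dump above and by the
// BLOCKSIZE()/BLOCKINDEX() macros: the blocks below BAMBOO_LARGE_SMEM_BOUND
// (indices < NUMCORES4GC) have size BAMBOO_SMEM_SIZE_L, and all remaining
// blocks have size BAMBOO_SMEM_SIZE.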
void initmulticoregcdata() {
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    // startup core to initialize corestatus[]
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    for(int i = 0; i < NUMCORES4GC; i++) {
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
  bamboo_smem_zero_top = NULL;
  gcprocessing = false;
  gcphase = FINISHPHASE;
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  gcforwardobjtbl = allocateMGCHash_I(20, 3);
  gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE
        -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
  gcmem_mixed_usedmem = 0;
  gc_profile_flag = false;
  gc_num_flush_dtlb = 0;
  gc_localheap_s = false;
#ifdef GC_CACHE_ADAPT
  gccachestage = false;
  INIT_MULTICORE_GCPROFILE_DATA();
void dismulticoregcdata() {
  freeMGCHash(gcforwardobjtbl);

  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(int i = 0; i < NUMCORES4GC; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
    for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
  gcnumsrobjs_index = 0;
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);
bool gc_checkAllCoreStatus() {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    if(gccorestatus[i] != 0) {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

// NOTE: should be invoked with interrupts turned off
bool gc_checkAllCoreStatus_I() {
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    if(gccorestatus[i] != 0) {
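
// Outline of the distributed termination detection used for the mark phase:
// gcnumsendobjs[][] / gcnumreceiveobjs[][] hold TWO samples of the per-core
// send/receive counters, selected by gcnumsrobjs_index. When every core
// appears stalled, the master asks all cores to confirm their counters
// (GCMARKCONFIRM) and then runs checkMarkStatus_p2(): if the global sum of
// sent objs minus received objs is zero AND the two samples agree for every
// core, no mark work or messages can still be in flight, so the mark phase
// is complete; otherwise the sample index is flipped and the check retries.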
INLINE void checkMarkStatus_p2() {
  // check if the sum of sent objs and received objs are the same;
  // if yes, check whether the info is up to date; if no, keep executing
  unsigned int sumsendobj = 0;
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
  if(0 == sumsendobj) {
    // check whether numsendobjs or numreceiveobjs changed between the two samples
    for(i = 0; i < NUMCORESACTIVE; i++) {
      if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i])) {
    if(i == NUMCORESACTIVE) {
      // all the core status info is up to date; stop the mark phase
      gcphase = COMPACTPHASE;
      // restore the gcstatus for all cores
      for(int i = 0; i < NUMCORESACTIVE; i++) {
      // There were changes between phase 1 and phase 2, so we cannot decide
      // whether the mark phase has finished
      // As it fails in phase 2, flip the entries
      gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    // There were changes between phase 1 and phase 2, so we cannot decide
    // whether the mark phase has finished
    // As it fails in phase 2, flip the entries
    gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
INLINE void checkMarkStatus() {
  if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
    unsigned int entry_index = 0;
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
      entry_index = gcnumsrobjs_index;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    if (gc_checkAllCoreStatus_I()) {
      // all cores have been found stalled for the first time:
      // send status-confirm msgs to all other cores
      // and reset the corestatus array
      numconfirm = NUMCORESACTIVE - 1;
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
    checkMarkStatus_p2();
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// compute load balance for all cores
INLINE int loadbalance(unsigned int * heaptop) {
  // compute load balance
  // get the total loads
  unsigned int tloads = gcloads[STARTUPCORE];
  for(int i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  *heaptop = gcbaseva + tloads;

  BLOCKINDEX(*heaptop, &b);
  // num of blocks per core
  unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);
  RESIDECORE(heaptop, &gctopcore);
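
/* A minimal sketch (hypothetical helper, simplified names) of the balancing
   arithmetic in loadbalance() above: the target number of blocks per core is
   the block index of the global heap top divided by the number of GC cores,
   and gctopcore is the core whose region contains the overall heap top. */
static inline unsigned int blocks_per_core_sketch(unsigned int topblockidx,
                                                  unsigned int numcores4gc) {
  // e.g., topblockidx = 13 and numcores4gc = 4  ->  13/4 = 3 blocks per core
  return topblockidx / numcores4gc;
}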
// compute the total mem size required and sort the lobjs in ascending order
INLINE unsigned int sortLObjs() {
  unsigned int tmp_lobj = 0;
  unsigned int tmp_len = 0;
  unsigned int tmp_host = 0;
  unsigned int sumsize = 0;

  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  // TODO: use quicksort instead?
  while(gc_lobjmoreItems2_I()) {
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2-1];

    GCPROFILE_RECORD_LOBJ();
    unsigned int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
      if(tmp_block->prev == NULL) {
      if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
        tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
        tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
        tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
        tmp_block = tmp_block->prev;
      } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
    if(tmp_block->lobjs[i-1] > tmp_lobj) {
      tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
      tmp_block->lengths[i] = tmp_block->lengths[i-1];
      tmp_block->hosts[i] = tmp_block->hosts[i-1];
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
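
// Large objects are first evacuated to a contiguous region at the very top
// of the shared heap (growing down from gcbaseva+BAMBOO_SHARED_MEM_SIZE), so
// that compaction can move the remaining data freely; moveLObjs() later
// relocates them to their final places just above the compacted heap.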
INLINE bool cacheLObjs() {
  // check the total mem size needed for the large objs
  unsigned long long sumsize = 0;
  unsigned int size = 0;

  sumsize = sortLObjs();

  GCPROFILE_RECORD_LOBJSPACE();

  // check if there is enough space to cache these large objs
  unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // not enough room to cache the large objs

  gcheaptop = dst; // Note: record the start of the cached lobjs with gcheaptop
  // cache the large objs at the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and needs to be flushed
    ((struct ___Object___ *)(gclobjtail2->lobjs[gclobjtailindex2]))->marked=COMPACTED;
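    // The destination may overlap the source object (dst can fall below the
    // source's end), in which case memmove is required for a safe overlapping
    // copy; otherwise a plain memcpy suffices.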
    if((unsigned int)dst<(unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
// update the bamboo_smemtbl to record the current shared mem usage
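// Each GC core owns two block slots per round: gc_core2block[2*coren] and
// gc_core2block[2*coren+1], advanced by NUMCORES4GC*2 per round j, so the
// shared-heap blocks are interleaved across cores. Every owned block below
// the core's local top counts as fully used, the block holding the top
// counts its partial load, and the remaining blocks stay empty.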
void updateSmemTbl(unsigned int coren, unsigned int localtop) {
  unsigned int ltopcore = 0;
  unsigned int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
    bound = BAMBOO_SMEM_SIZE;
  unsigned int load = (unsigned int)(localtop-gcbaseva)%(unsigned int)bound;
  unsigned int toset = 0;
  for(int j=0; 1; j++) {
    for(int i=0; i<2; i++) {
      toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
      if(toset < ltopcore) {
        bamboo_smemtbl[toset]=BLOCKSIZE(toset<NUMCORES4GC);
        gcmem_mixed_usedmem += bamboo_smemtbl[toset];
      } else if(toset == ltopcore) {
        bamboo_smemtbl[toset] = load;
        gcmem_mixed_usedmem += bamboo_smemtbl[toset];
INLINE unsigned int checkCurrHeapTop() {
  // update the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // adjust all gcloads to indicate the real heap top on each core;
  // previously gcloads[i] held the next available ptr on that core
  if(((unsigned int)gcloads[0]>(unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))&&(((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block, check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=BLOCKSIZE(gcfilledblocks[0]<=1);
  updateSmemTbl(0, gcloads[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    unsigned int tmptop = 0;
    if((gcfilledblocks[i] > 0)&&(((unsigned int)gcloads[i]%(BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block, check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i]+=BLOCKSIZE(gcfilledblocks[i]<=1);
    updateSmemTbl(i, gcloads[i]);

  // find the current heap top
  // BUG: when using local allocation, directly moving large objects
  // to the highest free chunk might not be memory-efficient
  unsigned int tmpheaptop = 0;
  for(int i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      return gcbaseva+bamboo_smemtbl[i]+OFFSET2BASEVA(i);
INLINE void movelobj(unsigned int tmpheaptop,unsigned int ptr,int size,int isize) {
  // move the large obj
  if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
    memmove(tmpheaptop, gcheaptop, size);
    memcpy(tmpheaptop, gcheaptop, size);
  // fill the remaining space with -2 padding
  BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);

  // cache the mapping info
  gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
INLINE void moveLObjs() {
  // update the gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
  unsigned int size = 0;
  unsigned int bound = 0;
  unsigned int tmpheaptop = checkCurrHeapTop();

  // move the large objs from gcheaptop to tmpheaptop
  // write the header first
  unsigned int tomove = gcbaseva+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
  gcmem_mixed_usedmem += tomove;
  // clear the gcsbstarttbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]),'\0',(BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-(unsigned int)gcreservedsb)*sizeof(unsigned int));
  gcheaptop = tmpheaptop;

  // check how many blocks it spans
  unsigned int remain = tmpheaptop-gcbaseva;
  // number of the sblock
  unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;
  unsigned int b = 0; // number of the block
  BLOCKINDEX(tmpheaptop, &b);
  // check the remaining space in this block
  bound = (BAMBOO_SMEM_SIZE);
  if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  remain = bound - remain%bound;

  unsigned int isize = 0;
  unsigned int host = 0;
  unsigned int ptr = 0;
  unsigned int base = tmpheaptop;
  unsigned int cpysize = 0;
  remain -= BAMBOO_CACHE_LINE_SIZE;
  tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
  gc_lobjqueueinit4_I();
  while(gc_lobjmoreItems4_I()) {
    ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
    ALIGNSIZE(size, &isize);
    if(remain >= isize) {
      // move the large obj
      movelobj(tmpheaptop,ptr,size,isize);
      // update bamboo_smemtbl
      bamboo_smemtbl[b] += isize;
      // this object spans blocks
      CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
      remain = BLOCKSIZE((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND));
      remain -= BAMBOO_CACHE_LINE_SIZE;
      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      BLOCKINDEX(tmpheaptop, &b);
      sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)+gcreservedsb;
      movelobj(tmpheaptop,ptr,size,isize);
      // set the gcsbstarttbl and bamboo_smemtbl
      unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
      for(int k = 1; k < tmpsbs; k++) {
        gcsbstarttbl[sb+k] = -1;
      bound = BLOCKSIZE(b<NUMCORES4GC);
      BLOCKINDEX(tmpheaptop-1, &tmpsbs);
      for(; b < tmpsbs; b++) {
        bamboo_smemtbl[b] = bound;
        if(b==NUMCORES4GC-1) {
          bound = BAMBOO_SMEM_SIZE;
      if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
        gcsbstarttbl[sb] = -1;
        remain = BLOCKSIZE((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND));
        bamboo_smemtbl[b] = bound;
        gcsbstarttbl[sb] = (int)tmpheaptop;
        remain = tmpheaptop-gcbaseva;
        bamboo_smemtbl[b] = remain%bound;
        remain = bound - bamboo_smemtbl[b];
      CLOSEBLOCK(base, isize+BAMBOO_CACHE_LINE_SIZE);

      if(remain == BAMBOO_CACHE_LINE_SIZE) {
        // zero-fill it, just in case
        BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
      remain -= BAMBOO_CACHE_LINE_SIZE;
      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
    CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
    bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
    tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
  gcheaptop = tmpheaptop;

  bamboo_free_block = 0;
  unsigned int tbound = 0;
    tbound=BLOCKSIZE(bamboo_free_block<NUMCORES4GC);
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
  // the first non-full partition
  GCPROFILE_RECORD_SPACE();
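
// Client-side collector routines. A gc core waits for each phase signal from
// the master (STARTUPCORE), runs the phase locally, and reports back; the
// phase order is INIT -> MARK -> COMPACT -> FLUSH -> FINISH. Non-gc cores
// skip the compact phase (see gc_nocollect below).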
void gc_collect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to
  // do gc
  send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);

  // core collector routine
  // wait for the init phase
  WAITFORGCPHASE(INITPHASE);
  GC_PRINTF("Do initGC\n");
  // send init finish msg to the core coordinator
  send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);

  // wait for the mark phase
  WAITFORGCPHASE(MARKPHASE);
  GC_PRINTF("Start mark phase\n");
  mark(true, stackptr);
  GC_PRINTF("Finish mark phase, start compact phase\n");
  GC_PRINTF("Finish compact phase\n");

  WAITFORGCPHASE(FLUSHPHASE);
  GC_PRINTF("Start flush phase\n");
  GCPROFILE_INFO_2_MASTER();
  GC_PRINTF("Finish flush phase\n");

  CACHEADAPT_PHASE_CLIENT();

  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  WAITFORGCPHASE(FINISHPHASE);
  GC_PRINTF("Finish gc! \n");
void gc_nocollect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to
  // do gc
  send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);

  WAITFORGCPHASE(INITPHASE);
  GC_PRINTF("Do initGC\n");
  // send init finish msg to the core coordinator
  send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);

  WAITFORGCPHASE(MARKPHASE);
  GC_PRINTF("Start mark phase\n");
  mark(true, stackptr);
  GC_PRINTF("Finish mark phase, wait for flush\n");

  // non-gc core collector routine
  WAITFORGCPHASE(FLUSHPHASE);
  GC_PRINTF("Start flush phase\n");
  GCPROFILE_INFO_2_MASTER();
  GC_PRINTF("Finish flush phase\n");

  CACHEADAPT_PHASE_CLIENT();

  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  WAITFORGCPHASE(FINISHPHASE);
  GC_PRINTF("Finish gc! \n");
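
// Master-side (coordinator) routines: the master runs on STARTUPCORE, kicks
// off each phase by broadcasting the phase-start msg to the clients,
// participates in the phase itself, and waits until every core reports
// completion before advancing to the next phase.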
void master_mark(struct garbagelist *stackptr) {
  GC_PRINTF("Start mark phase \n");
  GC_SEND_MSG_1_TO_CLIENT(GCSTART);
  while(MARKPHASE == gcphase) {
    mark(isfirst, stackptr);

void master_getlargeobjs() {
  // send msgs to all cores requesting their large objs info
  // Note: only need to ask the gc cores; non-gc cores do not host any objs
  numconfirm = NUMCORES4GC - 1;
  for(int i = 1; i < NUMCORES4GC; i++) {
    send_msg_1(i,GCLOBJREQUEST);
  gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
  // spin until we have all responses
  while(numconfirm!=0) ;

  if(gcheaptop < gcmarkedptrbound) {
    gcheaptop = gcmarkedptrbound;
  GC_PRINTF("prepare to cache large objs \n");
  // cache all large objs
  BAMBOO_ASSERTMSG(cacheLObjs(), "Not enough space to cache large objects\n");
void master_compact() {
  // predict the number of blocks to fill for each core
  unsigned int tmpheaptop = 0;
  int numpbc = loadbalance(&tmpheaptop);
  numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
  GC_PRINTF("mark phase finished \n");

  tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  for(int i = 0; i < NUMCORES4GC; i++) {
    unsigned int tmpcoreptr = 0;
    BASEPTR(i, numpbc, &tmpcoreptr);
    // init some data structures for the compact phase
    gcfilledblocks[i] = 0;
    gcrequiredmems[i] = 0;

    // send start compact messages to all cores
    // TODO: bug here, we do not know whether the direction is positive or negative
    if (tmpcoreptr < tmpheaptop) {
      gcstopblock[i] = numpbc + 1;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
        gcblock2fill = numpbc+1;
      gcstopblock[i] = numpbc;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc);
        gcblock2fill = numpbc;

  struct moveHelper * orig = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  struct moveHelper * to = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  compact_master(orig, to);
  GC_PRINTF("prepare to move large objs \n");
  GC_PRINTF("compact phase finished \n");

void master_updaterefs(struct garbagelist * stackptr) {
  gcphase = FLUSHPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
  GC_PRINTF("Start flush phase \n");
  // now the master core needs to decide the new cache strategy
  GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gcphase);
  GC_PRINTF("Finish flush phase \n");
void master_finish() {
  gcphase = FINISHPHASE;

  // invalidate all shared mem pointers
  // put it here because it takes time to inform all the other cores to
  // finish gc, and it might cause problems if some core resumes the
  // mutator earlier than the others
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
  gcprocessing = false;

  // inform the other cores to stop and wait for gc
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    // reuse the gcnumsendobjs & gcnumreceiveobjs arrays
    gcnumsendobjs[0][i] = 0;
    gcnumreceiveobjs[0][i] = 0;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
void gc_master(struct garbagelist * stackptr) {
  tprintf("start GC !!!!!!!!!!!!! \n");
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
  GC_PRINTF("Check core status \n");
  GC_CHECK_ALL_CORE_STATUS(true);
  CACHEADAPT_OUTPUT_CACHE_SAMPLING();

  master_mark(stackptr);

  // get the large objects from all cores
  master_getlargeobjs();

  // update the references
  master_updaterefs(stackptr);

  // do cache adaptation
  CACHEADAPT_PHASE_MASTER();

  // do the finish-up work
  GC_PRINTF("gc finished \n");
  tprintf("finish GC ! %d \n", gcflag);

  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
  gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sumsendobj += gcnumsendobjs[0][i];
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sumsendobj -= gcnumreceiveobjs[0][i];
  if(0 != sumsendobj) {
    // there are still msgs in flight; wait until updated pre-gc
    // information arrives, then check again
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
void pregcprocessing() {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // disable the timer interrupt
  bamboo_mask_timer_intr();

  // Zero out the remaining memory here, because in the GC_CACHE_ADAPT version
  // we need to make sure the shared heap is not touched during the gcinit
  // phase; otherwise there would be problems when adapting the cache strategy.
  BAMBOO_CLOSE_CUR_MSP();
  if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {

#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // get the sampling data
  bamboo_output_dtlb_sampling();

void postgcprocessing() {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // enable the timer interrupt
  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
  bamboo_unmask_timer_intr();
bool gc(struct garbagelist * stackptr) {
  gcprocessing = false;

  // core coordinator routine
  if(0 == BAMBOO_NUM_OF_CORE) {
    GC_PRINTF("Check if we can do gc or not\n");
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    if(!gc_checkAllCoreStatus()) {
      // some cores are still executing the mutator and have not reached
      // a gc safe point, so we are not ready to do gc yet
    GC_PRINTF("start gc! \n");
  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
    gc_collect(stackptr);
    gc_nocollect(stackptr);