// TODO: tags are NOT supported!

#include "multicoregarbage.h"
#include "multicoregcmark.h"
#include "multicoregccompact.h"
#include "multicoregcflush.h"
#include "multicoreruntime.h"
#include "multicoregcprofile.h"

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
// dump the whole shared mem in blocks
printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
// reserved blocks that hold the sblock table
printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
       udn_tile_coord_y());
for(i=BAMBOO_BASE_VA; (unsigned int)i<(unsigned int)gcbaseva; i+=4*16) {
  printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
         udn_tile_coord_x(), udn_tile_coord_y(),
         *((int *)(i)), *((int *)(i + 4)),
         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
}
sblock = gcreservedsb;
bool advanceblock = false;
// the shared heap proper
for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
  // compute the sblock #, the block # and the owning core coordinate (x,y)
  if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
    if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
      // inside the large-block area
      if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
        coren = gc_block2core[block%(NUMCORES4GC*2)];
        // compute the core coordinate
        x = BAMBOO_COORDS_X(coren);
        y = BAMBOO_COORDS_Y(coren);
        printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
               udn_tile_coord_x(), udn_tile_coord_y(),
               block, sblock++, x, y,
               (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
  printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
         udn_tile_coord_x(), udn_tile_coord_y(),
         *((int *)(i)), *((int *)(i + 4)),
         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
}
printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
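/*
 * Layout sketch for the dump above (hypothetical sizes, assuming
 * BAMBOO_SMEM_SIZE_L = 2*BAMBOO_SMEM_SIZE = 32K and NUMCORES4GC = 2): the
 * first NUMCORES4GC blocks are large blocks of BAMBOO_SMEM_SIZE_L bytes and
 * end at BAMBOO_LARGE_SMEM_BOUND; the remaining blocks are BAMBOO_SMEM_SIZE
 * bytes each.
 *
 *   gcbaseva + 0K  : block 0, 32K, owned by core gc_block2core[0]
 *   gcbaseva + 32K : block 1, 32K
 *   gcbaseva + 64K : BAMBOO_LARGE_SMEM_BOUND; blocks 2, 3, ... of 16K each
 */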
void initmulticoregcdata() {
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    // the startup core initializes corestatus[]
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
    for(int i = 0; i < NUMCORES4GC; i++) {
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
    }
  }

  bamboo_smem_zero_top = NULL;
  gcprocessing = false;
  gcphase = FINISHPHASE;
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  gcforwardobjtbl = allocateMGCHash_I(20, 3);
  gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE
        -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
  gcmem_mixed_usedmem = 0;
  gc_profile_flag = false;
  gc_num_flush_dtlb = 0;
  gc_localheap_s = false;
#ifdef GC_CACHE_ADAPT
  gccachestage = false;
#endif

  INIT_MULTICORE_GCPROFILE_DATA();
}
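/*
 * Worked example for the mixed-memory threshold set above (hypothetical
 * values): with a 16MB shared heap and bamboo_reserved_smem = 1 block of
 * 64KB, gcmem_mixed_threshold = (16MB - 64KB) * 0.8, i.e. roughly 12.7MB;
 * gcmem_mixed_usedmem is presumably compared against this bound by the
 * mixed allocation policy defined elsewhere.
 */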
void dismulticoregcdata() {
  freeMGCHash(gcforwardobjtbl);
}

void initGC() {
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(int i = 0; i < NUMCORES4GC; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
    }
    for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
    gcnumsrobjs_index = 0;
  }
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);
}
// returns true iff every active core has reached the stalled status
bool gc_checkAllCoreStatus() {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    if(gccorestatus[i] != 0) {
      // some core is still running
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      return false;
    }
  }
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  return true;
}

// NOTE: should be invoked with interrupts turned off
bool gc_checkAllCoreStatus_I() {
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    if(gccorestatus[i] != 0) {
      return false;
    }
  }
  return true;
}
INLINE void checkMarkStatus() {
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    unsigned int entry_index = 0;
    if(waitconfirm) {
      // phase 2: use the entries that were not updated in phase 1
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    } else {
      // phase 1
      entry_index = gcnumsrobjs_index;
    }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    if (gc_checkAllCoreStatus_I()) {
      // all cores have stalled
      if(!waitconfirm) {
        // the first time all cores are found stalled:
        // send a status-confirm msg to all other cores
        // and reset the corestatus array too
        waitconfirm = true;
        numconfirm = NUMCORESACTIVE - 1;
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
      } else {
        // check if the sums of sent and received objs are equal;
        // yes -> check if the info is up to date; no -> keep executing
        unsigned int sumsendobj = 0;
        for(int i = 0; i < NUMCORESACTIVE; i++) {
          sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
        }
        for(int i = 0; i < NUMCORESACTIVE; i++) {
          sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
        }
        if(0 == sumsendobj) {
          // check whether numsendobjs or numreceiveobjs changed on any core
          // between the two snapshots
          bool ischanged = false;
          for(int i = 0; i < NUMCORESACTIVE; i++) {
            if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
               (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i])) {
              ischanged = true;
              break;
            }
          }
          if(!ischanged) {
            // all core status info is up to date: stop the mark phase
            gcphase = COMPACTPHASE;
            // restore the gcstatus for all cores
            for(int i = 0; i < NUMCORESACTIVE; i++) {
              gccorestatus[i] = 1;
            }
          } else {
            // there were changes between phase 1 and phase 2, so we cannot
            // decide whether the mark phase has finished;
            // as phase 2 failed, flip the entries
            gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
          }
        } else {
          // the sums differ, so msgs are still in flight and we cannot
          // decide whether the mark phase has finished;
          // as phase 2 failed, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        }
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      }
    } else {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    }
  }
}
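/*
 * Sketch of the termination check above (hypothetical counts): suppose two
 * cores record, in snapshot gcnumsrobjs_index, sends = {3, 1} and
 * receives = {1, 3}; then sumsendobj = (3+1) - (1+3) = 0, i.e. no mark msgs
 * appear to be in flight. The second, confirmation round guards against a
 * msg sent after a core took its snapshot: the mark phase is declared
 * finished only if snapshots [0][i] and [1][i] agree on every core;
 * otherwise gcnumsrobjs_index is flipped and the check restarts.
 */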
// compute the load balance for all cores
INLINE int loadbalance(unsigned int * heaptop) {
  // get the total load
  unsigned int tloads = gcloads[STARTUPCORE];
  for(int i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  }
  *heaptop = gcbaseva + tloads;

  unsigned int b = 0;
  BLOCKINDEX(*heaptop, &b);
  // number of blocks per core
  unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);
  RESIDECORE(heaptop, &gctopcore);
  return numbpc;
}
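/*
 * Worked example (hypothetical values): with NUMCORES4GC = 4 and marked
 * loads gcloads = {10K, 20K, 30K, 20K}, tloads = 80K and
 * *heaptop = gcbaseva + 80K. If that address falls into block b = 5, each
 * core is predicted to fill numbpc = 5/4 = 1 block, and RESIDECORE resolves
 * which core owns the block containing the new heap top.
 */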
// compute the total mem size required and sort the lobjs in ascending order
// of start address
INLINE unsigned int sortLObjs() {
  unsigned int tmp_lobj = 0;
  unsigned int tmp_len = 0;
  unsigned int tmp_host = 0;
  unsigned int sumsize = 0;

  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  // TODO: use quicksort instead?
  while(gc_lobjmoreItems2_I()) {
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2-1];
    sumsize += tmp_len;
    GCPROFILE_RECORD_LOBJ();
    unsigned int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
    while(true) {
      if(i == 0) {
        if(tmp_block->prev == NULL) {
          break;
        }
        if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
          // shift the previous block's largest entry into the current slot
          tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
          tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
          tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
          tmp_block = tmp_block->prev;
          i = NUMLOBJPTRS-1;
        } else {
          break;
        } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
      } else {
        if(tmp_block->lobjs[i-1] > tmp_lobj) {
          // shift the neighbouring entry one slot up
          tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
          tmp_block->lengths[i] = tmp_block->lengths[i-1];
          tmp_block->hosts[i] = tmp_block->hosts[i-1];
          i--;
        } else {
          break;
        }
      }
    }
    // insert the dequeued entry at the freed slot
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
  }
  return sumsize;
}
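/*
 * The loop above is an insertion sort over the linked list of
 * lobjpointerblock arrays: each dequeued obj is compared with its
 * predecessors, and larger entries are shifted one slot toward the tail
 * (crossing block boundaries through the previous block's last slot,
 * NUMLOBJPTRS-1) until the insertion point is found. E.g., inserting start
 * address 24 into {8, 16, 32, 40} (hypothetical values) shifts 32 and 40 up
 * one slot and places 24 in the freed slot, keeping ascending order.
 */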
INLINE bool cacheLObjs() {
  // check the total mem size needed for the large objs
  unsigned long long sumsize = 0;
  unsigned int size = 0;

  sumsize = sortLObjs();

  GCPROFILE_RECORD_LOBJSPACE();

  // check if there is enough space to cache these large objs
  unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // not enough room to cache the large objs
    return false;
  }

  gcheaptop = dst; // Note: gcheaptop now records the start of the cached lobjs
  // cache the large objs at the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field, indicating that this obj has been moved
    // and needs to be flushed
    ((struct ___Object___ *)(gclobjtail2->lobjs[gclobjtailindex2]))->marked =

    if((unsigned int)dst <
       (unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
      // source and destination may overlap: memmove handles that safely
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
  }
  return true;
}
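/*
 * The memmove/memcpy split above matters because the large objs are copied
 * toward the top of the same heap they currently occupy: when the
 * destination range [dst, dst+size) overlaps the source obj, only memmove
 * is guaranteed to copy correctly; otherwise the cheaper memcpy is used.
 */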
// update the bamboo_smemtbl to record the current shared mem usage
void updateSmemTbl(unsigned int coren, unsigned int localtop) {
  unsigned int ltopcore = 0;
  unsigned int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
    bound = BAMBOO_SMEM_SIZE;
  }
  unsigned int load = (unsigned int)(localtop-gcbaseva)%(unsigned int)bound;
  unsigned int toset = 0;
  for(int j=0; 1; j++) {
    for(int i=0; i<2; i++) {
      toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
      if(toset < ltopcore) {
        // this block is fully used
        bamboo_smemtbl[toset] =
          (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        gcmem_mixed_usedmem += bamboo_smemtbl[toset];
      } else if(toset == ltopcore) {
        // the block containing localtop is only partially used
        bamboo_smemtbl[toset] = load;
        gcmem_mixed_usedmem += bamboo_smemtbl[toset];
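/*
 * Illustration of the block walk above (hypothetical mapping): each core
 * owns two block slots per round, gc_core2block[2*coren] and
 * gc_core2block[2*coren+1], and the pattern repeats every NUMCORES4GC*2
 * blocks. With NUMCORES4GC = 2 and core 0 owning slots {0, 3}, iteration j
 * visits blocks 0, 3, 4, 7, 8, 11, ..., recording the full block size for
 * every block below ltopcore and the partial load for the block that
 * contains localtop.
 */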
INLINE unsigned int checkCurrHeapTop() {
  // update the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // flush all gcloads to indicate the real heap top on each core;
  // previously they pointed to the next available ptr on the core
  if(((unsigned int)gcloads[0] > (unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))
     && (((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block: check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
  }
  updateSmemTbl(0, gcloads[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    unsigned int tmptop = 0;
    if((gcfilledblocks[i] > 0)
       && (((unsigned int)gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block: check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
    }
    updateSmemTbl(i, gcloads[i]);
  }

  // find the current heap top
  // a bug here: when using local allocation, directly moving large objects
  // to the highest free chunk might not be memory efficient
  unsigned int tmpheaptop = 0;
  for(int i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      return gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
             (BAMBOO_SMEM_SIZE_L*i) :
             (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
    }
  }
  return gcbaseva;
}
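/*
 * Address arithmetic for the return value above (hypothetical sizes:
 * BAMBOO_SMEM_SIZE_L = 32K, BAMBOO_SMEM_SIZE = 16K, NUMCORES4GC = 2, so
 * BAMBOO_LARGE_SMEM_BOUND = 64K): if the highest non-empty block is i = 3
 * with bamboo_smemtbl[3] = 4K used, the heap top is
 * gcbaseva + 4K + (16K*(3-2) + 64K) = gcbaseva + 84K.
 */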
INLINE void moveLObjs() {
  // update gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;

  unsigned int size = 0;
  unsigned int bound = 0;
  unsigned int tmpheaptop = checkCurrHeapTop();

  // move the large objs from gcheaptop to tmpheaptop;
  // write the block header first
  unsigned int tomove = gcbaseva+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
  gcmem_mixed_usedmem += tomove;
  // flush the gcsbstarttbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
                   (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE
                    -(unsigned int)gcreservedsb)*sizeof(unsigned int));
  if(tomove == 0) {
    gcheaptop = tmpheaptop;
  } else {
    // check how many blocks the cached large objs span
    unsigned int remain = tmpheaptop-gcbaseva;
    // number of the sblock
    unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;
    unsigned int b = 0; // number of the block
    BLOCKINDEX(tmpheaptop, &b);
    // compute the remaining space in this block
    bound = (BAMBOO_SMEM_SIZE);
    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
      bound = (BAMBOO_SMEM_SIZE_L);
    }
    remain = bound - remain%bound;

    unsigned int isize = 0;
    unsigned int host = 0;
    unsigned int ptr = 0;
    unsigned int base = tmpheaptop;
    unsigned int cpysize = 0;
    // reserve space for the block header at the destination
    remain -= BAMBOO_CACHE_LINE_SIZE;
    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
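    /*
     * Example of the bookkeeping above (hypothetical values): if tmpheaptop
     * sits 4K into a block with bound = 16K, then remain = 16K - 4K = 12K
     * of free space. A BAMBOO_CACHE_LINE_SIZE header is then reserved at
     * the start of the destination area (presumably filled in by CLOSEBLOCK
     * later), so both remain and tmpheaptop are adjusted before any obj is
     * copied.
     */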
    gc_lobjqueueinit4_I();
    while(gc_lobjmoreItems4_I()) {
      ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
      ALIGNSIZE(size, &isize);
      if(remain >= isize) {
        // this obj fits into the current block:
        // move the large obj
        if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the alignment gap with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        gcheaptop += size;
        cpysize += isize;
        // cache the mapping info
        gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
        tmpheaptop += isize;
        remain -= isize;
        // update bamboo_smemtbl
        bamboo_smemtbl[b] += isize;
      } else {
        // this obj spans blocks
        if(cpysize > 0) {
          CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
          bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
          cpysize = 0;
          base = tmpheaptop;
          if(remain == 0) {
            remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                     BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          }
          remain -= BAMBOO_CACHE_LINE_SIZE;
          tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
          BLOCKINDEX(tmpheaptop, &b);
          sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)+gcreservedsb;
        }
        // move the large obj
        if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the alignment gap with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        gcheaptop += size;
        // cache the mapping info
        gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
        tmpheaptop += isize;

        // set the gcsbstarttbl and bamboo_smemtbl
        unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
        for(int k = 1; k < tmpsbs; k++) {
          // no obj starts inside these fully covered sblocks
          gcsbstarttbl[sb+k] = -1;
        }
        sb += tmpsbs;
        bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        BLOCKINDEX(tmpheaptop-1, &tmpsbs);
        for(; b < tmpsbs; b++) {
          bamboo_smemtbl[b] = bound;
          if(b==NUMCORES4GC-1) {
            bound = BAMBOO_SMEM_SIZE;
          }
        }
        if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
          // the obj ends exactly at an sblock boundary
          gcsbstarttbl[sb] = -1;
          remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                   BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          bamboo_smemtbl[b] = bound;
        } else {
          // the obj ends inside sblock sb
          gcsbstarttbl[sb] = (int)tmpheaptop;
          remain = tmpheaptop-gcbaseva;
          bamboo_smemtbl[b] = remain%bound;
          remain = bound - bamboo_smemtbl[b];
        }

        CLOSEBLOCK(base, isize+BAMBOO_CACHE_LINE_SIZE);
        cpysize = 0;
        base = tmpheaptop;
        if(remain == BAMBOO_CACHE_LINE_SIZE) {
          // fill with 0 in case
          BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
        }
        remain -= BAMBOO_CACHE_LINE_SIZE;
        tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      }
    }
    if(cpysize > 0) {
      CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
    } else {
      tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
    }
    gcheaptop = tmpheaptop;
  }

  bamboo_free_block = 0;
  unsigned int tbound = 0;
  do {
    tbound=(bamboo_free_block<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
      bamboo_free_block++;
    } else {
      // found the first non-full partition
      break;
    }
  } while(true);

  GCPROFILE_RECORD_SPACE();
}
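/*
 * Sketch of the block-spanning case above (hypothetical sizes): an aligned
 * obj of isize = 40K placed with remain = 8K free in the current 16K block
 * spills into tmpsbs = 1 + (40K - 8K - 1)/16K = 2 further sblocks. Every
 * sblock that begins inside the obj gets gcsbstarttbl[] = -1 (no obj starts
 * there); the sblock where the obj ends records tmpheaptop, the address at
 * which the next obj will start, unless the obj ends exactly on an sblock
 * boundary.
 */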
void gc_collect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to
  // do gc
  send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);

  // core collector routine
  // wait for the init phase
  WAITFORGCPHASE(INITPHASE);

  GC_PRINTF("Do initGC\n");
  initGC();
  // send init finish msg to the core coordinator
  send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);

  // wait for the mark phase
  WAITFORGCPHASE(MARKPHASE);
  GC_PRINTF("Start mark phase\n");
  mark(true, stackptr);
  GC_PRINTF("Finish mark phase, start compact phase\n");
  compact();
  GC_PRINTF("Finish compact phase\n");

  WAITFORGCPHASE(FLUSHPHASE);
  GC_PRINTF("Start flush phase\n");
  GCPROFILE_INFO_2_MASTER();
  flush(stackptr);
  GC_PRINTF("Finish flush phase\n");

  CACHEADAPT_PHASE_CLIENT();

  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  WAITFORGCPHASE(FINISHPHASE);

  GC_PRINTF("Finish gc! \n");
}
void gc_nocollect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to
  // do gc
  send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);

  WAITFORGCPHASE(INITPHASE);

  GC_PRINTF("Do initGC\n");
  initGC();
  // send init finish msg to the core coordinator
  send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);

  WAITFORGCPHASE(MARKPHASE);
  GC_PRINTF("Start mark phase\n");
  mark(true, stackptr);
  GC_PRINTF("Finish mark phase, wait for flush\n");

  // non-gc core collector routine
  WAITFORGCPHASE(FLUSHPHASE);
  GC_PRINTF("Start flush phase\n");
  GCPROFILE_INFO_2_MASTER();
  flush(stackptr);
  GC_PRINTF("Finish flush phase\n");

  CACHEADAPT_PHASE_CLIENT();

  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  WAITFORGCPHASE(FINISHPHASE);

  GC_PRINTF("Finish gc! \n");
}
void master_mark(struct garbagelist *stackptr) {
  bool isfirst = true;

  GC_PRINTF("Start mark phase \n");
  GC_SEND_MSG_1_TO_CLIENT(GCSTART);
  gcphase = MARKPHASE;
  // mark phase
  while(MARKPHASE == gcphase) {
    mark(isfirst, stackptr);
    isfirst = false;
    // check the mark status; this may advance gcphase to COMPACTPHASE
    checkMarkStatus();
  }
}

void master_getlargeobjs() {
  // send msgs to all cores requesting their large-obj info
  // Note: only the gc cores need to be asked; non-gc cores do not host any objs
  numconfirm = NUMCORES4GC - 1;
  for(int i = 1; i < NUMCORES4GC; i++) {
    send_msg_1(i,GCLOBJREQUEST);
  }
  gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
  // spin until we have all responses
  while(numconfirm!=0) ;

  if(gcheaptop < gcmarkedptrbound) {
    gcheaptop = gcmarkedptrbound;
  }

  GC_PRINTF("prepare to cache large objs \n");

  // cache all large objs
  BAMBOO_ASSERTMSG(cacheLObjs(), "Not enough space to cache large objects\n");
}
void master_compact() {
  // predict the number of blocks to fill for each core
  unsigned int tmpheaptop = 0;
  int numpbc = loadbalance(&tmpheaptop);
  numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);

  GC_PRINTF("mark phase finished \n");

  tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  for(int i = 0; i < NUMCORES4GC; i++) {
    unsigned int tmpcoreptr = 0;
    BASEPTR(i, numpbc, &tmpcoreptr);
    // init some data structures for the compact phase
    gcfilledblocks[i] = 0;
    gcrequiredmems[i] = 0;

    // send start-compact messages to all cores
    // TODO: bug here -- it is unknown whether the direction is positive
    // or negative
    if (tmpcoreptr < tmpheaptop) {
      gcstopblock[i] = numpbc + 1;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
      } else {
        gcblock2fill = numpbc+1;
      }
    } else {
      gcstopblock[i] = numpbc;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc);
      } else {
        gcblock2fill = numpbc;
      }
    }
  }
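  /*
   * Example of the distribution above (hypothetical values): with
   * numpbc = 3, a core whose predicted base pointer BASEPTR(i, numpbc, ...)
   * still lies below the old heap top is told to fill numpbc+1 = 4 blocks,
   * while the others are told to fill 3; gcstopblock[i] records the bound,
   * and the master core stores its own bound in gcblock2fill instead of
   * sending itself a msg. The TODO above notes that this boundary test is
   * known to be unreliable.
   */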
  struct moveHelper * orig =
    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  struct moveHelper * to =
    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  compact_master(orig, to);

  GC_PRINTF("prepare to move large objs \n");
  // move the cached large objs to the top of the compacted heap
  moveLObjs();
  GC_PRINTF("compact phase finished \n");
}
void master_updaterefs(struct garbagelist * stackptr) {
  gcphase = FLUSHPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);

  GC_PRINTF("Start flush phase \n");
  flush(stackptr);
  // now the master core needs to decide the new cache strategy
  GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gcphase);
  GC_PRINTF("Finish flush phase \n");
}

void master_finish() {
  gcphase = FINISHPHASE;

  // invalidate all shared mem pointers;
  // this is placed here because it takes time to inform all the other cores
  // to finish gc, and problems could arise if some core resumed the mutator
  // earlier than the others
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  GC_SEND_MSG_1_TO_CLIENT(GCFINISH);

  gcprocessing = false;
  if(gcflag) {
    // inform the other cores to stop and wait for gc
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      // reuse the gcnumsendobjs & gcnumreceiveobjs arrays
      gcnumsendobjs[0][i] = 0;
      gcnumreceiveobjs[0][i] = 0;
    }
    GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
  }
}
void gc_master(struct garbagelist * stackptr) {
  tprintf("start GC !!!!!!!!!!!!! \n");

  // initialization phase
  gcphase = INITPHASE;
  initGC();
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);

  GC_PRINTF("Check core status \n");
  GC_CHECK_ALL_CORE_STATUS(true);
  CACHEADAPT_OUTPUT_CACHE_SAMPLING();

  // do the mark phase
  master_mark(stackptr);

  // get large objects from all cores
  master_getlargeobjs();

  // compact the heap
  master_compact();

  // update the references
  master_updaterefs(stackptr);

  // do cache adaptation
  CACHEADAPT_PHASE_MASTER();

  // do finish-up work
  master_finish();

  GC_PRINTF("gc finished \n");
  tprintf("finish GC ! %d \n", gcflag);
}
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
  gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
  // check whether all pre-gc msgs have arrived
  unsigned int sumsendobj = 0;
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sumsendobj += gcnumsendobjs[0][i];
  }
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sumsendobj -= gcnumreceiveobjs[0][i];
  }
  if(0 != sumsendobj) {
    // there are still msgs on the fly; wait until updated pre-gc
    // information arrives, then check again
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  }
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
void pregcprocessing() {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // disable the timer interrupt
  bamboo_mask_timer_intr();
#endif
  // Zero out the remaining memory here, because for the GC_CACHE_ADAPT
  // version we need to make sure the shared heap is not touched during the
  // gcinit phase; otherwise there would be problems when adapting the cache
  // strategy.
  BAMBOO_CLOSE_CUR_MSP();
  if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
    gc_num_flush_dtlb++;
  }
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // get the dtlb sampling data
  bamboo_output_dtlb_sampling();
#endif
}

void postgcprocessing() {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // enable the timer interrupt
  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
  bamboo_unmask_timer_intr();
#endif
}
bool gc(struct garbagelist * stackptr) {
  gcprocessing = false;

  // core coordinator routine
  if(0 == BAMBOO_NUM_OF_CORE) {
    GC_PRINTF("Check if we can do gc or not\n");
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    if(!gc_checkAllCoreStatus()) {
      // some of the cores are still executing the mutator and have not
      // reached a gc safe point, so gc cannot start yet
      return false;
    }
    GC_PRINTF("start gc! \n");
    gc_master(stackptr);
  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
    // gc cores
    gc_collect(stackptr);
  } else {
    // non-gc cores
    gc_nocollect(stackptr);
  }
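/*
 * Role dispatch in gc() above: core 0 acts as the coordinator and runs
 * gc_master(); cores 1..NUMCORES4GC-1 run the full collector routine
 * gc_collect(); the remaining active cores run gc_nocollect(), which marks
 * and flushes but never compacts, since non-gc cores do not host any objs
 * (cf. the note in master_getlargeobjs()).
 */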