// TODO: tags are NOT supported!!!
#include "multicoregarbage.h"
#include "multicoregcmark.h"
#include "multicoregccompact.h"
#include "multicoregcflush.h"
#include "multicoreruntime.h"
#include "multicoregcprofile.h"
#include "runtime_arch.h"

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
// dump the whole shared memory, block by block
  printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  // blocks reserved for the sblock table
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  for(i=BAMBOO_BASE_VA; (unsigned int)i<(unsigned int)gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  sblock = gcreservedsb;
  bool advanceblock = false;
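  // each iteration of the loop below dumps one 64-byte line of memory
  // (16 words of 4 bytes, matching the i += 4*16 stride)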
  for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
    // compute the sblock #, the block #, and the hosting core's coordinates (x,y)
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
      coren = gc_block2core[block%(NUMCORES4GC*2)];
      // compute the core coordinates
      x = BAMBOO_COORDS_X(coren);
      y = BAMBOO_COORDS_Y(coren);
      printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
             udn_tile_coord_x(), udn_tile_coord_y(),
             block, sblock++, x, y,
             (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
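// Shared-heap layout assumed throughout this file (inferred from the dump
// loop above and from checkCurrHeapTop() below): the first NUMCORES4GC
// blocks are large blocks of BAMBOO_SMEM_SIZE_L bytes each, covering
// [gcbaseva, gcbaseva+BAMBOO_LARGE_SMEM_BOUND); all later blocks are
// regular blocks of BAMBOO_SMEM_SIZE bytes. As a sketch, block i starts at
//   i <  NUMCORES4GC: gcbaseva + i*BAMBOO_SMEM_SIZE_L
//   i >= NUMCORES4GC: gcbaseva + BAMBOO_LARGE_SMEM_BOUND
//                       + (i-NUMCORES4GC)*BAMBOO_SMEM_SIZE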
void initmulticoregcdata() {
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    // the startup core initializes corestatus[] and the send/receive counters
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    for(int i = 0; i < NUMCORES4GC; i++) {
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
  bamboo_smem_zero_top = NULL;
  gcprocessing = false;
  gcphase = FINISHPHASE;
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  gcforwardobjtbl = allocateMGCHash_I(20, 3);
  gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE
        -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
  gcmem_mixed_usedmem = 0;
  gc_profile_flag = false;
  gc_num_flush_dtlb = 0;
  gc_localheap_s = false;
#ifdef GC_CACHE_ADAPT
  gccachestage = false;
  INIT_MULTICORE_GCPROFILE_DATA();
void dismulticoregcdata() {
  freeMGCHash(gcforwardobjtbl);
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(int i = 0; i < NUMCORES4GC; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
    for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
  gcnumsrobjs_index = 0;
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);
bool gc_checkAllCoreStatus() {
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    if(gccorestatus[i] != 0) {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// NOTE: should be invoked with interrupts turned off
bool gc_checkAllCoreStatus_I() {
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    if(gccorestatus[i] != 0) {
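// checkMarkStatus() implements a distributed termination check for the mark
// phase using two snapshots of the per-core send/receive counters
// (gcnumsendobjs[0/1] / gcnumreceiveobjs[0/1], selected via
// gcnumsrobjs_index). The mark phase is only declared finished when every
// core has stalled, the global count of objects sent equals the count
// received (no mark messages still in flight), and the two snapshots agree
// for every core; otherwise the snapshots are flipped and the check redone.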
INLINE void checkMarkStatus() {
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    unsigned int entry_index = 0;
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
      entry_index = gcnumsrobjs_index;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    if (gc_checkAllCoreStatus_I()) {
      // all cores have stalled for the first time: send a status-confirm
      // msg to every other core and reset the corestatus array
      numconfirm = NUMCORESACTIVE - 1;
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
      // check whether the sums of sent and received objs are the same;
      // if yes, check whether the info is up to date; if no, keep executing
      unsigned int sumsendobj = 0;
      for(int i = 0; i < NUMCORESACTIVE; i++) {
        sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
      for(int i = 0; i < NUMCORESACTIVE; i++) {
        sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
      if(0 == sumsendobj) {
        // check whether numsendobjs or numreceiveobjs changed on any core
        // between the two snapshots
        bool ischanged = false;
        for(int i = 0; i < NUMCORESACTIVE; i++) {
          if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
             (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
          // all core status info is up to date, so stop the mark phase
          gcphase = COMPACTPHASE;
          // restore the gcstatus for all cores
          for(int i = 0; i < NUMCORESACTIVE; i++) {
          // there were changes between the two snapshots, so we cannot
          // decide whether the mark phase has finished;
          // as the check failed in phase 2, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        // there were changes between the two snapshots, so we cannot
        // decide whether the mark phase has finished;
        // as the check failed in phase 2, flip the entries
        gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// compute the load balance for all cores
INLINE int loadbalance(unsigned int * heaptop) {
  // compute load balance
  // get the total load
  unsigned int tloads = gcloads[STARTUPCORE];
  for(int i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  *heaptop = gcbaseva + tloads;
  BLOCKINDEX(*heaptop, &b);
  // number of blocks per core
  unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);
  RESIDECORE(heaptop, &gctopcore);
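  // Balancing sketch: tloads appears to be the total size of the compacted
  // live data, so *heaptop is where the packed heap would end; numbpc is
  // then floor(b / NUMCORES4GC) blocks per core, and gctopcore is the core
  // whose block region contains that balanced heap top.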
// compute the total mem size required and sort the lobjs in ascending order
INLINE unsigned int sortLObjs() {
  unsigned int tmp_lobj = 0;
  unsigned int tmp_len = 0;
  unsigned int tmp_host = 0;
  unsigned int sumsize = 0;

  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  // TODO: use quick sort instead?
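  // the loop below is an insertion sort over the linked list of
  // lobjpointerblocks, i.e. O(n^2) in the number of large objects; that is
  // probably acceptable only while large objects remain rare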
  while(gc_lobjmoreItems2_I()) {
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    GCPROFILE_RECORD_LOBJ();
    unsigned int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
      if(tmp_block->prev == NULL) {
      if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
        tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
        tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
        tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
        tmp_block = tmp_block->prev;
      } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
      if(tmp_block->lobjs[i-1] > tmp_lobj) {
        tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
        tmp_block->lengths[i] = tmp_block->lengths[i-1];
        tmp_block->hosts[i] = tmp_block->hosts[i-1];
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
INLINE bool cacheLObjs() {
  // check the total mem size needed for the large objs
  unsigned long long sumsize = 0;
  unsigned int size = 0;
  sumsize = sortLObjs();
  GCPROFILE_RECORD_LOBJSPACE();
  // check if there is enough space to cache these large objs
  unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // not enough room to cache the large objs
  gcheaptop = dst; // note: gcheaptop now records the start of the cached lobjs
  // cache the large objs at the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
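  // dst walks down from the end of the heap as each large obj is copied in
  // front of it; when an object's original location overlaps its
  // destination (dst < src+size), memmove() must be used instead of the
  // cheaper memcpy()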
  while(gc_lobjmoreItems3_I()) {
    size = gclobjtail2->lengths[gclobjtailindex2];
    // mark this obj as moved so that it will be flushed later
    ((struct ___Object___ *)(gclobjtail2->lobjs[gclobjtailindex2]))->marked =
    if((unsigned int)dst <
       (unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
// update bamboo_smemtbl to record the current shared mem usage
void updateSmemTbl(unsigned int coren, unsigned int localtop) {
  unsigned int ltopcore = 0;
  unsigned int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
    bound = BAMBOO_SMEM_SIZE;
  unsigned int load = (unsigned int)(localtop-gcbaseva)%(unsigned int)bound;
  unsigned int toset = 0;
  for(int j=0; 1; j++) {
    for(int i=0; i<2; i++) {
      toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
      if(toset < ltopcore) {
        bamboo_smemtbl[toset]=(toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        gcmem_mixed_usedmem += bamboo_smemtbl[toset];
      } else if(toset == ltopcore) {
        bamboo_smemtbl[toset] = load;
        gcmem_mixed_usedmem += bamboo_smemtbl[toset];
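// Note on the loop above: core coren owns the two block columns
// gc_core2block[2*coren] and gc_core2block[2*coren+1], repeated with a
// stride of NUMCORES4GC*2 blocks. Hypothetical sketch: with NUMCORES4GC==2
// and gc_core2block=={0,3,1,2}, core 0 would own blocks 0,3,4,7,... and
// core 1 blocks 1,2,5,6,...; the real mapping is whatever gc_core2block holds.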
INLINE unsigned int checkCurrHeapTop() {
  // update the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // flush all gcloads to indicate the real heap top on each core;
  // previously a gcload represented the next available ptr on a core
  if(((unsigned int)gcloads[0] > (unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))
     && (((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block; check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
  updateSmemTbl(0, gcloads[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    unsigned int tmptop = 0;
    if((gcfilledblocks[i] > 0)
       && (((unsigned int)gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block; check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
    updateSmemTbl(i, gcloads[i]);
  // find the current heap top
  // BUG: when using local allocation, moving large objects directly to the
  // highest free chunk might not be memory efficient
  unsigned int tmpheaptop = 0;
  for(int i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      return gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
          (BAMBOO_SMEM_SIZE_L*i) :
          (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
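// Worked example with hypothetical sizes: if NUMCORES4GC==4 and block 6 is
// the highest non-empty block with bamboo_smemtbl[6]==256 bytes used, the
// expression above yields
//   gcbaseva + BAMBOO_LARGE_SMEM_BOUND + 2*BAMBOO_SMEM_SIZE + 256.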
INLINE void moveLObjs() {
  // update gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
  unsigned int size = 0;
  unsigned int bound = 0;
  unsigned int tmpheaptop = checkCurrHeapTop();
  // move the large objs from gcheaptop to tmpheaptop;
  // write the headers first
  unsigned int tomove = gcbaseva+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
  gcmem_mixed_usedmem += tomove;
  // flush the gcsbstarttbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-(unsigned int)gcreservedsb)
      *sizeof(unsigned int));
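  // gcsbstarttbl[] appears to map each sblock to where object data starts
  // inside it: below, entries are set to -1 for sblocks fully covered by a
  // spanning large object, and to the object's start address otherwise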
  gcheaptop = tmpheaptop;
  // check how many blocks it spans
  unsigned int remain = tmpheaptop-gcbaseva;
  // number of the sblock
  unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;
  unsigned int b = 0; // number of the block
  BLOCKINDEX(tmpheaptop, &b);
  // check the remaining space in this block
  bound = (BAMBOO_SMEM_SIZE);
  if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  remain = bound - remain%bound;

  unsigned int isize = 0;
  unsigned int host = 0;
  unsigned int ptr = 0;
  unsigned int base = tmpheaptop;
  unsigned int cpysize = 0;
  remain -= BAMBOO_CACHE_LINE_SIZE;
  tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
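  // each block seems to start with a BAMBOO_CACHE_LINE_SIZE header
  // (written when the block is sealed via CLOSEBLOCK), so a cache line is
  // reserved here before any large objs are copied in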
  gc_lobjqueueinit4_I();
  while(gc_lobjmoreItems4_I()) {
    ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
    ALIGNSIZE(size, &isize);
    if(remain >= isize) {
      // move the large obj
      if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
        memmove(tmpheaptop, gcheaptop, size);
        memcpy(tmpheaptop, gcheaptop, size);
      // fill the remaining space with -2 padding
      BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
      // cache the mapping info
      gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
      // update bamboo_smemtbl
      bamboo_smemtbl[b] += isize;
      // this object spans blocks
      CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
      remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
          BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
      remain -= BAMBOO_CACHE_LINE_SIZE;
      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      BLOCKINDEX(tmpheaptop, &b);
      sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)+gcreservedsb;
      // move the large obj
      if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
        memmove(tmpheaptop, gcheaptop, size);
        memcpy(tmpheaptop, gcheaptop, size);
      // fill the remaining space with -2 padding
      BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
      // cache the mapping info
      gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
      // set the gcsbstarttbl and bamboo_smemtbl
      unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
      for(int k = 1; k < tmpsbs; k++) {
        gcsbstarttbl[sb+k] = -1;
      bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
      BLOCKINDEX(tmpheaptop-1, &tmpsbs);
      for(; b < tmpsbs; b++) {
        bamboo_smemtbl[b] = bound;
        if(b==NUMCORES4GC-1) {
          bound = BAMBOO_SMEM_SIZE;
      if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
        gcsbstarttbl[sb] = -1;
        remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
            BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        bamboo_smemtbl[b] = bound;
        gcsbstarttbl[sb] = (int)tmpheaptop;
        remain = tmpheaptop-gcbaseva;
        bamboo_smemtbl[b] = remain%bound;
        remain = bound - bamboo_smemtbl[b];
      CLOSEBLOCK(base, isize+BAMBOO_CACHE_LINE_SIZE);
      if(remain == BAMBOO_CACHE_LINE_SIZE) {
        // fill it with 0 just in case
        BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
      remain -= BAMBOO_CACHE_LINE_SIZE;
      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
  CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
  bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
  tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
  gcheaptop = tmpheaptop;
  bamboo_free_block = 0;
  unsigned int tbound = 0;
  tbound=(bamboo_free_block<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
  if(bamboo_smemtbl[bamboo_free_block] == tbound) {
  // the first non-full partition
  GCPROFILE_RECORD_SPACE();
void gc_collect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to
  // do gc
  send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);

  // core collector routine
  // wait for the init phase
  WAITFORGCPHASE(INITPHASE);
  GC_PRINTF("Do initGC\n");
  // send the init finish msg to the core coordinator
  send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
  // wait for the mark phase
  WAITFORGCPHASE(MARKPHASE);
  GC_PRINTF("Start mark phase\n");
  mark(true, stackptr);
  GC_PRINTF("Finish mark phase, start compact phase\n");
  GC_PRINTF("Finish compact phase\n");
  WAITFORGCPHASE(FLUSHPHASE);
  GC_PRINTF("Start flush phase\n");
  GCPROFILE_INFO_2_MASTER();
  GC_PRINTF("Finish flush phase\n");
  CACHEADAPT_PHASE_CLIENT();
  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;
  WAITFORGCPHASE(FINISHPHASE);
  GC_PRINTF("Finish gc! \n");
void gc_nocollect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to
  // do gc
  send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);

  WAITFORGCPHASE(INITPHASE);
  GC_PRINTF("Do initGC\n");
  // send the init finish msg to the core coordinator
  send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
  WAITFORGCPHASE(MARKPHASE);
  GC_PRINTF("Start mark phase\n");
  mark(true, stackptr);
  GC_PRINTF("Finish mark phase, wait for flush\n");

  // non-gc core collector routine
  WAITFORGCPHASE(FLUSHPHASE);
  GC_PRINTF("Start flush phase\n");
  GCPROFILE_INFO_2_MASTER();
  GC_PRINTF("Finish flush phase\n");
  CACHEADAPT_PHASE_CLIENT();
  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;
  WAITFORGCPHASE(FINISHPHASE);
  GC_PRINTF("Finish gc! \n");
void master_mark(struct garbagelist *stackptr) {
  GC_PRINTF("Start mark phase \n");
  GC_SEND_MSG_1_TO_CLIENT(GCSTART);
  while(MARKPHASE == gcphase) {
    mark(isfirst, stackptr);
void master_getlargeobjs() {
  // send msgs to all cores requesting their large-obj info
  // note: only gc cores need to be asked; non-gc cores do not host any objs
  numconfirm = NUMCORES4GC - 1;
  for(int i = 1; i < NUMCORES4GC; i++) {
    send_msg_1(i,GCLOBJREQUEST);
  gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
  // spin until we have all responses
  while(numconfirm!=0) ;
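  // numconfirm counts outstanding replies; the handler for the incoming
  // large-obj info messages is presumably what decrements it, ending this
  // busy-wait once every gc core has answered the GCLOBJREQUEST above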
  if(gcheaptop < gcmarkedptrbound) {
    gcheaptop = gcmarkedptrbound;
  GC_PRINTF("prepare to cache large objs \n");
  // cache all large objs
  BAMBOO_ASSERTMSG(cacheLObjs(), "Not enough space to cache large objects\n");
void master_compact() {
  // predict the number of blocks to fill for each core
  unsigned int tmpheaptop = 0;
  int numpbc = loadbalance(&tmpheaptop);
  numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
  GC_PRINTF("mark phase finished \n");
  tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  for(int i = 0; i < NUMCORES4GC; i++) {
    unsigned int tmpcoreptr = 0;
    BASEPTR(i, numpbc, &tmpcoreptr);
    // init some data structures for the compact phase
    gcfilledblocks[i] = 0;
    gcrequiredmems[i] = 0;
    // send start compact messages to all cores
    // TODO: bug here, do not know if the direction is positive or negative?
    if (tmpcoreptr < tmpheaptop) {
      gcstopblock[i] = numpbc + 1;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
        gcblock2fill = numpbc+1;
      gcstopblock[i] = numpbc;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc);
        gcblock2fill = numpbc;
  struct moveHelper * orig = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  struct moveHelper * to = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  compact_master(orig, to);
  GC_PRINTF("prepare to move large objs \n");
  GC_PRINTF("compact phase finished \n");
void master_updaterefs(struct garbagelist * stackptr) {
  gcphase = FLUSHPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
  GC_PRINTF("Start flush phase \n");
  // now the master core needs to decide the new cache strategy
  GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gcphase);
  GC_PRINTF("Finish flush phase \n");
void master_finish() {
  gcphase = FINISHPHASE;

  // invalidate all shared mem pointers
  // (done here because it takes time to inform all the other cores that
  // gc has finished, and problems could arise if some cores resumed the
  // mutator earlier than the others)
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;
  GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
  gcprocessing = false;
  // inform the other cores to stop and wait for gc
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    // reuse the gcnumsendobjs & gcnumreceiveobjs arrays
    gcnumsendobjs[0][i] = 0;
    gcnumreceiveobjs[0][i] = 0;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
void gc_master(struct garbagelist * stackptr) {
  tprintf("start GC !!!!!!!!!!!!! \n");
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
  GC_PRINTF("Check core status \n");
  GC_CHECK_ALL_CORE_STATUS(true);
  CACHEADAPT_OUTPUT_CACHE_SAMPLING();

  master_mark(stackptr);

  // get the large objects from all cores
  master_getlargeobjs();

  // update the references
  master_updaterefs(stackptr);

  // do cache adaptation
  CACHEADAPT_PHASE_MASTER();

  // do the finish-up work
  GC_PRINTF("gc finished \n");
  tprintf("finish GC ! %d \n", gcflag);
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
  gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sumsendobj += gcnumsendobjs[0][i];
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    sumsendobj -= gcnumreceiveobjs[0][i];
  if(0 != sumsendobj) {
    // there are still msgs in flight; wait until updated pre-gc
    // information arrives, then check again
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
void pregcprocessing() {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // disable the timer interrupt
  bamboo_mask_timer_intr();
  // Zero out the remaining memory here: for the GC_CACHE_ADAPT version we
  // must make sure the shared heap is not touched during the gcinit phase,
  // otherwise there would be problems when adapting the cache strategy.
  BAMBOO_CLOSE_CUR_MSP();
  if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // get the sampling data
  bamboo_output_dtlb_sampling();
void postgcprocessing() {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // enable the timer interrupt
  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
  bamboo_unmask_timer_intr();
bool gc(struct garbagelist * stackptr) {
  gcprocessing = false;

  // core coordinator routine
  if(0 == BAMBOO_NUM_OF_CORE) {
    GC_PRINTF("Check if we can do gc or not\n");
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    if(!gc_checkAllCoreStatus()) {
      // some cores are still executing the mutator and have not reached a
      // gc safe point, so gc cannot start yet
    GC_PRINTF("start gc! \n");
  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
    gc_collect(stackptr);
    gc_nocollect(stackptr);