// BAMBOO_EXIT(0xb000);
// TODO: tags are NOT supported!!!

#include "multicoregarbage.h"
#include "multicoregcmark.h"
#include "multicoregccompact.h"
#include "multicoregcflush.h"
#include "multicoreruntime.h"
#include "multicoregcprofile.h"

struct pointerblock *gchead=NULL;
struct pointerblock *gctail=NULL;
struct pointerblock *gctail2=NULL;
struct pointerblock *gcspare=NULL;

struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
struct lobjpointerblock *gclobjspare=NULL;

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
#endif // MULTICORE_GC
// dump the whole shared mem in blocks
printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
// reserved blocks for the sblock table
printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
for(i=BAMBOO_BASE_VA; (unsigned int)i<(unsigned int)gcbaseva; i+= 4*16) {
  printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
         udn_tile_coord_x(), udn_tile_coord_y(),
         *((int *)(i)), *((int *)(i + 4)),
         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
sblock = gcreservedsb;
bool advanceblock = false;
for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
  // compute the sblock #, block # and the core coordinates (x,y)
  if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
    if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
      if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
  coren = gc_block2core[block%(NUMCORES4GC*2)];
  // compute the core coordinates
  x = BAMBOO_COORDS_X(coren);
  y = BAMBOO_COORDS_Y(coren);
  printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
         udn_tile_coord_x(), udn_tile_coord_y(),
         block, sblock++, x, y,
         (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
  printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
         udn_tile_coord_x(), udn_tile_coord_y(),
         *((int *)(i)), *((int *)(i + 4)),
         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
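
/* Illustrative sketch (a hypothetical helper, not the runtime's BLOCKINDEX
 * macro): the heap layout implied above is NUMCORES4GC "large" blocks of
 * size BAMBOO_SMEM_SIZE_L up to BAMBOO_LARGE_SMEM_BOUND, followed by blocks
 * of BAMBOO_SMEM_SIZE. Under that assumption, an address maps to a block
 * index like this: */
static unsigned int example_block_index(unsigned int addr,
                                        unsigned int baseva,
                                        unsigned int large_bound,
                                        unsigned int size_l,
                                        unsigned int size,
                                        unsigned int ncores4gc) {
  unsigned int off = addr - baseva;
  if(off < large_bound)
    return off / size_l;                          // one of the large blocks
  return ncores4gc + (off - large_bound) / size;  // a regular block
}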
void initmulticoregcdata() {
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    // startup core to initialize corestatus[]
    for(i = 0; i < NUMCORESACTIVE; ++i) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    for(i = 0; i < NUMCORES4GC; ++i) {
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;

  bamboo_smem_zero_top = NULL;
  gcprocessing = false;
  gcphase = FINISHPHASE;
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  gcforwardobjtbl = allocateMGCHash_I(20, 3);
  gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE
        -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
  gcmem_mixed_usedmem = 0;
  gc_profile_flag = false;
  gc_num_flush_dtlb = 0;
  gc_localheap_s = false;
#ifdef GC_CACHE_ADAPT
  gccachestage = false;
  INIT_MULTICORE_GCPROFILE_DATA();
void dismulticoregcdata() {
  freeMGCHash(gcforwardobjtbl);

if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
  for(i = 0; i < NUMCORES4GC; ++i) {
    gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
    gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    gcrequiredmems[i] = 0;
    gcfilledblocks[i] = 0;
  for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
    gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
    gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
gcnumsrobjs_index = 0;
gcself_numsendobjs = 0;
gcself_numreceiveobjs = 0;
gcmarkedptrbound = 0;
gcheadindex=gctailindex=gctailindex2 = 0;
gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
gctailindex=gctailindex2=gcheadindex=0;
gctail=gctail2=gchead;
// initialize the large obj queues
if (gclobjhead==NULL) {
  gclobjhead=gclobjtail=gclobjtail2=
    RUNMALLOC(sizeof(struct lobjpointerblock));
gclobjtailindex=gclobjtailindex2=gclobjheadindex=0;
gclobjtail=gclobjtail2=gclobjhead;
gclobjhead->next=gclobjhead->prev=NULL;
freeMGCHash(gcforwardobjtbl);
gcforwardobjtbl = allocateMGCHash(20, 3);

bool gc_checkAllCoreStatus_I() {
  for(i = 0; i < NUMCORESACTIVE; ++i) {
    if(gccorestatus[i] != 0) {
INLINE void checkMarkStatus() {
    (waitconfirm && (numconfirm == 0))) {
  unsigned int entry_index = 0;
    entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    entry_index = gcnumsrobjs_index;
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
  gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
  gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
  // check the status of all cores
  if (gc_checkAllCoreStatus_I()) {
    // first time we find all cores stalled:
    // send status confirm msgs to all other cores
    // and reset the corestatus array too
    gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
    numconfirm = NUMCORESACTIVE - 1;
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    for(i = 1; i < NUMCORESACTIVE; ++i) {
      // send a mark-phase finish confirm request msg to core i
      send_msg_1(i, GCMARKCONFIRM, false);

    // check if the sums of sent and received objs match
    // yes -> check if the info is up to date; no -> keep executing
    unsigned int sumsendobj = 0;
    for(i = 0; i < NUMCORESACTIVE; ++i) {
      sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
    for(i = 0; i < NUMCORESACTIVE; ++i) {
      sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
    if(0 == sumsendobj) {
      // check if the numsendobjs or numreceiveobjs changed on any core
      bool ischanged = false;
      for(i = 0; i < NUMCORESACTIVE; ++i) {
        if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
           (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {

        // all core status info is up to date; stop the mark phase
        gcphase = COMPACTPHASE;
        // restore the gcstatus for all cores
        for(i = 0; i < NUMCORESACTIVE; ++i) {

        // there were changes between phase 1 and phase 2, so we cannot decide
        // whether the mark phase has finished
        // as the check fails in phase 2, flip the entries
        gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;

      // there were changes between phase 1 and phase 2, so we cannot decide
      // whether the mark phase has finished
      // as the check fails in phase 2, flip the entries
      gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
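
/* A minimal illustrative sketch (hypothetical helper, not part of the
 * runtime): the test above declares the mark phase finished only when one
 * snapshot balances (total sends == total receives) AND the two snapshots
 * agree per core, i.e. no message was in flight between the two samples. */
static int example_mark_terminated(const unsigned int *sent0,
                                   const unsigned int *recv0,
                                   const unsigned int *sent1,
                                   const unsigned int *recv1,
                                   int ncores) {
  long long balance = 0;
  for(int i = 0; i < ncores; i++)
    balance += (long long)sent0[i] - (long long)recv0[i];
  if(balance != 0) return 0;               // msgs still unaccounted for
  for(int i = 0; i < ncores; i++)
    if(sent0[i] != sent1[i] || recv0[i] != recv1[i])
      return 0;                            // counts changed between snapshots
  return 1;                                // safe to leave the mark phase
}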
// compute load balance for all cores
INLINE int loadbalance(unsigned int * heaptop) {
  // compute load balance
  // get the total loads
  unsigned int tloads = gcloads[STARTUPCORE];
  for(i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  *heaptop = gcbaseva + tloads;

  BLOCKINDEX(*heaptop, &b);
  // num of blocks per core
  unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);
  RESIDECORE(heaptop, &gctopcore);
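
/* Illustrative sketch with hypothetical numbers: with gcbaseva = 0x800000
 * and per-core loads {0x4000, 0x2000, 0x6000, 0x4000}, the compacted heap
 * top is 0x800000 + 0x10000, and the block count at that address divided by
 * NUMCORES4GC gives the number of blocks each core should fill. */
static unsigned int example_total_load_top(unsigned int baseva,
                                           const unsigned int *loads,
                                           unsigned int ncores) {
  unsigned int tloads = 0;
  for(unsigned int i = 0; i < ncores; i++)
    tloads += loads[i];          // live bytes surviving on each core
  return baseva + tloads;        // everything compacts below this address
}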
// compute the total mem size required and sort the lobjs in ascending order
INLINE unsigned int sortLObjs() {
  unsigned int tmp_lobj = 0;
  unsigned int tmp_len = 0;
  unsigned int tmp_host = 0;
  unsigned int sumsize = 0;

  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  // TODO USE QUICK SORT INSTEAD?
  while(gc_lobjmoreItems2_I()) {
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    GCPROFILE_RECORD_LOBJ();
    unsigned int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
      if(tmp_block->prev == NULL) {
      if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
        tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
        tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
        tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
        tmp_block = tmp_block->prev;
      } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
      if(tmp_block->lobjs[i-1] > tmp_lobj) {
        tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
        tmp_block->lengths[i] = tmp_block->lengths[i-1];
        tmp_block->hosts[i] = tmp_block->hosts[i-1];
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
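
/* Illustrative sketch: the loop above is an insertion sort over the linked
 * lobjpointerblock queue, keyed on object address, with lengths and hosts
 * carried along. An array-based equivalent of one insertion step
 * (hypothetical parallel arrays mirroring lobjs/lengths/hosts): */
static void example_insert_sorted(unsigned int *objs, unsigned int *lens,
                                  unsigned int *hosts, int n,
                                  unsigned int obj, unsigned int len,
                                  unsigned int host) {
  int i = n;
  while(i > 0 && objs[i-1] > obj) {  // shift larger entries up one slot
    objs[i] = objs[i-1];
    lens[i] = lens[i-1];
    hosts[i] = hosts[i-1];
    i--;
  }
  objs[i] = obj;                     // drop the new entry into its slot
  lens[i] = len;
  hosts[i] = host;
}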
INLINE bool cacheLObjs() {
  // check the total mem size needed for the large objs
  unsigned long long sumsize = 0;
  unsigned int size = 0;
  sumsize = sortLObjs();
  GCPROFILE_RECORD_LOBJSPACE();

  // check if there is enough space to cache these large objs
  unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // not enough room to cache the large objs

  gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
  // cache the large objs at the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to indicate that this obj has been moved
    // and needs to be flushed
    ((struct ___Object___ *)(gclobjtail2->lobjs[gclobjtailindex2]))->marked =
    if((unsigned int)dst <
       (unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
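
/* Illustrative sketch: the memmove/memcpy split above exists because the
 * destination may overlap the object being cached; memcpy is only safe for
 * disjoint regions. A standalone version of the same guard: */
#include <string.h>
static void example_copy_maybe_overlap(void *dst, const void *src,
                                       size_t size) {
  if((const char *)dst < (const char *)src + size &&
     (const char *)src < (const char *)dst + size)
    memmove(dst, src, size);  // regions overlap: memmove copies safely
  else
    memcpy(dst, src, size);   // regions disjoint: memcpy is fine
}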
// update the bamboo_smemtbl to record the current shared mem usage
void updateSmemTbl(unsigned int coren,
                   unsigned int localtop) {
  unsigned int ltopcore = 0;
  unsigned int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
    bound = BAMBOO_SMEM_SIZE;
  unsigned int load = (unsigned int)(localtop-gcbaseva)%(unsigned int)bound;
  unsigned int toset = 0;
      toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
      if(toset < ltopcore) {
        bamboo_smemtbl[toset]=(toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        gcmem_mixed_usedmem += bamboo_smemtbl[toset];
      } else if(toset == ltopcore) {
        bamboo_smemtbl[toset] = load;
        gcmem_mixed_usedmem += bamboo_smemtbl[toset];
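
/* Illustrative sketch: with the gc_core2block mapping used above, the blocks
 * owned by core coren are gc_core2block[2*coren+i] + (NUMCORES4GC*2)*j for
 * i in {0,1} and j = 0,1,... The hypothetical helper below lists the first
 * `count` of them (enumeration order here is illustrative only). */
static void example_core_blocks(const unsigned int *core2block,
                                unsigned int coren, unsigned int ncores4gc,
                                unsigned int count, unsigned int *out) {
  for(unsigned int k = 0; k < count; k++) {
    unsigned int i = k % 2;            // which of the core's two slots
    unsigned int j = k / 2;            // which stripe of the heap
    out[k] = core2block[2*coren + i] + (ncores4gc*2) * j;
  }
}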
INLINE unsigned int checkCurrHeapTop() {
  // update the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // flush all gcloads so they indicate the real heap top on each core
  // (previously a gcload was the next available ptr on a core)
  if(((unsigned int)gcloads[0] > (unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))
     && (((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block; check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
  updateSmemTbl(0, gcloads[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    unsigned int tmptop = 0;
    if((gcfilledblocks[i] > 0)
       && (((unsigned int)gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block; check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
        (gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
    updateSmemTbl(i, gcloads[i]);

  // find the current heap top
  // BUG: when using local allocation, directly moving large objects
  // to the highest free chunk might not be memory efficient
  unsigned int tmpheaptop = 0;
  for(i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
    tmpheaptop = gcbaseva;
  tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
      (BAMBOO_SMEM_SIZE_L*i) :
      (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
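
/* Illustrative sketch: the scan above walks bamboo_smemtbl from the highest
 * block down to the first non-empty one; the heap top is that block's start
 * address plus its used bytes. The start-address math below mirrors the
 * ternary expression used above (large blocks first, regular blocks after
 * BAMBOO_LARGE_SMEM_BOUND); names are hypothetical parameters. */
static unsigned int example_heap_top(const unsigned int *smemtbl, int nblocks,
                                     unsigned int baseva, unsigned int size_l,
                                     unsigned int size, int ncores4gc,
                                     unsigned int large_bound) {
  for(int i = nblocks - 1; i >= 0; i--) {
    if(smemtbl[i] > 0) {
      unsigned int start = (i < ncores4gc)
          ? baseva + size_l * (unsigned int)i
          : baseva + large_bound + size * (unsigned int)(i - ncores4gc);
      return start + smemtbl[i];
    }
  }
  return baseva;  // no block is used: heap top is the base address
}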
INLINE void moveLObjs() {
  // update gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
  unsigned int size = 0;
  unsigned int bound = 0;
  unsigned int tmpheaptop = checkCurrHeapTop();

  // move the large objs from gcheaptop to tmpheaptop
  // write the header first
  unsigned int tomove = gcbaseva+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
  gcmem_mixed_usedmem += tomove;
  // flush the sbstarttbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-(unsigned int)gcreservedsb)
      *sizeof(unsigned int));
  gcheaptop = tmpheaptop;

  // check how many blocks it spans
  unsigned int remain = tmpheaptop-gcbaseva;
  // number of the sblock
  unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;
  unsigned int b = 0;  // number of the block
  BLOCKINDEX(tmpheaptop, &b);
  // check the remaining space in this block
  bound = (BAMBOO_SMEM_SIZE);
  if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  remain = bound - remain%bound;

  unsigned int isize = 0;
  unsigned int host = 0;
  unsigned int ptr = 0;
  unsigned int base = tmpheaptop;
  unsigned int cpysize = 0;
  remain -= BAMBOO_CACHE_LINE_SIZE;
  tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
  gc_lobjqueueinit4_I();
  while(gc_lobjmoreItems4_I()) {
    ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
    ALIGNSIZE(size, &isize);
    if(remain >= isize) {
      // move the large obj
      if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
        memmove(tmpheaptop, gcheaptop, size);
        memcpy(tmpheaptop, gcheaptop, size);
      // fill the remaining space with -2 padding
      BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
      // cache the mapping info
      gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
      // update bamboo_smemtbl
      bamboo_smemtbl[b] += isize;
      // this object spans multiple blocks
      CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
      remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
          BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
      remain -= BAMBOO_CACHE_LINE_SIZE;
      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      BLOCKINDEX(tmpheaptop, &b);
      sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)+gcreservedsb;
      // move the large obj
      if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
        memmove(tmpheaptop, gcheaptop, size);
        memcpy(tmpheaptop, gcheaptop, size);
      // fill the remaining space with -2 padding
      BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
      // cache the mapping info
      gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
      // set the gcsbstarttbl and bamboo_smemtbl
      unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
      for(int k = 1; k < tmpsbs; k++) {
        gcsbstarttbl[sb+k] = -1;
      bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
      BLOCKINDEX(tmpheaptop-1, &tmpsbs);
      for(; b < tmpsbs; b++) {
        bamboo_smemtbl[b] = bound;
        if(b==NUMCORES4GC-1) {
          bound = BAMBOO_SMEM_SIZE;
      if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
        gcsbstarttbl[sb] = -1;
        remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
            BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        bamboo_smemtbl[b] = bound;
        gcsbstarttbl[sb] = (int)tmpheaptop;
        remain = tmpheaptop-gcbaseva;
        bamboo_smemtbl[b] = remain%bound;
        remain = bound - bamboo_smemtbl[b];
      CLOSEBLOCK(base, isize+BAMBOO_CACHE_LINE_SIZE);

    if(remain == BAMBOO_CACHE_LINE_SIZE) {
      // fill with 0 just in case
      BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
      remain -= BAMBOO_CACHE_LINE_SIZE;
      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;

  CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
  bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
  tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
  gcheaptop = tmpheaptop;

  bamboo_free_block = 0;
  unsigned int tbound = 0;
  tbound=(bamboo_free_block<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
  if(bamboo_smemtbl[bamboo_free_block] == tbound) {
  // the first non-full partition

  GCPROFILE_RECORD_SPACE();
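
/* Illustrative sketch: each large obj is laid down at an aligned size
 * isize >= size, and the tail gap [size, isize) is filled with -2 bytes so
 * later scans can recognize the padding. EXAMPLE_ALIGNMENT is an assumed
 * power-of-two constant standing in for the runtime's ALIGNSIZE macro. */
#include <string.h>
#define EXAMPLE_ALIGNMENT 32
static unsigned int example_place_lobj(char *dst, const char *src,
                                       unsigned int size) {
  unsigned int isize =
      (size + EXAMPLE_ALIGNMENT - 1) & ~(unsigned int)(EXAMPLE_ALIGNMENT - 1);
  memmove(dst, src, size);              // src and dst may overlap mid-compaction
  memset(dst + size, -2, isize - size); // mark the unused tail bytes
  return isize;                         // caller advances the heap top by isize
}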
void gc_collect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to do gc
  send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
             self_numreceiveobjs, false);

  // core collector routine
  // wait for the init phase
  WAITFORGCPHASE(INITPHASE);
  GC_PRINTF("Do initGC\n");
  // send the init finish msg to the core coordinator
  send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);

  // wait for the mark phase
  WAITFORGCPHASE(MARKPHASE);
  GC_PRINTF("Start mark phase\n");
  mark(true, stackptr);
  GC_PRINTF("Finish mark phase, start compact phase\n");
  GC_PRINTF("Finish compact phase\n");

  WAITFORGCPHASE(FLUSHPHASE);
  GC_PRINTF("Start flush phase\n");
  GCPROFILE_INFO_2_MASTER();
  GC_PRINTF("Finish flush phase\n");

  CACHEADAPT_PHASE_CLIENT();

  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  WAITFORGCPHASE(FINISHPHASE);
  GC_PRINTF("Finish gc! \n");
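
/* Illustrative sketch: every client phase above follows the same handshake:
 * spin until the master flips the shared phase flag, do the local work, then
 * report completion with a send_msg_* call. The hypothetical helper below
 * captures what a WAITFORGCPHASE-style macro has to do. */
static volatile int example_gcphase;    // stands in for the shared gcphase
static void example_wait_for_phase(int phase) {
  while(example_gcphase != phase) {
    // spin; on real hardware this wait may also service incoming msgs
  }
}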
void gc_nocollect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to do gc
  send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
             self_numreceiveobjs, false);

  WAITFORGCPHASE(INITPHASE);
  GC_PRINTF("Do initGC\n");
  // send the init finish msg to the core coordinator
  send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);

  WAITFORGCPHASE(MARKPHASE);
  GC_PRINTF("Start mark phase\n");
  mark(true, stackptr);
  GC_PRINTF("Finish mark phase, wait for flush\n");

  // non-gc core collector routine
  WAITFORGCPHASE(FLUSHPHASE);
  GC_PRINTF("Start flush phase\n");
  GCPROFILE_INFO_2_MASTER();
  GC_PRINTF("Finish flush phase\n");

  CACHEADAPT_PHASE_CLIENT();

  // invalidate all shared mem pointers
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  WAITFORGCPHASE(FINISHPHASE);
  GC_PRINTF("Finish gc! \n");
void master_mark(struct garbagelist *stackptr) {
  GC_PRINTF("Start mark phase \n");
  GC_SEND_MSG_1_TO_CLIENT(GCSTART);
  while(MARKPHASE == gcphase) {
    mark(isfirst, stackptr);

void master_getlargeobjs() {
  // send msgs to all cores requesting their large objs info
  // Note: only the gc cores need to be asked; non-gc cores do not host any objs
  numconfirm = NUMCORES4GC - 1;
  for(i = 1; i < NUMCORES4GC; ++i) {
    send_msg_1(i, GCLOBJREQUEST, false);
  gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
  // spin until we have all responses
  if(gcheaptop < gcmarkedptrbound) {
    gcheaptop = gcmarkedptrbound;
  GC_PRINTF("prepare to cache large objs \n");
  // cache all large objs
  // not enough space to cache the large objs
  GC_PRINTF("Not enough space to cache large objects\n");
void master_compact() {
  // predict the number of blocks to fill for each core
  unsigned int tmpheaptop = 0;
  int numpbc = loadbalance(&tmpheaptop);
  numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
  GC_PRINTF("mark phase finished \n");

  tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  for(i = 0; i < NUMCORES4GC; ++i) {
    unsigned int tmpcoreptr = 0;
    BASEPTR(i, numpbc, &tmpcoreptr);
    // init some data structures for the compact phase
    gcfilledblocks[i] = 0;
    gcrequiredmems[i] = 0;
    // send start compact messages to all cores
    // TODO bug here: we do not know if the direction is positive or negative
    if (tmpcoreptr < tmpheaptop) {
      gcstopblock[i] = numpbc + 1;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
        gcblock2fill = numpbc+1;
      gcstopblock[i] = numpbc;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
        gcblock2fill = numpbc;

  struct moveHelper * orig = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  struct moveHelper * to = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  compact_master(orig, to);
  GC_PRINTF("prepare to move large objs \n");
  GC_PRINTF("compact phase finished \n");
void master_updaterefs() {
  gcphase = FLUSHPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
  GC_PRINTF("Start flush phase \n");
  // now the master core needs to decide the new cache strategy
  GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gcphase);
  GC_PRINTF("Finish flush phase \n");

void master_finish() {
  gcphase = FINISHPHASE;
  // invalidate all shared mem pointers
  // do this here, as it takes time to inform all the other cores to
  // finish gc, and it might cause problems if some core resumes the
  // mutator earlier than the other cores
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
  gcprocessing = false;

// inform the other cores to stop and wait for gc
for(int i = 0; i < NUMCORESACTIVE; i++) {
  // reuse the gcnumsendobjs & gcnumreceiveobjs
  gcnumsendobjs[0][i] = 0;
  gcnumreceiveobjs[0][i] = 0;
GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
void gc_master(struct garbagelist * stackptr) {
  tprintf("start GC !!!!!!!!!!!!! \n");
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
  GC_PRINTF("Check core status \n");
  GC_CHECK_ALL_CORE_STATUS(true);
  CACHEADAPT_OUTPUT_CACHE_SAMPLING();

  master_mark(stackptr);

  // get large objects from all cores
  master_getlargeobjs();

  // update the references
  // do cache adaptation
  CACHEADAPT_PHASE_MASTER();

  // do finish up stuff
  GC_PRINTF("gc finished \n");
  tprintf("finish GC ! %d \n", gcflag);
void pregccheck_I() {
  gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
  gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
  for(i = 0; i < NUMCORESACTIVE; ++i) {
    sumsendobj += gcnumsendobjs[0][i];
  for(i = 0; i < NUMCORESACTIVE; ++i) {
    sumsendobj -= gcnumreceiveobjs[0][i];
  if(0 != sumsendobj) {
    // there are still msgs on the fly; wait until updated pregc
    // information arrives, then check again
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
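
/* Illustrative sketch: unlike the mark-phase check, a nonzero balance here
 * only means pregc msgs are still in flight, so the core drops back to
 * client mode to let them drain and then re-checks. Hypothetical callbacks
 * stand in for the runtime's mode switches and counters. */
static void example_pregc_wait(long long (*msg_balance)(void),
                               void (*drain_in_client_mode)(void)) {
  while(msg_balance() != 0) {  // total sends minus total receives
    drain_in_client_mode();    // let pending messages arrive, then re-check
  }
}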
void pregcprocessing() {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // disable the timer interrupt
  bamboo_mask_timer_intr();
  // Zero out the remaining memory here because, for the GC_CACHE_ADAPT
  // version, we need to make sure the shared heap is not touched during the
  // gcinit phase. Otherwise, there would be problems when adapting the cache
  // strategy.
  BAMBOO_CLOSE_CUR_MSP();
  if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // get the sampling data
  bamboo_output_dtlb_sampling();

void postgcprocessing() {
#if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
  // enable the timer interrupt
  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
  bamboo_unmask_timer_intr();
bool gc(struct garbagelist * stackptr) {
  gcprocessing = false;

  // core coordinator routine
  if(0 == BAMBOO_NUM_OF_CORE) {
    GC_PRINTF("Check if we can do gc or not\n");
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if(!gc_checkAllCoreStatus_I()) {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      // some cores are still executing the mutator and have not reached
      // a gc safe point, so gc cannot start yet
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    GC_PRINTF("start gc! \n");
    gc_master(stackptr);
  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
    gc_collect(stackptr);
    gc_nocollect(stackptr);