1 // TODO: tagged objects are NOT supported by this collector!!!
4 #include "multicoreruntime.h"
5 #include "multicoregarbage.h"
6 #include "multicoregcmark.h"
8 #include "multicoregccompact.h"
9 #include "multicoregcflush.h"
10 #include "multicoregcprofile.h"
// Threshold (bytes) at which the mixed allocation policy kicks in;
// defined in another translation unit.
14 extern unsigned int gcmem_mixed_threshold;
// Running total of shared memory consumed under the mixed policy.
15 extern unsigned int gcmem_mixed_usedmem;
// Global GC state (current phase + whether a collection is in progress).
19 gc_status_t gc_status_info;
// Accumulated time spent emitting cache-policy output (profiling only).
21 unsigned long long gc_output_cache_policy_time=0;
24 // dump whole mem in blocks
33 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
34 // reserved blocks for sblocktbl
35 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
37 for(i=BAMBOO_BASE_VA; (unsinged int)i<(unsigned int)gcbaseva; i+= 4*16) {
38 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
39 udn_tile_coord_x(), udn_tile_coord_y(),
40 *((int *)(i)), *((int *)(i + 4)),
41 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
42 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
43 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
44 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
45 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
46 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
47 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
49 sblock = gcreservedsb;
50 bool advanceblock = false;
52 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
54 // computing sblock # and block #, core coordinate (x,y) also
55 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
57 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
58 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
70 coren = gc_block2core[block%(NUMCORES4GC*2)];
72 // compute core coordinate
73 x = BAMBOO_COORDS_X(coren);
74 y = BAMBOO_COORDS_Y(coren);
75 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
76 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
77 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
80 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
81 udn_tile_coord_x(), udn_tile_coord_y(),
82 *((int *)(i)), *((int *)(i + 4)),
83 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
84 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
85 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
86 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
87 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
88 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
89 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
91 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
// One-time initialization of per-core GC bookkeeping.
// On the startup core it zeroes the shared status/counter arrays for all
// active cores; on every core it resets the local GC counters and allocates
// the forwarding-pointer hash table. (Interior lines are elided in this
// listing.)
95 void initmulticoregcdata() {
96 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
97 // startup core to initialize corestatus[]
98 for(int i = 0; i < NUMCORESACTIVE; i++) {
100 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
101 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
103 for(int i = 0; i < NUMCORES4GC; i++) {
105 gcrequiredmems[i] = 0;
107 gcfilledblocks[i] = 0;
111 bamboo_smem_zero_top = NULL;
113 gc_status_info.gcprocessing = false;
114 gc_status_info.gcphase = FINISHPHASE;
// Per-core counters/state reset on every core.
118 gcself_numsendobjs = 0;
119 gcself_numreceiveobjs = 0;
120 gcmarkedptrbound = 0;
// Forwarding table: 20 buckets, load factor 3 — TODO confirm parameter
// meaning against allocateMGCHash_I's declaration.
121 gcforwardobjtbl = allocateMGCHash_I(20, 3);
// Mixed policy may use 80% of the shared heap minus the reserved blocks.
131 gcmem_mixed_threshold=(unsigned int)((BAMBOO_SHARED_MEM_SIZE-bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
132 gcmem_mixed_usedmem = 0;
135 gc_profile_flag = false;
137 gc_localheap_s = false;
138 #ifdef GC_CACHE_ADAPT
139 gccachestage = false;
142 INIT_MULTICORE_GCPROFILE_DATA();
// Tear down GC data allocated by initmulticoregcdata (closing lines elided
// in this listing).
145 void dismulticoregcdata() {
146 freeMGCHash(gcforwardobjtbl);
// NOTE(review): this span is the interior of a per-collection reset routine
// (its header line is elided; presumably initGC). It re-zeroes the shared
// status arrays on the startup core, resets local counters on every core,
// and re-creates the forwarding table for the upcoming collection.
150 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
151 for(int i = 0; i < NUMCORES4GC; i++) {
153 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
154 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
156 gcrequiredmems[i] = 0;
157 gcfilledblocks[i] = 0;
// Non-GC cores only carry send/receive counters, no compaction state.
160 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
162 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
163 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
168 gcnumsrobjs_index = 0;
170 gcself_numsendobjs = 0;
171 gcself_numreceiveobjs = 0;
172 gcmarkedptrbound = 0;
// Drop and re-create the forwarding table so it starts empty.
183 freeMGCHash(gcforwardobjtbl);
184 gcforwardobjtbl = allocateMGCHash(20, 3);
187 gc_output_cache_policy_time=0;
// Return whether every active core has reported status 0 (stalled/ready).
// Enters runtime mode so gccorestatus[] can be read consistently; the
// early-exit path re-enters client mode before returning. (Return
// statements are elided in this listing.)
190 bool gc_checkAllCoreStatus() {
191 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
192 for(int i = 0; i < NUMCORESACTIVE; i++) {
193 if(gccorestatus[i] != 0) {
194 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
198 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
202 // NOTE: should be invoked with interrupts turned off
// Interrupt-off variant of gc_checkAllCoreStatus: no mode switching,
// caller guarantees exclusive access to gccorestatus[]. (Return
// statements are elided in this listing.)
203 bool gc_checkAllCoreStatus_I() {
204 for(int i = 0; i < NUMCORESACTIVE; i++) {
205 if(gccorestatus[i] != 0) {
// Second-phase termination check for the mark phase: verifies that the
// number of objects sent equals the number received across all active
// cores, and that the two snapshots (index 0/1) agree. Only when both hold
// is the mark phase declared finished and the phase advanced to COMPACT.
212 INLINE void checkMarkStatus_p2() {
213 // check if the sum of send objs and receive obj are the same
214 // yes->check if the info is the latest; no->go on executing
215 unsigned int sumsendobj = 0;
216 for(int i = 0; i < NUMCORESACTIVE; i++) {
217 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
219 for(int i = 0; i < NUMCORESACTIVE; i++) {
220 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
// Sum of sends minus receives is zero => no message still in flight.
222 if(0 == sumsendobj) {
223 // Check if there are changes of the numsendobjs or numreceiveobjs
// NOTE(review): `i` here is declared in an elided line above this loop;
// it is tested after the loop to detect early exit.
226 for(i = 0; i < NUMCORESACTIVE; i++) {
227 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
231 if(i == NUMCORESACTIVE) {
232 // all the core status info are the latest,stop mark phase
233 gc_status_info.gcphase = COMPACTPHASE;
234 // restore the gcstatus for all cores
235 for(int i = 0; i < NUMCORESACTIVE; i++) {
239 // There were changes between phase 1 and phase 2, can not decide
240 // whether the mark phase has been finished
242 // As it fails in phase 2, flip the entries
243 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
246 // There were changes between phase 1 and phase 2, can not decide
247 // whether the mark phase has been finished
249 // As it fails in phase 2, flip the entries
250 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// First-phase mark-termination check, run on the master. Records this
// core's own counters into the snapshot slot, and if all cores appear
// stalled either broadcasts a GCMARKCONFIRM round (first time) or runs
// the phase-2 consistency check. (Several branch lines are elided.)
254 INLINE void checkMarkStatus() {
255 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
256 unsigned int entry_index = 0;
// Write into the snapshot slot NOT currently being compared (flip), or
// the current one — the selecting condition is elided in this listing.
259 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
262 entry_index = gcnumsrobjs_index;
264 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
265 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
266 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
267 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
268 // check the status of all cores
269 if (gc_checkAllCoreStatus_I()) {
272 // the first time found all cores stall
273 // send out status confirm msg to all other cores
274 // reset the corestatus array too
276 numconfirm = NUMCORESACTIVE - 1;
277 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
278 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
281 checkMarkStatus_p2();
282 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
285 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
290 // compute load balance for all cores
// Sums the per-core live loads, derives the post-compaction heap top
// (written through *heaptop), and computes how many blocks each GC core
// should fill. The return statement is elided in this listing; presumably
// it returns numbpc — TODO confirm.
291 INLINE int loadbalance(unsigned int * heaptop) {
292 // compute load balance
293 // get the total loads
294 unsigned int tloads = gcloads[STARTUPCORE];
295 for(int i = 1; i < NUMCORES4GC; i++) {
296 tloads += gcloads[i];
298 *heaptop = gcbaseva + tloads;
// b is declared in an elided line; BLOCKINDEX converts an address to a
// block number.
301 BLOCKINDEX(*heaptop, &b);
302 // num of blocks per core
303 unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);
305 RESIDECORE(heaptop, &gctopcore);
309 // compute total mem size required and sort the lobjs in ascending order
// Insertion-sorts the large-object pointer queue in place (ascending by
// address) while accumulating the total size needed; sumsize is returned
// from an elided line. Runs on the master during the compact setup.
310 INLINE unsigned int sortLObjs() {
311 unsigned int tmp_lobj = 0;
312 unsigned int tmp_len = 0;
313 unsigned int tmp_host = 0;
314 unsigned int sumsize = 0;
// Iterate with a second cursor so the primary queue cursor is preserved.
316 gclobjtail2 = gclobjtail;
317 gclobjtailindex2 = gclobjtailindex;
318 // TODO USE QUICK SORT INSTEAD?
319 while(gc_lobjmoreItems2_I()) {
// Pull the next (pointer, host core, length) triple off the tail.
321 tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
322 tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
323 tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
325 GCPROFILE_RECORD_LOBJ();
326 unsigned int i = gclobjtailindex2-1;
327 struct lobjpointerblock * tmp_block = gclobjtail2;
328 // find the place to insert
// Shift larger entries right, walking back across linked pointer blocks
// when the insertion point crosses a block boundary.
331 if(tmp_block->prev == NULL) {
334 if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
335 tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
336 tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
337 tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
338 tmp_block = tmp_block->prev;
342 } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
344 if(tmp_block->lobjs[i-1] > tmp_lobj) {
345 tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
346 tmp_block->lengths[i] = tmp_block->lengths[i-1];
347 tmp_block->hosts[i] = tmp_block->hosts[i-1];
// Drop the saved triple into the hole only if it actually moved.
355 if(i != gclobjtailindex2 - 1) {
356 tmp_block->lobjs[i] = tmp_lobj;
357 tmp_block->lengths[i] = tmp_len;
358 tmp_block->hosts[i] = tmp_host;
// Temporarily stash all large objects at the very top of the shared heap
// so compaction can run over the rest of it. Returns false (elided line)
// when the region above gcheaptop is too small to hold them.
364 INLINE bool cacheLObjs() {
365 // check the total mem size need for large objs
366 unsigned long long sumsize = 0;
367 unsigned int size = 0;
369 sumsize = sortLObjs();
371 GCPROFILE_RECORD_LOBJSPACE();
373 // check if there are enough space to cache these large objs
374 unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
375 if((unsigned long long)gcheaptop > (unsigned long long)dst) {
376 // do not have enough room to cache large objs
380 gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
381 // cache the largeObjs to the top of the shared heap
382 dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
// Copy objects back-to-front; dst is decremented in elided lines.
383 while(gc_lobjmoreItems3_I()) {
385 size = gclobjtail2->lengths[gclobjtailindex2];
386 // set the mark field to , indicating that this obj has been moved
387 // and need to be flushed
388 ((struct ___Object___ *)(gclobjtail2->lobjs[gclobjtailindex2]))->marked=COMPACTED;
389 BAMBOO_CACHE_FLUSH_LINE(gclobjtail2->lobjs[gclobjtailindex2]);
// memmove when source and destination may overlap, memcpy otherwise.
391 if((unsigned int)dst<(unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
392 memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
394 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
400 // update the bamboo_smemtbl to record current shared mem usage
// For core `coren`, walk its assigned blocks in allocation order and record
// each block's usage in bamboo_smemtbl: full blocks get their block size,
// the block containing `localtop` gets the partial load, and (in elided
// branches) later blocks are left untouched.
401 void updateSmemTbl(unsigned int coren, unsigned int localtop) {
402 unsigned int ltopcore = 0;
403 unsigned int bound = BAMBOO_SMEM_SIZE_L;
// FIX: "<opcore" was HTML-entity corruption of "&ltopcore" — BLOCKINDEX
// needs the address of ltopcore to write the block index of localtop.
404 BLOCKINDEX(localtop, &ltopcore);
405 if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
406 bound = BAMBOO_SMEM_SIZE;
408 unsigned int load = (unsigned int)(localtop-gcbaseva)%(unsigned int)bound;
409 unsigned int toset = 0;
// Outer loop is unbounded; the terminating break lives in an elided branch.
410 for(int j=0; 1; j++) {
411 for(int i=0; i<2; i++) {
412 toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
413 if(toset < ltopcore) {
414 bamboo_smemtbl[toset]=BLOCKSIZE(toset<NUMCORES4GC);
416 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
418 } else if(toset == ltopcore) {
419 bamboo_smemtbl[toset] = load;
421 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
// Recompute bamboo_smemtbl from the per-core gcloads and return the
// current top-of-heap address (first address above the highest non-empty
// block).
431 INLINE unsigned int checkCurrHeapTop() {
432 // update the smemtbl
433 BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
434 // flush all gcloads to indicate the real heap top on one core
435 // previous it represents the next available ptr on a core
436 if(((unsigned int)gcloads[0]>(unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))&&(((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
437 // edge of a block, check if this is exactly the heaptop
438 BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
439 gcloads[0]+=BLOCKSIZE(gcfilledblocks[0]<=1);
441 updateSmemTbl(0, gcloads[0]);
442 for(int i = 1; i < NUMCORES4GC; i++) {
// NOTE(review): tmptop appears unused in the visible lines — presumably
// consumed by an elided branch; verify before removing.
443 unsigned int tmptop = 0;
444 if((gcfilledblocks[i] > 0)&&(((unsigned int)gcloads[i]%(BAMBOO_SMEM_SIZE)) == 0)) {
445 // edge of a block, check if this is exactly the heaptop
446 BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
447 gcloads[i]+=BLOCKSIZE(gcfilledblocks[i]<=1);
450 updateSmemTbl(i, gcloads[i]);
453 // find current heap top
455 // a bug here: when using local allocation, directly move large objects
456 // to the highest free chunk might not be memory efficient
457 unsigned int tmpheaptop = 0;
// Scan blocks from the top down; the first non-empty one bounds the heap.
458 for(int i = gcnumblock-1; i >= 0; i--) {
459 if(bamboo_smemtbl[i] > 0) {
460 return gcbaseva+bamboo_smemtbl[i]+OFFSET2BASEVA(i);
// Copy one cached large object from gcheaptop down to tmpheaptop, pad the
// alignment slack with -2 markers, and record the old->new address mapping.
// Callers advance gcheaptop/tmpheaptop in elided lines.
466 INLINE void movelobj(unsigned int tmpheaptop,unsigned int ptr,int size,int isize) {
467 // move the large obj
// Overlapping copy is possible because source and destination share the
// same heap region; choose memmove in that case.
468 if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
469 memmove(tmpheaptop, gcheaptop, size);
471 memcpy(tmpheaptop, gcheaptop, size);
473 // fill the remaining space with -2 padding
474 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
476 // cache the mapping info
477 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
// Relocate all cached large objects from the top of the shared heap down
// onto the compacted heap top, rebuilding gcsbstarttbl, bamboo_smemtbl and
// the free-block pointer as it goes. Heavily elided in this listing —
// several branch/closing lines are missing; do not restructure without the
// full source.
481 INLINE void moveLObjs() {
483 // update the gcmem_mixed_usedmem
484 gcmem_mixed_usedmem = 0;
486 unsigned int size = 0;
487 unsigned int bound = 0;
488 unsigned int tmpheaptop = checkCurrHeapTop();
490 // move large objs from gcheaptop to tmpheaptop
491 // write the header first
492 unsigned int tomove = gcbaseva+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
494 gcmem_mixed_usedmem += tomove;
496 // flush the sbstartbl
497 BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]),'\0',(BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-(unsigned int)gcreservedsb)*sizeof(unsigned int));
499 gcheaptop = tmpheaptop;
501 // check how many blocks it acrosses
502 unsigned int remain = tmpheaptop-gcbaseva;
503 //number of the sblock
504 unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;
505 unsigned int b = 0; // number of the block
506 BLOCKINDEX(tmpheaptop, &b);
507 // check the remaining space in this block
508 bound = (BAMBOO_SMEM_SIZE);
509 if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
510 bound = (BAMBOO_SMEM_SIZE_L);
512 remain = bound - remain%bound;
515 unsigned int isize = 0;
516 unsigned int host = 0;
517 unsigned int ptr = 0;
518 unsigned int base = tmpheaptop;
519 unsigned int cpysize = 0;
// Reserve one cache line at the start of each run for the block header.
520 remain -= BAMBOO_CACHE_LINE_SIZE;
521 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
522 gc_lobjqueueinit4_I();
// Dequeue every large object (pointer, size, host core) and place it.
523 while(gc_lobjmoreItems4_I()) {
524 ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
525 ALIGNSIZE(size, &isize);
526 if(remain >= isize) {
528 // move the large obj
529 movelobj(tmpheaptop,ptr,size,isize);
531 // update bamboo_smemtbl
532 bamboo_smemtbl[b] += isize;
534 // this object acrosses blocks
536 CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
537 bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
// Advance into the next block; BLOCKSIZE picks the large/small size by
// whether we are still below BAMBOO_LARGE_SMEM_BOUND.
541 remain = BLOCKSIZE((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND));
543 remain -= BAMBOO_CACHE_LINE_SIZE;
544 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
545 BLOCKINDEX(tmpheaptop, &b);
546 sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)+gcreservedsb;
549 movelobj(tmpheaptop,ptr,size,isize);
551 // set the gcsbstarttbl and bamboo_smemtbl
// Mark the sblocks wholly covered by this object as continuations (-1).
552 unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
553 for(int k = 1; k < tmpsbs; k++) {
554 gcsbstarttbl[sb+k] = -1;
557 bound = BLOCKSIZE(b<NUMCORES4GC);
// tmpsbs is reused here to hold the block index of the last byte.
558 BLOCKINDEX(tmpheaptop-1, &tmpsbs);
559 for(; b < tmpsbs; b++) {
560 bamboo_smemtbl[b] = bound;
561 if(b==NUMCORES4GC-1) {
562 bound = BAMBOO_SMEM_SIZE;
565 if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
566 gcsbstarttbl[sb] = -1;
567 remain = BLOCKSIZE((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND));
568 bamboo_smemtbl[b] = bound;
570 gcsbstarttbl[sb] = (int)tmpheaptop;
571 remain = tmpheaptop-gcbaseva;
572 bamboo_smemtbl[b] = remain%bound;
573 remain = bound - bamboo_smemtbl[b];
576 CLOSEBLOCK(base, isize+BAMBOO_CACHE_LINE_SIZE);
579 if(remain == BAMBOO_CACHE_LINE_SIZE) {
580 // fill with 0 in case
581 BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
583 remain -= BAMBOO_CACHE_LINE_SIZE;
584 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
589 CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
590 bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
592 tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
594 gcheaptop = tmpheaptop;
// Locate the first partially-filled block for future allocation.
597 bamboo_free_block = 0;
598 unsigned int tbound = 0;
600 tbound=BLOCKSIZE(bamboo_free_block<NUMCORES4GC);
601 if(bamboo_smemtbl[bamboo_free_block] == tbound) {
604 // the first non-full partition
609 GCPROFILE_RECORD_SPACE();
// GC-core client routine: announce readiness, then step through the
// INIT -> MARK -> COMPACT -> FLUSH -> FINISH phases in lockstep with the
// master, invalidating local shared-memory allocation state at the end.
612 void gc_collect(struct garbagelist * stackptr) {
613 gc_status_info.gcprocessing = true;
614 // inform the master that this core is at a gc safe point and is ready to
616 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
618 // core collector routine
619 //wait for init phase
620 WAITFORGCPHASE(INITPHASE);
622 GC_PRINTF("Do initGC\n");
625 //send init finish msg to core coordinator
626 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
628 //wait for mark phase
629 WAITFORGCPHASE(MARKPHASE);
631 GC_PRINTF("Start mark phase\n");
632 mark(true, stackptr);
633 GC_PRINTF("Finish mark phase, start compact phase\n");
// The compact call itself is on an elided line between these prints.
635 GC_PRINTF("Finish compact phase\n");
637 WAITFORGCPHASE(FLUSHPHASE);
639 GC_PRINTF("Start flush phase\n");
640 GCPROFILE_INFO_2_MASTER();
642 GC_PRINTF("Finish flush phase\n");
644 CACHEADAPT_PHASE_CLIENT();
646 // invalidate all shared mem pointers
647 bamboo_cur_msp = NULL;
648 bamboo_smem_size = 0;
649 bamboo_smem_zero_top = NULL;
652 WAITFORGCPHASE(FINISHPHASE);
654 GC_PRINTF("Finish gc! \n");
// Non-GC-core client routine: mirrors gc_collect but performs no compaction
// of its own — it only participates in init, mark and flush, then waits for
// the finish broadcast.
657 void gc_nocollect(struct garbagelist * stackptr) {
658 gc_status_info.gcprocessing = true;
659 // inform the master that this core is at a gc safe point and is ready to
661 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
663 WAITFORGCPHASE(INITPHASE);
665 GC_PRINTF("Do initGC\n");
668 //send init finish msg to core coordinator
669 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
671 WAITFORGCPHASE(MARKPHASE);
673 GC_PRINTF("Start mark phase\n");
674 mark(true, stackptr);
675 GC_PRINTF("Finish mark phase, wait for flush\n");
677 // non-gc core collector routine
678 WAITFORGCPHASE(FLUSHPHASE);
680 GC_PRINTF("Start flush phase\n");
681 GCPROFILE_INFO_2_MASTER();
683 GC_PRINTF("Finish flush phase\n");
685 CACHEADAPT_PHASE_CLIENT();
687 // invalidate all shared mem pointers
688 bamboo_cur_msp = NULL;
689 bamboo_smem_size = 0;
690 bamboo_smem_zero_top = NULL;
693 WAITFORGCPHASE(FINISHPHASE);
695 GC_PRINTF("Finish gc! \n");
// Master-side mark phase: broadcast GCSTART, switch the global phase to
// MARKPHASE, and keep marking (checking termination in elided lines) until
// some core flips the phase forward.
698 void master_mark(struct garbagelist *stackptr) {
701 GC_PRINTF("Start mark phase \n");
702 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
703 gc_status_info.gcphase = MARKPHASE;
// isfirst is set up in an elided line above this loop.
706 while(MARKPHASE == gc_status_info.gcphase) {
707 mark(isfirst, stackptr);
// Master-side collection of large-object info: request it from every other
// GC core, spin until all respond, then stash the sorted large objects at
// the top of the heap via cacheLObjs.
714 void master_getlargeobjs() {
715 // send msgs to all cores requiring large objs info
716 // Note: only need to ask gc cores, non-gc cores do not host any objs
717 numconfirm = NUMCORES4GC - 1;
718 for(int i = 1; i < NUMCORES4GC; i++) {
719 send_msg_1(i,GCLOBJREQUEST);
721 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
722 //spin until we have all responses
// NOTE(review): busy-wait on numconfirm, decremented by the message
// handler — relies on it being updated asynchronously.
723 while(numconfirm!=0) ;
726 if(gcheaptop < gcmarkedptrbound) {
727 gcheaptop = gcmarkedptrbound;
730 GC_PRINTF("prepare to cache large objs \n");
732 // cache all large objs
733 BAMBOO_ASSERTMSG(cacheLObjs(), "Not enough space to cache large objects\n");
// Master-side compact phase: load-balance blocks across GC cores, send
// each core its block quota via GCSTARTCOMPACT, run the master's own
// share, then move the cached large objects back down.
736 void master_compact() {
737 // predict number of blocks to fill for each core
738 unsigned int tmpheaptop = 0;
739 int numpbc = loadbalance(&tmpheaptop);
740 //tprintf("numpbc: %d \n", numpbc);
// Fallback quota (guarding condition is elided in this listing).
742 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
743 GC_PRINTF("mark phase finished \n");
745 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
746 for(int i = 0; i < NUMCORES4GC; i++) {
747 unsigned int tmpcoreptr = 0;
748 BASEPTR(i, numpbc, &tmpcoreptr);
749 // init some data strutures for compact phase
751 gcfilledblocks[i] = 0;
752 gcrequiredmems[i] = 0;
754 //send start compact messages to all cores
755 //TODO bug here, do not know if the direction is positive or negtive?
756 //if (tmpcoreptr < tmpheaptop) {
757 gcstopblock[i] = numpbc+1;
758 if(i != STARTUPCORE) {
759 send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
761 gcblock2fill = numpbc+1;
// Alternate branch (condition elided) uses numpbc without the +1.
764 gcstopblock[i] = numpbc;
765 if(i != STARTUPCORE) {
766 send_msg_2(i, GCSTARTCOMPACT, numpbc);
768 gcblock2fill = numpbc;
// NOTE(review): RUNMALLOC results are not checked here and the helpers are
// not visibly freed in this listing — confirm ownership in the full source.
775 struct moveHelper * orig = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
776 struct moveHelper * to = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
777 compact_master(orig, to);
779 GC_PRINTF("prepare to move large objs \n");
782 GC_PRINTF("compact phase finished \n");
// Master-side flush phase: broadcast GCSTARTFLUSH and wait until every
// core reports that reference updating is complete.
787 void master_updaterefs(struct garbagelist * stackptr) {
788 gc_status_info.gcphase = FLUSHPHASE;
789 GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
791 GC_PRINTF("Start flush phase \n");
// The master's own flush work happens in elided lines before this wait.
794 GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gc_status_info.gcphase);
795 GC_PRINTF("Finish flush phase \n");
// Master-side finish: invalidate the master's own shared-memory allocation
// state BEFORE broadcasting GCFINISH (so no core resumes the mutator with
// stale pointers), emit cache-policy profiling, then re-arm the pre-GC
// counters and broadcast GCSTARTPRE for the next cycle.
798 void master_finish() {
799 gc_status_info.gcphase = FINISHPHASE;
801 // invalidate all shared mem pointers
802 // put it here as it takes time to inform all the other cores to
803 // finish gc and it might cause problem when some core resumes
804 // mutator earlier than the other cores
805 bamboo_cur_msp = NULL;
806 bamboo_smem_size = 0;
807 bamboo_smem_zero_top = NULL;
810 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
811 CACHEADAPT_OUTPUT_CACHE_POLICY();
812 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
814 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
816 gc_status_info.gcprocessing = false;
818 // inform other cores to stop and wait for gc
820 for(int i = 0; i < NUMCORESACTIVE; i++) {
821 // reuse the gcnumsendobjs & gcnumreceiveobjs
822 gcnumsendobjs[0][i] = 0;
823 gcnumreceiveobjs[0][i] = 0;
825 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
// Top-level GC coordinator: drives the full collection pipeline
// (init -> mark -> large objects -> compact [elided] -> update refs ->
// cache adaptation -> finish [elided]) on the startup core.
829 void gc_master(struct garbagelist * stackptr) {
830 tprintf("start GC !!!!!!!!!!!!! \n");
831 gc_status_info.gcprocessing = true;
832 gc_status_info.gcphase = INITPHASE;
837 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
839 GC_PRINTF("Check core status \n");
840 GC_CHECK_ALL_CORE_STATUS(true);
842 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
843 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
844 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
847 master_mark(stackptr);
849 // get large objects from all cores
850 master_getlargeobjs();
// master_compact()/master_finish() calls sit on elided lines nearby.
855 // update the references
856 master_updaterefs(stackptr);
858 // do cache adaptation
859 CACHEADAPT_PHASE_MASTER();
861 // do finish up stuff
864 GC_PRINTF("gc finished \n");
865 tprintf("finish GC ! %d \n",gcflag);
// NOTE(review): interior of a pre-GC readiness check (header line elided;
// presumably pregccheck). It publishes this core's send/receive counters,
// sums sends minus receives across all active cores, and if messages are
// still in flight waits (elided) and re-checks.
870 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
871 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
872 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
// sumsendobj is declared in an elided line above.
874 for(int i = 0; i < NUMCORESACTIVE; i++) {
875 sumsendobj += gcnumsendobjs[0][i];
877 for(int i = 0; i < NUMCORESACTIVE; i++) {
878 sumsendobj -= gcnumreceiveobjs[0][i];
880 if(0 != sumsendobj) {
881 // there were still some msgs on the fly, wait until there
882 // are some update pregc information coming and check it again
884 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
888 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Work done on each core immediately before a collection: mask the sampling
// timer (cache-adapt builds), close the current shared-memory pool, and
// dump the DTLB sampling data.
894 void pregcprocessing() {
895 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
896 // disable the timer interrupt
897 bamboo_mask_timer_intr();
899 // Zero out the remaining memory here because for the GC_CACHE_ADAPT version,
900 // we need to make sure during the gcinit phase the shared heap is not
901 // touched. Otherwise, there would be problem when adapt the cache strategy.
902 BAMBOO_CLOSE_CUR_MSP();
903 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
904 // get the sampling data
905 bamboo_output_dtlb_sampling();
// Work done on each core immediately after a collection: re-arm and
// unmask the sampling timer (cache-adapt builds only).
909 void postgcprocessing() {
910 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
911 // enable the timer interrupt
912 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
913 bamboo_unmask_timer_intr();
917 bool gc(struct garbagelist * stackptr) {
920 gc_status_info.gcprocessing = false;
924 // core coordinator routine
925 if(0 == BAMBOO_NUM_OF_CORE) {
926 GC_PRINTF("Check if we can do gc or not\n");
927 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
928 if(!gc_checkAllCoreStatus()) {
929 // some of the cores are still executing the mutator and did not reach
930 // some gc safe point, therefore it is not ready to do gc
937 GC_PRINTF("start gc! \n");
940 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
942 gc_collect(stackptr);
945 gc_nocollect(stackptr);