// BAMBOO_EXIT(0xb000);
// TODO: tags are NOT supported!!!

#include "multicoregarbage.h"
#include "multicoregcmark.h"
#include "multicoregccompact.h"
#include "multicoregcflush.h"
#include "multicoreruntime.h"
#include "multicoregcprofile.h"

struct pointerblock *gchead=NULL;
struct pointerblock *gctail=NULL;
struct pointerblock *gctail2=NULL;
struct pointerblock *gcspare=NULL;
struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
struct lobjpointerblock *gclobjspare=NULL;

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
#endif // MULTICORE_GC

// dump the whole shared mem in blocks
INLINE void dumpSMem() {
  printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  // reserved blocks for the sblocktbl
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  for(i=BAMBOO_BASE_VA; (unsigned int)i<(unsigned int)gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  sblock = gcreservedsb;
  bool advanceblock = false;
  for(i=gcbaseva;
      (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE);
      i += 4*16) {
    // compute the sblock #, block #, and core coordinate (x,y)
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
          coren = gc_block2core[block%(NUMCORES4GC*2)];
          // compute the core coordinate
          x = BAMBOO_COORDS_X(coren);
          y = BAMBOO_COORDS_Y(coren);
          printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
                 udn_tile_coord_x(), udn_tile_coord_y(),
                 block, sblock++, x, y,
                 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
}
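
/* Sketch (hypothetical helper, not used by the runtime): the two 16-word
 * printf rows in dumpSMem() above share the same layout and could be
 * factored into one helper. It assumes only printf and the Tilera
 * udn_tile_coord_* calls already used throughout this file. */
static void dumpSMemRow(unsigned int addr) {
  printf("(%x,%x)", udn_tile_coord_x(), udn_tile_coord_y());
  for(int k = 0; k < 16; k++) {
    // dump one 4-byte word per column, 16 words per row
    printf(" 0x%08x", *((int *)(addr + 4*k)));
  }
  printf(" \n");
}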
INLINE void initmulticoregcdata() {
  int i;
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    // the startup core initializes corestatus[]
    for(i = 0; i < NUMCORESACTIVE; ++i) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
    for(i = 0; i < NUMCORES4GC; ++i) {
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
    }
  }
  bamboo_smem_zero_top = NULL;
  gcprocessing = false;
  gcphase = FINISHPHASE;
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  gcforwardobjtbl = allocateMGCHash_I(20, 3);
  gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE
        -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
  gcmem_mixed_usedmem = 0;
  gc_profile_flag = false;
  gc_num_flush_dtlb = 0;
  gc_localheap_s = false;
#ifdef GC_CACHE_ADAPT
  gccachestage = false;
#endif
  INIT_MULTICORE_GCPROFILE_DATA();
}

INLINE void dismulticoregcdata() {
  freeMGCHash(gcforwardobjtbl);
}

INLINE void initGC() {
  int i;
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(i = 0; i < NUMCORES4GC; ++i) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
    }
    for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
    gcnumsrobjs_index = 0;
  }
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;

  // initialize the mark queue
  if(gchead == NULL) {
    gcheadindex = gctailindex = gctailindex2 = 0;
    gchead = gctail = gctail2 = RUNMALLOC(sizeof(struct pointerblock));
  } else {
    gctailindex = gctailindex2 = gcheadindex = 0;
    gctail = gctail2 = gchead;
  }

  // initialize the large obj queues
  if(gclobjhead == NULL) {
    gclobjtailindex2 = 0;
    gclobjhead = gclobjtail = gclobjtail2 =
      RUNMALLOC(sizeof(struct lobjpointerblock));
  } else {
    gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
    gclobjtail = gclobjtail2 = gclobjhead;
  }
  gclobjhead->next = gclobjhead->prev = NULL;

  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);
}

INLINE bool gc_checkAllCoreStatus_I() {
  int i;
  for(i = 0; i < NUMCORESACTIVE; ++i) {
    if(gccorestatus[i] != 0) {
      break;
    }
  }
  return (i == NUMCORESACTIVE);
}
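
/* Note on the _I suffix (a reading of the call sites in this file, e.g. in
 * gc() and checkMarkStatue() below): _I routines are expected to run in the
 * runtime (interrupt-protected) mode, so callers bracket them like this:
 *
 *   BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 *   bool allStall = gc_checkAllCoreStatus_I();
 *   BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 */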
INLINE void checkMarkStatue() {
  int i;
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    unsigned int entry_index = 0;
    if(waitconfirm) {
      // phase 2: use the snapshot that is not currently being written
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    } else {
      // phase 1
      entry_index = gcnumsrobjs_index;
    }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    bool allStall = gc_checkAllCoreStatus_I();
      // the first time all cores are found stalled: send status-confirm
      // msgs to all other cores and reset the corestatus array
      gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
      numconfirm = NUMCORESACTIVE - 1;
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      for(i = 1; i < NUMCORESACTIVE; ++i) {
        // send a mark-phase-finish confirm request msg to core i
        send_msg_1(i, GCMARKCONFIRM, false);
      }
      // check if the sums of sent and received objs are equal;
      // yes -> check if the info is the latest; no -> keep executing
      unsigned int sumsendobj = 0;
      for(i = 0; i < NUMCORESACTIVE; ++i) {
        sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
      }
      for(i = 0; i < NUMCORESACTIVE; ++i) {
        sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
      }
      if(0 == sumsendobj) {
        // check if the numsendobjs or numreceiveobjs changed on any core
        bool ischanged = false;
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
             (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
            ischanged = true;
            break;
          }
        }
        if(waitconfirm && !ischanged) {
          // all the core status info is the latest: stop the mark phase
          gcphase = COMPACTPHASE;
          // restore the gcstatus for all cores
          for(i = 0; i < NUMCORESACTIVE; ++i) {
            gccorestatus[i] = 1;
          }
        } else {
          // there were changes between phase 1 and phase 2, so we cannot
          // decide whether the mark phase has finished;
          // since phase 2 failed, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        }
      } else {
        // there were changes between phase 1 and phase 2, so we cannot
        // decide whether the mark phase has finished;
        // since phase 2 failed, flip the entries
        gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  }
}
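
/* Sketch of the two-phase termination test implemented above (hypothetical
 * helper; the real code works on the global gcnumsendobjs/gcnumreceiveobjs
 * snapshot arrays with NUMCORESACTIVE entries, here capped at 8 purely for
 * illustration). Marking is only declared finished when, in the current
 * snapshot, the cores together received every object that was sent, AND no
 * counter changed between the two confirmation rounds. */
static bool markTerminated(unsigned int send[2][8], unsigned int recv[2][8],
                           int ncores, int snapshot) {
  unsigned int sent = 0, received = 0;
  for(int i = 0; i < ncores; i++) {
    sent += send[snapshot][i];
    received += recv[snapshot][i];
  }
  if(sent != received) {
    return false;  // some msgs are still in flight
  }
  for(int i = 0; i < ncores; i++) {
    // a change between the two rounds means the snapshot is stale
    if((send[0][i] != send[1][i]) || (recv[0][i] != recv[1][i])) {
      return false;
    }
  }
  return true;
}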
// compute load balance for all cores
INLINE int loadbalance(unsigned int * heaptop) {
  int i;
  unsigned int b = 0;
  // get the total loads
  unsigned int tloads = gcloads[STARTUPCORE];
  for(i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  }
  *heaptop = gcbaseva + tloads;

  BLOCKINDEX(*heaptop, &b);
  // number of blocks per core
  unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);
  RESIDECORE(heaptop, &gctopcore);
  return numbpc;
}
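
/* Worked example of the division above (numbers chosen for illustration):
 * if the heap top falls in block b = 25 and NUMCORES4GC = 8, each core gets
 * numbpc = 25/8 = 3 blocks to fill. The remainder is absorbed by the caller:
 * gc_master() below assigns gcstopblock[i] = numpbc or numpbc+1 per core
 * depending on where the core's base pointer sits relative to the heap top. */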
// compute the total mem size required and sort the lobjs in ascending order
INLINE unsigned int sortLObjs() {
  unsigned int tmp_lobj = 0;
  unsigned int tmp_len = 0;
  unsigned int tmp_host = 0;
  unsigned int sumsize = 0;

  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  // TODO: use quicksort instead?
  while(gc_lobjmoreItems2_I()) {
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2-1];
    GCPROFILE_RECORD_LOBJ();
    unsigned int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
      if(tmp_block->prev == NULL) {
      if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
        tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
        tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
        tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
        tmp_block = tmp_block->prev;
      } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj)
      if(tmp_block->lobjs[i-1] > tmp_lobj) {
        tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
        tmp_block->lengths[i] = tmp_block->lengths[i-1];
        tmp_block->hosts[i] = tmp_block->hosts[i-1];
      }
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
    sumsize += tmp_len;
  }
  return sumsize;
}
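
/* Sketch (hypothetical helper, not used by the runtime): the loop above is
 * an insertion sort over the linked lobjpointerblock list, keeping lobjs[]
 * and the parallel lengths[]/hosts[] arrays sorted by address in ascending
 * order. The same shift-and-insert step over flat arrays of capacity n+1: */
static void insertSorted(unsigned int * objs, unsigned int * lens,
                         unsigned int * hosts, int n,
                         unsigned int obj, unsigned int len,
                         unsigned int host) {
  int i = n;
  while((i > 0) && (objs[i-1] > obj)) {
    // shift larger entries one slot right, keeping the three arrays in sync
    objs[i] = objs[i-1];
    lens[i] = lens[i-1];
    hosts[i] = hosts[i-1];
    i--;
  }
  objs[i] = obj;
  lens[i] = len;
  hosts[i] = host;
}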
INLINE bool cacheLObjs() {
  // check the total mem size needed for the large objs
  unsigned long long sumsize = 0;
  unsigned int size = 0;
  sumsize = sortLObjs();
  GCPROFILE_RECORD_LOBJSPACE();
  // check if there is enough space to cache these large objs
  unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // not enough room to cache the large objs
    return false;
  }
  gcheaptop = dst; // Note: gcheaptop now records the start of the cached lobjs
  // cache the large objs at the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to indicate that this obj has been moved
    // and needs to be flushed
    ((struct ___Object___ *)(gclobjtail2->lobjs[gclobjtailindex2]))->marked =
    if((unsigned int)dst <
       (unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
  }
  return true;
}
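
/* Sketch of the copy choice above (hypothetical helper; memmove/memcpy are
 * already used in this file, and pointers are cast to unsigned int in this
 * codebase's 32-bit style): memmove() is picked whenever the destination
 * range may overlap the source, the cheaper memcpy() when the ranges are
 * provably disjoint. */
static void copyMaybeOverlap(void * dst, void * src, unsigned int size) {
  if((unsigned int)dst < ((unsigned int)src) + size) {
    memmove(dst, src, size);  // ranges may overlap: memmove is safe
  } else {
    memcpy(dst, src, size);   // disjoint ranges: memcpy suffices
  }
}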
436 // update the bmmboo_smemtbl to record current shared mem usage
437 void updateSmemTbl(unsigned int coren,
438 unsigned int localtop) {
439 unsigned int ltopcore = 0;
440 unsigned int bound = BAMBOO_SMEM_SIZE_L;
441 BLOCKINDEX(localtop, <opcore);
442 if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
443 bound = BAMBOO_SMEM_SIZE;
445 unsigned int load = (unsigned int)(localtop-gcbaseva)%(unsigned int)bound;
448 unsigned int toset = 0;
450 toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
451 if(toset < ltopcore) {
452 bamboo_smemtbl[toset]=
453 (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
455 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
457 } else if(toset == ltopcore) {
458 bamboo_smemtbl[toset] = load;
460 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
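
/* A reading of the indexing above: blocks are handed out to cores
 * round-robin in rounds of NUMCORES4GC*2, so the j-th block owned by core c
 * in slot i (i = 0 or 1) is
 *   gc_core2block[2*c + i] + (NUMCORES4GC*2)*j.
 * updateSmemTbl() walks exactly those blocks, marking every block below the
 * core's local top as full and the block holding the top with its partial
 * load. */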
INLINE unsigned int checkCurrHeapTop() {
  // update the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // flush all gcloads to indicate the real heap top on each core
  // (previously they pointed to the next available ptr on the core)
  if(((unsigned int)gcloads[0] > (unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))
     && (((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block: check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
  }
  updateSmemTbl(0, gcloads[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    unsigned int tmptop = 0;
    if((gcfilledblocks[i] > 0)
       && (((unsigned int)gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block: check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
    }
    updateSmemTbl(i, gcloads[i]);
  }

  // find the current heap top
  // TODO: there is a bug here: when using local allocation, directly moving
  // large objects to the highest free chunk may not be memory efficient
  unsigned int tmpheaptop = 0;
  int i;
  for(i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      break;
    }
  }
  if(i == -1) {
    // all blocks are empty
    tmpheaptop = gcbaseva;
  } else {
    tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
        (BAMBOO_SMEM_SIZE_L*i) :
        (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
  }
  return tmpheaptop;
}
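
/* Sketch of the block-address arithmetic used above (hypothetical helper,
 * derived from the expression in checkCurrHeapTop(): the first NUMCORES4GC
 * blocks are large, BAMBOO_SMEM_SIZE_L bytes each, ending at
 * gcbaseva+BAMBOO_LARGE_SMEM_BOUND; the remaining blocks are regular
 * BAMBOO_SMEM_SIZE blocks). */
static unsigned int blockStartAddr(unsigned int i) {
  return gcbaseva + ((i < NUMCORES4GC) ?
      (BAMBOO_SMEM_SIZE_L * i) :
      (BAMBOO_SMEM_SIZE * (i - NUMCORES4GC) + BAMBOO_LARGE_SMEM_BOUND));
}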
INLINE void moveLObjs() {
  // update gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
  unsigned int size = 0;
  unsigned int bound = 0;
  unsigned int tmpheaptop = checkCurrHeapTop();

  // move large objs from gcheaptop to tmpheaptop,
  // writing the header first
  unsigned int tomove = gcbaseva+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
  gcmem_mixed_usedmem += tomove;
  // flush the gcsbstarttbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-(unsigned int)gcreservedsb)
        *sizeof(unsigned int));
  gcheaptop = tmpheaptop;
  // check how many blocks the data to move crosses
  unsigned int remain = tmpheaptop-gcbaseva;
  // number of the sblock
  unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;
  unsigned int b = 0;  // number of the block
  BLOCKINDEX(tmpheaptop, &b);
  // check the remaining space in this block
  bound = (BAMBOO_SMEM_SIZE);
  if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  }
  remain = bound - remain%bound;

  unsigned int isize = 0;
  unsigned int host = 0;
  unsigned int ptr = 0;
  unsigned int base = tmpheaptop;
  unsigned int cpysize = 0;
  remain -= BAMBOO_CACHE_LINE_SIZE;
  tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
  gc_lobjqueueinit4_I();
  while(gc_lobjmoreItems4_I()) {
    ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
    ALIGNSIZE(size, &isize);
    if(remain >= isize) {
      // move the large obj
      if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
        memmove(tmpheaptop, gcheaptop, size);
      } else {
        memcpy(tmpheaptop, gcheaptop, size);
      }
      // fill the remaining space with -2 padding
      BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
      // cache the mapping info
      gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
      // update bamboo_smemtbl
      bamboo_smemtbl[b] += isize;
    } else {
      // this object crosses blocks
      CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
      remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
        BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
      remain -= BAMBOO_CACHE_LINE_SIZE;
      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      BLOCKINDEX(tmpheaptop, &b);
      sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)+gcreservedsb;
      // move the large obj
      if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
        memmove(tmpheaptop, gcheaptop, size);
      } else {
        memcpy(tmpheaptop, gcheaptop, size);
      }
      // fill the remaining space with -2 padding
      BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
      // cache the mapping info
      gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
      // set the gcsbstarttbl and bamboo_smemtbl
      unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
      for(int k = 1; k < tmpsbs; k++) {
        gcsbstarttbl[sb+k] = -1;
      }
      bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
      BLOCKINDEX(tmpheaptop-1, &tmpsbs);
      for(; b < tmpsbs; b++) {
        bamboo_smemtbl[b] = bound;
        if(b==NUMCORES4GC-1) {
          bound = BAMBOO_SMEM_SIZE;
        }
      }
      if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
        gcsbstarttbl[sb] = -1;
        remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
          BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        bamboo_smemtbl[b] = bound;
      } else {
        gcsbstarttbl[sb] = (int)tmpheaptop;
        remain = tmpheaptop-gcbaseva;
        bamboo_smemtbl[b] = remain%bound;
        remain = bound - bamboo_smemtbl[b];
      }
      CLOSEBLOCK(base, isize+BAMBOO_CACHE_LINE_SIZE);
      if(remain == BAMBOO_CACHE_LINE_SIZE) {
        // fill with 0, just in case
        BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
      }
      remain -= BAMBOO_CACHE_LINE_SIZE;
      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
    }
  }
  CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
  bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
  tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
  gcheaptop = tmpheaptop;

  // find the first non-full block partition
  bamboo_free_block = 0;
  unsigned int tbound = 0;
  while(true) {
    tbound=(bamboo_free_block<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
      bamboo_free_block++;
    } else {
      // the first non-full partition
      break;
    }
  }
  GCPROFILE_RECORD_SPACE();
}
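
/* Sketch of the rounding that ALIGNSIZE(size, &isize) performs above: it
 * rounds a request up to the collector's allocation granularity, and the
 * gap between size and isize is then filled with -2 so the sweep can skip
 * it. For a power-of-two granularity (the value below is assumed, purely
 * for illustration), the usual round-up is: */
#define ALIGN_BYTES_SKETCH 32  /* assumed granularity, illustration only */
static unsigned int alignUp(unsigned int size) {
  // round size up to the next multiple of the power-of-two granularity
  return (size + ALIGN_BYTES_SKETCH - 1) & ~(ALIGN_BYTES_SKETCH - 1);
}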
INLINE void gc_collect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to
  // do gc
  send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
             self_numreceiveobjs, false);

  // core collector routine
  if(INITPHASE == gcphase) {
    GC_PRINTF("Do initGC\n");
    initGC();
    // send the init finish msg to the core coordinator
    send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
  }
  if(MARKPHASE == gcphase) {
    GC_PRINTF("Start mark phase\n");
    mark(true, stackptr);
    GC_PRINTF("Finish mark phase, start compact phase\n");
    GC_PRINTF("Finish compact phase\n");
  }
  if(FLUSHPHASE == gcphase) {
    GC_PRINTF("Start flush phase\n");
    GCPROFILE_INFO_2_MASTER();
    GC_PRINTF("Finish flush phase\n");
    CACHEADAPT_PHASE_CLIENT();
    // invalidate all shared mem pointers
    bamboo_cur_msp = NULL;
    bamboo_smem_size = 0;
    bamboo_smem_zero_top = NULL;
  }
  if(FINISHPHASE == gcphase) {
    GC_PRINTF("Finish gc! \n");
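
/* Each phase block in gc_collect() above and gc_nocollect() below follows
 * the same client-side pattern: wait until the master advances the global
 * gcphase (via the GCSTART* messages), do the local work for that phase,
 * then report back to STARTUPCORE. A sketch of one phase, assuming the
 * elided code between the phase blocks performs the wait:
 *
 *   while(INITPHASE != gcphase) ;   // wait for the master
 *   initGC();                       // local work for this phase
 *   send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
 */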
INLINE void gc_nocollect(struct garbagelist * stackptr) {
  // inform the master that this core is at a gc safe point and is ready to
  // do gc
  send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
             self_numreceiveobjs, false);

  // non-gc core collector routine
  if(INITPHASE == gcphase) {
    GC_PRINTF("Do initGC\n");
    initGC();
    // send the init finish msg to the core coordinator
    send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
  }
  if(MARKPHASE == gcphase) {
    GC_PRINTF("Start mark phase\n");
    mark(true, stackptr);
    GC_PRINTF("Finish mark phase, wait for flush\n");
  }
  if(FLUSHPHASE == gcphase) {
    GC_PRINTF("Start flush phase\n");
    GCPROFILE_INFO_2_MASTER();
    GC_PRINTF("Finish flush phase\n");
    CACHEADAPT_PHASE_CLIENT();
    // invalidate all shared mem pointers
    bamboo_cur_msp = NULL;
    bamboo_smem_size = 0;
    bamboo_smem_zero_top = NULL;
  }
  if(FINISHPHASE == gcphase) {
    GC_PRINTF("Finish gc! \n");
INLINE void gc_master(struct garbagelist * stackptr) {
  tprintf("start GC !!!!!!!!!!!!! \n");
  int i;

  // init phase
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
  GC_PRINTF("Check core status \n");
  GC_CHECK_ALL_CORE_STATUS(true);
  CACHEADAPT_OUTPUT_CACHE_SAMPLING();

  // mark phase
  GC_PRINTF("Start mark phase \n");
  GC_SEND_MSG_1_TO_CLIENT(GCSTART);
  bool isfirst = true;
  while(MARKPHASE == gcphase) {
    mark(isfirst, stackptr);
  }

  // send msgs to all cores requesting their large-obj info
  // Note: only the gc cores need to be asked; non-gc cores host no objs
  numconfirm = NUMCORES4GC - 1;
  for(i = 1; i < NUMCORES4GC; ++i) {
    send_msg_1(i, GCLOBJREQUEST, false);
  }
  gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
  } // wait for responses
  if(gcheaptop < gcmarkedptrbound) {
    gcheaptop = gcmarkedptrbound;
  }
  GC_PRINTF("prepare to cache large objs \n");
  // cache all large objs
  if(!cacheLObjs()) {
    // not enough space to cache the large objs
  }

  // predict the number of blocks to be filled by each core
  unsigned int tmpheaptop = 0;
  int numpbc = loadbalance(&tmpheaptop);
    numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
  GC_PRINTF("mark phase finished \n");

  //BASEPTR(gctopcore, 0, &tmptopptr);
  //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  for(i = 0; i < NUMCORES4GC; ++i) {
    unsigned int tmpcoreptr = 0;
    BASEPTR(i, numpbc, &tmpcoreptr);
    // init some data structures for the compact phase
    gcfilledblocks[i] = 0;
    gcrequiredmems[i] = 0;
    // send start-compact messages to all cores
    // TODO: bug here, we do not know if the direction is positive or negative
    if (tmpcoreptr < tmpheaptop) {
      gcstopblock[i] = numpbc + 1;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
      } else {
        gcblock2fill = numpbc+1;
      }
    } else {
      gcstopblock[i] = numpbc;
      if(i != STARTUPCORE) {
        send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
      } else {
        gcblock2fill = numpbc;
      }
    }
  }

  // compact phase
  struct moveHelper * orig =
    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  struct moveHelper * to =
    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
  compact_master(orig, to);
  GC_PRINTF("prepare to move large objs \n");
  moveLObjs();
  GC_PRINTF("compact phase finished \n");

  // flush phase
  gcphase = FLUSHPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
  GC_PRINTF("Start flush phase \n");
  // now the master core needs to decide the new cache strategy
  GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gcphase);
  GC_PRINTF("Finish flush phase \n");
  CACHEADAPT_PHASE_MASTER();

  gcphase = FINISHPHASE;
  // invalidate all shared mem pointers
  // (done here because it takes time to inform all the other cores to
  // finish gc, and it could cause problems if some core resumed its
  // mutator earlier than the others)
  bamboo_cur_msp = NULL;
  bamboo_smem_size = 0;
  bamboo_smem_zero_top = NULL;

  GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
  gcprocessing = false;

  // inform other cores to stop and wait for gc
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    // reuse gcnumsendobjs & gcnumreceiveobjs
    gcnumsendobjs[0][i] = 0;
    gcnumreceiveobjs[0][i] = 0;
  }
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
  GC_PRINTF("gc finished \n");
  tprintf("finish GC ! %d \n", gcflag);
INLINE void pregccheck_I() {
  int i;
  int sumsendobj = 0;
  gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
  gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
  for(i = 0; i < NUMCORESACTIVE; ++i) {
    sumsendobj += gcnumsendobjs[0][i];
  }
  for(i = 0; i < NUMCORESACTIVE; ++i) {
    sumsendobj -= gcnumreceiveobjs[0][i];
  }
  if(0 != sumsendobj) {
    // there are still some msgs in flight; wait until some updated pregc
    // information arrives, then check again
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  }
}

INLINE void pregcprocessing() {
#ifdef GC_CACHE_ADAPT
#ifdef GC_CACHE_SAMPLING
  // disable the timer interrupt
  bamboo_mask_timer_intr();
#endif
#endif
  // Zero out the remaining memory here, because for the GC_CACHE_ADAPT
  // version we need to make sure that the shared heap is not touched during
  // the gcinit phase; otherwise, adapting the cache strategy would be a
  // problem.
  BAMBOO_CLOSE_CUR_MSP();
  if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
#ifdef GC_CACHE_ADAPT
#ifdef GC_CACHE_SAMPLING
  // get the sampling data
  bamboo_output_dtlb_sampling();
#endif
#endif
}

INLINE void postgcprocessing() {
#ifdef GC_CACHE_ADAPT
#ifdef GC_CACHE_SAMPLING
  // enable the timer interrupt
  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
  bamboo_unmask_timer_intr();
#endif
#endif
}
INLINE bool gc(struct garbagelist * stackptr) {
  gcprocessing = false;

  // core coordinator routine
  if(0 == BAMBOO_NUM_OF_CORE) {
    GC_PRINTF("Check if we can do gc or not\n");
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if(!gc_checkAllCoreStatus_I()) {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      // some of the cores are still executing the mutator and have not
      // reached a gc safe point, so gc is not ready to start
      return false;
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    GC_PRINTF("start gc! \n");
    gc_master(stackptr);
  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
    gc_collect(stackptr);
  } else {
    gc_nocollect(stackptr);