1 // TODO: DO NOT support tag!!!
4 #include "multicoreruntime.h"
5 #include "multicoregarbage.h"
6 #include "multicoregcmark.h"
8 #include "multicoregccompact.h"
9 #include "multicoregcflush.h"
10 #include "multicoregcprofile.h"
// Watermarks for the mixed shared-memory allocation policy (defined in the
// memory allocator translation unit; initialized in initmulticoregcdata()).
14 extern unsigned int gcmem_mixed_threshold;
15 extern unsigned int gcmem_mixed_usedmem;
// Global GC state shared by master and client routines: current phase and
// whether a collection is currently in progress.
19 gc_status_t gc_status_info;
// Accumulated time spent outputting cache-adaptation policy data (profiling).
21 unsigned long long gc_output_cache_policy_time=0;
24 // dump whole mem in blocks
33 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
34 // reserved blocks for sblocktbl
35 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
37 for(i=BAMBOO_BASE_VA; (unsinged int)i<(unsigned int)gcbaseva; i+= 4*16) {
38 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
39 udn_tile_coord_x(), udn_tile_coord_y(),
40 *((int *)(i)), *((int *)(i + 4)),
41 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
42 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
43 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
44 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
45 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
46 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
47 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
50 bool advanceblock = false;
52 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
54 // computing sblock # and block #, core coordinate (x,y) also
55 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
57 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
58 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
70 coren = gc_block2core[block%(NUMCORES4GC*2)];
72 // compute core coordinate
73 x = BAMBOO_COORDS_X(coren);
74 y = BAMBOO_COORDS_Y(coren);
75 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
76 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
77 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
80 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
81 udn_tile_coord_x(), udn_tile_coord_y(),
82 *((int *)(i)), *((int *)(i + 4)),
83 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
84 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
85 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
86 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
87 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
88 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
89 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
91 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
// Initialize all per-core GC bookkeeping at startup.  The startup core
// additionally resets the global per-core status, send/receive object-count
// and memory-request arrays.  NOTE(review): this view is a sampled
// extraction; intermediate source lines are missing.  Code kept verbatim.
95 void initmulticoregcdata() {
96 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
97 // startup core to initialize corestatus[]
98 for(int i = 0; i < NUMCORESACTIVE; i++) {
// Two snapshot slots ([0]/[1]) are kept per core for the mark-termination
// double-sampling protocol (see checkMarkStatus_p2()).
100 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
101 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
103 for(int i = 0; i < NUMCORES4GC; i++) {
105 gcrequiredmems[i] = 0;
107 gcfilledblocks[i] = 0;
111 bamboo_smem_zero_top = NULL;
// GC starts out idle, in the FINISH (i.e. "no collection running") phase.
113 gc_status_info.gcprocessing = false;
114 gc_status_info.gcphase = FINISHPHASE;
118 gcself_numsendobjs = 0;
119 gcself_numreceiveobjs = 0;
120 gcmarkedptrbound = 0;
// Forwarding table used during compaction; 128 initial buckets.
121 gcforwardobjtbl = allocateMGCHash_I(128);
// Mixed-policy threshold: 80% of the shared heap minus reserved blocks.
131 gcmem_mixed_threshold=(unsigned int)((BAMBOO_SHARED_MEM_SIZE-bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
132 gcmem_mixed_usedmem = 0;
135 gc_profile_flag = false;
137 gc_localheap_s = false;
138 #ifdef GC_CACHE_ADAPT
139 gccachestage = false;
142 INIT_MULTICORE_GCPROFILE_DATA();
// Tear down GC data allocated in initmulticoregcdata().
145 void dismulticoregcdata() {
146 freeMGCHash(gcforwardobjtbl);
// --- reset-between-collections routine (signature not visible in this
// view; presumably initGC() or similar — verify against full source). ---
// Startup core zeroes the per-GC-core request/fill arrays and both
// snapshot slots of the object counters...
150 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
151 for(int i = 0; i < NUMCORES4GC; i++) {
153 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
154 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
156 gcrequiredmems[i] = 0;
157 gcfilledblocks[i] = 0;
// ...and the object counters only for the remaining (non-GC) active cores.
160 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
162 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
163 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
168 gcnumsrobjs_index = 0;
// Per-core local counters reset on every core.
170 gcself_numsendobjs = 0;
171 gcself_numreceiveobjs = 0;
172 gcmarkedptrbound = 0;
// Clear the forwarding table rather than reallocating it.
182 MGCHashreset(gcforwardobjtbl);
185 gc_output_cache_policy_time=0;
// Check whether every active core has reported status 0 (stalled / at a GC
// safe point).  Enters runtime mode to read gccorestatus[] consistently and
// restores client mode on both paths.  NOTE(review): the return statements
// are on lines missing from this view; presumably returns false from the
// early-exit branch and true after the loop — confirm against full source.
188 bool gc_checkAllCoreStatus() {
189 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
190 for(int i = 0; i < NUMCORESACTIVE; i++) {
191 if(gccorestatus[i] != 0) {
192 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
196 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
200 // NOTE: should be invoked with interrupts turned off
// Interrupt-off variant of gc_checkAllCoreStatus(): no mode transitions,
// caller guarantees exclusive access to gccorestatus[].
201 bool gc_checkAllCoreStatus_I() {
202 for(int i = 0; i < NUMCORESACTIVE; i++) {
203 if(gccorestatus[i] != 0) {
// Second phase of the distributed mark-termination check: verify that the
// total number of objects sent equals the total received (no marking work
// in flight), and that the two snapshot slots agree (counts did not change
// between phase 1 and phase 2).  Only then is the mark phase declared done.
210 void checkMarkStatus_p2() {
211 // check if the sum of send objs and receive obj are the same
212 // yes->check if the info is the latest; no->go on executing
213 unsigned int sumsendobj = 0;
214 for(int i = 0; i < NUMCORESACTIVE; i++) {
215 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
217 for(int i = 0; i < NUMCORESACTIVE; i++) {
218 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
// Zero difference => every sent object was received somewhere.
220 if(0 == sumsendobj) {
221 // Check if there are changes of the numsendobjs or numreceiveobjs
// Compare the two snapshots; loop index i survives the loop so the
// "i == NUMCORESACTIVE" test below detects "no mismatch found".
224 for(i = 0; i < NUMCORESACTIVE; i++) {
225 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
229 if(i == NUMCORESACTIVE) {
230 // all the core status info are the latest,stop mark phase
231 gc_status_info.gcphase = COMPACTPHASE;
232 // restore the gcstatus for all cores
233 for(int i = 0; i < NUMCORESACTIVE; i++) {
237 // There were changes between phase 1 and phase 2, can not decide
238 // whether the mark phase has been finished
240 // As it fails in phase 2, flip the entries
241 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
244 // There were changes between phase 1 and phase 2, can not decide
245 // whether the mark phase has been finished
247 // As it fails in phase 2, flip the entries
248 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// First phase of the mark-termination check, run on the master when a core
// reports it has stalled.  Records this core's own counters into the
// appropriate snapshot slot, and if all cores appear stalled either
// broadcasts a GCMARKCONFIRM round (first detection) or proceeds to the
// phase-2 verification.
252 void checkMarkStatus() {
253 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
254 unsigned int entry_index = 0;
// Write into the slot NOT currently being summed when a confirm round is
// outstanding; otherwise into the current slot.
257 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
260 entry_index = gcnumsrobjs_index;
262 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
263 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
264 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
265 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
266 // check the status of all cores
267 if (gc_checkAllCoreStatus_I()) {
270 // the first time found all cores stall
271 // send out status confirm msg to all other cores
272 // reset the corestatus array too
274 numconfirm = NUMCORESACTIVE - 1;
275 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
276 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
// All confirms already gathered: run the phase-2 consistency check.
279 checkMarkStatus_p2();
280 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
283 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
288 // compute load balance for all cores
// Sum the per-core marked loads to find the post-compaction heap top, then
// derive the block index and owning core of that top address.  Outputs via
// heaptop/topblock/topcore; returns the number of blocks per core
// (presumably — the return statement is not visible in this view).
289 int loadbalance(void ** heaptop, unsigned int * topblock, unsigned int * topcore) {
290 // compute load balance
291 // get the total loads
292 unsigned int tloads = 0;
293 for(int i = 0; i < NUMCORES4GC; i++) {
294 tloads += gcloads[i];
// Heap top = base + total live data (compaction packs from the base).
296 *heaptop = gcbaseva + tloads;
298 unsigned int topblockindex;
300 BLOCKINDEX(topblockindex, *heaptop);
301 // num of blocks per core
// Ceiling division: distribute topblockindex blocks over NUMCORES4GC cores.
302 unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
304 *topblock = topblockindex;
305 RESIDECORE(*heaptop, *topcore);
309 // compute total mem size required and sort the lobjs in ascending order
// Insertion sort over the linked list of large-object pointer blocks
// (gclobjtail2/gclobjtailindex2 walk the list via gc_lobjmoreItems2_I()).
// Each entry carries (lobj address, length, host core); entries are shifted
// toward the tail until the insertion point for tmp_lobj is found.
// Returns the accumulated total size (sumsize) — the accumulation line is
// not visible in this sampled view; verify against full source.
310 unsigned int sortLObjs() {
311 unsigned int tmp_lobj = 0;
312 unsigned int tmp_len = 0;
313 unsigned int tmp_host = 0;
314 unsigned int sumsize = 0;
316 gclobjtail2 = gclobjtail;
317 gclobjtailindex2 = gclobjtailindex;
318 // TODO USE QUICK SORT INSTEAD?
319 while(gc_lobjmoreItems2_I()) {
// Take the next unsorted entry (the one just before the current index).
321 tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
322 tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
323 tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
325 GCPROFILE_RECORD_LOBJ();
326 unsigned int i = gclobjtailindex2-1;
327 struct lobjpointerblock * tmp_block = gclobjtail2;
328 // find the place to insert
// At the start of a block: compare against the last entry of the previous
// block, shifting it down and crossing the block boundary when larger.
331 if(tmp_block->prev == NULL) {
334 if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
335 tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
336 tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
337 tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
338 tmp_block = tmp_block->prev;
342 } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
// Within a block: standard insertion-sort shift of larger entries.
344 if(tmp_block->lobjs[i-1] > tmp_lobj) {
345 tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
346 tmp_block->lengths[i] = tmp_block->lengths[i-1];
347 tmp_block->hosts[i] = tmp_block->hosts[i-1];
// Write the saved entry into its slot only if it actually moved.
355 if(i != gclobjtailindex2 - 1) {
356 tmp_block->lobjs[i] = tmp_lobj;
357 tmp_block->lengths[i] = tmp_len;
358 tmp_block->hosts[i] = tmp_host;
// --- large-object caching routine (signature not visible in this view;
// master_getlargeobjs() calls cacheLObjs() — presumably this is it). ---
// Sorts the large objects, then moves them to the very top of the shared
// heap so compaction of small objects can proceed below; fails when the
// marked heap top would overlap the cached region.
365 // check the total mem size need for large objs
366 unsigned long long sumsize = 0;
367 unsigned int size = 0;
369 sumsize = sortLObjs();
371 GCPROFILE_RECORD_LOBJSPACE();
373 // check if there are enough space to cache these large objs
// Destination region: [heap_end - sumsize, heap_end).
374 unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
375 if((unsigned long long)gcheaptop > (unsigned long long)dst) {
376 // do not have enough room to cache large objs
380 gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
381 // cache the largeObjs to the top of the shared heap
382 dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
383 while(gc_lobjmoreItems3_I()) {
385 size = gclobjtail2->lengths[gclobjtailindex2];
386 // set the mark field to , indicating that this obj has been moved
387 // and need to be flushed
// memmove when source and destination may overlap, memcpy otherwise.
389 if((unsigned int)dst<(unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
390 memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
392 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
398 // update the bmmboo_smemtbl to record current shared mem usage
// For core `coren`, walk its blocks (two base entries in gc_core2block,
// strided by NUMCORES4GC*2): blocks fully below localtop are marked full,
// the block containing localtop gets the partial load, and (in the mixed
// policy) gcmem_mixed_usedmem is accumulated accordingly.
399 void updateSmemTbl(unsigned int coren, void * localtop) {
400 unsigned int ltopcore = 0;
401 unsigned int bound = BAMBOO_SMEM_SIZE_L;
402 BLOCKINDEX(ltopcore, localtop);
// Above BAMBOO_LARGE_SMEM_BOUND the smaller regular block size applies.
403 if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
404 bound = BAMBOO_SMEM_SIZE;
// Bytes used inside the block that contains localtop.
406 unsigned int load = (unsigned INTPTR)(localtop-gcbaseva)%(unsigned int)bound;
407 unsigned int toset = 0;
// Outer loop over block "rounds"; exits via break on lines not visible in
// this sampled view.
408 for(int j=0; 1; j++) {
409 for(int i=0; i<2; i++) {
410 toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
411 if(toset < ltopcore) {
412 bamboo_smemtbl[toset]=BLOCKSIZE(toset<NUMCORES4GC);
414 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
416 } else if(toset == ltopcore) {
417 bamboo_smemtbl[toset] = load;
419 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
// Client-side GC routine for cores that participate in collection
// (BAMBOO_NUM_OF_CORE < NUMCORES4GC).  Walks the phases driven by the
// master: INIT -> MARK -> (compact) -> FLUSH -> FINISH, sending completion
// messages to STARTUPCORE between phases.
429 void gc_collect(struct garbagelist * stackptr) {
430 gc_status_info.gcprocessing = true;
431 // inform the master that this core is at a gc safe point and is ready to
433 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
435 // core collector routine
436 //wait for init phase
437 WAITFORGCPHASE(INITPHASE);
439 GC_PRINTF("Do initGC\n");
442 //send init finish msg to core coordinator
443 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
445 //wait for mark phase
446 WAITFORGCPHASE(MARKPHASE);
448 GC_PRINTF("Start mark phase\n");
// true => this core scans its own stack roots (stackptr) as well.
449 mark(true, stackptr);
450 GC_PRINTF("Finish mark phase, start compact phase\n");
// (Compact-phase call sits on lines missing from this sampled view.)
452 GC_PRINTF("Finish compact phase\n");
454 WAITFORGCPHASE(FLUSHPHASE);
456 GC_PRINTF("Start flush phase\n");
457 GCPROFILE_INFO_2_MASTER();
459 GC_PRINTF("Finish flush phase\n");
461 CACHEADAPT_PHASE_CLIENT();
463 // invalidate all shared mem pointers
// Cached allocation window is stale after compaction; force re-request.
464 bamboo_cur_msp = NULL;
465 bamboo_smem_size = 0;
466 bamboo_smem_zero_top = NULL;
469 WAITFORGCPHASE(FINISHPHASE);
471 GC_PRINTF("Finish gc! \n");
// Client-side GC routine for cores that do NOT host collectible heap blocks
// (BAMBOO_NUM_OF_CORE >= NUMCORES4GC).  Mirrors gc_collect() but skips the
// compact phase: init, mark its own roots, then wait for flush and finish.
474 void gc_nocollect(struct garbagelist * stackptr) {
475 gc_status_info.gcprocessing = true;
476 // inform the master that this core is at a gc safe point and is ready to
478 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
480 WAITFORGCPHASE(INITPHASE);
482 GC_PRINTF("Do initGC\n");
485 //send init finish msg to core coordinator
486 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
488 WAITFORGCPHASE(MARKPHASE);
490 GC_PRINTF("Start mark phase\n");
491 mark(true, stackptr);
492 GC_PRINTF("Finish mark phase, wait for flush\n");
494 // non-gc core collector routine
495 WAITFORGCPHASE(FLUSHPHASE);
497 GC_PRINTF("Start flush phase\n");
498 GCPROFILE_INFO_2_MASTER();
500 GC_PRINTF("Finish flush phase\n");
502 CACHEADAPT_PHASE_CLIENT();
504 // invalidate all shared mem pointers
505 bamboo_cur_msp = NULL;
506 bamboo_smem_size = 0;
507 bamboo_smem_zero_top = NULL;
510 WAITFORGCPHASE(FINISHPHASE);
512 GC_PRINTF("Finish gc! \n");
// Master side of the mark phase: broadcast GCSTART, enter MARKPHASE, and
// keep marking until checkMarkStatus()/checkMarkStatus_p2() (triggered by
// incoming messages) advances gcphase to COMPACTPHASE.
515 void master_mark(struct garbagelist *stackptr) {
518 GC_PRINTF("Start mark phase \n");
519 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
520 gc_status_info.gcphase = MARKPHASE;
// isfirst is presumably set on lines not visible in this sampled view.
523 while(MARKPHASE == gc_status_info.gcphase) {
524 mark(isfirst, stackptr);
// Master collects large-object info from every GC core, then caches all
// large objects at the top of the shared heap (cacheLObjs()).
531 void master_getlargeobjs() {
532 // send msgs to all cores requiring large objs info
533 // Note: only need to ask gc cores, non-gc cores do not host any objs
534 numconfirm = NUMCORES4GC - 1;
535 for(int i = 1; i < NUMCORES4GC; i++) {
536 send_msg_1(i,GCLOBJREQUEST);
// Record the master's own load while waiting for responses.
538 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
539 //spin until we have all responses
// numconfirm is decremented by the message handler (interrupt context).
540 while(numconfirm!=0) ;
// gcheaptop must cover the highest marked address before caching lobjs.
543 if(gcheaptop < gcmarkedptrbound) {
544 gcheaptop = gcmarkedptrbound;
547 GC_PRINTF("prepare to cache large objs \n");
549 // cache all large objs
550 BAMBOO_ASSERTMSG(cacheLObjs(), "Not enough space to cache large objects\n");
// Master side of the flush (reference-update) phase: broadcast
// GCSTARTFLUSH and wait until every core has finished flushing.
554 void master_updaterefs(struct garbagelist * stackptr) {
555 gc_status_info.gcphase = FLUSHPHASE;
556 GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
558 GC_PRINTF("Start flush phase \n");
// Block until all cores report done while still in FLUSHPHASE.
561 GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gc_status_info.gcphase);
562 GC_PRINTF("Finish flush phase \n");
// Master finish-up: enter FINISHPHASE, invalidate this core's cached
// shared-memory allocation pointers BEFORE telling the other cores to
// resume (see comment below), emit cache-policy data, broadcast GCFINISH,
// and reset the reused counters for the next pre-GC handshake.
565 void master_finish() {
566 gc_status_info.gcphase = FINISHPHASE;
568 // invalidate all shared mem pointers
569 // put it here as it takes time to inform all the other cores to
570 // finish gc and it might cause problem when some core resumes
571 // mutator earlier than the other cores
572 bamboo_cur_msp = NULL;
573 bamboo_smem_size = 0;
574 bamboo_smem_zero_top = NULL;
// Time the cache-policy output separately so it can be excluded from GC
// time measurements.
577 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
578 CACHEADAPT_OUTPUT_CACHE_POLICY();
579 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
581 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
583 gc_status_info.gcprocessing = false;
585 // inform other cores to stop and wait for gc
587 for(int i = 0; i < NUMCORESACTIVE; i++) {
588 // reuse the gcnumsendobjs & gcnumreceiveobjs
589 gcnumsendobjs[0][i] = 0;
590 gcnumreceiveobjs[0][i] = 0;
592 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
// GC coordinator routine, run on the startup core: drives all phases in
// order (init -> mark -> large objects -> compact -> flush -> cache-adapt
// -> finish).  Several phase calls sit on lines missing from this view.
596 void gc_master(struct garbagelist * stackptr) {
597 tprintf("start GC !!!!!!!!!!!!! \n");
598 gc_status_info.gcprocessing = true;
599 gc_status_info.gcphase = INITPHASE;
604 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
606 GC_PRINTF("Check core status \n");
// Wait for every core to acknowledge init.
607 GC_CHECK_ALL_CORE_STATUS(true);
609 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
610 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
611 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
614 master_mark(stackptr);
616 // get large objects from all cores
617 master_getlargeobjs();
622 // update the references
623 master_updaterefs(stackptr);
625 // do cache adaptation
626 CACHEADAPT_PHASE_MASTER();
628 // do finish up stuff
631 GC_PRINTF("gc finished \n");
632 tprintf("finish GC ! %d \n",gcflag);
// --- pre-GC message check fragment (function signature not visible in
// this view; presumably pregccheck()).  Before starting a collection the
// master verifies no mutator messages are still in flight by summing sent
// minus received object counts across all active cores; a nonzero sum
// means it must wait for updated pre-GC info and retry.
637 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
638 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
639 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
641 for(int i = 0; i < NUMCORESACTIVE; i++) {
642 sumsendobj += gcnumsendobjs[0][i];
644 for(int i = 0; i < NUMCORESACTIVE; i++) {
645 sumsendobj -= gcnumreceiveobjs[0][i];
647 if(0 != sumsendobj) {
648 // there were still some msgs on the fly, wait until there
649 // are some update pregc information coming and check it again
651 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
655 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Per-core preparation immediately before a collection: with cache
// sampling enabled, mask the sampling timer interrupt and flush the
// collected DTLB sampling data; close the current shared-memory span so
// the heap is untouched during gcinit (required by GC_CACHE_ADAPT).
661 void pregcprocessing() {
662 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
663 // disable the timer interrupt
664 bamboo_mask_timer_intr();
666 // Zero out the remaining memory here because for the GC_CACHE_ADAPT version,
667 // we need to make sure during the gcinit phase the shared heap is not
668 // touched. Otherwise, there would be problem when adapt the cache strategy.
669 BAMBOO_CLOSE_CUR_MSP();
670 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
671 // get the sampling data
672 bamboo_output_dtlb_sampling();
// Per-core cleanup after a collection: re-arm and unmask the cache
// sampling timer interrupt (counterpart of pregcprocessing()).
676 void postgcprocessing() {
677 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
678 // enable the timer interrupt
679 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
680 bamboo_unmask_timer_intr();
// Top-level GC entry point, called by every core at a GC safe point.
// Dispatches by core role: core 0 coordinates (after verifying all cores
// are stalled), cores < NUMCORES4GC run gc_collect(), the rest run
// gc_nocollect().  Continues past the end of this view.
684 bool gc(struct garbagelist * stackptr) {
687 gc_status_info.gcprocessing = false;
691 // core coordinator routine
692 if(0 == BAMBOO_NUM_OF_CORE) {
693 GC_PRINTF("Check if we can do gc or not\n");
694 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
695 if(!gc_checkAllCoreStatus()) {
696 // some of the cores are still executing the mutator and did not reach
697 // some gc safe point, therefore it is not ready to do gc
704 GC_PRINTF("start gc! \n");
707 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
709 gc_collect(stackptr);
712 gc_nocollect(stackptr);