// TODO: object tags are NOT supported by this collector
4 #include "multicoreruntime.h"
5 #include "multicoregarbage.h"
6 #include "multicoregcmark.h"
8 #include "multicoregccompact.h"
9 #include "multicoregcflush.h"
10 #include "multicoregcprofile.h"
14 extern unsigned int gcmem_mixed_threshold;
15 extern unsigned int gcmem_mixed_usedmem;
19 gc_status_t gc_status_info;
21 unsigned long long gc_output_cache_policy_time=0;
24 // dump whole mem in blocks
33 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
34 // reserved blocks for sblocktbl
35 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
37 for(i=BAMBOO_BASE_VA; (unsinged int)i<(unsigned int)gcbaseva; i+= 4*16) {
38 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
39 udn_tile_coord_x(), udn_tile_coord_y(),
40 *((int *)(i)), *((int *)(i + 4)),
41 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
42 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
43 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
44 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
45 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
46 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
47 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
50 bool advanceblock = false;
52 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
54 // computing sblock # and block #, core coordinate (x,y) also
55 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
57 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
58 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
70 coren = gc_block2core[block%(NUMCORES4GC*2)];
72 // compute core coordinate
73 x = BAMBOO_COORDS_X(coren);
74 y = BAMBOO_COORDS_Y(coren);
75 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
76 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
77 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
80 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
81 udn_tile_coord_x(), udn_tile_coord_y(),
82 *((int *)(i)), *((int *)(i + 4)),
83 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
84 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
85 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
86 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
87 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
88 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
89 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
91 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
95 void initmulticoregcdata() {
96 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
97 // startup core to initialize corestatus[]
98 for(int i = 0; i < NUMCORESACTIVE; i++) {
100 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
101 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
103 for(int i = 0; i < NUMCORES4GC; i++) {
105 gcrequiredmems[i] = 0;
107 gcfilledblocks[i] = 0;
111 bamboo_smem_zero_top = NULL;
113 gc_status_info.gcprocessing = false;
114 gc_status_info.gcphase = FINISHPHASE;
118 gcself_numsendobjs = 0;
119 gcself_numreceiveobjs = 0;
120 gcmarkedptrbound = 0;
121 gcforwardobjtbl = allocateMGCHash_I(128);
130 gcmem_mixed_threshold=(unsigned int)((BAMBOO_SHARED_MEM_SIZE-bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
131 gcmem_mixed_usedmem = 0;
134 gc_profile_flag = false;
136 gc_localheap_s = false;
137 #ifdef GC_CACHE_ADAPT
138 gccachestage = false;
141 INIT_MULTICORE_GCPROFILE_DATA();
144 void dismulticoregcdata() {
145 freeMGCHash(gcforwardobjtbl);
149 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
150 for(int i = 0; i < NUMCORES4GC; i++) {
152 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
153 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
155 gcrequiredmems[i] = 0;
156 gcfilledblocks[i] = 0;
159 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
161 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
162 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
167 gcnumsrobjs_index = 0;
169 gcself_numsendobjs = 0;
170 gcself_numreceiveobjs = 0;
171 gcmarkedptrbound = 0;
180 MGCHashreset(gcforwardobjtbl);
183 gc_output_cache_policy_time=0;
186 bool gc_checkAllCoreStatus() {
187 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
188 for(int i = 0; i < NUMCORESACTIVE; i++) {
189 if(gccorestatus[i] != 0) {
190 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
194 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
198 // NOTE: should be invoked with interrupts turned off
199 bool gc_checkAllCoreStatus_I() {
200 for(int i = 0; i < NUMCORESACTIVE; i++) {
201 if(gccorestatus[i] != 0) {
208 void checkMarkStatus_p2() {
209 // check if the sum of send objs and receive obj are the same
210 // yes->check if the info is the latest; no->go on executing
211 unsigned int sumsendobj = 0;
212 for(int i = 0; i < NUMCORESACTIVE; i++) {
213 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
215 for(int i = 0; i < NUMCORESACTIVE; i++) {
216 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
218 if(0 == sumsendobj) {
219 // Check if there are changes of the numsendobjs or numreceiveobjs
222 for(i = 0; i < NUMCORESACTIVE; i++) {
223 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
227 if(i == NUMCORESACTIVE) {
228 // all the core status info are the latest,stop mark phase
229 gc_status_info.gcphase = COMPACTPHASE;
230 // restore the gcstatus for all cores
231 for(int i = 0; i < NUMCORESACTIVE; i++) {
235 // There were changes between phase 1 and phase 2, can not decide
236 // whether the mark phase has been finished
238 // As it fails in phase 2, flip the entries
239 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
242 // There were changes between phase 1 and phase 2, can not decide
243 // whether the mark phase has been finished
245 // As it fails in phase 2, flip the entries
246 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
250 void checkMarkStatus() {
251 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
252 unsigned int entry_index = 0;
255 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
258 entry_index = gcnumsrobjs_index;
260 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
261 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
262 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
263 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
264 // check the status of all cores
265 if (gc_checkAllCoreStatus_I()) {
268 // the first time found all cores stall
269 // send out status confirm msg to all other cores
270 // reset the corestatus array too
272 numconfirm = NUMCORESACTIVE - 1;
273 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
274 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
277 checkMarkStatus_p2();
278 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
281 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
286 // compute load balance for all cores
287 int loadbalance(void ** heaptop, unsigned int * topblock, unsigned int * topcore) {
288 // compute load balance
289 // get the total loads
290 unsigned int tloads = 0;
291 for(int i = 0; i < NUMCORES4GC; i++) {
292 tloads += gcloads[i];
294 *heaptop = gcbaseva + tloads;
296 unsigned int topblockindex;
298 BLOCKINDEX(topblockindex, *heaptop);
299 // num of blocks per core
300 unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
302 *topblock = topblockindex;
303 RESIDECORE(*heaptop, *topcore);
308 // update the bmmboo_smemtbl to record current shared mem usage
309 void updateSmemTbl(unsigned int coren, void * localtop) {
310 unsigned int ltopcore = 0;
311 unsigned int bound = BAMBOO_SMEM_SIZE_L;
312 BLOCKINDEX(ltopcore, localtop);
313 if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
314 bound = BAMBOO_SMEM_SIZE;
316 unsigned int load = (unsigned INTPTR)(localtop-gcbaseva)%(unsigned int)bound;
317 unsigned int toset = 0;
318 for(int j=0; 1; j++) {
319 for(int i=0; i<2; i++) {
320 toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
321 if(toset < ltopcore) {
322 bamboo_smemtbl[toset]=BLOCKSIZE(toset<NUMCORES4GC);
324 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
326 } else if(toset == ltopcore) {
327 bamboo_smemtbl[toset] = load;
329 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
339 void gc_collect(struct garbagelist * stackptr) {
340 gc_status_info.gcprocessing = true;
341 // inform the master that this core is at a gc safe point and is ready to
343 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
345 // core collector routine
346 //wait for init phase
347 WAITFORGCPHASE(INITPHASE);
349 GC_PRINTF("Do initGC\n");
352 //send init finish msg to core coordinator
353 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
355 //wait for mark phase
356 WAITFORGCPHASE(MARKPHASE);
358 GC_PRINTF("Start mark phase\n");
359 mark(true, stackptr);
360 GC_PRINTF("Finish mark phase, start compact phase\n");
362 GC_PRINTF("Finish compact phase\n");
364 WAITFORGCPHASE(UPDATEPHASE);
366 GC_PRINTF("Start flush phase\n");
367 GCPROFILE_INFO_2_MASTER();
369 GC_PRINTF("Finish flush phase\n");
371 CACHEADAPT_PHASE_CLIENT();
373 // invalidate all shared mem pointers
374 bamboo_cur_msp = NULL;
375 bamboo_smem_size = 0;
376 bamboo_smem_zero_top = NULL;
379 WAITFORGCPHASE(FINISHPHASE);
381 GC_PRINTF("Finish gc! \n");
384 void gc_nocollect(struct garbagelist * stackptr) {
385 gc_status_info.gcprocessing = true;
386 // inform the master that this core is at a gc safe point and is ready to
388 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
390 WAITFORGCPHASE(INITPHASE);
392 GC_PRINTF("Do initGC\n");
395 //send init finish msg to core coordinator
396 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
398 WAITFORGCPHASE(MARKPHASE);
400 GC_PRINTF("Start mark phase\n");
401 mark(true, stackptr);
402 GC_PRINTF("Finish mark phase, wait for flush\n");
404 // non-gc core collector routine
405 WAITFORGCPHASE(UPDATEPHASE);
407 GC_PRINTF("Start flush phase\n");
408 GCPROFILE_INFO_2_MASTER();
410 GC_PRINTF("Finish flush phase\n");
412 CACHEADAPT_PHASE_CLIENT();
414 // invalidate all shared mem pointers
415 bamboo_cur_msp = NULL;
416 bamboo_smem_size = 0;
417 bamboo_smem_zero_top = NULL;
420 WAITFORGCPHASE(FINISHPHASE);
422 GC_PRINTF("Finish gc! \n");
425 void master_mark(struct garbagelist *stackptr) {
428 GC_PRINTF("Start mark phase \n");
429 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
430 gc_status_info.gcphase = MARKPHASE;
433 while(MARKPHASE == gc_status_info.gcphase) {
434 mark(isfirst, stackptr);
441 void master_getlargeobjs() {
442 // send msgs to all cores requiring large objs info
443 // Note: only need to ask gc cores, non-gc cores do not host any objs
444 numconfirm = NUMCORES4GC - 1;
445 for(int i = 1; i < NUMCORES4GC; i++) {
446 send_msg_1(i,GCLOBJREQUEST);
448 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
449 //spin until we have all responses
450 while(numconfirm!=0) ;
453 if(gcheaptop < gcmarkedptrbound) {
454 gcheaptop = gcmarkedptrbound;
457 GC_PRINTF("prepare to cache large objs \n");
462 void master_updaterefs(struct garbagelist * stackptr) {
463 gc_status_info.gcphase = UPDATEPHASE;
464 GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
466 GC_PRINTF("Start flush phase \n");
469 GC_CHECK_ALL_CORE_STATUS(UPDATEPHASE==gc_status_info.gcphase);
470 GC_PRINTF("Finish flush phase \n");
473 void master_finish() {
474 gc_status_info.gcphase = FINISHPHASE;
476 // invalidate all shared mem pointers
477 // put it here as it takes time to inform all the other cores to
478 // finish gc and it might cause problem when some core resumes
479 // mutator earlier than the other cores
480 bamboo_cur_msp = NULL;
481 bamboo_smem_size = 0;
482 bamboo_smem_zero_top = NULL;
485 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
486 CACHEADAPT_OUTPUT_CACHE_POLICY();
487 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
489 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
491 gc_status_info.gcprocessing = false;
493 // inform other cores to stop and wait for gc
495 for(int i = 0; i < NUMCORESACTIVE; i++) {
496 // reuse the gcnumsendobjs & gcnumreceiveobjs
497 gcnumsendobjs[0][i] = 0;
498 gcnumreceiveobjs[0][i] = 0;
500 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
504 void gc_master(struct garbagelist * stackptr) {
505 tprintf("start GC !!!!!!!!!!!!! \n");
506 gc_status_info.gcprocessing = true;
507 gc_status_info.gcphase = INITPHASE;
512 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
514 GC_PRINTF("Check core status \n");
515 GC_CHECK_ALL_CORE_STATUS(true);
517 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
518 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
519 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
522 master_mark(stackptr);
524 // get large objects from all cores
525 master_getlargeobjs();
530 // update the references
531 master_updaterefs(stackptr);
533 // do cache adaptation
534 CACHEADAPT_PHASE_MASTER();
536 // do finish up stuff
539 GC_PRINTF("gc finished \n");
540 tprintf("finish GC ! %d \n",gcflag);
545 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
546 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
547 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
549 for(int i = 0; i < NUMCORESACTIVE; i++) {
550 sumsendobj += gcnumsendobjs[0][i];
552 for(int i = 0; i < NUMCORESACTIVE; i++) {
553 sumsendobj -= gcnumreceiveobjs[0][i];
555 if(0 != sumsendobj) {
556 // there were still some msgs on the fly, wait until there
557 // are some update pregc information coming and check it again
559 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
563 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
569 void pregcprocessing() {
570 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
571 // disable the timer interrupt
572 bamboo_mask_timer_intr();
574 // Zero out the remaining memory here because for the GC_CACHE_ADAPT version,
575 // we need to make sure during the gcinit phase the shared heap is not
576 // touched. Otherwise, there would be problem when adapt the cache strategy.
577 BAMBOO_CLOSE_CUR_MSP();
578 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
579 // get the sampling data
580 bamboo_output_dtlb_sampling();
584 void postgcprocessing() {
585 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
586 // enable the timer interrupt
587 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
588 bamboo_unmask_timer_intr();
592 bool gc(struct garbagelist * stackptr) {
595 gc_status_info.gcprocessing = false;
599 // core coordinator routine
600 if(0 == BAMBOO_NUM_OF_CORE) {
601 GC_PRINTF("Check if we can do gc or not\n");
602 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
603 if(!gc_checkAllCoreStatus()) {
604 // some of the cores are still executing the mutator and did not reach
605 // some gc safe point, therefore it is not ready to do gc
612 GC_PRINTF("start gc! \n");
615 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
617 gc_collect(stackptr);
620 gc_nocollect(stackptr);