1 // TODO: DO NOT support tag!!!
4 #include "multicoreruntime.h"
5 #include "multicoregarbage.h"
6 #include "multicoregcmark.h"
8 #include "multicoregccompact.h"
9 #include "multicoregcflush.h"
10 #include "multicoregcprofile.h"
// Global GC status record (current phase + whether a collection is in
// progress); read and written by the phase-coordination code below.
14 gc_status_t gc_status_info;
// Accumulated wall time spent emitting cache-policy output; updated around
// CACHEADAPT_OUTPUT_CACHE_POLICY()/..._SAMPLING() calls and reset elsewhere.
16 unsigned long long gc_output_cache_policy_time=0;
// Debug dump of the whole shared-memory heap, 16 words (64 bytes) per printed
// row, tagged with this tile's UDN (x,y) coordinate.
// NOTE(review): this chunk is missing interior lines (the function signature,
// local declarations, several braces/else-branches are elided), so only the
// visible lines are annotated here.
19 // dump whole mem in blocks
28 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
29 // reserved blocks for sblocktbl
30 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
// BUG(review): "unsinged" is a typo for "unsigned" — this line cannot compile
// as written.
32 for(i=BAMBOO_BASE_VA; (unsinged int)i<(unsigned int)gcbaseva; i+= 4*16) {
// Print one row of 16 consecutive 32-bit words starting at address i.
33 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
34 udn_tile_coord_x(), udn_tile_coord_y(),
35 *((int *)(i)), *((int *)(i + 4)),
36 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
37 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
38 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
39 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
40 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
41 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
42 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
45 bool advanceblock = false;
// Walk the GC-managed region [gcbaseva, gcbaseva+BAMBOO_SHARED_MEM_SIZE) in
// 64-byte rows; j (declared in an elided line) counts rows within a block.
47 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
49 // computing sblock # and block #, core coordinate (x,y) also
50 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
// Distinguish large (BAMBOO_SMEM_SIZE_L) blocks below BAMBOO_LARGE_SMEM_BOUND
// from regular blocks above it; branch bodies are partially elided.
52 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
53 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
// Map the block number to its owning core (blocks are distributed over
// NUMCORES4GC*2 slots), then to tile coordinates for the header line.
65 coren = gc_block2core[block%(NUMCORES4GC*2)];
67 // compute core coordinate
68 x = BAMBOO_COORDS_X(coren);
69 y = BAMBOO_COORDS_Y(coren);
70 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
71 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
72 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
// Row dump identical in shape to the reserved-area dump above.
75 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
76 udn_tile_coord_x(), udn_tile_coord_y(),
77 *((int *)(i)), *((int *)(i + 4)),
78 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
79 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
80 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
81 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
82 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
83 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
84 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
86 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
// One-time initialization of per-core GC data structures; the startup core
// additionally allocates and seeds the global block table.
// NOTE(review): interior lines are elided in this chunk (e.g. the #endif for
// GC_CACHE_ADAPT, the condition separating the two corenum assignments below,
// and closing braces).
90 void initmulticoregcdata() {
91 bamboo_smem_zero_top = NULL;
// Start in "no GC running" state: FINISHPHASE marks the collector idle.
93 gc_status_info.gcprocessing = false;
94 gc_status_info.gcphase = FINISHPHASE;
// Per-core forwarding table for marked/moved objects (interrupt-safe alloc).
97 gcforwardobjtbl = allocateMGCHash_I(128);
100 gc_profile_flag = false;
102 #ifdef GC_CACHE_ADAPT
103 gccachestage = false;
// Only the startup (master) core owns the global allocation block table.
106 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
// NOTE(review): RUNMALLOC result is used unchecked; confirm RUNMALLOC
// aborts on failure rather than returning NULL.
107 allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
108 for(int i=0; i<GCNUMBLOCK;i++) {
// NOTE(review): two corenum assignments with the selecting conditional
// elided between them — presumably a large-block vs. normal-block split;
// verify against the full source.
110 allocationinfo.blocktable[i].corenum=0;
112 allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
116 INIT_MULTICORE_GCPROFILE_DATA();
// Tear-down counterpart of initmulticoregcdata(): releases the forwarding
// table. NOTE(review): the rest of the body (and closing brace) is elided in
// this chunk.
119 void dismulticoregcdata() {
120 freeMGCHash(gcforwardobjtbl);
// NOTE(review): the lines below are the interior of a per-collection reset
// routine whose signature is elided in this chunk (presumably initGC) —
// confirm against the full source before editing.
// Master core: reset every block record and the free-block cursor, then clear
// the per-core send/receive counters and memory-request bookkeeping.
124 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
125 for(int i=0; i<GCNUMBLOCK;i++) {
126 allocationinfo.blocktable[i].status=BS_INIT;
128 allocationinfo.lowestfreeblock=NOFREEBLOCK;
// GC cores: reset both generations of object-message counters plus the
// compaction bookkeeping (required memory, filled blocks).
129 for(int i = 0; i < NUMCORES4GC; i++) {
131 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
132 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
134 gcrequiredmems[i] = 0;
135 gcfilledblocks[i] = 0;
// Non-GC (but active) cores only carry the message counters.
138 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
140 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
141 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
// Start the two-phase mark-termination protocol from entry set 0.
144 gcnumsrobjs_index = 0;
146 gcself_numsendobjs = 0;
147 gcself_numreceiveobjs = 0;
148 gcmarkedptrbound = 0;
// Clear the forwarding table for the new collection cycle.
157 MGCHashreset(gcforwardobjtbl);
160 gc_output_cache_policy_time=0;
// Returns whether every active core has reported status 0 (stalled/ready).
// NOTE(review): the early-return and final "return true" lines are elided in
// this chunk.
163 bool gc_checkAllCoreStatus() {
164 for(int i = 0; i < NUMCORESACTIVE; i++) {
165 if(gccorestatus[i] != 0) {
// Interrupt-off variant of gc_checkAllCoreStatus(): same scan of
// gccorestatus[], callable only with interrupts disabled (per the original
// note). NOTE(review): return statements are elided in this chunk.
172 // NOTE: should be invoked with interrupts turned off
173 bool gc_checkAllCoreStatus_I() {
174 for(int i = 0; i < NUMCORESACTIVE; i++) {
175 if(gccorestatus[i] != 0) {
// Second phase of mark-termination detection: verifies that the total number
// of objects sent equals the total received AND that the counters did not
// change between the two confirmation rounds; only then is the mark phase
// declared finished and the GC advanced to COMPACTPHASE.
// NOTE(review): several interior lines (loop bodies, braces, the declaration
// of the outer-scope 'i' used at original lines 196/201) are elided.
182 void checkMarkStatus_p2() {
183 // check if the sum of send objs and receive obj are the same
184 // yes->check if the info is the latest; no->go on executing
185 unsigned int sumsendobj = 0;
186 for(int i = 0; i < NUMCORESACTIVE; i++) {
187 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
189 for(int i = 0; i < NUMCORESACTIVE; i++) {
190 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
// Sum of sends minus receives == 0 means no marking messages are in flight.
192 if(0 == sumsendobj) {
193 // Check if there are changes of the numsendobjs or numreceiveobjs
// Compare entry set 0 against entry set 1; any difference means activity
// happened between the two confirmation rounds.
196 for(i = 0; i < NUMCORESACTIVE; i++) {
197 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
// Loop ran to completion without a mismatch: counters are stable.
201 if(i == NUMCORESACTIVE) {
202 // all the core status info are the latest,stop mark phase
203 gc_status_info.gcphase = COMPACTPHASE;
204 // restore the gcstatus for all cores
205 for(int i = 0; i < NUMCORESACTIVE; i++) {
209 // There were changes between phase 1 and phase 2, can not decide
210 // whether the mark phase has been finished
// Flip the active entry set so the next confirmation round writes into the
// other generation of counters.
212 // As it fails in phase 2, flip the entries
213 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
216 // There were changes between phase 1 and phase 2, can not decide
217 // whether the mark phase has been finished
219 // As it fails in phase 2, flip the entries
220 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// First phase of mark-termination detection on the master core: records this
// core's own counters, and when all cores appear stalled either broadcasts a
// GCMARKCONFIRM round (first detection) or runs checkMarkStatus_p2() to
// validate the second round. NOTE(review): several braces/else lines are
// elided in this chunk.
224 void checkMarkStatus() {
// Proceed only when not waiting on confirmations, or all have arrived.
225 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
226 unsigned int entry_index = 0;
// First round writes into the inactive entry set; second round (elided
// branch) uses the active one so _p2 can compare the two.
229 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
232 entry_index = gcnumsrobjs_index;
234 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
235 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
236 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
237 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
238 // check the status of all cores
239 if (gc_checkAllCoreStatus_I()) {
242 // the first time found all cores stall
243 // send out status confirm msg to all other cores
244 // reset the corestatus array too
// Expect one confirmation from every other active core.
246 numconfirm = NUMCORESACTIVE - 1;
247 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
248 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
251 checkMarkStatus_p2();
252 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
255 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Computes the compaction target: sums the per-core live loads to find the
// post-compaction heap top, and derives how many blocks each GC core should
// receive. NOTE(review): the return statement (presumably returning numbpc)
// is elided in this chunk — confirm against the full source.
260 // compute load balance for all cores
261 int loadbalance(void ** heaptop) {
262 // compute load balance
263 // get the total loads
264 unsigned int tloads = 0;
265 for(int i = 0; i < NUMCORES4GC; i++) {
266 tloads += gcloads[i];
// Heap top after compaction = base + total live bytes.
268 *heaptop = gcbaseva + tloads;
270 unsigned int topblockindex;
272 BLOCKINDEX(topblockindex, *heaptop);
273 // num of blocks per core
// Ceiling division: blocks spread evenly over the GC cores.
274 unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
// Per-cycle routine for a GC worker core: waits for each phase signal from
// the master (init, mark, update/flush, finish), does its share of the work,
// and acknowledges each phase by message. NOTE(review): some interior lines
// (e.g. the initGC/compact calls between the visible phases) are elided.
279 void gc_collect(struct garbagelist * stackptr) {
280 gc_status_info.gcprocessing = true;
281 // inform the master that this core is at a gc safe point and is ready to
283 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
285 // core collector routine
286 //wait for init phase
287 WAITFORGCPHASE(INITPHASE);
289 GC_PRINTF("Do initGC\n");
292 //send init finish msg to core coordinator
293 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
295 //wait for mark phase
296 WAITFORGCPHASE(MARKPHASE);
298 GC_PRINTF("Start mark phase\n");
299 mark(true, stackptr);
300 GC_PRINTF("Finish mark phase, start compact phase\n");
302 GC_PRINTF("Finish compact phase\n");
304 WAITFORGCPHASE(UPDATEPHASE);
306 GC_PRINTF("Start flush phase\n");
// Report per-core profiling data back to the master core.
307 GCPROFILE_INFO_2_MASTER();
309 GC_PRINTF("Finish flush phase\n");
311 CACHEADAPT_PHASE_CLIENT();
// Local shared-memory allocation cache is stale after compaction; drop it.
313 // invalidate all shared mem pointers
314 bamboo_cur_msp = NULL;
315 bamboo_smem_size = 0;
316 bamboo_smem_zero_top = NULL;
319 WAITFORGCPHASE(FINISHPHASE);
321 GC_PRINTF("Finish gc! \n");
// Per-cycle routine for an active core that does NOT compact (non-GC core):
// mirrors gc_collect() but skips the compact phase — it only marks, flushes
// references, and waits for the finish signal. NOTE(review): some interior
// lines are elided in this chunk.
324 void gc_nocollect(struct garbagelist * stackptr) {
325 gc_status_info.gcprocessing = true;
326 // inform the master that this core is at a gc safe point and is ready to
328 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
330 WAITFORGCPHASE(INITPHASE);
332 GC_PRINTF("Do initGC\n");
335 //send init finish msg to core coordinator
336 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
338 WAITFORGCPHASE(MARKPHASE);
340 GC_PRINTF("Start mark phase\n");
341 mark(true, stackptr);
342 GC_PRINTF("Finish mark phase, wait for flush\n");
344 // non-gc core collector routine
345 WAITFORGCPHASE(UPDATEPHASE);
347 GC_PRINTF("Start flush phase\n");
348 GCPROFILE_INFO_2_MASTER();
350 GC_PRINTF("Finish flush phase\n");
352 CACHEADAPT_PHASE_CLIENT();
// Local shared-memory allocation cache is stale after the cycle; drop it.
354 // invalidate all shared mem pointers
355 bamboo_cur_msp = NULL;
356 bamboo_smem_size = 0;
357 bamboo_smem_zero_top = NULL;
360 WAITFORGCPHASE(FINISHPHASE);
362 GC_PRINTF("Finish gc! \n");
// Master-side mark phase: broadcasts GCSTART, enters MARKPHASE, and keeps
// marking until the phase is advanced (by checkMarkStatus_p2 via message
// handling). NOTE(review): the declaration/update of 'isfirst' and the loop
// tail are elided in this chunk.
365 void master_mark(struct garbagelist *stackptr) {
368 GC_PRINTF("Start mark phase \n");
369 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
370 gc_status_info.gcphase = MARKPHASE;
// Loop until termination detection flips gcphase to COMPACTPHASE.
373 while(MARKPHASE == gc_status_info.gcphase) {
374 mark(isfirst, stackptr);
// Master-side collection of large-object info: asks every other GC core for
// its large-object data and spins until all responses arrive, then raises
// gcheaptop to at least the marked-pointer bound.
// NOTE(review): a busy-wait on a shared counter (numconfirm) — correctness
// depends on the message handler updating it; confirm its qualification
// (e.g. volatile) in the headers.
381 void master_getlargeobjs() {
382 // send msgs to all cores requiring large objs info
383 // Note: only need to ask gc cores, non-gc cores do not host any objs
384 numconfirm = NUMCORES4GC - 1;
385 for(int i = 1; i < NUMCORES4GC; i++) {
386 send_msg_1(i,GCLOBJREQUEST);
// Record the master's own load while waiting for the others.
388 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
389 //spin until we have all responses
390 while(numconfirm!=0) ;
// Heap top must cover everything that was marked.
393 if(gcheaptop < gcmarkedptrbound) {
394 gcheaptop = gcmarkedptrbound;
397 GC_PRINTF("prepare to cache large objs \n");
// Master-side update/flush phase: broadcasts GCSTARTUPDATE and waits until
// every core has finished updating references (phase leaves UPDATEPHASE).
// NOTE(review): the master's own update work between the visible lines is
// elided in this chunk.
402 void master_updaterefs(struct garbagelist * stackptr) {
403 gc_status_info.gcphase = UPDATEPHASE;
404 GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
406 GC_PRINTF("Start flush phase \n");
409 GC_CHECK_ALL_CORE_STATUS(UPDATEPHASE==gc_status_info.gcphase);
410 GC_PRINTF("Finish flush phase \n");
// Master-side finish phase: invalidates the local shared-memory allocation
// cache BEFORE telling other cores to resume (see the original comment on
// resume ordering), emits cache-policy timing, broadcasts GCFINISH, and
// re-arms the pre-GC handshake counters for the next cycle.
// NOTE(review): some conditional/brace lines are elided in this chunk.
413 void master_finish() {
414 gc_status_info.gcphase = FINISHPHASE;
416 // invalidate all shared mem pointers
417 // put it here as it takes time to inform all the other cores to
418 // finish gc and it might cause problem when some core resumes
419 // mutator earlier than the other cores
420 bamboo_cur_msp = NULL;
421 bamboo_smem_size = 0;
422 bamboo_smem_zero_top = NULL;
// Time the cache-policy output and charge it to the global accumulator.
425 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
426 CACHEADAPT_OUTPUT_CACHE_POLICY();
427 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
429 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
431 gc_status_info.gcprocessing = false;
433 // inform other cores to stop and wait for gc
435 for(int i = 0; i < NUMCORESACTIVE; i++) {
436 // reuse the gcnumsendobjs & gcnumreceiveobjs
437 gcnumsendobjs[0][i] = 0;
438 gcnumreceiveobjs[0][i] = 0;
440 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
// Master core's top-level GC driver: runs init, mark, large-object exchange,
// reference update, cache adaptation, and finish-up in order, delegating each
// phase to the master_* helpers above. NOTE(review): several interior lines
// (e.g. compact phase, master_finish call, #ifdef guards around the cache
// timing) are elided in this chunk.
444 void gc_master(struct garbagelist * stackptr) {
445 tprintf("start GC !!!!!!!!!!!!! \n");
446 gc_status_info.gcprocessing = true;
447 gc_status_info.gcphase = INITPHASE;
452 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
454 GC_PRINTF("Check core status \n");
// Block until every core has acknowledged the init phase.
455 GC_CHECK_ALL_CORE_STATUS(true);
457 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
458 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
459 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
462 master_mark(stackptr);
464 // get large objects from all cores
465 master_getlargeobjs();
470 // update the references
471 master_updaterefs(stackptr);
473 // do cache adaptation
474 CACHEADAPT_PHASE_MASTER();
476 // do finish up stuff
479 GC_PRINTF("gc finished \n");
480 tprintf("finish GC ! %d \n",gcflag);
// NOTE(review): the lines below are the interior of a pre-GC check routine
// whose signature is elided in this chunk (presumably pregccheck) — it
// verifies that no send/receive messages are still in flight before the GC
// handshake proceeds; confirm against the full source.
485 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
486 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
487 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
// Sum sends minus receives across all active cores; non-zero means messages
// are still outstanding. 'sumsendobj' is declared on an elided line.
489 for(int i = 0; i < NUMCORESACTIVE; i++) {
490 sumsendobj += gcnumsendobjs[0][i];
492 for(int i = 0; i < NUMCORESACTIVE; i++) {
493 sumsendobj -= gcnumreceiveobjs[0][i];
495 if(0 != sumsendobj) {
496 // there were still some msgs on the fly, wait until there
497 // are some update pregc information coming and check it again
499 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
503 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Pre-GC hook: with cache-adaptation sampling enabled, masks the sampling
// timer interrupt and flushes the collected DTLB sampling data before the
// collection starts. NOTE(review): #endif lines are elided in this chunk.
509 void pregcprocessing() {
510 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
511 // disable the timer interrupt
512 bamboo_mask_timer_intr();
514 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
515 // get the sampling data
516 bamboo_output_dtlb_sampling();
// Post-GC hook: counterpart of pregcprocessing() — re-arms the sampling timer
// and unmasks its interrupt when cache-adaptation sampling is enabled.
// NOTE(review): the #endif and closing brace are elided in this chunk.
520 void postgcprocessing() {
521 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
522 // enable the timer interrupt
523 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
524 bamboo_unmask_timer_intr();
528 bool gc(struct garbagelist * stackptr) {
531 gc_status_info.gcprocessing = false;
535 // core coordinator routine
536 if(0 == BAMBOO_NUM_OF_CORE) {
537 GC_PRINTF("Check if we can do gc or not\n");
538 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
539 if(!gc_checkAllCoreStatus()) {
540 // some of the cores are still executing the mutator and did not reach
541 // some gc safe point, therefore it is not ready to do gc
548 GC_PRINTF("start gc! \n");
551 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
553 gc_collect(stackptr);
556 gc_nocollect(stackptr);