3 #include "multicoreruntime.h"
4 #include "multicoregarbage.h"
5 #include "multicoregcmark.h"
6 #include "multicoregccompact.h"
7 #include "multicoregcflush.h"
8 #include "multicoregcprofile.h"
10 #include "multicoremem_helper.h"
11 #include "bambooalign.h"
// Global GC state shared across phases: gcprocessing flag + current gcphase
// (see initmulticoregcdata / gc_master below for the writers).
14 gc_status_t gc_status_info;
// Accumulated wall time spent emitting cache-policy output; charged in
// master_finish and gc_master, reset in the init/reset path (line 161).
16 unsigned long long gc_output_cache_policy_time=0;
19 // dump whole mem in blocks
// NOTE(review): the enclosing debug-dump function's signature and several
// interior lines (variable declarations, some braces/branches) are elided
// in this view; comments below describe only the visible statements.
28 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
29 // reserved blocks for sblocktbl
30 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
// Walk the reserved region below the GC heap base in 16-word (64-byte)
// strides, printing 16 words per output line.
32 for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
33 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
34 udn_tile_coord_x(), udn_tile_coord_y(),
35 *((int *)(i)), *((int *)(i + 4)),
36 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
37 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
38 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
39 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
40 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
41 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
42 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
45 bool advanceblock = false;
// Walk the whole shared heap [gcbaseva, gcbaseva+BAMBOO_SHARED_MEM_SIZE)
// in the same 64-byte strides.
47 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
49 // computing sblock # and block #, core coordinate (x,y) also
// j appears to count 64-byte lines; these modulo tests detect block
// boundaries (BAMBOO_SMEM_SIZE vs BAMBOO_SMEM_SIZE_L below the large-block
// bound) — the branch bodies between these tests are elided here.
50 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
52 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
53 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
// Map the block number to its hosting core; blocks are interleaved over
// 2*NUMCORES4GC table slots (presumably a two-banked layout — confirm
// against gc_block2core's definition).
65 coren = gc_block2core[block%(NUMCORES4GC*2)];
67 // compute core coordinate
68 x = BAMBOO_COORDS_X(coren);
69 y = BAMBOO_COORDS_Y(coren);
// Print a per-sblock banner with the owning core and start address.
70 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
71 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
72 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
75 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
76 udn_tile_coord_x(), udn_tile_coord_y(),
77 *((int *)(i)), *((int *)(i + 4)),
78 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
79 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
80 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
81 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
82 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
83 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
84 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
86 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
// One-time initialization of multicore-GC shared data: resets GC status,
// allocates the forwarding-object hash, and (on the startup core only)
// builds the global block table and clears the update structures.
// NOTE(review): several interior lines are elided in this view.
90 void initmulticoregcdata() {
91 bamboo_smem_zero_top = NULL;
// GC starts idle: not processing, phase parked at FINISHPHASE.
93 gc_status_info.gcprocessing = false;
94 gc_status_info.gcphase = FINISHPHASE;
// Forwarding table (old address -> new address during compaction,
// presumably); 128 is the initial capacity passed to the hash allocator.
97 gcforwardobjtbl = allocateMGCHash_I(128);
99 gc_profile_flag = false;
101 #ifdef GC_CACHE_ADAPT
102 gccachestage = false;
// Only the startup core owns the global allocation block table.
105 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
106 allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
107 for(int i=0; i<GCNUMBLOCK;i++) {
// NOTE(review): the two corenum assignments below are presumably the two
// arms of an elided if/else on the block index — confirm in the full file.
109 allocationinfo.blocktable[i].corenum=0;
111 allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
112 allocationinfo.blocktable[i].status=BS_FREE;
113 allocationinfo.blocktable[i].usedspace=0;
114 allocationinfo.blocktable[i].freespace=GLOBALBLOCKSIZE(i);
119 //initialize update structures
121 for(int i=0;i<NUMCORES4GC;i++) {
122 origblockarray[i]=NULL;
125 INIT_MULTICORE_GCPROFILE_DATA();
// Tear-down counterpart of initmulticoregcdata: releases the
// forwarding-object hash table (remainder of the body is elided here).
128 void dismulticoregcdata() {
129 freeMGCHash(gcforwardobjtbl);
// NOTE(review): fragment of a per-GC reset routine whose signature is
// elided in this view (presumably the per-cycle initGC); it zeroes the
// message/object bookkeeping before a new collection.
// Startup core clears the double-buffered send/receive counters and the
// pending memory requests for every core.
133 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
134 for(int i = 0; i < NUMCORES4GC; i++) {
136 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
137 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
139 gcrequiredmems[i] = 0;
// Non-GC (but active) cores have send/receive counters too, just no
// memory-request slots.
141 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
143 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
144 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
146 gcnumsrobjs_index = 0;
// Per-core local counters.
148 gcself_numsendobjs = 0;
149 gcself_numreceiveobjs = 0;
155 update_origblockptr=NULL;
// Empty the forwarding table for the new cycle.
158 MGCHashreset(gcforwardobjtbl);
161 gc_output_cache_policy_time=0;
// Second stage of mark-termination detection: verifies that the total of
// objects sent equals the total received across all active cores, and that
// the counts did not change between the two confirmation rounds (the two
// snapshot rows of gcnumsendobjs/gcnumreceiveobjs). Only then is the mark
// phase declared finished and the GC advanced to COMPACTPHASE.
// NOTE(review): several braces/branch bodies are elided in this view.
164 void checkMarkStatus_p2() {
165 // tprintf("Check mark status 2\n");
166 // check if the sum of send objs and receive obj are the same
167 // yes->check if the info is the latest; no->go on executing
168 unsigned int sumsendobj = 0;
169 for(int i = 0; i < NUMCORESACTIVE; i++) {
170 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
172 for(int i = 0; i < NUMCORESACTIVE; i++) {
173 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
// sumsendobj == 0 means no marking messages are in flight *if* the
// snapshot is stable; check stability against the other snapshot row.
175 if(0 == sumsendobj) {
176 // Check if there are changes of the numsendobjs or numreceiveobjs
// Loop presumably breaks at the first mismatching core (break elided),
// so i == NUMCORESACTIVE below means "no core changed".
179 for(i = 0; i < NUMCORESACTIVE; i++) {
180 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
184 if(i == NUMCORESACTIVE) {
185 //tprintf("Mark terminated\n");
186 // all the core status info are the latest,stop mark phase
187 gc_status_info.gcphase = COMPACTPHASE;
188 // restore the gcstatus for all cores
189 for(int i = 0; i < NUMCORESACTIVE; i++) {
193 // There were changes between phase 1 and phase 2, can not decide
194 // whether the mark phase has been finished
196 // As it fails in phase 2, flip the entries
197 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// Same flip in the other failure arm (counts did not balance).
200 // There were changes between phase 1 and phase 2, can not decide
201 // whether the mark phase has been finished
203 // As it fails in phase 2, flip the entries
204 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// First stage of mark-termination detection, run on the master core.
// When no confirmation round is pending (or the pending one completed),
// checks whether every core has stalled; if so it either starts a
// confirmation round (broadcast GCMARKCONFIRM) or, on a repeat pass,
// hands off to checkMarkStatus_p2 for the double-snapshot comparison.
// NOTE(review): the conditions selecting between these arms are partially
// elided in this view.
208 void checkMarkStatus() {
209 // tprintf("Check mark status\n");
210 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
211 unsigned int entry_index = 0;
// Pick which snapshot row to fill next: the flipped row on one path,
// the current row on the other (guarding condition elided).
214 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
217 entry_index = gcnumsrobjs_index;
219 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
220 // check the status of all cores
221 if (gc_checkCoreStatus()) {
224 // the first time found all cores stall
225 // send out status confirm msg to all other cores
226 // reset the corestatus array too
228 numconfirm = NUMCORESACTIVE - 1;
229 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
230 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
233 checkMarkStatus_p2();
234 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
237 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
242 // compute load balance for all cores
// NOTE(review): the enclosing function's signature and epilogue are elided
// in this view; the visible part sums per-core live data sizes to find the
// post-compaction heap top and the number of blocks each core should own.
244 // compute load balance
245 // get the total loads
247 unsigned int tloads = 0;
248 for(int i = 0; i < NUMCORES4GC; i++) {
249 tloads += gcloads[i];
250 //tprintf("load: %d %d \n", gcloads[i], i);
// Heap top if all live data were packed from the heap base.
252 heaptop = gcbaseva + tloads;
254 unsigned int topblockindex;
256 BLOCKINDEX(topblockindex, heaptop);
257 // num of blocks per core
// Ceiling division: spread topblockindex blocks evenly over the GC cores.
258 unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
// Client-side GC routine for cores that both host objects and collect
// (cores < NUMCORES4GC). Reports readiness to the master, then walks the
// phase state machine (INIT -> MARK -> compact -> UPDATE -> FINISH) in
// lockstep with the master's broadcast messages.
// NOTE(review): the actual per-phase work calls (initGC/mark/compact/...)
// are elided between the visible WAITFORGCPHASE lines.
263 void gc_collect(struct garbagelist * stackptr) {
264 gc_status_info.gcprocessing = true;
265 // inform the master that this core is at a gc safe point and is ready to
267 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
269 // core collector routine
270 //wait for init phase
271 WAITFORGCPHASE(INITPHASE);
273 GC_PRINTF("Do initGC\n");
276 //send init finish msg to core coordinator
277 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
279 //wait for mark phase
280 WAITFORGCPHASE(MARKPHASE);
282 GC_PRINTF("Start mark phase\n");
284 GC_PRINTF("Finish mark phase, start compact phase\n");
286 GC_PRINTF("Finish compact phase\n");
288 WAITFORGCPHASE(UPDATEPHASE);
290 GC_PRINTF("Start update phase\n");
291 GCPROFILE_INFO_2_MASTER();
293 GC_PRINTF("Finish update phase\n");
295 CACHEADAPT_PHASE_CLIENT();
// Local cached views of shared memory are stale after compaction; drop
// them so the next allocation refetches from the master.
297 // invalidate all shared mem pointers
298 bamboo_cur_msp = NULL;
299 bamboo_smem_size = 0;
300 bamboo_smem_zero_top = NULL;
303 WAITFORGCPHASE(FINISHPHASE);
305 GC_PRINTF("Finish gc! \n");
// Client-side GC routine for active cores that do NOT host collectable
// blocks (cores >= NUMCORES4GC): same phase handshake as gc_collect but
// with no compact phase of its own.
// NOTE(review): per-phase work calls are elided between the visible
// WAITFORGCPHASE lines.
308 void gc_nocollect(struct garbagelist * stackptr) {
309 gc_status_info.gcprocessing = true;
310 // inform the master that this core is at a gc safe point and is ready to
312 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
314 WAITFORGCPHASE(INITPHASE);
316 GC_PRINTF("Do initGC\n");
319 //send init finish msg to core coordinator
320 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
322 WAITFORGCPHASE(MARKPHASE);
324 GC_PRINTF("Start mark phase\n");
326 GC_PRINTF("Finish mark phase, wait for update\n");
328 // non-gc core collector routine
329 WAITFORGCPHASE(UPDATEPHASE);
331 GC_PRINTF("Start update phase\n");
332 GCPROFILE_INFO_2_MASTER();
334 GC_PRINTF("Finish update phase\n");
336 CACHEADAPT_PHASE_CLIENT();
// Drop stale cached shared-memory pointers, as in gc_collect.
338 // invalidate all shared mem pointers
339 bamboo_cur_msp = NULL;
340 bamboo_smem_size = 0;
341 bamboo_smem_zero_top = NULL;
344 WAITFORGCPHASE(FINISHPHASE);
346 GC_PRINTF("Finish gc! \n");
// Master-side start of the mark phase: broadcasts GCSTART to the clients
// and flips the global phase to MARKPHASE (rest of the body — the master's
// own marking work — is elided in this view).
349 void master_mark(struct garbagelist *stackptr) {
351 GC_PRINTF("Start mark phase \n");
352 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
353 gc_status_info.gcphase = MARKPHASE;
// Master-side collection of large-object info: requests it from every
// other GC core, records the master's own heap-top load, then spins until
// all GCLOBJREQUEST responses have arrived (numconfirm is decremented by
// the message handler, not visible here).
359 void master_getlargeobjs() {
360 // send msgs to all cores requiring large objs info
361 // Note: only need to ask gc cores, non-gc cores do not host any objs
362 numconfirm = NUMCORES4GC - 1;
363 for(int i = 1; i < NUMCORES4GC; i++) {
364 send_msg_1(i,GCLOBJREQUEST);
366 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
367 //spin until we have all responses
// Busy-wait; numconfirm is presumably updated from an interrupt/message
// context — confirm it is declared volatile in the full file.
368 while(numconfirm!=0) ;
371 GC_PRINTF("prepare to cache large objs \n");
// Master-side update phase: flips the phase, broadcasts GCSTARTUPDATE so
// clients rewrite their references, then waits until every core reports
// done (the master's own update work between these lines is elided).
376 void master_updaterefs(struct garbagelist * stackptr) {
377 gc_status_info.gcphase = UPDATEPHASE;
378 GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
380 GC_PRINTF("Start update phase \n");
383 GC_CHECK_ALL_CORE_STATUS();
384 GC_PRINTF("Finish update phase \n");
// Master-side wrap-up: enters FINISHPHASE, invalidates the master's own
// cached shared-memory pointers *before* telling clients to resume (see
// the original comment), emits cache-policy profiling, broadcasts
// GCFINISH, and — in the back-to-back-GC case (guard elided) — resets the
// counters and immediately broadcasts GCSTARTPRE for the next cycle.
387 void master_finish() {
388 gc_status_info.gcphase = FINISHPHASE;
390 // invalidate all shared mem pointers
391 // put it here as it takes time to inform all the other cores to
392 // finish gc and it might cause problem when some core resumes
393 // mutator earlier than the other cores
394 bamboo_cur_msp = NULL;
395 bamboo_smem_size = 0;
396 bamboo_smem_zero_top = NULL;
// Charge the cache-policy output to the dedicated profiling counter.
399 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
400 CACHEADAPT_OUTPUT_CACHE_POLICY();
401 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
404 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
405 gc_status_info.gcprocessing = false;
// Back-to-back GC: another collection was requested while this one ran
// (the condition testing that is elided in this view).
408 // inform other cores to stop and wait for gc
409 GC_PRINTF("Back to Back gc case\n");
411 for(int i = 0; i < NUMCORESACTIVE; i++) {
412 // reuse the gcnumsendobjs & gcnumreceiveobjs
413 gcnumsendobjs[0][i] = 0;
414 gcnumreceiveobjs[0][i] = 0;
416 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
// Top-level master coordination of one whole GC cycle:
// INIT -> mark -> large-object exchange -> compact (elided here) ->
// update -> cache adaptation -> finish, plus a debug dump of the global
// block table. NOTE(review): the compact call and master_finish call are
// among the elided lines.
420 void gc_master(struct garbagelist * stackptr) {
421 //tprintf("start GC!\n");
422 gc_status_info.gcprocessing = true;
423 gc_status_info.gcphase = INITPHASE;
424 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
430 //tprintf("Check core status \n");
431 GC_CHECK_ALL_CORE_STATUS();
// Profiling: time spent emitting cache-sampling data is excluded from GC
// work by charging it to gc_output_cache_policy_time.
433 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
434 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
435 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
436 //tprintf("start mark phase\n");
438 master_mark(stackptr);
439 //tprintf("finish mark phase\n");
440 // get large objects from all cores
441 master_getlargeobjs();
442 //tprintf("start compact phase\n");
445 //tprintf("start update phase\n");
446 // update the references
447 master_updaterefs(stackptr);
448 //tprintf("gc master finished update \n");
449 // do cache adaptation
450 CACHEADAPT_PHASE_MASTER();
451 //tprintf("finish cachdapt phase\n");
452 // do finish up stuff
// Debug: print every block record's usage/ownership/status after the GC.
454 for(int i=0;i<GCNUMBLOCK;i++) {
455 struct blockrecord *record=&allocationinfo.blocktable[i];
456 tprintf("%u. used=%u free=%u corenum=%u status=%u, base=%x, ptr=%x\n", i, record->usedspace, record->freespace, record->corenum, record->status, gcbaseva+OFFSET2BASEVA(i), (gcbaseva+OFFSET2BASEVA(i)+record->usedspace));
461 //tprintf("finish GC ! %d \n",gcflag);
// NOTE(review): fragment of the pre-GC check routine whose signature is
// elided in this view. It records this core's own message counts, then
// verifies that total sends equal total receives across active cores; a
// non-zero difference means messages are still in flight, so the GC must
// wait and re-check.
466 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
467 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
468 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
470 for(int i = 0; i < NUMCORESACTIVE; i++) {
471 sumsendobj += gcnumsendobjs[0][i];
473 for(int i = 0; i < NUMCORESACTIVE; i++) {
474 sumsendobj -= gcnumreceiveobjs[0][i];
476 if(0 != sumsendobj) {
477 // there were still some msgs on the fly, wait until there
478 // are some update pregc information coming and check it again
480 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
484 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Work done just before a GC when cache-adaptation sampling is compiled
// in: stop the sampling timer interrupt and flush the DTLB sampling data
// collected so far.
490 void pregcprocessing() {
491 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
492 // disable the timer interrupt
493 bamboo_mask_timer_intr();
495 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
496 // get the sampling data
497 bamboo_output_dtlb_sampling();
// Counterpart of pregcprocessing: after the GC, re-arm the sampling timer
// and unmask its interrupt so cache-adaptation sampling resumes.
501 void postgcprocessing() {
502 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
503 // enable the timer interrupt
504 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
505 bamboo_unmask_timer_intr();
// GC entry point, called at a safe point on every core. Core 0 acts as
// coordinator (waits for all cores, then runs the master path — elided
// past this view); GC cores run gc_collect; remaining active cores run
// gc_nocollect. NOTE(review): this definition continues past the end of
// the visible chunk; comments cover only the visible prefix.
509 bool gc(struct garbagelist * stackptr) {
512 gc_status_info.gcprocessing = false;
516 // core coordinator routine
517 if(0 == BAMBOO_NUM_OF_CORE) {
518 GC_PRINTF("Check if we can do gc or not\n");
519 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
521 //wait for other cores to catch up
522 while(!gc_checkCoreStatus())
527 GC_PRINTF("start gc! \n");
530 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
531 GC_PRINTF("Core reporting for gc.\n");
533 gc_collect(stackptr);
536 gc_nocollect(stackptr);