3 #include "multicoreruntime.h"
4 #include "multicoregarbage.h"
5 #include "multicoregcmark.h"
6 #include "multicoregccompact.h"
7 #include "multicoregcflush.h"
8 #include "multicoregcprofile.h"
10 #include "multicoremem_helper.h"
11 #include "bambooalign.h"
13 #include "bme_perf_counter.h"
// Global GC state shared across this translation unit: current processing
// flag + phase. NOTE(review): appears to be read/written by master and
// client cores without visible locking — confirm the message protocol
// serializes access.
17 gc_status_t gc_status_info;
// Accumulated time (in BAMBOO_GET_EXE_TIME units) spent emitting cache
// policy output; reset per GC cycle and added to in master_finish/gc_master.
19 unsigned long long gc_output_cache_policy_time=0;
// Debug helper: hex-dump the entire shared memory region, 16 words (64
// bytes) per printed line. First the reserved region below gcbaseva
// (sblock table), then every GC block, printing a block/sblock banner with
// the owning core's (x,y) coordinate at each block boundary.
// NOTE(review): the function header and several body lines (declarations
// of i/j/block/sblock/coren/x/y, closing braces) are elided from this
// listing — do not edit without the full source.
22 // dump whole mem in blocks
31 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
32 // reserved blocks for sblocktbl
33 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
// Walk the reserved area in 64-byte strides, dumping 16 words per line.
35 for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
36 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
37 udn_tile_coord_x(), udn_tile_coord_y(),
38 *((int *)(i)), *((int *)(i + 4)),
39 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
40 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
41 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
42 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
43 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
44 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
45 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
48 bool advanceblock = false;
// Walk the whole GC-managed heap [gcbaseva, gcbaseva+BAMBOO_SHARED_MEM_SIZE).
50 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
52 // computing sblock # and block #, core coordinate (x,y) also
53 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
// Below BAMBOO_LARGE_SMEM_BOUND blocks have size BAMBOO_SMEM_SIZE_L;
// presumably the elided else-arm handles the smaller-block region — confirm.
55 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
56 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
// Map the block index to its owning core (blocks are distributed over
// NUMCORES4GC*2 slots), then to tile coordinates for the banner line.
68 coren = gc_block2core[block%(NUMCORES4GC*2)];
70 // compute core coordinate
71 x = BAMBOO_COORDS_X(coren);
72 y = BAMBOO_COORDS_Y(coren);
73 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
74 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
75 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
78 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
79 udn_tile_coord_x(), udn_tile_coord_y(),
80 *((int *)(i)), *((int *)(i + 4)),
81 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
82 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
83 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
84 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
85 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
86 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
87 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
89 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
// Poll the status of all GC cores. NOTE(review): the loop body and return
// statement are elided from this listing; by name it presumably returns
// true when every GC core has reported a stalled/ready status — confirm
// against the full source before relying on this.
93 bool gc_checkCoreStatus() {
94 for(int i = 0; i < NUMCORES4GC; i++) {
// Reset the per-core status entries for all GC cores so the next round of
// status collection starts clean. NOTE(review): loop body elided from this
// listing — confirm which array/value it resets in the full source.
102 void gc_resetCoreStatus() {
103 for(int i = 0; i < NUMCORES4GC; i++) {
// One-time initialization of multicore GC data structures on this core:
// clears local shared-mem allocation state, marks GC as idle (FINISHPHASE),
// allocates the forwarding-object hash table, and — on the startup core
// only — builds the global block table mapping every heap block to its
// owning core with an initially-free, empty accounting record.
// NOTE(review): several lines (e.g. braces, intervening assignments) are
// elided from this listing.
109 void initmulticoregcdata() {
112 bamboo_smem_zero_top = NULL;
114 gc_status_info.gcprocessing = false;
115 gc_status_info.gcphase = FINISHPHASE;
// 128-entry initial capacity for the mark-forwarding hash table.
118 gcforwardobjtbl = allocateMGCHash_I(128);
120 gc_profile_flag = false;
// Master-only: allocate and populate the shared block table.
123 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
124 allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
125 for(int i=0; i<GCNUMBLOCK;i++) {
// NOTE(review): two corenum assignments appear (lines 127/129 of the
// original); the elided lines between them presumably select one per a
// condition — confirm.
127 allocationinfo.blocktable[i].corenum=0;
129 allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
130 allocationinfo.blocktable[i].status=BS_FREE;
131 allocationinfo.blocktable[i].usedspace=0;
132 allocationinfo.blocktable[i].freespace=GLOBALBLOCKSIZE(i);
137 //initialize update structures
139 for(int i=0;i<NUMCORES4GC;i++) {
140 origblockarray[i]=NULL;
// Profiling counters (no-op unless profiling is compiled in).
143 INIT_MULTICORE_GCPROFILE_DATA();
// Tear-down counterpart of initmulticoregcdata(): releases the
// mark-forwarding hash table. NOTE(review): any further cleanup lines are
// elided from this listing.
146 void dismulticoregcdata() {
147 freeMGCHash(gcforwardobjtbl);
// Per-GC-cycle reinitialization (function header elided from this listing;
// presumably the body of an initGC()-style routine). The master core zeroes
// the global send/receive object counters and memory-request records for
// every core; every core then resets its own counters, the forwarding
// table, and the cache-policy timer.
151 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
// Clear both generations ([0] and [1]) of the per-core send/recv counters
// for the GC cores...
152 for(int i = 0; i < NUMCORES4GC; i++) {
154 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
155 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
157 gcrequiredmems[i] = 0;
// ...and for the remaining active (non-GC) cores.
159 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
161 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
162 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
164 gcnumsrobjs_index = 0;
// Core-local counters, reset on every core.
166 gcself_numsendobjs = 0;
167 gcself_numreceiveobjs = 0;
173 update_origblockptr=NULL;
// Empty the mark-forwarding table left over from the previous cycle.
176 MGCHashreset(gcforwardobjtbl);
179 gc_output_cache_policy_time=0;
// Phase 2 of mark-termination detection. The mark phase is finished only
// when (a) total objects sent equals total objects received across all
// active cores, and (b) those counters have not changed between the two
// sampling generations ([0] vs [1]) — otherwise messages may still be in
// flight. On success the GC advances to COMPACTPHASE; on failure the
// sampling index is flipped and detection restarts.
// NOTE(review): closing braces / else lines are elided from this listing.
182 void checkMarkStatus_p2() {
183 // tprintf("Check mark status 2\n");
184 // check if the sum of send objs and receive obj are the same
185 // yes->check if the info is the latest; no->go on executing
186 unsigned int sumsendobj = 0;
187 for(int i = 0; i < NUMCORESACTIVE; i++) {
188 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
190 for(int i = 0; i < NUMCORESACTIVE; i++) {
191 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
// sumsendobj == 0 means sends and receives balance in this snapshot.
193 if(0 == sumsendobj) {
194 // Check if there are changes of the numsendobjs or numreceiveobjs
// Scan for any core whose counters differ between the two generations;
// the loop index i survives the loop so it can be tested below.
197 for(i = 0; i < NUMCORESACTIVE; i++) {
198 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
// i == NUMCORESACTIVE iff no core's counters changed: mark is done.
202 if(i == NUMCORESACTIVE) {
203 //tprintf("Mark terminated\n");
204 // all the core status info are the latest,stop mark phase
205 gc_status_info.gcphase = COMPACTPHASE;
206 // restore the gcstatus for all cores
207 for(int i = 0; i < NUMCORESACTIVE; i++) {
211 // There were changes between phase 1 and phase 2, can not decide
212 // whether the mark phase has been finished
214 // As it fails in phase 2, flip the entries
215 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
218 // There were changes between phase 1 and phase 2, can not decide
219 // whether the mark phase has been finished
221 // As it fails in phase 2, flip the entries
222 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// Phase 1 of mark-termination detection, run on the master. Once all cores
// stall (and either no confirmation round is pending or all confirmations
// have arrived), either broadcast GCMARKCONFIRM to start a confirmation
// round (first time) or run checkMarkStatus_p2() to decide termination.
// NOTE(review): several branch/brace lines are elided from this listing,
// including the condition that selects between the two paths — confirm.
226 void checkMarkStatus() {
227 // tprintf("Check mark status\n");
228 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
229 unsigned int entry_index = 0;
// Pick which counter generation to examine: the flipped one while a
// confirmation round is in progress, the current one otherwise.
232 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
235 entry_index = gcnumsrobjs_index;
// Enter runtime mode to inspect/modify shared GC state safely.
237 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
238 // check the status of all cores
239 if (gc_checkCoreStatus()) {
242 // the first time found all cores stall
243 // send out status confirm msg to all other cores
244 // reset the corestatus array too
246 numconfirm = NUMCORESACTIVE - 1;
247 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
248 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
251 checkMarkStatus_p2();
252 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
255 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Load-balancing computation (function header elided from this listing;
// presumably a loadbalance()-style routine). Sums the per-core live-data
// loads to find the post-compaction heap top, converts it to a block
// index, and derives the number of blocks each GC core should own
// (ceiling division over NUMCORES4GC).
260 // compute load balance for all cores
262 // compute load balance
263 // get the total loads
265 unsigned int tloads = 0;
266 for(int i = 0; i < NUMCORES4GC; i++) {
267 tloads += gcloads[i];
268 //tprintf("load: %d %d \n", gcloads[i], i);
// heaptop = first address past all live data once compacted to the base.
270 heaptop = gcbaseva + tloads;
272 unsigned int topblockindex;
274 BLOCKINDEX(topblockindex, heaptop);
275 // num of blocks per core
276 unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
// Client-side GC routine for a *collector* core (core < NUMCORES4GC).
// Reports ready to the master, invalidates local shared-memory allocation
// pointers, then walks the phase protocol in lockstep with the master:
// INIT -> MARK -> (compact) -> UPDATE -> FINISH, acknowledging each phase
// by message. stackptr: root set from this core's mutator stack.
// NOTE(review): the actual initGC/mark/compact/update calls between the
// WAITFORGCPHASE lines are elided from this listing.
281 void gc_collect(struct garbagelist * stackptr) {
282 gc_status_info.gcprocessing = true;
283 // inform the master that this core is at a gc safe point and is ready to
285 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
287 // invalidate all shared mem pointers
288 bamboo_cur_msp = NULL;
289 bamboo_smem_size = 0;
290 bamboo_smem_zero_top = NULL;
294 // core collector routine
295 //wait for init phase
296 WAITFORGCPHASE(INITPHASE);
298 GC_PRINTF("Do initGC\n");
301 //send init finish msg to core coordinator
302 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
304 //wait for mark phase
305 WAITFORGCPHASE(MARKPHASE);
307 GC_PRINTF("Start mark phase\n");
309 GC_PRINTF("Finish mark phase, start compact phase\n");
311 GC_PRINTF("Finish compact phase\n");
313 WAITFORGCPHASE(UPDATEPHASE);
315 GC_PRINTF("Start update phase\n");
// Ship profiling counters to the master before finishing.
316 GCPROFILE_INFO_2_MASTER();
318 GC_PRINTF("Finish update phase\n");
// Cache-adaptation hook (no-op unless compiled in).
320 CACHEADAPT_PHASE_CLIENT();
322 WAITFORGCPHASE(FINISHPHASE);
324 GC_PRINTF("Finish gc! \n");
// Client-side GC routine for a *non-collector* core (core >= NUMCORES4GC):
// mirrors gc_collect() — report ready, invalidate shared-mem pointers,
// follow the phase protocol — but skips the compact phase since non-GC
// cores host no heap blocks to compact.
// NOTE(review): the initGC/mark/update calls between WAITFORGCPHASE lines
// are elided from this listing.
327 void gc_nocollect(struct garbagelist * stackptr) {
328 gc_status_info.gcprocessing = true;
329 // inform the master that this core is at a gc safe point and is ready to
331 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
333 // invalidate all shared mem pointers
334 bamboo_cur_msp = NULL;
335 bamboo_smem_size = 0;
336 bamboo_smem_zero_top = NULL;
339 WAITFORGCPHASE(INITPHASE);
341 GC_PRINTF("Do initGC\n");
345 //send init finish msg to core coordinator
346 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
349 WAITFORGCPHASE(MARKPHASE);
351 GC_PRINTF("Start mark phase\n");
353 GC_PRINTF("Finish mark phase, wait for update\n");
355 // non-gc core collector routine
356 WAITFORGCPHASE(UPDATEPHASE);
358 GC_PRINTF("Start update phase\n");
359 GCPROFILE_INFO_2_MASTER();
361 GC_PRINTF("Finish update phase\n");
363 CACHEADAPT_PHASE_CLIENT();
365 WAITFORGCPHASE(FINISHPHASE);
367 GC_PRINTF("Finish gc! \n");
// Master: enter the mark phase — set the global phase and broadcast
// GCSTART so all client cores begin marking. NOTE(review): the master's
// own marking work and the termination wait are elided from this listing.
370 void master_mark(struct garbagelist *stackptr) {
372 GC_PRINTF("Start mark phase \n");
373 gc_status_info.gcphase = MARKPHASE;
374 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
// Master: collect large-object info from every other GC core. Sends
// GCLOBJREQUEST to cores 1..NUMCORES4GC-1, records its own heap-top load,
// then spin-waits until all responses (numconfirm) have arrived.
// NOTE(review): the spin on a plain variable relies on numconfirm being
// updated by the message handler — confirm it is declared volatile/atomic.
380 void master_getlargeobjs() {
381 // send msgs to all cores requiring large objs info
382 // Note: only need to ask gc cores, non-gc cores do not host any objs
383 numconfirm = NUMCORES4GC - 1;
384 for(int i = 1; i < NUMCORES4GC; i++) {
385 send_msg_1(i,GCLOBJREQUEST);
387 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
388 //spin until we have all responses
389 while(numconfirm!=0) ;
391 GCPROFILE_ITEM_MASTER();
392 GC_PRINTF("prepare to cache large objs \n");
// Master: run the reference-update phase — set the phase, broadcast
// GCSTARTUPDATE to all clients, then wait for every core to report done.
// NOTE(review): the master's own update work between the broadcast and the
// status check is elided from this listing.
397 void master_updaterefs(struct garbagelist * stackptr) {
398 gc_status_info.gcphase = UPDATEPHASE;
399 GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
400 GC_PRINTF("Start update phase \n");
// Block until all cores have finished updating references.
403 GC_CHECK_ALL_CORE_STATUS();
404 GC_PRINTF("Finish update phase \n");
// Master: finish the GC cycle. Invalidates the master's shared-mem
// allocation pointers BEFORE broadcasting GCFINISH (so no core resumes its
// mutator while the master still holds stale pointers), flushes profiling
// and cache-policy output, then either releases all cores or — in the
// back-to-back-GC case — immediately broadcasts GCSTARTPRE to begin the
// next cycle. NOTE(review): the branch selecting between those two paths
// is elided from this listing.
407 void master_finish() {
408 gc_status_info.gcphase = FINISHPHASE;
410 // invalidate all shared mem pointers
411 // put it here as it takes time to inform all the other cores to
412 // finish gc and it might cause problem when some core resumes
413 // mutator earlier than the other cores
414 bamboo_cur_msp = NULL;
415 bamboo_smem_size = 0;
416 bamboo_smem_zero_top = NULL;
418 GCPROFILE_END_MASTER();
// Time the cache-policy output so it can be excluded from GC accounting.
419 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
420 CACHEADAPT_OUTPUT_CACHE_POLICY();
421 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
// Release every core back to its mutator.
424 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
425 gc_status_info.gcprocessing = false;
428 // inform other cores to stop and wait for gc
429 GC_PRINTF("Back to Back gc case\n");
// Reuse generation [0] of the counters as the pre-GC bookkeeping arrays.
431 for(int i = 0; i < NUMCORESACTIVE; i++) {
432 // reuse the gcnumsendobjs & gcnumreceiveobjs
433 gcnumsendobjs[0][i] = 0;
434 gcnumreceiveobjs[0][i] = 0;
436 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
// Master-core GC coordinator: drives the full cycle
// INIT -> MARK -> get-large-objs -> (compact) -> UPDATE -> cache-adapt ->
// finish, broadcasting phase-start messages and waiting on core status at
// each step. stackptr: the master's own root set.
// NOTE(review): the compact-phase call and master_finish() invocation are
// among the lines elided from this listing.
440 void gc_master(struct garbagelist * stackptr) {
441 tprintf("start GC!\n");
442 gc_status_info.gcprocessing = true;
443 gc_status_info.gcphase = INITPHASE;
// Kick off the init phase on all client cores.
446 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
451 //tprintf("Check core status \n");
452 GC_CHECK_ALL_CORE_STATUS();
// Emit cache-sampling output; timed so it is excluded from GC accounting.
453 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
454 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
455 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
456 //tprintf("start mark phase\n");
458 GCPROFILE_ITEM_MASTER();
459 master_mark(stackptr);
460 GCPROFILE_ITEM_MASTER();
461 //tprintf("finish mark phase\n");
462 // get large objects from all cores
463 master_getlargeobjs();
464 //tprintf("start compact phase\n");
467 //tprintf("start update phase\n");
468 // update the references
469 master_updaterefs(stackptr);
470 //tprintf("gc master finished update \n");
471 // do cache adaptation
472 CACHEADAPT_PHASE_MASTER();
473 //tprintf("finish cachdapt phase\n");
474 // do finish up stuff
// Debug dump of the final block-table accounting (usedspace/freespace per
// block). NOTE(review): presumably guarded by a debug flag in the elided
// surrounding lines — confirm before shipping.
476 for(int i=0;i<GCNUMBLOCK;i++) {
477 struct blockrecord *record=&allocationinfo.blocktable[i];
478 tprintf("%u. used=%u free=%u corenum=%u status=%u, base=%x, ptr=%x\n", i, record->usedspace, record->freespace, record->corenum, record->status, gcbaseva+OFFSET2BASEVA(i), (gcbaseva+OFFSET2BASEVA(i)+record->usedspace));
484 //tprintf("finish GC ! %d \n",gcflag);
// Pre-GC message-balance check (function header elided from this listing;
// presumably a pregccheck()-style routine). Records this core's send/recv
// counters, then verifies that total sends equal total receives across all
// active cores; a nonzero difference means messages are still in flight,
// so the caller must wait for updated pre-GC info and re-check.
489 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
490 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
491 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
493 for(int i = 0; i < NUMCORESACTIVE; i++) {
494 sumsendobj += gcnumsendobjs[0][i];
496 for(int i = 0; i < NUMCORESACTIVE; i++) {
497 sumsendobj -= gcnumreceiveobjs[0][i];
// Nonzero balance: in-flight messages remain; retry later.
499 if(0 != sumsendobj) {
500 // there were still some msgs on the fly, wait until there
501 // are some update pregc information coming and check it again
503 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
507 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Hook run just before GC starts: when cache-adaptation sampling is
// compiled in, mask the sampling timer interrupt and flush the DTLB
// sampling data so sampling does not interfere with collection.
// NOTE(review): the matching #endif and closing brace are elided from this
// listing.
513 void pregcprocessing() {
514 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)&&(defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
515 // disable the timer interrupt
516 bamboo_mask_timer_intr();
517 // get the sampling data
518 bamboo_output_dtlb_sampling();
// Hook run just after GC finishes: re-arm the sampling timer, unmask its
// interrupt, and restart DTLB sampling — the inverse of pregcprocessing().
// NOTE(review): the matching #endif and closing brace are elided from this
// listing.
522 void postgcprocessing() {
523 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)&&(defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
524 // enable the timer interrupt
525 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
526 bamboo_unmask_timer_intr();
527 //turn on sampling again
528 bamboo_dtlb_sampling_init();
532 bool gc(struct garbagelist * stackptr) {
535 gc_status_info.gcprocessing = false;
539 profile_start(GC_REGION);
542 // core coordinator routine
543 if(0 == BAMBOO_NUM_OF_CORE) {
544 GC_PRINTF("start gc! \n");
545 GCPROFILE_START_MASTER();
546 unsigned long long thisgctime = BAMBOO_GET_EXE_TIME();
548 GC_PRINTF("Check if we can do gc or not\n");
549 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
552 //wait for other cores to catch up
553 while(!gc_checkCoreStatus())
558 GCtime = BAMBOO_GET_EXE_TIME() - thisgctime;
560 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
561 GC_PRINTF("Core reporting for gc.\n");
563 gc_collect(stackptr);
566 gc_nocollect(stackptr);
570 profile_start(APP_REGION);