3 #include "multicoreruntime.h"
4 #include "multicoregarbage.h"
5 #include "multicoregcmark.h"
6 #include "multicoregccompact.h"
7 #include "multicoregcflush.h"
8 #include "multicoregcprofile.h"
10 #include "multicoremem_helper.h"
// Global GC status shared across the runtime: the current collection phase
// (gcphase) and whether a collection is in progress (gcprocessing).
13 gc_status_t gc_status_info;
// Accumulated time spent emitting cache-policy/sampling output (updated in
// gc_master and master_finish, reset in the GC-data reset path below).
15 unsigned long long gc_output_cache_policy_time=0;
18 // dump whole mem in blocks
27 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
28 // reserved blocks for sblocktbl
29 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
31 for(i=BAMBOO_BASE_VA; (unsinged int)i<(unsigned int)gcbaseva; i+= 4*16) {
32 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
33 udn_tile_coord_x(), udn_tile_coord_y(),
34 *((int *)(i)), *((int *)(i + 4)),
35 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
36 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
37 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
38 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
39 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
40 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
41 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
44 bool advanceblock = false;
46 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
48 // computing sblock # and block #, core coordinate (x,y) also
49 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
51 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
52 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
64 coren = gc_block2core[block%(NUMCORES4GC*2)];
66 // compute core coordinate
67 x = BAMBOO_COORDS_X(coren);
68 y = BAMBOO_COORDS_Y(coren);
69 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
70 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
71 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
74 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
75 udn_tile_coord_x(), udn_tile_coord_y(),
76 *((int *)(i)), *((int *)(i + 4)),
77 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
78 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
79 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
80 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
81 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
82 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
83 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
85 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
// One-time initialization of multicore GC state: clears the shared-memory
// zero-top pointer, marks GC as idle (FINISHPHASE), allocates the forwarding
// hash table, and (on the startup core only) builds the block table mapping
// each shared heap block to a host core.
89 void initmulticoregcdata() {
90 bamboo_smem_zero_top = NULL;
92 gc_status_info.gcprocessing = false;
93 gc_status_info.gcphase = FINISHPHASE;
96 gcforwardobjtbl = allocateMGCHash_I(128);
98 gc_profile_flag = false;
100 #ifdef GC_CACHE_ADAPT
101 gccachestage = false;
// Only the startup core owns the global allocation block table.
104 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
105 allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
106 for(int i=0; i<GCNUMBLOCK;i++) {
// NOTE(review): lines are missing from this fragment; the two corenum
// assignments below are presumably on different branches in the full source.
108 allocationinfo.blocktable[i].corenum=0;
110 allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
115 INIT_MULTICORE_GCPROFILE_DATA();
// Tear-down counterpart of initmulticoregcdata: releases the forwarding
// hash table allocated there.
118 void dismulticoregcdata() {
119 freeMGCHash(gcforwardobjtbl);
// NOTE(review): the header of this function is missing from the fragment --
// from the visible body it resets per-GC bookkeeping (send/receive object
// counters, memory requests, forwarding table) before/after a collection.
123 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
// Reset both sampling rounds ([0] and [1]) of the send/receive counters for
// the GC cores, plus their pending memory requests.
124 for(int i = 0; i < NUMCORES4GC; i++) {
126 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
127 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
129 gcrequiredmems[i] = 0;
// Non-GC compute cores only track send/receive counts, no memory requests.
131 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
133 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
134 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
136 gcnumsrobjs_index = 0;
138 gcself_numsendobjs = 0;
139 gcself_numreceiveobjs = 0;
148 MGCHashreset(gcforwardobjtbl);
151 gc_output_cache_policy_time=0;
// Returns whether every active core has reported status 0 (stalled at a GC
// safe point). The early-exit/return lines are missing from this fragment.
154 bool gc_checkAllCoreStatus() {
155 for(int i = 0; i < NUMCORESACTIVE; i++) {
156 if(gccorestatus[i] != 0) {
163 // NOTE: should be invoked with interrupts turned off
// Interrupt-off variant of gc_checkAllCoreStatus (same scan of gccorestatus;
// the "_I" suffix marks the interrupts-disabled calling convention).
164 bool gc_checkAllCoreStatus_I() {
165 for(int i = 0; i < NUMCORESACTIVE; i++) {
166 if(gccorestatus[i] != 0) {
// Second-phase termination check for the mark phase: verifies that the total
// objects sent equals the total received across all active cores, and that
// the counters did not change between the two sampling rounds ([0] vs [1]).
// On success the GC advances to COMPACTPHASE; otherwise the sampling index
// is flipped and marking continues.
173 void checkMarkStatus_p2() {
174 // check if the sum of send objs and receive obj are the same
175 // yes->check if the info is the latest; no->go on executing
176 unsigned int sumsendobj = 0;
177 for(int i = 0; i < NUMCORESACTIVE; i++) {
178 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
180 for(int i = 0; i < NUMCORESACTIVE; i++) {
181 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
// Zero difference means no mark messages are still in flight.
183 if(0 == sumsendobj) {
184 // Check if there are changes of the numsendobjs or numreceiveobjs
// NOTE(review): 'i' below has no visible declaration -- presumably declared
// on one of the lines missing from this fragment (it is tested after the
// loop, so it must outlive it).
187 for(i = 0; i < NUMCORESACTIVE; i++) {
188 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
// Loop ran to completion: both rounds agree for every core.
192 if(i == NUMCORESACTIVE) {
193 // all the core status info are the latest,stop mark phase
194 gc_status_info.gcphase = COMPACTPHASE;
195 // restore the gcstatus for all cores
196 for(int i = 0; i < NUMCORESACTIVE; i++) {
200 // There were changes between phase 1 and phase 2, can not decide
201 // whether the mark phase has been finished
203 // As it fails in phase 2, flip the entries
204 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
207 // There were changes between phase 1 and phase 2, can not decide
208 // whether the mark phase has been finished
210 // As it fails in phase 2, flip the entries
211 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// First-phase mark-termination check, run on the master core: records this
// core's own send/receive counts, and if all cores appear stalled, either
// broadcasts a GCMARKCONFIRM request (first pass) or runs the phase-2 check.
215 void checkMarkStatus() {
// Proceed only when not waiting on confirmations, or when all have arrived.
216 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
217 unsigned int entry_index = 0;
// Select which sampling round to record into; both assignments are visible
// here -- the branch lines choosing between them are missing from this
// fragment.
220 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
223 entry_index = gcnumsrobjs_index;
225 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
226 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
227 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
228 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
229 // check the status of all cores
230 if (gc_checkAllCoreStatus_I()) {
233 // the first time found all cores stall
234 // send out status confirm msg to all other cores
235 // reset the corestatus array too
237 numconfirm = NUMCORESACTIVE - 1;
238 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
239 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
242 checkMarkStatus_p2();
243 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
246 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
251 // compute load balance for all cores
// NOTE(review): the function header is missing from this fragment. The
// visible body totals per-core heap loads, derives the top of the live heap,
// and computes how many blocks each GC core should own after compaction.
253 // compute load balance
254 // get the total loads
256 unsigned int tloads = 0;
257 for(int i = 0; i < NUMCORES4GC; i++) {
258 tloads += gcloads[i];
260 heaptop = gcbaseva + tloads;
262 unsigned int topblockindex;
264 BLOCKINDEX(topblockindex, heaptop);
265 // num of blocks per core
// Ceiling division: distribute topblockindex blocks across NUMCORES4GC cores.
266 unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
// Collector routine for a GC worker core: reports readiness to the master,
// then walks the init/mark/compact/update(flush)/finish phases in lockstep
// with the master's phase broadcasts.
271 void gc_collect(struct garbagelist * stackptr) {
272 gc_status_info.gcprocessing = true;
273 // inform the master that this core is at a gc safe point and is ready to
275 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
277 // core collector routine
278 //wait for init phase
279 WAITFORGCPHASE(INITPHASE);
281 GC_PRINTF("Do initGC\n");
284 //send init finish msg to core coordinator
285 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
287 //wait for mark phase
288 WAITFORGCPHASE(MARKPHASE);
290 GC_PRINTF("Start mark phase\n");
292 GC_PRINTF("Finish mark phase, start compact phase\n");
294 GC_PRINTF("Finish compact phase\n");
296 WAITFORGCPHASE(UPDATEPHASE);
298 GC_PRINTF("Start flush phase\n");
299 GCPROFILE_INFO_2_MASTER();
301 GC_PRINTF("Finish flush phase\n");
303 CACHEADAPT_PHASE_CLIENT();
305 // invalidate all shared mem pointers
// The local shared-memory cache is stale after compaction; drop it so the
// next allocation refetches from the master.
306 bamboo_cur_msp = NULL;
307 bamboo_smem_size = 0;
308 bamboo_smem_zero_top = NULL;
311 WAITFORGCPHASE(FINISHPHASE);
313 GC_PRINTF("Finish gc! \n");
// Routine for cores that participate in GC synchronization but do not host
// heap blocks (non-GC cores): same phase handshake as gc_collect, but skips
// the compact phase.
316 void gc_nocollect(struct garbagelist * stackptr) {
317 gc_status_info.gcprocessing = true;
318 // inform the master that this core is at a gc safe point and is ready to
320 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
322 WAITFORGCPHASE(INITPHASE);
324 GC_PRINTF("Do initGC\n");
327 //send init finish msg to core coordinator
328 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
330 WAITFORGCPHASE(MARKPHASE);
332 GC_PRINTF("Start mark phase\n");
334 GC_PRINTF("Finish mark phase, wait for flush\n");
336 // non-gc core collector routine
337 WAITFORGCPHASE(UPDATEPHASE);
339 GC_PRINTF("Start flush phase\n");
340 GCPROFILE_INFO_2_MASTER();
342 GC_PRINTF("Finish flush phase\n");
344 CACHEADAPT_PHASE_CLIENT();
346 // invalidate all shared mem pointers
// Same invalidation as gc_collect: cached shared-memory state is stale.
347 bamboo_cur_msp = NULL;
348 bamboo_smem_size = 0;
349 bamboo_smem_zero_top = NULL;
352 WAITFORGCPHASE(FINISHPHASE);
354 GC_PRINTF("Finish gc! \n");
// Master-side start of the mark phase: broadcasts GCSTART to all client
// cores and moves the global phase to MARKPHASE.
357 void master_mark(struct garbagelist *stackptr) {
359 GC_PRINTF("Start mark phase \n");
360 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
361 gc_status_info.gcphase = MARKPHASE;
// Master-side collection of large-object info: asks every other GC core for
// its large-object list, records the master's own heap load, then spins
// until all responses have arrived.
367 void master_getlargeobjs() {
368 // send msgs to all cores requiring large objs info
369 // Note: only need to ask gc cores, non-gc cores do not host any objs
370 numconfirm = NUMCORES4GC - 1;
371 for(int i = 1; i < NUMCORES4GC; i++) {
372 send_msg_1(i,GCLOBJREQUEST);
374 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
375 //spin until we have all responses
// Busy-wait; numconfirm is decremented by the message handler elsewhere.
376 while(numconfirm!=0) ;
379 GC_PRINTF("prepare to cache large objs \n");
// Master-side update/flush phase: broadcasts GCSTARTUPDATE and waits until
// every core reports it has finished flushing references.
384 void master_updaterefs(struct garbagelist * stackptr) {
385 gc_status_info.gcphase = UPDATEPHASE;
386 GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
388 GC_PRINTF("Start flush phase \n");
// Block until all cores report done while still in UPDATEPHASE.
391 GC_CHECK_ALL_CORE_STATUS(UPDATEPHASE==gc_status_info.gcphase);
392 GC_PRINTF("Finish flush phase \n");
// Master-side GC wrap-up: enters FINISHPHASE, invalidates the master's own
// shared-memory pointers before releasing the other cores, emits cache-policy
// timing, and broadcasts GCFINISH.
395 void master_finish() {
396 gc_status_info.gcphase = FINISHPHASE;
398 // invalidate all shared mem pointers
399 // put it here as it takes time to inform all the other cores to
400 // finish gc and it might cause problem when some core resumes
401 // mutator earlier than the other cores
402 bamboo_cur_msp = NULL;
403 bamboo_smem_size = 0;
404 bamboo_smem_zero_top = NULL;
// Time the cache-policy output separately so it can be excluded from GC cost.
407 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
408 CACHEADAPT_OUTPUT_CACHE_POLICY();
409 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
411 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
413 gc_status_info.gcprocessing = false;
415 // inform other cores to stop and wait for gc
// NOTE(review): lines are missing here; this tail (counter reset plus a
// GCSTARTPRE broadcast) is presumably under a condition (e.g. another GC
// already requested) in the full source -- confirm against the original.
417 for(int i = 0; i < NUMCORESACTIVE; i++) {
418 // reuse the gcnumsendobjs & gcnumreceiveobjs
419 gcnumsendobjs[0][i] = 0;
420 gcnumreceiveobjs[0][i] = 0;
422 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
// Top-level GC coordinator, run on the master core: drives the phases in
// order -- init, mark, large-object gathering, reference update, cache
// adaptation, and finish.
426 void gc_master(struct garbagelist * stackptr) {
427 tprintf("start GC !!!!!!!!!!!!! \n");
428 gc_status_info.gcprocessing = true;
429 gc_status_info.gcphase = INITPHASE;
434 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
436 GC_PRINTF("Check core status \n");
// Wait until every core has acknowledged init.
437 GC_CHECK_ALL_CORE_STATUS(true);
// Cache-sampling output is timed separately (see gc_output_cache_policy_time).
439 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
440 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
441 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
444 master_mark(stackptr);
446 // get large objects from all cores
447 master_getlargeobjs();
452 // update the references
453 master_updaterefs(stackptr);
455 // do cache adaptation
456 CACHEADAPT_PHASE_MASTER();
458 // do finish up stuff
461 GC_PRINTF("gc finished \n");
462 tprintf("finish GC ! %d \n",gcflag);
// NOTE(review): the function header is missing from this fragment. The
// visible body records this core's message counters, then checks whether the
// global totals of sent and received objects balance; a nonzero difference
// means messages are still in flight and the pre-GC check must be retried.
467 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
468 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
469 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
471 for(int i = 0; i < NUMCORESACTIVE; i++) {
472 sumsendobj += gcnumsendobjs[0][i];
474 for(int i = 0; i < NUMCORESACTIVE; i++) {
475 sumsendobj -= gcnumreceiveobjs[0][i];
477 if(0 != sumsendobj) {
478 // there were still some msgs on the fly, wait until there
479 // are some update pregc information coming and check it again
481 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
485 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Hook run just before a collection: with cache-adapt sampling enabled,
// masks the timer interrupt and dumps the DTLB sampling data gathered so far.
491 void pregcprocessing() {
492 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
493 // disable the timer interrupt
494 bamboo_mask_timer_intr();
496 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
497 // get the sampling data
498 bamboo_output_dtlb_sampling();
// Hook run just after a collection: re-arms the sampling timer and unmasks
// the timer interrupt disabled by pregcprocessing.
502 void postgcprocessing() {
503 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
504 // enable the timer interrupt
505 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
506 bamboo_unmask_timer_intr();
// GC entry point called from the mutator at a safe point. The master core
// (core 0) first verifies all cores are ready; GC cores run gc_collect,
// the remaining cores run gc_nocollect.
// NOTE(review): this function continues past the end of the visible chunk;
// its tail (return values, master path) is not shown here.
510 bool gc(struct garbagelist * stackptr) {
513 gc_status_info.gcprocessing = false;
517 // core coordinator routine
518 if(0 == BAMBOO_NUM_OF_CORE) {
519 GC_PRINTF("Check if we can do gc or not\n");
520 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
521 if(!gc_checkAllCoreStatus()) {
522 // some of the cores are still executing the mutator and did not reach
523 // some gc safe point, therefore it is not ready to do gc
530 GC_PRINTF("start gc! \n");
533 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
535 gc_collect(stackptr);
538 gc_nocollect(stackptr);