3 #include "multicoreruntime.h"
4 #include "multicoregarbage.h"
5 #include "multicoregcmark.h"
6 #include "multicoregccompact.h"
7 #include "multicoregcflush.h"
8 #include "multicoregcprofile.h"
10 #include "multicoremem_helper.h"
// NOTE(review): the integer at the start of each line is a leftover
// original-listing line number from extraction; gaps mark elided lines.
// Global GC state shared by this file: current phase + in-progress flag.
13 gc_status_t gc_status_info;
// Accumulated time spent emitting cache-policy output (see CACHEADAPT_* use).
15 unsigned long long gc_output_cache_policy_time=0;
18 // dump whole mem in blocks
// NOTE(review): body fragment of the shared-memory dump helper; the function
// signature and several interior lines (loop closers, locals i/j/block/sblock,
// coren/x/y declarations) are elided from this listing. Each printf emits
// 16 consecutive 32-bit words (64 bytes), prefixed by this tile's (x,y) coords.
27 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
28 // reserved blocks for sblocktbl
29 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
// Dump the reserved region below the GC heap base (gcbaseva), 64B per line.
31 for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
32 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
33 udn_tile_coord_x(), udn_tile_coord_y(),
34 *((int *)(i)), *((int *)(i + 4)),
35 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
36 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
37 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
38 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
39 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
40 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
41 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
44 bool advanceblock = false;
// Dump the GC-managed heap [gcbaseva, gcbaseva+BAMBOO_SHARED_MEM_SIZE).
46 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
48 // computing sblock # and block #, core coordinate (x,y) also
// j counts 64-byte lines; a new sblock boundary is hit every
// BAMBOO_SMEM_SIZE/(4*16) lines. Below BAMBOO_LARGE_SMEM_BOUND the
// large-block size BAMBOO_SMEM_SIZE_L governs block advancement.
49 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
51 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
52 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
// Map the block number to its owning core; blocks are striped over
// 2*NUMCORES4GC entries of gc_block2core.
64 coren = gc_block2core[block%(NUMCORES4GC*2)];
66 // compute core coordinate
67 x = BAMBOO_COORDS_X(coren);
68 y = BAMBOO_COORDS_Y(coren);
69 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
70 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
71 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
74 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
75 udn_tile_coord_x(), udn_tile_coord_y(),
76 *((int *)(i)), *((int *)(i + 4)),
77 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
78 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
79 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
80 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
81 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
82 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
83 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
85 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
// One-time initialization of per-core GC data structures.
// On the master core (STARTUPCORE) it additionally builds the global block
// allocation table. NOTE(review): interior lines are elided in this listing
// (e.g. the condition selecting line 108 vs 110, loop/if closers).
89 void initmulticoregcdata() {
90 bamboo_smem_zero_top = NULL;
// Start idle: no collection in progress, last phase marked finished.
92 gc_status_info.gcprocessing = false;
93 gc_status_info.gcphase = FINISHPHASE;
// Forwarding table used during compaction/update; initial capacity 128.
96 gcforwardobjtbl = allocateMGCHash_I(128);
98 gc_profile_flag = false;
100 #ifdef GC_CACHE_ADAPT
101 gccachestage = false;
// Master core owns the global block table describing every shared block.
104 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
105 allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
106 for(int i=0; i<GCNUMBLOCK;i++) {
// Two corenum assignments remain from an elided conditional: one default,
// one striping blocks over 2*NUMCORES4GC cores -- TODO confirm the branch.
108 allocationinfo.blocktable[i].corenum=0;
110 allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
111 allocationinfo.blocktable[i].status=BS_FREE;
112 allocationinfo.blocktable[i].usedspace=0;
113 allocationinfo.blocktable[i].freespace=GLOBALBLOCKSIZE(i);
118 //initialize update structures
120 for(int i=0;i<NUMCORES4GC;i++) {
121 origblockarray[i]=NULL;
124 INIT_MULTICORE_GCPROFILE_DATA();
// Tear-down counterpart of initmulticoregcdata: releases the forwarding
// table. (Closing brace elided in this listing.)
127 void dismulticoregcdata() {
128 freeMGCHash(gcforwardobjtbl);
// NOTE(review): body fragment of a per-collection reset routine (its header
// is elided from this listing; presumably initGC). Resets the send/receive
// object counters, pending memory requests and the forwarding table before
// a new GC cycle.
// Master core clears the double-buffered per-core counters.
132 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
133 for(int i = 0; i < NUMCORES4GC; i++) {
135 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
136 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
138 gcrequiredmems[i] = 0;
// Non-GC (helper) cores have counters but no memory-request slots.
140 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
142 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
143 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
145 gcnumsrobjs_index = 0;
147 gcself_numsendobjs = 0;
148 gcself_numreceiveobjs = 0;
154 update_origblockptr=NULL;
157 MGCHashreset(gcforwardobjtbl);
160 gc_output_cache_policy_time=0;
// Second round of the distributed mark-termination check: the mark phase is
// finished only if (a) total objects sent equals total received for the
// current snapshot, and (b) the two snapshots (index 0 and 1) agree, proving
// no messages were in flight between the rounds. On failure the snapshot
// index is flipped so the next confirmation round writes the other buffer.
// NOTE(review): several closing braces / else lines are elided here.
163 void checkMarkStatus_p2() {
164 // check if the sum of send objs and receive obj are the same
165 // yes->check if the info is the latest; no->go on executing
166 unsigned int sumsendobj = 0;
167 for(int i = 0; i < NUMCORESACTIVE; i++) {
168 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
170 for(int i = 0; i < NUMCORESACTIVE; i++) {
171 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
// Zero difference: no object message outstanding in this snapshot.
173 if(0 == sumsendobj) {
174 // Check if there are changes of the numsendobjs or numreceiveobjs
// i is declared in an elided line; after the loop i==NUMCORESACTIVE
// iff both snapshots matched for every core.
177 for(i = 0; i < NUMCORESACTIVE; i++) {
178 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
182 if(i == NUMCORESACTIVE) {
183 // all the core status info are the latest,stop mark phase
184 gc_status_info.gcphase = COMPACTPHASE;
185 // restore the gcstatus for all cores
186 for(int i = 0; i < NUMCORESACTIVE; i++) {
190 // There were changes between phase 1 and phase 2, can not decide
191 // whether the mark phase has been finished
193 // As it fails in phase 2, flip the entries
194 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
197 // There were changes between phase 1 and phase 2, can not decide
198 // whether the mark phase has been finished
200 // As it fails in phase 2, flip the entries
201 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// First round of the mark-termination check, run on the master core.
// Records this core's send/receive counters into the appropriate snapshot
// buffer, and once all cores report stalled either broadcasts GCMARKCONFIRM
// (first round) or runs the second-round check (checkMarkStatus_p2).
// NOTE(review): the if/else structure around lines 210/213 and 227-232 has
// elided lines; branch conditions are not fully visible.
205 void checkMarkStatus() {
// Only proceed when not waiting on confirmations, or all have arrived.
206 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
207 unsigned int entry_index = 0;
// Elided conditional selects which snapshot buffer to write: the
// opposite one in one branch, the current one in the other.
210 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
213 entry_index = gcnumsrobjs_index;
215 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
216 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
217 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
218 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
219 // check the status of all cores
220 if (gc_checkCoreStatus()) {
223 // the first time found all cores stall
224 // send out status confirm msg to all other cores
225 // reset the corestatus array too
227 numconfirm = NUMCORESACTIVE - 1;
228 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
229 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
// Subsequent round: all confirmations in, evaluate termination.
232 checkMarkStatus_p2();
233 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
236 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
241 // compute load balance for all cores
// NOTE(review): fragment of the load-balance routine (header and locals such
// as heaptop are elided). Sums the per-core live-data loads, derives the top
// of the compacted heap, and computes how many blocks each GC core should
// end up with.
243 // compute load balance
244 // get the total loads
246 unsigned int tloads = 0;
247 for(int i = 0; i < NUMCORES4GC; i++) {
248 tloads += gcloads[i];
250 heaptop = gcbaseva + tloads;
252 unsigned int topblockindex;
254 BLOCKINDEX(topblockindex, heaptop);
255 // num of blocks per core
// Ceiling division so every live block is assigned to some core.
256 unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
// Collector routine for a GC core (core id < NUMCORES4GC): walks the phase
// protocol driven by the master -- init, mark, compact, update/flush,
// finish -- reporting completion of each phase back to STARTUPCORE.
// @param stackptr list of this core's stack roots for marking.
// NOTE(review): the actual phase-work calls (initGC/mark/compact/update)
// are on elided lines between the WAITFORGCPHASE/GC_PRINTF pairs.
261 void gc_collect(struct garbagelist * stackptr) {
262 gc_status_info.gcprocessing = true;
263 // inform the master that this core is at a gc safe point and is ready to
265 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
267 // core collector routine
268 //wait for init phase
269 WAITFORGCPHASE(INITPHASE);
271 GC_PRINTF("Do initGC\n");
274 //send init finish msg to core coordinator
275 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
277 //wait for mark phase
278 WAITFORGCPHASE(MARKPHASE);
280 GC_PRINTF("Start mark phase\n");
282 GC_PRINTF("Finish mark phase, start compact phase\n");
284 GC_PRINTF("Finish compact phase\n");
286 WAITFORGCPHASE(UPDATEPHASE);
288 GC_PRINTF("Start flush phase\n");
289 GCPROFILE_INFO_2_MASTER();
291 GC_PRINTF("Finish flush phase\n");
293 CACHEADAPT_PHASE_CLIENT();
295 // invalidate all shared mem pointers
// Compaction moved objects, so cached shared-memory allocation pointers
// are stale and must be dropped.
296 bamboo_cur_msp = NULL;
297 bamboo_smem_size = 0;
298 bamboo_smem_zero_top = NULL;
301 WAITFORGCPHASE(FINISHPHASE);
303 GC_PRINTF("Finish gc! \n");
// Routine for a non-GC (helper) core: it follows the same phase protocol as
// gc_collect but skips the compact phase, since non-GC cores host no heap
// blocks of their own (they still mark their roots and flush references).
// @param stackptr list of this core's stack roots for marking.
306 void gc_nocollect(struct garbagelist * stackptr) {
307 gc_status_info.gcprocessing = true;
308 // inform the master that this core is at a gc safe point and is ready to
310 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
312 WAITFORGCPHASE(INITPHASE);
314 GC_PRINTF("Do initGC\n");
317 //send init finish msg to core coordinator
318 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
320 WAITFORGCPHASE(MARKPHASE);
322 GC_PRINTF("Start mark phase\n");
324 GC_PRINTF("Finish mark phase, wait for flush\n");
326 // non-gc core collector routine
327 WAITFORGCPHASE(UPDATEPHASE);
329 GC_PRINTF("Start flush phase\n");
330 GCPROFILE_INFO_2_MASTER();
332 GC_PRINTF("Finish flush phase\n");
334 CACHEADAPT_PHASE_CLIENT();
336 // invalidate all shared mem pointers
// Objects may have moved during the master-driven compaction; drop any
// cached shared-memory allocation state.
337 bamboo_cur_msp = NULL;
338 bamboo_smem_size = 0;
339 bamboo_smem_zero_top = NULL;
342 WAITFORGCPHASE(FINISHPHASE);
344 GC_PRINTF("Finish gc! \n");
// Master-side start of the mark phase: broadcasts GCSTART to all client
// cores, switches the global phase, then (on elided lines) presumably marks
// the master's own roots from stackptr -- TODO confirm against full source.
347 void master_mark(struct garbagelist *stackptr) {
349 GC_PRINTF("Start mark phase \n");
350 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
351 gc_status_info.gcphase = MARKPHASE;
// Master-side collection of large-object info: asks every other GC core for
// its large-object list, records the master's own heap-top load, then spins
// until all responses have arrived (numconfirm decremented by the message
// handler on elided/other-file lines).
357 void master_getlargeobjs() {
358 // send msgs to all cores requiring large objs info
359 // Note: only need to ask gc cores, non-gc cores do not host any objs
360 numconfirm = NUMCORES4GC - 1;
361 for(int i = 1; i < NUMCORES4GC; i++) {
362 send_msg_1(i,GCLOBJREQUEST);
364 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
365 //spin until we have all responses
// Busy-wait; numconfirm is decremented asynchronously by message handling.
366 while(numconfirm!=0) ;
369 GC_PRINTF("prepare to cache large objs \n");
// Master-side update/flush phase: broadcasts GCSTARTUPDATE, performs the
// master's own reference update (elided lines), and waits until every core
// reports the update phase done.
374 void master_updaterefs(struct garbagelist * stackptr) {
375 gc_status_info.gcphase = UPDATEPHASE;
376 GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
378 GC_PRINTF("Start flush phase \n");
381 GC_CHECK_ALL_CORE_STATUS(UPDATEPHASE==gc_status_info.gcphase);
382 GC_PRINTF("Finish flush phase \n");
// Master-side finish: invalidates the master's cached shared-memory
// pointers BEFORE telling other cores GC is over (so no core resumes the
// mutator against stale pointers), accounts cache-policy output time,
// broadcasts GCFINISH, and re-arms the pre-GC bookkeeping for the next cycle.
385 void master_finish() {
386 gc_status_info.gcphase = FINISHPHASE;
388 // invalidate all shared mem pointers
389 // put it here as it takes time to inform all the other cores to
390 // finish gc and it might cause problem when some core resumes
391 // mutator earlier than the other cores
392 bamboo_cur_msp = NULL;
393 bamboo_smem_size = 0;
394 bamboo_smem_zero_top = NULL;
// Time the cache-policy dump separately so it can be excluded from GC cost.
397 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
398 CACHEADAPT_OUTPUT_CACHE_POLICY();
399 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
401 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
403 gc_status_info.gcprocessing = false;
405 // inform other cores to stop and wait for gc
// NOTE(review): this GCSTARTPRE broadcast appears to be guarded by an
// elided condition (likely "gcflag" re-set during this GC) -- confirm.
407 for(int i = 0; i < NUMCORESACTIVE; i++) {
408 // reuse the gcnumsendobjs & gcnumreceiveobjs
409 gcnumsendobjs[0][i] = 0;
410 gcnumreceiveobjs[0][i] = 0;
412 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
// Top-level GC coordinator, run on STARTUPCORE: drives init -> mark ->
// large-object exchange -> (elided: compaction) -> reference update ->
// cache adaptation -> finish, broadcasting each phase to client cores.
// @param stackptr master core's stack roots.
416 void gc_master(struct garbagelist * stackptr) {
417 tprintf("start GC !!!!!!!!!!!!! \n");
418 gc_status_info.gcprocessing = true;
419 gc_status_info.gcphase = INITPHASE;
424 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
426 GC_PRINTF("Check core status \n");
// Wait for every core to acknowledge the init phase.
427 GC_CHECK_ALL_CORE_STATUS(true);
429 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
430 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
431 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
434 master_mark(stackptr);
436 // get large objects from all cores
437 master_getlargeobjs();
// (Compaction step occurs on elided lines 438-441 -- TODO confirm.)
442 // update the references
443 master_updaterefs(stackptr);
445 // do cache adaptation
446 CACHEADAPT_PHASE_MASTER();
448 // do finish up stuff
451 GC_PRINTF("gc finished \n");
452 tprintf("finish GC ! %d \n",gcflag);
// NOTE(review): body fragment of a pre-GC consistency check (header elided;
// presumably pregccheck). Verifies that the number of objects sent equals
// the number received across all active cores -- i.e. no object messages
// are still in flight -- before allowing GC to start; otherwise it waits
// for updated counts and rechecks (loop structure on elided lines).
457 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
458 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
459 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
461 for(int i = 0; i < NUMCORESACTIVE; i++) {
462 sumsendobj += gcnumsendobjs[0][i];
464 for(int i = 0; i < NUMCORESACTIVE; i++) {
465 sumsendobj -= gcnumreceiveobjs[0][i];
467 if(0 != sumsendobj) {
468 // there were still some msgs on the fly, wait until there
469 // are some update pregc information coming and check it again
471 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
475 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Hook run just before GC when cache-adaptive sampling is enabled:
// silences the sampling timer interrupt and dumps the DTLB sampling data
// collected so far. No-op unless GC_CACHE_ADAPT && GC_CACHE_SAMPLING.
481 void pregcprocessing() {
482 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
483 // disable the timer interrupt
484 bamboo_mask_timer_intr();
486 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
487 // get the sampling data
488 bamboo_output_dtlb_sampling();
// Hook run right after GC: re-arms the sampling timer and unmasks its
// interrupt, undoing pregcprocessing. No-op unless GC_CACHE_ADAPT &&
// GC_CACHE_SAMPLING.
492 void postgcprocessing() {
493 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
494 // enable the timer interrupt
495 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
496 bamboo_unmask_timer_intr();
// Entry point for a GC cycle; dispatches by core role: core 0 coordinates
// (after checking all cores reached a safe point), GC cores run
// gc_collect, remaining cores run gc_nocollect.
// NOTE(review): this function continues past the end of the visible
// listing; the return paths are not visible here.
500 bool gc(struct garbagelist * stackptr) {
503 gc_status_info.gcprocessing = false;
507 // core coordinator routine
508 if(0 == BAMBOO_NUM_OF_CORE) {
509 GC_PRINTF("Check if we can do gc or not\n");
510 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
511 if(!gc_checkCoreStatus()) {
512 // some of the cores are still executing the mutator and did not reach
513 // some gc safe point, therefore it is not ready to do gc
520 GC_PRINTF("start gc! \n");
523 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
525 gc_collect(stackptr);
528 gc_nocollect(stackptr);