1 // TODO: DO NOT support tag!!!
4 #include "multicoreruntime.h"
5 #include "multicoregarbage.h"
6 #include "multicoregcmark.h"
8 #include "multicoregccompact.h"
9 #include "multicoregcflush.h"
10 #include "multicoregcprofile.h"
// Watermarks for the mixed shared-memory allocation policy (defined in the
// memory allocator translation unit; initialized in initmulticoregcdata()).
14 extern unsigned int gcmem_mixed_threshold;
15 extern unsigned int gcmem_mixed_usedmem;
// Global GC state shared by master and client routines: current phase and
// whether a collection is currently in progress.
19 gc_status_t gc_status_info;
// Accumulated time spent outputting cache-adaptation policy data (profiling).
21 unsigned long long gc_output_cache_policy_time=0;
24 // dump whole mem in blocks
33 printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
34 // reserved blocks for sblocktbl
35 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
37 for(i=BAMBOO_BASE_VA; (unsinged int)i<(unsigned int)gcbaseva; i+= 4*16) {
38 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
39 udn_tile_coord_x(), udn_tile_coord_y(),
40 *((int *)(i)), *((int *)(i + 4)),
41 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
42 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
43 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
44 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
45 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
46 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
47 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
50 bool advanceblock = false;
52 for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
54 // computing sblock # and block #, core coordinate (x,y) also
55 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
57 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
58 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
70 coren = gc_block2core[block%(NUMCORES4GC*2)];
72 // compute core coordinate
73 x = BAMBOO_COORDS_X(coren);
74 y = BAMBOO_COORDS_Y(coren);
75 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
76 udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
77 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
80 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
81 udn_tile_coord_x(), udn_tile_coord_y(),
82 *((int *)(i)), *((int *)(i + 4)),
83 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
84 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
85 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
86 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
87 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
88 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
89 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
91 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
// Initialize all per-core GC bookkeeping at startup.  The startup core
// additionally resets the global per-core status, send/receive object-count
// and memory-request arrays.  NOTE(review): this view is a sampled
// extraction; intermediate source lines are missing.  Code kept verbatim.
95 void initmulticoregcdata() {
96 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
97 // startup core to initialize corestatus[]
98 for(int i = 0; i < NUMCORESACTIVE; i++) {
// Two snapshot slots ([0]/[1]) are kept per core for the mark-termination
// double-sampling protocol (see checkMarkStatus_p2()).
100 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
101 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
103 for(int i = 0; i < NUMCORES4GC; i++) {
105 gcrequiredmems[i] = 0;
107 gcfilledblocks[i] = 0;
111 bamboo_smem_zero_top = NULL;
// GC starts out idle, in the FINISH (i.e. "no collection running") phase.
113 gc_status_info.gcprocessing = false;
114 gc_status_info.gcphase = FINISHPHASE;
118 gcself_numsendobjs = 0;
119 gcself_numreceiveobjs = 0;
120 gcmarkedptrbound = 0;
// Forwarding table used during compaction; 128 initial buckets.
121 gcforwardobjtbl = allocateMGCHash_I(128);
// Mixed-policy threshold: 80% of the shared heap minus reserved blocks.
131 gcmem_mixed_threshold=(unsigned int)((BAMBOO_SHARED_MEM_SIZE-bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
132 gcmem_mixed_usedmem = 0;
135 gc_profile_flag = false;
137 gc_localheap_s = false;
138 #ifdef GC_CACHE_ADAPT
139 gccachestage = false;
142 INIT_MULTICORE_GCPROFILE_DATA();
// Tear down GC data allocated in initmulticoregcdata().
145 void dismulticoregcdata() {
146 freeMGCHash(gcforwardobjtbl);
// --- reset-between-collections routine (signature not visible in this
// view; presumably initGC() or similar — verify against full source). ---
// Startup core zeroes the per-GC-core request/fill arrays and both
// snapshot slots of the object counters...
150 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
151 for(int i = 0; i < NUMCORES4GC; i++) {
153 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
154 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
156 gcrequiredmems[i] = 0;
157 gcfilledblocks[i] = 0;
// ...and the object counters only for the remaining (non-GC) active cores.
160 for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
162 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
163 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
168 gcnumsrobjs_index = 0;
// Per-core local counters reset on every core.
170 gcself_numsendobjs = 0;
171 gcself_numreceiveobjs = 0;
172 gcmarkedptrbound = 0;
// Clear the forwarding table rather than reallocating it.
182 MGCHashreset(gcforwardobjtbl);
185 gc_output_cache_policy_time=0;
// Check whether every active core has reported status 0 (stalled / at a GC
// safe point).  Enters runtime mode to read gccorestatus[] consistently and
// restores client mode on both paths.  NOTE(review): the return statements
// are on lines missing from this view; presumably returns false from the
// early-exit branch and true after the loop — confirm against full source.
188 bool gc_checkAllCoreStatus() {
189 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
190 for(int i = 0; i < NUMCORESACTIVE; i++) {
191 if(gccorestatus[i] != 0) {
192 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
196 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
200 // NOTE: should be invoked with interrupts turned off
// Interrupt-off variant of gc_checkAllCoreStatus(): no mode transitions,
// caller guarantees exclusive access to gccorestatus[].
201 bool gc_checkAllCoreStatus_I() {
202 for(int i = 0; i < NUMCORESACTIVE; i++) {
203 if(gccorestatus[i] != 0) {
// Second phase of the distributed mark-termination check: verify that the
// total number of objects sent equals the total received (no marking work
// in flight), and that the two snapshot slots agree (counts did not change
// between phase 1 and phase 2).  Only then is the mark phase declared done.
210 void checkMarkStatus_p2() {
211 // check if the sum of send objs and receive obj are the same
212 // yes->check if the info is the latest; no->go on executing
213 unsigned int sumsendobj = 0;
214 for(int i = 0; i < NUMCORESACTIVE; i++) {
215 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
217 for(int i = 0; i < NUMCORESACTIVE; i++) {
218 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
// Zero difference => every sent object was received somewhere.
220 if(0 == sumsendobj) {
221 // Check if there are changes of the numsendobjs or numreceiveobjs
// Compare the two snapshots; loop index i survives the loop so the
// "i == NUMCORESACTIVE" test below detects "no mismatch found".
224 for(i = 0; i < NUMCORESACTIVE; i++) {
225 if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
229 if(i == NUMCORESACTIVE) {
230 // all the core status info are the latest,stop mark phase
231 gc_status_info.gcphase = COMPACTPHASE;
232 // restore the gcstatus for all cores
233 for(int i = 0; i < NUMCORESACTIVE; i++) {
237 // There were changes between phase 1 and phase 2, can not decide
238 // whether the mark phase has been finished
240 // As it fails in phase 2, flip the entries
241 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
244 // There were changes between phase 1 and phase 2, can not decide
245 // whether the mark phase has been finished
247 // As it fails in phase 2, flip the entries
248 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
// First phase of the mark-termination check, run on the master when a core
// reports it has stalled.  Records this core's own counters into the
// appropriate snapshot slot, and if all cores appear stalled either
// broadcasts a GCMARKCONFIRM round (first detection) or proceeds to the
// phase-2 verification.
252 void checkMarkStatus() {
253 if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
254 unsigned int entry_index = 0;
// Write into the slot NOT currently being summed when a confirm round is
// outstanding; otherwise into the current slot.
257 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
260 entry_index = gcnumsrobjs_index;
262 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
263 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
264 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
265 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
266 // check the status of all cores
267 if (gc_checkAllCoreStatus_I()) {
270 // the first time found all cores stall
271 // send out status confirm msg to all other cores
272 // reset the corestatus array too
274 numconfirm = NUMCORESACTIVE - 1;
275 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
276 GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
// All confirms already gathered: run the phase-2 consistency check.
279 checkMarkStatus_p2();
280 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
283 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
288 // compute load balance for all cores
// Sum the per-core marked loads to find the post-compaction heap top, then
// derive the block index and owning core of that top address.  Outputs via
// heaptop/topblock/topcore; returns the number of blocks per core
// (presumably — the return statement is not visible in this view).
289 int loadbalance(void ** heaptop, unsigned int * topblock, unsigned int * topcore) {
290 // compute load balance
291 // get the total loads
292 unsigned int tloads = 0;
293 for(int i = 0; i < NUMCORES4GC; i++) {
294 tloads += gcloads[i];
// Heap top = base + total live data (compaction packs from the base).
296 *heaptop = gcbaseva + tloads;
298 unsigned int topblockindex;
300 BLOCKINDEX(topblockindex, *heaptop);
301 // num of blocks per core
// Ceiling division: distribute topblockindex blocks over NUMCORES4GC cores.
302 unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
304 *topblock = topblockindex;
305 RESIDECORE(*heaptop, *topcore);
309 // compute total mem size required and sort the lobjs in ascending order
// Insertion sort over the linked list of large-object pointer blocks
// (gclobjtail2/gclobjtailindex2 walk the list via gc_lobjmoreItems2_I()).
// Each entry carries (lobj address, length, host core); entries are shifted
// toward the tail until the insertion point for tmp_lobj is found.
// Returns the accumulated total size (sumsize) — the accumulation line is
// not visible in this sampled view; verify against full source.
310 unsigned int sortLObjs() {
311 unsigned int tmp_lobj = 0;
312 unsigned int tmp_len = 0;
313 unsigned int tmp_host = 0;
314 unsigned int sumsize = 0;
316 gclobjtail2 = gclobjtail;
317 gclobjtailindex2 = gclobjtailindex;
318 // TODO USE QUICK SORT INSTEAD?
319 while(gc_lobjmoreItems2_I()) {
// Take the next unsorted entry (the one just before the current index).
321 tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
322 tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
323 tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
325 GCPROFILE_RECORD_LOBJ();
326 unsigned int i = gclobjtailindex2-1;
327 struct lobjpointerblock * tmp_block = gclobjtail2;
328 // find the place to insert
// At the start of a block: compare against the last entry of the previous
// block, shifting it down and crossing the block boundary when larger.
331 if(tmp_block->prev == NULL) {
334 if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
335 tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
336 tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
337 tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
338 tmp_block = tmp_block->prev;
342 } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
// Within a block: standard insertion-sort shift of larger entries.
344 if(tmp_block->lobjs[i-1] > tmp_lobj) {
345 tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
346 tmp_block->lengths[i] = tmp_block->lengths[i-1];
347 tmp_block->hosts[i] = tmp_block->hosts[i-1];
// Write the saved entry into its slot only if it actually moved.
355 if(i != gclobjtailindex2 - 1) {
356 tmp_block->lobjs[i] = tmp_lobj;
357 tmp_block->lengths[i] = tmp_len;
358 tmp_block->hosts[i] = tmp_host;
// --- large-object caching routine (signature not visible in this view;
// master_getlargeobjs() calls cacheLObjs() — presumably this is it). ---
// Sorts the large objects, then moves them to the very top of the shared
// heap so compaction of small objects can proceed below; fails when the
// marked heap top would overlap the cached region.
365 // check the total mem size need for large objs
366 unsigned long long sumsize = 0;
367 unsigned int size = 0;
369 sumsize = sortLObjs();
371 GCPROFILE_RECORD_LOBJSPACE();
373 // check if there are enough space to cache these large objs
// Destination region: [heap_end - sumsize, heap_end).
374 unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
375 if((unsigned long long)gcheaptop > (unsigned long long)dst) {
376 // do not have enough room to cache large objs
380 gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
381 // cache the largeObjs to the top of the shared heap
382 dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
383 while(gc_lobjmoreItems3_I()) {
385 size = gclobjtail2->lengths[gclobjtailindex2];
386 // set the mark field to , indicating that this obj has been moved
387 // and need to be flushed
// memmove when source and destination may overlap, memcpy otherwise.
389 if((unsigned int)dst<(unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
390 memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
392 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
398 // update the bmmboo_smemtbl to record current shared mem usage
// For core `coren`, walk its blocks (two base entries in gc_core2block,
// strided by NUMCORES4GC*2): blocks fully below localtop are marked full,
// the block containing localtop gets the partial load, and (in the mixed
// policy) gcmem_mixed_usedmem is accumulated accordingly.
399 void updateSmemTbl(unsigned int coren, void * localtop) {
400 unsigned int ltopcore = 0;
401 unsigned int bound = BAMBOO_SMEM_SIZE_L;
402 BLOCKINDEX(ltopcore, localtop);
// Above BAMBOO_LARGE_SMEM_BOUND the smaller regular block size applies.
403 if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
404 bound = BAMBOO_SMEM_SIZE;
// Bytes used inside the block that contains localtop.
406 unsigned int load = (unsigned INTPTR)(localtop-gcbaseva)%(unsigned int)bound;
407 unsigned int toset = 0;
// Outer loop over block "rounds"; exits via break on lines not visible in
// this sampled view.
408 for(int j=0; 1; j++) {
409 for(int i=0; i<2; i++) {
410 toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
411 if(toset < ltopcore) {
412 bamboo_smemtbl[toset]=BLOCKSIZE(toset<NUMCORES4GC);
414 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
416 } else if(toset == ltopcore) {
417 bamboo_smemtbl[toset] = load;
419 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
// Client-side GC routine for cores that participate in collection
// (BAMBOO_NUM_OF_CORE < NUMCORES4GC).  Walks the phases driven by the
// master: INIT -> MARK -> (compact) -> FLUSH -> FINISH, sending completion
// messages to STARTUPCORE between phases.
429 void gc_collect(struct garbagelist * stackptr) {
430 gc_status_info.gcprocessing = true;
431 // inform the master that this core is at a gc safe point and is ready to
433 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
435 // core collector routine
436 //wait for init phase
437 WAITFORGCPHASE(INITPHASE);
439 GC_PRINTF("Do initGC\n");
442 //send init finish msg to core coordinator
443 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
445 //wait for mark phase
446 WAITFORGCPHASE(MARKPHASE);
448 GC_PRINTF("Start mark phase\n");
// true => this core scans its own stack roots (stackptr) as well.
449 mark(true, stackptr);
450 GC_PRINTF("Finish mark phase, start compact phase\n");
// (Compact-phase call sits on lines missing from this sampled view.)
452 GC_PRINTF("Finish compact phase\n");
454 WAITFORGCPHASE(FLUSHPHASE);
456 GC_PRINTF("Start flush phase\n");
457 GCPROFILE_INFO_2_MASTER();
459 GC_PRINTF("Finish flush phase\n");
461 CACHEADAPT_PHASE_CLIENT();
463 // invalidate all shared mem pointers
// Cached allocation window is stale after compaction; force re-request.
464 bamboo_cur_msp = NULL;
465 bamboo_smem_size = 0;
466 bamboo_smem_zero_top = NULL;
469 WAITFORGCPHASE(FINISHPHASE);
471 GC_PRINTF("Finish gc! \n");
// Client-side GC routine for cores that do NOT host collectible heap blocks
// (BAMBOO_NUM_OF_CORE >= NUMCORES4GC).  Mirrors gc_collect() but skips the
// compact phase: init, mark its own roots, then wait for flush and finish.
474 void gc_nocollect(struct garbagelist * stackptr) {
475 gc_status_info.gcprocessing = true;
476 // inform the master that this core is at a gc safe point and is ready to
478 send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
480 WAITFORGCPHASE(INITPHASE);
482 GC_PRINTF("Do initGC\n");
485 //send init finish msg to core coordinator
486 send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
488 WAITFORGCPHASE(MARKPHASE);
490 GC_PRINTF("Start mark phase\n");
491 mark(true, stackptr);
492 GC_PRINTF("Finish mark phase, wait for flush\n");
494 // non-gc core collector routine
495 WAITFORGCPHASE(FLUSHPHASE);
497 GC_PRINTF("Start flush phase\n");
498 GCPROFILE_INFO_2_MASTER();
500 GC_PRINTF("Finish flush phase\n");
502 CACHEADAPT_PHASE_CLIENT();
504 // invalidate all shared mem pointers
505 bamboo_cur_msp = NULL;
506 bamboo_smem_size = 0;
507 bamboo_smem_zero_top = NULL;
510 WAITFORGCPHASE(FINISHPHASE);
512 GC_PRINTF("Finish gc! \n");
// Master side of the mark phase: broadcast GCSTART, enter MARKPHASE, and
// keep marking until checkMarkStatus()/checkMarkStatus_p2() (triggered by
// incoming messages) advances gcphase to COMPACTPHASE.
515 void master_mark(struct garbagelist *stackptr) {
518 GC_PRINTF("Start mark phase \n");
519 GC_SEND_MSG_1_TO_CLIENT(GCSTART);
520 gc_status_info.gcphase = MARKPHASE;
// isfirst is presumably set on lines not visible in this sampled view.
523 while(MARKPHASE == gc_status_info.gcphase) {
524 mark(isfirst, stackptr);
// Master collects large-object info from every GC core, then caches all
// large objects at the top of the shared heap (cacheLObjs()).
531 void master_getlargeobjs() {
532 // send msgs to all cores requiring large objs info
533 // Note: only need to ask gc cores, non-gc cores do not host any objs
534 numconfirm = NUMCORES4GC - 1;
535 for(int i = 1; i < NUMCORES4GC; i++) {
536 send_msg_1(i,GCLOBJREQUEST);
// Record the master's own load while waiting for responses.
538 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
539 //spin until we have all responses
// numconfirm is decremented by the message handler (interrupt context).
540 while(numconfirm!=0) ;
// gcheaptop must cover the highest marked address before caching lobjs.
543 if(gcheaptop < gcmarkedptrbound) {
544 gcheaptop = gcmarkedptrbound;
547 GC_PRINTF("prepare to cache large objs \n");
549 // cache all large objs
550 BAMBOO_ASSERTMSG(cacheLObjs(), "Not enough space to cache large objects\n");
// Master side of the flush (reference-update) phase: broadcast
// GCSTARTFLUSH and wait until every core has finished flushing.
554 void master_updaterefs(struct garbagelist * stackptr) {
555 gc_status_info.gcphase = FLUSHPHASE;
556 GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
558 GC_PRINTF("Start flush phase \n");
// Block until all cores report done while still in FLUSHPHASE.
561 GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gc_status_info.gcphase);
562 GC_PRINTF("Finish flush phase \n");
// Master finish-up: enter FINISHPHASE, invalidate this core's cached
// shared-memory allocation pointers BEFORE telling the other cores to
// resume (see comment below), emit cache-policy data, broadcast GCFINISH,
// and reset the reused counters for the next pre-GC handshake.
565 void master_finish() {
566 gc_status_info.gcphase = FINISHPHASE;
568 // invalidate all shared mem pointers
569 // put it here as it takes time to inform all the other cores to
570 // finish gc and it might cause problem when some core resumes
571 // mutator earlier than the other cores
572 bamboo_cur_msp = NULL;
573 bamboo_smem_size = 0;
574 bamboo_smem_zero_top = NULL;
// Time the cache-policy output separately so it can be excluded from GC
// time measurements.
577 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
578 CACHEADAPT_OUTPUT_CACHE_POLICY();
579 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
581 GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
583 gc_status_info.gcprocessing = false;
585 // inform other cores to stop and wait for gc
587 for(int i = 0; i < NUMCORESACTIVE; i++) {
588 // reuse the gcnumsendobjs & gcnumreceiveobjs
589 gcnumsendobjs[0][i] = 0;
590 gcnumreceiveobjs[0][i] = 0;
592 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
// GC coordinator routine, run on the startup core: drives all phases in
// order (init -> mark -> large objects -> compact -> flush -> cache-adapt
// -> finish).  Several phase calls sit on lines missing from this view.
596 void gc_master(struct garbagelist * stackptr) {
597 tprintf("start GC !!!!!!!!!!!!! \n");
598 gc_status_info.gcprocessing = true;
599 gc_status_info.gcphase = INITPHASE;
604 GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
606 GC_PRINTF("Check core status \n");
// Wait for every core to acknowledge init.
607 GC_CHECK_ALL_CORE_STATUS(true);
609 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
610 CACHEADAPT_OUTPUT_CACHE_SAMPLING();
611 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
614 master_mark(stackptr);
616 // get large objects from all cores
617 master_getlargeobjs();
622 // update the references
623 master_updaterefs(stackptr);
625 // do cache adaptation
626 CACHEADAPT_PHASE_MASTER();
628 // do finish up stuff
631 GC_PRINTF("gc finished \n");
632 tprintf("finish GC ! %d \n",gcflag);
// --- pre-GC message check fragment (function signature not visible in
// this view; presumably pregccheck()).  Before starting a collection the
// master verifies no mutator messages are still in flight by summing sent
// minus received object counts across all active cores; a nonzero sum
// means it must wait for updated pre-GC info and retry.
637 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
638 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
639 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
641 for(int i = 0; i < NUMCORESACTIVE; i++) {
642 sumsendobj += gcnumsendobjs[0][i];
644 for(int i = 0; i < NUMCORESACTIVE; i++) {
645 sumsendobj -= gcnumreceiveobjs[0][i];
647 if(0 != sumsendobj) {
648 // there were still some msgs on the fly, wait until there
649 // are some update pregc information coming and check it again
651 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
655 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// Per-core preparation immediately before a collection: with cache
// sampling enabled, mask the sampling timer interrupt and flush the
// collected DTLB sampling data; close the current shared-memory span so
// the heap is untouched during gcinit (required by GC_CACHE_ADAPT).
661 void pregcprocessing() {
662 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
663 // disable the timer interrupt
664 bamboo_mask_timer_intr();
666 // Zero out the remaining memory here because for the GC_CACHE_ADAPT version,
667 // we need to make sure during the gcinit phase the shared heap is not
668 // touched. Otherwise, there would be problem when adapt the cache strategy.
669 BAMBOO_CLOSE_CUR_MSP();
670 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
671 // get the sampling data
672 bamboo_output_dtlb_sampling();
// Per-core cleanup after a collection: re-arm and unmask the cache
// sampling timer interrupt (counterpart of pregcprocessing()).
676 void postgcprocessing() {
677 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
678 // enable the timer interrupt
679 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
680 bamboo_unmask_timer_intr();
// Top-level GC entry point, called by every core at a GC safe point.
// Dispatches by core role: core 0 coordinates (after verifying all cores
// are stalled), cores < NUMCORES4GC run gc_collect(), the rest run
// gc_nocollect().  Continues past the end of this view.
684 bool gc(struct garbagelist * stackptr) {
687 gc_status_info.gcprocessing = false;
691 // core coordinator routine
692 if(0 == BAMBOO_NUM_OF_CORE) {
693 GC_PRINTF("Check if we can do gc or not\n");
694 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
695 if(!gc_checkAllCoreStatus()) {
696 // some of the cores are still executing the mutator and did not reach
697 // some gc safe point, therefore it is not ready to do gc
704 GC_PRINTF("start gc! \n");
707 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
709 gc_collect(stackptr);
712 gc_nocollect(stackptr);