Robust/src/Runtime/bamboo/multicoregarbage.c

   1 #ifdef MULTICORE_GC
   2 #include "runtime.h"
   3 #include "multicoreruntime.h"
   4 #include "multicoregarbage.h"
   5 #include "multicoregcmark.h"
   6 #include "multicoregccompact.h"
   7 #include "multicoregcflush.h"
   8 #include "multicoregcprofile.h"
   9 #include "gcqueue.h"
  10 #include "multicoremem_helper.h"
  11
  12 volatile bool gcflag;
  13 gc_status_t gc_status_info;
  14
  15 unsigned long long gc_output_cache_policy_time=0;
  16
  17 #ifdef GC_DEBUG
  18 // dump whole mem in blocks
  19 void dumpSMem() {
  20   int block = 0;
  21   int sblock = 0;
  22   unsigned int j = 0;
  23   unsigned int i = 0;
  24   int coren = 0;
  25   int x = 0;
  26   int y = 0;
  27   printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  28   // reserved blocks for sblocktbl
  29   printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
  30          udn_tile_coord_y());
  31   for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
  32     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  33         udn_tile_coord_x(), udn_tile_coord_y(),
  34         *((int *)(i)), *((int *)(i + 4)),
  35         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  36         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  37         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  38         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  39         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  40         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  41         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  42   }
  43   sblock = 0;
  44   bool advanceblock = false;
  45   // remaining memory
  46   for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
  47     advanceblock = false;
  48     // computing sblock # and block #, core coordinate (x,y) also
  49     if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
  50       // finished a sblock
  51       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
  52         if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
  53           // finished a block
  54           block++;
  55           advanceblock = true;
  56         }
  57       } else {
  58         // finished a block
  59         block++;
  60         advanceblock = true;
  61       }
  62       // compute core #
  63       if(advanceblock) {
  64         coren = gc_block2core[block%(NUMCORES4GC*2)];
  65       }
  66       // compute core coordinate
  67       x = BAMBOO_COORDS_X(coren);
  68       y = BAMBOO_COORDS_Y(coren);
  69       printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
  70           udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
  71           (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
  72     }
  73     j++;
  74     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  75         udn_tile_coord_x(), udn_tile_coord_y(),
  76         *((int *)(i)), *((int *)(i + 4)),
  77         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  78         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  79         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  80         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  81         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  82         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  83         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  84   }
  85   printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
  86 }
  87 #endif
  88
  89 void initmulticoregcdata() {
  90   bamboo_smem_zero_top = NULL;
  91   gcflag = false;
  92   gc_status_info.gcprocessing = false;
  93   gc_status_info.gcphase = FINISHPHASE;
  94
  95   gcprecheck = true;
  96   gcforwardobjtbl = allocateMGCHash_I(128);
  97 #ifdef MGC_SPEC
  98   gc_profile_flag = false;
  99 #endif
 100 #ifdef GC_CACHE_ADAPT
 101   gccachestage = false;
 102 #endif
 103
 104   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 105     allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
 106     for(int i=0; i<GCNUMBLOCK;i++) {
 107       if (1==NUMCORES4GC)
 108         allocationinfo.blocktable[i].corenum=0;
 109       else
 110         allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
 111       allocationinfo.blocktable[i].status=BS_FREE;
 112       allocationinfo.blocktable[i].usedspace=0;
 113       allocationinfo.blocktable[i].freespace=GLOBALBLOCKSIZE(i);
 114     }
 115     buildCore2Test();
 116   }
 117
 118   //initialize update structures
 119   origarraycount=0;
 120   for(int i=0;i<NUMCORES4GC;i++) {
 121     origblockarray[i]=NULL;
 122   }
 123
 124   INIT_MULTICORE_GCPROFILE_DATA();
 125 }
 126
 127 void dismulticoregcdata() {
 128   freeMGCHash(gcforwardobjtbl);
 129 }
 130
 131 void initGC() {
 132   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 133     for(int i = 0; i < NUMCORES4GC; i++) {
 134       gccorestatus[i] = 1;
 135       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 136       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 137       gcloads[i] = 0;
 138       gcrequiredmems[i] = 0;
 139     }
 140     for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
 141       gccorestatus[i] = 1;
 142       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 143       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 144     }
 145     gcnumsrobjs_index = 0;
 146   }
 147   gcself_numsendobjs = 0;
 148   gcself_numreceiveobjs = 0;
 149   gcmovestartaddr = 0;
 150   gctomove = false;
 151   gcblock2fill = 0;
 152   gcmovepending = 0;
 153   gccurr_heaptop = 0;
 154   update_origblockptr=NULL;
 155   gc_queueinit();
 156
 157   MGCHashreset(gcforwardobjtbl);
 158
 159   GCPROFILE_INIT();
 160   gc_output_cache_policy_time=0;
 161 }
 162
 163 void checkMarkStatus_p2() {
 164   // check if the sum of send objs and receive obj are the same
 165   // yes->check if the info is the latest; no->go on executing
 166   unsigned int sumsendobj = 0;
 167   for(int i = 0; i < NUMCORESACTIVE; i++) {
 168     sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
 169   }
 170   for(int i = 0; i < NUMCORESACTIVE; i++) {
 171     sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
 172   }
 173   if(0 == sumsendobj) {
 174     // Check if there are changes of the numsendobjs or numreceiveobjs
 175     // on each core
 176     int i = 0;
 177     for(i = 0; i < NUMCORESACTIVE; i++) {
 178       if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
 179         break;
 180       }
 181     }
 182     if(i == NUMCORESACTIVE) {
 183       // all the core status info are the latest,stop mark phase
 184       gc_status_info.gcphase = COMPACTPHASE;
 185       // restore the gcstatus for all cores
 186       for(int i = 0; i < NUMCORESACTIVE; i++) {
 187         gccorestatus[i] = 1;
 188       }
 189     } else {
 190       // There were changes between phase 1 and phase 2, can not decide
 191       // whether the mark phase has been finished
 192       waitconfirm = false;
 193       // As it fails in phase 2, flip the entries
 194       gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 195     }
 196   } else {
 197     // There were changes between phase 1 and phase 2, can not decide
 198     // whether the mark phase has been finished
 199     waitconfirm = false;
 200     // As it fails in phase 2, flip the entries
 201     gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 202   }
 203 }
 204
 205 void checkMarkStatus() {
 206   if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
 207     unsigned int entry_index = 0;
 208     if(waitconfirm) {
 209       // phase 2
 210       entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 211     } else {
 212       // phase 1
 213       entry_index = gcnumsrobjs_index;
 214     }
 215     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 216     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 217     gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
 218     gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
 219     // check the status of all cores
 220     if (gc_checkCoreStatus()) {
 221       // ask for confirm
 222       if(!waitconfirm) {
 223         // the first time found all cores stall
 224         // send out status confirm msg to all other cores
 225         // reset the corestatus array too
 226         waitconfirm = true;
 227         numconfirm = NUMCORESACTIVE - 1;
 228         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 229         GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
 230       } else {
 231         // Phase 2
 232         checkMarkStatus_p2();
 233         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 234       }
 235     } else {
 236       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 237     }
 238   }
 239 }
 240
 241 // compute load balance for all cores
 242 int loadbalance() {
 243   // compute load balance
 244   // get the total loads
 245   void * heaptop;
 246   unsigned int tloads = 0;
 247   for(int i = 0; i < NUMCORES4GC; i++) {
 248     tloads += gcloads[i];
 249   }
 250   heaptop = gcbaseva + tloads;
 251
 252   unsigned int topblockindex;
 253
 254   BLOCKINDEX(topblockindex, heaptop);
 255   // num of blocks per core
 256   unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
 257
 258   return numbpc;
 259 }
 260
 261 void gc_collect(struct garbagelist * stackptr) {
 262   gc_status_info.gcprocessing = true;
 263   // inform the master that this core is at a gc safe point and is ready to
 264   // do gc
 265   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 266
 267   // core collector routine
 268   //wait for init phase
 269   WAITFORGCPHASE(INITPHASE);
 270
 271   GC_PRINTF("Do initGC\n");
 272   initGC();
 273   CACHEADAPT_GC(true);
 274   //send init finish msg to core coordinator
 275   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 276
 277   //wait for mark phase
 278   WAITFORGCPHASE(MARKPHASE);
 279
 280   GC_PRINTF("Start mark phase\n");
 281   mark(stackptr);
 282   GC_PRINTF("Finish mark phase, start compact phase\n");
 283   compact();
 284   GC_PRINTF("Finish compact phase\n");
 285
 286   WAITFORGCPHASE(UPDATEPHASE);
 287
 288   GC_PRINTF("Start flush phase\n");
 289   GCPROFILE_INFO_2_MASTER();
 290   update(stackptr);
 291   GC_PRINTF("Finish flush phase\n");
 292
 293   CACHEADAPT_PHASE_CLIENT();
 294
 295   // invalidate all shared mem pointers
 296   bamboo_cur_msp = NULL;
 297   bamboo_smem_size = 0;
 298   bamboo_smem_zero_top = NULL;
 299   gcflag = false;
 300
 301   WAITFORGCPHASE(FINISHPHASE);
 302
 303   GC_PRINTF("Finish gc! \n");
 304 }
 305
 306 void gc_nocollect(struct garbagelist * stackptr) {
 307   gc_status_info.gcprocessing = true;
 308   // inform the master that this core is at a gc safe point and is ready to
 309   // do gc
 310   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 311
 312   WAITFORGCPHASE(INITPHASE);
 313
 314   GC_PRINTF("Do initGC\n");
 315   initGC();
 316   CACHEADAPT_GC(true);
 317   //send init finish msg to core coordinator
 318   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 319
 320   WAITFORGCPHASE(MARKPHASE);
 321
 322   GC_PRINTF("Start mark phase\n");
 323   mark(stackptr);
 324   GC_PRINTF("Finish mark phase, wait for flush\n");
 325
 326   // non-gc core collector routine
 327   WAITFORGCPHASE(UPDATEPHASE);
 328
 329   GC_PRINTF("Start flush phase\n");
 330   GCPROFILE_INFO_2_MASTER();
 331   update(stackptr);
 332   GC_PRINTF("Finish flush phase\n");
 333
 334   CACHEADAPT_PHASE_CLIENT();
 335
 336   // invalidate all shared mem pointers
 337   bamboo_cur_msp = NULL;
 338   bamboo_smem_size = 0;
 339   bamboo_smem_zero_top = NULL;
 340
 341   gcflag = false;
 342   WAITFORGCPHASE(FINISHPHASE);
 343
 344   GC_PRINTF("Finish gc! \n");
 345 }
 346
 347 void master_mark(struct garbagelist *stackptr) {
 348
 349   GC_PRINTF("Start mark phase \n");
 350   GC_SEND_MSG_1_TO_CLIENT(GCSTART);
 351   gc_status_info.gcphase = MARKPHASE;
 352   // mark phase
 353
 354   mark(stackptr);
 355 }
 356
 357 void master_getlargeobjs() {
 358   // send msgs to all cores requiring large objs info
 359   // Note: only need to ask gc cores, non-gc cores do not host any objs
 360   numconfirm = NUMCORES4GC - 1;
 361   for(int i = 1; i < NUMCORES4GC; i++) {
 362     send_msg_1(i,GCLOBJREQUEST);
 363   }
 364   gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 365   //spin until we have all responses
 366   while(numconfirm!=0) ;
 367
 368   GCPROFILE_ITEM();
 369   GC_PRINTF("prepare to cache large objs \n");
 370
 371 }
 372
 373
 374 void master_updaterefs(struct garbagelist * stackptr) {
 375   gc_status_info.gcphase = UPDATEPHASE;
 376   GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
 377   GCPROFILE_ITEM();
 378   GC_PRINTF("Start flush phase \n");
 379   // flush phase
 380   update(stackptr);
 381   GC_CHECK_ALL_CORE_STATUS(UPDATEPHASE==gc_status_info.gcphase);
 382   GC_PRINTF("Finish flush phase \n");
 383 }
 384
 385 void master_finish() {
 386   gc_status_info.gcphase = FINISHPHASE;
 387
 388   // invalidate all shared mem pointers
 389   // put it here as it takes time to inform all the other cores to
 390   // finish gc and it might cause problem when some core resumes
 391   // mutator earlier than the other cores
 392   bamboo_cur_msp = NULL;
 393   bamboo_smem_size = 0;
 394   bamboo_smem_zero_top = NULL;
 395
 396   GCPROFILE_END();
 397   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 398   CACHEADAPT_OUTPUT_CACHE_POLICY();
 399   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 400   gcflag = false;
 401   GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
 402
 403   gc_status_info.gcprocessing = false;
 404   if(gcflag) {
 405     // inform other cores to stop and wait for gc
 406     gcprecheck = true;
 407     for(int i = 0; i < NUMCORESACTIVE; i++) {
 408       // reuse the gcnumsendobjs & gcnumreceiveobjs
 409       gcnumsendobjs[0][i] = 0;
 410       gcnumreceiveobjs[0][i] = 0;
 411     }
 412     GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
 413   }
 414 }
 415
 416 void gc_master(struct garbagelist * stackptr) {
 417   tprintf("start GC !!!!!!!!!!!!! \n");
 418   gc_status_info.gcprocessing = true;
 419   gc_status_info.gcphase = INITPHASE;
 420
 421   waitconfirm = false;
 422   numconfirm = 0;
 423   initGC();
 424   GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
 425   CACHEADAPT_GC(true);
 426   GC_PRINTF("Check core status \n");
 427   GC_CHECK_ALL_CORE_STATUS(true);
 428   GCPROFILE_ITEM();
 429   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 430   CACHEADAPT_OUTPUT_CACHE_SAMPLING();
 431   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 432
 433   // do mark phase
 434   master_mark(stackptr);
 435
 436   // get large objects from all cores
 437   master_getlargeobjs();
 438
 439   // compact the heap
 440   master_compact();
 441
 442   // update the references
 443   master_updaterefs(stackptr);
 444
 445   // do cache adaptation
 446   CACHEADAPT_PHASE_MASTER();
 447
 448   // do finish up stuff
 449   master_finish();
 450
 451   GC_PRINTF("gc finished   \n");
 452   tprintf("finish GC ! %d \n",gcflag);
 453 }
 454
 455 void pregccheck() {
 456   while(true) {
 457     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 458     gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
 459     gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
 460     int sumsendobj = 0;
 461     for(int i = 0; i < NUMCORESACTIVE; i++) {
 462       sumsendobj += gcnumsendobjs[0][i];
 463     }
 464     for(int i = 0; i < NUMCORESACTIVE; i++) {
 465       sumsendobj -= gcnumreceiveobjs[0][i];
 466     }
 467     if(0 != sumsendobj) {
 468       // there were still some msgs on the fly, wait until there
 469       // are some update pregc information coming and check it again
 470       gcprecheck = false;
 471       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 472
 473       while(!gcprecheck) ;
 474     } else {
 475       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 476       return;
 477     }
 478   }
 479 }
 480
 481 void pregcprocessing() {
 482 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 483   // disable the timer interrupt
 484   bamboo_mask_timer_intr();
 485 #endif
 486 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 487   // get the sampling data
 488   bamboo_output_dtlb_sampling();
 489 #endif
 490 }
 491
 492 void postgcprocessing() {
 493 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 494   // enable the timer interrupt
 495   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
 496   bamboo_unmask_timer_intr();
 497 #endif
 498 }
 499
 500 bool gc(struct garbagelist * stackptr) {
 501   // check if do gc
 502   if(!gcflag) {
 503     gc_status_info.gcprocessing = false;
 504     return false;
 505   }
 506
 507   // core coordinator routine
 508   if(0 == BAMBOO_NUM_OF_CORE) {
 509     GC_PRINTF("Check if we can do gc or not\n");
 510     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 511     if(!gc_checkCoreStatus()) {
 512       // some of the cores are still executing the mutator and did not reach
 513       // some gc safe point, therefore it is not ready to do gc
 514       gcflag = true;
 515       return false;
 516     } else {
 517       GCPROFILE_START();
 518       pregccheck();
 519     }
 520     GC_PRINTF("start gc! \n");
 521     pregcprocessing();
 522     gc_master(stackptr);
 523   } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
 524     pregcprocessing();
 525     gc_collect(stackptr);
 526   } else {
 527     pregcprocessing();
 528     gc_nocollect(stackptr);
 529   }
 530   postgcprocessing();
 531
 532   return true;
 533 }
 534
 535 #endif