Robust/src/Runtime/bamboo/multicoregarbage.c

   1 #ifdef MULTICORE_GC
   2 #include "runtime.h"
   3 #include "multicoreruntime.h"
   4 #include "multicoregarbage.h"
   5 #include "multicoregcmark.h"
   6 #include "multicoregccompact.h"
   7 #include "multicoregcflush.h"
   8 #include "multicoregcprofile.h"
   9 #include "gcqueue.h"
  10 #include "multicoremem_helper.h"
  11 #include "bambooalign.h"
  12
// gc() only performs a collection while this flag is set; the collector
// routines in this file clear it again once a collection is over. This file
// never sets it to true itself.
volatile bool gcflag;
// Per-core collection state; the code in this file reads and writes its
// gcprocessing and gcphase fields.
gc_status_t gc_status_info;

// Time accumulated around the CACHEADAPT_OUTPUT_CACHE_* calls made by the
// master routines; reset to 0 by initGC().
unsigned long long gc_output_cache_policy_time=0;
  17
  18 #ifdef GC_DEBUG
  19 // dump whole mem in blocks
  20 void dumpSMem() {
  21   int block = 0;
  22   int sblock = 0;
  23   unsigned int j = 0;
  24   unsigned int i = 0;
  25   int coren = 0;
  26   int x = 0;
  27   int y = 0;
  28   printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  29   // reserved blocks for sblocktbl
  30   printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
  31          udn_tile_coord_y());
  32   for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
  33     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  34         udn_tile_coord_x(), udn_tile_coord_y(),
  35         *((int *)(i)), *((int *)(i + 4)),
  36         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  37         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  38         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  39         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  40         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  41         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  42         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  43   }
  44   sblock = 0;
  45   bool advanceblock = false;
  46   // remaining memory
  47   for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
  48     advanceblock = false;
  49     // computing sblock # and block #, core coordinate (x,y) also
  50     if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
  51       // finished a sblock
  52       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
  53         if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
  54           // finished a block
  55           block++;
  56           advanceblock = true;
  57         }
  58       } else {
  59         // finished a block
  60         block++;
  61         advanceblock = true;
  62       }
  63       // compute core #
  64       if(advanceblock) {
  65         coren = gc_block2core[block%(NUMCORES4GC*2)];
  66       }
  67       // compute core coordinate
  68       x = BAMBOO_COORDS_X(coren);
  69       y = BAMBOO_COORDS_Y(coren);
  70       printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
  71           udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
  72           (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
  73     }
  74     j++;
  75     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  76         udn_tile_coord_x(), udn_tile_coord_y(),
  77         *((int *)(i)), *((int *)(i + 4)),
  78         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  79         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  80         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  81         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  82         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  83         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  84         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  85   }
  86   printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
  87 }
  88 #endif
  89
  90 bool gc_checkCoreStatus() {
  91   for(int i = 0; i < NUMCORES4GC; i++) {
  92     if(gccorestatus[i]) {
  93       return false;
  94     }
  95   }
  96   return true;
  97 }
  98
  99 void gc_resetCoreStatus() {
 100   for(int i = 0; i < NUMCORES4GC; i++) {
 101     gccorestatus[i] = 1;
 102   }
 103 }
 104
 105
 106 void initmulticoregcdata() {
 107   bamboo_smem_zero_top = NULL;
 108   gcflag = false;
 109   gc_status_info.gcprocessing = false;
 110   gc_status_info.gcphase = FINISHPHASE;
 111
 112   gcprecheck = true;
 113   gcforwardobjtbl = allocateMGCHash_I(128);
 114 #ifdef MGC_SPEC
 115   gc_profile_flag = false;
 116 #endif
 117 #ifdef GC_CACHE_ADAPT
 118   gccachestage = false;
 119 #endif
 120
 121   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 122     allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
 123     for(int i=0; i<GCNUMBLOCK;i++) {
 124       if (1==NUMCORES4GC)
 125         allocationinfo.blocktable[i].corenum=0;
 126       else
 127         allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
 128       allocationinfo.blocktable[i].status=BS_FREE;
 129       allocationinfo.blocktable[i].usedspace=0;
 130       allocationinfo.blocktable[i].freespace=GLOBALBLOCKSIZE(i);
 131     }
 132     buildCore2Test();
 133   }
 134
 135   //initialize update structures
 136   origarraycount=0;
 137   for(int i=0;i<NUMCORES4GC;i++) {
 138     origblockarray[i]=NULL;
 139   }
 140
 141   INIT_MULTICORE_GCPROFILE_DATA();
 142 }
 143
 144 void dismulticoregcdata() {
 145   freeMGCHash(gcforwardobjtbl);
 146 }
 147
 148 void initGC() {
 149   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 150     for(int i = 0; i < NUMCORES4GC; i++) {
 151       gccorestatus[i] = 1;
 152       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 153       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 154       gcloads[i] = 0;
 155       gcrequiredmems[i] = 0;
 156     }
 157     for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
 158       gccorestatus[i] = 1;
 159       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 160       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 161     }
 162     gcnumsrobjs_index = 0;
 163   }
 164   gcself_numsendobjs = 0;
 165   gcself_numreceiveobjs = 0;
 166   gcmovestartaddr = 0;
 167   gctomove = false;
 168   gcblock2fill = 0;
 169   gcmovepending = 0;
 170   gccurr_heaptop = 0;
 171   update_origblockptr=NULL;
 172   gc_queueinit();
 173
 174   MGCHashreset(gcforwardobjtbl);
 175
 176   GCPROFILE_INIT();
 177   gc_output_cache_policy_time=0;
 178 }
 179
 180 void checkMarkStatus_p2() {
 181   //  tprintf("Check mark status 2\n");
 182   // check if the sum of send objs and receive obj are the same
 183   // yes->check if the info is the latest; no->go on executing
 184   unsigned int sumsendobj = 0;
 185   for(int i = 0; i < NUMCORESACTIVE; i++) {
 186     sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
 187   }
 188   for(int i = 0; i < NUMCORESACTIVE; i++) {
 189     sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
 190   }
 191   if(0 == sumsendobj) {
 192     // Check if there are changes of the numsendobjs or numreceiveobjs
 193     // on each core
 194     int i = 0;
 195     for(i = 0; i < NUMCORESACTIVE; i++) {
 196       if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
 197         break;
 198       }
 199     }
 200     if(i == NUMCORESACTIVE) {
 201       //tprintf("Mark terminated\n");
 202       // all the core status info are the latest,stop mark phase
 203       gc_status_info.gcphase = COMPACTPHASE;
 204       // restore the gcstatus for all cores
 205       for(int i = 0; i < NUMCORESACTIVE; i++) {
 206         gccorestatus[i] = 1;
 207       }
 208     } else {
 209       // There were changes between phase 1 and phase 2, can not decide
 210       // whether the mark phase has been finished
 211       waitconfirm = false;
 212       // As it fails in phase 2, flip the entries
 213       gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 214     }
 215   } else {
 216     // There were changes between phase 1 and phase 2, can not decide
 217     // whether the mark phase has been finished
 218     waitconfirm = false;
 219     // As it fails in phase 2, flip the entries
 220     gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 221   }
 222 }
 223
 224 void checkMarkStatus() {
 225   //  tprintf("Check mark status\n");
 226   if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
 227     unsigned int entry_index = 0;
 228     if(waitconfirm) {
 229       // phase 2
 230       entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 231     } else {
 232       // phase 1
 233       entry_index = gcnumsrobjs_index;
 234     }
 235     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 236     // check the status of all cores
 237     if (gc_checkCoreStatus()) {
 238       // ask for confirm
 239       if(!waitconfirm) {
 240         // the first time found all cores stall
 241         // send out status confirm msg to all other cores
 242         // reset the corestatus array too
 243         waitconfirm = true;
 244         numconfirm = NUMCORESACTIVE - 1;
 245         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 246         GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
 247       } else {
 248         // Phase 2
 249         checkMarkStatus_p2();
 250         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 251       }
 252     } else {
 253       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 254     }
 255   }
 256 }
 257
 258 // compute load balance for all cores
 259 int loadbalance() {
 260   // compute load balance
 261   // get the total loads
 262   void * heaptop;
 263   unsigned int tloads = 0;
 264   for(int i = 0; i < NUMCORES4GC; i++) {
 265     tloads += gcloads[i];
 266     //tprintf("load: %d %d \n", gcloads[i], i);
 267   }
 268   heaptop = gcbaseva + tloads;
 269
 270   unsigned int topblockindex;
 271
 272   BLOCKINDEX(topblockindex, heaptop);
 273   // num of blocks per core
 274   unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
 275
 276   return numbpc;
 277 }
 278
 279 void gc_collect(struct garbagelist * stackptr) {
 280   gc_status_info.gcprocessing = true;
 281   // inform the master that this core is at a gc safe point and is ready to
 282   // do gc
 283   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 284
 285   // core collector routine
 286   //wait for init phase
 287   WAITFORGCPHASE(INITPHASE);
 288
 289   GC_PRINTF("Do initGC\n");
 290   initGC();
 291   CACHEADAPT_GC(true);
 292   //send init finish msg to core coordinator
 293   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 294
 295   //wait for mark phase
 296   WAITFORGCPHASE(MARKPHASE);
 297
 298   GC_PRINTF("Start mark phase\n");
 299   mark(stackptr);
 300   GC_PRINTF("Finish mark phase, start compact phase\n");
 301   compact();
 302   GC_PRINTF("Finish compact phase\n");
 303
 304   WAITFORGCPHASE(UPDATEPHASE);
 305
 306   GC_PRINTF("Start update phase\n");
 307   GCPROFILE_INFO_2_MASTER();
 308   update(stackptr);
 309   GC_PRINTF("Finish update phase\n");
 310
 311   CACHEADAPT_PHASE_CLIENT();
 312
 313   // invalidate all shared mem pointers
 314   bamboo_cur_msp = NULL;
 315   bamboo_smem_size = 0;
 316   bamboo_smem_zero_top = NULL;
 317   gcflag = false;
 318
 319   WAITFORGCPHASE(FINISHPHASE);
 320
 321   GC_PRINTF("Finish gc! \n");
 322 }
 323
 324 void gc_nocollect(struct garbagelist * stackptr) {
 325   gc_status_info.gcprocessing = true;
 326   // inform the master that this core is at a gc safe point and is ready to
 327   // do gc
 328   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 329
 330   WAITFORGCPHASE(INITPHASE);
 331
 332   GC_PRINTF("Do initGC\n");
 333   initGC();
 334   CACHEADAPT_GC(true);
 335   //send init finish msg to core coordinator
 336   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 337
 338   WAITFORGCPHASE(MARKPHASE);
 339
 340   GC_PRINTF("Start mark phase\n");
 341   mark(stackptr);
 342   GC_PRINTF("Finish mark phase, wait for update\n");
 343
 344   // non-gc core collector routine
 345   WAITFORGCPHASE(UPDATEPHASE);
 346
 347   GC_PRINTF("Start update phase\n");
 348   GCPROFILE_INFO_2_MASTER();
 349   update(stackptr);
 350   GC_PRINTF("Finish update phase\n");
 351
 352   CACHEADAPT_PHASE_CLIENT();
 353
 354   // invalidate all shared mem pointers
 355   bamboo_cur_msp = NULL;
 356   bamboo_smem_size = 0;
 357   bamboo_smem_zero_top = NULL;
 358
 359   gcflag = false;
 360   WAITFORGCPHASE(FINISHPHASE);
 361
 362   GC_PRINTF("Finish gc! \n");
 363 }
 364
 365 void master_mark(struct garbagelist *stackptr) {
 366
 367   GC_PRINTF("Start mark phase \n");
 368   gc_status_info.gcphase = MARKPHASE;
 369   GC_SEND_MSG_1_TO_CLIENT(GCSTART);
 370   // mark phase
 371
 372   mark(stackptr);
 373 }
 374
 375 void master_getlargeobjs() {
 376   // send msgs to all cores requiring large objs info
 377   // Note: only need to ask gc cores, non-gc cores do not host any objs
 378   numconfirm = NUMCORES4GC - 1;
 379   for(int i = 1; i < NUMCORES4GC; i++) {
 380     send_msg_1(i,GCLOBJREQUEST);
 381   }
 382   gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 383   //spin until we have all responses
 384   while(numconfirm!=0) ;
 385
 386   GCPROFILE_ITEM();
 387   GC_PRINTF("prepare to cache large objs \n");
 388
 389 }
 390
 391
 392 void master_updaterefs(struct garbagelist * stackptr) {
 393   gc_status_info.gcphase = UPDATEPHASE;
 394   GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
 395   GCPROFILE_ITEM();
 396   GC_PRINTF("Start update phase \n");
 397   // update phase
 398   update(stackptr);
 399   GC_CHECK_ALL_CORE_STATUS();
 400   GC_PRINTF("Finish update phase \n");
 401 }
 402
 403 void master_finish() {
 404   gc_status_info.gcphase = FINISHPHASE;
 405
 406   // invalidate all shared mem pointers
 407   // put it here as it takes time to inform all the other cores to
 408   // finish gc and it might cause problem when some core resumes
 409   // mutator earlier than the other cores
 410   bamboo_cur_msp = NULL;
 411   bamboo_smem_size = 0;
 412   bamboo_smem_zero_top = NULL;
 413
 414   GCPROFILE_END();
 415   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 416   CACHEADAPT_OUTPUT_CACHE_POLICY();
 417   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 418   gcflag = false;
 419
 420   GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
 421   gc_status_info.gcprocessing = false;
 422
 423   if(gcflag) {
 424     // inform other cores to stop and wait for gc
 425     GC_PRINTF("Back to Back gc case\n");
 426     gcprecheck = true;
 427     for(int i = 0; i < NUMCORESACTIVE; i++) {
 428       // reuse the gcnumsendobjs & gcnumreceiveobjs
 429       gcnumsendobjs[0][i] = 0;
 430       gcnumreceiveobjs[0][i] = 0;
 431     }
 432     GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
 433   }
 434 }
 435
 436 void gc_master(struct garbagelist * stackptr) {
 437   tprintf("start GC!\n");
 438   gc_status_info.gcprocessing = true;
 439   gc_status_info.gcphase = INITPHASE;
 440
 441   waitconfirm = false;
 442   numconfirm = 0;
 443   initGC();
 444   GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
 445   CACHEADAPT_GC(true);
 446   //tprintf("Check core status \n");
 447   GC_CHECK_ALL_CORE_STATUS();
 448   GCPROFILE_ITEM();
 449   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 450   CACHEADAPT_OUTPUT_CACHE_SAMPLING();
 451   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 452   //tprintf("start mark phase\n");
 453   // do mark phase
 454   master_mark(stackptr);
 455   //tprintf("finish mark phase\n");
 456   // get large objects from all cores
 457   master_getlargeobjs();
 458   //tprintf("start compact phase\n");
 459   // compact the heap
 460   master_compact();
 461   //tprintf("start update phase\n");
 462   // update the references
 463   master_updaterefs(stackptr);
 464   //tprintf("gc master finished update   \n");
 465   // do cache adaptation
 466   CACHEADAPT_PHASE_MASTER();
 467   //tprintf("finish cachdapt phase\n");
 468   // do finish up stuff
 469 #ifdef GC_DEBUG
 470   for(int i=0;i<GCNUMBLOCK;i++) {
 471     struct blockrecord *record=&allocationinfo.blocktable[i];
 472     tprintf("%u. used=%u free=%u corenum=%u status=%u, base=%x, ptr=%x\n", i, record->usedspace, record->freespace, record->corenum, record->status, gcbaseva+OFFSET2BASEVA(i), (gcbaseva+OFFSET2BASEVA(i)+record->usedspace));
 473   }
 474 #endif
 475
 476   master_finish();
 477
 478   //tprintf("finish GC ! %d \n",gcflag);
 479 }
 480
 481 void pregccheck() {
 482   while(true) {
 483     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 484     gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
 485     gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
 486     int sumsendobj = 0;
 487     for(int i = 0; i < NUMCORESACTIVE; i++) {
 488       sumsendobj += gcnumsendobjs[0][i];
 489     }
 490     for(int i = 0; i < NUMCORESACTIVE; i++) {
 491       sumsendobj -= gcnumreceiveobjs[0][i];
 492     }
 493     if(0 != sumsendobj) {
 494       // there were still some msgs on the fly, wait until there
 495       // are some update pregc information coming and check it again
 496       gcprecheck = false;
 497       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 498
 499       while(!gcprecheck) ;
 500     } else {
 501       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 502       return;
 503     }
 504   }
 505 }
 506
 507 void pregcprocessing() {
 508 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 509   // disable the timer interrupt
 510   bamboo_mask_timer_intr();
 511 #endif
 512 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 513   // get the sampling data
 514   bamboo_output_dtlb_sampling();
 515 #endif
 516 }
 517
 518 void postgcprocessing() {
 519 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 520   // enable the timer interrupt
 521   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
 522   bamboo_unmask_timer_intr();
 523 #endif
 524 }
 525
 526 bool gc(struct garbagelist * stackptr) {
 527   // check if do gc
 528   if(!gcflag) {
 529     gc_status_info.gcprocessing = false;
 530     return false;
 531   }
 532
 533   // core coordinator routine
 534   if(0 == BAMBOO_NUM_OF_CORE) {
 535     GC_PRINTF("Check if we can do gc or not\n");
 536     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 537
 538     //wait for other cores to catch up
 539     while(!gc_checkCoreStatus())
 540       ;
 541
 542     GCPROFILE_START();
 543     pregccheck();
 544     GC_PRINTF("start gc! \n");
 545     pregcprocessing();
 546     gc_master(stackptr);
 547   } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
 548     GC_PRINTF("Core reporting for gc.\n");
 549     pregcprocessing();
 550     gc_collect(stackptr);
 551   } else {
 552     pregcprocessing();
 553     gc_nocollect(stackptr);
 554   }
 555   postgcprocessing();
 556   return true;
 557 }
 558
 559 #endif