Robust/src/Runtime/bamboo/multicoregarbage.c

   1 #ifdef MULTICORE_GC
   2 #include "runtime.h"
   3 #include "multicoreruntime.h"
   4 #include "multicoregarbage.h"
   5 #include "multicoregcmark.h"
   6 #include "multicoregccompact.h"
   7 #include "multicoregcflush.h"
   8 #include "multicoregcprofile.h"
   9 #include "gcqueue.h"
  10 #include "multicoremem_helper.h"
  11 #include "bambooalign.h"
  12 #ifdef PERFCOUNT
  13 #include "bme_perf_counter.h"
  14 #endif
  15
  16 volatile bool gcflag;
  17 gc_status_t gc_status_info;
  18
  19 unsigned long long gc_output_cache_policy_time=0;
  20
  21 #ifdef GC_DEBUG
  22 // dump whole mem in blocks
  23 void dumpSMem() {
  24   int block = 0;
  25   int sblock = 0;
  26   unsigned int j = 0;
  27   unsigned int i = 0;
  28   int coren = 0;
  29   int x = 0;
  30   int y = 0;
  31   printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  32   // reserved blocks for sblocktbl
  33   printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
  34          udn_tile_coord_y());
  35   for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
  36     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  37         udn_tile_coord_x(), udn_tile_coord_y(),
  38         *((int *)(i)), *((int *)(i + 4)),
  39         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  40         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  41         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  42         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  43         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  44         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  45         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  46   }
  47   sblock = 0;
  48   bool advanceblock = false;
  49   // remaining memory
  50   for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
  51     advanceblock = false;
  52     // computing sblock # and block #, core coordinate (x,y) also
  53     if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
  54       // finished a sblock
  55       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
  56         if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
  57           // finished a block
  58           block++;
  59           advanceblock = true;
  60         }
  61       } else {
  62         // finished a block
  63         block++;
  64         advanceblock = true;
  65       }
  66       // compute core #
  67       if(advanceblock) {
  68         coren = gc_block2core[block%(NUMCORES4GC*2)];
  69       }
  70       // compute core coordinate
  71       x = BAMBOO_COORDS_X(coren);
  72       y = BAMBOO_COORDS_Y(coren);
  73       printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
  74           udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
  75           (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
  76     }
  77     j++;
  78     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  79         udn_tile_coord_x(), udn_tile_coord_y(),
  80         *((int *)(i)), *((int *)(i + 4)),
  81         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  82         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  83         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  84         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  85         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  86         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  87         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  88   }
  89   printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
  90 }
  91 #endif
  92
  93 bool gc_checkCoreStatus() {
  94   for(int i = 0; i < NUMCORES4GC; i++) {
  95     if(gccorestatus[i]) {
  96       return false;
  97     }
  98   }
  99   return true;
 100 }
 101
 102 void gc_resetCoreStatus() {
 103   for(int i = 0; i < NUMCORES4GC; i++) {
 104     gccorestatus[i] = 1;
 105   }
 106 }
 107
 108
 109 void initmulticoregcdata() {
 110   numGCs = 0;
 111   GCtime = 0;
 112   bamboo_smem_zero_top = NULL;
 113   gcflag = false;
 114   gc_status_info.gcprocessing = false;
 115   gc_status_info.gcphase = FINISHPHASE;
 116
 117   gcprecheck = true;
 118   gcforwardobjtbl = allocateMGCHash_I(128);
 119 #ifdef MGC_SPEC
 120   gc_profile_flag = false;
 121 #endif
 122
 123   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 124     allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
 125     for(int i=0; i<GCNUMBLOCK;i++) {
 126       if (1==NUMCORES4GC)
 127         allocationinfo.blocktable[i].corenum=0;
 128       else
 129         allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
 130       allocationinfo.blocktable[i].status=BS_FREE;
 131       allocationinfo.blocktable[i].usedspace=0;
 132       allocationinfo.blocktable[i].freespace=GLOBALBLOCKSIZE(i);
 133     }
 134     buildCore2Test();
 135   }
 136
 137   //initialize update structures
 138   origarraycount=0;
 139   for(int i=0;i<NUMCORES4GC;i++) {
 140     origblockarray[i]=NULL;
 141   }
 142
 143   INIT_MULTICORE_GCPROFILE_DATA();
 144 }
 145
 146 void dismulticoregcdata() {
 147   freeMGCHash(gcforwardobjtbl);
 148 }
 149
 150 void initGC() {
 151   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 152     for(int i = 0; i < NUMCORES4GC; i++) {
 153       gccorestatus[i] = 1;
 154       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 155       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 156       gcloads[i] = 0;
 157       gcrequiredmems[i] = 0;
 158     }
 159     for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
 160       gccorestatus[i] = 1;
 161       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 162       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 163     }
 164     gcnumsrobjs_index = 0;
 165   }
 166   gcself_numsendobjs = 0;
 167   gcself_numreceiveobjs = 0;
 168   gcmovestartaddr = 0;
 169   gctomove = false;
 170   gcblock2fill = 0;
 171   gcmovepending = 0;
 172   gccurr_heaptop = 0;
 173   update_origblockptr=NULL;
 174   gc_queueinit();
 175
 176   MGCHashreset(gcforwardobjtbl);
 177
 178   GCPROFILE_INIT();
 179   gc_output_cache_policy_time=0;
 180 }
 181
 182 void checkMarkStatus_p2() {
 183   //  tprintf("Check mark status 2\n");
 184   // check if the sum of send objs and receive obj are the same
 185   // yes->check if the info is the latest; no->go on executing
 186   unsigned int sumsendobj = 0;
 187   for(int i = 0; i < NUMCORESACTIVE; i++) {
 188     sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
 189   }
 190   for(int i = 0; i < NUMCORESACTIVE; i++) {
 191     sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
 192   }
 193   if(0 == sumsendobj) {
 194     // Check if there are changes of the numsendobjs or numreceiveobjs
 195     // on each core
 196     int i = 0;
 197     for(i = 0; i < NUMCORESACTIVE; i++) {
 198       if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
 199         break;
 200       }
 201     }
 202     if(i == NUMCORESACTIVE) {
 203       //tprintf("Mark terminated\n");
 204       // all the core status info are the latest,stop mark phase
 205       gc_status_info.gcphase = COMPACTPHASE;
 206       // restore the gcstatus for all cores
 207       for(int i = 0; i < NUMCORESACTIVE; i++) {
 208         gccorestatus[i] = 1;
 209       }
 210     } else {
 211       // There were changes between phase 1 and phase 2, can not decide
 212       // whether the mark phase has been finished
 213       waitconfirm = false;
 214       // As it fails in phase 2, flip the entries
 215       gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 216     }
 217   } else {
 218     // There were changes between phase 1 and phase 2, can not decide
 219     // whether the mark phase has been finished
 220     waitconfirm = false;
 221     // As it fails in phase 2, flip the entries
 222     gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 223   }
 224 }
 225
 226 void checkMarkStatus() {
 227   //  tprintf("Check mark status\n");
 228   if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
 229     unsigned int entry_index = 0;
 230     if(waitconfirm) {
 231       // phase 2
 232       entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 233     } else {
 234       // phase 1
 235       entry_index = gcnumsrobjs_index;
 236     }
 237     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 238     // check the status of all cores
 239     if (gc_checkCoreStatus()) {
 240       // ask for confirm
 241       if(!waitconfirm) {
 242         // the first time found all cores stall
 243         // send out status confirm msg to all other cores
 244         // reset the corestatus array too
 245         waitconfirm = true;
 246         numconfirm = NUMCORESACTIVE - 1;
 247         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 248         GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
 249       } else {
 250         // Phase 2
 251         checkMarkStatus_p2();
 252         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 253       }
 254     } else {
 255       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 256     }
 257   }
 258 }
 259
 260 // compute load balance for all cores
 261 int loadbalance() {
 262   // compute load balance
 263   // get the total loads
 264   void * heaptop;
 265   unsigned int tloads = 0;
 266   for(int i = 0; i < NUMCORES4GC; i++) {
 267     tloads += gcloads[i];
 268     //tprintf("load: %d %d \n", gcloads[i], i);
 269   }
 270   heaptop = gcbaseva + tloads;
 271
 272   unsigned int topblockindex;
 273
 274   BLOCKINDEX(topblockindex, heaptop);
 275   // num of blocks per core
 276   unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
 277
 278   return numbpc;
 279 }
 280
 281 void gc_collect(struct garbagelist * stackptr) {
 282   gc_status_info.gcprocessing = true;
 283   // inform the master that this core is at a gc safe point and is ready to
 284   // do gc
 285   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 286
 287   // invalidate all shared mem pointers
 288   bamboo_cur_msp = NULL;
 289   bamboo_smem_size = 0;
 290   bamboo_smem_zero_top = NULL;
 291   gcflag = false;
 292
 293
 294   // core collector routine
 295   //wait for init phase
 296   WAITFORGCPHASE(INITPHASE);
 297
 298   GC_PRINTF("Do initGC\n");
 299   initGC();
 300   CACHEADAPT_GC(true);
 301   //send init finish msg to core coordinator
 302   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 303
 304   //wait for mark phase
 305   WAITFORGCPHASE(MARKPHASE);
 306
 307   GC_PRINTF("Start mark phase\n");
 308   mark(stackptr);
 309   GC_PRINTF("Finish mark phase, start compact phase\n");
 310   compact();
 311   GC_PRINTF("Finish compact phase\n");
 312
 313   WAITFORGCPHASE(UPDATEPHASE);
 314
 315   GC_PRINTF("Start update phase\n");
 316   GCPROFILE_INFO_2_MASTER();
 317   update(stackptr);
 318   GC_PRINTF("Finish update phase\n");
 319
 320   CACHEADAPT_PHASE_CLIENT();
 321
 322   WAITFORGCPHASE(FINISHPHASE);
 323
 324   GC_PRINTF("Finish gc! \n");
 325 }
 326
 327 void gc_nocollect(struct garbagelist * stackptr) {
 328   gc_status_info.gcprocessing = true;
 329   // inform the master that this core is at a gc safe point and is ready to
 330   // do gc
 331   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 332
 333   // invalidate all shared mem pointers
 334   bamboo_cur_msp = NULL;
 335   bamboo_smem_size = 0;
 336   bamboo_smem_zero_top = NULL;
 337   gcflag = false;
 338
 339   WAITFORGCPHASE(INITPHASE);
 340
 341   GC_PRINTF("Do initGC\n");
 342   initGC();
 343   CACHEADAPT_GC(true);
 344
 345   //send init finish msg to core coordinator
 346   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 347
 348
 349   WAITFORGCPHASE(MARKPHASE);
 350
 351   GC_PRINTF("Start mark phase\n");
 352   mark(stackptr);
 353   GC_PRINTF("Finish mark phase, wait for update\n");
 354
 355   // non-gc core collector routine
 356   WAITFORGCPHASE(UPDATEPHASE);
 357
 358   GC_PRINTF("Start update phase\n");
 359   GCPROFILE_INFO_2_MASTER();
 360   update(stackptr);
 361   GC_PRINTF("Finish update phase\n");
 362
 363   CACHEADAPT_PHASE_CLIENT();
 364
 365   WAITFORGCPHASE(FINISHPHASE);
 366
 367   GC_PRINTF("Finish gc! \n");
 368 }
 369
 370 void master_mark(struct garbagelist *stackptr) {
 371
 372   GC_PRINTF("Start mark phase \n");
 373   gc_status_info.gcphase = MARKPHASE;
 374   GC_SEND_MSG_1_TO_CLIENT(GCSTART);
 375   // mark phase
 376
 377   mark(stackptr);
 378 }
 379
 380 void master_getlargeobjs() {
 381   // send msgs to all cores requiring large objs info
 382   // Note: only need to ask gc cores, non-gc cores do not host any objs
 383   numconfirm = NUMCORES4GC - 1;
 384   for(int i = 1; i < NUMCORES4GC; i++) {
 385     send_msg_1(i,GCLOBJREQUEST);
 386   }
 387   gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 388   //spin until we have all responses
 389   while(numconfirm!=0) ;
 390
 391   GCPROFILE_ITEM_MASTER();
 392   GC_PRINTF("prepare to cache large objs \n");
 393
 394 }
 395
 396
 397 void master_updaterefs(struct garbagelist * stackptr) {
 398   gc_status_info.gcphase = UPDATEPHASE;
 399   GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
 400   GC_PRINTF("Start update phase \n");
 401   // update phase
 402   update(stackptr);
 403   GC_CHECK_ALL_CORE_STATUS();
 404   GC_PRINTF("Finish update phase \n");
 405 }
 406
 407 void master_finish() {
 408   gc_status_info.gcphase = FINISHPHASE;
 409
 410   // invalidate all shared mem pointers
 411   // put it here as it takes time to inform all the other cores to
 412   // finish gc and it might cause problem when some core resumes
 413   // mutator earlier than the other cores
 414   bamboo_cur_msp = NULL;
 415   bamboo_smem_size = 0;
 416   bamboo_smem_zero_top = NULL;
 417
 418   GCPROFILE_END_MASTER();
 419   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 420   CACHEADAPT_OUTPUT_CACHE_POLICY();
 421   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 422   gcflag = false;
 423
 424   GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
 425   gc_status_info.gcprocessing = false;
 426
 427   if(gcflag) {
 428     // inform other cores to stop and wait for gc
 429     GC_PRINTF("Back to Back gc case\n");
 430     gcprecheck = true;
 431     for(int i = 0; i < NUMCORESACTIVE; i++) {
 432       // reuse the gcnumsendobjs & gcnumreceiveobjs
 433       gcnumsendobjs[0][i] = 0;
 434       gcnumreceiveobjs[0][i] = 0;
 435     }
 436     GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
 437   }
 438 }
 439
 440 void gc_master(struct garbagelist * stackptr) {
 441   tprintf("start GC!\n");
 442   gc_status_info.gcprocessing = true;
 443   gc_status_info.gcphase = INITPHASE;
 444   initGC();
 445
 446   GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
 447
 448   waitconfirm = false;
 449   numconfirm = 0;
 450   CACHEADAPT_GC(true);
 451   //tprintf("Check core status \n");
 452   GC_CHECK_ALL_CORE_STATUS();
 453   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 454   CACHEADAPT_OUTPUT_CACHE_SAMPLING();
 455   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 456   //tprintf("start mark phase\n");
 457   // do mark phase
 458   GCPROFILE_ITEM_MASTER();
 459   master_mark(stackptr);
 460   GCPROFILE_ITEM_MASTER();
 461   //tprintf("finish mark phase\n");
 462   // get large objects from all cores
 463   master_getlargeobjs();
 464   //tprintf("start compact phase\n");
 465   // compact the heap
 466   master_compact();
 467   //tprintf("start update phase\n");
 468   // update the references
 469   master_updaterefs(stackptr);
 470   //tprintf("gc master finished update   \n");
 471   // do cache adaptation
 472   CACHEADAPT_PHASE_MASTER();
 473   //tprintf("finish cachdapt phase\n");
 474   // do finish up stuff
 475 #ifdef GC_DEBUG
 476   for(int i=0;i<GCNUMBLOCK;i++) {
 477     struct blockrecord *record=&allocationinfo.blocktable[i];
 478     tprintf("%u. used=%u free=%u corenum=%u status=%u, base=%x, ptr=%x\n", i, record->usedspace, record->freespace, record->corenum, record->status, gcbaseva+OFFSET2BASEVA(i), (gcbaseva+OFFSET2BASEVA(i)+record->usedspace));
 479   }
 480 #endif
 481
 482   master_finish();
 483
 484   //tprintf("finish GC ! %d \n",gcflag);
 485 }
 486
 487 void pregccheck() {
 488   while(true) {
 489     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 490     gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
 491     gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
 492     int sumsendobj = 0;
 493     for(int i = 0; i < NUMCORESACTIVE; i++) {
 494       sumsendobj += gcnumsendobjs[0][i];
 495     }
 496     for(int i = 0; i < NUMCORESACTIVE; i++) {
 497       sumsendobj -= gcnumreceiveobjs[0][i];
 498     }
 499     if(0 != sumsendobj) {
 500       // there were still some msgs on the fly, wait until there
 501       // are some update pregc information coming and check it again
 502       gcprecheck = false;
 503       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 504
 505       while(!gcprecheck) ;
 506     } else {
 507       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 508       return;
 509     }
 510   }
 511 }
 512
 513 void pregcprocessing() {
 514 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)&&(defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
 515   // disable the timer interrupt
 516   bamboo_mask_timer_intr();
 517   // get the sampling data
 518   bamboo_output_dtlb_sampling();
 519 #endif
 520 }
 521
 522 void postgcprocessing() {
 523 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)&&(defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
 524   // enable the timer interrupt
 525   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
 526   bamboo_unmask_timer_intr();
 527   //turn on sampling again
 528   bamboo_dtlb_sampling_init();
 529 #endif
 530 }
 531
 532 bool gc(struct garbagelist * stackptr) {
 533   // check if do gc
 534   if(!gcflag) {
 535     gc_status_info.gcprocessing = false;
 536     return false;
 537   }
 538 #ifdef PERFCOUNT
 539   profile_start(GC_REGION);
 540 #endif
 541
 542   // core coordinator routine
 543   if(0 == BAMBOO_NUM_OF_CORE) {
 544     GC_PRINTF("start gc! \n");
 545     GCPROFILE_START_MASTER();
 546     unsigned long long thisgctime = BAMBOO_GET_EXE_TIME();
 547
 548     GC_PRINTF("Check if we can do gc or not\n");
 549     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 550     pregcprocessing();
 551
 552     //wait for other cores to catch up
 553     while(!gc_checkCoreStatus())
 554       ;
 555
 556     //pregccheck();
 557     gc_master(stackptr);
 558     GCtime = BAMBOO_GET_EXE_TIME() - thisgctime;
 559     numGCs++;
 560   } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
 561     GC_PRINTF("Core reporting for gc.\n");
 562     pregcprocessing();
 563     gc_collect(stackptr);
 564   } else {
 565     pregcprocessing();
 566     gc_nocollect(stackptr);
 567   }
 568   postgcprocessing();
 569 #ifdef PERFCOUNT
 570   profile_start(APP_REGION);
 571 #endif
 572   return true;
 573 }
 574
 575 #endif