Robust/src/Runtime/bamboo/multicoregarbage.c

   1 #ifdef MULTICORE_GC
   2 #include "runtime.h"
   3 #include "multicoreruntime.h"
   4 #include "multicoregarbage.h"
   5 #include "multicoregcmark.h"
   6 #include "multicoregccompact.h"
   7 #include "multicoregcflush.h"
   8 #include "multicoregcprofile.h"
   9 #include "gcqueue.h"
  10 #include "multicoremem_helper.h"
  11 #include "bambooalign.h"
  12
  13 volatile bool gcflag;
  14 gc_status_t gc_status_info;
  15
  16 unsigned long long gc_output_cache_policy_time=0;
  17
  18 #ifdef GC_DEBUG
  19 // dump whole mem in blocks
  20 void dumpSMem() {
  21   int block = 0;
  22   int sblock = 0;
  23   unsigned int j = 0;
  24   unsigned int i = 0;
  25   int coren = 0;
  26   int x = 0;
  27   int y = 0;
  28   printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  29   // reserved blocks for sblocktbl
  30   printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
  31          udn_tile_coord_y());
  32   for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
  33     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  34         udn_tile_coord_x(), udn_tile_coord_y(),
  35         *((int *)(i)), *((int *)(i + 4)),
  36         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  37         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  38         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  39         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  40         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  41         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  42         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  43   }
  44   sblock = 0;
  45   bool advanceblock = false;
  46   // remaining memory
  47   for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
  48     advanceblock = false;
  49     // computing sblock # and block #, core coordinate (x,y) also
  50     if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
  51       // finished a sblock
  52       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
  53         if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
  54           // finished a block
  55           block++;
  56           advanceblock = true;
  57         }
  58       } else {
  59         // finished a block
  60         block++;
  61         advanceblock = true;
  62       }
  63       // compute core #
  64       if(advanceblock) {
  65         coren = gc_block2core[block%(NUMCORES4GC*2)];
  66       }
  67       // compute core coordinate
  68       x = BAMBOO_COORDS_X(coren);
  69       y = BAMBOO_COORDS_Y(coren);
  70       printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
  71           udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
  72           (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
  73     }
  74     j++;
  75     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  76         udn_tile_coord_x(), udn_tile_coord_y(),
  77         *((int *)(i)), *((int *)(i + 4)),
  78         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  79         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  80         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  81         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  82         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  83         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  84         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  85   }
  86   printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
  87 }
  88 #endif
  89
  90 void initmulticoregcdata() {
  91   bamboo_smem_zero_top = NULL;
  92   gcflag = false;
  93   gc_status_info.gcprocessing = false;
  94   gc_status_info.gcphase = FINISHPHASE;
  95
  96   gcprecheck = true;
  97   gcforwardobjtbl = allocateMGCHash_I(128);
  98 #ifdef MGC_SPEC
  99   gc_profile_flag = false;
 100 #endif
 101 #ifdef GC_CACHE_ADAPT
 102   gccachestage = false;
 103 #endif
 104
 105   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 106     allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
 107     for(int i=0; i<GCNUMBLOCK;i++) {
 108       if (1==NUMCORES4GC)
 109         allocationinfo.blocktable[i].corenum=0;
 110       else
 111         allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
 112       allocationinfo.blocktable[i].status=BS_FREE;
 113       allocationinfo.blocktable[i].usedspace=0;
 114       allocationinfo.blocktable[i].freespace=GLOBALBLOCKSIZE(i);
 115     }
 116     buildCore2Test();
 117   }
 118
 119   //initialize update structures
 120   origarraycount=0;
 121   for(int i=0;i<NUMCORES4GC;i++) {
 122     origblockarray[i]=NULL;
 123   }
 124
 125   INIT_MULTICORE_GCPROFILE_DATA();
 126 }
 127
 128 void dismulticoregcdata() {
 129   freeMGCHash(gcforwardobjtbl);
 130 }
 131
 132 void initGC() {
 133   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 134     for(int i = 0; i < NUMCORES4GC; i++) {
 135       gccorestatus[i] = 1;
 136       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 137       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 138       gcloads[i] = 0;
 139       gcrequiredmems[i] = 0;
 140     }
 141     for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
 142       gccorestatus[i] = 1;
 143       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 144       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 145     }
 146     gcnumsrobjs_index = 0;
 147   }
 148   gcself_numsendobjs = 0;
 149   gcself_numreceiveobjs = 0;
 150   gcmovestartaddr = 0;
 151   gctomove = false;
 152   gcblock2fill = 0;
 153   gcmovepending = 0;
 154   gccurr_heaptop = 0;
 155   update_origblockptr=NULL;
 156   gc_queueinit();
 157
 158   MGCHashreset(gcforwardobjtbl);
 159
 160   GCPROFILE_INIT();
 161   gc_output_cache_policy_time=0;
 162 }
 163
 164 void checkMarkStatus_p2() {
 165   //  tprintf("Check mark status 2\n");
 166   // check if the sum of send objs and receive obj are the same
 167   // yes->check if the info is the latest; no->go on executing
 168   unsigned int sumsendobj = 0;
 169   for(int i = 0; i < NUMCORESACTIVE; i++) {
 170     sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
 171   }
 172   for(int i = 0; i < NUMCORESACTIVE; i++) {
 173     sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
 174   }
 175   if(0 == sumsendobj) {
 176     // Check if there are changes of the numsendobjs or numreceiveobjs
 177     // on each core
 178     int i = 0;
 179     for(i = 0; i < NUMCORESACTIVE; i++) {
 180       if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
 181         break;
 182       }
 183     }
 184     if(i == NUMCORESACTIVE) {
 185       //tprintf("Mark terminated\n");
 186       // all the core status info are the latest,stop mark phase
 187       gc_status_info.gcphase = COMPACTPHASE;
 188       // restore the gcstatus for all cores
 189       for(int i = 0; i < NUMCORESACTIVE; i++) {
 190         gccorestatus[i] = 1;
 191       }
 192     } else {
 193       // There were changes between phase 1 and phase 2, can not decide
 194       // whether the mark phase has been finished
 195       waitconfirm = false;
 196       // As it fails in phase 2, flip the entries
 197       gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 198     }
 199   } else {
 200     // There were changes between phase 1 and phase 2, can not decide
 201     // whether the mark phase has been finished
 202     waitconfirm = false;
 203     // As it fails in phase 2, flip the entries
 204     gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 205   }
 206 }
 207
 208 void checkMarkStatus() {
 209   //  tprintf("Check mark status\n");
 210   if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
 211     unsigned int entry_index = 0;
 212     if(waitconfirm) {
 213       // phase 2
 214       entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 215     } else {
 216       // phase 1
 217       entry_index = gcnumsrobjs_index;
 218     }
 219     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 220     // check the status of all cores
 221     if (gc_checkCoreStatus()) {
 222       // ask for confirm
 223       if(!waitconfirm) {
 224         // the first time found all cores stall
 225         // send out status confirm msg to all other cores
 226         // reset the corestatus array too
 227         waitconfirm = true;
 228         numconfirm = NUMCORESACTIVE - 1;
 229         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 230         GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
 231       } else {
 232         // Phase 2
 233         checkMarkStatus_p2();
 234         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 235       }
 236     } else {
 237       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 238     }
 239   }
 240 }
 241
 242 // compute load balance for all cores
 243 int loadbalance() {
 244   // compute load balance
 245   // get the total loads
 246   void * heaptop;
 247   unsigned int tloads = 0;
 248   for(int i = 0; i < NUMCORES4GC; i++) {
 249     tloads += gcloads[i];
 250     //tprintf("load: %d %d \n", gcloads[i], i);
 251   }
 252   heaptop = gcbaseva + tloads;
 253
 254   unsigned int topblockindex;
 255
 256   BLOCKINDEX(topblockindex, heaptop);
 257   // num of blocks per core
 258   unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
 259
 260   return numbpc;
 261 }
 262
 263 void gc_collect(struct garbagelist * stackptr) {
 264   gc_status_info.gcprocessing = true;
 265   // inform the master that this core is at a gc safe point and is ready to
 266   // do gc
 267   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 268
 269   // core collector routine
 270   //wait for init phase
 271   WAITFORGCPHASE(INITPHASE);
 272
 273   GC_PRINTF("Do initGC\n");
 274   initGC();
 275   CACHEADAPT_GC(true);
 276   //send init finish msg to core coordinator
 277   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 278
 279   //wait for mark phase
 280   WAITFORGCPHASE(MARKPHASE);
 281
 282   GC_PRINTF("Start mark phase\n");
 283   mark(stackptr);
 284   GC_PRINTF("Finish mark phase, start compact phase\n");
 285   compact();
 286   GC_PRINTF("Finish compact phase\n");
 287
 288   WAITFORGCPHASE(UPDATEPHASE);
 289
 290   GC_PRINTF("Start update phase\n");
 291   GCPROFILE_INFO_2_MASTER();
 292   update(stackptr);
 293   GC_PRINTF("Finish update phase\n");
 294
 295   CACHEADAPT_PHASE_CLIENT();
 296
 297   // invalidate all shared mem pointers
 298   bamboo_cur_msp = NULL;
 299   bamboo_smem_size = 0;
 300   bamboo_smem_zero_top = NULL;
 301   gcflag = false;
 302
 303   WAITFORGCPHASE(FINISHPHASE);
 304
 305   GC_PRINTF("Finish gc! \n");
 306 }
 307
 308 void gc_nocollect(struct garbagelist * stackptr) {
 309   gc_status_info.gcprocessing = true;
 310   // inform the master that this core is at a gc safe point and is ready to
 311   // do gc
 312   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 313
 314   WAITFORGCPHASE(INITPHASE);
 315
 316   GC_PRINTF("Do initGC\n");
 317   initGC();
 318   CACHEADAPT_GC(true);
 319   //send init finish msg to core coordinator
 320   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 321
 322   WAITFORGCPHASE(MARKPHASE);
 323
 324   GC_PRINTF("Start mark phase\n");
 325   mark(stackptr);
 326   GC_PRINTF("Finish mark phase, wait for update\n");
 327
 328   // non-gc core collector routine
 329   WAITFORGCPHASE(UPDATEPHASE);
 330
 331   GC_PRINTF("Start update phase\n");
 332   GCPROFILE_INFO_2_MASTER();
 333   update(stackptr);
 334   GC_PRINTF("Finish update phase\n");
 335
 336   CACHEADAPT_PHASE_CLIENT();
 337
 338   // invalidate all shared mem pointers
 339   bamboo_cur_msp = NULL;
 340   bamboo_smem_size = 0;
 341   bamboo_smem_zero_top = NULL;
 342
 343   gcflag = false;
 344   WAITFORGCPHASE(FINISHPHASE);
 345
 346   GC_PRINTF("Finish gc! \n");
 347 }
 348
 349 void master_mark(struct garbagelist *stackptr) {
 350
 351   GC_PRINTF("Start mark phase \n");
 352   GC_SEND_MSG_1_TO_CLIENT(GCSTART);
 353   gc_status_info.gcphase = MARKPHASE;
 354   // mark phase
 355
 356   mark(stackptr);
 357 }
 358
 359 void master_getlargeobjs() {
 360   // send msgs to all cores requiring large objs info
 361   // Note: only need to ask gc cores, non-gc cores do not host any objs
 362   numconfirm = NUMCORES4GC - 1;
 363   for(int i = 1; i < NUMCORES4GC; i++) {
 364     send_msg_1(i,GCLOBJREQUEST);
 365   }
 366   gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 367   //spin until we have all responses
 368   while(numconfirm!=0) ;
 369
 370   GCPROFILE_ITEM();
 371   GC_PRINTF("prepare to cache large objs \n");
 372
 373 }
 374
 375
 376 void master_updaterefs(struct garbagelist * stackptr) {
 377   gc_status_info.gcphase = UPDATEPHASE;
 378   GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
 379   GCPROFILE_ITEM();
 380   GC_PRINTF("Start update phase \n");
 381   // update phase
 382   update(stackptr);
 383   GC_CHECK_ALL_CORE_STATUS();
 384   GC_PRINTF("Finish update phase \n");
 385 }
 386
 387 void master_finish() {
 388   gc_status_info.gcphase = FINISHPHASE;
 389
 390   // invalidate all shared mem pointers
 391   // put it here as it takes time to inform all the other cores to
 392   // finish gc and it might cause problem when some core resumes
 393   // mutator earlier than the other cores
 394   bamboo_cur_msp = NULL;
 395   bamboo_smem_size = 0;
 396   bamboo_smem_zero_top = NULL;
 397
 398   GCPROFILE_END();
 399   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 400   CACHEADAPT_OUTPUT_CACHE_POLICY();
 401   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 402   gcflag = false;
 403
 404   GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
 405   gc_status_info.gcprocessing = false;
 406
 407   if(gcflag) {
 408     // inform other cores to stop and wait for gc
 409     GC_PRINTF("Back to Back gc case\n");
 410     gcprecheck = true;
 411     for(int i = 0; i < NUMCORESACTIVE; i++) {
 412       // reuse the gcnumsendobjs & gcnumreceiveobjs
 413       gcnumsendobjs[0][i] = 0;
 414       gcnumreceiveobjs[0][i] = 0;
 415     }
 416     GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
 417   }
 418 }
 419
 420 void gc_master(struct garbagelist * stackptr) {
 421   //tprintf("start GC!\n");
 422   gc_status_info.gcprocessing = true;
 423   gc_status_info.gcphase = INITPHASE;
 424
 425   waitconfirm = false;
 426   numconfirm = 0;
 427   initGC();
 428   GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
 429   CACHEADAPT_GC(true);
 430   //tprintf("Check core status \n");
 431   GC_CHECK_ALL_CORE_STATUS();
 432   GCPROFILE_ITEM();
 433   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 434   CACHEADAPT_OUTPUT_CACHE_SAMPLING();
 435   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 436   //tprintf("start mark phase\n");
 437   // do mark phase
 438   master_mark(stackptr);
 439   //tprintf("finish mark phase\n");
 440   // get large objects from all cores
 441   master_getlargeobjs();
 442   //tprintf("start compact phase\n");
 443   // compact the heap
 444   master_compact();
 445   //tprintf("start update phase\n");
 446   // update the references
 447   master_updaterefs(stackptr);
 448   //tprintf("gc master finished update   \n");
 449   // do cache adaptation
 450   CACHEADAPT_PHASE_MASTER();
 451   //tprintf("finish cachdapt phase\n");
 452   // do finish up stuff
 453 #ifdef GC_DEBUG
 454   for(int i=0;i<GCNUMBLOCK;i++) {
 455     struct blockrecord *record=&allocationinfo.blocktable[i];
 456     tprintf("%u. used=%u free=%u corenum=%u status=%u, base=%x, ptr=%x\n", i, record->usedspace, record->freespace, record->corenum, record->status, gcbaseva+OFFSET2BASEVA(i), (gcbaseva+OFFSET2BASEVA(i)+record->usedspace));
 457   }
 458 #endif
 459   master_finish();
 460
 461   //tprintf("finish GC ! %d \n",gcflag);
 462 }
 463
 464 void pregccheck() {
 465   while(true) {
 466     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 467     gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
 468     gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
 469     int sumsendobj = 0;
 470     for(int i = 0; i < NUMCORESACTIVE; i++) {
 471       sumsendobj += gcnumsendobjs[0][i];
 472     }
 473     for(int i = 0; i < NUMCORESACTIVE; i++) {
 474       sumsendobj -= gcnumreceiveobjs[0][i];
 475     }
 476     if(0 != sumsendobj) {
 477       // there were still some msgs on the fly, wait until there
 478       // are some update pregc information coming and check it again
 479       gcprecheck = false;
 480       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 481
 482       while(!gcprecheck) ;
 483     } else {
 484       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 485       return;
 486     }
 487   }
 488 }
 489
 /*
  * Hook run on every core right before it enters the collector.  It only
  * does work when both GC_CACHE_ADAPT and GC_CACHE_SAMPLING are defined.
  */
 void pregcprocessing() {
 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
   // disable the timer interrupt, then get the sampling data
   bamboo_mask_timer_intr();
   bamboo_output_dtlb_sampling();
 #endif
 }
 500
 /*
  * Hook run on every core right after a collection.  It only does work when
  * both GC_CACHE_ADAPT and GC_CACHE_SAMPLING are defined.
  */
 void postgcprocessing() {
 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
   // program the next timer event, then enable the timer interrupt again
   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
   bamboo_unmask_timer_intr();
 #endif
 }
 508
 509 bool gc(struct garbagelist * stackptr) {
 510   // check if do gc
 511   if(!gcflag) {
 512     gc_status_info.gcprocessing = false;
 513     return false;
 514   }
 515
 516   // core coordinator routine
 517   if(0 == BAMBOO_NUM_OF_CORE) {
 518     GC_PRINTF("Check if we can do gc or not\n");
 519     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 520
 521     //wait for other cores to catch up
 522     while(!gc_checkCoreStatus())
 523       ;
 524
 525     GCPROFILE_START();
 526     pregccheck();
 527     GC_PRINTF("start gc! \n");
 528     pregcprocessing();
 529     gc_master(stackptr);
 530   } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
 531     GC_PRINTF("Core reporting for gc.\n");
 532     pregcprocessing();
 533     gc_collect(stackptr);
 534   } else {
 535     pregcprocessing();
 536     gc_nocollect(stackptr);
 537   }
 538   postgcprocessing();
 539   return true;
 540 }
 541
 542 #endif