Robust/src/Runtime/bamboo/multicoregarbage.c

   1 // TODO: DO NOT support tag!!!
   2 #ifdef MULTICORE_GC
   3 #include "runtime.h"
   4 #include "multicoreruntime.h"
   5 #include "multicoregarbage.h"
   6 #include "multicoregcmark.h"
   7 #include "gcqueue.h"
   8 #include "multicoregccompact.h"
   9 #include "multicoregcflush.h"
  10 #include "multicoregcprofile.h"
  11 #include "gcqueue.h"
  12
  13 volatile bool gcflag; // tested on entry to gc(); cleared again by the collect/finish routines below
  14 gc_status_t gc_status_info; // carries the gcprocessing flag and the current gcphase
  15
  16 unsigned long long gc_output_cache_policy_time=0; // time accumulated around the CACHEADAPT_OUTPUT_CACHE_* calls; reset in initGC()
  17
  18 #ifdef GC_DEBUG
  19 // dump whole mem in blocks
  20 void dumpSMem() {
  21   int block = 0;
  22   int sblock = 0;
  23   unsigned int j = 0;
  24   void * i = 0;
  25   int coren = 0;
  26   int x = 0;
  27   int y = 0;
  28   printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  29   // reserved blocks for sblocktbl
  30   printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
  31          udn_tile_coord_y());
  32   for(i=BAMBOO_BASE_VA; (unsinged int)i<(unsigned int)gcbaseva; i+= 4*16) {
  33     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  34         udn_tile_coord_x(), udn_tile_coord_y(),
  35         *((int *)(i)), *((int *)(i + 4)),
  36         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  37         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  38         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  39         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  40         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  41         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  42         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  43   }
  44   sblock = 0;
  45   bool advanceblock = false;
  46   // remaining memory
  47   for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
  48     advanceblock = false;
  49     // computing sblock # and block #, core coordinate (x,y) also
  50     if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
  51       // finished a sblock
  52       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
  53         if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
  54           // finished a block
  55           block++;
  56           advanceblock = true;
  57         }
  58       } else {
  59         // finished a block
  60         block++;
  61         advanceblock = true;
  62       }
  63       // compute core #
  64       if(advanceblock) {
  65         coren = gc_block2core[block%(NUMCORES4GC*2)];
  66       }
  67       // compute core coordinate
  68       x = BAMBOO_COORDS_X(coren);
  69       y = BAMBOO_COORDS_Y(coren);
  70       printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
  71           udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
  72           (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
  73     }
  74     j++;
  75     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  76         udn_tile_coord_x(), udn_tile_coord_y(),
  77         *((int *)(i)), *((int *)(i + 4)),
  78         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  79         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  80         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  81         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  82         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  83         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  84         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  85   }
  86   printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
  87 }
  88 #endif
  89
  90 void initmulticoregcdata() {
  91   bamboo_smem_zero_top = NULL;
  92   gcflag = false;
  93   gc_status_info.gcprocessing = false;
  94   gc_status_info.gcphase = FINISHPHASE;
  95
  96   gcprecheck = true;
  97   gcforwardobjtbl = allocateMGCHash_I(128);
  98   gcheaptop = 0;
  99 #ifdef MGC_SPEC
 100   gc_profile_flag = false;
 101 #endif
 102 #ifdef GC_CACHE_ADAPT
 103   gccachestage = false;
 104 #endif
 105
 106   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 107     allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
 108     for(int i=0; i<GCNUMBLOCK;i++) {
 109       if (1==NUMCORES4GC)
 110         allocationinfo.blocktable[i].corenum=0;
 111       else
 112         allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
 113     }
 114   }
 115
 116   INIT_MULTICORE_GCPROFILE_DATA();
 117 }
 118
 119 void dismulticoregcdata() {
 120   freeMGCHash(gcforwardobjtbl);
 121 }
 122
 123 void initGC() {
 124   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 125     for(int i=0; i<GCNUMBLOCK;i++) {
 126       allocationinfo.blocktable[i].status=BS_INIT;
 127     }
 128     allocationinfo.lowestfreeblock=NOFREEBLOCK;
 129     for(int i = 0; i < NUMCORES4GC; i++) {
 130       gccorestatus[i] = 1;
 131       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 132       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 133       gcloads[i] = 0;
 134       gcrequiredmems[i] = 0;
 135       gcfilledblocks[i] = 0;
 136       gcstopblock[i] = 0;
 137     }
 138     for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
 139       gccorestatus[i] = 1;
 140       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 141       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 142     }
 143     gcheaptop = 0;
 144     gcnumsrobjs_index = 0;
 145   }
 146   gcself_numsendobjs = 0;
 147   gcself_numreceiveobjs = 0;
 148   gcmarkedptrbound = 0;
 149   gcmovestartaddr = 0;
 150   gctomove = false;
 151   gcblock2fill = 0;
 152   gcmovepending = 0;
 153   gccurr_heaptop = 0;
 154
 155   gc_queueinit();
 156
 157   MGCHashreset(gcforwardobjtbl);
 158
 159   GCPROFILE_INIT();
 160   gc_output_cache_policy_time=0;
 161 }
 162
 163 bool gc_checkAllCoreStatus() {
 164   for(int i = 0; i < NUMCORESACTIVE; i++) {
 165     if(gccorestatus[i] != 0) {
 166       return false;
 167     }
 168   }
 169   return true;
 170 }
 171
 172 // NOTE: should be invoked with interrupts turned off
 173 bool gc_checkAllCoreStatus_I() {
 174   for(int i = 0; i < NUMCORESACTIVE; i++) {
 175     if(gccorestatus[i] != 0) {
 176       return false;
 177     }
 178   }
 179   return true;
 180 }
 181
 182 void checkMarkStatus_p2() {
 183   // check if the sum of send objs and receive obj are the same
 184   // yes->check if the info is the latest; no->go on executing
 185   unsigned int sumsendobj = 0;
 186   for(int i = 0; i < NUMCORESACTIVE; i++) {
 187     sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
 188   }
 189   for(int i = 0; i < NUMCORESACTIVE; i++) {
 190     sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
 191   }
 192   if(0 == sumsendobj) {
 193     // Check if there are changes of the numsendobjs or numreceiveobjs
 194     // on each core
 195     int i = 0;
 196     for(i = 0; i < NUMCORESACTIVE; i++) {
 197       if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
 198         break;
 199       }
 200     }
 201     if(i == NUMCORESACTIVE) {
 202       // all the core status info are the latest,stop mark phase
 203       gc_status_info.gcphase = COMPACTPHASE;
 204       // restore the gcstatus for all cores
 205       for(int i = 0; i < NUMCORESACTIVE; i++) {
 206         gccorestatus[i] = 1;
 207       }
 208     } else {
 209       // There were changes between phase 1 and phase 2, can not decide
 210       // whether the mark phase has been finished
 211       waitconfirm = false;
 212       // As it fails in phase 2, flip the entries
 213       gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 214     }
 215   } else {
 216     // There were changes between phase 1 and phase 2, can not decide
 217     // whether the mark phase has been finished
 218     waitconfirm = false;
 219     // As it fails in phase 2, flip the entries
 220     gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 221   }
 222 }
 223
 224 void checkMarkStatus() {
 225   if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
 226     unsigned int entry_index = 0;
 227     if(waitconfirm) {
 228       // phase 2
 229       entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 230     } else {
 231       // phase 1
 232       entry_index = gcnumsrobjs_index;
 233     }
 234     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 235     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 236     gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
 237     gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
 238     // check the status of all cores
 239     if (gc_checkAllCoreStatus_I()) {
 240       // ask for confirm
 241       if(!waitconfirm) {
 242         // the first time found all cores stall
 243         // send out status confirm msg to all other cores
 244         // reset the corestatus array too
 245         waitconfirm = true;
 246         numconfirm = NUMCORESACTIVE - 1;
 247         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 248         GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
 249       } else {
 250         // Phase 2
 251         checkMarkStatus_p2();
 252         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 253       }
 254     } else {
 255       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 256     }
 257   }
 258 }
 259
 260 // compute load balance for all cores
 261 int loadbalance(void ** heaptop) {
 262   // compute load balance
 263   // get the total loads
 264   unsigned int tloads = 0;
 265   for(int i = 0; i < NUMCORES4GC; i++) {
 266     tloads += gcloads[i];
 267   }
 268   *heaptop = gcbaseva + tloads;
 269
 270   unsigned int topblockindex;
 271
 272   BLOCKINDEX(topblockindex, *heaptop);
 273   // num of blocks per core
 274   unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
 275
 276   return numbpc;
 277 }
 278
 279 void gc_collect(struct garbagelist * stackptr) {
 280   gc_status_info.gcprocessing = true;
 281   // inform the master that this core is at a gc safe point and is ready to
 282   // do gc
 283   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 284
 285   // core collector routine
 286   //wait for init phase
 287   WAITFORGCPHASE(INITPHASE);
 288
 289   GC_PRINTF("Do initGC\n");
 290   initGC();
 291   CACHEADAPT_GC(true);
 292   //send init finish msg to core coordinator
 293   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 294
 295   //wait for mark phase
 296   WAITFORGCPHASE(MARKPHASE);
 297
 298   GC_PRINTF("Start mark phase\n");
 299   mark(true, stackptr);
 300   GC_PRINTF("Finish mark phase, start compact phase\n");
 301   compact();
 302   GC_PRINTF("Finish compact phase\n");
 303
 304   WAITFORGCPHASE(UPDATEPHASE);
 305
 306   GC_PRINTF("Start flush phase\n");
 307   GCPROFILE_INFO_2_MASTER();
 308   update(stackptr);
 309   GC_PRINTF("Finish flush phase\n");
 310
 311   CACHEADAPT_PHASE_CLIENT();
 312
 313   // invalidate all shared mem pointers
 314   bamboo_cur_msp = NULL;
 315   bamboo_smem_size = 0;
 316   bamboo_smem_zero_top = NULL;
 317   gcflag = false;
 318
 319   WAITFORGCPHASE(FINISHPHASE);
 320
 321   GC_PRINTF("Finish gc! \n");
 322 }
 323
 324 void gc_nocollect(struct garbagelist * stackptr) {
 325   gc_status_info.gcprocessing = true;
 326   // inform the master that this core is at a gc safe point and is ready to
 327   // do gc
 328   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 329
 330   WAITFORGCPHASE(INITPHASE);
 331
 332   GC_PRINTF("Do initGC\n");
 333   initGC();
 334   CACHEADAPT_GC(true);
 335   //send init finish msg to core coordinator
 336   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 337
 338   WAITFORGCPHASE(MARKPHASE);
 339
 340   GC_PRINTF("Start mark phase\n");
 341   mark(true, stackptr);
 342   GC_PRINTF("Finish mark phase, wait for flush\n");
 343
 344   // non-gc core collector routine
 345   WAITFORGCPHASE(UPDATEPHASE);
 346
 347   GC_PRINTF("Start flush phase\n");
 348   GCPROFILE_INFO_2_MASTER();
 349   update(stackptr);
 350   GC_PRINTF("Finish flush phase\n");
 351
 352   CACHEADAPT_PHASE_CLIENT();
 353
 354   // invalidate all shared mem pointers
 355   bamboo_cur_msp = NULL;
 356   bamboo_smem_size = 0;
 357   bamboo_smem_zero_top = NULL;
 358
 359   gcflag = false;
 360   WAITFORGCPHASE(FINISHPHASE);
 361
 362   GC_PRINTF("Finish gc! \n");
 363 }
 364
 365 void master_mark(struct garbagelist *stackptr) {
 366   bool isfirst = true;
 367
 368   GC_PRINTF("Start mark phase \n");
 369   GC_SEND_MSG_1_TO_CLIENT(GCSTART);
 370   gc_status_info.gcphase = MARKPHASE;
 371   // mark phase
 372
 373   while(MARKPHASE == gc_status_info.gcphase) {
 374     mark(isfirst, stackptr);
 375     isfirst=false;
 376     // check gcstatus
 377     checkMarkStatus();
 378   }
 379 }
 380
 381 void master_getlargeobjs() {
 382   // send msgs to all cores requiring large objs info
 383   // Note: only need to ask gc cores, non-gc cores do not host any objs
 384   numconfirm = NUMCORES4GC - 1;
 385   for(int i = 1; i < NUMCORES4GC; i++) {
 386     send_msg_1(i,GCLOBJREQUEST);
 387   }
 388   gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 389   //spin until we have all responses
 390   while(numconfirm!=0) ;
 391
 392   // check the heaptop
 393   if(gcheaptop < gcmarkedptrbound) {
 394     gcheaptop = gcmarkedptrbound;
 395   }
 396   GCPROFILE_ITEM();
 397   GC_PRINTF("prepare to cache large objs \n");
 398
 399 }
 400
 401
 402 void master_updaterefs(struct garbagelist * stackptr) {
 403   gc_status_info.gcphase = UPDATEPHASE;
 404   GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
 405   GCPROFILE_ITEM();
 406   GC_PRINTF("Start flush phase \n");
 407   // flush phase
 408   update(stackptr);
 409   GC_CHECK_ALL_CORE_STATUS(UPDATEPHASE==gc_status_info.gcphase);
 410   GC_PRINTF("Finish flush phase \n");
 411 }
 412
 413 void master_finish() {
 414   gc_status_info.gcphase = FINISHPHASE;
 415
 416   // invalidate all shared mem pointers
 417   // put it here as it takes time to inform all the other cores to
 418   // finish gc and it might cause problem when some core resumes
 419   // mutator earlier than the other cores
 420   bamboo_cur_msp = NULL;
 421   bamboo_smem_size = 0;
 422   bamboo_smem_zero_top = NULL;
 423
 424   GCPROFILE_END();
 425   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 426   CACHEADAPT_OUTPUT_CACHE_POLICY();
 427   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 428   gcflag = false;
 429   GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
 430
 431   gc_status_info.gcprocessing = false;
 432   if(gcflag) {
 433     // inform other cores to stop and wait for gc
 434     gcprecheck = true;
 435     for(int i = 0; i < NUMCORESACTIVE; i++) {
 436       // reuse the gcnumsendobjs & gcnumreceiveobjs
 437       gcnumsendobjs[0][i] = 0;
 438       gcnumreceiveobjs[0][i] = 0;
 439     }
 440     GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
 441   }
 442 }
 443
 444 void gc_master(struct garbagelist * stackptr) {
 445   tprintf("start GC !!!!!!!!!!!!! \n");
 446   gc_status_info.gcprocessing = true;
 447   gc_status_info.gcphase = INITPHASE;
 448
 449   waitconfirm = false;
 450   numconfirm = 0;
 451   initGC();
 452   GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
 453   CACHEADAPT_GC(true);
 454   GC_PRINTF("Check core status \n");
 455   GC_CHECK_ALL_CORE_STATUS(true);
 456   GCPROFILE_ITEM();
 457   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 458   CACHEADAPT_OUTPUT_CACHE_SAMPLING();
 459   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 460
 461   // do mark phase
 462   master_mark(stackptr);
 463
 464   // get large objects from all cores
 465   master_getlargeobjs();
 466
 467   // compact the heap
 468   master_compact();
 469
 470   // update the references
 471   master_updaterefs(stackptr);
 472
 473   // do cache adaptation
 474   CACHEADAPT_PHASE_MASTER();
 475
 476   // do finish up stuff
 477   master_finish();
 478
 479   GC_PRINTF("gc finished   \n");
 480   tprintf("finish GC ! %d \n",gcflag);
 481 }
 482
 483 void pregccheck() {
 484   while(true) {
 485     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 486     gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
 487     gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
 488     int sumsendobj = 0;
 489     for(int i = 0; i < NUMCORESACTIVE; i++) {
 490       sumsendobj += gcnumsendobjs[0][i];
 491     }
 492     for(int i = 0; i < NUMCORESACTIVE; i++) {
 493       sumsendobj -= gcnumreceiveobjs[0][i];
 494     }
 495     if(0 != sumsendobj) {
 496       // there were still some msgs on the fly, wait until there
 497       // are some update pregc information coming and check it again
 498       gcprecheck = false;
 499       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 500
 501       while(!gcprecheck) ;
 502     } else {
 503       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 504       return;
 505     }
 506   }
 507 }
 508
 509 void pregcprocessing() {
 510 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 511   // disable the timer interrupt
 512   bamboo_mask_timer_intr();
 513 #endif
 514 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 515   // get the sampling data
 516   bamboo_output_dtlb_sampling();
 517 #endif
 518 }
 519
 520 void postgcprocessing() {
 521 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 522   // enable the timer interrupt
 523   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
 524   bamboo_unmask_timer_intr();
 525 #endif
 526 }
 527
 528 bool gc(struct garbagelist * stackptr) {
 529   // check if do gc
 530   if(!gcflag) {
 531     gc_status_info.gcprocessing = false;
 532     return false;
 533   }
 534
 535   // core coordinator routine
 536   if(0 == BAMBOO_NUM_OF_CORE) {
 537     GC_PRINTF("Check if we can do gc or not\n");
 538     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 539     if(!gc_checkAllCoreStatus()) {
 540       // some of the cores are still executing the mutator and did not reach
 541       // some gc safe point, therefore it is not ready to do gc
 542       gcflag = true;
 543       return false;
 544     } else {
 545       GCPROFILE_START();
 546       pregccheck();
 547     }
 548     GC_PRINTF("start gc! \n");
 549     pregcprocessing();
 550     gc_master(stackptr);
 551   } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
 552     pregcprocessing();
 553     gc_collect(stackptr);
 554   } else {
 555     pregcprocessing();
 556     gc_nocollect(stackptr);
 557   }
 558   postgcprocessing();
 559
 560   return true;
 561 }
 562
 563 #endif