Robust/src/Runtime/bamboo/multicoregarbage.c

   1 #ifdef MULTICORE_GC
   2 #include "runtime.h"
   3 #include "multicoreruntime.h"
   4 #include "multicoregarbage.h"
   5 #include "multicoregcmark.h"
   6 #include "multicoregccompact.h"
   7 #include "multicoregcflush.h"
   8 #include "multicoregcprofile.h"
   9 #include "gcqueue.h"
  10 #include "multicoremem_helper.h"
  11
// Collection-requested flag: gc() returns immediately while this is false.
// It is cleared again near the end of every collection (gc_collect,
// gc_nocollect, master_finish).
volatile bool gcflag;
// GC state of this core: gcprocessing is set on entry to the collection
// routines, gcphase holds the current phase (INITPHASE, MARKPHASE,
// COMPACTPHASE, UPDATEPHASE or FINISHPHASE).
gc_status_t gc_status_info;

// Accumulated BAMBOO_GET_EXE_TIME() delta measured around the master's
// CACHEADAPT_OUTPUT_* calls; reset to 0 by initGC().
unsigned long long gc_output_cache_policy_time=0;
  16
  17 #ifdef GC_DEBUG
  18 // dump whole mem in blocks
  19 void dumpSMem() {
  20   int block = 0;
  21   int sblock = 0;
  22   unsigned int j = 0;
  23   void * i = 0;
  24   int coren = 0;
  25   int x = 0;
  26   int y = 0;
  27   printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
  28   // reserved blocks for sblocktbl
  29   printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
  30          udn_tile_coord_y());
  31   for(i=BAMBOO_BASE_VA; (unsinged int)i<(unsigned int)gcbaseva; i+= 4*16) {
  32     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  33         udn_tile_coord_x(), udn_tile_coord_y(),
  34         *((int *)(i)), *((int *)(i + 4)),
  35         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  36         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  37         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  38         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  39         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  40         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  41         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  42   }
  43   sblock = 0;
  44   bool advanceblock = false;
  45   // remaining memory
  46   for(i=gcbaseva; (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); i+=4*16) {
  47     advanceblock = false;
  48     // computing sblock # and block #, core coordinate (x,y) also
  49     if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
  50       // finished a sblock
  51       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
  52         if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
  53           // finished a block
  54           block++;
  55           advanceblock = true;
  56         }
  57       } else {
  58         // finished a block
  59         block++;
  60         advanceblock = true;
  61       }
  62       // compute core #
  63       if(advanceblock) {
  64         coren = gc_block2core[block%(NUMCORES4GC*2)];
  65       }
  66       // compute core coordinate
  67       x = BAMBOO_COORDS_X(coren);
  68       y = BAMBOO_COORDS_Y(coren);
  69       printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
  70           udn_tile_coord_x(), udn_tile_coord_y(),block, sblock++, x, y,
  71           (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
  72     }
  73     j++;
  74     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  75         udn_tile_coord_x(), udn_tile_coord_y(),
  76         *((int *)(i)), *((int *)(i + 4)),
  77         *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  78         *((int *)(i + 4*4)), *((int *)(i + 4*5)),
  79         *((int *)(i + 4*6)), *((int *)(i + 4*7)),
  80         *((int *)(i + 4*8)), *((int *)(i + 4*9)),
  81         *((int *)(i + 4*10)), *((int *)(i + 4*11)),
  82         *((int *)(i + 4*12)), *((int *)(i + 4*13)),
  83         *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  84   }
  85   printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
  86 }
  87 #endif
  88
  89 void initmulticoregcdata() {
  90   bamboo_smem_zero_top = NULL;
  91   gcflag = false;
  92   gc_status_info.gcprocessing = false;
  93   gc_status_info.gcphase = FINISHPHASE;
  94
  95   gcprecheck = true;
  96   gcforwardobjtbl = allocateMGCHash_I(128);
  97 #ifdef MGC_SPEC
  98   gc_profile_flag = false;
  99 #endif
 100 #ifdef GC_CACHE_ADAPT
 101   gccachestage = false;
 102 #endif
 103
 104   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 105     allocationinfo.blocktable=RUNMALLOC(sizeof(struct blockrecord)*GCNUMBLOCK);
 106     for(int i=0; i<GCNUMBLOCK;i++) {
 107       if (1==NUMCORES4GC)
 108         allocationinfo.blocktable[i].corenum=0;
 109       else
 110         allocationinfo.blocktable[i].corenum=gc_block2core[(i%(NUMCORES4GC*2))];
 111     }
 112     buildCore2Test();
 113   }
 114
 115   INIT_MULTICORE_GCPROFILE_DATA();
 116 }
 117
 118 void dismulticoregcdata() {
 119   freeMGCHash(gcforwardobjtbl);
 120 }
 121
 122 void initGC() {
 123   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 124     for(int i = 0; i < NUMCORES4GC; i++) {
 125       gccorestatus[i] = 1;
 126       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 127       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 128       gcloads[i] = 0;
 129       gcrequiredmems[i] = 0;
 130     }
 131     for(int i = NUMCORES4GC; i < NUMCORESACTIVE; i++) {
 132       gccorestatus[i] = 1;
 133       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 134       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 135     }
 136     gcnumsrobjs_index = 0;
 137   }
 138   gcself_numsendobjs = 0;
 139   gcself_numreceiveobjs = 0;
 140   gcmovestartaddr = 0;
 141   gctomove = false;
 142   gcblock2fill = 0;
 143   gcmovepending = 0;
 144   gccurr_heaptop = 0;
 145
 146   gc_queueinit();
 147
 148   MGCHashreset(gcforwardobjtbl);
 149
 150   GCPROFILE_INIT();
 151   gc_output_cache_policy_time=0;
 152 }
 153
 154 bool gc_checkAllCoreStatus() {
 155   for(int i = 0; i < NUMCORESACTIVE; i++) {
 156     if(gccorestatus[i] != 0) {
 157       return false;
 158     }
 159   }
 160   return true;
 161 }
 162
 163 // NOTE: should be invoked with interrupts turned off
 164 bool gc_checkAllCoreStatus_I() {
 165   for(int i = 0; i < NUMCORESACTIVE; i++) {
 166     if(gccorestatus[i] != 0) {
 167       return false;
 168     }
 169   }
 170   return true;
 171 }
 172
 173 void checkMarkStatus_p2() {
 174   // check if the sum of send objs and receive obj are the same
 175   // yes->check if the info is the latest; no->go on executing
 176   unsigned int sumsendobj = 0;
 177   for(int i = 0; i < NUMCORESACTIVE; i++) {
 178     sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
 179   }
 180   for(int i = 0; i < NUMCORESACTIVE; i++) {
 181     sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
 182   }
 183   if(0 == sumsendobj) {
 184     // Check if there are changes of the numsendobjs or numreceiveobjs
 185     // on each core
 186     int i = 0;
 187     for(i = 0; i < NUMCORESACTIVE; i++) {
 188       if((gcnumsendobjs[0][i]!=gcnumsendobjs[1][i])||(gcnumreceiveobjs[0][i]!=gcnumreceiveobjs[1][i]) ) {
 189         break;
 190       }
 191     }
 192     if(i == NUMCORESACTIVE) {
 193       // all the core status info are the latest,stop mark phase
 194       gc_status_info.gcphase = COMPACTPHASE;
 195       // restore the gcstatus for all cores
 196       for(int i = 0; i < NUMCORESACTIVE; i++) {
 197         gccorestatus[i] = 1;
 198       }
 199     } else {
 200       // There were changes between phase 1 and phase 2, can not decide
 201       // whether the mark phase has been finished
 202       waitconfirm = false;
 203       // As it fails in phase 2, flip the entries
 204       gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 205     }
 206   } else {
 207     // There were changes between phase 1 and phase 2, can not decide
 208     // whether the mark phase has been finished
 209     waitconfirm = false;
 210     // As it fails in phase 2, flip the entries
 211     gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 212   }
 213 }
 214
 215 void checkMarkStatus() {
 216   if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
 217     unsigned int entry_index = 0;
 218     if(waitconfirm) {
 219       // phase 2
 220       entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
 221     } else {
 222       // phase 1
 223       entry_index = gcnumsrobjs_index;
 224     }
 225     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 226     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 227     gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
 228     gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
 229     // check the status of all cores
 230     if (gc_checkAllCoreStatus_I()) {
 231       // ask for confirm
 232       if(!waitconfirm) {
 233         // the first time found all cores stall
 234         // send out status confirm msg to all other cores
 235         // reset the corestatus array too
 236         waitconfirm = true;
 237         numconfirm = NUMCORESACTIVE - 1;
 238         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 239         GC_SEND_MSG_1_TO_CLIENT(GCMARKCONFIRM);
 240       } else {
 241         // Phase 2
 242         checkMarkStatus_p2();
 243         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 244       }
 245     } else {
 246       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 247     }
 248   }
 249 }
 250
 251 // compute load balance for all cores
 252 int loadbalance() {
 253   // compute load balance
 254   // get the total loads
 255   void * heaptop;
 256   unsigned int tloads = 0;
 257   for(int i = 0; i < NUMCORES4GC; i++) {
 258     tloads += gcloads[i];
 259   }
 260   heaptop = gcbaseva + tloads;
 261
 262   unsigned int topblockindex;
 263
 264   BLOCKINDEX(topblockindex, heaptop);
 265   // num of blocks per core
 266   unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
 267
 268   return numbpc;
 269 }
 270
 271 void gc_collect(struct garbagelist * stackptr) {
 272   gc_status_info.gcprocessing = true;
 273   // inform the master that this core is at a gc safe point and is ready to
 274   // do gc
 275   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 276
 277   // core collector routine
 278   //wait for init phase
 279   WAITFORGCPHASE(INITPHASE);
 280
 281   GC_PRINTF("Do initGC\n");
 282   initGC();
 283   CACHEADAPT_GC(true);
 284   //send init finish msg to core coordinator
 285   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 286
 287   //wait for mark phase
 288   WAITFORGCPHASE(MARKPHASE);
 289
 290   GC_PRINTF("Start mark phase\n");
 291   mark(stackptr);
 292   GC_PRINTF("Finish mark phase, start compact phase\n");
 293   compact();
 294   GC_PRINTF("Finish compact phase\n");
 295
 296   WAITFORGCPHASE(UPDATEPHASE);
 297
 298   GC_PRINTF("Start flush phase\n");
 299   GCPROFILE_INFO_2_MASTER();
 300   update(stackptr);
 301   GC_PRINTF("Finish flush phase\n");
 302
 303   CACHEADAPT_PHASE_CLIENT();
 304
 305   // invalidate all shared mem pointers
 306   bamboo_cur_msp = NULL;
 307   bamboo_smem_size = 0;
 308   bamboo_smem_zero_top = NULL;
 309   gcflag = false;
 310
 311   WAITFORGCPHASE(FINISHPHASE);
 312
 313   GC_PRINTF("Finish gc! \n");
 314 }
 315
 316 void gc_nocollect(struct garbagelist * stackptr) {
 317   gc_status_info.gcprocessing = true;
 318   // inform the master that this core is at a gc safe point and is ready to
 319   // do gc
 320   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
 321
 322   WAITFORGCPHASE(INITPHASE);
 323
 324   GC_PRINTF("Do initGC\n");
 325   initGC();
 326   CACHEADAPT_GC(true);
 327   //send init finish msg to core coordinator
 328   send_msg_2(STARTUPCORE,GCFINISHINIT,BAMBOO_NUM_OF_CORE);
 329
 330   WAITFORGCPHASE(MARKPHASE);
 331
 332   GC_PRINTF("Start mark phase\n");
 333   mark(stackptr);
 334   GC_PRINTF("Finish mark phase, wait for flush\n");
 335
 336   // non-gc core collector routine
 337   WAITFORGCPHASE(UPDATEPHASE);
 338
 339   GC_PRINTF("Start flush phase\n");
 340   GCPROFILE_INFO_2_MASTER();
 341   update(stackptr);
 342   GC_PRINTF("Finish flush phase\n");
 343
 344   CACHEADAPT_PHASE_CLIENT();
 345
 346   // invalidate all shared mem pointers
 347   bamboo_cur_msp = NULL;
 348   bamboo_smem_size = 0;
 349   bamboo_smem_zero_top = NULL;
 350
 351   gcflag = false;
 352   WAITFORGCPHASE(FINISHPHASE);
 353
 354   GC_PRINTF("Finish gc! \n");
 355 }
 356
 357 void master_mark(struct garbagelist *stackptr) {
 358
 359   GC_PRINTF("Start mark phase \n");
 360   GC_SEND_MSG_1_TO_CLIENT(GCSTART);
 361   gc_status_info.gcphase = MARKPHASE;
 362   // mark phase
 363
 364   mark(stackptr);
 365 }
 366
 367 void master_getlargeobjs() {
 368   // send msgs to all cores requiring large objs info
 369   // Note: only need to ask gc cores, non-gc cores do not host any objs
 370   numconfirm = NUMCORES4GC - 1;
 371   for(int i = 1; i < NUMCORES4GC; i++) {
 372     send_msg_1(i,GCLOBJREQUEST);
 373   }
 374   gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 375   //spin until we have all responses
 376   while(numconfirm!=0) ;
 377
 378   GCPROFILE_ITEM();
 379   GC_PRINTF("prepare to cache large objs \n");
 380
 381 }
 382
 383
 384 void master_updaterefs(struct garbagelist * stackptr) {
 385   gc_status_info.gcphase = UPDATEPHASE;
 386   GC_SEND_MSG_1_TO_CLIENT(GCSTARTUPDATE);
 387   GCPROFILE_ITEM();
 388   GC_PRINTF("Start flush phase \n");
 389   // flush phase
 390   update(stackptr);
 391   GC_CHECK_ALL_CORE_STATUS(UPDATEPHASE==gc_status_info.gcphase);
 392   GC_PRINTF("Finish flush phase \n");
 393 }
 394
 395 void master_finish() {
 396   gc_status_info.gcphase = FINISHPHASE;
 397
 398   // invalidate all shared mem pointers
 399   // put it here as it takes time to inform all the other cores to
 400   // finish gc and it might cause problem when some core resumes
 401   // mutator earlier than the other cores
 402   bamboo_cur_msp = NULL;
 403   bamboo_smem_size = 0;
 404   bamboo_smem_zero_top = NULL;
 405
 406   GCPROFILE_END();
 407   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 408   CACHEADAPT_OUTPUT_CACHE_POLICY();
 409   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 410   gcflag = false;
 411   GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
 412
 413   gc_status_info.gcprocessing = false;
 414   if(gcflag) {
 415     // inform other cores to stop and wait for gc
 416     gcprecheck = true;
 417     for(int i = 0; i < NUMCORESACTIVE; i++) {
 418       // reuse the gcnumsendobjs & gcnumreceiveobjs
 419       gcnumsendobjs[0][i] = 0;
 420       gcnumreceiveobjs[0][i] = 0;
 421     }
 422     GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
 423   }
 424 }
 425
 426 void gc_master(struct garbagelist * stackptr) {
 427   tprintf("start GC !!!!!!!!!!!!! \n");
 428   gc_status_info.gcprocessing = true;
 429   gc_status_info.gcphase = INITPHASE;
 430
 431   waitconfirm = false;
 432   numconfirm = 0;
 433   initGC();
 434   GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
 435   CACHEADAPT_GC(true);
 436   GC_PRINTF("Check core status \n");
 437   GC_CHECK_ALL_CORE_STATUS(true);
 438   GCPROFILE_ITEM();
 439   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 440   CACHEADAPT_OUTPUT_CACHE_SAMPLING();
 441   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 442
 443   // do mark phase
 444   master_mark(stackptr);
 445
 446   // get large objects from all cores
 447   master_getlargeobjs();
 448
 449   // compact the heap
 450   master_compact();
 451
 452   // update the references
 453   master_updaterefs(stackptr);
 454
 455   // do cache adaptation
 456   CACHEADAPT_PHASE_MASTER();
 457
 458   // do finish up stuff
 459   master_finish();
 460
 461   GC_PRINTF("gc finished   \n");
 462   tprintf("finish GC ! %d \n",gcflag);
 463 }
 464
 465 void pregccheck() {
 466   while(true) {
 467     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 468     gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
 469     gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
 470     int sumsendobj = 0;
 471     for(int i = 0; i < NUMCORESACTIVE; i++) {
 472       sumsendobj += gcnumsendobjs[0][i];
 473     }
 474     for(int i = 0; i < NUMCORESACTIVE; i++) {
 475       sumsendobj -= gcnumreceiveobjs[0][i];
 476     }
 477     if(0 != sumsendobj) {
 478       // there were still some msgs on the fly, wait until there
 479       // are some update pregc information coming and check it again
 480       gcprecheck = false;
 481       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 482
 483       while(!gcprecheck) ;
 484     } else {
 485       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 486       return;
 487     }
 488   }
 489 }
 490
 491 void pregcprocessing() {
 492 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 493   // disable the timer interrupt
 494   bamboo_mask_timer_intr();
 495 #endif
 496 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 497   // get the sampling data
 498   bamboo_output_dtlb_sampling();
 499 #endif
 500 }
 501
 502 void postgcprocessing() {
 503 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
 504   // enable the timer interrupt
 505   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
 506   bamboo_unmask_timer_intr();
 507 #endif
 508 }
 509
 510 bool gc(struct garbagelist * stackptr) {
 511   // check if do gc
 512   if(!gcflag) {
 513     gc_status_info.gcprocessing = false;
 514     return false;
 515   }
 516
 517   // core coordinator routine
 518   if(0 == BAMBOO_NUM_OF_CORE) {
 519     GC_PRINTF("Check if we can do gc or not\n");
 520     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 521     if(!gc_checkAllCoreStatus()) {
 522       // some of the cores are still executing the mutator and did not reach
 523       // some gc safe point, therefore it is not ready to do gc
 524       gcflag = true;
 525       return false;
 526     } else {
 527       GCPROFILE_START();
 528       pregccheck();
 529     }
 530     GC_PRINTF("start gc! \n");
 531     pregcprocessing();
 532     gc_master(stackptr);
 533   } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
 534     pregcprocessing();
 535     gc_collect(stackptr);
 536   } else {
 537     pregcprocessing();
 538     gc_nocollect(stackptr);
 539   }
 540   postgcprocessing();
 541
 542   return true;
 543 }
 544
 545 #endif