#include "multicoregarbage.h"
#include "multicoreruntime.h"
#include "runtime_arch.h"
#include "SimpleHash.h"
#include "GenericHashtable.h"
#include "ObjectHash.h"
#include "GCSharedHash.h"
extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
extern int numqueues[][NUMCLASSES];

extern struct genhashtable * activetasks;
extern struct taskparamdescriptor *currtpd;

extern struct LockValue runtime_locks[MAXTASKPARAMS];
extern int runtime_locklen;

extern unsigned int gcmem_mixed_threshold;
extern unsigned int gcmem_mixed_usedmem;
struct pointerblock {
  void * ptrs[NUMPTRS];
  struct pointerblock *next;
};

struct pointerblock *gchead=NULL;
int gcheadindex=0;
struct pointerblock *gctail=NULL;
int gctailindex=0;
struct pointerblock *gctail2=NULL;
int gctailindex2=0;
struct pointerblock *gcspare=NULL;
#define NUMLOBJPTRS 20

struct lobjpointerblock {
  void * lobjs[NUMLOBJPTRS];
  int lengths[NUMLOBJPTRS];
  int hosts[NUMLOBJPTRS];
  struct lobjpointerblock *next;
  struct lobjpointerblock *prev;
};

struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
struct lobjpointerblock *gclobjspare=NULL;
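/*
 * Editor's note (not in the original source): the mark queue above is a
 * singly-linked list of fixed-size pointerblocks with one cached spare block
 * (gcspare) that avoids repeated RUNMALLOC/RUNFREE churn. The large object
 * queue is doubly linked (next/prev) so it can also be walked backwards (see
 * gc_lobjdequeue3_I below), and each entry records a (ptr, length, host)
 * triple instead of a bare pointer.
 */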
#ifdef GC_CACHE_ADAPT
typedef struct gc_cache_revise_info {
  int orig_page_start_va;
  int orig_page_end_va;
  int orig_page_index;
  int to_page_start_va;
  int to_page_end_va;
  int to_page_index;
  unsigned int revised_sampling[NUMCORESACTIVE];
} gc_cache_revise_info_t;
gc_cache_revise_info_t gc_cache_revise_infomation;
#endif // GC_CACHE_ADAPT
// dump the whole shared mem in blocks
inline void dumpSMem() {
  int block = 0;
  int sblock = 0;
  int j = 0;
  int i = 0;
  int coren = 0;
  int x = 0;
  int y = 0;
  printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  // reserved blocks for sblocktbl
  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
         udn_tile_coord_y());
  for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  sblock = gcreservedsb;
  bool advanceblock = false;
  // remaining memory
  for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
    advanceblock = false;
    // computing sblock # and block #, core coordinate (x,y) also
    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
      // finished a sblock
      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
        if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
          // finished a block
          block++;
          advanceblock = true;
        }
      } else {
        // finished a block
        block++;
        advanceblock = true;
      }
      // compute the host core #
      if(advanceblock) {
        coren = gc_block2core[block%(NUMCORES4GC*2)];
      }
      // compute core coordinate
      BAMBOO_COORDS(coren, &x, &y);
      printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
             udn_tile_coord_x(), udn_tile_coord_y(),
             block, sblock++, x, y,
             (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
    }
    j++;
    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
           udn_tile_coord_x(), udn_tile_coord_y(),
           *((int *)(i)), *((int *)(i + 4)),
           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
  }
  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
} // void dumpSMem()
// should be invoked with interrupts disabled
inline void gc_enqueue_I(void *ptr) {
  GC_BAMBOO_DEBUGPRINT(0xe601);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  if (gcheadindex==NUMPTRS) {
    struct pointerblock * tmp;
    if (gcspare!=NULL) {
      tmp=gcspare;
      gcspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct pointerblock));
    } // if (gcspare!=NULL)
    gchead->next=tmp;
    gchead=tmp;
    gcheadindex=0;
  } // if (gcheadindex==NUMPTRS)
  gchead->ptrs[gcheadindex++]=ptr;
  GC_BAMBOO_DEBUGPRINT(0xe602);
} // void gc_enqueue_I(void *ptr)
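/*
 * Usage sketch (editor's illustration, not part of the original file): a
 * typical round trip on the mark queue. The _I suffix means interrupts must
 * already be disabled, which the runtime-mode transitions below provide;
 * "someobj" is a hypothetical pointer.
 */
#if 0
BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();   // disable interrupts
gc_enqueue_I(someobj);                     // push a discovered object
while(gc_moreItems_I()) {
  void * obj = gc_dequeue_I();             // pop in FIFO order
  /* ... trace obj ... */
}
BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();   // re-enable interrupts
#endif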
// dequeue and destroy the queue
inline void * gc_dequeue_I() {
  if (gctailindex==NUMPTRS) {
    struct pointerblock *tmp=gctail;
    gctail=gctail->next;
    gctailindex=0;
    if (gcspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gcspare=tmp;
    } // if (gcspare!=NULL)
  } // if (gctailindex==NUMPTRS)
  return gctail->ptrs[gctailindex++];
} // void * gc_dequeue_I()
// dequeue and do not destroy the queue
inline void * gc_dequeue2_I() {
  if (gctailindex2==NUMPTRS) {
    gctail2=gctail2->next;
    gctailindex2=0;
  } // if (gctailindex2==NUMPTRS)
  return gctail2->ptrs[gctailindex2++];
} // void * gc_dequeue2_I()
inline int gc_moreItems_I() {
  if ((gchead==gctail)&&(gctailindex==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems_I()

inline int gc_moreItems2_I() {
  if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
    return 0;
  return 1;
} // int gc_moreItems2_I()
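/*
 * Editor's note: the queue keeps two read cursors. gc_dequeue_I /
 * gc_moreItems_I consume through (gctail, gctailindex) and recycle drained
 * blocks, while gc_dequeue2_I / gc_moreItems2_I read through the independent
 * cursor (gctail2, gctailindex2) and leave the blocks intact, so the same
 * contents can be scanned again later without re-enqueueing.
 */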
// should be invoked with interrupts disabled
// enqueue a large obj: start addr & length
inline void gc_lobjenqueue_I(void *ptr,
                             int length,
                             int host) {
  GC_BAMBOO_DEBUGPRINT(0xe901);
  if (gclobjheadindex==NUMLOBJPTRS) {
    struct lobjpointerblock * tmp;
    if (gclobjspare!=NULL) {
      tmp=gclobjspare;
      gclobjspare=NULL;
    } else {
      tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
    } // if (gclobjspare!=NULL)
    gclobjhead->next=tmp;
    tmp->prev = gclobjhead;
    gclobjhead=tmp;
    gclobjheadindex=0;
  } // if (gclobjheadindex==NUMLOBJPTRS)
  gclobjhead->lobjs[gclobjheadindex]=ptr;
  gclobjhead->lengths[gclobjheadindex]=length;
  gclobjhead->hosts[gclobjheadindex++]=host;
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
} // void gc_lobjenqueue_I(void *ptr...)
// dequeue and destroy the queue
inline void * gc_lobjdequeue_I(int * length,
                               int * host) {
  if (gclobjtailindex==NUMLOBJPTRS) {
    struct lobjpointerblock *tmp=gclobjtail;
    gclobjtail=gclobjtail->next;
    gclobjtailindex=0;
    gclobjtail->prev = NULL;
    if (gclobjspare!=NULL) {
      RUNFREE(tmp);
    } else {
      gclobjspare=tmp;
      tmp->next = NULL;
      tmp->prev = NULL;
    } // if (gclobjspare!=NULL)
  } // if (gclobjtailindex==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail->lengths[gclobjtailindex];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail->hosts[gclobjtailindex]);
  }
  return gclobjtail->lobjs[gclobjtailindex++];
} // void * gc_lobjdequeue_I(int * length, int * host)
inline int gc_lobjmoreItems_I() {
  if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems_I()
// dequeue and don't destroy the queue
inline void gc_lobjdequeue2_I() {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=1;
  } else {
    gclobjtailindex2++;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
} // void gc_lobjdequeue2_I()
inline int gc_lobjmoreItems2_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems2_I()
// 'reversely' dequeue and don't destroy the queue
inline void gc_lobjdequeue3_I() {
  if (gclobjtailindex2==0) {
    gclobjtail2=gclobjtail2->prev;
    gclobjtailindex2=NUMLOBJPTRS-1;
  } else {
    gclobjtailindex2--;
  } // if (gclobjtailindex2==0)
} // void gc_lobjdequeue3_I()
inline int gc_lobjmoreItems3_I() {
  if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems3_I()
inline void gc_lobjqueueinit4_I() {
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
} // void gc_lobjqueueinit4_I()
inline void * gc_lobjdequeue4_I(int * length,
                                int * host) {
  if (gclobjtailindex2==NUMLOBJPTRS) {
    gclobjtail2=gclobjtail2->next;
    gclobjtailindex2=0;
  } // if (gclobjtailindex2==NUMLOBJPTRS)
  if(length != NULL) {
    *length = gclobjtail2->lengths[gclobjtailindex2];
  }
  if(host != NULL) {
    *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
  }
  return gclobjtail2->lobjs[gclobjtailindex2++];
} // void * gc_lobjdequeue4_I(int * length, int * host)

inline int gc_lobjmoreItems4_I() {
  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
    return 0;
  return 1;
} // int gc_lobjmoreItems4_I()
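/*
 * Editor's note: the large object queue supports four traversals:
 *   1. destructive FIFO dequeue (gc_lobjdequeue_I), recycling blocks;
 *   2. forward scan via the second cursor (gc_lobjdequeue2_I), used by
 *      cacheLObjs() to size and insertion-sort the queue in place;
 *   3. backward scan (gc_lobjdequeue3_I), used to copy the highest-address
 *      objects to the heap top first;
 *   4. forward scan re-initialized by gc_lobjqueueinit4_I(), used by
 *      moveLObjs() to lay the objects back down in ascending order.
 */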
INTPTR gccurr_heapbound = 0;
inline void gettype_size(void * ptr,
                         int * ttype,
                         int * tsize) {
  int type = ((int *)ptr)[0];
  int size = 0;
  if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)ptr;
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  } // if(type < NUMCLASSES)
  *ttype = type;
  *tsize = size;
} // void gettype_size(void * ptr, int * ttype, int * tsize)
inline bool isLarge(void * ptr,
                    int * ttype,
                    int * tsize) {
  GC_BAMBOO_DEBUGPRINT(0xe701);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  // check if a pointer is referring to a large object
  gettype_size(ptr, ttype, tsize);
  GC_BAMBOO_DEBUGPRINT(*tsize);
  int bound = (BAMBOO_SMEM_SIZE);
  if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
    bound = (BAMBOO_SMEM_SIZE_L);
  }
  if((((int)ptr-gcbaseva)%(bound))==0) {
    // ptr is a start of a block
    GC_BAMBOO_DEBUGPRINT(0xe702);
    GC_BAMBOO_DEBUGPRINT(1);
    return true;
  }
  if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
    // it crosses the boundary of the current block
    GC_BAMBOO_DEBUGPRINT(0xe703);
    GC_BAMBOO_DEBUGPRINT(1);
    return true;
  }
  GC_BAMBOO_DEBUGPRINT(0);
  return false;
} // bool isLarge(void * ptr, int * ttype, int * tsize)
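/*
 * Worked example (editor's sketch with hypothetical sizes): suppose a block
 * is 16KB and an object starts 15KB into it. Any object larger than the
 * remaining 1KB makes (bound - ((ptr-gcbaseva)%bound)) < *tsize true, so the
 * object would cross the block boundary and is classified as "large".
 * Objects that sit exactly on a block start are treated the same way, since
 * only large objects are placed at block boundaries by the allocator.
 */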
inline int hostcore(void * ptr) {
  // check the host core of ptr
  int host = 0;
  RESIDECORE(ptr, &host);
  GC_BAMBOO_DEBUGPRINT(0xedd0);
  GC_BAMBOO_DEBUGPRINT_REG(ptr);
  GC_BAMBOO_DEBUGPRINT_REG(host);
  return host;
} // int hostcore(void * ptr)
inline void cpu2coords(int coren,
                       int * x,
                       int * y) {
  *x = bamboo_cpu2coords[2*coren];
  *y = bamboo_cpu2coords[2*coren+1];
} // void cpu2coords(...)

inline bool isLocal(void * ptr) {
  // check if a pointer points into this core's share of the shared heap
  return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
} // bool isLocal(void * ptr)
inline bool gc_checkCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORES4GC; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORES4GC; ++i)
  return allStall;
} // bool gc_checkCoreStatus_I()

inline bool gc_checkAllCoreStatus_I() {
  bool allStall = true;
  for(int i = 0; i < NUMCORESACTIVE; ++i) {
    if(gccorestatus[i] != 0) {
      allStall = false;
      break;
    } // if(gccorestatus[i] != 0)
  } // for(i = 0; i < NUMCORESACTIVE; ++i)
  return allStall;
} // bool gc_checkAllCoreStatus_I()
inline void checkMarkStatue() {
  GC_BAMBOO_DEBUGPRINT(0xee01);
  int i;
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    GC_BAMBOO_DEBUGPRINT(0xee02);
    int entry_index = 0;
    if(waitconfirm) {
      // phase 2
      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
    } else {
      // phase 1
      entry_index = gcnumsrobjs_index;
    }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    bool allStall = gc_checkAllCoreStatus_I();
    GC_BAMBOO_DEBUGPRINT(0xee03);
    if(allStall) {
      GC_BAMBOO_DEBUGPRINT(0xee04);
      // ask for confirmation
      if(!waitconfirm) {
        GC_BAMBOO_DEBUGPRINT(0xee05);
        // the first time found all cores stall
        // send out status confirm msg to all other cores
        // reset the corestatus array too
        gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
        waitconfirm = true;
        numconfirm = NUMCORESACTIVE - 1;
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        for(i = 1; i < NUMCORESACTIVE; ++i) {
          gccorestatus[i] = 1;
          // send mark phase finish confirm request msg to core i
          send_msg_1(i, GCMARKCONFIRM, false);
        } // for(i = 1; i < NUMCORESACTIVE; ++i)
      } else {
        // check if the sums of sent objs and received objs are the same
        // yes->check if the info is the latest; no->go on executing
        int sumsendobj = 0;
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        GC_BAMBOO_DEBUGPRINT(0xee06);
        GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
        for(i = 0; i < NUMCORESACTIVE; ++i) {
          sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
        } // for(i = 0; i < NUMCORESACTIVE; ++i)
        GC_BAMBOO_DEBUGPRINT(0xee07);
        GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
        if(0 == sumsendobj) {
          // Check if there are changes of the numsendobjs or numreceiveobjs
          // on each core
          bool ischanged = false;
          for(i = 0; i < NUMCORESACTIVE; ++i) {
            if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
               (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
              ischanged = true;
              break;
            }
          } // for(i = 0; i < NUMCORESACTIVE; ++i)
          GC_BAMBOO_DEBUGPRINT(0xee08);
          GC_BAMBOO_DEBUGPRINT_REG(ischanged);
          if(!ischanged) {
            GC_BAMBOO_DEBUGPRINT(0xee09);
            // all the core status info are the latest
            // stop the mark phase
            gcphase = COMPACTPHASE;
            // restore the gcstatus for all cores
            for(i = 0; i < NUMCORESACTIVE; ++i) {
              gccorestatus[i] = 1;
            } // for(i = 0; i < NUMCORESACTIVE; ++i)
          } else {
            waitconfirm = false;
            gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
          }
        } else {
          // There were changes between phase 1 and phase 2, can not decide
          // whether the mark phase has been finished
          waitconfirm = false;
          // As it fails in phase 2, flip the entries
          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        } // if(0 == sumsendobj) else ...
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      } // if(!gcwaitconfirm) else()
    } else {
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    }
  } // if((!waitconfirm)...
  GC_BAMBOO_DEBUGPRINT(0xee0a);
} // void checkMarkStatue()
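/*
 * Editor's note on the routine above: a single all-cores-stalled snapshot is
 * not enough to end the mark phase, because a mark message may still be in
 * flight. The coordinator therefore samples the per-core send/receive
 * counters twice (the two rows of gcnumsendobjs/gcnumreceiveobjs, selected by
 * gcnumsrobjs_index). Only when sum(sent) == sum(received) AND both samples
 * agree per core can no message be in flight, so marking is declared done and
 * gcphase advances to COMPACTPHASE.
 */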
inline void initGC() {
  int i;
  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
    for(i = 0; i < NUMCORES4GC; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
      gcloads[i] = 0;
      gcrequiredmems[i] = 0;
      gcfilledblocks[i] = 0;
      gcstopblock[i] = 0;
    } // for(i = 0; i < NUMCORES4GC; ++i)
    for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
      gccorestatus[i] = 1;
      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
    }
  } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
  gcself_numsendobjs = 0;
  gcself_numreceiveobjs = 0;
  gcmarkedptrbound = 0;
  gcmovestartaddr = 0;
  gctomove = false;
  gcblock2fill = 0;
  gcmovepending = 0;
  gccurr_heaptop = 0;
  gcdstcore = 0;

  // initialize the mark queue
  if (gchead==NULL) {
    gcheadindex=gctailindex=gctailindex2 = 0;
    gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
  } else {
    gctailindex = gctailindex2 = gcheadindex;
    gctail = gctail2 = gchead;
  }

  // initialize the large obj queues
  if (gclobjhead==NULL) {
    gclobjheadindex = 0;
    gclobjtailindex = 0;
    gclobjtailindex2 = 0;
    gclobjhead=gclobjtail=gclobjtail2=
      RUNMALLOC(sizeof(struct lobjpointerblock));
  } else {
    gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
    gclobjtail = gclobjtail2 = gclobjhead;
  }
  gclobjhead->next = gclobjhead->prev = NULL;

#ifdef LOCALHASHTBL_TEST
  freeRuntimeHash(gcpointertbl);
  gcpointertbl = allocateRuntimeHash(20);
#else
  mgchashreset(gcpointertbl);
#endif

  freeMGCHash(gcforwardobjtbl);
  gcforwardobjtbl = allocateMGCHash(20, 3);

  // initialize the mapping info related structures
  if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
    // Never free the shared hash table, just reset it
    mgcsharedhashReset(gcsharedptbl);
  }
#ifdef GC_PROFILE
  gc_num_livespace = 0;
  gc_num_freespace = 0;
  gc_num_lobj = 0;
  gc_num_lobjspace = 0;
  gc_num_liveobj = 0;
  gc_num_forwardobj = 0;
  gc_num_profiles = NUMCORESACTIVE - 1;
#endif
} // void initGC()
// compute load balance for all cores
inline int loadbalance(int * heaptop) {
  // compute load balance
  int i;

  // get the total loads
  int tloads = gcloads[STARTUPCORE];
  for(i = 1; i < NUMCORES4GC; i++) {
    tloads += gcloads[i];
  }
  *heaptop = gcbaseva + tloads;

  GC_BAMBOO_DEBUGPRINT(0xdddd);
  GC_BAMBOO_DEBUGPRINT_REG(tloads);
  GC_BAMBOO_DEBUGPRINT_REG(*heaptop);
  int b = 0;
  BLOCKINDEX(*heaptop, &b);
  int numbpc = b / NUMCORES4GC; // num of blocks per core
  GC_BAMBOO_DEBUGPRINT_REG(b);
  GC_BAMBOO_DEBUGPRINT_REG(numbpc);
  gctopblock = b;
  RESIDECORE(*heaptop, &gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  return numbpc;
} // int loadbalance(int * heaptop)
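/*
 * Worked example (editor's sketch, hypothetical numbers): with 4 GC cores and
 * total live loads equal to 10 blocks' worth of data, *heaptop lands 10
 * blocks above gcbaseva, BLOCKINDEX maps it to block b = 10, and
 * numbpc = 10/4 = 2 blocks per core becomes the target share each core
 * should hold after compaction; gctopcore is the core whose region contains
 * the resulting heap top.
 */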
inline bool cacheLObjs() {
  // check the total mem size needed for large objs
  unsigned long long sumsize = 0;
  int size = 0;
  GC_BAMBOO_DEBUGPRINT(0xe801);
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  int tmp_lobj = 0;
  int tmp_len = 0;
  int tmp_host = 0;
  // compute total mem size required and sort the lobjs in ascending order
  // TODO USE QUICK SORT INSTEAD?
  while(gc_lobjmoreItems2_I()) {
    gc_lobjdequeue2_I();
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    sumsize += tmp_len;
#ifdef GC_PROFILE
    gc_num_lobj++;
#endif
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
    GC_BAMBOO_DEBUGPRINT_REG(tmp_len);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert
    while(true) {
      if(i == 0) {
        if(tmp_block->prev == NULL) {
          break;
        }
        if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
          tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
          tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
          tmp_block = tmp_block->prev;
          i = NUMLOBJPTRS-1;
        } else {
          break;
        } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
      } else {
        if(tmp_block->lobjs[i-1] > tmp_lobj) {
          tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
          tmp_block->lengths[i] = tmp_block->lengths[i-1];
          tmp_block->hosts[i] = tmp_block->hosts[i-1];
          i--;
        } else {
          break;
        } // if(tmp_block->lobjs[i-1] < tmp_lobj)
      } // if(i ==0 ) else {}
    } // while(true)
    // insert it
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
  } // while(gc_lobjmoreItems2())

#ifdef GC_PROFILE
  gc_num_lobjspace = sumsize;
#endif
  // check if there is enough space to cache these large objs
  INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // do not have enough room to cache large objs
    GC_BAMBOO_DEBUGPRINT(0xe802);
    GC_BAMBOO_DEBUGPRINT_REG(dst);
    GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    return false;
  }
  GC_BAMBOO_DEBUGPRINT(0xe803);
  GC_BAMBOO_DEBUGPRINT_REG(dst);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
  // cache the largeObjs to the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  while(gc_lobjmoreItems3_I()) {
    gc_lobjdequeue3_I();
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and needs to be flushed
    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
    dst -= size;
    if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
    GC_BAMBOO_DEBUGPRINT(0x804);
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
    GC_BAMBOO_DEBUGPRINT(dst);
    GC_BAMBOO_DEBUGPRINT_REG(size);
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
  }
  return true;
} // bool cacheLObjs()
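/*
 * Editor's note: cacheLObjs() parks every large object in one contiguous run
 * at the very top of the shared heap so the compact phase can slide small
 * objects freely; moveLObjs() later lays them back down just above the
 * compacted data. Keeping the queue insertion-sorted by ascending start
 * address means the backward scan above copies the highest-address objects
 * first, so source and destination can only overlap in one direction, which
 * the memmove() fallback handles.
 */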
// update the bamboo_smemtbl to record current shared mem usage
void updateSmemTbl(int coren,
                   int localtop) {
  int ltopcore = 0;
  int bound = BAMBOO_SMEM_SIZE_L;
  BLOCKINDEX(localtop, &ltopcore);
  if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
    bound = BAMBOO_SMEM_SIZE;
  }
  int load = (localtop-gcbaseva)%bound;
  int i = 0;
  int j = 0;
  int toset = 0;
  do {
    toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
    if(toset < ltopcore) {
      bamboo_smemtbl[toset]=
        (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
    } else if(toset == ltopcore) {
      bamboo_smemtbl[toset] = load;
#ifdef SMEMM
      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
#endif
      break;
    } else {
      break;
    }
    i++;
    if(i == 2) {
      i = 0;
      j++;
    }
  } while(true);
} // void updateSmemTbl(int, int)
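/*
 * Worked example (editor's sketch): gc_core2block[2*coren+i] gives the two
 * blocks assigned to core coren inside one stripe of NUMCORES4GC*2 blocks,
 * and adding (NUMCORES4GC*2)*j steps stripe by stripe. With 4 GC cores,
 * core 1 visits blocks gc_core2block[2], gc_core2block[3],
 * gc_core2block[2]+8, gc_core2block[3]+8, ... marking each fully used until
 * it reaches the block holding localtop, which gets the partial load.
 */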
inline void moveLObjs() {
  GC_BAMBOO_DEBUGPRINT(0xea01);
#ifdef SMEMM
  // update the gcmem_mixed_usedmem
  gcmem_mixed_usedmem = 0;
#endif
  // zero out the smemtbl
  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
  // find current heap top
  // flush all gcloads to indicate the real heap top on one core
  // previously it represented the next available ptr on a core
  if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
     && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
    // edge of a block, check if this is exactly the heaptop
    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
    gcloads[0]+=(gcfilledblocks[0]>1 ?
                 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
  }
  updateSmemTbl(0, gcloads[0]);
  GC_BAMBOO_DEBUGPRINT(0xea02);
  GC_BAMBOO_DEBUGPRINT_REG(gcloads[0]);
  GC_BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
  for(int i = 1; i < NUMCORES4GC; i++) {
    GC_BAMBOO_DEBUGPRINT(0xf000+i);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
    if((gcfilledblocks[i] > 0)
       && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
      // edge of a block, check if this is exactly the heaptop
      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
      gcloads[i] +=
        (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
    }
    updateSmemTbl(i, gcloads[i]);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
  } // for(int i = 1; i < NUMCORES4GC; i++) {
  // find current heap top
  // TODO
  // a bug here: when using local allocation, directly moving large objects
  // to the highest free chunk might not be memory efficient
  int tmpheaptop = 0;
  int size = 0;
  int bound = 0;
  int i = 0;
  for(i = gcnumblock-1; i >= 0; i--) {
    if(bamboo_smemtbl[i] > 0) {
      break;
    }
  }
  if(i == -1) {
    tmpheaptop = gcbaseva;
  } else {
    tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
        (BAMBOO_SMEM_SIZE_L*i) :
        (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
  }

  // move large objs from gcheaptop to tmpheaptop
  // write the header first
  unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
#ifdef SMEMM
  gcmem_mixed_usedmem += tomove;
#endif
  GC_BAMBOO_DEBUGPRINT(0xea03);
  GC_BAMBOO_DEBUGPRINT_REG(tomove);
  GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
  // flush the gcsbstarttbl
  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
      (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
  if(tomove == 0) {
    gcheaptop = tmpheaptop;
  } else {
    // check how many blocks it crosses
    int remain = tmpheaptop-gcbaseva;
    int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; //number of the sblock
    int b = 0;  // number of the block
    BLOCKINDEX(tmpheaptop, &b);
    // check the remaining space in this block
    bound = (BAMBOO_SMEM_SIZE);
    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
      bound = (BAMBOO_SMEM_SIZE_L);
    }
    remain = bound - remain%bound;

    GC_BAMBOO_DEBUGPRINT(0xea04);
    int isize = 0;
    int host = 0;
    int ptr = 0;
    int base = tmpheaptop;
    int cpysize = 0;
    remain -= BAMBOO_CACHE_LINE_SIZE;
    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
    gc_lobjqueueinit4_I();
    while(gc_lobjmoreItems4_I()) {
      ptr = (int)(gc_lobjdequeue4_I(&size, &host));
      ALIGNSIZE(size, &isize);
      if(remain < isize) {
        // this object crosses blocks
        if(cpysize > 0) {
          // close current block, fill its header
          BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
          *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
          bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE; //add the size of header
          cpysize = 0;
          base = tmpheaptop;
          if(remain == 0) {
            remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                     BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          }
          remain -= BAMBOO_CACHE_LINE_SIZE;
          tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
          BLOCKINDEX(tmpheaptop, &b);
          sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
        } // if(cpysize > 0)

        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        GC_BAMBOO_DEBUGPRINT(0xea05);
        GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(size);
        GC_BAMBOO_DEBUGPRINT_REG(isize);
        GC_BAMBOO_DEBUGPRINT_REG(base);
        gcheaptop += size;
        // cache the mapping info anyway
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        GC_BAMBOO_DEBUGPRINT(0xcdca);
        GC_BAMBOO_DEBUGPRINT_REG(ptr);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        if(host != BAMBOO_NUM_OF_CORE) {
          // send the original host core the mapping info
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          GC_BAMBOO_DEBUGPRINT(0xcdcb);
          GC_BAMBOO_DEBUGPRINT_REG(ptr);
          GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        tmpheaptop += isize;

        // set the gcsbstarttbl and bamboo_smemtbl
        int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
        for(int k = 1; k < tmpsbs; k++) {
          gcsbstarttbl[sb+k] = (INTPTR)(-1);
        }
        sb += tmpsbs;
        bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
        BLOCKINDEX(tmpheaptop-1, &tmpsbs);
        for(; b < tmpsbs; b++) {
          bamboo_smemtbl[b] = bound;
          if(b==NUMCORES4GC-1) {
            bound = BAMBOO_SMEM_SIZE;
          }
        }
        if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
          // exactly on a block boundary
          gcsbstarttbl[sb] = (INTPTR)(-1);
          remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
                   BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
          bamboo_smemtbl[b] = bound;
        } else {
          gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
          remain = tmpheaptop-gcbaseva;
          bamboo_smemtbl[b] = remain%bound;
          remain = bound - bamboo_smemtbl[b];
        } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
        // close current block and fill the header
        BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
        *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
        cpysize = 0;
        base = tmpheaptop;
        if(remain == BAMBOO_CACHE_LINE_SIZE) {
          // fill with 0 in case
          BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
        }
        remain -= BAMBOO_CACHE_LINE_SIZE;
        tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
      } else {
        remain -= isize;
        // move the large obj
        if((int)gcheaptop < (int)(tmpheaptop)+size) {
          memmove(tmpheaptop, gcheaptop, size);
        } else {
          memcpy(tmpheaptop, gcheaptop, size);
        }
        // fill the remaining space with -2 padding
        BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
        GC_BAMBOO_DEBUGPRINT(0xea06);
        GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(size);
        GC_BAMBOO_DEBUGPRINT_REG(isize);
        gcheaptop += size;
        cpysize += isize;
        // cache the mapping info anyway
        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
        RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
#else
        mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
#endif
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        GC_BAMBOO_DEBUGPRINT(0xcdcc);
        GC_BAMBOO_DEBUGPRINT_REG(ptr);
        GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
        if(host != BAMBOO_NUM_OF_CORE) {
          // send the original host core the mapping info
          send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
          GC_BAMBOO_DEBUGPRINT(0xcdcd);
          GC_BAMBOO_DEBUGPRINT_REG(ptr);
          GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
        } // if(host != BAMBOO_NUM_OF_CORE)
        tmpheaptop += isize;

        // update bamboo_smemtbl
        bamboo_smemtbl[b] += isize;
      } // if(remain < isize) else ...
    } // while(gc_lobjmoreItems())
    if(cpysize > 0) {
      // close current block, fill the header
      BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
      *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
    } else {
      tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
    }
    gcheaptop = tmpheaptop;
  } // if(tomove == 0) else ...

  GC_BAMBOO_DEBUGPRINT(0xea07);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  bamboo_free_block = 0;
  int tbound = 0;
  do {
    tbound = (bamboo_free_block<NUMCORES4GC) ?
             BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
      bamboo_free_block++;
    } else {
      // the first non-full partition
      break;
    }
  } while(true);

#ifdef GC_PROFILE
  // check how much live space there is
  gc_num_livespace = 0;
  for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
    gc_num_livespace += bamboo_smemtbl[tmpi];
  }
  gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
#endif // GC_PROFILE
  GC_BAMBOO_DEBUGPRINT(0xea08);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
} // void moveLObjs()
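/*
 * Editor's note: after moveLObjs() the shared heap reads, bottom to top:
 * compacted small objects per core, then the relocated large objects (each
 * run prefixed by a BAMBOO_CACHE_LINE_SIZE header holding the used size),
 * then free space. gcheaptop marks the end of live data and
 * bamboo_free_block the first partially free block for the next allocation
 * round.
 */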
inline void markObj(void * objptr) {
  if(objptr == NULL) {
    return;
  }
  if(ISSHAREDOBJ(objptr)) {
    int host = hostcore(objptr);
    if(BAMBOO_NUM_OF_CORE == host) {
      // on this core
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      if(((int *)objptr)[6] == INIT) {
        // this is the first time that this object is discovered,
        // set the flag as DISCOVERED
        ((int *)objptr)[6] |= DISCOVERED;
        BAMBOO_CACHE_FLUSH_LINE(objptr);
        gc_enqueue_I(objptr);
      }
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } else {
      GC_BAMBOO_DEBUGPRINT(0xbbbb);
      GC_BAMBOO_DEBUGPRINT_REG(host);
      GC_BAMBOO_DEBUGPRINT_REG(objptr);
      // check if this obj has been forwarded
      if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
        // send a msg to host informing that objptr is active
        send_msg_2(host, GCMARKEDOBJ, objptr, false);
#ifdef GC_PROFILE
        gc_num_forwardobj++;
#endif // GC_PROFILE
        gcself_numsendobjs++;
        MGCHashadd(gcforwardobjtbl, (int)objptr);
      }
    }
  } else {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    gc_enqueue_I(objptr);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // if(ISSHAREDOBJ(objptr))
} // void markObj(void * objptr)
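/*
 * Editor's sketch of the header-word protocol used above (word [6] of every
 * object): INIT -> DISCOVERED when first reached (object is queued),
 * DISCOVERED -> MARKED when traced in mark(), MARKED -> COMPACTED once the
 * object has been copied. Remote references never touch the header directly;
 * they send a GCMARKEDOBJ message to the host core, deduplicated through
 * gcforwardobjtbl.
 */
#if 0
// Hypothetical helper showing the same local test markObj performs:
static inline bool needsEnqueue(void * objptr) {
  return ((int *)objptr)[6] == INIT;   // neither discovered nor marked yet
}
#endif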
// enqueue root objs
inline void tomark(struct garbagelist * stackptr) {
  if(MARKPHASE != gcphase) {
    GC_BAMBOO_DEBUGPRINT_REG(gcphase);
    BAMBOO_EXIT(0xb001);
  }
  gcbusystatus = true;

  int i,j;
  // enqueue current stack
  while(stackptr!=NULL) {
    GC_BAMBOO_DEBUGPRINT(0xe501);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->size);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->next);
    GC_BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
    for(i=0; i<stackptr->size; i++) {
      if(stackptr->array[i] != NULL) {
        markObj(stackptr->array[i]);
      }
    }
    stackptr=stackptr->next;
  }

  GC_BAMBOO_DEBUGPRINT(0xe503);
  // enqueue objectsets
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    for(i=0; i<NUMCLASSES; i++) {
      struct parameterwrapper ** queues =
        objectqueues[BAMBOO_NUM_OF_CORE][i];
      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
      for(j = 0; j < length; ++j) {
        struct parameterwrapper * parameter = queues[j];
        struct ObjectHash * set=parameter->objectset;
        struct ObjectNode * ptr=set->listhead;
        while(ptr!=NULL) {
          markObj((void *)ptr->key);
          ptr=ptr->lnext;
        }
      }
    }
  }
  // enqueue current task descriptor
  if(currtpd != NULL) {
    GC_BAMBOO_DEBUGPRINT(0xe504);
    for(i=0; i<currtpd->numParameters; i++) {
      markObj(currtpd->parameterArray[i]);
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe505);
  // enqueue active tasks
  if(activetasks != NULL) {
    struct genpointerlist * ptr=activetasks->list;
    while(ptr!=NULL) {
      struct taskparamdescriptor *tpd=ptr->src;
      for(i=0; i<tpd->numParameters; i++) {
        markObj(tpd->parameterArray[i]);
      }
      ptr=ptr->inext;
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe506);
  // enqueue cached transferred objs
  struct QueueItem * tmpobjptr = getHead(&objqueue);
  while(tmpobjptr != NULL) {
    struct transObjInfo * objInfo =
      (struct transObjInfo *)(tmpobjptr->objectptr);
    markObj(objInfo->objptr);
    tmpobjptr = getNextQueueItem(tmpobjptr);
  }

  GC_BAMBOO_DEBUGPRINT(0xe507);
  // enqueue cached objs to be transferred
  struct QueueItem * item = getHead(totransobjqueue);
  while(item != NULL) {
    struct transObjInfo * totransobj =
      (struct transObjInfo *)(item->objectptr);
    markObj(totransobj->objptr);
    item = getNextQueueItem(item);
  } // while(item != NULL)

  GC_BAMBOO_DEBUGPRINT(0xe508);
  // enqueue lock related info
  for(i = 0; i < runtime_locklen; ++i) {
    markObj((void *)(runtime_locks[i].redirectlock));
    if(runtime_locks[i].value != NULL) {
      markObj((void *)(runtime_locks[i].value));
    }
  }
} // void tomark(struct garbagelist * stackptr)
inline void mark(bool isfirst,
                 struct garbagelist * stackptr) {
  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed01);
  if(isfirst) {
    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed02);
    // enqueue root objs
    tomark(stackptr);
    gccurr_heaptop = 0; // record the size of all active objs in this core
                        // aligned but does not consider block boundaries
    gcmarkedptrbound = 0;
  }
  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed03);

  bool checkfield = true;
  bool sendStall = false;
  // mark phase
  while(MARKPHASE == gcphase) {
    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed04);
    while(true) {
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      bool hasItems = gc_moreItems2_I();
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT(0xed05);
      if(!hasItems) {
        break;
      }
      sendStall = false;
      gcbusystatus = true;
      checkfield = true;
      void * ptr = gc_dequeue2_I();

      GC_BAMBOO_DEBUGPRINT_REG(ptr);
      int size = 0;
      int isize = 0;
      int type = 0;
      // check if it is a shared obj
      if(ISSHAREDOBJ(ptr)) {
        // a shared obj, check if it is a local obj on this core
        int host = hostcore(ptr);
        bool islocal = (host == BAMBOO_NUM_OF_CORE);
        if(islocal) {
          bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
          if(isLarge(ptr, &type, &size) && isnotmarked) {
            // ptr is a large object and not marked or enqueued
            GC_BAMBOO_DEBUGPRINT(0xecec);
            GC_BAMBOO_DEBUGPRINT_REG(ptr);
            GC_BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
            BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
            gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
            BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
            // mark this obj
            ((int *)ptr)[6] = (((int *)ptr)[6] & (~DISCOVERED)) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);
          } else if(isnotmarked) {
            // ptr is an unmarked active object on this core
            ALIGNSIZE(size, &isize);
            gccurr_heaptop += isize;
            GC_BAMBOO_DEBUGPRINT(0xaaaa);
            GC_BAMBOO_DEBUGPRINT_REG(ptr);
            GC_BAMBOO_DEBUGPRINT_REG(isize);
            GC_BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
            // mark this obj
            ((int *)ptr)[6] = (((int *)ptr)[6] & (~DISCOVERED)) | MARKED;
            BAMBOO_CACHE_FLUSH_LINE(ptr);

            if(ptr + size > gcmarkedptrbound) {
              gcmarkedptrbound = ptr + size;
            } // if(ptr + size > gcmarkedptrbound)
          } else {
            // ptr is not an active obj or has been marked
            checkfield = false;
          } // if(isLarge(ptr, &type, &size)) else ...
        } /* can never reach here
        else {
          // check if this obj has been forwarded
          if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
            // send a msg to host informing that ptr is active
            send_msg_2(host, GCMARKEDOBJ, ptr, false);
            gcself_numsendobjs++;
            MGCHashadd(gcforwardobjtbl, (int)ptr);
          }
          checkfield = false;
        }// if(isLocal(ptr)) else ...*/
      } // if(ISSHAREDOBJ(ptr))
      GC_BAMBOO_DEBUGPRINT(0xed06);

      if(checkfield) {
        // scan all pointers in ptr
        unsigned INTPTR * pointer;
        pointer=pointerarray[type];
        if (pointer==0) {
          /* Array of primitives */
          /* Do nothing */
        } else if (((INTPTR)pointer)==1) {
          /* Array of pointers */
          struct ArrayObject *ao=(struct ArrayObject *) ptr;
          int length=ao->___length___;
          int j;
          for(j=0; j<length; j++) {
            void * objptr =
              ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
            markObj(objptr);
          }
        } else {
          INTPTR size=pointer[0];
          int i;
          for(i=1; i<=size; i++) {
            unsigned int offset=pointer[i];
            void * objptr=*((void **)(((char *)ptr)+offset));
            markObj(objptr);
          }
        } // if (pointer==0) else if ... else ...
      } // if(checkfield)
    } // while(gc_moreItems2())
    GC_BAMBOO_DEBUGPRINT(0xed07);
    gcbusystatus = false;
    // send mark finish msg to core coordinator
    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
      GC_BAMBOO_DEBUGPRINT(0xed08);
      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
      gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
      gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
        gcself_numreceiveobjs;
      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
    } else {
      if(!sendStall) {
        GC_BAMBOO_DEBUGPRINT(0xed09);
        send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
                   gcself_numsendobjs, gcself_numreceiveobjs, false);
        sendStall = true;
      }
    } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
    GC_BAMBOO_DEBUGPRINT(0xed0a);

    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
      GC_BAMBOO_DEBUGPRINT(0xed0b);
      return;
    }
  } // while(MARKPHASE == gcphase)
} // void mark(bool isfirst, struct garbagelist * stackptr)
inline void compact2Heaptophelper_I(int coren,
                                    int* p,
                                    int* numblocks,
                                    int* remain) {
  int b;
  int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
  if(STARTUPCORE == coren) {
    gctomove = true;
    gcmovestartaddr = *p;
    gcdstcore = gctopcore;
    gcblock2fill = *numblocks + 1;
  } else {
    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
  }
  GC_BAMBOO_DEBUGPRINT_REG(coren);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(*p);
  GC_BAMBOO_DEBUGPRINT_REG(*numblocks+1);
  if(memneed < *remain) {
    GC_BAMBOO_DEBUGPRINT(0xd104);
    *p = *p + memneed;
    gcrequiredmems[coren] = 0;
    gcloads[gctopcore] += memneed;
    *remain = *remain - memneed;
  } else {
    GC_BAMBOO_DEBUGPRINT(0xd105);
    // next available block
    *p += *remain;
    gcfilledblocks[gctopcore] += 1;
    int newbase = 0;
    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
    gcloads[gctopcore] = newbase;
    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
    gcstopblock[gctopcore]++;
    gctopcore = NEXTTOPCORE(gctopblock);
    gctopblock++;
    *numblocks = gcstopblock[gctopcore];
    *p = gcloads[gctopcore];
    BLOCKINDEX(*p, &b);
    *remain=(b<NUMCORES4GC) ?
             ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
      : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
    GC_BAMBOO_DEBUGPRINT(0xd106);
    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
    GC_BAMBOO_DEBUGPRINT_REG(*p);
    GC_BAMBOO_DEBUGPRINT_REG(b);
    GC_BAMBOO_DEBUGPRINT_REG(*remain);
  } // if(memneed < remain)
  gcmovepending--;
} // void compact2Heaptophelper_I(int, int*, int*, int*)
inline void compact2Heaptop() {
  // no cores with spare mem and some cores are blocked with pending move
  // find the current heap top and make them move to the heap top
  int p;
  int numblocks = gcfilledblocks[gctopcore];
  p = gcloads[gctopcore];
  int b;
  BLOCKINDEX(p, &b);
  int remain = (b<NUMCORES4GC) ?
               ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
               : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
  // check if the top core finishes
  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
  if(gccorestatus[gctopcore] != 0) {
    GC_BAMBOO_DEBUGPRINT(0xd101);
    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
    // let the top core finish its own work first
    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    return;
  }
  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();

  GC_BAMBOO_DEBUGPRINT(0xd102);
  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
  GC_BAMBOO_DEBUGPRINT_REG(p);
  GC_BAMBOO_DEBUGPRINT_REG(b);
  GC_BAMBOO_DEBUGPRINT_REG(remain);
  for(int i = 0; i < NUMCORES4GC; i++) {
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
      GC_BAMBOO_DEBUGPRINT(0xd103);
      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
      if(gccorestatus[gctopcore] != 0) {
        GC_BAMBOO_DEBUGPRINT(0xd101);
        GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
        // the top core is not free now
        return;
      }
    } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
  } // for(i = 0; i < NUMCORES4GC; i++)
  GC_BAMBOO_DEBUGPRINT(0xd106);
} // void compact2Heaptop()
inline void resolvePendingMoveRequest() {
  GC_BAMBOO_DEBUGPRINT(0xeb01);
  GC_BAMBOO_DEBUGPRINT(0xeeee);
  for(int k = 0; k < NUMCORES4GC; k++) {
    GC_BAMBOO_DEBUGPRINT(0xf000+k);
    GC_BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcloads[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
    GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
  }
  GC_BAMBOO_DEBUGPRINT(0xffff);
  int i;
  int j;
  bool nosparemem = true;
  bool haspending = false;
  bool hasrunning = false;
  bool noblock = false;
  int dstcore = 0;       // the core that needs spare mem
  int sourcecore = 0;    // the core that has spare mem
  for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
    if(nosparemem) {
      // check if there are cores with spare mem
      if(gccorestatus[i] == 0) {
        // finished working, check if it still has spare mem
        if(gcfilledblocks[i] < gcstopblock[i]) {
          // still has spare mem
          nosparemem = false;
          sourcecore = i;
        } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
      }
      i++;
    } // if(nosparemem)
    if(!haspending) {
      if(gccorestatus[j] != 0) {
        // not finished, check if it has pending move requests
        if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
          dstcore = j;
          haspending = true;
        } else {
          hasrunning = true;
        } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
      } // if(gccorestatus[i] == 0) else ...
      j++;
    } // if(!haspending)
    if(!nosparemem && haspending) {
      // find match
      int tomove = 0;
      int startaddr = 0;
      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
      gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
                                                 gcrequiredmems[dstcore],
                                                 &tomove,
                                                 &startaddr);
      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      GC_BAMBOO_DEBUGPRINT(0xeb02);
      GC_BAMBOO_DEBUGPRINT_REG(sourcecore);
      GC_BAMBOO_DEBUGPRINT_REG(dstcore);
      GC_BAMBOO_DEBUGPRINT_REG(startaddr);
      GC_BAMBOO_DEBUGPRINT_REG(tomove);
      if(STARTUPCORE == dstcore) {
        GC_BAMBOO_DEBUGPRINT(0xeb03);
        gcdstcore = sourcecore;
        gctomove = true;
        gcmovestartaddr = startaddr;
        gcblock2fill = tomove;
      } else {
        GC_BAMBOO_DEBUGPRINT(0xeb04);
        send_msg_4(dstcore, GCMOVESTART, sourcecore,
                   startaddr, tomove, false);
      }
      gcmovepending--;
      nosparemem = true;
      haspending = false;
      noblock = true;
    }
  } // for(i = 0; i < NUMCORES4GC; i++)
  GC_BAMBOO_DEBUGPRINT(0xcccc);
  GC_BAMBOO_DEBUGPRINT_REG(hasrunning);
  GC_BAMBOO_DEBUGPRINT_REG(haspending);
  GC_BAMBOO_DEBUGPRINT_REG(noblock);

  if(!hasrunning && !noblock) {
    gcphase = SUBTLECOMPACTPHASE;
    compact2Heaptop();
  }
} // void resolvePendingMoveRequest()
struct moveHelper {
  int numblocks;       // block num for heap
  INTPTR base;         // base virtual address of current heap block
  INTPTR ptr;          // virtual address of current heap top
  int offset;          // offset in current heap block
  int blockbase;       // virtual address of current small block to check
  int blockbound;      // bound virtual address of current small block
  int sblockindex;     // index of the small blocks
  int top;             // real size of current heap block to check
  int bound;           // bound size of current heap block to check
}; // struct moveHelper
// If out of boundary of valid shared memory, return false, else return true
inline bool nextSBlock(struct moveHelper * orig) {
  orig->blockbase = orig->blockbound;

  bool sbchanged = false;
  INTPTR origptr = orig->ptr;
  int blockbase = orig->blockbase;
  int blockbound = orig->blockbound;
  int bound = orig->bound;
  GC_BAMBOO_DEBUGPRINT(0xecc0);
  GC_BAMBOO_DEBUGPRINT_REG(blockbase);
  GC_BAMBOO_DEBUGPRINT_REG(blockbound);
  GC_BAMBOO_DEBUGPRINT_REG(bound);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
outernextSBlock:
  // refresh the cached fields in case we looped back here
  origptr = orig->ptr;
  blockbase = orig->blockbase;
  bound = orig->bound;
  // check if across a big block
  // TODO now do not zero out the whole memory, maybe the last two conditions
  // are useless now
  if((blockbase>=bound)||(origptr>=bound)
     ||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
innernextSBlock:
    // end of current heap block, jump to next one
    orig->numblocks++;
    GC_BAMBOO_DEBUGPRINT(0xecc1);
    GC_BAMBOO_DEBUGPRINT_REG(orig->numblocks);
    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
    GC_BAMBOO_DEBUGPRINT(orig->base);
    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
      // out of boundary
      orig->ptr = orig->base; // set current ptr to out of boundary too
      return false;
    }
    orig->blockbase = orig->base;
    orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
    sbchanged = true;
    int blocknum = 0;
    BLOCKINDEX(orig->base, &blocknum);
    if(bamboo_smemtbl[blocknum] == 0) {
      // goto next block
      goto innernextSBlock;
    }
    // check the bamboo_smemtbl to decide the real bound
    orig->bound = orig->base + bamboo_smemtbl[blocknum];
  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
    orig->sblockindex += 1;
    sbchanged = true;
  } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...

  // check if this sblock should be skipped or have special start point
  int sbstart = gcsbstarttbl[orig->sblockindex];
  if(sbstart == -1) {
    // goto next sblock
    GC_BAMBOO_DEBUGPRINT(0xecc2);
    orig->sblockindex += 1;
    orig->blockbase += BAMBOO_SMEM_SIZE;
    goto outernextSBlock;
  } else if((sbstart != 0) && (sbchanged)) {
    // the first time to access this SBlock
    GC_BAMBOO_DEBUGPRINT(0xecc3);
    // not start from the very beginning
    orig->blockbase = sbstart;
  } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...

  // setup information for this sblock
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  GC_BAMBOO_DEBUGPRINT(0xecc4);
  GC_BAMBOO_DEBUGPRINT_REG(orig->base);
  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbound);
  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbase);
  GC_BAMBOO_DEBUGPRINT_REG(orig->offset);
  if(orig->ptr >= orig->bound) {
    // met a lobj, move to next block
    goto innernextSBlock;
  }

  return true;
} // bool nextSBlock(struct moveHelper * orig)
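/*
 * Editor's note: compaction walks two granularities at once. A "block" is a
 * per-core allocation unit (BAMBOO_SMEM_SIZE_L for the first NUMCORES4GC
 * blocks, BAMBOO_SMEM_SIZE after) whose used size comes from bamboo_smemtbl;
 * an "sblock" is a BAMBOO_SMEM_SIZE-sized slot whose gcsbstarttbl entry is
 * -1 when the slot is swallowed by a large object, 0 when scanning starts at
 * its natural base, or else the address where scanning must begin.
 */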
// return false if there are no available data to compact
inline bool initOrig_Dst(struct moveHelper * orig,
                         struct moveHelper * to) {
  // init the dst ptr
  to->numblocks = 0;
  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->bound = BAMBOO_SMEM_SIZE_L;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));

  GC_BAMBOO_DEBUGPRINT(0xef01);
  GC_BAMBOO_DEBUGPRINT_REG(to->base);
  INTPTR tobase = to->base;
  to->ptr = tobase + to->offset;
#ifdef GC_CACHE_ADAPT
  // initialize the gc_cache_revise_information
  gc_cache_revise_infomation.to_page_start_va = to->ptr;
  int toindex = (tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
  gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
    (toindex+1) + gcbaseva;
  gc_cache_revise_infomation.to_page_index = toindex;
  gc_cache_revise_infomation.orig_page_start_va = -1;
#endif // GC_CACHE_ADAPT

  // init the orig ptr
  orig->numblocks = 0;
  orig->base = tobase;
  int blocknum = 0;
  BLOCKINDEX(orig->base, &blocknum);
  INTPTR origbase = orig->base;
  // check the bamboo_smemtbl to decide the real bound
  orig->bound = origbase + bamboo_smemtbl[blocknum];
  orig->blockbase = origbase;
  orig->sblockindex = (origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
  GC_BAMBOO_DEBUGPRINT(0xef02);
  GC_BAMBOO_DEBUGPRINT_REG(origbase);
  GC_BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);

  int sbstart = gcsbstarttbl[orig->sblockindex];
  if(sbstart == -1) {
    GC_BAMBOO_DEBUGPRINT(0xef03);
    // goto next sblock
    orig->blockbound =
      gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
    return nextSBlock(orig);
  } else if(sbstart != 0) {
    GC_BAMBOO_DEBUGPRINT(0xef04);
    orig->blockbase = sbstart;
  }
  GC_BAMBOO_DEBUGPRINT(0xef05);
  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
  orig->offset = BAMBOO_CACHE_LINE_SIZE;
  orig->ptr = orig->blockbase + orig->offset;
  GC_BAMBOO_DEBUGPRINT(0xef06);
  GC_BAMBOO_DEBUGPRINT_REG(orig->base);

  return true;
} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
inline void nextBlock(struct moveHelper * to) {
  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
  to->bound += BAMBOO_SMEM_SIZE;
  to->numblocks++;
  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
  to->offset = BAMBOO_CACHE_LINE_SIZE;
  to->ptr = to->base + to->offset;
} // void nextBlock(struct moveHelper * to)
#ifdef GC_CACHE_ADAPT
inline void samplingDataConvert(int current_ptr) {
  unsigned int tmp_factor =
    current_ptr-gc_cache_revise_infomation.to_page_start_va;
  int topage=gc_cache_revise_infomation.to_page_index;
  int oldpage = gc_cache_revise_infomation.orig_page_index;
  unsigned int * newtable=&gccachesamplingtbl_r[topage];
  unsigned int * oldtable=&gccachesamplingtbl[oldpage];

  for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
    (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
    newtable=(unsigned int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
    oldtable=(unsigned int*) (((char *)oldtable)+size_cachesamplingtbl_local);
  }
} // inline void samplingDataConvert(int)
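/*
 * Editor's note: tmp_factor is the number of bytes already copied into the
 * current destination page, so each source page's per-core access sample is
 * folded into the destination page's row weighted by how much of that page
 * the moved data now occupies. gccachesamplingtbl_r accumulates the revised
 * profile that the cache-adaptation phase reads after compaction.
 */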
inline void completePageConvert(struct moveHelper * orig,
                                struct moveHelper * to,
                                int current_ptr,
                                bool closeToPage) {
  INTPTR ptr = 0;
  int tocompare = 0;
  if(closeToPage) {
    ptr = to->ptr;
    tocompare = gc_cache_revise_infomation.to_page_end_va;
  } else {
    ptr = orig->ptr;
    tocompare = gc_cache_revise_infomation.orig_page_end_va;
  }
  if(ptr >= tocompare) {
    // end of an orig/to page
    // compute the impact of this page on the new page
    samplingDataConvert(current_ptr);
    // prepare for a new orig page
    int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
    gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
      (BAMBOO_PAGE_SIZE)*(tmp_index+1);
    gc_cache_revise_infomation.orig_page_index = tmp_index;
    gc_cache_revise_infomation.to_page_start_va = to->ptr;
    if(closeToPage) {
      gc_cache_revise_infomation.to_page_end_va = gcbaseva +
        (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
      gc_cache_revise_infomation.to_page_index =
        (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
    }
  }
} // inline void completePageConvert(...)
#endif // GC_CACHE_ADAPT
// endaddr does not contain spaces for headers
inline bool moveobj(struct moveHelper * orig,
                    struct moveHelper * to,
                    int stopblock) {
  if(stopblock == 0) {
    return true;
  }

  GC_BAMBOO_DEBUGPRINT(0xe201);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(to->ptr);

  int type = 0;
  int size = 0;
  int mark = 0;
  int isize = 0;
innermoveobj:
  // skip -2 padding bytes
  while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
    orig->ptr = (int*)(orig->ptr) + 1;
  }
#ifdef GC_CACHE_ADAPT
  completePageConvert(orig, to, to->ptr, false);
#endif
  INTPTR origptr = orig->ptr;
  int origbound = orig->bound;
  int origblockbound = orig->blockbound;
  if((origptr >= origbound) || (origptr == origblockbound)) {
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  }
  GC_BAMBOO_DEBUGPRINT(0xe202);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
  GC_BAMBOO_DEBUGPRINT(((int *)(origptr))[0]);
  // check the obj's type, size and mark flag
  type = ((int *)(origptr))[0];
  size = 0;
  if(type == 0) {
    // end of this block, go to next one
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
    goto innermoveobj;
  } else if(type < NUMCLASSES) {
    // a normal object
    size = classsize[type];
  } else {
    // an array
    struct ArrayObject *ao=(struct ArrayObject *)(origptr);
    int elementsize=classsize[type];
    int length=ao->___length___;
    size=sizeof(struct ArrayObject)+length*elementsize;
  }
  mark = ((int *)(origptr))[6];
  bool isremote = ((((int *)(origptr))[6] & REMOTEM) != 0);
  GC_BAMBOO_DEBUGPRINT(0xe203);
  GC_BAMBOO_DEBUGPRINT_REG(origptr);
  GC_BAMBOO_DEBUGPRINT_REG(size);
  ALIGNSIZE(size, &isize); // no matter whether the obj is marked or not,
                           // we should be able to cross it
  if((mark & MARKED) != 0) {
    int totop = to->top;
    int tobound = to->bound;
    GC_BAMBOO_DEBUGPRINT(0xe204);
#ifdef GC_PROFILE
    gc_num_liveobj++;
#endif
    // marked obj, copy it to current heap top
    // check to see if remaining space is enough
    if(totop + isize > tobound) {
      // fill 0 indicating the end of this block
      BAMBOO_MEMSET_WH(to->ptr, '\0', tobound - totop);
      // fill the header of this block and then go to next block
      to->offset += tobound - totop;
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
#ifdef GC_CACHE_ADAPT
      int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
      nextBlock(to);
#ifdef GC_CACHE_ADAPT
      completePageConvert(orig, to, tmp_ptr, true);
#endif // GC_CACHE_ADAPT
      if(stopblock == to->numblocks) {
        // already fulfilled the block
        return true;
      } // if(stopblock == to->numblocks)
    } // if(to->top + isize > to->bound)
    // set the mark field to 2, indicating that this obj has been moved
    // and needs to be flushed
    ((int *)(origptr))[6] = COMPACTED;
    INTPTR toptr = to->ptr;
    if(toptr != origptr) {
      if((int)(origptr) < (int)(toptr)+size) {
        memmove(toptr, origptr, size);
      } else {
        memcpy(toptr, origptr, size);
      }
      // fill the remaining space with -2
      BAMBOO_MEMSET_WH(toptr+size, -2, isize-size);
    }
    // store mapping info
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
#ifdef LOCALHASHTBL_TEST
    RuntimeHashadd_I(gcpointertbl, origptr, toptr);
#else
    mgchashInsert_I(gcpointertbl, origptr, toptr);
#endif
    if(isremote) {
      // add to the sharedptbl
      if(gcsharedptbl != NULL) {
        mgcsharedhashInsert_I(gcsharedptbl, origptr, toptr);
      }
    }
    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    GC_BAMBOO_DEBUGPRINT(0xcdce);
    GC_BAMBOO_DEBUGPRINT_REG(origptr);
    GC_BAMBOO_DEBUGPRINT_REG(toptr);
    GC_BAMBOO_DEBUGPRINT_REG(isize);
    gccurr_heaptop -= isize;
    to->ptr += isize;
    to->offset += isize;
    to->top += isize;
#ifdef GC_CACHE_ADAPT
    int tmp_ptr = to->ptr;
#endif // GC_CACHE_ADAPT
    if(to->top == to->bound) {
      // fill the header of this block and then go to next block
      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
      (*((int*)(to->base))) = to->offset;
      nextBlock(to);
    }
#ifdef GC_CACHE_ADAPT
    completePageConvert(orig, to, tmp_ptr, true);
#endif // GC_CACHE_ADAPT
  } // if((mark & MARKED) != 0)
  GC_BAMBOO_DEBUGPRINT(0xe205);

  // move to the next obj
  orig->ptr += isize;

  GC_BAMBOO_DEBUGPRINT_REG(isize);
  GC_BAMBOO_DEBUGPRINT_REG(size);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
  if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
    GC_BAMBOO_DEBUGPRINT(0xe206);
    if(!nextSBlock(orig)) {
      // finished, no more data
      return true;
    }
  }
  GC_BAMBOO_DEBUGPRINT(0xe207);
  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
  return false;
} //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int stopblock)
// should be invoked with interrupts disabled
inline int assignSpareMem_I(int sourcecore,
                            int requiredmem,
                            int * tomove,
                            int * startaddr) {
  int b = 0;
  BLOCKINDEX(gcloads[sourcecore], &b);
  int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
      : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
  int remain = boundptr - gcloads[sourcecore];
  int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
  *startaddr = gcloads[sourcecore];
  *tomove = gcfilledblocks[sourcecore] + 1;
  if(memneed < remain) {
    gcloads[sourcecore] += memneed;
    return 0;
  } else {
    // next available block
    gcfilledblocks[sourcecore] += 1;
    int newbase = 0;
    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
    gcloads[sourcecore] = newbase;
    return requiredmem-remain;
  }
} // int assignSpareMem_I(int, int, int *, int *)
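/*
 * Worked example (editor's sketch, hypothetical numbers): if sourcecore's
 * next free byte sits 3KB below its current block bound (remain = 3KB) and
 * the requester needs 2KB plus the cache-line header, memneed < remain, so
 * the whole request is satisfied in place and 0 is returned. Otherwise the
 * source core is advanced to its next block and the unsatisfied remainder
 * (requiredmem - remain) is handed back for the caller to keep pending.
 */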
// should be invoked with interrupts disabled
inline bool gcfindSpareMem_I(int * startaddr,
                             int * tomove,
                             int * dstcore,
                             int requiredmem,
                             int requiredcore) {
  for(int k = 0; k < NUMCORES4GC; k++) {
    if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
      // check if this stopped core has enough mem
      assignSpareMem_I(k, requiredmem, tomove, startaddr);
      *dstcore = k;
      return true;
    }
  }
  // if can not find spare mem right now, hold the request
  gcrequiredmems[requiredcore] = requiredmem;
  gcmovepending++;
  return false;
} //bool gcfindSpareMem_I(int* startaddr,int* tomove,int* dstcore,int,int)
1911 inline bool compacthelper(struct moveHelper * orig,
1912 struct moveHelper * to,
1915 bool * localcompact) {
1916 // scan over all objs in this block, compact the marked objs
1917 // loop stop when finishing either scanning all active objs or
1918 // fulfilled the gcstopblock
1919 GC_BAMBOO_DEBUGPRINT(0xe101);
1920 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
1921 GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
1923 while(orig->ptr < gcmarkedptrbound) {
1924 bool stop = moveobj(orig, to, gcblock2fill);
1929 #ifdef GC_CACHE_ADAPT
1930 // end of an to page, wrap up its information
1931 samplingDataConvert(to->ptr);
1932 #endif // GC_CACHE_ADAPT
1933 // if no objs have been compact, do nothing,
1934 // otherwise, fill the header of this block
1935 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
1936 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
1937 (*((int*)(to->base))) = to->offset;
1941 to->top -= BAMBOO_CACHE_LINE_SIZE;
1942 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
1944 *heaptopptr = to->ptr;
1945 *filledblocks = to->numblocks;
1947 GC_BAMBOO_DEBUGPRINT(0xe102);
1948 GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
1949 GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
1950 GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1951 GC_BAMBOO_DEBUGPRINT_REG(*filledblocks);
1952 GC_BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
1954 // send msgs to core coordinator indicating that the compact is finishing
1955 // send compact finish message to core coordinator
1956 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1957 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
1958 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
1959 if(orig->ptr < gcmarkedptrbound) {
1960 GC_BAMBOO_DEBUGPRINT(0xe103);
1963 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1964 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
1965 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
1966 GC_BAMBOO_DEBUGPRINT(0xe104);
1969 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1970 GC_BAMBOO_DEBUGPRINT(0xe105);
1973 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1975 GC_BAMBOO_DEBUGPRINT(0xe106);
1976 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1981 if(orig->ptr < gcmarkedptrbound) {
1982 GC_BAMBOO_DEBUGPRINT(0xe107);
1985 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
1986 *filledblocks, *heaptopptr, gccurr_heaptop, false);
1988 GC_BAMBOO_DEBUGPRINT(0xe108);
1989 GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
1990 // finish compacting
1991 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
1992 *filledblocks, *heaptopptr, 0, false);
1994 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
1996 if(orig->ptr < gcmarkedptrbound) {
1997 GC_BAMBOO_DEBUGPRINT(0xe109);
1998 // still have unpacked obj
2006 GC_BAMBOO_DEBUGPRINT(0xe10a);
2008 to->ptr = gcmovestartaddr;
2009 to->numblocks = gcblock2fill - 1;
2010 to->bound = (to->numblocks==0) ?
2011 BAMBOO_SMEM_SIZE_L :
2012 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2013 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2014 to->offset = to->ptr - to->base;
2015 to->top = (to->numblocks==0) ?
2016 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2018 to->offset = BAMBOO_CACHE_LINE_SIZE;
2019 to->ptr += to->offset; // for header
2020 to->top += to->offset;
2021 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2022 *localcompact = true;
2024 *localcompact = false;
2026 #ifdef GC_CACHE_ADAPT
2027 // initialize the gc_cache_revise_information
2028 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2029 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2030 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2031 gc_cache_revise_infomation.to_page_index =
2032 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2033 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2034 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2035 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2036 gc_cache_revise_infomation.orig_page_index =
2037 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2038 #endif // GC_CACHE_ADAPT
2041 GC_BAMBOO_DEBUGPRINT(0xe10b);
2043 } // bool compacthelper(...)
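/* Illustrative sketch (not used by the collector) of how compacthelper()
 * derives a destination window when the master hands it a new start
 * address: the first block of a core's heap is BAMBOO_SMEM_SIZE_L bytes,
 * every later block BAMBOO_SMEM_SIZE, and each block is prefixed by a
 * BAMBOO_CACHE_LINE_SIZE header. The struct and parameters below are
 * local stand-ins for the real macros and moveHelper fields. */
struct to_window_sketch {
  int ptr, base, bound, top, offset, numblocks;
};
static void init_to_window_sketch(struct to_window_sketch * to,
                                  int startaddr, int block2fill, int base,
                                  int smem_l, int smem, int cacheline) {
  to->ptr = startaddr;
  to->numblocks = block2fill - 1;
  to->bound = (to->numblocks == 0) ? smem_l : smem_l + smem * to->numblocks;
  to->base = base;                 // the real code uses BASEPTR(core, ...)
  to->offset = to->ptr - to->base;
  to->top = (to->numblocks == 0)
            ? to->offset                       // still in the large block
            : to->bound - smem + to->offset;   // into a regular block
  to->offset = cacheline;          // reserve room for the block header
  to->ptr += to->offset;
  to->top += to->offset;
}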
2045 inline void compact() {
2046 if(COMPACTPHASE != gcphase) {
2047 BAMBOO_EXIT(0xb002);
2050 // initialize pointers for compacting
2051 struct moveHelper * orig =
2052 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2053 struct moveHelper * to =
2054 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2056 if(!initOrig_Dst(orig, to)) {
2057 // no available data to compact
2058 // send compact finish msg to STARTUP core
2059 GC_BAMBOO_DEBUGPRINT(0xe001);
2060 GC_BAMBOO_DEBUGPRINT_REG(to->base);
2061 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2062 0, to->base, 0, false);
2067 #ifdef GC_CACHE_ADAPT
2068 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2069 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2070 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2071 gc_cache_revise_infomation.orig_page_index =
2072 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2073 #endif // GC_CACHE_ADAPT
2075 int filledblocks = 0;
2076 INTPTR heaptopptr = 0;
2077 bool localcompact = true;
2078 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2084 // if this returns NULL, it means
2085 // 1. objptr is NULL, or
2086 // 2. objptr is not a shared obj;
2087 // in these cases, keeping the original value is OK
2088 inline void * flushObj(void * objptr) {
2089 GC_BAMBOO_DEBUGPRINT(0xe401);
2090 if(objptr == NULL) {
2093 void * dstptr = NULL;
2094 if(ISSHAREDOBJ(objptr)) {
2095 GC_BAMBOO_DEBUGPRINT(0xe402);
2096 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2097 // a shared obj ptr, change to new address
2098 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2099 #ifdef LOCALHASHTBL_TEST
2100 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2102 dstptr = mgchashSearch(gcpointertbl, objptr);
2104 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2105 GC_BAMBOO_DEBUGPRINT_REG(dstptr);
2107 if(NULL == dstptr) {
2109 GC_BAMBOO_DEBUGPRINT(0xe403);
2110 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2111 GC_BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2112 if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2113 // error! the obj is hosted right on this core, but it cannot be found
2114 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2115 BAMBOO_EXIT(0xb003);
2117 int hostc = hostcore(objptr);
2118 // check the corresponding sharedptbl
2119 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2120 mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
2122 dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
2123 if(dstptr != NULL) {
2124 #ifdef LOCALHASHTBL_TEST
2125 RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
2127 mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
2131 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2133 if(dstptr == NULL) {
2134 // still cannot get the mapping info;
2135 // send a msg to the host core for it
2136 gcobj2map = (int)objptr;
2139 // the first time the mapping is required, send a msg to the
2140 // host core for the mapping info
2141 send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
2142 BAMBOO_NUM_OF_CORE, false);
2148 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2149 #ifdef LOCALHASHTBL_TEST
2150 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2152 dstptr = mgchashSearch(gcpointertbl, objptr);
2154 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2155 } // if(dstptr == NULL)
2156 } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
2157 GC_BAMBOO_DEBUGPRINT_REG(dstptr);
2158 } // if(NULL == dstptr)
2159 } // if(ISSHAREDOBJ(objptr))
2160 // if not a shared obj, return NULL to indicate no need to flush
2161 GC_BAMBOO_DEBUGPRINT(0xe404);
2163 } // void * flushObj(void * objptr)
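/* Self-contained sketch of the lookup order used by flushObj() above;
 * the two arrays stand in for the core-local gcpointertbl and the host
 * core's sharedptbl. Step 3 (a GCMAPREQUEST message to the host core,
 * spinning until the reply arrives) is only noted in a comment since it
 * needs the messaging runtime. */
#define SKETCH_TBL_SIZE 4
static void * sketch_keys[2][SKETCH_TBL_SIZE];
static void * sketch_vals[2][SKETCH_TBL_SIZE];

static void * sketch_search(int tbl, void * key) {
  for(int i = 0; i < SKETCH_TBL_SIZE; i++) {
    if(sketch_keys[tbl][i] == key) return sketch_vals[tbl][i];
  }
  return NULL;
}

static void * lookup_mapping_sketch(void * objptr) {
  // step 1: the core-local pointer table
  void * dstptr = sketch_search(0, objptr);
  if(dstptr == NULL) {
    // step 2: the host core's shared table; a hit is cached locally
    dstptr = sketch_search(1, objptr);
    // step 3 (not shown): send GCMAPREQUEST to hostcore(objptr) and
    // wait until the reply fills the local table
  }
  return dstptr;  // NULL: no mapping, caller keeps the original pointer
}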
2165 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2167 // flush current stack
2168 while(stackptr!=NULL) {
2169 for(i=0; i<stackptr->size; i++) {
2170 if(stackptr->array[i] != NULL) {
2171 void * dst = flushObj(stackptr->array[i]);
2173 stackptr->array[i] = dst;
2177 stackptr=stackptr->next;
2181 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2182 for(i=0; i<NUMCLASSES; i++) {
2183 struct parameterwrapper ** queues =
2184 objectqueues[BAMBOO_NUM_OF_CORE][i];
2185 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2186 for(j = 0; j < length; ++j) {
2187 struct parameterwrapper * parameter = queues[j];
2188 struct ObjectHash * set=parameter->objectset;
2189 struct ObjectNode * ptr=set->listhead;
2191 void * dst = flushObj((void *)ptr->key);
2197 ObjectHashrehash(set);
2202 // flush current task descriptor
2203 if(currtpd != NULL) {
2204 for(i=0; i<currtpd->numParameters; i++) {
2205 void * dst = flushObj(currtpd->parameterArray[i]);
2207 currtpd->parameterArray[i] = dst;
2212 // flush active tasks
2213 if(activetasks != NULL) {
2214 struct genpointerlist * ptr=activetasks->list;
2216 struct taskparamdescriptor *tpd=ptr->src;
2218 for(i=0; i<tpd->numParameters; i++) {
2219 void * dst = flushObj(tpd->parameterArray[i]);
2221 tpd->parameterArray[i] = dst;
2226 genrehash(activetasks);
2229 // flush cached transferred obj
2230 struct QueueItem * tmpobjptr = getHead(&objqueue);
2231 while(tmpobjptr != NULL) {
2232 struct transObjInfo * objInfo =
2233 (struct transObjInfo *)(tmpobjptr->objectptr);
2234 void * dst = flushObj(objInfo->objptr);
2236 objInfo->objptr = dst;
2238 tmpobjptr = getNextQueueItem(tmpobjptr);
2241 // flush cached objs to be transferred
2242 struct QueueItem * item = getHead(totransobjqueue);
2243 while(item != NULL) {
2244 struct transObjInfo * totransobj =
2245 (struct transObjInfo *)(item->objectptr);
2246 void * dst = flushObj(totransobj->objptr);
2248 totransobj->objptr = dst;
2250 item = getNextQueueItem(item);
2251 } // while(item != NULL)
2253 // flush lock related info
2254 for(i = 0; i < runtime_locklen; ++i) {
2255 void * dst = flushObj(runtime_locks[i].redirectlock);
2257 runtime_locks[i].redirectlock = (int)dst;
2259 if(runtime_locks[i].value != NULL) {
2260 void * dst=flushObj(runtime_locks[i].value);
2262 runtime_locks[i].value = (int)dst;
2267 } // void flushRuntimeObj(struct garbagelist * stackptr)
2269 inline void transmappinginfo() {
2270 // broadcast the sharedptbl pointer
2271 for(int i = 0; i < NUMCORESACTIVE; i++) {
2272 if(i != BAMBOO_NUM_OF_CORE) {
2273 send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
2277 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2278 send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
2282 inline void flush(struct garbagelist * stackptr) {
2284 flushRuntimeObj(stackptr);
2287 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2288 bool hasItems = gc_moreItems_I();
2289 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2294 GC_BAMBOO_DEBUGPRINT(0xe301);
2295 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2296 void * ptr = gc_dequeue_I();
2297 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2298 if(ISSHAREDOBJ(ptr)) {
2299 // should be a local shared obj and should have mapping info
2300 ptr = flushObj(ptr);
2301 GC_BAMBOO_DEBUGPRINT(0xe302);
2302 GC_BAMBOO_DEBUGPRINT_REG(ptr);
2303 GC_BAMBOO_DEBUGPRINT_REG(tptr);
2304 GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2306 BAMBOO_EXIT(0xb004);
2308 } // if(ISSHAREDOBJ(ptr))
2309 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2310 int type = ((int *)(ptr))[0];
2311 // scan all pointers in ptr
2312 unsigned INTPTR * pointer;
2313 pointer=pointerarray[type];
2314 GC_BAMBOO_DEBUGPRINT(0xe303);
2315 GC_BAMBOO_DEBUGPRINT_REG(pointer);
2317 /* Array of primitives */
2319 } else if (((INTPTR)pointer)==1) {
2320 GC_BAMBOO_DEBUGPRINT(0xe304);
2321 /* Array of pointers */
2322 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2323 int length=ao->___length___;
2325 for(j=0; j<length; j++) {
2326 GC_BAMBOO_DEBUGPRINT(0xe305);
2328 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2329 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2330 if(objptr != NULL) {
2331 void * dst = flushObj(objptr);
2333 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2338 GC_BAMBOO_DEBUGPRINT(0xe306);
2339 INTPTR size=pointer[0];
2341 for(i=1; i<=size; i++) {
2342 GC_BAMBOO_DEBUGPRINT(0xe307);
2343 unsigned int offset=pointer[i];
2344 void * objptr=*((void **)(((char *)ptr)+offset));
2345 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2346 if(objptr != NULL) {
2347 void * dst = flushObj(objptr);
2349 *((void **)(((char *)ptr)+offset)) = dst;
2352 } // for(i=1; i<=size; i++)
2353 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2354 // restore the mark field, indicating that this obj has been flushed
2355 if(ISSHAREDOBJ(ptr)) {
2356 ((int *)(ptr))[6] = INIT;
2358 } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
2359 } // while(gc_moreItems())
2360 GC_BAMBOO_DEBUGPRINT(0xe308);
2362 // TODO bug here: the startup core contains all lobjs' info, thus all the
2363 // lobjs are flushed in sequence.
2365 while(gc_lobjmoreItems_I()) {
2366 GC_BAMBOO_DEBUGPRINT(0xe309);
2367 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2368 ptr = flushObj(ptr);
2369 GC_BAMBOO_DEBUGPRINT(0xe30a);
2370 GC_BAMBOO_DEBUGPRINT_REG(ptr);
2371 GC_BAMBOO_DEBUGPRINT_REG(tptr);
2372 GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2374 BAMBOO_EXIT(0xb005);
2376 if(((int *)(ptr))[6] == COMPACTED) {
2377 int type = ((int *)(ptr))[0];
2378 // scan all pointers in ptr
2379 unsigned INTPTR * pointer;
2380 pointer=pointerarray[type];
2381 GC_BAMBOO_DEBUGPRINT(0xe30b);
2382 GC_BAMBOO_DEBUGPRINT_REG(pointer);
2384 /* Array of primitives */
2386 } else if (((INTPTR)pointer)==1) {
2387 GC_BAMBOO_DEBUGPRINT(0xe30c);
2388 /* Array of pointers */
2389 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2390 int length=ao->___length___;
2392 for(j=0; j<length; j++) {
2393 GC_BAMBOO_DEBUGPRINT(0xe30d);
2395 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2396 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2397 if(objptr != NULL) {
2398 void * dst = flushObj(objptr);
2400 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2405 GC_BAMBOO_DEBUGPRINT(0xe30e);
2406 INTPTR size=pointer[0];
2408 for(i=1; i<=size; i++) {
2409 GC_BAMBOO_DEBUGPRINT(0xe30f);
2410 unsigned int offset=pointer[i];
2411 void * objptr=*((void **)(((char *)ptr)+offset));
2413 GC_BAMBOO_DEBUGPRINT_REG(objptr);
2414 if(objptr != NULL) {
2415 void * dst = flushObj(objptr);
2417 *((void **)(((char *)ptr)+offset)) = dst;
2420 } // for(i=1; i<=size; i++)
2421 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2422 // restore the mark field, indicating that this obj has been flushed
2423 ((int *)(ptr))[6] = INIT;
2424 } // if(((int *)(ptr))[6] == COMPACTED)
2425 } // while(gc_lobjmoreItems())
2426 GC_BAMBOO_DEBUGPRINT(0xe310);
2428 // send flush finish message to core coordinator
2429 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2430 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2432 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2434 GC_BAMBOO_DEBUGPRINT(0xe311);
2437 #ifdef GC_CACHE_ADAPT
2438 // prepare for cache adaptation:
2439 // -- flush the shared heap
2440 // -- clean dtlb entries
2441 // -- change cache strategy
2442 void cacheAdapt_gc(bool isgccachestage) {
2443 // flush the shared heap
2444 BAMBOO_CACHE_FLUSH_L2();
2446 // clean the dtlb entries
2447 BAMBOO_CLEAN_DTLB();
2449 // change the cache strategy
2450 gccachestage = isgccachestage;
2451 } // cacheAdapt_gc(bool isgccachestage)
2453 // the master core decides how to adapt cache strategy for the mutator
2454 // according to the collected statistics
2456 // make all pages hash-for-home (h4h)
2457 int cacheAdapt_policy_h4h(){
2458 unsigned int page_index = 0;
2460 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2462 int * tmp_p = gccachepolicytbl+1;
2463 for(page_index = 0; page_index < page_num; page_index++) {
2464 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2465 bamboo_cache_policy_t policy = {0};
2466 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2467 *tmp_p = page_index;
2469 *tmp_p = policy.word;
2475 } // int cacheAdapt_policy_h4h()
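/* Illustrative sketch of the policy-table layout shared by all the
 * cacheAdapt_policy_* functions and consumed by cacheAdapt_mutator():
 * slot 0 holds the number of records, then each record is a
 * (page_index, policy word) pair. Built here over a plain int array
 * with an assumed policy word. */
static int build_policy_tbl_sketch(int * tbl, unsigned int page_num,
                                   int policy_word) {
  int * tmp_p = tbl + 1;           // slot 0 is reserved for the count
  int numchanged = 0;
  for(unsigned int page_index = 0; page_index < page_num; page_index++) {
    *tmp_p++ = (int)page_index;    // which page changes ...
    *tmp_p++ = policy_word;        // ... and its new policy word
    numchanged++;
  }
  tbl[0] = numchanged;             // what cacheAdapt_mutator() reads back
  return numchanged;
}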
2477 // cache all pages on their home cores, as in the non-cache-adaptive local mode
2478 int cacheAdapt_policy_local(){
2479 unsigned int page_index = 0;
2481 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2483 int * tmp_p = gccachepolicytbl+1;
2484 for(page_index = 0; page_index < page_num; page_index++) {
2485 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2486 bamboo_cache_policy_t policy = {0};
2488 BLOCKINDEX(page_sva, &block);
2489 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2490 // cache the page locally on its home core
2491 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2492 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2493 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2494 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2495 *tmp_p = page_index;
2497 *tmp_p = policy.word;
2503 } // int cacheAdapt_policy_local()
2505 int cacheAdapt_policy_hotest(){
2506 unsigned int page_index = 0;
2508 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2510 int * tmp_p = gccachepolicytbl+1;
2511 for(page_index = 0; page_index < page_num; page_index++) {
2512 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2513 bamboo_cache_policy_t policy = {0};
2515 unsigned int hotfreq = 0;
2517 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2518 for(int i = 0; i < NUMCORESACTIVE; i++) {
2519 unsigned int freq = *local_tbl;
2520 local_tbl=(unsigned int *)(
2521 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2523 // check the frequency and decide if this page is hot for the core
2524 if(hotfreq < freq) {
2530 // Decide the cache strategy for this page.
2531 // If a new cache strategy is adopted, write it into the shared block of
2532 // the gcsharedsamplingtbl; the mem recording the written information is
2533 // large enough to hold it.
2534 // Format: page start va + cache strategy (h4h / host core (x,y))
2536 // this page has not been accessed, do not change its cache policy
2539 // cache the page locally on the hottest core
2540 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2541 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2542 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2543 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2544 *tmp_p = page_index;
2546 *tmp_p = policy.word;
2553 } // int cacheAdapt_policy_hotest()
2555 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
2556 // cache the page on the core that accesses it the most, if that core accesses
2557 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total; otherwise,
2558 // hash the page for home.
2559 int cacheAdapt_policy_dominate(){
2560 unsigned int page_index = 0;
2562 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2564 int * tmp_p = gccachepolicytbl+1;
2565 for(page_index = 0; page_index < page_num; page_index++) {
2566 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2567 bamboo_cache_policy_t policy = {0};
2569 unsigned long long totalfreq = 0;
2570 unsigned int hotfreq = 0;
2572 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2573 for(int i = 0; i < NUMCORESACTIVE; i++) {
2574 unsigned int freq = *local_tbl;
2575 local_tbl=(unsigned int *)(
2576 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2578 // check the frequency and decide if this page is hot for the core
2579 if(hotfreq < freq) {
2585 // Decide the cache strategy for this page.
2586 // If a new cache strategy is adopted, write it into the shared block of
2587 // the gcsharedsamplingtbl.
2588 // Format: page start va + cache policy
2590 // this page has not been accessed, do not change its cache policy
2594 (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
2595 hotfreq/=BAMBOO_PAGE_SIZE;
2596 if(hotfreq < totalfreq) {
2598 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2600 // cache the page locally on the hottest core
2601 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2602 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2603 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2604 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2606 *tmp_p = page_index;
2608 *tmp_p = policy.word;
2614 } // int cacheAdapt_policy_dominate()
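/* Worked example for the dominate test above (numbers invented): with
 * GC_CACHE_ADAPT_DOMINATE_THRESHOLD = 50, totalfreq = 1000 and
 * hotfreq = 450 (both already scaled down by BAMBOO_PAGE_SIZE), the
 * bound is 1000*50/100 = 500; since 450 < 500 no single core dominates
 * the page, so it is hash-for-homed instead of being homed on the
 * hottest core. The helper below mirrors that comparison, minus the
 * page-size scaling. */
static int dominates_sketch(unsigned long long totalfreq,
                            unsigned long long hotfreq,
                            unsigned int threshold_pct) {
  // nonzero: home the page on the hottest core; zero: hash-for-home it
  return hotfreq >= (totalfreq * threshold_pct) / 100;
}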
2616 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
2618 void gc_quicksort(unsigned long long *array,
2624 int rightIdx = right;
2625 if((right-left+1) >= 1) {
2626 pivot = (left+right)/2;
2627 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
2628 unsigned long long pivotValue = array[pivot*3-offset];
2629 while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
2632 while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
2635 // swap [leftIdx] & [rightIdx]
2636 for(int k = 0; k < 3; k++) {
2637 unsigned long long tmp = array[3*rightIdx-k];
2638 array[3*rightIdx-k] = array[3*leftIdx-k];
2639 array[3*leftIdx-k] = tmp;
2643 if((leftIdx-1) == pivot) {
2644 pivot = rightIdx = rightIdx + 1;
2645 } else if((leftIdx+1) == pivot) {
2646 pivot = leftIdx = leftIdx-1;
2649 gc_quicksort(array, left, pivot-1, offset);
2650 gc_quicksort(array, pivot+1, right, offset);
2653 } // void gc_quicksort(...)
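/* Usage sketch for gc_quicksort(): a core2heavypages row stores a record
 * count in slot 0 followed by (policy-table slot, totalfreq,
 * remoteaccess) triples, so field k of 1-based record r lives at
 * array[3*r - offset], with offset 0 selecting remoteaccess and offset 1
 * selecting totalfreq. The sort is descending on the selected field. */
static void sort_heavypages_sketch(unsigned long long * row) {
  int nrecords = (int)row[0];
  if(nrecords > 0) {
    gc_quicksort(row, 1, nrecords, 0);  // descending by remote accesses
  }
}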
2655 // Cache every page on the core that accesses it the most.
2656 // Check whether any core's pages total more accesses than a
2657 // 1/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD share of the total. If so, take the
2658 // pages with the most remote accesses and hash them for home until the
2659 // core gets back below that share.
2660 int cacheAdapt_policy_overload(){
2661 unsigned int page_index = 0;
2663 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2665 int * tmp_p = gccachepolicytbl+1;
2666 unsigned long long workload[NUMCORESACTIVE];
2667 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
2668 unsigned long long total_workload = 0;
2669 unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
2670 memset(core2heavypages,0,
2671 sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
2672 for(page_index = 0; page_index < page_num; page_index++) {
2673 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2674 bamboo_cache_policy_t policy = {0};
2676 unsigned long long totalfreq = 0;
2677 unsigned int hotfreq = 0;
2679 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2680 for(int i = 0; i < NUMCORESACTIVE; i++) {
2681 unsigned int freq = *local_tbl;
2682 local_tbl=(unsigned int *)(
2683 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2685 // check the frequency and decide if this page is hot for the core
2686 if(hotfreq < freq) {
2691 // Decide the cache strategy for this page.
2692 // If a new cache strategy is adopted, write it into the shared block of
2693 // the gcsharedsamplingtbl; the mem recording the written information is
2694 // large enough to hold it.
2695 // Format: page start va + cache strategy (h4h / host core (x,y))
2697 // this page has not been accessed, do not change its cache policy
2701 totalfreq/=BAMBOO_PAGE_SIZE;
2702 hotfreq/=BAMBOO_PAGE_SIZE;
2703 // cache the page locally on the hottest core
2704 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2705 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2706 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2707 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2708 *tmp_p = page_index;
2710 *tmp_p = policy.word;
2713 workload[hotestcore] += totalfreq;
2714 total_workload += totalfreq;
2715 // append to core2heavypages (sorted later with gc_quicksort)
2716 unsigned long long remoteaccess = totalfreq - hotfreq;
2717 int index = (int)core2heavypages[hotestcore][0];
2718 core2heavypages[hotestcore][3*index+3] = remoteaccess;
2719 core2heavypages[hotestcore][3*index+2] = totalfreq;
2720 core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
2721 core2heavypages[hotestcore][0]++;
2724 unsigned long long workload_threshold =
2725 total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
2726 // Check the workload of each core
2727 for(int i = 0; i < NUMCORESACTIVE; i++) {
2729 int index = (int)core2heavypages[i][0];
2730 if(workload[i] > workload_threshold) {
2731 // sort according to the remoteaccess
2732 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
2733 while((workload[i] > workload_threshold) && (j<index*3)) {
2734 // hash-for-home the pages with the most remote accesses
2735 bamboo_cache_policy_t policy = {0};
2736 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2737 *((int*)core2heavypages[i][j]) = policy.word;
2738 workload[i] -= core2heavypages[i][j+1];
2745 } // int cacheAdapt_policy_overload()
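/* Illustrative sketch of the per-core bookkeeping above: workload[c]
 * accumulates the page-scaled accesses of every page homed on core c,
 * and the core's core2heavypages row gains one record per such page.
 * A core counts as overloaded once its share exceeds
 * total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD (10% of all traffic
 * for the threshold of 10); its most remotely accessed pages are then
 * flipped to hash-for-home until it drops back under. */
static void record_heavy_page_sketch(unsigned long long * row,
                                     unsigned long long tbl_slot,
                                     unsigned long long totalfreq,
                                     unsigned long long remoteaccess) {
  int index = (int)row[0];        // number of records so far
  row[3*index+1] = tbl_slot;      // address of the page's policy entry
  row[3*index+2] = totalfreq;
  row[3*index+3] = remoteaccess;
  row[0]++;
}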
2747 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
2748 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
2749 // Cache every page on the core that accesses it the most.
2750 // Check whether any core's pages total more accesses than a
2751 // 1/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD share of the total. If so, take the
2752 // pages with the most remote accesses and hash them for home until the
2753 // core gets back below that share.
2754 // Then sort the pages based on activity:
2755 // if more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses to a
2756 // core's pages come from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
2757 // start hash-for-homing these pages (selecting the ones with the most
2758 // remote accesses, i.e. the fewest local accesses, first) until we get
2759 // below GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
2760 int cacheAdapt_policy_crowd(){
2761 unsigned int page_index = 0;
2763 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2765 int * tmp_p = gccachepolicytbl+1;
2766 unsigned long long workload[NUMCORESACTIVE];
2767 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
2768 unsigned long long total_workload = 0;
2769 unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
2770 memset(core2heavypages,0,
2771 sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
2772 for(page_index = 0; page_index < page_num; page_index++) {
2773 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2774 bamboo_cache_policy_t policy = {0};
2776 unsigned long long totalfreq = 0;
2777 unsigned int hotfreq = 0;
2779 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index];
2780 for(int i = 0; i < NUMCORESACTIVE; i++) {
2781 unsigned int freq = *local_tbl;
2782 local_tbl=(unsigned int *)(
2783 ((char *)local_tbl)+size_cachesamplingtbl_local_r);
2785 // check the frequency and decide if this page is hot for the core
2786 if(hotfreq < freq) {
2791 // Decide the cache strategy for this page.
2792 // If a new cache strategy is adopted, write it into the shared block of
2793 // the gcsharedsamplingtbl; the mem recording the written information is
2794 // large enough to hold it.
2795 // Format: page start va + cache strategy (h4h / host core (x,y))
2797 // this page has not been accessed, do not change its cache policy
2800 totalfreq/=BAMBOO_PAGE_SIZE;
2801 hotfreq/=BAMBOO_PAGE_SIZE;
2802 // cache the page locally on the hottest core
2803 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2804 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2805 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
2806 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
2807 *tmp_p = page_index;
2809 *tmp_p = policy.word;
2812 workload[hotestcore] += totalfreq;
2813 total_workload += totalfreq;
2814 // append to core2heavypages (sorted later with gc_quicksort)
2815 unsigned long long remoteaccess = totalfreq - hotfreq;
2816 int index = (int)core2heavypages[hotestcore][0];
2817 core2heavypages[hotestcore][3*index+3] = remoteaccess;
2818 core2heavypages[hotestcore][3*index+2] = totalfreq;
2819 core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
2820 core2heavypages[hotestcore][0]++;
2823 unsigned long long workload_threshold =
2824 total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
2825 // Check the workload of each core
2826 for(int i = 0; i < NUMCORESACTIVE; i++) {
2828 int index = (int)core2heavypages[i][0];
2829 if(workload[i] > workload_threshold) {
2830 // sort according to the remoteaccess
2831 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
2832 while((workload[i] > workload_threshold) && (j<index*3)) {
2833 // hash-for-home the pages with the most remote accesses
2834 bamboo_cache_policy_t policy = {0};
2835 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2836 *((int*)core2heavypages[i][j]) = policy.word;
2837 workload[i] -= core2heavypages[i][j+1];
2842 // Check if the accesses are crowded onto a few pages:
2843 // sort according to total accesses
2845 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
2846 unsigned long long threshold =
2847 GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
2848 int num_crowded = 0;
2849 unsigned long long t_workload = 0;
2851 t_workload += core2heavypages[i][j+num_crowded*3+1];
2853 } while(t_workload < threshold);
2854 // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough
2855 // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
2856 if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
2857 // need to hash-for-home some of these pages;
2858 // sort them according to remote accesses
2859 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
2860 // hash-for-home the pages with the most remote accesses
2861 bamboo_cache_policy_t policy = {0};
2862 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
2863 *((int*)core2heavypages[i][j]) = policy.word;
2864 workload[i] -= core2heavypages[i][j+1];
2865 t_workload -= core2heavypages[i][j+1];
2867 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
2873 } // int cacheAdapt_policy_crowd()
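/* Worked example for the crowd check above (numbers invented): with
 * GC_CACHE_ADAPT_ACCESS_THRESHOLD = 70 and workload[i] = 1000, pages are
 * taken in decreasing total-access order until t_workload reaches the
 * threshold of 700. If that takes more than
 * GC_CACHE_ADAPT_CROWD_THRESHOLD = 20 pages, the pages with the most
 * remote accesses are hash-for-homed, workload[i] and t_workload shrink
 * accordingly, and the threshold is recomputed against the reduced
 * workload until the hot set fits within 20 pages. */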
2875 void cacheAdapt_master() {
2876 #ifdef GC_CACHE_ADAPT_SAMPLING_OUTPUT
2877 gc_output_cache_sampling_r();
2878 #endif // GC_CACHE_ADAPT_SAMPLING_OUTPUT
2880 // check the collected statistics
2881 // for each page, decide the new cache strategy
2882 #ifdef GC_CACHE_ADAPT_POLICY1
2883 numchanged = cacheAdapt_policy_h4h();
2884 #elif defined GC_CACHE_ADAPT_POLICY2
2885 numchanged = cacheAdapt_policy_local();
2886 #elif defined GC_CACHE_ADAPT_POLICY3
2887 numchanged = cacheAdapt_policy_hotest();
2888 #elif defined GC_CACHE_ADAPT_POLICY4
2889 numchanged = cacheAdapt_policy_dominate();
2890 #elif defined GC_CACHE_ADAPT_POLICY5
2891 numchanged = cacheAdapt_policy_overload();
2892 #elif defined GC_CACHE_ADAPT_POLICY6
2893 numchanged = cacheAdapt_policy_crowd();
2895 *gccachepolicytbl = numchanged;
2898 // adapt the cache strategy for the mutator
2899 void cacheAdapt_mutator() {
2900 int numchanged = *gccachepolicytbl;
2901 // read the changed entries and apply them
2902 int * tmp_p = gccachepolicytbl+1;
2903 while(numchanged--) {
2904 // read out the policy
2905 int page_index = *tmp_p;
2906 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
2908 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
2909 policy, BAMBOO_PAGE_SIZE);
2915 void gc_output_cache_sampling() {
2916 unsigned int page_index = 0;
2918 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2919 for(page_index = 0; page_index < page_num; page_index++) {
2920 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2922 BLOCKINDEX(page_sva, &block);
2923 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2924 tprintf("va: %x page_index: %d host: %d\n",
2925 (int)page_sva, page_index, coren);
2926 for(int i = 0; i < NUMCORESACTIVE; i++) {
2927 unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl
2928 +size_cachesamplingtbl_local*i);
2929 unsigned int freq = local_tbl[page_index];
2930 printf("%8d ",freq);
2934 printf("=================\n");
2935 } // gc_output_cache_sampling
2937 void gc_output_cache_sampling_r() {
2938 unsigned int page_index = 0;
2940 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2941 for(page_index = 0; page_index < page_num; page_index++) {
2942 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2944 BLOCKINDEX(page_sva, &block);
2945 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2946 tprintf("va: %x page_index: %d host: %d\n",
2947 (int)page_sva, page_index, coren);
2948 for(int i = 0; i < NUMCORESACTIVE; i++) {
2949 unsigned int * local_tbl = (unsigned int *)((void *)gccachesamplingtbl_r
2950 +size_cachesamplingtbl_local_r*i);
2951 unsigned int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
2952 printf("%8d ",freq);
2956 printf("=================\n");
2957 } // gc_output_cache_sampling_r
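/* Illustrative sketch of the sampling-table addressing used by the two
 * output routines above: gccachesamplingtbl(_r) is one contiguous
 * region holding a per-core sub-table of page counters, so core i's
 * counter for a page sits size_cachesamplingtbl_local(_r) bytes per
 * core into the region. */
static unsigned int sample_of_sketch(const void * tbl, int core,
                                     unsigned int page_index,
                                     unsigned int bytes_per_core) {
  const unsigned int * local_tbl =
    (const unsigned int *)((const char *)tbl + bytes_per_core * core);
  return local_tbl[page_index];
}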
2958 #endif // GC_CACHE_ADAPT
2960 inline void gc_collect(struct garbagelist * stackptr) {
2961 // inform the master that this core is at a gc safe point and is ready to
2962 // do gc
2963 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
2964 self_numreceiveobjs, false);
2966 // core collector routine
2968 if(INITPHASE == gcphase) {
2972 #ifdef RAWPATH // TODO GC_DEBUG
2973 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
2976 #ifdef GC_CACHE_ADAPT
2977 // prepare for cache adaptation:
2978 cacheAdapt_gc(true);
2979 #endif // GC_CACHE_ADAPT
2980 //send init finish msg to core coordinator
2981 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
2984 if(MARKPHASE == gcphase) {
2988 #ifdef RAWPATH // TODO GC_DEBUG
2989 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
2990 udn_tile_coord_y());
2992 mark(true, stackptr);
2993 #ifdef RAWPATH // TODO GC_DEBUG
2994 printf("(%x,%x) Finish mark phase, start compact phase\n",
2995 udn_tile_coord_x(), udn_tile_coord_y());
2998 #ifdef RAWPATH // TODO GC_DEBUG
2999 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3000 udn_tile_coord_y());
3004 if(MAPPHASE == gcphase) {
3008 #ifdef RAWPATH // TODO GC_DEBUG
3009 printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
3010 udn_tile_coord_y());
3013 #ifdef RAWPATH // TODO GC_DEBUG
3014 printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
3015 udn_tile_coord_y());
3019 if(FLUSHPHASE == gcphase) {
3023 #ifdef RAWPATH // TODO GC_DEBUG
3024 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3025 udn_tile_coord_y());
3028 // send the num of obj/liveobj/forwardobj to the startupcore
3029 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3030 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3031 gc_num_liveobj, gc_num_forwardobj, false);
3034 #endif // GC_PROFILE
3036 #ifdef RAWPATH // TODO GC_DEBUG
3037 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3038 udn_tile_coord_y());
3041 #ifdef GC_CACHE_ADAPT
3043 if(PREFINISHPHASE == gcphase) {
3047 #ifdef RAWPATH // TODO GC_DEBUG
3048 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3049 udn_tile_coord_y());
3051 // cache adapt phase
3052 cacheAdapt_mutator();
3053 cacheAdapt_gc(false);
3054 //send prefinish finish msg to core coordinator
3055 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3056 #ifdef RAWPATH // TODO GC_DEBUG
3057 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3058 udn_tile_coord_y());
3060 #ifdef GC_CACHE_SAMPLING
3061 // reset the sampling arrays
3062 bamboo_dtlb_sampling_reset();
3063 #endif // GC_CACHE_SAMPLING
3064 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3065 // zero out the gccachesamplingtbl
3066 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3067 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3068 size_cachesamplingtbl_local_r);
3070 #endif // GC_CACHE_ADAPT
3072 // invalidate all shared mem pointers
3073 bamboo_cur_msp = NULL;
3074 bamboo_smem_size = 0;
3075 bamboo_smem_zero_top = NULL;
3078 if(FINISHPHASE == gcphase) {
3084 gcprocessing = false;
3085 #ifdef RAWPATH // TODO GC_DEBUG
3086 printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3088 } // void gc_collect(struct garbagelist * stackptr)
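/* Summary of the phase order a GC core walks through in gc_collect()
 * above, driven by gcphase updates from the master core:
 *   INITPHASE -> MARKPHASE -> COMPACTPHASE -> MAPPHASE -> FLUSHPHASE
 *   (-> PREFINISHPHASE when GC_CACHE_ADAPT is on) -> FINISHPHASE
 * Each phase ends with a GCFINISH* message back to STARTUPCORE, which
 * advances gcphase once every core has reported in. */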
3090 inline void gc_nocollect(struct garbagelist * stackptr) {
3091 // inform the master that this core is at a gc safe point and is ready to
3092 // do gc
3093 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3094 self_numreceiveobjs, false);
3097 if(INITPHASE == gcphase) {
3101 #ifdef RAWPATH // TODO GC_DEBUG
3102 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3105 #ifdef GC_CACHE_ADAPT
3106 // prepare for cache adaptation:
3107 cacheAdapt_gc(true);
3108 #endif // GC_CACHE_ADAPT
3109 //send init finish msg to core coordinator
3110 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3113 if(MARKPHASE == gcphase) {
3117 #ifdef RAWPATH // TODO GC_DEBUG
3118 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3119 udn_tile_coord_y());
3121 mark(true, stackptr);
3122 #ifdef RAWPATH // TODO GC_DEBUG
3123 printf("(%x,%x) Finish mark phase, wait for flush\n",
3124 udn_tile_coord_x(), udn_tile_coord_y());
3127 // non-gc core collector routine
3129 if(FLUSHPHASE == gcphase) {
3133 #ifdef RAWPATH // TODO GC_DEBUG
3134 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3135 udn_tile_coord_y());
3138 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3139 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3140 gc_num_liveobj, gc_num_forwardobj, false);
3143 #endif // GC_PROFILE
3145 #ifdef RAWPATH // TODO GC_DEBUG
3146 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3147 udn_tile_coord_y());
3150 #ifdef GC_CACHE_ADAPT
3152 if(PREFINISHPHASE == gcphase) {
3156 #ifdef RAWPATH // TODO GC_DEBUG
3157 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3158 udn_tile_coord_y());
3160 // cache adapt phase
3161 cacheAdapt_mutator();
3162 cacheAdapt_gc(false);
3163 //send prefinish finish msg to core coordinator
3164 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3165 #ifdef RAWPATH // TODO GC_DEBUG
3166 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3167 udn_tile_coord_y());
3169 #ifdef GC_CACHE_SAMPLING
3170 // reset the sampling arrays
3171 bamboo_dtlb_sampling_reset();
3172 #endif // GC_CACHE_SAMPLING
3173 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3174 // zero out the gccachesamplingtbl
3175 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3176 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3177 size_cachesamplingtbl_local_r);
3179 #endif // GC_CACHE_ADAPT
3181 // invalidate all shared mem pointers
3182 bamboo_cur_msp = NULL;
3183 bamboo_smem_size = 0;
3184 bamboo_smem_zero_top = NULL;
3187 if(FINISHPHASE == gcphase) {
3192 gcprocessing = false;
3193 #ifdef RAWPATH // TODO GC_DEBUG
3194 printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3196 } // void gc_nocollect(struct garbagelist * stackptr)
3198 inline void gc_master(struct garbagelist * stackptr) {
3200 gcphase = INITPHASE;
3202 waitconfirm = false;
3206 // Note: all cores need to init gc including non-gc cores
3207 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3208 // send GC init messages to all cores
3209 send_msg_1(i, GCSTARTINIT, false);
3211 bool isfirst = true;
3212 bool allStall = false;
3214 #ifdef GC_CACHE_ADAPT
3215 // prepare for cache adaptation:
3216 cacheAdapt_gc(true);
3217 #endif // GC_CACHE_ADAPT
3219 #ifdef RAWPATH // TODO GC_DEBUG
3220 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3221 udn_tile_coord_y());
3224 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3226 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3227 if(gc_checkAllCoreStatus_I()) {
3228 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3231 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3236 #ifdef GC_CACHE_ADAPT_POLICY_OUTPUT
3237 gc_output_cache_sampling();
3238 #endif // GC_CACHE_ADAPT
3239 #ifdef RAWPATH // TODO GC_DEBUG
3240 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3241 udn_tile_coord_y());
3243 // all cores have finished the init phase;
3244 // restore the gcstatus of all cores
3245 // Note: all cores have to do mark including non-gc cores
3246 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3247 for(i = 1; i < NUMCORESACTIVE; ++i) {
3248 gccorestatus[i] = 1;
3249 // send GC start messages to all cores
3250 send_msg_1(i, GCSTART, false);
3253 gcphase = MARKPHASE;
3255 while(MARKPHASE == gcphase) {
3256 mark(isfirst, stackptr);
3263 } // while(MARKPHASE == gcphase)
3264 // send msgs to all cores requesting large objs info
3265 // Note: only need to ask gc cores, non-gc cores do not host any objs
3266 numconfirm = NUMCORES4GC - 1;
3267 for(i = 1; i < NUMCORES4GC; ++i) {
3268 send_msg_1(i, GCLOBJREQUEST, false);
3270 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3275 } // wait for responses
3276 // check the heaptop
3277 if(gcheaptop < gcmarkedptrbound) {
3278 gcheaptop = gcmarkedptrbound;
3283 #ifdef RAWPATH // TODO GC_DEBUG
3284 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3285 udn_tile_coord_y());
3287 // cache all large objs
3289 // not enough space to cache large objs
3290 BAMBOO_EXIT(0xb006);
3292 // predict number of blocks to fill for each core
3294 int numpbc = loadbalance(&tmpheaptop);
3296 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3297 #ifdef RAWPATH // TODO GC_DEBUG
3298 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3299 udn_tile_coord_y());
3301 //int tmptopptr = 0;
3302 //BASEPTR(gctopcore, 0, &tmptopptr);
3304 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3305 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3306 GC_BAMBOO_DEBUGPRINT(0xabab);
3307 GC_BAMBOO_DEBUGPRINT_REG(tmptopptr);
3308 for(i = 0; i < NUMCORES4GC; ++i) {
3310 BASEPTR(i, numpbc, &tmpcoreptr);
3311 //send start compact messages to all cores
3312 //TODO bug here: do not know if the direction is positive or negative
3313 if (tmpcoreptr < tmpheaptop) {
3314 gcstopblock[i] = numpbc + 1;
3315 if(i != STARTUPCORE) {
3316 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3318 gcblock2fill = numpbc+1;
3319 } // if(i != STARTUPCORE)
3321 gcstopblock[i] = numpbc;
3322 if(i != STARTUPCORE) {
3323 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3325 gcblock2fill = numpbc;
3326 } // if(i != STARTUPCORE)
3328 GC_BAMBOO_DEBUGPRINT(0xf000+i);
3329 GC_BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3330 GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3331 // init some data structures for compact phase
3333 gcfilledblocks[i] = 0;
3334 gcrequiredmems[i] = 0;
3344 bool finalcompact = false;
3345 // initialize pointers for compacting
3346 struct moveHelper * orig =
3347 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3348 struct moveHelper * to =
3349 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3350 initOrig_Dst(orig, to);
3351 int filledblocks = 0;
3352 INTPTR heaptopptr = 0;
3353 bool finishcompact = false;
3354 bool iscontinue = true;
3355 bool localcompact = true;
3356 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3357 if((!finishcompact) && iscontinue) {
3358 GC_BAMBOO_DEBUGPRINT(0xe001);
3359 GC_BAMBOO_DEBUGPRINT_REG(numpbc);
3360 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3361 finishcompact = compacthelper(orig, to, &filledblocks,
3362 &heaptopptr, &localcompact);
3363 GC_BAMBOO_DEBUGPRINT(0xe002);
3364 GC_BAMBOO_DEBUGPRINT_REG(finishcompact);
3365 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3366 GC_BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3367 GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3368 GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3371 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3372 if(gc_checkCoreStatus_I()) {
3373 // all cores have finished compacting
3374 // restore the gcstatus of all cores
3375 for(i = 0; i < NUMCORES4GC; ++i) {
3376 gccorestatus[i] = 1;
3378 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3381 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3382 // check if there is spare mem for pending move requests
3383 if(COMPACTPHASE == gcphase) {
3384 GC_BAMBOO_DEBUGPRINT(0xe003);
3385 resolvePendingMoveRequest();
3386 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3388 GC_BAMBOO_DEBUGPRINT(0xe004);
3391 } // if(gc_checkCoreStatus_I()) else ...
3394 GC_BAMBOO_DEBUGPRINT(0xe005);
3395 GC_BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3396 GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3397 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3398 to->ptr = gcmovestartaddr;
3399 to->numblocks = gcblock2fill - 1;
3400 to->bound = (to->numblocks==0) ?
3401 BAMBOO_SMEM_SIZE_L :
3402 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3403 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3404 to->offset = to->ptr - to->base;
3405 to->top = (to->numblocks==0) ?
3406 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3408 to->offset = BAMBOO_CACHE_LINE_SIZE;
3409 to->ptr += to->offset; // for header
3410 to->top += to->offset;
3411 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3412 localcompact = true;
3414 localcompact = false;
3418 } else if(!finishcompact) {
3422 } // while(COMPACTPHASE == gcphase)
3426 #ifdef RAWPATH // TODO GC_DEBUG
3427 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3428 udn_tile_coord_y());
3432 #ifdef RAWPATH // TODO GC_DEBUG
3433 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3434 udn_tile_coord_y());
3441 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3442 // Note: only gc cores host shared objs, so only they send mapping info
3444 for(i = 1; i < NUMCORES4GC; ++i) {
3445 // send start map messages to all gc cores
3446 gccorestatus[i] = 1;
3447 send_msg_1(i, GCSTARTMAPINFO, false);
3452 #ifdef RAWPATH // TODO GC_DEBUG
3453 printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
3454 udn_tile_coord_y());
3458 #ifdef RAWPATH // TODO GC_DEBUG
3459 printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
3460 udn_tile_coord_y());
3462 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3463 while(MAPPHASE == gcphase) {
3464 // check the status of all cores
3465 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3466 if(gc_checkCoreStatus_I()) {
3467 // all cores have finished sending mapping info
3468 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3471 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3472 } // while(MAPPHASE == gcphase)
3474 gcphase = FLUSHPHASE;
3475 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3476 // Note: all cores should flush their runtime data, including non-gc
3477 // cores
3478 for(i = 1; i < NUMCORESACTIVE; ++i) {
3479 // send start flush messages to all cores
3480 gccorestatus[i] = 1;
3481 send_msg_1(i, GCSTARTFLUSH, false);
3486 #ifdef RAWPATH // TODO GC_DEBUG
3487 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3488 udn_tile_coord_y());
3493 #ifdef GC_CACHE_ADAPT
3494 // now the master core needs to decide the new cache strategy
3495 cacheAdapt_master();
3496 #endif // GC_CACHE_ADAPT
3498 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3499 while(FLUSHPHASE == gcphase) {
3500 // check the status of all cores
3501 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3502 if(gc_checkAllCoreStatus_I()) {
3503 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3506 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3507 } // while(FLUSHPHASE == gcphase)
3508 #ifdef RAWPATH // TODO GC_DEBUG
3509 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3510 udn_tile_coord_y());
3513 #ifdef GC_CACHE_ADAPT
3517 gcphase = PREFINISHPHASE;
3518 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3519 // Note: all cores should do the prefinish work, including non-gc
3520 // cores
3521 for(i = 1; i < NUMCORESACTIVE; ++i) {
3522 // send start prefinish messages to all cores
3523 gccorestatus[i] = 1;
3524 send_msg_1(i, GCSTARTPREF, false);
3526 #ifdef RAWPATH // TODO GC_DEBUG
3527 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
3528 udn_tile_coord_y());
3530 // cache adapt phase
3531 cacheAdapt_mutator();
3532 #ifdef GC_CACHE_ADAPT_OUTPUT
3533 bamboo_output_cache_policy();
3535 cacheAdapt_gc(false);
3537 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3538 while(PREFINISHPHASE == gcphase) {
3539 // check the status of all cores
3540 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3541 if(gc_checkAllCoreStatus_I()) {
3542 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3545 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3546 } // while(PREFINISHPHASE == gcphase)
3548 #ifdef GC_CACHE_SAMPLING
3549 // reset the sampling arrays
3550 bamboo_dtlb_sampling_reset();
3551 #endif // GC_CACHE_SAMPLING
3552 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3553 // zero out the gccachesamplingtbl
3554 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3555 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3556 size_cachesamplingtbl_local_r);
3557 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
3559 #endif // GC_CACHE_ADAPT
3561 gcphase = FINISHPHASE;
3563 // invalidate all shared mem pointers
3564 // put it here since it takes time to inform all the other cores to
3565 // finish gc, and problems might arise if some core resumes the
3566 // mutator earlier than the others
3567 bamboo_cur_msp = NULL;
3568 bamboo_smem_size = 0;
3569 bamboo_smem_zero_top = NULL;
3574 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3575 for(i = 1; i < NUMCORESACTIVE; ++i) {
3576 // send gc finish messages to all cores
3577 send_msg_1(i, GCFINISH, false);
3578 gccorestatus[i] = 1;
3582 gcprocessing = false;
3583 #ifdef RAWPATH // TODO GC_DEBUG
3584 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
3585 udn_tile_coord_y());
3587 } // void gc_master(struct garbagelist * stackptr)
3589 inline bool gc(struct garbagelist * stackptr) {
3592 gcprocessing = false;
3596 #ifdef GC_CACHE_ADAPT
3597 #ifdef GC_CACHE_SAMPLING
3598 // disable the timer interrupt
3599 bamboo_mask_timer_intr();
3602 // core coordinator routine
3603 if(0 == BAMBOO_NUM_OF_CORE) {
3605 printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
3606 udn_tile_coord_y());
3608 bool isallstall = true;
3609 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3610 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3612 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
3613 if(gccorestatus[ti] != 0) {
3619 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3620 // some cores are still executing the mutator and have not reached
3621 // a gc safe point, so gc cannot start yet;
3622 // in case some pregc information msgs were lost, send a confirm
3623 // msg to the 'busy' core
3624 send_msg_1(ti, GCSTARTPRE, false);
3632 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
3633 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
3635 GC_BAMBOO_DEBUGPRINT(0xec04);
3636 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3637 sumsendobj += gcnumsendobjs[0][i];
3638 GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
3639 } // for(i = 0; i < NUMCORESACTIVE; ++i)
3640 GC_BAMBOO_DEBUGPRINT(0xec05);
3641 GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
3642 for(int i = 0; i < NUMCORESACTIVE; ++i) {
3643 sumsendobj -= gcnumreceiveobjs[0][i];
3644 GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
3645 } // for(i = 0; i < NUMCORESACTIVE; ++i)
3646 GC_BAMBOO_DEBUGPRINT(0xec06);
3647 GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
3648 if(0 != sumsendobj) {
3649 // there are still some msgs in flight; wait until updated
3650 // pregc information arrives and then check again
3652 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3660 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3663 #ifdef RAWPATH // TODO GC_DEBUG
3664 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3666 // Zero out the remaining bamboo_cur_msp.
3667 // Only zero out the first 4 bytes of the remaining memory.
3668 // The operation is moved here because for the GC_CACHE_ADAPT version
3669 // we need to make sure the shared heap is not touched during the
3670 // gcinit phase; otherwise there would be problems when adapting the
3671 // cache strategy.
3672 if((bamboo_cur_msp != 0)
3673 && (bamboo_smem_zero_top == bamboo_cur_msp)
3674 && (bamboo_smem_size > 0)) {
3675 *((int *)bamboo_cur_msp) = 0;
3677 #ifdef GC_FLUSH_DTLB
3678 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3679 BAMBOO_CLEAN_DTLB();
3680 gc_num_flush_dtlb++;
3683 #ifdef GC_CACHE_ADAPT
3684 #ifdef GC_CACHE_SAMPLING
3685 // get the sampling data
3686 bamboo_output_dtlb_sampling();
3687 #endif // GC_CACHE_SAMPLING
3688 #endif // GC_CACHE_ADAPT
3689 gcprocessing = true;
3690 gc_master(stackptr);
3691 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
3692 // Zero out the remaining bamboo_cur_msp.
3693 // Only zero out the first 4 bytes of the remaining memory.
3694 // The operation is moved here because for the GC_CACHE_ADAPT version
3695 // we need to make sure the shared heap is not touched during the
3696 // gcinit phase; otherwise there would be problems when adapting the
3697 // cache strategy.
3698 if((bamboo_cur_msp != 0)
3699 && (bamboo_smem_zero_top == bamboo_cur_msp)
3700 && (bamboo_smem_size > 0)) {
3701 *((int *)bamboo_cur_msp) = 0;
3703 #ifdef GC_FLUSH_DTLB
3704 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3705 BAMBOO_CLEAN_DTLB();
3706 gc_num_flush_dtlb++;
3709 #ifdef GC_CACHE_ADAPT
3710 #ifdef GC_CACHE_SAMPLING
3711 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3712 // get the sampling data
3713 bamboo_output_dtlb_sampling();
3715 #endif // GC_CACHE_SAMPLING
3716 #endif // GC_CACHE_ADAPT
3717 gcprocessing = true;
3718 gc_collect(stackptr);
3720 // Zero out the remaining bamboo_cur_msp.
3721 // Only zero out the first 4 bytes of the remaining memory.
3722 // The operation is moved here because for the GC_CACHE_ADAPT version
3723 // we need to make sure the shared heap is not touched during the
3724 // gcinit phase; otherwise there would be problems when adapting the
3725 // cache strategy.
3726 if((bamboo_cur_msp != 0)
3727 && (bamboo_smem_zero_top == bamboo_cur_msp)
3728 && (bamboo_smem_size > 0)) {
3729 *((int *)bamboo_cur_msp) = 0;
3731 #ifdef GC_FLUSH_DTLB
3732 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
3733 BAMBOO_CLEAN_DTLB();
3734 gc_num_flush_dtlb++;
3737 #ifdef GC_CACHE_ADAPT
3738 #ifdef GC_CACHE_SAMPLING
3739 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3740 // get the sampling data
3741 bamboo_output_dtlb_sampling();
3743 #endif // GC_CACHE_SAMPLING
3744 #endif // GC_CACHE_ADAPT
3745 // not a gc core, should wait for gcfinish msg
3746 gcprocessing = true;
3747 gc_nocollect(stackptr);
3749 #ifdef GC_CACHE_ADAPT
3750 #ifdef GC_CACHE_SAMPLING
3751 // enable the timer interrupt
3752 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
3753 bamboo_unmask_timer_intr();
3754 #endif // GC_CACHE_SAMPLING
3755 #endif // GC_CACHE_ADAPT
3758 } // bool gc(struct garbagelist * stackptr)
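/* Illustrative sketch of the pre-gc quiescence test in gc() above: the
 * master may only start collecting when every core sits at a gc safe
 * point and no transferred objects are still in flight, i.e. the global
 * number of sends equals the global number of receives. */
static bool msgs_in_flight_sketch(const int * numsendobjs,
                                  const int * numreceiveobjs, int ncores) {
  int sumsendobj = 0;
  for(int i = 0; i < ncores; i++) {
    sumsendobj += numsendobjs[i] - numreceiveobjs[i];
  }
  return sumsendobj != 0;  // nonzero: wait for more pregc status msgs
}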
3761 inline void gc_profileStart(void) {
3762 if(!gc_infoOverflow) {
3763 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
3764 gc_infoArray[gc_infoIndex] = gcInfo;
3766 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
3770 inline void gc_profileItem(void) {
3771 if(!gc_infoOverflow) {
3772 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
3773 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
3777 inline void gc_profileEnd(void) {
3778 if(!gc_infoOverflow) {
3779 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
3780 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
3781 gcInfo->time[gcInfo->index++] = gc_num_livespace;
3782 gcInfo->time[gcInfo->index++] = gc_num_freespace;
3783 gcInfo->time[gcInfo->index++] = gc_num_lobj;
3784 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
3785 gcInfo->time[gcInfo->index++] = gc_num_obj;
3786 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
3787 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
3789 if(gc_infoIndex == GCINFOLENGTH) {
3790 gc_infoOverflow = true;
3791 //taskInfoIndex = 0;
3796 // output the profiling data
3797 void gc_outputProfileData() {
3800 unsigned long long totalgc = 0;
3802 #ifndef BAMBOO_MEMPROF
3803 BAMBOO_PRINT(0xdddd);
3805 // output gc related info
3806 for(i= 0; i < gc_infoIndex; i++) {
3807 GCInfo * gcInfo = gc_infoArray[i];
3808 #ifdef BAMBOO_MEMPROF
3809 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
3811 unsigned long long tmp = 0;
3812 BAMBOO_PRINT(0xddda);
3813 for(j = 0; j < gcInfo->index - 7; j++) {
3814 BAMBOO_PRINT(gcInfo->time[j]);
3815 BAMBOO_PRINT(gcInfo->time[j]-tmp);
3816 BAMBOO_PRINT(0xdddb);
3817 tmp = gcInfo->time[j];
3819 tmp = (tmp-gcInfo->time[0]);
3820 BAMBOO_PRINT_REG(tmp);
3821 BAMBOO_PRINT(0xdddc);
3822 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 7]);
3823 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 6]);
3824 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 5]);
3825 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 4]);
3826 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 3]);
3827 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 2]);
3828 BAMBOO_PRINT(gcInfo->time[gcInfo->index - 1]);
3829 BAMBOO_PRINT(0xddde);
3833 #ifndef BAMBOO_MEMPROF
3834 BAMBOO_PRINT(0xdddf);
3836 BAMBOO_PRINT_REG(totalgc);
3838 if(gc_infoOverflow) {
3839 BAMBOO_PRINT(0xefee);
3842 #ifndef BAMBOO_MEMPROF
3843 BAMBOO_PRINT(0xeeee);
3846 #endif // #ifdef GC_PROFILE