Robust/src/Runtime/bamboo/multicoregarbage.c

   1 // BAMBOO_EXIT(0xb000);
   2 // TODO: DO NOT support tag!!!
   3 #ifdef MULTICORE_GC
   4 #include "runtime.h"
   5 #include "multicoregarbage.h"
   6 #include "multicoreruntime.h"
   7 #include "runtime_arch.h"
   8 #include "SimpleHash.h"
   9 #include "GenericHashtable.h"
  10 #include "ObjectHash.h"
  11 #include "GCSharedHash.h"
  12
  13 extern int corenum;
  14 #ifdef TASK
  15 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
  16 extern int numqueues[][NUMCLASSES];
  17 extern struct genhashtable * activetasks;
  18 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
  19 extern struct taskparamdescriptor *currtpd;
  20 extern struct LockValue runtime_locks[MAXTASKPARAMS];
  21 extern int runtime_locklen;
  22 #endif
  23
  24 extern struct global_defs_t * global_defs_p;
  25
  26 #ifdef SMEMM
  27 extern unsigned int gcmem_mixed_threshold;
  28 extern unsigned int gcmem_mixed_usedmem;
  29 #endif
  30
  31 #ifdef MGC
  32 extern struct lockvector bamboo_threadlocks;
  33 #endif
  34
// One fixed-size chunk of the pointer work queue. Chunks are chained through
// 'next' into a singly linked list.
struct pointerblock {
  void * ptrs[NUMPTRS];        // queued pointers stored in this chunk
  struct pointerblock *next;   // chunk that follows this one in the queue
};

// Enqueue side: chunk currently being filled and the next free slot in it
// (see gc_enqueue_I).
struct pointerblock *gchead=NULL;
int gcheadindex=0;
// Destructive dequeue cursor (see gc_dequeue_I).
struct pointerblock *gctail=NULL;
int gctailindex=0;
// Non-destructive dequeue cursor (see gc_dequeue2_I).
struct pointerblock *gctail2=NULL;
int gctailindex2=0;
// At most one drained chunk kept for reuse instead of being freed.
struct pointerblock *gcspare=NULL;
  47
// Number of large-object entries stored in one lobjpointerblock.
#define NUMLOBJPTRS 20

// One chunk of the large-object queue. The three arrays are parallel: slot k
// of lobjs/lengths/hosts describes one large object (see gc_lobjenqueue_I).
// Chunks form a doubly linked list.
struct lobjpointerblock {
  void * lobjs[NUMLOBJPTRS];       // start address of each large object
  int lengths[NUMLOBJPTRS];        // length recorded for each object
  int hosts[NUMLOBJPTRS];          // host value recorded for each object
  struct lobjpointerblock *next;   // following chunk
  struct lobjpointerblock *prev;   // preceding chunk
};

// Enqueue side: chunk being filled and the next free slot in it.
struct lobjpointerblock *gclobjhead=NULL;
int gclobjheadindex=0;
// Destructive dequeue cursor (see gc_lobjdequeue_I).
struct lobjpointerblock *gclobjtail=NULL;
int gclobjtailindex=0;
// Secondary cursor used by the non-destructive traversals
// (gc_lobjdequeue2_I / gc_lobjdequeue3_I / gc_lobjdequeue4_I).
struct lobjpointerblock *gclobjtail2=NULL;
int gclobjtailindex2=0;
// At most one drained chunk kept for reuse instead of being freed.
struct lobjpointerblock *gclobjspare=NULL;
  65
#ifdef GC_CACHE_ADAPT
// Record compiled in only when GC_CACHE_ADAPT is defined. It holds start/end
// addresses and an index for an "orig" page and a "to" page, plus one
// sampling counter per active core.
// NOTE(review): none of these fields is used in the part of the file reviewed
// here; the exact meaning of each field should be confirmed against its users.
typedef struct gc_cache_revise_info {
  unsigned int orig_page_start_va;
  unsigned int orig_page_end_va;
  unsigned int orig_page_index;
  unsigned int to_page_start_va;
  unsigned int to_page_end_va;
  unsigned int to_page_index;
  unsigned int revised_sampling[NUMCORESACTIVE];
} gc_cache_revise_info_t;
// Single file-scope instance of the record above.
gc_cache_revise_info_t gc_cache_revise_infomation;
#endif// GC_CACHE_ADAPT
  78
  79 #ifdef GC_DEBUG
  80 // dump whole mem in blocks
  81 inline void dumpSMem() {
  82   int block = 0;
  83   int sblock = 0;
  84   unsigned int j = 0;
  85   unsigned int i = 0;
  86   int coren = 0;
  87   int x = 0;
  88   int y = 0;
  89   printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
  90              udn_tile_coord_y());
  91   // reserved blocks for sblocktbl
  92   printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
  93              udn_tile_coord_y());
  94   for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
  95     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
  96                    udn_tile_coord_x(), udn_tile_coord_y(),
  97            *((int *)(i)), *((int *)(i + 4)),
  98            *((int *)(i + 4*2)), *((int *)(i + 4*3)),
  99            *((int *)(i + 4*4)), *((int *)(i + 4*5)),
 100            *((int *)(i + 4*6)), *((int *)(i + 4*7)),
 101            *((int *)(i + 4*8)), *((int *)(i + 4*9)),
 102            *((int *)(i + 4*10)), *((int *)(i + 4*11)),
 103            *((int *)(i + 4*12)), *((int *)(i + 4*13)),
 104            *((int *)(i + 4*14)), *((int *)(i + 4*15)));
 105   }
 106   sblock = gcreservedsb;
 107   bool advanceblock = false;
 108   // remaining memory
 109   for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
 110     advanceblock = false;
 111     // computing sblock # and block #, core coordinate (x,y) also
 112     if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
 113       // finished a sblock
 114       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
 115                 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
 116                   // finished a block
 117                   block++;
 118                   advanceblock = true;
 119                 }
 120       } else {
 121                 // finished a block
 122                 block++;
 123                 advanceblock = true;
 124       }
 125       // compute core #
 126       if(advanceblock) {
 127                 coren = gc_block2core[block%(NUMCORES4GC*2)];
 128       }
 129       // compute core coordinate
 130       BAMBOO_COORDS(coren, &x, &y);
 131       printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
 132                      udn_tile_coord_x(), udn_tile_coord_y(),
 133              block, sblock++, x, y,
 134              (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
 135     }
 136     j++;
 137     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
 138                    udn_tile_coord_x(), udn_tile_coord_y(),
 139            *((int *)(i)), *((int *)(i + 4)),
 140            *((int *)(i + 4*2)), *((int *)(i + 4*3)),
 141            *((int *)(i + 4*4)), *((int *)(i + 4*5)),
 142            *((int *)(i + 4*6)), *((int *)(i + 4*7)),
 143            *((int *)(i + 4*8)), *((int *)(i + 4*9)),
 144            *((int *)(i + 4*10)), *((int *)(i + 4*11)),
 145            *((int *)(i + 4*12)), *((int *)(i + 4*13)),
 146            *((int *)(i + 4*14)), *((int *)(i + 4*15)));
 147   }
 148   printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
 149 }
 150 #endif
 151
 152 // should be invoked with interruption closed
 153 inline void gc_enqueue_I(void *ptr) {
 154   GC_BAMBOO_DEBUGPRINT(0xe601);
 155   GC_BAMBOO_DEBUGPRINT_REG(ptr);
 156   if (gcheadindex==NUMPTRS) {
 157     struct pointerblock * tmp;
 158     if (gcspare!=NULL) {
 159       tmp=gcspare;
 160       gcspare=NULL;
 161     } else {
 162       tmp=RUNMALLOC_I(sizeof(struct pointerblock));
 163     }  // if (gcspare!=NULL)
 164     gchead->next=tmp;
 165     gchead=tmp;
 166     gcheadindex=0;
 167   } // if (gcheadindex==NUMPTRS)
 168   gchead->ptrs[gcheadindex++]=ptr;
 169   GC_BAMBOO_DEBUGPRINT(0xe602);
 170 } // void gc_enqueue_I(void *ptr)
 171
 172 // dequeue and destroy the queue
 173 inline void * gc_dequeue_I() {
 174   if (gctailindex==NUMPTRS) {
 175     struct pointerblock *tmp=gctail;
 176     gctail=gctail->next;
 177     gctailindex=0;
 178     if (gcspare!=NULL) {
 179       RUNFREE(tmp);
 180     } else {
 181       gcspare=tmp;
 182     }  // if (gcspare!=NULL)
 183   } // if (gctailindex==NUMPTRS)
 184   return gctail->ptrs[gctailindex++];
 185 } // void * gc_dequeue()
 186
 187 // dequeue and do not destroy the queue
 188 inline void * gc_dequeue2_I() {
 189   if (gctailindex2==NUMPTRS) {
 190     struct pointerblock *tmp=gctail2;
 191     gctail2=gctail2->next;
 192     gctailindex2=0;
 193   } // if (gctailindex2==NUMPTRS)
 194   return gctail2->ptrs[gctailindex2++];
 195 } // void * gc_dequeue2()
 196
 197 inline int gc_moreItems_I() {
 198   if ((gchead==gctail)&&(gctailindex==gcheadindex))
 199     return 0;
 200   return 1;
 201 } // int gc_moreItems()
 202
 203 inline int gc_moreItems2_I() {
 204   if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
 205     return 0;
 206   return 1;
 207 } // int gc_moreItems2()
 208
 209 // should be invoked with interruption closed
 210 // enqueue a large obj: start addr & length
 211 inline void gc_lobjenqueue_I(void *ptr,
 212                              unsigned int length,
 213                              unsigned int host) {
 214   GC_BAMBOO_DEBUGPRINT(0xe901);
 215   if (gclobjheadindex==NUMLOBJPTRS) {
 216     struct lobjpointerblock * tmp;
 217     if (gclobjspare!=NULL) {
 218       tmp=gclobjspare;
 219       gclobjspare=NULL;
 220     } else {
 221       tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
 222     }  // if (gclobjspare!=NULL)
 223     gclobjhead->next=tmp;
 224     tmp->prev = gclobjhead;
 225     gclobjhead=tmp;
 226     gclobjheadindex=0;
 227   } // if (gclobjheadindex==NUMLOBJPTRS)
 228   gclobjhead->lobjs[gclobjheadindex]=ptr;
 229   gclobjhead->lengths[gclobjheadindex]=length;
 230   gclobjhead->hosts[gclobjheadindex++]=host;
 231   GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
 232   GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
 233   GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
 234 } // void gc_lobjenqueue_I(void *ptr...)
 235
 236 // dequeue and destroy the queue
 237 inline void * gc_lobjdequeue_I(unsigned int * length,
 238                                unsigned int * host) {
 239   if (gclobjtailindex==NUMLOBJPTRS) {
 240     struct lobjpointerblock *tmp=gclobjtail;
 241     gclobjtail=gclobjtail->next;
 242     gclobjtailindex=0;
 243     gclobjtail->prev = NULL;
 244     if (gclobjspare!=NULL) {
 245       RUNFREE(tmp);
 246     } else {
 247       gclobjspare=tmp;
 248       tmp->next = NULL;
 249       tmp->prev = NULL;
 250     }  // if (gclobjspare!=NULL)
 251   } // if (gclobjtailindex==NUMLOBJPTRS)
 252   if(length != NULL) {
 253     *length = gclobjtail->lengths[gclobjtailindex];
 254   }
 255   if(host != NULL) {
 256     *host = (unsigned int)(gclobjtail->hosts[gclobjtailindex]);
 257   }
 258   return gclobjtail->lobjs[gclobjtailindex++];
 259 } // void * gc_lobjdequeue()
 260
 261 inline int gc_lobjmoreItems_I() {
 262   if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
 263     return 0;
 264   return 1;
 265 } // int gc_lobjmoreItems()
 266
 267 // dequeue and don't destroy the queue
 268 inline void gc_lobjdequeue2_I() {
 269   if (gclobjtailindex2==NUMLOBJPTRS) {
 270     gclobjtail2=gclobjtail2->next;
 271     gclobjtailindex2=1;
 272   } else {
 273     gclobjtailindex2++;
 274   }  // if (gclobjtailindex2==NUMLOBJPTRS)
 275 } // void * gc_lobjdequeue2()
 276
 277 inline int gc_lobjmoreItems2_I() {
 278   if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
 279     return 0;
 280   return 1;
 281 } // int gc_lobjmoreItems2()
 282
 283 // 'reversly' dequeue and don't destroy the queue
 284 inline void gc_lobjdequeue3_I() {
 285   if (gclobjtailindex2==0) {
 286     gclobjtail2=gclobjtail2->prev;
 287     gclobjtailindex2=NUMLOBJPTRS-1;
 288   } else {
 289     gclobjtailindex2--;
 290   }  // if (gclobjtailindex2==NUMLOBJPTRS)
 291 } // void * gc_lobjdequeue3()
 292
 293 inline int gc_lobjmoreItems3_I() {
 294   if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
 295     return 0;
 296   return 1;
 297 } // int gc_lobjmoreItems3()
 298
 299 inline void gc_lobjqueueinit4_I() {
 300   gclobjtail2 = gclobjtail;
 301   gclobjtailindex2 = gclobjtailindex;
 302 } // void gc_lobjqueueinit2()
 303
 304 inline void * gc_lobjdequeue4_I(unsigned int * length,
 305                                 unsigned int * host) {
 306   if (gclobjtailindex2==NUMLOBJPTRS) {
 307     gclobjtail2=gclobjtail2->next;
 308     gclobjtailindex2=0;
 309   } // if (gclobjtailindex==NUMLOBJPTRS)
 310   if(length != NULL) {
 311     *length = gclobjtail2->lengths[gclobjtailindex2];
 312   }
 313   if(host != NULL) {
 314     *host = (unsigned int)(gclobjtail2->hosts[gclobjtailindex2]);
 315   }
 316   return gclobjtail2->lobjs[gclobjtailindex2++];
 317 } // void * gc_lobjdequeue()
 318
 319 inline int gc_lobjmoreItems4_I() {
 320   if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
 321     return 0;
 322   return 1;
 323 } // int gc_lobjmoreItems(
 324
// File-scope value initialised to 0.
// NOTE(review): not referenced in the part of the file reviewed here; its
// meaning and users should be confirmed.
unsigned int gccurr_heapbound = 0;
 326
 327 inline void gettype_size(void * ptr,
 328                          int * ttype,
 329                          unsigned int * tsize) {
 330   int type = ((int *)ptr)[0];
 331   unsigned int size = 0;
 332   if(type < NUMCLASSES) {
 333     // a normal object
 334     size = classsize[type];
 335   } else {
 336     // an array
 337     struct ArrayObject *ao=(struct ArrayObject *)ptr;
 338     unsigned int elementsize=classsize[type];
 339     unsigned int length=ao->___length___;
 340     size=sizeof(struct ArrayObject)+length*elementsize;
 341   }  // if(type < NUMCLASSES)
 342   *ttype = type;
 343   *tsize = size;
 344 }
 345
 346 inline bool isLarge(void * ptr,
 347                     int * ttype,
 348                     unsigned int * tsize) {
 349   GC_BAMBOO_DEBUGPRINT(0xe701);
 350   GC_BAMBOO_DEBUGPRINT_REG(ptr);
 351   // check if a pointer is referring to a large object
 352   gettype_size(ptr, ttype, tsize);
 353   GC_BAMBOO_DEBUGPRINT(*tsize);
 354   unsigned int bound = (BAMBOO_SMEM_SIZE);
 355   if(((unsigned int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
 356     bound = (BAMBOO_SMEM_SIZE_L);
 357   }
 358   if((((unsigned int)ptr-gcbaseva)%(bound))==0) {
 359     // ptr is a start of a block
 360     GC_BAMBOO_DEBUGPRINT(0xe702);
 361     GC_BAMBOO_DEBUGPRINT(1);
 362     return true;
 363   }
 364   if((bound-(((unsigned int)ptr-gcbaseva)%bound)) < (*tsize)) {
 365     // it acrosses the boundary of current block
 366     GC_BAMBOO_DEBUGPRINT(0xe703);
 367     GC_BAMBOO_DEBUGPRINT(1);
 368     return true;
 369   }
 370   GC_BAMBOO_DEBUGPRINT(0);
 371   return false;
 372 } // bool isLarge(void * ptr, int * ttype, int * tsize)
 373
 374 inline unsigned int hostcore(void * ptr) {
 375   // check the host core of ptr
 376   unsigned int host = 0;
 377   RESIDECORE(ptr, &host);
 378   GC_BAMBOO_DEBUGPRINT(0xedd0);
 379   GC_BAMBOO_DEBUGPRINT_REG(ptr);
 380   GC_BAMBOO_DEBUGPRINT_REG(host);
 381   return host;
 382 } // int hostcore(void * ptr)
 383
 384 inline void cpu2coords(unsigned int coren,
 385                            unsigned int * x,
 386                                            unsigned int * y) {
 387   *x = bamboo_cpu2coords[2*coren];
 388   *y = bamboo_cpu2coords[2*coren+1];
 389 } // void cpu2coords(...)
 390
 391 inline bool isLocal(void * ptr) {
 392   // check if a pointer is in shared heap on this core
 393   return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
 394 } // bool isLocal(void * ptr)
 395
 396 inline bool gc_checkCoreStatus_I() {
 397   bool allStall = true;
 398   for(int i = 0; i < NUMCORES4GC; ++i) {
 399     if(gccorestatus[i] != 0) {
 400       allStall = false;
 401       break;
 402     }  // if(gccorestatus[i] != 0)
 403   }  // for(i = 0; i < NUMCORES4GC; ++i)
 404   return allStall;
 405 }
 406
 407 inline bool gc_checkAllCoreStatus_I() {
 408   bool allStall = true;
 409   for(int i = 0; i < NUMCORESACTIVE; ++i) {
 410     if(gccorestatus[i] != 0) {
 411       allStall = false;
 412       break;
 413     }  // if(gccorestatus[i] != 0)
 414   }  // for(i = 0; i < NUMCORESACTIVE; ++i)
 415   return allStall;
 416 }
 417
// Two-phase check whether the mark phase can be ended.
//
// The body only runs when no confirmation round is pending (!waitconfirm) or
// when a pending round has collected all replies (numconfirm == 0).
//  - Phase 1 (!waitconfirm): this core's send/receive counters are recorded
//    in row gcnumsrobjs_index. If every active core reports status 0, a
//    GCMARKCONFIRM request is sent to cores 1..NUMCORESACTIVE-1 and
//    waitconfirm is set.
//  - Phase 2 (waitconfirm): the counters are recorded in the other row. If
//    all cores are still stalled, the sends and receives of row
//    gcnumsrobjs_index balance out, and both rows are identical, gcphase is
//    switched to COMPACTPHASE. Otherwise waitconfirm is cleared and
//    gcnumsrobjs_index is flipped so the check starts over.
// NOTE(review): the confirm loop starts at core 1, which presumably assumes
// that the calling core is core 0 - confirm against the callers.
inline void checkMarkStatue() {
  GC_BAMBOO_DEBUGPRINT(0xee01);
  int i;
  if((!waitconfirm) ||
     (waitconfirm && (numconfirm == 0))) {
    GC_BAMBOO_DEBUGPRINT(0xee02);
        // row of gcnumsendobjs/gcnumreceiveobjs that receives this core's
        // counters for the current phase
        unsigned int entry_index = 0;
        if(waitconfirm) {
          // phase 2
          entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
        } else {
          // phase 1
          entry_index = gcnumsrobjs_index;
        }
    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
    // record this core as stalled together with its own counters
    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
    // check the status of all cores
    bool allStall = gc_checkAllCoreStatus_I();
    GC_BAMBOO_DEBUGPRINT(0xee03);
    if(allStall) {
      GC_BAMBOO_DEBUGPRINT(0xee04);
      // ask for confirm
      if(!waitconfirm) {
                GC_BAMBOO_DEBUGPRINT(0xee05);
                // the first time found all cores stall
                // send out status confirm msg to all other cores
                // reset the corestatus array too
                gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
                waitconfirm = true;
                numconfirm = NUMCORESACTIVE - 1;
                // runtime mode is left before the messages are sent
                BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
                for(i = 1; i < NUMCORESACTIVE; ++i) {
                  gccorestatus[i] = 1;
                  // send mark phase finish confirm request msg to core i
                  send_msg_1(i, GCMARKCONFIRM, false);
                }  // for(i = 1; i < NUMCORESACTIVE; ++i)
      } else {
                // Phase 2
                // check if the sum of send objs and receive obj are the same
                // yes->check if the info is the latest; no->go on executing
                unsigned int sumsendobj = 0;
                for(i = 0; i < NUMCORESACTIVE; ++i) {
                  sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
                }  // for(i = 0; i < NUMCORESACTIVE; ++i)
                GC_BAMBOO_DEBUGPRINT(0xee06);
                GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
                for(i = 0; i < NUMCORESACTIVE; ++i) {
                  sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
                }  // for(i = 0; i < NUMCORESACTIVE; ++i)
                GC_BAMBOO_DEBUGPRINT(0xee07);
                GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
                // zero means every object sent has also been received
                if(0 == sumsendobj) {
                  // Check if there are changes of the numsendobjs or numreceiveobjs on
                  // each core
                  bool ischanged = false;
                  for(i = 0; i < NUMCORESACTIVE; ++i) {
                        if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
                                (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
                          ischanged = true;
                          break;
                        }
                  }  // for(i = 0; i < NUMCORESACTIVE; ++i)
                  GC_BAMBOO_DEBUGPRINT(0xee08);
                  GC_BAMBOO_DEBUGPRINT_REG(ischanged);
                  if(!ischanged) {
                        GC_BAMBOO_DEBUGPRINT(0xee09);
                        // all the core status info are the latest
                        // stop mark phase
                        gcphase = COMPACTPHASE;
                        // restore the gcstatus for all cores
                        for(i = 0; i < NUMCORESACTIVE; ++i) {
                          gccorestatus[i] = 1;
                        }  // for(i = 0; i < NUMCORESACTIVE; ++i)
                  } else {
                        // There were changes between phase 1 and phase 2, can not decide
                        // whether the mark phase has been finished
                        waitconfirm = false;
                        // As it fails in phase 2, flip the entries
                        gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
                  } // if(!ischanged)
                } else {
                  // Sends and receives do not balance, can not decide
                  // whether the mark phase has been finished
                  waitconfirm = false;
                  // As it fails in phase 2, flip the entries
                  gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
                } // if(0 == sumsendobj) else ...
                BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
      } // if(!gcwaitconfirm) else()
    } else {
          // some core is still busy: just leave runtime mode
          BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
    } // if(allStall)
  }  // if((!waitconfirm)...
  GC_BAMBOO_DEBUGPRINT(0xee0a);
} // void checkMarkStatue()
 515
 516 inline void initGC() {
 517   int i;
 518   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 519     for(i = 0; i < NUMCORES4GC; ++i) {
 520       gccorestatus[i] = 1;
 521       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 522       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 523       gcloads[i] = 0;
 524       gcrequiredmems[i] = 0;
 525       gcfilledblocks[i] = 0;
 526       gcstopblock[i] = 0;
 527     } // for(i = 0; i < NUMCORES4GC; ++i)
 528     for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
 529       gccorestatus[i] = 1;
 530       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
 531       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
 532     }
 533     gcheaptop = 0;
 534     gctopcore = 0;
 535     gctopblock = 0;
 536 #ifdef GC_TBL_DEBUG
 537         // initialize the gcmappingtbl
 538         BAMBOO_MEMSET_WH(gcmappingtbl, 0, bamboo_rmsp_size);
 539 #endif
 540   } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
 541   gcself_numsendobjs = 0;
 542   gcself_numreceiveobjs = 0;
 543   gcmarkedptrbound = 0;
 544   gcnumlobjs = 0;
 545   gcmovestartaddr = 0;
 546   gctomove = false;
 547   gcblock2fill = 0;
 548   gcmovepending = 0;
 549   gccurr_heaptop = 0;
 550   gcdstcore = 0;
 551
 552   // initialize queue
 553   if (gchead==NULL) {
 554     gcheadindex=gctailindex=gctailindex2 = 0;
 555     gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
 556   } else {
 557     gctailindex = gctailindex2 = gcheadindex;
 558     gctail = gctail2 = gchead;
 559   }
 560
 561   // initialize the large obj queues
 562   if (gclobjhead==NULL) {
 563     gclobjheadindex=0;
 564     gclobjtailindex=0;
 565     gclobjtailindex2 = 0;
 566     gclobjhead=gclobjtail=gclobjtail2=
 567           RUNMALLOC(sizeof(struct lobjpointerblock));
 568   } else {
 569     gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
 570     gclobjtail = gclobjtail2 = gclobjhead;
 571   }
 572   gclobjhead->next = gclobjhead->prev = NULL;
 573
 574   freeMGCHash(gcforwardobjtbl);
 575   gcforwardobjtbl = allocateMGCHash(20, 3);
 576
 577 #ifdef GC_PROFILE
 578   gc_num_livespace = 0;
 579   gc_num_freespace = 0;
 580   gc_num_lobj = 0;
 581   gc_num_lobjspace = 0;
 582   gc_num_liveobj = 0;
 583   gc_num_forwardobj = 0;
 584   gc_num_profiles = NUMCORESACTIVE - 1;
 585 #endif
 586 } // void initGC()
 587
 588 // compute load balance for all cores
 589 inline int loadbalance(unsigned int * heaptop) {
 590   // compute load balance
 591   int i;
 592
 593   // get the total loads
 594   unsigned int tloads = gcloads[STARTUPCORE];
 595   for(i = 1; i < NUMCORES4GC; i++) {
 596     tloads += gcloads[i];
 597   }
 598   *heaptop = gcbaseva + tloads;
 599
 600   GC_BAMBOO_DEBUGPRINT(0xdddd);
 601   GC_BAMBOO_DEBUGPRINT_REG(tloads);
 602   GC_BAMBOO_DEBUGPRINT_REG(*heaptop);
 603   unsigned int b = 0;
 604   BLOCKINDEX(*heaptop, &b);
 605   unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);// num of blocks per core
 606   GC_BAMBOO_DEBUGPRINT_REG(b);
 607   GC_BAMBOO_DEBUGPRINT_REG(numbpc);
 608   gctopblock = b;
 609   RESIDECORE(heaptop, &gctopcore);
 610   GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
 611   return numbpc;
 612 } // void loadbalance(int * heaptop)
 613
// Cache all queued large objects at the top of the shared heap.
//
// Step 1: walk the large-object queue with the secondary cursor, summing the
//         object lengths and insertion-sorting the entries by start address
//         in ascending order (the sort moves entries across chunk boundaries
//         through the 'prev' links).
// Step 2: compute where the cached objects would start
//         (gcbaseva + BAMBOO_SHARED_MEM_SIZE - total size); if gcheaptop lies
//         above that address there is not enough room and false is returned.
// Step 3: record that start address in gcheaptop, then walk the queue
//         backwards and copy every object to the top of the heap, filling it
//         downwards. The BAMBOOMARKBIT word of each source object is set to
//         COMPACTED before it is copied.
// Returns true when the objects have been cached, false otherwise.
inline bool cacheLObjs() {
  // check the total mem size need for large objs
  unsigned long long sumsize = 0;
  unsigned int size = 0;
  GC_BAMBOO_DEBUGPRINT(0xe801);
  // start the secondary cursor at the destructive tail position
  gclobjtail2 = gclobjtail;
  gclobjtailindex2 = gclobjtailindex;
  unsigned int tmp_lobj = 0;
  unsigned int tmp_len = 0;
  unsigned int tmp_host = 0;
  // compute total mem size required and sort the lobjs in ascending order
  // TODO USE QUICK SORT INSTEAD?
  while(gc_lobjmoreItems2_I()) {
    gc_lobjdequeue2_I();
    // the entry just passed by the cursor is at index gclobjtailindex2-1
    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
    sumsize += tmp_len;
#ifdef GC_PROFILE
#ifdef MGC_SPEC
        if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
#endif
        gc_num_lobj++;
#ifdef MGC_SPEC
        }
#endif
#endif
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
    GC_BAMBOO_DEBUGPRINT_REG(tmp_len);
    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    unsigned int i = gclobjtailindex2-1;
    struct lobjpointerblock * tmp_block = gclobjtail2;
    // find the place to insert: shift every preceding entry with a larger
    // address one slot up until the right position is found
    while(true) {
      if(i == 0) {
                // first slot of a chunk: the predecessor is the last slot of
                // the previous chunk, if there is one
                if(tmp_block->prev == NULL) {
                  break;
                }
                if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
                  tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
                  tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
                  tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
                  tmp_block = tmp_block->prev;
                  i = NUMLOBJPTRS-1;
                } else {
                  break;
                }  // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
          } else {
                if(tmp_block->lobjs[i-1] > tmp_lobj) {
                  tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
                  tmp_block->lengths[i] = tmp_block->lengths[i-1];
                  tmp_block->hosts[i] = tmp_block->hosts[i-1];
                  i--;
                } else {
                  break;
                }  // if(tmp_block->lobjs[i-1] < tmp_lobj)
      }  // if(i ==0 ) else {}
    }   // while(true)
    // insert it (skipped when the slot index is unchanged)
    if(i != gclobjtailindex2 - 1) {
      tmp_block->lobjs[i] = tmp_lobj;
      tmp_block->lengths[i] = tmp_len;
      tmp_block->hosts[i] = tmp_host;
    }
  }  // while(gc_lobjmoreItems2())

#ifdef GC_PROFILE
#ifdef MGC_SPEC
        if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
#endif
  gc_num_lobjspace = sumsize;
#ifdef MGC_SPEC
        }
#endif
#endif
  // check if there are enough space to cache these large objs
  unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
  if((unsigned long long)gcheaptop > (unsigned long long)dst) {
    // do not have enough room to cache large objs
    GC_BAMBOO_DEBUGPRINT(0xe802);
    GC_BAMBOO_DEBUGPRINT_REG(dst);
    GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
        GC_BAMBOO_DEBUGPRINT_REG(sumsize);
    return false;
  }
  GC_BAMBOO_DEBUGPRINT(0xe803);
  GC_BAMBOO_DEBUGPRINT_REG(dst);
  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);

  gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
  // cache the largeObjs to the top of the shared heap
  dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
  // the secondary cursor is at the end of the queue here; walk it backwards
  while(gc_lobjmoreItems3_I()) {
    gc_lobjdequeue3_I();
    size = gclobjtail2->lengths[gclobjtailindex2];
    // set the mark field to COMPACTED, indicating that this obj has been
    // moved and need to be flushed
    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[BAMBOOMARKBIT] = COMPACTED;
    dst -= size;
    // use memmove when the destination starts below the end of the source
    if((unsigned int)dst <
                (unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    } else {
      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
    }
    GC_BAMBOO_DEBUGPRINT(0x804);
    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
    GC_BAMBOO_DEBUGPRINT(dst);
    GC_BAMBOO_DEBUGPRINT_REG(size);
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
    GC_BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
  }
  return true;
} // void cacheLObjs()
 728
 729 // update the bmmboo_smemtbl to record current shared mem usage
 730 void updateSmemTbl(unsigned int coren,
 731                    unsigned int localtop) {
 732   unsigned int ltopcore = 0;
 733   unsigned int bound = BAMBOO_SMEM_SIZE_L;
 734   BLOCKINDEX(localtop, &ltopcore);
 735   if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
 736     bound = BAMBOO_SMEM_SIZE;
 737   }
 738   unsigned int load = (unsigned int)(localtop-gcbaseva)%(unsigned int)bound;
 739   unsigned int i = 0;
 740   unsigned int j = 0;
 741   unsigned int toset = 0;
 742   do {
 743     toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
 744 #ifdef GC_TBL_DEBUG
 745         if(toset >= gcnumblock) {
 746           tprintf("ltopcore: %d, localtop: %x, toset: %d, gcnumblock: %d (%d, %d) \n", ltopcore, localtop, toset, gcnumblock, i, j);
 747           BAMBOO_EXIT(0xb001);
 748         }
 749 #endif
 750     if(toset < ltopcore) {
 751       bamboo_smemtbl[toset]=
 752         (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
 753 #ifdef SMEMM
 754           gcmem_mixed_usedmem += bamboo_smemtbl[toset];
 755 #endif
 756     } else if(toset == ltopcore) {
 757       bamboo_smemtbl[toset] = load;
 758 #ifdef SMEMM
 759           gcmem_mixed_usedmem += bamboo_smemtbl[toset];
 760 #endif
 761       break;
 762     } else {
 763       break;
 764     }
 765     i++;
 766     if(i == 2) {
 767       i = 0;
 768       j++;
 769     }
 770   } while(true);
 771 } // void updateSmemTbl(int, int)
 772
 773 inline void moveLObjs() {
 774   GC_BAMBOO_DEBUGPRINT(0xea01);
 775 #ifdef SMEMM
 776   // update the gcmem_mixed_usedmem
 777   gcmem_mixed_usedmem = 0;
 778 #endif
 779   // zero out the smemtbl
 780   BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
 781   // find current heap top
 782   // flush all gcloads to indicate the real heap top on one core
 783   // previous it represents the next available ptr on a core
 784   if(((unsigned int)gcloads[0] > (unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))
 785      && (((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
 786     // edge of a block, check if this is exactly the heaptop
 787     BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
 788     gcloads[0]+=(gcfilledblocks[0]>1 ?
 789                  (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
 790   }
 791   updateSmemTbl(0, gcloads[0]);
 792   GC_BAMBOO_DEBUGPRINT(0xea02);
 793   GC_BAMBOO_DEBUGPRINT_REG(gcloads[0]);
 794   GC_BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
 795   for(int i = 1; i < NUMCORES4GC; i++) {
 796     unsigned int tmptop = 0;
 797     GC_BAMBOO_DEBUGPRINT(0xf000+i);
 798     GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
 799     GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
 800     if((gcfilledblocks[i] > 0)
 801        && (((unsigned int)gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
 802       // edge of a block, check if this is exactly the heaptop
 803       BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
 804       gcloads[i] +=
 805                 (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
 806       tmptop = gcloads[i];
 807     }
 808     updateSmemTbl(i, gcloads[i]);
 809     GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
 810   } // for(int i = 1; i < NUMCORES4GC; i++) {
 811
 812   // find current heap top
 813   // TODO
 814   // a bug here: when using local allocation, directly move large objects
 815   // to the highest free chunk might not be memory efficient
 816   unsigned int tmpheaptop = 0;
 817   unsigned int size = 0;
 818   unsigned int bound = 0;
 819   int i = 0;
 820   for(i = gcnumblock-1; i >= 0; i--) {
 821     if(bamboo_smemtbl[i] > 0) {
 822       break;
 823     }
 824   }
 825   if(i == -1) {
 826     tmpheaptop = gcbaseva;
 827   } else {
 828     tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
 829                 (BAMBOO_SMEM_SIZE_L*i) :
 830         (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
 831   }
 832
 833   // move large objs from gcheaptop to tmpheaptop
 834   // write the header first
 835   unsigned int tomove = gcbaseva+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
 836 #ifdef SMEMM
 837   gcmem_mixed_usedmem += tomove;
 838 #endif
 839   GC_BAMBOO_DEBUGPRINT(0xea03);
 840   GC_BAMBOO_DEBUGPRINT_REG(tomove);
 841   GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 842   GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
 843   // flush the sbstartbl
 844   BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
 845           (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-(unsigned int)gcreservedsb)
 846           *sizeof(unsigned int));
 847   if(tomove == 0) {
 848     gcheaptop = tmpheaptop;
 849   } else {
 850     // check how many blocks it acrosses
 851     unsigned int remain = tmpheaptop-gcbaseva;
 852     unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;//number of the sblock
 853     unsigned int b = 0;  // number of the block
 854     BLOCKINDEX(tmpheaptop, &b);
 855     // check the remaining space in this block
 856     bound = (BAMBOO_SMEM_SIZE);
 857     if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
 858       bound = (BAMBOO_SMEM_SIZE_L);
 859     }
 860     remain = bound - remain%bound;
 861
 862     GC_BAMBOO_DEBUGPRINT(0xea04);
 863     size = 0;
 864     unsigned int isize = 0;
 865     unsigned int host = 0;
 866     unsigned int ptr = 0;
 867     unsigned int base = tmpheaptop;
 868     unsigned int cpysize = 0;
 869     remain -= BAMBOO_CACHE_LINE_SIZE;
 870     tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
 871     gc_lobjqueueinit4_I();
 872     while(gc_lobjmoreItems4_I()) {
 873       ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
 874       ALIGNSIZE(size, &isize);
 875       if(remain < isize) {
 876                 // this object acrosses blocks
 877                 if(cpysize > 0) {
 878                   // close current block, fill its header
 879                   BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
 880                   *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
 881                   bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE;//add the size of header
 882 #ifdef GC_TBL_DEBUG
 883                   if(b >= gcnumblock) {
 884                         BAMBOO_EXIT(0xb002);
 885                   }
 886 #endif
 887                   cpysize = 0;
 888                   base = tmpheaptop;
 889                   if(remain == 0) {
 890                         remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
 891                                          BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
 892                   }
 893                   remain -= BAMBOO_CACHE_LINE_SIZE;
 894                   tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
 895                   BLOCKINDEX(tmpheaptop, &b);
 896                   sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)
 897                         +gcreservedsb;
 898                 }  // if(cpysize > 0)
 899
 900                 // move the large obj
 901                 if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
 902                   memmove(tmpheaptop, gcheaptop, size);
 903                 } else {
 904                   //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
 905                   memcpy(tmpheaptop, gcheaptop, size);
 906                 }
 907                 // fill the remaining space with -2 padding
 908                 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
 909                 GC_BAMBOO_DEBUGPRINT(0xea05);
 910                 GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
 911                 GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 912                 GC_BAMBOO_DEBUGPRINT_REG(size);
 913                 GC_BAMBOO_DEBUGPRINT_REG(isize);
 914                 GC_BAMBOO_DEBUGPRINT_REG(base);
 915                 gcheaptop += size;
 916 #ifdef GC_TBL_DEBUG
 917                 if((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] != 3)) {
 918                   tprintf("Error moveLobj: %x %x \n",
 919                           (int)ptr, ((int *)(ptr))[BAMBOOMARKBIT] );
 920                   BAMBOO_EXIT(0xb003);
 921                 }
 922 #endif
 923                 // cache the mapping info
 924                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] =
 925                   (unsigned int)tmpheaptop;
 926 #ifdef GC_TBL_DEBUG
 927                 if(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] ==
 928                         gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)-1]) {
 929                   tprintf("Error moveobj ^^ : %x, %x, %d \n", (int)ptr,
 930                           (int)tmpheaptop, OBJMAPPINGINDEX((unsigned int)ptr));
 931                   BAMBOO_EXIT(0xb004);
 932                 }
 933 #endif
 934                 GC_BAMBOO_DEBUGPRINT(0xcdca);
 935                 GC_BAMBOO_DEBUGPRINT_REG(ptr);
 936                 GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 937                 tmpheaptop += isize;
 938
 939                 // set the gcsbstarttbl and bamboo_smemtbl
 940                 unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
 941                 for(int k = 1; k < tmpsbs; k++) {
 942                   gcsbstarttbl[sb+k] = -1;
 943 #ifdef GC_TBL_DEBUG
 944                   if((sb+k) >= gcsbstarttbl_len) {
 945                         BAMBOO_EXIT(0xb005);
 946                   }
 947 #endif
 948                 }
 949                 sb += tmpsbs;
 950                 bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
 951                 BLOCKINDEX(tmpheaptop-1, &tmpsbs);
 952                 for(; b < tmpsbs; b++) {
 953                   bamboo_smemtbl[b] = bound;
 954 #ifdef GC_TBL_DEBUG
 955                   if(b >= gcnumblock) {
 956                         BAMBOO_EXIT(0xb006);
 957                   }
 958 #endif
 959                   if(b==NUMCORES4GC-1) {
 960                         bound = BAMBOO_SMEM_SIZE;
 961                   }
 962                 }
 963                 if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
 964                   gcsbstarttbl[sb] = -1;
 965                   remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
 966                                    BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
 967                   bamboo_smemtbl[b] = bound;
 968                 } else {
 969                   gcsbstarttbl[sb] = (int)tmpheaptop;
 970                   remain = tmpheaptop-gcbaseva;
 971                   bamboo_smemtbl[b] = remain%bound;
 972                   remain = bound - bamboo_smemtbl[b];
 973                 } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
 974 #ifdef GC_TBL_DEBUG
 975                 if(sb >= gcsbstarttbl_len) {
 976                   BAMBOO_EXIT(0xb007);
 977                 }
 978                 if(b >= gcnumblock) {
 979                   BAMBOO_EXIT(0xb008);
 980                 }
 981 #endif
 982
 983                 // close current block and fill the header
 984                 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
 985                 *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
 986                 cpysize = 0;
 987                 base = tmpheaptop;
 988                 if(remain == BAMBOO_CACHE_LINE_SIZE) {
 989                   // fill with 0 in case
 990                   BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
 991                 }
 992                 remain -= BAMBOO_CACHE_LINE_SIZE;
 993                 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
 994       } else {
 995                 remain -= isize;
 996                 // move the large obj
 997                 if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
 998                   memmove(tmpheaptop, gcheaptop, size);
 999                 } else {
1000                   memcpy(tmpheaptop, gcheaptop, size);
1001                 }
1002                 // fill the remaining space with -2 padding
1003                 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1004                 GC_BAMBOO_DEBUGPRINT(0xea06);
1005                 GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
1006                 GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1007                 GC_BAMBOO_DEBUGPRINT_REG(size);
1008                 GC_BAMBOO_DEBUGPRINT_REG(isize);
1009
1010                 gcheaptop += size;
1011                 cpysize += isize;
1012 #ifdef GC_TBL_DEBUG
1013                 if((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] != 3)) {
1014                   tprintf("Error moveLobj: %x %x \n", (int)ptr,
1015                           ((int *)(ptr))[BAMBOOMARKBIT] );
1016                   BAMBOO_EXIT(0xb009);
1017                 }
1018 #endif
1019                 // cache the mapping info
1020                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] =
1021                   (unsigned int)tmpheaptop;
1022 #ifdef GC_TBL_DEBUG
1023                 if(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] ==
1024                         gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)-1]) {
1025                   tprintf("Error moveobj ?? : %x, %x, %d \n", (int)ptr,
1026                           (int)tmpheaptop, OBJMAPPINGINDEX((unsigned int)ptr));
1027                   BAMBOO_EXIT(0xb00a);
1028                 }
1029                 if(!ISSHAREDOBJ(tmpheaptop)) {
1030                   tprintf("Error: %x, %x \n", (int)ptr, (int)tmpheaptop);
1031                   BAMBOO_EXIT(0xb00b);
1032                 }
1033 #endif
1034                 GC_BAMBOO_DEBUGPRINT(0xcdcc);
1035                 GC_BAMBOO_DEBUGPRINT_REG(ptr);
1036                 GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1037                 GC_BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
1038                 tmpheaptop += isize;
1039
1040                 // update bamboo_smemtbl
1041                 bamboo_smemtbl[b] += isize;
1042 #ifdef GC_TBL_DEBUG
1043                 if(b >= gcnumblock) {
1044                   BAMBOO_EXIT(0xb00c);
1045                 }
1046 #endif
1047           }  // if(remain < isize) else ...
1048     }  // while(gc_lobjmoreItems())
1049     if(cpysize > 0) {
1050       // close current block, fill the header
1051       BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1052       *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1053       bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;//add the size of the header
1054 #ifdef GC_TBL_DEBUG
1055           if(b >= gcnumblock) {
1056                 BAMBOO_EXIT(0xb00d);
1057           }
1058 #endif
1059     } else {
1060       tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
1061     }
1062     gcheaptop = tmpheaptop;
1063
1064   } // if(tomove == 0)
1065
1066   GC_BAMBOO_DEBUGPRINT(0xea07);
1067   GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
1068
1069   bamboo_free_block = 0;
1070   unsigned int tbound = 0;
1071   do {
1072     tbound = (bamboo_free_block<NUMCORES4GC) ?
1073              BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1074     if(bamboo_smemtbl[bamboo_free_block] == tbound) {
1075       bamboo_free_block++;
1076     } else {
1077       // the first non-full partition
1078       break;
1079     }
1080   } while(true);
1081 #ifdef GC_TBL_DEBUG
1082   if(bamboo_free_block >= gcnumblock) {
1083         BAMBOO_EXIT(0xb00e);
1084   }
1085 #endif
1086
1087 #ifdef GC_PROFILE
1088 #ifdef MGC_SPEC
1089         if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
1090 #endif
1091   // check how many live space there are
1092   gc_num_livespace = 0;
1093   for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
1094         gc_num_livespace += bamboo_smemtbl[tmpi];
1095   }
1096   gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
1097 #ifdef MGC_SPEC
1098         }
1099 #endif
1100 #endif
1101   GC_BAMBOO_DEBUGPRINT(0xea08);
1102   GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
1103 } // void moveLObjs()
1104
// markObj: register objptr as reachable during the mark phase.
//  - NULL: ignored.
//  - Shared object (ISSHAREDOBJ) hosted on this core: if its mark word is
//    still INIT it is set to DISCOVERED, its cache line is flushed and the
//    pointer is enqueued with gc_enqueue_I; all of this happens between the
//    enter-runtime-mode / enter-client-mode macros.
//  - Shared object hosted on another core: unless the pointer is already in
//    gcforwardobjtbl, a GCMARKEDOBJ message is sent to the host core,
//    gcself_numsendobjs is incremented and the pointer is recorded in
//    gcforwardobjtbl so that it is forwarded only once.
//  - Non-shared pointer: enqueued unconditionally (with GC_TBL_DEBUG this
//    case is reported and BAMBOO_EXIT(0xb00f) is invoked first).
1105 inline void markObj(void * objptr) {
1106   if(objptr == NULL) {
1107     return;
1108   }
1109   if(ISSHAREDOBJ(objptr)) {
1110     unsigned int host = hostcore(objptr);
1111     if(BAMBOO_NUM_OF_CORE == host) {
1112       // on this core
1113       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1114       if(((int *)objptr)[BAMBOOMARKBIT] == INIT) {
1115                 // this is the first time that this object is discovered,
1116                 // set the flag as DISCOVERED
1117                 ((int *)objptr)[BAMBOOMARKBIT] = DISCOVERED;
1118                 BAMBOO_CACHE_FLUSH_LINE(objptr);
1119                 gc_enqueue_I(objptr);
1120 #ifdef GC_TBL_DEBUG
1121                 // for test
1122                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)]=1;
1123 #endif
1124           }
1125       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1126     } else {
       // hosted on another core: the host has to be told about it
1127       GC_BAMBOO_DEBUGPRINT(0xbbbb);
1128       GC_BAMBOO_DEBUGPRINT_REG(host);
1129       GC_BAMBOO_DEBUGPRINT_REG(objptr);
1130       // check if this obj has been forwarded
1131       if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
1132                 // send a msg to host informing that objptr is active
1133                 send_msg_2(host, GCMARKEDOBJ, objptr, false);
1134 #ifdef GC_PROFILE
1135 #ifdef MGC_SPEC
1136         if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
1137 #endif
1138                 gc_num_forwardobj++;
1139 #ifdef MGC_SPEC
1140         }
1141 #endif
1142 #endif // GC_PROFILE
                 // count the message and remember the pointer so that it
                 // is not forwarded a second time
1143                 gcself_numsendobjs++;
1144                 MGCHashadd(gcforwardobjtbl, (int)objptr);
1145       }
1146     }
1147   } else {
     // not a shared object: no mark word is checked, the pointer is always
     // enqueued
1148 #ifdef GC_TBL_DEBUG
1149         tprintf("Non shared pointer to be marked %x \n", (int)objptr);
1150         BAMBOO_EXIT(0xb00f);
1151 #endif
1152     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1153     gc_enqueue_I(objptr);
1154     BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1155   }  // if(ISSHAREDOBJ(objptr))
1156 } // void markObj(void * objptr)
1157
1158 // enqueue root objs
// tomark: pass every root reference of this core to markObj.
// stackptr: head of a chain of garbagelist records (linked through ->next);
//           every non-NULL entry of each record's array is marked.
// Exits with BAMBOO_EXIT(0xb010) unless gcphase == MARKPHASE.
// Sets gcbusystatus to true and resets gcnumlobjs to 0 before scanning.
// Roots visited, in order:
//   - the stackptr chain;
//   - on the startup core only, the garbagelist chain at global_defs_p;
//   - with TASK: the per-core object queues, the current task descriptor,
//     the active tasks, objqueue, totransobjqueue and runtime_locks;
//   - with MGC: the global thread queue (startup core only),
//     bamboo_threadlocks and bamboo_current_thread.
1159 inline void tomark(struct garbagelist * stackptr) {
1160   if(MARKPHASE != gcphase) {
1161     GC_BAMBOO_DEBUGPRINT_REG(gcphase);
1162     BAMBOO_EXIT(0xb010);
1163   }
1164   gcbusystatus = true;
1165   gcnumlobjs = 0;
1166
1167
1168   int i,j;
1169   // enqueue current stack
1170   while(stackptr!=NULL) {
1171     GC_BAMBOO_DEBUGPRINT(0xe501);
1172     GC_BAMBOO_DEBUGPRINT_REG(stackptr->size);
1173     GC_BAMBOO_DEBUGPRINT_REG(stackptr->next);
1174     GC_BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
1175     for(i=0; i<stackptr->size; i++) {
1176       if(stackptr->array[i] != NULL) {
1177                 markObj(stackptr->array[i]);
1178       }
1179     }
1180     stackptr=stackptr->next;
1181   }
1182   GC_BAMBOO_DEBUGPRINT(0xe502);
1183
1184   // enqueue static pointers global_defs_p
     // (global_defs_p is walked as a garbagelist chain, same as the stack)
1185   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1186         struct garbagelist * staticptr=(struct garbagelist *)global_defs_p;
1187         while(staticptr != NULL) {
1188           for(i=0; i<staticptr->size; i++) {
1189                 if(staticptr->array[i] != NULL) {
1190                   markObj(staticptr->array[i]);
1191                 }
1192           }
1193           staticptr = staticptr->next;
1194         }
1195   }
1196   GC_BAMBOO_DEBUGPRINT(0xe503);
1197
1198 #ifdef TASK
1199   // enqueue objectsets
     // for every class, walk each parameter queue of this core and mark the
     // key of every node on the queue's objectset list
1200   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
1201     for(i=0; i<NUMCLASSES; i++) {
1202       struct parameterwrapper ** queues =
1203         objectqueues[BAMBOO_NUM_OF_CORE][i];
1204       int length = numqueues[BAMBOO_NUM_OF_CORE][i];
1205       for(j = 0; j < length; ++j) {
1206                 struct parameterwrapper * parameter = queues[j];
1207                 struct ObjectHash * set=parameter->objectset;
1208                 struct ObjectNode * ptr=set->listhead;
1209                 while(ptr!=NULL) {
1210                   markObj((void *)ptr->key);
1211                   ptr=ptr->lnext;
1212                 }
1213       }
1214     }
1215   }
1216
1217   // enqueue current task descriptor
1218   if(currtpd != NULL) {
1219     GC_BAMBOO_DEBUGPRINT(0xe504);
1220     for(i=0; i<currtpd->numParameters; i++) {
1221       markObj(currtpd->parameterArray[i]);
1222     }
1223   }
1224
1225   GC_BAMBOO_DEBUGPRINT(0xe505);
1226   // enqueue active tasks
1227   if(activetasks != NULL) {
1228     struct genpointerlist * ptr=activetasks->list;
1229     while(ptr!=NULL) {
1230       struct taskparamdescriptor *tpd=ptr->src;
       // note: this i shadows the function-level i declared above
1231       int i;
1232       for(i=0; i<tpd->numParameters; i++) {
1233                 markObj(tpd->parameterArray[i]);
1234       }
1235       ptr=ptr->inext;
1236     }
1237   }
1238
1239   GC_BAMBOO_DEBUGPRINT(0xe506);
1240   // enqueue cached transferred obj
1241   struct QueueItem * tmpobjptr =  getHead(&objqueue);
1242   while(tmpobjptr != NULL) {
1243     struct transObjInfo * objInfo =
1244       (struct transObjInfo *)(tmpobjptr->objectptr);
1245     markObj(objInfo->objptr);
1246     tmpobjptr = getNextQueueItem(tmpobjptr);
1247   }
1248
1249   GC_BAMBOO_DEBUGPRINT(0xe507);
1250   // enqueue cached objs to be transferred
1251   struct QueueItem * item = getHead(totransobjqueue);
1252   while(item != NULL) {
1253     struct transObjInfo * totransobj =
1254       (struct transObjInfo *)(item->objectptr);
1255     markObj(totransobj->objptr);
1256     item = getNextQueueItem(item);
1257   } // while(item != NULL)
1258
1259   GC_BAMBOO_DEBUGPRINT(0xe508);
1260   // enqueue lock related info
     // redirectlock is passed without a NULL test (markObj ignores NULL);
     // value is only passed when it is non-NULL
1261   for(i = 0; i < runtime_locklen; ++i) {
1262     markObj((void *)(runtime_locks[i].redirectlock));
1263     if(runtime_locks[i].value != NULL) {
1264       markObj((void *)(runtime_locks[i].value));
1265     }
1266   }
1267   GC_BAMBOO_DEBUGPRINT(0xe509);
1268 #endif
1269
1270 #ifdef MGC
1271   // enqueue global thread queue
     // layout as used here: slot 1 holds the number of queued threads,
     // slot 2 the index of the first one, entries start at slot 4; the
     // index wraps around through bamboo_max_thread_num_mask
     // NOTE(review): lockthreadqueue() has no matching unlock in this
     // function - presumably the queue is released later in the GC cycle;
     // confirm against the code that flushes the thread queue
1272   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1273         lockthreadqueue();
1274         unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
1275         if(thread_counter > 0) {
1276           unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
1277           for(i = thread_counter; i > 0; i--) {
1278                 markObj((void *)bamboo_thread_queue[4+start]);
1279                 start = (start+1)&bamboo_max_thread_num_mask;
1280           }
1281         }
1282   }
1283
1284   // enqueue the bamboo_threadlocks
1285   for(i = 0; i < bamboo_threadlocks.index; i++) {
1286         markObj((void *)(bamboo_threadlocks.locks[i].object));
1287   }
1288
1289   // enqueue the bamboo_current_thread
1290   if(bamboo_current_thread != 0) {
1291         markObj((void *)bamboo_current_thread);
1292   }
1293
1294   GC_BAMBOO_DEBUGPRINT(0xe50a);
1295 #endif
1296 } // void tomark(struct garbagelist * stackptr)
1297
// mark: drain this core's mark queue while gcphase == MARKPHASE.
// isfirst:  when true, the roots are enqueued first through
//           tomark(stackptr) and gccurr_heaptop / gcmarkedptrbound are
//           reset to 0.
// stackptr: root stack chain, only used when isfirst is true.
//
// For every dequeued pointer that is a shared object hosted on this core
// and whose mark word is DISCOVERED:
//   - large objects (isLarge) are put on the large-object queue and counted
//     in gcnumlobjs;
//   - other objects add their aligned size to gccurr_heaptop and may raise
//     gcmarkedptrbound;
//   - in both cases the mark word becomes MARKED and the line is flushed.
// Local shared objects in any other state are not scanned. All pointers that
// are scanned have their reference fields passed to markObj, using the
// pointerarray entry for `type` plus the one for OBJECTTYPE.
//
// When the queue runs empty gcbusystatus is cleared. The startup core then
// records its own counters and returns after a single pass of the outer
// loop; every other core reports GCFINISHMARK to the startup core (once per
// idle period) and keeps looping until gcphase changes.
1298 inline void mark(bool isfirst,
1299                  struct garbagelist * stackptr) {
1300   if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed01);
1301   if(isfirst) {
1302     if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed02);
1303     // enqueue root objs
1304     tomark(stackptr);
1305     gccurr_heaptop = 0; // record the size of all active objs in this core
1306                         // aligned but does not consider block boundaries
1307     gcmarkedptrbound = 0;
1308   }
1309   if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed03);
     // NOTE: this outer isize is never read; the loop body declares its own
     // isize which shadows it
1310   unsigned int isize = 0;
1311   bool checkfield = true;
1312   bool sendStall = false;
1313   // mark phase
1314   while(MARKPHASE == gcphase) {
1315     if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed04);
1316     while(true) {
1317       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1318       bool hasItems = gc_moreItems2_I();
1319       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1320       GC_BAMBOO_DEBUGPRINT(0xed05);
1321       if(!hasItems) {
1322                 break;
1323       }
       // new work arrived: a finish message has to be sent again once the
       // queue is empty
1324       sendStall = false;
1325       gcbusystatus = true;
1326       checkfield = true;
       // NOTE(review): unlike gc_moreItems2_I above, this _I call is not
       // wrapped by the runtime-mode macros - confirm that this is safe
1327       void * ptr = gc_dequeue2_I();
1328
1329       GC_BAMBOO_DEBUGPRINT_REG(ptr);
1330       unsigned int size = 0;
1331       unsigned int isize = 0;
1332       unsigned int type = 0;
1333       // check if it is a shared obj
1334       if(ISSHAREDOBJ(ptr)) {
1335                 // a shared obj, check if it is a local obj on this core
1336                 unsigned int host = hostcore(ptr);
1337                 bool islocal = (host == BAMBOO_NUM_OF_CORE);
1338                 if(islocal) {
                   // despite its name, isnotmarked is true when the mark
                   // word is DISCOVERED, i.e. enqueued but not yet MARKED
1339                   bool isnotmarked = (((int *)ptr)[BAMBOOMARKBIT] == DISCOVERED);
                   // isLarge is evaluated before isnotmarked, so it is always
                   // called; it receives &type and &size, which the branches
                   // below and the field scan read afterwards (presumably it
                   // fills both - its definition is outside this chunk)
1340                   if(isLarge(ptr, &type, &size) && isnotmarked) {
1341                         // ptr is a large object and not marked or enqueued
1342                         GC_BAMBOO_DEBUGPRINT(0xecec);
1343                         GC_BAMBOO_DEBUGPRINT_REG(ptr);
1344                         GC_BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
1345                         BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1346                         gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
1347                         gcnumlobjs++;
1348                         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1349                         // mark this obj
1350                         ((int *)ptr)[BAMBOOMARKBIT] = MARKED;
1351                         BAMBOO_CACHE_FLUSH_LINE(ptr);
1352 #ifdef GC_TBL_DEBUG
1353                         // for test
1354                         gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=3;
1355 #endif
1356                   } else if(isnotmarked) {
1357                         // ptr is an unmarked active object on this core
1358                         ALIGNSIZE(size, &isize);
1359                         gccurr_heaptop += isize;
1360                         GC_BAMBOO_DEBUGPRINT(0xaaaa);
1361                         GC_BAMBOO_DEBUGPRINT_REG(ptr);
1362                         GC_BAMBOO_DEBUGPRINT_REG(isize);
1363                         GC_BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
1364                         // mark this obj
1365                         ((int *)ptr)[BAMBOOMARKBIT] = MARKED;
1366                         BAMBOO_CACHE_FLUSH_LINE(ptr);
1367 #ifdef GC_TBL_DEBUG
1368                         // for test
1369                         gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=2;
1370 #endif
1371
                         // keep the highest end address (ptr + size, using
                         // the unaligned size) of the objects marked here;
                         // ptr is a void *, so the addition relies on
                         // byte-wise void pointer arithmetic
1372                         if((unsigned int)(ptr + size) > (unsigned int)gcmarkedptrbound) {
1373                           gcmarkedptrbound = (unsigned int)(ptr + size);
1374                         } // if(ptr + size > gcmarkedptrbound)
1375                   } else {
1376                         // ptr is not an active obj or has been marked
1377                         checkfield = false;
1378                   } // if(isLarge(ptr, &type, &size)) else ...
1379                 }
1380 #ifdef GC_TBL_DEBUG
1381                 else {
1382                   tprintf("Error mark: %x, %d, %d \n", (int)ptr, BAMBOO_NUM_OF_CORE,
1383                           hostcore(ptr));
1384                   BAMBOO_EXIT(0xb011);
1385                 }
1386 #endif /* can never reach here
1387                 else {
1388                   // check if this obj has been forwarded
1389                   if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
1390                         // send a msg to host informing that ptr is active
1391                         send_msg_2(host, GCMARKEDOBJ, ptr, false);
1392                         gcself_numsendobjs++;
1393                         MGCHashadd(gcforwardobjtbl, (int)ptr);
1394                   }
1395                         checkfield = false;
1396                 }// if(isLocal(ptr)) else ...*/
1397           }   // if(ISSHAREDOBJ(ptr))
1398       GC_BAMBOO_DEBUGPRINT(0xed06);
1399
       // NOTE(review): for a pointer that is not a shared object nothing
       // above assigns type, so pointerarray[0] is used for it - confirm
       // that this is intended
1400       if(checkfield) {
1401                 // scan all pointers in ptr
1402                 unsigned int * pointer;
1403                 pointer=pointerarray[type];
1404                 if (pointer==0) {
1405                   /* Array of primitives */
1406                   /* Do nothing */
1407                 } else if (((unsigned int)pointer)==1) {
1408                   /* Array of pointers */
1409                   struct ArrayObject *ao=(struct ArrayObject *) ptr;
1410                   int length=ao->___length___;
1411                   int j;
                   // the elements are read starting sizeof(int) bytes after
                   // the address of the ___length___ field
1412                   for(j=0; j<length; j++) {
1413                         void *objptr =
1414                           ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1415                         markObj(objptr);
1416                   }
1417                 } else {
                   // offset table: pointer[0] is the number of reference
                   // fields, pointer[1..size] their byte offsets within ptr
1418                   unsigned int size=pointer[0];
1419                   int i;
1420                   for(i=1; i<=size; i++) {
1421                         unsigned int offset=pointer[i];
1422                         void * objptr=*((void **)(((char *)ptr)+offset));
1423                         markObj(objptr);
1424                   }
1425                 }     // if (pointer==0) else if ... else ...
1426                 {
                   // in addition, the offsets listed for OBJECTTYPE are
                   // scanned for every object, whatever its own type
1427                   pointer=pointerarray[OBJECTTYPE];
1428                   //handle object class
1429                   unsigned int size=pointer[0];
1430                   int i;
1431                   for(i=1; i<=size; i++) {
1432                         unsigned int offset=pointer[i];
1433                         void * objptr=*((void **)(((char *)ptr)+offset));
1434                         markObj(objptr);
1435                   }
1436                 }
1437       }   // if(checkfield)
1438     }     // while(gc_moreItems2())
1439     GC_BAMBOO_DEBUGPRINT(0xed07);
1440         gcbusystatus = false;
1441     // send mark finish msg to core coordinator
1442     if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1443       GC_BAMBOO_DEBUGPRINT(0xed08);
       // the startup core stores its own status and counters directly;
       // when waitconfirm is set the entry other than gcnumsrobjs_index
       // (0 <-> 1) is written
1444           int entry_index = 0;
1445           if(waitconfirm)  {
1446                 // phase 2
1447                 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
1448           } else {
1449                 // phase 1
1450                 entry_index = gcnumsrobjs_index;
1451           }
1452       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1453       gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
1454       gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE]=gcself_numreceiveobjs;
1455       gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
1456     } else {
       // sendStall makes sure the finish message is sent only once until
       // new items show up in the queue
1457       if(!sendStall) {
1458                 GC_BAMBOO_DEBUGPRINT(0xed09);
1459                 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
1460                                    gcself_numsendobjs, gcself_numreceiveobjs, false);
1461                 sendStall = true;
1462       }
1463     }  // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
1464     GC_BAMBOO_DEBUGPRINT(0xed0a);
1465
     // the startup core leaves after one drain of the queue; the other
     // cores stay in the outer loop until gcphase is no longer MARKPHASE
1466     if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
1467       GC_BAMBOO_DEBUGPRINT(0xed0b);
1468       return;
1469     }
1470   } // while(MARKPHASE == gcphase)
1471
1472   BAMBOO_CACHE_MF();
1473 } // mark()
1474
// compact2Heaptophelper_I: give core `coren` room at the current heap top.
// coren:     core whose pending move request is served
// p:         in/out, address at which the granted space starts; moved past
//            the granted space (or to the load of the new top core)
// numblocks: in/out, block count of the top core; *numblocks + 1 is what is
//            announced to coren as the block to fill
// remain:    in/out, number of bytes left in the block that contains *p
//
// The amount needed is gcrequiredmems[coren] plus BAMBOO_CACHE_LINE_SIZE.
// coren is told where to start (directly through the gc* globals when it is
// the startup core, otherwise with a GCMOVESTART message) before the
// bookkeeping is updated. gcmovepending is decremented in every case.
1475 inline void compact2Heaptophelper_I(unsigned int coren,
1476                                     unsigned int* p,
1477                                     unsigned int* numblocks,
1478                                     unsigned int* remain) {
1479   unsigned int b;
1480   unsigned int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
1481   if(STARTUPCORE == coren) {
1482     gctomove = true;
1483     gcmovestartaddr = *p;
1484     gcdstcore = gctopcore;
1485     gcblock2fill = *numblocks + 1;
1486   } else {
1487     send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
1488   }
1489   GC_BAMBOO_DEBUGPRINT_REG(coren);
1490   GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
1491   GC_BAMBOO_DEBUGPRINT_REG(*p);
1492   GC_BAMBOO_DEBUGPRINT_REG(*numblocks+1);
1493   if(memneed < *remain) {
     // the whole request fits into the current block: consume memneed bytes
     // there and clear the request
1494     GC_BAMBOO_DEBUGPRINT(0xd104);
1495     *p = *p + memneed;
1496     gcrequiredmems[coren] = 0;
1497     gcloads[gctopcore] += memneed;
1498     *remain = *remain - memneed;
1499   } else {
     // the request takes the rest of the current block: count the block as
     // filled, reduce the request by (*remain - BAMBOO_CACHE_LINE_SIZE) and
     // switch to the next top core
1500     GC_BAMBOO_DEBUGPRINT(0xd105);
1501     // next available block
1502     *p = *p + *remain;
1503     gcfilledblocks[gctopcore] += 1;
1504     unsigned int newbase = 0;
1505     BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
1506     gcloads[gctopcore] = newbase;
1507     gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
1508     gcstopblock[gctopcore]++;
     // NEXTTOPCORE is applied to the old gctopblock, which is incremented
     // only afterwards
1509     gctopcore = NEXTTOPCORE(gctopblock);
1510     gctopblock++;
     // reload the caller's cursor from the new top core
1511     *numblocks = gcstopblock[gctopcore];
1512     *p = gcloads[gctopcore];
1513     BLOCKINDEX(*p, &b);
     // blocks with an index below NUMCORES4GC are measured with
     // BAMBOO_SMEM_SIZE_L, all others with BAMBOO_SMEM_SIZE
1514     *remain=(b<NUMCORES4GC) ?
1515              ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
1516              : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
1517     GC_BAMBOO_DEBUGPRINT(0xd106);
1518     GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
1519     GC_BAMBOO_DEBUGPRINT_REG(*p);
1520     GC_BAMBOO_DEBUGPRINT_REG(b);
1521     GC_BAMBOO_DEBUGPRINT_REG(*remain);
1522   }  // if(memneed < remain)
1523   gcmovepending--;
1524 } // void compact2Heaptophelper_I(int, int*, int*, int*)
1525
// compact2Heaptop: serve pending move requests from the current heap top.
// Starts from gcloads[gctopcore] / gcfilledblocks[gctopcore] and the space
// left in the block that holds that address.
//  - If the top core itself is still active (gccorestatus != 0), only that
//    core is served and the function returns.
//  - Otherwise every core i < NUMCORES4GC that is active and has
//    gcrequiredmems[i] > 0 is served in index order; the loop stops as soon
//    as the top core (which the helper may have changed) is found active.
// gccorestatus is read and compact2Heaptophelper_I is called only between
// the enter-runtime-mode / enter-client-mode macros.
1526 inline void compact2Heaptop() {
1527   // no cores with spare mem and some cores are blocked with pending move
1528   // find the current heap top and make them move to the heap top
1529   unsigned int p;
1530   unsigned int numblocks = gcfilledblocks[gctopcore];
1531   p = gcloads[gctopcore];
1532   unsigned int b;
1533   BLOCKINDEX(p, &b);
     // bytes left in the block containing p; blocks with an index below
     // NUMCORES4GC are measured with BAMBOO_SMEM_SIZE_L
1534   unsigned int remain = (b<NUMCORES4GC) ?
1535                ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
1536                : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
1537   // check if the top core finishes
1538   BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1539   if(gccorestatus[gctopcore] != 0) {
1540     GC_BAMBOO_DEBUGPRINT(0xd101);
1541     GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
1542     // let the top core finishes its own work first
1543     compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
1544     BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1545     return;
1546   }
1547   BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1548
1549   GC_BAMBOO_DEBUGPRINT(0xd102);
1550   GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
1551   GC_BAMBOO_DEBUGPRINT_REG(p);
1552   GC_BAMBOO_DEBUGPRINT_REG(b);
1553   GC_BAMBOO_DEBUGPRINT_REG(remain);
     // runtime mode is entered and left once per iteration, including on
     // the early return inside the loop
1554   for(int i = 0; i < NUMCORES4GC; i++) {
1555     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1556     if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
1557       GC_BAMBOO_DEBUGPRINT(0xd103);
1558       compact2Heaptophelper_I(i, &p, &numblocks, &remain);
       // the helper may have advanced gctopcore, so its status is checked
       // again here
1559       if(gccorestatus[gctopcore] != 0) {
1560                 GC_BAMBOO_DEBUGPRINT(0xd101);
1561                 GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
1562                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1563                 // the top core is not free now
1564                 return;
1565       }
1566     }  // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
1567     BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1568   }   // for(i = 0; i < NUMCORES4GC; i++)
1569   GC_BAMBOO_DEBUGPRINT(0xd106);
1570 } // void compact2Heaptop()
1571
1572 inline void resolvePendingMoveRequest() {
1573   GC_BAMBOO_DEBUGPRINT(0xeb01);
1574   GC_BAMBOO_DEBUGPRINT(0xeeee);
1575   for(int k = 0; k < NUMCORES4GC; k++) {
1576     GC_BAMBOO_DEBUGPRINT(0xf000+k);
1577     GC_BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
1578     GC_BAMBOO_DEBUGPRINT_REG(gcloads[k]);
1579     GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
1580     GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
1581   }
1582   GC_BAMBOO_DEBUGPRINT(0xffff);
1583   int i;
1584   int j;
1585   bool nosparemem = true;
1586   bool haspending = false;
1587   bool hasrunning = false;
1588   bool noblock = false;
1589   unsigned int dstcore = 0;       // the core who need spare mem
1590   unsigned int sourcecore = 0;       // the core who has spare mem
1591   for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
1592     if(nosparemem) {
1593       // check if there are cores with spare mem
1594       if(gccorestatus[i] == 0) {
1595                 // finished working, check if it still have spare mem
1596                 if(gcfilledblocks[i] < gcstopblock[i]) {
1597                   // still have spare mem
1598                   nosparemem = false;
1599                   sourcecore = i;
1600                 }  // if(gcfilledblocks[i] < gcstopblock[i]) else ...
1601       }
1602       i++;
1603     }  // if(nosparemem)
1604     if(!haspending) {
1605       if(gccorestatus[j] != 0) {
1606                 // not finished, check if it has pending move requests
1607                 if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
1608                   dstcore = j;
1609                   haspending = true;
1610                 } else {
1611                   hasrunning = true;
1612                 }  // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
1613       }  // if(gccorestatus[i] == 0) else ...
1614       j++;
1615     }  // if(!haspending)
1616     if(!nosparemem && haspending) {
1617       // find match
1618       unsigned int tomove = 0;
1619       unsigned int startaddr = 0;
1620       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1621       gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
1622                                                  gcrequiredmems[dstcore],
1623                                                  &tomove,
1624                                                  &startaddr);
1625       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1626       GC_BAMBOO_DEBUGPRINT(0xeb02);
1627       GC_BAMBOO_DEBUGPRINT_REG(sourcecore);
1628       GC_BAMBOO_DEBUGPRINT_REG(dstcore);
1629       GC_BAMBOO_DEBUGPRINT_REG(startaddr);
1630       GC_BAMBOO_DEBUGPRINT_REG(tomove);
1631       if(STARTUPCORE == dstcore) {
1632                 GC_BAMBOO_DEBUGPRINT(0xeb03);
1633                 gcdstcore = sourcecore;
1634                 gctomove = true;
1635                 gcmovestartaddr = startaddr;
1636                 gcblock2fill = tomove;
1637       } else {
1638                 GC_BAMBOO_DEBUGPRINT(0xeb04);
1639                 send_msg_4(dstcore, GCMOVESTART, sourcecore,
1640                                    startaddr, tomove, false);
1641       }
1642       gcmovepending--;
1643       nosparemem = true;
1644       haspending = false;
1645       noblock = true;
1646     }
1647   }   // for(i = 0; i < NUMCORES4GC; i++)
1648   GC_BAMBOO_DEBUGPRINT(0xcccc);
1649   GC_BAMBOO_DEBUGPRINT_REG(hasrunning);
1650   GC_BAMBOO_DEBUGPRINT_REG(haspending);
1651   GC_BAMBOO_DEBUGPRINT_REG(noblock);
1652
1653   if(!hasrunning && !noblock) {
1654     gcphase = SUBTLECOMPACTPHASE;
1655     compact2Heaptop();
1656   }
1657
1658 } // void resovePendingMoveRequest()
1659
/* Cursor over the shared heap used during compaction.  The same layout is
 * used for the source side ("orig") and the destination side ("to"). */
struct moveHelper {
  /* block num for heap */
  unsigned int numblocks;
  /* base virtual address of current heap block */
  unsigned int base;
  /* virtual address of current heap top */
  unsigned int ptr;
  /* offset in current heap block */
  unsigned int offset;
  /* virtual address of current small block to check */
  unsigned int blockbase;
  /* bound virtual address of current small block */
  unsigned int blockbound;
  /* index of the small blocks */
  unsigned int sblockindex;
  /* real size of current heap block to check */
  unsigned int top;
  /* bound size of current heap block to check */
  unsigned int bound;
}; // struct moveHelper
1671
1672 // If out of boundary of valid shared memory, return false, else return true
1673 inline bool nextSBlock(struct moveHelper * orig) {
1674   orig->blockbase = orig->blockbound;
1675
1676   bool sbchanged = false;
1677   unsigned int origptr = orig->ptr;
1678   unsigned int blockbase = orig->blockbase;
1679   unsigned int blockbound = orig->blockbound;
1680   unsigned int bound = orig->bound;
1681   GC_BAMBOO_DEBUGPRINT(0xecc0);
1682   GC_BAMBOO_DEBUGPRINT_REG(blockbase);
1683   GC_BAMBOO_DEBUGPRINT_REG(blockbound);
1684   GC_BAMBOO_DEBUGPRINT_REG(bound);
1685   GC_BAMBOO_DEBUGPRINT_REG(origptr);
1686 outernextSBlock:
1687   // check if across a big block
1688   // TODO now do not zero out the whole memory, maybe the last two conditions
1689   // are useless now
1690   if((blockbase>=bound)||(origptr>=bound)
1691           ||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
1692 innernextSBlock:
1693     // end of current heap block, jump to next one
1694     orig->numblocks++;
1695     GC_BAMBOO_DEBUGPRINT(0xecc1);
1696     GC_BAMBOO_DEBUGPRINT_REG(orig->numblocks);
1697     BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
1698     GC_BAMBOO_DEBUGPRINT(orig->base);
1699     if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
1700       // out of boundary
1701       orig->ptr = orig->base; // set current ptr to out of boundary too
1702       return false;
1703     }
1704     orig->blockbase = orig->base;
1705     orig->sblockindex =
1706           (unsigned int)(orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
1707     sbchanged = true;
1708     unsigned int blocknum = 0;
1709     BLOCKINDEX(orig->base, &blocknum);
1710     if(bamboo_smemtbl[blocknum] == 0) {
1711 #ifdef GC_TBL_DEBUG
1712           if(blocknum >= gcnumblock) {
1713                 BAMBOO_EXIT(0xb012);
1714           }
1715 #endif
1716       // goto next block
1717       goto innernextSBlock;
1718     }
1719         // check the bamboo_smemtbl to decide the real bound
1720         orig->bound = orig->base + bamboo_smemtbl[blocknum];
1721   } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
1722     orig->sblockindex += 1;
1723     sbchanged = true;
1724   }  // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
1725
1726   // check if this sblock should be skipped or have special start point
1727   int sbstart = gcsbstarttbl[orig->sblockindex];
1728 #ifdef GC_TBL_DEBUG
1729   if((orig->sblockindex) >= gcsbstarttbl_len) {
1730         BAMBOO_EXIT(0xb013);
1731   }
1732 #endif
1733   if(sbstart == -1) {
1734     // goto next sblock
1735     GC_BAMBOO_DEBUGPRINT(0xecc2);
1736     orig->sblockindex += 1;
1737     orig->blockbase += BAMBOO_SMEM_SIZE;
1738     goto outernextSBlock;
1739   } else if((sbstart != 0) && (sbchanged)) {
1740     // the first time to access this SBlock
1741     GC_BAMBOO_DEBUGPRINT(0xecc3);
1742     // not start from the very beginning
1743     orig->blockbase = sbstart;
1744   }  // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
1745
1746   // setup information for this sblock
1747   orig->blockbound = orig->blockbase+(unsigned int)*((int*)(orig->blockbase));
1748   orig->offset = BAMBOO_CACHE_LINE_SIZE;
1749   orig->ptr = orig->blockbase + orig->offset;
1750   GC_BAMBOO_DEBUGPRINT(0xecc4);
1751   GC_BAMBOO_DEBUGPRINT_REG(orig->base);
1752   GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
1753   GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
1754   GC_BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1755   GC_BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1756   GC_BAMBOO_DEBUGPRINT_REG(orig->offset);
1757   if(orig->ptr >= orig->bound) {
1758     // met a lobj, move to next block
1759     goto innernextSBlock;
1760   }
1761
1762   return true;
1763 } // bool nextSBlock(struct moveHelper * orig)
1764
1765 // return false if there are no available data to compact
1766 inline bool initOrig_Dst(struct moveHelper * orig,
1767                          struct moveHelper * to) {
1768   // init the dst ptr
1769   to->numblocks = 0;
1770   to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
1771   to->bound = BAMBOO_SMEM_SIZE_L;
1772   BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1773
1774   GC_BAMBOO_DEBUGPRINT(0xef01);
1775   GC_BAMBOO_DEBUGPRINT_REG(to->base);
1776   unsigned int tobase = to->base;
1777   to->ptr = tobase + to->offset;
1778 #ifdef GC_CACHE_ADAPT
1779   // initialize the gc_cache_revise_information
1780   gc_cache_revise_infomation.to_page_start_va = to->ptr;
1781   unsigned int toindex = (unsigned int)(tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
1782   gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
1783         (toindex+1);
1784   gc_cache_revise_infomation.to_page_index = toindex;
1785   gc_cache_revise_infomation.orig_page_start_va = -1;
1786 #endif // GC_CACHE_ADAPT
1787
1788   // init the orig ptr
1789   orig->numblocks = 0;
1790   orig->base = tobase;
1791   unsigned int blocknum = 0;
1792   BLOCKINDEX(orig->base, &blocknum);
1793   unsigned int origbase = orig->base;
1794   // check the bamboo_smemtbl to decide the real bound
1795   orig->bound = origbase + (unsigned int)bamboo_smemtbl[blocknum];
1796 #ifdef GC_TBL_DEBUG
1797   if((orig->sblockindex) >= gcsbstarttbl_len) {
1798         BAMBOO_EXIT(0xb014);
1799   }
1800 #endif
1801   orig->blockbase = origbase;
1802   orig->sblockindex = (unsigned int)(origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
1803   GC_BAMBOO_DEBUGPRINT(0xef02);
1804   GC_BAMBOO_DEBUGPRINT_REG(origbase);
1805   GC_BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
1806   GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
1807   GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
1808
1809   int sbstart = gcsbstarttbl[orig->sblockindex];
1810 #ifdef GC_TBL_DEBUG
1811   if((orig->sblockindex) >= gcsbstarttbl_len) {
1812         BAMBOO_EXIT(0xb015);
1813   }
1814 #endif
1815   if(sbstart == -1) {
1816     GC_BAMBOO_DEBUGPRINT(0xef03);
1817     // goto next sblock
1818     orig->blockbound =
1819       gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
1820     return nextSBlock(orig);
1821   } else if(sbstart != 0) {
1822     GC_BAMBOO_DEBUGPRINT(0xef04);
1823     orig->blockbase = sbstart;
1824   }
1825   GC_BAMBOO_DEBUGPRINT(0xef05);
1826   orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1827   orig->offset = BAMBOO_CACHE_LINE_SIZE;
1828   orig->ptr = orig->blockbase + orig->offset;
1829   GC_BAMBOO_DEBUGPRINT(0xef06);
1830   GC_BAMBOO_DEBUGPRINT_REG(orig->base);
1831
1832   return true;
1833 } // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
1834
1835 inline void nextBlock(struct moveHelper * to) {
1836   to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
1837   to->bound += BAMBOO_SMEM_SIZE;
1838   to->numblocks++;
1839   BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1840   to->offset = BAMBOO_CACHE_LINE_SIZE;
1841   to->ptr = to->base + to->offset;
1842 } // void nextBlock(struct moveHelper * to)
1843
1844 #ifdef GC_CACHE_ADAPT
1845 inline void samplingDataConvert(unsigned int current_ptr) {
1846   unsigned int tmp_factor =
1847         current_ptr-gc_cache_revise_infomation.to_page_start_va;
1848   unsigned int topage=gc_cache_revise_infomation.to_page_index;
1849   unsigned int oldpage = gc_cache_revise_infomation.orig_page_index;
1850   int * newtable=&gccachesamplingtbl_r[topage];
1851   int * oldtable=&gccachesamplingtbl[oldpage];
1852
1853   for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
1854     (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
1855     newtable=(int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
1856     oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
1857   }
1858 } // inline void samplingDataConvert(int)
1859
1860 inline void completePageConvert(struct moveHelper * orig,
1861                                     struct moveHelper * to,
1862                                                                 unsigned int current_ptr,
1863                                                                 bool closeToPage) {
1864   unsigned int ptr = 0;
1865   unsigned int tocompare = 0;
1866   if(closeToPage) {
1867         ptr = to->ptr;
1868         tocompare = gc_cache_revise_infomation.to_page_end_va;
1869   } else {
1870          ptr = orig->ptr;
1871          tocompare = gc_cache_revise_infomation.orig_page_end_va;
1872   }
1873   if((unsigned int)ptr >= (unsigned int)tocompare) {
1874         // end of an orig/to page
1875         // compute the impact of this page for the new page
1876         samplingDataConvert(current_ptr);
1877         // prepare for an new orig page
1878         unsigned int tmp_index =
1879           (unsigned int)((unsigned int)orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
1880         gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
1881         gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
1882           (BAMBOO_PAGE_SIZE)*(unsigned int)(tmp_index+1);
1883         gc_cache_revise_infomation.orig_page_index = tmp_index;
1884         gc_cache_revise_infomation.to_page_start_va = to->ptr;
1885         if(closeToPage) {
1886           gc_cache_revise_infomation.to_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
1887                 *(((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
1888           gc_cache_revise_infomation.to_page_index =
1889                 ((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE);
1890         }
1891   }
1892 } // inline void completePageConvert(...)
1893 #endif // GC_CACHE_ADAPT
1894
1895 // endaddr does not contain spaces for headers
1896 inline bool moveobj(struct moveHelper * orig,
1897                     struct moveHelper * to,
1898                     unsigned int stopblock) {
1899   if(stopblock == 0) {
1900     return true;
1901   }
1902
1903   GC_BAMBOO_DEBUGPRINT(0xe201);
1904   GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
1905   GC_BAMBOO_DEBUGPRINT_REG(to->ptr);
1906 #ifdef GC_TBL_DEBUG
1907   unsigned int bkptr = (unsigned int)(orig->ptr);
1908
1909   if((unsigned int)(to->ptr) > (unsigned int)(orig->ptr)) {
1910         tprintf("Error to->ptr > orig->ptr: %x, %x \n", (int)(to->ptr), (int)(orig->ptr));
1911         BAMBOO_EXIT(0xb016);
1912   }
1913 #endif
1914
1915   int type = 0;
1916   unsigned int size = 0;
1917   unsigned int isize = 0;
1918 innermoveobj:
1919   /*while((*((char*)(orig->ptr))) == (char)(-2)) {
1920         orig->ptr = (unsigned int)((void*)(orig->ptr) + 1);
1921   }*/
1922 #ifdef GC_CACHE_ADAPT
1923   completePageConvert(orig, to, to->ptr, false);
1924 #endif
1925   unsigned int origptr = (unsigned int)(orig->ptr);
1926   unsigned int origbound = (unsigned int)orig->bound;
1927   unsigned int origblockbound = (unsigned int)orig->blockbound;
1928   if((origptr >= origbound) || (origptr == origblockbound)) {
1929     if(!nextSBlock(orig)) {
1930       // finished, no more data
1931 #ifdef GC_TBL_DEBUG
1932           tprintf("AAAA %x \n", (int)(orig->ptr));
1933 #endif
1934       return true;
1935     }
1936     goto innermoveobj;
1937   }
1938   GC_BAMBOO_DEBUGPRINT(0xe202);
1939   GC_BAMBOO_DEBUGPRINT_REG(origptr);
1940   GC_BAMBOO_DEBUGPRINT(((int *)(origptr))[0]);
1941   // check the obj's type, size and mark flag
1942   type = ((int *)(origptr))[0];
1943   size = 0;
1944   if(type == 0) {
1945         // end of this block, go to next one
1946     if(!nextSBlock(orig)) {
1947       // finished, no more data
1948 #ifdef GC_TBL_DEBUG
1949           tprintf("BBBB %x \n", (int)(orig->ptr));
1950 #endif
1951       return true;
1952     }
1953     goto innermoveobj;
1954   } else if(type < NUMCLASSES) {
1955     // a normal object
1956     size = classsize[type];
1957   } else {
1958     // an array
1959     struct ArrayObject *ao=(struct ArrayObject *)(origptr);
1960     unsigned int elementsize=classsize[type];
1961     unsigned int length=ao->___length___;
1962     size=(unsigned int)sizeof(struct ArrayObject)
1963           +(unsigned int)(length*elementsize);
1964   }
1965   GC_BAMBOO_DEBUGPRINT(0xe203);
1966   GC_BAMBOO_DEBUGPRINT_REG(origptr);
1967   GC_BAMBOO_DEBUGPRINT_REG(size);
1968   ALIGNSIZE(size, &isize);       // no matter is the obj marked or not
1969                                  // should be able to across
1970 #ifdef GC_TBL_DEBUG
1971   int sindex = OBJMAPPINGINDEX((unsigned int)bkptr);
1972   int eindex = OBJMAPPINGINDEX((unsigned int)(origptr));
1973   for(int tmpi = sindex+1; tmpi < eindex; tmpi++) {
1974         if((gcmappingtbl[tmpi] != 0) &&
1975                 (hostcore(gcbaseva+bamboo_baseobjsize*tmpi)==BAMBOO_NUM_OF_CORE) &&
1976                 (hostcore(gcbaseva+bamboo_baseobjsize*(tmpi+1))==BAMBOO_NUM_OF_CORE)) {
1977           tprintf("Error moveobj --: %x, %x, %x, %d, %x \n", (int)bkptr,
1978                   (int)origptr, (int)(gcbaseva+bamboo_baseobjsize*tmpi),
1979                   (int)gcmappingtbl[tmpi], (int)(*((char*)(bkptr))));
1980           BAMBOO_EXIT(0xb017);
1981         }
1982   }
1983 #endif
1984   if(((int *)(origptr))[BAMBOOMARKBIT] == MARKED) {
1985         unsigned int totop = (unsigned int)to->top;
1986         unsigned int tobound = (unsigned int)to->bound;
1987     GC_BAMBOO_DEBUGPRINT(0xe204);
1988 #ifdef GC_PROFILE
1989 #ifdef MGC_SPEC
1990         if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
1991 #endif
1992         gc_num_liveobj++;
1993 #ifdef MGC_SPEC
1994         }
1995 #endif
1996 #endif
1997     // marked obj, copy it to current heap top
1998     // check to see if remaining space is enough
1999     if((unsigned int)(totop + isize) > tobound) {
2000       // fill 0 indicating the end of this block
2001       BAMBOO_MEMSET_WH(to->ptr,  '\0', tobound - totop);
2002       // fill the header of this block and then go to next block
2003       to->offset += tobound - totop;
2004       BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2005       (*((int*)(to->base))) = to->offset;
2006 #ifdef GC_CACHE_ADAPT
2007           unsigned int tmp_ptr = to->ptr;
2008 #endif // GC_CACHE_ADAPT
2009       nextBlock(to);
2010 #ifdef GC_CACHE_ADAPT
2011           completePageConvert(orig, to, tmp_ptr, true);
2012 #endif // GC_CACHE_ADAPT
2013       if(stopblock == to->numblocks) {
2014                 // already fulfilled the block
2015 #ifdef GC_TBL_DEBUG
2016                 tprintf("CCCC %x \n", (int)(orig->ptr));
2017 #endif
2018                 return true;
2019       }   // if(stopblock == to->numblocks)
2020     }   // if(to->top + isize > to->bound)
2021     // set the mark field to 2, indicating that this obj has been moved
2022     // and need to be flushed
2023     ((int *)(origptr))[BAMBOOMARKBIT] = COMPACTED;
2024         unsigned int toptr = (unsigned int)to->ptr;
2025 #ifdef GC_TBL_DEBUG
2026         {
2027           // scan all pointers in ptr
2028           unsigned int * tt_pointer;
2029           tt_pointer=pointerarray[type];
2030           if (tt_pointer==0) {
2031                 /* Array of primitives */
2032                 /* Do nothing */
2033           } else if (((unsigned int)tt_pointer)==1) {
2034                 /* Array of pointers */
2035                 struct ArrayObject *ao=(struct ArrayObject *)(origptr);
2036                 int tt_length=ao->___length___;
2037                 int tt_j;
2038                 for(tt_j=0; tt_j<tt_length; tt_j++) {
2039                   void *objptr =
2040                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[tt_j];
2041                   if((objptr != 0) &&
2042                           ((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0) ||
2043                            (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 1))) {
2044                         tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n",
2045                                 (int)origptr, (int)objptr, __LINE__, tt_j,
2046                                 ((int *)(origptr))[0], ((int *)(objptr))[0],
2047                                 ((int *)(objptr))[BAMBOOMARKBIT],
2048                                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)],
2049                                 hostcore(objptr), BAMBOO_NUM_OF_CORE);
2050                         BAMBOO_EXIT(0xb018);
2051                   }
2052                 }
2053           } else {
2054                 unsigned int tt_size=tt_pointer[0];
2055                 int tt_i;
2056                 for(tt_i=1; tt_i<=tt_size; tt_i++) {
2057                   unsigned int tt_offset=tt_pointer[tt_i];
2058                   void * objptr=*((void **)(((char *)origptr)+tt_offset));
2059                   if((objptr!= 0) &&
2060                           ((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0) ||
2061                            (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 1))) {
2062                         tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n",
2063                                 (int)origptr, (int)objptr, __LINE__, tt_i,
2064                                 ((int *)(origptr))[0], ((int *)(objptr))[0],
2065                                 ((int *)(objptr))[BAMBOOMARKBIT],
2066                                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)],
2067                                 hostcore(objptr), BAMBOO_NUM_OF_CORE);
2068                         BAMBOO_EXIT(0xb019);
2069                   }
2070                 }
2071           }     // if (pointer==0) else if ... else ...
2072           {
2073                   tt_pointer=pointerarray[OBJECTTYPE];
2074                   //handle object class
2075                   unsigned int tt_size=tt_pointer[0];
2076                   int tt_i;
2077                   for(tt_i=1; tt_i<=tt_size; tt_i++) {
2078                         unsigned int tt_offset=tt_pointer[i];
2079                         void * objptr=*((void **)(((char *)origptr)+tt_offset));
2080                         if((objptr!= 0) &&
2081                           ((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0) ||
2082                            (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 1))) {
2083                           tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n",
2084                                   (int)origptr, (int)objptr, __LINE__, tt_i,
2085                                   ((int *)(origptr))[0], ((int *)(objptr))[0],
2086                                   ((int *)(objptr))[BAMBOOMARKBIT],
2087                                   gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)],
2088                                   hostcore(objptr), BAMBOO_NUM_OF_CORE);
2089                           BAMBOO_EXIT(0xb01a);
2090                         }
2091                   }
2092           }
2093         }
2094         if((unsigned int)(toptr) > (unsigned int)(origptr)) {
2095           tprintf("Error to->ptr > orig->ptr: %x, %x \n", (int)(toptr),
2096                   (int)(origptr));
2097           BAMBOO_EXIT(0xb01b);
2098         }
2099 #endif
2100     if(toptr != origptr) {
2101       if((unsigned int)(origptr) < (unsigned int)(toptr+size)) {
2102                 memmove(toptr, origptr, size);
2103       } else {
2104                 memcpy(toptr, origptr, size);
2105       }
2106       // fill the remaining space with -2
2107       BAMBOO_MEMSET_WH((unsigned int)(toptr+size), -2, isize-size);
2108     }
2109 #ifdef GC_TBL_DEBUG
2110         if((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)] != 2)) {
2111           tprintf("Error moveobj: %x, %x, %d \n", (int)origptr,
2112                   ((int *)(origptr))[BAMBOOMARKBIT],
2113                   gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)]);
2114           BAMBOO_EXIT(0xb01c);
2115         }
2116 #endif
2117     // store mapping info
2118         gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)]=(unsigned int)toptr;
2119 #ifdef GC_TBL_DEBUG
2120         if(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)] ==
2121                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)-1]) {
2122           tprintf("Error moveobj ++ : %x, %x, %d \n", (int)origptr, (int)toptr,
2123                   OBJMAPPINGINDEX((unsigned int)origptr));
2124           BAMBOO_EXIT(0xb01d);
2125         }
2126         // scan all pointers in ptr
2127         unsigned int * tt_pointer;
2128         tt_pointer=pointerarray[type];
2129         if (tt_pointer==0) {
2130           /* Array of primitives */
2131           /* Do nothing */
2132         } else if (((unsigned int)tt_pointer)==1) {
2133           /* Array of pointers */
2134           struct ArrayObject *ao=(struct ArrayObject *)(toptr);
2135           int tt_length=ao->___length___;
2136           int tt_j;
2137           for(tt_j=0; tt_j<tt_length; tt_j++) {
2138                 void *objptr =
2139                   ((void **)(((char *)&ao->___length___)+sizeof(int)))[tt_j];
2140                 if((objptr != 0) &&
2141                         (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0)) {
2142                   tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n",
2143                           (int)origptr, (int)objptr, __LINE__, tt_i,
2144                           ((int *)(origptr))[0], ((int *)(objptr))[0],
2145                           ((int *)(objptr))[BAMBOOMARKBIT],
2146                           gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)],
2147                           hostcore(objptr), BAMBOO_NUM_OF_CORE);
2148                   BAMBOO_EXIT(0xb01e);
2149                 }
2150           }
2151         } else {
2152           unsigned int tt_size=tt_pointer[0];
2153           int tt_i;
2154           for(tt_i=1; tt_i<=tt_size; tt_i++) {
2155                 unsigned int tt_offset=tt_pointer[tt_i];
2156                 void * objptr=*((void **)(((char *)toptr)+tt_offset));
2157                 if((objptr != 0) &&
2158                         (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0)) {
2159                   tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n",
2160                           (int)origptr, (int)objptr, __LINE__, tt_i,
2161                           ((int *)(origptr))[0], ((int *)(objptr))[0],
2162                           ((int *)(objptr))[BAMBOOMARKBIT],
2163                           gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)],
2164                           hostcore(objptr), BAMBOO_NUM_OF_CORE);
2165                   BAMBOO_EXIT(0xb01f);
2166                 }
2167           }
2168         }     // if (pointer==0) else if ... else ...
2169         {
2170                   tt_pointer=pointerarray[OBJECTTYPE];
2171                   //handle object class
2172                   unsigned int tt_size=tt_pointer[0];
2173                   int tt_i;
2174                   for(tt_i=1; tt_i<=tt_size; tt_i++) {
2175                         unsigned int tt_offset=tt_pointer[i];
2176                         void * objptr=*((void **)(((char *)origptr)+tt_offset));
2177                         if((objptr!= 0) &&
2178                           ((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0) ||
2179                            (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 1))) {
2180                           tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n",
2181                                   (int)origptr, (int)objptr, __LINE__, tt_i,
2182                                   ((int *)(origptr))[0], ((int *)(objptr))[0],
2183                                   ((int *)(objptr))[BAMBOOMARKBIT],
2184                                   gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)],
2185                                   hostcore(objptr), BAMBOO_NUM_OF_CORE);
2186                           BAMBOO_EXIT(0xb020);
2187                         }
2188                   }
2189           }
2190         if(!ISSHAREDOBJ(toptr)) {
2191           tprintf("Error: %x, %x \n", (int)origptr, (int)toptr);
2192           BAMBOO_EXIT(0xb021);
2193         }
2194 #endif
2195         GC_BAMBOO_DEBUGPRINT(0xcdce);
2196     GC_BAMBOO_DEBUGPRINT_REG(origptr);
2197     GC_BAMBOO_DEBUGPRINT_REG(toptr);
2198     GC_BAMBOO_DEBUGPRINT_REG(isize);
2199     gccurr_heaptop -= isize;
2200     to->ptr += isize;
2201     to->offset += isize;
2202     to->top += isize;
2203 #ifdef GC_CACHE_ADAPT
2204         unsigned int tmp_ptr = to->ptr;
2205 #endif // GC_CACHE_ADAPT
2206     if(to->top == to->bound) {
2207       // fill the header of this block and then go to next block
2208       BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2209       (*((int*)(to->base))) = to->offset;
2210       nextBlock(to);
2211     }
2212 #ifdef GC_CACHE_ADAPT
2213         completePageConvert(orig, to, tmp_ptr, true);
2214 #endif // GC_CACHE_ADAPT
2215   } // if(mark == 1)
2216 #ifdef GC_TBL_DEBUG
2217   else {
2218         // skip the whole obj
2219         int sindex = OBJMAPPINGINDEX((unsigned int)origptr);
2220         int eindex = OBJMAPPINGINDEX((unsigned int)(origptr+size));
2221         for(int tmpi = sindex; tmpi < eindex; tmpi++) {
2222           if((gcmappingtbl[tmpi] != 0) &&
2223                   (hostcore(gcbaseva+bamboo_baseobjsize*tmpi)==BAMBOO_NUM_OF_CORE) &&
2224                   (hostcore(gcbaseva+bamboo_baseobjsize*(tmpi+1))==BAMBOO_NUM_OF_CORE))
2225           {
2226                 tprintf("Error moveobj **: %x, %x, %x, %d, (%d, %d, %x) \n",
2227                         (int)origptr, (int)(origptr+isize),
2228                         (int)(gcbaseva+bamboo_baseobjsize*tmpi), gcmappingtbl[tmpi], type,
2229                         isize, ((int *)(origptr))[BAMBOOMARKBIT]);
2230                 BAMBOO_EXIT(0xb022);
2231           }
2232         }
2233   }
2234 #endif
2235   GC_BAMBOO_DEBUGPRINT(0xe205);
2236
2237   // move to next obj
2238   orig->ptr += isize; // size;
2239
2240 #ifdef GC_TBL_DEBUG
2241   if(!ISSHAREDOBJ(orig->ptr) || !ISSHAREDOBJ(to->ptr)) {
2242         tprintf("Error moveobj out of boundary: %x, %x, %d, %d \n",
2243                 (int)(orig->ptr), (int)(to->ptr), size, isize);
2244         BAMBOO_EXIT(0x2022);
2245   }
2246 #endif
2247
2248   GC_BAMBOO_DEBUGPRINT_REG(isize);
2249   GC_BAMBOO_DEBUGPRINT_REG(size);
2250   GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
2251   GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
2252   if(((unsigned int)(orig->ptr) > (unsigned int)(orig->bound))
2253           || ((unsigned int)(orig->ptr) == (unsigned int)(orig->blockbound))) {
2254     GC_BAMBOO_DEBUGPRINT(0xe206);
2255     if(!nextSBlock(orig)) {
2256       // finished, no more data
2257 #ifdef GC_TBL_DEBUG
2258           tprintf("DDDD %x \n", (int)(orig->ptr));
2259 #endif
2260       return true;
2261     }
2262   }
2263   GC_BAMBOO_DEBUGPRINT(0xe207);
2264   GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
2265   return false;
2266 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
2267
2268 // should be invoked with interrupt closed
2269 inline int assignSpareMem_I(unsigned int sourcecore,
2270                             unsigned int * requiredmem,
2271                             unsigned int * tomove,
2272                             unsigned int * startaddr) {
2273   unsigned int b = 0;
2274   BLOCKINDEX(gcloads[sourcecore], &b);
2275   unsigned int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
2276                  : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
2277   unsigned int remain = boundptr - gcloads[sourcecore];
2278   unsigned int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
2279   *startaddr = gcloads[sourcecore];
2280   *tomove = gcfilledblocks[sourcecore] + 1;
2281   if(memneed < remain) {
2282     gcloads[sourcecore] += memneed;
2283     return 0;
2284   } else {
2285     // next available block
2286     gcfilledblocks[sourcecore] += 1;
2287     unsigned int newbase = 0;
2288     BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
2289     gcloads[sourcecore] = newbase;
2290     return requiredmem-remain;
2291   }
2292 } // int assignSpareMem_I(int ,int * , int * , int * )
2293
2294 // should be invoked with interrupt closed
2295 inline bool gcfindSpareMem_I(unsigned int * startaddr,
2296                              unsigned int * tomove,
2297                              unsigned int * dstcore,
2298                              unsigned int requiredmem,
2299                              unsigned int requiredcore) {
2300   for(int k = 0; k < NUMCORES4GC; k++) {
2301     if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
2302       // check if this stopped core has enough mem
2303       assignSpareMem_I(k, requiredmem, tomove, startaddr);
2304       *dstcore = k;
2305       return true;
2306     }
2307   }
2308   // if can not find spare mem right now, hold the request
2309   gcrequiredmems[requiredcore] = requiredmem;
2310   gcmovepending++;
2311   return false;
2312 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
2313
2314 inline bool compacthelper(struct moveHelper * orig,
2315                           struct moveHelper * to,
2316                           int * filledblocks,
2317                           unsigned int * heaptopptr,
2318                           bool * localcompact) {
2319   // scan over all objs in this block, compact the marked objs
2320   // loop stop when finishing either scanning all active objs or
2321   // fulfilled the gcstopblock
2322   GC_BAMBOO_DEBUGPRINT(0xe101);
2323   GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2324   GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2325 innercompact:
2326   while((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
2327     bool stop = moveobj(orig, to, gcblock2fill);
2328     if(stop) {
2329       break;
2330     }
2331   }
2332 #ifdef GC_TBL_DEBUG
2333   tprintf("finish mark %x \n", (int)gcmarkedptrbound);
2334 #endif
2335 #ifdef GC_CACHE_ADAPT
2336   // end of an to page, wrap up its information
2337   samplingDataConvert(to->ptr);
2338 #endif // GC_CACHE_ADAPT
2339   // if no objs have been compact, do nothing,
2340   // otherwise, fill the header of this block
2341   if(to->offset > (unsigned int)BAMBOO_CACHE_LINE_SIZE) {
2342     BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2343     (*((int*)(to->base))) = to->offset;
2344   } else {
2345     to->offset = 0;
2346     to->ptr = to->base;
2347     to->top -= BAMBOO_CACHE_LINE_SIZE;
2348   }  // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
2349   if(*localcompact) {
2350     *heaptopptr = to->ptr;
2351     *filledblocks = to->numblocks;
2352   }
2353   GC_BAMBOO_DEBUGPRINT(0xe102);
2354   GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
2355   GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2356   GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2357   GC_BAMBOO_DEBUGPRINT_REG(*filledblocks);
2358   GC_BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
2359
2360   // send msgs to core coordinator indicating that the compact is finishing
2361   // send compact finish message to core coordinator
2362   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2363     gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
2364     gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
2365     if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
2366       GC_BAMBOO_DEBUGPRINT(0xe103);
2367       // ask for more mem
2368       gctomove = false;
2369       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2370       if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
2371                           gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
2372                 GC_BAMBOO_DEBUGPRINT(0xe104);
2373                 gctomove = true;
2374       } else {
2375                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2376                 GC_BAMBOO_DEBUGPRINT(0xe105);
2377                 return false;
2378       }
2379       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2380     } else {
2381       GC_BAMBOO_DEBUGPRINT(0xe106);
2382       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2383       gctomove = false;
2384       return true;
2385     }
2386   } else {
2387     if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
2388       GC_BAMBOO_DEBUGPRINT(0xe107);
2389       // ask for more mem
2390       gctomove = false;
2391       send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2392                  *filledblocks, *heaptopptr, gccurr_heaptop, false);
2393     } else {
2394       GC_BAMBOO_DEBUGPRINT(0xe108);
2395       GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2396       // finish compacting
2397       send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2398                  *filledblocks, *heaptopptr, 0, false);
2399     }
2400   }       // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
2401
2402   if(orig->ptr < gcmarkedptrbound) {
2403     GC_BAMBOO_DEBUGPRINT(0xe109);
2404     // still have unpacked obj
2405     while(true) {
2406       if(gctomove) {
2407                 break;
2408       }
2409     }
2410     ;
2411         gctomove = false;
2412     GC_BAMBOO_DEBUGPRINT(0xe10a);
2413
2414     to->ptr = gcmovestartaddr;
2415     to->numblocks = gcblock2fill - 1;
2416     to->bound = (to->numblocks==0) ?
2417                 BAMBOO_SMEM_SIZE_L :
2418                 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2419     BASEPTR(gcdstcore, to->numblocks, &(to->base));
2420     to->offset = to->ptr - to->base;
2421     to->top = (to->numblocks==0) ?
2422               (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2423     to->base = to->ptr;
2424     to->offset = BAMBOO_CACHE_LINE_SIZE;
2425     to->ptr += to->offset;   // for header
2426     to->top += to->offset;
2427     if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2428       *localcompact = true;
2429     } else {
2430       *localcompact = false;
2431     }
2432 #ifdef GC_CACHE_ADAPT
2433         // initialize the gc_cache_revise_information
2434         gc_cache_revise_infomation.to_page_start_va = (unsigned int)to->ptr;
2435         gc_cache_revise_infomation.to_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
2436           *(((unsigned int)(to->base)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2437         gc_cache_revise_infomation.to_page_index =
2438           ((unsigned int)(to->base)-gcbaseva)/(BAMBOO_PAGE_SIZE);
2439         gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2440         gc_cache_revise_infomation.orig_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
2441           *(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2442         gc_cache_revise_infomation.orig_page_index =
2443           ((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
2444 #endif // GC_CACHE_ADAPT
2445     goto innercompact;
2446   }
2447   GC_BAMBOO_DEBUGPRINT(0xe10b);
2448   return true;
2449 } // void compacthelper()
2450
2451 inline void compact() {
2452   if(COMPACTPHASE != gcphase) {
2453     BAMBOO_EXIT(0xb023);
2454   }
2455
2456   // initialize pointers for comapcting
2457   struct moveHelper * orig =
2458     (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2459   struct moveHelper * to =
2460     (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2461   if(!initOrig_Dst(orig, to)) {
2462     // no available data to compact
2463     // send compact finish msg to STARTUP core
2464     GC_BAMBOO_DEBUGPRINT(0xe001);
2465     GC_BAMBOO_DEBUGPRINT_REG(to->base);
2466     send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2467                0, to->base, 0, false);
2468     RUNFREE(orig);
2469     RUNFREE(to);
2470     return;
2471   }
2472 #ifdef GC_CACHE_ADAPT
2473   gc_cache_revise_infomation.orig_page_start_va = (unsigned int)orig->ptr;
2474   gc_cache_revise_infomation.orig_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
2475         *(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2476   gc_cache_revise_infomation.orig_page_index =
2477         ((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
2478 #endif // GC_CACHE_ADAPT
2479
2480   unsigned int filledblocks = 0;
2481   unsigned int heaptopptr = 0;
2482   bool localcompact = true;
2483   compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2484   RUNFREE(orig);
2485   RUNFREE(to);
2486 } // compact()
2487
2488 // if return NULL, means
2489 //   1. objptr is NULL
2490 //   2. objptr is not a shared obj
2491 // in these cases, remain the original value is OK
2492 #ifdef GC_TBL_DEBUG
2493 inline void * flushObj(void * objptr, int linenum, void * ptr, int tt) {
2494 #else
2495 inline void * flushObj(void * objptr) {
2496 #endif
2497   GC_BAMBOO_DEBUGPRINT(0xe401);
2498   if(objptr == NULL) {
2499     return NULL;
2500   }
2501   void * dstptr = NULL;
2502   if(ISSHAREDOBJ(objptr)) {
2503     GC_BAMBOO_DEBUGPRINT(0xe402);
2504     GC_BAMBOO_DEBUGPRINT_REG(objptr);
2505     // a shared obj ptr, change to new address
2506         dstptr = gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)];
2507     GC_BAMBOO_DEBUGPRINT_REG(dstptr);
2508 #ifdef GC_TBL_DEBUG
2509         if(ISSHAREDOBJ(dstptr) && ((unsigned int)(((int*)dstptr)[0]) >= (unsigned int)NUMTYPES)) {
2510           tprintf("Error flushObj  ** : %x, %x, %d, %d, %d, %d, %x, %x, %x, %d, %x, %d %d \n",
2511                   (int)objptr, (int)dstptr, ((int*)dstptr)[0], hostcore(objptr),
2512                   hostcore(objptr)==BAMBOO_NUM_OF_CORE,
2513                   OBJMAPPINGINDEX((unsigned int)objptr), (int)gcmappingtbl,
2514                   &(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)]),
2515                   (int)gcbaseva, linenum, (int)ptr, ((int*)ptr)[0], tt);
2516           BAMBOO_EXIT(0xb024);
2517         }
2518 #endif
2519
2520     if(!ISSHAREDOBJ(dstptr)) {
2521 #ifdef GC_TBL_DEBUG
2522           tprintf("Error flushObj  ++ : %x, %x, %d, %d, %d, %x, %x, %x, %d, %x, %d %d \n",
2523                   (int)objptr, (int)dstptr, hostcore(objptr),
2524                   hostcore(objptr)==BAMBOO_NUM_OF_CORE,
2525                   OBJMAPPINGINDEX((unsigned int)objptr), (int)gcmappingtbl,
2526                   &(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)]),
2527                   (int)gcbaseva, linenum, (int)ptr, ((int*)ptr)[0], tt);
2528           tprintf("gcmappingtbl: \n");
2529           int tmp = OBJMAPPINGINDEX((unsigned int)objptr) - 50;
2530           for(int jj = 0; jj < 100; jj+=10) {
2531                 tprintf("%8x, %8x, %8x, %8x, %8x, %8x, %8x, %8x, %8x, %8x, %d \n",
2532                         (int)gcmappingtbl[tmp++], (int)gcmappingtbl[tmp++],
2533                         (int)gcmappingtbl[tmp++], (int)gcmappingtbl[tmp++],
2534                         (int)gcmappingtbl[tmp++], (int)gcmappingtbl[tmp++],
2535                         (int)gcmappingtbl[tmp++], (int)gcmappingtbl[tmp++],
2536                         (int)gcmappingtbl[tmp++], (int)gcmappingtbl[tmp++], tmp);
2537           }
2538           BAMBOO_EXIT(0xb025);
2539 #else
2540       // no mapping info
2541       GC_BAMBOO_DEBUGPRINT(0xe403);
2542       GC_BAMBOO_DEBUGPRINT_REG(objptr);
2543       GC_BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2544           // error! the obj is right on this core, but cannot find it
2545           GC_BAMBOO_DEBUGPRINT_REG(objptr);
2546           tprintf("Error flushObj  ++ : %x, %x, %d, %d, %x, %x, %x, %x\n",
2547                   (int)objptr, (int)dstptr, hostcore(objptr),
2548                   hostcore(objptr)==BAMBOO_NUM_OF_CORE,
2549                   OBJMAPPINGINDEX((unsigned int)objptr), (int)gcmappingtbl,
2550                   &(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)]),
2551                   (int)gcbaseva);
2552           BAMBOO_EXIT(0xb026);
2553 #endif
2554     }  // if(NULL == dstptr)
2555   }   // if(ISSHAREDOBJ(objptr))
2556 #ifdef GC_TBL_DEBUG
2557   else {
2558         tprintf("Error flushObj: %x \n", (int)objptr);
2559         BAMBOO_EXIT(0xb027);
2560   }
2561 #endif
2562   // if not a shared obj, return NULL to indicate no need to flush
2563   GC_BAMBOO_DEBUGPRINT(0xe404);
2564   return dstptr;
2565 } // void flushObj(void * objptr)
2566
2567 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2568   int i,j;
2569   // flush current stack
2570   while(stackptr!=NULL) {
2571     for(i=0; i<stackptr->size; i++) {
2572       if(stackptr->array[i] != NULL) {
2573 #ifdef GC_TBL_DEBUG
2574                 void * dst = flushObj(stackptr->array[i],
2575                         __LINE__, stackptr->array[i], i);
2576 #else
2577                 void * dst = flushObj(stackptr->array[i]);
2578 #endif
2579                 if(dst != NULL) {
2580                   stackptr->array[i] = dst;
2581                 }
2582       }
2583     }
2584     stackptr=stackptr->next;
2585   }
2586
2587   // flush static pointers global_defs_p
2588   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2589         struct garbagelist * staticptr=(struct garbagelist *)global_defs_p;
2590         for(i=0; i<staticptr->size; i++) {
2591           if(staticptr->array[i] != NULL) {
2592 #ifdef GC_TBL_DEBUG
2593                 void * dst = flushObj(staticptr->array[i],
2594                         __LINE__, staticptr->array[i], i);
2595 #else
2596                 void * dst = flushObj(staticptr->array[i]);
2597 #endif
2598                 if(dst != NULL) {
2599                   staticptr->array[i] = dst;
2600                 }
2601           }
2602         }
2603   }
2604
2605 #ifdef TASK
2606   // flush objectsets
2607   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2608     for(i=0; i<NUMCLASSES; i++) {
2609       struct parameterwrapper ** queues =
2610         objectqueues[BAMBOO_NUM_OF_CORE][i];
2611       int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2612       for(j = 0; j < length; ++j) {
2613                 struct parameterwrapper * parameter = queues[j];
2614                 struct ObjectHash * set=parameter->objectset;
2615                 struct ObjectNode * ptr=set->listhead;
2616                 while(ptr!=NULL) {
2617 #ifdef GC_TBL_DEBUG
2618                   void * dst = flushObj((void *)ptr->key,
2619                           __LINE__, (void *)ptr->key, 0);
2620 #else
2621                   void * dst = flushObj((void *)ptr->key);
2622 #endif
2623                   if(dst != NULL) {
2624                         ptr->key = dst;
2625                   }
2626                   ptr=ptr->lnext;
2627                 }
2628                 ObjectHashrehash(set);
2629       }
2630     }
2631   }
2632
2633   // flush current task descriptor
2634   if(currtpd != NULL) {
2635     for(i=0; i<currtpd->numParameters; i++) {
2636 #ifdef GC_TBL_DEBUG
2637           void * dst = flushObj(currtpd->parameterArray[i],
2638                   __LINE__, currtpd->parameterArray[i], i);
2639 #else
2640       void * dst = flushObj(currtpd->parameterArray[i]);
2641 #endif
2642       if(dst != NULL) {
2643                 currtpd->parameterArray[i] = dst;
2644       }
2645     }
2646   }
2647
2648   // flush active tasks
2649   if(activetasks != NULL) {
2650     struct genpointerlist * ptr=activetasks->list;
2651     while(ptr!=NULL) {
2652       struct taskparamdescriptor *tpd=ptr->src;
2653       int i;
2654       for(i=0; i<tpd->numParameters; i++) {
2655 #ifdef GC_TBL_DEBUG
2656                 void * dst = flushObj(tpd->parameterArray[i],
2657                         __LINE__, tpd->parameterArray[i], i);
2658 #else
2659                 void * dst = flushObj(tpd->parameterArray[i]);
2660 #endif
2661                 if(dst != NULL) {
2662                   tpd->parameterArray[i] = dst;
2663                 }
2664       }
2665       ptr=ptr->inext;
2666     }
2667     genrehash(activetasks);
2668   }
2669
2670   // flush cached transferred obj
2671   struct QueueItem * tmpobjptr =  getHead(&objqueue);
2672   while(tmpobjptr != NULL) {
2673     struct transObjInfo * objInfo =
2674       (struct transObjInfo *)(tmpobjptr->objectptr);
2675 #ifdef GC_TBL_DEBUG
2676         void * dst = flushObj(objInfo->objptr, __LINE__,
2677                 objInfo->objptr, 0);
2678 #else
2679     void * dst = flushObj(objInfo->objptr);
2680 #endif
2681     if(dst != NULL) {
2682       objInfo->objptr = dst;
2683     }
2684     tmpobjptr = getNextQueueItem(tmpobjptr);
2685   }
2686
2687   // flush cached objs to be transferred
2688   struct QueueItem * item = getHead(totransobjqueue);
2689   while(item != NULL) {
2690     struct transObjInfo * totransobj =
2691       (struct transObjInfo *)(item->objectptr);
2692 #ifdef GC_TBL_DEBUG
2693         void * dst = flushObj(totransobj->objptr, __LINE__,
2694                 totransobj->objptr, 0);
2695 #else
2696     void * dst = flushObj(totransobj->objptr);
2697 #endif
2698     if(dst != NULL) {
2699       totransobj->objptr = dst;
2700     }
2701     item = getNextQueueItem(item);
2702   }  // while(item != NULL)
2703
2704   // enqueue lock related info
2705   for(i = 0; i < runtime_locklen; ++i) {
2706 #ifdef GC_TBL_DEBUG
2707         void * dst = flushObj(runtime_locks[i].redirectlock,
2708                 __LINE__, runtime_locks[i].redirectlock, i);
2709 #else
2710     void * dst = flushObj(runtime_locks[i].redirectlock);
2711 #endif
2712     if(dst != NULL) {
2713       runtime_locks[i].redirectlock = (int)dst;
2714     }
2715     if(runtime_locks[i].value != NULL) {
2716 #ifdef GC_TBL_DEBUG
2717           void * dst=flushObj(runtime_locks[i].value,
2718                   __LINE__, runtime_locks[i].value, i);
2719 #else
2720       void * dst=flushObj(runtime_locks[i].value);
2721 #endif
2722       if(dst != NULL) {
2723                 runtime_locks[i].value = (int)dst;
2724       }
2725     }
2726   }
2727 #endif
2728
2729 #ifdef MGC
2730   // flush the bamboo_threadlocks
2731   for(i = 0; i < bamboo_threadlocks.index; i++) {
2732 #ifdef GC_TBL_DEBUG
2733         void * dst = flushObj((void *)(bamboo_threadlocks.locks[i].object),
2734                         __LINE__, (void *)(bamboo_threadlocks.locks[i].object), i);
2735 #else
2736         void * dst = flushObj((void *)(bamboo_threadlocks.locks[i].object));
2737 #endif
2738         if(dst != NULL) {
2739           bamboo_threadlocks.locks[i].object = (struct ___Object___ *)dst;
2740         }
2741   }
2742
2743   // flush the bamboo_current_thread
2744   if(bamboo_current_thread != 0) {
2745 #ifdef GC_TBL_DEBUG
2746         bamboo_current_thread =
2747           (unsigned int)(flushObj((void *)bamboo_current_thread,
2748                         __LINE__, (void *)bamboo_current_thread, 0));
2749 #else
2750         bamboo_current_thread =
2751           (unsigned int)(flushObj((void *)bamboo_current_thread));
2752 #endif
2753   }
2754
2755   // flush global thread queue
2756   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2757         unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
2758         if(thread_counter > 0) {
2759           unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
2760           for(i = thread_counter; i > 0; i--) {
2761 #ifdef GC_TBL_DEBUG
2762                 bamboo_thread_queue[4+start] =
2763                   (INTPTR)(flushObj((void *)bamboo_thread_queue[4+start
2764                                 ], __LINE__, (void *)bamboo_thread_queue, 0));
2765 #else
2766                 bamboo_thread_queue[4+start] =
2767                   (INTPTR)(flushObj((void *)bamboo_thread_queue[4+start]));
2768 #endif
2769                 start = (start+1)&bamboo_max_thread_num_mask;
2770           }
2771         }
2772         unlockthreadqueue();
2773   }
2774 #endif
2775 } // void flushRuntimeObj(struct garbagelist * stackptr)
2776
2777 inline void flush(struct garbagelist * stackptr) {
2778
2779   flushRuntimeObj(stackptr);
2780
2781   while(true) {
2782     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2783     bool hasItems = gc_moreItems_I();
2784     BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2785     if(!hasItems) {
2786       break;
2787     }
2788
2789     GC_BAMBOO_DEBUGPRINT(0xe301);
2790     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2791     void * ptr = gc_dequeue_I();
2792 #ifdef GC_TBL_DEBUG
2793     unsigned int bkptr = (unsigned int)ptr;
2794 #endif
2795     BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2796     if(ISSHAREDOBJ(ptr)) {
2797       // should be a local shared obj and should have mapping info
2798 #ifdef GC_TBL_DEBUG
2799           ptr = flushObj(ptr, __LINE__, ptr, 0);
2800 #else
2801       ptr = flushObj(ptr);
2802 #endif
2803       GC_BAMBOO_DEBUGPRINT(0xe302);
2804       GC_BAMBOO_DEBUGPRINT_REG(ptr);
2805       if(ptr == NULL) {
2806                 BAMBOO_EXIT(0xb028);
2807       }
2808     } // if(ISSHAREDOBJ(ptr))
2809     if((!ISSHAREDOBJ(ptr))||(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED)) {
2810       int type = ((int *)(ptr))[0];
2811 #ifdef GC_TBL_DEBUG
2812           if((unsigned int)type >= (unsigned int)NUMTYPES) {
2813                 tprintf("Error flushObj  %x, %x, %d, %d \n", bkptr, (int)ptr, type,
2814                         ((int *)(ptr))[BAMBOOMARKBIT]);
2815                 BAMBOO_EXIT(0xb029);
2816           }
2817 #endif
2818       // scan all pointers in ptr
2819       unsigned int * pointer;
2820       pointer=pointerarray[type];
2821       GC_BAMBOO_DEBUGPRINT(0xe303);
2822       GC_BAMBOO_DEBUGPRINT_REG(pointer);
2823       if (pointer==0) {
2824                 /* Array of primitives */
2825                 /* Do nothing */
2826       } else if (((unsigned int)pointer)==1) {
2827                 GC_BAMBOO_DEBUGPRINT(0xe304);
2828                 /* Array of pointers */
2829                 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2830                 int length=ao->___length___;
2831                 int j;
2832                 for(j=0; j<length; j++) {
2833                   GC_BAMBOO_DEBUGPRINT(0xe305);
2834                   void *objptr=
2835                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2836                   GC_BAMBOO_DEBUGPRINT_REG(objptr);
2837                   if(objptr != NULL) {
2838 #ifdef GC_TBL_DEBUG
2839                         void * dst = flushObj(objptr, __LINE__, ptr, j);
2840 #else
2841                         void * dst = flushObj(objptr);
2842 #endif
2843                         if(dst != NULL) {
2844                           ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2845                         }
2846                   }
2847                 }
2848       } else {
2849                 GC_BAMBOO_DEBUGPRINT(0xe306);
2850                 unsigned int size=pointer[0];
2851                 int i;
2852                 for(i=1; i<=size; i++) {
2853                   GC_BAMBOO_DEBUGPRINT(0xe307);
2854                   unsigned int offset=pointer[i];
2855                   void * objptr=*((void **)(((char *)ptr)+offset));
2856                   GC_BAMBOO_DEBUGPRINT_REG(objptr);
2857                   if(objptr != NULL) {
2858 #ifdef GC_TBL_DEBUG
2859                         void * dst = flushObj(objptr, __LINE__, ptr, i);
2860 #else
2861                         void * dst = flushObj(objptr);
2862 #endif
2863                         if(dst != NULL) {
2864                           *((void **)(((char *)ptr)+offset)) = dst;
2865                         }
2866                   }
2867                 } // for(i=1; i<=size; i++)
2868       }  // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2869           {
2870                 pointer=pointerarray[OBJECTTYPE];
2871                 //handle object class
2872                 unsigned int size=pointer[0];
2873                 int i;
2874                 for(i=1; i<=size; i++) {
2875                   unsigned int offset=pointer[i];
2876                   void * objptr=*((void **)(((char *)ptr)+offset));
2877                   if(objptr != NULL) {
2878 #ifdef GC_TBL_DEBUG
2879                         void * dst = flushObj(objptr, __LINE__, ptr, i);
2880 #else
2881                         void * dst = flushObj(objptr);
2882 #endif
2883                         if(dst != NULL) {
2884                           *((void **)(((char *)ptr)+offset)) = dst;
2885                         }
2886                   }
2887                 }
2888           }
2889       // restore the mark field, indicating that this obj has been flushed
2890       if(ISSHAREDOBJ(ptr)) {
2891                 ((int *)(ptr))[BAMBOOMARKBIT] = INIT;
2892       }
2893     }  //if((!ISSHAREDOBJ(ptr))||(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED))
2894   }   // while(gc_moreItems())
2895   GC_BAMBOO_DEBUGPRINT(0xe308);
2896
2897   // TODO bug here: the startup core contains all lobjs' info, thus all the
2898   // lobjs are flushed in sequence.
2899   // flush lobjs
2900   while(gc_lobjmoreItems_I()) {
2901     GC_BAMBOO_DEBUGPRINT(0xe309);
2902     void * ptr = gc_lobjdequeue_I(NULL, NULL);
2903 #ifdef GC_TBL_DEBUG
2904         ptr = flushObj(ptr, __LINE__, ptr, 0);
2905 #else
2906     ptr = flushObj(ptr);
2907 #endif
2908     GC_BAMBOO_DEBUGPRINT(0xe30a);
2909     GC_BAMBOO_DEBUGPRINT_REG(ptr);
2910     GC_BAMBOO_DEBUGPRINT_REG(tptr);
2911     GC_BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
2912     if(ptr == NULL) {
2913       BAMBOO_EXIT(0xb02a);
2914     }
2915     if(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED) {
2916       int type = ((int *)(ptr))[0];
2917       // scan all pointers in ptr
2918       unsigned int * pointer;
2919       pointer=pointerarray[type];
2920       GC_BAMBOO_DEBUGPRINT(0xe30b);
2921       GC_BAMBOO_DEBUGPRINT_REG(pointer);
2922       if (pointer==0) {
2923                 /* Array of primitives */
2924                 /* Do nothing */
2925       } else if (((unsigned int)pointer)==1) {
2926                 GC_BAMBOO_DEBUGPRINT(0xe30c);
2927                 /* Array of pointers */
2928                 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2929                 int length=ao->___length___;
2930                 int j;
2931                 for(j=0; j<length; j++) {
2932                   GC_BAMBOO_DEBUGPRINT(0xe30d);
2933                   void *objptr=
2934                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2935                   GC_BAMBOO_DEBUGPRINT_REG(objptr);
2936                   if(objptr != NULL) {
2937 #ifdef GC_TBL_DEBUG
2938                         void * dst = flushObj(objptr, __LINE__, ptr, j);
2939 #else
2940                         void * dst = flushObj(objptr);
2941 #endif
2942                         if(dst != NULL) {
2943                           ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2944                         }
2945                   }
2946                 }
2947       } else {
2948                 GC_BAMBOO_DEBUGPRINT(0xe30e);
2949                 unsigned int size=pointer[0];
2950                 int i;
2951                 for(i=1; i<=size; i++) {
2952                   GC_BAMBOO_DEBUGPRINT(0xe30f);
2953                   unsigned int offset=pointer[i];
2954                   void * objptr=*((void **)(((char *)ptr)+offset));
2955
2956                   GC_BAMBOO_DEBUGPRINT_REG(objptr);
2957                   if(objptr != NULL) {
2958 #ifdef GC_TBL_DEBUG
2959                         void * dst = flushObj(objptr, __LINE__, ptr, i);
2960 #else
2961                         void * dst = flushObj(objptr);
2962 #endif
2963                         if(dst != NULL) {
2964                           *((void **)(((char *)ptr)+offset)) = dst;
2965                         }
2966                   }
2967                 }  // for(i=1; i<=size; i++)
2968       }  // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2969           {
2970                 pointer=pointerarray[OBJECTTYPE];
2971                 //handle object class
2972                 unsigned int size=pointer[0];
2973                 int i;
2974                 for(i=1; i<=size; i++) {
2975                   unsigned int offset=pointer[i];
2976                   void * objptr=*((void **)(((char *)ptr)+offset));
2977                   if(objptr != NULL) {
2978 #ifdef GC_TBL_DEBUG
2979                         void * dst = flushObj(objptr, __LINE__, ptr, i);
2980 #else
2981                         void * dst = flushObj(objptr);
2982 #endif
2983                         if(dst != NULL) {
2984                           *((void **)(((char *)ptr)+offset)) = dst;
2985                         }
2986                   }
2987                 }
2988           }
2989       // restore the mark field, indicating that this obj has been flushed
2990       ((int *)(ptr))[BAMBOOMARKBIT] = INIT;
2991     }     // if(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED)
2992   }     // while(gc_lobjmoreItems())
2993   GC_BAMBOO_DEBUGPRINT(0xe310);
2994
2995   // send flush finish message to core coordinator
2996   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2997     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2998   } else {
2999     send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
3000   }
3001   GC_BAMBOO_DEBUGPRINT(0xe311);
3002 } // flush()
3003
3004 #ifdef GC_CACHE_ADAPT
3005 // prepare for cache adaption:
3006 //   -- flush the shared heap
3007 //   -- clean dtlb entries
3008 //   -- change cache strategy
3009 void cacheAdapt_gc(bool isgccachestage) {
3010   // flush the shared heap
3011   BAMBOO_CACHE_FLUSH_L2();
3012
3013   // clean the dtlb entries
3014   BAMBOO_CLEAN_DTLB();
3015
3016   // change the cache strategy
3017   gccachestage = isgccachestage;
3018 } // cacheAdapt_gc(bool isgccachestage)
3019
3020 // the master core decides how to adapt cache strategy for the mutator
3021 // according to collected statistic data
3022
3023 // make all pages hfh
3024 int cacheAdapt_policy_h4h(){
3025   unsigned int page_index = 0;
3026   VA page_sva = 0;
3027   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3028   unsigned int numchanged = 0;
3029   int * tmp_p = gccachepolicytbl+1;
3030   for(page_index = 0; page_index < page_num; page_index++) {
3031         page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3032         bamboo_cache_policy_t policy = {0};
3033         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3034         *tmp_p = page_index;
3035         tmp_p++;
3036         *tmp_p = policy.word;
3037         tmp_p++;
3038         numchanged++;
3039   }
3040
3041   return numchanged;
3042 } // int cacheAdapt_policy_hfh()
3043
3044 // make all pages local as non-cache-adaptable gc local mode
3045 int cacheAdapt_policy_local(){
3046   unsigned int page_index = 0;
3047   VA page_sva = 0;
3048   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3049   unsigned int numchanged = 0;
3050   int * tmp_p = gccachepolicytbl+1;
3051   for(page_index = 0; page_index < page_num; page_index++) {
3052         page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3053         bamboo_cache_policy_t policy = {0};
3054         unsigned int block = 0;
3055         BLOCKINDEX(page_sva, &block);
3056         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
3057         // locally cache the page in the hotest core
3058         // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3059         policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3060         policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
3061         policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
3062         *tmp_p = page_index;
3063         tmp_p++;
3064         *tmp_p = policy.word;
3065         tmp_p++;
3066         numchanged++;
3067   }
3068
3069   return numchanged;
3070 } // int cacheAdapt_policy_local()
3071
3072 int cacheAdapt_policy_hotest(){
3073   unsigned int page_index = 0;
3074   VA page_sva = 0;
3075   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3076   unsigned int numchanged = 0;
3077   int * tmp_p = gccachepolicytbl+1;
3078   for(page_index = 0; page_index < page_num; page_index++) {
3079         page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3080         bamboo_cache_policy_t policy = {0};
3081         unsigned int hotestcore = 0;
3082         unsigned int hotfreq = 0;
3083
3084         int *local_tbl=&gccachesamplingtbl_r[page_index];
3085         for(int i = 0; i < NUMCORESACTIVE; i++) {
3086           int freq = *local_tbl;
3087           local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3088
3089           // check the freqency, decide if this page is hot for the core
3090           if(hotfreq < freq) {
3091                 hotfreq = freq;
3092                 hotestcore = i;
3093           }
3094         }
3095         // TODO
3096         // Decide the cache strategy for this page
3097         // If decide to adapt a new cache strategy, write into the shared block of
3098         // the gcsharedsamplingtbl. The mem recording information that has been
3099         // written is enough to hold the information.
3100         // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3101         if(hotfreq == 0) {
3102           // this page has not been accessed, do not change its cache policy
3103           continue;
3104         } else {
3105           // locally cache the page in the hotest core
3106           // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3107           policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3108           policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3109           policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3110           *tmp_p = page_index;
3111           tmp_p++;
3112           *tmp_p = policy.word;
3113           tmp_p++;
3114           numchanged++;
3115         }
3116   }
3117
3118   return numchanged;
3119 } // int cacheAdapt_policy_hotest()
3120
3121 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  50
3122 // cache the page on the core that accesses it the most if that core accesses
3123 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
3124 // h4h the page.
3125 int cacheAdapt_policy_dominate(){
3126   unsigned int page_index = 0;
3127   VA page_sva = 0;
3128   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3129   unsigned int numchanged = 0;
3130   int * tmp_p = gccachepolicytbl+1;
3131   for(page_index = 0; page_index < page_num; page_index++) {
3132         page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3133         bamboo_cache_policy_t policy = {0};
3134         unsigned int hotestcore = 0;
3135         unsigned long long totalfreq = 0;
3136         unsigned int hotfreq = 0;
3137
3138         int *local_tbl=&gccachesamplingtbl_r[page_index];
3139         for(int i = 0; i < NUMCORESACTIVE; i++) {
3140           int freq = *local_tbl;
3141           local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3142           totalfreq += freq;
3143           // check the freqency, decide if this page is hot for the core
3144           if(hotfreq < freq) {
3145                 hotfreq = freq;
3146                 hotestcore = i;
3147           }
3148         }
3149
3150         // Decide the cache strategy for this page
3151         // If decide to adapt a new cache strategy, write into the shared block of
3152         // the gcpolicytbl
3153         // Format: page start va + cache policy
3154         if(hotfreq == 0) {
3155           // this page has not been accessed, do not change its cache policy
3156           continue;
3157         }
3158         totalfreq =
3159           (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
3160         hotfreq/=BAMBOO_PAGE_SIZE;
3161         if(hotfreq < totalfreq) {
3162           // use hfh
3163           policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3164         } else {
3165           // locally cache the page in the hotest core
3166           // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3167           policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3168           policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3169           policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3170         }
3171         *tmp_p = page_index;
3172         tmp_p++;
3173         *tmp_p = policy.word;
3174         tmp_p++;
3175         numchanged++;
3176   }
3177
3178   return numchanged;
3179 } // int cacheAdapt_policy_dominate()
3180
3181 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
3182
// Sort a run of 3-word records into DESCENDING key order, in place.
//
// Records are addressed with 1-based indices: record k occupies
// array[3*k-2], array[3*k-1] and array[3*k]; array[0] is never touched.
// The sort key of record k is array[3*k-offset], so offset selects which of
// the three words is compared (0 = last word, 1 = middle word, 2 = first
// word). All three words of a record always move together.
//
//   array  - base of the table (element 0 precedes record 1)
//   left   - index of the first record to sort (must be >= 1)
//   right  - index of the last record to sort (inclusive)
//   offset - key selector, expected to be 0, 1 or 2
//
// Ranges with fewer than two records (including right == left-1) are left
// untouched. The order of records with equal keys is unspecified.
//
// The previous implementation tracked a moving pivot index and tested
// (leftIdx+1) == pivot where the right-hand cursor should have been checked;
// when the right cursor reached the pivot first the pivot index went stale
// and the range was left unsorted (e.g. keys 1,3,2 ended up as 3,1,2). This
// version uses a plain Hoare partition around the pivot VALUE, which has no
// index to keep in sync.
void gc_quicksort(unsigned long long *array,
                  unsigned int left,
                  unsigned int right,
                  unsigned int offset) {
  if(left == 0) {
    // record 0 would lie in front of the array; nothing valid to sort
    return;
  }

  // Loop on the larger partition and recurse only into the smaller one, so
  // the recursion depth stays logarithmic in the number of records.
  while(left < right) {
    unsigned long long pivotValue = array[3*(left+(right-left)/2)-offset];
    unsigned int lo = left;
    unsigned int hi = right;

    while(lo <= hi) {
      // skip records already on the correct side of the pivot
      while(array[3*lo-offset] > pivotValue) {
        lo++;
      }
      while(array[3*hi-offset] < pivotValue) {
        hi--;
      }
      if(lo <= hi) {
        // swap whole records [lo] & [hi]
        for(int k = 0; k < 3; k++) {
          unsigned long long tmp = array[3*hi-k];
          array[3*hi-k] = array[3*lo-k];
          array[3*lo-k] = tmp;
        }
        lo++;
        hi--;
      }
    }

    // Now keys in [left, hi] are >= pivotValue and keys in [lo, right] are
    // <= pivotValue. hi >= left-1 and lo <= right+1, so neither count
    // below can wrap (left >= 1 keeps hi from going under zero).
    unsigned int lowerCount = hi + 1 - left;
    unsigned int upperCount = right + 1 - lo;
    if(lowerCount < upperCount) {
      gc_quicksort(array, left, hi, offset);
      left = lo;
    } else {
      gc_quicksort(array, lo, right, offset);
      right = hi;
    }
  }
} // void gc_quicksort(...)
3219
3220 // Every page cached on the core that accesses it the most.
3221 // Check to see if any core's pages total more accesses than threshold
3222 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the
3223 // most remote accesses and hash for home them until we get below
3224 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
3225 int cacheAdapt_policy_overload(){
3226   unsigned int page_index = 0;
3227   VA page_sva = 0;
3228   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3229   unsigned int numchanged = 0;
3230   int * tmp_p = gccachepolicytbl+1;
3231   unsigned long long workload[NUMCORESACTIVE];
3232   memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3233   unsigned long long total_workload = 0;
3234   unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
3235   memset(core2heavypages,0,
3236           sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
3237   for(page_index = 0; page_index < page_num; page_index++) {
3238         page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3239         bamboo_cache_policy_t policy = {0};
3240         unsigned int hotestcore = 0;
3241         unsigned long long totalfreq = 0;
3242         unsigned int hotfreq = 0;
3243
3244         int *local_tbl=&gccachesamplingtbl_r[page_index];
3245         for(int i = 0; i < NUMCORESACTIVE; i++) {
3246           int freq = *local_tbl;
3247           local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3248           totalfreq += freq;
3249           // check the freqency, decide if this page is hot for the core
3250           if(hotfreq < freq) {
3251                 hotfreq = freq;
3252                 hotestcore = i;
3253           }
3254         }
3255         // Decide the cache strategy for this page
3256         // If decide to adapt a new cache strategy, write into the shared block of
3257         // the gcsharedsamplingtbl. The mem recording information that has been
3258         // written is enough to hold the information.
3259         // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3260         if(hotfreq == 0) {
3261           // this page has not been accessed, do not change its cache policy
3262           continue;
3263         }
3264
3265         totalfreq/=BAMBOO_PAGE_SIZE;
3266         hotfreq/=BAMBOO_PAGE_SIZE;
3267         // locally cache the page in the hotest core
3268         // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3269         policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3270         policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3271         policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3272         *tmp_p = page_index;
3273         tmp_p++;
3274         *tmp_p = policy.word;
3275         tmp_p++;
3276         numchanged++;
3277         workload[hotestcore] += totalfreq;
3278         total_workload += totalfreq;
3279         // insert into core2heavypages using quicksort
3280         unsigned long long remoteaccess = totalfreq - hotfreq;
3281         unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
3282         core2heavypages[hotestcore][3*index+3] = remoteaccess;
3283         core2heavypages[hotestcore][3*index+2] = totalfreq;
3284         core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
3285         core2heavypages[hotestcore][0]++;
3286   }
3287
3288   unsigned long long workload_threshold =
3289         total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
3290   // Check the workload of each core
3291   for(int i = 0; i < NUMCORESACTIVE; i++) {
3292         int j = 1;
3293         unsigned int index = (unsigned int)core2heavypages[i][0];
3294         if(workload[i] > workload_threshold) {
3295           // sort according to the remoteaccess
3296           gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3297           while((workload[i] > workload_threshold) && (j<index*3)) {
3298                 // hfh those pages with more remote accesses
3299                 bamboo_cache_policy_t policy = {0};
3300                 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3301                 *((unsigned int*)core2heavypages[i][j]) = policy.word;
3302                 workload[i] -= core2heavypages[i][j+1];
3303                 j += 3;
3304           }
3305         }
3306   }
3307
3308   return numchanged;
3309 } // int cacheAdapt_policy_overload()
3310
3311 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
3312 #define GC_CACHE_ADAPT_CROWD_THRESHOLD  20
3313 // Every page cached on the core that accesses it the most.
3314 // Check to see if any core's pages total more accesses than threshold
3315 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the
3316 // most remote accesses and hash for home them until we get below
3317 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
3318 // Sort pages based on activity....
3319 // If more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
3320 // core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
3321 // then start hfh these pages(selecting the ones with the most remote
3322 // accesses first or fewest local accesses) until we get below
3323 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
//
// In the code below the per-core overload limit is
// total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
//
// Output: one (page index, policy word) pair per sampled page is appended to
// gccachepolicytbl starting at slot 1; pages chosen for hash-for-home have
// their already-written policy word overwritten in place.
// Returns the number of pairs written by the first loop.
3324 int cacheAdapt_policy_crowd(){
3325   unsigned int page_index = 0;
3326   VA page_sva = 0;
3327   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3328   unsigned int numchanged = 0;
3329   int * tmp_p = gccachepolicytbl+1;
  // workload[c]: summed (page-size scaled) accesses of the pages homed on c
3330   unsigned long long workload[NUMCORESACTIVE];
3331   memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3332   unsigned long long total_workload = 0;
  // core2heavypages[c][0] counts the pages recorded for core c; record n
  // (0-based) then uses elements 3n+1 (address of the page's policy word in
  // gccachepolicytbl), 3n+2 (total accesses) and 3n+3 (remote accesses).
  // NOTE(review): this is a variable-length array sized by page_num and
  // NUMCORESACTIVE -- confirm the available stack is large enough.
3333   unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
3334   memset(core2heavypages,0,
3335           sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
3336   for(page_index = 0; page_index < page_num; page_index++) {
        // page_sva is computed here but not read again in this function
3337         page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3338         bamboo_cache_policy_t policy = {0};
3339         unsigned int hotestcore = 0;
3340         unsigned long long totalfreq = 0;
3341         unsigned int hotfreq = 0;
3342
        // step through this page's counter in each active core's table; the
        // tables are size_cachesamplingtbl_local_r bytes apart
3343         int *local_tbl=&gccachesamplingtbl_r[page_index];
3344         for(int i = 0; i < NUMCORESACTIVE; i++) {
3345           int freq = *local_tbl;
3346           local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3347           totalfreq += freq;
3348           // check the frequency, decide if this page is hot for the core
3349           if(hotfreq < freq) {
3350                 hotfreq = freq;
3351                 hotestcore = i;
3352           }
3353         }
3354         // Decide the cache strategy for this page
3355         // If decide to adapt a new cache strategy, write into the shared block of
3356         // the gcsharedsamplingtbl. The mem recording information that has been
3357         // written is enough to hold the information.
3358         // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3359         if(hotfreq == 0) {
3360           // this page has not been accessed, do not change its cache policy
3361           continue;
3362         }
3363         totalfreq/=BAMBOO_PAGE_SIZE;
3364         hotfreq/=BAMBOO_PAGE_SIZE;
3365         // locally cache the page in the hotest core
3366         // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3367         policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3368         policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3369         policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3370         *tmp_p = page_index;
3371         tmp_p++;
3372         *tmp_p = policy.word;
3373         tmp_p++;
3374         numchanged++;
3375         workload[hotestcore] += totalfreq;
3376         total_workload += totalfreq;
3377         // append this page's record to its home core's list (sorted later)
3378         unsigned long long remoteaccess = totalfreq - hotfreq;
3379         unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
3380         core2heavypages[hotestcore][3*index+3] = remoteaccess;
3381         core2heavypages[hotestcore][3*index+2] = totalfreq;
        // tmp_p-1 is the policy word just written; its address is stored as
        // an integer so it can be overwritten with a hash policy below
3382         core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
3383         core2heavypages[hotestcore][0]++;
3384   }
3385
3386   unsigned long long workload_threshold =
3387         total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
3388   // Check the workload of each core
3389   for(int i = 0; i < NUMCORESACTIVE; i++) {
        // j is the element index of the "policy word address" of the next
        // record not yet switched to hash-for-home (1, 4, 7, ...)
3390         int j = 1;
3391         unsigned int index = (unsigned int)core2heavypages[i][0];
3392         if(workload[i] > workload_threshold) {
3393           // sort according to the remoteaccess
3394           gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3395           while((workload[i] > workload_threshold) && (j<index*3)) {
3396                 // hfh those pages with more remote accesses
3397                 bamboo_cache_policy_t policy = {0};
3398                 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3399                 *((unsigned int*)core2heavypages[i][j]) = policy.word;
                // element j+1 is the record's total access count
3400                 workload[i] -= core2heavypages[i][j+1];
3401                 j += 3;
3402           }
3403         }
3404
3405         // Check if the accesses are crowded on few pages
3406         // sort according to the total access
        // Each pass through inner_crowd re-sorts the remaining records
        // (j/3+1 .. index) and re-initializes threshold, num_crowded and
        // t_workload.
3407 inner_crowd:
3408         gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
3409         unsigned long long threshold =
3410           GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3411         int num_crowded = 0;
3412         unsigned long long t_workload = 0;
        // NOTE(review): this loop has no explicit bound on the record index;
        // it stops only once t_workload reaches threshold -- confirm it
        // cannot read past the `index` recorded pages of this core.
3413         do {
3414           t_workload += core2heavypages[i][j+num_crowded*3+1];
3415           num_crowded++;
3416         } while(t_workload < threshold);
3417         // num_crowded is now the number of leading records (in total-access
3418         // order) whose summed accesses were needed to reach threshold
3419         if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
3420           // need to hfh these pages
3421           // sort the pages according to remote access
3422           gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
3423           // h4h the page with the most remote accesses, then re-evaluate
3424           bamboo_cache_policy_t policy = {0};
3425           policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3426           *((unsigned int*)core2heavypages[i][j]) = policy.word;
3427           workload[i] -= core2heavypages[i][j+1];
          // the updates to t_workload and threshold below are overwritten by
          // the initializers that follow the inner_crowd label
3428           t_workload -= core2heavypages[i][j+1];
3429           j += 3;
3430           threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3431           goto inner_crowd;
3432         }
3433   }
3434
3435   return numchanged;
3436 } // int cacheAdapt_policy_crowd()
3437
3438 void cacheAdapt_master() {
3439 #ifdef GC_CACHE_ADAPT_SAMPLING_OUTPUT
3440   gc_output_cache_sampling_r();
3441 #endif // GC_CACHE_ADAPT_SAMPLING_OUTPUT
3442   unsigned int numchanged = 0;
3443   // check the statistic data
3444   // for each page, decide the new cache strategy
3445 #ifdef GC_CACHE_ADAPT_POLICY1
3446   numchanged = cacheAdapt_policy_h4h();
3447 #elif defined GC_CACHE_ADAPT_POLICY2
3448   numchanged = cacheAdapt_policy_local();
3449 #elif defined GC_CACHE_ADAPT_POLICY3
3450   numchanged = cacheAdapt_policy_hotest();
3451 #elif defined GC_CACHE_ADAPT_POLICY4
3452   numchanged = cacheAdapt_policy_dominate();
3453 #elif defined GC_CACHE_ADAPT_POLICY5
3454   numchanged = cacheAdapt_policy_overload();
3455 #elif defined GC_CACHE_ADAPT_POLICY6
3456   numchanged = cacheAdapt_policy_crowd();
3457 #endif
3458   *gccachepolicytbl = numchanged;
3459 }
3460
3461 // adapt the cache strategy for the mutator
3462 void cacheAdapt_mutator() {
3463   int numchanged = *gccachepolicytbl;
3464   // check the changes and adapt them
3465   int * tmp_p = gccachepolicytbl+1;
3466   while(numchanged--) {
3467         // read out the policy
3468         int page_index = *tmp_p;
3469         bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
3470         // adapt the policy
3471         bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
3472                 policy, BAMBOO_PAGE_SIZE);
3473
3474         tmp_p += 2;
3475   }
3476 }
3477
3478 void gc_output_cache_sampling() {
3479   unsigned int page_index = 0;
3480   VA page_sva = 0;
3481   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3482   for(page_index = 0; page_index < page_num; page_index++) {
3483         page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3484         unsigned int block = 0;
3485         BLOCKINDEX(page_sva, &block);
3486         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
3487         tprintf("va: %x page_index: %d host: %d\n",
3488                 (int)page_sva, page_index, coren);
3489         for(int i = 0; i < NUMCORESACTIVE; i++) {
3490           int * local_tbl = (int *)((void *)gccachesamplingtbl
3491                   +size_cachesamplingtbl_local*i);
3492           int freq = local_tbl[page_index];
3493           printf("%8d ",freq);
3494         }
3495         printf("\n");
3496   }
3497   printf("=================\n");
3498 } // gc_output_cache_sampling
3499
3500 void gc_output_cache_sampling_r() {
3501   unsigned int page_index = 0;
3502   VA page_sva = 0;
3503   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3504   for(page_index = 0; page_index < page_num; page_index++) {
3505         page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3506         unsigned int block = 0;
3507         BLOCKINDEX(page_sva, &block);
3508         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
3509         tprintf("va: %x page_index: %d host: %d\n",
3510                 (int)page_sva, page_index, coren);
3511         for(int i = 0; i < NUMCORESACTIVE; i++) {
3512           int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3513                   +size_cachesamplingtbl_local_r*i);
3514           int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3515           printf("%8d ",freq);
3516         }
3517         printf("\n");
3518   }
3519   printf("=================\n");
3520 } // gc_output_cache_sampling
3521 #endif // GC_CACHE_ADAPT
3522
3523 inline void gc_collect(struct garbagelist * stackptr) {
3524   // inform the master that this core is at a gc safe point and is ready to
3525   // do gc
3526   send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3527           self_numreceiveobjs, false);
3528
3529   // core collector routine
3530   while(true) {
3531     if(INITPHASE == gcphase) {
3532       break;
3533     }
3534   }
3535 #ifdef RAWPATH // TODO GC_DEBUG
3536   printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3537 #endif
3538   initGC();
3539 #ifdef GC_CACHE_ADAPT
3540   // prepare for cache adaption:
3541   cacheAdapt_gc(true);
3542 #endif // GC_CACHE_ADAPT
3543   //send init finish msg to core coordinator
3544   send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3545
3546   while(true) {
3547     if(MARKPHASE == gcphase) {
3548       break;
3549     }
3550   }
3551 #ifdef RAWPATH // TODO GC_DEBUG
3552   printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3553              udn_tile_coord_y());
3554 #endif
3555   mark(true, stackptr);
3556 #ifdef RAWPATH // TODO GC_DEBUG
3557   printf("(%x,%x) Finish mark phase, start compact phase\n",
3558              udn_tile_coord_x(), udn_tile_coord_y());
3559 #endif
3560   compact();
3561 #ifdef RAWPATH // TODO GC_DEBUG
3562   printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3563              udn_tile_coord_y());
3564 #endif
3565
3566   while(true) {
3567     if(FLUSHPHASE == gcphase) {
3568       break;
3569     }
3570   }
3571 #ifdef RAWPATH // TODO GC_DEBUG
3572   printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3573              udn_tile_coord_y());
3574 #endif
3575 #ifdef GC_PROFILE
3576   // send the num of obj/liveobj/forwardobj to the startupcore
3577   if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3578         send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3579                 gc_num_liveobj, gc_num_forwardobj, false);
3580   }
3581   gc_num_obj = 0;
3582 #endif // GC_PROFLIE
3583   flush(stackptr);
3584 #ifdef RAWPATH // TODO GC_DEBUG
3585   printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3586              udn_tile_coord_y());
3587 #endif
3588
3589 #ifdef GC_CACHE_ADAPT
3590   while(true) {
3591     if(PREFINISHPHASE == gcphase) {
3592       break;
3593     }
3594   }
3595 #ifdef RAWPATH // TODO GC_DEBUG
3596   printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3597              udn_tile_coord_y());
3598 #endif
3599   // cache adapt phase
3600   cacheAdapt_mutator();
3601   cacheAdapt_gc(false);
3602   //send init finish msg to core coordinator
3603   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3604 #ifdef RAWPATH // TODO GC_DEBUG
3605   printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3606              udn_tile_coord_y());
3607 #endif
3608 #ifdef GC_CACHE_SAMPLING
3609   // reset the sampling arrays
3610   bamboo_dtlb_sampling_reset();
3611 #endif // GC_CACHE_SAMPLING
3612   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3613         // zero out the gccachesamplingtbl
3614         BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3615         BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3616                 size_cachesamplingtbl_local_r);
3617   }
3618 #endif // GC_CACHE_ADAPT
3619
3620   // invalidate all shared mem pointers
3621   bamboo_cur_msp = NULL;
3622   bamboo_smem_size = 0;
3623   bamboo_smem_zero_top = NULL;
3624
3625   while(true) {
3626     if(FINISHPHASE == gcphase) {
3627       break;
3628     }
3629   }
3630
3631 #ifdef RAWPATH // TODO GC_DEBUG
3632   printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3633 #endif
3634 } // void gc_collect(struct garbagelist * stackptr)
3635
3636 inline void gc_nocollect(struct garbagelist * stackptr) {
3637   // inform the master that this core is at a gc safe point and is ready to
3638   // do gc
3639   send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3640           self_numreceiveobjs, false);
3641
3642   while(true) {
3643     if(INITPHASE == gcphase) {
3644       break;
3645     }
3646   }
3647 #ifdef RAWPATH // TODO GC_DEBUG
3648   printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3649 #endif
3650   initGC();
3651 #ifdef GC_CACHE_ADAPT
3652   // prepare for cache adaption:
3653   cacheAdapt_gc(true);
3654 #endif // GC_CACHE_ADAPT
3655   //send init finish msg to core coordinator
3656   send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3657
3658   while(true) {
3659     if(MARKPHASE == gcphase) {
3660       break;
3661     }
3662   }
3663 #ifdef RAWPATH // TODO GC_DEBUG
3664   printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3665              udn_tile_coord_y());
3666 #endif
3667   mark(true, stackptr);
3668 #ifdef RAWPATH // TODO GC_DEBUG
3669   printf("(%x,%x) Finish mark phase, wait for flush\n",
3670              udn_tile_coord_x(), udn_tile_coord_y());
3671 #endif
3672
3673   // non-gc core collector routine
3674   while(true) {
3675     if(FLUSHPHASE == gcphase) {
3676       break;
3677     }
3678   }
3679 #ifdef RAWPATH // TODO GC_DEBUG
3680   printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3681              udn_tile_coord_y());
3682 #endif
3683 #ifdef GC_PROFILE
3684   if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3685         send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3686                 gc_num_liveobj, gc_num_forwardobj, false);
3687   }
3688   gc_num_obj = 0;
3689 #endif // GC_PROFLIE
3690   flush(stackptr);
3691 #ifdef RAWPATH // TODO GC_DEBUG
3692   printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3693              udn_tile_coord_y());
3694 #endif
3695
3696 #ifdef GC_CACHE_ADAPT
3697   while(true) {
3698     if(PREFINISHPHASE == gcphase) {
3699       break;
3700     }
3701   }
3702 #ifdef RAWPATH // TODO GC_DEBUG
3703   printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3704              udn_tile_coord_y());
3705 #endif
3706   // cache adapt phase
3707   cacheAdapt_mutator();
3708   cacheAdapt_gc(false);
3709   //send init finish msg to core coordinator
3710   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3711 #ifdef RAWPATH // TODO GC_DEBUG
3712   printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3713              udn_tile_coord_y());
3714 #endif
3715 #ifdef GC_CACHE_SAMPLING
3716   // reset the sampling arrays
3717   bamboo_dtlb_sampling_reset();
3718 #endif // GC_CACHE_SAMPLING
3719   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
3720         // zero out the gccachesamplingtbl
3721         BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
3722         BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
3723                 size_cachesamplingtbl_local_r);
3724   }
3725 #endif // GC_CACHE_ADAPT
3726
3727   // invalidate all shared mem pointers
3728   bamboo_cur_msp = NULL;
3729   bamboo_smem_size = 0;
3730   bamboo_smem_zero_top = NULL;
3731
3732   while(true) {
3733     if(FINISHPHASE == gcphase) {
3734       break;
3735     }
3736   }
3737 #ifdef RAWPATH // TODO GC_DEBUG
3738   printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
3739 #endif
3740 } // void gc_collect(struct garbagelist * stackptr)
3741
3742 inline void gc_master(struct garbagelist * stackptr) {
3743   tprintf("start GC !!!!!!!!!!!!! \n");
3744
3745   gcphase = INITPHASE;
3746   int i = 0;
3747   waitconfirm = false;
3748   numconfirm = 0;
3749   initGC();
3750
3751   // Note: all cores need to init gc including non-gc cores
3752   for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3753         // send GC init messages to all cores
3754         send_msg_1(i, GCSTARTINIT, false);
3755   }
3756   bool isfirst = true;
3757   bool allStall = false;
3758
3759 #ifdef GC_CACHE_ADAPT
3760   // prepare for cache adaption:
3761   cacheAdapt_gc(true);
3762 #endif // GC_CACHE_ADAPT
3763
3764 #ifdef RAWPATH // TODO GC_DEBUG
3765   printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3766                  udn_tile_coord_y());
3767 #endif
3768
3769   gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3770   while(true) {
3771         BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3772         if(gc_checkAllCoreStatus_I()) {
3773           BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3774           break;
3775         }
3776         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3777   }
3778 #ifdef GC_PROFILE
3779 #ifdef MGC_SPEC
3780         if(gc_profile_flag) {
3781 #endif
3782   gc_profileItem();
3783 #ifdef MGC_SPEC
3784         }
3785 #endif
3786 #endif
3787 #ifdef GC_CACHE_ADAPT_POLICY_OUTPUT
3788   gc_output_cache_sampling();
3789 #endif // GC_CACHE_ADAPT
3790 #ifdef RAWPATH // TODO GC_DEBUG
3791   printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3792                  udn_tile_coord_y());
3793 #endif
3794   // restore the gcstatus of all cores
3795   // Note: all cores have to do mark including non-gc cores
3796   gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3797   for(i = 1; i < NUMCORESACTIVE; ++i) {
3798         gccorestatus[i] = 1;
3799         // send GC start messages to all cores
3800         send_msg_1(i, GCSTART, false);
3801   }
3802
3803   gcphase = MARKPHASE;
3804   // mark phase
3805   while(MARKPHASE == gcphase) {
3806         mark(isfirst, stackptr);
3807         if(isfirst) {
3808           isfirst = false;
3809         }
3810
3811         // check gcstatus
3812         checkMarkStatue();
3813   }   // while(MARKPHASE == gcphase)
3814   // send msgs to all cores requiring large objs info
3815   // Note: only need to ask gc cores, non-gc cores do not host any objs
3816   numconfirm = NUMCORES4GC - 1;
3817   for(i = 1; i < NUMCORES4GC; ++i) {
3818         send_msg_1(i, GCLOBJREQUEST, false);
3819   }
3820   gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3821   while(true) {
3822         if(numconfirm==0) {
3823           break;
3824         }
3825   }   // wait for responses
3826   // check the heaptop
3827   if(gcheaptop < gcmarkedptrbound) {
3828         gcheaptop = gcmarkedptrbound;
3829   }
3830 #ifdef GC_PROFILE
3831 #ifdef MGC_SPEC
3832         if(gc_profile_flag) {
3833 #endif
3834   gc_profileItem();
3835 #ifdef MGC_SPEC
3836         }
3837 #endif
3838 #endif
3839 #ifdef RAWPATH // TODO GC_DEBUG
3840   printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3841                  udn_tile_coord_y());
3842 #endif
3843   // cache all large objs
3844   if(!cacheLObjs()) {
3845         // no enough space to cache large objs
3846         BAMBOO_EXIT(0xb02b);
3847   }
3848   // predict number of blocks to fill for each core
3849   unsigned int tmpheaptop = 0;
3850   int numpbc = loadbalance(&tmpheaptop);
3851   // TODO
3852   numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3853 #ifdef RAWPATH // TODO GC_DEBUG
3854   printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3855                  udn_tile_coord_y());
3856 #endif
3857   //int tmptopptr = 0;
3858   //BASEPTR(gctopcore, 0, &tmptopptr);
3859   // TODO
3860   //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3861   tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3862   GC_BAMBOO_DEBUGPRINT(0xabab);
3863   GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
3864   for(i = 0; i < NUMCORES4GC; ++i) {
3865         unsigned int tmpcoreptr = 0;
3866         BASEPTR(i, numpbc, &tmpcoreptr);
3867         // init some data strutures for compact phase
3868         gcloads[i] = 0;
3869         gcfilledblocks[i] = 0;
3870         gcrequiredmems[i] = 0;
3871         gccorestatus[i] = 1;
3872         //send start compact messages to all cores
3873         //TODO bug here, do not know if the direction is positive or negtive?
3874         if (tmpcoreptr < tmpheaptop) {
3875           gcstopblock[i] = numpbc + 1;
3876           if(i != STARTUPCORE) {
3877                 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3878           } else {
3879                 gcblock2fill = numpbc+1;
3880           }   // if(i != STARTUPCORE)
3881         } else {
3882           gcstopblock[i] = numpbc;
3883           if(i != STARTUPCORE) {
3884                 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3885           } else {
3886                 gcblock2fill = numpbc;
3887           }  // if(i != STARTUPCORE)
3888         }
3889         GC_BAMBOO_DEBUGPRINT(0xf000+i);
3890         GC_BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3891         GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3892   }
3893
3894   BAMBOO_CACHE_MF();
3895
3896 #ifdef GC_PROFILE
3897 #ifdef MGC_SPEC
3898         if(gc_profile_flag) {
3899 #endif
3900   gc_profileItem();
3901 #ifdef MGC_SPEC
3902         }
3903 #endif
3904 #endif
3905
3906   // compact phase
3907   bool finalcompact = false;
3908   // initialize pointers for comapcting
3909   struct moveHelper * orig =
3910         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3911   struct moveHelper * to =
3912         (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3913   initOrig_Dst(orig, to);
3914   int filledblocks = 0;
3915   unsigned int heaptopptr = 0;
3916   bool finishcompact = false;
3917   bool iscontinue = true;
3918   bool localcompact = true;
3919   while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3920         if((!finishcompact) && iscontinue) {
3921           GC_BAMBOO_DEBUGPRINT(0xeaa01);
3922           GC_BAMBOO_DEBUGPRINT_REG(numpbc);
3923           GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3924           finishcompact = compacthelper(orig, to, &filledblocks,
3925                                                                         &heaptopptr, &localcompact);
3926           GC_BAMBOO_DEBUGPRINT(0xeaa02);
3927           GC_BAMBOO_DEBUGPRINT_REG(finishcompact);
3928           GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3929           GC_BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3930           GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3931           GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3932         }
3933
3934         BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3935         if(gc_checkCoreStatus_I()) {
3936           // all cores have finished compacting
3937           // restore the gcstatus of all cores
3938           for(i = 0; i < NUMCORES4GC; ++i) {
3939                 gccorestatus[i] = 1;
3940           }
3941           BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3942           break;
3943         } else {
3944           BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3945           // check if there are spare mem for pending move requires
3946           if(COMPACTPHASE == gcphase) {
3947                 GC_BAMBOO_DEBUGPRINT(0xeaa03);
3948                 resolvePendingMoveRequest();
3949                 GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3950           } else {
3951                 GC_BAMBOO_DEBUGPRINT(0xeaa04);
3952                 compact2Heaptop();
3953           }
3954         }   // if(gc_checkCoreStatus_I()) else ...
3955
3956         if(gctomove) {
3957           GC_BAMBOO_DEBUGPRINT(0xeaa05);
3958           GC_BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3959           GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3960           GC_BAMBOO_DEBUGPRINT_REG(gctomove);
3961           to->ptr = gcmovestartaddr;
3962           to->numblocks = gcblock2fill - 1;
3963           to->bound = (to->numblocks==0) ?
3964                                   BAMBOO_SMEM_SIZE_L :
3965                                   BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3966           BASEPTR(gcdstcore, to->numblocks, &(to->base));
3967           to->offset = to->ptr - to->base;
3968           to->top = (to->numblocks==0) ?
3969                                 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3970           to->base = to->ptr;
3971           to->offset = BAMBOO_CACHE_LINE_SIZE;
3972           to->ptr += to->offset;                         // for header
3973           to->top += to->offset;
3974           if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3975                 localcompact = true;
3976           } else {
3977                 localcompact = false;
3978           }
3979           gctomove = false;
3980           iscontinue = true;
3981         } else if(!finishcompact) {
3982           // still pending
3983           iscontinue = false;
3984         }  // if(gctomove)
3985   }  // while(COMPACTPHASE == gcphase)
3986 #ifdef GC_PROFILE
3987 #ifdef MGC_SPEC
3988         if(gc_profile_flag) {
3989 #endif
3990   gc_profileItem();
3991 #ifdef MGC_SPEC
3992         }
3993 #endif
3994 #endif
3995 #ifdef RAWPATH // TODO GC_DEBUG
3996   printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3997                  udn_tile_coord_y());
3998 #endif
3999   // move largeObjs
4000   moveLObjs();
4001 #ifdef RAWPATH // TODO GC_DEBUG
4002   printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
4003                  udn_tile_coord_y());
4004 #endif
4005   RUNFREE(orig);
4006   RUNFREE(to);
4007   orig = to = NULL;
4008
4009   gcphase = FLUSHPHASE;
4010   gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4011   // Note: all cores should flush their runtime data including non-gc
4012   //       cores
4013   for(i = 1; i < NUMCORESACTIVE; ++i) {
4014         // send start flush messages to all cores
4015         gccorestatus[i] = 1;
4016         send_msg_1(i, GCSTARTFLUSH, false);
4017   }
4018 #ifdef GC_PROFILE
4019 #ifdef MGC_SPEC
4020         if(gc_profile_flag) {
4021 #endif
4022   gc_profileItem();
4023 #ifdef MGC_SPEC
4024         }
4025 #endif
4026 #endif
4027 #ifdef RAWPATH // TODO GC_DEBUG
4028   printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
4029                  udn_tile_coord_y());
4030 #endif
4031   // flush phase
4032   flush(stackptr);
4033
4034 #ifdef GC_CACHE_ADAPT
4035   // now the master core need to decide the new cache strategy
4036   cacheAdapt_master();
4037 #endif // GC_CACHE_ADAPT
4038
4039   gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4040   while(FLUSHPHASE == gcphase) {
4041         // check the status of all cores
4042         BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4043         if(gc_checkAllCoreStatus_I()) {
4044           BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4045           break;
4046         }
4047         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4048   }  // while(FLUSHPHASE == gcphase)
4049 #ifdef RAWPATH // TODO GC_DEBUG
4050   printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
4051                  udn_tile_coord_y());
4052 #endif
4053
4054 #ifdef GC_CACHE_ADAPT
4055 #ifdef GC_PROFILE
4056 #ifdef MGC_SPEC
4057         if(gc_profile_flag) {
4058 #endif
4059   gc_profileItem();
4060 #ifdef MGC_SPEC
4061         }
4062 #endif
4063 #endif
4064   gcphase = PREFINISHPHASE;
4065   gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4066   // Note: all cores should flush their runtime data including non-gc
4067   //       cores
4068   for(i = 1; i < NUMCORESACTIVE; ++i) {
4069         // send start flush messages to all cores
4070         gccorestatus[i] = 1;
4071         send_msg_1(i, GCSTARTPREF, false);
4072   }
4073 #ifdef RAWPATH // TODO GC_DEBUG
4074   printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
4075                  udn_tile_coord_y());
4076 #endif
4077   // cache adapt phase
4078   cacheAdapt_mutator();
4079 #ifdef MGC_SPEC
4080   if(gc_profile_flag) {
4081 #endif
4082 #ifdef GC_CACHE_ADAPT_OUTPUT
4083   bamboo_output_cache_policy();
4084 #endif
4085 #ifdef MGC_SPEC
4086   }
4087 #endif
4088   cacheAdapt_gc(false);
4089
4090   gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4091   while(PREFINISHPHASE == gcphase) {
4092         // check the status of all cores
4093         BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4094         if(gc_checkAllCoreStatus_I()) {
4095           BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4096           break;
4097         }
4098         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4099   }  // while(PREFINISHPHASE == gcphase)
4100
4101 #ifdef GC_CACHE_SAMPLING
4102   // reset the sampling arrays
4103   bamboo_dtlb_sampling_reset();
4104 #endif // GC_CACHE_SAMPLING
4105   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4106         // zero out the gccachesamplingtbl
4107         BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
4108         BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
4109                 size_cachesamplingtbl_local_r);
4110         BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
4111   }
4112 #endif // GC_CACHE_ADAPT
4113
4114   gcphase = FINISHPHASE;
4115
4116   // invalidate all shared mem pointers
4117   // put it here as it takes time to inform all the other cores to
4118   // finish gc and it might cause problem when some core resumes
4119   // mutator earlier than the other cores
4120   bamboo_cur_msp = NULL;
4121   bamboo_smem_size = 0;
4122   bamboo_smem_zero_top = NULL;
4123
4124 #ifdef GC_PROFILE
4125 #ifdef MGC_SPEC
4126         if(gc_profile_flag) {
4127 #endif
4128   gc_profileEnd();
4129 #ifdef MGC_SPEC
4130         }
4131 #endif
4132 #endif
4133   gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4134   for(i = 1; i < NUMCORESACTIVE; ++i) {
4135         // send gc finish messages to all cores
4136         send_msg_1(i, GCFINISH, false);
4137         gccorestatus[i] = 1;
4138   }
4139
4140   gcflag = false;
4141   gcprocessing = false;
4142 #ifdef RAWPATH // TODO GC_DEBUG
4143   printf("(%x,%x) gc finished   \n", udn_tile_coord_x(),
4144                  udn_tile_coord_y());
4145 #endif
4146   tprintf("finish GC ! \n");
4147 } // void gc_master(struct garbagelist * stackptr)
4148
4149 inline bool gc(struct garbagelist * stackptr) {
4150   // check if do gc
4151   if(!gcflag) {
4152     gcprocessing = false;
4153     return false;
4154   }
4155
4156 #ifdef GC_CACHE_ADAPT
4157 #ifdef GC_CACHE_SAMPLING
4158     // disable the timer interrupt
4159     bamboo_mask_timer_intr();
4160 #endif
4161 #endif
4162   // core coordinator routine
4163   if(0 == BAMBOO_NUM_OF_CORE) {
4164 #ifdef GC_DEBUG
4165     printf("(%x,%x) Check if can do gc or not\n", udn_tile_coord_x(),
4166                    udn_tile_coord_y());
4167 #endif
4168         bool isallstall = true;
4169         gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4170         BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4171         int ti = 0;
4172         for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
4173           if(gccorestatus[ti] != 0) {
4174                 isallstall = false;
4175                 break;
4176           }
4177         }
4178         if(!isallstall) {
4179           BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4180           // some of the cores are still executing the mutator and did not reach
4181           // some gc safe point, therefore it is not ready to do gc
4182           gcflag = true;
4183           return false;
4184         } else {
4185 #ifdef GC_PROFILE
4186 #ifdef MGC_SPEC
4187         if(gc_profile_flag) {
4188 #endif
4189     gc_profileStart();
4190 #ifdef MGC_SPEC
4191         }
4192 #endif
4193 #endif
4194 pregccheck:
4195           gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
4196           gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
4197           int sumsendobj = 0;
4198           GC_BAMBOO_DEBUGPRINT(0xec04);
4199           for(int i = 0; i < NUMCORESACTIVE; ++i) {
4200                 sumsendobj += gcnumsendobjs[0][i];
4201                 GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
4202           }  // for(i = 1; i < NUMCORESACTIVE; ++i)
4203           GC_BAMBOO_DEBUGPRINT(0xec05);
4204           GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
4205           for(int i = 0; i < NUMCORESACTIVE; ++i) {
4206                 sumsendobj -= gcnumreceiveobjs[0][i];
4207                 GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[i]);
4208           }  // for(i = 1; i < NUMCORESACTIVE; ++i)
4209           GC_BAMBOO_DEBUGPRINT(0xec06);
4210           GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
4211           if(0 != sumsendobj) {
4212                 // there were still some msgs on the fly, wait until there
4213                 // are some update pregc information coming and check it again
4214                 gcprecheck = false;
4215                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4216                 while(true) {
4217                   if(gcprecheck) {
4218                         break;
4219                   }
4220                 }
4221                 goto pregccheck;
4222           } else {
4223                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4224           }
4225         }
4226 #ifdef RAWPATH // TODO GC_DEBUG
4227     printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
4228 #endif
4229         // Zero out the remaining bamboo_cur_msp
4230         // Only zero out the first 4 bytes of the remaining memory
4231         // Move the operation here because for the GC_CACHE_ADAPT version,
4232         // we need to make sure during the gcinit phase the shared heap is not
4233         // touched. Otherwise, there would be problem when adapt the cache
4234         // strategy.
4235         if((bamboo_cur_msp != 0)
4236                 && (bamboo_smem_zero_top == bamboo_cur_msp)
4237                 && (bamboo_smem_size > 0)) {
4238           *((int *)bamboo_cur_msp) = 0;
4239         }
4240 #ifdef GC_FLUSH_DTLB
4241         if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4242           BAMBOO_CLEAN_DTLB();
4243           gc_num_flush_dtlb++;
4244         }
4245 #endif
4246 #ifdef GC_CACHE_ADAPT
4247 #ifdef GC_CACHE_SAMPLING
4248     // get the sampling data
4249     bamboo_output_dtlb_sampling();
4250 #endif // GC_CACHE_SAMPLING
4251 #endif // GC_CACHE_ADAPT
4252         gcprocessing = true;
4253         gc_master(stackptr);
4254   } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
4255         // Zero out the remaining bamboo_cur_msp
4256         // Only zero out the first 4 bytes of the remaining memory
4257         // Move the operation here because for the GC_CACHE_ADAPT version,
4258         // we need to make sure during the gcinit phase the shared heap is not
4259         // touched. Otherwise, there would be problem when adapt the cache
4260         // strategy.
4261         if((bamboo_cur_msp != 0)
4262                 && (bamboo_smem_zero_top == bamboo_cur_msp)
4263                 && (bamboo_smem_size > 0)) {
4264           *((int *)bamboo_cur_msp) = 0;
4265         }
4266 #ifdef GC_FLUSH_DTLB
4267         if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4268           BAMBOO_CLEAN_DTLB();
4269           gc_num_flush_dtlb++;
4270         }
4271 #endif
4272 #ifdef GC_CACHE_ADAPT
4273 #ifdef GC_CACHE_SAMPLING
4274         if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4275           // get the sampling data
4276           bamboo_output_dtlb_sampling();
4277         }
4278 #endif // GC_CACHE_SAMPLING
4279 #endif // GC_CACHE_ADAPT
4280     gcprocessing = true;
4281     gc_collect(stackptr);
4282   } else {
4283         // Zero out the remaining bamboo_cur_msp
4284         // Only zero out the first 4 bytes of the remaining memory
4285         // Move the operation here because for the GC_CACHE_ADAPT version,
4286         // we need to make sure during the gcinit phase the shared heap is not
4287         // touched. Otherwise, there would be problem when adapt the cache
4288         // strategy.
4289         if((bamboo_cur_msp != 0)
4290                 && (bamboo_smem_zero_top == bamboo_cur_msp)
4291                 && (bamboo_smem_size > 0)) {
4292           *((int *)bamboo_cur_msp) = 0;
4293         }
4294 #ifdef GC_FLUSH_DTLB
4295         if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4296           BAMBOO_CLEAN_DTLB();
4297           gc_num_flush_dtlb++;
4298         }
4299 #endif
4300 #ifdef GC_CACHE_ADAPT
4301 #ifdef GC_CACHE_SAMPLING
4302         if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4303           // get the sampling data
4304           bamboo_output_dtlb_sampling();
4305         }
4306 #endif // GC_CACHE_SAMPLING
4307 #endif // GC_CACHE_ADAPT
4308     // not a gc core, should wait for gcfinish msg
4309     gcprocessing = true;
4310     gc_nocollect(stackptr);
4311   }
4312 #ifdef GC_CACHE_ADAPT
4313 #ifdef GC_CACHE_SAMPLING
4314   // enable the timer interrupt
4315   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
4316   bamboo_unmask_timer_intr();
4317 #endif // GC_CACHE_SAMPLING
4318 #endif // GC_CACHE_ADAPT
4319
4320   return true;
4321 } // void gc(struct garbagelist * stackptr)
4322
4323 #ifdef GC_PROFILE
4324 inline void gc_profileStart(void) {
4325   if(!gc_infoOverflow) {
4326     GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
4327     gc_infoArray[gc_infoIndex] = gcInfo;
4328     gcInfo->index = 1;
4329     gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
4330   }
4331 }
4332
4333 inline void gc_profileItem(void) {
4334   if(!gc_infoOverflow) {
4335     GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4336     gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4337   }
4338 }
4339
4340 inline void gc_profileEnd(void) {
4341   if(!gc_infoOverflow) {
4342     GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4343     gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4344         gcInfo->time[gcInfo->index++] = gc_num_livespace;
4345         gcInfo->time[gcInfo->index++] = gc_num_freespace;
4346         gcInfo->time[gcInfo->index++] = gc_num_lobj;
4347         gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
4348         gcInfo->time[gcInfo->index++] = gc_num_obj;
4349         gcInfo->time[gcInfo->index++] = gc_num_liveobj;
4350         gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
4351     gc_infoIndex++;
4352     if(gc_infoIndex == GCINFOLENGTH) {
4353       gc_infoOverflow = true;
4354       //taskInfoIndex = 0;
4355     }
4356   }
4357 }
4358
4359 // output the profiling data
4360 void gc_outputProfileData() {
4361   int i = 0;
4362   int j = 0;
4363   unsigned long long totalgc = 0;
4364
4365 #ifndef BAMBOO_MEMPROF
4366   BAMBOO_PRINT(0xdddd);
4367 #endif
4368   // output task related info
4369   for(i= 0; i < gc_infoIndex; i++) {
4370     GCInfo * gcInfo = gc_infoArray[i];
4371 #ifdef BAMBOO_MEMPROF
4372     unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
4373 #else
4374         unsigned long long tmp = 0;
4375     BAMBOO_PRINT(0xddda);
4376     for(j = 0; j < gcInfo->index - 7; j++) {
4377       BAMBOO_PRINT(gcInfo->time[j]);
4378       BAMBOO_PRINT(gcInfo->time[j]-tmp);
4379       BAMBOO_PRINT(0xdddb);
4380       tmp = gcInfo->time[j];
4381     }
4382     tmp = (tmp-gcInfo->time[0]);
4383     BAMBOO_PRINT_REG(tmp);
4384         BAMBOO_PRINT(0xdddc);
4385         BAMBOO_PRINT(gcInfo->time[gcInfo->index - 7]);
4386         BAMBOO_PRINT(gcInfo->time[gcInfo->index - 6]);
4387         BAMBOO_PRINT(gcInfo->time[gcInfo->index - 5]);
4388         BAMBOO_PRINT(gcInfo->time[gcInfo->index - 4]);
4389         BAMBOO_PRINT(gcInfo->time[gcInfo->index - 3]);
4390         BAMBOO_PRINT(gcInfo->time[gcInfo->index - 2]);
4391         BAMBOO_PRINT(gcInfo->time[gcInfo->index - 1]);
4392     BAMBOO_PRINT(0xddde);
4393 #endif
4394     totalgc += tmp;
4395   }
4396 #ifndef BAMBOO_MEMPROF
4397   BAMBOO_PRINT(0xdddf);
4398 #endif
4399   BAMBOO_PRINT_REG(totalgc);
4400
4401   if(gc_infoOverflow) {
4402     BAMBOO_PRINT(0xefee);
4403   }
4404
4405 #ifndef BAMBOO_MEMPROF
4406   BAMBOO_PRINT(0xeeee);
4407 #endif
4408 }
4409 #endif  // #ifdef GC_PROFILE
4410
4411 #endif