3 #include "multicoregarbage.h"
4 #include "multicoreruntime.h"
5 #include "runtime_arch.h"
6 #include "SimpleHash.h"
7 #include "GenericHashtable.h"
8 #include "ObjectHash.h"
9 #include "GCSharedHash.h"
12 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
13 extern int numqueues[][NUMCLASSES];
15 extern struct genhashtable * activetasks;
16 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
17 extern struct taskparamdescriptor *currtpd;
19 extern struct LockValue runtime_locks[MAXTASKPARAMS];
20 extern int runtime_locklen;
23 extern unsigned int gcmem_mixed_threshold;
24 extern unsigned int gcmem_mixed_usedmem;
// Mark-phase work queue of shared-heap pointers: a singly-linked list of
// fixed-size pointerblock chunks consumed FIFO.
// NOTE(review): the opening of struct pointerblock is missing from this
// extraction; only the trailing 'next' link member is visible here.
29 struct pointerblock *next;
// chunk currently written by gc_enqueue_I
32 struct pointerblock *gchead=NULL;
// chunk currently consumed by the destructive scan (gc_dequeue_I)
34 struct pointerblock *gctail=NULL;
// cursor for the non-destructive scan (gc_dequeue2_I)
36 struct pointerblock *gctail2=NULL;
// one cached free chunk, reused to avoid a malloc/free per block
38 struct pointerblock *gcspare=NULL;
// Large-object queue: doubly-linked list of chunks, each holding up to
// NUMLOBJPTRS (obj ptr, length, host core) triples.  The prev link enables
// the reverse scan used by gc_lobjdequeue3_I.
40 #define NUMLOBJPTRS 20
42 struct lobjpointerblock {
43 void * lobjs[NUMLOBJPTRS];
44 //void * dsts[NUMLOBJPTRS];
45 int lengths[NUMLOBJPTRS];
46 //void * origs[NUMLOBJPTRS];
47 int hosts[NUMLOBJPTRS];
48 struct lobjpointerblock *next;
49 struct lobjpointerblock *prev;
// head chunk/index: enqueue position (gc_lobjenqueue_I)
52 struct lobjpointerblock *gclobjhead=NULL;
53 int gclobjheadindex=0;
// tail chunk/index: destructive dequeue position (gc_lobjdequeue_I)
54 struct lobjpointerblock *gclobjtail=NULL;
55 int gclobjtailindex=0;
// secondary cursor for the non-destructive forward/backward scans
56 struct lobjpointerblock *gclobjtail2=NULL;
57 int gclobjtailindex2=0;
// one cached free chunk, reused to avoid a malloc/free per block
58 struct lobjpointerblock *gclobjspare=NULL;
// Bookkeeping for cache-adaptivity sampling (GC_CACHE_ADAPT builds).
// NOTE(review): interior fields of this struct are missing from this
// extraction -- confirm the full definition against the original file.
// (The misspelled global name 'infomation' is kept: it is an identifier
// referenced elsewhere.)
61 typedef struct gc_cache_revise_info {
62 int orig_page_start_va;
68 int revised_sampling[NUMCORESACTIVE];
69 } gc_cache_revise_info_t;
70 gc_cache_revise_info_t gc_cache_revise_infomation;
71 #endif// GC_CACHE_ADAPT
74 // dump whole mem in blocks
// Debug aid: hex-dump the whole shared memory region, 16 words (64 bytes)
// per printf line, annotating each block with its block/sblock number and
// the owning core's tile coordinates.
// NOTE(review): several interior lines (local declarations, some loop and
// branch lines) are missing from this extraction.
75 inline void dumpSMem() {
83 printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(),
85 // reserved blocks for sblocktbl
86 printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
// dump the reserved region below gcbaseva
88 for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
89 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
90 udn_tile_coord_x(), udn_tile_coord_y(),
91 *((int *)(i)), *((int *)(i + 4)),
92 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
93 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
94 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
95 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
96 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
97 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
98 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
100 sblock = gcreservedsb;
101 bool advanceblock = false;
// dump the managed heap itself
103 for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
104 advanceblock = false;
105 // computing sblock # and block #, core coordinate (x,y) also
106 if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
108 if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
109 if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
121 coren = gc_block2core[block%(NUMCORES4GC*2)];
123 // compute core coordinate
124 BAMBOO_COORDS(coren, &x, &y);
125 printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
126 udn_tile_coord_x(), udn_tile_coord_y(),
127 block, sblock++, x, y,
128 (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
131 printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
132 udn_tile_coord_x(), udn_tile_coord_y(),
133 *((int *)(i)), *((int *)(i + 4)),
134 *((int *)(i + 4*2)), *((int *)(i + 4*3)),
135 *((int *)(i + 4*4)), *((int *)(i + 4*5)),
136 *((int *)(i + 4*6)), *((int *)(i + 4*7)),
137 *((int *)(i + 4*8)), *((int *)(i + 4*9)),
138 *((int *)(i + 4*10)), *((int *)(i + 4*11)),
139 *((int *)(i + 4*12)), *((int *)(i + 4*13)),
140 *((int *)(i + 4*14)), *((int *)(i + 4*15)));
142 printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
146 // should be invoked with interruption closed
// Append ptr to the mark queue; grows the queue by one pointerblock chunk
// (reusing gcspare when available) when the head chunk is full.
147 inline void gc_enqueue_I(void *ptr) {
149 BAMBOO_DEBUGPRINT(0xe601);
150 BAMBOO_DEBUGPRINT_REG(ptr);
// head chunk full: link in a fresh chunk
152 if (gcheadindex==NUMPTRS) {
153 struct pointerblock * tmp;
158 tmp=RUNMALLOC_I(sizeof(struct pointerblock));
159 } // if (gcspare!=NULL)
163 } // if (gcheadindex==NUMPTRS)
164 gchead->ptrs[gcheadindex++]=ptr;
166 BAMBOO_DEBUGPRINT(0xe602);
168 } // void gc_enqueue_I(void *ptr)
170 // dequeue and destroy the queue
171 inline void * gc_dequeue_I() {
172 if (gctailindex==NUMPTRS) {
173 struct pointerblock *tmp=gctail;
180 } // if (gcspare!=NULL)
181 } // if (gctailindex==NUMPTRS)
182 return gctail->ptrs[gctailindex++];
183 } // void * gc_dequeue()
185 // dequeue and do not destroy the queue
// Non-destructive variant of gc_dequeue_I: walks the chunks via the
// gctail2 cursor without freeing them.
186 inline void * gc_dequeue2_I() {
187 if (gctailindex2==NUMPTRS) {
188 struct pointerblock *tmp=gctail2;
189 gctail2=gctail2->next;
191 } // if (gctailindex2==NUMPTRS)
192 return gctail2->ptrs[gctailindex2++];
193 } // void * gc_dequeue2()
// Non-zero iff the destructive mark-queue cursor has not caught up with
// the enqueue position.
195 inline int gc_moreItems_I() {
196 if ((gchead==gctail)&&(gctailindex==gcheadindex))
199 } // int gc_moreItems()
// Non-zero iff the non-destructive cursor (gctail2) has not caught up with
// the enqueue position.
201 inline int gc_moreItems2_I() {
202 if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
205 } // int gc_moreItems2()
207 // should be invoked with interruption closed
208 // enqueue a large obj: start addr & length
// Append (ptr, length, host) to the large-object queue, growing it by one
// doubly-linked chunk (reusing gclobjspare when available) when full.
209 inline void gc_lobjenqueue_I(void *ptr,
213 BAMBOO_DEBUGPRINT(0xe901);
215 if (gclobjheadindex==NUMLOBJPTRS) {
216 struct lobjpointerblock * tmp;
217 if (gclobjspare!=NULL) {
221 tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
222 } // if (gclobjspare!=NULL)
// link the new chunk in both directions (list is doubly linked)
223 gclobjhead->next=tmp;
224 tmp->prev = gclobjhead;
227 } // if (gclobjheadindex==NUMLOBJPTRS)
228 gclobjhead->lobjs[gclobjheadindex]=ptr;
229 gclobjhead->lengths[gclobjheadindex]=length;
230 gclobjhead->hosts[gclobjheadindex++]=host;
232 BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
233 BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
234 BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
236 } // void gc_lobjenqueue_I(void *ptr...)
238 // dequeue and destroy the queue
// Pop the next large object, writing its length and host core through the
// out-parameters and returning its address; frees (or caches) exhausted
// tail chunks.
239 inline void * gc_lobjdequeue_I(int * length,
241 if (gclobjtailindex==NUMLOBJPTRS) {
242 struct lobjpointerblock *tmp=gclobjtail;
243 gclobjtail=gclobjtail->next;
245 gclobjtail->prev = NULL;
246 if (gclobjspare!=NULL) {
252 } // if (gclobjspare!=NULL)
253 } // if (gclobjtailindex==NUMLOBJPTRS)
255 *length = gclobjtail->lengths[gclobjtailindex];
258 *host = (int)(gclobjtail->hosts[gclobjtailindex]);
260 return gclobjtail->lobjs[gclobjtailindex++];
261 } // void * gc_lobjdequeue()
// Non-zero iff the destructive large-object cursor has not reached the
// enqueue position.
263 inline int gc_lobjmoreItems_I() {
264 if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
267 } // int gc_lobjmoreItems()
269 // dequeue and don't destroy the queue
// Advance the secondary large-object cursor one step forward without
// freeing chunks; returns nothing -- the caller reads the slot directly.
270 inline void gc_lobjdequeue2_I() {
271 if (gclobjtailindex2==NUMLOBJPTRS) {
272 gclobjtail2=gclobjtail2->next;
276 } // if (gclobjtailindex2==NUMLOBJPTRS)
277 } // void * gc_lobjdequeue2()
// Non-zero iff the secondary cursor (gclobjtail2) has not reached the
// enqueue position (forward scan).
279 inline int gc_lobjmoreItems2_I() {
280 if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
283 } // int gc_lobjmoreItems2()
285 // 'reversly' dequeue and don't destroy the queue
// Step the secondary cursor one slot BACKWARD (toward the queue tail),
// hopping to the previous chunk when the index underflows.
286 inline void gc_lobjdequeue3_I() {
287 if (gclobjtailindex2==0) {
288 gclobjtail2=gclobjtail2->prev;
289 gclobjtailindex2=NUMLOBJPTRS-1;
292 } // if (gclobjtailindex2==0)
293 } // void * gc_lobjdequeue3()
// Non-zero iff the backward scan (gclobjtail2) has not reached the
// destructive tail position.
295 inline int gc_lobjmoreItems3_I() {
296 if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
299 } // int gc_lobjmoreItems3()
301 inline void gc_lobjqueueinit4_I() {
302 gclobjtail2 = gclobjtail;
303 gclobjtailindex2 = gclobjtailindex;
304 } // void gc_lobjqueueinit2()
// Non-destructive forward dequeue via the secondary cursor: writes length
// and host through the out-parameters and returns the object's address.
306 inline void * gc_lobjdequeue4_I(int * length,
308 if (gclobjtailindex2==NUMLOBJPTRS) {
309 gclobjtail2=gclobjtail2->next;
311 } // if (gclobjtailindex2==NUMLOBJPTRS)
313 *length = gclobjtail2->lengths[gclobjtailindex2];
316 *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
318 return gclobjtail2->lobjs[gclobjtailindex2++];
319 } // void * gc_lobjdequeue4()
// Non-zero iff the type-4 forward scan has not reached the enqueue
// position.
321 inline int gc_lobjmoreItems4_I() {
322 if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
325 } // int gc_lobjmoreItems4()
// Current heap bound for this core's compaction run; updated during GC.
327 INTPTR gccurr_heapbound = 0;
// Read the type word at ptr[0] and compute the object's size: plain
// objects use classsize[type]; arrays add sizeof(ArrayObject) plus
// length * element size.  Results are returned via the (not fully
// visible) out-parameters.
329 inline void gettype_size(void * ptr,
332 int type = ((int *)ptr)[0];
334 if(type < NUMCLASSES) {
// regular object: fixed per-class size
336 size = classsize[type];
// array object: header + length * element size
339 struct ArrayObject *ao=(struct ArrayObject *)ptr;
340 int elementsize=classsize[type];
341 int length=ao->___length___;
342 size=sizeof(struct ArrayObject)+length*elementsize;
343 } // if(type < NUMCLASSES)
// Decide whether the object at ptr must be treated as a "large object":
// either it starts exactly at a block boundary, or its size crosses the
// boundary of the block that contains it.  Also returns type/size via
// gettype_size's out-parameters.
348 inline bool isLarge(void * ptr,
352 BAMBOO_DEBUGPRINT(0xe701);
353 BAMBOO_DEBUGPRINT_REG(ptr);
355 // check if a pointer is referring to a large object
356 gettype_size(ptr, ttype, tsize);
358 BAMBOO_DEBUGPRINT(*tsize);
// blocks below BAMBOO_LARGE_SMEM_BOUND use the larger L-size
360 int bound = (BAMBOO_SMEM_SIZE);
361 if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
362 bound = (BAMBOO_SMEM_SIZE_L);
364 if((((int)ptr-gcbaseva)%(bound))==0) {
365 // ptr is a start of a block
367 BAMBOO_DEBUGPRINT(0xe702);
368 BAMBOO_DEBUGPRINT(1);
// object extends past the end of its block
372 if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
373 // it acrosses the boundary of current block
375 BAMBOO_DEBUGPRINT(0xe703);
376 BAMBOO_DEBUGPRINT(1);
381 BAMBOO_DEBUGPRINT(0);
384 } // bool isLarge(void * ptr, int * ttype, int * tsize)
// Map a shared-heap address to the core that owns it (via RESIDECORE).
386 inline int hostcore(void * ptr) {
387 // check the host core of ptr
389 RESIDECORE(ptr, &host);
391 BAMBOO_DEBUGPRINT(0xedd0);
392 BAMBOO_DEBUGPRINT_REG(ptr);
393 BAMBOO_DEBUGPRINT_REG(host);
396 } // int hostcore(void * ptr)
// Translate a logical core number into its (x, y) tile coordinates using
// the bamboo_cpu2coords lookup table (two entries per core).
398 inline void cpu2coords(int coren,
401 *x = bamboo_cpu2coords[2*coren];
402 *y = bamboo_cpu2coords[2*coren+1];
403 } // void cpu2coords(...)
405 inline bool isLocal(void * ptr) {
406 // check if a pointer is in shared heap on this core
407 return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
408 } // bool isLocal(void * ptr)
// True iff every GC core (0..NUMCORES4GC-1) has reported status 0
// ("stalled") in gccorestatus.
410 inline bool gc_checkCoreStatus_I() {
411 bool allStall = true;
412 for(int i = 0; i < NUMCORES4GC; ++i) {
413 if(gccorestatus[i] != 0) {
416 } // if(gccorestatus[i] != 0)
417 } // for(i = 0; i < NUMCORES4GC; ++i)
// Same as gc_checkCoreStatus_I but over all active cores
// (0..NUMCORESACTIVE-1), not just the GC cores.
421 inline bool gc_checkAllCoreStatus_I() {
422 bool allStall = true;
423 for(int i = 0; i < NUMCORESACTIVE; ++i) {
424 if(gccorestatus[i] != 0) {
427 } // if(gccorestatus[i] != 0)
428 } // for(i = 0; i < NUMCORESACTIVE; ++i)
// STARTUPCORE-side termination detection for the mark phase: when all
// cores stall, it double-samples each core's send/receive object counters
// (two entries selected by gcnumsrobjs_index); only when the totals
// balance AND the two samples agree does it advance gcphase to
// COMPACTPHASE.  Otherwise it flips the sample index and retries.
// (Function name keeps the original's 'Statue' typo -- it is the public
// identifier used by callers.)
432 inline void checkMarkStatue() {
434 BAMBOO_DEBUGPRINT(0xee01);
438 (waitconfirm && (numconfirm == 0))) {
440 BAMBOO_DEBUGPRINT(0xee02);
// pick which of the two sample entries to record into
445 entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
448 entry_index = gcnumsrobjs_index;
450 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
451 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
452 gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
453 gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
454 // check the status of all cores
455 bool allStall = gc_checkAllCoreStatus_I();
457 BAMBOO_DEBUGPRINT(0xee03);
461 BAMBOO_DEBUGPRINT(0xee04);
466 BAMBOO_DEBUGPRINT(0xee05);
468 // the first time found all cores stall
469 // send out status confirm msg to all other cores
470 // reset the corestatus array too
471 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
473 numconfirm = NUMCORESACTIVE - 1;
474 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
475 for(i = 1; i < NUMCORESACTIVE; ++i) {
477 // send mark phase finish confirm request msg to core i
478 send_msg_1(i, GCMARKCONFIRM, false);
479 } // for(i = 1; i < NUMCORESACTIVE; ++i)
482 // check if the sum of send objs and receive obj are the same
483 // yes->check if the info is the latest; no->go on executing
485 for(i = 0; i < NUMCORESACTIVE; ++i) {
486 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
487 } // for(i = 0; i < NUMCORESACTIVE; ++i)
489 BAMBOO_DEBUGPRINT(0xee06);
490 BAMBOO_DEBUGPRINT_REG(sumsendobj);
492 for(i = 0; i < NUMCORESACTIVE; ++i) {
493 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
494 } // for(i = 0; i < NUMCORESACTIVE; ++i)
496 BAMBOO_DEBUGPRINT(0xee07);
497 BAMBOO_DEBUGPRINT_REG(sumsendobj);
// balanced: every sent object was received somewhere
499 if(0 == sumsendobj) {
500 // Check if there are changes of the numsendobjs or numreceiveobjs on
502 bool ischanged = false;
503 for(i = 0; i < NUMCORESACTIVE; ++i) {
504 if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) ||
505 (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
509 } // for(i = 0; i < NUMCORESACTIVE; ++i)
511 BAMBOO_DEBUGPRINT(0xee08);
512 BAMBOO_DEBUGPRINT_REG(ischanged);
516 BAMBOO_DEBUGPRINT(0xee09);
518 // all the core status info are the latest
// mark phase confirmed finished: move the collector forward
520 gcphase = COMPACTPHASE;
521 // restore the gcstatus for all cores
522 for(i = 0; i < NUMCORESACTIVE; ++i) {
524 } // for(i = 0; i < NUMCORESACTIVE; ++i)
527 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
530 // There were changes between phase 1 and phase 2, can not decide
531 // whether the mark phase has been finished
533 // As it fails in phase 2, flip the entries
534 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
535 } // if(0 == sumsendobj) else ...
536 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
537 } // if(!gcwaitconfirm) else()
539 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
541 } // if((!waitconfirm)...
543 BAMBOO_DEBUGPRINT(0xee0a);
545 } // void checkMarkStatue()
// Pre-GC barrier: ask every active core for a status confirmation, wait
// for all replies, and only report "safe to start GC" once the global
// sent-minus-received object count is zero (i.e., no transfer-object
// messages are still in flight).
547 inline bool preGC() {
548 // preparation for gc
549 // make sure to clear all incoming msgs espacially transfer obj msgs
551 BAMBOO_DEBUGPRINT(0xec01);
555 (waitconfirm && (numconfirm == 0))) {
556 // send out status confirm msgs to all cores to check if there are
557 // transfer obj msgs on-the-fly
559 numconfirm = NUMCORESACTIVE - 1;
560 for(i = 1; i < NUMCORESACTIVE; ++i) {
562 // send status confirm msg to core i
563 send_msg_1(i, STATUSCONFIRM, false);
564 } // for(i = 1; i < NUMCORESACTIVE; ++i)
567 BAMBOO_DEBUGPRINT(0xec02);
// busy-wait until every core has replied
570 if(numconfirm == 0) {
573 } // wait for confirmations
577 BAMBOO_DEBUGPRINT(0xec03);
579 numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
580 numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
583 BAMBOO_DEBUGPRINT(0xec04);
585 for(i = 0; i < NUMCORESACTIVE; ++i) {
586 sumsendobj += numsendobjs[i];
588 BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
590 } // for(i = 1; i < NUMCORESACTIVE; ++i)
592 BAMBOO_DEBUGPRINT(0xec05);
593 BAMBOO_DEBUGPRINT_REG(sumsendobj);
595 for(i = 0; i < NUMCORESACTIVE; ++i) {
596 sumsendobj -= numreceiveobjs[i];
598 BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
600 } // for(i = 1; i < NUMCORESACTIVE; ++i)
602 BAMBOO_DEBUGPRINT(0xec06);
603 BAMBOO_DEBUGPRINT_REG(sumsendobj);
605 if(0 == sumsendobj) {
608 // still have some transfer obj msgs on-the-fly, can not start gc
610 } // if(0 == sumsendobj)
613 BAMBOO_DEBUGPRINT(0xec07);
615 // previously asked for status confirmation and do not have all the
616 // confirmations yet, can not start gc
618 } // if((!waitconfirm) ||
// Reset all per-collection state: the STARTUPCORE clears every core's
// status/counter tables; every core then resets its own counters, the
// mark queue, the large-object queue, and the pointer-mapping hash
// tables, and zeroes the GC profiling accumulators.
621 inline void initGC() {
623 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
624 for(i = 0; i < NUMCORES4GC; ++i) {
626 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
627 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
629 gcrequiredmems[i] = 0;
630 gcfilledblocks[i] = 0;
632 } // for(i = 0; i < NUMCORES4GC; ++i)
633 for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
635 gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
636 gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
641 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
642 gcself_numsendobjs = 0;
643 gcself_numreceiveobjs = 0;
644 gcmarkedptrbound = 0;
647 //gcismapped = false;
// reset the mark queue (allocate the first chunk lazily)
658 gcheadindex=gctailindex=gctailindex2 = 0;
659 gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
661 gctailindex = gctailindex2 = gcheadindex;
662 gctail = gctail2 = gchead;
665 // initialize the large obj queues
666 if (gclobjhead==NULL) {
669 gclobjtailindex2 = 0;
670 gclobjhead=gclobjtail=gclobjtail2=
671 RUNMALLOC(sizeof(struct lobjpointerblock));
673 gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
674 gclobjtail = gclobjtail2 = gclobjhead;
676 gclobjhead->next = gclobjhead->prev = NULL;
678 #ifdef LOCALHASHTBL_TEST
679 freeRuntimeHash(gcpointertbl);
680 gcpointertbl = allocateRuntimeHash(20);
682 mgchashreset(gcpointertbl);
684 //gcpointertbl = allocateMGCHash(20);
686 freeMGCHash(gcforwardobjtbl);
687 gcforwardobjtbl = allocateMGCHash(20, 3);
689 // initialize the mapping info related structures
690 if((BAMBOO_NUM_OF_CORE < NUMCORES4GC) && (gcsharedptbl != NULL)) {
691 // Never free the shared hash table, just reset it
692 /*freeGCSharedHash(gcsharedptbl);
693 gcsharedptbl = allocateGCSharedHash(20);*/
694 mgcsharedhashReset(gcsharedptbl);
696 // Zero out the remaining bamboo_cur_msp
697 // Only zero out the first 4 bytes of the remaining memory
698 /*if((bamboo_cur_msp != 0)
699 && (bamboo_smem_zero_top == bamboo_cur_msp)
700 && (bamboo_smem_size > 0)) {
701 *((int *)bamboo_cur_msp) = 0;
// reset the GC profiling counters
704 gc_num_livespace = 0;
705 gc_num_freespace = 0;
707 gc_num_lobjspace = 0;
709 gc_num_forwardobj = 0;
710 gc_num_profiles = NUMCORESACTIVE - 1;
714 // compute load balance for all cores
// Sum all cores' gcloads to find the post-compaction heap top, convert it
// to a block index, and derive the number of blocks each GC core should
// receive plus the top core (via RESIDECORE).  heaptop is an out-param.
715 inline int loadbalance(int * heaptop) {
716 // compute load balance
719 // get the total loads
720 int tloads = gcloads[STARTUPCORE];
721 for(i = 1; i < NUMCORES4GC; i++) {
722 tloads += gcloads[i];
724 *heaptop = gcbaseva + tloads;
727 BAMBOO_DEBUGPRINT(0xdddd);
728 BAMBOO_DEBUGPRINT_REG(tloads);
729 BAMBOO_DEBUGPRINT_REG(*heaptop);
732 BLOCKINDEX(*heaptop, &b);
733 int numbpc = b / NUMCORES4GC; // num of blocks per core
735 BAMBOO_DEBUGPRINT_REG(b);
736 BAMBOO_DEBUGPRINT_REG(numbpc);
739 RESIDECORE(heaptop, &gctopcore);
741 BAMBOO_DEBUGPRINT_REG(gctopcore);
744 } // void loadbalance(int * heaptop)
// Stage all recorded large objects at the very top of the shared heap:
// first insertion-sorts the queue by address (ascending) while summing the
// total size, then -- if the top-of-heap region does not collide with
// gcheaptop -- copies each object downward from the heap top, marking each
// as COMPACTED.  Returns false when there is not enough room.
746 inline bool cacheLObjs() {
747 // check the total mem size need for large objs
748 unsigned long long sumsize = 0;
751 BAMBOO_DEBUGPRINT(0xe801);
753 gclobjtail2 = gclobjtail;
754 gclobjtailindex2 = gclobjtailindex;
758 // compute total mem size required and sort the lobjs in ascending order
759 while(gc_lobjmoreItems2_I()) {
761 tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
762 tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
763 tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
769 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
770 BAMBOO_DEBUGPRINT_REG(tmp_len);
771 BAMBOO_DEBUGPRINT_REG(sumsize);
// insertion sort: shift larger entries up until tmp_lobj's slot is found
773 int i = gclobjtailindex2-1;
774 struct lobjpointerblock * tmp_block = gclobjtail2;
775 // find the place to insert
778 if(tmp_block->prev == NULL) {
781 if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
782 tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
783 tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
784 tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
785 tmp_block = tmp_block->prev;
789 } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
791 if(tmp_block->lobjs[i-1] > tmp_lobj) {
792 tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
793 tmp_block->lengths[i] = tmp_block->lengths[i-1];
794 tmp_block->hosts[i] = tmp_block->hosts[i-1];
798 } // if(tmp_block->lobjs[i-1] < tmp_lobj)
799 } // if(i ==0 ) else {}
// drop the entry into its sorted slot (skip if already in place)
802 if(i != gclobjtailindex2 - 1) {
803 tmp_block->lobjs[i] = tmp_lobj;
804 tmp_block->lengths[i] = tmp_len;
805 tmp_block->hosts[i] = tmp_host;
807 } // while(gc_lobjmoreItems2())
810 gc_num_lobjspace = sumsize;
812 // check if there are enough space to cache these large objs
813 INTPTR dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
814 if((unsigned long long)gcheaptop > (unsigned long long)dst) {
815 // do not have enough room to cache large objs
817 BAMBOO_DEBUGPRINT(0xe802);
818 BAMBOO_DEBUGPRINT_REG(dst);
819 BAMBOO_DEBUGPRINT_REG(gcheaptop);
820 BAMBOO_DEBUGPRINT_REG(sumsize);
825 BAMBOO_DEBUGPRINT(0xe803);
826 BAMBOO_DEBUGPRINT_REG(dst);
827 BAMBOO_DEBUGPRINT_REG(gcheaptop);
830 gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
831 // cache the largeObjs to the top of the shared heap
832 //gclobjtail2 = gclobjtail;
833 //gclobjtailindex2 = gclobjtailindex;
834 dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
// reverse scan: copy objects from highest address downward
835 while(gc_lobjmoreItems3_I()) {
837 size = gclobjtail2->lengths[gclobjtailindex2];
838 // set the mark field to , indicating that this obj has been moved
839 // and need to be flushed
840 ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
// use memmove when source and destination may overlap
842 if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
843 memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
845 //BAMBOO_WRITE_HINT_CACHE(dst, size);
846 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
849 BAMBOO_DEBUGPRINT(0x804);
850 BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
851 BAMBOO_DEBUGPRINT(dst);
852 BAMBOO_DEBUGPRINT_REG(size);
853 BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
854 BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
858 } // void cacheLObjs()
860 // update the bmmboo_smemtbl to record current shared mem usage
// Record how much of each block owned by core `coren` is used, up to
// localtop: full blocks get their full size, the block containing
// localtop gets the partial load, and (in mixed-mem builds) the global
// used-memory counter is bumped accordingly.
861 void updateSmemTbl(int coren,
864 int bound = BAMBOO_SMEM_SIZE_L;
865 BLOCKINDEX(localtop, &ltopcore);
866 if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
867 bound = BAMBOO_SMEM_SIZE;
869 int load = (localtop-gcbaseva)%bound;
// walk this core's blocks in ownership order (gc_core2block layout)
874 toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
875 if(toset < ltopcore) {
876 bamboo_smemtbl[toset]=
877 (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
879 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
881 } else if(toset == ltopcore) {
882 bamboo_smemtbl[toset] = load;
884 gcmem_mixed_usedmem += bamboo_smemtbl[toset];
896 } // void updateSmemTbl(int, int)
// Final large-object placement: after compaction, normalize each core's
// gcloads to the true heap top, rebuild bamboo_smemtbl, then move the
// large objects cached at the heap top (by cacheLObjs) down to just above
// the live data -- writing block headers, padding with -2 filler, updating
// gcsbstarttbl/bamboo_smemtbl, recording old->new address mappings in
// gcpointertbl, and notifying each object's host core via GCLOBJMAPPING.
// NOTE(review): many interior lines are missing from this extraction; the
// visible text is not the complete function body.
898 inline void moveLObjs() {
900 BAMBOO_DEBUGPRINT(0xea01);
903 // update the gcmem_mixed_usedmem
904 gcmem_mixed_usedmem = 0;
906 // zero out the smemtbl
907 BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
908 // find current heap top
909 // flush all gcloads to indicate the real heap top on one core
910 // previous it represents the next available ptr on a core
911 if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
912 && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
913 // edge of a block, check if this is exactly the heaptop
914 BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
915 gcloads[0]+=(gcfilledblocks[0]>1 ?
916 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
918 updateSmemTbl(0, gcloads[0]);
920 BAMBOO_DEBUGPRINT(0xea02);
921 BAMBOO_DEBUGPRINT_REG(gcloads[0]);
922 BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
// repeat the normalization for the remaining GC cores
924 for(int i = 1; i < NUMCORES4GC; i++) {
927 BAMBOO_DEBUGPRINT(0xf000+i);
928 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
929 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
931 if((gcfilledblocks[i] > 0)
932 && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
933 // edge of a block, check if this is exactly the heaptop
934 BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
936 (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
939 updateSmemTbl(i, gcloads[i]);
941 BAMBOO_DEBUGPRINT_REG(gcloads[i]);
943 } // for(int i = 1; i < NUMCORES4GC; i++) {
945 // find current heap top
947 // a bug here: when using local allocation, directly move large objects
948 // to the highest free chunk might not be memory efficient
// scan downward for the highest non-empty block
953 for(i = gcnumblock-1; i >= 0; i--) {
954 if(bamboo_smemtbl[i] > 0) {
959 tmpheaptop = gcbaseva;
961 tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
962 (BAMBOO_SMEM_SIZE_L*i) :
963 (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
966 // move large objs from gcheaptop to tmpheaptop
967 // write the header first
968 unsigned int tomove = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -gcheaptop;
970 gcmem_mixed_usedmem += tomove;
973 BAMBOO_DEBUGPRINT(0xea03);
974 BAMBOO_DEBUGPRINT_REG(tomove);
975 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
976 BAMBOO_DEBUGPRINT_REG(gcheaptop);
978 // flush the sbstartbl
979 BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
980 (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
982 gcheaptop = tmpheaptop;
984 // check how many blocks it acrosses
985 int remain = tmpheaptop-gcbaseva;
986 int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb;//number of the sblock
987 int b = 0; // number of the block
988 BLOCKINDEX(tmpheaptop, &b);
989 // check the remaining space in this block
990 bound = (BAMBOO_SMEM_SIZE);
991 if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
992 bound = (BAMBOO_SMEM_SIZE_L);
994 remain = bound - remain%bound;
997 BAMBOO_DEBUGPRINT(0xea04);
// reserve room for the block header at the start of the run
1003 int base = tmpheaptop;
1005 remain -= BAMBOO_CACHE_LINE_SIZE;
1006 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1007 gc_lobjqueueinit4_I();
1008 while(gc_lobjmoreItems4_I()) {
1009 ptr = (int)(gc_lobjdequeue4_I(&size, &host));
1010 ALIGNSIZE(size, &isize);
1011 if(remain < isize) {
1012 // this object acrosses blocks
1014 // close current block, fill its header
1015 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1016 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1017 bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE;//add the size of header
1021 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
1022 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1024 remain -= BAMBOO_CACHE_LINE_SIZE;
1025 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1026 BLOCKINDEX(tmpheaptop, &b);
1027 sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
1028 } // if(cpysize > 0)
1030 // move the large obj
// memmove when the cached copy overlaps the destination
1031 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1032 memmove(tmpheaptop, gcheaptop, size);
1034 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
1035 memcpy(tmpheaptop, gcheaptop, size);
1037 // fill the remaining space with -2 padding
1038 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1040 BAMBOO_DEBUGPRINT(0xea05);
1041 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1042 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1043 BAMBOO_DEBUGPRINT_REG(size);
1044 BAMBOO_DEBUGPRINT_REG(isize);
1045 BAMBOO_DEBUGPRINT_REG(base);
1048 // cache the mapping info anyway
1049 //if(ptr != tmpheaptop) {
1050 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1051 #ifdef LOCALHASHTBL_TEST
1052 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1054 mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
1056 //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1057 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1060 BAMBOO_DEBUGPRINT(0xcdca);
1061 BAMBOO_DEBUGPRINT_REG(ptr);
1062 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1064 if(host != BAMBOO_NUM_OF_CORE) {
1065 // send the original host core with the mapping info
1066 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1068 BAMBOO_DEBUGPRINT(0xcdcb);
1069 BAMBOO_DEBUGPRINT_REG(ptr);
1070 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1072 } // if(host != BAMBOO_NUM_OF_CORE)
1073 tmpheaptop += isize;
1075 // set the gcsbstarttbl and bamboo_smemtbl
1076 int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
// interior sblocks spanned by this object carry no object start
1077 for(int k = 1; k < tmpsbs; k++) {
1078 gcsbstarttbl[sb+k] = (INTPTR)(-1);
1081 bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1082 BLOCKINDEX(tmpheaptop-1, &tmpsbs);
1083 for(; b < tmpsbs; b++) {
1084 bamboo_smemtbl[b] = bound;
1085 if(b==NUMCORES4GC-1) {
1086 bound = BAMBOO_SMEM_SIZE;
1089 if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
1090 gcsbstarttbl[sb] = (INTPTR)(-1);
1091 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
1092 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1093 bamboo_smemtbl[b] = bound;
1095 gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
1096 remain = tmpheaptop-gcbaseva;
1097 bamboo_smemtbl[b] = remain%bound;
1098 remain = bound - bamboo_smemtbl[b];
1099 } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
1101 // close current block and fill the header
1102 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1103 *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
1106 if(remain == BAMBOO_CACHE_LINE_SIZE) {
1107 // fill with 0 in case
1108 BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
1110 remain -= BAMBOO_CACHE_LINE_SIZE;
1111 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
1114 // move the large obj
1115 if((int)gcheaptop < (int)(tmpheaptop)+size) {
1116 memmove(tmpheaptop, gcheaptop, size);
1118 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
1119 memcpy(tmpheaptop, gcheaptop, size);
1121 // fill the remaining space with -2 padding
1122 BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
1124 BAMBOO_DEBUGPRINT(0xea06);
1125 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1126 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1127 BAMBOO_DEBUGPRINT_REG(size);
1128 BAMBOO_DEBUGPRINT_REG(isize);
1133 // cache the mapping info anyway
1134 //if(ptr != tmpheaptop) {
1135 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1136 #ifdef LOCALHASHTBL_TEST
1137 RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
1139 mgchashInsert_I(gcpointertbl, ptr, tmpheaptop);
1141 //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
1142 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1145 BAMBOO_DEBUGPRINT(0xcdcc);
1146 BAMBOO_DEBUGPRINT_REG(ptr);
1147 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1148 BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
1150 if(host != BAMBOO_NUM_OF_CORE) {
1151 // send the original host core with the mapping info
1152 send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop, false);
1154 BAMBOO_DEBUGPRINT(0xcdcd);
1155 BAMBOO_DEBUGPRINT_REG(ptr);
1156 BAMBOO_DEBUGPRINT_REG(tmpheaptop);
1158 } // if(host != BAMBOO_NUM_OF_CORE)
1159 tmpheaptop += isize;
1161 // update bamboo_smemtbl
1162 bamboo_smemtbl[b] += isize;
1163 } // if(remain < isize) else ...
1164 } // while(gc_lobjmoreItems())
1166 // close current block, fill the header
1167 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
1168 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
1169 bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;// add the size of the header
1171 tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
1173 gcheaptop = tmpheaptop;
1175 } // if(tomove == 0)
1178 BAMBOO_DEBUGPRINT(0xea07);
1179 BAMBOO_DEBUGPRINT_REG(gcheaptop);
// locate the first partition that is not completely full
1182 bamboo_free_block = 0;
1185 tbound = (bamboo_free_block<NUMCORES4GC) ?
1186 BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
1187 if(bamboo_smemtbl[bamboo_free_block] == tbound) {
1188 bamboo_free_block++;
1190 // the first non-full partition
1196 // check how many live space there are
1197 gc_num_livespace = 0;
1198 for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
1199 gc_num_livespace += bamboo_smemtbl[tmpi];
1201 gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
1204 BAMBOO_DEBUGPRINT(0xea08);
1205 BAMBOO_DEBUGPRINT_REG(gcheaptop);
1207 } // void moveLObjs()
// Mark a single object reference.  Local shared objects are flagged
// DISCOVERED (word [6] of the header) and pushed on the mark queue exactly
// once; remote shared objects trigger a GCMARKEDOBJ message to the owning
// core, de-duplicated through gcforwardobjtbl.  NULL and non-shared
// pointers are handled by the (partially visible) early paths.
1209 inline void markObj(void * objptr) {
1210 if(objptr == NULL) {
1213 if(ISSHAREDOBJ(objptr)) {
1214 int host = hostcore(objptr);
1215 if(BAMBOO_NUM_OF_CORE == host) {
1217 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1218 if(((int *)objptr)[6] == INIT) {
1219 // this is the first time that this object is discovered,
1220 // set the flag as DISCOVERED
1221 ((int *)objptr)[6] |= DISCOVERED;
1222 BAMBOO_CACHE_FLUSH_LINE(objptr);
1223 gc_enqueue_I(objptr);
1225 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1228 BAMBOO_DEBUGPRINT(0xbbbb);
1229 BAMBOO_DEBUGPRINT_REG(host);
1230 BAMBOO_DEBUGPRINT_REG(objptr);
1232 // check if this obj has been forwarded
1233 if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
1234 // send a msg to host informing that objptr is active
1235 send_msg_2(host, GCMARKEDOBJ, objptr, /*BAMBOO_NUM_OF_CORE,*/ false);
1237 gc_num_forwardobj++;
1238 #endif // GC_PROFILE
1239 gcself_numsendobjs++;
1240 MGCHashadd(gcforwardobjtbl, (int)objptr);
1244 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1245 gc_enqueue_I(objptr);
1246 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1247 } // if(ISSHAREDOBJ(objptr))
1248 } // void markObj(void * objptr)
1250 // enqueue root objs
// Seed the mark phase with every GC root reachable from this core: the
// garbage-list stack, the per-class object queues, the current and active
// task descriptors, cached in/out transfer-object queues, and lock-table
// entries.  Aborts (BAMBOO_EXIT) if called outside MARKPHASE.
1251 inline void tomark(struct garbagelist * stackptr) {
1252 if(MARKPHASE != gcphase) {
1254 BAMBOO_DEBUGPRINT_REG(gcphase);
1256 BAMBOO_EXIT(0xb101);
1258 gcbusystatus = true;
1262 // enqueue current stack
1263 while(stackptr!=NULL) {
1265 BAMBOO_DEBUGPRINT(0xe501);
1266 BAMBOO_DEBUGPRINT_REG(stackptr->size);
1267 BAMBOO_DEBUGPRINT_REG(stackptr->next);
1268 BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
1270 for(i=0; i<stackptr->size; i++) {
1271 if(stackptr->array[i] != NULL) {
1272 markObj(stackptr->array[i]);
1275 stackptr=stackptr->next;
1279 BAMBOO_DEBUGPRINT(0xe503);
1281 // enqueue objectsets
1282 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
1283 for(i=0; i<NUMCLASSES; i++) {
1284 struct parameterwrapper ** queues =
1285 objectqueues[BAMBOO_NUM_OF_CORE][i];
1286 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
1287 for(j = 0; j < length; ++j) {
1288 struct parameterwrapper * parameter = queues[j];
1289 struct ObjectHash * set=parameter->objectset;
1290 struct ObjectNode * ptr=set->listhead;
1292 markObj((void *)ptr->key);
1299 // euqueue current task descriptor
1300 if(currtpd != NULL) {
1302 BAMBOO_DEBUGPRINT(0xe504);
1304 for(i=0; i<currtpd->numParameters; i++) {
1305 markObj(currtpd->parameterArray[i]);
1310 BAMBOO_DEBUGPRINT(0xe505);
1312 // euqueue active tasks
1313 if(activetasks != NULL) {
1314 struct genpointerlist * ptr=activetasks->list;
1316 struct taskparamdescriptor *tpd=ptr->src;
1318 for(i=0; i<tpd->numParameters; i++) {
1319 markObj(tpd->parameterArray[i]);
1326 BAMBOO_DEBUGPRINT(0xe506);
1328 // enqueue cached transferred obj
1329 struct QueueItem * tmpobjptr = getHead(&objqueue);
1330 while(tmpobjptr != NULL) {
1331 struct transObjInfo * objInfo =
1332 (struct transObjInfo *)(tmpobjptr->objectptr);
1333 markObj(objInfo->objptr);
1334 tmpobjptr = getNextQueueItem(tmpobjptr);
1338 BAMBOO_DEBUGPRINT(0xe507);
1340 // enqueue cached objs to be transferred
1341 struct QueueItem * item = getHead(totransobjqueue);
1342 while(item != NULL) {
1343 struct transObjInfo * totransobj =
1344 (struct transObjInfo *)(item->objectptr);
1345 markObj(totransobj->objptr);
1346 item = getNextQueueItem(item);
1347 } // while(item != NULL)
1350 BAMBOO_DEBUGPRINT(0xe508);
1352 // enqueue lock related info
1353 for(i = 0; i < runtime_locklen; ++i) {
1354 markObj((void *)(runtime_locks[i].redirectlock));
1355 if(runtime_locks[i].value != NULL) {
1356 markObj((void *)(runtime_locks[i].value));
1360 } // void tomark(struct garbagelist * stackptr)
// mark: per-core mark-phase driver. Drains the local mark queue, marks
// local objects (setting MARKED, clearing DISCOVERED in header word [6]),
// routes large objects to the lobj queue, accumulates gccurr_heaptop
// (aligned live bytes on this core) and gcmarkedptrbound (highest live
// address), then scans each object's pointer fields via pointerarray[type].
// When done, reports GCFINISHMARK (or updates coordinator state directly
// if running on STARTUPCORE).
// NOTE(review): lines are elided in this extraction (numbering gaps), so
// several braces/declarations (i, j, type, size, isize) are not visible.
1362 inline void mark(bool isfirst,
1363 struct garbagelist * stackptr) {
1365 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
1369 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
1371 // enqueue root objs
1373 gccurr_heaptop = 0; // record the size of all active objs in this core
1374 // aligned but does not consider block boundaries
1375 gcmarkedptrbound = 0;
1378 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03);
1381 bool checkfield = true;
1382 bool sendStall = false;
// main mark loop: keep draining until the coordinator flips gcphase
1384 while(MARKPHASE == gcphase) {
1386 if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04);
// queue inspection must happen with interrupts off (runtime mode)
1389 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1390 bool hasItems = gc_moreItems2_I();
1391 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1393 BAMBOO_DEBUGPRINT(0xed05);
1399 gcbusystatus = true;
1401 void * ptr = gc_dequeue2_I();
1404 BAMBOO_DEBUGPRINT_REG(ptr);
1409 // check if it is a shared obj
1410 if(ISSHAREDOBJ(ptr)) {
1411 // a shared obj, check if it is a local obj on this core
1412 int host = hostcore(ptr);
1413 bool islocal = (host == BAMBOO_NUM_OF_CORE);
// DISCOVERED bit set in header word [6] == enqueued but not yet marked
1415 bool isnotmarked = ((((int *)ptr)[6] & DISCOVERED) != 0);
1416 if(isLarge(ptr, &type, &size) && isnotmarked) {
1417 // ptr is a large object and not marked or enqueued
1419 BAMBOO_DEBUGPRINT(0xecec);
1420 BAMBOO_DEBUGPRINT_REG(ptr);
1421 BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
1423 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
// large objs are compacted separately: hand off to the lobj queue
1424 gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
1426 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
// precedence note: & binds tighter than |, i.e. (hdr & ~DISCOVERED) | MARKED
1428 ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
1429 BAMBOO_CACHE_FLUSH_LINE(ptr);
1430 } else if(isnotmarked) {
1431 // ptr is an unmarked active object on this core
1432 ALIGNSIZE(size, &isize);
1433 gccurr_heaptop += isize;
1435 BAMBOO_DEBUGPRINT(0xaaaa);
1436 BAMBOO_DEBUGPRINT_REG(ptr);
1437 BAMBOO_DEBUGPRINT_REG(isize);
1438 BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
1441 ((int *)ptr)[6] = ((int *)ptr)[6] & (~DISCOVERED) | MARKED;
1442 BAMBOO_CACHE_FLUSH_LINE(ptr);
// track the highest live address; compaction stops at this bound
1444 if(ptr + size > gcmarkedptrbound) {
1445 gcmarkedptrbound = ptr + size;
1446 } // if(ptr + size > gcmarkedptrbound)
1448 // ptr is not an active obj or has been marked
1450 } // if(isLarge(ptr, &type, &size)) else ...
1451 } /* can never reach here
1454 if(BAMBOO_NUM_OF_CORE == 0) {
1455 BAMBOO_DEBUGPRINT(0xbbbb);
1456 BAMBOO_DEBUGPRINT_REG(host);
1457 BAMBOO_DEBUGPRINT_REG(ptr);
1460 // check if this obj has been forwarded
1461 if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
1462 // send a msg to host informing that ptr is active
1463 send_msg_2(host, GCMARKEDOBJ, ptr, false);
1464 gcself_numsendobjs++;
1465 MGCHashadd(gcforwardobjtbl, (int)ptr);
1468 }// if(isLocal(ptr)) else ...*/
1469 } // if(ISSHAREDOBJ(ptr))
1471 BAMBOO_DEBUGPRINT(0xed06);
1475 // scan all pointers in ptr
// pointerarray[type]: 0 => no pointers; 1 => array of pointers;
// otherwise a table whose [0] is the field count, [1..] are offsets
1476 unsigned INTPTR * pointer;
1477 pointer=pointerarray[type];
1479 /* Array of primitives */
1481 } else if (((INTPTR)pointer)==1) {
1482 /* Array of pointers */
1483 struct ArrayObject *ao=(struct ArrayObject *) ptr;
1484 int length=ao->___length___;
1486 for(j=0; j<length; j++) {
// element j lives right after the ___length___ field
1488 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
1492 INTPTR size=pointer[0];
1494 for(i=1; i<=size; i++) {
1495 unsigned int offset=pointer[i];
1496 void * objptr=*((void **)(((char *)ptr)+offset));
1499 } // if (pointer==0) else if ... else ...
1501 } // while(gc_moreItems2())
1503 BAMBOO_DEBUGPRINT(0xed07);
1505 gcbusystatus = false;
1506 // send mark finish msg to core coordinator
1507 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
1509 BAMBOO_DEBUGPRINT(0xed08);
// coordinator core records its own completion in the shared arrays
1511 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
1512 gcnumsendobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
1513 gcnumreceiveobjs[gcnumsrobjs_index][BAMBOO_NUM_OF_CORE]=
1514 gcself_numreceiveobjs;
1515 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
1519 BAMBOO_DEBUGPRINT(0xed09);
1521 send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
1522 gcself_numsendobjs, gcself_numreceiveobjs, false);
1525 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
1527 BAMBOO_DEBUGPRINT(0xed0a);
1530 if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
1532 BAMBOO_DEBUGPRINT(0xed0b);
1536 } // while(MARKPHASE == gcphase)
// compact2Heaptophelper_I: (interrupts-closed helper) grant core `coren`'s
// pending memory request out of the current heap-top core's space.
// If the top core is the requester itself, set the global move parameters
// directly; otherwise send a GCMOVESTART message. If the request fits in
// the remaining space of the current top block, consume it; otherwise
// advance the top core to its next block (NEXTTOPCORE) and leave the
// request partially satisfied. `p`, `numblocks`, `remain` are in/out
// cursors describing the current heap-top position.
// NOTE(review): parameter lines and some braces are elided in this view.
1541 inline void compact2Heaptophelper_I(int coren,
1546 int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
1547 if(STARTUPCORE == coren) {
// requester is this (coordinator) core: no message needed
1549 gcmovestartaddr = *p;
1550 gcdstcore = gctopcore;
1551 gcblock2fill = *numblocks + 1;
1553 send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
1556 BAMBOO_DEBUGPRINT_REG(coren);
1557 BAMBOO_DEBUGPRINT_REG(gctopcore);
1558 BAMBOO_DEBUGPRINT_REG(*p);
1559 BAMBOO_DEBUGPRINT_REG(*numblocks+1);
1561 if(memneed < *remain) {
1563 BAMBOO_DEBUGPRINT(0xd104);
// request fully satisfied from the current top block
1566 gcrequiredmems[coren] = 0;
1567 gcloads[gctopcore] += memneed;
1568 *remain = *remain - memneed;
1571 BAMBOO_DEBUGPRINT(0xd105);
1573 // next available block: current top block exhausted
1575 gcfilledblocks[gctopcore] += 1;
1577 BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
1578 gcloads[gctopcore] = newbase;
// remaining need shrinks by what was granted (minus the header line)
1579 gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
1580 gcstopblock[gctopcore]++;
1581 gctopcore = NEXTTOPCORE(gctopblock);
1583 *numblocks = gcstopblock[gctopcore];
1584 *p = gcloads[gctopcore];
// first NUMCORES4GC blocks are the large "L" blocks, the rest are small
1586 *remain=(b<NUMCORES4GC) ?
1587 ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
1588 : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
1590 BAMBOO_DEBUGPRINT(0xd106);
1591 BAMBOO_DEBUGPRINT_REG(gctopcore);
1592 BAMBOO_DEBUGPRINT_REG(*p);
1593 BAMBOO_DEBUGPRINT_REG(b);
1594 BAMBOO_DEBUGPRINT_REG(*remain);
1596 } // if(memneed < remain)
1598 } // void compact2Heaptophelper_I(int, int*, int*, int*)
// compact2Heaptop: coordinator-side pass run when no core has spare memory
// but some cores are blocked with pending move requests. Locates the
// current heap top (gctopcore/gcloads) and directs blocked cores to move
// their remaining data there, serving the top core's own request first.
// All shared-state inspection/mutation is bracketed by runtime-mode
// (interrupt-disabled) sections.
// NOTE(review): declarations of p and b are elided in this extraction.
1600 inline void compact2Heaptop() {
1601 // no cores with spare mem and some cores are blocked with pending move
1602 // find the current heap top and make them move to the heap top
1604 int numblocks = gcfilledblocks[gctopcore];
1605 //BASEPTR(gctopcore, numblocks, &p);
1606 p = gcloads[gctopcore];
// space left in the top core's current block (L-blocks sized differently)
1609 int remain = (b<NUMCORES4GC) ?
1610 ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
1611 : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
1612 // check if the top core finishes
1613 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1614 if(gccorestatus[gctopcore] != 0) {
1616 BAMBOO_DEBUGPRINT(0xd101);
1617 BAMBOO_DEBUGPRINT_REG(gctopcore);
1619 // let the top core finishes its own work first
1620 compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
1621 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1624 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1627 BAMBOO_DEBUGPRINT(0xd102);
1628 BAMBOO_DEBUGPRINT_REG(gctopcore);
1629 BAMBOO_DEBUGPRINT_REG(p);
1630 BAMBOO_DEBUGPRINT_REG(b);
1631 BAMBOO_DEBUGPRINT_REG(remain);
// serve every other core that is blocked with an outstanding request
1633 for(int i = 0; i < NUMCORES4GC; i++) {
1634 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1635 if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
1637 BAMBOO_DEBUGPRINT(0xd103);
1639 compact2Heaptophelper_I(i, &p, &numblocks, &remain);
1640 if(gccorestatus[gctopcore] != 0) {
1642 BAMBOO_DEBUGPRINT(0xd101);
1643 BAMBOO_DEBUGPRINT_REG(gctopcore);
1645 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1646 // the top core is not free now
1649 } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
1650 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1651 } // for(i = 0; i < NUMCORES4GC; i++)
1653 BAMBOO_DEBUGPRINT(0xd106);
1655 } // void compact2Heaptop()
// resolvePendingMoveRequest: coordinator-side matchmaking between cores
// that finished compaction with spare memory (gcfilledblocks < gcstopblock)
// and cores blocked waiting for destination memory (gcrequiredmems > 0).
// Pairs sourcecore/dstcore via assignSpareMem_I, notifying the destination
// with GCMOVESTART (or setting globals when the destination is this core).
// If nothing is running and every core is blocked, escalates to
// SUBTLECOMPACTPHASE so compact2Heaptop can take over.
// NOTE(review): the i/j scan bodies are partially elided in this view.
1657 inline void resolvePendingMoveRequest() {
1659 BAMBOO_DEBUGPRINT(0xeb01);
1662 BAMBOO_DEBUGPRINT(0xeeee);
1663 for(int k = 0; k < NUMCORES4GC; k++) {
1664 BAMBOO_DEBUGPRINT(0xf000+k);
1665 BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
1666 BAMBOO_DEBUGPRINT_REG(gcloads[k]);
1667 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
1668 BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
1670 BAMBOO_DEBUGPRINT(0xffff);
1674 bool nosparemem = true;
1675 bool haspending = false;
1676 bool hasrunning = false;
1677 bool noblock = false;
1678 int dstcore = 0; // the core who need spare mem
1679 int sourcecore = 0; // the core who has spare mem
// two cursors: i scans for spare-memory cores, j scans for pending cores
1680 for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
1682 // check if there are cores with spare mem
1683 if(gccorestatus[i] == 0) {
1684 // finished working, check if it still have spare mem
1685 if(gcfilledblocks[i] < gcstopblock[i]) {
1686 // still have spare mem
1689 } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
1694 if(gccorestatus[j] != 0) {
1695 // not finished, check if it has pending move requests
// blocked == filled all its assigned blocks AND still needs memory
1696 if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
1701 } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
1702 } // if(gccorestatus[i] == 0) else ...
1704 } // if(!haspending)
1705 if(!nosparemem && haspending) {
// found a (source, destination) pair: carve space with interrupts off
1709 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
1710 gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
1711 gcrequiredmems[dstcore],
1714 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
1716 BAMBOO_DEBUGPRINT(0xeb02);
1717 BAMBOO_DEBUGPRINT_REG(sourcecore);
1718 BAMBOO_DEBUGPRINT_REG(dstcore);
1719 BAMBOO_DEBUGPRINT_REG(startaddr);
1720 BAMBOO_DEBUGPRINT_REG(tomove);
1722 if(STARTUPCORE == dstcore) {
1724 BAMBOO_DEBUGPRINT(0xeb03);
// destination is this core: set the move parameters directly
1726 gcdstcore = sourcecore;
1728 gcmovestartaddr = startaddr;
1729 gcblock2fill = tomove;
1732 BAMBOO_DEBUGPRINT(0xeb04);
1734 send_msg_4(dstcore, GCMOVESTART, sourcecore,
1735 startaddr, tomove, false);
1742 } // for(i = 0; i < NUMCORES4GC; i++)
1744 BAMBOO_DEBUGPRINT(0xcccc);
1745 BAMBOO_DEBUGPRINT_REG(hasrunning);
1746 BAMBOO_DEBUGPRINT_REG(haspending);
1747 BAMBOO_DEBUGPRINT_REG(noblock);
// deadlock-ish state: nobody running, everybody blocked -> subtle compact
1750 if(!hasrunning && !noblock) {
1751 gcphase = SUBTLECOMPACTPHASE;
1755 } // void resovePendingMoveRequest()
// Fields of struct moveHelper: a cursor over the shared heap used by the
// compaction routines (nextSBlock/initOrig_Dst/nextBlock/moveobj). Two
// instances are used: "orig" walks live data, "to" tracks the copy target.
// NOTE(review): the "struct moveHelper {" opening line is elided in this
// extraction; only the field list and closing brace are visible.
1758 int numblocks; // block num for heap
1759 INTPTR base; // base virtual address of current heap block
1760 INTPTR ptr; // virtual address of current heap top
1761 int offset; // offset in current heap block
1762 int blockbase; // virtual address of current small block to check
1763 int blockbound; // bound virtual address of current small block
1764 int sblockindex; // index of the small blocks
1765 int top; // real size of current heap block to check
1766 int bound; // bound size of current heap block to check
1767 }; // struct moveHelper
1769 // If out of boundary of valid shared memory, return false, else return true
// nextSBlock: advance the "orig" cursor to the next small block (sblock)
// containing data to scan. Handles crossing big-block boundaries (via
// BASEPTR and bamboo_smemtbl for the real fill bound), skipping sblocks
// reserved for large objects (gcsbstarttbl[i] == -1), and sblocks whose
// usable data starts past the beginning (gcsbstarttbl[i] != 0).
// Returns false when the cursor runs past gcbaseva+BAMBOO_SHARED_MEM_SIZE
// (no more data), true otherwise.
// NOTE(review): label lines (innernextSBlock/outernextSBlock), some braces
// and return statements are elided in this extraction.
1770 inline bool nextSBlock(struct moveHelper * orig) {
1771 orig->blockbase = orig->blockbound;
1772 bool sbchanged = false;
1774 BAMBOO_DEBUGPRINT(0xecc0);
1775 BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1776 BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1777 BAMBOO_DEBUGPRINT_REG(orig->bound);
1778 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1781 // check if across a big block
1782 // TODO now do not zero out the whole memory, maybe the last two conditions
// a zero word at the cursor means "no data here" (memory not zeroed wholesale)
1784 if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)
1785 || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0)
1786 || ((*((int*)orig->blockbase))==0)) {
1788 // end of current heap block, jump to next one
1791 BAMBOO_DEBUGPRINT(0xecc1);
1792 BAMBOO_DEBUGPRINT_REG(orig->numblocks);
1794 BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
1796 BAMBOO_DEBUGPRINT(orig->base);
// past the end of shared memory: nothing left to scan
1798 if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
1800 orig->ptr = orig->base; // set current ptr to out of boundary too
1803 //orig->bound = orig->base + BAMBOO_SMEM_SIZE;
1804 orig->blockbase = orig->base;
1805 orig->sblockindex = (orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
1808 BLOCKINDEX(orig->base, &blocknum);
// empty big block: skip it entirely
1809 if(bamboo_smemtbl[blocknum] == 0) {
1811 goto innernextSBlock;
1813 // check the bamboo_smemtbl to decide the real bound
1814 orig->bound = orig->base + bamboo_smemtbl[blocknum];
1815 } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
// crossed an sblock boundary within the same big block
1816 orig->sblockindex += 1;
1818 } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
1820 // check if this sblock should be skipped or have special start point
1821 if(gcsbstarttbl[orig->sblockindex] == -1) {
// -1 marks an sblock fully occupied by a large object: skip it
1824 BAMBOO_DEBUGPRINT(0xecc2);
1826 orig->sblockindex += 1;
1827 orig->blockbase += BAMBOO_SMEM_SIZE;
1828 goto outernextSBlock;
1829 } else if((gcsbstarttbl[orig->sblockindex] != 0)
1831 // the first time to access this SBlock
1833 BAMBOO_DEBUGPRINT(0xecc3);
1835 // not start from the very beginning
1836 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1837 } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
1839 // setup information for this sblock
// first word of an sblock holds its used-size; skip the header line
1840 orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1841 orig->offset = BAMBOO_CACHE_LINE_SIZE;
1842 orig->ptr = orig->blockbase + orig->offset;
1844 BAMBOO_DEBUGPRINT(0xecc4);
1845 BAMBOO_DEBUGPRINT_REG(orig->base);
1846 BAMBOO_DEBUGPRINT_REG(orig->bound);
1847 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1848 BAMBOO_DEBUGPRINT_REG(orig->blockbound);
1849 BAMBOO_DEBUGPRINT_REG(orig->blockbase);
1850 BAMBOO_DEBUGPRINT_REG(orig->offset);
1852 if(orig->ptr >= orig->bound) {
1853 // met a lobj, move to next block
1854 goto innernextSBlock;
1858 } // bool nextSBlock(struct moveHelper * orig)
1860 // return false if there are no available data to compact
// initOrig_Dst: initialize the compaction cursors for this core. "to"
// starts at the core's first block base plus one cache-line header; "orig"
// starts at the same base and is positioned on the first scannable sblock
// (delegating to nextSBlock when the first sblock belongs to a large
// object). Under GC_CACHE_ADAPT also seeds the per-page sampling revision
// state. Returns false when there is no data to compact.
// NOTE(review): lines are elided in this extraction (e.g. the assignment
// target before line 1907 and the final return are not visible).
1861 inline bool initOrig_Dst(struct moveHelper * orig,
1862 struct moveHelper * to) {
// destination starts at block 0 of this core, after the block header line
1865 to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
1866 to->bound = BAMBOO_SMEM_SIZE_L;
1867 BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1870 BAMBOO_DEBUGPRINT(0xef01);
1871 BAMBOO_DEBUGPRINT_REG(to->base);
1873 to->ptr = to->base + to->offset;
1874 #ifdef GC_CACHE_ADAPT
1875 // initialize the gc_cache_revise_information
1876 gc_cache_revise_infomation.to_page_start_va = to->ptr;
1877 gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
1878 ((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
1879 gc_cache_revise_infomation.to_page_index =
1880 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
1881 gc_cache_revise_infomation.orig_page_start_va = -1;
1882 #endif // GC_CACHE_ADAPT
1884 // init the orig ptr
1885 orig->numblocks = 0;
1886 orig->base = to->base;
1888 BLOCKINDEX(orig->base, &blocknum);
1889 // check the bamboo_smemtbl to decide the real bound
1890 orig->bound = orig->base + bamboo_smemtbl[blocknum];
1891 orig->blockbase = orig->base;
1892 orig->sblockindex = (orig->base - gcbaseva) / BAMBOO_SMEM_SIZE;
1894 BAMBOO_DEBUGPRINT(0xef02);
1895 BAMBOO_DEBUGPRINT_REG(orig->base);
1896 BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
1897 BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
1898 BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
1901 if(gcsbstarttbl[orig->sblockindex] == -1) {
1903 BAMBOO_DEBUGPRINT(0xef03);
// first sblock holds a large object: jump past it and find the next one
1907 gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
1908 return nextSBlock(orig);
1909 } else if(gcsbstarttbl[orig->sblockindex] != 0) {
1911 BAMBOO_DEBUGPRINT(0xef04);
// usable data in the first sblock starts past its beginning
1913 orig->blockbase = gcsbstarttbl[orig->sblockindex];
1916 BAMBOO_DEBUGPRINT(0xef05);
// sblock header word holds its used-size; position past the header line
1918 orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
1919 orig->offset = BAMBOO_CACHE_LINE_SIZE;
1920 orig->ptr = orig->blockbase + orig->offset;
1922 BAMBOO_DEBUGPRINT(0xef06);
1923 BAMBOO_DEBUGPRINT_REG(orig->base);
1927 } // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
// nextBlock: advance the "to" (destination) cursor to this core's next
// heap block, reserving one cache line at the block start for the block
// header and resetting ptr/offset accordingly.
// NOTE(review): a line is elided here between 1931 and 1933 (embedded
// numbering skips 1932) — presumably the numblocks increment that BASEPTR
// relies on; confirm against the full source before editing.
1929 inline void nextBlock(struct moveHelper * to) {
1930 to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
1931 to->bound += BAMBOO_SMEM_SIZE;
1933 BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
1934 to->offset = BAMBOO_CACHE_LINE_SIZE;
1935 to->ptr = to->base + to->offset;
1936 } // void nextBlock(struct moveHelper * to)
1938 // endaddr does not contain spaces for headers
// moveobj: compact one object. Skips -2 filler bytes, advances across
// sblock boundaries via nextSBlock, decodes the object's type/size (scalar
// via classsize[] or array via element size * length), and, if the object
// header has MARKED set, slides/copies it to the "to" cursor (memmove when
// the ranges may overlap, memcpy otherwise), records the old->new address
// in gcpointertbl (and gcsharedptbl when present), and stamps the old
// header COMPACTED. Rolls the destination to the next block when the
// current one fills, stopping when stopblock is reached. Under
// GC_CACHE_ADAPT, folds per-page access-sampling counts from the source
// page into the destination page whenever either cursor crosses a page.
// NOTE(review): many lines are elided in this extraction (returns, some
// braces, declarations of type/size/isize/mark) — numbering gaps show it.
1939 inline bool moveobj(struct moveHelper * orig,
1940 struct moveHelper * to,
1942 if(stopblock == 0) {
1947 BAMBOO_DEBUGPRINT(0xe201);
1948 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1949 BAMBOO_DEBUGPRINT_REG(to->ptr);
// skip -2 filler bytes left by a previous compaction pass
1957 while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
1958 orig->ptr = (int*)(orig->ptr) + 1;
1960 #ifdef GC_CACHE_ADAPT
1961 if(orig->ptr >= gc_cache_revise_infomation.orig_page_end_va) {
1962 // end of an orig page
1963 // compute the impact of this page for the new page
1964 int tmp_factor = to->ptr-gc_cache_revise_infomation.to_page_start_va;
1965 int topage=gc_cache_revise_infomation.to_page_index;
1966 int oldpage = gc_cache_revise_infomation.orig_page_index;
1967 int * newtable=&gccachesamplingtbl_r[topage];
1968 int * oldtable=&gccachesamplingtbl[oldpage];
// fold each core's sampling count for the old page into the new page
1970 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
1971 (*newtable) += (*oldtable)*tmp_factor;
1972 newtable=(int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
1973 oldtable=(int*)(((char *)oldtable)+size_cachesamplingtbl_local);
1975 // prepare for an new orig page
1976 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
1977 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
1978 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
1979 (BAMBOO_PAGE_SIZE)*(tmp_index+1);
1980 gc_cache_revise_infomation.orig_page_index = tmp_index;
1981 gc_cache_revise_infomation.to_page_start_va = to->ptr;
1984 if((orig->ptr >= orig->bound) || (orig->ptr == orig->blockbound)) {
1985 if(!nextSBlock(orig)) {
1986 // finished, no more data
1992 BAMBOO_DEBUGPRINT(0xe202);
1993 BAMBOO_DEBUGPRINT_REG(orig->ptr);
1994 BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
1996 // check the obj's type, size and mark flag
1997 type = ((int *)(orig->ptr))[0];
2000 // end of this block, go to next one
2001 if(!nextSBlock(orig)) {
2002 // finished, no more data
2006 } else if(type < NUMCLASSES) {
// scalar object: fixed size from the class table
2008 size = classsize[type];
2011 struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
2012 int elementsize=classsize[type];
2013 int length=ao->___length___;
2014 size=sizeof(struct ArrayObject)+length*elementsize;
2016 mark = ((int *)(orig->ptr))[6];
2017 bool isremote = ((((int *)(orig->ptr))[6] & REMOTEM) != 0);
2019 BAMBOO_DEBUGPRINT(0xe203);
2020 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2021 BAMBOO_DEBUGPRINT_REG(size);
2023 ALIGNSIZE(size, &isize); // no matter is the obj marked or not
2024 // should be able to across it
2025 if((mark & MARKED) != 0) {
2027 BAMBOO_DEBUGPRINT(0xe204);
2032 // marked obj, copy it to current heap top
2033 // check to see if remaining space is enough
2034 if(to->top + isize > to->bound) {
2035 // fill 0 indicating the end of this block
2036 BAMBOO_MEMSET_WH(to->ptr, '\0', to->bound - to->top);
2037 // fill the header of this block and then go to next block
2038 to->offset += to->bound - to->top;
2039 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
// block header word records the total used offset within the block
2040 (*((int*)(to->base))) = to->offset;
2041 #ifdef GC_CACHE_ADAPT
2042 int tmp_ptr = to->ptr;
2043 #endif // GC_CACHE_ADAPT
2045 #ifdef GC_CACHE_ADAPT
2046 if((to->ptr) >= gc_cache_revise_infomation.to_page_end_va) {
2047 // end of an to page, wrap up its information
2048 int tmp_factor = tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
2049 int topage=gc_cache_revise_infomation.to_page_index;
2050 int oldpage = gc_cache_revise_infomation.orig_page_index;
2051 int * newtable=&gccachesamplingtbl_r[topage];
2052 int * oldtable=&gccachesamplingtbl[oldpage];
2054 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2055 (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
2056 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2057 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2059 // prepare for an new to page
2060 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2061 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2062 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2063 (BAMBOO_PAGE_SIZE)*(tmp_index+1);
2064 gc_cache_revise_infomation.orig_page_index = tmp_index;
2065 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2066 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2067 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2068 gc_cache_revise_infomation.to_page_index =
2069 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2071 #endif // GC_CACHE_ADAPT
2072 if(stopblock == to->numblocks) {
2073 // already fulfilled the block
2075 } // if(stopblock == to->numblocks)
2076 } // if(to->top + isize > to->bound)
2077 // set the mark field to 2, indicating that this obj has been moved
2078 // and need to be flushed
2079 ((int *)(orig->ptr))[6] = COMPACTED;
2080 if(to->ptr != orig->ptr) {
// overlapping slide must use memmove; disjoint copy can use memcpy
2081 if((int)(orig->ptr) < (int)(to->ptr)+size) {
2082 memmove(to->ptr, orig->ptr, size);
2084 //BAMBOO_WRITE_HINT_CACHE(to->ptr, size);
2085 memcpy(to->ptr, orig->ptr, size);
2087 // fill the remaining space with -2
// alignment padding marker: -2 bytes are skipped on later scans
2088 BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
2090 // store mapping info
2091 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2092 #ifdef LOCALHASHTBL_TEST
2093 RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2095 mgchashInsert_I(gcpointertbl, orig->ptr, to->ptr);
2097 //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
2099 // add to the sharedptbl
2100 if(gcsharedptbl != NULL) {
2101 //GCSharedHashadd_I(gcsharedptbl, orig->ptr, to->ptr);
2102 mgcsharedhashInsert_I(gcsharedptbl, orig->ptr, to->ptr);
2105 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2108 BAMBOO_DEBUGPRINT(0xcdce);
2109 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2110 BAMBOO_DEBUGPRINT_REG(to->ptr);
2111 BAMBOO_DEBUGPRINT_REG(isize);
// live bytes remaining to be compacted on this core
2113 gccurr_heaptop -= isize;
2115 to->offset += isize;
2117 #ifdef GC_CACHE_ADAPT
2118 int tmp_ptr = to->ptr;
2119 #endif // GC_CACHE_ADAPT
2120 if(to->top == to->bound) {
2121 // fill the header of this block and then go to next block
2122 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2123 (*((int*)(to->base))) = to->offset;
2126 #ifdef GC_CACHE_ADAPT
2127 if((to->base+to->bound) >= gc_cache_revise_infomation.to_page_end_va) {
2128 // end of an to page, wrap up its information
2129 int tmp_factor = tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
2130 int topage=gc_cache_revise_infomation.to_page_index;
2131 int oldpage = gc_cache_revise_infomation.orig_page_index;
2132 int * newtable=&gccachesamplingtbl_r[topage];
2133 int * oldtable=&gccachesamplingtbl[oldpage];
2135 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2136 (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
2137 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2138 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2140 // prepare for an new to page
2141 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2142 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2143 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2144 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2145 gc_cache_revise_infomation.orig_page_index =
2146 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2147 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2148 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2149 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2150 gc_cache_revise_infomation.to_page_index =
2151 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2153 #endif // GC_CACHE_ADAPT
2156 #ifdef GC_CACHE_ADAPT
2157 if((to->ptr) >= gc_cache_revise_infomation.to_page_end_va) {
2158 // end of an to page, wrap up its information
2159 int tmp_factor = tmp_ptr-gc_cache_revise_infomation.to_page_start_va;
2160 int topage=gc_cache_revise_infomation.to_page_index;
2161 int oldpage = gc_cache_revise_infomation.orig_page_index;
2162 int * newtable=&gccachesamplingtbl_r[topage];
2163 int * oldtable=&gccachesamplingtbl[oldpage];
2165 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2166 (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
2167 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2168 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2170 // prepare for an new to page
2171 int tmp_index = (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2172 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2173 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2174 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2175 gc_cache_revise_infomation.orig_page_index =
2176 (orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2177 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2178 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2179 (BAMBOO_PAGE_SIZE)*((to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2180 gc_cache_revise_infomation.to_page_index =
2181 (to->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
2183 #endif // GC_CACHE_ADAPT
2186 BAMBOO_DEBUGPRINT(0xe205);
2192 BAMBOO_DEBUGPRINT_REG(isize);
2193 BAMBOO_DEBUGPRINT_REG(size);
2194 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2195 BAMBOO_DEBUGPRINT_REG(orig->bound);
2197 if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
2199 BAMBOO_DEBUGPRINT(0xe206);
2201 if(!nextSBlock(orig)) {
2202 // finished, no more data
2207 BAMBOO_DEBUGPRINT(0xe207);
2208 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2211 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
2213 // should be invoked with interrupt closed
// assignSpareMem_I: (interrupts-closed) carve `requiredmem` bytes (plus a
// cache-line header) out of `sourcecore`'s spare space. Writes the grant's
// start address to *startaddr and the destination block count to *tomove.
// If the request fits inside the source's current block, bumps gcloads;
// otherwise advances the source to its next block. Returns the number of
// bytes still unsatisfied (<= 0 when fully satisfied).
// NOTE(review): the remaining parameter lines, a return, and the newbase
// declaration are elided in this extraction.
2214 inline int assignSpareMem_I(int sourcecore,
2219 BLOCKINDEX(gcloads[sourcecore], &b);
// end address of the block currently being filled on sourcecore
2220 int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
2221 : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
2222 int remain = boundptr - gcloads[sourcecore];
2223 int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
2224 *startaddr = gcloads[sourcecore];
2225 *tomove = gcfilledblocks[sourcecore] + 1;
2226 if(memneed < remain) {
2227 gcloads[sourcecore] += memneed;
2230 // next available block
2231 gcfilledblocks[sourcecore] += 1;
2233 BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
2234 gcloads[sourcecore] = newbase;
// partially satisfied: caller keeps the remainder as a pending request
2235 return requiredmem-remain;
2237 } // int assignSpareMem_I(int ,int * , int * , int * )
2239 // should be invoked with interrupt closed
// gcfindSpareMem_I: (interrupts-closed) look for a finished core with
// spare blocks and delegate to assignSpareMem_I to satisfy `requiredmem`
// for `requiredcore`. If no donor is available now, records the request in
// gcrequiredmems[] so resolvePendingMoveRequest can serve it later.
// NOTE(review): remaining parameter lines and returns are elided in this
// extraction (visible from the embedded numbering gaps).
2240 inline bool gcfindSpareMem_I(int * startaddr,
2245 for(int k = 0; k < NUMCORES4GC; k++) {
// donor = finished (status 0) with unfilled assigned blocks
2246 if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
2247 // check if this stopped core has enough mem
2248 assignSpareMem_I(k, requiredmem, tomove, startaddr);
2253 // if can not find spare mem right now, hold the request
2254 gcrequiredmems[requiredcore] = requiredmem;
2257 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
// compacthelper: run one round of compaction with the given orig/to
// cursors: moveobj until either all marked data (up to gcmarkedptrbound)
// is moved or the assigned gcblock2fill quota is filled. Finalizes the
// last destination block header, publishes to->ptr / to->numblocks via
// the out-parameters, then reports completion: the coordinator core
// updates shared state and may request more space via gcfindSpareMem_I;
// other cores send GCFINISHCOMPACT. On receiving new move parameters
// (gcmovestartaddr/gcblock2fill/gcdstcore) it re-aims the "to" cursor,
// setting *localcompact according to whether the destination is local.
// NOTE(review): waiting loops, returns and several braces are elided in
// this extraction (embedded numbering gaps).
2259 inline bool compacthelper(struct moveHelper * orig,
2260 struct moveHelper * to,
2263 bool * localcompact) {
2264 // scan over all objs in this block, compact the marked objs
2265 // loop stop when finishing either scanning all active objs or
2266 // fulfilled the gcstopblock
2268 BAMBOO_DEBUGPRINT(0xe101);
2269 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
2270 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2273 while(orig->ptr < gcmarkedptrbound) {
2274 bool stop = moveobj(orig, to, gcblock2fill);
2279 #ifdef GC_CACHE_ADAPT
2280 // end of an to page, wrap up its information
2281 int tmp_factor = to->ptr-gc_cache_revise_infomation.to_page_start_va;
2282 int topage=gc_cache_revise_infomation.to_page_index;
2283 int oldpage = gc_cache_revise_infomation.orig_page_index;
2284 int * newtable=&gccachesamplingtbl_r[topage];
2285 int * oldtable=&gccachesamplingtbl[oldpage];
2287 for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
2288 (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
2289 newtable=(int*) (((char *)newtable)+size_cachesamplingtbl_local_r);
2290 oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
2292 #endif // GC_CACHE_ADAPT
2293 // if no objs have been compact, do nothing,
2294 // otherwise, fill the header of this block
2295 if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
2296 BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
2297 (*((int*)(to->base))) = to->offset;
// nothing was placed in this block: give back the reserved header line
2301 to->top -= BAMBOO_CACHE_LINE_SIZE;
2302 } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
2304 *heaptopptr = to->ptr;
2305 *filledblocks = to->numblocks;
2308 BAMBOO_DEBUGPRINT(0xe102);
2309 BAMBOO_DEBUGPRINT_REG(orig->ptr);
2310 BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
2311 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2312 BAMBOO_DEBUGPRINT_REG(*filledblocks);
2313 BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
2316 // send msgs to core coordinator indicating that the compact is finishing
2317 // send compact finish message to core coordinator
2318 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
// coordinator: record own results directly in the shared arrays
2319 gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
2320 gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
2321 if(orig->ptr < gcmarkedptrbound) {
2323 BAMBOO_DEBUGPRINT(0xe103);
// quota filled but data remains: ask for more destination space
2327 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2328 if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
2329 gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
2331 BAMBOO_DEBUGPRINT(0xe104);
2335 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2337 BAMBOO_DEBUGPRINT(0xe105);
2341 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2344 BAMBOO_DEBUGPRINT(0xe106);
2346 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2351 if(orig->ptr < gcmarkedptrbound) {
2353 BAMBOO_DEBUGPRINT(0xe107);
// non-coordinator, not finished: report partial completion
2357 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2358 *filledblocks, *heaptopptr, gccurr_heaptop, false);
2361 BAMBOO_DEBUGPRINT(0xe108);
2362 BAMBOO_DEBUGPRINT_REG(*heaptopptr);
2364 // finish compacting
2365 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2366 *filledblocks, *heaptopptr, 0, false);
2368 } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
2370 if(orig->ptr < gcmarkedptrbound) {
2372 BAMBOO_DEBUGPRINT(0xe109);
2374 // still have unpacked obj
2383 BAMBOO_DEBUGPRINT(0xe10a);
// re-aim the destination cursor at the newly granted space
2386 to->ptr = gcmovestartaddr;
2387 to->numblocks = gcblock2fill - 1;
2388 to->bound = (to->numblocks==0) ?
2389 BAMBOO_SMEM_SIZE_L :
2390 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
2391 BASEPTR(gcdstcore, to->numblocks, &(to->base));
2392 to->offset = to->ptr - to->base;
2393 to->top = (to->numblocks==0) ?
2394 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
2396 to->offset = BAMBOO_CACHE_LINE_SIZE;
2397 to->ptr += to->offset; // for header
2398 to->top += to->offset;
2399 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
2400 *localcompact = true;
2402 *localcompact = false;
2404 #ifdef GC_CACHE_ADAPT
2405 // initialize the gc_cache_revise_information
2406 gc_cache_revise_infomation.to_page_start_va = to->ptr;
2407 gc_cache_revise_infomation.to_page_end_va = gcbaseva +
2408 (BAMBOO_PAGE_SIZE)*((to->base-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2409 gc_cache_revise_infomation.to_page_index =
2410 (to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
2411 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2412 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2413 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2414 gc_cache_revise_infomation.orig_page_index =
2415 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2416 #endif // GC_CACHE_ADAPT
2420 BAMBOO_DEBUGPRINT(0xe10b);
2423 } // void compacthelper()
// compact: per-core entry point of the compact phase. Verifies the phase
// (abort 0xb102 otherwise), allocates orig/to moveHelper cursors, and
// either reports GCFINISHCOMPACT immediately when there is nothing to
// compact (initOrig_Dst returned false) or seeds the GC_CACHE_ADAPT page
// state and runs compacthelper.
// NOTE(review): the function's tail (frees/return and closing brace) is
// elided in this extraction — do not assume the visible call is the end.
2425 inline void compact() {
2426 if(COMPACTPHASE != gcphase) {
2427 BAMBOO_EXIT(0xb102);
2430 // initialize pointers for compacting
2431 struct moveHelper * orig =
2432 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2433 struct moveHelper * to =
2434 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
2436 if(!initOrig_Dst(orig, to)) {
2437 // no available data to compact
2438 // send compact finish msg to STARTUP core
2440 BAMBOO_DEBUGPRINT(0xe001);
2441 BAMBOO_DEBUGPRINT_REG(to->base);
2443 send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
2444 0, to->base, 0, false);
2449 #ifdef GC_CACHE_ADAPT
2450 gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
2451 gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
2452 (BAMBOO_PAGE_SIZE)*((orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
2453 gc_cache_revise_infomation.orig_page_index =
2454 (orig->blockbase-gcbaseva)/(BAMBOO_PAGE_SIZE);
2455 #endif // GC_CACHE_ADAPT
2457 int filledblocks = 0;
2458 INTPTR heaptopptr = 0;
2459 bool localcompact = true;
2460 compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
2466 // if return NULL, means
2467 // 1. objptr is NULL
2468 // 2. objptr is not a shared obj
2469 // in these cases, remain the original value is OK
// flushObj(): translate an object pointer to its post-compaction address.
// Lookup order (visible in this chunk):
//   1) the core-local pointer table (gcpointertbl);
//   2) on miss, the owning host core's shared mapping table
//      (gcrpointertbls[hostcore]) — a hit is cached back locally;
//   3) on a second miss, a GCMAPREQUEST message round-trip to the host
//      core, after which the local table is consulted again.
// A shared object whose mapping should exist on THIS core but doesn't is
// a fatal inconsistency (BAMBOO_EXIT(0xb103)).
// NOTE(review): the embedded line numbers are non-contiguous — the early
// return for objptr == NULL, the #else arms of the LOCALHASHTBL_TEST
// conditionals, and the wait-for-reply loop after GCMAPREQUEST are not
// visible in this extraction.
2470 inline void * flushObj(void * objptr) {
2472 BAMBOO_DEBUGPRINT(0xe401);
2474 if(objptr == NULL) {
2477 void * dstptr = NULL;
2478 if(ISSHAREDOBJ(objptr)) {
2480 BAMBOO_DEBUGPRINT(0xe402);
2481 BAMBOO_DEBUGPRINT_REG(objptr);
2483 // a shared obj ptr, change to new address
// table accesses must happen in runtime (interrupt-protected) mode
2484 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2486 //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
2488 #ifdef LOCALHASHTBL_TEST
2489 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2491 dstptr = mgchashSearch(gcpointertbl, objptr);
2493 //MGCHashget(gcpointertbl, objptr, &dstptr);
2495 //flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
2497 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2499 BAMBOO_DEBUGPRINT_REG(dstptr);
// local table missed: fall through to the remote/shared lookups
2502 if(NULL == dstptr) {
2505 BAMBOO_DEBUGPRINT(0xe403);
2506 BAMBOO_DEBUGPRINT_REG(objptr);
2507 BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
2509 if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
2510 // error! the obj is right on this core, but cannot find it
2511 //BAMBOO_DEBUGPRINT(0xecec);
2512 BAMBOO_DEBUGPRINT_REG(objptr);
2513 BAMBOO_EXIT(0xb103);
2514 // assume that the obj has not been moved, use the original address
2517 int hostc = hostcore(objptr);
2519 //unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
2521 // check the corresponsing sharedptbl
2522 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2523 //struct GCSharedHash * sptbl = gcrpointertbls[hostcore(objptr)];
2524 mgcsharedhashtbl_t * sptbl = gcrpointertbls[hostc];
2526 //GCSharedHashget(sptbl, (int)objptr, &dstptr);
2527 dstptr = mgcsharedhashSearch(sptbl, (int)objptr);
2528 if(dstptr != NULL) {
// found in the host's shared table: cache the mapping locally
2529 #ifdef LOCALHASHTBL_TEST
2530 RuntimeHashadd_I(gcpointertbl, (int)objptr, (int)dstptr);
2532 mgchashInsert_I(gcpointertbl, (int)objptr, (int)dstptr);
2536 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2538 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
2541 if(dstptr == NULL) {
2542 // still can not get the mapping info,
2543 // send msg to host core for the mapping info
2544 gcobj2map = (int)objptr;
2547 // the first time require the mapping, send msg to the hostcore
2548 // for the mapping info
2549 send_msg_3(hostc, GCMAPREQUEST, (int)objptr,
2550 BAMBOO_NUM_OF_CORE, false);
2557 //flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
// re-check the local table — presumably the reply handler has
// inserted the mapping by now (wait loop not visible here; verify)
2559 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2560 #ifdef LOCALHASHTBL_TEST
2561 RuntimeHashget(gcpointertbl, objptr, &dstptr);
2563 dstptr = mgchashSearch(gcpointertbl, objptr);
2565 //MGCHashget(gcpointertbl, objptr, &dstptr);
2566 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2567 } // if(dstptr == NULL)
2568 } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
2570 BAMBOO_DEBUGPRINT_REG(dstptr);
2572 } // if(NULL == dstptr)
2573 } // if(ISSHAREDOBJ(objptr))
2574 // if not a shared obj, return NULL to indicate no need to flush
2576 BAMBOO_DEBUGPRINT(0xe404);
2579 } // void flushObj(void * objptr)
// flushRuntimeObj(): rewrite every runtime-held root pointer to its
// post-compaction address, via flushObj(). Roots visited (in order):
// the mutator stack (garbagelist chain), the per-class parameter-wrapper
// object queues, the current task descriptor, the active task list, the
// incoming transferred-object queue, the outgoing transfer queue, and
// the runtime lock table.
// NOTE(review): line numbers are non-contiguous — the declarations of
// loop indices i/j and the NULL-checks guarding each "dst" store are not
// visible in this extraction; the visible assignments presumably execute
// only when flushObj returned a non-NULL new address — confirm upstream.
2581 inline void flushRuntimeObj(struct garbagelist * stackptr) {
2583 // flush current stack
2584 while(stackptr!=NULL) {
2585 for(i=0; i<stackptr->size; i++) {
2586 if(stackptr->array[i] != NULL) {
2587 void * dst = flushObj(stackptr->array[i]);
2589 stackptr->array[i] = dst;
2593 stackptr=stackptr->next;
// flush queued objects of every class on this core
2597 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
2598 for(i=0; i<NUMCLASSES; i++) {
2599 struct parameterwrapper ** queues =
2600 objectqueues[BAMBOO_NUM_OF_CORE][i];
2601 int length = numqueues[BAMBOO_NUM_OF_CORE][i];
2602 for(j = 0; j < length; ++j) {
2603 struct parameterwrapper * parameter = queues[j];
2604 struct ObjectHash * set=parameter->objectset;
2605 struct ObjectNode * ptr=set->listhead;
2607 void * dst = flushObj((void *)ptr->key);
// keys may have moved, so the hash set must be rebuilt
2613 ObjectHashrehash(set);
2618 // flush current task descriptor
2619 if(currtpd != NULL) {
2620 for(i=0; i<currtpd->numParameters; i++) {
2621 void * dst = flushObj(currtpd->parameterArray[i]);
2623 currtpd->parameterArray[i] = dst;
2628 // flush active tasks
2629 if(activetasks != NULL) {
2630 struct genpointerlist * ptr=activetasks->list;
2632 struct taskparamdescriptor *tpd=ptr->src;
2634 for(i=0; i<tpd->numParameters; i++) {
2635 void * dst = flushObj(tpd->parameterArray[i]);
2637 tpd->parameterArray[i] = dst;
2642 genrehash(activetasks);
2645 // flush cached transferred obj
2646 struct QueueItem * tmpobjptr = getHead(&objqueue);
2647 while(tmpobjptr != NULL) {
2648 struct transObjInfo * objInfo =
2649 (struct transObjInfo *)(tmpobjptr->objectptr);
2650 void * dst = flushObj(objInfo->objptr);
2652 objInfo->objptr = dst;
2654 tmpobjptr = getNextQueueItem(tmpobjptr);
2657 // flush cached objs to be transferred
2658 struct QueueItem * item = getHead(totransobjqueue);
2659 while(item != NULL) {
2660 struct transObjInfo * totransobj =
2661 (struct transObjInfo *)(item->objectptr);
2662 void * dst = flushObj(totransobj->objptr);
2664 totransobj->objptr = dst;
2666 item = getNextQueueItem(item);
2667 } // while(item != NULL)
2669 // enqueue lock related info
2670 for(i = 0; i < runtime_locklen; ++i) {
2671 void * dst = flushObj(runtime_locks[i].redirectlock);
// lock fields store addresses as ints, hence the casts
2673 runtime_locks[i].redirectlock = (int)dst;
2675 if(runtime_locks[i].value != NULL) {
2676 void * dst=flushObj(runtime_locks[i].value);
2678 runtime_locks[i].value = (int)dst;
2683 } // void flushRuntimeObj(struct garbagelist * stackptr)
// transmappinginfo(): broadcast this core's shared pointer-mapping table
// (gcsharedptbl) to every other active core, then report completion of
// the mapping phase to the startup core (unless this IS the startup
// core, which collects the reports instead of sending one).
2685 inline void transmappinginfo() {
2686 // broadcast the sharedptbl pointer
2687 for(int i = 0; i < NUMCORESACTIVE; i++) {
2688 if(i != BAMBOO_NUM_OF_CORE) {
2689 send_msg_3(i, GCMAPTBL, gcsharedptbl, BAMBOO_NUM_OF_CORE, false);
2693 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
2694 send_msg_2(STARTUPCORE, GCFINISHMAPINFO, BAMBOO_NUM_OF_CORE, false);
// flush(): the core-local flush phase. After compaction, every live
// object may contain stale pointers; this walks (1) the runtime roots
// via flushRuntimeObj(), (2) each marked object dequeued from the GC
// work queue, and (3) each large object from the lobj queue, rewriting
// every interior pointer through flushObj(). Object header layout used
// here: word [0] is the type index, word [6] is the GC status word
// (COMPACTED -> reset to INIT once flushed). Per-type pointer layout
// comes from pointerarray[type]: 0 = primitive array (nothing to do),
// 1 = array of pointers, otherwise a table whose [0] is the number of
// offsets and [1..size] are byte offsets of pointer fields.
// Finishes by reporting GCFINISHFLUSH to the startup core (or clearing
// its own status slot if this IS the startup core).
// NOTE(review): non-contiguous line numbers — the while-loop header over
// gc_dequeue, several closing braces, the "tptr" declaration referenced
// by the debug prints, and the loop-index declarations are not visible
// in this extraction.
2698 inline void flush(struct garbagelist * stackptr) {
2700 flushRuntimeObj(stackptr);
2703 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2704 bool hasItems = gc_moreItems_I();
2705 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2711 BAMBOO_DEBUGPRINT(0xe301);
// queue access must happen in runtime (interrupt-protected) mode
2713 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
2714 void * ptr = gc_dequeue_I();
2715 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
2716 if(ISSHAREDOBJ(ptr)) {
2717 // should be a local shared obj and should have mapping info
2718 ptr = flushObj(ptr);
2720 BAMBOO_DEBUGPRINT(0xe302);
2721 BAMBOO_DEBUGPRINT_REG(ptr);
2722 BAMBOO_DEBUGPRINT_REG(tptr);
2723 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
// shared obj without mapping info at flush time: fatal
2726 BAMBOO_EXIT(0xb105);
2728 } // if(ISSHAREDOBJ(ptr))
2729 if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
2730 int type = ((int *)(ptr))[0];
2731 // scan all pointers in ptr
2732 unsigned INTPTR * pointer;
2733 pointer=pointerarray[type];
2735 BAMBOO_DEBUGPRINT(0xe303);
2736 BAMBOO_DEBUGPRINT_REG(pointer);
2739 /* Array of primitives */
2741 } else if (((INTPTR)pointer)==1) {
2743 BAMBOO_DEBUGPRINT(0xe304);
2745 /* Array of pointers */
2746 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2747 int length=ao->___length___;
2749 for(j=0; j<length; j++) {
2751 BAMBOO_DEBUGPRINT(0xe305);
// element storage begins sizeof(int) past the length field
2754 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2756 BAMBOO_DEBUGPRINT_REG(objptr);
2758 if(objptr != NULL) {
2759 void * dst = flushObj(objptr);
2761 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2767 BAMBOO_DEBUGPRINT(0xe306);
// normal object: pointer[0] = field count, pointer[1..size] = offsets
2769 INTPTR size=pointer[0];
2771 for(i=1; i<=size; i++) {
2773 BAMBOO_DEBUGPRINT(0xe307);
2775 unsigned int offset=pointer[i];
2776 void * objptr=*((void **)(((char *)ptr)+offset));
2778 BAMBOO_DEBUGPRINT_REG(objptr);
2780 if(objptr != NULL) {
2781 void * dst = flushObj(objptr);
2783 *((void **)(((char *)ptr)+offset)) = dst;
2786 } // for(i=1; i<=size; i++)
2787 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2788 // restore the mark field, indicating that this obj has been flushed
2789 if(ISSHAREDOBJ(ptr)) {
2790 ((int *)(ptr))[6] = INIT;
2792 } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
2793 } // while(gc_moreItems())
2795 BAMBOO_DEBUGPRINT(0xe308);
2798 // TODO bug here: the startup core contains all lobjs' info, thus all the
2799 // lobjs are flushed in sequence.
2801 while(gc_lobjmoreItems_I()) {
2803 BAMBOO_DEBUGPRINT(0xe309);
2805 void * ptr = gc_lobjdequeue_I(NULL, NULL);
2806 ptr = flushObj(ptr);
2808 BAMBOO_DEBUGPRINT(0xe30a);
2809 BAMBOO_DEBUGPRINT_REG(ptr);
2810 BAMBOO_DEBUGPRINT_REG(tptr);
2811 BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
// large obj without mapping info: fatal (distinct code from 0xb105)
2814 BAMBOO_EXIT(0xb106);
2816 if(((int *)(ptr))[6] == COMPACTED) {
2817 int type = ((int *)(ptr))[0];
2818 // scan all pointers in ptr
2819 unsigned INTPTR * pointer;
2820 pointer=pointerarray[type];
2822 BAMBOO_DEBUGPRINT(0xe30b);
2823 BAMBOO_DEBUGPRINT_REG(pointer);
2826 /* Array of primitives */
2828 } else if (((INTPTR)pointer)==1) {
2830 BAMBOO_DEBUGPRINT(0xe30c);
2832 /* Array of pointers */
2833 struct ArrayObject *ao=(struct ArrayObject *) ptr;
2834 int length=ao->___length___;
2836 for(j=0; j<length; j++) {
2838 BAMBOO_DEBUGPRINT(0xe30d);
2841 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
2843 BAMBOO_DEBUGPRINT_REG(objptr);
2845 if(objptr != NULL) {
2846 void * dst = flushObj(objptr);
2848 ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
2854 BAMBOO_DEBUGPRINT(0xe30e);
2856 INTPTR size=pointer[0];
2858 for(i=1; i<=size; i++) {
2860 BAMBOO_DEBUGPRINT(0xe30f);
2862 unsigned int offset=pointer[i];
2863 void * objptr=*((void **)(((char *)ptr)+offset));
2866 BAMBOO_DEBUGPRINT_REG(objptr);
2868 if(objptr != NULL) {
2869 void * dst = flushObj(objptr);
2871 *((void **)(((char *)ptr)+offset)) = dst;
2874 } // for(i=1; i<=size; i++)
2875 } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
2876 // restore the mark field, indicating that this obj has been flushed
2877 ((int *)(ptr))[6] = INIT;
2878 } // if(((int *)(ptr))[6] == COMPACTED)
2879 } // while(gc_lobjmoreItems())
2881 BAMBOO_DEBUGPRINT(0xe310);
2884 // send flush finish message to core coordinator
2885 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
2886 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
2888 send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
2891 BAMBOO_DEBUGPRINT(0xe311);
2895 #ifdef GC_CACHE_ADAPT
2896 // prepare for cache adaption:
2897 // -- flush the shared heap
2898 // -- clean dtlb entries
2899 // -- change cache strategy
// cacheAdapt_gc(): switch the core between GC-time and mutator-time
// caching. Flushes the L2 so no stale data survives the policy change,
// invalidates the DTLB so new page policies take effect, then records
// the new mode in the global gccachestage flag.
// @param isgccachestage  true when entering GC, false when returning
//                        control to the mutator.
2900 void cacheAdapt_gc(bool isgccachestage) {
2901 // flush the shared heap
2902 BAMBOO_CACHE_FLUSH_L2();
2904 // clean the dtlb entries
2905 BAMBOO_CLEAN_DTLB();
2907 // change the cache strategy
2908 gccachestage = isgccachestage;
2909 } // cacheAdapt_gc(bool isgccachestage)
2911 // the master core decides how to adapt cache strategy for the mutator
2912 // according to collected statistic data
2914 // make all pages hfh
// cacheAdapt_policy_h4h(): cache-adaptation policy #1 — mark every
// shared-heap page hash-for-home (BAMBOO_CACHE_MODE_HASH). Writes
// (page_index, policy word) pairs into gccachepolicytbl starting at
// slot 1 (slot 0 holds the count, filled in by the caller).
// NOTE(review): non-contiguous line numbers — the declarations of
// page_sva and the tmp_p increments between the two stores, plus the
// return of the change count, are not visible in this extraction.
2915 int cacheAdapt_policy_h4h(){
2916 unsigned int page_index = 0;
2918 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2920 int * tmp_p = gccachepolicytbl+1;
2921 for(page_index = 0; page_index < page_num; page_index++) {
2922 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2923 bamboo_cache_policy_t policy = {0};
2924 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
// record pair: page index, then the encoded policy word
2925 *tmp_p = page_index;
2927 *tmp_p = policy.word;
2933 } // int cacheAdapt_policy_hfh()
2935 // make all pages local as non-cache-adaptable gc local mode
// cacheAdapt_policy_local(): cache-adaptation policy #2 — home every
// page on the core that owns its block (gc_block2core), i.e. plain
// local caching with no sampling input. Emits (page_index, policy word)
// pairs into gccachepolicytbl like the other policies.
// NOTE(review): non-contiguous line numbers — the declaration of
// "block", the tmp_p increments, and the returned change count are not
// visible in this extraction.
2936 int cacheAdapt_policy_local(){
2937 unsigned int page_index = 0;
2939 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2941 int * tmp_p = gccachepolicytbl+1;
2942 for(page_index = 0; page_index < page_num; page_index++) {
2943 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2944 bamboo_cache_policy_t policy = {0};
2946 BLOCKINDEX(page_sva, &block);
2947 int coren = gc_block2core[block%(NUMCORES4GC*2)];
2948 // locally cache the page in the hotest core
2949 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2950 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
// hardware tile coordinates are 1-based here, hence the +1
2951 policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
2952 policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
2953 *tmp_p = page_index;
2955 *tmp_p = policy.word;
2961 } // int cacheAdapt_policy_local()
// cacheAdapt_policy_hotest(): cache-adaptation policy #3 — for each
// page, scan the per-core access-frequency samples (gccachesamplingtbl_r,
// strided by size_cachesamplingtbl_local_r per core) and home the page
// on the core with the highest count. Untouched pages keep their old
// policy. Emits (page_index, policy word) pairs into gccachepolicytbl.
// NOTE(review): non-contiguous line numbers — the declarations of
// hotfreq/hotestcore, the "page untouched" skip branch, the tmp_p
// increments, and the returned change count are not visible here.
2963 int cacheAdapt_policy_hotest(){
2964 unsigned int page_index = 0;
2966 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
2968 int * tmp_p = gccachepolicytbl+1;
2969 for(page_index = 0; page_index < page_num; page_index++) {
2970 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
2971 bamboo_cache_policy_t policy = {0};
// walk one column of the sampling table: same page, every core
2975 int *local_tbl=&gccachesamplingtbl_r[page_index];
2976 for(int i = 0; i < NUMCORESACTIVE; i++) {
2977 int freq = *local_tbl;
2978 local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
2980 // check the freqency, decide if this page is hot for the core
2981 if(hotfreq < freq) {
2987 // Decide the cache strategy for this page
2988 // If decide to adapt a new cache strategy, write into the shared block of
2989 // the gcsharedsamplingtbl. The mem recording information that has been
2990 // written is enough to hold the information.
2991 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
2993 // this page has not been accessed, do not change its cache policy
2996 // locally cache the page in the hotest core
2997 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
2998 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
2999 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3000 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3001 *tmp_p = page_index;
3003 *tmp_p = policy.word;
3010 } // int cacheAdapt_policy_hotest()
3012 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
3013 // cache the page on the core that accesses it the most if that core accesses
3014 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
// cacheAdapt_policy_dominate(): cache-adaptation policy #4 — home a page
// on its hottest core only if that core accounts for more than
// GC_CACHE_ADAPT_DOMINATE_THRESHOLD percent of the page's total
// accesses; otherwise fall back to hash-for-home. Both frequencies are
// normalized by BAMBOO_PAGE_SIZE before the comparison.
// NOTE(review): non-contiguous line numbers — hotfreq/hotestcore/
// totalfreq declarations, the totalfreq accumulation inside the scan
// loop, the untouched-page skip, the tmp_p increments, and the returned
// change count are not visible in this extraction.
3016 int cacheAdapt_policy_dominate(){
3017 unsigned int page_index = 0;
3019 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3021 int * tmp_p = gccachepolicytbl+1;
3022 for(page_index = 0; page_index < page_num; page_index++) {
3023 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3024 bamboo_cache_policy_t policy = {0};
3029 int *local_tbl=&gccachesamplingtbl_r[page_index];
3030 for(int i = 0; i < NUMCORESACTIVE; i++) {
3031 int freq = *local_tbl;
3032 local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3035 // check the freqency, decide if this page is hot for the core
3036 if(hotfreq < freq) {
3042 // Decide the cache strategy for this page
3043 // If decide to adapt a new cache strategy, write into the shared block of
3045 // Format: page start va + cache policy
3047 // this page has not been accessed, do not change its cache policy
// dominance test: scale total by the threshold percentage first
3050 totalfreq = (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
3051 hotfreq/=BAMBOO_PAGE_SIZE;
3052 if(hotfreq < totalfreq) {
// no single core dominates: hash the page across cores
3054 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3056 // locally cache the page in the hotest core
3057 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3058 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3059 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3060 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3062 *tmp_p = page_index;
3064 *tmp_p = policy.word;
3070 } // int cacheAdapt_policy_dominate()
3072 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 20000
// gc_quicksort(): recursive quicksort over an array of 3-int records,
// ordering records descending by one field selected via "offset"
// (record fields are addressed as array[idx*3 - offset]). Used to rank
// core2heavypages entries by remote-access count or total frequency.
// NOTE(review): the remaining parameter declarations (left, right,
// offset) and the leftIdx/rightIdx initializations fall on lines not
// visible in this extraction, as do the leftIdx++/rightIdx-- advances
// inside the two inner scans — the analysis below assumes the standard
// shape; confirm against the full source.
3074 void gc_quicksort(int *array,
3080 int rightIdx = right;
3081 if((right-left+1) >= 1) {
3082 pivot = (left+right)/2;
3083 while((leftIdx <= pivot) && (rightIdx >= pivot)) {
3084 int pivotValue = array[pivot*3-offset];
// descending order: skip elements already >= pivot on the left
3085 while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
3088 while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
3091 // swap [leftIdx] & [rightIdx]
// swap the whole 3-int record, not just the key field
3092 for(int k = 0; k < 3; k++) {
3093 int tmp = array[3*rightIdx-k];
3094 array[3*rightIdx-k] = array[3*leftIdx-k];
3095 array[3*leftIdx-k] = tmp;
// keep the pivot's index current when it was one of the swapped slots
3099 if((leftIdx-1) == pivot) {
3100 pivot = rightIdx = rightIdx + 1;
3101 } else if((leftIdx+1) == pivot) {
3102 pivot = leftIdx = leftIdx-1;
3105 gc_quicksort(array, left, pivot-1, offset);
3106 gc_quicksort(array, pivot+1, right, offset);
3109 } // void gc_quicksort(...)
3111 // Every page cached on the core that accesses it the most.
3112 // Check to see if any core's pages total more accesses than threshold
3113 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3114 // most remote accesses and hash for home them until we get below
3115 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
// cacheAdapt_policy_overload(): cache-adaptation policy #5 — first home
// every page on its hottest core (like policy_hotest), while recording
// per-core workload. Then, for any core whose accumulated workload
// exceeds a threshold (total_workload/10 as written; the
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD constant is commented out), demote
// that core's pages with the most remote accesses to hash-for-home
// until the core drops below the threshold.
// core2heavypages[c] layout: [0] = entry count; then per entry a triple
// ([3k+1] = pointer to the policy slot in gccachepolicytbl,
//  [3k+2] = total page frequency, [3k+3] = remote-access count).
// NOTE(review): non-contiguous line numbers — hotfreq/hotestcore/
// totalfreq declarations and accumulation, the untouched-page skip,
// tmp_p increments, the j loop variable/advance in the demotion loop,
// and the returned change count are not visible in this extraction.
3116 int cacheAdapt_policy_overload(){
3117 unsigned int page_index = 0;
3119 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3121 int * tmp_p = gccachepolicytbl+1;
3122 unsigned long long workload[NUMCORESACTIVE];
3123 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3124 unsigned long long total_workload = 0;
// VLA sized by page count — presumably small enough for the stack here
3125 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3126 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3127 for(page_index = 0; page_index < page_num; page_index++) {
3128 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3129 bamboo_cache_policy_t policy = {0};
3134 int *local_tbl=&gccachesamplingtbl_r[page_index];
3135 for(int i = 0; i < NUMCORESACTIVE; i++) {
3136 int freq = *local_tbl;
3137 local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3140 // check the freqency, decide if this page is hot for the core
3141 if(hotfreq < freq) {
3146 /*if(page_sva == 0x10e90000) {
3147 if(freq != 0) tprintf("0x10e90000 core %d, %d\n", i, freq);
3151 // Decide the cache strategy for this page
3152 // If decide to adapt a new cache strategy, write into the shared block of
3153 // the gcsharedsamplingtbl. The mem recording information that has been
3154 // written is enough to hold the information.
3155 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3157 // this page has not been accessed, do not change its cache policy
3161 totalfreq/=BAMBOO_PAGE_SIZE;
3162 hotfreq/=BAMBOO_PAGE_SIZE;
3163 // locally cache the page in the hotest core
3164 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3165 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3166 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3167 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3168 *tmp_p = page_index;
3170 *tmp_p = policy.word;
3173 workload[hotestcore] += totalfreq;
3174 total_workload += totalfreq;
3175 // insert into core2heavypages using quicksort
3176 int remoteaccess = totalfreq - hotfreq;
3177 int index = core2heavypages[hotestcore][0];
3178 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3179 core2heavypages[hotestcore][3*index+2] = totalfreq;
// stores the address of this page's policy word for later rewrite
3180 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3181 core2heavypages[hotestcore][0]++;
3183 /*if(page_sva == 0x10f10000) {
3185 BLOCKINDEX(page_sva, &block);
3186 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3187 int coord_x = bamboo_cpu2coords[2*coren]+1;
3188 int coord_y = bamboo_cpu2coords[2*coren+1]+1;
3189 tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
// overload cut-off: 10% of the total observed workload
3193 int workload_threshold = total_workload / 10;
3194 // Check the workload of each core
3195 for(int i = 0; i < NUMCORESACTIVE; i++) {
3197 int index = core2heavypages[i][0];
3198 if(workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) {
3199 // sort according to the remoteaccess
3200 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3201 while((workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) && (j<index*3)) {
3202 // hfh those pages with more remote accesses
3203 bamboo_cache_policy_t policy = {0};
3204 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
// rewrite the policy word in-place through the saved slot pointer
3205 *((int*)core2heavypages[i][j]) = policy.word;
3206 workload[i] -= core2heavypages[i][j+1];
3213 } // int cacheAdapt_policy_overload()
3215 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
3216 #define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
3217 // Every page cached on the core that accesses it the most.
3218 // Check to see if any core's pages total more accesses than threshold
3219 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
3220 // most remote accesses and hash for home them until we get below
3221 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
3222 // Sort pages based on activity....
3223 // If more then GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
3224 // core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
3225 // then start hfh these pages(selecting the ones with the most remote
3226 // accesses first or fewest local accesses) until we get below
3227 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
// cacheAdapt_policy_crowd(): cache-adaptation policy #6 — extends
// policy #5 (overload). First homes every page on its hottest core and
// applies the same overload demotion. Then, per core, checks whether
// GC_CACHE_ADAPT_ACCESS_THRESHOLD percent of the remaining workload is
// concentrated ("crowded") on more than GC_CACHE_ADAPT_CROWD_THRESHOLD
// pages; if so, re-sorts by remote access and demotes pages to
// hash-for-home until below the crowd threshold.
// core2heavypages[c] layout (same as policy_overload): [0] = count;
// per entry [3k+1] = policy-slot pointer, [3k+2] = total freq,
// [3k+3] = remote-access count.
// NOTE(review): non-contiguous line numbers — several declarations
// (hotfreq, hotestcore, totalfreq, j, t_workload), loop advances, and
// the tail of the crowd loop (partly visible only as commented-out
// code around 3347-3359) are missing from this extraction; the crowd
// logic below should be read against the full source.
3228 int cacheAdapt_policy_crowd(){
3229 unsigned int page_index = 0;
3231 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3233 int * tmp_p = gccachepolicytbl+1;
3234 unsigned long long workload[NUMCORESACTIVE];
3235 memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
3236 unsigned long long total_workload = 0;
3237 int core2heavypages[NUMCORESACTIVE][page_num*3+1];
3238 memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE);
3239 for(page_index = 0; page_index < page_num; page_index++) {
3240 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3241 bamboo_cache_policy_t policy = {0};
3246 int *local_tbl=&gccachesamplingtbl_r[page_index];
3247 for(int i = 0; i < NUMCORESACTIVE; i++) {
3248 int freq = *local_tbl;
3249 local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
3252 // check the freqency, decide if this page is hot for the core
3253 if(hotfreq < freq) {
3258 /*if(page_sva == 0x10e90000) {
3259 if(freq != 0) tprintf("0x10e90000 core %d, %d\n", i, freq);
3263 // Decide the cache strategy for this page
3264 // If decide to adapt a new cache strategy, write into the shared block of
3265 // the gcsharedsamplingtbl. The mem recording information that has been
3266 // written is enough to hold the information.
3267 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
3269 // this page has not been accessed, do not change its cache policy
3272 totalfreq/=BAMBOO_PAGE_SIZE;
3273 hotfreq/=BAMBOO_PAGE_SIZE;
3274 // locally cache the page in the hotest core
3275 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
3276 policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
3277 policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
3278 policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
3279 *tmp_p = page_index;
3281 *tmp_p = policy.word;
3284 workload[hotestcore] += totalfreq;
3285 total_workload += totalfreq;
3286 // insert into core2heavypages using quicksort
3287 int remoteaccess = totalfreq - hotfreq;
3288 int index = core2heavypages[hotestcore][0];
3289 core2heavypages[hotestcore][3*index+3] = remoteaccess;
3290 core2heavypages[hotestcore][3*index+2] = totalfreq;
// address of the page's policy word, for in-place rewrite later
3291 core2heavypages[hotestcore][3*index+1] = tmp_p-1;
3292 core2heavypages[hotestcore][0]++;
3294 /*if(page_sva == 0x10f10000) {
3296 BLOCKINDEX(page_sva, &block);
3297 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3298 int coord_x = bamboo_cpu2coords[2*coren]+1;
3299 int coord_y = bamboo_cpu2coords[2*coren+1]+1;
3300 tprintf("+++ %x(%d-%d,%d) hotcore %d, total %d, hot %d, remote %d, index %d p %x\n", (int)page_sva, coren, coord_x, coord_y, hotestcore, totalfreq, hotfreq, remoteaccess, index, (int)(tmp_p-1));
3304 int workload_threshold = total_workload / 10;
3305 // Check the workload of each core
3306 for(int i = 0; i < NUMCORESACTIVE; i++) {
3308 int index = core2heavypages[i][0];
3309 if(workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) {
3310 // sort according to the remoteaccess
3311 gc_quicksort(&core2heavypages[i][0], 1, index, 0);
3312 while((workload[i] > workload_threshold/*GC_CACHE_ADAPT_OVERLOAD_THRESHOLD*/) && (j<index*3)) {
3313 // hfh those pages with more remote accesses
3314 bamboo_cache_policy_t policy = {0};
3315 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3316 *((int*)core2heavypages[i][j]) = policy.word;
3317 workload[i] -= core2heavypages[i][j+1];
3322 // Check if the accesses are crowded on few pages
3323 // sort according to the total access
// re-sort the surviving entries (from j/3+1) by total frequency
3325 gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
3326 int threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3327 int num_crowded = 0;
// count how many of the hottest pages it takes to reach the threshold
3330 t_workload += core2heavypages[i][j+num_crowded*3+1];
3332 } while(t_workload < threshold);
3333 // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough
3334 // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
3335 if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
3337 // need to hfh these pages
3338 // sort the pages according to remote access
3339 gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
3340 //while((num_crowded--) && (j < index*3)) {
3341 // h4h those pages with more remote accesses
3342 bamboo_cache_policy_t policy = {0};
3343 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
3344 *((int*)core2heavypages[i][j]) = policy.word;
3345 workload[i] -= core2heavypages[i][j+1];
3346 t_workload -= core2heavypages[i][j+1];
3347 /*if((j/3+GC_CACHE_ADAPT_CROWD_THRESHOLD) < index) {
3349 core2heavypages[i][j+GC_CACHE_ADAPT_CROWD_THRESHOLD*3+1];
3352 threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
3353 /*if(t_workload <= threshold) {
3357 if((j < index*3) && (t_workload > threshold)) {
3358 num_crowded = ((index-j/3) > GC_CACHE_ADAPT_CROWD_THRESHOLD) ?
3359 (GC_CACHE_ADAPT_CROWD_THRESHOLD) : (index-j/3);*/
3366 } // int cacheAdapt_policy_overload()
// cacheAdapt_master(): run on the master core after sampling — selects
// exactly one cacheAdapt_policy_* routine at compile time via the
// GC_CACHE_ADAPT_POLICY1..6 macros, then stores the number of changed
// pages into gccachepolicytbl[0] so worker cores (cacheAdapt_mutator)
// know how many (page_index, policy) pairs follow.
// NOTE(review): the declaration of numchanged, the fallback branch when
// no policy macro is defined, and the function's closing lines fall on
// lines not visible in this extraction.
3368 void cacheAdapt_master() {
3369 #ifdef GC_CACHE_ADAPT
3370 //gc_output_cache_sampling_r();
3371 #endif // GC_CACHE_ADAPT
3373 // check the statistic data
3374 // for each page, decide the new cache strategy
3375 #ifdef GC_CACHE_ADAPT_POLICY1
3376 numchanged = cacheAdapt_policy_h4h();
3377 #elif defined GC_CACHE_ADAPT_POLICY2
3378 numchanged = cacheAdapt_policy_local();
3379 #elif defined GC_CACHE_ADAPT_POLICY3
3380 numchanged = cacheAdapt_policy_hotest();
3381 #elif defined GC_CACHE_ADAPT_POLICY4
3382 numchanged = cacheAdapt_policy_dominate();
3383 #elif defined GC_CACHE_ADAPT_POLICY5
3384 numchanged = cacheAdapt_policy_overload();
3385 #elif defined GC_CACHE_ADAPT_POLICY6
3386 numchanged = cacheAdapt_policy_crowd();
// publish the count in slot 0; the pairs start at gccachepolicytbl+1
3388 *gccachepolicytbl = numchanged;
3390 //if(numchanged > 0) tprintf("=================\n");
3393 // adapt the cache strategy for the mutator
3394 void cacheAdapt_mutator() {
3395 int numchanged = *gccachepolicytbl;
3396 // check the changes and adapt them
3397 int * tmp_p = gccachepolicytbl+1;
3398 while(numchanged--) {
3399 // read out the policy
3400 int page_index = *tmp_p;
3401 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
3403 /*if(BAMBOO_NUM_OF_CORE == 0) {
3404 tprintf("va: %x, policy: %d (%d,%d) \n",
3405 (int)(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva), policy.cache_mode,
3406 policy.lotar_x, policy.lotar_y);
3409 bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
3410 policy, BAMBOO_PAGE_SIZE);
3414 //if(BAMBOO_NUM_OF_CORE == 0) tprintf("=================\n"); // TODO
// gc_output_cache_sampling(): debug dump of the pre-GC cache sampling
// table (gccachesamplingtbl). For each shared-heap page, prints its VA,
// index, and owning core, then one page-size-normalized access count
// per active core.
// NOTE(review): the declaration of "block" and the newline/closing
// lines of the per-page loop are not visible in this extraction.
3417 void gc_output_cache_sampling() {
3418 unsigned int page_index = 0;
3420 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3421 for(page_index = 0; page_index < page_num; page_index++) {
3422 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3424 BLOCKINDEX(page_sva, &block);
3425 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3426 tprintf("va: %x page_index: %d host: %d\n",
3427 (int)page_sva, page_index, coren);
3428 for(int i = 0; i < NUMCORESACTIVE; i++) {
// each core's sampling rows are size_cachesamplingtbl_local apart
3429 int * local_tbl = (int *)((void *)gccachesamplingtbl
3430 +size_cachesamplingtbl_local*i);
3431 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3432 printf("%8d ",freq);
3436 printf("=================\n");
3437 } // gc_output_cache_sampling
// gc_output_cache_sampling_r(): same debug dump as
// gc_output_cache_sampling(), but over the "_r" table
// (gccachesamplingtbl_r / size_cachesamplingtbl_local_r) — the samples
// the cacheAdapt_policy_* routines actually consume.
// NOTE(review): the declaration of "block" and the per-page loop's
// closing lines are not visible in this extraction.
3439 void gc_output_cache_sampling_r() {
3440 unsigned int page_index = 0;
3442 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
3443 for(page_index = 0; page_index < page_num; page_index++) {
3444 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
3446 BLOCKINDEX(page_sva, &block);
3447 int coren = gc_block2core[block%(NUMCORES4GC*2)];
3448 tprintf("va: %x page_index: %d host: %d\n",
3449 (int)page_sva, page_index, coren);
3450 for(int i = 0; i < NUMCORESACTIVE; i++) {
3451 int * local_tbl = (int *)((void *)gccachesamplingtbl_r
3452 +size_cachesamplingtbl_local_r*i);
3453 int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
3454 printf("%8d ",freq);
3458 printf("=================\n");
3459 } // gc_output_cache_sampling
3460 #endif // GC_CACHE_ADAPT
// gc_collect(): GC worker routine for a collector core. Announces
// readiness (GCFINISHPRE), then steps through the global gcphase state
// machine: INIT -> MARK -> compact -> MAP -> FLUSH -> (cache-adapt
// PREFINISH) -> FINISH, sending a finish message to STARTUPCORE at the
// end of each phase it participates in.
// NOTE(review): non-contiguous line numbers — the busy-wait loops
// between phases, the initGC()/transmappinginfo()/flush() calls implied
// by the surrounding prints, and most closing braces fall on lines not
// visible in this extraction; only the visible messaging and prints are
// documented as fact.
3462 inline void gc_collect(struct garbagelist * stackptr) {
3463 // inform the master that this core is at a gc safe point and is ready to
3465 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3466 self_numreceiveobjs, false);
3468 // core collector routine
3470 if(INITPHASE == gcphase) {
3474 #ifdef RAWPATH // TODO GC_DEBUG
3475 printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3478 #ifdef GC_CACHE_ADAPT
3479 // prepare for cache adaption:
3480 cacheAdapt_gc(true);
3481 #endif // GC_CACHE_ADAPT
3482 //send init finish msg to core coordinator
3483 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3486 if(MARKPHASE == gcphase) {
3490 #ifdef RAWPATH // TODO GC_DEBUG
3491 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3492 udn_tile_coord_y());
// collector cores mark from their own stack roots
3494 mark(true, stackptr);
3495 #ifdef RAWPATH // TODO GC_DEBUG
3496 printf("(%x,%x) Finish mark phase, start compact phase\n",
3497 udn_tile_coord_x(), udn_tile_coord_y());
3500 #ifdef RAWPATH // TODO GC_DEBUG
3501 printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
3502 udn_tile_coord_y());
3506 if(MAPPHASE == gcphase) {
3510 #ifdef RAWPATH // TODO GC_DEBUG
3511 printf("(%x,%x) Start map phase\n", udn_tile_coord_x(),
3512 udn_tile_coord_y());
3515 #ifdef RAWPATH // TODO GC_DEBUG
3516 printf("(%x,%x) Finish map phase\n", udn_tile_coord_x(),
3517 udn_tile_coord_y());
3521 if(FLUSHPHASE == gcphase) {
3525 #ifdef RAWPATH // TODO GC_DEBUG
3526 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3527 udn_tile_coord_y());
3530 // send the num of obj/liveobj/forwardobj to the startupcore
3531 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3532 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3533 gc_num_liveobj, gc_num_forwardobj, false);
3536 #endif // GC_PROFLIE
3538 #ifdef RAWPATH // TODO GC_DEBUG
3539 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3540 udn_tile_coord_y());
3543 #ifdef GC_CACHE_ADAPT
3545 if(PREFINISHPHASE == gcphase) {
3549 #ifdef RAWPATH // TODO GC_DEBUG
3550 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3551 udn_tile_coord_y());
3553 // cache adapt phase
// apply the master's policy table, then leave GC caching mode
3554 cacheAdapt_mutator();
3555 cacheAdapt_gc(false);
3556 //send init finish msg to core coordinator
3557 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3558 #ifdef RAWPATH // TODO GC_DEBUG
3559 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3560 udn_tile_coord_y());
3562 #endif // GC_CACHE_ADAPT
3565 if(FINISHPHASE == gcphase) {
3569 #ifdef RAWPATH // TODO GC_DEBUG
3570 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3572 } // void gc_collect(struct garbagelist * stackptr)
// gc_nocollect(): GC routine for a core that does NOT run the collector
// (non-GC core). Mirrors gc_collect() but skips the compact/map phases:
// it still participates in INIT, MARK, FLUSH, the optional cache-adapt
// PREFINISH, and FINISH, reporting each phase to STARTUPCORE.
// NOTE(review): as with gc_collect(), the inter-phase wait loops and
// several closing braces fall on lines not visible in this extraction.
3574 inline void gc_nocollect(struct garbagelist * stackptr) {
3575 // inform the master that this core is at a gc safe point and is ready to
3577 send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs,
3578 self_numreceiveobjs, false);
3581 if(INITPHASE == gcphase) {
3585 #ifdef RAWPATH // TODO GC_DEBUG
3586 printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
3589 #ifdef GC_CACHE_ADAPT
3590 // prepare for cache adaption:
3591 cacheAdapt_gc(true);
3592 #endif // GC_CACHE_ADAPT
3593 //send init finish msg to core coordinator
3594 send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
3597 if(MARKPHASE == gcphase) {
3601 #ifdef RAWPATH // TODO GC_DEBUG
3602 printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(),
3603 udn_tile_coord_y());
3605 mark(true, stackptr);
3606 #ifdef RAWPATH // TODO GC_DEBUG
// unlike gc_collect(), this core waits for flush — no compact/map here
3607 printf("(%x,%x) Finish mark phase, wait for flush\n",
3608 udn_tile_coord_x(), udn_tile_coord_y());
3611 // non-gc core collector routine
3613 if(FLUSHPHASE == gcphase) {
3617 #ifdef RAWPATH // TODO GC_DEBUG
3618 printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(),
3619 udn_tile_coord_y());
3622 if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
3623 send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj,
3624 gc_num_liveobj, gc_num_forwardobj, false);
3627 #endif // GC_PROFLIE
3629 #ifdef RAWPATH // TODO GC_DEBUG
3630 printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
3631 udn_tile_coord_y());
3634 #ifdef GC_CACHE_ADAPT
3636 if(PREFINISHPHASE == gcphase) {
3640 #ifdef RAWPATH // TODO GC_DEBUG
3641 printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(),
3642 udn_tile_coord_y());
3644 // cache adapt phase
3645 cacheAdapt_mutator();
3646 cacheAdapt_gc(false);
3647 //send init finish msg to core coordinator
3648 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
3649 #ifdef RAWPATH // TODO GC_DEBUG
3650 printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
3651 udn_tile_coord_y());
3653 #endif // GC_CACHE_ADAPT
3656 if(FINISHPHASE == gcphase) {
3660 #ifdef RAWPATH // TODO GC_DEBUG
3661 printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
3663 } // void gc_collect(struct garbagelist * stackptr)
// Coordinator routine run only on STARTUPCORE.  Drives the whole collection
// through its phases in order: INIT -> MARK -> (large-object handling) ->
// COMPACT -> MAP -> FLUSH -> [PREFINISH, cache-adaption builds] -> FINISH.
// For each phase it broadcasts a GCSTART* message, participates in the phase
// itself, then spins on gccorestatus[] (under runtime mode) until every
// relevant core has reported completion.
// NOTE(review): SOURCE is an elided listing — several wait loops, brace
// closers and #else/#endif seams between the visible lines are not shown.
3665 inline void gc_master(struct garbagelist * stackptr) {
3667 gcphase = INITPHASE;
3669 waitconfirm = false;
3673 // Note: all cores need to init gc including non-gc cores
3674 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
3675 // send GC init messages to all cores
3676 send_msg_1(i, GCSTARTINIT, false);
3678 bool isfirst = true;
3679 bool allStall = false;
3681 #ifdef GC_CACHE_ADAPT
3682 // prepare for cache adaption:
3683 cacheAdapt_gc(true);
3684 #endif // GC_CACHE_ADAPT
3686 #ifdef RAWPATH // TODO GC_DEBUG
3687 printf("(%x,%x) Check core status \n", udn_tile_coord_x(),
3688 udn_tile_coord_y());
// wait until every active core has acknowledged init
3691 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3693 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3694 if(gc_checkAllCoreStatus_I()) {
3695 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3698 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3703 #ifdef GC_CACHE_ADAPT
3704 //gc_output_cache_sampling();
3705 #endif // GC_CACHE_ADAPT
3706 #ifdef RAWPATH // TODO GC_DEBUG
3707 printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(),
3708 udn_tile_coord_y());
// ---- mark phase ----
3710 // all cores have finished initGC
3711 // restore the gcstatus of all cores
3712 // Note: all cores have to do mark including non-gc cores
3713 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3714 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3715 gccorestatus[i] = 1;
3716 // send GC start messages to all cores
3717 send_msg_1(i, GCSTART, false);
3720 gcphase = MARKPHASE;
3722 while(MARKPHASE == gcphase) {
3723 mark(isfirst, stackptr);
3730 } // while(MARKPHASE == gcphase)
3731 // send msgs to all cores requesting large objs info
3732 // Note: only need to ask gc cores, non-gc cores do not host any objs
3733 numconfirm = NUMCORES4GC - 1;
3734 for(i = 1; i < NUMCORES4GC; ++i) {
3735 send_msg_1(i, GCLOBJREQUEST, false);
3737 gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
3742 } // wait for responses
3743 // check the heaptop
3744 if(gcheaptop < gcmarkedptrbound) {
3745 gcheaptop = gcmarkedptrbound;
3750 #ifdef RAWPATH // TODO GC_DEBUG
3751 printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
3752 udn_tile_coord_y());
3755 // cache all large objs
3757 // not enough space to cache large objs: unrecoverable, abort
3758 BAMBOO_EXIT(0xb107);
3760 // predict number of blocks to fill for each core
3762 int numpbc = loadbalance(&tmpheaptop);
3764 numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
3765 #ifdef RAWPATH // TODO GC_DEBUG
3766 printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(),
3767 udn_tile_coord_y());
3770 //int tmptopptr = 0;
3771 //BASEPTR(gctopcore, 0, &tmptopptr);
3773 //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3774 tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
3776 BAMBOO_DEBUGPRINT(0xabab);
3777 BAMBOO_DEBUGPRINT_REG(tmptopptr);
// ---- compact phase: assign each gc core its block quota ----
3779 for(i = 0; i < NUMCORES4GC; ++i) {
3781 BASEPTR(i, numpbc, &tmpcoreptr);
3782 //send start compact messages to all cores
3783 //TODO bug here, do not know if the direction is positive or negative?
3784 if (tmpcoreptr < tmpheaptop /*tmptopptr*/) {
3785 gcstopblock[i] = numpbc + 1;
3786 if(i != STARTUPCORE) {
3787 send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
3789 gcblock2fill = numpbc+1;
3790 } // if(i != STARTUPCORE)
3792 gcstopblock[i] = numpbc;
3793 if(i != STARTUPCORE) {
3794 send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
3796 gcblock2fill = numpbc;
3797 } // if(i != STARTUPCORE)
3800 BAMBOO_DEBUGPRINT(0xf000+i);
3801 BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
3802 BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
3804 // init some data structures for compact phase
3806 gcfilledblocks[i] = 0;
3807 gcrequiredmems[i] = 0;
3817 bool finalcompact = false;
3818 // initialize pointers for compacting
3819 struct moveHelper * orig =
3820 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3821 struct moveHelper * to =
3822 (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
3823 initOrig_Dst(orig, to);
3824 int filledblocks = 0;
3825 INTPTR heaptopptr = 0;
3826 bool finishcompact = false;
3827 bool iscontinue = true;
3828 bool localcompact = true;
// the master compacts its own share while also arbitrating move requests
// from the other cores until every core reports compaction done
3829 while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
3830 if((!finishcompact) && iscontinue) {
3832 BAMBOO_DEBUGPRINT(0xe001);
3833 BAMBOO_DEBUGPRINT_REG(numpbc);
3834 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3836 finishcompact = compacthelper(orig, to, &filledblocks,
3837 &heaptopptr, &localcompact);
3839 BAMBOO_DEBUGPRINT(0xe002);
3840 BAMBOO_DEBUGPRINT_REG(finishcompact);
3841 BAMBOO_DEBUGPRINT_REG(gctomove);
3842 BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
3843 BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
3844 BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
3848 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3849 if(gc_checkCoreStatus_I()) {
3850 // all cores have finished compacting
3851 // restore the gcstatus of all cores
3852 for(i = 0; i < NUMCORES4GC; ++i) {
3853 gccorestatus[i] = 1;
3855 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3858 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3859 // check if there is spare mem for pending move requests
3860 if(COMPACTPHASE == gcphase) {
3862 BAMBOO_DEBUGPRINT(0xe003);
3864 resolvePendingMoveRequest();
3866 BAMBOO_DEBUGPRINT_REG(gctomove);
3870 BAMBOO_DEBUGPRINT(0xe004);
3874 } // if(gc_checkCoreStatus_I()) else ...
3878 BAMBOO_DEBUGPRINT(0xe005);
3879 BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
3880 BAMBOO_DEBUGPRINT_REG(gcblock2fill);
3881 BAMBOO_DEBUGPRINT_REG(gctomove);
// a move destination was granted: re-aim the 'to' helper at the granted
// block (bound/base/top derived from the block index; first block has the
// special size BAMBOO_SMEM_SIZE_L)
3883 to->ptr = gcmovestartaddr;
3884 to->numblocks = gcblock2fill - 1;
3885 to->bound = (to->numblocks==0) ?
3886 BAMBOO_SMEM_SIZE_L :
3887 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
3888 BASEPTR(gcdstcore, to->numblocks, &(to->base));
3889 to->offset = to->ptr - to->base;
3890 to->top = (to->numblocks==0) ?
3891 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
3893 to->offset = BAMBOO_CACHE_LINE_SIZE;
3894 to->ptr += to->offset; // for header
3895 to->top += to->offset;
3896 if(gcdstcore == BAMBOO_NUM_OF_CORE) {
3897 localcompact = true;
3899 localcompact = false;
3903 } else if(!finishcompact) {
3907 } // while(COMPACTPHASE == gcphase)
3911 #ifdef RAWPATH // TODO GC_DEBUG
3912 printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
3913 udn_tile_coord_y());
3918 #ifdef RAWPATH // TODO GC_DEBUG
3919 printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(),
3920 udn_tile_coord_y());
// ---- map phase: cores exchange old->new address mapping info ----
3928 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3929 // Note: only gc cores host objects and hence have mapping info to send
3931 for(i = 1; i < NUMCORES4GC; ++i) {
3932 // send start-mapinfo messages to all gc cores
3933 gccorestatus[i] = 1;
3934 send_msg_1(i, GCSTARTMAPINFO, false);
3939 #ifdef RAWPATH // TODO GC_DEBUG
3940 printf("(%x,%x) Start map phase \n", udn_tile_coord_x(),
3941 udn_tile_coord_y());
3945 #ifdef RAWPATH // TODO GC_DEBUG
3946 printf("(%x,%x) Finish map phase \n", udn_tile_coord_x(),
3947 udn_tile_coord_y());
3949 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3950 while(MAPPHASE == gcphase) {
3951 // check the status of all cores
3952 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3953 if(gc_checkCoreStatus_I()) {
3954 // all cores have finished sending mapping info
3955 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3958 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3959 } // while(MAPPHASE == gcphase)
// ---- flush phase: every active core rewrites its stale pointers ----
3961 gcphase = FLUSHPHASE;
3962 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
3963 // Note: all cores should flush their runtime data including non-gc
3965 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
3966 // send start flush messages to all cores
3967 gccorestatus[i] = 1;
3968 send_msg_1(i, GCSTARTFLUSH, false);
3973 #ifdef RAWPATH // TODO GC_DEBUG
3974 printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(),
3975 udn_tile_coord_y());
3979 #ifdef GC_CACHE_ADAPT
3980 // now the master core needs to decide the new cache strategy
3981 cacheAdapt_master();
3982 #endif // GC_CACHE_ADAPT
3984 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
3985 while(FLUSHPHASE == gcphase) {
3986 // check the status of all cores
3987 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
3988 if(gc_checkAllCoreStatus_I()) {
3989 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3992 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
3993 } // while(FLUSHPHASE == gcphase)
3994 #ifdef RAWPATH // TODO GC_DEBUG
3995 printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(),
3996 udn_tile_coord_y());
// ---- prefinish phase (cache adaption builds only) ----
3999 #ifdef GC_CACHE_ADAPT
4003 gcphase = PREFINISHPHASE;
4004 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4005 // Note: all cores should take part, including non-gc cores
4007 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
4008 // send start prefinish messages to all cores
4009 gccorestatus[i] = 1;
4010 send_msg_1(i, GCSTARTPREF, false);
4012 #ifdef RAWPATH // TODO GC_DEBUG
4013 printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(),
4014 udn_tile_coord_y());
4016 // cache adapt phase
4017 cacheAdapt_mutator();
4018 #ifdef GC_CACHE_ADAPT_OUTPUT
4019 bamboo_output_cache_policy();
4021 cacheAdapt_gc(false);
4022 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4023 while(PREFINISHPHASE == gcphase) {
4024 // check the status of all cores
4025 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4026 if(gc_checkAllCoreStatus_I()) {
4027 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4030 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4031 } // while(PREFINISHPHASE == gcphase)
4032 #endif // GC_CACHE_ADAPT
// ---- finish phase ----
4034 gcphase = FINISHPHASE;
4036 // invalidate all shared mem pointers
4037 // put it here as it takes time to inform all the other cores to
4038 // finish gc and it might cause problem when some core resumes
4039 // mutator earlier than the other cores
4040 bamboo_cur_msp = NULL;
4041 bamboo_smem_size = 0;
4042 bamboo_smem_zero_top = NULL;
4044 gcprocessing = false;
4049 gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
4050 for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
4051 // send gc finish messages to all cores
4052 send_msg_1(i, GCFINISH, false);
4053 gccorestatus[i] = 1;
4055 #ifdef RAWPATH // TODO GC_DEBUG
4056 printf("(%x,%x) gc finished \n", udn_tile_coord_x(),
4057 udn_tile_coord_y());
4060 } // void gc_master(struct garbagelist * stackptr)
// Top-level entry point for a collection cycle on this core.  Dispatches on
// the core id:
//   - core 0 (the coordinator) first verifies that every active core has
//     reached a GC safe point (resending GCSTARTPRE to stragglers and
//     checking that no object messages are still in flight), then runs
//     gc_master();
//   - cores < NUMCORES4GC run gc_collect();
//   - all remaining cores run gc_nocollect() and wait for GCFINISH.
// Each branch zeroes the first word of any leftover local shared-mem chunk
// before the init phase (so the shared heap is untouched during gcinit),
// optionally flushes the DTLB, and handles cache-sampling bookkeeping.
// Returns bool per the signature; the visible portion of this (elided)
// listing does not show the return statements.
4062 inline bool gc(struct garbagelist * stackptr) {
4065 gcprocessing = false;
4069 // core coordinator routine
4070 if(0 == BAMBOO_NUM_OF_CORE) {
4072 printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
4073 udn_tile_coord_y());
4075 bool isallstall = true;
4076 gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
4077 BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
4079 for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
4080 if(gccorestatus[ti] != 0) {
4086 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4087 // some of the cores are still executing the mutator and did not reach
4088 // some gc safe point, therefore it is not ready to do gc
4089 // in case that there are some pregc information msg lost, send a confirm
4090 // msg to the 'busy' core
4091 send_msg_1(ti, GCSTARTPRE, false);
4099 //BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
// record this core's own send/receive counters before summing them all
4100 gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
4101 gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
4104 BAMBOO_DEBUGPRINT(0xec04);
4106 for(int i = 0; i < NUMCORESACTIVE; ++i) {
4107 sumsendobj += gcnumsendobjs[0][i];
4109 BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
4111 } // for(i = 1; i < NUMCORESACTIVE; ++i)
4113 BAMBOO_DEBUGPRINT(0xec05);
4114 BAMBOO_DEBUGPRINT_REG(sumsendobj);
4116 for(int i = 0; i < NUMCORESACTIVE; ++i) {
4117 sumsendobj -= gcnumreceiveobjs[0][i];
// FIX: was `gcnumreceiveobjs[i]` — gcnumreceiveobjs is 2-D (see [0][i] on
// the line above and [0][BAMBOO_NUM_OF_CORE] earlier), so the single
// subscript added a row pointer, not the per-core counter, to the debug tag.
4119 BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[0][i]);
4121 } // for(i = 1; i < NUMCORESACTIVE; ++i)
4123 BAMBOO_DEBUGPRINT(0xec06);
4124 BAMBOO_DEBUGPRINT_REG(sumsendobj);
// non-zero sum => object messages still in flight; not safe to start gc yet
4126 if(0 != sumsendobj) {
4127 // there were still some msgs on the fly, wait until there
4128 // are some update pregc information coming and check it again
4130 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4138 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
4141 #ifdef RAWPATH // TODO GC_DEBUG
4142 printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
4145 // Zero out the remaining bamboo_cur_msp
4146 // Only zero out the first 4 bytes of the remaining memory
4147 // Move the operation here because for the GC_CACHE_ADAPT version,
4148 // we need to make sure during the gcinit phase the shared heap is not
4149 // touched. Otherwise, there would be problem when adapt the cache
4151 if((bamboo_cur_msp != 0)
4152 && (bamboo_smem_zero_top == bamboo_cur_msp)
4153 && (bamboo_smem_size > 0)) {
4154 *((int *)bamboo_cur_msp) = 0;
4156 #ifdef GC_FLUSH_DTLB
4157 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4158 BAMBOO_CLEAN_DTLB();
4159 gc_num_flush_dtlb++;
4162 #ifdef GC_CACHE_ADAPT
4163 #ifdef GC_CACHE_SAMPLING
4164 // disable the timer interrupt
4165 bamboo_mask_timer_intr();
4166 // get the sampling data
4167 bamboo_output_dtlb_sampling();
4168 #endif // GC_CACHE_SAMPLING
4169 #endif // GC_CACHE_ADAPT
4170 gcprocessing = true;
4171 gc_master(stackptr);
// ---- gc cores other than the coordinator ----
4172 } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
4173 // Zero out the remaining bamboo_cur_msp
4174 // Only zero out the first 4 bytes of the remaining memory
4175 // Move the operation here because for the GC_CACHE_ADAPT version,
4176 // we need to make sure during the gcinit phase the shared heap is not
4177 // touched. Otherwise, there would be problem when adapt the cache
4179 if((bamboo_cur_msp != 0)
4180 && (bamboo_smem_zero_top == bamboo_cur_msp)
4181 && (bamboo_smem_size > 0)) {
4182 *((int *)bamboo_cur_msp) = 0;
4184 #ifdef GC_FLUSH_DTLB
4185 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4186 BAMBOO_CLEAN_DTLB();
4187 gc_num_flush_dtlb++;
4190 #ifdef GC_CACHE_ADAPT
4191 #ifdef GC_CACHE_SAMPLING
4192 // disable the timer interrupt
4193 bamboo_mask_timer_intr();
4194 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4195 // get the sampling data
4196 bamboo_output_dtlb_sampling();
4198 #endif // GC_CACHE_SAMPLING
4199 #endif // GC_CACHE_ADAPT
4200 gcprocessing = true;
4201 gc_collect(stackptr);
4203 // invalidate all shared mem pointers
4204 bamboo_cur_msp = NULL;
4205 bamboo_smem_size = 0;
4206 bamboo_smem_zero_top = NULL;
4208 gcprocessing = false;
// ---- non-gc cores ----
4210 // Zero out the remaining bamboo_cur_msp
4211 // Only zero out the first 4 bytes of the remaining memory
4212 // Move the operation here because for the GC_CACHE_ADAPT version,
4213 // we need to make sure during the gcinit phase the shared heap is not
4214 // touched. Otherwise, there would be problem when adapt the cache
4216 if((bamboo_cur_msp != 0)
4217 && (bamboo_smem_zero_top == bamboo_cur_msp)
4218 && (bamboo_smem_size > 0)) {
4219 *((int *)bamboo_cur_msp) = 0;
4221 #ifdef GC_FLUSH_DTLB
4222 if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
4223 BAMBOO_CLEAN_DTLB();
4224 gc_num_flush_dtlb++;
4227 #ifdef GC_CACHE_ADAPT
4228 #ifdef GC_CACHE_SAMPLING
4229 // disable the timer interrupt
4230 bamboo_mask_timer_intr();
4231 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4232 // get the sampling data
4233 bamboo_output_dtlb_sampling();
4235 #endif // GC_CACHE_SAMPLING
4236 #endif // GC_CACHE_ADAPT
4237 // not a gc core, should wait for gcfinish msg
4238 gcprocessing = true;
4239 gc_nocollect(stackptr);
4241 // invalidate all shared mem pointers
4242 bamboo_cur_msp = NULL;
4243 bamboo_smem_size = 0;
4244 bamboo_smem_zero_top = NULL;
4246 gcprocessing = false;
// ---- common epilogue: reset cache-sampling state, re-arm timer ----
4248 #ifdef GC_CACHE_ADAPT
4249 #ifdef GC_CACHE_SAMPLING
4250 // reset the sampling arrays
4251 bamboo_dtlb_sampling_reset();
4252 #endif // GC_CACHE_SAMPLING
4253 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
4254 // zero out the gccachesamplingtbl
4255 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
4256 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
4257 size_cachesamplingtbl_local_r);
4258 if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
4259 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
4262 #ifdef GC_CACHE_SAMPLING
4263 // enable the timer interrupt
4264 bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING);
4265 bamboo_unmask_timer_intr();
4266 #endif // GC_CACHE_SAMPLING
4267 #endif // GC_CACHE_ADAPT
4269 } // bool gc(struct garbagelist * stackptr)
// Begin a profiling record for a new GC cycle: allocate a GCInfo slot in
// gc_infoArray (unless the array already overflowed) and stamp time[0] with
// the current execution time.  Paired with gc_profileItem()/gc_profileEnd().
4272 inline void gc_profileStart(void) {
4273 if(!gc_infoOverflow) {
4274 GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
4275 gc_infoArray[gc_infoIndex] = gcInfo;
4277 gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
// Record an intermediate timestamp (e.g. a phase boundary) into the current
// GC cycle's GCInfo record; no-op once the info array has overflowed.
4281 inline void gc_profileItem(void) {
4282 if(!gc_infoOverflow) {
4283 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4284 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
// Close the profiling record for the current GC cycle: append a final
// timestamp followed by seven counters (livespace, freespace, lobj,
// lobjspace, obj, liveobj, forwardobj) — gc_outputProfileData relies on
// exactly these 7 trailing slots (its "index - 7" offsets).  Sets
// gc_infoOverflow once GCINFOLENGTH records have been filled.
4288 inline void gc_profileEnd(void) {
4289 if(!gc_infoOverflow) {
4290 GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
4291 gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
4292 gcInfo->time[gcInfo->index++] = gc_num_livespace;
4293 gcInfo->time[gcInfo->index++] = gc_num_freespace;
4294 gcInfo->time[gcInfo->index++] = gc_num_lobj;
4295 gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
4296 gcInfo->time[gcInfo->index++] = gc_num_obj;
4297 gcInfo->time[gcInfo->index++] = gc_num_liveobj;
4298 gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
4300 if(gc_infoIndex == GCINFOLENGTH) {
4301 gc_infoOverflow = true;
4302 //taskInfoIndex = 0;
4307 // output the profiling data
// Dumps all recorded GCInfo entries.  Two conditionally-compiled variants
// are visible (the preprocessor seam between them is elided in this
// listing): a printf-based one, and a BAMBOO_DEBUGPRINT-based one that also
// decodes the 7 trailing counter slots written by gc_profileEnd().
// NOTE(review): the deltas printed per entry use time[j]-tmp with tmp
// starting at 0, so the first "delta" is the absolute timestamp.
4307b
4308 void gc_outputProfileData() {
4311 unsigned long long totalgc = 0;
4313 //printf("Start Time, End Time, Duration\n");
4314 // output per-cycle timing info
4315 for(i = 0; i < gc_infoIndex; i++) {
4316 GCInfo * gcInfo = gc_infoArray[i];
4317 unsigned long long tmp = 0;
4318 for(j = 0; j < gcInfo->index; j++) {
4319 printf("%lld(%lld), ", gcInfo->time[j], (gcInfo->time[j]-tmp));
4320 tmp = gcInfo->time[j];
4322 tmp = (tmp-gcInfo->time[0]);
4323 printf(" ++ %lld \n", tmp);
4327 if(gc_infoOverflow) {
4328 printf("Caution: gc info overflow!\n");
4331 printf("\n\n total gc time: %lld \n", totalgc);
// ---- debug-print variant ----
4335 unsigned long long totalgc = 0;
4337 #ifndef BAMBOO_MEMPROF
4338 BAMBOO_DEBUGPRINT(0xdddd);
4340 // output per-cycle timing info
4341 for(i= 0; i < gc_infoIndex; i++) {
4342 GCInfo * gcInfo = gc_infoArray[i];
4343 #ifdef BAMBOO_MEMPROF
4344 unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
4346 unsigned long long tmp = 0;
4347 BAMBOO_DEBUGPRINT(0xddda);
// stop 7 short of index: the last 7 slots are counters, not timestamps
4348 for(j = 0; j < gcInfo->index - 7; j++) {
4349 BAMBOO_DEBUGPRINT(gcInfo->time[j]);
4350 BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp);
4351 BAMBOO_DEBUGPRINT(0xdddb);
4352 tmp = gcInfo->time[j];
4354 tmp = (tmp-gcInfo->time[0]);
4355 BAMBOO_DEBUGPRINT_REG(tmp);
4356 BAMBOO_DEBUGPRINT(0xdddc);
// the 7 counters recorded by gc_profileEnd, in order: livespace,
// freespace, lobj, lobjspace, obj, liveobj, forwardobj
4357 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 7]);
4358 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 6]);
4359 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 5]);
4360 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 4]);
4361 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 3]);
4362 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 2]);
4363 BAMBOO_DEBUGPRINT(gcInfo->time[gcInfo->index - 1]);
4364 BAMBOO_DEBUGPRINT(0xddde);
4368 #ifndef BAMBOO_MEMPROF
4369 BAMBOO_DEBUGPRINT(0xdddf);
4371 BAMBOO_DEBUGPRINT_REG(totalgc);
4373 if(gc_infoOverflow) {
4374 BAMBOO_DEBUGPRINT(0xefee);
4377 #ifndef BAMBOO_MEMPROF
4378 BAMBOO_DEBUGPRINT(0xeeee);
4382 #endif // #ifdef GC_PROFILE