From 9989d97f056295efabfade62ef1336d1ac7f1188 Mon Sep 17 00:00:00 2001
From: bdemsky <bdemsky>
Date: Sat, 18 Jun 2011 00:13:20 +0000
Subject: [PATCH] more changes

---
 Robust/src/Runtime/bamboo/markbit.h           |   4 +
 Robust/src/Runtime/bamboo/multicorecache.c    |   6 +-
 Robust/src/Runtime/bamboo/multicoregarbage.c  | 212 +-----
 Robust/src/Runtime/bamboo/multicoregarbage.h  | 102 +--
 .../src/Runtime/bamboo/multicoregccompact.c   | 691 ++++--------------
 .../src/Runtime/bamboo/multicoregccompact.h   |  23 +-
 Robust/src/Runtime/bamboo/multicoregcflush.c  | 202 +++--
 Robust/src/Runtime/bamboo/multicoregcmark.c   |   1 -
 Robust/src/Runtime/bamboo/multicoremsg.c      |  81 +-
 Robust/src/Runtime/bamboo/multicoremsg.h      |   8 +-
 10 files changed, 396 insertions(+), 934 deletions(-)
diff --git a/Robust/src/Runtime/bamboo/markbit.h b/Robust/src/Runtime/bamboo/markbit.h
index 93eddbac..a597cc8d 100644
--- a/Robust/src/Runtime/bamboo/markbit.h
+++ b/Robust/src/Runtime/bamboo/markbit.h
@@ -7,6 +7,10 @@ extern unsigned int revmarkmappingarray[];
 
 #define NOTMARKED 0
 #define ALIGNOBJSIZE(x) (x)>>5
+#define ALIGNSIZETOBYTES(x) ((x)<<5) //outer parentheses keep the shift intact when the macro is used inside a larger expression
+#define ALIGNTOTABLEINDEX(x) ((x)>>(5+4)) //outer parentheses for the same reason: + and - bind tighter than >>
+#define CONVERTTABLEINDEXTOPTR(x) (((unsigned INTPTR)((x)<<(5+4)))+gcbase)
+
 
 #define OBJMASK 0x40000000  //set towhatever smallest object mark is
 #define MARKMASK 0xc0000000  //set towhatever smallest object mark is
diff --git a/Robust/src/Runtime/bamboo/multicorecache.c b/Robust/src/Runtime/bamboo/multicorecache.c
index a897765c..3e78638a 100644
--- a/Robust/src/Runtime/bamboo/multicorecache.c
+++ b/Robust/src/Runtime/bamboo/multicorecache.c
@@ -92,7 +92,7 @@ void cacheAdapt_policy_local(int coren){
   for(; page_index < page_index_end; page_index++) {
     bamboo_cache_policy_t policy = {0};
     unsigned int block = 0;
-    BLOCKINDEX((void *) page_sva, block);
+    BLOCKINDEX(block, (void *) page_sva);
     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
@@ -464,7 +464,7 @@ void gc_output_cache_sampling() {
   for(page_index = 0; page_index < page_num; page_index++) {
     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
     unsigned int block = 0;
-    BLOCKINDEX((void *) page_sva, block);
+    BLOCKINDEX(block, (void *) page_sva);
     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
     printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
     for(int i = 0; i < NUMCORESACTIVE; i++) {
@@ -494,7 +494,7 @@ void gc_output_cache_sampling_r() {
   for(page_index = 0; page_index < page_num; page_index++) {
     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
     unsigned int block = 0;
-    BLOCKINDEX((void *)page_sva, block);
+    BLOCKINDEX(block, (void *)page_sva);
     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
     printf(" %x,  %d,  %d,  ",(int)page_sva,page_index,coren);
     int accesscore = 0; // TODO
diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.c b/Robust/src/Runtime/bamboo/multicoregarbage.c
index eff4fb25..2468a98b 100644
--- a/Robust/src/Runtime/bamboo/multicoregarbage.c
+++ b/Robust/src/Runtime/bamboo/multicoregarbage.c
@@ -152,7 +152,7 @@ void initGC() {
       gccorestatus[i] = 1;
       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
       gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
-      gcloads[i] = NULL;
+      gcloads[i] = 0;
       gcrequiredmems[i] = 0;
       gcfilledblocks[i] = 0;
       gcstopblock[i] = 0;
@@ -176,7 +176,6 @@ void initGC() {
   gcblock2fill = 0;
   gcmovepending = 0;
   gccurr_heaptop = 0;
-  gcdstcore = 0;
 
   gc_queueinit();
 
@@ -208,7 +207,7 @@ bool gc_checkAllCoreStatus_I() {
   return true;
 }
 
-INLINE void checkMarkStatus_p2() {
+void checkMarkStatus_p2() {
   // check if the sum of send objs and receive obj are the same
   // yes->check if the info is the latest; no->go on executing
   unsigned int sumsendobj = 0;
@@ -250,7 +249,7 @@ INLINE void checkMarkStatus_p2() {
   }
 }
 
-INLINE void checkMarkStatus() {
+void checkMarkStatus() {
   if((!waitconfirm)||(waitconfirm && (numconfirm == 0))) {
     unsigned int entry_index = 0;
     if(waitconfirm) {
@@ -287,26 +286,28 @@ INLINE void checkMarkStatus() {
 } 
 
 // compute load balance for all cores
-INLINE int loadbalance(void ** heaptop) {
+int loadbalance(void ** heaptop, unsigned int * topblock, unsigned int * topcore) {
   // compute load balance
   // get the total loads
-  unsigned int tloads = gcloads[STARTUPCORE];
-  for(int i = 1; i < NUMCORES4GC; i++) {
+  unsigned int tloads = 0;
+  for(int i = 0; i < NUMCORES4GC; i++) {
     tloads += gcloads[i];
   }
   *heaptop = gcbaseva + tloads;
 
-  unsigned int b = 0;
-  BLOCKINDEX(*heaptop, b);
+  unsigned int topblockindex;
+  
+  BLOCKINDEX(topblockindex, *heaptop);
   // num of blocks per core
-  unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);
-  gctopblock = b;
-  RESIDECORE(*heaptop, gctopcore);
+  unsigned int numbpc = (topblockindex+NUMCORES4GC-1)/NUMCORES4GC;
+  
+  *topblock = topblockindex;
+  RESIDECORE(*heaptop, *topcore);
   return numbpc;
 }
 
 // compute total mem size required and sort the lobjs in ascending order
-INLINE unsigned int sortLObjs() {
+unsigned int sortLObjs() {
   unsigned int tmp_lobj = 0;
   unsigned int tmp_len = 0;
   unsigned int tmp_host = 0;
@@ -360,7 +361,7 @@ INLINE unsigned int sortLObjs() {
   return sumsize;
 }
 
-INLINE bool cacheLObjs() {
+bool cacheLObjs() {
   // check the total mem size need for large objs
   unsigned long long sumsize = 0;
   unsigned int size = 0;
@@ -398,7 +399,7 @@ INLINE bool cacheLObjs() {
 void updateSmemTbl(unsigned int coren, void * localtop) {
   unsigned int ltopcore = 0;
   unsigned int bound = BAMBOO_SMEM_SIZE_L;
-  BLOCKINDEX(localtop, ltopcore);
+  BLOCKINDEX(ltopcore, localtop);
   if((unsigned int)localtop>=(unsigned int)(gcbaseva+BAMBOO_LARGE_SMEM_BOUND)){
     bound = BAMBOO_SMEM_SIZE;
   }
@@ -425,187 +426,6 @@ void updateSmemTbl(unsigned int coren, void * localtop) {
   }
 }
 
-INLINE unsigned int checkCurrHeapTop() {
-  // update the smemtbl
-  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
-  // flush all gcloads to indicate the real heap top on one core
-  // previous it represents the next available ptr on a core
-  if(((unsigned int)gcloads[0]>(unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))&&(((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
-    // edge of a block, check if this is exactly the heaptop
-    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
-    gcloads[0]+=BLOCKSIZE(gcfilledblocks[0]<=1);
-  }
-  updateSmemTbl(0, gcloads[0]);
-  for(int i = 1; i < NUMCORES4GC; i++) {
-    unsigned int tmptop = 0;
-    if((gcfilledblocks[i] > 0)&&(((unsigned int)gcloads[i]%(BAMBOO_SMEM_SIZE)) == 0)) {
-      // edge of a block, check if this is exactly the heaptop
-      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
-      gcloads[i]+=BLOCKSIZE(gcfilledblocks[i]<=1);
-      tmptop = gcloads[i];
-    }
-    updateSmemTbl(i, gcloads[i]);
-  } 
-
-  // find current heap top
-  // TODO
-  // a bug here: when using local allocation, directly move large objects
-  // to the highest free chunk might not be memory efficient
-  unsigned int tmpheaptop = 0;
-  for(int i = gcnumblock-1; i >= 0; i--) {
-    if(bamboo_smemtbl[i] > 0) {
-      return gcbaseva+bamboo_smemtbl[i]+OFFSET2BASEVA(i);
-    }
-  }
-  return gcbaseva;
-}
-
-INLINE void movelobj(void * tmpheaptop, void * ptr,int size,int isize) {
-  // move the large obj
-  if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
-    memmove(tmpheaptop, gcheaptop, size);
-  } else {
-    memcpy(tmpheaptop, gcheaptop, size);
-  }
-  // fill the remaining space with -2 padding
-  BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
-  gcheaptop += size;
-  // cache the mapping info 
-  gcmappingtbl[OBJMAPPINGINDEX(ptr)]=tmpheaptop;
-  tmpheaptop += isize;
-}
-
-INLINE void moveLObjs() {
-#ifdef SMEMM
-  // update the gcmem_mixed_usedmem
-  gcmem_mixed_usedmem = 0;
-#endif
-  unsigned int size = 0;
-  unsigned int bound = 0;
-  void * tmpheaptop = checkCurrHeapTop();
-
-  // move large objs from gcheaptop to tmpheaptop
-  // write the header first
-  unsigned int tomove = gcbaseva+(BAMBOO_SHARED_MEM_SIZE)-gcheaptop;
-#ifdef SMEMM
-  gcmem_mixed_usedmem += tomove;
-#endif
-  // flush the sbstartbl
-  BAMBOO_MEMSET_WH(gcsbstarttbl,'\0',(BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE)*sizeof(unsigned int));
-  if(tomove == 0) {
-    gcheaptop = tmpheaptop;
-  } else {
-    // check how many blocks it acrosses
-    unsigned INTPTR remain = (unsigned INTPTR) (tmpheaptop-gcbaseva);
-    //number of the sblock
-    unsigned int sb = remain/BAMBOO_SMEM_SIZE;
-    unsigned int b = 0;  // number of the block
-    BLOCKINDEX(tmpheaptop, b);
-    // check the remaining space in this block
-    bound = (BAMBOO_SMEM_SIZE);
-    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
-      bound = (BAMBOO_SMEM_SIZE_L);
-    }
-    remain = bound - remain%bound;
-
-    size = 0;
-    unsigned int isize = 0;
-    unsigned int host = 0;
-    unsigned int ptr = 0;
-    unsigned int base = tmpheaptop;
-    unsigned int cpysize = 0;
-    remain -= BAMBOO_CACHE_LINE_SIZE;
-    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-    gc_lobjqueueinit4_I();
-    while(gc_lobjmoreItems4_I()) {
-      ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
-      isize=ALIGNSIZE(size, &isize);
-      if(remain >= isize) {
-        remain -= isize;
-        // move the large obj
-        movelobj(tmpheaptop,ptr,size,isize);
-        cpysize += isize;
-        // update bamboo_smemtbl
-        bamboo_smemtbl[b] += isize;
-      } else {
-        // this object acrosses blocks
-        if(cpysize > 0) {
-          CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
-          bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
-          cpysize = 0;
-          base = tmpheaptop;
-          if(remain == 0) {
-            remain = BLOCKSIZE((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND));
-          }
-          remain -= BAMBOO_CACHE_LINE_SIZE;
-          tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-          BLOCKINDEX(tmpheaptop, b);
-          sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE);
-        } 
-        // move the obj
-        movelobj(tmpheaptop,ptr,size,isize);
-        	
-        // set the gcsbstarttbl and bamboo_smemtbl
-        unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
-        for(int k = 1; k < tmpsbs; k++) {
-          gcsbstarttbl[sb+k] = -1;
-        }
-        sb += tmpsbs;
-        bound = BLOCKSIZE(b<NUMCORES4GC);
-        BLOCKINDEX(tmpheaptop-1, tmpsbs);
-        for(; b < tmpsbs; b++) {
-          bamboo_smemtbl[b] = bound;
-          if(b==NUMCORES4GC-1) {
-            bound = BAMBOO_SMEM_SIZE;
-          }
-        }
-        if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
-          gcsbstarttbl[sb] = -1;
-          remain = BLOCKSIZE((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND));
-          bamboo_smemtbl[b] = bound;
-        } else {
-          gcsbstarttbl[sb] = (int)tmpheaptop;
-          remain = tmpheaptop-gcbaseva;
-          bamboo_smemtbl[b] = remain%bound;
-          remain = bound - bamboo_smemtbl[b];
-        } 
-	
-        CLOSEBLOCK(base, isize+BAMBOO_CACHE_LINE_SIZE);
-        cpysize = 0;
-        base = tmpheaptop;
-        if(remain == BAMBOO_CACHE_LINE_SIZE) {
-          // fill with 0 in case
-          BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
-        }
-        remain -= BAMBOO_CACHE_LINE_SIZE;
-        tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-      } 
-    }
-    
-    if(cpysize > 0) {
-      CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
-      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
-    } else {
-      tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
-    }
-    gcheaptop = tmpheaptop;
-  } 
-
-  bamboo_free_block = 0;
-  unsigned int tbound = 0;
-  do {
-    tbound=BLOCKSIZE(bamboo_free_block<NUMCORES4GC);
-    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
-      bamboo_free_block++;
-    } else {
-      // the first non-full partition
-      break;
-    }
-  } while(true);
-
-  GCPROFILE_RECORD_SPACE();
-} 
-
 void gc_collect(struct garbagelist * stackptr) {
   gc_status_info.gcprocessing = true;
   // inform the master that this core is at a gc safe point and is ready to 
diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.h b/Robust/src/Runtime/bamboo/multicoregarbage.h
index 8cdd75dd..7b83212d 100644
--- a/Robust/src/Runtime/bamboo/multicoregarbage.h
+++ b/Robust/src/Runtime/bamboo/multicoregarbage.h
@@ -25,7 +25,7 @@ typedef enum {
   COMPACTPHASE,            // 0x2
   SUBTLECOMPACTPHASE,      // 0x3
   MAPPHASE,                // 0x4
-  FLUSHPHASE,              // 0x5
+  UPDATEPHASE,              // 0x5
 #ifdef GC_CACHE_ADAPT
   CACHEPOLICYPHASE,        // 0x6
   PREFINISHPHASE,          // 0x7
@@ -63,20 +63,21 @@ unsigned int gcself_numreceiveobjs;
 // for load balancing
 unsigned int gcheaptop;
 unsigned INTPTR gcloads[NUMCORES4GC];
+
+//Top of each core's heap
 void * topptrs[NUMCORES4GC];
+
 unsigned int gctopcore; // the core host the top of the heap
 unsigned int gctopblock; // the number of current top block
 
-unsigned int gcnumlobjs;
-
 // compact instruction
 unsigned int gcmarkedptrbound;
 unsigned int gcblock2fill;
 unsigned int gcstopblock[NUMCORES4GC]; // indicate when to stop compact phase
 unsigned int gcfilledblocks[NUMCORES4GC]; //indicate how many blocks have been fulfilled
+
 // move instruction;
 unsigned int gcmovestartaddr;
-unsigned int gcdstcore;
 volatile bool gctomove;
 unsigned int gcrequiredmems[NUMCORES4GC]; //record pending mem requests
 volatile unsigned int gcmovepending;
@@ -96,8 +97,7 @@ unsigned int * gcmarktbl;
 // table recording the starting address of each small block
 // (size is BAMBOO_SMEM_SIZE)
 // Note: 1. this table always resides on the very bottom of the shared memory
-//       2. it is not counted in the shared heap, would never be garbage 
-//          collected
+
 int * gcsbstarttbl;
 #ifdef GC_TBL_DEBUG
 unsigned int gcsbstarttbl_len;
@@ -127,29 +127,38 @@ unsigned int size_cachepolicytbl;
   ((((unsigned int)p)>=gcbaseva)&&(((unsigned int)p)<(gcbaseva+(BAMBOO_SHARED_MEM_SIZE))))
 
 
+//Minimum alignment unit
 #define ALIGNMENTBYTES 32
+
+//Number of bits to shift to convert a byte count into alignment units
 #define ALIGNMENTSHIFT 5
+#define MAXBLOCK 0x4fffffff //local block number that can never be reached...
+
 
 /* Number of bits used for each alignment unit */
 
+//Takes in size and converts into alignment units
 #define ALIGNOBJSIZE(x) (x>>ALIGNMENTSHIFT)
+
+//Takes in pointer to heap object and converts to offset in alignment units
 #define OBJMAPPINGINDEX(p) ALIGNOBJSIZE((unsigned INTPTR)(p-gcbaseva))
-#define ALIGNUNITS(s) (((s-1)>>ALIGNMENTSHIFT)+1)
 
+//Converts size of object into alignment units (need to round up)
+#define ALIGNUNITS(s) (((s-1)>>ALIGNMENTSHIFT)+1)
 
+//Rounds object size up to next alignment unit size
 #define ALIGNSIZE(s) ((((unsigned int)(s-1))&~(ALIGNMENTBYTES-1))+ALIGNMENTBYTES)
 
-
 // mapping of pointer to block # (start from 0), here the block # is
 // the global index
-#define BLOCKINDEX(p, b) \
-  { \
+#define BLOCKINDEX(b, p)			\
+  {								\
     unsigned INTPTR t = (unsigned INTPTR)(p - gcbaseva);	\
-    if(t < BAMBOO_LARGE_SMEM_BOUND) { \
-      b = t / BAMBOO_SMEM_SIZE_L; \
-    } else { \
+    if(t < BAMBOO_LARGE_SMEM_BOUND) {				\
+      b = t / BAMBOO_SMEM_SIZE_L;				\
+    } else {							      \
       b = NUMCORES4GC+((t-BAMBOO_LARGE_SMEM_BOUND)/BAMBOO_SMEM_SIZE); \
-    } \
+    }								      \
   }
 
 #define RESIDECORE(p, c) { \
@@ -157,7 +166,7 @@ unsigned int size_cachepolicytbl;
       c = 0; \
     } else { \
       unsigned INTPTR b; \
-      BLOCKINDEX(p, b); \
+      BLOCKINDEX(b, p);		      \
       c = gc_block2core[(b%(NUMCORES4GC*2))]; \
     } \
   }
@@ -169,63 +178,54 @@ INLINE static unsigned int hostcore(void * ptr) {
   return host;
 }
 
-// NOTE: n starts from 0
-// mapping of heaptop (how many bytes there are in the local heap) to
-// the number of the block
-// the number of the block indicates that the block is the xth block on
-// the local heap
+/*This macro takes in a number of bytes s (the current offset into the
+  heap) and stores in n the index of the local block that offset
+  falls in (0 while s is below BAMBOO_SMEM_SIZE_L) */
+
 #define NUMBLOCKS(s, n) \
   if(s < (BAMBOO_SMEM_SIZE_L)) { \
-    (*((unsigned int*)(n))) = 0; \
-  } else { \
-    (*((unsigned int*)(n))) = 1 + ((s) - (BAMBOO_SMEM_SIZE_L)) / (BAMBOO_SMEM_SIZE); \
-  }
-
-#define OFFSET(s, o) \
-  if(s < BAMBOO_SMEM_SIZE_L) { \
-    (*((unsigned int*)(o))) = (s); \
+    (n) = 0; \
   } else { \
-    (*((unsigned int*)(o))) = ((s)-(BAMBOO_SMEM_SIZE_L))%(BAMBOO_SMEM_SIZE); \
+    (n) = 1 + ((s) - (BAMBOO_SMEM_SIZE_L)) / (BAMBOO_SMEM_SIZE); \
   }
 
+//this macro takes in a global block identifier and returns the base
+//offset into the heap
 #define OFFSET2BASEVA(i) \
   (((i)<NUMCORES4GC)?(BAMBOO_SMEM_SIZE_L*(i)):(BAMBOO_SMEM_SIZE*((i)-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND))
 
+
+//This macro is passed a local block number c and returns BAMBOO_SMEM_SIZE_L when c is nonzero, BAMBOO_SMEM_SIZE when c is 0
 #define BLOCKSIZE(c) \
   ((c)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE)
 
-// mapping of (core #, index of the block) to the global block index
+//Takes as input the core number c and the local block index n and
+//returns the global block index
+
 #define BLOCKINDEX2(c, n) \
-  (gc_core2block[(2*(c))+((n)%2)]+((NUMCORES4GC*2)*((n)/2)))
+  (gc_core2block[2*(c)+((n)&1)]+(NUMCORES4GC*2)*((n)>>1))
 
+//This macro takes in a global block number and returns the offset
+//(relative to gcbaseva) of the upper bound of that block
 #define BOUNDPTR(b) \
   (((b)<NUMCORES4GC)?(((b)+1)*BAMBOO_SMEM_SIZE_L):(BAMBOO_LARGE_SMEM_BOUND+((b)-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE))
 
-#define BLOCKBOUND(n) \
-  (((n)==0)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*(n))
-
-// mapping of (core #, number of the block) to the base pointer of the block
-#define BASEPTR(c, n, p) \
-  { \
-    unsigned int b = BLOCKINDEX2((c), (n)); \
-    if(b < (NUMCORES4GC)) { \
-      (*((unsigned int*)p)) = gcbaseva + b * (BAMBOO_SMEM_SIZE_L); \
-    } else { \
-      (*((unsigned int*)p)) = gcbaseva+(BAMBOO_LARGE_SMEM_BOUND)+ \
-                     (b-(NUMCORES4GC))*(BAMBOO_SMEM_SIZE); \
-    } \
+//This macro takes in the core number c and the local block number n and
+//sets p (an lvalue) to the base address of the corresponding global block.
+//The temporary has a macro-private name so an argument named b is not captured.
+#define BASEPTR(p, c, n) {                                   \
+    unsigned int baseptr_gblk_ = BLOCKINDEX2((c), (n));      \
+    if(baseptr_gblk_ < (NUMCORES4GC)) {                      \
+      (p) = gcbaseva + baseptr_gblk_ * (BAMBOO_SMEM_SIZE_L); \
+    } else {                                                 \
+      (p) = gcbaseva+(BAMBOO_LARGE_SMEM_BOUND)+              \
+        (baseptr_gblk_-(NUMCORES4GC))*(BAMBOO_SMEM_SIZE);    \
+    }                                                        \
+  }
 
 // the next core in the top of the heap
 #define NEXTTOPCORE(b) (gc_block2core[((b)+1)%(NUMCORES4GC*2)])
 
-// close current block, fill the header
-#define CLOSEBLOCK(base, size) \
-  { \
-    BAMBOO_MEMSET_WH((base), '\0', BAMBOO_CACHE_LINE_SIZE); \
-    *((int*)(base)) = (size); \
-  }
-
 // check if all cores are stall now
 #define GC_CHECK_ALL_CORE_STATUS(f) \
   { \
diff --git a/Robust/src/Runtime/bamboo/multicoregccompact.c b/Robust/src/Runtime/bamboo/multicoregccompact.c
index 1fdbdfdf..3a6b9b04 100644
--- a/Robust/src/Runtime/bamboo/multicoregccompact.c
+++ b/Robust/src/Runtime/bamboo/multicoregccompact.c
@@ -4,31 +4,98 @@
 #include "multicoreruntime.h"
 #include "multicoregarbage.h"
 
-INLINE bool gc_checkCoreStatus() {
-  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+bool gc_checkCoreStatus() {
   for(int i = 0; i < NUMCORES4GC; ++i) {
     if(gccorestatus[i] != 0) {
-      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
       return false;
     }
   }  
-  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
   return true;
 }
 
-INLINE void gc_resetCoreStatus() {
-  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+void gc_resetCoreStatus() {
   for(int i = 0; i < NUMCORES4GC; ++i) {
     gccorestatus[i] = 1;
   }
-  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 }
 
-// should be invoked with interrupt closed
-INLINE int assignSpareMem_I(unsigned int sourcecore,unsigned int * requiredmem, void ** tomove, void ** startaddr) {
-  unsigned int b = 0;
-  BLOCKINDEX(topptrs[sourcecore], b);
-  void * boundptr = BOUNDPTR(b);
+void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) { //point both helpers at the base of this core's local block 0
+  // init the dst ptr: write position starts at the base of local block 0 of BAMBOO_NUM_OF_CORE
+  to->blocknum = 0; // NOTE(review): the other helpers in this file use localblocknum -- confirm which field name is intended
+  BASEPTR(to->base, BAMBOO_NUM_OF_CORE, to->blocknum); // macro assigns its first argument (to->base)
+  to->ptr = to->base;
+  to->bound=to->base+BLOCKSIZE(to->blocknum); // BLOCKSIZE(c) picks BAMBOO_SMEM_SIZE when c is 0 -- NOTE(review): confirm that is the intended size here
+  
+  // init the orig ptr: scanning starts from the same base address as the destination
+  orig->blocknum = 0;
+  orig->ptr=orig->base = to->base;
+  orig->bound = orig->base + BLOCKSIZE(orig->blocknum);
+}
+
+void getSpaceLocally(struct moveHelper *to) { //move the destination helper on to this core's next local block
+  //we have space on our core...just keep going
+  to->localblocknum++;
+  BASEPTR(to->base,BAMBOO_NUM_OF_CORE, to->localblocknum); //macro assigns to->base from (core number, local block number)
+  to->ptr=to->base; //writing restarts at the base of the new block
+  to->bound = to->base + BLOCKSIZE(to->localblocknum);
+}
+
+void getSpaceRemotely(struct moveHelper *to, unsigned int minimumbytes) {
+  //need to get another block from elsewhere: the request goes to STARTUPCORE
+  //clear the flag before sending so the spin loop below only exits once it has been set again
+  gctomove=false;
+  //send request for memory: carries this core's number, the current destination pointer and minimumbytes
+  send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, to->ptr, minimumbytes);
+  //busy-wait until gctomove is true (presumably set by the message handler on reply -- not visible here)
+  while(!gctomove) ;
+
+  //continue writing at the address found in gcmovestartaddr
+  to->ptr = gcmovestartaddr;
+
+  //set localblock number to high number to indicate this block isn't local
+  to->localblocknum = MAXBLOCK;
+  unsigned int globalblocknum;
+  BLOCKINDEX(globalblocknum, to->ptr); //global index of the block containing the new pointer
+  to->base = gcbaseva + OFFSET2BASEVA(globalblocknum); //start address of that block
+  to->bound = gcbaseva + BOUNDPTR(globalblocknum); //upper bound address of that block
+}
+
+void getSpace(struct moveHelper *to, unsigned int minimumbytes) {
+  //localblocknum has reached gcblock2fill: space must be requested remotely
+  if (to->localblocknum >= gcblock2fill) {
+    getSpaceRemotely(to, minimumbytes);
+    return;
+  }
+  getSpaceLocally(to); //still below gcblock2fill: take this core's next local block
+}
+
+void compacthelper(struct moveHelper * orig,struct moveHelper * to) { //loop: compact from orig into to, refilling either side as it runs out
+  while(true) {
+    unsigned int minimumbytes=compactblocks(orig, to); //a nonzero result is handed to getSpace below as the amount of space to obtain
+    if (orig->ptr==orig->bound) {
+      //source block fully consumed: need more data to compact
+      //advance to this core's next local block
+      orig->localblocknum++;
+      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
+      orig->ptr=orig->base;
+      orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
+      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE) //next block would start at or past the end of the shared memory range: stop
+	break;
+    }
+    if (minimumbytes!=0) {
+      getSpace(to, minimumbytes);
+    }
+  }
+  //same GCFINISHCOMPACT message as in getSpaceRemotely, but with 0 bytes requested and the final destination pointer
+  send_msg_4(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE, to->ptr, 0);
+}
+
+/* Should be invoked with interrupt turned off. */
+
+unsigned int assignSpareMem_I(unsigned int sourcecore, unsigned int requiredmem, void ** tomove, void ** startaddr) {
+  unsigned int blockindex;
+  BLOCKINDEX(blockindex, topptrs[sourcecore]);
+  void * boundptr = BOUNDPTR(blockindex);
   unsigned INTPTR remain = (unsigned INTPTR) (boundptr - topptrs[sourcecore]);
   unsigned int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
   *startaddr = topptrs[sourcecore];
@@ -38,7 +105,7 @@ INLINE int assignSpareMem_I(unsigned int sourcecore,unsigned int * requiredmem,
     return 0;
   } else {
     // next available block
-    gcfilledblocks[sourcecore] += 1;
+    gcfilledblocks[sourcecore]++;
     void * newbase = NULL;
     BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
     topptrs[sourcecore] = newbase;
@@ -46,391 +113,28 @@ INLINE int assignSpareMem_I(unsigned int sourcecore,unsigned int * requiredmem,
   }
 }
 
-INLINE int assignSpareMem(unsigned int sourcecore,unsigned int * requiredmem,unsigned int * tomove, void ** startaddr) {
+unsigned int assignSpareMem(unsigned int sourcecore,unsigned int requiredmem,unsigned int * tomove, void ** startaddr) {
   BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
   int retval=assignSpareMem_I(sourcecore, requiredmem, tomove, startaddr);
   BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
   return retval;
 }
 
-INLINE void compact2Heaptophelper_I(unsigned int coren, void ** p,unsigned int* numblocks,unsigned int* remain) {
-  unsigned int b;
-  unsigned int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
-  if(STARTUPCORE == coren) {
-    gctomove = true;
-    gcmovestartaddr = *p;
-    gcdstcore = gctopcore;
-    gcblock2fill = *numblocks + 1;
-  } else {
-    if(BAMBOO_CHECK_SEND_MODE()) {
-      cache_msg_4_I(coren,GCMOVESTART,gctopcore,*p,(*numblocks)+1);
-    } else {
-      send_msg_4_I(coren,GCMOVESTART,gctopcore,*p,(*numblocks)+1);
-    }
-  }
-  if(memneed < *remain) {
-    *p = *p + memneed;
-    gcrequiredmems[coren] = 0;
-    topptrs[gctopcore] += memneed;
-    *remain = *remain - memneed;
-  } else {
-    // next available block
-    *p = *p + *remain;
-    gcfilledblocks[gctopcore] += 1;
-    void * newbase = NULL;
-    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
-    topptrs[gctopcore] = newbase;
-    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
-    gcstopblock[gctopcore]++;
-    gctopcore = NEXTTOPCORE(gctopblock);
-    gctopblock++;
-    *numblocks = gcstopblock[gctopcore];
-    *p = topptrs[gctopcore];
-    BLOCKINDEX(*p, b);
-    *remain=GC_BLOCK_REMAIN_SIZE(b, (*p));
-  }  
-  gcmovepending--;
-} 
-
-INLINE void compact2Heaptop() {
-  // no cores with spare mem and some cores are blocked with pending move
-  // find the current heap top and make them move to the heap top
-  void * p = topptrs[gctopcore];
-  unsigned int numblocks = gcfilledblocks[gctopcore];
-  unsigned int b;
-  BLOCKINDEX(p, b);
-  unsigned int remain=GC_BLOCK_REMAIN_SIZE(b, p);
-  // check if the top core finishes
-  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-  if(gccorestatus[gctopcore] != 0) {
-    // let the top core finishes its own work first
-    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
-    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    return;
-  }
-  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-
-  for(int i = 0; i < NUMCORES4GC; i++) {
-    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
-      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
-      if(gccorestatus[gctopcore] != 0) {
-        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-        // the top core is not free now
-        return;
-      }
-    }  
-    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-  } 
-}
-
-INLINE void resolvePendingMoveRequest() {
-  int i;
-  int j;
-  bool nosparemem = true;
-  bool haspending = false;
-  bool hasrunning = false;
-  bool noblock = false;
-  unsigned int dstcore = 0;       // the core who need spare mem
-  unsigned int sourcecore = 0;       // the core who has spare mem
-  for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
-    if(nosparemem) {
-      // check if there are cores with spare mem
-      if(gccorestatus[i] == 0) {
-        // finished working, check if it still have spare mem
-        if(gcfilledblocks[i] < gcstopblock[i]) {
-          // still have spare mem
-          nosparemem = false;
-          sourcecore = i;
-        }  
-      }
-      i++;
-    }  
-    if(!haspending) {
-      if(gccorestatus[j] != 0) {
-        // not finished, check if it has pending move requests
-        if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
-          dstcore = j;
-          haspending = true;
-        } else {
-          hasrunning = true;
-        } 
-      } 
-      j++;
-    }  
-    if(!nosparemem && haspending) {
-      // find match
-      unsigned int tomove = 0;
-      unsigned int startaddr = 0;
-      gcrequiredmems[dstcore] = assignSpareMem(sourcecore,gcrequiredmems[dstcore],&tomove,&startaddr);
-      if(STARTUPCORE == dstcore) {
-        gcdstcore = sourcecore;
-        gctomove = true;
-        gcmovestartaddr = startaddr;
-        gcblock2fill = tomove;
-      } else {
-        send_msg_4(dstcore,GCMOVESTART,sourcecore,startaddr,tomove);
-      }
-      gcmovepending--;
-      nosparemem = true;
-      haspending = false;
-      noblock = true;
-    }
-  }  
-  
-  if(!hasrunning && !noblock) {
-    gc_status_info.gcphase = SUBTLECOMPACTPHASE;
-    compact2Heaptop();
-  }
-} 
-
-// If out of boundary of valid shared memory, return false, else return true
-INLINE bool nextSBlock(struct moveHelper * orig) {
-  orig->blockbase = orig->blockbound;
-  
-  bool sbchanged = false;
-  unsigned int origptr = orig->ptr;
-  unsigned int blockbase = orig->blockbase;
-  unsigned int blockbound = orig->blockbound;
-  unsigned int bound = orig->bound;
-outernextSBlock:
-  // check if across a big block
-  // TODO now do not zero out the whole memory, maybe the last two conditions
-  // are useless now
-  if((blockbase>=bound)||(origptr>=bound)||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
-  innernextSBlock:
-    // end of current heap block, jump to next one
-    orig->numblocks++;
-    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
-    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
-      // out of boundary
-      orig->ptr = orig->base; // set current ptr to out of boundary too
-      return false;
-    }
-    orig->blockbase = orig->base;
-    orig->sblockindex=(unsigned int)(orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
-    sbchanged = true;
-    unsigned int blocknum = 0;
-    BLOCKINDEX(orig->base, blocknum);
-    if(bamboo_smemtbl[blocknum] == 0) {
-      // goto next block
-      goto innernextSBlock;
-    }
-    // check the bamboo_smemtbl to decide the real bound
-    orig->bound = orig->base + bamboo_smemtbl[blocknum];
-  } else if(0 == ((unsigned INTPTR)orig->blockbase)%BAMBOO_SMEM_SIZE) {
-    orig->sblockindex += 1;
-    sbchanged = true;
-  }  
-
-  // check if this sblock should be skipped or have special start point
-  int sbstart = gcsbstarttbl[orig->sblockindex];
-  if(sbstart == -1) {
-    // goto next sblock
-    orig->sblockindex += 1;
-    orig->blockbase += BAMBOO_SMEM_SIZE;
-    goto outernextSBlock;
-  } else if((sbstart != 0) && (sbchanged)) {
-    // the first time to access this SBlock
-    // not start from the very beginning
-    orig->blockbase = sbstart;
-  } 
-
-  // setup information for this sblock
-  orig->blockbound = orig->blockbase+(unsigned int)*((int*)(orig->blockbase));
-  orig->offset = BAMBOO_CACHE_LINE_SIZE;
-  orig->ptr = orig->blockbase + orig->offset;
-  if(orig->ptr >= orig->bound) {
-    // met a lobj, move to next block
-    goto innernextSBlock;
-  }
-
-  return true;
-} 
-
-// return false if there are no available data to compact
-INLINE bool initOrig_Dst(struct moveHelper * orig,struct moveHelper * to) {
-  // init the dst ptr
-  to->numblocks = 0;
-  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
-  to->bound = BAMBOO_SMEM_SIZE_L;
-  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
+/* should be invoked with interrupt turned off */
 
-  void * tobase = to->base;
-  to->ptr = tobase + to->offset;
-
-  // init the orig ptr
-  orig->numblocks = 0;
-  orig->base = tobase;
-  unsigned int blocknum = 0;
-  BLOCKINDEX(orig->base, blocknum);
-  void * origbase = orig->base;
-  // check the bamboo_smemtbl to decide the real bound
-  orig->bound = origbase + (unsigned INTPTR)bamboo_smemtbl[blocknum];
-  orig->blockbase = origbase;
-  orig->sblockindex = (unsigned INTPTR)(origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
-
-  int sbstart = gcsbstarttbl[orig->sblockindex];
-  if(sbstart == -1) {
-    // goto next sblock
-    orig->blockbound=gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
-    return nextSBlock(orig);
-  } else if(sbstart != 0) {
-    orig->blockbase = sbstart;
-  }
-  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
-  orig->offset = BAMBOO_CACHE_LINE_SIZE;
-  orig->ptr = orig->blockbase + orig->offset;
-
-  return true;
-}
-
-INLINE void nextBlock(struct moveHelper * to) {
-  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
-  to->bound += BAMBOO_SMEM_SIZE;
-  to->numblocks++;
-  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
-  to->offset = BAMBOO_CACHE_LINE_SIZE;
-  to->ptr = to->base + to->offset;
-}
-
-INLINE unsigned int findValidObj(struct moveHelper * orig,struct moveHelper * to,int * type) {
-  unsigned int size = 0;
-  while(true) {
-    CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, to->ptr, false);
-    unsigned int origptr = (unsigned int)(orig->ptr);
-    unsigned int origbound = (unsigned int)orig->bound;
-    unsigned int origblockbound = (unsigned int)orig->blockbound;
-    if((origptr >= origbound) || (origptr == origblockbound)) {
-      if(!nextSBlock(orig)) {
-        // finished, no more data
-        return -1;
-      }
-      continue;
-    }
-    // check the obj's type, size and mark flag
-    *type = ((int *)(origptr))[0];
-    size = 0;
-    if(*type == 0) {
-      // end of this block, go to next one
-      if(!nextSBlock(orig)) {
-        // finished, no more data
-        return -1;
-      }
-      continue;
-    } else if(*type < NUMCLASSES) {
-      // a normal object
-      size = classsize[*type];
-    } else {
-      // an array
-      struct ArrayObject *ao=(struct ArrayObject *)(origptr);
-      unsigned int elementsize=classsize[*type];
-      unsigned int length=ao->___length___;
-      size=(unsigned int)sizeof(struct ArrayObject)+(unsigned int)(length*elementsize);
-    }
-    return size;
-  }
-}
-
-// endaddr does not contain spaces for headers
-INLINE bool moveobj(struct moveHelper * orig, struct moveHelper * to, unsigned int stopblock) {
-  if(stopblock == 0) {
-    return true;
-  }
-
-  int type = 0;
-  unsigned int size = findValidObj(orig, to, &type);
-  unsigned int isize = 0;
-
-  if(size == -1) {
-    // finished, no more data
-    return true;
-  }
-  ALIGNSIZE(size, &isize);       // no matter is the obj marked or not
-                                 // should be able to across
-  void * origptr = orig->ptr;
-  int markedstatus;
-  GETMARKED(markedstatus, origptr);
-  
-  if(markedstatus==MARKEDFIRST) {
-    unsigned int totop = (unsigned int)to->top;
-    unsigned int tobound = (unsigned int)to->bound;
-    BAMBOO_ASSERT(totop<=tobound);
-    GCPROFILE_RECORD_LIVE_OBJ();
-    // marked obj, copy it to current heap top
-    // check to see if remaining space is enough
-    if((unsigned int)(totop + isize) > tobound) {
-      // fill 0 indicating the end of this block
-      BAMBOO_MEMSET_WH(to->ptr,  '\0', tobound - totop);
-      // fill the header of this block and then go to next block
-      to->offset += tobound - totop;
-      CLOSEBLOCK(to->base, to->offset);
-#ifdef GC_CACHE_ADAPT
-      void * tmp_ptr = to->ptr;
-#endif 
-      nextBlock(to);
-      if((to->top+isize)>(to->bound)) tprintf("%x, %x, %d, %d, %d, %d \n", to->ptr, orig->ptr, to->top, to->bound, isize, size);
-      BAMBOO_ASSERT((to->top+isize)<=(to->bound));
-#ifdef GC_CACHE_ADAPT
-      CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, tmp_ptr, true);
-#endif 
-      if(stopblock == to->numblocks) {
-        // already fulfilled the block
-        return true;
-      }  
-    }
-    BAMBOO_ASSERT((to->top+isize)<=(to->bound));
-    // set the mark field to 2, indicating that this obj has been moved
-    // and need to be flushed
-    void * toptr = to->ptr;
-    if(toptr != origptr) {
-      if((unsigned int)(origptr) < (unsigned int)(toptr+size)) {
-        memmove(toptr, origptr, size);
-      } else {
-        memcpy(toptr, origptr, size);
-      }
-      // fill the remaining space with -2
-      BAMBOO_MEMSET_WH((unsigned int)(toptr+size), -2, isize-size);
-    }
-    // store mapping info
-    gcmappingtbl[OBJMAPPINGINDEX(origptr)]=(unsigned int)toptr;
-    gccurr_heaptop -= isize;
-    to->ptr += isize;
-    to->offset += isize;
-    to->top += isize;
-    BAMBOO_ASSERT((to->top)<=(to->bound));
-#ifdef GC_CACHE_ADAPT
-    void * tmp_ptr = to->ptr;
-#endif // GC_CACHE_ADAPT
-    if(to->top == to->bound) {
-      CLOSEBLOCK(to->base, to->offset);
-      nextBlock(to);
-    }
-#ifdef GC_CACHE_ADAPT
-    CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, tmp_ptr, true);
-#endif
-  } 
-  
-  // move to next obj
-  orig->ptr += isize; 
-  
-  return ((((unsigned int)(orig->ptr) > (unsigned int)(orig->bound))||((unsigned int)(orig->ptr) == (unsigned int)(orig->blockbound)))&&!nextSBlock(orig));
-} 
-
-// should be invoked with interrupt closed
-bool gcfindSpareMem_I(unsigned int * startaddr,unsigned int * tomove,unsigned int * dstcore,unsigned int requiredmem,unsigned int requiredcore) {
+void * gcfindSpareMem_I(unsigned int requiredmem,unsigned int requiredcore) {
+  void * startaddr;
   for(int k = 0; k < NUMCORES4GC; k++) {
     if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
       // check if this stopped core has enough mem
-      assignSpareMem_I(k, requiredmem, tomove, startaddr);
-      *dstcore = k;
-      return true;
+      assignSpareMem_I(k, requiredmem, tomove, &startaddr);
+      return startaddr;
     }
   }
-  // if can not find spare mem right now, hold the request
+  // If we cannot find spare mem right now, hold the request
   gcrequiredmems[requiredcore] = requiredmem;
   gcmovepending++;
-  return false;
+  return NULL;
 } 
 
 bool gcfindSpareMem(unsigned int * startaddr,unsigned int * tomove,unsigned int * dstcore,unsigned int requiredmem,unsigned int requiredcore) {
@@ -440,90 +144,52 @@ bool gcfindSpareMem(unsigned int * startaddr,unsigned int * tomove,unsigned int
   return retval;
 }
 
-bool compacthelper(struct moveHelper * orig,struct moveHelper * to,int * filledblocks, void ** heaptopptr,bool * localcompact, bool lbmove) {
-  bool loadbalancemove = lbmove;
-  // scan over all objs in this block, compact the marked objs
-  // loop stop when finishing either scanning all active objs or
-  // fulfilled the gcstopblock
-  while(true) {
-    while((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
-      if(moveobj(orig, to, gcblock2fill)) {
-	break;
-      }
-    }
-    CACHEADAPT_SAMPLING_DATA_CONVERT(to->ptr);
-    // if no objs have been compact, do nothing,
-    // otherwise, fill the header of this block
-    if(to->offset > (unsigned int)BAMBOO_CACHE_LINE_SIZE) {
-      CLOSEBLOCK(to->base, to->offset);
-    } else {
-      to->offset = 0;
-      to->ptr = to->base;
-      to->top -= BAMBOO_CACHE_LINE_SIZE;
-    }  
-    if(*localcompact) {
-      *heaptopptr = to->ptr;
-      *filledblocks = to->numblocks;
-    }
-    
-    // send msgs to core coordinator indicating that the compact is finishing
-    // send compact finish message to core coordinator
-    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-      gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
-      topptrs[BAMBOO_NUM_OF_CORE] = *heaptopptr;
-      //tprintf("--finish compact: %d, %d, %d, %x, %x \n", BAMBOO_NUM_OF_CORE, loadbalancemove, *filledblocks, *heaptopptr, gccurr_heaptop);
-      if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
-	// ask for more mem
-	gctomove = false;
-	if(gcfindSpareMem(&gcmovestartaddr,&gcblock2fill,&gcdstcore,gccurr_heaptop,BAMBOO_NUM_OF_CORE)) {
-	  gctomove = true;
-	} else {
-	  return false;
+/* Performance critical.  Walks orig's block and records in gcmappingtbl the
+ * address in to's block assigned to each marked object.  Returns 0 when the
+ * source block is exhausted, else the byte size of the object that did not fit. */
+unsigned int compactblocks(struct moveHelper * orig, struct moveHelper * to) {
+  void *toptr=to->ptr;
+  void *tobound=to->bound;
+  void *origptr=orig->ptr;
+  void *origbound=orig->bound;
+  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbase));
+  unsigned int objlength;
+  while(origptr<origbound) {
+    //Try to skip over stuff fast first
+    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbase);
+    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
+    if (!gcmarktbl[arrayoffset]) {
+      do {
+	arrayoffset++;
+	if (arrayoffset>=origendoffset) {
+	  //ran past the last table entry of this block: finished with block
+	  to->ptr=toptr;
+	  orig->ptr=origbound;
+	  return 0;
 	}
-      } else {
-	gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-	gctomove = false;
-	// write back to the Main Memory and release any DTLB entry for the 
-	// last block as someone else might later write into it
-	// flush the shared heap
-	//BAMBOO_CACHE_FLUSH_L2();
-	return true;
-      }
-    } else {
-      if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
-	// ask for more mem
-	gctomove = false;
-	send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,loadbalancemove,*filledblocks,*heaptopptr,gccurr_heaptop);
-      } else {
-	// finish compacting
-	send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,loadbalancemove,*filledblocks,*heaptopptr, 0);
-	// write back to the Main Memory and release any DTLB entry for the 
-	// last block as someone else might later write into it.
-	// flush the shared heap
-      }
+      } while(!gcmarktbl[arrayoffset]);
+      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
     }
-    
-    if(orig->ptr < gcmarkedptrbound) {
-      // still have unpacked obj
-      while(!gctomove) ;
-      BAMBOO_CACHE_MF();
-      loadbalancemove = true;
-      
-      gctomove = false;
-      to->ptr = gcmovestartaddr;
-      to->numblocks = gcblock2fill - 1;
-      to->bound = BLOCKBOUND(to->numblocks);
-      BASEPTR(gcdstcore, to->numblocks, &(to->base));
-      to->offset = to->ptr - to->base;
-      to->top=(to->numblocks==0)?(to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
-      to->base = to->ptr;
-      to->offset = BAMBOO_CACHE_LINE_SIZE;
-      to->ptr += to->offset;   // for header
-      to->top += to->offset;
-      *localcompact = (gcdstcore == BAMBOO_NUM_OF_CORE);
-      CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
+    //Scan more carefully next
+    objlength=getMarkedLength(origptr);
+    if (objlength!=NOTMARKED) {
+      unsigned int length=ALIGNSIZETOBYTES(objlength);
+      void *endtoptr=toptr+length;
+      if (endtoptr>tobound) {
+	to->ptr=tobound;
+	orig->ptr=origptr;
+	return length;
+      }
+      //good to move objects and update pointers
+      gcmappingtbl[OBJMAPPINGINDEX(origptr)]=toptr;
+      origptr+=length;
+      toptr=endtoptr;
     } else
-      return true;
+      origptr+=ALIGNSIZE;
   }
+  //loop ended without an early return: publish both cursors, nothing pending
+  to->ptr=toptr;
+  orig->ptr=origptr;
+  return 0;
 }
 
@@ -534,110 +200,47 @@ void compact() {
   // initialize structs for compacting
   struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
   struct moveHelper to={0,NULL,NULL,0,NULL,0,0,0,0};
-  if(!initOrig_Dst(&orig, &to)) {
-    // no available data to compact
-    // send compact finish msg to STARTUP core
-    send_msg_6(STARTUPCORE,GCFINISHCOMPACT,BAMBOO_NUM_OF_CORE,false,0,to.base,0);
-  } else {
-    CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
+  initOrig_Dst(&orig, &to);
 
-    unsigned int filledblocks = 0;
-    void * heaptopptr = NULL;
-    bool localcompact = true;
-    compacthelper(&orig, &to, &filledblocks, &heaptopptr, &localcompact, false);
-  }
+  CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
+
+  compacthelper(&orig, &to);
 } 
 
 void master_compact() {
   // predict number of blocks to fill for each core
   void * tmpheaptop = 0;
-  int numpbc = loadbalance(&tmpheaptop);
-
-  numpbc = BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE;
+  int numblockspercore = loadbalance(&tmpheaptop, &gctopblock, &gctopcore);
+  
   GC_PRINTF("mark phase finished \n");
   
+  gc_resetCoreStatus();
   tmpheaptop = gcbaseva + BAMBOO_SHARED_MEM_SIZE;
   for(int i = 0; i < NUMCORES4GC; i++) {
-    unsigned int tmpcoreptr = 0;
-    BASEPTR(i, numpbc, &tmpcoreptr);
     // init some data strutures for compact phase
-    gcloads[i] = NULL;
     gcfilledblocks[i] = 0;
     gcrequiredmems[i] = 0;
     gccorestatus[i] = 1;
     //send start compact messages to all cores
-    gcstopblock[i] = numpbc+1;
+    gcstopblock[i] = numblockspercore;
     if(i != STARTUPCORE) {
-      send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
+      send_msg_2(i, GCSTARTCOMPACT, numblockspercore);
     } else {
-      gcblock2fill = numpbc+1;
+      gcblock2fill = numblockspercore;
     }
   }
   BAMBOO_CACHE_MF();
   GCPROFILE_ITEM();
   // compact phase
-  compact_master();
-  GCPROFILE_ITEM();
-  GC_PRINTF("prepare to move large objs \n");
-  // move largeObjs
-  moveLObjs();
-  GC_PRINTF("compact phase finished \n");
-}
-
+  compact();
+  /* wait for all cores to finish compacting */
 
-void compact_master() {
-  // initialize pointers for compacting
-  struct moveHelper orig={0,NULL,NULL,0,NULL,0,0,0,0};
-  struct moveHelper to={0,NULL,NULL,0,NULL,0,0,0,0};
+  while(!gc_checkCoreStatus())
+    ; // gc_checkCoreStatus() turns true once all cores have finished
 
-  initOrig_Dst(&orig, &to);
-  CACHEADAPT_SAMPLING_DATA_REVISE_INIT(&orig, &to);
-  int filledblocks = 0;
-  void * heaptopptr = NULL;
-  bool finishcompact = false;
-  bool iscontinue = true;
-  bool localcompact = true;
-  bool lbmove = false;
-  while((COMPACTPHASE == gc_status_info.gcphase) || (SUBTLECOMPACTPHASE == gc_status_info.gcphase)) {
-    if((!finishcompact) && iscontinue) {
-      finishcompact = compacthelper(&orig,&to,&filledblocks,&heaptopptr,&localcompact, lbmove);
-    }
-    
-    if(gc_checkCoreStatus()) {
-      // all cores have finished compacting restore the gcstatus of all cores
-      gc_resetCoreStatus();
-      break;
-    } else {
-      // check if there are spare mem for pending move requires
-      if(COMPACTPHASE == gc_status_info.gcphase) {
-        resolvePendingMoveRequest();
-      } else {
-        compact2Heaptop();
-      }
-    } 
+  GCPROFILE_ITEM();
 
-    if(gctomove) {
-      BAMBOO_CACHE_MF();
-      to.ptr = gcmovestartaddr;
-      to.numblocks = gcblock2fill - 1;
-      to.bound = BLOCKBOUND(to.numblocks);
-      BASEPTR(gcdstcore, to.numblocks, &(to.base));
-      to.offset = to.ptr - to.base;
-      to.top = (to.numblocks==0)?(to.offset):(to.bound-BAMBOO_SMEM_SIZE+to.offset);
-      to.base = to.ptr;
-      to.offset = BAMBOO_CACHE_LINE_SIZE;
-      to.ptr += to.offset;  // for header
-      to.top += to.offset;
-      localcompact = (gcdstcore == BAMBOO_NUM_OF_CORE);
-      gctomove = false;
-      iscontinue = true;
-      lbmove = true;
-    } else if(!finishcompact) {
-      // still pending
-      iscontinue = false;
-      lbmove = false;
-    }
-  }
+  GC_PRINTF("compact phase finished \n");
 }
 
 #endif // MULTICORE_GC
diff --git a/Robust/src/Runtime/bamboo/multicoregccompact.h b/Robust/src/Runtime/bamboo/multicoregccompact.h
index 73f9f863..8af36d15 100644
--- a/Robust/src/Runtime/bamboo/multicoregccompact.h
+++ b/Robust/src/Runtime/bamboo/multicoregccompact.h
@@ -5,24 +5,15 @@
 #include "multicore.h"
 
 struct moveHelper {
-  unsigned int numblocks;       // block num for heap
-  void * base;       // base virtual address of current heap block
-  void * ptr;       // virtual address of current heap top
-  unsigned int offset;       // offset in current heap block
-  void * blockbase;   // virtual address of current small block to check
-  unsigned int blockbound;     // bound virtual address of current small blcok
-  unsigned int sblockindex;       // index of the small blocks
-  unsigned int top;       // real size of current heap block to check
-  unsigned int bound;       // bound size of current heap block to check
+  unsigned int localblocknum;   // local block num for heap
+  void * base;             // base virtual address of current heap block
+  void * ptr;              // current pointer into block
+  void * bound;            // upper bound of current block
 };
 
-// compute the remaining size of block #b
-// p--ptr
-
-#define GC_BLOCK_REMAIN_SIZE(b, p) \
-  b<NUMCORES4GC?BAMBOO_SMEM_SIZE_L-(((unsigned INTPTR)(p-gcbaseva))%BAMBOO_SMEM_SIZE_L):BAMBOO_SMEM_SIZE-(((unsigned INTPTR)(p-gcbaseva))%BAMBOO_SMEM_SIZE)
-
-bool gcfindSpareMem_I(unsigned int * startaddr,unsigned int * tomove,unsigned int * dstcore,unsigned int requiredmem,unsigned int requiredcore);
+void initOrig_Dst(struct moveHelper * orig,struct moveHelper * to);
+void compacthelper(struct moveHelper * orig,struct moveHelper * to);
+unsigned int compactblocks(struct moveHelper * orig,struct moveHelper * to); // 0, or size of the object that did not fit
 void compact();
 void compact_master(struct moveHelper * orig, struct moveHelper * to);
 #endif // MULTICORE_GC
diff --git a/Robust/src/Runtime/bamboo/multicoregcflush.c b/Robust/src/Runtime/bamboo/multicoregcflush.c
index e18cc596..26ed5020 100644
--- a/Robust/src/Runtime/bamboo/multicoregcflush.c
+++ b/Robust/src/Runtime/bamboo/multicoregcflush.c
@@ -24,32 +24,31 @@ extern struct lockvector bamboo_threadlocks;
 #endif
 
 // NOTE: the objptr should not be NULL and should not be non shared ptr
-#define FLUSHOBJ(obj, tt) {void *flushtmpptr=obj; if (flushtmpptr!=NULL) obj=flushObj(flushtmpptr);}
-#define FLUSHOBJNONNULL(obj, tt) {void *flushtmpptr=obj; obj=flushObj(flushtmpptr);}
+#define updateObj(objptr) gcmappingtbl[OBJMAPPINGINDEX(objptr)]
+#define UPDATEOBJ(obj, tt) {void *updatetmpptr=obj; if (updatetmpptr!=NULL) obj=updateObj(updatetmpptr);}
+#define UPDATEOBJNONNULL(obj, tt) {void *updatetmpptr=obj; obj=updateObj(updatetmpptr);}
+
 
-INLINE void * flushObj(void * objptr) {
-  return gcmappingtbl[OBJMAPPINGINDEX(objptr)];
-}
 
 INLINE void updategarbagelist(struct garbagelist *listptr) {
   for(;listptr!=NULL; listptr=listptr->next) {
     for(int i=0; i<listptr->size; i++) {
-      FLUSHOBJ(listptr->array[i], i);
+      UPDATEOBJ(listptr->array[i], i);
     }
   }
 }
 
-INLINE void flushRuntimeObj(struct garbagelist * stackptr) {
-  // flush current stack
+INLINE void updateRuntimePtrs(struct garbagelist * stackptr) {
+  // update current stack
   updategarbagelist(stackptr);
 
-  // flush static pointers global_defs_p
+  // update static pointers global_defs_p
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
     updategarbagelist((struct garbagelist *)global_defs_p);
   }
 
 #ifdef TASK
-  // flush objectsets
+  // update objectsets
   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
     for(int i=0; i<NUMCLASSES; i++) {
       struct parameterwrapper ** queues = objectqueues[BAMBOO_NUM_OF_CORE][i];
@@ -58,72 +57,72 @@ INLINE void flushRuntimeObj(struct garbagelist * stackptr) {
         struct parameterwrapper * parameter = queues[j];
         struct ObjectHash * set=parameter->objectset;
         for(struct ObjectNode * ptr=set->listhead;ptr!=NULL;ptr=ptr->lnext) {
-          FLUSHOBJNONNULL(ptr->key, 0);
+          UPDATEOBJNONNULL(ptr->key, 0);
         }
         ObjectHashrehash(set);
       }
     }
   }
 
-  // flush current task descriptor
+  // update current task descriptor
   if(currtpd != NULL) {
     for(int i=0; i<currtpd->numParameters; i++) {
       // the parameter can not be NULL
-      FLUSHOBJNONNULL(currtpd->parameterArray[i], i);
+      UPDATEOBJNONNULL(currtpd->parameterArray[i], i);
     }
   }
 
-  // flush active tasks
+  // update active tasks
   if(activetasks != NULL) {
     for(struct genpointerlist * ptr=activetasks->list;ptr!=NULL;ptr=ptr->inext){
       struct taskparamdescriptor *tpd=ptr->src;
       for(int i=0; i<tpd->numParameters; i++) {
         // the parameter can not be NULL
-	FLUSHOBJNONNULL(tpd->parameterArray[i], i);
+	UPDATEOBJNONNULL(tpd->parameterArray[i], i);
       }
     }
     genrehash(activetasks);
   }
 
-  // flush cached transferred obj
+  // update cached transferred obj
   for(struct QueueItem * tmpobjptr =  getHead(&objqueue);tmpobjptr != NULL;tmpobjptr = getNextQueueItem(tmpobjptr)) {
     struct transObjInfo * objInfo=(struct transObjInfo *)(tmpobjptr->objectptr);
     // the obj can not be NULL
-    FLUSHOBJNONNULL(objInfo->objptr, 0);
+    UPDATEOBJNONNULL(objInfo->objptr, 0);
   }
 
-  // flush cached objs to be transferred
+  // update cached objs to be transferred
   for(struct QueueItem * item = getHead(totransobjqueue);item != NULL;item = getNextQueueItem(item)) {
     struct transObjInfo * totransobj = (struct transObjInfo *)(item->objectptr);
     // the obj can not be NULL
-    FLUSHOBJNONNULL(totransobj->objptr, 0);
+    UPDATEOBJNONNULL(totransobj->objptr, 0);
   }  
 
   // enqueue lock related info
   for(int i = 0; i < runtime_locklen; ++i) {
-    FLUSHOBJ(runtime_locks[i].redirectlock, i);
-    FLUSHOBJ(runtime_locks[i].value, i);
+    UPDATEOBJ(runtime_locks[i].redirectlock, i);
+    UPDATEOBJ(runtime_locks[i].value, i);
   }
 #endif
 
 #ifdef MGC
-  // flush the bamboo_threadlocks
+  // update the bamboo_threadlocks
   for(int i = 0; i < bamboo_threadlocks.index; i++) {
     // the locked obj can not be NULL
-    FLUSHOBJNONNULL(bamboo_threadlocks.locks[i].object, i);
+    UPDATEOBJNONNULL(bamboo_threadlocks.locks[i].object, i);
   }
 
-  // flush the bamboo_current_thread
-  FLUSHOBJ(bamboo_current_thread, 0);
+  // update the bamboo_current_thread
+  UPDATEOBJ(bamboo_current_thread, 0);
 
-  // flush global thread queue
+  // update global thread queue
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
     unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
     if(thread_counter > 0) {
       unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
       for(int i = thread_counter; i > 0; i--) {
         // the thread obj can not be NULL
-        FLUSHOBJNONNULL(bamboo_thread_queue[4+start], 0);
+        UPDATEOBJNONNULL(bamboo_thread_queue[4+start], 0);
         start = (start+1)&bamboo_max_thread_num_mask;
       }
     }
@@ -132,7 +131,7 @@ INLINE void flushRuntimeObj(struct garbagelist * stackptr) {
 #endif
 }
 
-INLINE void flushPtrsInObj(void * ptr) {
+INLINE void updatePtrsInObj(void * ptr) {
   int type = ((int *)(ptr))[0];
   // scan all pointers in ptr
   unsigned int * pointer=pointerarray[type];
@@ -144,7 +143,7 @@ INLINE void flushPtrsInObj(void * ptr) {
     unsigned int size=pointer[0];
     for(int i=1; i<=size; i++) {
       unsigned int offset=pointer[i];
-      FLUSHOBJ(*((void **)(((char *)ptr)+offset)), i);
+      UPDATEOBJ(*((void **)(((char *)ptr)+offset)), i);
     }
 #endif
   } else if (((unsigned int)pointer)==1) {
@@ -152,7 +151,7 @@ INLINE void flushPtrsInObj(void * ptr) {
     struct ArrayObject *ao=(struct ArrayObject *) ptr;
     int length=ao->___length___;
     for(int j=0; j<length; j++) {
-      FLUSHOBJ(((void **)(((char *)&ao->___length___)+sizeof(int)))[j], j);
+      UPDATEOBJ(((void **)(((char *)&ao->___length___)+sizeof(int)))[j], j);
     }
 #ifdef OBJECTHASPOINTERS
     pointer=pointerarray[OBJECTTYPE];
@@ -161,7 +160,7 @@ INLINE void flushPtrsInObj(void * ptr) {
     
     for(int i=1; i<=size; i++) {
       unsigned int offset=pointer[i];     
-      FLUSHOBJ(*((void **)(((char *)ptr)+offset)), i);
+      UPDATEOBJ(*((void **)(((char *)ptr)+offset)), i);
     }
 #endif
   } else {
@@ -169,56 +168,117 @@ INLINE void flushPtrsInObj(void * ptr) {
     
     for(int i=1; i<=size; i++) {
       unsigned int offset=pointer[i];
-      FLUSHOBJ(*((void **)(((char *)ptr)+offset)), i);
+      UPDATEOBJ(*((void **)(((char *)ptr)+offset)), i);
     }
   }  
 }
 
-void flush(struct garbagelist * stackptr) {
-  BAMBOO_CACHE_MF();
+/* Performance critical.  Copies each marked object in orig's block to the
+ * address stored for it in gcmappingtbl, updates the pointers inside the copy
+ * and clears the mark.  Returns 0 when the source block is exhausted, else the
+ * byte size of an object whose destination end is outside [to->base,to->bound]. */
+unsigned int updateblocks(struct moveHelper * orig, struct moveHelper * to) {
+  void *tobase=to->base;
+  void *tobound=to->bound;
+  void *origptr=orig->ptr;
+  void *origbound=orig->bound;
+  unsigned INTPTR origendoffset=ALIGNTOTABLEINDEX((unsigned INTPTR)(origbound-gcbase));
+  unsigned int objlength;
+
+  while(origptr<origbound) {
+    //Try to skip over stuff fast first
+    unsigned INTPTR offset=(unsigned INTPTR) (origptr-gcbase);
+    unsigned INTPTR arrayoffset=ALIGNTOTABLEINDEX(offset);
+    if (!gcmarktbl[arrayoffset]) {
+      do {
+	arrayoffset++;
+	if (arrayoffset>=origendoffset) {
+	  //ran past the last table entry of this block: finished with block
+	  origptr=origbound;
+	  orig->ptr=origptr;
+	  return 0;
+	}
+      } while(!gcmarktbl[arrayoffset]);
+      origptr=CONVERTTABLEINDEXTOPTR(arrayoffset);
+    }
+
+    //Scan more carefully next
+    objlength=getMarkedLength(origptr);
+    void *dstptr=gcmappingtbl[OBJMAPPINGINDEX(origptr)];
+    if (objlength!=NOTMARKED) {
+      unsigned int length=ALIGNSIZETOBYTES(objlength);
+      void *endtoptr=dstptr+length;
+
+      if (endtoptr>tobound||endtoptr<tobase) {
+	to->ptr=tobound;
+	orig->ptr=origptr;
+	return length;
+      }
+      /* Move the object; memmove whenever source and destination may overlap */
+      if(origptr <= dstptr+length) {
+        memmove(dstptr, origptr, length);
+      } else {
+        memcpy(dstptr, origptr, length);
+      }
+
+      /* Update the pointers in the object */
+      updatePtrsInObj(dstptr);
+
+      /* Clear the mark */
+      clearMark(origptr);
+
+      //good to move objects and update pointers
+      origptr+=length;
+    } else
+      origptr+=ALIGNSIZE;
+  }
+  orig->ptr=origptr;
+  return 0;
+}
 
-  flushRuntimeObj(stackptr);
-  while(gc_moreItems()) {
-    void * ptr = (void *) gc_dequeue();
-    // should be a local shared obj and should have mapping info
-    FLUSHOBJNONNULL(ptr, 0);
-    BAMBOO_ASSERT(ptr != NULL);
-    int markedstatus;
-    GETMARKED(markedstatus, ptr);
-
-    if(markedstatus==MARKEDFIRST) {
-      flushPtrsInObj((void *)ptr);
-      // restore the mark field, indicating that this obj has been flushed
-      RESETMARKED(ptr);
+void updatehelper(struct moveHelper * orig,struct moveHelper * to) {
+  while(true) {
+    unsigned int minimumbytes=updateblocks(orig, to);
+    if (orig->ptr==orig->bound) {
+      //need more data to compact
+      //increment the core
+      orig->localblocknum++;
+      BASEPTR(orig->base,BAMBOO_NUM_OF_CORE, orig->localblocknum);
+      orig->ptr=orig->base;
+      orig->bound = orig->base + BLOCKSIZE(orig->localblocknum);
+      if (orig->base >= gcbaseva+BAMBOO_SHARED_MEM_SIZE)
+	break;
     }
-  } 
-
-  // TODO bug here: the startup core contains all lobjs' info, thus all the
-  // lobjs are flushed in sequence.
-  // flush lobjs
-  while(gc_lobjmoreItems_I()) {
-    void * ptr = (void *) gc_lobjdequeue_I(NULL, NULL);
-    FLUSHOBJ(ptr, 0);
-    BAMBOO_ASSERT(ptr!=NULL);
-
-    int markedstatus;
-    GETMARKED(markedstatus, ptr);
-
-    if(markedstatus==MARKEDFIRST) {
-      flushPtrsInObj(ptr);
-      // restore the mark field, indicating that this obj has been flushed
-      RESETMARKED(ptr);
-    }     
-  } 
-
-  // send flush finish message to core coordinator
+    if (minimumbytes!=0) {
+      getSpace(to, minimumbytes);
+    }
+  }
+}
+
+/* Sets up the source/destination cursors with initOrig_Dst and runs updatehelper on them. */
+void updateheap() {
+  BAMBOO_CACHE_MF();
+  // struct moveHelper has four fields: localblocknum, base, ptr, bound
+  struct moveHelper orig={0,NULL,NULL,NULL};
+  struct moveHelper to={0,NULL,NULL,NULL};
+  initOrig_Dst(&orig, &to);
+
+  updatehelper(&orig, &to);
+}
+
+void update(struct garbagelist * stackptr) {
+  BAMBOO_CACHE_MF();
+
+  updateRuntimePtrs(stackptr);
+
+  updateheap();
+
+  // send update finish message to core coordinator
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
     gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
   } else {
-    send_msg_2(STARTUPCORE,GCFINISHFLUSH,BAMBOO_NUM_OF_CORE);
+    send_msg_2(STARTUPCORE,GCFINISHUPDATE,BAMBOO_NUM_OF_CORE);
   }
-
-  //tprintf("flush: %lld \n", BAMBOO_GET_EXE_TIME()-tmpt); // TODO
 } 
 
 #endif // MULTICORE_GC
diff --git a/Robust/src/Runtime/bamboo/multicoregcmark.c b/Robust/src/Runtime/bamboo/multicoregcmark.c
index d349f085..ae8a4943 100644
--- a/Robust/src/Runtime/bamboo/multicoregcmark.c
+++ b/Robust/src/Runtime/bamboo/multicoregcmark.c
@@ -297,7 +297,6 @@ void mark(bool isfirst, struct garbagelist * stackptr) {
       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
       gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
       gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE]=gcself_numreceiveobjs;
-      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
     } else {
       if(!sendStall) {
         send_msg_4(STARTUPCORE,GCFINISHMARK,BAMBOO_NUM_OF_CORE,gcself_numsendobjs,gcself_numreceiveobjs);
diff --git a/Robust/src/Runtime/bamboo/multicoremsg.c b/Robust/src/Runtime/bamboo/multicoremsg.c
index cf3cdaec..0d586b9d 100644
--- a/Robust/src/Runtime/bamboo/multicoremsg.c
+++ b/Robust/src/Runtime/bamboo/multicoremsg.c
@@ -31,17 +31,17 @@ int msgsizearray[] = {
   1, //GCSTARTINIT,           // 0xE3
   1, //GCSTART,               // 0xE4
   2, //GCSTARTCOMPACT,        // 0xE5
-  1, //GCSTARTFLUSH,          // 0xE6
+  1, //GCSTARTUPDATE,          // 0xE6
   4, //GCFINISHPRE,           // 0xE7
   2, //GCFINISHINIT,          // 0xE8
   4, //GCFINISHMARK,          // 0xE9
-  6, //GCFINISHCOMPACT,       // 0xEa
-  2, //GCFINISHFLUSH,         // 0xEb
+  4, //GCFINISHCOMPACT,       // 0xEa
+  2, //GCFINISHUPDATE,         // 0xEb
   1, //GCFINISH,              // 0xEc
   1, //GCMARKCONFIRM,         // 0xEd
   5, //GCMARKREPORT,          // 0xEe
   2, //GCMARKEDOBJ,           // 0xEf
-  4, //GCMOVESTART,           // 0xF0
+  2, //GCMOVESTART,           // 0xF0
   1, //GCLOBJREQUEST,         // 0xF1   
  -1, //GCLOBJINFO,            // 0xF2
 #ifdef GC_PROFILE
@@ -443,8 +443,8 @@ INLINE void processmsg_gcstartcompact_I() {
   gc_status_info.gcphase = COMPACTPHASE;
 }
 
-INLINE void processmsg_gcstartflush_I() {
-  gc_status_info.gcphase = FLUSHPHASE;
+INLINE void processmsg_gcstartupdate_I() {
+  gc_status_info.gcphase = UPDATEPHASE;
 }
 
 INLINE void processmsg_gcfinishpre_I() {
@@ -507,46 +507,35 @@ INLINE void processmsg_gcfinishcompact_I() {
 
   int cnum = msgdata[msgdataindex];
   MSG_INDEXINC_I();  
-  bool loadbalancemove = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int filledblocks = msgdata[msgdataindex];
-  MSG_INDEXINC_I();    
   void * heaptop = (void *) msgdata[msgdataindex];
   MSG_INDEXINC_I();   
-  int data4 = msgdata[msgdataindex];
+  unsigned int bytesneeded = msgdata[msgdataindex];
   MSG_INDEXINC_I(); 
-  // only gc cores need to do compact
-  if(cnum < NUMCORES4GC) {
-    if(!loadbalancemove && (COMPACTPHASE == gc_status_info.gcphase)) {
-      gcfilledblocks[cnum] = filledblocks;
-      topptrs[cnum] = heaptop;
-    }
-    if(data4 > 0) {
-      // ask for more mem
-      void * startaddr = NULL;
-      int tomove = 0;
-      int dstcore = 0;
-      if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) {
-        // cache the msg first
-        if(BAMBOO_CHECK_SEND_MODE()) {
-          cache_msg_4_I(cnum,GCMOVESTART,dstcore,startaddr,tomove);
-        } else {
-          send_msg_4_I(cnum,GCMOVESTART,dstcore,startaddr,tomove);
-        }
+
+  if(bytesneeded > 0) {
+    // ask for more mem
+    void * startaddr = gcfindSpareMem_I(bytesneeded, cnum);
+    if(startaddr) {
+      // cache the msg first
+      if(BAMBOO_CHECK_SEND_MODE()) {
+	cache_msg_2_I(cnum,GCMOVESTART,startaddr); // GCMOVESTART is 2 words: type + start address
+      } else {
+	send_msg_2_I(cnum,GCMOVESTART,startaddr);
       }
-    } else {
-      gccorestatus[cnum] = 0;
-    } 
-  }  
+    }
+  } else {
+    //done with compacting
+    gccorestatus[cnum] = 0;
+  }
 }
 
-INLINE void processmsg_gcfinishflush_I() {
+INLINE void processmsg_gcfinishupdate_I() {
   int data1 = msgdata[msgdataindex];
   MSG_INDEXINC_I();
-  // received a flush phase finish msg
+  // received an update phase finish msg
   BAMBOO_ASSERT(BAMBOO_NUM_OF_CORE == STARTUPCORE);
 
-  // all cores should do flush
+  // all cores should do update
   if(data1 < NUMCORESACTIVE) {
     gccorestatus[data1] = 0;
   }
@@ -610,12 +599,8 @@ INLINE void processmsg_gcmarkedobj_I() {
 
 INLINE void processmsg_gcmovestart_I() {
   gctomove = true;
-  gcdstcore = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       
   gcmovestartaddr = msgdata[msgdataindex];
   MSG_INDEXINC_I();     
-  gcblock2fill = msgdata[msgdataindex];
-  MSG_INDEXINC_I();     
 }
 
 INLINE void processmsg_gclobjinfo_I(unsigned int data1) {
@@ -675,7 +660,7 @@ INLINE void processmsg_gcfinishcachepolicy_I() {
   MSG_INDEXINC_I();
   BAMBOO_ASSERT(BAMBOO_NUM_OF_CORE == STARTUPCORE);
 
-  // all cores should do flush
+  // all cores should do update
   if(data1 < NUMCORESACTIVE) {
     gccorestatus[data1] = 0;
   }
@@ -688,10 +673,10 @@ INLINE void processmsg_gcstartpref_I() {
 INLINE void processmsg_gcfinishpref_I() {
   int data1 = msgdata[msgdataindex];
   MSG_INDEXINC_I();
-  // received a flush phase finish msg
+  // received a update phase finish msg
   BAMBOO_ASSERT(BAMBOO_NUM_OF_CORE == STARTUPCORE);
 
-  // all cores should do flush
+  // all cores should do update
   if(data1 < NUMCORESACTIVE) {
     gccorestatus[data1] = 0;
   }
@@ -883,9 +868,9 @@ processmsg:
       break;
     }
 
-    case GCSTARTFLUSH: {
-      // received a flush phase start msg
-      processmsg_gcstartflush_I();
+    case GCSTARTUPDATE: {
+      // received an update phase start msg
+      processmsg_gcstartupdate_I();
       break;
     }
 
@@ -910,8 +895,8 @@ processmsg:
       break;
     }
 
-    case GCFINISHFLUSH: {
-      processmsg_gcfinishflush_I();
+    case GCFINISHUPDATE: {
+      processmsg_gcfinishupdate_I();
       break;
     }  
 
diff --git a/Robust/src/Runtime/bamboo/multicoremsg.h b/Robust/src/Runtime/bamboo/multicoremsg.h
index fb66bf7d..89f9a670 100644
--- a/Robust/src/Runtime/bamboo/multicoremsg.h
+++ b/Robust/src/Runtime/bamboo/multicoremsg.h
@@ -73,11 +73,11 @@ volatile bool isMsgHanging;
  *      12 -- GC init phase start
  *      13 -- GC start
  *      14 -- compact phase start
- *      15 -- flush phase start
+ *      15 -- update phase start
  *      16 -- init phase finish
  *      17 -- mark phase finish
  *      18 -- compact phase finish
- *      19 -- flush phase finish
+ *      19 -- update phase finish
  *      1a -- GC finish
  *      1b -- marked phase finish confirm request
  *      1c -- marked phase finish confirm response
@@ -175,12 +175,12 @@ typedef enum {
   GCSTARTINIT,           // 0xE3
   GCSTART,               // 0xE4
   GCSTARTCOMPACT,        // 0xE5
-  GCSTARTFLUSH,          // 0xE6
+  GCSTARTUPDATE,          // 0xE6
   GCFINISHPRE,           // 0xE7
   GCFINISHINIT,          // 0xE8
   GCFINISHMARK,          // 0xE9
   GCFINISHCOMPACT,       // 0xEa
-  GCFINISHFLUSH,         // 0xEb
+  GCFINISHUPDATE,         // 0xEb
   GCFINISH,              // 0xEc
   GCMARKCONFIRM,         // 0xEd
   GCMARKREPORT,          // 0xEe
-- 
2.34.1