changes on compact phase
authorjzhou <jzhou>
Tue, 18 Aug 2009 02:16:01 +0000 (02:16 +0000)
committerjzhou <jzhou>
Tue, 18 Aug 2009 02:16:01 +0000 (02:16 +0000)
Robust/src/Runtime/multicoregarbage.c
Robust/src/Runtime/multicoregarbage.h
Robust/src/Runtime/multicoreruntime.h
Robust/src/Runtime/multicoretask.c

index f18814811ef83bcf30c7f9d6f8e7da1431e83c4c..d609d2a89790ae711b23f8da6121fd83d7a210db 100644 (file)
@@ -9,73 +9,135 @@ extern struct genhashtable * activetasks;
 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
 extern struct taskparamdescriptor *currtpdo;
 
-struct largeObjList {
-       struct largeObjItem * head;
-       struct largeObjItem * tail;
-};
-
-struct largeObjList lObjList;
-
-#define NUMPTRS 100
-
-void gc_enqueue(void *ptr) {
+inline void gc_enqueue(void *ptr) {
   if (gcheadindex==NUMPTRS) {
     struct pointerblock * tmp;
     if (gcspare!=NULL) {
       tmp=gcspare;
       gcspare=NULL;
-    } else
+    } else {
       tmp=malloc(sizeof(struct pointerblock));
+               } // if (gcspare!=NULL)
     gchead->next=tmp;
     gchead=tmp;
     gcheadindex=0;
-  }
+  } // if (gcheadindex==NUMPTRS)
   gchead->ptrs[gcheadindex++]=ptr;
-}
+} // void gc_enqueue(void *ptr)
 
 // dequeue and destroy the queue
-void * gc_dequeue() {
+inline void * gc_dequeue() {
   if (gctailindex==NUMPTRS) {
-    struct pointerblock *tmp=tail;
+    struct pointerblock *tmp=gctail;
     gctail=gctail->next;
     gctailindex=0;
-    if (gcspare!=NULL)
+    if (gcspare!=NULL) {
       free(tmp);
-    else
+               } else {
       gcspare=tmp;
-  }
+               } // if (gcspare!=NULL)
+  } // if (gctailindex==NUMPTRS)
   return gctail->ptrs[gctailindex++];
-}
+} // void * gc_dequeue()
 
 // dequeue and do not destroy the queue
-void * gc_dequeue2() {
+inline void * gc_dequeue2() {
        if (gctailindex2==NUMPTRS) {
-    struct pointerblock *tmp=tail;
+    struct pointerblock *tmp=gctail2;
     gctail2=gctail2->next;
     gctailindex2=0;
-  }
+  } // if (gctailindex2==NUMPTRS)
   return gctail2->ptrs[gctailindex2++];
-}
+} // void * gc_dequeue2() 
 
-int gc_moreItems() {
+inline int gc_moreItems() {
   if ((gchead==gctail)&&(gctailindex==gcheadindex))
     return 0;
   return 1;
-}
+} // int gc_moreItems() 
 
-int gc_moreItems2() {
+inline int gc_moreItems2() {
   if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
     return 0;
   return 1;
-}
+} // int gc_moreItems2()
+
+// enqueue a large obj: start addr & length
+inline void gc_lobjenqueue(void *ptr, 
+                                      int length, 
+                                                                                      int host = 0) {
+  if (gclobjheadindex==NUMLOBJPTRS) {
+    struct lobjpointerblock * tmp;
+    if (gclobjspare!=NULL) {
+      tmp=gclobjspare;
+      gclobjspare=NULL;
+    } else {
+      tmp=malloc(sizeof(struct lobjpointerblock));
+               } // if (gclobjspare!=NULL)
+    gclobjhead->next=tmp;
+    gclobjhead=tmp;
+    gclobjheadindex=0;
+  } // if (gclobjheadindex==NUMLOBJPTRS)
+  gclobjhead->lobjs[gclobjheadindex]=ptr;
+       gclobjhead->lengths[gclobjheadindex]=length;
+       gclobjhead->hosts[gclobjheadindex]=host;
+       /*if(oirg == NULL) {
+               gclobjhead->origs[gclobjheadindex++]=ptr;
+       } else {
+               gclobjhead->origs[gclobjheadindex++]=orig;
+       }*/
+} // void gc_lobjenqueue(void *ptr...)
+
+// dequeue and destroy the queue
+inline void * gc_lobjdequeue(int * length
+                                        int * host) {
+  if (gclobjtailindex==NUMLOBJPTRS) {
+    struct lobjpointerblock *tmp=gclobjtail;
+    gclobjtail=gclobjtail->next;
+    gclobjtailindex=0;
+    if (gclobjspare!=NULL) {
+      free(tmp);
+               } else {
+      gclobjspare=tmp;
+               } // if (gclobjspare!=NULL)
+  } // if (gclobjtailindex==NUMLOBJPTRS)
+       if(length != NULL) {
+               *length = gclobjtail->lengths[gclobjtailindex];
+       }
+       if(host != NULL) {
+               *host = (int)(gclobjtail->hosts[gclobjtailindex]);
+       }
+  return gclobjtail->lobjs[gclobjtailindex++];
+} // void * gc_lobjdequeue()
+
+inline int gc_lobjmoreItems() {
+  if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
+    return 0;
+  return 1;
+} // int gc_lobjmoreItems()
+
+// dequeue and don't destroy the queue
+inline void gc_lobjdequeue2() {
+  if (gclobjtailindex2==NUMLOBJPTRS) {
+    gclobjtail2=gclobjtail2->next;
+    gclobjtailindex2=1;
+  } else {
+               gclobjtailindex2++;
+       }// if (gclobjtailindex2==NUMLOBJPTRS)
+} // void * gc_lobjdequeue2()
+
+inline int gc_lobjmoreItems2() {
+  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
+    return 0;
+  return 1;
+} // int gc_lobjmoreItems2()
 
 INTPTR curr_heaptop = 0;
 INTPTR curr_heapbound = 0;
 
-bool isLarge(void * ptr, 
-                        int * ttype, 
-                                                int * tsize) {
-       // check if a pointer is referring to a large object
+inline void gettype_size(void * ptr, 
+                                    int * ttype, 
+                                                                        int * tsize) {
        int type = ((int *)ptr)[0];
        int size = 0;
        if(type < NUMCLASSES) {
@@ -87,13 +149,20 @@ bool isLarge(void * ptr,
                int elementsize=classsize[type];
                int length=ao->___length___; 
                size=sizeof(struct ArrayObject)+length*elementsize;
-       }
+       } // if(type < NUMCLASSES)
        *ttype = type;
        *tsize = size;
-       return(!isLocal(ptr + size));
 }
 
-int hostcore(void * ptr) {
+inline bool isLarge(void * ptr, 
+                               int * ttype, 
+                                                                               int * tsize) {
+       // check if a pointer is referring to a large object
+       gettype_size(ptr, ttype, tsize);
+       return(!isLocal(ptr + size));
+} // bool isLarge(void * ptr, int * ttype, int * tsize)
+
+inline int hostcore(void * ptr) {
        // check the host core of ptr
        int host = 0;
        int x = 0;
@@ -101,150 +170,90 @@ int hostcore(void * ptr) {
        RESIDECORE(ptr, &x, &y);
        host = (x==0)?(x*bamboo_height+y):(x*bamboo_height+y-2);
        return host;
-}
+} // int hostcore(void * ptr)
 
-bool isLocal(void * ptr) {
+inline bool isLocal(void * ptr) {
        // check if a pointer is in shared heap on this core
        return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
-}
+} // bool isLocal(void * ptr)
 
-void transferMarkResults() {
-       // TODO, need distiguish between send and cache
+inline void transferMarkResults() {
        // invoked inside interruptiong handler
-       int msgsize = 4;
-  int i = 0;
-
-       // TODO check large objs here
-
-  isMsgSending = true;
-  DynamicHeader msgHdr = tmc_udn_header_from_cpu(STARTUPCORE);
-
-       // send header
-  __tmc_udn_send_header_with_size_and_tag(msgHdr, msgsize, 
-                                                           UDN0_DEMUX_TAG);  
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT(0xbbbb);
-  BAMBOO_DEBUGPRINT(0xb000 + STARTUPCORE);       // targetcore
-#endif
-  udn_send(GCLOBJINFO);
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT(GCLOBJINFO);
-#endif
-  udn_send(msgsize);
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT_REG(msgsize);
-#endif
-       udn_send(BAMBOO_NUM_OF_CORE);
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT_REG(BAMBOO_NUM_OF_CORE);
-#endif
-       udn_send(curr_heaptop);
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT_REG(curr_heaptop);
-#endif
-       // TODO large objs here
-       
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT(0xffff);
-#endif
-
-  // end of sending this msg, set sand msg flag false
-  isMsgSending = false;
-  send_hanging_msg();
-}
-
-void transferCompactStart(int core) {
-       // send start compact messages to all cores
-       // TODO no large obj info
-  int msgsize = 3;
+       int msgsize = 5 + gcnumlobjs;
   int i = 0;
-       int ismove = 0;
-       int movenum = 0;
-
-       // both lcore and rcore have the same action: either 
-       // move objs or have incoming objs
-       if(gcdeltal[core] > 0) {
-               ismove = 0; // have incoming objs
-               movenum++;
-       } else if(gcdeltal[core] < 0) {
-               ismove = 1; // have objs to move
-               movenum++;
-       } 
-       if(gcdeltar[core] > 0) {
-               ismove = 0; // have incoming objs
-               movenum++;
-       } else if(gcdeltar[core] < 0) {
-               ismove = 1; // have objs to move
-               movenum++;
-       }
-       msgsize += (movenum == 0) ? 0 : 2 + movenum * 2;
 
-  isMsgSending = true;
-  DynamicHeader msgHdr = tmc_udn_header_from_cpu(core);
+  if(isMsgSending) {
+               // cache the msg
+               isMsgHanging = true;
+               // cache the msg in outmsgdata and send it later
+               // msglength + target core + msg
+               OUTMSG_CACHE(msgsize);
+               OUTMSG_CACHE(STARTUPCORE);
+               OUTMSG_CACHE(GCLOBJINFO);
+               OUTMSG_CACHE(msgsize);
+               OUTMSG_CACHE(curr_heaptop);
+               OUTMSG_CACHE(gcmarkedptrbound);
+               // large objs here
+               void * lobj = NULL;
+               int length = 0;
+               while(gc_lobjmoreItems()) {
+                       lobj = gc_lobjdequeue(&length);
+                       OUTMSG_CACHE(lobj);
+                       OUTMSG_CACHE(length);
+               } // while(gc_lobjmoreItems())
+       } else {
+               DynamicHeader msgHdr = tmc_udn_header_from_cpu(STARTUPCORE);
 
-       // send header
-  __tmc_udn_send_header_with_size_and_tag(msgHdr, msgsize, 
-                                                           UDN0_DEMUX_TAG);  
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT(0xbbbb);
-  BAMBOO_DEBUGPRINT(0xb000 + core);       // targetcore
-#endif
-  udn_send(GCSTARTCOMPACT);
-#ifdef DEBUG
-  BAMBOO_DEBUGPRINT(GCSTARTCOMPACT);
-#endif
-  udn_send(msgsize);
+               // send header
+               __tmc_udn_send_header_with_size_and_tag(msgHdr, msgsize, 
+                                                                                                                                                                               UDN0_DEMUX_TAG);  
 #ifdef DEBUG
-  BAMBOO_DEBUGPRINT_REG(msgsize);
+               BAMBOO_DEBUGPRINT(0xbbbb);
+               BAMBOO_DEBUGPRINT(0xb000 + STARTUPCORE);       // targetcore
 #endif
-       udn_send(gcreloads[core]);
+               udn_send(GCLOBJINFO);
 #ifdef DEBUG
-  BAMBOO_DEBUGPRINT_REG(gcreloads[core]);
+               BAMBOO_DEBUGPRINT(GCLOBJINFO);
 #endif
-       if(movenum > 0) {
-               udn_send(movenum);
+               udn_send(msgsize);
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(movenum);
+               BAMBOO_DEBUGPRINT_REG(msgsize);
 #endif
-               udn_send(ismove);
+               udn_send(BAMBOO_NUM_OF_CORE);
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(ismove);
+               BAMBOO_DEBUGPRINT_REG(BAMBOO_NUM_OF_CORE);
 #endif
-               int dst = 0;
-               if(gcdeltal[core] != 0) {
-                       LEFTNEIGHBOUR(core, &dst);
-                       udn_send(abs(gcdeltal[core]));
+               udn_send(curr_heaptop);
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG(abs(gcdeltal[core]));
+               BAMBOO_DEBUGPRINT_REG(curr_heaptop);
 #endif
-                       udn_send(dst);
+               udn_send(gcmarkedptrbound);
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG(dst);
+               BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
 #endif
-               }
-               if(gcdeltar[core] != 0) {
-                       RIGHTNEIGHBOUR(core, &dst);
-                       udn_send(abs(gcdeltar[core]));
+               // large objs here
+               void * lobj = NULL;
+               int length = 0;
+               while(gc_lobjmoreItems()) {
+                       lobj = gc_lobjdequeue(&length);
+                       OUTMSG_CACHE(lobj);
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG(abs(gcdeltar[core]));
+                       BAMBOO_DEBUGPRINT_REG(lobj);
 #endif
-                       udn_send(dst);
+                       OUTMSG_CACHE(length);
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG(dst);
+                       BAMBOO_DEBUGPRINT_REG(length);
 #endif
-               }
-       }
+               } // while(gc_lobjmoreItems())
+               
 #ifdef DEBUG
-  BAMBOO_DEBUGPRINT(0xffff);
+               BAMBOO_DEBUGPRINT(0xffff);
 #endif
+       } // if(isMsgSending)
+} // void transferMarkResults() 
 
-  // end of sending this msg, set sand msg flag false
-  isMsgSending = false;
-  send_hanging_msg();
-}
-
-void checkMarkStatue() {
-       if((!gcwaitconfirm) || 
+inline void checkMarkStatue() {
+       if((!waitconfirm) || 
                        (waitconfirm && (numconfirm == 0))) {
                BAMBOO_START_CRITICAL_SECTION_STATUS();  
                gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
@@ -256,18 +265,18 @@ void checkMarkStatue() {
                        if(gccorestatus[i] != 0) {
                                allStall = false;
                                break;
-                       }
-               }
+                       } // if(gccorestatus[i] != 0)
+               } // for(i = 0; i < NUMCORES; ++i)
                if(allStall) {
                        // check if the sum of send objs and receive obj are the same
                        // yes->check if the info is the latest; no->go on executing
                        int sumsendobj = 0;
                        for(i = 0; i < NUMCORES; ++i) {
                                sumsendobj += gcnumsendobjs[i];
-                       }               
+                       } // for(i = 0; i < NUMCORES; ++i) 
                        for(i = 0; i < NUMCORES; ++i) {
                                sumsendobj -= gcnumreceiveobjs[i];
-                       }
+                       } // for(i = 0; i < NUMCORES; ++i) 
                        if(0 == sumsendobj) {
                                if(!waitconfirm) {
                                        // the first time found all cores stall
@@ -280,7 +289,7 @@ void checkMarkStatue() {
                                                gccorestatus[i] = 1;
                                                // send mark phase finish confirm request msg to core i
                                                send_msg_1(i, GCMARKCONFIRM);
-                                       }
+                                       } // for(i = 1; i < NUMCORES; ++i) 
                                } else {
                                        // all the core status info are the latest
                                        // stop mark phase
@@ -288,15 +297,15 @@ void checkMarkStatue() {
                                        // restore the gcstatus for all cores
                                        for(i = 0; i < NUMCORES; ++i) {
                                                gccorestatus[i] = 1;
-                                       }
+                                       } // for(i = 0; i < NUMCORES; ++i)
                                } // if(!gcwautconfirm) else()
                        } // if(0 == sumsendobj)
                } // if(allStall)
                BAMBOO_CLOSE_CRITICAL_SECTION_STATUS();
-       } // if((!gcwaitconfirm)...
-}
+       } // if((!waitconfirm)...
+} // void checkMarkStatue()
 
-bool preGC() {
+inline bool preGC() {
        // preparation for gc
        // make sure to clear all incoming msgs espacially transfer obj msgs
        int i;
@@ -310,7 +319,7 @@ bool preGC() {
                        corestatus[i] = 1;
                        // send status confirm msg to core i
                        send_msg_1(i, STATUSCONFIRM);
-               }
+               } // for(i = 1; i < NUMCORES; ++i)
 
                while(numconfirm != 0) {} // wait for confirmations
                numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
@@ -318,118 +327,187 @@ bool preGC() {
                int sumsendobj = 0;
                for(i = 0; i < NUMCORES; ++i) {
                        sumsendobj += numsendobjs[i];
-               }               
+               } // for(i = 1; i < NUMCORES; ++i)      
                for(i = 0; i < NUMCORES; ++i) {
                        sumsendobj -= numreceiveobjs[i];
-               }
+               } // for(i = 1; i < NUMCORES; ++i)
                if(0 == sumsendobj) {
                        return true;
                } else {
                        // still have some transfer obj msgs on-the-fly, can not start gc
                        return false;
-               }
+               } // if(0 == sumsendobj) 
        } else {
                // previously asked for status confirmation and do not have all the 
                // confirmations yet, can not start gc
                return false;
-       }
-}
+       } // if((!waitconfirm) || 
+} // bool preGC()
 
 // compute load balance for all cores
-void loadbalance() {
+inline int loadbalance(int heaptop) {
        // compute load balance
-       // initialize the deltas
        int i;
-       int delta = 1 << 32 -1;
-       int deltanew = 1 << 32 - 1;
-       int lcore = 0;
-       int rcore = 0;
-       bool stop = true;
-       for(i = 0; i < NUMCORES; i++) {
-               gcdeltal[i] = gcdeltar[i] = 0;
-               gcreloads[i] = gcloads[i];
+
+       // get the total loads
+       gcloads[0]+=BAMBOO_SMEM_SIZE*gcreservedsb;//reserved sblocks for sbstartbl
+       int tloads = gcloads[0];
+       for(i = 1; i < NUMCORES; i++) {
+               tloads += gcloads[i];
        }
+       int heaptop = BAMBOO_BASE_VA + tloads;
+       int b = 0;
+       BLOCKINDEX(heaptop, &b);
+       int numbpc = b / NUMCORES; // num of blocks per core
 
-       // iteratively balance the loads
-       do {
-               stop = true;
-               delta = deltanew;
-               // compute load balance
-               for(i = 0; i < NUMCORES; i++) {
-                       if(gcreloads[i] > BAMBOO_SMEM_SIZE_L) {
-                               // too much load, try to redirect some of it to its neighbours
-                               LEFTNEIGHBOUR(i, &lcore);
-                               RIGHTNEIGHBOUR(i, &rcore);
-                               if(lcore != -1) {
-                                       int tmp = (gcreloads[lcore] - gcreloads[i]) / 2;
-                                       gcdeltal[i] = tmp;
-                                       gcdeltar[lcore] = 0-tmp;
-                                       deltanew += abs(gcreloads[lcore] - gcreloads[i]);
-                               }
-                               if(rcore != -1) {
-                                       int tmp = (gcreloads[rcore] - gcreloads[i]) / 2;
-                                       gcdeltar[i] = tmp;
-                                       gcdeltal[rcore] = 0-tmp;
-                                       deltanew += abs(gcreloads[rcore] - gcreloads[i]);
-                               }
-                       }
-               }
-               deltanew /= 2;
-               if((deltanew == 0) || (delta == deltanew)) {
-                       break;
-               }
-               // flush for new loads
-               for(i = 0; i < NUMCORES; i++) {
-                       if((gcdeltal[i] != 0) || (gcdeltar[i] != 0)) {
-                               stop = false;
-                               gcreloads[i] += gcdeltal[i] + gcdeltar[i];
-                               gcdeltal[i] = gcdeltar[i] = 0;
-                       }
-               }
-       } while(!stop);
+       gcheapdirection = (numbpc%2 == 0);
+       int x = 0;
+       int y = 0;
+       RESIDECORE(heaptop, &x, &y);
+       gctopcore = (x == 0 ? y : x * bamboo_height + y - 2);
+       return numbpc;
+} // void loadbalance()
+
+inline bool cacheLObjs() {
+       // check the total mem size need for large objs
+       int sumsize = 0;
+       int size = 0;
+       int isize = 0;
+       while(gc_lobjmoreItems2()){
+               gc_lobjdequeue2();
+               size = gclobjtail2->lengths[gclobjtailindex2 - 1];
+               ALIGNSIZE(size, &isize);
+               sumsize += isize;
+       } // while(gc_lobjmoreItems2())
+
+       // check if there are enough space to cache these large objs
+       INTPTR dst = BAMBOO_BASE_VA + BAMBOO_SHARED_MEM_SIZE - sumsize;
+       if(gcheaptop > dst) {
+               // do not have enough room to cache large objs
+               return false;
+       }
 
-       // decide how to do load balance
-       for(i = 0; i < NUMCORES; i++) {
-               gcdeltal[i] = gcdeltar[i] = 0;
+       gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
+       // cache the largeObjs to the top of the shared heap
+       gclobjtail2 = gclobjtail;
+       gclobjtailindex2 = 0;
+       while(gc_lobjmoreItems2()) {
+               gc_lobjdequeue2();
+               size = gclobjtail2->lengths[gclobjtailindex2 - 1];
+               ALIGNSIZE(size, &isize);
+               memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2 - 1], size);
+               // fill the remaining space with -2
+               memset(dst+size, -2, isize-size);
+               // set the new addr of this obj
+               //gclobjtail2->origs[gclobjtailindex2 - 1] = 
+               //      gclobjtail2->lobjs[gclobjtailindex2 - 1];
+               //gclobjtail2->lobjs[gclobjtailindex2 - 1] = dst;
+               dst += isize;
        }
-       for(i = 0; i < NUMCORES; i++) {
-               int tomove = (gcloads[i] - gcreloads[i]);
-               if(tomove > 0) {
-                       LEFTNEIGHBOUR(i, &lcore);
-                       RIGHTNEIGHBOUR(i, &rcore);
-                       int lmove = 0;
-                       int rmove = 0;
-                       if(lcore != -1) {
-                               lmove = (gcreloads[lcore] - gcloads[lcore] - gcdeltal[lcore]);
-                               if(lmove < 0) {
-                                       lmove = 0;
-                               }
-                       }
-                       if(rcore != -1) {
-                               rmove = (gcreloads[rcore] - gcloads[rcore] - gcdeltar[rcore]);
-                               if(rmove < 0) {
-                                       rmove = 0;
-                               }
+       return true;
+} // void cacheLObjs()
+
+inline void moveLObjs() {
+       // find current heap top
+       int tmpheaptop = gcloads[0];
+       for(int i = 1; i < NUMCORES; i++) {
+               if(tmpheaptop < gcloads[i]) {
+                       tmpheaptop = gcloads[i];
+               }
+       }
+       // move large objs from gcheaptop to tmpheaptop
+       // write the header first
+       int tomove = BAMBOO_BASE_VA + BAMBOO_SHARED_MEM_SIZE - gcheaptop;
+       // check how many blocks it acrosses
+       int b = 0;
+       BLOCKINDEX(tmpheaptop, &b);
+       // check the remaining space in this block
+       int remain = (b < NUMCORES? (b+1)*BAMBOO_SMEM_SIZE_L  
+                       : BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE)
+                         -(mem-BAMBOO_BASE_VA);
+       if(remain <= BAMBOO_CACHE_LINE_SIZE) {
+               // fill the following space with -1, go to next block
+               (*((int *)tmpheaptop)) = -1;
+               b++;
+               remain = b < NUMCORES? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+               tmpheaptop += remain;
+       }
+       (*((int *)tmpheaptop)) = tomove + BAMBOO_CACHE_LINE_SIZE;
+       tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+       memcpy(tmpheaptop, gcheaptop, tomove);
+       gcheaptop = tmpheaptop + tomove;
+       // flush the sbstartbl
+       memset(sbstarttbl, '\0', 
+                          BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE*sizeof(INTPTR));
+       int size = 0;
+       int isize = 0;
+       int host = 0;
+       int ptr = 0;
+       remain -= BAMBOO_CACHE_LINE_SIZE;
+       while(gc_lobjmoreItems()) {
+               ptr = (int)(gc_lobjdequeue(&size, &host));
+               ALIGNSIZE(size, &isize);
+               if(remain < isize) {
+                       // this object acrosses blocks
+                       int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
+                       for(int k = 0; k < tmpsbs-1; k++) {
+                               sbstarttbl[k+b] = (INTPTR)(-1);
                        }
-                       // the one with bigger gap has higher priority
-                       if(lmove > rmove) {
-                               int ltomove = (lmove > tomove)? tomove:lmove;
-                               gcdeltar[lcore] = ltomove;
-                               gcdeltal[i] = 0-ltomove;
-                               gcdeltal[rcore] = tomove - ltomove;
-                               gcdeltar[i] = ltomove - tomove;
+                       b += tmpsbs;
+                       remain = b < NUMCORES ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+                       if((isize-remain)%BAMBOO_SMEM_SIZE == 0) {
+                               sbstarttbl[b+tmpsbs-1] = (INTPTR)(-1);
                        } else {
-                               int rtomove = (rmove > tomove)? tomove:rmove;
-                               gcdeltal[rcore] = rtomove;
-                               gcdeltar[i] = 0-rtomove;
-                               gcdeltar[lcore] = tomove - rtomove;
-                               gcdeltal[i] = rtomove - tomove;
+                               sbstarttbl[b+tmpsbs-1] = (INTPTR)(tmpheaptop+isize);
+                               remain -= (isize-remain)%BAMBOO_SMEM_SIZE;
                        }
                }
+               // send the original host core with the mapping info
+               send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
+               tmpheaptop += isize;
        }
-}
+} // void moveLObjs()
 
-void gc(struct garbagelist * stackptr) {
+inline void updateFreeMemList() {
+       int i = 0;
+       int tmptop = gcloads[0];
+       struct freeMemItem * tochange = bamboo_free_mem_list->head;
+       if(tochange == NULL) {
+               bamboo_free_mem_list->head = tochange = 
+                       (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
+       }
+       for(i = 1; i < NUMCORES; ++i) {
+               int toadd = gcloads[i];
+               if(tmptop < toadd) {
+                       toadd = tmptop;
+                       tmptop = gcloads[i];
+               } // tmptop can never == toadd
+               int blocki = 0;
+               BLOCKINDEX(toadd, &blocki);
+               tochange->ptr = toadd;
+               tochange->size = (blocki<NUMCORES)
+                       ?((blocki+1)*BAMBOO_SMEM_SIZE_L+BAMBOO_BASE_VA-toadd)
+                       :(BAMBOO_LARGE_SMEM_BOUND+(blocki+1-NUMCORES)*BAMBOO_SMEM_SIZE
+                                       +BAMBOO_BASE_VA-toadd);
+               if(tochange->next == NULL) {
+                       tochange->next = 
+                               (struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
+               }
+               // zero out all these spare memory
+               memset(tochange->ptr, '\0', tochange->size);
+               tochange = tochange->next;
+       } // for(i = 1; i < NUMCORES; ++i)
+       // handle the top of the heap
+       tmptop = gcheaptop;
+       BLOCKINDEX(tmptop, &blocki);
+       tochange->ptr = tmptop;
+       tochange->size = BAMBOO_SHARED_MEM_SIZE + BAMBOO_BASE_VA - tmptop;
+       // zero out all these spare memory
+       memset(tochange->ptr, '\0', tochange->size);
+       bamboo_free_mem_list->tail = tochange;
+} // void updateFreeMemList()
+
+inline void gc(struct garbagelist * stackptr) {
        // check if do gc
        if(!gcflag) {
                return;
@@ -445,8 +523,8 @@ void gc(struct garbagelist * stackptr) {
 
                gcprocessing = true;
                int i = 0;
-               gcwaitconfirm = false;
-               gcwaitconfirm = 0;
+               waitconfirm = false;
+               waitconfirm = 0;
                gcphase = MARKPHASE;
                for(i = 1; i < NUMCORES - 1; i++) {
                        // send GC start messages to all cores
@@ -471,13 +549,26 @@ void gc(struct garbagelist * stackptr) {
                        send_msg_1(i, GCLOBJREQUEST);
                }       
                while(numconfirm != 0) {} // wait for responses
-               loadbalance();
-               // TODO need to decide where to put large objects
-               // TODO cache all large objects
+               if(!cacheLObjs()) {
+                       // no enough space to cache large objs
+                       BAMBOO_EXIT(0xd001);
+               }
+               int numpbc = loadbalance();
 
+               if((gcheapdirection) && (0 <= gctopcore)
+                               || ((!gcheapdirection) && (0 == gctopcore))) {
+                       gcstopblock = numpbc + 1;
+               } else {
+                       gcstopblock = numpbc;
+               }
                for(i = 1; i < NUMCORES; ++i) {
                        //send start compact messages to all cores
-                       transferCompactStart(i);
+                       if((gcheapdirection) && (i <= gctopcore)
+                                       || ((!gcheapdirection) && (i >= gctopcore))) {
+                                       send_msg_2(i, GCSTARTCOMPACT, numpbc+1); 
+                       } else {
+                                       send_msg_2(i, GCSTARTCOMPACT, numpbc);
+                       }
                }
 
                // compact phase
@@ -500,6 +591,8 @@ void gc(struct garbagelist * stackptr) {
                                break;
                        }
                } // while(COMPACTPHASE == gcphase)
+               // move largeObjs
+               moveLObjs();
 
                gcphase = FLUSHPHASE;
                for(i = 1; i < NUMCORES; ++i) {
@@ -529,37 +622,30 @@ void gc(struct garbagelist * stackptr) {
                        send_msg_1(i, GCFINISH);
                }
 
-               // need to create free memory list and invalidate all 
-               // shared mem pointers TODO
-
-               gcflag = false;
-               gcprocessing = false;
-               return;
+               // need to create free memory list  
+               updateFreeMemList();
        } else {
                gcprocessing = true;
                gc_collect(stackptr);
        }
+
        // invalidate all shared mem pointers
        bamboo_cur_msp = NULL;
        bamboo_smem_size = 0;
+
        gcflag = false;
        gcprocessing = false;
 
-}
+} // void gc(struct garbagelist * stackptr)
 
 // enqueue root objs
-void tomark(struct garbagelist * stackptr) {
+inline void tomark(struct garbagelist * stackptr) {
        if(MARKPHASE != gcphase) {
                BAMBOO_EXIT(0xb002);
        }
        gcbusystatus = 1;
-       // initialize queue
-       if (gchead==NULL) {
-               gcheadindex=0;
-               gctailindex=0;
-               gctailindex2 = 0;
-               gchead=gctail=gctail2=malloc(sizeof(struct pointerblock));
-       }
+       gcnumlobjs = 0;
+       
        int i;
        // enqueue current stack 
        while(stackptr!=NULL) {
@@ -605,56 +691,62 @@ void tomark(struct garbagelist * stackptr) {
                gc_enqueue(objInfo->objptr);
                getNextQueueItem(tmpobjptr);
        }
-}
+} // void tomark(struct garbagelist * stackptr)
 
-void mark(bool isfirst, 
-                     struct garbagelist * stackptr) {
+inline void markObj(void * objptr) {
+       if(ISSHAREDOBJ(objptr)) {
+               int host = hostcore(objptr);
+               if(BAMBOO_NUM_OF_CORE == host) {
+                       // on this core
+                       gc_enqueue(objptr);  
+               } else {
+                       // send a msg to host informing that objptr is active
+                       send_msg_2(host, GCMARKEDOBJ, objptr);
+                       gcself_numsendobjs++;
+               }
+       } else {
+               gc_enqueue(objptr);
+       } // if(ISSHAREDOBJ(objptr))
+} // void markObj(void * objptr) 
+
+inline void mark(bool isfirst, 
+                            struct garbagelist * stackptr) {
        if(isfirst) {
                // enqueue root objs
                tomark(stackptr);
-               curr_heaptop = BAMBOO_CACHE_LINE_SIZE;
-               curr_heapbound = BAMBOO_SMEM_SIZE_L;
-               markedptrbound = 0;
+               curr_heaptop = 0; // record the size of all active objs in this core
+                                 // aligned but does not consider block boundaries
+               gcmarkedptrbound = 0;
        }
 
        int isize = 0;
        // mark phase
        while(MARKPHASE == gcphase) {
                while(gc_moreItems2()) {
-                       voit * ptr = gc_dequeue2();
+                       gcbusystatus = true;
+                       void * ptr = gc_dequeue2();
                        int size = 0;
+                       int isize = 0;
                        int type = 0;
-                       if(isLarge(ptr, &type, &size)) {
-                               // ptr is a large object
-                               struct largeObjItem * loi = 
-                                       (struct largeObjItem*)RUNMALLOC(sizeof(struct largeObjItem)); 
-                               loi->orig = (INTPTR)ptr;
-                               loi->dst = (INTPTR)0;
-                               loi->length = size;
-                               if(lObjList.head == NULL) {
-                                       lObjList.head = lObjList.tail = loi;
-                               } else {
-                                       lObjList.tail->next = loi;
-                                       lObjList.tail = loi;
-                               }
-                       } else if (isLocal(ptr)) {
-                               // ptr is an active object on this core
-                               if(type == -1) {
-                                       // nothing to do 
-                               }
-                               ALIGNSIZE(size, &isize);
-                               curr_heaptop += isize;
-                               if(curr_heaptop > curr_heapbound) {
-                                       // change to another block
-                                       curr_heaptop = curr_heapbound+BAMBOO_CACHE_LINE_SIZE+isize;
-                                       curr_heapbound += BAMBOO_SMEM_SIZE;
-                               }
-                               // mark this obj
-                               ((int *)ptr)[6] = 1;
-                               if(ptr > markedptrbound) {
-                                       markedptrbound = ptr;
-                               }
-                       }
+                       // check if it is a shared obj
+                       if(ISSHAREDOBJ(ptr)) {
+                               // a shared obj, check if it is a local obj on this core
+                               if(isLarge(ptr, &type, &size)) {
+                                       // ptr is a large object
+                                       gc_lobjenqueue(ptr, size);
+                                       gcnumlobjs++;
+                               } else if (isLocal(ptr)) {
+                                       // ptr is an active object on this core
+                                       ALIGNSIZE(size, &isize);
+                                       curr_heaptop += isize;
+                                       // mark this obj
+                                       ((int *)ptr)[6] = 1;
+                                       if(ptr + size > gcmarkedptrbound) {
+                                               gcmarkedptrbound = ptr + size;
+                                       } // if(ptr + size > gcmarkedptrbound)
+                               } // if(isLarge(ptr, &type, &size)) else if(isLocal(ptr))
+                       } // if(ISSHAREDOBJ(ptr))
+
                        // scan all pointers in ptr
                        unsigned INTPTR * pointer;
                        pointer=pointerarray[type];
@@ -669,15 +761,7 @@ void mark(bool isfirst,
                                for(j=0; j<length; j++) {
                                        void *objptr = 
                                                ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
-                                       int host = hostcore(objptr);
-                                       if(BAMBOO_NUM_OF_CORE == host) {
-                                               // on this core
-                                               gc_enqueue(objptr);  
-                                       } else {
-                                               // send a msg to host informing that objptr is active
-                                               send_msg_2(host, GCMARKEDOBJ, objptr);
-                                               gcself_numsendobjs++;
-                                       }
+                                       markObj(objptr);
                                }
                        } else {
                                INTPTR size=pointer[0];
@@ -685,22 +769,20 @@ void mark(bool isfirst,
                                for(i=1; i<=size; i++) {
                                        unsigned int offset=pointer[i];
                                        void * objptr=*((void **)(((char *)ptr)+offset));
-                                       int host = hostcore(objptr);
-                                       if(BAMBOO_NUM_OF_CORE == host) {
-                                               // on this core
-                                               gc_enqueue(objptr);  
-                                       } else {
-                                               // send a msg to host informing that objptr is active
-                                               send_msg_2(host, GCMARKEDOBJ, objptr);
-                                               gcself_numsendobjs++;
-                                       }
+                                       markObj(objptr);
                                }
                        }
                } // while(!isEmpty(gctomark))
                gcbusystatus = false;
                // send mark finish msg to core coordinator
-               send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
-                                      gcself_numsendobjs, gcself_numreceiveobjs); 
+               if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+                       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+                       gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
+                       gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
+               } else {
+                       send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
+                                                                gcself_numsendobjs, gcself_numreceiveobjs);
+               }
 
                if(BAMBOO_NUM_OF_CORE == 0) {
                        return;
@@ -714,38 +796,92 @@ struct moveHelper {
        INTPTR ptr; // virtual address of current heap top
        int offset; // offset in current heap block
        int blockbase; // virtual address of current small block to check
-       int blockbound; // bound virtual address of current small blcok 
+       int blockbound; // bound virtual address of current small blcok
+       int sblockindex; // index of the small blocks
        int top; // real size of current heap block to check
        int bound; // bound size of current heap block to check
-};
+}; // struct moveHelper
 
-void nextSBlock(struct moveHelper * orig) {
+inline void nextSBlock(struct moveHelper * orig) {
        orig->blockbase = orig->blockbound;
-       if(orig->blockbase == orig->bound) {
+innernextSBlock:
+       if(orig->blockbase >= orig->bound) {
                // end of current heap block, jump to next one
                orig->numblocks++;
                BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
                orig->bound = orig->base + BAMBOO_SMEM_SIZE;
                orig->blockbase = orig->base;
        }
+       orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE;
+       if(sbstarttbl[orig->sblockindex] == -1) {
+               // goto next sblock
+               orig->sblockindex += 1;
+               orig->blockbase += BAMBOO_SMEM_SIZE;
+               goto innernextSBlock;
+       } else if(sbstarttbl[orig->sblockindex] != 0) {
+               // not start from the very beginning
+               orig->blockbase = sbstarttbl[orig->sblockindex];
+       }
        orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
        orig->offset = BAMBOO_CACHE_LINE_SIZE;
        orig->ptr = orig->blockbase + orig->offset;
-}
+} // void nextSBlock(struct moveHelper * orig) 
+
+inline void initOrig_Dst(struct moveHelper * orig, 
+                                    struct moveHelper * to) {
+       // init the dst ptr
+       to->numblocks = 0;
+       to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
+       to->bound = BAMBOO_SMEM_SIZE_L;
+       BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
+       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+               to->base += gcreservedsb * BAMBOO_SMEM_SIZE;
+               to->top += gcreservedsb * BAMBOO_SMEM_SIZE;
+               curr_heaptop -= gcreservedsb * BAMBOO_SMEM_SIZE;
+       }
+       to->ptr = to->base + to->offset;
 
-void nextBlock(struct moveHelper * to) {
+       // init the orig ptr
+       orig->numblocks = 0;
+       orig->base = to->base;
+       orig->bound = to->base + BAMBOO_SMEM_SIZE_L;
+       orig->blockbase = orig->base;
+       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+               orig->sblockindex = reservedsb;
+       } else {
+               orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
+       }
+       if(sbstarttbl[sblockindex] == -1) {
+               // goto next sblock
+               orig->blockbound = 
+                       BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
+               nextSBlock(orig);
+               return;
+       } else if(sbstarttbl[orig->sblockindex] != 0) {
+               orig->blockbase = sbstarttbl[sblockindex];
+       }
+       orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
+       orig->offset = BAMBOO_CACHE_LINE_SIZE;
+       orig->ptr = orig->blockbase + orig->offset;
+} // void initOrig_Dst(struct moveHelper * orig, struct moveHelper * to) 
+
+inline void nextBlock(struct moveHelper * to) {
        to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
        to->bound += BAMBOO_SMEM_SIZE;
        to->numblocks++;
        BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
        to->offset = BAMBOO_CACHE_LINE_SIZE;
        to->ptr = to->base + to->offset;
-}
+} // void nextBlock(struct moveHelper * to)
 
 // endaddr does not contain spaces for headers
-bool moveobj(struct moveHelper * orig, 
-                        struct moveHelper * to, 
-                                                INTPTR * endaddr) {
+inline bool moveobj(struct moveHelper * orig, 
+                               struct moveHelper * to, 
+                                                       int stopblock) {
+       if(stopblock == 0) {
+               return true;
+       }
+
        int type = 0;
        int size = 0;
        int mark = 0;
@@ -753,7 +889,7 @@ bool moveobj(struct moveHelper * orig,
 innermoveobj:
        while((*((int*)(orig->ptr))) == -2) {
                orig->ptr++;
-               if(orig->ptr == orig->blockbound) {
+               if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
                        nextSBlock(orig);
                        goto innermoveobj;
                }
@@ -780,83 +916,72 @@ innermoveobj:
                // marked obj, copy it to current heap top
                // check to see if remaining space is enough
                ALIGNSIZE(size, &isize);
-               if((endaddr != NULL) && (to->top + isize > *endaddr)) {
-                       // reached the endaddr 
-                       // fill offset to the endaddr for later configuration of header
-                       to->offset += *endaddr - to->top;
-                       to->top += *endaddr - to->top;
-                       return true;
-               }
                if(to->top + isize > to->bound) {
+                       // fill -1 indicating the end of this block
+                       if(to->top != to->bound) {
+                               *((int*)to->ptr) = -1;
+                       }
+                       memset(to->ptr+1, -2, to->bound - to->top - 1);
                        // fill the header of this block and then go to next block
        to->offset += to->bound - to->top;
                        (*((int*)(to->base))) = to->offset;
-                       if(endaddr != NULL) {
-                               *endaddr = *endaddr + BAMBOO_CACHE_LINE_SIZE; 
-                       }
                        nextBlock(to);
+                       if(stopblock == to->numblocks) {
+                               // already fulfilled the block
+                               to->offset = 0;
+                               to->ptr = to->base;
+                               return true;
+                       }
                }
                memcpy(to->ptr, orig->ptr, size);
+               // fill the remaining space with -2
+               memset(to->ptr+size, -2, isize-size);
                // store mapping info
-               RuntimeHashadd(pointertbl, orig->ptr, to->ptr); 
+               RuntimeHashadd(gcpointertbl, orig->ptr, to->ptr); 
+               curr_heaptop -= isize;
                to->ptr += isize;
                to->offset += isize;
                to->top += isize;
        } 
        // move to next obj
        orig->ptr += size;
-       if(orig->ptr == orig->blockbound) {
+       if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
                nextSBlock(orig);
        }
        return false;
-}
+} //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
 
-void migrateobjs(struct moveHelper * orig) {
-       int num_dsts = cinstruction->movenum;
-       while(num_dsts > 0) {
-               while(!gctomove) {}
-               // start moving objects to other cores
-               gctomove = false;
-               struct moveHelper * into = 
-                       (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
-               for(int j = 0; j < cinstruction->movenum; j++) {
-                       if(cinstruction->moveflag[j] == 1) {
-                               // can start moving to corresponding core
-                               int dst = cinstruction->dsts[j];
-                               num_dsts--;
-                               into->ptr = cinstruction->startaddrs[j];
-                               BLOCKINDEX(into->ptr, &(into->numblocks));
-                               into->bound = (into->numblocks==0)?
-                                       BAMBOO_SMEM_SIZE_L:
-                                       BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*into->numblocks;
-                               BASEPTR(BAMBOO_NUM_OF_CORE, into->numblocks, &(into->base));
-                               into->offset = into->ptr - into->base;
-                               into->top = (into->numblocks==0)?
-                                       (into->offset):(into->bound-BAMBOO_SMEM_SIZE+into->offset);
-                               into->base = into->ptr;
-                               into->offset = BAMBOO_CACHE_LINE_SIZE;
-                               into->ptr += into->offset; // for header
-                               into->top += into->offset;
-                               int endaddr = into->top + cinstruction->endaddrs[j];
-                               do {
-                                       bool stop = moveobj(orig, into, &endaddr);
-                                       if(stop) {
-                                               // all objs before endaddr have been moved
-                                               // STOP the loop
-                                               break;
-                                       }                                                       
-                               } while(orig->ptr < markedptrbound + 1);
-                               // set the flag indicating move finished
-                               cinstruction->moveflag[j] = 2; 
-                               // fill the header of this blockk
-                               (*((int*)(into->base))) = into->offset;
-                       } // if(cinstruction->moveflag[j] == 1)
-               } // for(int j = 0; j < cinstruction->movenum; j++)
-               RUNFREE(into);
-       } // while(num_dsts > 0)
+inline bool findSpareMem(int * startaddr,
+                                    int * tomove,
+                                                                                                int requiredmem) {
+       for(int k = 0; k < NUMCORES; k++) {
+               if((gccorestatus[k] == 0) && (gcnumblocks[k] < gcstopblock)) {
+                       // check if this stopped core has enough mem
+                       int b = 0;
+                       BLOCKINDEX(gcloads[k], &b);
+                       int boundptr = b<NUMCORES?(b+1)*BAMBOO_SMEM_SIZE_L
+                               :BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE;
+                       int remain = boundptr - gcloads[k];
+                       *tomove = requiredmem + BAMBOO_CACHE_LINE_SIZE;
+                       *startaddr = gcloads[k];
+                       if(*tomove < remain) {
+                               gcloads[k] += *tomove;
+                       } else {
+                               // next available block
+                               gcnumblocks[k] += 1;
+                               int newbase = 0;
+                               BASEPTR(k, gcnumblocks[k], &newbase);
+                               gcloads[k] = newbase;
+                               *tomove = remain;
+                       }
+                       return true;
+               }
+       }
+       // TODO if can not find spare mem right now, hold the request
+       return false;
 }
 
-void compact() {
+inline void compact() {
        if(COMPACTPHASE != gcphase) {
                BAMBOO_EXIT(0xb003);
        }
@@ -868,125 +993,102 @@ void compact() {
                (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
        struct moveHelper * to = 
                (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
-       to->numblocks = 0;
-       to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
-       to->bound = BAMBOO_SMEM_SIZE_L;
-       BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
-       to->ptr = to->base + to->offset;
-       orig->numblocks = 0;
-       orig->ptr = to->ptr;
-       orig->base = to->base;
-       orig->bound = to->bound;
-       orig->blockbase = to->base;
-       orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
-
-       // scan over all objs in this block, compact those scheduled to 
-       // reside on this core
-       // loop stop when finishing either scanning all active objs or moving
-       // all objs to reside on this core
-       int endaddr = cinstruction->loads;
+       initOrig_Dst(orig, to);
+       
+       // scan over all objs in this block, compact the marked objs 
+       // loop stop when finishing either scanning all active objs or 
+       // fulfilled the gcstopblock
+innercompact:
        do {
-               bool stop = moveobj(orig, to, &endaddr);
-               curr_heaptop = to->top;
-               curr_heapbound = to->bound;
-               if(stop && (cinstruction->movenum != 0)) {
-                       // all objs to reside on this core have been moved
-                       // the remainging objs should be moved to other cores
-                       // STOP the loop
+               bool stop = moveobj(orig, to, gcstopblock);
+               if(stop) {
                        break;
                }
-       } while(orig->ptr < markedptrbound + 1); 
+       } while(orig->ptr < gcmarkedptrbound); 
        // fill the header of this block
        (*((int*)(to->base))) = to->offset;
        heaptopptr = to->ptr;
 
-       // move objs
-       if(cinstruction->movenum != 0) {
-               if(cinstruction->ismove) {
-                       // have objs to move to other cores
-                       migrateobjs(orig);
-
-                       // might still have objs left, compact them to this core
-                       // leave space for header
-                       if(orig->ptr < markedptrbound + 1) {
-                               if(to->top + BAMBOO_CACHE_LINE_SIZE > to->bound) {
-                                       // fill the left part of current block
-                                       memset(to->top, -2, to->bound - to->top);
-                                       // go to next block
-                                       nextBlock(to);
-                               } else {
-                                       to->top += BAMBOO_CACHE_LINE_SIZE; // for header
-                                       to->offset = BAMBOO_CACHE_LINE_SIZE;
-                                       to->base = to->ptr;
-                                       to->ptr += BAMBOO_CACHE_LINE_SIZE;
-                               }
-                               while(orig->ptr < markedptrbound + 1) {
-                                       moveobj(orig, to, NULL);
-                                       curr_heaptop = to->top;
-                                       curr_heapbound = to->bound;
-                               }
-                               // fill the header of this blockk
-                               (*((int*)(to->base))) = to->offset;
+       // send msgs to core coordinator indicating that the compact is finishing
+       // send compact finish message to core coordinator
+       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+               gcnumblocks[0] = to->numblocks;
+               if(orig->ptr < gcmarkedptrbound) {
+                       // ask for more mem
+                       gctomove = false;
+                       if(findSpareMem(&gcmovestartaddr, &gcstopblock, curr_heaptop)) {
+                               gctomove = true;
+                       } else {
+                               // TODO hold the request
                        }
-                       heaptopptr = to->ptr;
                } else {
-                       // have incoming objs, send messages to corresponding cores 
-                       // to start moving
-                       INTPTR startaddr = 0;
-                       INTPTR endaddr = 0;
-                       int heapptr = curr_heapptr;
-                       int top = curr_heaptop;
-                       int bound = curr_heapbound;
-                       for(int j = 0; j < cinstruction->movenum; j++) {
-                               startaddr = heapptr;
-                               top = top+cinstruction->size2move[j]+BAMBOO_CACHE_LINE_SIZE;
-                               if(top > bound) {
-                                       // will cross block boundary
-                                       int numb = (top - bound) / BAMBOO_SMEM_SIZE + 1;
-                                       top += numb * BAMBOO_CACHE_LINE_SIZE;
-                                       BASEPTR(BAMBOO_NUM_OF_CORE, numblocks + numb, &endaddr);
-                                       endaddr += 
-                                               (top-bound)%BAMBOO_SMEM_SIZE+BAMBOO_CACHE_LINE_SIZE;
-                                       heapptr = endaddr;
-                                       bound += BAMBOO_SMEM_SIZE * numb;
-                               } else {
-                                       endaddr = 
-                                               heapptr+cinstruction->size2move[j]+BAMBOO_CACHE_LINE_SIZE;
-                                       heapptr = endaddr;
-                               }
-                               send_msg_4(cinstruction->dsts[j], GCMOVESTART, 
-                                                      BAMBOO_NUM_OF_CORE, startaddr, 
-                                                                        cinstruction->size2move[j]);
-                       }
-                       heaptopptr = heapptr;
-               } // if(cinstruction->ismove) 
-       } // if(cinstruction->movenum != 0)
-       
-       // TODO large obj
-       /*
-       if((cinstruction != NULL) && (cinstruction->largeobjs != NULL)) {
-               // move all large objects
-               do {
-                       // dequeue the first large obj
-                       struct largeObjItem * loi = cinstruction->largeobjs;
-                       cinstruction->largeobjs = loi->next;
-                       // move this large obj
-                       memcpy(loi->dst, loi->orig, loi->length);
-                       RuntimeHashadd(pointertbl, loi->orig, loi->dst);
-                       RUNFREE(loi);
-               }while(cinstruction->largeobjs != NULL);
-       }*/
-       // send compact finish message to core coordinator
-       send_msg_3(STARTUPCORE, GCFINISHCOMPACT, 
-                              BAMBOO_NUM_OF_CORE, to->ptr);
+                       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+                       gcloads[BAMBOO_NUM_OF_CORE] = to->ptr;
+               }
+       } else {
+               if(orig->ptr < gcmarkedptrbound) {
+                       // ask for more mem
+                       gctomove = false;
+                       send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, 
+                                              to->numblocks, 0, curr_heaptop);
+               } else {
+                       // finish compacting
+                       send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
+                                              to->numblocks, 1, to->ptr);
+               }
+       } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
+
+       if(orig->ptr < gcmarkedptrbound) {
+               // still have unpacked obj
+               while(!gctomove) {};
+
+               to->ptr = gcmovestartaddr;
+               to->numblocks = gcstopblock - 1;
+               to->bound = (to->numblocks==0)?
+                       BAMBOO_SMEM_SIZE_L:
+                       BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
+               BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
+               to->offset = to->ptr - to->base;
+               to->top = (to->numblocks==0)?
+                       (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
+               to->base = to->ptr;
+               to->offset = BAMBOO_CACHE_LINE_SIZE;
+               to->ptr += to->offset; // for header
+               to->top += to->offset;
+               goto innercompact;
+       }
+       // TODO finish?
 
        RUNFREE(orig);
        RUNFREE(to);
 } // compact()
 
-void flush() {
+inline void * flushObj(void * objptr) {
+       void * dstptr = NULL;
+       if(ISSHAREDOBJ(objptr)) {
+               // a shared obj ptr, change to new address
+               RuntimeHashget(gcpointertbl, objptr, &dstptr);
+               if(NULL == dstptr) {
+                       // send msg to host core for the mapping info
+                       gcobj2map = (int)objptr;
+                       gcismapped = false;
+                       gcmappedobj = NULL;
+                       send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
+                                                                BAMBOO_NUM_OF_CORE);
+                       while(!gcismapped) {}
+                       RuntimeHashget(gcpointertbl, objptr, &dstptr);
+               }
+       } // if(ISSHAREDOBJ(objptr))
+       return dstptr;
+} // void flushObj(void * objptr, void ** tochange)
+
+inline void flush() {
        while(gc_moreItems()) {
-               voit * ptr = gc_dequeue();
+               void * ptr = gc_dequeue();
+               void * tptr = flushObj(ptr);
+               if(tptr != NULL) {
+                       ptr = tptr;
+               }
                int type = ((int *)(ptr))[0];
                // scan all pointers in ptr
                unsigned INTPTR * pointer;
@@ -1002,20 +1104,8 @@ void flush() {
                        for(j=0; j<length; j++) {
                                void *objptr=
                                        ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
-                               // change to new address
-                               void *dstptr = NULL;
-                               RuntimeHashget(pointertbl, objptr, &dstptr);
-                               if(NULL == dstptr) {
-                                       // send msg to host core for the mapping info
-                                       obj2map = (int)objptr;
-                                       ismapped = false;
-                                       mappedobj = NULL;
-                                       send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
-                                                              BAMBOO_NUM_OF_CORE);
-                                       while(!ismapped) {}
-                                       RuntimeHashget(pointertbl, objptr, &dstptr);
-                               }
-                               ((void **)(((char *)&ao->___length___)+sizeof(int)))[j]=dstptr;
+                               ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = 
+                                       flushObj(objptr);
                        }
                } else {
                        INTPTR size=pointer[0];
@@ -1023,28 +1113,19 @@ void flush() {
                        for(i=1; i<=size; i++) {
                                unsigned int offset=pointer[i];
                                void * objptr=*((void **)(((char *)ptr)+offset));
-                               // change to new address
-                               void *dstptr = NULL;
-                               RuntimeHashget(pointertbl, objptr, &dstptr);
-                               if(NULL == dstptr) {
-                                       // send msg to host core for the mapping info
-                                       obj2map = (int)objptr;
-                                       ismapped = false;
-                                       mappedobj = NULL;
-                                       send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
-                                                              BAMBOO_NUM_OF_CORE);
-                                       while(!ismapped) {}
-                                       RuntimeHashget(pointertbl, objptr, &dstptr);
-                               }
-                               *((void **)(((char *)ptr)+offset)) = dstptr;
-                       }
-               }
+                               ((void **)(((char *)ptr)+offset)) = flushObj(objptr);
+                       } // for(i=1; i<=size; i++) 
+               } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
        } // while(moi != NULL)
        // send flush finish message to core coordinator
-       send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE);
+       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+               gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+       } else {
+               send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE);
+       }
 } // flush()
 
-void gc_collect(struct garbagelist * stackptr) {
+inline void gc_collect(struct garbagelist * stackptr) {
        // core collector routine
        mark(true, stackptr);
        compact();
@@ -1052,6 +1133,6 @@ void gc_collect(struct garbagelist * stackptr) {
        flush();
 
        while(FINISHPHASE != gcphase) {}
-}
+} // void gc_collect(struct garbagelist * stackptr)
 
 #endif
index b01fbf642efbc1c51a5957bf9229cc00fe711a5c..5908fae98321a163f274a0fbfda5a168ad74eea2 100644 (file)
@@ -5,12 +5,14 @@
 // data structures for GC
 #define BAMBOO_NUM_PAGES 1024 * 512
 #define BAMBOO_PAGE_SIZE 4096
-#define BAMBOO_SHARED_MEM_SIZE BAMBOO_PAGE_SIZE * BAMBOO_PAGE_SIZE
+#define BAMBOO_SHARED_MEM_SIZE BAMBOO_PAGE_SIZE * BAMBOO_NUM_PAGES
 #define BAMBOO_BASE_VA 0xd000000
 #define BAMBOO_SMEM_SIZE 16 * BAMBOO_PAGE_SIZE
 #define BAMBOO_SMEM_SIZE_L 512 * BAMBOO_PAGE_SIZE
 #define BAMBOO_LARGE_SMEM_BOUND BAMBOO_SMEM_SIZE_L*NUMCORES // NUMCORES=62
 
+#define NUMPTRS 100
+
 struct garbagelist {
   int size;
   struct garbagelist *next;
@@ -36,25 +38,26 @@ struct pointerblock *gctail2=NULL;
 int gctailindex2=0;
 struct pointerblock *gcspare=NULL;
 
-struct largeObjItem {
-       INTPTR orig;
-       INTPTR dst;
-       int length;
-       struct largeObjItem * next;
-};
+#define NUMLOBJPTRS 20
 
-struct compactInstr {
-       int loads;
-       int ismove;
-       int movenum;
-       int size2move[2];
-       int dsts[2];
-       int moveflag[2];
-       INTPTR startaddrs[2];
-       INTPTR endaddrs[2];
-       struct largeObjItem * largeobjs;
+struct lobjpointerblock {
+  void * lobjs[NUMLOBJPTRS];
+       //void * dsts[NUMLOBJPTRS];
+       int lengths[NUMLOBJPTRS];
+       //void * origs[NUMLOBJPTRS];
+       int hosts[NUMLOBJPTRS];
+  struct lobjpointerblock *next;
 };
 
+struct lobjpointerblock *gclobjhead=NULL;
+int gclobjheadindex=0;
+struct lobjpointerblock *gclobjtail=NULL;
+int gclobjtailindex=0;
+struct lobjpointerblock *gclobjtail2=NULL;
+int gclobjtailindex2=0;
+struct lobjpointerblock *gclobjspare=NULL;
+int gcnumlobjs = 0;
+
 enum GCPHASETYPE {
        MARKPHASE = 0x0,   // 0x0
        COMPACTPHASE,      // 0x1
@@ -76,21 +79,35 @@ int gcself_numsendobjs;
 int gcself_numreceiveobjs;
 
 // for load balancing
+INTPTR gcheaptop;
 int gcloads[NUMCORES];
-int gcreloads[NUMCORES];
-int gcdeltal[NUMCORES];
-int gcdeltar[NUMCORES];
+int gctopcore; // the core host the top of the heap
+bool gcheapdirection; // 0: decrease; 1: increase
 
 // compact instruction
-INTPTR markedptrbound;
-struct compactInstr * cinstruction;
-bool gctomove; // flag indicating if can start moving objects to other cores
+INTPTR gcmarkedptrbound;
+int gcstopblock; // indicate when to stop compact phase
+int gcnumblocks[NUMCORES]; // indicate how many blocks have been fulfilled
+// move instruction;
+INTPTR gcmovestartaddr;
+bool gctomove;
 
 // mapping of old address to new address
-struct RuntimeHash * pointertbl;
-int obj2map;
-int mappedobj;
-bool ismapped;
+struct RuntimeHash * gcpointertbl;
+int gcobj2map;
+int gcmappedobj;
+bool gcismapped;
+
+// table recording the starting address of each small block
+// (size is BAMBOO_SMEM_SIZE)
+// Note: 1. this table always resides on the very bottom of the shared memory
+//       2. the first two blocks are reserved for this table, would never be
+//          moved or garbage collected.
+INTPTR * gcsbstarttbl;
+int gcreservedsb;  // number of reserved sblock for sbstarttbl
+
+#define ISSHAREDOBJ(p) \
+       (((p)>BAMBOO_BASE_VA)&&((p)<BAMBOO_BASE_VA+BAMBOO_SHARED_MEM_SIZE))
 
 #define ALIGNSIZE(s, as) \
        (*((int*)as)) = s & (~BAMBOO_CACHE_LINE_MASK) + BAMBOO_CACHE_LINE_SIZE;
@@ -144,81 +161,53 @@ bool ismapped;
                (*((int*)o)) = (s - BAMBOO_SMEM_SIZE_L) % BAMBOO_SMEM_SIZE; \
        }
 
-#define BASEPTR(c, n, p) \
+#define BLOCKINDEX2(c, n, b) \
        int x; \
   int y; \
-       int b; \
-  if(c > 5) c += 2; \
+  int t; \
+       if(c > 5) c += 2; \
   x = c / bamboo_height; \
        y = c % bamboo_height; \
        if(n%2) { \
                if(y % 2) { \
-                       b = bamboo_width - 1 - x + (bamboo_width - 1 - y) * bamboo_width; \
+                       t = bamboo_width - 1 - x + (bamboo_width - 1 - y) * bamboo_width; \
                } else { \
-                       b = x + (bamboo_width - 1 - y) * bamboo_width; \
+                       t = x + (bamboo_width - 1 - y) * bamboo_width; \
                } \
                if(y>5) { \
-                       b--; \
+                       t--; \
                } else { \
-                       b -= 2; \
+                       t -= 2; \
                } \
-               b += NUMCORES * n; \
+               t += NUMCORES * n; \
        } else { \
                if(y % 2) { \
-                       b = bamboo_width - 1 - x + y * bamboo_width; \
+                       t = bamboo_width - 1 - x + y * bamboo_width; \
                } else { \
-                       b = x + y * bamboo_width; \
+                       t = x + y * bamboo_width; \
                } \
-               if(y>5) b--; \
-               b += NUMCORES * n; \
+               if(y>5) t--; \
+               t += NUMCORES * n; \
        } \
+  (*((int*)b)) = t;
+
+
+#define BASEPTR(c, n, p) \
+       int b; \
+  BLOCKINDEX2(c, n, &b); \
        if(b < NUMCORES) { \
                (*((int*)p)) = BAMBOO_BASE_VA + b * BAMBOO_SMEM_SIZE_L; \
        } else { \
                (*((int*)p)) = BAMBOO_BASE_VA + BAMBOO_LARGE_SMEM_BOUND + (b - NUMCORES) * BAMBOO_SMEM_SIZE; \
        } 
 
-#define LEFTNEIGHBOUR(n, c) \
-       int x; \
-  int y; \
-  if(n > 5) n += 2; \
-  x = n / bamboo_height; \
-       y = n % bamboo_height; \
-       if((0 == n) || (15 == n)) { \
-               (*((int*)c)) = -1; \
-       } else if(n < 5) { \
-               if( 0 == y % 2) { \
-                       (*((int*)c)) = y - 1; \
-               } else { \
-                       (*((int*)c)) = y + 1; \
-               } \
-       } else if(5 == n) { \
-               (*((int*)c)) = (x + 1) * bamboo_height + y + 1 - 2; \
-       } else if(14 == n) { \
-               (*((int*)c)) = 5; \
-       } else { \
-               (*((int*)c)) = (x - 1) * bamboo_height + y - 2; \
-       } 
-
-#define RIGHTNEIGHBOUR(n, c) \
-       int x; \
-  int y; \
-  if(n > 5) n += 2; \
-  x = n / bamboo_height; \
-       y = n % bamboo_height; \
-       if(n < 56) { \
-               (*((int*)c)) = (x + 1) * bamboo_height + y - 2; \
-       } else if( 0 == y % 2) { \
-               (*((int*)c)) = x * bamboo_height + y + 1 - 2; \
-       } else { \
-               (*((int*)c)) = x * bamboo_height + y - 1 - 2; \
-       } 
-
-void gc(struct garbagelist * stackptr); // core coordinator routine
-void gc_collect(struct garbagelist * stackptr); // core collector routine
-void transferMarkResults();
-void transferCompactStart(int corenum);
-void gc_enqueue(void *ptr);
+inline void gc(struct garbagelist * stackptr); // core coordinator routine
+inline void gc_collect(struct garbagelist* stackptr);//core collector routine
+inline void transferMarkResults();
+inline void transferCompactStart(int corenum);
+inline void gc_enqueue(void *ptr);
+inline void gc_lobjenqueue(void *ptr, int length);
+inline bool findSpareMem(int * startaddr, int * tomove, int requiredmem);
 
 #endif
 
index 4d13ba275763b369256df8b52e40499a31d181b9..15255eafdde134693187862c4613ffdc3ea47c38 100644 (file)
@@ -19,6 +19,19 @@ int outmsgleft;
 bool isMsgHanging;
 volatile bool isMsgSending;
 
+#define OUTMSG_INDEXINC() \
+       outmsgindex = (outmsgindex + 1) % BAMBOO_OUT_BUF_LENGTH;
+
+#define OUTMSG_LASTINDEXINC() \
+       outmsglast = (outmsglast + 1) % BAMBOO_OUT_BUF_LENGTH; \
+       if(outmsglast == outmsgindex) { \
+               BAMBOO_EXIT(0xb003); \
+       } 
+
+#define OUTMSG_CACHE(n) \
+       outmsgdata[outmsglast] = (n); \
+  OUTMSG_LASTINDEXINC(); 
+
 /* Message format:
  *      type + Msgbody
  * type: 0 -- transfer object
@@ -55,6 +68,7 @@ volatile bool isMsgSending;
  *      1d -- mapping info of a markedObj
  *      1e -- large objs info request
  *      1f -- large objs info response
+ *      20 -- large objs mapping info
  *
  * ObjMsg: 0 + size of msg + obj's address + (task index + param index)+
  * StallMsg: 1 + corenum + sendobjs + receiveobjs 
@@ -93,7 +107,10 @@ volatile bool isMsgSending;
  *        13 (size is always 1 * sizeof(int))
  *        14 + corenum + gcsendobjs + gcreceiveobjs    
  *           (size if always 4 * sizeof(int))
- *        15/16 + corenum 
+ *        15 + corenum + fulfilled blocks num + (finish compact(1) + current
+ *           heap top)/(need mem(0) + mem need) 
+ *           size is always 5 * sizeof(int))
+ *        16 + corenum 
  *              (size is always 2 * sizeof(int))
  *        17 (size is always 1 * sizeof(int))
  *        18 (size if always 1 * sizeof(int))
@@ -101,8 +118,8 @@ volatile bool isMsgSending;
  *           (size is always 5 * sizeof(int))
  *        1a + obj's address 
  *           (size is always 2 * sizeof(int))
- *        1b + corenum 
- *           ( size is always 2 * sizeof(int))
+ *        1b + corenum + start addr + end addr
+ *           (size if always 4 * sizeof(int))
  *        1c + obj's address + corenum 
  *           (size is always 3 * sizeof(int))
  *        1d + obj's address + dst address 
@@ -110,6 +127,8 @@ volatile bool isMsgSending;
  *        1e (size is always 1 * sizeof(int))
  *        1f + size of msg + corenum + current heap size 
  *           + (num of large obj lists + (start address + length)+)?
+ *        20 + orig large obj ptr + new large obj ptr 
+*            (size is always 3 * sizeof(int))
  */
 enum MSGTYPE {
        TRANSOBJ = 0x0,  // 0x0
@@ -145,6 +164,7 @@ enum MSGTYPE {
        GCMAPINFO,       // 0x1d
        GCLOBJREQUEST,   // 0x1e
        GCLOBJINFO,      // 0x1f
+       GCLOBJMAPPING,   // 0x20
 #endif
        MSGEND
 };
index 86b5de723560cd12fe58638808a755c44186b401..924f7021b8aa6aaf8525cfdf7c90ef16897bfde2 100644 (file)
@@ -35,9 +35,6 @@ inline void initruntimedata() {
                        gcnumsendobjs[i] = 0; 
       gcnumreceiveobjs[i] = 0;
                        gcloads[i] = 0;
-                       gcreloads[i] = 0;
-                       gcdeltal[i] = 0;
-                       gcdeltar[i] = 0;
 #endif
     } // for(i = 0; i < NUMCORES; ++i)
                numconfirm = 0;
@@ -76,13 +73,36 @@ inline void initruntimedata() {
        gcphase = FINISHPHASE;
        gcself_numsendobjs = 0;
        gcself_numreceiveobjs = 0;
-       markedptrbound = 0;
-       cinstruction = NULL;
-       gctomove = false; 
-       pointertbl = allocateRuntimeHash(20);
-       obj2map = 0;
-       mappedobj = 0;
-       ismapped = false;
+       gcmarkedptrbound = 0;
+       gcpointertbl = allocateRuntimeHash(20);
+       gcobj2map = 0;
+       gcmappedobj = 0;
+       gcismapped = false;
+       gcnumlobjs = 0;
+       gcheaptop = 0;
+       gctopcore = 0;
+       gcheapdirection = 1;
+       gcstopblock = 0;
+       gcreservedsb = 0;
+       gcmovestartaddr = 0;
+       gctomove = false;
+       gcstopblock = 0;
+
+       // initialize queue
+       if (gchead==NULL) {
+               gcheadindex=0;
+               gctailindex=0;
+               gctailindex2 = 0;
+               gchead=gctail=gctail2=malloc(sizeof(struct pointerblock));
+       }
+       // initialize the large obj queues
+       if (gclobjhead==NULL) {
+               gclobjheadindex=0;
+               gclobjtailindex=0;
+               gclobjtailindex2 = 0;
+               gclobjhead=gclobjtail=gclobjtail2=
+                       malloc(sizeof(struct lobjpointerblock));
+       }
 #else
        // create the lock table, lockresult table and obj queue
   locktable.size = 20;
@@ -119,7 +139,7 @@ inline void initruntimedata() {
 
 inline void disruntimedata() {
 #ifdef MULTICORE_GC
-       freeRuntimeHash(pointertbl);
+       freeRuntimeHash(gcpointertbl);
 #else
        freeRuntimeHash(lockRedirectTbl);
        freeRuntimeHash(objRedirectLockTbl);
@@ -129,139 +149,140 @@ inline void disruntimedata() {
        RUNFREE(currtpd);
 }
 
-bool checkObjQueue(void * sendStall) {
-       int tocontinue = false;
+bool checkObjQueue() {
+       bool rflag = false;
        struct transObjInfo * objInfo = NULL;
        int grount = 0;
+
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-{
-               bool isChecking = false;
-               if(!isEmpty(&objqueue)) {
-                       profileTaskStart("objqueue checking");
-                       isChecking = true;
-               }
+       bool isChecking = false;
+       if(!isEmpty(&objqueue)) {
+               profileTaskStart("objqueue checking");
+               isChecking = true;
+       } // if(!isEmpty(&objqueue))
 #endif
 #endif
-               while(!isEmpty(&objqueue)) {
-                       void * obj = NULL;
-                       BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE();
+
+       while(!isEmpty(&objqueue)) {
+               void * obj = NULL;
+               BAMBOO_START_CRITICAL_SECTION_OBJ_QUEUE();
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xf001);
+               BAMBOO_DEBUGPRINT(0xf001);
 #endif
 #ifdef PROFILE
-                       //isInterrupt = false;
+               //isInterrupt = false;
 #endif 
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xeee1);
+               BAMBOO_DEBUGPRINT(0xeee1);
 #endif
-                       (*((bool *)sendStall)) = false;
-                       tocontinue = true;
-                       objInfo = (struct transObjInfo *)getItem(&objqueue); 
-                       obj = objInfo->objptr;
+               rflag = true;
+               objInfo = (struct transObjInfo *)getItem(&objqueue); 
+               obj = objInfo->objptr;
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG((int)obj);
+               BAMBOO_DEBUGPRINT_REG((int)obj);
 #endif
-                       // grab lock and flush the obj
-                       grount = 0;
-                       getwritelock_I(obj);
-                       while(!lockflag) {
-                               BAMBOO_WAITING_FOR_LOCK();
-                       }
-                       grount = lockresult;
+               // grab lock and flush the obj
+               grount = 0;
+               getwritelock_I(obj);
+               while(!lockflag) {
+                       BAMBOO_WAITING_FOR_LOCK();
+               } // while(!lockflag)
+               grount = lockresult;
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG(grount);
+               BAMBOO_DEBUGPRINT_REG(grount);
 #endif
 
-                       lockresult = 0;
-                       lockobj = 0;
-                       lock2require = 0;
-                       lockflag = false;
+               lockresult = 0;
+               lockobj = 0;
+               lock2require = 0;
+               lockflag = false;
 #ifndef INTERRUPT
-                       reside = false;
+               reside = false;
 #endif
 
-                       if(grount == 1) {
-                               int k = 0;
-                               // flush the object
+               if(grount == 1) {
+                       int k = 0;
+                       // flush the object
 #ifdef CACHEFLUSH
-                               BAMBOO_CACHE_FLUSH_RANGE((int)obj,sizeof(int));
-                               BAMBOO_CACHE_FLUSH_RANGE((int)obj, 
-                                               classsize[((struct ___Object___ *)obj)->type]);
-#endif
-                               // enqueue the object
-                               for(k = 0; k < objInfo->length; ++k) {
-                                       int taskindex = objInfo->queues[2 * k];
-                                       int paramindex = objInfo->queues[2 * k + 1];
-                                       struct parameterwrapper ** queues = 
-                                               &(paramqueues[BAMBOO_NUM_OF_CORE][taskindex][paramindex]);
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT_REG(taskindex);
-                                       BAMBOO_DEBUGPRINT_REG(paramindex);
-                                       struct ___Object___ * tmpptr = (struct ___Object___ *)obj;
-                                       tprintf("Process %x(%d): receive obj %x(%lld), ptrflag %x\n", 
-                                                           BAMBOO_NUM_OF_CORE, BAMBOO_NUM_OF_CORE, (int)obj, 
-                                                                       (long)obj, tmpptr->flag);
-#endif
-                                       enqueueObject_I(obj, queues, 1);
+                       BAMBOO_CACHE_FLUSH_RANGE((int)obj,sizeof(int));
+                       BAMBOO_CACHE_FLUSH_RANGE((int)obj, 
+                                       classsize[((struct ___Object___ *)obj)->type]);
+#endif
+                       // enqueue the object
+                       for(k = 0; k < objInfo->length; ++k) {
+                               int taskindex = objInfo->queues[2 * k];
+                               int paramindex = objInfo->queues[2 * k + 1];
+                               struct parameterwrapper ** queues = 
+                                       &(paramqueues[BAMBOO_NUM_OF_CORE][taskindex][paramindex]);
+#ifdef DEBUG
+                               BAMBOO_DEBUGPRINT_REG(taskindex);
+                               BAMBOO_DEBUGPRINT_REG(paramindex);
+                               struct ___Object___ * tmpptr = (struct ___Object___ *)obj;
+                               tprintf("Process %x(%d): receive obj %x(%lld), ptrflag %x\n", 
+                                                               BAMBOO_NUM_OF_CORE, BAMBOO_NUM_OF_CORE, (int)obj, 
+                                                               (long)obj, tmpptr->flag);
+#endif
+                               enqueueObject_I(obj, queues, 1);
 #ifdef DEBUG                            
-                                       BAMBOO_DEBUGPRINT_REG(hashsize(activetasks));
+                               BAMBOO_DEBUGPRINT_REG(hashsize(activetasks));
 #endif
-                               }
-                               releasewritelock_I(obj);
-                               RUNFREE(objInfo->queues);
-                               RUNFREE(objInfo);
-                       } else {
-                               // can not get lock
-                               // put it at the end of the queue if no update version in the queue
-                               struct QueueItem * qitem = getHead(&objqueue);
-                               struct QueueItem * prev = NULL;
-                               while(qitem != NULL) {
-                                       struct transObjInfo * tmpinfo = 
-                                               (struct transObjInfo *)(qitem->objectptr);
-                                       if(tmpinfo->objptr == obj) {
-                                               // the same object in the queue, which should be enqueued
-                                               // recently. Current one is outdate, do not re-enqueue it
-                                               RUNFREE(objInfo->queues);
-                                               RUNFREE(objInfo);
-                                               goto objqueuebreak;
-                                       } else {
-                                               prev = qitem;
-                                       } // if(tmpinfo->objptr == obj)
-                                       qitem = getNextQueueItem(prev);
-                               } // while(qitem != NULL)
-                               // try to execute active tasks already enqueued first
-                               addNewItem_I(&objqueue, objInfo);
+                       } // for(k = 0; k < objInfo->length; ++k)
+                       releasewritelock_I(obj);
+                       RUNFREE(objInfo->queues);
+                       RUNFREE(objInfo);
+               } else {
+                       // can not get lock
+                       // put it at the end of the queue if no update version in the queue
+                       struct QueueItem * qitem = getHead(&objqueue);
+                       struct QueueItem * prev = NULL;
+                       while(qitem != NULL) {
+                               struct transObjInfo * tmpinfo = 
+                                       (struct transObjInfo *)(qitem->objectptr);
+                               if(tmpinfo->objptr == obj) {
+                                       // the same object in the queue, which should be enqueued
+                                       // recently. Current one is outdate, do not re-enqueue it
+                                       RUNFREE(objInfo->queues);
+                                       RUNFREE(objInfo);
+                                       goto objqueuebreak;
+                               } else {
+                                       prev = qitem;
+                               } // if(tmpinfo->objptr == obj)
+                               qitem = getNextQueueItem(prev);
+                       } // while(qitem != NULL)
+                       // try to execute active tasks already enqueued first
+                       addNewItem_I(&objqueue, objInfo);
 #ifdef PROFILE
-                               //isInterrupt = true;
+                       //isInterrupt = true;
 #endif
 objqueuebreak:
-                               BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE();
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xf000);
-#endif
-                               break;
-                       } // if(grount == 1)
                        BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE();
 #ifdef DEBUG
                        BAMBOO_DEBUGPRINT(0xf000);
 #endif
-               } // while(!isEmpty(&objqueue))
+                       break;
+               } // if(grount == 1)
+               BAMBOO_CLOSE_CRITICAL_SECTION_OBJ_QUEUE();
+#ifdef DEBUG
+               BAMBOO_DEBUGPRINT(0xf000);
+#endif
+       } // while(!isEmpty(&objqueue))
+
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-               if(isChecking) {
+       if(isChecking) {
                profileTaskEnd();
-       }
-}
+       } // if(isChecking)
 #endif
 #endif
+
 #ifdef DEBUG
        BAMBOO_DEBUGPRINT(0xee02);
 #endif
-       return tocontinue;
+       return rflag;
 }
 
-void checkCoreStatue() {
+inline void checkCoreStatue() {
        bool allStall = false;
        int i = 0;
        int sumsendobj = 0;
@@ -451,7 +472,7 @@ inline void run(void * arg) {
 #ifdef PROFILE
     //isInterrupt = false;
 #endif
-       fakeExecution();
+               fakeExecution();
   } else {
          /* Create queue of active tasks */
          activetasks=
@@ -1131,13 +1152,11 @@ inline void addNewObjInfo(void * nobj) {
 
 void * smemalloc(int size, 
                             int * allocsize) {
-       // TODO can not handle large obj which is bigger than a block
 #ifdef MULTICORE_GC
        // go through free mem list for suitable blocks
        struct freeMemItem * freemem = bamboo_free_mem_list->head;
        struct freeMemItem * prev = NULL;
        do {
-smemsearch:
                if(freemem->size > size) {
                        // found one
                        break;
@@ -1147,31 +1166,27 @@ smemsearch:
        } while(freemem != NULL);
        if(freemem != NULL) {
                void * mem = (void *)(freemem->ptr);
+               *allocsize = size;
+               freemem->ptr += size;
+               freemem->size -= size;
+               // check how many blocks it acrosses
                int b = 0;
                BLOCKINDEX(mem, &b);
                // check the remaining space in this block
-               int remain = BAMBOO_SMEM_SIZE_L+b*BAMBOO_SMEM_SIZE-(mem-BAMBOO_BASE_VA);
-               // TODO how about large objs?
+               int remain = (b < NUMCORES? (b+1)*BAMBOO_SMEM_SIZE_L  
+                                       : BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE)
+                                 -(mem-BAMBOO_BASE_VA);
                if(remain < size) {
-                       // not enough space in this block
-                       struct freeMemItem * tmp = 
-                               (struct freeMemItem*)RUNMALLOC(sizeof(struct freeMemItem));
-                       tmp->ptr = mem;
-                       tmp->size = remain;
-                       tmp->next = freemem;
-                       if(bamboo_free_mem_list->head == freemem) {
-                               bamboo_free_mem_list->head = tmp;
+                       // this object acrosses blocks
+                       int tmpsbs = 1+(size-remain-1)/BAMBOO_SMEM_SIZE;
+                       for(int k = 0; k < tmpsbs-1; k++) {
+                               sbstarttbl[k+b] = (INTPTR)(-1);
+                       }
+                       if((size-remain)%BAMBOO_SMEM_SIZE == 0) {
+                               sbstarttbl[b+tmpsbs-1] = (INTPTR)(-1);
                        } else {
-                               prev->next = tmp;
+                               sbstarttbl[b+tmpsbs-1] = (INTPTR)(mem+size);
                        }
-                       freemem->ptr += size;
-                       freemem->size -= size;
-                       // continue checking
-                       goto smemsearch;
-               } else {
-                       *allocsize = size;
-                       freemem->ptr += size;
-                       freemem->size -= size;
                }
        } else {
 #else
@@ -1183,6 +1198,7 @@ smemsearch:
                *allocsize = 0;
 #ifdef MULTICORE_GC
                gcflag = true;
+               gcrequiredmem = size;
                return NULL;
 #else
                BAMBOO_DEBUGPRINT(0xa016);
@@ -1217,9 +1233,7 @@ msg:
   if(msgdataindex == msglength) {
     // received a whole msg
     MSGTYPE type; 
-               int data1;             // will receive at least 2 words including type
     type = msgdata[0];
-    data1 = msgdata[1];
     switch(type) {
     case TRANSOBJ: {
       // receive a object transfer msg
@@ -1238,7 +1252,7 @@ msg:
                                BAMBOO_EXIT(0xa005);
                        } 
       // store the object and its corresponding queue info, enqueue it later
-      transObj->objptr = (void *)msgdata[2]; // data1 is now size of the msg
+      transObj->objptr = (void *)msgdata[2]; 
       transObj->length = (msglength - 3) / 2;
       transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3));
       for(k = 0; k < transObj->length; ++k) {
@@ -1286,19 +1300,19 @@ msg:
       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // non startup core can not receive stall msg
 #ifndef TILERA
-                               BAMBOO_DEBUGPRINT_REG(data1);
+                               BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                                BAMBOO_EXIT(0xa006);
       } 
-      if(data1 < NUMCORES) {
+      if(msgdata[1] < NUMCORES) {
 #ifdef DEBUG
 #ifndef TILERA
                                BAMBOO_DEBUGPRINT(0xe881);
 #endif
 #endif
-                               corestatus[data1] = 0;
-                               numsendobjs[data1] = msgdata[2];
-                               numreceiveobjs[data1] = msgdata[3];
+                               corestatus[msgdata[1]] = 0;
+                               numsendobjs[msgdata[1]] = msgdata[2];
+                               numreceiveobjs[msgdata[1]] = msgdata[3];
       }
       break;
     }
@@ -1308,12 +1322,13 @@ msg:
     case LOCKREQUEST: {
       // receive lock request msg, handle it right now
       // check to see if there is a lock exist for the required obj
-                       // data1 -> lock type
+                       // msgdata[1] -> lock type
                        int data2 = msgdata[2]; // obj pointer
       int data3 = msgdata[3]; // lock
                        int data4 = msgdata[4]; // request core
                        // -1: redirected, 0: approved, 1: denied
-      deny = processlockrequest(data1, data3, data2, data4, data4, true);  
+      deny = processlockrequest(msgdata[1], data3, data2, 
+                                                             data4, data4, true);  
                        if(deny == -1) {
                                // this lock request is redirected
                                break;
@@ -1322,9 +1337,9 @@ msg:
                                // for 32 bit machine, the size is always 4 words
                                int tmp = deny==1?LOCKDENY:LOCKGROUNT;
                                if(isMsgSending) {
-                                       cache_msg_4(data4, tmp, data1, data2, data3);
+                                       cache_msg_4(data4, tmp, msgdata[1], data2, data3);
                                } else {
-                                       send_msg_4(data4, tmp, data1, data2, data3);
+                                       send_msg_4(data4, tmp, msgdata[1], data2, data3);
                                }
                        }
       break;
@@ -1390,7 +1405,7 @@ msg:
 
     case LOCKRELEASE: {
       // receive lock release msg
-                       processlockrelease(data1, msgdata[2], 0, false);
+                       processlockrelease(msgdata[1], msgdata[2], 0, false);
       break;
     }
 #endif
@@ -1408,7 +1423,7 @@ msg:
 #endif
 #endif
                        stall = true;
-                       totalexetime = data1;
+                       totalexetime = msgdata[1];
                        outputProfileData();
                        if(isMsgSending) {
                                cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
@@ -1423,7 +1438,7 @@ msg:
       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                                // non startup core can not receive profile output finish msg
 #ifndef TILERA
-                               BAMBOO_DEBUGPRINT_REG(data1);
+                               BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                                BAMBOO_EXIT(0xa00d);
       }
@@ -1432,7 +1447,7 @@ msg:
                        BAMBOO_DEBUGPRINT(0xe886);
 #endif
 #endif
-                       profilestatus[data1] = 0;
+                       profilestatus[msgdata[1]] = 0;
       break;
     }
 #endif
@@ -1442,12 +1457,12 @@ msg:
        case REDIRECTLOCK: {
          // receive a redirect lock request msg, handle it right now
                // check to see if there is a lock exist for the required obj
-         // data1 -> lock type
+         int data1 = msgdata[1]; // lock type
          int data2 = msgdata[2]; // obj pointer
                int data3 = msgdata[3]; // redirect lock
          int data4 = msgdata[4]; // root request core
          int data5 = msgdata[5]; // request core
-         deny = processlockrequest(data1, data3, data2, data5, data4, true);
+         deny = processlockrequest(msgdata[1], data3, data2, data5, data4, true);
          if(deny == -1) {
                  // this lock request is redirected
                  break;
@@ -1526,7 +1541,7 @@ msg:
 
        case REDIRECTRELEASE: {
          // receive a lock release msg with redirect info
-               processlockrelease(data1, msgdata[2], msgdata[3], true);
+               processlockrelease(msgdata[1], msgdata[2], msgdata[3], true);
                break;
        }
 #endif
@@ -1575,8 +1590,8 @@ msg:
                          numconfirm--;
                  }
                  corestatus[msgdata[2]] = msgdata[1];
-                       numsendobjs[data1] = msgdata[2];
-                       numreceiveobjs[data1] = msgdata[3];
+                       numsendobjs[msgdata[1]] = msgdata[2];
+                       numreceiveobjs[msgdata[1]] = msgdata[3];
                }
          break;
        }
@@ -1636,10 +1651,10 @@ msg:
 #endif
 #endif
 #ifdef MULTICORE_GC
-                       if(gcprocessing) {
-                               // is currently doing gc, dump this msg
-                               break;
-                       }
+               if(gcprocessing) {
+                       // is currently doing gc, dump this msg
+                       break;
+               }
 #endif
          if(msgdata[2] == 0) {
                  bamboo_smem_size = 0;
@@ -1655,6 +1670,7 @@ msg:
                                create_mspace_with_base((void*)(msgdata[1]+BAMBOO_CACHE_LINE_SIZE),
                                                         msgdata[2]-BAMBOO_CACHE_LINE_SIZE, 
                                                                                                                                 0);
+#endif
          }
          smemflag = true;
          break;
@@ -1684,43 +1700,7 @@ msg:
 
        case GCSTARTCOMPACT: {
                // a compact phase start msg
-               if(cinstruction == NULL) {
-                       cinstruction = 
-                               (struct compactInstr *)RUNMALLOC(sizeof(struct compactInstr));
-               } else {
-                       // clean up out of date info
-                       cinstruction->movenum = 0;
-               }
-               cinstruction->loads = msgdata[2];
-               if(data1 > 3) {
-                       // have objs to move etc.
-                       int startindex = 3;
-                       // process objs to move
-                       cinstruction->movenum = msgdata[startindex++];
-                       cinstruction->ismove = msgdata[startindex++];
-                       for(i = 0; i < cinstruction->movenum; i++) {
-                               cinstruction->size2move[i] = msgdata[startindex++];
-                               cinstruction->dsts[i] = msgdata[startindex++];
-                               cinstruction->moveflag[i] = 0;
-                               cinstruction->startaddrs[i] = 0;
-                               cinstruction->endaddrs[i] = 0;
-                       }
-                       // TODO
-                       /*// process large objs
-                       num = msgdata[startindex++];
-                       for(i = 0; i < num; i++) {
-                               struct largeObjItem * loi = 
-                                       (struct largeObjItem *)RUNMALLOC(sizeof(struct largeObjItem ));
-                               loi->orig = msgdata[startindex++];
-                               loi->length = msgdata[startindex++];
-                               loi->dst = msgdata[startindex++];
-                               loi->next = NULL;
-                               if(i > 0) {
-                                       cinstruction->largeobjs->next = loi;
-                               }
-                               cinstruction->largeobjs = loi;
-                       }*/
-               }
+               gcstopblock = msgdata[1];
                gcphase = COMPACTPHASE;
                break;
        }
@@ -1736,14 +1716,14 @@ msg:
                if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
                  // non startup core can not receive this msg
 #ifndef TILERA
-                 BAMBOO_DEBUGPRINT_REG(data1);
+                 BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                  BAMBOO_EXIT(0xb006);
                } 
-               if(data1 < NUMCORES) {
-                       gccorestatus[data1] = 0;
-                       gcnumsendobjs[data1] = gcmsgdata[2];
-                       gcnumreceiveobjs[data1] = gcmsgdata[3];
+               if(msgdata[1] < NUMCORES) {
+                       gccorestatus[msgdata[1]] = 0;
+                       gcnumsendobjs[msgdata[1]] = gcmsgdata[2];
+                       gcnumreceiveobjs[msgdata[1]] = gcmsgdata[3];
                }
          break;
        }
@@ -1754,13 +1734,25 @@ msg:
                  // non startup core can not receive this msg
                  // return -1
 #ifndef TILERA
-                 BAMBOO_DEBUGPRINT_REG(data1);
+                 BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                  BAMBOO_EXIT(0xb006);
                } 
-               if(data1 < NUMCORES) {
-                 gccorestatus[data1] = 0;
-                       gcloads[data1] = msgdata[2];
+               if(msgdata[1] < NUMCORES) {
+                       gcnumblocks[msgdata[1]] = msgdata[2];
+                       if(msgdata[3] == 0) {
+                               // ask for more mem
+                               int startaddr = 0;
+                               int tomove = 0;
+                               if(findSpareMem(&startaddr, &tomove, msgdata[2])) {
+                                       send_msg_4(msgdata[1], GCMOVESTART, k, startaddr, tomove);
+                               } else {
+                                       // TODO if not success
+                               }
+                       } else {
+                               gccorestatus[msgdata[1]] = 0;
+                               gcloads[msgdata[1]] = msgdata[4];
+                       }
                }
          break;
        }
@@ -1771,12 +1763,12 @@ msg:
                  // non startup core can not receive this msg
                  // return -1
 #ifndef TILERA
-                 BAMBOO_DEBUGPRINT_REG(data1);
+                 BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
                  BAMBOO_EXIT(0xb006);
                } 
-               if(data1 < NUMCORES) {
-                 gccorestatus[data1] = 0;
+               if(msgdata[1] < NUMCORES) {
+                 gccorestatus[msgdata[1]] = 0;
                }
          break;
        }
@@ -1819,16 +1811,16 @@ msg:
                  if(waitconfirm) {
                          numconfirm--;
                  }
-                 gccorestatus[data1] = gcmsgdata[2];
-                 gcnumsendobjs[data1] = gcmsgdata[3];
-                 gcnumreceiveobjs[data1] = gcmsgdata[4];
+                 gccorestatus[msgdata[1]] = gcmsgdata[2];
+                 gcnumsendobjs[msgdata[1]] = gcmsgdata[3];
+                 gcnumreceiveobjs[msgdata[1]] = gcmsgdata[4];
                }
          break;
        }
 
        case GCMARKEDOBJ: {
                // received a markedObj msg
-               gc_enqueue(data1);
+               gc_enqueue(msgdata[1]);
                gcself_numreceiveobjs++;
                gcbusystatus = true;
                break;
@@ -1836,35 +1828,25 @@ msg:
 
        case GCMOVESTART: {
                // received a start moving objs msg
-               if(cinstruction == NULL) {
-                       // something is wrong
-                       BAMBOO_EXIT(0xa023);
-               }
-               for(i = 0; i < cinstruction->movenum; i++) {
-                       if(cinstruction->dsts[i] == data1) {
-                               // set the flag to indicate the core is ready to accept objs
-                               cinstruction->moveflag[i] = 1;
-                               cinstruction->startaddrs[i] = msgdata[2];
-                               cinstruction->endaddrs[i] = msgdata[3];
-                       }
-               }
-               tomove = true;
+               gctomove = true;
+               gcmovestartaddr = msgdata[2];
+               gcstopblock = msgdata[3];
                break;
        }
        
        case GCMAPREQUEST: {
                // received a mapping info request msg
                void * dstptr = NULL;
-               RuntimeHashget(pointertbl, data1, &dstptr);
+               RuntimeHashget(gcpointertbl, msgdata[1], &dstptr);
                if(NULL == dstptr) {
                        // no such pointer in this core, something is wrong
                        BAMBOO_EXIT(0xb008);
                } else {
                        // send back the mapping info
                        if(isMsgSending) {
-                               cache_msg_3(msgdata[2], GCMAPINFO, data1, (int)dstptr);
+                               cache_msg_3(msgdata[2], GCMAPINFO, msgdata[1], (int)dstptr);
                        } else {
-                               send_msg_3(msgdata[2], GCMAPINFO,data1, (int)dstptr);
+                               send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], (int)dstptr);
                        }
                }
                break;
@@ -1872,14 +1854,14 @@ msg:
 
        case GCMAPINFO: {
                // received a mapping info response msg
-               if(data1 != obj2map) {
+               if(msgdata[1] != gcobj2map) {
                        // obj not matched, something is wrong
                        BAMBOO_EXIT(0xb009);
                } else {
-                       mappedobj = msgdata[2];
-                       RuntimeHashadd(pointertbl, obj2map, mappedobj);
+                       gcmappedobj = msgdata[2];
+                       RuntimeHashadd(gcpointertbl, gcobj2map, gcmappedobj);
                }
-               ismapped = true;
+               gcismapped = true;
                break;
        }
 
@@ -1891,7 +1873,7 @@ msg:
 
        case GCLOBJINFO: {
                // received a large objs info response msg
-               gcwaitconfirm--;
+               waitconfirm--;
 
                if(BAMBOO_NUM_OF_CORE > NUMCORES - 1) {
 #ifndef TILERA
@@ -1902,9 +1884,22 @@ msg:
                // store the mark result info 
                int cnum = msgdata[2];
                gcloads[cnum] = msgdata[3];
-               // TODO large obj info here
+               if(gcheaptop < msgdata[4]) {
+                       gcheaptop = msgdata[4];
+               }
+               // large obj info here
+         for(int k = 5; k < msgdata[1];) {
+                       gc_lobjenqueue(msgdata[k++], msgdata[k++], cnum, NULL);
+               } // for(int k = 5; k < msgdata[1];)
                break;
        }
+       
+       case GCLOBJMAPPING: {
+               // received a large obj mapping info msg
+               RuntimeHashadd(gcpointertbl, msgdata[1], msgdata[2]);
+               break;
+       }
+
 #endif
 
        default:
@@ -1929,7 +1924,7 @@ msg:
                profileTaskEnd();
        }*/
 #endif
-       return type;
+       return (int)type;
 } else {
        // not a whole msg
 #ifdef DEBUG
@@ -1946,7 +1941,6 @@ msg:
   }
 }
 
-
 int enqueuetasks(struct parameterwrapper *parameter, 
                             struct parameterwrapper *prevptr, 
                                                                 struct ___Object___ *ptr,