From 43ca28489f56813a8184ff602bb8e8f0e507c40c Mon Sep 17 00:00:00 2001
From: jzhou <jzhou>
Date: Tue, 25 Aug 2009 01:26:55 +0000
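Subject: [PATCH] Fix bugs in multicore GC

- Generate precise-GC style code (garbagelist argument, COPY_OBJ with
  stack pointer) when MULTICOREGC is enabled, and emit
  "if(gcflag) gc(...)" in task bodies instead of an unconditional gc().
- Move the GC queue structs and their globals from multicoregarbage.h
  into multicoregarbage.c; use RUNMALLOC/RUNFREE instead of malloc/free.
- Rename curr_heaptop -> gccurr_heaptop, sbstarttbl -> gcsbstarttbl and
  findSpareMem -> gcfindSpareMem; remove transferMarkResults().
- Skip NULL roots in tomark()/markObj() and guard against a NULL currtpd.
- Add GC_DEBUG shared-memory dumps and renumber BAMBOO_EXIT codes.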

---
 Robust/src/IR/Flat/BuildCode.java          |  12 +-
 Robust/src/IR/Flat/BuildCodeMultiCore.java |  12 +-
 Robust/src/Runtime/ObjectHash.c            |   2 +-
 Robust/src/Runtime/SimpleHash.c            |   2 +-
 Robust/src/Runtime/mem.c                   |   9 +-
 Robust/src/Runtime/mem.h                   |   1 +
 Robust/src/Runtime/multicoregarbage.c      | 354 +++++++++++----------
 Robust/src/Runtime/multicoregarbage.h      | 240 +++++++-------
 Robust/src/Runtime/multicoregc.h           |  16 +
 Robust/src/Runtime/multicoreruntime.c      |   6 +
 Robust/src/Runtime/multicoreruntime.h      |  30 +-
 Robust/src/Runtime/multicoretask.c         | 137 ++++----
 Robust/src/Runtime/object.c                |   2 +-
 Robust/src/Runtime/runtime.h               |   5 +
 Robust/src/buildscript                     |   1 +
 15 files changed, 453 insertions(+), 376 deletions(-)
 create mode 100644 Robust/src/Runtime/multicoregc.h
diff --git a/Robust/src/IR/Flat/BuildCode.java b/Robust/src/IR/Flat/BuildCode.java
index 0471d35f..0a0f7c42 100644
--- a/Robust/src/IR/Flat/BuildCode.java
+++ b/Robust/src/IR/Flat/BuildCode.java
@@ -3207,7 +3207,7 @@ public class BuildCode {
       needcomma=true;
     }
 
-    if (!GENERATEPRECISEGC) {
+    if (!GENERATEPRECISEGC && !this.state.MULTICOREGC) {
       if (fc.getThis()!=null) {
 	TypeDescriptor ptd=md.getThis().getType();
 	if (needcomma)
@@ -3380,7 +3380,7 @@ public class BuildCode {
 	/* Link object into list */
 	String revertptr=generateTemp(fm, reverttable.get(lb),lb);
 	output.println(revertptr+"=revertlist;");
-	if (GENERATEPRECISEGC)
+	if (GENERATEPRECISEGC || this.state.MULTICOREGC)
 	  output.println("COPY_OBJ((struct garbagelist *)"+localsprefixaddr+",(struct ___Object___ *)"+dst+");");
 	else
 	  output.println("COPY_OBJ("+dst+");");
@@ -3408,7 +3408,7 @@ public class BuildCode {
 	String dst=generateTemp(fm, fsfn.getDst(),lb);
 	output.println("if(!"+dst+"->"+localcopystr+") {");
 	/* Link object into list */
-	if (GENERATEPRECISEGC)
+	if (GENERATEPRECISEGC || this.state.MULTICOREGC)
 	  output.println("COPY_OBJ((struct garbagelist *)"+localsprefixaddr+",(struct ___Object___ *)"+dst+");");
 	else
 	  output.println("COPY_OBJ("+dst+");");
@@ -3528,7 +3528,7 @@ public class BuildCode {
 	/* Link object into list */
 	String revertptr=generateTemp(fm, reverttable.get(lb),lb);
 	output.println(revertptr+"=revertlist;");
-	if ((GENERATEPRECISEGC))
+	if ((GENERATEPRECISEGC) || this.state.MULTICOREGC)
         output.println("COPY_OBJ((struct garbagelist *)"+localsprefixaddr+",(struct ___Object___ *)"+dst+");");
 	else
 	  output.println("COPY_OBJ("+dst+");");
@@ -3550,7 +3550,7 @@ public class BuildCode {
 	String dst=generateTemp(fm, fsen.getDst(),lb);
 	output.println("if(!"+dst+"->"+localcopystr+") {");
 	/* Link object into list */
-	if (GENERATEPRECISEGC)
+	if (GENERATEPRECISEGC || this.state.MULTICOREGC)
 	  output.println("COPY_OBJ((struct garbagelist *)"+localsprefixaddr+",(struct ___Object___ *)"+dst+");");
 	else
 	  output.println("COPY_OBJ("+dst+");");
@@ -3792,7 +3792,7 @@ public class BuildCode {
 	  output.print(temp.getType().getSafeSymbol()+" "+temp.getSafeSymbol());
       }
       output.println(") {");
-    } else if (!GENERATEPRECISEGC) {
+    } else if (!GENERATEPRECISEGC && !this.state.MULTICOREGC) {
       /* Imprecise Task */
       output.println("void * parameterarray[]) {");
       /* Unpack variables */
diff --git a/Robust/src/IR/Flat/BuildCodeMultiCore.java b/Robust/src/IR/Flat/BuildCodeMultiCore.java
index f1ba3a12..02eb9191 100644
--- a/Robust/src/IR/Flat/BuildCodeMultiCore.java
+++ b/Robust/src/IR/Flat/BuildCodeMultiCore.java
@@ -587,12 +587,7 @@ public class BuildCodeMultiCore extends BuildCode {
 
     //ParamsObject objectparams=(ParamsObject)paramstable.get(lb!=null?lb:task);
     generateTaskHeader(fm, lb, task,output);
-    // output code to check if need to do gc
-    if(state.MULTICOREGC) {
-      output.println("#ifdef MULTICORE_GC");
-      output.println("gc();");
-      output.println("#endif");
-    }
+
     TempObject objecttemp=(TempObject) tempstable.get(lb!=null ? lb : task);
     /*if (state.DSM&&lb.getHasAtomic()) {
         output.println("transrecord_t * trans;");
@@ -631,6 +626,9 @@ public class BuildCodeMultiCore extends BuildCode {
 
     /* Check to see if we need to do a GC if this is a
      * multi-threaded program...*/
+    if(this.state.MULTICOREGC) {
+      output.println("if(gcflag) gc("+localsprefixaddr+");");
+    }
 
     /*if ((state.THREAD||state.DSM)&&GENERATEPRECISEGC) {
         if (state.DSM&&lb.isAtomic())
@@ -919,7 +917,7 @@ public class BuildCodeMultiCore extends BuildCode {
         printcomma=true;
        }*/
 
-    if (!GENERATEPRECISEGC) {
+    if (!GENERATEPRECISEGC && !this.state.MULTICOREGC) {
       /* Imprecise Task */
       output.println("void * parameterarray[]) {");
       /* Unpack variables */
diff --git a/Robust/src/Runtime/ObjectHash.c b/Robust/src/Runtime/ObjectHash.c
index 8312857d..7c8ceaaa 100755
--- a/Robust/src/Runtime/ObjectHash.c
+++ b/Robust/src/Runtime/ObjectHash.c
@@ -25,7 +25,7 @@ struct ObjectHash * allocateObjectHash(int size) {
   struct ObjectHash *thisvar;  //=(struct ObjectHash *)RUNMALLOC(sizeof(struct ObjectHash));
   if (size <= 0) {
 #ifdef MULTICORE
-    BAMBOO_EXIT(0xc001);
+    BAMBOO_EXIT(0xf001);
 #else
     printf("Negative Hashtable size Exception\n");
     exit(-1);
diff --git a/Robust/src/Runtime/SimpleHash.c b/Robust/src/Runtime/SimpleHash.c
index 5f872e06..ea00ee5b 100755
--- a/Robust/src/Runtime/SimpleHash.c
+++ b/Robust/src/Runtime/SimpleHash.c
@@ -25,7 +25,7 @@ struct RuntimeHash * allocateRuntimeHash(int size) {
   struct RuntimeHash *thisvar;  //=(struct RuntimeHash *)RUNMALLOC(sizeof(struct RuntimeHash));
   if (size <= 0) {
 #ifdef MULTICORE
-    BAMBOO_EXIT(0xb001);
+    BAMBOO_EXIT(0xf101);
 #else
     printf("Negative Hashtable size Exception\n");
     exit(-1);
diff --git a/Robust/src/Runtime/mem.c b/Robust/src/Runtime/mem.c
index 2f91e493..0324f170 100644
--- a/Robust/src/Runtime/mem.c
+++ b/Robust/src/Runtime/mem.c
@@ -11,7 +11,7 @@ void * mycalloc(int m,
   BAMBOO_START_CRITICAL_SECTION_MEM();
   p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
   if(p == NULL) {
-	  BAMBOO_EXIT(0xa024);
+	  BAMBOO_EXIT(0xc001);
   }
   BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
   return p;
@@ -26,6 +26,9 @@ void * mycalloc_share(struct garbagelist * stackptr,
 memalloc:
   BAMBOO_START_CRITICAL_SECTION_MEM();
   p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize);
+#ifdef GC_DEBUG
+	tprintf("new obj in shared mem: %x, %x \n", p, isize);
+#endif
   if(p == NULL) {
 		// no more global shared memory
 		BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
@@ -51,7 +54,7 @@ void * mycalloc_share(int m,
   p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize);
   if(p == NULL) {
 		// no more global shared memory
-		BAMBOO_EXIT(0xa025);
+		BAMBOO_EXIT(0xc002);
   }
   BAMBOO_CLOSE_CRITICAL_SECTION_MEM();
   return 
@@ -65,7 +68,7 @@ void * mycalloc_i(int m,
   int isize = size; 
   p = BAMBOO_LOCAL_MEM_CALLOC(m, isize); // calloc(m, isize);
   if(p == NULL) {
-	  BAMBOO_EXIT(0xa026);
+	  BAMBOO_EXIT(0xc003);
   }
   return p;
 }
diff --git a/Robust/src/Runtime/mem.h b/Robust/src/Runtime/mem.h
index 759d983f..de009887 100644
--- a/Robust/src/Runtime/mem.h
+++ b/Robust/src/Runtime/mem.h
@@ -30,6 +30,7 @@ void myfree(void * ptr);
 #define RUNMALLOC_I(x) mycalloc_i(1,x) //with interruption blocked beforehand
 #define RUNFREE(x) myfree(x)
 #ifdef MULTICORE_GC
+#include "multicoregc.h"
 void * mycalloc_share(struct garbagelist * stackptr, int m, int size);
 #define FREEMALLOC(s, x) mycalloc_share((s),1,(x))
 #else
diff --git a/Robust/src/Runtime/multicoregarbage.c b/Robust/src/Runtime/multicoregarbage.c
index 22a6783e..3cd45fea 100644
--- a/Robust/src/Runtime/multicoregarbage.c
+++ b/Robust/src/Runtime/multicoregarbage.c
@@ -1,13 +1,64 @@
 #ifdef MULTICORE_GC
+#include "runtime.h"
 #include "multicoregarbage.h"
 #include "multicoreruntime.h"
 #include "runtime_arch.h"
 #include "SimpleHash.h"
 #include "GenericHashtable.h"
+#include "ObjectHash.h"
+
+extern int corenum;
+extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
+extern int numqueues[][NUMCLASSES];
 
 extern struct genhashtable * activetasks;
 extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
-extern struct taskparamdescriptor *currtpdo;
+extern struct taskparamdescriptor *currtpd;
+
+struct pointerblock {
+  void * ptrs[NUMPTRS];
+  struct pointerblock *next;
+};
+
+struct pointerblock *gchead=NULL;
+int gcheadindex=0;
+struct pointerblock *gctail=NULL;
+int gctailindex=0;
+struct pointerblock *gctail2=NULL;
+int gctailindex2=0;
+struct pointerblock *gcspare=NULL;
+
+#define NUMLOBJPTRS 20
+
+struct lobjpointerblock {
+  void * lobjs[NUMLOBJPTRS];
+	//void * dsts[NUMLOBJPTRS];
+	int lengths[NUMLOBJPTRS];
+	//void * origs[NUMLOBJPTRS];
+	int hosts[NUMLOBJPTRS];
+  struct lobjpointerblock *next;
+};
+
+struct lobjpointerblock *gclobjhead=NULL;
+int gclobjheadindex=0;
+struct lobjpointerblock *gclobjtail=NULL;
+int gclobjtailindex=0;
+struct lobjpointerblock *gclobjtail2=NULL;
+int gclobjtailindex2=0;
+struct lobjpointerblock *gclobjspare=NULL;
+
+#ifdef GC_DEBUG
+inline void dumpSMem() {
+	tprintf("Dump shared mem: \n");
+	for (int i = BAMBOO_BASE_VA; i < BAMBOO_BASE_VA+BAMBOO_SHARED_MEM_SIZE; i += 4*16)
+    tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
+            *((int *)(i)), *((int *)(i + 4)), *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
+						*((int *)(i + 4*4)), *((int *)(i + 4*5)), *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
+						*((int *)(i + 4*8)), *((int *)(i + 4*9)), *((int *)(i + 4*10)), *((int *)(i + 4*11)),
+						*((int *)(i + 4*12)), *((int *)(i + 4*13)), *((int *)(i + 4*14)), *((int *)(i + 4*15)));
+	tprintf("\n");
+}
+#endif
 
 inline void gc_enqueue(void *ptr) {
   if (gcheadindex==NUMPTRS) {
@@ -16,7 +67,7 @@ inline void gc_enqueue(void *ptr) {
       tmp=gcspare;
       gcspare=NULL;
     } else {
-      tmp=malloc(sizeof(struct pointerblock));
+      tmp=RUNMALLOC(sizeof(struct pointerblock));
 		} // if (gcspare!=NULL)
     gchead->next=tmp;
     gchead=tmp;
@@ -32,7 +83,7 @@ inline void * gc_dequeue() {
     gctail=gctail->next;
     gctailindex=0;
     if (gcspare!=NULL) {
-      free(tmp);
+      RUNFREE(tmp);
 		} else {
       gcspare=tmp;
 		} // if (gcspare!=NULL)
@@ -65,14 +116,14 @@ inline int gc_moreItems2() {
 // enqueue a large obj: start addr & length
 inline void gc_lobjenqueue(void *ptr, 
 		                       int length, 
-										       int host = 0) {
+										       int host) {
   if (gclobjheadindex==NUMLOBJPTRS) {
     struct lobjpointerblock * tmp;
     if (gclobjspare!=NULL) {
       tmp=gclobjspare;
       gclobjspare=NULL;
     } else {
-      tmp=malloc(sizeof(struct lobjpointerblock));
+      tmp=RUNMALLOC(sizeof(struct lobjpointerblock));
 		} // if (gclobjspare!=NULL)
     gclobjhead->next=tmp;
     gclobjhead=tmp;
@@ -89,14 +140,14 @@ inline void gc_lobjenqueue(void *ptr,
 } // void gc_lobjenqueue(void *ptr...)
 
 // dequeue and destroy the queue
-inline void * gc_lobjdequeue(int * length
+inline void * gc_lobjdequeue(int * length,
 		                         int * host) {
   if (gclobjtailindex==NUMLOBJPTRS) {
     struct lobjpointerblock *tmp=gclobjtail;
     gclobjtail=gclobjtail->next;
     gclobjtailindex=0;
     if (gclobjspare!=NULL) {
-      free(tmp);
+      RUNFREE(tmp);
 		} else {
       gclobjspare=tmp;
 		} // if (gclobjspare!=NULL)
@@ -132,8 +183,7 @@ inline int gc_lobjmoreItems2() {
   return 1;
 } // int gc_lobjmoreItems2()
 
-INTPTR curr_heaptop = 0;
-INTPTR curr_heapbound = 0;
+INTPTR gccurr_heapbound = 0;
 
 inline void gettype_size(void * ptr, 
 		                     int * ttype, 
@@ -154,12 +204,13 @@ inline void gettype_size(void * ptr,
 	*tsize = size;
 }
 
+// TODO: there is a known bug in isLarge() below
 inline bool isLarge(void * ptr, 
 		                int * ttype, 
 										int * tsize) {
 	// check if a pointer is referring to a large object
 	gettype_size(ptr, ttype, tsize);
-	return(!isLocal(ptr + size));
+	return(!isLocal(ptr + *tsize));
 } // bool isLarge(void * ptr, int * ttype, int * tsize)
 
 inline int hostcore(void * ptr) {
@@ -177,81 +228,6 @@ inline bool isLocal(void * ptr) {
 	return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
 } // bool isLocal(void * ptr)
 
-inline void transferMarkResults() {
-	// invoked inside interruptiong handler
-	int msgsize = 5 + gcnumlobjs;
-  int i = 0;
-
-  if(isMsgSending) {
-		// cache the msg
-		isMsgHanging = true;
-		// cache the msg in outmsgdata and send it later
-		// msglength + target core + msg
-		OUTMSG_CACHE(msgsize);
-		OUTMSG_CACHE(STARTUPCORE);
-		OUTMSG_CACHE(GCLOBJINFO);
-		OUTMSG_CACHE(msgsize);
-		OUTMSG_CACHE(curr_heaptop);
-		OUTMSG_CACHE(gcmarkedptrbound);
-		// large objs here
-		void * lobj = NULL;
-		int length = 0;
-		while(gc_lobjmoreItems()) {
-			lobj = gc_lobjdequeue(&length);
-			OUTMSG_CACHE(lobj);
-			OUTMSG_CACHE(length);
-		} // while(gc_lobjmoreItems())
-	} else {
-		DynamicHeader msgHdr = tmc_udn_header_from_cpu(STARTUPCORE);
-
-		// send header
-		__tmc_udn_send_header_with_size_and_tag(msgHdr, msgsize, 
-																						UDN0_DEMUX_TAG);  
-#ifdef DEBUG
-		BAMBOO_DEBUGPRINT(0xbbbb);
-		BAMBOO_DEBUGPRINT(0xb000 + STARTUPCORE);       // targetcore
-#endif
-		udn_send(GCLOBJINFO);
-#ifdef DEBUG
-		BAMBOO_DEBUGPRINT(GCLOBJINFO);
-#endif
-		udn_send(msgsize);
-#ifdef DEBUG
-		BAMBOO_DEBUGPRINT_REG(msgsize);
-#endif
-		udn_send(BAMBOO_NUM_OF_CORE);
-#ifdef DEBUG
-		BAMBOO_DEBUGPRINT_REG(BAMBOO_NUM_OF_CORE);
-#endif
-		udn_send(curr_heaptop);
-#ifdef DEBUG
-		BAMBOO_DEBUGPRINT_REG(curr_heaptop);
-#endif
-		udn_send(gcmarkedptrbound);
-#ifdef DEBUG
-		BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
-#endif
-		// large objs here
-		void * lobj = NULL;
-		int length = 0;
-		while(gc_lobjmoreItems()) {
-			lobj = gc_lobjdequeue(&length);
-			OUTMSG_CACHE(lobj);
-#ifdef DEBUG
-			BAMBOO_DEBUGPRINT_REG(lobj);
-#endif
-			OUTMSG_CACHE(length);
-#ifdef DEBUG
-			BAMBOO_DEBUGPRINT_REG(length);
-#endif
-		} // while(gc_lobjmoreItems())
-		
-#ifdef DEBUG
-		BAMBOO_DEBUGPRINT(0xffff);
-#endif
-	} // if(isMsgSending)
-} // void transferMarkResults()
-
 inline bool gc_checkCoreStatus() {
 	bool allStall = true;
 	for(int i = 0; i < NUMCORES; ++i) {
@@ -264,6 +240,7 @@ inline bool gc_checkCoreStatus() {
 }
 
 inline void checkMarkStatue() {
+	int i;
 	if((!waitconfirm) || 
 			(waitconfirm && (numconfirm == 0))) {
 		BAMBOO_START_CRITICAL_SECTION_STATUS();  
@@ -350,6 +327,7 @@ inline bool preGC() {
 } // bool preGC()
 
 inline void initGC() {
+	int i;
 	for(i = 0; i < NUMCORES; ++i) {
 		gccorestatus[i] = 1;
 		gcnumsendobjs[i] = 0; 
@@ -369,7 +347,6 @@ inline void initGC() {
 	gcheaptop = 0;
 	gctopcore = 0;
 	gcheapdirection = 1;
-	gcreservedsb = 0;
 	gcmovestartaddr = 0;
 	gctomove = false;
 	gcblock2fill = 0;
@@ -380,7 +357,7 @@ inline void initGC() {
 		gcheadindex=0;
 		gctailindex=0;
 		gctailindex2 = 0;
-		gchead=gctail=gctail2=malloc(sizeof(struct pointerblock));
+		gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
 	}
 	// initialize the large obj queues
 	if (gclobjhead==NULL) {
@@ -388,17 +365,17 @@ inline void initGC() {
 		gclobjtailindex=0;
 		gclobjtailindex2 = 0;
 		gclobjhead=gclobjtail=gclobjtail2=
-			malloc(sizeof(struct lobjpointerblock));
+			RUNMALLOC(sizeof(struct lobjpointerblock));
 	}
 } // void initGC()
 
 // compute load balance for all cores
-inline int loadbalance(int heaptop) {
+inline int loadbalance() {
 	// compute load balance
 	int i;
 
 	// get the total loads
-	gcloads[STARTUPCORE]+=
+	gcloads[BAMBOO_NUM_OF_CORE]+=
 		BAMBOO_SMEM_SIZE*gcreservedsb;//reserved sblocks for sbstartbl
 	int tloads = gcloads[STARTUPCORE];
 	for(i = 1; i < NUMCORES; i++) {
@@ -463,7 +440,7 @@ inline void moveLObjs() {
 	if((gcloads[0] > BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE_L) 
 			&& (gcloads[0] % BAMBOO_SMEM_SIZE == 0)) {
 		// edge of a block, check if this is exactly the heaptop
-		BASEPTR(0, gcfilledblocks[0]-1, &gcloads[0]);
+		BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
 		gcloads[0]+=(gcfilledblocks[0]>1?BAMBOO_SMEM_SIZE:BAMBOO_SMEM_SIZE_L);
 	}
 	int tmpheaptop = gcloads[0];
@@ -481,13 +458,17 @@ inline void moveLObjs() {
 	// move large objs from gcheaptop to tmpheaptop
 	// write the header first
 	int tomove = BAMBOO_BASE_VA + BAMBOO_SHARED_MEM_SIZE - gcheaptop;
+	if(tomove == 0) {
+		gcheaptop = tmpheaptop;
+		return;
+	}
 	// check how many blocks it acrosses
 	int b = 0;
 	BLOCKINDEX(tmpheaptop, &b);
 	// check the remaining space in this block
 	int remain = (b < NUMCORES? (b+1)*BAMBOO_SMEM_SIZE_L  
   		        : BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE)
-		          -(mem-BAMBOO_BASE_VA);
+		          -(tmpheaptop-BAMBOO_BASE_VA);
 	if(remain <= BAMBOO_CACHE_LINE_SIZE) {
 		// fill the following space with -1, go to next block
 		(*((int *)tmpheaptop)) = -1;
@@ -500,7 +481,7 @@ inline void moveLObjs() {
 	memcpy(tmpheaptop, gcheaptop, tomove);
 	gcheaptop = tmpheaptop + tomove;
 	// flush the sbstartbl
-	memset(sbstarttbl, '\0', 
+	memset(gcsbstarttbl, '\0', 
 			   BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE*sizeof(INTPTR));
 	int size = 0;
 	int isize = 0;
@@ -514,14 +495,14 @@ inline void moveLObjs() {
 			// this object acrosses blocks
 			int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
 			for(int k = 0; k < tmpsbs-1; k++) {
-				sbstarttbl[k+b] = (INTPTR)(-1);
+				gcsbstarttbl[k+b] = (INTPTR)(-1);
 			}
 			b += tmpsbs;
 			remain = b < NUMCORES ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
 			if((isize-remain)%BAMBOO_SMEM_SIZE == 0) {
-				sbstarttbl[b+tmpsbs-1] = (INTPTR)(-1);
+				gcsbstarttbl[b+tmpsbs-1] = (INTPTR)(-1);
 			} else {
-				sbstarttbl[b+tmpsbs-1] = (INTPTR)(tmpheaptop+isize);
+				gcsbstarttbl[b+tmpsbs-1] = (INTPTR)(tmpheaptop+isize);
 				remain -= (isize-remain)%BAMBOO_SMEM_SIZE;
 			}
 		}
@@ -532,60 +513,43 @@ inline void moveLObjs() {
 } // void moveLObjs()
 
 inline void updateFreeMemList() {
-	int i = 0;
-	int tmptop = gcloads[0]; 
 	struct freeMemItem * tochange = bamboo_free_mem_list->head;
 	if(tochange == NULL) {
 		bamboo_free_mem_list->head = tochange = 
 			(struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
 	}
-	for(i = 1; i < NUMCORES; ++i) {
-		int toadd = gcloads[i];
-		if(tmptop < toadd) {
-			toadd = tmptop;
-			tmptop = gcloads[i];
-		} // tmptop can never == toadd
-		int blocki = 0;
-		BLOCKINDEX(toadd, &blocki);
-		tochange->ptr = toadd;
-		tochange->size = (blocki<NUMCORES)
-			?((blocki+1)*BAMBOO_SMEM_SIZE_L+BAMBOO_BASE_VA-toadd)
-			:(BAMBOO_LARGE_SMEM_BOUND+(blocki+1-NUMCORES)*BAMBOO_SMEM_SIZE
-					+BAMBOO_BASE_VA-toadd);
-		if(tochange->next == NULL) {
-			tochange->next = 
-				(struct freeMemItem *)RUNMALLOC(sizeof(struct freeMemItem));
-		}
-		// zero out all these spare memory
-		memset(tochange->ptr, '\0', tochange->size);
-		tochange = tochange->next;
-	} // for(i = 1; i < NUMCORES; ++i)
 	// handle the top of the heap
-	tmptop = gcheaptop;
-	BLOCKINDEX(tmptop, &blocki);
-	tochange->ptr = tmptop;
-	tochange->size = BAMBOO_SHARED_MEM_SIZE + BAMBOO_BASE_VA - tmptop;
+	tochange->ptr = gcheaptop;
+	tochange->size = BAMBOO_SHARED_MEM_SIZE + BAMBOO_BASE_VA - gcheaptop;
 	// zero out all these spare memory
 	memset(tochange->ptr, '\0', tochange->size);
-	bamboo_free_mem_list->tail = tochange;
+	if(bamboo_free_mem_list->tail != tochange) {
+		bamboo_free_mem_list->tail = tochange;
+		if(bamboo_free_mem_list->tail != NULL) {
+			RUNFREE(bamboo_free_mem_list->tail);
+		}
+	}
 } // void updateFreeMemList()
 
 // enqueue root objs
 inline void tomark(struct garbagelist * stackptr) {
 	if(MARKPHASE != gcphase) {
-		BAMBOO_EXIT(0xb002);
+		BAMBOO_EXIT(0xb101);
 	}
-	gcbusystatus = 1;
+	gcbusystatus = true;
 	gcnumlobjs = 0;
 	
-	int i;
+	int i,j;
 	// enqueue current stack 
 	while(stackptr!=NULL) {
 		for(i=0; i<stackptr->size; i++) {
-			gc_enqueue(stackptr->array[i]);
+			if(stackptr->array[i] != NULL) {
+				gc_enqueue(stackptr->array[i]);
+			}
 		}
 		stackptr=stackptr->next;
 	}
+
 	// enqueue objectsets
 	for(i=0; i<NUMCLASSES; i++) {
 		struct parameterwrapper ** queues = 
@@ -601,10 +565,14 @@ inline void tomark(struct garbagelist * stackptr) {
 			}
 		}
 	}
+
 	// euqueue current task descriptor
-	for(i=0; i<currtpd->numParameters; i++) {
-		gc_enqueue(currtpd->parameterArray[i]);
+	if(currtpd != NULL) {
+		for(i=0; i<currtpd->numParameters; i++) {
+			gc_enqueue(currtpd->parameterArray[i]);
+		}
 	}
+
 	// euqueue active tasks
 	struct genpointerlist * ptr=activetasks->list;
 	while(ptr!=NULL) {
@@ -615,6 +583,7 @@ inline void tomark(struct garbagelist * stackptr) {
 		}
 		ptr=ptr->inext;
 	}
+
 	// enqueue cached transferred obj
 	struct QueueItem * tmpobjptr =  getHead(&objqueue);
 	while(tmpobjptr != NULL) {
@@ -626,6 +595,9 @@ inline void tomark(struct garbagelist * stackptr) {
 } // void tomark(struct garbagelist * stackptr)
 
 inline void markObj(void * objptr) {
+	if(objptr == NULL) {
+		return;
+	}
 	if(ISSHAREDOBJ(objptr)) {
 		int host = hostcore(objptr);
 		if(BAMBOO_NUM_OF_CORE == host) {
@@ -646,7 +618,7 @@ inline void mark(bool isfirst,
 	if(isfirst) {
 		// enqueue root objs
 		tomark(stackptr);
-		curr_heaptop = 0; // record the size of all active objs in this core
+		gccurr_heaptop = 0; // record the size of all active objs in this core
 		                  // aligned but does not consider block boundaries
 		gcmarkedptrbound = 0;
 	}
@@ -665,12 +637,12 @@ inline void mark(bool isfirst,
 				// a shared obj, check if it is a local obj on this core
 				if(isLarge(ptr, &type, &size)) {
 					// ptr is a large object
-					gc_lobjenqueue(ptr, size);
+					gc_lobjenqueue(ptr, size, 0);
 					gcnumlobjs++;
 				} else if (isLocal(ptr)) {
 					// ptr is an active object on this core
 					ALIGNSIZE(size, &isize);
-					curr_heaptop += isize;
+					gccurr_heaptop += isize;
 					// mark this obj
 					((int *)ptr)[6] = 1;
 					if(ptr + size > gcmarkedptrbound) {
@@ -711,6 +683,7 @@ inline void mark(bool isfirst,
 			gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 			gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
 			gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
+			gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 		} else {
 			send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
 								 gcself_numsendobjs, gcself_numreceiveobjs);
@@ -726,16 +699,15 @@ inline void compact2Heaptop() {
 	// no cores with spare mem and some cores are blocked with pending move
 	// find the current heap top and make them move to the heap top
 	int p;
-	if(gcheapdirection) {
-		gctopcore++;
-	} else {
-		gctopcore--;
-	}
 	int numblocks = gcfilledblocks[gctopcore];
 	BASEPTR(gctopcore, numblocks, &p);
 	int b;
 	BLOCKINDEX(p, &b);
 	int remain = b<NUMCORES ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+	if((gctopcore == STARTUPCORE) && (b == 0)) {
+		remain -= gcreservedsb*BAMBOO_SMEM_SIZE;
+		p += gcreservedsb*BAMBOO_SMEM_SIZE;
+	}
 	for(int i = 0; i < NUMCORES; i++) {
 		if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
 			int memneed = gcrequiredmems[i] + BAMBOO_CACHE_LINE_SIZE;
@@ -763,8 +735,16 @@ inline void compact2Heaptop() {
 				gcstopblock[gctopcore]++;
 				if(gcheapdirection) {
 					gctopcore++;
+					if(gctopcore== NUMCORES) {
+						gctopcore--;
+						gcheapdirection = false;
+					}
 				} else {
 					gctopcore--;
+					if(gctopcore < 0) {
+						gctopcore++;
+						gcheapdirection = true;
+					}
 				}
 				numblocks = gcstopblock[gctopcore];
 				BASEPTR(gctopcore, numblocks, &p);
@@ -793,10 +773,9 @@ inline void resolvePendingMoveRequest() {
 					// still have spare mem
 					nosparemem = false;
 					dstcore = i;
-				} else {
-					i++;
 				} // if(gcfilledblocks[i] < gcstopblock[i]) else ...
 			}
+			i++;
 		} // if(nosparemem)
 		if(!haspending) {
 			if(gccorestatus[j] != 0) {
@@ -805,10 +784,10 @@ inline void resolvePendingMoveRequest() {
 					sourcecore = j;
 					haspending = true;
 				} else {
-					j++;
 					hasrunning = true;
 				} // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
 			} // if(gccorestatus[i] == 0) else ...
+			j++;
 		} // if(!haspending)
 		if(!nosparemem && haspending) {
 			// find match
@@ -865,14 +844,14 @@ innernextSBlock:
 		orig->blockbase = orig->base;
 	}
 	orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE;
-	if(sbstarttbl[orig->sblockindex] == -1) {
+	if(gcsbstarttbl[orig->sblockindex] == -1) {
 		// goto next sblock
 		orig->sblockindex += 1;
 		orig->blockbase += BAMBOO_SMEM_SIZE;
 		goto innernextSBlock;
-	} else if(sbstarttbl[orig->sblockindex] != 0) {
+	} else if(gcsbstarttbl[orig->sblockindex] != 0) {
 		// not start from the very beginning
-		orig->blockbase = sbstarttbl[orig->sblockindex];
+		orig->blockbase = gcsbstarttbl[orig->sblockindex];
 	}
 	orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
 	orig->offset = BAMBOO_CACHE_LINE_SIZE;
@@ -889,7 +868,6 @@ inline void initOrig_Dst(struct moveHelper * orig,
 	if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
 		to->base += gcreservedsb * BAMBOO_SMEM_SIZE;
 		to->top += gcreservedsb * BAMBOO_SMEM_SIZE;
-		curr_heaptop -= gcreservedsb * BAMBOO_SMEM_SIZE;
 	}
 	to->ptr = to->base + to->offset;
 
@@ -899,18 +877,18 @@ inline void initOrig_Dst(struct moveHelper * orig,
 	orig->bound = to->base + BAMBOO_SMEM_SIZE_L;
 	orig->blockbase = orig->base;
 	if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-		orig->sblockindex = reservedsb;
+		orig->sblockindex = gcreservedsb;
 	} else {
 		orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
 	}
-	if(sbstarttbl[sblockindex] == -1) {
+	if(gcsbstarttbl[orig->sblockindex] == -1) {
 		// goto next sblock
 		orig->blockbound = 
 			BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
 		nextSBlock(orig);
 		return;
-	} else if(sbstarttbl[orig->sblockindex] != 0) {
-		orig->blockbase = sbstarttbl[sblockindex];
+	} else if(gcsbstarttbl[orig->sblockindex] != 0) {
+		orig->blockbase = gcsbstarttbl[orig->sblockindex];
 	}
 	orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
 	orig->offset = BAMBOO_CACHE_LINE_SIZE;
@@ -986,11 +964,13 @@ innermoveobj:
 			}
 		}
 		memcpy(to->ptr, orig->ptr, size);
+		// restore the mark field
+		((int *)(to->ptr))[6] = 0;
 		// fill the remaining space with -2
 		memset(to->ptr+size, -2, isize-size);
 		// store mapping info
 		RuntimeHashadd(gcpointertbl, orig->ptr, to->ptr); 
-		curr_heaptop -= isize;
+		gccurr_heaptop -= isize;
 		to->ptr += isize;
 		to->offset += isize;
 		to->top += isize;
@@ -1028,11 +1008,11 @@ inline int assignSpareMem(int sourcecore,
 	}
 }
 
-inline bool findSpareMem(int * startaddr,
-		                     int * tomove,
-												 int * dstcore,
-												 int requiredmem,
-												 int requiredcore) {
+inline bool gcfindSpareMem(int * startaddr,
+		                       int * tomove,
+								  				 int * dstcore,
+									  			 int requiredmem,
+										  		 int requiredcore) {
 	for(int k = 0; k < NUMCORES; k++) {
 		if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
 			// check if this stopped core has enough mem
@@ -1045,7 +1025,7 @@ inline bool findSpareMem(int * startaddr,
 	gcrequiredmems[requiredcore] = requiredmem;
 	gcmovepending++;
 	return false;
-} //bool findSpareMem(int* startaddr,int* tomove,int mem,int core)
+} //bool gcfindSpareMem(int* startaddr,int* tomove,int mem,int core)
 
 inline bool compacthelper(struct moveHelper * orig,
 		                      struct moveHelper * to,
@@ -1061,9 +1041,16 @@ innercompact:
 		if(stop) {
 			break;
 		}
-	} while(orig->ptr < gcmarkedptrbound); 
-	// fill the header of this block
-	(*((int*)(to->base))) = to->offset;
+	} while(orig->ptr < gcmarkedptrbound);
+	// if no objs have been compacted, do nothing,
+	// otherwise, fill the header of this block
+	if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
+		(*((int*)(to->base))) = to->offset;
+	} else {
+		to->offset = 0;
+		to->ptr = to->base;
+		to->top -= BAMBOO_CACHE_LINE_SIZE;
+	} // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
 	if(*localcompact) {
 		*heaptopptr = to->ptr;
 		*filledblocks = to->numblocks;
@@ -1077,8 +1064,8 @@ innercompact:
 		if(orig->ptr < gcmarkedptrbound) {
 			// ask for more mem
 			gctomove = false;
-			if(findSpareMem(&gcmovestartaddr, &gcblock2fill, &gcdstcore, 
-						          curr_heaptop, BAMBOO_NUM_OF_CORE)) {
+			if(gcfindSpareMem(&gcmovestartaddr, &gcblock2fill, &gcdstcore, 
+						            gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
 				gctomove = true;
 			} else {
 				return false; 
@@ -1092,7 +1079,7 @@ innercompact:
 			// ask for more mem
 			gctomove = false;
 			send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, 
-					       *filledblocks, *heaptopptr, curr_heaptop);
+					       *filledblocks, *heaptopptr, gccurr_heaptop);
 		} else {
 			// finish compacting
 			send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
@@ -1130,7 +1117,7 @@ innercompact:
 
 inline void compact() {
 	if(COMPACTPHASE != gcphase) {
-		BAMBOO_EXIT(0xb003);
+		BAMBOO_EXIT(0xb102);
 	}
 
 	// initialize pointers for comapcting
@@ -1200,7 +1187,7 @@ inline void flush() {
 			for(i=1; i<=size; i++) {
 				unsigned int offset=pointer[i];
 				void * objptr=*((void **)(((char *)ptr)+offset));
-				((void **)(((char *)ptr)+offset)) = flushObj(objptr);
+				*((void **)(((char *)ptr)+offset)) = flushObj(objptr);
 			} // for(i=1; i<=size; i++) 
 		} // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
 	} // while(moi != NULL)
@@ -1236,6 +1223,11 @@ inline void gc(struct garbagelist * stackptr) {
 			return;
 		}
 
+#ifdef GC_DEBUG
+		tprintf("start gc! \n");
+		dumpSMem();
+#endif
+
 		initGC();
 
 		gcprocessing = true;
@@ -1264,12 +1256,17 @@ inline void gc(struct garbagelist * stackptr) {
 		numconfirm = NUMCORES - 1;
 		for(i = 1; i < NUMCORES; ++i) {
 			send_msg_1(i, GCLOBJREQUEST);
-		}	
+		}
+		gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
 		while(numconfirm != 0) {} // wait for responses
+#ifdef GC_DEBUG
+		tprintf("prepare to cache large objs \n");
+		dumpSMem();
+#endif
 		// cache all large objs
 		if(!cacheLObjs()) {
 			// no enough space to cache large objs
-			BAMBOO_EXIT(0xd001);
+			BAMBOO_EXIT(0xb103);
 		}
 		// predict number of blocks to fill for each core
 		int numpbc = loadbalance();
@@ -1292,6 +1289,10 @@ inline void gc(struct garbagelist * stackptr) {
 			gcfilledblocks[i] = 0;
 			gcrequiredmems[i] = 0;
 		}
+#ifdef GC_DEBUG
+		tprintf("mark phase finished \n");
+		dumpSMem();
+#endif
 
 		// compact phase
 		bool finalcompact = false;
@@ -1305,6 +1306,7 @@ inline void gc(struct garbagelist * stackptr) {
 		INTPTR heaptopptr = 0;
 		bool finishcompact = false;
 		bool iscontinue = true;
+		bool localcompact = true;
 		while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
 			if((!finishcompact) && iscontinue) {
 				finishcompact = compacthelper(orig, to, &filledblocks, 
@@ -1342,9 +1344,9 @@ inline void gc(struct garbagelist * stackptr) {
 				to->ptr += to->offset; // for header
 				to->top += to->offset;
 				if(gcdstcore == BAMBOO_NUM_OF_CORE) {
-					*localcompact = true;
+					localcompact = true;
 				} else {
-					*localcompact = false;
+					localcompact = false;
 				}
 				gctomove = false;
 				iscontinue = true;
@@ -1354,8 +1356,16 @@ inline void gc(struct garbagelist * stackptr) {
 			} // if(gctomove)
 
 		} // while(COMPACTPHASE == gcphase) 
+#ifdef GC_DEBUG
+		tprintf("prepare to move large objs \n");
+		dumpSMem();
+#endif
 		// move largeObjs
 		moveLObjs();
+#ifdef GC_DEBUG
+		tprintf("compact phase finished \n");
+		dumpSMem();
+#endif
 
 		gcphase = FLUSHPHASE;
 		for(i = 1; i < NUMCORES; ++i) {
@@ -1384,9 +1394,17 @@ inline void gc(struct garbagelist * stackptr) {
 			// send gc finish messages to all cores
 			send_msg_1(i, GCFINISH);
 		}
+#ifdef GC_DEBUG
+		tprintf("flush phase finished \n");
+		dumpSMem();
+#endif
 
 		// need to create free memory list  
 		updateFreeMemList();
+#ifdef GC_DEBUG
+		tprintf("gc finished \n");
+		dumpSMem();
+#endif
 	} else {
 		gcprocessing = true;
 		gc_collect(stackptr);
diff --git a/Robust/src/Runtime/multicoregarbage.h b/Robust/src/Runtime/multicoregarbage.h
index f081e765..a8e45d6e 100644
--- a/Robust/src/Runtime/multicoregarbage.h
+++ b/Robust/src/Runtime/multicoregarbage.h
@@ -1,58 +1,22 @@
 #ifndef MULTICORE_GARBAGE_H
 #define MULTICORE_GARBAGE_H
-#include "Queue.h"
+#include "multicoregc.h"
+#include "structdefs.h"
+
+#ifndef bool
+#define bool int
+#endif
 
 // data structures for GC
-#define BAMBOO_SMEM_SIZE_L 32 * BAMBOO_SMEM_SIZE
-#define BAMBOO_LARGE_SMEM_BOUND BAMBOO_SMEM_SIZE_L*NUMCORES // NUMCORES=62
+#ifdef GC_DEBUG
+#define BAMBOO_SMEM_SIZE_L (2 * BAMBOO_SMEM_SIZE)
+#else
+#define BAMBOO_SMEM_SIZE_L (32 * BAMBOO_SMEM_SIZE)
+#endif
+#define BAMBOO_LARGE_SMEM_BOUND (BAMBOO_SMEM_SIZE_L*NUMCORES) // NUMCORES=62
 
 #define NUMPTRS 100
 
-struct garbagelist {
-  int size;
-  struct garbagelist *next;
-  void * array[];
-};
-
-struct listitem {
-  struct listitem * prev;
-  struct listitem * next;
-  struct garbagelist * stackptr;
-};
-
-struct pointerblock {
-  void * ptrs[NUMPTRS];
-  struct pointerblock *next;
-};
-
-struct pointerblock *gchead=NULL;
-int gcheadindex=0;
-struct pointerblock *gctail=NULL;
-int gctailindex=0;
-struct pointerblock *gctail2=NULL;
-int gctailindex2=0;
-struct pointerblock *gcspare=NULL;
-
-#define NUMLOBJPTRS 20
-
-struct lobjpointerblock {
-  void * lobjs[NUMLOBJPTRS];
-	//void * dsts[NUMLOBJPTRS];
-	int lengths[NUMLOBJPTRS];
-	//void * origs[NUMLOBJPTRS];
-	int hosts[NUMLOBJPTRS];
-  struct lobjpointerblock *next;
-};
-
-struct lobjpointerblock *gclobjhead=NULL;
-int gclobjheadindex=0;
-struct lobjpointerblock *gclobjtail=NULL;
-int gclobjtailindex=0;
-struct lobjpointerblock *gclobjtail2=NULL;
-int gclobjtailindex2=0;
-struct lobjpointerblock *gclobjspare=NULL;
-int gcnumlobjs = 0;
-
 typedef enum {
 	MARKPHASE = 0x0,   // 0x0
 	COMPACTPHASE,      // 0x1
@@ -65,12 +29,14 @@ volatile bool gcflag;
 volatile bool gcprocessing;
 GCPHASETYPE gcphase; // indicating GC phase
 
+int gccurr_heaptop;
 // for mark phase termination
 int gccorestatus[NUMCORES]; // records status of each core
                             // 1: running gc
                             // 0: stall
 int gcnumsendobjs[NUMCORES]; // records how many objects sent out
 int gcnumreceiveobjs[NUMCORES]; // records how many objects received
+bool gcbusystatus;
 int gcself_numsendobjs;
 int gcself_numreceiveobjs;
 
@@ -80,6 +46,8 @@ int gcloads[NUMCORES];
 int gctopcore; // the core host the top of the heap
 bool gcheapdirection; // 0: decrease; 1: increase
 
+int gcnumlobjs;
+
 // compact instruction
 INTPTR gcmarkedptrbound;
 int gcblock2fill;
@@ -107,107 +75,145 @@ INTPTR * gcsbstarttbl;
 int gcreservedsb;  // number of reserved sblock for sbstarttbl
 
 #define ISSHAREDOBJ(p) \
-	(((p)>BAMBOO_BASE_VA)&&((p)<BAMBOO_BASE_VA+BAMBOO_SHARED_MEM_SIZE))
+	(((p)>(BAMBOO_BASE_VA))&&((p)<((BAMBOO_BASE_VA)+(BAMBOO_SHARED_MEM_SIZE))))
 
 #define ALIGNSIZE(s, as) \
-	(*((int*)as)) = s & (~BAMBOO_CACHE_LINE_MASK) + BAMBOO_CACHE_LINE_SIZE;
+	(*((int*)as)) = (((s) & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE))
 
 #define BLOCKINDEX(p, b) \
-	int t = (p) - BAMBOO_BASE_VA; \
-	if(t < BAMBOO_LARGE_SMEM_BOUND) { \
-		(*((int*)b)) = t / BAMBOO_SMEM_SIZE_L; \
-	} else { \
-		(*((int*)b)) = NUMCORES+(t-BAMBOO_LARGE_SMEM_BOUND)/BAMBOO_SMEM_SIZE;\
+  { \
+		int t = (p) - (BAMBOO_BASE_VA); \
+		if(t < (BAMBOO_LARGE_SMEM_BOUND)) { \
+			(*((int*)b)) = t / (BAMBOO_SMEM_SIZE_L); \
+		} else { \
+			(*((int*)b)) = NUMCORES+((t-(BAMBOO_LARGE_SMEM_BOUND))/(BAMBOO_SMEM_SIZE));\
+		} \
 	}
 
 #define RESIDECORE(p, x, y) \
-	int b; \
-	BLOCKINDEX((p), &b); \
-	bool reverse = (b / NUMCORES) % 2; \
-	int l = b % NUMCORES; \
-	if(reverse) { \
-		if(l < 14) { \
-			l += 1; \
+  { \
+		if(1 == (NUMCORES)) { \
+			(*((int*)x)) = 0; \
+			(*((int*)y)) = 0; \
 		} else { \
-			l += 2; \
+			int b; \
+			BLOCKINDEX((p), &b); \
+			bool reverse = (b / (NUMCORES)) % 2; \
+			int l = b % (NUMCORES); \
+			if(reverse) { \
+				if(62 == (NUMCORES)) { \
+					if(l < 14) { \
+						l += 1; \
+					} else { \
+						l += 2; \
+					} \
+				} \
+				(*((int*)y)) = bamboo_width - 1 - l / bamboo_width; \
+			} else { \
+				if(62 == (NUMCORES)) {\
+					if(l > 54) { \
+						l += 2; \
+					} else if(l > 47) {\
+						l += 1; \
+					} \
+				} \
+				(*((int*)y)) = l / bamboo_width; \
+			} \
+			if((NUMCORES) % 2) { \
+				if((l/bamboo_width)%2) { \
+					(*((int*)x)) = l % bamboo_width; \
+				} else { \
+					(*((int*)x)) = bamboo_width - 1 - l % bamboo_width; \
+				} \
+			} else {\
+				if((l/bamboo_width)%2) { \
+					(*((int*)x)) = bamboo_width - 1 - l % bamboo_width; \
+				} else { \
+					(*((int*)x)) = l % bamboo_width; \
+				} \
+			} \
 		} \
-		(*((int*)y)) = bamboo_width - 1 - l / bamboo_width; \
-	} else { \
-		if(l > 54) { \
-			l += 2; \
-		} else if(l > 47) {\
-			l += 1; \
-		} \
-		(*((int*)y)) = l / bamboo_width; \
-	} \
-	if((l/bamboo_width)%2) { \
-		(*((int*)x)) = bamboo_width - 1 - l % bamboo_width; \
-	} else { \
-		(*((int*)x)) = l % bamboo_width; \
 	}
 
 // NOTE: n starts from 0
 #define NUMBLOCKS(s, n) \
-	if(s < BAMBOO_SMEM_SIZE_L) { \
-		(*((int*)n)) = 0; \
+	if((s) < (BAMBOO_SMEM_SIZE_L)) { /* (s): keep macro arg precedence-safe */ \
+		(*((int*)(n))) = 0; \
 	} else { \
-		(*((int*)n)) = 1 + (s - BAMBOO_SMEM_SIZE_L) / BAMBOO_SMEM_SIZE; \
+		(*((int*)(n))) = 1 + ((s) - (BAMBOO_SMEM_SIZE_L)) / (BAMBOO_SMEM_SIZE); \
 	}
 
 #define OFFSET(s, o) \
 	if(s < BAMBOO_SMEM_SIZE_L) { \
-		(*((int*)o)) = s; \
+		(*((int*)(o))) = (s); \
 	} else { \
-		(*((int*)o)) = (s - BAMBOO_SMEM_SIZE_L) % BAMBOO_SMEM_SIZE; \
+		(*((int*)(o))) = ((s) - (BAMBOO_SMEM_SIZE_L)) % (BAMBOO_SMEM_SIZE); \
 	}
 
 #define BLOCKINDEX2(c, n, b) \
-	int x; \
-  int y; \
-  int t; \
-	if(c > 5) c += 2; \
-  x = c / bamboo_height; \
-	y = c % bamboo_height; \
-	if(n%2) { \
-		if(y % 2) { \
-			t = bamboo_width - 1 - x + (bamboo_width - 1 - y) * bamboo_width; \
-		} else { \
-			t = x + (bamboo_width - 1 - y) * bamboo_width; \
-		} \
-		if(y>5) { \
-			t--; \
-		} else { \
-			t -= 2; \
-		} \
-		t += NUMCORES * n; \
-	} else { \
-		if(y % 2) { \
-			t = bamboo_width - 1 - x + y * bamboo_width; \
+  { \
+		int x; \
+		int y; \
+		int t; \
+		int cc = c; \
+		if((62 == (NUMCORES)) && (cc > 5)) cc += 2; \
+		x = cc / bamboo_height; \
+		y = cc % bamboo_height; \
+		if((n) % 2) { \
+			if((NUMCORES) % 2) { \
+				if(y % 2) { \
+					t = x + (bamboo_width - 1 - y) * bamboo_width; \
+				} else { \
+					t = bamboo_width - 1 - x + (bamboo_width - 1 - y) * bamboo_width; \
+				} \
+			} else { \
+				if(y % 2) { \
+					t = bamboo_width - 1 - x + (bamboo_width - 1 - y) * bamboo_width; \
+				} else { \
+					t = x + (bamboo_width - 1 - y) * bamboo_width; \
+				} \
+			} \
+			if(62 == (NUMCORES)) {\
+				if(y>5) { \
+					t--; \
+				} else { \
+					t -= 2; \
+				} \
+			} \
 		} else { \
-			t = x + y * bamboo_width; \
+			if(y % 2) { \
+				t = bamboo_width - 1 - x + y * bamboo_width; \
+			} else { \
+				t = x + y * bamboo_width; \
+			} \
+			if((62 == NUMCORES) && (y > 5)) t--; \
 		} \
-		if(y>5) t--; \
-		t += NUMCORES * n; \
-	} \
-  (*((int*)b)) = t;
+		t += NUMCORES * (n); \
+		(*((int*)b)) = t; \
+	}
 
 
 #define BASEPTR(c, n, p) \
-	int b; \
-  BLOCKINDEX2(c, n, &b); \
-	if(b < NUMCORES) { \
-		(*((int*)p)) = BAMBOO_BASE_VA + b * BAMBOO_SMEM_SIZE_L; \
-	} else { \
-		(*((int*)p)) = BAMBOO_BASE_VA + BAMBOO_LARGE_SMEM_BOUND + (b - NUMCORES) * BAMBOO_SMEM_SIZE; \
-	} 
+  { \
+		int b; \
+		BLOCKINDEX2(c, n, &b); \
+		if(b < (NUMCORES)) { \
+			(*((int*)p)) = (BAMBOO_BASE_VA) + b * (BAMBOO_SMEM_SIZE_L); \
+		} else { \
+			(*((int*)p)) = (BAMBOO_BASE_VA)+(BAMBOO_LARGE_SMEM_BOUND)+(b-(NUMCORES))*(BAMBOO_SMEM_SIZE); \
+		} \
+	}
 
 inline void gc(struct garbagelist * stackptr); // core coordinator routine
 inline void gc_collect(struct garbagelist* stackptr);//core collector routine
 inline void transferMarkResults();
-inline void transferCompactStart(int corenum);
 inline void gc_enqueue(void *ptr);
-inline void gc_lobjenqueue(void *ptr, int length);
-inline bool findSpareMem(int * startaddr, int * tomove, int requiredmem);
+inline void gc_lobjenqueue(void *ptr, int length, int host);
+inline bool gcfindSpareMem(int * startaddr, 
+		                       int * tomove,
+								  				 int * dstcore,
+									  			 int requiredmem,
+										  		 int requiredcore);
 
 #endif
 
diff --git a/Robust/src/Runtime/multicoregc.h b/Robust/src/Runtime/multicoregc.h
new file mode 100644
index 00000000..0f7ddc4c
--- /dev/null
+++ b/Robust/src/Runtime/multicoregc.h
@@ -0,0 +1,16 @@
+#ifndef MULTICORE_GC_H
+#define MULTICORE_GC_H
+
+struct garbagelist {
+  int size;
+  struct garbagelist *next;
+  void * array[];
+};
+
+struct listitem {
+  struct listitem * prev;
+  struct listitem * next;
+  struct garbagelist * stackptr;
+};
+
+#endif // MULTICORE_GC_H
diff --git a/Robust/src/Runtime/multicoreruntime.c b/Robust/src/Runtime/multicoreruntime.c
index 3b1c9cf4..9810053f 100644
--- a/Robust/src/Runtime/multicoreruntime.c
+++ b/Robust/src/Runtime/multicoreruntime.c
@@ -191,6 +191,9 @@ void CALL01(___System______printString____L___String___,struct ___String___ * __
 #ifdef MULTICORE_GC
 void * allocate_new(void * ptr, int type) {
   struct ___Object___ * v=(struct ___Object___ *)FREEMALLOC((struct garbagelist *) ptr, classsize[type]);
+#ifdef GC_DEBUG
+	tprintf("new object: %x \n", v);
+#endif
   v->type=type;
   v->version = 0;
   v->lock = NULL;
@@ -202,6 +205,9 @@ void * allocate_new(void * ptr, int type) {
 
 struct ArrayObject * allocate_newarray(void * ptr, int type, int length) {
   struct ArrayObject * v=(struct ArrayObject *)FREEMALLOC((struct garbagelist *) ptr, sizeof(struct ArrayObject)+length*classsize[type]);
+#ifdef GC_DEBUG
+	tprintf("new array object: %x \n", v);
+#endif
   v->type=type;
   v->version = 0;
   v->lock = NULL;
diff --git a/Robust/src/Runtime/multicoreruntime.h b/Robust/src/Runtime/multicoreruntime.h
index 64cae114..16b0da6e 100644
--- a/Robust/src/Runtime/multicoreruntime.h
+++ b/Robust/src/Runtime/multicoreruntime.h
@@ -20,12 +20,12 @@ bool isMsgHanging;
 volatile bool isMsgSending;
 
 #define OUTMSG_INDEXINC() \
-	outmsgindex = (outmsgindex + 1) % BAMBOO_OUT_BUF_LENGTH;
+	outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH)
 
 #define OUTMSG_LASTINDEXINC() \
-	outmsglast = (outmsglast + 1) % BAMBOO_OUT_BUF_LENGTH; \
+	outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
 	if(outmsglast == outmsgindex) { \
-		BAMBOO_EXIT(0xb003); \
+		BAMBOO_EXIT(0xd001); \
 	} 
 
 #define OUTMSG_CACHE(n) \
@@ -186,27 +186,33 @@ int self_numreceiveobjs;
 // data structures for locking
 struct RuntimeHash locktable;
 static struct RuntimeHash* locktbl = &locktable;
+struct RuntimeHash * lockRedirectTbl;
+struct RuntimeHash * objRedirectLockTbl;
+#endif
 struct LockValue {
 	int redirectlock;
 	int value;
 };
-struct RuntimeHash * lockRedirectTbl;
-struct RuntimeHash * objRedirectLockTbl;
 int lockobj;
 int lock2require;
 int lockresult;
 bool lockflag;
-#endif
 
 // data structures for waiting objs
 struct Queue objqueue;
 
 // data structures for shared memory allocation
-#define BAMBOO_NUM_PAGES 1024 * 512
-#define BAMBOO_PAGE_SIZE 4096
-#define BAMBOO_SHARED_MEM_SIZE BAMBOO_PAGE_SIZE * BAMBOO_NUM_PAGES
 #define BAMBOO_BASE_VA 0xd000000
-#define BAMBOO_SMEM_SIZE 16 * BAMBOO_PAGE_SIZE
+#ifdef GC_DEBUG
+#define BAMBOO_NUM_PAGES (1*(2+1))
+#define BAMBOO_PAGE_SIZE (16 * 16)
+#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE)
+#else
+#define BAMBOO_NUM_PAGES (1024 * 512)
+#define BAMBOO_PAGE_SIZE (4096)
+#define BAMBOO_SMEM_SIZE (16 * BAMBOO_PAGE_SIZE)
+#endif
+#define BAMBOO_SHARED_MEM_SIZE (BAMBOO_PAGE_SIZE * BAMBOO_NUM_PAGES)
 
 #ifdef MULTICORE_GC
 #include "multicoregarbage.h"
@@ -370,6 +376,10 @@ inline void cache_msg_6(int targetcore,
 inline void transferObject(struct transObjInfo * transObj);
 inline int receiveMsg(void) __attribute__((always_inline));
 
+#ifdef MULTICORE_GC
+inline void transferMarkResults() __attribute__((always_inline));
+#endif
+
 #ifdef PROFILE
 inline void profileTaskStart(char * taskname) __attribute__((always_inline));
 inline void profileTaskEnd(void) __attribute__((always_inline));
diff --git a/Robust/src/Runtime/multicoretask.c b/Robust/src/Runtime/multicoretask.c
index 71d8acc5..57247987 100644
--- a/Robust/src/Runtime/multicoretask.c
+++ b/Robust/src/Runtime/multicoretask.c
@@ -18,7 +18,8 @@ int enqueuetasks_I(struct parameterwrapper *parameter,
 									 int * enterflags, 
 									 int numenterflags);
 
-inline void initruntimedata() {
+inline __attribute__((always_inline)) 
+void initruntimedata() {
 	int i;
 	// initialize the arrays
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
@@ -38,6 +39,7 @@ inline void initruntimedata() {
 			gcloads[i] = 0;
 			gcrequiredmems[i] = 0;
 			gcstopblock[i] = 0;
+			gcfilledblocks[i] = 0;
 #endif
     } // for(i = 0; i < NUMCORES; ++i)
 		numconfirm = 0;
@@ -74,6 +76,7 @@ inline void initruntimedata() {
 	gcflag = false;
 	gcprocessing = false;
 	gcphase = FINISHPHASE;
+	gccurr_heaptop = 0;
 	gcself_numsendobjs = 0;
 	gcself_numreceiveobjs = 0;
 	gcmarkedptrbound = 0;
@@ -85,20 +88,14 @@ inline void initruntimedata() {
 	gcheaptop = 0;
 	gctopcore = 0;
 	gcheapdirection = 1;
-	gcreservedsb = 0;
 	gcmovestartaddr = 0;
 	gctomove = false;
-	gcstopblock = 0;
-	gchead = gctail = gctail2 = NULL;
-	gclobjhead = gclobjtail = gclobjtail2 = NULL;
-	gcheadindex=0;
-	gctailindex=0;
-	gctailindex2 = 0;
-	gclobjheadindex=0;
-	gclobjtailindex=0;
-	gclobjtailindex2 = 0;
+	//gchead = gctail = gctail2 = NULL;
+	//gclobjhead = gclobjtail = gclobjtail2 = NULL;
+	//gcheadindex=gctailindex=gctailindex2 = 0;
+	//gclobjheadindex=gclobjtailindex=gclobjtailindex2 = 0;
 	gcmovepending = 0;
-	gcblocks2fill = 0;
+	gcblock2fill = 0;
 #else
 	// create the lock table, lockresult table and obj queue
   locktable.size = 20;
@@ -133,7 +130,8 @@ inline void initruntimedata() {
 #endif
 }
 
-inline void disruntimedata() {
+inline __attribute__((always_inline))
+void disruntimedata() {
 #ifdef MULTICORE_GC
 	freeRuntimeHash(gcpointertbl);
 #else
@@ -145,6 +143,7 @@ inline void disruntimedata() {
 	RUNFREE(currtpd);
 }
 
+inline __attribute__((always_inline))
 bool checkObjQueue() {
 	bool rflag = false;
 	struct transObjInfo * objInfo = NULL;
@@ -278,7 +277,8 @@ objqueuebreak:
 	return rflag;
 }
 
-inline void checkCoreStatus() {
+inline __attribute__((always_inline))
+void checkCoreStatus() {
 	bool allStall = false;
 	int i = 0;
 	int sumsendobj = 0;
@@ -495,6 +495,9 @@ inline void run(void * arg) {
 
 		  // check if there are new active tasks can be executed
 		  executetasks();
+			if(busystatus) {
+				sendStall = false;
+			}
 
 #ifndef INTERRUPT
 		  while(receiveObject() != -1) {
@@ -542,7 +545,7 @@ inline void run(void * arg) {
 							  BAMBOO_DEBUGPRINT(0xee0b);
 #endif
 							  // send stall msg
-							  send_msg_4(STARTUPCORE, 1, BAMBOO_NUM_OF_CORE, 
+							  send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE, 
 										       self_numsendobjs, self_numreceiveobjs);
 							  sendStall = true;
 							  isfirst = true;
@@ -565,16 +568,26 @@ inline void run(void * arg) {
 
 } // run()
 
+struct ___createstartupobject____I_locals {
+  INTPTR size;
+  void * next;
+  struct  ___StartupObject___ * ___startupobject___;
+  struct ArrayObject * ___stringarray___;
+}; // struct ___createstartupobject____I_locals
+
 void createstartupobject(int argc, 
 		                     char ** argv) {
   int i;
 
   /* Allocate startup object     */
 #ifdef MULTICORE_GC
+	struct ___createstartupobject____I_locals ___locals___={2, NULL, NULL, NULL};
   struct ___StartupObject___ *startupobject=
-		(struct ___StartupObject___*) allocate_new(NULL, STARTUPTYPE);
+		(struct ___StartupObject___*) allocate_new(&___locals___, STARTUPTYPE);
+	___locals___.___startupobject___ = startupobject;
   struct ArrayObject * stringarray=
-		allocate_newarray(NULL, STRINGARRAYTYPE, argc-1);
+		allocate_newarray(&___locals___, STRINGARRAYTYPE, argc-1);
+	___locals___.___stringarray___ = stringarray;
 #else
   struct ___StartupObject___ *startupobject=
 		(struct ___StartupObject___*) allocate_new(STARTUPTYPE);
@@ -586,7 +599,7 @@ void createstartupobject(int argc,
   for(i=1; i<argc; i++) {
     int length=strlen(argv[i]);
 #ifdef MULTICORE_GC
-    struct ___String___ *newstring=NewString(NULL, argv[i],length);
+    struct ___String___ *newstring=NewString(&___locals___, argv[i],length);
 #else
     struct ___String___ *newstring=NewString(argv[i],length);
 #endif
@@ -1147,12 +1160,13 @@ inline void addNewObjInfo(void * nobj) {
 
 void * smemalloc(int size, 
 		             int * allocsize) {
+	void * mem = NULL;
 #ifdef MULTICORE_GC
 	// go through free mem list for suitable blocks
 	struct freeMemItem * freemem = bamboo_free_mem_list->head;
 	struct freeMemItem * prev = NULL;
 	do {
-		if(freemem->size > size) {
+		if(freemem->size >= size) {
 			// found one
 			break;
 		}
@@ -1160,7 +1174,7 @@ void * smemalloc(int size,
 		freemem = freemem->next;
 	} while(freemem != NULL);
 	if(freemem != NULL) {
-		void * mem = (void *)(freemem->ptr);
+		mem = (void *)(freemem->ptr);
 		*allocsize = size;
 		freemem->ptr = ((void*)freemem->ptr) + size;
 		freemem->size -= size;
@@ -1170,22 +1184,22 @@ void * smemalloc(int size,
 		// check the remaining space in this block
 		int remain = (b < NUMCORES? (b+1)*BAMBOO_SMEM_SIZE_L  
 				        : BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES+1)*BAMBOO_SMEM_SIZE)
-			          -(mem-BAMBOO_BASE_VA);
+			          -((int)mem-BAMBOO_BASE_VA);
 		if(remain < size) {
 			// this object acrosses blocks
 			int tmpsbs = 1+(size-remain-1)/BAMBOO_SMEM_SIZE;
 			for(int k = 0; k < tmpsbs-1; k++) {
-				sbstarttbl[k+b] = (INTPTR)(-1);
+				gcsbstarttbl[k+b] = (INTPTR)(-1);
 			}
 			if((size-remain)%BAMBOO_SMEM_SIZE == 0) {
-				sbstarttbl[b+tmpsbs-1] = (INTPTR)(-1);
+				gcsbstarttbl[b+tmpsbs-1] = (INTPTR)(-1);
 			} else {
-				sbstarttbl[b+tmpsbs-1] = (INTPTR)(mem+size);
+				gcsbstarttbl[b+tmpsbs-1] = (INTPTR)(mem+size);
 			}
 		}
 	} else {
 #else
-	void * mem = mspace_calloc(bamboo_free_msp, 1, size);
+	mem = mspace_calloc(bamboo_free_msp, 1, size);
 	*allocsize = size;
 	if(mem == NULL) {
 #endif
@@ -1193,11 +1207,10 @@ void * smemalloc(int size,
 		*allocsize = 0;
 #ifdef MULTICORE_GC
 		gcflag = true;
-		gcrequiredmem = size;
 		return NULL;
 #else
-		BAMBOO_DEBUGPRINT(0xa016);
-		BAMBOO_EXIT(0xa016);
+		BAMBOO_DEBUGPRINT(0xa001);
+		BAMBOO_EXIT(0xa001);
 #endif
 	}
 	return mem;
@@ -1244,7 +1257,7 @@ msg:
 #ifndef TILERA
 				BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-				BAMBOO_EXIT(0xa005);
+				BAMBOO_EXIT(0xa002);
 			} 
       // store the object and its corresponding queue info, enqueue it later
       transObj->objptr = (void *)msgdata[2]; 
@@ -1297,7 +1310,7 @@ msg:
 #ifndef TILERA
 				BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
-				BAMBOO_EXIT(0xa006);
+				BAMBOO_EXIT(0xa003);
       } 
       if(msgdata[1] < NUMCORES) {
 #ifdef DEBUG
@@ -1346,7 +1359,7 @@ msg:
 #ifndef TILERA
 				BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-				BAMBOO_EXIT(0xa007);
+				BAMBOO_EXIT(0xa004);
       } 
       if((lockobj == msgdata[2]) && (lock2require == msgdata[3])) {
 #ifdef DEBUG
@@ -1364,7 +1377,7 @@ msg:
 #ifndef TILERA
 				BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-				BAMBOO_EXIT(0xa008);
+				BAMBOO_EXIT(0xa005);
       }
       break;
     }
@@ -1375,7 +1388,7 @@ msg:
 #ifndef TILERA
 				BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-				BAMBOO_EXIT(0xa009);
+				BAMBOO_EXIT(0xa006);
       } 
       if((lockobj == msgdata[2]) && (lock2require == msgdata[3])) {
 #ifdef DEBUG
@@ -1393,7 +1406,7 @@ msg:
 #ifndef TILERA
 				BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-				BAMBOO_EXIT(0xa00a);
+				BAMBOO_EXIT(0xa007);
       }
       break;
     }
@@ -1410,7 +1423,7 @@ msg:
       // receive an output profile data request msg
       if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
 				// startup core can not receive profile output finish msg
-				BAMBOO_EXIT(0xa00c);
+				BAMBOO_EXIT(0xa008);
       }
 #ifdef DEBUG
 #ifndef TILEAR
@@ -1435,7 +1448,7 @@ msg:
 #ifndef TILERA
 				BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
-				BAMBOO_EXIT(0xa00d);
+				BAMBOO_EXIT(0xa009);
       }
 #ifdef DEBUG
 #ifndef TILERA
@@ -1481,7 +1494,7 @@ msg:
 #ifndef TILERA
 			BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-			BAMBOO_EXIT(0xa00e);
+			BAMBOO_EXIT(0xa00a);
 		}
 		if(lockobj == msgdata[2]) {
 #ifdef DEBUG
@@ -1500,7 +1513,7 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-		  BAMBOO_EXIT(0xa00f);
+		  BAMBOO_EXIT(0xa00b);
 		}
 		break;
 	}
@@ -1511,7 +1524,7 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-		  BAMBOO_EXIT(0xa010);
+		  BAMBOO_EXIT(0xa00c);
 	  }
 		if(lockobj == msgdata[2]) {
 #ifdef DEBUG
@@ -1529,7 +1542,7 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-		  BAMBOO_EXIT(0xa011);
+		  BAMBOO_EXIT(0xa00d);
 		}
 		break;
 	}
@@ -1546,7 +1559,7 @@ msg:
 	  if((BAMBOO_NUM_OF_CORE == STARTUPCORE) 
 				|| (BAMBOO_NUM_OF_CORE > NUMCORES - 1)) {
 		  // wrong core to receive such msg
-		  BAMBOO_EXIT(0xa013);
+		  BAMBOO_EXIT(0xa00e);
 		} else {
 		  // send response msg
 #ifdef DEBUG
@@ -1574,7 +1587,7 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-		  BAMBOO_EXIT(0xa014);
+		  BAMBOO_EXIT(0xa00f);
 		} else {
 #ifdef DEBUG
 #ifndef TILERA
@@ -1585,8 +1598,8 @@ msg:
 			  numconfirm--;
 		  }
 		  corestatus[msgdata[2]] = msgdata[1];
-			numsendobjs[msgdata[1]] = msgdata[2];
-			numreceiveobjs[msgdata[1]] = msgdata[3];
+			numsendobjs[msgdata[2]] = msgdata[3];
+			numreceiveobjs[msgdata[2]] = msgdata[4];
 		}
 	  break;
 	}
@@ -1610,7 +1623,7 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-		  BAMBOO_EXIT(0xa015);
+		  BAMBOO_EXIT(0xa010);
 		} else {
 #ifdef DEBUG
 #ifndef TILERA
@@ -1695,7 +1708,7 @@ msg:
 
 	case GCSTARTCOMPACT: {
 		// a compact phase start msg
-		gcblocks2fill = msgdata[1];
+		gcblock2fill = msgdata[1];
 		gcphase = COMPACTPHASE;
 		break;
 	}
@@ -1713,12 +1726,12 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
-		  BAMBOO_EXIT(0xb006);
+		  BAMBOO_EXIT(0xb001);
 		} 
 		if(msgdata[1] < NUMCORES) {
 			gccorestatus[msgdata[1]] = 0;
-			gcnumsendobjs[msgdata[1]] = gcmsgdata[2];
-			gcnumreceiveobjs[msgdata[1]] = gcmsgdata[3];
+			gcnumsendobjs[msgdata[1]] = msgdata[2];
+			gcnumreceiveobjs[msgdata[1]] = msgdata[3];
 		}
 	  break;
 	}
@@ -1731,7 +1744,7 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
-		  BAMBOO_EXIT(0xb006);
+		  BAMBOO_EXIT(0xb002);
 		}
 		int cnum = msgdata[1];
 		int filledblocks = msgdata[2];
@@ -1747,7 +1760,7 @@ msg:
 				int startaddr = 0;
 				int tomove = 0;
 				int dstcore = 0;
-				if(findSpareMem(&startaddr, &tomove, &dstcore, data4, cnum)) {
+				if(gcfindSpareMem(&startaddr, &tomove, &dstcore, data4, cnum)) {
 					send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
 				}
 			} else {
@@ -1794,7 +1807,7 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[1]);
 #endif
-		  BAMBOO_EXIT(0xb006);
+		  BAMBOO_EXIT(0xb003);
 		} 
 		if(msgdata[1] < NUMCORES) {
 		  gccorestatus[msgdata[1]] = 0;
@@ -1813,7 +1826,7 @@ msg:
 		if((BAMBOO_NUM_OF_CORE == STARTUPCORE) 
 				|| (BAMBOO_NUM_OF_CORE > NUMCORES - 1)) {
 		  // wrong core to receive such msg
-		  BAMBOO_EXIT(0xa013);
+		  BAMBOO_EXIT(0xb004);
 		} else {
 		  // send response msg
 		  if(isMsgSending) {
@@ -1835,14 +1848,14 @@ msg:
 #ifndef TILERA
 		  BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-		  BAMBOO_EXIT(0xb014);
+		  BAMBOO_EXIT(0xb005);
 		} else {
 		  if(waitconfirm) {
 			  numconfirm--;
 		  }
-		  gccorestatus[msgdata[1]] = gcmsgdata[2];
-		  gcnumsendobjs[msgdata[1]] = gcmsgdata[3];
-		  gcnumreceiveobjs[msgdata[1]] = gcmsgdata[4];
+		  gccorestatus[msgdata[1]] = msgdata[2];
+		  gcnumsendobjs[msgdata[1]] = msgdata[3];
+		  gcnumreceiveobjs[msgdata[1]] = msgdata[4];
 		}
 	  break;
 	}
@@ -1870,7 +1883,7 @@ msg:
 		RuntimeHashget(gcpointertbl, msgdata[1], &dstptr);
 		if(NULL == dstptr) {
 			// no such pointer in this core, something is wrong
-			BAMBOO_EXIT(0xb008);
+			BAMBOO_EXIT(0xb006);
 		} else {
 			// send back the mapping info
 			if(isMsgSending) {
@@ -1886,7 +1899,7 @@ msg:
 		// received a mapping info response msg
 		if(msgdata[1] != gcobj2map) {
 			// obj not matched, something is wrong
-			BAMBOO_EXIT(0xb009);
+			BAMBOO_EXIT(0xb007);
 		} else {
 			gcmappedobj = msgdata[2];
 			RuntimeHashadd(gcpointertbl, gcobj2map, gcmappedobj);
@@ -1909,7 +1922,7 @@ msg:
 #ifndef TILERA
 			BAMBOO_DEBUGPRINT_REG(msgdata[2]);
 #endif
-			BAMBOO_EXIT(0xa005);
+			BAMBOO_EXIT(0xb008);
 		} 
 		// store the mark result info 
 		int cnum = msgdata[2];
@@ -1919,7 +1932,7 @@ msg:
 		}
 		// large obj info here
 	  for(int k = 5; k < msgdata[1];) {
-			gc_lobjenqueue(msgdata[k++], msgdata[k++], cnum, NULL);
+			gc_lobjenqueue(msgdata[k], msgdata[k+1], cnum); k += 2; // fixed (ptr, length) order: two k++ in one call are unsequenced
 		} // for(int k = 5; k < msgdata[1];)
 		break;
 	}
@@ -2173,7 +2186,7 @@ void releasewritelock_r(void * lock, void * redirectlock) {
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
       // no locks for this object, something is wrong
-      BAMBOO_EXIT(0xa01d);
+      BAMBOO_EXIT(0xa011);
     } else {
       int rwlock_obj = 0;
 	  struct LockValue * lockvalue = NULL;
diff --git a/Robust/src/Runtime/object.c b/Robust/src/Runtime/object.c
index 01037384..c552b4c5 100644
--- a/Robust/src/Runtime/object.c
+++ b/Robust/src/Runtime/object.c
@@ -75,7 +75,7 @@ int CALL01(___Object______MonitorExit____, struct ___Object___ * ___this___) {
     pthread_mutex_unlock(&objlock);
   } else {
 #ifdef MULTICORE
-    BAMBOO_EXIT(-1);
+    BAMBOO_EXIT(0xf201);
 #else
     printf("ERROR...UNLOCKING LOCK WE DON'T HAVE\n");
     exit(-1);
diff --git a/Robust/src/Runtime/runtime.h b/Robust/src/Runtime/runtime.h
index b102305a..bea53a5b 100644
--- a/Robust/src/Runtime/runtime.h
+++ b/Robust/src/Runtime/runtime.h
@@ -52,6 +52,11 @@ __attribute__((malloc)) void * allocate_new(void *, int type);
 __attribute__((malloc)) struct ArrayObject * allocate_newarray(void *, int type, int length);
 __attribute__((malloc)) struct ___String___ * NewString(void *, const char *str,int length);
 __attribute__((malloc)) struct ___TagDescriptor___ * allocate_tag(void *ptr, int index);
+#elif defined MULTICORE_GC
+__attribute__((malloc)) void * allocate_new(void *, int type);
+__attribute__((malloc)) struct ArrayObject * allocate_newarray(void *, int type, int length);
+__attribute__((malloc)) struct ___String___ * NewString(void *, const char *str,int length);
+__attribute__((malloc)) struct ___TagDescriptor___ * allocate_tag(void *ptr, int index);
 #else
 __attribute__((malloc)) void * allocate_new(int type);
 __attribute__((malloc)) struct ArrayObject * allocate_newarray(int type, int length);
diff --git a/Robust/src/buildscript b/Robust/src/buildscript
index d87399f2..66986b44 100755
--- a/Robust/src/buildscript
+++ b/Robust/src/buildscript
@@ -642,6 +642,7 @@ cp ../Runtime/ObjectHash.h ./
 cp ../Runtime/Queue.h ./
 cp ../Runtime/runtime.h ./
 cp ../Runtime/SimpleHash.h ./
+cp ../Runtime/multicoregc.h ./
 cp ../Runtime/multicoregarbage.h ./
 cp ../tmpbuilddirectory/*.c ./
 cp ../tmpbuilddirectory/*.h ./
-- 
2.34.1