fix some bugs in Bamboo
author jzhou <jzhou>
Sat, 10 Apr 2010 01:01:03 +0000 (01:01 +0000)
committer jzhou <jzhou>
Sat, 10 Apr 2010 01:01:03 +0000 (01:01 +0000)
Robust/src/Runtime/MGCHash.c
Robust/src/Runtime/MGCHash.h
Robust/src/Runtime/mem.h
Robust/src/Runtime/multicoregarbage.c
Robust/src/Runtime/multicoregarbage.h
Robust/src/Runtime/multicorehelper.h
Robust/src/Runtime/multicoreruntime.c
Robust/src/Runtime/multicoreruntime.h
Robust/src/Runtime/multicoretask.c
Robust/src/buildscript

index 648f74ab288a7a052c450427fe485498ff01c648..ab378675ef8195c6db098bab6b56bd85b241a546 100755 (executable)
@@ -38,7 +38,7 @@ void mgchashCreate(unsigned int size, double loadfactor) {
   mgc_loadfactor = loadfactor;
   mgc_size = size;
   mgc_threshold=size*loadfactor;
-       
+
 #ifdef BIT64
   mgc_mask = ((size << 6)-1)&~(15UL);
 #else
@@ -54,19 +54,19 @@ void mgchashreset() {
   int i;
 
   /*if (mgc_numelements<(mgc_size>>6)) {
-    mgchashlistnode_t *top=&ptr[mgc_size];
-    mgchashlistnode_t *tmpptr=mgc_list;
-    while(tmpptr!=NULL) {
+     mgchashlistnode_t *top=&ptr[mgc_size];
+     mgchashlistnode_t *tmpptr=mgc_list;
+     while(tmpptr!=NULL) {
       mgchashlistnode_t *next=tmpptr->lnext;
       if (tmpptr>=ptr&&tmpptr<top) {
-                               //zero in list
-                               tmpptr->key=NULL;
-                               tmpptr->next=NULL;
+                                //zero in list
+                                tmpptr->key=NULL;
+                                tmpptr->next=NULL;
       }
       tmpptr=next;
-    }
-  } else {*/
-         BAMBOO_MEMSET_WH(mgc_table, '\0', sizeof(mgchashlistnode_t)*mgc_size);
+     }
+     } else {*/
+  BAMBOO_MEMSET_WH(mgc_table, '\0', sizeof(mgchashlistnode_t)*mgc_size);
   //}
   while(mgc_structs->next!=NULL) {
     mgcliststruct_t *next=mgc_structs->next;
@@ -87,12 +87,12 @@ void mgchashInsert(void * key, void *val) {
     mgchashResize(newsize);
   }
 
-       //int hashkey = (unsigned int)key % mgc_size; 
-  ptr=&mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>6];//&mgc_table[hashkey];
+  //int hashkey = (unsigned int)key % mgc_size;
+  ptr=&mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>6]; //&mgc_table[hashkey];
   mgc_numelements++;
 
   if(ptr->key==0) {
-               // the first time insert a value for the key
+    // the first time insert a value for the key
     ptr->key=key;
     ptr->val=val;
   } else { // Insert in the beginning of linked list
@@ -125,7 +125,7 @@ void mgchashInsert_I(void * key, void *val) {
     mgchashResize_I(newsize);
   }
 
-       //int hashkey = (unsigned int)key % mgc_size; 
+  //int hashkey = (unsigned int)key % mgc_size;
   //ptr=&mgc_table[hashkey];
   ptr = &mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>6];
   mgc_numelements++;
@@ -133,7 +133,7 @@ void mgchashInsert_I(void * key, void *val) {
   if(ptr->key==0) {
     ptr->key=key;
     ptr->val=val;
-               return; 
+    return;
   } else { // Insert in the beginning of linked list
     mgchashlistnode_t * node;
     if (mgc_structs->num<NUMMGCLIST) {
@@ -158,9 +158,9 @@ void mgchashInsert_I(void * key, void *val) {
 // Search for an address for a given oid
 INLINE void * mgchashSearch(void * key) {
   //REMOVE HASH FUNCTION CALL TO MAKE SURE IT IS INLINED HERE]
-       //int hashkey = (unsigned int)key % mgc_size;
+  //int hashkey = (unsigned int)key % mgc_size;
   mgchashlistnode_t *node = &mgc_table[(((unsigned INTPTR)key)&mgc_mask)>>6];
-               //&mgc_table[hashkey];
+  //&mgc_table[hashkey];
 
   do {
     if(node->key == key) {
@@ -202,27 +202,27 @@ unsigned int mgchashResize(unsigned int newsize) {
       if ((key=curr->key) == 0) {             //Exit inner loop if there the first element is 0
        break;                  //key = val =0 for element if not present within the hash table
       }
-                       //index = (unsigned int)key % mgc_size; 
+      //index = (unsigned int)key % mgc_size;
       index = (((unsigned INTPTR)key) & mask) >>6;
       tmp=&node[index];
       next = curr->next;
       // Insert into the new table
       if(tmp->key == 0) {
-                               tmp->key = key;
-                               tmp->val = curr->val;
+       tmp->key = key;
+       tmp->val = curr->val;
       } /*
-          NOTE:  Add this case if you change this...
-          This case currently never happens because of the way things rehash....
-          else if (isfirst) {
-          chashlistnode_t *newnode= calloc(1, sizeof(chashlistnode_t));
-          newnode->key = curr->key;
-          newnode->val = curr->val;
-          newnode->next = tmp->next;
-          tmp->next=newnode;
-          } */
+          NOTE:  Add this case if you change this...
+          This case currently never happens because of the way things rehash....
+          else if (isfirst) {
+          chashlistnode_t *newnode= calloc(1, sizeof(chashlistnode_t));
+          newnode->key = curr->key;
+          newnode->val = curr->val;
+          newnode->next = tmp->next;
+          tmp->next=newnode;
+          } */
       else {
-                               curr->next=tmp->next;
-                               tmp->next=curr;
+       curr->next=tmp->next;
+       tmp->next=curr;
       }
 
       isfirst = 0;
@@ -246,7 +246,7 @@ unsigned int mgchashResize_I(unsigned int newsize) {
   oldsize = mgc_size;
 
   if((node = RUNMALLOC_I(newsize*sizeof(mgchashlistnode_t))) == NULL) {
-               BAMBOO_EXIT(0xe001);
+    BAMBOO_EXIT(0xe001);
     printf("Calloc error %s %d\n", __FILE__, __LINE__);
     return 1;
   }
@@ -263,32 +263,31 @@ unsigned int mgchashResize_I(unsigned int newsize) {
       void * key;
       mgchashlistnode_t *tmp,*next;
 
-      if ((key=curr->key) == 0) {             
-                               //Exit inner loop if there the first element is 0
-             break;                  
-                               //key = val =0 for element if not present within the hash table
+      if ((key=curr->key) == 0) {
+       //Exit inner loop if there the first element is 0
+       break;
+       //key = val =0 for element if not present within the hash table
       }
-                       //index = (unsigned int)key % mgc_size; 
+      //index = (unsigned int)key % mgc_size;
       index = (((unsigned INTPTR)key) & mask) >>6;
       tmp=&node[index];
       next = curr->next;
       // Insert into the new table
       if(tmp->key == 0) {
-                               tmp->key = key;
-                               tmp->val = curr->val;
+       tmp->key = key;
+       tmp->val = curr->val;
       } /*
-          NOTE:  Add this case if you change this...
-          This case currently never happens because of the way things rehash....*/
-                       else if (isfirst) {
-                               mgchashlistnode_t *newnode=RUNMALLOC_I(1*sizeof(mgchashlistnode_t));
-                               newnode->key = curr->key;
-                               newnode->val = curr->val;
-                               newnode->next = tmp->next;
-                               tmp->next=newnode;
-                       } 
-      else {
-                               curr->next=tmp->next;
-                               tmp->next=curr;
+          NOTE:  Add this case if you change this...
+          This case currently never happens because of the way things rehash....*/
+      else if (isfirst) {
+       mgchashlistnode_t *newnode=RUNMALLOC_I(1*sizeof(mgchashlistnode_t));
+       newnode->key = curr->key;
+       newnode->val = curr->val;
+       newnode->next = tmp->next;
+       tmp->next=newnode;
+      } else {
+       curr->next=tmp->next;
+       tmp->next=curr;
       }
 
       isfirst = 0;
@@ -315,8 +314,8 @@ void mgchashDelete() {
 }
 
 struct MGCHash * allocateMGCHash(int size,
-                                            int conflicts) {
-  struct MGCHash *thisvar;  
+                                 int conflicts) {
+  struct MGCHash *thisvar;
   if (size <= 0) {
 #ifdef MULTICORE
     BAMBOO_EXIT(0xf101);
@@ -327,22 +326,22 @@ struct MGCHash * allocateMGCHash(int size,
   }
   thisvar=(struct MGCHash *)RUNMALLOC(sizeof(struct MGCHash));
   thisvar->size = size;
-  thisvar->bucket = 
-               (struct MGCNode *) RUNMALLOC(sizeof(struct MGCNode)*size);
-       // zero out all the buckets
-       BAMBOO_MEMSET_WH(thisvar->bucket, '\0', sizeof(struct MGCNode)*size);
+  thisvar->bucket =
+    (struct MGCNode *) RUNMALLOC(sizeof(struct MGCNode)*size);
+  // zero out all the buckets
+  BAMBOO_MEMSET_WH(thisvar->bucket, '\0', sizeof(struct MGCNode)*size);
   //Set data counts
   thisvar->num4conflicts = conflicts;
   return thisvar;
 }
 
 void freeMGCHash(struct MGCHash *thisvar) {
-       int i = 0;
-       for(i=thisvar->size-1; i>=0; i--) {
+  int i = 0;
+  for(i=thisvar->size-1; i>=0; i--) {
     struct MGCNode *ptr;
-    for(ptr=thisvar->bucket[i].next; ptr!=NULL;) {
+    for(ptr=thisvar->bucket[i].next; ptr!=NULL; ) {
       struct MGCNode * nextptr=ptr->next;
-                       RUNFREE(ptr);
+      RUNFREE(ptr);
       ptr=nextptr;
     }
   }
@@ -350,11 +349,11 @@ void freeMGCHash(struct MGCHash *thisvar) {
   RUNFREE(thisvar);
 }
 /*
-void MGCHashrehash(struct MGCHash * thisvar) {
-  int newsize=thisvar->size;
-  struct MGCNode ** newbucket = (struct MGCNode **) RUNMALLOC(sizeof(struct MGCNode *)*newsize);
-  int i;
-  for(i=thisvar->size-1; i>=0; i--) {
+   void MGCHashrehash(struct MGCHash * thisvar) {
+   int newsize=thisvar->size;
+   struct MGCNode ** newbucket = (struct MGCNode **) RUNMALLOC(sizeof(struct MGCNode *)*newsize);
+   int i;
+   for(i=thisvar->size-1; i>=0; i--) {
     struct MGCNode *ptr;
     for(ptr=thisvar->bucket[i]; ptr!=NULL;) {
       struct MGCNode * nextptr=ptr->next;
@@ -363,64 +362,64 @@ void MGCHashrehash(struct MGCHash * thisvar) {
       newbucket[newhashkey]=ptr;
       ptr=nextptr;
     }
-  }
-  thisvar->size=newsize;
-  RUNFREE(thisvar->bucket);
-  thisvar->bucket=newbucket;
-}*/
+   }
+   thisvar->size=newsize;
+   RUNFREE(thisvar->bucket);
+   thisvar->bucket=newbucket;
+   }*/
 
 int MGCHashadd(struct MGCHash * thisvar, int data) {
-  // Rehash code 
+  // Rehash code
   unsigned int hashkey;
   struct MGCNode *ptr;
 
   /*if (thisvar->numelements>=thisvar->size) {
-    int newsize=2*thisvar->size+1;
-    struct MGCNode ** newbucket = (struct MGCNode **) RUNMALLOC(sizeof(struct MGCNode *)*newsize);
-    int i;
-    for(i=thisvar->size-1; i>=0; i--) {
+     int newsize=2*thisvar->size+1;
+     struct MGCNode ** newbucket = (struct MGCNode **) RUNMALLOC(sizeof(struct MGCNode *)*newsize);
+     int i;
+     for(i=thisvar->size-1; i>=0; i--) {
       struct MGCNode *ptr;
       for(ptr=thisvar->bucket[i]; ptr!=NULL;) {
-       struct MGCNode * nextptr=ptr->next;
-       unsigned int newhashkey=(unsigned int)ptr->key % newsize;
-       ptr->next=newbucket[newhashkey];
-       newbucket[newhashkey]=ptr;
-       ptr=nextptr;
+        struct MGCNode * nextptr=ptr->next;
+        unsigned int newhashkey=(unsigned int)ptr->key % newsize;
+        ptr->next=newbucket[newhashkey];
+        newbucket[newhashkey]=ptr;
+        ptr=nextptr;
       }
-    }
-    thisvar->size=newsize;
-    RUNFREE(thisvar->bucket);
-    thisvar->bucket=newbucket;
-  }*/
+     }
+     thisvar->size=newsize;
+     RUNFREE(thisvar->bucket);
+     thisvar->bucket=newbucket;
+     }*/
 
   hashkey = (unsigned int)data % thisvar->size;
   ptr = &thisvar->bucket[hashkey];
 
-       struct MGCNode * prev = NULL;
-       if(ptr->data < thisvar->num4conflicts) {
-               struct MGCNode *node=RUNMALLOC(sizeof(struct MGCNode));
+  struct MGCNode * prev = NULL;
+  if(ptr->data < thisvar->num4conflicts) {
+    struct MGCNode *node=RUNMALLOC(sizeof(struct MGCNode));
     node->data=data;
     node->next=(ptr->next);
     ptr->next=node;
-               ptr->data++;
-       } else {
-               while (ptr->next!=NULL) {
-                       prev = ptr;
-                       ptr = ptr->next;
-               }
-               ptr->data = data;
-               ptr->next = thisvar->bucket[hashkey].next;
-               thisvar->bucket[hashkey].next = ptr;
-               prev->next = NULL;
-       }
+    ptr->data++;
+  } else {
+    while (ptr->next!=NULL) {
+      prev = ptr;
+      ptr = ptr->next;
+    }
+    ptr->data = data;
+    ptr->next = thisvar->bucket[hashkey].next;
+    thisvar->bucket[hashkey].next = ptr;
+    prev->next = NULL;
+  }
 
   return 1;
 }
 
-#ifdef MULTICORE 
+#ifdef MULTICORE
 struct MGCHash * allocateMGCHash_I(int size,
-                                        int conflicts) {
-  struct MGCHash *thisvar;  
+                                   int conflicts) {
+  struct MGCHash *thisvar;
   if (size <= 0) {
 #ifdef MULTICORE
     BAMBOO_EXIT(0xf101);
@@ -431,59 +430,59 @@ struct MGCHash * allocateMGCHash_I(int size,
   }
   thisvar=(struct MGCHash *)RUNMALLOC_I(sizeof(struct MGCHash));
   thisvar->size = size;
-  thisvar->bucket = 
-               (struct MGCNode *) RUNMALLOC_I(sizeof(struct MGCNode)*size);
-       // zero out all the buckets
-       BAMBOO_MEMSET_WH(thisvar->bucket, '\0', sizeof(struct MGCNode)*size);
+  thisvar->bucket =
+    (struct MGCNode *) RUNMALLOC_I(sizeof(struct MGCNode)*size);
+  // zero out all the buckets
+  BAMBOO_MEMSET_WH(thisvar->bucket, '\0', sizeof(struct MGCNode)*size);
   //Set data counts
   thisvar->num4conflicts = conflicts;
   return thisvar;
 }
 
 int MGCHashadd_I(struct MGCHash * thisvar, int data) {
-  // Rehash code 
+  // Rehash code
   unsigned int hashkey;
   struct MGCNode *ptr;
 
   /*if (thisvar->numelements>=thisvar->size) {
-    int newsize=2*thisvar->size+1;
-    struct MGCNode ** newbucket = (struct MGCNode **) RUNMALLOC_I(sizeof(struct MGCNode *)*newsize);
-    int i;
-    for(i=thisvar->size-1; i>=0; i--) {
+     int newsize=2*thisvar->size+1;
+     struct MGCNode ** newbucket = (struct MGCNode **) RUNMALLOC_I(sizeof(struct MGCNode *)*newsize);
+     int i;
+     for(i=thisvar->size-1; i>=0; i--) {
       struct MGCNode *ptr;
       for(ptr=thisvar->bucket[i]; ptr!=NULL;) {
-       struct MGCNode * nextptr=ptr->next;
-       unsigned int newhashkey=(unsigned int)ptr->key % newsize;
-       ptr->next=newbucket[newhashkey];
-       newbucket[newhashkey]=ptr;
-       ptr=nextptr;
+        struct MGCNode * nextptr=ptr->next;
+        unsigned int newhashkey=(unsigned int)ptr->key % newsize;
+        ptr->next=newbucket[newhashkey];
+        newbucket[newhashkey]=ptr;
+        ptr=nextptr;
       }
-    }
-    thisvar->size=newsize;
-    RUNFREE(thisvar->bucket);
-    thisvar->bucket=newbucket;
-  }*/
+     }
+     thisvar->size=newsize;
+     RUNFREE(thisvar->bucket);
+     thisvar->bucket=newbucket;
+     }*/
 
   hashkey = (unsigned int)data % thisvar->size;
   ptr = &thisvar->bucket[hashkey];
 
-       struct MGCNode * prev = NULL;
-       if(ptr->data < thisvar->num4conflicts) {
-               struct MGCNode *node=RUNMALLOC_I(sizeof(struct MGCNode));
+  struct MGCNode * prev = NULL;
+  if(ptr->data < thisvar->num4conflicts) {
+    struct MGCNode *node=RUNMALLOC_I(sizeof(struct MGCNode));
     node->data=data;
     node->next=(ptr->next);
     ptr->next=node;
-               ptr->data++;
-       } else {
-               while (ptr->next!=NULL) {
-                       prev = ptr;
-                       ptr = ptr->next;
-               }
-               ptr->data = data;
-               ptr->next = thisvar->bucket[hashkey].next;
-               thisvar->bucket[hashkey].next = ptr;
-               prev->next = NULL;
-       }
+    ptr->data++;
+  } else {
+    while (ptr->next!=NULL) {
+      prev = ptr;
+      ptr = ptr->next;
+    }
+    ptr->data = data;
+    ptr->next = thisvar->bucket[hashkey].next;
+    thisvar->bucket[hashkey].next = ptr;
+    prev->next = NULL;
+  }
 
   return 1;
 }
@@ -493,21 +492,21 @@ int MGCHashcontains(struct MGCHash *thisvar, int data) {
   unsigned int hashkey = (unsigned int)data % thisvar->size;
 
   struct MGCNode *ptr = thisvar->bucket[hashkey].next;
-       struct MGCNode *prev = NULL;
+  struct MGCNode *prev = NULL;
   while (ptr!=NULL) {
     if (ptr->data == data) {
-                       if(prev != NULL) {
-                               prev->next = NULL;
-                               ptr->next = thisvar->bucket[hashkey].next;
-                               thisvar->bucket[hashkey].next = ptr;
-                       }
+      if(prev != NULL) {
+       prev->next = NULL;
+       ptr->next = thisvar->bucket[hashkey].next;
+       thisvar->bucket[hashkey].next = ptr;
+      }
 
-      return 1;       // success 
+      return 1;       // success
     }
-               prev = ptr;
+    prev = ptr;
     ptr = ptr->next;
   }
 
-  return 0;   // failure 
+  return 0;   // failure
 }
 
index fd4abe6cabdfa5012d3ca99fad5478a61fd519e3..021bfeb709ba1e042014be772350474abe9a5f25 100755 (executable)
@@ -22,8 +22,8 @@
 /* MGCHash *********************************************************/
 typedef struct mgchashlistnode {
   void * key;
-  void * val; //this can be cast to another type or used to point to a 
-                   //larger structure 
+  void * val; //this can be cast to another type or used to point to a
+  //larger structure
   struct mgchashlistnode *next;
 } mgchashlistnode_t;
 
index de00988799e5f9f173fbc346c72c1082e19e3415..7982fe16e7ca9b6eb866423a2db2ecd963d70291 100644 (file)
 #define RUNFREE(x) free(x)
 #else
 #ifdef MULTICORE
-#ifdef THREADSIMULATE
-#define FREEMALLOC(x) calloc(1,x)
-#define RUNMALLOC(x) calloc(1,x)
-#define RUNFREE(x) free(x)
-//#define PTR(x) (x)
-#else
 void * mycalloc(int m, int size);
 void * mycalloc_i(int m, int size);
 void myfree(void * ptr);
@@ -38,7 +32,6 @@ void * mycalloc_share(int m, int size);
 #define FREEMALLOC(x) mycalloc_share(1,x)
 #endif // #ifdef MULTICORE_GC
 //#define PTR(x) (32+(x-1)&~31)
-#endif  // #ifdef THREADSIMULATE
 #endif  // #ifdef MULTICORE
 #endif  // #ifdef PRECISE_GC
 #endif  // #ifdef BOEHM_GC
index c7314cd2d0866d8f3b4798b706441348cfe12d6a..e118ce1169cae2dadd86c6d0ce9ab4a37c136665 100644 (file)
@@ -42,12 +42,12 @@ struct pointerblock *gcspare=NULL;
 
 struct lobjpointerblock {
   void * lobjs[NUMLOBJPTRS];
-       //void * dsts[NUMLOBJPTRS];
-       int lengths[NUMLOBJPTRS];
-       //void * origs[NUMLOBJPTRS];
-       int hosts[NUMLOBJPTRS];
+  //void * dsts[NUMLOBJPTRS];
+  int lengths[NUMLOBJPTRS];
+  //void * origs[NUMLOBJPTRS];
+  int hosts[NUMLOBJPTRS];
   struct lobjpointerblock *next;
-       struct lobjpointerblock *prev;
+  struct lobjpointerblock *prev;
 };
 
 struct lobjpointerblock *gclobjhead=NULL;
@@ -61,76 +61,81 @@ struct lobjpointerblock *gclobjspare=NULL;
 #ifdef GC_DEBUG
 // dump whole mem in blocks
 inline void dumpSMem() {
-       int block = 0;
-       int sblock = 0;
-       int j = 0;
-       int i = 0;
-       int coren = 0;
-       int x = 0;
-       int y = 0;
-       tprintf("Dump shared mem: \n");
-       // reserved blocks for sblocktbl
-       tprintf("++++ reserved sblocks ++++ \n");
-       for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
-               tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
-            *((int *)(i)), *((int *)(i + 4)), 
-                                               *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
-                                               *((int *)(i + 4*4)), *((int *)(i + 4*5)), 
-                                               *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
-                                               *((int *)(i + 4*8)), *((int *)(i + 4*9)), 
-                                               *((int *)(i + 4*10)), *((int *)(i + 4*11)),
-                                               *((int *)(i + 4*12)), *((int *)(i + 4*13)), 
-                                               *((int *)(i + 4*14)), *((int *)(i + 4*15)));
-       }
-       sblock = gcreservedsb;
-       bool advanceblock = false;
-       // remaining memory
-       for(i=gcbaseva;i<BAMBOO_BASE_VA+BAMBOO_SHARED_MEM_SIZE;i+=4*16){
-               advanceblock = false;
-               // computing sblock # and block #, core coordinate (x,y) also
-               if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
-                       // finished a sblock
-                       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
-                               if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
-                                       // finished a block
-                                       block++;
-                                       advanceblock = true;
-                               }
-                       } else {
-                               // finished a block
-                               block++;
-                               advanceblock = true;
-                       }
-                       // compute core #
-                       if(advanceblock) {
-                               coren = gc_block2core[block%(NUMCORES4GC*2)];
-                       }
-                       // compute core coordinate
-                       BAMBOO_COORDS(coren, &x, &y); 
-                       tprintf("==== %d, %d : core (%d,%d), saddr %x====\n", 
-                                           block, sblock++, x, y, 
-                                                       (sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA);
-               }
-               j++;
-    tprintf("0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
-            *((int *)(i)), *((int *)(i + 4)), 
-                                               *((int *)(i + 4*2)), *((int *)(i + 4*3)), 
-                                               *((int *)(i + 4*4)), *((int *)(i + 4*5)), 
-                                               *((int *)(i + 4*6)), *((int *)(i + 4*7)), 
-                                               *((int *)(i + 4*8)), *((int *)(i + 4*9)), 
-                                               *((int *)(i + 4*10)), *((int *)(i + 4*11)),
-                                               *((int *)(i + 4*12)), *((int *)(i + 4*13)), 
-                                               *((int *)(i + 4*14)), *((int *)(i + 4*15)));
+  int block = 0;
+  int sblock = 0;
+  int j = 0;
+  int i = 0;
+  int coren = 0;
+  int x = 0;
+  int y = 0;
+  printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(), 
+            udn_tile_coord_y());
+  // reserved blocks for sblocktbl
+  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(), 
+            udn_tile_coord_y());
+  for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
+    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
+                  udn_tile_coord_x(), udn_tile_coord_y(),
+           *((int *)(i)), *((int *)(i + 4)),
+           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
+           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
+           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
+           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
+           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
+           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
+           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
+  }
+  sblock = gcreservedsb;
+  bool advanceblock = false;
+  // remaining memory
+  for(i=gcbaseva; i<BAMBOO_BASE_VA+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
+    advanceblock = false;
+    // computing sblock # and block #, core coordinate (x,y) also
+    if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
+      // finished a sblock
+      if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
+       if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
+         // finished a block
+         block++;
+         advanceblock = true;
        }
-       tprintf("\n");
+      } else {
+       // finished a block
+       block++;
+       advanceblock = true;
+      }
+      // compute core #
+      if(advanceblock) {
+       coren = gc_block2core[block%(NUMCORES4GC*2)];
+      }
+      // compute core coordinate
+      BAMBOO_COORDS(coren, &x, &y);
+      printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
+                    udn_tile_coord_x(), udn_tile_coord_y(),
+             block, sblock++, x, y,
+             (sblock-1)*(BAMBOO_SMEM_SIZE)+BAMBOO_BASE_VA);
+    }
+    j++;
+    printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
+                  udn_tile_coord_x(), udn_tile_coord_y(),
+           *((int *)(i)), *((int *)(i + 4)),
+           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
+           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
+           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
+           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
+           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
+           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
+           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
+  }
+  printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
 }
 #endif
 
 // should be invoked with interruption closed
 inline void gc_enqueue_I(void *ptr) {
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe601);
-       BAMBOO_DEBUGPRINT_REG(ptr);
+  BAMBOO_DEBUGPRINT(0xe601);
+  BAMBOO_DEBUGPRINT_REG(ptr);
 #endif
   if (gcheadindex==NUMPTRS) {
     struct pointerblock * tmp;
@@ -139,14 +144,14 @@ inline void gc_enqueue_I(void *ptr) {
       gcspare=NULL;
     } else {
       tmp=RUNMALLOC_I(sizeof(struct pointerblock));
-               } // if (gcspare!=NULL)
+    }             // if (gcspare!=NULL)
     gchead->next=tmp;
     gchead=tmp;
     gcheadindex=0;
   } // if (gcheadindex==NUMPTRS)
   gchead->ptrs[gcheadindex++]=ptr;
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe602);
+  BAMBOO_DEBUGPRINT(0xe602);
 #endif
 } // void gc_enqueue_I(void *ptr)
 
@@ -158,28 +163,28 @@ inline void * gc_dequeue_I() {
     gctailindex=0;
     if (gcspare!=NULL) {
       RUNFREE(tmp);
-               } else {
+    } else {
       gcspare=tmp;
-               } // if (gcspare!=NULL)
+    }             // if (gcspare!=NULL)
   } // if (gctailindex==NUMPTRS)
   return gctail->ptrs[gctailindex++];
 } // void * gc_dequeue()
 
 // dequeue and do not destroy the queue
 inline void * gc_dequeue2_I() {
-       if (gctailindex2==NUMPTRS) {
+  if (gctailindex2==NUMPTRS) {
     struct pointerblock *tmp=gctail2;
     gctail2=gctail2->next;
     gctailindex2=0;
   } // if (gctailindex2==NUMPTRS)
   return gctail2->ptrs[gctailindex2++];
-} // void * gc_dequeue2() 
+} // void * gc_dequeue2()
 
 inline int gc_moreItems_I() {
   if ((gchead==gctail)&&(gctailindex==gcheadindex))
     return 0;
   return 1;
-} // int gc_moreItems() 
+} // int gc_moreItems()
 
 inline int gc_moreItems2_I() {
   if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
@@ -189,11 +194,11 @@ inline int gc_moreItems2_I() {
 
 // should be invoked with interruption closed
 // enqueue a large obj: start addr & length
-inline void gc_lobjenqueue_I(void *ptr, 
-                                        int length, 
-                                                                                        int host) {
+inline void gc_lobjenqueue_I(void *ptr,
+                             int length,
+                             int host) {
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe901);
+  BAMBOO_DEBUGPRINT(0xe901);
 #endif
   if (gclobjheadindex==NUMLOBJPTRS) {
     struct lobjpointerblock * tmp;
@@ -202,44 +207,44 @@ inline void gc_lobjenqueue_I(void *ptr,
       gclobjspare=NULL;
     } else {
       tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
-               } // if (gclobjspare!=NULL)
+    }             // if (gclobjspare!=NULL)
     gclobjhead->next=tmp;
-               tmp->prev = gclobjhead;
+    tmp->prev = gclobjhead;
     gclobjhead=tmp;
     gclobjheadindex=0;
   } // if (gclobjheadindex==NUMLOBJPTRS)
   gclobjhead->lobjs[gclobjheadindex]=ptr;
-       gclobjhead->lengths[gclobjheadindex]=length;
-       gclobjhead->hosts[gclobjheadindex++]=host;
+  gclobjhead->lengths[gclobjheadindex]=length;
+  gclobjhead->hosts[gclobjheadindex++]=host;
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
-       BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
-       BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
+  BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
+  BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
+  BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
 #endif
 } // void gc_lobjenqueue_I(void *ptr...)
 
 // dequeue and destroy the queue
 inline void * gc_lobjdequeue_I(int * length,
-                                        int * host) {
+                               int * host) {
   if (gclobjtailindex==NUMLOBJPTRS) {
     struct lobjpointerblock *tmp=gclobjtail;
     gclobjtail=gclobjtail->next;
     gclobjtailindex=0;
-               gclobjtail->prev = NULL;
+    gclobjtail->prev = NULL;
     if (gclobjspare!=NULL) {
       RUNFREE(tmp);
-               } else {
+    } else {
       gclobjspare=tmp;
-                       tmp->next = NULL;
-                       tmp->prev = NULL;
-               } // if (gclobjspare!=NULL)
+      tmp->next = NULL;
+      tmp->prev = NULL;
+    }             // if (gclobjspare!=NULL)
   } // if (gclobjtailindex==NUMLOBJPTRS)
-       if(length != NULL) {
-               *length = gclobjtail->lengths[gclobjtailindex];
-       }
-       if(host != NULL) {
-               *host = (int)(gclobjtail->hosts[gclobjtailindex]);
-       }
+  if(length != NULL) {
+    *length = gclobjtail->lengths[gclobjtailindex];
+  }
+  if(host != NULL) {
+    *host = (int)(gclobjtail->hosts[gclobjtailindex]);
+  }
   return gclobjtail->lobjs[gclobjtailindex++];
 } // void * gc_lobjdequeue()
 
@@ -255,8 +260,8 @@ inline void gc_lobjdequeue2_I() {
     gclobjtail2=gclobjtail2->next;
     gclobjtailindex2=1;
   } else {
-               gclobjtailindex2++;
-       }// if (gclobjtailindex2==NUMLOBJPTRS)
+    gclobjtailindex2++;
+  }      // if (gclobjtailindex2==NUMLOBJPTRS)
 } // void * gc_lobjdequeue2()
 
 inline int gc_lobjmoreItems2_I() {
@@ -271,8 +276,8 @@ inline void gc_lobjdequeue3_I() {
     gclobjtail2=gclobjtail2->prev;
     gclobjtailindex2=NUMLOBJPTRS-1;
   } else {
-               gclobjtailindex2--;
-       }// if (gclobjtailindex2==NUMLOBJPTRS)
+    gclobjtailindex2--;
+  }      // if (gclobjtailindex2==NUMLOBJPTRS)
 } // void * gc_lobjdequeue3()
 
 inline int gc_lobjmoreItems3_I() {
@@ -282,22 +287,22 @@ inline int gc_lobjmoreItems3_I() {
 } // int gc_lobjmoreItems3()
 
 inline void gc_lobjqueueinit4_I() {
-       gclobjtail2 = gclobjtail;
-       gclobjtailindex2 = gclobjtailindex;
+  gclobjtail2 = gclobjtail;
+  gclobjtailindex2 = gclobjtailindex;
 } // void gc_lobjqueueinit2()
 
 inline void * gc_lobjdequeue4_I(int * length,
-                                         int * host) {
+                                int * host) {
   if (gclobjtailindex2==NUMLOBJPTRS) {
     gclobjtail2=gclobjtail2->next;
     gclobjtailindex2=0;
   } // if (gclobjtailindex==NUMLOBJPTRS)
-       if(length != NULL) {
-               *length = gclobjtail2->lengths[gclobjtailindex2];
-       }
-       if(host != NULL) {
-               *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
-       }
+  if(length != NULL) {
+    *length = gclobjtail2->lengths[gclobjtailindex2];
+  }
+  if(host != NULL) {
+    *host = (int)(gclobjtail2->hosts[gclobjtailindex2]);
+  }
   return gclobjtail2->lobjs[gclobjtailindex2++];
 } // void * gc_lobjdequeue()
 
@@ -309,475 +314,475 @@ inline int gc_lobjmoreItems4_I() {
 
 INTPTR gccurr_heapbound = 0;
 
-inline void gettype_size(void * ptr, 
-                                    int * ttype, 
-                                                                        int * tsize) {
-       int type = ((int *)ptr)[0];
-       int size = 0;
-       if(type < NUMCLASSES) {
-               // a normal object
-               size = classsize[type];
-       } else {        
-               // an array 
-               struct ArrayObject *ao=(struct ArrayObject *)ptr;
-               int elementsize=classsize[type];
-               int length=ao->___length___; 
-               size=sizeof(struct ArrayObject)+length*elementsize;
-       } // if(type < NUMCLASSES)
-       *ttype = type;
-       *tsize = size;
+inline void gettype_size(void * ptr,
+                         int * ttype,
+                         int * tsize) {
+  int type = ((int *)ptr)[0];
+  int size = 0;
+  if(type < NUMCLASSES) {
+    // a normal object
+    size = classsize[type];
+  } else {
+    // an array
+    struct ArrayObject *ao=(struct ArrayObject *)ptr;
+    int elementsize=classsize[type];
+    int length=ao->___length___;
+    size=sizeof(struct ArrayObject)+length*elementsize;
+  }       // if(type < NUMCLASSES)
+  *ttype = type;
+  *tsize = size;
 }
 
-inline bool isLarge(void * ptr, 
-                               int * ttype, 
-                                                                               int * tsize) {
+inline bool isLarge(void * ptr,
+                    int * ttype,
+                    int * tsize) {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe701);
-               BAMBOO_DEBUGPRINT_REG(ptr);
+  BAMBOO_DEBUGPRINT(0xe701);
+  BAMBOO_DEBUGPRINT_REG(ptr);
 #endif
-       // check if a pointer is referring to a large object
-       gettype_size(ptr, ttype, tsize);
+  // check if a pointer is referring to a large object
+  gettype_size(ptr, ttype, tsize);
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(*tsize);
+  BAMBOO_DEBUGPRINT(*tsize);
 #endif
-       int bound = (BAMBOO_SMEM_SIZE);
-       if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
-               bound = (BAMBOO_SMEM_SIZE_L);
-       }
-       if((((int)ptr-gcbaseva)%(bound))==0) {
-               // ptr is a start of a block
+  int bound = (BAMBOO_SMEM_SIZE);
+  if(((int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
+    bound = (BAMBOO_SMEM_SIZE_L);
+  }
+  if((((int)ptr-gcbaseva)%(bound))==0) {
+    // ptr is a start of a block
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe702);
-               BAMBOO_DEBUGPRINT(1);
+    BAMBOO_DEBUGPRINT(0xe702);
+    BAMBOO_DEBUGPRINT(1);
 #endif
-               return true;
-       }
-       if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
-               // it acrosses the boundary of current block
+    return true;
+  }
+  if((bound-(((int)ptr-gcbaseva)%bound)) < (*tsize)) {
+    // it acrosses the boundary of current block
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe703);
-               BAMBOO_DEBUGPRINT(1);
+    BAMBOO_DEBUGPRINT(0xe703);
+    BAMBOO_DEBUGPRINT(1);
 #endif
-               return true;
-       }
+    return true;
+  }
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0);
+  BAMBOO_DEBUGPRINT(0);
 #endif
-       return false;
+  return false;
 } // bool isLarge(void * ptr, int * ttype, int * tsize)
 
 inline int hostcore(void * ptr) {
-       // check the host core of ptr
-       int host = 0;
-       RESIDECORE(ptr, &host);
+  // check the host core of ptr
+  int host = 0;
+  RESIDECORE(ptr, &host);
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xedd0);
-       BAMBOO_DEBUGPRINT_REG(ptr);
-       BAMBOO_DEBUGPRINT_REG(host);
+  BAMBOO_DEBUGPRINT(0xedd0);
+  BAMBOO_DEBUGPRINT_REG(ptr);
+  BAMBOO_DEBUGPRINT_REG(host);
 #endif
-       return host;
+  return host;
 } // int hostcore(void * ptr)
 
 inline bool isLocal(void * ptr) {
-       // check if a pointer is in shared heap on this core
-       return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
+  // check if a pointer is in shared heap on this core
+  return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
 } // bool isLocal(void * ptr)
 
 inline bool gc_checkCoreStatus_I() {
-       bool allStall = true;
-       for(int i = 0; i < NUMCORES4GC; ++i) {
-               if(gccorestatus[i] != 0) {
-                       allStall = false;
-                       break;
-               } // if(gccorestatus[i] != 0)
-       } // for(i = 0; i < NUMCORES4GC; ++i)
-       return allStall;
+  bool allStall = true;
+  for(int i = 0; i < NUMCORES4GC; ++i) {
+    if(gccorestatus[i] != 0) {
+      allStall = false;
+      break;
+    }             // if(gccorestatus[i] != 0)
+  }       // for(i = 0; i < NUMCORES4GC; ++i)
+  return allStall;
 }
 
 inline bool gc_checkAllCoreStatus_I() {
-       bool allStall = true;
-       for(int i = 0; i < NUMCORESACTIVE; ++i) {
-               if(gccorestatus[i] != 0) {
-                       allStall = false;
-                       break;
-               } // if(gccorestatus[i] != 0)
-       } // for(i = 0; i < NUMCORESACTIVE; ++i)
-       return allStall;
+  bool allStall = true;
+  for(int i = 0; i < NUMCORESACTIVE; ++i) {
+    if(gccorestatus[i] != 0) {
+      allStall = false;
+      break;
+    }             // if(gccorestatus[i] != 0)
+  }       // for(i = 0; i < NUMCORESACTIVE; ++i)
+  return allStall;
 }
 
 inline void checkMarkStatue() {
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xee01);
+  BAMBOO_DEBUGPRINT(0xee01);
 #endif
-       int i;
-       if((!waitconfirm) || 
-                       (waitconfirm && (numconfirm == 0))) {
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xee02);
-#endif
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-               gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-               gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
-               gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
-               // check the status of all cores
-               bool allStall = gc_checkAllCoreStatus_I();
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xee03);
-#endif
-               if(allStall) {
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xee04);
-#endif
-                       // ask for confirm
-                       if(!waitconfirm) {
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xee05);
-#endif
-                               // the first time found all cores stall
-                               // send out status confirm msg to all other cores
-                               // reset the corestatus array too
-                               gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-                               waitconfirm = true;
-                               numconfirm = NUMCORESACTIVE - 1;
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                               for(i = 1; i < NUMCORESACTIVE; ++i) {   
-                                       gccorestatus[i] = 1;
-                                       // send mark phase finish confirm request msg to core i
-                                       send_msg_1(i, GCMARKCONFIRM);
-                               } // for(i = 1; i < NUMCORESACTIVE; ++i) 
-                       } else {
-                               // check if the sum of send objs and receive obj are the same
-                               // yes->check if the info is the latest; no->go on executing
-                               int sumsendobj = 0;
-                               for(i = 0; i < NUMCORESACTIVE; ++i) {
-                                       sumsendobj += gcnumsendobjs[i];
-                               } // for(i = 0; i < NUMCORESACTIVE; ++i) 
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xee06);
-                               BAMBOO_DEBUGPRINT_REG(sumsendobj);
-#endif
-                               for(i = 0; i < NUMCORESACTIVE; ++i) {
-                                       sumsendobj -= gcnumreceiveobjs[i];
-                               } // for(i = 0; i < NUMCORESACTIVE; ++i) 
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xee07);
-                               BAMBOO_DEBUGPRINT_REG(sumsendobj);
-#endif
-                               if(0 == sumsendobj) {
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xee08);
-#endif
-                                       // all the core status info are the latest
-                                       // stop mark phase
-                                       gcphase = COMPACTPHASE;
-                                       // restore the gcstatus for all cores
-                                       for(i = 0; i < NUMCORESACTIVE; ++i) {
-                                               gccorestatus[i] = 1;
-                                       } // for(i = 0; i < NUMCORESACTIVE; ++i)
-                               } else {
-                                       // wait for a while and ask for confirm again
-                                       int h = 100;
-                                       while(h--) {
-                                       }
-                                       waitconfirm = false;
-                               }// if(0 == sumsendobj) else ...
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                       } // if(!gcwaitconfirm) else()
-               } else {
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               }  // if(allStall)
-       } // if((!waitconfirm)...
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xee0a);
+  int i;
+  if((!waitconfirm) ||
+     (waitconfirm && (numconfirm == 0))) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xee02);
+#endif
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+    gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
+    gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
+    // check the status of all cores
+    bool allStall = gc_checkAllCoreStatus_I();
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xee03);
+#endif
+    if(allStall) {
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xee04);
+#endif
+      // ask for confirm
+      if(!waitconfirm) {
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xee05);
+#endif
+       // the first time found all cores stall
+       // send out status confirm msg to all other cores
+       // reset the corestatus array too
+       gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
+       waitconfirm = true;
+       numconfirm = NUMCORESACTIVE - 1;
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       for(i = 1; i < NUMCORESACTIVE; ++i) {
+         gccorestatus[i] = 1;
+         // send mark phase finish confirm request msg to core i
+         send_msg_1(i, GCMARKCONFIRM);
+       }                         // for(i = 1; i < NUMCORESACTIVE; ++i)
+      } else {
+       // check if the sum of send objs and receive obj are the same
+       // yes->check if the info is the latest; no->go on executing
+       int sumsendobj = 0;
+       for(i = 0; i < NUMCORESACTIVE; ++i) {
+         sumsendobj += gcnumsendobjs[i];
+       }                         // for(i = 0; i < NUMCORESACTIVE; ++i)
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xee06);
+       BAMBOO_DEBUGPRINT_REG(sumsendobj);
+#endif
+       for(i = 0; i < NUMCORESACTIVE; ++i) {
+         sumsendobj -= gcnumreceiveobjs[i];
+       }                         // for(i = 0; i < NUMCORESACTIVE; ++i)
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xee07);
+       BAMBOO_DEBUGPRINT_REG(sumsendobj);
+#endif
+       if(0 == sumsendobj) {
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xee08);
+#endif
+         // all the core status info are the latest
+         // stop mark phase
+         gcphase = COMPACTPHASE;
+         // restore the gcstatus for all cores
+         for(i = 0; i < NUMCORESACTIVE; ++i) {
+           gccorestatus[i] = 1;
+         }                               // for(i = 0; i < NUMCORESACTIVE; ++i)
+       } else {
+         // wait for a while and ask for confirm again
+         int h = 100;
+         while(h--) {
+         }
+         waitconfirm = false;
+       }                        // if(0 == sumsendobj) else ...
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      }                   // if(!gcwaitconfirm) else()
+    } else {
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    }              // if(allStall)
+  }       // if((!waitconfirm)...
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xee0a);
 #endif
 } // void checkMarkStatue()
 
 inline bool preGC() {
-       // preparation for gc
-       // make sure to clear all incoming msgs espacially transfer obj msgs
+  // preparation for gc
+  // make sure to clear all incoming msgs espacially transfer obj msgs
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xec01);
+  BAMBOO_DEBUGPRINT(0xec01);
 #endif
-       int i;
-       if((!waitconfirm) || 
-                                                 (waitconfirm && (numconfirm == 0))) {
-               // send out status confirm msgs to all cores to check if there are
-               // transfer obj msgs on-the-fly
-               waitconfirm = true;
-               numconfirm = NUMCORESACTIVE - 1;
-               for(i = 1; i < NUMCORESACTIVE; ++i) {   
-                       corestatus[i] = 1;
-                       // send status confirm msg to core i
-                       send_msg_1(i, STATUSCONFIRM);
-               } // for(i = 1; i < NUMCORESACTIVE; ++i)
+  int i;
+  if((!waitconfirm) ||
+     (waitconfirm && (numconfirm == 0))) {
+    // send out status confirm msgs to all cores to check if there are
+    // transfer obj msgs on-the-fly
+    waitconfirm = true;
+    numconfirm = NUMCORESACTIVE - 1;
+    for(i = 1; i < NUMCORESACTIVE; ++i) {
+      corestatus[i] = 1;
+      // send status confirm msg to core i
+      send_msg_1(i, STATUSCONFIRM);
+    }             // for(i = 1; i < NUMCORESACTIVE; ++i)
 
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xec02);
+    BAMBOO_DEBUGPRINT(0xec02);
 #endif
-               while(true) {
-                       if(numconfirm == 0) {
-                               break;
-                       }
-               } // wait for confirmations
-               waitconfirm = false;
-               numconfirm = 0;
+    while(true) {
+      if(numconfirm == 0) {
+       break;
+      }
+    }             // wait for confirmations
+    waitconfirm = false;
+    numconfirm = 0;
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xec03);
+    BAMBOO_DEBUGPRINT(0xec03);
 #endif
-               numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
-               numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
-               int sumsendobj = 0;
+    numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
+    numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
+    int sumsendobj = 0;
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xec04);
+    BAMBOO_DEBUGPRINT(0xec04);
 #endif
-               for(i = 0; i < NUMCORESACTIVE; ++i) {
-                       sumsendobj += numsendobjs[i];
+    for(i = 0; i < NUMCORESACTIVE; ++i) {
+      sumsendobj += numsendobjs[i];
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
+      BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
 #endif
-               } // for(i = 1; i < NUMCORESACTIVE; ++i)
+    }             // for(i = 1; i < NUMCORESACTIVE; ++i)
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xec05);
-       BAMBOO_DEBUGPRINT_REG(sumsendobj);
+    BAMBOO_DEBUGPRINT(0xec05);
+    BAMBOO_DEBUGPRINT_REG(sumsendobj);
 #endif
-               for(i = 0; i < NUMCORESACTIVE; ++i) {
-                       sumsendobj -= numreceiveobjs[i];
+    for(i = 0; i < NUMCORESACTIVE; ++i) {
+      sumsendobj -= numreceiveobjs[i];
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
+      BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
 #endif
-               } // for(i = 1; i < NUMCORESACTIVE; ++i)
+    }             // for(i = 1; i < NUMCORESACTIVE; ++i)
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xec06);
-               BAMBOO_DEBUGPRINT_REG(sumsendobj);
+    BAMBOO_DEBUGPRINT(0xec06);
+    BAMBOO_DEBUGPRINT_REG(sumsendobj);
 #endif
-               if(0 == sumsendobj) {
-                       return true;
-               } else {
-                       // still have some transfer obj msgs on-the-fly, can not start gc
-                       return false;
-               } // if(0 == sumsendobj) 
-       } else {
+    if(0 == sumsendobj) {
+      return true;
+    } else {
+      // still have some transfer obj msgs on-the-fly, can not start gc
+      return false;
+    }             // if(0 == sumsendobj)
+  } else {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xec07);
+    BAMBOO_DEBUGPRINT(0xec07);
 #endif
-               // previously asked for status confirmation and do not have all the 
-               // confirmations yet, can not start gc
-               return false;
-       } // if((!waitconfirm) || 
+    // previously asked for status confirmation and do not have all the
+    // confirmations yet, can not start gc
+    return false;
+  }       // if((!waitconfirm) ||
 } // bool preGC()
 
 inline void initGC() {
-       int i;
-       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-               for(i = 0; i < NUMCORES4GC; ++i) {
-                       gccorestatus[i] = 1;
-                       gcnumsendobjs[i] = 0; 
-                       gcnumreceiveobjs[i] = 0;
-                       gcloads[i] = 0;
-                       gcrequiredmems[i] = 0;
-                       gcfilledblocks[i] = 0;
-                       gcstopblock[i] = 0;
-               } // for(i = 0; i < NUMCORES4GC; ++i)
-               for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
-                       gccorestatus[i] = 1;
-                       gcnumsendobjs[i] = 0; 
-                       gcnumreceiveobjs[i] = 0;
-               }
-               gcheaptop = 0;
-               gctopcore = 0;
-               gctopblock = 0;
-       } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) 
-       gcself_numsendobjs = 0;
-       gcself_numreceiveobjs = 0;
-       gcmarkedptrbound = 0;
-       gcobj2map = 0;
-       gcmappedobj = 0;
-       gcismapped = false;
-       gcnumlobjs = 0;
-       gcmovestartaddr = 0;
-       gctomove = false;
-       gcblock2fill = 0;
-       gcmovepending = 0;
-       gccurr_heaptop = 0;
-       gcdstcore = 0;
-
-       // initialize queue
-       if (gchead==NULL) {
-               gcheadindex=gctailindex=gctailindex2 = 0;
-               gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
-       } else {
-               gctailindex = gctailindex2 = gcheadindex;
-               gctail = gctail2 = gchead;
-       }
+  int i;
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    for(i = 0; i < NUMCORES4GC; ++i) {
+      gccorestatus[i] = 1;
+      gcnumsendobjs[i] = 0;
+      gcnumreceiveobjs[i] = 0;
+      gcloads[i] = 0;
+      gcrequiredmems[i] = 0;
+      gcfilledblocks[i] = 0;
+      gcstopblock[i] = 0;
+    }             // for(i = 0; i < NUMCORES4GC; ++i)
+    for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
+      gccorestatus[i] = 1;
+      gcnumsendobjs[i] = 0;
+      gcnumreceiveobjs[i] = 0;
+    }
+    gcheaptop = 0;
+    gctopcore = 0;
+    gctopblock = 0;
+  }       // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
+  gcself_numsendobjs = 0;
+  gcself_numreceiveobjs = 0;
+  gcmarkedptrbound = 0;
+  gcobj2map = 0;
+  gcmappedobj = 0;
+  gcismapped = false;
+  gcnumlobjs = 0;
+  gcmovestartaddr = 0;
+  gctomove = false;
+  gcblock2fill = 0;
+  gcmovepending = 0;
+  gccurr_heaptop = 0;
+  gcdstcore = 0;
+
+  // initialize queue
+  if (gchead==NULL) {
+    gcheadindex=gctailindex=gctailindex2 = 0;
+    gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
+  } else {
+    gctailindex = gctailindex2 = gcheadindex;
+    gctail = gctail2 = gchead;
+  }
 
-       // initialize the large obj queues
-       if (gclobjhead==NULL) {
-               gclobjheadindex=0;
-               gclobjtailindex=0;
-               gclobjtailindex2 = 0;
-               gclobjhead=gclobjtail=gclobjtail2=
-                       RUNMALLOC(sizeof(struct lobjpointerblock));
-       } else {
-               gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
-               gclobjtail = gclobjtail2 = gclobjhead;
-       }
-       gclobjhead->next = gclobjhead->prev = NULL;
+  // initialize the large obj queues
+  if (gclobjhead==NULL) {
+    gclobjheadindex=0;
+    gclobjtailindex=0;
+    gclobjtailindex2 = 0;
+    gclobjhead=gclobjtail=gclobjtail2=
+                             RUNMALLOC(sizeof(struct lobjpointerblock));
+  } else {
+    gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
+    gclobjtail = gclobjtail2 = gclobjhead;
+  }
+  gclobjhead->next = gclobjhead->prev = NULL;
+
+  freeRuntimeHash(gcpointertbl);
+  gcpointertbl = allocateRuntimeHash(20);
+  //gcpointertbl = allocateMGCHash(20);
+  //mgchashreset();
 
-       freeRuntimeHash(gcpointertbl);
-       gcpointertbl = allocateRuntimeHash(20);
-       //gcpointertbl = allocateMGCHash(20);
-       //mgchashreset();
-       
-       freeMGCHash(gcforwardobjtbl);
-       gcforwardobjtbl = allocateMGCHash(20, 3);
+  freeMGCHash(gcforwardobjtbl);
+  gcforwardobjtbl = allocateMGCHash(20, 3);
 
 #ifdef GC_PROFILE
-       // TODO
-       num_mapinforequest = 0;
-       num_mapinforequest_i = 0;
-       flushstalltime = 0;
-       flushstalltime_i = 0;
-       num_markrequest = 0;
-       marktime = 0;
+  // TODO
+  num_mapinforequest = 0;
+  num_mapinforequest_i = 0;
+  flushstalltime = 0;
+  flushstalltime_i = 0;
+  num_markrequest = 0;
+  marktime = 0;
 #endif
 } // void initGC()
 
 // compute load balance for all cores
 inline int loadbalance(int * heaptop) {
-       // compute load balance
-       int i;
+  // compute load balance
+  int i;
 
-       // get the total loads
-       int tloads = gcloads[STARTUPCORE];
-       for(i = 1; i < NUMCORES4GC; i++) {
-               tloads += gcloads[i];
-       }
-       *heaptop = gcbaseva + tloads;
+  // get the total loads
+  int tloads = gcloads[STARTUPCORE];
+  for(i = 1; i < NUMCORES4GC; i++) {
+    tloads += gcloads[i];
+  }
+  *heaptop = gcbaseva + tloads;
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xdddd);
-       BAMBOO_DEBUGPRINT_REG(tloads);
-       BAMBOO_DEBUGPRINT_REG(*heaptop);
+  BAMBOO_DEBUGPRINT(0xdddd);
+  BAMBOO_DEBUGPRINT_REG(tloads);
+  BAMBOO_DEBUGPRINT_REG(*heaptop);
 #endif
-       int b = 0;
-       BLOCKINDEX(*heaptop, &b);
-       int numbpc = b / NUMCORES4GC; // num of blocks per core
+  int b = 0;
+  BLOCKINDEX(*heaptop, &b);
+  int numbpc = b / NUMCORES4GC;       // num of blocks per core
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT_REG(b);
-       BAMBOO_DEBUGPRINT_REG(numbpc);
+  BAMBOO_DEBUGPRINT_REG(b);
+  BAMBOO_DEBUGPRINT_REG(numbpc);
 #endif
-       gctopblock = b;
-       RESIDECORE(heaptop, &gctopcore);
+  gctopblock = b;
+  RESIDECORE(heaptop, &gctopcore);
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT_REG(gctopcore);
+  BAMBOO_DEBUGPRINT_REG(gctopcore);
 #endif
-       return numbpc;
+  return numbpc;
 } // void loadbalance(int * heaptop)
 
 inline bool cacheLObjs() {
-       // check the total mem size need for large objs
-       int sumsize = 0;
-       int size = 0;
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe801);
-#endif
-       gclobjtail2 = gclobjtail;
-       gclobjtailindex2 = gclobjtailindex;
-       int tmp_lobj = 0;
-       int tmp_len = 0;
-       int tmp_host = 0;
-       // compute total mem size required and sort the lobjs in ascending order
-       while(gc_lobjmoreItems2_I()){
-               gc_lobjdequeue2_I();
-               tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
-               tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
-               tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
-               sumsize += tmp_len;
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
-               BAMBOO_DEBUGPRINT_REG(tmp_len);
-               BAMBOO_DEBUGPRINT_REG(sumsize);
-#endif
-               int i = gclobjtailindex2-1;
-               struct lobjpointerblock * tmp_block = gclobjtail2;
-               // find the place to insert
-               while(true) {
-                       if(i == 0) {
-                               if(tmp_block->prev == NULL) {
-                                       break;
-                               }
-                               if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
-                                       tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
-                                       tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
-                                       tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
-                                       tmp_block = tmp_block->prev;
-                                       i = NUMLOBJPTRS-1;
-                               } else {
-                                       break;
-                               } // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
-                       } else {
-                               if(tmp_block->lobjs[i-1] > tmp_lobj) {
-                                       tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
-                                       tmp_block->lengths[i] = tmp_block->lengths[i-1];
-                                       tmp_block->hosts[i] = tmp_block->hosts[i-1];
-                                       i--;
-                               } else {
-                                       break;
-                               } // if(tmp_block->lobjs[i-1] < tmp_lobj)
-                       } // if(i ==0 ) else {}
-               } // while(true)
-               // insert it
-               if(i != gclobjtailindex2 - 1) {
-                       tmp_block->lobjs[i] = tmp_lobj;
-                       tmp_block->lengths[i] = tmp_len;
-                       tmp_block->hosts[i] = tmp_host;
-               }
-       } // while(gc_lobjmoreItems2())
-
-       // check if there are enough space to cache these large objs
-       INTPTR dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - sumsize;
-       if(gcheaptop > dst) {
-               // do not have enough room to cache large objs
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe802);
-       BAMBOO_DEBUGPRINT_REG(dst);
-       BAMBOO_DEBUGPRINT_REG(gcheaptop);
-#endif
-               return false;
+  // check the total mem size need for large objs
+  int sumsize = 0;
+  int size = 0;
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xe801);
+#endif
+  gclobjtail2 = gclobjtail;
+  gclobjtailindex2 = gclobjtailindex;
+  int tmp_lobj = 0;
+  int tmp_len = 0;
+  int tmp_host = 0;
+  // compute total mem size required and sort the lobjs in ascending order
+  while(gc_lobjmoreItems2_I()) {
+    gc_lobjdequeue2_I();
+    tmp_lobj = gclobjtail2->lobjs[gclobjtailindex2-1];
+    tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
+    tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
+    sumsize += tmp_len;
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
+    BAMBOO_DEBUGPRINT_REG(tmp_len);
+    BAMBOO_DEBUGPRINT_REG(sumsize);
+#endif
+    int i = gclobjtailindex2-1;
+    struct lobjpointerblock * tmp_block = gclobjtail2;
+    // find the place to insert
+    while(true) {
+      if(i == 0) {
+       if(tmp_block->prev == NULL) {
+         break;
        }
+       if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
+         tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
+         tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
+         tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
+         tmp_block = tmp_block->prev;
+         i = NUMLOBJPTRS-1;
+       } else {
+         break;
+       }                         // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
+      } else {
+       if(tmp_block->lobjs[i-1] > tmp_lobj) {
+         tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
+         tmp_block->lengths[i] = tmp_block->lengths[i-1];
+         tmp_block->hosts[i] = tmp_block->hosts[i-1];
+         i--;
+       } else {
+         break;
+       }                         // if(tmp_block->lobjs[i-1] < tmp_lobj)
+      }                   // if(i ==0 ) else {}
+    }             // while(true)
+                  // insert it
+    if(i != gclobjtailindex2 - 1) {
+      tmp_block->lobjs[i] = tmp_lobj;
+      tmp_block->lengths[i] = tmp_len;
+      tmp_block->hosts[i] = tmp_host;
+    }
+  }       // while(gc_lobjmoreItems2())
+
+  // check if there are enough space to cache these large objs
+  INTPTR dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
+  if(gcheaptop > dst) {
+    // do not have enough room to cache large objs
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xe802);
+    BAMBOO_DEBUGPRINT_REG(dst);
+    BAMBOO_DEBUGPRINT_REG(gcheaptop);
+#endif
+    return false;
+  }
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe803);
-       BAMBOO_DEBUGPRINT_REG(dst);
-       BAMBOO_DEBUGPRINT_REG(gcheaptop);
-#endif
-
-       gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
-       // cache the largeObjs to the top of the shared heap
-       //gclobjtail2 = gclobjtail;
-       //gclobjtailindex2 = gclobjtailindex;
-       dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
-       while(gc_lobjmoreItems3_I()) {
-               gc_lobjdequeue3_I();
-               size = gclobjtail2->lengths[gclobjtailindex2];
-               // set the mark field to , indicating that this obj has been moved 
-               // and need to be flushed
-               ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
-               dst -= size;
-               if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
-                       memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
-               } else {
-                       //BAMBOO_WRITE_HINT_CACHE(dst, size);
-                 memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
-               }
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0x804);
-               BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
-               BAMBOO_DEBUGPRINT(dst);
-               BAMBOO_DEBUGPRINT_REG(size);
-               BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
-               BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
+  BAMBOO_DEBUGPRINT(0xe803);
+  BAMBOO_DEBUGPRINT_REG(dst);
+  BAMBOO_DEBUGPRINT_REG(gcheaptop);
+#endif
+
+  gcheaptop = dst;       // Note: record the start of cached lobjs with gcheaptop
+  // cache the largeObjs to the top of the shared heap
+  //gclobjtail2 = gclobjtail;
+  //gclobjtailindex2 = gclobjtailindex;
+  dst = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
+  while(gc_lobjmoreItems3_I()) {
+    gc_lobjdequeue3_I();
+    size = gclobjtail2->lengths[gclobjtailindex2];
+    // set the mark field to , indicating that this obj has been moved
+    // and need to be flushed
+    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[6] = COMPACTED;
+    dst -= size;
+    if((int)dst < (int)(gclobjtail2->lobjs[gclobjtailindex2])+size) {
+      memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
+    } else {
+      //BAMBOO_WRITE_HINT_CACHE(dst, size);
+      memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
+    }
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0x804);
+    BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
+    BAMBOO_DEBUGPRINT(dst);
+    BAMBOO_DEBUGPRINT_REG(size);
+    BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
+    BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
 #endif
-       }
-       return true;
+  }
+  return true;
 } // void cacheLObjs()
 
 // NOTE: the free mem chunks should be maintained in an ordered linklist
@@ -785,2298 +790,2317 @@ inline bool cacheLObjs() {
 
 // update the bmmboo_smemtbl to record current shared mem usage
 void updateSmemTbl(int coren,
-                              int localtop) {
-       int ltopcore = 0;
-       int bound = BAMBOO_SMEM_SIZE_L;
-       BLOCKINDEX(localtop, &ltopcore);
-       if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
-               bound = BAMBOO_SMEM_SIZE;
-       }
-       int load = (localtop-gcbaseva)%bound;
-       int i = 0;
-       int j = 0;
-       int toset = 0;
-       do{
-               toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
-               if(toset < ltopcore) {
-                       bamboo_smemtbl[toset]=
-                               (toset<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
-               } else if(toset == ltopcore) {
-                       bamboo_smemtbl[toset] = load;
-                       break;
-               } else {
-                       break;
-               }
-               i++;
-               if(i == 2) {
-                       i = 0;
-                       j++;
-               }
-       }while(true);
+                   int localtop) {
+  int ltopcore = 0;
+  int bound = BAMBOO_SMEM_SIZE_L;
+  BLOCKINDEX(localtop, &ltopcore);
+  if(localtop >= (gcbaseva+(BAMBOO_LARGE_SMEM_BOUND))) {
+    bound = BAMBOO_SMEM_SIZE;
+  }
+  int load = (localtop-gcbaseva)%bound;
+  int i = 0;
+  int j = 0;
+  int toset = 0;
+  do {
+    toset = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
+    if(toset < ltopcore) {
+      bamboo_smemtbl[toset]=
+        (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+    } else if(toset == ltopcore) {
+      bamboo_smemtbl[toset] = load;
+      break;
+    } else {
+      break;
+    }
+    i++;
+    if(i == 2) {
+      i = 0;
+      j++;
+    }
+  } while(true);
 } // void updateSmemTbl(int, int)
 
 inline void moveLObjs() {
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xea01);
-#endif
-       // zero out the smemtbl
-       BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
-       // find current heap top
-       // flush all gcloads to indicate the real heap top on one core
-       // previous it represents the next available ptr on a core
-       if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L))) 
-                       && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
-               // edge of a block, check if this is exactly the heaptop
-               BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
-               gcloads[0]+=(gcfilledblocks[0]>1?
-                               (BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
-       } 
-       updateSmemTbl(0, gcloads[0]);
+  BAMBOO_DEBUGPRINT(0xea01);
+#endif
+  // zero out the smemtbl
+  BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
+  // find current heap top
+  // flush all gcloads to indicate the real heap top on one core
+  // previous it represents the next available ptr on a core
+  if((gcloads[0] > (gcbaseva+(BAMBOO_SMEM_SIZE_L)))
+     && ((gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
+    // edge of a block, check if this is exactly the heaptop
+    BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
+    gcloads[0]+=(gcfilledblocks[0]>1 ?
+                 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
+  }
+  updateSmemTbl(0, gcloads[0]);
 #ifdef DEBUG
   BAMBOO_DEBUGPRINT(0xea02);
-       BAMBOO_DEBUGPRINT_REG(gcloads[0]);
-       BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
+  BAMBOO_DEBUGPRINT_REG(gcloads[0]);
+  BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
+#endif
+  for(int i = 1; i < NUMCORES4GC; i++) {
+    int tmptop = 0;
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xf000+i);
+    BAMBOO_DEBUGPRINT_REG(gcloads[i]);
+    BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
+#endif
+    if((gcfilledblocks[i] > 0)
+       && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
+      // edge of a block, check if this is exactly the heaptop
+      BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
+      gcloads[i]
+        +=(gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
+      tmptop = gcloads[i];
+    }
+    updateSmemTbl(i, gcloads[i]);
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT_REG(gcloads[i]);
+#endif
+  }       // for(int i = 1; i < NUMCORES4GC; i++) {
+
+  // find current heap top
+  // TODO
+  // a bug here: when using local allocation, directly move large objects
+  // to the highest free chunk might not be memory efficient
+  int tmpheaptop = 0;
+  int size = 0;
+  int bound = 0;
+  int i = 0;
+  for(i = gcnumblock-1; i >= 0; i--) {
+    if(bamboo_smemtbl[i] > 0) {
+      break;
+    }
+  }
+  if(i == -1) {
+    tmpheaptop = gcbaseva;
+  } else {
+    tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
+                                             (BAMBOO_SMEM_SIZE_L*i) :
+                                             (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
+  }
+
+  // move large objs from gcheaptop to tmpheaptop
+  // write the header first
+  int tomove = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) -gcheaptop;
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xea03);
+  BAMBOO_DEBUGPRINT_REG(tomove);
+  BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+  BAMBOO_DEBUGPRINT_REG(gcheaptop);
 #endif
-       for(int i = 1; i < NUMCORES4GC; i++) {
-               int tmptop = 0;
+  // flush the sbstartbl
+  BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
+                   (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
+  if(tomove == 0) {
+    gcheaptop = tmpheaptop;
+  } else {
+    // check how many blocks it acrosses
+    int remain = tmpheaptop-gcbaseva;
+    int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb;             //number of the sblock
+    int b = 0;             // number of the block
+    BLOCKINDEX(tmpheaptop, &b);
+    // check the remaining space in this block
+    bound = (BAMBOO_SMEM_SIZE);
+    if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
+      bound = (BAMBOO_SMEM_SIZE_L);
+    }
+    remain = bound - remain%bound;
+
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xea04);
+#endif
+    size = 0;
+    int isize = 0;
+    int host = 0;
+    int ptr = 0;
+    int base = tmpheaptop;
+    int cpysize = 0;
+    remain -= BAMBOO_CACHE_LINE_SIZE;
+    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+    gc_lobjqueueinit4_I();
+    while(gc_lobjmoreItems4_I()) {
+      ptr = (int)(gc_lobjdequeue4_I(&size, &host));
+      ALIGNSIZE(size, &isize);
+      if(remain < isize) {
+       // this object acrosses blocks
+       if(cpysize > 0) {
+         // close current block, fill its header
+         BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
+         *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
+         bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE;                               // add the size of the header
+         cpysize = 0;
+         base = tmpheaptop;
+         if(remain == 0) {
+           remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
+                    BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+         }
+         remain -= BAMBOO_CACHE_LINE_SIZE;
+         tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+         BLOCKINDEX(tmpheaptop, &b);
+         sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
+       }                         // if(cpysize > 0)
+
+       // move the large obj
+       if((int)gcheaptop < (int)(tmpheaptop)+size) {
+         memmove(tmpheaptop, gcheaptop, size);
+       } else {
+         //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
+         memcpy(tmpheaptop, gcheaptop, size);
+       }
+       // fill the remaining space with -2 padding
+       BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
+       // zero out original mem caching the lobj
+       BAMBOO_MEMSET_WH(gcheaptop, '\0', size);
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xf000+i);
-               BAMBOO_DEBUGPRINT_REG(gcloads[i]);
-               BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
+       BAMBOO_DEBUGPRINT(0xea05);
+       BAMBOO_DEBUGPRINT_REG(gcheaptop);
+       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+       BAMBOO_DEBUGPRINT_REG(size);
+       BAMBOO_DEBUGPRINT_REG(isize);
+       BAMBOO_DEBUGPRINT_REG(base);
 #endif
-               if((gcfilledblocks[i] > 0) 
-                               && ((gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
-                       // edge of a block, check if this is exactly the heaptop
-                       BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
-                       gcloads[i]
-                               +=(gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
-                       tmptop = gcloads[i];
-               } 
-               updateSmemTbl(i, gcloads[i]);
+       gcheaptop += size;
+       // cache the mapping info anyway
+       //if(ptr != tmpheaptop) {
+       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+       //mgchashInsert_I(ptr, tmpheaptop);
+       RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
+       //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       //}
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(gcloads[i]);
+       BAMBOO_DEBUGPRINT(0xcdca);
+       BAMBOO_DEBUGPRINT_REG(ptr);
+       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+#endif
+       if(host != BAMBOO_NUM_OF_CORE) {
+         // send the original host core with the mapping info
+         send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xcdcb);
+         BAMBOO_DEBUGPRINT_REG(ptr);
+         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
 #endif
-       } // for(int i = 1; i < NUMCORES4GC; i++) {
+       }                         // if(host != BAMBOO_NUM_OF_CORE)
+       tmpheaptop += isize;
 
-       // find current heap top
-       // TODO
-       // a bug here: when using local allocation, directly move large objects
-       // to the highest free chunk might not be memory efficient
-       int tmpheaptop = 0;
-       int size = 0;
-       int bound = 0;
-       int i = 0;
-       for(i = gcnumblock-1; i >= 0; i--) {
-               if(bamboo_smemtbl[i] > 0) {
-                       break;
-               }
+       // set the gcsbstarttbl and bamboo_smemtbl
+       int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
+       for(int k = 1; k < tmpsbs; k++) {
+         gcsbstarttbl[sb+k] = (INTPTR)(-1);
        }
-       if(i == -1) {
-               tmpheaptop = gcbaseva;
+       sb += tmpsbs;
+       bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+       BLOCKINDEX(tmpheaptop-1, &tmpsbs);
+       for(; b < tmpsbs; b++) {
+         bamboo_smemtbl[b] = bound;
+         if(b==NUMCORES4GC-1) {
+           bound = BAMBOO_SMEM_SIZE;
+         }
+       }
+       if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
+         gcsbstarttbl[sb] = (INTPTR)(-1);
+         remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
+                  BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+         bamboo_smemtbl[b] = bound;
        } else {
-               tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC)?
-                               (BAMBOO_SMEM_SIZE_L*i):
-                               (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
+         gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
+         remain = tmpheaptop-gcbaseva;
+         bamboo_smemtbl[b] = remain%bound;
+         remain = bound - bamboo_smemtbl[b];
+       }                         // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
+
+       // close current block and fill the header
+       BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
+       *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
+       cpysize = 0;
+       base = tmpheaptop;
+       if(remain == BAMBOO_CACHE_LINE_SIZE) {
+         // fill with 0 in case
+         BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
        }
-
-       // move large objs from gcheaptop to tmpheaptop
-       // write the header first
-       int tomove = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE) - gcheaptop;
+       remain -= BAMBOO_CACHE_LINE_SIZE;
+       tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+      } else {
+       remain -= isize;
+       // move the large obj
+       if((int)gcheaptop < (int)(tmpheaptop)+size) {
+         memmove(tmpheaptop, gcheaptop, size);
+       } else {
+         //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
+         memcpy(tmpheaptop, gcheaptop, size);
+       }
+       // fill the remaining space with -2 padding
+       BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
+       // zero out original mem caching the lobj
+       BAMBOO_MEMSET_WH(gcheaptop, '\0', size);
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xea03);
-       BAMBOO_DEBUGPRINT_REG(tomove);
-       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+       BAMBOO_DEBUGPRINT(0xea06);
        BAMBOO_DEBUGPRINT_REG(gcheaptop);
+       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+       BAMBOO_DEBUGPRINT_REG(size);
+       BAMBOO_DEBUGPRINT_REG(isize);
 #endif
-       // flush the sbstartbl
-       BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0', 
-               (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-gcreservedsb)*sizeof(INTPTR));
-       if(tomove == 0) {
-               gcheaptop = tmpheaptop;
-       } else {
-               // check how many blocks it acrosses
-               int remain = tmpheaptop-gcbaseva;
-               int sb = remain/(BAMBOO_SMEM_SIZE) + gcreservedsb; //number of the sblock
-               int b = 0; // number of the block
-               BLOCKINDEX(tmpheaptop, &b);
-               // check the remaining space in this block
-               bound = (BAMBOO_SMEM_SIZE);
-               if(remain < (BAMBOO_LARGE_SMEM_BOUND)) {
-                       bound = (BAMBOO_SMEM_SIZE_L);
-               }
-               remain = bound - remain%bound;
-
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xea04);
-#endif
-               size = 0;
-               int isize = 0;
-               int host = 0;
-               int ptr = 0;
-               int base = tmpheaptop;
-               int cpysize = 0;
-               remain -= BAMBOO_CACHE_LINE_SIZE;
-               tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-               gc_lobjqueueinit4_I();
-               while(gc_lobjmoreItems4_I()) {
-                       ptr = (int)(gc_lobjdequeue4_I(&size, &host));
-                       ALIGNSIZE(size, &isize);
-                       if(remain < isize) {
-                               // this object acrosses blocks
-                               if(cpysize > 0) {
-                                       // close current block, fill its header
-                                       BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-                                       *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
-                                       bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE; // add the size of the header
-                                       cpysize = 0;
-                                       base = tmpheaptop;
-                                       if(remain == 0) {
-                                               remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
-                                                                                BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-                                       } 
-                                       remain -= BAMBOO_CACHE_LINE_SIZE;
-                                       tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-                                       BLOCKINDEX(tmpheaptop, &b);
-                                       sb = (tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE) + gcreservedsb;
-                               } // if(cpysize > 0)
-
-                               // move the large obj
-                               if((int)gcheaptop < (int)(tmpheaptop)+size) {
-                                 memmove(tmpheaptop, gcheaptop, size);
-                               } else {
-                                       //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
-                                       memcpy(tmpheaptop, gcheaptop, size);
-                               }
-                               // fill the remaining space with -2 padding
-                               BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
-                               // zero out original mem caching the lobj
-                               BAMBOO_MEMSET_WH(gcheaptop, '\0', size);
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xea05);
-                               BAMBOO_DEBUGPRINT_REG(gcheaptop);
-                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-                               BAMBOO_DEBUGPRINT_REG(size);
-                               BAMBOO_DEBUGPRINT_REG(isize);
-                               BAMBOO_DEBUGPRINT_REG(base);
-#endif
-                               gcheaptop += size;
-                               // cache the mapping info anyway
-                               //if(ptr != tmpheaptop) {
-                               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                               //mgchashInsert_I(ptr, tmpheaptop);
-                               RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                               //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                               //}
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xcdca);
-                               BAMBOO_DEBUGPRINT_REG(ptr);
-                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-#endif
-                               if(host != BAMBOO_NUM_OF_CORE) {
-                                       // send the original host core with the mapping info
-                                       send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xcdcb);
-                                       BAMBOO_DEBUGPRINT_REG(ptr);
-                                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-#endif
-                               } // if(host != BAMBOO_NUM_OF_CORE)
-                               tmpheaptop += isize;
-
-                               // set the gcsbstarttbl and bamboo_smemtbl
-                               int tmpsbs = 1+(isize-remain-1)/BAMBOO_SMEM_SIZE;
-                               for(int k = 1; k < tmpsbs; k++) {
-                                       gcsbstarttbl[sb+k] = (INTPTR)(-1);
-                               }
-                               sb += tmpsbs;
-                               bound = (b<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
-                               BLOCKINDEX(tmpheaptop-1, &tmpsbs);
-                               for(; b < tmpsbs; b++) {
-                                       bamboo_smemtbl[b] = bound;
-                                       if(b==NUMCORES4GC-1) {
-                                               bound = BAMBOO_SMEM_SIZE;
-                                       }
-                               }
-                               if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
-                                       gcsbstarttbl[sb] = (INTPTR)(-1);
-                                       remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ? 
-                                                                        BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-                                       bamboo_smemtbl[b] = bound;
-                               } else {
-                                       gcsbstarttbl[sb] = (INTPTR)(tmpheaptop);
-                                       remain = tmpheaptop-gcbaseva;
-                                       bamboo_smemtbl[b] = remain%bound;
-                                       remain = bound - bamboo_smemtbl[b];
-                               } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
-
-                               // close current block and fill the header
-                               BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-                               *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
-                               cpysize = 0;
-                               base = tmpheaptop;
-                               if(remain == BAMBOO_CACHE_LINE_SIZE) {
-                                       // fill with 0 in case
-                                       BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
-                               }
-                               remain -= BAMBOO_CACHE_LINE_SIZE;
-                               tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-                       } else {
-                               remain -= isize;
-                               // move the large obj
-                               if((int)gcheaptop < (int)(tmpheaptop)+size) {
-                               memmove(tmpheaptop, gcheaptop, size);
-                               } else {
-                                       //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
-                                       memcpy(tmpheaptop, gcheaptop, size);
-                               }
-                               // fill the remaining space with -2 padding
-                               BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
-                               // zero out original mem caching the lobj
-                               BAMBOO_MEMSET_WH(gcheaptop, '\0', size);
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xea06);
-                               BAMBOO_DEBUGPRINT_REG(gcheaptop);
-                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-                               BAMBOO_DEBUGPRINT_REG(size);
-                               BAMBOO_DEBUGPRINT_REG(isize);
-#endif
-
-                               gcheaptop += size;
-                               cpysize += isize;
-                               // cache the mapping info anyway
-                               //if(ptr != tmpheaptop) {
-                               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                               //mgchashInsert_I(ptr, tmpheaptop);
-                               RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                               //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                               //}
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xcdcc);
-                               BAMBOO_DEBUGPRINT_REG(ptr);
-                               BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-                               BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
-#endif
-                               if(host != BAMBOO_NUM_OF_CORE) {
-                                       // send the original host core with the mapping info
-                                       send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xcdcd);
-                                       BAMBOO_DEBUGPRINT_REG(ptr);
-                                       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-#endif
-                               } // if(host != BAMBOO_NUM_OF_CORE)
-                               tmpheaptop += isize;
-
-                               // update bamboo_smemtbl
-                               bamboo_smemtbl[b] += isize;
-                       } // if(remain < isize) else ...
-               } // while(gc_lobjmoreItems())
-               if(cpysize > 0) {
-                       // close current block, fill the header
-                       BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-                       *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
-                       bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE; // add the size of the header
-               } else {
-                       tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
-               }
-               gcheaptop = tmpheaptop;
-
-       } // if(tomove == 0)
-
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xea07);
-       BAMBOO_DEBUGPRINT_REG(gcheaptop);
+
+       gcheaptop += size;
+       cpysize += isize;
+       // cache the mapping info anyway
+       //if(ptr != tmpheaptop) {
+       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+       //mgchashInsert_I(ptr, tmpheaptop);
+       RuntimeHashadd_I(gcpointertbl, ptr, tmpheaptop);
+       //MGCHashadd_I(gcpointertbl, ptr, tmpheaptop);
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       //}
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xcdcc);
+       BAMBOO_DEBUGPRINT_REG(ptr);
+       BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+       BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
+#endif
+       if(host != BAMBOO_NUM_OF_CORE) {
+         // send the original host core with the mapping info
+         send_msg_3(host, GCLOBJMAPPING, ptr, tmpheaptop);
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xcdcd);
+         BAMBOO_DEBUGPRINT_REG(ptr);
+         BAMBOO_DEBUGPRINT_REG(tmpheaptop);
+#endif
+       }                         // if(host != BAMBOO_NUM_OF_CORE)
+       tmpheaptop += isize;
+
+       // update bamboo_smemtbl
+       bamboo_smemtbl[b] += isize;
+      }                   // if(remain < isize) else ...
+    }             // while(gc_lobjmoreItems())
+    if(cpysize > 0) {
+      // close current block, fill the header
+      BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
+      *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
+      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;                   // add the size of the header
+    } else {
+      tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
+    }
+    gcheaptop = tmpheaptop;
+
+  }       // if(tomove == 0)
+
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xea07);
+  BAMBOO_DEBUGPRINT_REG(gcheaptop);
 #endif
-       
-       bamboo_free_block = 0;
+
+  bamboo_free_block = 0;
   int tbound = 0;
   do {
-               tbound = (bamboo_free_block<NUMCORES4GC)?
-                       BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
-               if(bamboo_smemtbl[bamboo_free_block] == tbound) {
-                       bamboo_free_block++;
-               } else {
-                       // the first non-full partition
-                       break;
-               }
-       } while(true);
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xea08);
-       BAMBOO_DEBUGPRINT_REG(gcheaptop);
+    tbound = (bamboo_free_block<NUMCORES4GC) ?
+             BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+    if(bamboo_smemtbl[bamboo_free_block] == tbound) {
+      bamboo_free_block++;
+    } else {
+      // the first non-full partition
+      break;
+    }
+  } while(true);
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xea08);
+  BAMBOO_DEBUGPRINT_REG(gcheaptop);
 #endif
 } // void moveLObjs()
 
 inline void markObj(void * objptr) {
-       if(objptr == NULL) {
-               return;
-       }
-       if(ISSHAREDOBJ(objptr)) {
-               int host = hostcore(objptr);
-               if(BAMBOO_NUM_OF_CORE == host) {
-                       // on this core
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       if(((int *)objptr)[6] == INIT) {
-                               // this is the first time that this object is discovered,
-                               // set the flag as DISCOVERED
-                               ((int *)objptr)[6] = DISCOVERED;
-                               gc_enqueue_I(objptr);  
-                       }
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               } else {
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xbbbb);
-                       BAMBOO_DEBUGPRINT_REG(host);
-                       BAMBOO_DEBUGPRINT_REG(objptr);
-#endif
-                       // check if this obj has been forwarded
-                       if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
+  if(objptr == NULL) {
+    return;
+  }
+  if(ISSHAREDOBJ(objptr)) {
+    int host = hostcore(objptr);
+    if(BAMBOO_NUM_OF_CORE == host) {
+      // on this core
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      if(((int *)objptr)[6] == INIT) {
+       // this is the first time that this object is discovered,
+       // set the flag as DISCOVERED
+       ((int *)objptr)[6] = DISCOVERED;
+       gc_enqueue_I(objptr);
+      }
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    } else {
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xbbbb);
+      BAMBOO_DEBUGPRINT_REG(host);
+      BAMBOO_DEBUGPRINT_REG(objptr);
+#endif
+      // check if this obj has been forwarded
+      if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
 #ifdef GC_PROFILE
-                               unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+       unsigned long long ttime = BAMBOO_GET_EXE_TIME();
 #endif
-                               // send a msg to host informing that objptr is active
-                               send_msg_2(host, GCMARKEDOBJ, objptr);
+       // send a msg to host informing that objptr is active
+       send_msg_2(host, GCMARKEDOBJ, objptr);
 #ifdef GC_PROFILE
-                               marktime += BAMBOO_GET_EXE_TIME() - ttime;
-                               num_markrequest++;
+       marktime += BAMBOO_GET_EXE_TIME() - ttime;
+       num_markrequest++;
 #endif
-                               gcself_numsendobjs++;
-                               MGCHashadd(gcforwardobjtbl, (int)objptr);
-                       }
-               }
-       } else {
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-               gc_enqueue_I(objptr);
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-       } // if(ISSHAREDOBJ(objptr))
-} // void markObj(void * objptr) 
+       gcself_numsendobjs++;
+       MGCHashadd(gcforwardobjtbl, (int)objptr);
+      }
+    }
+  } else {
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    gc_enqueue_I(objptr);
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+  }       // if(ISSHAREDOBJ(objptr))
+} // void markObj(void * objptr)
 
 // enqueue root objs
 inline void tomark(struct garbagelist * stackptr) {
-       if(MARKPHASE != gcphase) {
+  if(MARKPHASE != gcphase) {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(gcphase);
+    BAMBOO_DEBUGPRINT_REG(gcphase);
 #endif
-               BAMBOO_EXIT(0xb101);
-       }
-       gcbusystatus = true;
-       gcnumlobjs = 0;
-       
-       int i,j;
-       // enqueue current stack 
-       while(stackptr!=NULL) {
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe501);
-               BAMBOO_DEBUGPRINT_REG(stackptr->size);
-               BAMBOO_DEBUGPRINT_REG(stackptr->next);
-               BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
-#endif
-               for(i=0; i<stackptr->size; i++) {
-                       if(stackptr->array[i] != NULL) {
-                         markObj(stackptr->array[i]);
-                       }
-               }
-               stackptr=stackptr->next;
-       }
+    BAMBOO_EXIT(0xb101);
+  }
+  gcbusystatus = true;
+  gcnumlobjs = 0;
+
+  int i,j;
+  // enqueue current stack
+  while(stackptr!=NULL) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xe501);
+    BAMBOO_DEBUGPRINT_REG(stackptr->size);
+    BAMBOO_DEBUGPRINT_REG(stackptr->next);
+    BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
+#endif
+    for(i=0; i<stackptr->size; i++) {
+      if(stackptr->array[i] != NULL) {
+       markObj(stackptr->array[i]);
+      }
+    }
+    stackptr=stackptr->next;
+  }
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe503);
-#endif
-       // enqueue objectsets
-       if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-               for(i=0; i<NUMCLASSES; i++) {
-                       struct parameterwrapper ** queues = 
-                               objectqueues[BAMBOO_NUM_OF_CORE][i];
-                       int length = numqueues[BAMBOO_NUM_OF_CORE][i];
-                       for(j = 0; j < length; ++j) {
-                               struct parameterwrapper * parameter = queues[j];
-                               struct ObjectHash * set=parameter->objectset;
-                               struct ObjectNode * ptr=set->listhead;
-                               while(ptr!=NULL) {
-                                       markObj((void *)ptr->key);
-                                       ptr=ptr->lnext;
-                               }
-                       }
-               }
+  BAMBOO_DEBUGPRINT(0xe503);
+#endif
+  // enqueue objectsets
+  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+    for(i=0; i<NUMCLASSES; i++) {
+      struct parameterwrapper ** queues =
+        objectqueues[BAMBOO_NUM_OF_CORE][i];
+      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
+      for(j = 0; j < length; ++j) {
+       struct parameterwrapper * parameter = queues[j];
+       struct ObjectHash * set=parameter->objectset;
+       struct ObjectNode * ptr=set->listhead;
+       while(ptr!=NULL) {
+         markObj((void *)ptr->key);
+         ptr=ptr->lnext;
        }
+      }
+    }
+  }
 
-       // euqueue current task descriptor
-       if(currtpd != NULL) {
+  // enqueue current task descriptor
+  if(currtpd != NULL) {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe504);
+    BAMBOO_DEBUGPRINT(0xe504);
 #endif
-               for(i=0; i<currtpd->numParameters; i++) {
-                       markObj(currtpd->parameterArray[i]);
-               }
-       }
+    for(i=0; i<currtpd->numParameters; i++) {
+      markObj(currtpd->parameterArray[i]);
+    }
+  }
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe505);
+  BAMBOO_DEBUGPRINT(0xe505);
 #endif
-       // euqueue active tasks
-       if(activetasks != NULL) {
-               struct genpointerlist * ptr=activetasks->list;
-               while(ptr!=NULL) {
-                       struct taskparamdescriptor *tpd=ptr->src;
-                       int i;
-                       for(i=0; i<tpd->numParameters; i++) {
-                               markObj(tpd->parameterArray[i]);
-                       }
-                       ptr=ptr->inext;
-               }
-       }
+  // enqueue active tasks
+  if(activetasks != NULL) {
+    struct genpointerlist * ptr=activetasks->list;
+    while(ptr!=NULL) {
+      struct taskparamdescriptor *tpd=ptr->src;
+      int i;
+      for(i=0; i<tpd->numParameters; i++) {
+       markObj(tpd->parameterArray[i]);
+      }
+      ptr=ptr->inext;
+    }
+  }
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe506);
+  BAMBOO_DEBUGPRINT(0xe506);
 #endif
-       // enqueue cached transferred obj
-       struct QueueItem * tmpobjptr =  getHead(&objqueue);
-       while(tmpobjptr != NULL) {
-               struct transObjInfo * objInfo = 
-                       (struct transObjInfo *)(tmpobjptr->objectptr); 
-               markObj(objInfo->objptr);
-               tmpobjptr = getNextQueueItem(tmpobjptr);
-       }
+  // enqueue cached transferred obj
+  struct QueueItem * tmpobjptr =  getHead(&objqueue);
+  while(tmpobjptr != NULL) {
+    struct transObjInfo * objInfo =
+      (struct transObjInfo *)(tmpobjptr->objectptr);
+    markObj(objInfo->objptr);
+    tmpobjptr = getNextQueueItem(tmpobjptr);
+  }
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe507);
+  BAMBOO_DEBUGPRINT(0xe507);
 #endif
-       // enqueue cached objs to be transferred
-       struct QueueItem * item = getHead(totransobjqueue);
-       while(item != NULL) {
-               struct transObjInfo * totransobj = 
-                       (struct transObjInfo *)(item->objectptr);
-               markObj(totransobj->objptr);
-               item = getNextQueueItem(item);
-       } // while(item != NULL)
+  // enqueue cached objs to be transferred
+  struct QueueItem * item = getHead(totransobjqueue);
+  while(item != NULL) {
+    struct transObjInfo * totransobj =
+      (struct transObjInfo *)(item->objectptr);
+    markObj(totransobj->objptr);
+    item = getNextQueueItem(item);
+  }       // while(item != NULL)
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe508);
+  BAMBOO_DEBUGPRINT(0xe508);
 #endif
-       // enqueue lock related info
-       for(i = 0; i < runtime_locklen; ++i) {
-        markObj((void *)(runtime_locks[i].redirectlock));
-        if(runtime_locks[i].value != NULL) {
-                markObj((void *)(runtime_locks[i].value));
-        }
-       }
+  // enqueue lock related info
+  for(i = 0; i < runtime_locklen; ++i) {
+    markObj((void *)(runtime_locks[i].redirectlock));
+    if(runtime_locks[i].value != NULL) {
+      markObj((void *)(runtime_locks[i].value));
+    }
+  }
 
 } // void tomark(struct garbagelist * stackptr)
 
-inline void mark(bool isfirst, 
-                            struct garbagelist * stackptr) {
+inline void mark(bool isfirst,
+                 struct garbagelist * stackptr) {
 #ifdef DEBUG
-       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
+  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed01);
 #endif
-       if(isfirst) {
+  if(isfirst) {
 #ifdef DEBUG
-               if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
+    if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed02);
 #endif
-               // enqueue root objs
-               tomark(stackptr);
-               gccurr_heaptop = 0; // record the size of all active objs in this core
-                                 // aligned but does not consider block boundaries
-               gcmarkedptrbound = 0;
-       }
+    // enqueue root objs
+    tomark(stackptr);
+    gccurr_heaptop = 0;             // record the size of all active objs in this core
+                                    // aligned but does not consider block boundaries
+    gcmarkedptrbound = 0;
+  }
+#ifdef DEBUG
+  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03);
+#endif
+  int isize = 0;
+  bool checkfield = true;
+  bool sendStall = false;
+  // mark phase
+  while(MARKPHASE == gcphase) {
+#ifdef DEBUG
+    if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04);
+#endif
+    while(true) {
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      bool hasItems = gc_moreItems2_I();
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xed05);
+#endif
+      if(!hasItems) {
+       break;
+      }
+      sendStall = false;
+      gcbusystatus = true;
+      checkfield = true;
+      void * ptr = gc_dequeue2_I();
+
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT_REG(ptr);
+#endif
+      int size = 0;
+      int isize = 0;
+      int type = 0;
+      // check if it is a shared obj
+      if(ISSHAREDOBJ(ptr)) {
+       // a shared obj, check if it is a local obj on this core
+       int host = hostcore(ptr);
+       bool islocal = (host == BAMBOO_NUM_OF_CORE);
+       if(islocal) {
+         bool isnotmarked = (((int *)ptr)[6] == DISCOVERED);
+         if(isLarge(ptr, &type, &size) && isnotmarked) {
+           // ptr is a large object and not marked or enqueued
+#ifdef DEBUG
+           BAMBOO_DEBUGPRINT(0xecec);
+           BAMBOO_DEBUGPRINT_REG(ptr);
+           BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
+#endif
+           BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+           gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
+           gcnumlobjs++;
+           BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+           // mark this obj
+           ((int *)ptr)[6] = MARKED;
+         } else if(isnotmarked) {
+           // ptr is an unmarked active object on this core
+           ALIGNSIZE(size, &isize);
+           gccurr_heaptop += isize;
+#ifdef DEBUG
+           BAMBOO_DEBUGPRINT(0xaaaa);
+           BAMBOO_DEBUGPRINT_REG(ptr);
+           BAMBOO_DEBUGPRINT_REG(isize);
+           BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
+#endif
+           // mark this obj
+           ((int *)ptr)[6] = MARKED;
+
+           if(ptr + size > gcmarkedptrbound) {
+             gcmarkedptrbound = ptr + size;
+           }                                     // if(ptr + size > gcmarkedptrbound)
+         } else {
+           // ptr is not an active obj or has been marked
+           checkfield = false;
+         }                              // if(isLarge(ptr, &type, &size)) else ...
+       }                         /* can never reach here
+                                                else {
+                                  #ifdef DEBUG
+                                       if(BAMBOO_NUM_OF_CORE == 0) {
+                                       BAMBOO_DEBUGPRINT(0xbbbb);
+                                       BAMBOO_DEBUGPRINT_REG(host);
+                                       BAMBOO_DEBUGPRINT_REG(ptr);
+                                       }
+                                  #endif
+                                       // check if this obj has been forwarded
+                                       if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
+                                               // send a msg to host informing that ptr is active
+                                               send_msg_2(host, GCMARKEDOBJ, ptr);
+                                               gcself_numsendobjs++;
+                                               MGCHashadd(gcforwardobjtbl, (int)ptr);
+                                       }
+                                       checkfield = false;
+                                    }// if(isLocal(ptr)) else ...*/
+      }                   // if(ISSHAREDOBJ(ptr))
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xed06);
+#endif
+
+      if(checkfield) {
+       // scan all pointers in ptr
+       unsigned INTPTR * pointer;
+       pointer=pointerarray[type];
+       if (pointer==0) {
+         /* Array of primitives */
+         /* Do nothing */
+       } else if (((INTPTR)pointer)==1) {
+         /* Array of pointers */
+         struct ArrayObject *ao=(struct ArrayObject *) ptr;
+         int length=ao->___length___;
+         int j;
+         for(j=0; j<length; j++) {
+           void *objptr =
+             ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
+           markObj(objptr);
+         }
+       } else {
+         INTPTR size=pointer[0];
+         int i;
+         for(i=1; i<=size; i++) {
+           unsigned int offset=pointer[i];
+           void * objptr=*((void **)(((char *)ptr)+offset));
+           markObj(objptr);
+         }
+       }                         // if (pointer==0) else if ... else ...
+      }                   // if(checkfield)
+    }             // while(gc_moreItems2())
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xed07);
+#endif
+    gcbusystatus = false;
+    // send mark finish msg to core coordinator
+    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xed08);
+#endif
+      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+      gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
+      gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
+      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
+    } else {
+      if(!sendStall) {
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xed09);
+#endif
+       send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
+                  gcself_numsendobjs, gcself_numreceiveobjs);
+       sendStall = true;
+      }
+    }             // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xed0a);
+#endif
+
+    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
 #ifdef DEBUG
-       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed03); 
-#endif
-       int isize = 0;
-       bool checkfield = true;
-       bool sendStall = false;
-       // mark phase
-       while(MARKPHASE == gcphase) {
-#ifdef DEBUG 
-               if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT(0xed04); 
-#endif
-               while(true) {
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       bool hasItems = gc_moreItems2_I();
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-#ifdef DEBUG 
-                       BAMBOO_DEBUGPRINT(0xed05); 
-#endif
-                       if(!hasItems) {
-                               break;
-                       }
-                       sendStall = false;
-                       gcbusystatus = true;
-                       checkfield = true;
-                       void * ptr = gc_dequeue2_I();
-
-#ifdef DEBUG 
-                       BAMBOO_DEBUGPRINT_REG(ptr);
-#endif
-                       int size = 0;
-                       int isize = 0;
-                       int type = 0;
-                       // check if it is a shared obj
-                       if(ISSHAREDOBJ(ptr)) {
-                               // a shared obj, check if it is a local obj on this core
-                               int host = hostcore(ptr);
-                               bool islocal = (host == BAMBOO_NUM_OF_CORE);
-                               if(islocal) {
-                                       bool isnotmarked = (((int *)ptr)[6] == DISCOVERED);
-                                       if(isLarge(ptr, &type, &size) && isnotmarked) {
-                                               // ptr is a large object and not marked or enqueued
-#ifdef DEBUG
-                                               BAMBOO_DEBUGPRINT(0xecec);
-                                               BAMBOO_DEBUGPRINT_REG(ptr);
-                                               BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
-#endif
-                                               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                                               gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
-                                               gcnumlobjs++;
-                                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                                               // mark this obj
-                                               ((int *)ptr)[6] = MARKED;
-                                       } else if(isnotmarked) {
-                                               // ptr is an unmarked active object on this core
-                                               ALIGNSIZE(size, &isize);
-                                               gccurr_heaptop += isize;
-#ifdef DEBUG
-                                               BAMBOO_DEBUGPRINT(0xaaaa);
-                                               BAMBOO_DEBUGPRINT_REG(ptr);
-                                               BAMBOO_DEBUGPRINT_REG(isize);
-                                               BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
-#endif
-                                               // mark this obj
-                                               ((int *)ptr)[6] = MARKED;
-
-                                               if(ptr + size > gcmarkedptrbound) {
-                                                       gcmarkedptrbound = ptr + size;
-                                               } // if(ptr + size > gcmarkedptrbound)
-                                       } else {
-                                               // ptr is not an active obj or has been marked
-                                               checkfield = false;
-                                       }// if(isLarge(ptr, &type, &size)) else ...
-                               } /* can never reach here 
-                                                else {
-#ifdef DEBUG
-                                       if(BAMBOO_NUM_OF_CORE == 0) {
-                                       BAMBOO_DEBUGPRINT(0xbbbb);
-                                       BAMBOO_DEBUGPRINT_REG(host);
-                                       BAMBOO_DEBUGPRINT_REG(ptr);
-                                       }
-#endif
-                                       // check if this obj has been forwarded
-                                       if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
-                                               // send a msg to host informing that ptr is active
-                                               send_msg_2(host, GCMARKEDOBJ, ptr);
-                                               gcself_numsendobjs++;
-                                               MGCHashadd(gcforwardobjtbl, (int)ptr);
-                                       }
-                                       checkfield = false;
-                               }// if(isLocal(ptr)) else ...*/
-                       } // if(ISSHAREDOBJ(ptr))
-#ifdef DEBUG 
-                       BAMBOO_DEBUGPRINT(0xed06); 
-#endif
-
-                       if(checkfield) {
-                               // scan all pointers in ptr
-                               unsigned INTPTR * pointer;
-                               pointer=pointerarray[type];
-                               if (pointer==0) {
-                                       /* Array of primitives */
-                                       /* Do nothing */
-                               } else if (((INTPTR)pointer)==1) {
-                                       /* Array of pointers */
-                                       struct ArrayObject *ao=(struct ArrayObject *) ptr;
-                                       int length=ao->___length___;
-                                       int j;
-                                       for(j=0; j<length; j++) {
-                                               void *objptr = 
-                                                       ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
-                                               markObj(objptr);
-                                       }
-                               } else {
-                                       INTPTR size=pointer[0];
-                                       int i;
-                                       for(i=1; i<=size; i++) {
-                                               unsigned int offset=pointer[i];
-                                               void * objptr=*((void **)(((char *)ptr)+offset));
-                                               markObj(objptr);
-                                       }
-                               } // if (pointer==0) else if ... else ...
-                       } // if(checkfield)
-               } // while(gc_moreItems2())
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xed07);
-#endif
-               gcbusystatus = false;
-               // send mark finish msg to core coordinator
-               if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xed08); 
-#endif
-                       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-                       gcnumsendobjs[BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
-                       gcnumreceiveobjs[BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
-                       gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
-               } else {
-                       if(!sendStall) {
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xed09);
-#endif
-                               send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
-                                                                        gcself_numsendobjs, gcself_numreceiveobjs);
-                               sendStall = true;
-                       }
-               } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
-#ifdef DEBUG 
-               BAMBOO_DEBUGPRINT(0xed0a); 
-#endif
-
-               if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xed0b); 
-#endif
-                       return;
-               }
-       } // while(MARKPHASE == gcphase)
+      BAMBOO_DEBUGPRINT(0xed0b);
+#endif
+      return;
+    }
+  }       // while(MARKPHASE == gcphase)
 } // mark()
 
 inline void compact2Heaptophelper_I(int coren,
-                                             int* p,
-                                                                                                                                       int* numblocks,
-                                                                                                                                       int* remain) {
-       int b;
-       int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
-       if(STARTUPCORE == coren) {
-               gctomove = true;
-               gcmovestartaddr = *p;
-               gcdstcore = gctopcore;
-               gcblock2fill = *numblocks + 1;
-       } else {
-               send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1); 
-       }
+                                    int* p,
+                                    int* numblocks,
+                                    int* remain) {
+  int b;
+  int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
+  if(STARTUPCORE == coren) {
+    gctomove = true;
+    gcmovestartaddr = *p;
+    gcdstcore = gctopcore;
+    gcblock2fill = *numblocks + 1;
+  } else {
+    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1);
+  }
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT_REG(coren);
-       BAMBOO_DEBUGPRINT_REG(gctopcore);
-       BAMBOO_DEBUGPRINT_REG(*p);
-       BAMBOO_DEBUGPRINT_REG(*numblocks+1);
+  BAMBOO_DEBUGPRINT_REG(coren);
+  BAMBOO_DEBUGPRINT_REG(gctopcore);
+  BAMBOO_DEBUGPRINT_REG(*p);
+  BAMBOO_DEBUGPRINT_REG(*numblocks+1);
 #endif
-       if(memneed < *remain) {
+  if(memneed < *remain) {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xd104);
+    BAMBOO_DEBUGPRINT(0xd104);
 #endif
-               *p = *p + memneed;
-               gcrequiredmems[coren] = 0;
-               gcloads[gctopcore] += memneed;
-               *remain = *remain - memneed;
-       } else {
+    *p = *p + memneed;
+    gcrequiredmems[coren] = 0;
+    gcloads[gctopcore] += memneed;
+    *remain = *remain - memneed;
+  } else {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xd105);
-#endif
-               // next available block
-               *p = *p + *remain;
-               gcfilledblocks[gctopcore] += 1;
-               int newbase = 0;
-               BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
-               gcloads[gctopcore] = newbase;
-               gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
-               gcstopblock[gctopcore]++;
-               gctopcore = NEXTTOPCORE(gctopblock);
-               gctopblock++;
-               *numblocks = gcstopblock[gctopcore];
-               *p = gcloads[gctopcore];
-               BLOCKINDEX(*p, &b);
-               *remain=(b<NUMCORES4GC)?
-                       ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
-          :((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xd106);
-               BAMBOO_DEBUGPRINT_REG(gctopcore);
-               BAMBOO_DEBUGPRINT_REG(*p);
-               BAMBOO_DEBUGPRINT_REG(b);
-               BAMBOO_DEBUGPRINT_REG(*remain);
-#endif
-       } // if(memneed < remain)
-       gcmovepending--;
+    BAMBOO_DEBUGPRINT(0xd105);
+#endif
+    // next available block
+    *p = *p + *remain;
+    gcfilledblocks[gctopcore] += 1;
+    int newbase = 0;
+    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
+    gcloads[gctopcore] = newbase;
+    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
+    gcstopblock[gctopcore]++;
+    gctopcore = NEXTTOPCORE(gctopblock);
+    gctopblock++;
+    *numblocks = gcstopblock[gctopcore];
+    *p = gcloads[gctopcore];
+    BLOCKINDEX(*p, &b);
+    *remain=(b<NUMCORES4GC) ?
+             ((BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L)))
+            : ((BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE)));
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xd106);
+    BAMBOO_DEBUGPRINT_REG(gctopcore);
+    BAMBOO_DEBUGPRINT_REG(*p);
+    BAMBOO_DEBUGPRINT_REG(b);
+    BAMBOO_DEBUGPRINT_REG(*remain);
+#endif
+  }       // if(memneed < remain)
+  gcmovepending--;
 } // void compact2Heaptophelper_I(int, int*, int*, int*)
 
 inline void compact2Heaptop() {
-       // no cores with spare mem and some cores are blocked with pending move
-       // find the current heap top and make them move to the heap top
-       int p;
-       int numblocks = gcfilledblocks[gctopcore];
-       //BASEPTR(gctopcore, numblocks, &p);
-       p = gcloads[gctopcore];
-       int b;
-       BLOCKINDEX(p, &b);
-       int remain = (b<NUMCORES4GC)?
-               ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
-        :((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
-       // check if the top core finishes
-       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-       if(gccorestatus[gctopcore] != 0) {
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xd101);
-               BAMBOO_DEBUGPRINT_REG(gctopcore);
-#endif
-               // let the top core finishes its own work first
-               compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               return;
-       }
-       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+  // no cores with spare mem and some cores are blocked with pending move
+  // find the current heap top and make them move to the heap top
+  int p;
+  int numblocks = gcfilledblocks[gctopcore];
+  //BASEPTR(gctopcore, numblocks, &p);
+  p = gcloads[gctopcore];
+  int b;
+  BLOCKINDEX(p, &b);
+  int remain = (b<NUMCORES4GC) ?
+               ((BAMBOO_SMEM_SIZE_L)-(p%(BAMBOO_SMEM_SIZE_L)))
+              : ((BAMBOO_SMEM_SIZE)-(p%(BAMBOO_SMEM_SIZE)));
+  // check if the top core finishes
+  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+  if(gccorestatus[gctopcore] != 0) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xd101);
+    BAMBOO_DEBUGPRINT_REG(gctopcore);
+#endif
+    // let the top core finish its own work first
+    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    return;
+  }
+  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xd102);
-       BAMBOO_DEBUGPRINT_REG(gctopcore);
-       BAMBOO_DEBUGPRINT_REG(p);
-       BAMBOO_DEBUGPRINT_REG(b);
-       BAMBOO_DEBUGPRINT_REG(remain);
+  BAMBOO_DEBUGPRINT(0xd102);
+  BAMBOO_DEBUGPRINT_REG(gctopcore);
+  BAMBOO_DEBUGPRINT_REG(p);
+  BAMBOO_DEBUGPRINT_REG(b);
+  BAMBOO_DEBUGPRINT_REG(remain);
 #endif
-       for(int i = 0; i < NUMCORES4GC; i++) {
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-               if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
+  for(int i = 0; i < NUMCORES4GC; i++) {
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xd103);
+      BAMBOO_DEBUGPRINT(0xd103);
 #endif
-                       compact2Heaptophelper_I(i, &p, &numblocks, &remain);
-                       if(gccorestatus[gctopcore] != 0) {
+      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
+      if(gccorestatus[gctopcore] != 0) {
 #ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xd101);
-                               BAMBOO_DEBUGPRINT_REG(gctopcore);
+       BAMBOO_DEBUGPRINT(0xd101);
+       BAMBOO_DEBUGPRINT_REG(gctopcore);
 #endif
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                               // the top core is not free now
-                               return;
-                       }
-               } // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-       } // for(i = 0; i < NUMCORES4GC; i++)
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       // the top core is not free now
+       return;
+      }
+    }             // if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0))
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+  }       // for(i = 0; i < NUMCORES4GC; i++)
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xd106);
+  BAMBOO_DEBUGPRINT(0xd106);
 #endif
 } // void compact2Heaptop()
 
 inline void resolvePendingMoveRequest() {
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xeb01);
+  BAMBOO_DEBUGPRINT(0xeb01);
 #endif
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xeeee);
-               for(int k = 0; k < NUMCORES4GC; k++) {
-                       BAMBOO_DEBUGPRINT(0xf000+k);
-                       BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
-                       BAMBOO_DEBUGPRINT_REG(gcloads[k]);
-                       BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
-                       BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
-               }
-               BAMBOO_DEBUGPRINT(0xffff);
-#endif
-       int i;
-       int j;
-       bool nosparemem = true;
-       bool haspending = false;
-       bool hasrunning = false;
-       bool noblock = false;
-       int dstcore = 0; // the core who need spare mem
-       int sourcecore = 0; // the core who has spare mem
-       for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC);) {
-               if(nosparemem) {
-                       // check if there are cores with spare mem
-                       if(gccorestatus[i] == 0) {
-                               // finished working, check if it still have spare mem
-                               if(gcfilledblocks[i] < gcstopblock[i]) {
-                                       // still have spare mem
-                                       nosparemem = false;
-                                       sourcecore = i;
-                               } // if(gcfilledblocks[i] < gcstopblock[i]) else ...
-                       }
-                       i++;
-               } // if(nosparemem)
-               if(!haspending) {
-                       if(gccorestatus[j] != 0) {
-                               // not finished, check if it has pending move requests
-                               if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
-                                       dstcore = j;
-                                       haspending = true;
-                               } else {
-                                       hasrunning = true;
-                               } // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
-                       } // if(gccorestatus[i] == 0) else ...
-                       j++;
-               } // if(!haspending)
-               if(!nosparemem && haspending) {
-                       // find match
-                       int tomove = 0;
-                       int startaddr = 0;
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore, 
-                                                                              gcrequiredmems[dstcore], 
-                                                                                                                                                                                          &tomove, 
-                                                                                                                                                                                          &startaddr);
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xeb02);
-                       BAMBOO_DEBUGPRINT_REG(sourcecore);
-                       BAMBOO_DEBUGPRINT_REG(dstcore);
-                       BAMBOO_DEBUGPRINT_REG(startaddr);
-                       BAMBOO_DEBUGPRINT_REG(tomove);
-#endif
-                       if(STARTUPCORE == dstcore) {
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xeb03);
-#endif
-                               gcdstcore = sourcecore;
-                               gctomove = true;
-                               gcmovestartaddr = startaddr;
-                               gcblock2fill = tomove;
-                       } else {
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xeb04);
-#endif
-                               send_msg_4(dstcore, GCMOVESTART, sourcecore, 
-                                               startaddr, tomove);
-                       }
-                       gcmovepending--;
-                       nosparemem = true;
-                       haspending = false;
-                       noblock = true;
-               }
-       } // for(i = 0; i < NUMCORES4GC; i++)
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xcccc);
-       BAMBOO_DEBUGPRINT_REG(hasrunning);
-       BAMBOO_DEBUGPRINT_REG(haspending);
-       BAMBOO_DEBUGPRINT_REG(noblock);
-#endif
-
-       if(!hasrunning && !noblock) {
-               gcphase = SUBTLECOMPACTPHASE;
-               compact2Heaptop();
-       }
+  BAMBOO_DEBUGPRINT(0xeeee);
+  for(int k = 0; k < NUMCORES4GC; k++) {
+    BAMBOO_DEBUGPRINT(0xf000+k);
+    BAMBOO_DEBUGPRINT_REG(gccorestatus[k]);
+    BAMBOO_DEBUGPRINT_REG(gcloads[k]);
+    BAMBOO_DEBUGPRINT_REG(gcfilledblocks[k]);
+    BAMBOO_DEBUGPRINT_REG(gcstopblock[k]);
+  }
+  BAMBOO_DEBUGPRINT(0xffff);
+#endif
+  int i;
+  int j;
+  bool nosparemem = true;
+  bool haspending = false;
+  bool hasrunning = false;
+  bool noblock = false;
+  int dstcore = 0;       // the core who need spare mem
+  int sourcecore = 0;       // the core who has spare mem
+  for(i = j = 0; (i < NUMCORES4GC) && (j < NUMCORES4GC); ) {
+    if(nosparemem) {
+      // check if there are cores with spare mem
+      if(gccorestatus[i] == 0) {
+       // finished working, check if it still have spare mem
+       if(gcfilledblocks[i] < gcstopblock[i]) {
+         // still have spare mem
+         nosparemem = false;
+         sourcecore = i;
+       }                         // if(gcfilledblocks[i] < gcstopblock[i]) else ...
+      }
+      i++;
+    }             // if(nosparemem)
+    if(!haspending) {
+      if(gccorestatus[j] != 0) {
+       // not finished, check if it has pending move requests
+       if((gcfilledblocks[j]==gcstopblock[j])&&(gcrequiredmems[j]>0)) {
+         dstcore = j;
+         haspending = true;
+       } else {
+         hasrunning = true;
+       }                         // if((gcfilledblocks[i] == gcstopblock[i])...) else ...
+      }                   // if(gccorestatus[i] == 0) else ...
+      j++;
+    }             // if(!haspending)
+    if(!nosparemem && haspending) {
+      // find match
+      int tomove = 0;
+      int startaddr = 0;
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
+                                                 gcrequiredmems[dstcore],
+                                                 &tomove,
+                                                 &startaddr);
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xeb02);
+      BAMBOO_DEBUGPRINT_REG(sourcecore);
+      BAMBOO_DEBUGPRINT_REG(dstcore);
+      BAMBOO_DEBUGPRINT_REG(startaddr);
+      BAMBOO_DEBUGPRINT_REG(tomove);
+#endif
+      if(STARTUPCORE == dstcore) {
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xeb03);
+#endif
+       gcdstcore = sourcecore;
+       gctomove = true;
+       gcmovestartaddr = startaddr;
+       gcblock2fill = tomove;
+      } else {
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xeb04);
+#endif
+       send_msg_4(dstcore, GCMOVESTART, sourcecore,
+                  startaddr, tomove);
+      }
+      gcmovepending--;
+      nosparemem = true;
+      haspending = false;
+      noblock = true;
+    }
+  }       // for(i = 0; i < NUMCORES4GC; i++)
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xcccc);
+  BAMBOO_DEBUGPRINT_REG(hasrunning);
+  BAMBOO_DEBUGPRINT_REG(haspending);
+  BAMBOO_DEBUGPRINT_REG(noblock);
+#endif
+
+  if(!hasrunning && !noblock) {
+    gcphase = SUBTLECOMPACTPHASE;
+    compact2Heaptop();
+  }
 
 } // void resovePendingMoveRequest()
 
 struct moveHelper {
-       int numblocks; // block num for heap
-       INTPTR base; // base virtual address of current heap block
-       INTPTR ptr; // virtual address of current heap top
-       int offset; // offset in current heap block
-       int blockbase; // virtual address of current small block to check
-       int blockbound; // bound virtual address of current small blcok
-       int sblockindex; // index of the small blocks
-       int top; // real size of current heap block to check
-       int bound; // bound size of current heap block to check
+  int numblocks;       // number of the heap block in use (the block index handed to BASEPTR)
+  INTPTR base;       // base virtual address of the current heap block
+  INTPTR ptr;       // current position: scan cursor (source side) or heap top (destination side)
+  int offset;       // offset of ptr inside the current block; set to BAMBOO_CACHE_LINE_SIZE to leave room for the block header
+  int blockbase;       // virtual address of the current small block to check
+  int blockbound;       // end virtual address of the current small block
+  int sblockindex;       // index of the current small block
+  int top;       // running size used on the destination side, compared against bound
+  int bound;       // destination side: size limit for top; source side: end address of the current heap block
 }; // struct moveHelper
 
 // if out of boundary of valid shared memory, return false, else return true
 inline bool nextSBlock(struct moveHelper * orig) {
-       orig->blockbase = orig->blockbound;
-       bool sbchanged = false;
-#ifdef DEBUG 
-       BAMBOO_DEBUGPRINT(0xecc0);
-       BAMBOO_DEBUGPRINT_REG(orig->blockbase);
-       BAMBOO_DEBUGPRINT_REG(orig->blockbound);
-       BAMBOO_DEBUGPRINT_REG(orig->bound);
-       BAMBOO_DEBUGPRINT_REG(orig->ptr);
+  orig->blockbase = orig->blockbound;       // continue where the previous small block ended
+  bool sbchanged = false;       // set once sblockindex moves to a different sblock
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xecc0);
+  BAMBOO_DEBUGPRINT_REG(orig->blockbase);
+  BAMBOO_DEBUGPRINT_REG(orig->blockbound);
+  BAMBOO_DEBUGPRINT_REG(orig->bound);
+  BAMBOO_DEBUGPRINT_REG(orig->ptr);
 #endif
 outernextSBlock:
-       // check if across a big block
-       if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound) 
-                       || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0) 
-                       || ((*((int*)orig->blockbase))==0)) {
+  // leave the current heap block when the scan reached its bound or a zero word is found
+  if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)
+     || ((orig->ptr != NULL) && (*((int*)orig->ptr))==0)
+     || ((*((int*)orig->blockbase))==0)) {
 innernextSBlock:
-               // end of current heap block, jump to next one
-               orig->numblocks++;
-#ifdef DEBUG 
-               BAMBOO_DEBUGPRINT(0xecc1);
-               BAMBOO_DEBUGPRINT_REG(orig->numblocks);
-#endif
-               BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
-#ifdef DEBUG 
-               BAMBOO_DEBUGPRINT(orig->base);
-#endif
-               if(orig->base >= BAMBOO_BASE_VA + BAMBOO_SHARED_MEM_SIZE) {
-                       // out of boundary
-                       orig->ptr = orig->base; // set current ptr to out of boundary too
-                       return false;
-               }
-               orig->bound = orig->base + BAMBOO_SMEM_SIZE;
-               orig->blockbase = orig->base;
-               orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE;
-               sbchanged = true;
-               int blocknum = 0;
-               BLOCKINDEX(orig->base, &blocknum);
-               if(bamboo_smemtbl[blocknum] == 0) {
-                       // goto next block
-                       goto innernextSBlock;
-               }
-       } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
-               orig->sblockindex += 1;
-               sbchanged = true;
-       } // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
-
-       // check if this sblock should be skipped or have special start point
-       if(gcsbstarttbl[orig->sblockindex] == -1) {
-               // goto next sblock
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xecc2);
-#endif
-               orig->sblockindex += 1;
-               orig->blockbase += BAMBOO_SMEM_SIZE;
-               goto outernextSBlock;
-       } else if((gcsbstarttbl[orig->sblockindex] != 0) 
-                       && (sbchanged)) {
-               // the first time to access this SBlock
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xecc3);
-#endif
-               // not start from the very beginning
-               orig->blockbase = gcsbstarttbl[orig->sblockindex];
-       } // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
-
-       // setup information for this sblock
-       orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
-       orig->offset = BAMBOO_CACHE_LINE_SIZE;
-       orig->ptr = orig->blockbase + orig->offset;
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xecc4);
-       BAMBOO_DEBUGPRINT_REG(orig->base);
-       BAMBOO_DEBUGPRINT_REG(orig->bound);
-       BAMBOO_DEBUGPRINT_REG(orig->ptr);
-       BAMBOO_DEBUGPRINT_REG(orig->blockbound);
-       BAMBOO_DEBUGPRINT_REG(orig->blockbase);
-       BAMBOO_DEBUGPRINT_REG(orig->offset);
-#endif
-       if(orig->ptr >= orig->bound) {
-               // met a lobj, move to next block
-               goto innernextSBlock;
-       }
+    // end of current heap block, jump to next one
+    orig->numblocks++;
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xecc1);
+    BAMBOO_DEBUGPRINT_REG(orig->numblocks);
+#endif
+    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));       // fills orig->base for this core's block number orig->numblocks
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(orig->base);
+#endif
+    if(orig->base >= BAMBOO_BASE_VA + BAMBOO_SHARED_MEM_SIZE) {
+      // out of boundary
+      orig->ptr = orig->base;                   // set current ptr to out of boundary too
+      return false;
+    }
+    orig->bound = orig->base + BAMBOO_SMEM_SIZE;
+    orig->blockbase = orig->base;
+    orig->sblockindex = (orig->blockbase-BAMBOO_BASE_VA)/BAMBOO_SMEM_SIZE;
+    sbchanged = true;
+    int blocknum = 0;
+    BLOCKINDEX(orig->base, &blocknum);
+    if(bamboo_smemtbl[blocknum] == 0) {
+      // the table entry for this block is 0: try the next block
+      goto innernextSBlock;
+    }
+  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
+    orig->sblockindex += 1;       // blockbase sits on an sblock boundary, so a new sblock starts here
+    sbchanged = true;
+  }       // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
+
+  // check if this sblock should be skipped or have special start point
+  if(gcsbstarttbl[orig->sblockindex] == -1) {
+    // -1 marks an sblock to skip: advance by one sblock and re-run the checks
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xecc2);
+#endif
+    orig->sblockindex += 1;
+    orig->blockbase += BAMBOO_SMEM_SIZE;
+    goto outernextSBlock;
+  } else if((gcsbstarttbl[orig->sblockindex] != 0)
+            && (sbchanged)) {
+    // the first time to access this SBlock
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xecc3);
+#endif
+    // do not start at the very beginning; use the start address stored in the table
+    orig->blockbase = gcsbstarttbl[orig->sblockindex];
+  }       // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
+
+  // setup information for this sblock
+  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));       // the first int of the block is read as its length
+  orig->offset = BAMBOO_CACHE_LINE_SIZE;       // data starts one cache line after blockbase
+  orig->ptr = orig->blockbase + orig->offset;
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xecc4);
+  BAMBOO_DEBUGPRINT_REG(orig->base);
+  BAMBOO_DEBUGPRINT_REG(orig->bound);
+  BAMBOO_DEBUGPRINT_REG(orig->ptr);
+  BAMBOO_DEBUGPRINT_REG(orig->blockbound);
+  BAMBOO_DEBUGPRINT_REG(orig->blockbase);
+  BAMBOO_DEBUGPRINT_REG(orig->offset);
+#endif
+  if(orig->ptr >= orig->bound) {
+    // met a lobj (presumably a large object -- confirm), move to next block
+    goto innernextSBlock;
+  }
 
-       return true;
-} // bool nextSBlock(struct moveHelper * orig) 
+  return true;       // orig now points at the first data position of a usable sblock
+} // bool nextSBlock(struct moveHelper * orig)
 
 // return false if there are no available data to compact
-inline bool initOrig_Dst(struct moveHelper * orig, 
-                                    struct moveHelper * to) {
-       // init the dst ptr
-       to->numblocks = 0;
-       to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
-       to->bound = BAMBOO_SMEM_SIZE_L;
-       BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
-
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xef01);
-       BAMBOO_DEBUGPRINT_REG(to->base);
-#endif
-       to->ptr = to->base + to->offset;
-
-       // init the orig ptr
-       orig->numblocks = 0;
-       orig->base = to->base;
-       orig->bound = to->base + BAMBOO_SMEM_SIZE_L;
-       orig->blockbase = orig->base;
-       orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
-#ifdef DEBUG 
-       BAMBOO_DEBUGPRINT(0xef02);
-       BAMBOO_DEBUGPRINT_REG(orig->base);
-       BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
-       BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
-       BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
-#endif
-
-       if(gcsbstarttbl[orig->sblockindex] == -1) {
-#ifdef DEBUG 
-               BAMBOO_DEBUGPRINT(0xef03);
-#endif
-               // goto next sblock
-               orig->blockbound = 
-                       BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
-               return nextSBlock(orig);
-       } else if(gcsbstarttbl[orig->sblockindex] != 0) {
-#ifdef DEBUG 
-               BAMBOO_DEBUGPRINT(0xef04);
-#endif
-               orig->blockbase = gcsbstarttbl[orig->sblockindex];
-       }
-#ifdef DEBUG 
-       BAMBOO_DEBUGPRINT(0xef05);
+inline bool initOrig_Dst(struct moveHelper * orig,
+                         struct moveHelper * to) {
+  // init the dst ptr: block 0 of this core, positioned just past the header
+  to->numblocks = 0;
+  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
+  to->bound = BAMBOO_SMEM_SIZE_L;
+  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
+
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xef01);
+  BAMBOO_DEBUGPRINT_REG(to->base);
 #endif
-       orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
-       orig->offset = BAMBOO_CACHE_LINE_SIZE;
-       orig->ptr = orig->blockbase + orig->offset;
+  to->ptr = to->base + to->offset;
+
+  // init the orig ptr: it starts in the same block as the destination
+  orig->numblocks = 0;
+  orig->base = to->base;
+  orig->bound = to->base + BAMBOO_SMEM_SIZE_L;
+  orig->blockbase = orig->base;
+  orig->sblockindex = (orig->base - BAMBOO_BASE_VA) / BAMBOO_SMEM_SIZE;
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xef06);
-       BAMBOO_DEBUGPRINT_REG(orig->base);
+  BAMBOO_DEBUGPRINT(0xef02);
+  BAMBOO_DEBUGPRINT_REG(orig->base);
+  BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
+  BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
+  BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
 #endif
-       return true;
-} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to) 
+
+  if(gcsbstarttbl[orig->sblockindex] == -1) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xef03);
+#endif
+    // first sblock is marked -1: set blockbound to its end and let nextSBlock find the next one
+    orig->blockbound =
+      BAMBOO_BASE_VA+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
+    return nextSBlock(orig);
+  } else if(gcsbstarttbl[orig->sblockindex] != 0) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xef04);
+#endif
+    orig->blockbase = gcsbstarttbl[orig->sblockindex];       // start from the address stored in the table
+  }
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xef05);
+#endif
+  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));       // the first int of the block is read as its length
+  orig->offset = BAMBOO_CACHE_LINE_SIZE;
+  orig->ptr = orig->blockbase + orig->offset;
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xef06);
+  BAMBOO_DEBUGPRINT_REG(orig->base);
+#endif
+  return true;
+} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
 
 inline void nextBlock(struct moveHelper * to) {
-       to->top = to->bound + BAMBOO_CACHE_LINE_SIZE; // header!
-       to->bound += BAMBOO_SMEM_SIZE;
-       to->numblocks++;
-       BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
-       to->offset = BAMBOO_CACHE_LINE_SIZE;
-       to->ptr = to->base + to->offset;
+  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE;       // top jumps to the old bound plus one cache line for the new block's header
+  to->bound += BAMBOO_SMEM_SIZE;       // the limit grows by BAMBOO_SMEM_SIZE
+  to->numblocks++;
+  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));       // fills to->base for this core's block number to->numblocks
+  to->offset = BAMBOO_CACHE_LINE_SIZE;       // data starts right after the header
+  to->ptr = to->base + to->offset;
 } // void nextBlock(struct moveHelper * to)
 
 // endaddr does not contain spaces for headers
-inline bool moveobj(struct moveHelper * orig, 
-                               struct moveHelper * to, 
-                                                       int stopblock) {
-       if(stopblock == 0) {
-               return true;
-       }
+inline bool moveobj(struct moveHelper * orig,
+                    struct moveHelper * to,
+                    int stopblock) {
+  if(stopblock == 0) {
+    return true;       // true tells the caller to stop its move loop
+  }
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe201);
-       BAMBOO_DEBUGPRINT_REG(orig->ptr);
-       BAMBOO_DEBUGPRINT_REG(to->ptr);
+  BAMBOO_DEBUGPRINT(0xe201);
+  BAMBOO_DEBUGPRINT_REG(orig->ptr);
+  BAMBOO_DEBUGPRINT_REG(to->ptr);
 #endif
 
-       int type = 0;
-       int size = 0;
-       int mark = 0;
-       int isize = 0;
+  int type = 0;       // first int of the object
+  int size = 0;       // object size as computed from classsize
+  int mark = 0;       // mark flag read from the object
+  int isize = 0;       // size after ALIGNSIZE
 innermoveobj:
-       while((char)(*((int*)(orig->ptr))) == (char)(-2)) {
-               orig->ptr = (int*)(orig->ptr) + 1;
-       }
-       if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
-               if(!nextSBlock(orig)) {
-                       // finished, no more data
-                       return true;
-               }
-               goto innermoveobj;
-       }
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe202);
-       BAMBOO_DEBUGPRINT_REG(orig->ptr);
-       BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
-#endif
-       // check the obj's type, size and mark flag
-       type = ((int *)(orig->ptr))[0];
-       size = 0;
-       if(type == 0) {
-               // end of this block, go to next one
-               if(!nextSBlock(orig)) {
-                       // finished, no more data
-                       return true;
-               }
-               goto innermoveobj;
-       } else if(type < NUMCLASSES) {
-               // a normal object
-               size = classsize[type];
-       } else {        
-               // an array 
-               struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
-               int elementsize=classsize[type];
-               int length=ao->___length___; 
-               size=sizeof(struct ArrayObject)+length*elementsize;
-       }
-       mark = ((int *)(orig->ptr))[6];
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe203);
-       BAMBOO_DEBUGPRINT_REG(orig->ptr);
-       BAMBOO_DEBUGPRINT_REG(size);
-#endif
-       ALIGNSIZE(size, &isize); // no matter is the obj marked or not
-                                // should be able to across it
-       if(mark == MARKED) {
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe204);
-#endif
-               // marked obj, copy it to current heap top
-               // check to see if remaining space is enough
-               if(to->top + isize > to->bound) {
-                       // fill 0 indicating the end of this block
-                       BAMBOO_MEMSET_WH(to->ptr,  '\0', to->bound - to->top);
-                       // fill the header of this block and then go to next block
-       to->offset += to->bound - to->top;
-                       BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
-                       (*((int*)(to->base))) = to->offset;
-                       nextBlock(to);
-                       if(stopblock == to->numblocks) {
-                               // already fulfilled the block
-                               return true;
-                       } // if(stopblock == to->numblocks)
-               } // if(to->top + isize > to->bound)
-               // set the mark field to 2, indicating that this obj has been moved 
-               // and need to be flushed
-               ((int *)(orig->ptr))[6] = COMPACTED;
-               if(to->ptr != orig->ptr) {
-                       if((int)(orig->ptr) < (int)(to->ptr)+size) {
-                         memmove(to->ptr, orig->ptr, size);
-                       } else {
-                               //BAMBOO_WRITE_HINT_CACHE(to->ptr, size);
-                               memcpy(to->ptr, orig->ptr, size);
-                       }
-                       // fill the remaining space with -2
-                       BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
-               }
-               // store mapping info
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-               //mgchashInsert_I(orig->ptr, to->ptr);
-               RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr); 
-               //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-         //}
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xcdce);
-               BAMBOO_DEBUGPRINT_REG(orig->ptr);
-               BAMBOO_DEBUGPRINT_REG(to->ptr);
-#endif
-               gccurr_heaptop -= isize;
-               to->ptr += isize;
-               to->offset += isize;
-               to->top += isize;
-               if(to->top == to->bound) {
-                       // fill the header of this block and then go to next block
-                       BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
-                       (*((int*)(to->base))) = to->offset;
-                       nextBlock(to);
-               }
-       } // if(mark == 1)
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe205);
-#endif
-       // move to next obj
-       orig->ptr += size;
-       
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT_REG(isize);
-       BAMBOO_DEBUGPRINT_REG(size);
-       BAMBOO_DEBUGPRINT_REG(orig->ptr);
-       BAMBOO_DEBUGPRINT_REG(orig->bound);
-#endif
-       if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
+  while((char)(*((int*)(orig->ptr))) == (char)(-2)) {       // skip int-sized words that truncate to -2, the filler byte used for alignment padding below
+    orig->ptr = (int*)(orig->ptr) + 1;
+  }
+  if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
+    if(!nextSBlock(orig)) {
+      // finished, no more data
+      return true;
+    }
+    goto innermoveobj;
+  }
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe206);
-#endif
-               if(!nextSBlock(orig)) {
-                       // finished, no more data
-                       return true;
-               }
-       }
+  BAMBOO_DEBUGPRINT(0xe202);
+  BAMBOO_DEBUGPRINT_REG(orig->ptr);
+  BAMBOO_DEBUGPRINT(((int *)(orig->ptr))[0]);
+#endif
+  // check the obj's type, size and mark flag
+  type = ((int *)(orig->ptr))[0];
+  size = 0;
+  if(type == 0) {
+    // end of this block, go to next one
+    if(!nextSBlock(orig)) {
+      // finished, no more data
+      return true;
+    }
+    goto innermoveobj;
+  } else if(type < NUMCLASSES) {
+    // a normal object
+    size = classsize[type];
+  } else {
+    // an array
+    struct ArrayObject *ao=(struct ArrayObject *)(orig->ptr);
+    int elementsize=classsize[type];
+    int length=ao->___length___;
+    size=sizeof(struct ArrayObject)+length*elementsize;       // array header plus all elements
+  }
+  mark = ((int *)(orig->ptr))[6];       // the mark flag is read from int slot 6 of the object
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xe203);
+  BAMBOO_DEBUGPRINT_REG(orig->ptr);
+  BAMBOO_DEBUGPRINT_REG(size);
+#endif
+  ALIGNSIZE(size, &isize);       // computed for marked and unmarked objs alike;
+                                 // isize is the size used for the destination bookkeeping below
+  if(mark == MARKED) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xe204);
+#endif
+    // marked obj, copy it to current heap top
+    // check to see if remaining space is enough
+    if(to->top + isize > to->bound) {
+      // fill 0 indicating the end of this block
+      BAMBOO_MEMSET_WH(to->ptr,  '\0', to->bound - to->top);
+      // fill the header of this block and then go to next block
+      to->offset += to->bound - to->top;
+      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
+      (*((int*)(to->base))) = to->offset;       // the first int of the header records the block's final offset
+      nextBlock(to);
+      if(stopblock == to->numblocks) {
+       // already fulfilled the block
+       return true;
+      }                   // if(stopblock == to->numblocks)
+    }             // if(to->top + isize > to->bound)
+                  // set the mark field to COMPACTED, indicating that this obj has been moved
+                  // and need to be flushed
+    ((int *)(orig->ptr))[6] = COMPACTED;
+    if(to->ptr != orig->ptr) {
+      if((int)(orig->ptr) < (int)(to->ptr)+size) {       // source starts inside the destination range: regions may overlap
+       memmove(to->ptr, orig->ptr, size);
+      } else {
+       //BAMBOO_WRITE_HINT_CACHE(to->ptr, size);
+       memcpy(to->ptr, orig->ptr, size);
+      }
+      // fill the remaining space with -2
+      BAMBOO_MEMSET_WH(to->ptr+size, -2, isize-size);
+    }
+    // store mapping info (old address -> new address)
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    //mgchashInsert_I(orig->ptr, to->ptr);
+    RuntimeHashadd_I(gcpointertbl, orig->ptr, to->ptr);
+    //MGCHashadd_I(gcpointertbl, orig->ptr, to->ptr);
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    //}
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xcdce);
+    BAMBOO_DEBUGPRINT_REG(orig->ptr);
+    BAMBOO_DEBUGPRINT_REG(to->ptr);
+#endif
+    gccurr_heaptop -= isize;
+    to->ptr += isize;
+    to->offset += isize;
+    to->top += isize;
+    if(to->top == to->bound) {
+      // fill the header of this block and then go to next block
+      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
+      (*((int*)(to->base))) = to->offset;
+      nextBlock(to);
+    }
+  }       // if(mark == MARKED)
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xe205);
+#endif
+  // move to next obj
+  orig->ptr += size;       // advances by the unaligned size; -2 filler after it is skipped by the loop at innermoveobj
+
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT_REG(isize);
+  BAMBOO_DEBUGPRINT_REG(size);
+  BAMBOO_DEBUGPRINT_REG(orig->ptr);
+  BAMBOO_DEBUGPRINT_REG(orig->bound);
+#endif
+  if((orig->ptr > orig->bound) || (orig->ptr == orig->blockbound)) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xe206);
+#endif
+    if(!nextSBlock(orig)) {
+      // finished, no more data
+      return true;
+    }
+  }
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe207);
-       BAMBOO_DEBUGPRINT_REG(orig->ptr);
+  BAMBOO_DEBUGPRINT(0xe207);
+  BAMBOO_DEBUGPRINT_REG(orig->ptr);
 #endif
-       return false;
+  return false;       // not finished: the caller keeps calling while objs remain
 } //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
 
 // should be invoked with interrupt closed
 inline int assignSpareMem_I(int sourcecore,
-                                       int * requiredmem,
-                                                                                                         int * tomove,
-                                                                                                         int * startaddr) {
-       int b = 0;
-       BLOCKINDEX(gcloads[sourcecore], &b);
-       int boundptr = (b<NUMCORES4GC)?((b+1)*BAMBOO_SMEM_SIZE_L)
-               :(BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
-       int remain = boundptr - gcloads[sourcecore];
-       int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
-       *startaddr = gcloads[sourcecore];
-       *tomove = gcfilledblocks[sourcecore] + 1;
-       if(memneed < remain) {
-               gcloads[sourcecore] += memneed;
-               return 0;
-       } else {
-               // next available block
-               gcfilledblocks[sourcecore] += 1;
-               int newbase = 0;
-               BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
-               gcloads[sourcecore] = newbase;
-               return requiredmem-remain;
-       }
+                            int * requiredmem,
+                            int * tomove,
+                            int * startaddr) {
+  int b = 0;
+  BLOCKINDEX(gcloads[sourcecore], &b);
+  int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
+                : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
+  int remain = boundptr - gcloads[sourcecore];
+  int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
+  *startaddr = gcloads[sourcecore];
+  *tomove = gcfilledblocks[sourcecore] + 1;
+  if(memneed < remain) {
+    gcloads[sourcecore] += memneed;
+    return 0;
+  } else {
+    // next available block
+    gcfilledblocks[sourcecore] += 1;
+    int newbase = 0;
+    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
+    gcloads[sourcecore] = newbase;
+    return requiredmem-remain;
+  }
 } // int assignSpareMem_I(int ,int * , int * , int * )
 
 // should be invoked with interrupt closed
 inline bool gcfindSpareMem_I(int * startaddr,
-                                        int * tomove,
-                                                                                                  int * dstcore,
-                                                                                                  int requiredmem,
-                                                                                                  int requiredcore) {
-       for(int k = 0; k < NUMCORES4GC; k++) {
-               if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
-                       // check if this stopped core has enough mem
-                       assignSpareMem_I(k, requiredmem, tomove, startaddr);
-                       *dstcore = k;
-                       return true;
-               }
-       }
-       // if can not find spare mem right now, hold the request
-       gcrequiredmems[requiredcore] = requiredmem;
-       gcmovepending++;
-       return false;
+                             int * tomove,
+                             int * dstcore,
+                             int requiredmem,
+                             int requiredcore) {
+  for(int k = 0; k < NUMCORES4GC; k++) {
+    if((gccorestatus[k] == 0) && (gcfilledblocks[k] < gcstopblock[k])) {
+      // core k has status 0 and blocks left to fill: take its spare mem (the leftover returned by assignSpareMem_I is ignored)
+      assignSpareMem_I(k, requiredmem, tomove, startaddr);
+      *dstcore = k;       // report which core provides the memory
+      return true;
+    }
+  }
+  // no spare mem found right now: record the request as pending for requiredcore
+  gcrequiredmems[requiredcore] = requiredmem;
+  gcmovepending++;
+  return false;
 } //bool gcfindSpareMem_I(int* startaddr,int* tomove,int mem,int core)
 
 inline bool compacthelper(struct moveHelper * orig,
-                                     struct moveHelper * to,
-                                                                                                       int * filledblocks,
-                                                                                                       int * heaptopptr,
-                                                                                                       bool * localcompact) {
-       // scan over all objs in this block, compact the marked objs 
-       // loop stop when finishing either scanning all active objs or 
-       // fulfilled the gcstopblock
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe101);
-       BAMBOO_DEBUGPRINT_REG(gcblock2fill);
-       BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
+                          struct moveHelper * to,
+                          int * filledblocks,
+                          int * heaptopptr,
+                          bool * localcompact) {
+  // scan over all objs in this block, compact the marked objs
+  // loop stop when finishing either scanning all active objs or
+  // fulfilled the gcstopblock
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xe101);
+  BAMBOO_DEBUGPRINT_REG(gcblock2fill);
+  BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
 #endif
 innercompact:
-       while(orig->ptr < gcmarkedptrbound) {
-               bool stop = moveobj(orig, to, gcblock2fill);
-               if(stop) {
-                       break;
-               }
-       } 
-       // if no objs have been compact, do nothing, 
-       // otherwise, fill the header of this block
-       if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
-               BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
-               (*((int*)(to->base))) = to->offset;
-       } else {
-               to->offset = 0;
-               to->ptr = to->base;
-               to->top -= BAMBOO_CACHE_LINE_SIZE;
-       } // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
-       if(*localcompact) {
-               *heaptopptr = to->ptr;
-               *filledblocks = to->numblocks;
-       }
+  while(orig->ptr < gcmarkedptrbound) {
+    bool stop = moveobj(orig, to, gcblock2fill);
+    if(stop) {
+      break;
+    }
+  }
+  // if no objs have been compact, do nothing,
+  // otherwise, fill the header of this block
+  if(to->offset > BAMBOO_CACHE_LINE_SIZE) {
+    BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
+    (*((int*)(to->base))) = to->offset;
+  } else {
+    to->offset = 0;
+    to->ptr = to->base;
+    to->top -= BAMBOO_CACHE_LINE_SIZE;
+  }       // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
+  if(*localcompact) {
+    *heaptopptr = to->ptr;
+    *filledblocks = to->numblocks;
+  }
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe102);
-       BAMBOO_DEBUGPRINT_REG(orig->ptr);
-       BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
-       BAMBOO_DEBUGPRINT_REG(*heaptopptr);
-       BAMBOO_DEBUGPRINT_REG(*filledblocks);
-       BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
+  BAMBOO_DEBUGPRINT(0xe102);
+  BAMBOO_DEBUGPRINT_REG(orig->ptr);
+  BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
+  BAMBOO_DEBUGPRINT_REG(*heaptopptr);
+  BAMBOO_DEBUGPRINT_REG(*filledblocks);
+  BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
 #endif
 
-       // send msgs to core coordinator indicating that the compact is finishing
-       // send compact finish message to core coordinator
-       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-               gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
-               gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
-               if(orig->ptr < gcmarkedptrbound) {
+  // send msgs to core coordinator indicating that the compact is finishing
+  // send compact finish message to core coordinator
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
+    gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
+    if(orig->ptr < gcmarkedptrbound) {
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe103);
+      BAMBOO_DEBUGPRINT(0xe103);
 #endif
-                       // ask for more mem
-                       gctomove = false;
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore, 
-                                                             gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
+      // ask for more mem
+      gctomove = false;
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
+                          gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
 #ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe104);
+       BAMBOO_DEBUGPRINT(0xe104);
 #endif
-                               gctomove = true;
-                       } else {
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       gctomove = true;
+      } else {
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe105);
+       BAMBOO_DEBUGPRINT(0xe105);
 #endif
-                               return false; 
-                       }
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               } else {
+       return false;
+      }
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    } else {
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe106);
+      BAMBOO_DEBUGPRINT(0xe106);
 #endif
-                       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-                       gctomove = false;
-                       return true;
-               }
-       } else {
-               if(orig->ptr < gcmarkedptrbound) {
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe107);
-#endif
-                       // ask for more mem
-                       gctomove = false;
-                       send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE, 
-                                              *filledblocks, *heaptopptr, gccurr_heaptop);
-               } else {
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe108);
-                       BAMBOO_DEBUGPRINT_REG(*heaptopptr);
-#endif
-                       // finish compacting
-                       send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-                                              *filledblocks, *heaptopptr, 0);
-               }
-       } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
-
-       if(orig->ptr < gcmarkedptrbound) {
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe109);
-#endif
-               // still have unpacked obj
-               while(true) {
-                       if(gctomove) {
-                               break;
-                       }
-               };
-               gctomove = false;
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe10a);
-#endif
-
-               to->ptr = gcmovestartaddr;
-               to->numblocks = gcblock2fill - 1;
-               to->bound = (to->numblocks==0)?
-                       BAMBOO_SMEM_SIZE_L:
-                       BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
-               BASEPTR(gcdstcore, to->numblocks, &(to->base));
-               to->offset = to->ptr - to->base;
-               to->top = (to->numblocks==0)?
-                       (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
-               to->base = to->ptr;
-               to->offset = BAMBOO_CACHE_LINE_SIZE;
-               to->ptr += to->offset; // for header
-               to->top += to->offset;
-               if(gcdstcore == BAMBOO_NUM_OF_CORE) {
-                       *localcompact = true;
-               } else {
-                       *localcompact = false;
-               }
-               goto innercompact;
-       }
+      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+      gctomove = false;
+      return true;
+    }
+  } else {
+    if(orig->ptr < gcmarkedptrbound) {
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe10b);
+      BAMBOO_DEBUGPRINT(0xe107);
 #endif
-       return true;
+      // ask for more mem
+      gctomove = false;
+      send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
+                 *filledblocks, *heaptopptr, gccurr_heaptop);
+    } else {
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xe108);
+      BAMBOO_DEBUGPRINT_REG(*heaptopptr);
+#endif
+      // finish compacting
+      send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
+                 *filledblocks, *heaptopptr, 0);
+    }
+  }       // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
+
+  if(orig->ptr < gcmarkedptrbound) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xe109);
+#endif
+    // there are still objs left to compact
+    while(true) {
+      if(gctomove) {
+       break;
+      }
+    }
+    ;
+    gctomove = false;
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xe10a);
+#endif
+
+    to->ptr = gcmovestartaddr;
+    to->numblocks = gcblock2fill - 1;
+    to->bound = (to->numblocks==0) ?
+                BAMBOO_SMEM_SIZE_L :
+                BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
+    BASEPTR(gcdstcore, to->numblocks, &(to->base));
+    to->offset = to->ptr - to->base;
+    to->top = (to->numblocks==0) ?
+              (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
+    to->base = to->ptr;
+    to->offset = BAMBOO_CACHE_LINE_SIZE;
+    to->ptr += to->offset;             // for header
+    to->top += to->offset;
+    if(gcdstcore == BAMBOO_NUM_OF_CORE) {
+      *localcompact = true;
+    } else {
+      *localcompact = false;
+    }
+    goto innercompact;
+  }
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xe10b);
+#endif
+  return true;
 } // void compacthelper()
 
 inline void compact() {
-       if(COMPACTPHASE != gcphase) {
-               BAMBOO_EXIT(0xb102);
-       }
+  if(COMPACTPHASE != gcphase) {
+    BAMBOO_EXIT(0xb102);
+  }
 
-       // initialize pointers for comapcting
-       struct moveHelper * orig = 
-               (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
-       struct moveHelper * to = 
-               (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+  // initialize pointers for compacting
+  struct moveHelper * orig =
+    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+  struct moveHelper * to =
+    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
 
-       if(!initOrig_Dst(orig, to)) {
-               // no available data to compact
-               // send compact finish msg to STARTUP core
+  if(!initOrig_Dst(orig, to)) {
+    // no available data to compact
+    // send compact finish msg to STARTUP core
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe001);
-               BAMBOO_DEBUGPRINT_REG(to->base);
+    BAMBOO_DEBUGPRINT(0xe001);
+    BAMBOO_DEBUGPRINT_REG(to->base);
 #endif
-               send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-                                      0, to->base, 0);
-               RUNFREE(orig);
-               RUNFREE(to);
-               return;
-       }
+    send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
+               0, to->base, 0);
+    RUNFREE(orig);
+    RUNFREE(to);
+    return;
+  }
+
+  int filledblocks = 0;
+  INTPTR heaptopptr = 0;
+  bool localcompact = true;
+  compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
 
-       int filledblocks = 0;
-       INTPTR heaptopptr = 0;
-       bool localcompact = true;
-       compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
-       
-       RUNFREE(orig);
-       RUNFREE(to);
+  RUNFREE(orig);
+  RUNFREE(to);
 } // compact()
 
-// if return NULL, means 
+// a NULL return value means either:
 //   1. objptr is NULL
 //   2. objptr is not a shared obj
 // in these cases, remain the original value is OK
 inline void * flushObj(void * objptr) {
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe401);
+  BAMBOO_DEBUGPRINT(0xe401);
 #endif
-       if(objptr == NULL) {
-               return NULL;
-       }
-       void * dstptr = NULL;
-       if(ISSHAREDOBJ(objptr)) {
+  if(objptr == NULL) {
+    return NULL;
+  }
+  void * dstptr = NULL;
+  if(ISSHAREDOBJ(objptr)) {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe402);
-               BAMBOO_DEBUGPRINT_REG(objptr);
+    BAMBOO_DEBUGPRINT(0xe402);
+    BAMBOO_DEBUGPRINT_REG(objptr);
 #endif
-               // a shared obj ptr, change to new address
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    // a shared obj ptr, change to new address
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef GC_PROFILE
-               unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+    unsigned long long ttime = BAMBOO_GET_EXE_TIME();
 #endif
-               //dstptr = mgchashSearch(objptr);
-               RuntimeHashget(gcpointertbl, objptr, &dstptr);
+    //dstptr = mgchashSearch(objptr);
+    RuntimeHashget(gcpointertbl, objptr, &dstptr);
 #ifdef GC_PROFILE
-               flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
-#endif
-               //MGCHashget(gcpointertbl, objptr, &dstptr);
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(dstptr);
-#endif
-               if(NULL == dstptr) {
-                       // no mapping info
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe403);
-                       BAMBOO_DEBUGPRINT_REG(objptr);
-                       BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
-#endif
-                       if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
-                               // error! the obj is right on this core, but cannot find it
-                               BAMBOO_DEBUGPRINT_REG(objptr);
-                               BAMBOO_EXIT(0xb103);
-                               // assume that the obj has not been moved, use the original address
-                               //dstptr = objptr;
-                       } else {
-                               // send msg to host core for the mapping info
-                               gcobj2map = (int)objptr;
-                               gcismapped = false;
-                               gcmappedobj = NULL;
+    flushstalltime += BAMBOO_GET_EXE_TIME()-ttime;
+#endif
+    //MGCHashget(gcpointertbl, objptr, &dstptr);
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT_REG(dstptr);
+#endif
+    if(NULL == dstptr) {
+      // no mapping info
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xe403);
+      BAMBOO_DEBUGPRINT_REG(objptr);
+      BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
+#endif
+      if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) {
+       // error! the obj is right on this core, but cannot find it
+       BAMBOO_DEBUGPRINT_REG(objptr);
+       BAMBOO_EXIT(0xb103);
+       // assume that the obj has not been moved, use the original address
+       //dstptr = objptr;
+      } else {
+       // send msg to host core for the mapping info
+       gcobj2map = (int)objptr;
+       gcismapped = false;
+       gcmappedobj = NULL;
 #ifdef GC_PROFILE
-                               // TODO
-                               num_mapinforequest++;
-                               //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+       // TODO
+       num_mapinforequest++;
+       //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
 #endif
 #ifdef GC_PROFILE
-                               unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
-#endif
-                               // the first time require the mapping, send msg to the hostcore 
-                               // for the mapping info
-                               send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr, 
-                                                                        BAMBOO_NUM_OF_CORE);
-                               while(true) {
-                                       if(gcismapped) {
-                                               break;
-                                       }
-                               }
+       unsigned long long ttimet = BAMBOO_GET_EXE_TIME();
+#endif
+       // first time the mapping is required: send a msg to the hostcore
+       // asking for the mapping info
+       send_msg_3(hostcore(objptr), GCMAPREQUEST, (int)objptr,
+                  BAMBOO_NUM_OF_CORE);
+       while(true) {
+         if(gcismapped) {
+           break;
+         }
+       }
 #ifdef GC_PROFILE
-                               flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
+       flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimet;
 #endif
 #ifdef GC_PROFILE
-                               // TODO
-                               //flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
+       // TODO
+       //flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
 #endif
-                               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                               //dstptr = mgchashSearch(objptr);
-                               RuntimeHashget(gcpointertbl, objptr, &dstptr);
-                               //MGCHashget(gcpointertbl, objptr, &dstptr);
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                       } // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
+       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+       //dstptr = mgchashSearch(objptr);
+       RuntimeHashget(gcpointertbl, objptr, &dstptr);
+       //MGCHashget(gcpointertbl, objptr, &dstptr);
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      }                   // if(hostcore(objptr) == BAMBOO_NUM_OF_CORE) else ...
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG(dstptr);
+      BAMBOO_DEBUGPRINT_REG(dstptr);
 #endif
-               } // if(NULL == dstptr) 
-       }// if(ISSHAREDOBJ(objptr))
-       // if not a shared obj, return NULL to indicate no need to flush
+    }             // if(NULL == dstptr)
+  }      // if(ISSHAREDOBJ(objptr))
+         // if not a shared obj, return NULL to indicate no need to flush
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe404);
+  BAMBOO_DEBUGPRINT(0xe404);
 #endif
-       return dstptr;
+  return dstptr;
 } // void flushObj(void * objptr)
 
 inline void flushRuntimeObj(struct garbagelist * stackptr) {
-       int i,j;
-       // flush current stack 
-       while(stackptr!=NULL) {
-               for(i=0; i<stackptr->size; i++) {
-                       if(stackptr->array[i] != NULL) {
-                               void * dst = flushObj(stackptr->array[i]);
-                               if(dst != NULL) {
-                                       stackptr->array[i] = dst;
-                               }
-                       }
-               }
-               stackptr=stackptr->next;
+  int i,j;
+  // flush current stack
+  while(stackptr!=NULL) {
+    for(i=0; i<stackptr->size; i++) {
+      if(stackptr->array[i] != NULL) {
+       void * dst = flushObj(stackptr->array[i]);
+       if(dst != NULL) {
+         stackptr->array[i] = dst;
        }
+      }
+    }
+    stackptr=stackptr->next;
+  }
 
-       // flush objectsets
-       if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-               for(i=0; i<NUMCLASSES; i++) {
-                       struct parameterwrapper ** queues = 
-                               objectqueues[BAMBOO_NUM_OF_CORE][i];
-                       int length = numqueues[BAMBOO_NUM_OF_CORE][i];
-                       for(j = 0; j < length; ++j) {
-                               struct parameterwrapper * parameter = queues[j];
-                               struct ObjectHash * set=parameter->objectset;
-                               struct ObjectNode * ptr=set->listhead;
-                               while(ptr!=NULL) {
-                                       void * dst = flushObj((void *)ptr->key);
-                                       if(dst != NULL) {
-                                               ptr->key = dst;
-                                       }
-                                       ptr=ptr->lnext;
-                               }
-                               ObjectHashrehash(set);
-                       }
-               }
+  // flush objectsets
+  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+    for(i=0; i<NUMCLASSES; i++) {
+      struct parameterwrapper ** queues =
+        objectqueues[BAMBOO_NUM_OF_CORE][i];
+      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
+      for(j = 0; j < length; ++j) {
+       struct parameterwrapper * parameter = queues[j];
+       struct ObjectHash * set=parameter->objectset;
+       struct ObjectNode * ptr=set->listhead;
+       while(ptr!=NULL) {
+         void * dst = flushObj((void *)ptr->key);
+         if(dst != NULL) {
+           ptr->key = dst;
+         }
+         ptr=ptr->lnext;
        }
+       ObjectHashrehash(set);
+      }
+    }
+  }
 
-       // flush current task descriptor
-       if(currtpd != NULL) {
-               for(i=0; i<currtpd->numParameters; i++) {
-                       void * dst = flushObj(currtpd->parameterArray[i]);
-                       if(dst != NULL) {
-                               currtpd->parameterArray[i] = dst;
-                       }
-               }
-       }
+  // flush current task descriptor
+  if(currtpd != NULL) {
+    for(i=0; i<currtpd->numParameters; i++) {
+      void * dst = flushObj(currtpd->parameterArray[i]);
+      if(dst != NULL) {
+       currtpd->parameterArray[i] = dst;
+      }
+    }
+  }
 
-       // flush active tasks
-       if(activetasks != NULL) {
-               struct genpointerlist * ptr=activetasks->list;
-               while(ptr!=NULL) {
-                       struct taskparamdescriptor *tpd=ptr->src;
-                       int i;
-                       for(i=0; i<tpd->numParameters; i++) {
-                               void * dst = flushObj(tpd->parameterArray[i]);
-                               if(dst != NULL) {
-                                       tpd->parameterArray[i] = dst;
-                               }
-                       }
-                       ptr=ptr->inext;
-               }
-               genrehash(activetasks);
+  // flush active tasks
+  if(activetasks != NULL) {
+    struct genpointerlist * ptr=activetasks->list;
+    while(ptr!=NULL) {
+      struct taskparamdescriptor *tpd=ptr->src;
+      int i;
+      for(i=0; i<tpd->numParameters; i++) {
+       void * dst = flushObj(tpd->parameterArray[i]);
+       if(dst != NULL) {
+         tpd->parameterArray[i] = dst;
        }
+      }
+      ptr=ptr->inext;
+    }
+    genrehash(activetasks);
+  }
 
-       // flush cached transferred obj
-       struct QueueItem * tmpobjptr =  getHead(&objqueue);
-       while(tmpobjptr != NULL) {
-               struct transObjInfo * objInfo = 
-                       (struct transObjInfo *)(tmpobjptr->objectptr); 
-               void * dst = flushObj(objInfo->objptr);
-               if(dst != NULL) {
-                       objInfo->objptr = dst;
-               }
-               tmpobjptr = getNextQueueItem(tmpobjptr);
-       }
+  // flush cached transferred obj
+  struct QueueItem * tmpobjptr =  getHead(&objqueue);
+  while(tmpobjptr != NULL) {
+    struct transObjInfo * objInfo =
+      (struct transObjInfo *)(tmpobjptr->objectptr);
+    void * dst = flushObj(objInfo->objptr);
+    if(dst != NULL) {
+      objInfo->objptr = dst;
+    }
+    tmpobjptr = getNextQueueItem(tmpobjptr);
+  }
 
-       // flush cached objs to be transferred
-       struct QueueItem * item = getHead(totransobjqueue);
-       while(item != NULL) {
-               struct transObjInfo * totransobj = 
-                       (struct transObjInfo *)(item->objectptr);
-               void * dst = flushObj(totransobj->objptr);
-               if(dst != NULL) {
-                       totransobj->objptr = dst;
-               }
-               item = getNextQueueItem(item);
-       } // while(item != NULL)
-
-       // enqueue lock related info
-       for(i = 0; i < runtime_locklen; ++i) {
-         void * dst = flushObj(runtime_locks[i].redirectlock);
-               if(dst != NULL) {
-                       runtime_locks[i].redirectlock = (int)dst;
-               }
-               if(runtime_locks[i].value != NULL) {
-                       void * dst=flushObj(runtime_locks[i].value);
-                       if(dst != NULL) {
-                               runtime_locks[i].value = (int)dst;
-                       }
-         }
-       }
+  // flush cached objs to be transferred
+  struct QueueItem * item = getHead(totransobjqueue);
+  while(item != NULL) {
+    struct transObjInfo * totransobj =
+      (struct transObjInfo *)(item->objectptr);
+    void * dst = flushObj(totransobj->objptr);
+    if(dst != NULL) {
+      totransobj->objptr = dst;
+    }
+    item = getNextQueueItem(item);
+  }       // while(item != NULL)
+
+  // flush lock related info
+  for(i = 0; i < runtime_locklen; ++i) {
+    void * dst = flushObj(runtime_locks[i].redirectlock);
+    if(dst != NULL) {
+      runtime_locks[i].redirectlock = (int)dst;
+    }
+    if(runtime_locks[i].value != NULL) {
+      void * dst=flushObj(runtime_locks[i].value);
+      if(dst != NULL) {
+       runtime_locks[i].value = (int)dst;
+      }
+    }
+  }
 
 } // void flushRuntimeObj(struct garbagelist * stackptr)
 
 inline void flush(struct garbagelist * stackptr) {
 #ifdef GC_PROFILE
-       if(BAMBOO_NUM_OF_CORE == 0) {
-       BAMBOO_DEBUGPRINT(0xcccc);
-       BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
-       }
+  if(BAMBOO_NUM_OF_CORE == 0) {
+    BAMBOO_DEBUGPRINT(0xcccc);
+    BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+  }
 #endif
-       flushRuntimeObj(stackptr);
+  flushRuntimeObj(stackptr);
 #ifdef GC_PROFILE
-       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+#endif
+
+  while(true) {
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    bool hasItems = gc_moreItems_I();
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    if(!hasItems) {
+      break;
+    }
+
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xe301);
+#endif
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    void * ptr = gc_dequeue_I();
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    if(ISSHAREDOBJ(ptr)) {
+      // should be a local shared obj and should have mapping info
+      ptr = flushObj(ptr);
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xe302);
+      BAMBOO_DEBUGPRINT_REG(ptr);
+      BAMBOO_DEBUGPRINT_REG(tptr);
+      BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
+#endif
+      if(ptr == NULL) {
+       BAMBOO_EXIT(0xb105);
+      }
+    }             // if(ISSHAREDOBJ(ptr))
+    if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
+      int type = ((int *)(ptr))[0];
+      // scan all pointers in ptr
+      unsigned INTPTR * pointer;
+      pointer=pointerarray[type];
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xe303);
+      BAMBOO_DEBUGPRINT_REG(pointer);
+#endif
+      if (pointer==0) {
+       /* Array of primitives */
+       /* Do nothing */
+      } else if (((INTPTR)pointer)==1) {
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xe304);
+#endif
+       /* Array of pointers */
+       struct ArrayObject *ao=(struct ArrayObject *) ptr;
+       int length=ao->___length___;
+       int j;
+       for(j=0; j<length; j++) {
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xe305);
 #endif
-
-       while(true) {
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-               bool hasItems = gc_moreItems_I();
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               if(!hasItems) {
-                       break;
-               }
-               
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe301);
-#endif
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-               void * ptr = gc_dequeue_I();
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               if(ISSHAREDOBJ(ptr)) {
-                       // should be a local shared obj and should have mapping info
-                       ptr = flushObj(ptr);
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe302);
-                       BAMBOO_DEBUGPRINT_REG(ptr);
-                       BAMBOO_DEBUGPRINT_REG(tptr);
-                       BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
-#endif
-                       if(ptr == NULL) {
-                               BAMBOO_EXIT(0xb105);
-                       }
-               } // if(ISSHAREDOBJ(ptr))
-               if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED)) {
-                       int type = ((int *)(ptr))[0];
-                       // scan all pointers in ptr
-                       unsigned INTPTR * pointer;
-                       pointer=pointerarray[type];
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe303);
-                       BAMBOO_DEBUGPRINT_REG(pointer);
-#endif
-                       if (pointer==0) {
-                               /* Array of primitives */
-                               /* Do nothing */
-                       } else if (((INTPTR)pointer)==1) {
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe304);
-#endif
-                               /* Array of pointers */
-                               struct ArrayObject *ao=(struct ArrayObject *) ptr;
-                               int length=ao->___length___;
-                               int j;
-                               for(j=0; j<length; j++) {
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xe305);
-#endif
-                                       void *objptr=
-                                               ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT_REG(objptr);
-#endif
-                                       if(objptr != NULL) {
-                                               void * dst = flushObj(objptr);
-                                               if(dst != NULL) {
-                                                       ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
-                                               }
-                                       }
-                               }
-                       } else {
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe306);
-#endif
-                               INTPTR size=pointer[0];
-                               int i;
-                               for(i=1; i<=size; i++) {
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xe307);
-#endif
-                                       unsigned int offset=pointer[i];
-                                       void * objptr=*((void **)(((char *)ptr)+offset));
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT_REG(objptr);
-#endif
-                                       if(objptr != NULL) {
-                                               void * dst = flushObj(objptr);
-                                               if(dst != NULL) {
-                                                       *((void **)(((char *)ptr)+offset)) = dst;
-                                               }
-                                       }
-                               } // for(i=1; i<=size; i++) 
-                       } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
-                       // restore the mark field, indicating that this obj has been flushed
-                       if(ISSHAREDOBJ(ptr)) {
-                               ((int *)(ptr))[6] = INIT;
-                       }
-               } // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
-       } // while(gc_moreItems())
-#ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe308);
+         void *objptr=
+           ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT_REG(objptr);
+#endif
+         if(objptr != NULL) {
+           void * dst = flushObj(objptr);
+           if(dst != NULL) {
+             ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
+           }
+         }
+       }
+      } else {
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xe306);
+#endif
+       INTPTR size=pointer[0];
+       int i;
+       for(i=1; i<=size; i++) {
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xe307);
+#endif
+         unsigned int offset=pointer[i];
+         void * objptr=*((void **)(((char *)ptr)+offset));
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT_REG(objptr);
+#endif
+         if(objptr != NULL) {
+           void * dst = flushObj(objptr);
+           if(dst != NULL) {
+             *((void **)(((char *)ptr)+offset)) = dst;
+           }
+         }
+       }                         // for(i=1; i<=size; i++)
+      }                   // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
+                          // restore the mark field, indicating that this obj has been flushed
+      if(ISSHAREDOBJ(ptr)) {
+       ((int *)(ptr))[6] = INIT;
+      }
+    }             // if((!ISSHAREDOBJ(ptr)) || (((int *)(ptr))[6] == COMPACTED))
+  }       // while(gc_moreItems())
+#ifdef DEBUG
+  BAMBOO_DEBUGPRINT(0xe308);
 #endif
 #ifdef GC_PROFILE
-       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
 #endif
 
-       // TODO bug here: the startup core contains all lobjs' info, thus all the 
-       // lobjs are flushed in sequence.
-       // flush lobjs
-       while(gc_lobjmoreItems_I()) {
+  // TODO bug here: the startup core contains all lobjs' info, thus all the
+  // lobjs are flushed in sequence.
+  // flush lobjs
+  while(gc_lobjmoreItems_I()) {
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe309);
+    BAMBOO_DEBUGPRINT(0xe309);
 #endif
-               void * ptr = gc_lobjdequeue_I(NULL, NULL);
-               ptr = flushObj(ptr);
+    void * ptr = gc_lobjdequeue_I(NULL, NULL);
+    ptr = flushObj(ptr);
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe30a);
-               BAMBOO_DEBUGPRINT_REG(ptr);
-               BAMBOO_DEBUGPRINT_REG(tptr);
-               BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
+    BAMBOO_DEBUGPRINT(0xe30a);
+    BAMBOO_DEBUGPRINT_REG(ptr);
+    BAMBOO_DEBUGPRINT_REG(tptr);
+    BAMBOO_DEBUGPRINT_REG(((int *)(tptr))[0]);
 #endif
-               if(ptr == NULL) {
-                       BAMBOO_EXIT(0x106);
-               }
-               if(((int *)(ptr))[6] == COMPACTED) {
-                       int type = ((int *)(ptr))[0];
-                       // scan all pointers in ptr
-                       unsigned INTPTR * pointer;
-                       pointer=pointerarray[type];
+    if(ptr == NULL) {
+      BAMBOO_EXIT(0x106);
+    }
+    if(((int *)(ptr))[6] == COMPACTED) {
+      int type = ((int *)(ptr))[0];
+      // scan all pointers in ptr
+      unsigned INTPTR * pointer;
+      pointer=pointerarray[type];
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe30b);
-                       BAMBOO_DEBUGPRINT_REG(pointer);
+      BAMBOO_DEBUGPRINT(0xe30b);
+      BAMBOO_DEBUGPRINT_REG(pointer);
 #endif
-                       if (pointer==0) {
-                               /* Array of primitives */
-                               /* Do nothing */
-                       } else if (((INTPTR)pointer)==1) {
+      if (pointer==0) {
+       /* Array of primitives */
+       /* Do nothing */
+      } else if (((INTPTR)pointer)==1) {
 #ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe30c);
+       BAMBOO_DEBUGPRINT(0xe30c);
 #endif
-                               /* Array of pointers */
-                               struct ArrayObject *ao=(struct ArrayObject *) ptr;
-                               int length=ao->___length___;
-                               int j;
-                               for(j=0; j<length; j++) {
+       /* Array of pointers */
+       struct ArrayObject *ao=(struct ArrayObject *) ptr;
+       int length=ao->___length___;
+       int j;
+       for(j=0; j<length; j++) {
 #ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xe30d);
+         BAMBOO_DEBUGPRINT(0xe30d);
 #endif
-                                       void *objptr=
-                                               ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
+         void *objptr=
+           ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
 #ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT_REG(objptr);
+         BAMBOO_DEBUGPRINT_REG(objptr);
 #endif
-                                       if(objptr != NULL) {
-                                               void * dst = flushObj(objptr);
-                                               if(dst != NULL) {
-                                                       ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
-                                               }       
-                                       }
-                               }
-                       } else {
+         if(objptr != NULL) {
+           void * dst = flushObj(objptr);
+           if(dst != NULL) {
+             ((void **)(((char *)&ao->___length___)+sizeof(int)))[j] = dst;
+           }
+         }
+       }
+      } else {
 #ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe30e);
+       BAMBOO_DEBUGPRINT(0xe30e);
 #endif
-                               INTPTR size=pointer[0];
-                               int i;
-                               for(i=1; i<=size; i++) {
+       INTPTR size=pointer[0];
+       int i;
+       for(i=1; i<=size; i++) {
 #ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xe30f);
+         BAMBOO_DEBUGPRINT(0xe30f);
 #endif
-                                       unsigned int offset=pointer[i];
-                                       void * objptr=*((void **)(((char *)ptr)+offset));
+         unsigned int offset=pointer[i];
+         void * objptr=*((void **)(((char *)ptr)+offset));
 
 #ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT_REG(objptr);
+         BAMBOO_DEBUGPRINT_REG(objptr);
 #endif
-                                       if(objptr != NULL) {
-                                               void * dst = flushObj(objptr);
-                                               if(dst != NULL) {
-                                                       *((void **)(((char *)ptr)+offset)) = dst;
-                                               }
-                                       }
-                               } // for(i=1; i<=size; i++) 
-                       } // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
-                       // restore the mark field, indicating that this obj has been flushed
-                       ((int *)(ptr))[6] = INIT;
-               } // if(((int *)(ptr))[6] == COMPACTED)
-       } // while(gc_lobjmoreItems())
+         if(objptr != NULL) {
+           void * dst = flushObj(objptr);
+           if(dst != NULL) {
+             *((void **)(((char *)ptr)+offset)) = dst;
+           }
+         }
+       }                         // for(i=1; i<=size; i++)
+      }                   // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
+                          // restore the mark field, indicating that this obj has been flushed
+      ((int *)(ptr))[6] = INIT;
+    }             // if(((int *)(ptr))[6] == COMPACTED)
+  }       // while(gc_lobjmoreItems_I())
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe310);
+  BAMBOO_DEBUGPRINT(0xe310);
 #endif
 #ifdef GC_PROFILE
-       if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
+  if(BAMBOO_NUM_OF_CORE == 0) BAMBOO_DEBUGPRINT_REG(BAMBOO_GET_EXE_TIME());
 #endif
 
-       // send flush finish message to core coordinator
-       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-               gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-       } else {
-               send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE);
-       }
+  // send flush finish message to core coordinator
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+  } else {
+    send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE);
+  }
 #ifdef GC_PROFILE
-       if(BAMBOO_NUM_OF_CORE == 0) {
-       BAMBOO_DEBUGPRINT(0xffff);
-       //BAMBOO_DEBUGPRINT_REG(num_mapinforequest);
-       BAMBOO_DEBUGPRINT_REG(flushstalltime);
-       //BAMBOO_DEBUGPRINT_REG(num_mapinforequest_i);
-       BAMBOO_DEBUGPRINT_REG(flushstalltime_i);
-       }
-       //BAMBOO_DEBUGPRINT_REG(flushstalltime);
+  if(BAMBOO_NUM_OF_CORE == 0) {
+    BAMBOO_DEBUGPRINT(0xffff);
+    //BAMBOO_DEBUGPRINT_REG(num_mapinforequest);
+    BAMBOO_DEBUGPRINT_REG(flushstalltime);
+    //BAMBOO_DEBUGPRINT_REG(num_mapinforequest_i);
+    BAMBOO_DEBUGPRINT_REG(flushstalltime_i);
+  }
+  //BAMBOO_DEBUGPRINT_REG(flushstalltime);
 #endif
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe311);
+  BAMBOO_DEBUGPRINT(0xe311);
 #endif
 } // flush()
 
 inline void gc_collect(struct garbagelist * stackptr) {
-       // core collector routine
-       while(true) {
-               if(INITPHASE == gcphase) {
-                       break;
-               }
-       }
+  // core collector routine
+  while(true) {
+    if(INITPHASE == gcphase) {
+      break;
+    }
+  }
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Do initGC\n");
-#endif
-       initGC();
-       //send init finish msg to core coordinator
-       send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE);
-       while(true) {
-               if(MARKPHASE == gcphase) {
-                       break;
-               }
-       }
+  printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
+#endif
+  initGC();
+  //send init finish msg to core coordinator
+  send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE);
+  while(true) {
+    if(MARKPHASE == gcphase) {
+      break;
+    }
+  }
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Start mark phase\n");
+  printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(), 
+            udn_tile_coord_y());
 #endif
-       mark(true, stackptr);
+  mark(true, stackptr);
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Finish mark phase, start compact phase\n");
+  printf("(%x,%x) Finish mark phase, start compact phase\n", 
+            udn_tile_coord_x(), udn_tile_coord_y());
 #endif
-       compact();
+  compact();
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Finish compact phase\n");
+  printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
+            udn_tile_coord_y());
 #endif
-       while(true) {
-               if(FLUSHPHASE == gcphase) {
-                       break;
-               }
-       }
+  while(true) {
+    if(FLUSHPHASE == gcphase) {
+      break;
+    }
+  }
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Start flush phase\n");
+  printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(), 
+            udn_tile_coord_y());
 #endif
-       flush(stackptr);
+  flush(stackptr);
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Finish flush phase\n");
+  printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
+            udn_tile_coord_y());
 #endif
 
-       while(true) {
-               if(FINISHPHASE == gcphase) {
-                       break;
-               }
-       }
+  while(true) {
+    if(FINISHPHASE == gcphase) {
+      break;
+    }
+  }
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Finish gc!\n");
+  printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
 #endif
 } // void gc_collect(struct garbagelist * stackptr)
 
 inline void gc_nocollect(struct garbagelist * stackptr) {
-       while(true) {
-               if(INITPHASE == gcphase) {
-                       break;
-               }
-       }
+  while(true) {
+    if(INITPHASE == gcphase) {
+      break;
+    }
+  }
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Do initGC\n");
-#endif
-       initGC();
-       //send init finish msg to core coordinator
-       send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE);
-       while(true) {
-               if(MARKPHASE == gcphase) {
-                       break;
-               }
-       }
+  printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
+#endif
+  initGC();
+  //send init finish msg to core coordinator
+  send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE);
+  while(true) {
+    if(MARKPHASE == gcphase) {
+      break;
+    }
+  }
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Start mark phase\n");
+  printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(), 
+            udn_tile_coord_y());
 #endif
-       mark(true, stackptr);
+  mark(true, stackptr);
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Finish mark phase, wait for flush\n");
-#endif
-       // non-gc core collector routine
-       while(true) {
-               if(FLUSHPHASE == gcphase) {
-                       break;
-               }
-       }
+  printf("(%x,%x) Finish mark phase, wait for flush\n", 
+            udn_tile_coord_x(), udn_tile_coord_y());
+#endif
+  // non-gc core collector routine
+  while(true) {
+    if(FLUSHPHASE == gcphase) {
+      break;
+    }
+  }
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Start flush phase\n");
+  printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(), 
+            udn_tile_coord_y());
 #endif
-       flush(stackptr);
+  flush(stackptr);
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Finish flush phase\n");
+  printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(), 
+            udn_tile_coord_y());
 #endif
 
-       while(true) {
-               if(FINISHPHASE == gcphase) {
-                       break;
-               }
-       }
+  while(true) {
+    if(FINISHPHASE == gcphase) {
+      break;
+    }
+  }
 #ifdef RAWPATH // TODO GC_DEBUG
-       tprintf("Finish gc!\n");
+  printf("(%x,%x) Finish gc!\n", udn_tile_coord_x(), udn_tile_coord_y());
 #endif
 } // void gc_collect(struct garbagelist * stackptr)
 
 inline void gc(struct garbagelist * stackptr) {
-       // check if do gc
-       if(!gcflag) {
-               gcprocessing = false;
-               return;
-       }
+  // check if do gc
+  if(!gcflag) {
+    gcprocessing = false;
+    return;
+  }
 
-       // core coordinator routine
-       if(0 == BAMBOO_NUM_OF_CORE) {
+  // core coordinator routine
+  if(0 == BAMBOO_NUM_OF_CORE) {
 #ifdef GC_DEBUG
-       tprintf("Check if can do gc or not\n");
+    printf("(%x,%X) Check if can do gc or not\n", udn_tile_coord_x(),
+                  udn_tile_coord_y());
 #endif
-               if(!preGC()) {
-                       // not ready to do gc
-                       gcflag = true;
-                       return;
-               }
+    if(!preGC()) {
+      // not ready to do gc
+      gcflag = true;
+      return;
+    }
 
 #ifdef GC_PROFILE
-               gc_profileStart();
+    gc_profileStart();
 #endif
 
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("start gc! \n");
-               //dumpSMem();
-#endif
-               gcprocessing = true;
-               gcphase = INITPHASE;
-               int i = 0;
-               waitconfirm = false;
-               numconfirm = 0;
-               initGC();
-
-               // Note: all cores need to init gc including non-gc cores
-               for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; i++) {
-                       // send GC init messages to all cores
-                       send_msg_1(i, GCSTARTINIT);
-               }
-               bool isfirst = true;
-               bool allStall = false;
+    printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
+    //dumpSMem();
+#endif
+    gcprocessing = true;
+    gcphase = INITPHASE;
+    int i = 0;
+    waitconfirm = false;
+    numconfirm = 0;
+    initGC();
+
+    // Note: all cores need to init gc including non-gc cores
+    for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
+      // send GC init messages to all cores
+      send_msg_1(i, GCSTARTINIT);
+    }
+    bool isfirst = true;
+    bool allStall = false;
 
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("Check core status \n");
-#endif
-
-               gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-               while(true) {
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       if(gc_checkAllCoreStatus_I()) {
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                               break;
-                       }
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               }
+    printf("(%x,%x) Check core status \n", udn_tile_coord_x(), 
+                  udn_tile_coord_y());
+#endif
+
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+    while(true) {
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      if(gc_checkAllCoreStatus_I()) {
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       break;
+      }
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    }
 #ifdef GC_PROFILE
-               gc_profileItem();
+    gc_profileItem();
 #endif
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("Start mark phase \n");
-#endif
-               // all cores have finished compacting
-               // restore the gcstatus of all cores
-               // Note: all cores have to do mark including non-gc cores
-               gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-               for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) {
-                       gccorestatus[i] = 1;
-                       // send GC start messages to all cores
-                       send_msg_1(i, GCSTART);
-               }
-
-               gcphase = MARKPHASE;
+    printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(), 
+                  udn_tile_coord_y());
+#endif
+    // all cores have finished initializing
+    // restore the gcstatus of all cores
+    // Note: all cores have to do mark including non-gc cores
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
+    for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
+      gccorestatus[i] = 1;
+      // send GC start messages to all cores
+      send_msg_1(i, GCSTART);
+    }
+
+    gcphase = MARKPHASE;
     // mark phase
-               while(MARKPHASE == gcphase) {
-                       mark(isfirst, stackptr);
-                       if(isfirst) {
-                               isfirst = false;
-                       }
-
-                       // check gcstatus
-                       checkMarkStatue(); 
-               }  // while(MARKPHASE == gcphase)
-               // send msgs to all cores requiring large objs info
-               // Note: only need to ask gc cores, non-gc cores do not host any objs
-               numconfirm = NUMCORES4GC - 1;
-               for(i = 1; i < NUMCORES4GC; ++i) {
-                       send_msg_1(i, GCLOBJREQUEST);
-               }
-               gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
-               while(true) {
-                       if(numconfirm==0) {
-                               break;
-                       }
-               } // wait for responses
-               // check the heaptop
-               if(gcheaptop < gcmarkedptrbound) {
-                       gcheaptop = gcmarkedptrbound;
-               }
+    while(MARKPHASE == gcphase) {
+      mark(isfirst, stackptr);
+      if(isfirst) {
+       isfirst = false;
+      }
+
+      // check gcstatus
+      checkMarkStatue();
+    }              // while(MARKPHASE == gcphase)
+                   // send msgs to all cores requiring large objs info
+                   // Note: only need to ask gc cores, non-gc cores do not host any objs
+    numconfirm = NUMCORES4GC - 1;
+    for(i = 1; i < NUMCORES4GC; ++i) {
+      send_msg_1(i, GCLOBJREQUEST);
+    }
+    gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
+    while(true) {
+      if(numconfirm==0) {
+       break;
+      }
+    }             // wait for responses
+                  // check the heaptop
+    if(gcheaptop < gcmarkedptrbound) {
+      gcheaptop = gcmarkedptrbound;
+    }
 #ifdef GC_PROFILE
-               gc_profileItem();
-               // TODO
-               if(BAMBOO_NUM_OF_CORE == 0) {
-                       BAMBOO_DEBUGPRINT(0xeeee);
-                       BAMBOO_DEBUGPRINT_REG(num_markrequest);
-                       BAMBOO_DEBUGPRINT_REG(marktime);
-               }
+    gc_profileItem();
+    // TODO
+    if(BAMBOO_NUM_OF_CORE == 0) {
+      BAMBOO_DEBUGPRINT(0xeeee);
+      BAMBOO_DEBUGPRINT_REG(num_markrequest);
+      BAMBOO_DEBUGPRINT_REG(marktime);
+    }
 #endif
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("prepare to cache large objs \n");
-               //dumpSMem();
-#endif
-               // cache all large objs
-               if(!cacheLObjs()) {
-                       // no enough space to cache large objs
-                       BAMBOO_EXIT(0xb107);
-               }
-               // predict number of blocks to fill for each core
-               int tmpheaptop = 0;
-               int numpbc = loadbalance(&tmpheaptop);
-               // TODO
-               numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
+    printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
+                  udn_tile_coord_y());
+    //dumpSMem();
+#endif
+    // cache all large objs
+    if(!cacheLObjs()) {
+      // not enough space to cache large objs
+      BAMBOO_EXIT(0xb107);
+    }
+    // predict number of blocks to fill for each core
+    int tmpheaptop = 0;
+    int numpbc = loadbalance(&tmpheaptop);
+    // TODO
+    numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("mark phase finished \n");
-               //dumpSMem();
-#endif
-               //int tmptopptr = 0;
-               //BASEPTR(gctopcore, 0, &tmptopptr);
-               // TODO
-               //tmptopptr = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
-               tmpheaptop = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xabab);
-               BAMBOO_DEBUGPRINT_REG(tmptopptr);
-#endif
-               for(i = 0; i < NUMCORES4GC; ++i) {
-                       int tmpcoreptr = 0;
-                       BASEPTR(i, numpbc, &tmpcoreptr);
-                       //send start compact messages to all cores
-                       //TODO bug here, do not know if the direction is positive or negtive?
-                       if (tmpcoreptr < tmpheaptop/*tmptopptr*/) {
-                               gcstopblock[i] = numpbc + 1;
-                               if(i != STARTUPCORE) {
-                                       send_msg_2(i, GCSTARTCOMPACT, numpbc+1); 
-                               } else {
-                                       gcblock2fill = numpbc+1;
-                               } // if(i != STARTUPCORE)
-                       } else {
-                               gcstopblock[i] = numpbc;
-                               if(i != STARTUPCORE) {
-                                       send_msg_2(i, GCSTARTCOMPACT, numpbc);
-                               } else {
-                                       gcblock2fill = numpbc;
-                               } // if(i != STARTUPCORE)
-                       }
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xf000+i);
-                       BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
-                       BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
-#endif
-                       // init some data strutures for compact phase
-                       gcloads[i] = 0;
-                       gcfilledblocks[i] = 0;
-                       gcrequiredmems[i] = 0;
-               }
+    printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(), 
+                  udn_tile_coord_y());
+    //dumpSMem();
+#endif
+    //int tmptopptr = 0;
+    //BASEPTR(gctopcore, 0, &tmptopptr);
+    // TODO
+    //tmptopptr = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
+    tmpheaptop = (BAMBOO_BASE_VA) + (BAMBOO_SHARED_MEM_SIZE);
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xabab);
+    BAMBOO_DEBUGPRINT_REG(tmptopptr);
+#endif
+    for(i = 0; i < NUMCORES4GC; ++i) {
+      int tmpcoreptr = 0;
+      BASEPTR(i, numpbc, &tmpcoreptr);
+      //send start compact messages to all cores
+      //TODO bug here, do not know if the direction is positive or negative?
+      if (tmpcoreptr < tmpheaptop /*tmptopptr*/) {
+       gcstopblock[i] = numpbc + 1;
+       if(i != STARTUPCORE) {
+         send_msg_2(i, GCSTARTCOMPACT, numpbc+1);
+       } else {
+         gcblock2fill = numpbc+1;
+       }                         // if(i != STARTUPCORE)
+      } else {
+       gcstopblock[i] = numpbc;
+       if(i != STARTUPCORE) {
+         send_msg_2(i, GCSTARTCOMPACT, numpbc);
+       } else {
+         gcblock2fill = numpbc;
+       }                         // if(i != STARTUPCORE)
+      }
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xf000+i);
+      BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
+      BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
+#endif
+      // init some data structures for compact phase
+      gcloads[i] = 0;
+      gcfilledblocks[i] = 0;
+      gcrequiredmems[i] = 0;
+    }
 
 #ifdef GC_PROFILE
-               gc_profileItem();
-#endif
-
-               // compact phase
-               bool finalcompact = false;
-               // initialize pointers for comapcting
-               struct moveHelper * orig = 
-                       (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
-               struct moveHelper * to = 
-                       (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
-               initOrig_Dst(orig, to);
-               int filledblocks = 0;
-               INTPTR heaptopptr = 0;
-               bool finishcompact = false;
-               bool iscontinue = true;
-               bool localcompact = true;
-               while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
-                       if((!finishcompact) && iscontinue) {
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe001);
-                               BAMBOO_DEBUGPRINT_REG(numpbc);
-                               BAMBOO_DEBUGPRINT_REG(gcblock2fill);
-#endif
-                               finishcompact = compacthelper(orig, to, &filledblocks, 
-                                                                         &heaptopptr, &localcompact);
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe002);
-                               BAMBOO_DEBUGPRINT_REG(finishcompact);
-                               BAMBOO_DEBUGPRINT_REG(gctomove);
-                               BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
-                               BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
-                               BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
-#endif
-                       }
-
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       if(gc_checkCoreStatus_I()) {
-                               // all cores have finished compacting
-                               // restore the gcstatus of all cores
-                               for(i = 0; i < NUMCORES4GC; ++i) {
-                                       gccorestatus[i] = 1;
-                               }
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                               break;
-                       } else {
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                               // check if there are spare mem for pending move requires
-                               if(COMPACTPHASE == gcphase) {
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xe003);
-#endif
-                                       resolvePendingMoveRequest();
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT_REG(gctomove);
-#endif
-                               } else {
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xe004);
-#endif
-                                       compact2Heaptop();
-                               }
-                       } // if(gc_checkCoreStatus_I()) else ...
-
-                       if(gctomove) {
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xe005);
-                               BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
-                               BAMBOO_DEBUGPRINT_REG(gcblock2fill);
-                               BAMBOO_DEBUGPRINT_REG(gctomove);
-#endif
-                               to->ptr = gcmovestartaddr;
-                               to->numblocks = gcblock2fill - 1;
-                               to->bound = (to->numblocks==0)?
-                                       BAMBOO_SMEM_SIZE_L:
-                                       BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
-                               BASEPTR(gcdstcore, to->numblocks, &(to->base));
-                               to->offset = to->ptr - to->base;
-                               to->top = (to->numblocks==0)?
-                                       (to->offset):(to->bound-BAMBOO_SMEM_SIZE+to->offset);
-                               to->base = to->ptr;
-                               to->offset = BAMBOO_CACHE_LINE_SIZE;
-                               to->ptr += to->offset; // for header
-                               to->top += to->offset;
-                               if(gcdstcore == BAMBOO_NUM_OF_CORE) {
-                                       localcompact = true;
-                               } else {
-                                       localcompact = false;
-                               }
-                               gctomove = false;
-                               iscontinue = true;
-                       } else if(!finishcompact) {
-                               // still pending
-                               iscontinue = false;
-                       } // if(gctomove)
-
-               } // while(COMPACTPHASE == gcphase) 
+    gc_profileItem();
+#endif
+
+    // compact phase
+    bool finalcompact = false;
+    // initialize pointers for compacting
+    struct moveHelper * orig =
+      (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+    struct moveHelper * to =
+      (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+    initOrig_Dst(orig, to);
+    int filledblocks = 0;
+    INTPTR heaptopptr = 0;
+    bool finishcompact = false;
+    bool iscontinue = true;
+    bool localcompact = true;
+    while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
+      if((!finishcompact) && iscontinue) {
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xe001);
+       BAMBOO_DEBUGPRINT_REG(numpbc);
+       BAMBOO_DEBUGPRINT_REG(gcblock2fill);
+#endif
+       finishcompact = compacthelper(orig, to, &filledblocks,
+                                     &heaptopptr, &localcompact);
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xe002);
+       BAMBOO_DEBUGPRINT_REG(finishcompact);
+       BAMBOO_DEBUGPRINT_REG(gctomove);
+       BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
+       BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
+       BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
+#endif
+      }
+
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      if(gc_checkCoreStatus_I()) {
+       // all cores have finished compacting
+       // restore the gcstatus of all cores
+       for(i = 0; i < NUMCORES4GC; ++i) {
+         gccorestatus[i] = 1;
+       }
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       break;
+      } else {
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       // check if there is spare mem for pending move requests
+       if(COMPACTPHASE == gcphase) {
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xe003);
+#endif
+         resolvePendingMoveRequest();
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT_REG(gctomove);
+#endif
+       } else {
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xe004);
+#endif
+         compact2Heaptop();
+       }
+      }                   // if(gc_checkCoreStatus_I()) else ...
+
+      if(gctomove) {
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT(0xe005);
+       BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
+       BAMBOO_DEBUGPRINT_REG(gcblock2fill);
+       BAMBOO_DEBUGPRINT_REG(gctomove);
+#endif
+       to->ptr = gcmovestartaddr;
+       to->numblocks = gcblock2fill - 1;
+       to->bound = (to->numblocks==0) ?
+                   BAMBOO_SMEM_SIZE_L :
+                   BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
+       BASEPTR(gcdstcore, to->numblocks, &(to->base));
+       to->offset = to->ptr - to->base;
+       to->top = (to->numblocks==0) ?
+                 (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
+       to->base = to->ptr;
+       to->offset = BAMBOO_CACHE_LINE_SIZE;
+       to->ptr += to->offset;                         // for header
+       to->top += to->offset;
+       if(gcdstcore == BAMBOO_NUM_OF_CORE) {
+         localcompact = true;
+       } else {
+         localcompact = false;
+       }
+       gctomove = false;
+       iscontinue = true;
+      } else if(!finishcompact) {
+       // still pending
+       iscontinue = false;
+      }                   // if(gctomove)
+
+    }             // while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase))
 #ifdef GC_PROFILE
-               gc_profileItem();
+    gc_profileItem();
 #endif
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("prepare to move large objs \n");
-               //dumpSMem();
+    printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
+              udn_tile_coord_y());
+    //dumpSMem();
 #endif
-               // move largeObjs
-               moveLObjs();
+    // move largeObjs
+    moveLObjs();
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("compact phase finished \n");
-               //dumpSMem();
-#endif
-               RUNFREE(orig);
-               RUNFREE(to);
-               orig = to = NULL;
-
-               gcphase = FLUSHPHASE;
-               gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-               // Note: all cores should flush their runtime data including non-gc 
-               //       cores
-               for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) {
-                       // send start flush messages to all cores
-                       gccorestatus[i] = 1;
-                       send_msg_1(i, GCSTARTFLUSH);
-               }
+    printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(), 
+                  udn_tile_coord_y());
+    //dumpSMem();
+#endif
+    RUNFREE(orig);
+    RUNFREE(to);
+    orig = to = NULL;
+
+    gcphase = FLUSHPHASE;
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
+    // Note: all cores should flush their runtime data including non-gc
+    //       cores
+    for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
+      // send start flush messages to all cores
+      gccorestatus[i] = 1;
+      send_msg_1(i, GCSTARTFLUSH);
+    }
 #ifdef GC_PROFILE
-               gc_profileItem();
+    gc_profileItem();
 #endif
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("Start flush phase \n");
-#endif
-               // flush phase
-               flush(stackptr);
-               gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-               while(FLUSHPHASE == gcphase) {
-                       // check the status of all cores
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       if(gc_checkAllCoreStatus_I()) {
-                               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                               break;
-                       }
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               } // while(FLUSHPHASE == gcphase)
-               gcphase = FINISHPHASE;
-
-               // invalidate all shared mem pointers
-               // put it here as it takes time to inform all the other cores to 
-               // finish gc and it might cause problem when some core resumes 
-               // mutator earlier than the other cores
-               bamboo_cur_msp = NULL;
-               bamboo_smem_size = 0;
-               gcflag = false;
-               gcprocessing = false;
+    printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(), 
+                  udn_tile_coord_y());
+#endif
+    // flush phase
+    flush(stackptr);
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+    while(FLUSHPHASE == gcphase) {
+      // check the status of all cores
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      if(gc_checkAllCoreStatus_I()) {
+       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+       break;
+      }
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    }             // while(FLUSHPHASE == gcphase)
+    gcphase = FINISHPHASE;
+
+    // invalidate all shared mem pointers
+    // put it here as it takes time to inform all the other cores to
+    // finish gc and it might cause problem when some core resumes
+    // mutator earlier than the other cores
+    bamboo_cur_msp = NULL;
+    bamboo_smem_size = 0;
+    gcflag = false;
+    gcprocessing = false;
 
 #ifdef GC_PROFILE
-               gc_profileEnd();
-#endif
-               gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-               for(i = 1; i < NUMCORESACTIVE/*NUMCORES4GC*/; ++i) {
-                       // send gc finish messages to all cores
-                       send_msg_1(i, GCFINISH);
-                       gccorestatus[i] = 1;
-               }
+    gc_profileEnd();
+#endif
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
+    for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; ++i) {
+      // send gc finish messages to all cores
+      send_msg_1(i, GCFINISH);
+      gccorestatus[i] = 1;
+    }
 #ifdef RAWPATH // TODO GC_DEBUG
-               tprintf("gc finished \n");
-               //dumpSMem();
+    printf("(%x,%x) gc finished \n", udn_tile_coord_x(), 
+                  udn_tile_coord_y());
+    //dumpSMem();
 #endif
-       } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
-               gcprocessing = true;
-               gc_collect(stackptr);
+  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
+    gcprocessing = true;
+    gc_collect(stackptr);
 
-               // invalidate all shared mem pointers
-               bamboo_cur_msp = NULL;
-               bamboo_smem_size = 0;
+    // invalidate all shared mem pointers
+    bamboo_cur_msp = NULL;
+    bamboo_smem_size = 0;
 
-               gcflag = false;
-               gcprocessing = false;
-       } else {
-               // not a gc core, should wait for gcfinish msg
-         gcprocessing = true;
-               gc_nocollect(stackptr);
+    gcflag = false;
+    gcprocessing = false;
+  } else {
+    // not a gc core, should wait for gcfinish msg
+    gcprocessing = true;
+    gc_nocollect(stackptr);
 
-               // invalidate all shared mem pointers
-               bamboo_cur_msp = NULL;
-               bamboo_smem_size = 0;
+    // invalidate all shared mem pointers
+    bamboo_cur_msp = NULL;
+    bamboo_smem_size = 0;
 
-               gcflag = false;
-               gcprocessing = false;
-       }
+    gcflag = false;
+    gcprocessing = false;
+  }
 } // void gc(struct garbagelist * stackptr)
 
 #ifdef GC_PROFILE
 inline void gc_profileStart(void) {
   if(!gc_infoOverflow) {
-               GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
-         gc_infoArray[gc_infoIndex] = gcInfo;
-               gcInfo->index = 1;
-               gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
+    GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
+    gc_infoArray[gc_infoIndex] = gcInfo;
+    gcInfo->index = 1;
+    gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
   }
 }
 
 inline void gc_profileItem(void) {
   if(!gc_infoOverflow) {
-               GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
-               gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
+    GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
+    gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
   }
 }
 
 inline void gc_profileEnd(void) {
   if(!gc_infoOverflow) {
-               GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
-         gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
-               gc_infoIndex++;
-         if(gc_infoIndex == GCINFOLENGTH) {
-                 gc_infoOverflow = true;
-                 //taskInfoIndex = 0;
-         }
+    GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
+    gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
+    gc_infoIndex++;
+    if(gc_infoIndex == GCINFOLENGTH) {
+      gc_infoOverflow = true;
+      //taskInfoIndex = 0;
+    }
   }
 }
 
@@ -3084,51 +3108,51 @@ inline void gc_profileEnd(void) {
 void gc_outputProfileData() {
 #ifdef USEIO
   int i,j;
-       unsigned long long totalgc = 0;
+  unsigned long long totalgc = 0;
 
   //printf("Start Time, End Time, Duration\n");
   // output task related info
   for(i = 0; i < gc_infoIndex; i++) {
-               GCInfo * gcInfo = gc_infoArray[i];
-               unsigned long long tmp = 0;
-               for(j = 0; j < gcInfo->index; j++) {
-                       printf("%lld(%lld), ", gcInfo->time[j], (gcInfo->time[j]-tmp));
-                       tmp = gcInfo->time[j];
-               }
-               tmp = (tmp-gcInfo->time[0]);
-               printf(" ++ %lld \n", tmp);
-               totalgc += tmp;
+    GCInfo * gcInfo = gc_infoArray[i];
+    unsigned long long tmp = 0;
+    for(j = 0; j < gcInfo->index; j++) {
+      printf("%lld(%lld), ", gcInfo->time[j], (gcInfo->time[j]-tmp));
+      tmp = gcInfo->time[j];
+    }
+    tmp = (tmp-gcInfo->time[0]);
+    printf(" ++ %lld \n", tmp);
+    totalgc += tmp;
   }
 
   if(gc_infoOverflow) {
     printf("Caution: gc info overflow!\n");
   }
 
-       printf("\n\n total gc time: %lld \n", totalgc);
+  printf("\n\n total gc time: %lld \n", totalgc);
 #else
   int i = 0;
   int j = 0;
-       unsigned long long totalgc = 0;
+  unsigned long long totalgc = 0;
 
   BAMBOO_DEBUGPRINT(0xdddd);
   // output task related info
   for(i= 0; i < gc_infoIndex; i++) {
-               GCInfo * gcInfo = gc_infoArray[i];
-               unsigned long long tmp = 0;
-               BAMBOO_DEBUGPRINT(0xddda);
-               for(j = 0; j < gcInfo->index; j++) {
-                       BAMBOO_DEBUGPRINT(gcInfo->time[j]);
-                       BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp);
-                       BAMBOO_DEBUGPRINT(0xdddb);
-                       tmp = gcInfo->time[j];
-               }
-               tmp = (tmp-gcInfo->time[0]);
-               BAMBOO_DEBUGPRINT_REG(tmp);
-               BAMBOO_DEBUGPRINT(0xdddc);
-               totalgc += tmp;
-  }
-       BAMBOO_DEBUGPRINT(0xdddd);
-       BAMBOO_DEBUGPRINT_REG(totalgc);
+    GCInfo * gcInfo = gc_infoArray[i];
+    unsigned long long tmp = 0;
+    BAMBOO_DEBUGPRINT(0xddda);
+    for(j = 0; j < gcInfo->index; j++) {
+      BAMBOO_DEBUGPRINT(gcInfo->time[j]);
+      BAMBOO_DEBUGPRINT(gcInfo->time[j]-tmp);
+      BAMBOO_DEBUGPRINT(0xdddb);
+      tmp = gcInfo->time[j];
+    }
+    tmp = (tmp-gcInfo->time[0]);
+    BAMBOO_DEBUGPRINT_REG(tmp);
+    BAMBOO_DEBUGPRINT(0xdddc);
+    totalgc += tmp;
+  }
+  BAMBOO_DEBUGPRINT(0xdddd);
+  BAMBOO_DEBUGPRINT_REG(totalgc);
 
   if(gc_infoOverflow) {
     BAMBOO_DEBUGPRINT(0xefee);
index 6ae933c20c3ca909c2627c5de3c4d70c5178eac4..2e95097f2b6fd2053d2276ce26ee87e93f506434 100644 (file)
@@ -15,9 +15,9 @@
 #else
 #define BAMBOO_SMEM_SIZE_L (2 * BAMBOO_SMEM_SIZE)
 #endif
-#define BAMBOO_LARGE_SMEM_BOUND (BAMBOO_SMEM_SIZE_L*NUMCORES4GC) 
-         // let each gc core to have one big block, this is very important 
-                                // for the computation of NUMBLOCKS(s, n), DO NOT change this!
+#define BAMBOO_LARGE_SMEM_BOUND (BAMBOO_SMEM_SIZE_L*NUMCORES4GC)
+// let each gc core to have one big block, this is very important
+// for the computation of NUMBLOCKS(s, n), DO NOT change this!
 
 #define NUMPTRS 100
 
@@ -27,7 +27,7 @@
 
 typedef struct gc_info {
   unsigned long long time[7];
-       int index;
+  int index;
 } GCInfo;
 
 GCInfo * gc_infoArray[GCINFOLENGTH];
@@ -41,21 +41,21 @@ int num_mapinforequest_i;
 #endif
 
 typedef enum {
-       INIT = 0,     // 0
-       DISCOVERED,   // 1
-       MARKED,       // 2
-       COMPACTED,    // 3
-       FLUSHED,      // 4
-       END           // 5
+  INIT = 0,           // 0
+  DISCOVERED,         // 1
+  MARKED,             // 2
+  COMPACTED,          // 3
+  FLUSHED,            // 4
+  END                 // 5
 } GCOBJFLAG;
 
 typedef enum {
-       INITPHASE = 0x0,   // 0x0
-       MARKPHASE,         // 0x1
-       COMPACTPHASE,      // 0x2
-       SUBTLECOMPACTPHASE,// 0x3
-       FLUSHPHASE,        // 0x4
-       FINISHPHASE        // 0x5
+  INITPHASE = 0x0,         // 0x0
+  MARKPHASE,               // 0x1
+  COMPACTPHASE,            // 0x2
+  SUBTLECOMPACTPHASE,      // 0x3
+  FLUSHPHASE,              // 0x4
+  FINISHPHASE              // 0x5
 } GCPHASETYPE;
 
 volatile bool gcflag;
@@ -68,8 +68,8 @@ struct MGCHash * gcforwardobjtbl; // cache forwarded objs in mark phase
 volatile int gccorestatus[NUMCORESACTIVE]; // records status of each core
                                            // 1: running gc
                                            // 0: stall
-volatile int gcnumsendobjs[NUMCORESACTIVE];//records how many objects sent out
-volatile int gcnumreceiveobjs[NUMCORESACTIVE];//records how many objects 
+volatile int gcnumsendobjs[NUMCORESACTIVE]; //records how many objects sent out
+volatile int gcnumreceiveobjs[NUMCORESACTIVE]; //records how many objects
                                               //received
 volatile bool gcbusystatus;
 int gcself_numsendobjs;
@@ -87,7 +87,7 @@ int gcnumlobjs;
 INTPTR gcmarkedptrbound;
 int gcblock2fill;
 int gcstopblock[NUMCORES4GC]; // indicate when to stop compact phase
-int gcfilledblocks[NUMCORES4GC];//indicate how many blocks have been fulfilled
+int gcfilledblocks[NUMCORES4GC]; //indicate how many blocks have been fulfilled
 // move instruction;
 INTPTR gcmovestartaddr;
 int gcdstcore;
@@ -96,14 +96,14 @@ int gcrequiredmems[NUMCORES4GC]; //record pending mem requests
 volatile int gcmovepending;
 
 struct flushlist {
-       void * key;
-       struct flushnode * val;
-       struct flushlist * next;
+  void * key;
+  struct flushnode * val;
+  struct flushlist * next;
 };
 
 struct flushnode {
-       void ** ptr;
-       struct flushnode * next;
+  void ** ptr;
+  struct flushnode * next;
 };
 // mapping of old address to new address
 volatile struct RuntimeHash * gcpointertbl;
@@ -111,8 +111,8 @@ volatile struct RuntimeHash * gcpointertbl;
 int gcobj2map;
 int gcmappedobj;
 volatile bool gcismapped;
-//volatile struct flushlist * gcflushlist; // list of (key, list of reference 
-                                         // to be flushed)
+//volatile struct flushlist * gcflushlist; // list of (key, list of reference
+// to be flushed)
 //volatile int gcnumflush;
 
 // table recording the starting address of each small block
@@ -126,83 +126,83 @@ int gcnumblock; // number of total blocks in the shared mem
 int gcbaseva; // base va for shared memory without reserved sblocks
 
 #define ISSHAREDOBJ(p) \
-       ((((int)p)>gcbaseva)&&(((int)p)<(gcbaseva+(BAMBOO_SHARED_MEM_SIZE))))
+  ((((int)p)>gcbaseva)&&(((int)p)<(gcbaseva+(BAMBOO_SHARED_MEM_SIZE))))
 
 #define ALIGNSIZE(s, as) \
-       (*((int*)as)) = (((s) & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE))
+  (*((int*)as)) = (((s) & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE))
 
-// mapping of pointer to block # (start from 0), here the block # is 
+// mapping of pointer to block # (start from 0), here the block # is
 // the global index
 #define BLOCKINDEX(p, b) \
   { \
-               int t = (p) - gcbaseva; \
-               if(t < (BAMBOO_LARGE_SMEM_BOUND)) { \
-                       (*((int*)b)) = t / (BAMBOO_SMEM_SIZE_L); \
-               } else { \
-                       (*((int*)b)) = NUMCORES4GC+((t-(BAMBOO_LARGE_SMEM_BOUND))/(BAMBOO_SMEM_SIZE));\
-               } \
-       }
+    int t = (p) - gcbaseva; \
+    if(t < (BAMBOO_LARGE_SMEM_BOUND)) { \
+      (*((int*)b)) = t / (BAMBOO_SMEM_SIZE_L); \
+    } else { \
+      (*((int*)b)) = NUMCORES4GC+((t-(BAMBOO_LARGE_SMEM_BOUND))/(BAMBOO_SMEM_SIZE)); \
+    } \
+  }
 
 // mapping of pointer to core #
 #define RESIDECORE(p, c) \
-{ \
-       if(1 == (NUMCORES4GC)) { \
-               (*((int*)c)) = 0; \
-       } else {\
-               int b; \
-               BLOCKINDEX((p), &b); \
-               (*((int*)c)) = gc_block2core[(b%(NUMCORES4GC*2))]; \
-       }\
-}
+  { \
+    if(1 == (NUMCORES4GC)) { \
+      (*((int*)c)) = 0; \
+    } else { \
+      int b; \
+      BLOCKINDEX((p), &b); \
+      (*((int*)c)) = gc_block2core[(b%(NUMCORES4GC*2))]; \
+    } \
+  }
 
 // NOTE: n starts from 0
-// mapping of heaptop (how many bytes there are in the local heap) to 
+// mapping of heaptop (how many bytes there are in the local heap) to
 // the number of the block
-// the number of the block indicates that the block is the xth block on 
+// the number of the block indicates that the block is the xth block on
 // the local heap
 #define NUMBLOCKS(s, n) \
-       if(s < (BAMBOO_SMEM_SIZE_L)) { \
-               (*((int*)(n))) = 0; \
-       } else { \
-               (*((int*)(n))) = 1 + ((s) - (BAMBOO_SMEM_SIZE_L)) / (BAMBOO_SMEM_SIZE); \
-       }
+  if(s < (BAMBOO_SMEM_SIZE_L)) { \
+    (*((int*)(n))) = 0; \
+  } else { \
+    (*((int*)(n))) = 1 + ((s) - (BAMBOO_SMEM_SIZE_L)) / (BAMBOO_SMEM_SIZE); \
+  }
 
 #define OFFSET(s, o) \
-       if(s < BAMBOO_SMEM_SIZE_L) { \
-               (*((int*)(o))) = (s); \
-       } else { \
-               (*((int*)(o))) = ((s) - (BAMBOO_SMEM_SIZE_L)) % (BAMBOO_SMEM_SIZE); \
-       }
+  if(s < BAMBOO_SMEM_SIZE_L) { \
+    (*((int*)(o))) = (s); \
+  } else { \
+    (*((int*)(o))) = ((s) - (BAMBOO_SMEM_SIZE_L)) % (BAMBOO_SMEM_SIZE); \
+  }
 
 // mapping of (core #, index of the block) to the global block index
-#define BLOCKINDEX2(c, n) (gc_core2block[(2*(c))+((n)%2)]+((NUMCORES4GC*2)*((n)/2))) 
+#define BLOCKINDEX2(c, n) (gc_core2block[(2*(c))+((n)%2)]+((NUMCORES4GC*2)*((n)/2)))
 
 // mapping of (core #, number of the block) to the base pointer of the block
 #define BASEPTR(c, n, p) \
   { \
-               int b = BLOCKINDEX2((c), (n)); \
-               if(b < (NUMCORES4GC)) { \
-                       (*((int*)p)) = gcbaseva + b * (BAMBOO_SMEM_SIZE_L); \
-               } else { \
-                       (*((int*)p)) = gcbaseva+(BAMBOO_LARGE_SMEM_BOUND)+ \
-                                      (b-(NUMCORES4GC))*(BAMBOO_SMEM_SIZE); \
-               } \
-       }
+    int b = BLOCKINDEX2((c), (n)); \
+    if(b < (NUMCORES4GC)) { \
+      (*((int*)p)) = gcbaseva + b * (BAMBOO_SMEM_SIZE_L); \
+    } else { \
+      (*((int*)p)) = gcbaseva+(BAMBOO_LARGE_SMEM_BOUND)+ \
+                     (b-(NUMCORES4GC))*(BAMBOO_SMEM_SIZE); \
+    } \
+  }
 
 // the next core in the top of the heap
 #define NEXTTOPCORE(b) (gc_block2core[((b)+1)%(NUMCORES4GC*2)])
 
 inline void gc(struct garbagelist * stackptr); // core coordinator routine
-inline void gc_collect(struct garbagelist* stackptr);//core collector routine
-inline void gc_nocollect(struct garbagelist* stackptr);//non-gc core collector routine
+inline void gc_collect(struct garbagelist* stackptr); //core collector routine
+inline void gc_nocollect(struct garbagelist* stackptr); //non-gc core collector routine
 inline void transferMarkResults_I();
 inline void gc_enqueue_I(void *ptr);
 inline void gc_lobjenqueue_I(void *ptr, int length, int host);
-inline bool gcfindSpareMem_I(int * startaddr, 
-                                        int * tomove,
-                                                                                                  int * dstcore,
-                                                                                                  int requiredmem,
-                                                                                                  int requiredcore);
+inline bool gcfindSpareMem_I(int * startaddr,
+                             int * tomove,
+                             int * dstcore,
+                             int requiredmem,
+                             int requiredcore);
 
 inline void * gc_lobjdequeue4(int * length, int * host);
 inline int gc_lobjmoreItems4();
index faf4ca38aa7292a77d540abaa9bf89173ecb234a..3519f5a734cf8d6effdd1ef47b26fd3ab46173dd 100644 (file)
@@ -6,28 +6,50 @@
 static int gc_core2block[2] = {0,1};
 
 static int gc_block2core[2] = { 0,  0};
+#elif defined GC_56
+// NUMCORES4GC = 56
+static int gc_core2block[112] = {
+  0,111,  15, 96,  16,95,  31,80,  32,79,  47,64,  48,63,
+  1,110,  14, 97,  17,94,  30,81,  33,78,  46,65,  49,62,
+  2,109,  13, 98,  18,93,  29,82,  34,77,  45,66,  50,61,
+  3,108,  12, 99,  19,92,  28,83,  35,76,  44,67,  51,60,
+  4,107,  11,100,  20,91,  27,84,  36,75,  43,68,  52,59,
+  5,106,  10,101,  21,90,  26,85,  37,74,  42,69,  53,58,
+  6,105,   9,102,  22,89,  25,86,  38,73,  41,70,  54,57,
+  7,104,   8,103,  23,88,  24,87,  39,72,  40,71,  55,56
+};
+
+static int gc_block2core[112] = {
+  0,  7, 14, 21, 28, 35, 42, 49, 50, 43, 36, 29, 22, 15,  8,  1,
+  2,  9, 16, 23, 30, 37, 44, 51, 52, 45, 38, 31, 24, 17, 10,  3,
+  4, 11, 18, 25, 32, 39, 46, 53, 54, 47, 40, 33, 26, 19, 12,  5,
+  6, 13, 20, 27, 34, 41, 48, 55, 55, 48, 41, 34, 27, 20, 13,  6,
+  5, 12, 19, 26, 33, 40, 47, 54, 53, 46, 39, 32, 25, 18, 11,  4,
+  3, 10, 17, 24, 31, 38, 45, 52, 51, 44, 37, 30, 23, 16,  9,  2,
+  1,  8, 15, 22, 29, 36, 43, 50, 49, 42, 35, 28, 21, 14,  7,  0
+};
 #elif defined GC_62
 // NUMCORES4GC = 62
 static int gc_core2block[124] = {
-       0,123,  15,108,  16,107,  31,92,  32,91,  47,76,    
-       1,122,  14,109,  17,106,  30,93,  33,90,  46,77,  48,75,  61,62,
-       2,121,  13,110,  18,105,  29,94,  34,89,  45,78,  49,74,  60,63,
-       3,120,  12,111,  19,104,  28,95,  35,88,  44,79,  50,73,  59,64,
-       4,119,  11,112,  20,103,  27,96,  36,87,  43,80,  51,72,  58,65,
-       5,118,  10,113,  21,102,  26,97,  37,86,  42,81,  52,71,  57,66,
-       6,117,   9,114,  22,101,  25,98,  38,85,  41,82,  53,70,  56,67,
-       7,116,   8,115,  23,100,  24,99,  39,84,  40,83,  54,69,  55,68
+  0,123,  15,108,  16,107,  31,92,  32,91,  47,76,
+  1,122,  14,109,  17,106,  30,93,  33,90,  46,77,  48,75,  61,62,
+  2,121,  13,110,  18,105,  29,94,  34,89,  45,78,  49,74,  60,63,
+  3,120,  12,111,  19,104,  28,95,  35,88,  44,79,  50,73,  59,64,
+  4,119,  11,112,  20,103,  27,96,  36,87,  43,80,  51,72,  58,65,
+  5,118,  10,113,  21,102,  26,97,  37,86,  42,81,  52,71,  57,66,
+  6,117,   9,114,  22,101,  25,98,  38,85,  41,82,  53,70,  56,67,
+  7,116,   8,115,  23,100,  24,99,  39,84,  40,83,  54,69,  55,68
 };
 
-static int gc_block2core[124] = { 
-       0,  6, 14, 22, 30, 38, 46, 54, 55, 47, 39, 31, 23, 15,  7,  1,
-       2,  8, 16, 24, 32, 40, 48, 56, 57, 49, 41, 33, 25, 17,  9,  3,
-       4, 10, 18, 26, 34, 42, 50, 58, 59, 51, 43, 35, 27, 19, 11,  5,
-       12, 20, 28, 36, 44, 52, 60, 61, 53, 45, 37, 29, 21, 13,    
-       13, 21, 29, 37, 45, 53, 61, 60, 52, 44, 36, 28, 20, 12,    
-       5, 11, 19, 27, 35, 43, 51, 59, 58, 50, 42, 34, 26, 18, 10,  4,
-       3,  9, 17, 25, 33, 41, 49, 57, 56, 48, 40, 32, 24, 16,  8,  2,
-       1,  7, 15, 23, 31, 39, 47, 55, 54, 46, 38, 30, 22, 14,  6,  0
+static int gc_block2core[124] = {
+  0,  6, 14, 22, 30, 38, 46, 54, 55, 47, 39, 31, 23, 15,  7,  1,
+  2,  8, 16, 24, 32, 40, 48, 56, 57, 49, 41, 33, 25, 17,  9,  3,
+  4, 10, 18, 26, 34, 42, 50, 58, 59, 51, 43, 35, 27, 19, 11,  5,
+  12, 20, 28, 36, 44, 52, 60, 61, 53, 45, 37, 29, 21, 13,
+  13, 21, 29, 37, 45, 53, 61, 60, 52, 44, 36, 28, 20, 12,
+  5, 11, 19, 27, 35, 43, 51, 59, 58, 50, 42, 34, 26, 18, 10,  4,
+  3,  9, 17, 25, 33, 41, 49, 57, 56, 48, 40, 32, 24, 16,  8,  2,
+  1,  7, 15, 23, 31, 39, 47, 55, 54, 46, 38, 30, 22, 14,  6,  0
 };
 #endif
 
index 7c9cba8efa474fee83c9a0f9de98e977367803c0..ea36e42d90383405c4efe13cbec8d3a99653bf05 100644 (file)
@@ -116,7 +116,7 @@ void injectinstructionfailure() {
 #ifdef D___Double______nativeparsedouble____L___String___
 double CALL01(___Double______nativeparsedouble____L___String___,struct ___String___ * ___str___) {
   int length=VAR(___str___)->___count___;
-  int maxlength=(length>60)?60:length;
+  int maxlength=(length>60) ? 60 : length;
   char str[maxlength+1];
   struct ArrayObject * chararray=VAR(___str___)->___value___;
   int i;
@@ -145,7 +145,7 @@ int CALL12(___String______convertdoubletochar____D__AR_C, double ___val___, doub
 }
 #else
 int CALL12(___String______convertdoubletochar____D__AR_C, double ___val___, double ___val___, struct ArrayObject ___chararray___) {
-       return 0;
+  return 0;
 }
 #endif
 
@@ -212,14 +212,14 @@ void CALL01(___System______printString____L___String___,struct ___String___ * __
 void * allocate_new(void * ptr, int type) {
   struct ___Object___ * v=(struct ___Object___ *)FREEMALLOC((struct garbagelist *) ptr, classsize[type]);
 #ifdef DEBUG
-  printf("(%x,%x): new object: %x \n", udn_tile_coord_x(), 
-                 udn_tile_coord_y(), v);
+  printf("(%x,%x): new object: %x \n", udn_tile_coord_x(),
+         udn_tile_coord_y(), v);
 #endif
   v->type=type;
   v->version = 0;
   v->lock = NULL;
-       v->lockcount = 0;
-       initlock(v);
+  v->lockcount = 0;
+  initlock(v);
   return v;
 }
 
@@ -228,8 +228,8 @@ void * allocate_new(void * ptr, int type) {
 struct ArrayObject * allocate_newarray(void * ptr, int type, int length) {
   struct ArrayObject * v=(struct ArrayObject *)FREEMALLOC((struct garbagelist *) ptr, sizeof(struct ArrayObject)+length*classsize[type]);
 #ifdef DEBUG
-  printf("(%x,%x): new array object: %x \n", udn_tile_coord_x(), 
-                 udn_tile_coord_y(), v);
+  printf("(%x,%x): new array object: %x \n", udn_tile_coord_x(),
+         udn_tile_coord_y(), v);
 #endif
   v->type=type;
   v->version = 0;
@@ -238,7 +238,7 @@ struct ArrayObject * allocate_newarray(void * ptr, int type, int length) {
     return NULL;
   }
   v->___length___=length;
-       initlock(v);
+  initlock(v);
   return v;
 }
 
@@ -249,7 +249,7 @@ void * allocate_new(int type) {
   v->version = 0;
   //v->numlocks = 0;
   v->lock = NULL;
-       initlock(v);
+  initlock(v);
   return v;
 }
 
@@ -262,7 +262,7 @@ struct ArrayObject * allocate_newarray(int type, int length) {
   //v->numlocks = 0;
   v->lock = NULL;
   v->___length___=length;
-       initlock(v);
+  initlock(v);
   return v;
 }
 #endif
index d8be4b5c37b768416582ec561d899517f573170d..838397d18118b5778a85bcfbc6bf59087a38ce64 100644 (file)
@@ -34,39 +34,39 @@ volatile bool isMsgHanging;
 volatile bool isMsgSending;
 
 #define MSG_INDEXINC_I() \
-       msgdataindex = (msgdataindex + 1) % (BAMBOO_MSG_BUF_LENGTH)
+  msgdataindex = (msgdataindex + 1) % (BAMBOO_MSG_BUF_LENGTH)
 
 #define MSG_LASTINDEXINC_I() \
-       msgdatalast = (msgdatalast + 1) % (BAMBOO_MSG_BUF_LENGTH)
+  msgdatalast = (msgdatalast + 1) % (BAMBOO_MSG_BUF_LENGTH)
 
 #define MSG_CACHE_I(n) \
-       msgdata[msgdatalast] = (n); \
-  MSG_LASTINDEXINC_I() 
+  msgdata[msgdatalast] = (n); \
+  MSG_LASTINDEXINC_I()
 
-// NOTE: if msgdataindex == msgdatalast, it always means that the buffer if 
+// NOTE: if msgdataindex == msgdatalast, it always means that the buffer is
 //       full. In the case that the buffer is empty, should never call this
 //       MACRO
 #define MSG_REMAINSIZE_I(s) \
-       if(msgdataindex < msgdatalast) { \
-               (*(int*)s) = msgdatalast - msgdataindex; \
-       } else if((msgdataindex == msgdatalast) && (!msgdatafull)) {\
-               (*(int*)s) = 0; \
-       }       else { \
-               (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) - msgdataindex + msgdatalast; \
-       } 
+  if(msgdataindex < msgdatalast) { \
+    (*(int*)s) = msgdatalast - msgdataindex; \
+  } else if((msgdataindex == msgdatalast) && (!msgdatafull)) { \
+    (*(int*)s) = 0; \
+  }       else { \
+    (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) -msgdataindex + msgdatalast; \
+  }
 
 #define OUTMSG_INDEXINC() \
-       outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH)
+  outmsgindex = (outmsgindex + 1) % (BAMBOO_OUT_BUF_LENGTH)
 
 #define OUTMSG_LASTINDEXINC() \
-       outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
-       if(outmsglast == outmsgindex) { \
-               BAMBOO_EXIT(0xdd01); \
-       } 
+  outmsglast = (outmsglast + 1) % (BAMBOO_OUT_BUF_LENGTH); \
+  if(outmsglast == outmsgindex) { \
+    BAMBOO_EXIT(0xdd01); \
+  }
 
 #define OUTMSG_CACHE(n) \
-       outmsgdata[outmsglast] = (n); \
-  OUTMSG_LASTINDEXINC(); 
+  outmsgdata[outmsglast] = (n); \
+  OUTMSG_LASTINDEXINC();
 
 #define MAX_PACKET_WORDS 5
 
@@ -111,120 +111,120 @@ volatile bool isMsgSending;
  *      23 -- large objs mapping info
  *
  * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
- * StallMsg: 2 + corenum + sendobjs + receiveobjs 
+ * StallMsg: 2 + corenum + sendobjs + receiveobjs
  *             (size is always 4 * sizeof(int))
- * LockMsg: 3 + lock type + obj pointer + lock + request core 
+ * LockMsg: 3 + lock type + obj pointer + lock + request core
  *            (size is always 5 * sizeof(int))
- *          4/5/6 + lock type + obj pointer + lock 
+ *          4/5/6 + lock type + obj pointer + lock
  *            (size is always 4 * sizeof(int))
- *          9 + lock type + obj pointer +  redirect lock + root request core 
- *            + request core 
+ *          9 + lock type + obj pointer +  redirect lock + root request core
+ *            + request core
  *            (size is always 6 * sizeof(int))
- *          a/b + lock type + obj pointer + redirect lock 
+ *          a/b + lock type + obj pointer + redirect lock
  *              (size is always 4 * sizeof(int))
- *          c + lock type + lock + redirect lock 
+ *          c + lock type + lock + redirect lock
  *            (size is always 4 * sizeof(int))
  *          lock type: 0 -- read; 1 -- write
- * ProfileMsg: 7 + totalexetime 
+ * ProfileMsg: 7 + totalexetime
  *               (size is always 2 * sizeof(int))
- *             8 + corenum 
+ *             8 + corenum
  *               (size is always 2 * sizeof(int))
  * StatusMsg: d (size is always 1 * sizeof(int))
- *            e + status + corenum + sendobjs + receiveobjs 
+ *            e + status + corenum + sendobjs + receiveobjs
  *              (size is always 5 * sizeof(int))
  *            status: 0 -- stall; 1 -- busy
  * TerminateMsg: f (size is always 1 * sizeof(int)
- * MemoryMsg: 10 + size + corenum 
+ * MemoryMsg: 10 + size + corenum
  *              (size is always 3 * sizeof(int))
- *           11 + base_va + size 
+ *           11 + base_va + size
  *              (size is always 3 * sizeof(int))
  * GCMsg: 12/13 (size is always 1 * sizeof(int))
- *        14 + size of msg + (num of objs to move + (start address 
- *           + end address + dst core + start dst)+)? 
- *           + (num of incoming objs + (start dst + orig core)+)? 
- *           + (num of large obj lists + (start address + lenght 
+ *        14 + size of msg + (num of objs to move + (start address
+ *           + end address + dst core + start dst)+)?
+ *           + (num of incoming objs + (start dst + orig core)+)?
+ *           + (num of large obj lists + (start address + length
  *           + start dst)+)?
  *        15 (size is always 1 * sizeof(int))
- *        16 + corenum 
+ *        16 + corenum
  *           (size is always 2 * sizeof(int))
- *        17 + corenum + gcsendobjs + gcreceiveobjs    
+ *        17 + corenum + gcsendobjs + gcreceiveobjs
  *           (size if always 4 * sizeof(int))
  *        18 + corenum + fulfilled blocks num + (finish compact(1) + current
- *           heap top)/(need mem(0) + mem need) 
+ *           heap top)/(need mem(0) + mem need)
  *           size is always 5 * sizeof(int))
- *        19 + corenum 
+ *        19 + corenum
  *              (size is always 2 * sizeof(int))
  *        1a (size is always 1 * sizeof(int))
  *        1b (size if always 1 * sizeof(int))
- *        1c + size of msg + corenum + gcsendobjs + gcreceiveobjs 
+ *        1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
  *           (size is always 5 * sizeof(int))
- *        1d + obj's address 
+ *        1d + obj's address
  *           (size is always 2 * sizeof(int))
  *        1e + corenum + start addr + end addr
  *           (size if always 4 * sizeof(int))
- *        1f + obj's address + corenum 
+ *        1f + obj's address + corenum
  *           (size is always 3 * sizeof(int))
- *        20 + obj's address + dst address 
+ *        20 + obj's address + dst address
  *           (size if always 3 * sizeof(int))
  *        21 (size is always 1 * sizeof(int))
- *        22 + size of msg + corenum + current heap size 
+ *        22 + size of msg + corenum + current heap size
  *           + (num of large obj lists + (start address + length)+)?
- *        23 + orig large obj ptr + new large obj ptr 
+ *        23 + orig large obj ptr + new large obj ptr
  *            (size is always 3 * sizeof(int))
  */
 typedef enum {
-       MSGSTART = 0xD0, // 0xD0
-       TRANSOBJ,        // 0xD1
-       TRANSTALL,       // 0xD2
-       LOCKREQUEST,     // 0xD3
-       LOCKGROUNT,      // 0xD4
-       LOCKDENY,        // 0xD5
-       LOCKRELEASE,     // 0xD6
-       PROFILEOUTPUT,   // 0xD7
-       PROFILEFINISH,   // 0xD8
-       REDIRECTLOCK,    // 0xD9
-       REDIRECTGROUNT,  // 0xDa
-       REDIRECTDENY,    // 0xDb
-       REDIRECTRELEASE, // 0xDc
-       STATUSCONFIRM,   // 0xDd
-       STATUSREPORT,    // 0xDe
-       TERMINATE,       // 0xDf
-       MEMREQUEST,      // 0xE0
-       MEMRESPONSE,     // 0xE1
+  MSGSTART = 0xD0,       // 0xD0
+  TRANSOBJ,              // 0xD1
+  TRANSTALL,             // 0xD2
+  LOCKREQUEST,           // 0xD3
+  LOCKGROUNT,            // 0xD4
+  LOCKDENY,              // 0xD5
+  LOCKRELEASE,           // 0xD6
+  PROFILEOUTPUT,         // 0xD7
+  PROFILEFINISH,         // 0xD8
+  REDIRECTLOCK,          // 0xD9
+  REDIRECTGROUNT,        // 0xDa
+  REDIRECTDENY,          // 0xDb
+  REDIRECTRELEASE,       // 0xDc
+  STATUSCONFIRM,         // 0xDd
+  STATUSREPORT,          // 0xDe
+  TERMINATE,             // 0xDf
+  MEMREQUEST,            // 0xE0
+  MEMRESPONSE,           // 0xE1
 #ifdef MULTICORE_GC
-       GCSTARTINIT,     // 0xE2
-       GCSTART,         // 0xE3
-       GCSTARTCOMPACT,  // 0xE4
-       GCSTARTFLUSH,    // 0xE5
-       GCFINISHINIT,    // 0xE6
-       GCFINISHMARK,    // 0xE7
-       GCFINISHCOMPACT, // 0xE8
-       GCFINISHFLUSH,   // 0xE9
-       GCFINISH,        // 0xEa
-       GCMARKCONFIRM,   // 0xEb
-       GCMARKREPORT,    // 0xEc
-       GCMARKEDOBJ,     // 0xEd
-       GCMOVESTART,     // 0xEe
-       GCMAPREQUEST,    // 0xEf
-       GCMAPINFO,       // 0xF0
-       GCLOBJREQUEST,   // 0xF1
-       GCLOBJINFO,      // 0xF2
-       GCLOBJMAPPING,   // 0xF3
+  GCSTARTINIT,           // 0xE2
+  GCSTART,               // 0xE3
+  GCSTARTCOMPACT,        // 0xE4
+  GCSTARTFLUSH,          // 0xE5
+  GCFINISHINIT,          // 0xE6
+  GCFINISHMARK,          // 0xE7
+  GCFINISHCOMPACT,       // 0xE8
+  GCFINISHFLUSH,         // 0xE9
+  GCFINISH,              // 0xEa
+  GCMARKCONFIRM,         // 0xEb
+  GCMARKREPORT,          // 0xEc
+  GCMARKEDOBJ,           // 0xEd
+  GCMOVESTART,           // 0xEe
+  GCMAPREQUEST,          // 0xEf
+  GCMAPINFO,             // 0xF0
+  GCLOBJREQUEST,         // 0xF1
+  GCLOBJINFO,            // 0xF2
+  GCLOBJMAPPING,         // 0xF3
 #endif
-       MSGEND
+  MSGEND
 } MSGTYPE;
 
 /////////////////////////////////////////////////////////////////////////////////
-// NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor. 
-//                           No greater than the number of all the cores in 
+// NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor.
+//                           No greater than the number of all the cores in
 //                           the processor
-//       NUMCORES -- number of cores chosen to deploy the application. It can 
-//                   be greater than that required to fully parallelize the 
+//       NUMCORES -- number of cores chosen to deploy the application. It can
+//                   be greater than that required to fully parallelize the
 //                   application. The same as NUMCORES.
-//       NUMCORESACTIVE -- number of cores that really execute the 
+//       NUMCORESACTIVE -- number of cores that really execute the
 //                         application. No greater than NUMCORES
-//       NUMCORES4GC -- number of cores for gc. No greater than NUMCORES. 
-//                      NOTE: currently only support ontinuous cores as gc 
+//       NUMCORES4GC -- number of cores for gc. No greater than NUMCORES.
+//                      NOTE: currently only support continuous cores as gc
 //                            cores, i.e. 0~NUMCORES4GC-1
 ////////////////////////////////////////////////////////////////////////////////
 // data structures of status for termination
@@ -232,9 +232,9 @@ typedef enum {
 volatile int corestatus[NUMCORESACTIVE]; // records status of each core
                                          // 1: running tasks
                                          // 0: stall
-volatile int numsendobjs[NUMCORESACTIVE]; // records how many objects a core 
+volatile int numsendobjs[NUMCORESACTIVE]; // records how many objects a core
                                           // has sent out
-volatile int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a 
+volatile int numreceiveobjs[NUMCORESACTIVE]; // records how many objects a
                                              // core has received
 volatile int numconfirm;
 volatile bool waitconfirm;
@@ -251,8 +251,8 @@ struct RuntimeHash * lockRedirectTbl;
 struct RuntimeHash * objRedirectLockTbl;
 #endif
 struct LockValue {
-       int redirectlock;
-       int value;
+  int redirectlock;
+  int value;
 };
 int lockobj;
 int lock2require;
@@ -277,42 +277,42 @@ struct Queue * totransobjqueue; // queue to hold objs to be transferred
 #define BAMBOO_NUM_PAGES (NUMCORES4GC*(2+1)+3)
 #define BAMBOO_PAGE_SIZE (64 * 64)
 #define BAMBOO_SMEM_SIZE (64 * 64) // (BAMBOO_PAGE_SIZE)
-#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
+#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) *(BAMBOO_NUM_PAGES))
 #else
 #define BAMBOO_NUM_PAGES (15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5)  3G
-#define BAMBOO_PAGE_SIZE (16 * 1024)// * 1024)  // (4096)
+#define BAMBOO_PAGE_SIZE (16 * 1024) // * 1024)  // (4096)
 #define BAMBOO_SMEM_SIZE (16 * 1024)
 #define BAMBOO_SHARED_MEM_SIZE (1024 * 1024 * 240) //(1024 * 1024 * 1024)
 //(3.0 * 1024 * 1024 * 1024) // 3G// ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES))
-#endif
+#endif // GC_DEBUG
 
 #ifdef MULTICORE_GC
 #include "multicoregarbage.h"
 
 typedef enum {
-       SMEMLOCAL = 0x0, // 0x0, using local mem only
-       SMEMFIXED,       // 0x1, use local mem in lower address space(1 block only)
-                        //      and global mem in higher address space
-       SMEMMIXED,       // 0x2, like FIXED mode but use a threshold to control
-       SMEMGLOBAL,      // 0x3, using global mem only
-       SMEMEND
+  SMEMLOCAL = 0x0,       // 0x0, using local mem only
+  SMEMFIXED,             // 0x1, use local mem in lower address space(1 block only)
+                         //      and global mem in higher address space
+  SMEMMIXED,             // 0x2, like FIXED mode but use a threshold to control
+  SMEMGLOBAL,            // 0x3, using global mem only
+  SMEMEND
 } SMEMSTRATEGY;
 
-SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED; 
-                              //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
+SMEMSTRATEGY bamboo_smem_mode; //-DSMEML: LOCAL; -DSMEMF: FIXED;
+                               //-DSMEMM: MIXED; -DSMEMG: GLOBAL;
 
 struct freeMemItem {
-       INTPTR ptr;
-       int size;
-       int startblock;  
-       int endblock;
-       struct freeMemItem * next;
+  INTPTR ptr;
+  int size;
+  int startblock;
+  int endblock;
+  struct freeMemItem * next;
 };
 
 struct freeMemList {
-       struct freeMemItem * head;
-       struct freeMemItem * backuplist;  // hold removed freeMemItem for reuse; 
-                                         // only maintain 1 fremmMemItem
+  struct freeMemItem * head;
+  struct freeMemItem * backuplist; // hold removed freeMemItem for reuse;
+                                   // only maintain 1 freeMemItem
 };
 
 // table recording the number of allocated bytes on each block
@@ -324,9 +324,11 @@ volatile int bamboo_free_block;
 //struct freeMemList * bamboo_free_mem_list;
 int bamboo_reserved_smem; // reserved blocks on the top of the shared heap
                           // e.g. 20% of the heap and should not be allocated
-                                                                                                       // otherwise gc is invoked
+// otherwise gc is invoked
 #else
-volatile mspace bamboo_free_msp;
+//volatile mspace bamboo_free_msp;
+INTPTR bamboo_free_smemp;
+int bamboo_free_smem_size;
 #endif
 volatile bool smemflag;
 volatile INTPTR bamboo_cur_msp;
@@ -339,31 +341,34 @@ int total_num_t6;
 #ifdef PROFILE
 
 #define TASKINFOLENGTH 30000
-//#define INTERRUPTINFOLENGTH 500
+#define INTERRUPTINFOLENGTH 500
 
 bool stall;
 //bool isInterrupt;
 int totalexetime;
+//unsigned long long interrupttime;
 
 typedef struct task_info {
   char* taskName;
   unsigned long long startTime;
   unsigned long long endTime;
   unsigned long long exitIndex;
-  struct Queue * newObjs; 
+  struct Queue * newObjs;
 } TaskInfo;
 
-/*typedef struct interrupt_info {
-   int startTime;
-   int endTime;
-   } InterruptInfo;*/
+// TODO
+typedef struct interrupt_info {
+  unsigned long long startTime;
+  unsigned long long endTime;
+} InterruptInfo;
 
 TaskInfo * taskInfoArray[TASKINFOLENGTH];
 int taskInfoIndex;
 bool taskInfoOverflow;
-/*InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
-   int interruptInfoIndex;
-   bool interruptInfoOverflow;*/
+// TODO
+InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
+int interruptInfoIndex;
+bool interruptInfoOverflow;
 volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
                                             // 1: running tasks
                                             // 0: stall
@@ -396,82 +401,82 @@ void releasewritelock_I(void * ptr);
 #ifndef MULTICORE_GC
 void releasewritelock_r(void * lock, void * redirectlock);
 #endif
-/* this function is to process lock requests. 
+/* this function is to process lock requests.
  * can only be invoked in receiveObject() */
 // if return -1: the lock request is redirected
 //            0: the lock request is approved
 //            1: the lock request is denied
-INLINE int processlockrequest(int locktype, 
-                                         int lock, 
-                                                                                                                       int obj, 
-                                                                                                                       int requestcore, 
-                                                                                                                       int rootrequestcore, 
-                                                                                                                       bool cache);
-INLINE void processlockrelease(int locktype, 
-                                          int lock, 
-                                                                                                                        int redirectlock, 
-                                                                                                                        bool redirect);
+INLINE int processlockrequest(int locktype,
+                              int lock,
+                              int obj,
+                              int requestcore,
+                              int rootrequestcore,
+                              bool cache);
+INLINE void processlockrelease(int locktype,
+                               int lock,
+                               int redirectlock,
+                               bool redirect);
 
 // msg related functions
 INLINE void send_hanging_msg();
-INLINE void send_msg_1(int targetcore, 
-                                  unsigned long n0);
-INLINE void send_msg_2(int targetcore, 
-                                  unsigned long n0, 
-                                                                                        unsigned long n1);
-INLINE void send_msg_3(int targetcore, 
-                                  unsigned long n0, 
-                                                                                        unsigned long n1, 
-                                                                                        unsigned long n2);
-INLINE void send_msg_4(int targetcore, 
-                                  unsigned long n0, 
-                                                                                        unsigned long n1, 
-                                                                                        unsigned long n2, 
-                                                                                        unsigned long n3);
-INLINE void send_msg_5(int targetcore, 
-                                  unsigned long n0, 
-                                                                                        unsigned long n1, 
-                                                                                        unsigned long n2, 
-                                                                                        unsigned long n3, 
-                                                                                        unsigned long n4);
-INLINE void send_msg_6(int targetcore, 
-                                  unsigned long n0, 
-                                                                                        unsigned long n1, 
-                                                                                        unsigned long n2, 
-                                                                                        unsigned long n3, 
-                                                                                        unsigned long n4, 
-                                                                                        unsigned long n5);
-INLINE void send_msg_3_I(int targetcore, 
-                                  unsigned long n0, 
-                                                                                        unsigned long n1, 
-                                                                                        unsigned long n2);
-INLINE void cache_msg_1(int targetcore, 
-                                                                                               unsigned long n0);
-INLINE void cache_msg_2(int targetcore, 
-                                   unsigned long n0, 
-                                                                                               unsigned long n1);
-INLINE void cache_msg_3(int targetcore, 
-                                   unsigned long n0, 
-                                                                                               unsigned long n1, 
-                                                                                               unsigned long n2);
-INLINE void cache_msg_4(int targetcore, 
-                                   unsigned long n0, 
-                                                                                               unsigned long n1, 
-                                                                                               unsigned long n2, 
-                                                                                               unsigned long n3);
-INLINE void cache_msg_5(int targetcore, 
-                                   unsigned long n0, 
-                                                                                               unsigned long n1, 
-                                                                                               unsigned long n2, 
-                                                                                               unsigned long n3, 
-                                                                                               unsigned long n4);
-INLINE void cache_msg_6(int targetcore, 
-                                   unsigned long n0, 
-                                                                                               unsigned long n1, 
-                                                                                               unsigned long n2, 
-                                                                                               unsigned long n3, 
-                                                                                               unsigned long n4, 
-                                                                                               unsigned long n5);
+INLINE void send_msg_1(int targetcore,
+                       unsigned long n0);
+INLINE void send_msg_2(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1);
+INLINE void send_msg_3(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       unsigned long n2);
+INLINE void send_msg_4(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       unsigned long n2,
+                       unsigned long n3);
+INLINE void send_msg_5(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       unsigned long n2,
+                       unsigned long n3,
+                       unsigned long n4);
+INLINE void send_msg_6(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       unsigned long n2,
+                       unsigned long n3,
+                       unsigned long n4,
+                       unsigned long n5);
+INLINE void send_msg_3_I(int targetcore,
+                         unsigned long n0,
+                         unsigned long n1,
+                         unsigned long n2);
+INLINE void cache_msg_1(int targetcore,
+                        unsigned long n0);
+INLINE void cache_msg_2(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1);
+INLINE void cache_msg_3(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1,
+                        unsigned long n2);
+INLINE void cache_msg_4(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1,
+                        unsigned long n2,
+                        unsigned long n3);
+INLINE void cache_msg_5(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1,
+                        unsigned long n2,
+                        unsigned long n3,
+                        unsigned long n4);
+INLINE void cache_msg_6(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1,
+                        unsigned long n2,
+                        unsigned long n3,
+                        unsigned long n4,
+                        unsigned long n5);
 INLINE void transferObject(struct transObjInfo * transObj);
 INLINE int receiveMsg(uint32_t send_port_pending);
 
index 5e8281f6f0b72532b5a9a50dcd53ce9068d502bc..7ba29a8518b56a29423172f4f88347ca162471a2 100644 (file)
@@ -15,71 +15,71 @@ struct LockValue runtime_locks[MAXTASKPARAMS];
 int runtime_locklen;
 
 // specific functions used inside critical sections
-void enqueueObject_I(void * ptr, 
-                                struct parameterwrapper ** queues, 
-                                                                                int length);
-int enqueuetasks_I(struct parameterwrapper *parameter, 
-                              struct parameterwrapper *prevptr, 
-                                                                        struct ___Object___ *ptr, 
-                                                                        int * enterflags, 
-                                                                        int numenterflags);
+void enqueueObject_I(void * ptr,
+                     struct parameterwrapper ** queues,
+                     int length);
+int enqueuetasks_I(struct parameterwrapper *parameter,
+                   struct parameterwrapper *prevptr,
+                   struct ___Object___ *ptr,
+                   int * enterflags,
+                   int numenterflags);
 
 #ifdef MULTICORE_GC
-inline __attribute__((always_inline)) 
+inline __attribute__((always_inline))
 void setupsmemmode(void) {
 #ifdef SMEML
-       bamboo_smem_mode = SMEMLOCAL;
+  bamboo_smem_mode = SMEMLOCAL;
 #elif defined SMEMF
-       bamboo_smem_mode = SMEMFIXED;
+  bamboo_smem_mode = SMEMFIXED;
 #elif defined SMEMM
-       bamboo_smem_mode = SMEMMIXED;
+  bamboo_smem_mode = SMEMMIXED;
 #elif defined SMEMG
-       bamboo_smem_mode = SMEMGLOBAL;
+  bamboo_smem_mode = SMEMGLOBAL;
 #else
-       // defaultly using local mode
-       //bamboo_smem_mode = SMEMLOCAL;
-       bamboo_smem_mode = SMEMGLOBAL;
+  // default: global mode (the local-mode assignment below is commented out)
+  //bamboo_smem_mode = SMEMLOCAL;
+  bamboo_smem_mode = SMEMGLOBAL;
 #endif
 } // void setupsmemmode(void)
 #endif
 
-inline __attribute__((always_inline)) 
+inline __attribute__((always_inline))
 void initruntimedata() {
-       int i;
-       // initialize the arrays
+  int i;
+  // initialize the arrays
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
     // startup core to initialize corestatus[]
     for(i = 0; i < NUMCORESACTIVE; ++i) {
       corestatus[i] = 1;
-      numsendobjs[i] = 0; 
+      numsendobjs[i] = 0;
       numreceiveobjs[i] = 0;
 #ifdef PROFILE
-                       // initialize the profile data arrays
-                       profilestatus[i] = 1;
+      // initialize the profile data arrays
+      profilestatus[i] = 1;
 #endif
 #ifdef MULTICORE_GC
-                       gccorestatus[i] = 1;
-                       gcnumsendobjs[i] = 0; 
+      gccorestatus[i] = 1;
+      gcnumsendobjs[i] = 0;
       gcnumreceiveobjs[i] = 0;
 #endif
     } // for(i = 0; i < NUMCORESACTIVE; ++i)
 #ifdef MULTICORE_GC
-               for(i = 0; i < NUMCORES4GC; ++i) {
-                       gcloads[i] = 0;
-                       gcrequiredmems[i] = 0;
-                       gcstopblock[i] = 0;
-                       gcfilledblocks[i] = 0;
+    for(i = 0; i < NUMCORES4GC; ++i) {
+      gcloads[i] = 0;
+      gcrequiredmems[i] = 0;
+      gcstopblock[i] = 0;
+      gcfilledblocks[i] = 0;
     } // for(i = 0; i < NUMCORES4GC; ++i)
 #ifdef GC_PROFILE
-               gc_infoIndex = 0;
-               gc_infoOverflow = false;
+    gc_infoIndex = 0;
+    gc_infoOverflow = false;
 #endif
 #endif
-               numconfirm = 0;
-               waitconfirm = false; 
-               
-               // TODO for test
-               total_num_t6 = 0;
+    numconfirm = 0;
+    waitconfirm = false;
+
+    // TODO for test
+    total_num_t6 = 0;
   }
 
   busystatus = true;
@@ -90,9 +90,9 @@ void initruntimedata() {
     msgdata[i] = -1;
   }
   msgdataindex = 0;
-       msgdatalast = 0;
+  msgdatalast = 0;
   msglength = BAMBOO_MSG_BUF_LENGTH;
-       msgdatafull = false;
+  msgdatafull = false;
   for(i = 0; i < BAMBOO_OUT_BUF_LENGTH; ++i) {
     outmsgdata[i] = -1;
   }
@@ -105,39 +105,39 @@ void initruntimedata() {
   smemflag = true;
   bamboo_cur_msp = NULL;
   bamboo_smem_size = 0;
-       totransobjqueue = createQueue_I();
+  totransobjqueue = createQueue_I();
 
 #ifdef MULTICORE_GC
-       gcflag = false;
-       gcprocessing = false;
-       gcphase = FINISHPHASE;
-       gccurr_heaptop = 0;
-       gcself_numsendobjs = 0;
-       gcself_numreceiveobjs = 0;
-       gcmarkedptrbound = 0;
-       //mgchashCreate(2000, 0.75);
-       gcpointertbl = allocateRuntimeHash_I(20);
-       //gcpointertbl = allocateMGCHash(20);
-       gcforwardobjtbl = allocateMGCHash_I(20, 3);
-       gcobj2map = 0;
-       gcmappedobj = 0;
-       gcismapped = false;
-       gcnumlobjs = 0;
-       gcheaptop = 0;
-       gctopcore = 0;
-       gctopblock = 0;
-       gcmovestartaddr = 0;
-       gctomove = false;
-       gcmovepending = 0;
-       gcblock2fill = 0;
-       gcsbstarttbl = BAMBOO_BASE_VA;
-       bamboo_smemtbl = (void *)gcsbstarttbl
-               + (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE)*sizeof(INTPTR); 
+  gcflag = false;
+  gcprocessing = false;
+  gcphase = FINISHPHASE;
+  gccurr_heaptop = 0;
+  gcself_numsendobjs = 0;
+  gcself_numreceiveobjs = 0;
+  gcmarkedptrbound = 0;
+  //mgchashCreate(2000, 0.75);
+  gcpointertbl = allocateRuntimeHash_I(20);
+  //gcpointertbl = allocateMGCHash(20);
+  gcforwardobjtbl = allocateMGCHash_I(20, 3);
+  gcobj2map = 0;
+  gcmappedobj = 0;
+  gcismapped = false;
+  gcnumlobjs = 0;
+  gcheaptop = 0;
+  gctopcore = 0;
+  gctopblock = 0;
+  gcmovestartaddr = 0;
+  gctomove = false;
+  gcmovepending = 0;
+  gcblock2fill = 0;
+  gcsbstarttbl = BAMBOO_BASE_VA;
+  bamboo_smemtbl = (void *)gcsbstarttbl
+                   + (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE)*sizeof(INTPTR);
 #else
-       // create the lock table, lockresult table and obj queue
+  // create the lock table, lockresult table and obj queue
   locktable.size = 20;
-  locktable.bucket = 
-               (struct RuntimeNode **) RUNMALLOC_I(sizeof(struct RuntimeNode *)*20);
+  locktable.bucket =
+    (struct RuntimeNode **) RUNMALLOC_I(sizeof(struct RuntimeNode *)*20);
   /* Set allocation blocks*/
   locktable.listhead=NULL;
   locktable.listtail=NULL;
@@ -147,354 +147,364 @@ void initruntimedata() {
   lock2require = 0;
   lockresult = 0;
   lockflag = false;
-       lockRedirectTbl = allocateRuntimeHash_I(20);
+  lockRedirectTbl = allocateRuntimeHash_I(20);
   objRedirectLockTbl = allocateRuntimeHash_I(20);
 #endif
 #ifndef INTERRUPT
   reside = false;
-#endif  
+#endif
   objqueue.head = NULL;
   objqueue.tail = NULL;
 
-       currtpd = NULL;
+  currtpd = NULL;
 
 #ifdef PROFILE
   stall = false;
   //isInterrupt = true;
   totalexetime = -1;
+  //interrupttime = 0;
   taskInfoIndex = 0;
   taskInfoOverflow = false;
-  /*interruptInfoIndex = 0;
-  interruptInfoOverflow = false;*/
+  // TODO
+  interruptInfoIndex = 0;
+  interruptInfoOverflow = false;
 #endif
 
-       for(i = 0; i < MAXTASKPARAMS; i++) {
-               runtime_locks[i].redirectlock = 0;
-               runtime_locks[i].value = 0;
-       }
-       runtime_locklen = 0;
+  for(i = 0; i < MAXTASKPARAMS; i++) {
+    runtime_locks[i].redirectlock = 0;
+    runtime_locks[i].value = 0;
+  }
+  runtime_locklen = 0;
 }
 
 inline __attribute__((always_inline))
 void disruntimedata() {
 #ifdef MULTICORE_GC
-       //mgchashDelete();
-       freeRuntimeHash(gcpointertbl);
-       //freeMGCHash(gcpointertbl);
-       freeMGCHash(gcforwardobjtbl);
+  //mgchashDelete();
+  freeRuntimeHash(gcpointertbl);
+  //freeMGCHash(gcpointertbl);
+  freeMGCHash(gcforwardobjtbl);
 #else
-       freeRuntimeHash(lockRedirectTbl);
-       freeRuntimeHash(objRedirectLockTbl);
-       RUNFREE(locktable.bucket);
+  freeRuntimeHash(lockRedirectTbl);
+  freeRuntimeHash(objRedirectLockTbl);
+  RUNFREE(locktable.bucket);
 #endif
-       if(activetasks != NULL) {
-               genfreehashtable(activetasks);
-       }
-       if(currtpd != NULL) {
-               RUNFREE(currtpd->parameterArray);
-               RUNFREE(currtpd);
-               currtpd = NULL;
-       }
-       BAMBOO_LOCAL_MEM_CLOSE();
-       BAMBOO_SHARE_MEM_CLOSE();
+  if(activetasks != NULL) {
+    genfreehashtable(activetasks);
+  }
+  if(currtpd != NULL) {
+    RUNFREE(currtpd->parameterArray);
+    RUNFREE(currtpd);
+    currtpd = NULL;
+  }
+  BAMBOO_LOCAL_MEM_CLOSE();
+  BAMBOO_SHARE_MEM_CLOSE();
 }
 
 inline __attribute__((always_inline))
 bool checkObjQueue() {
-       bool rflag = false;
-       struct transObjInfo * objInfo = NULL;
-       int grount = 0;
+  bool rflag = false;
+  struct transObjInfo * objInfo = NULL;
+  int grount = 0;
 
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-       bool isChecking = false;
-       if(!isEmpty(&objqueue)) {
-               profileTaskStart("objqueue checking");
-               isChecking = true;
-       } // if(!isEmpty(&objqueue))
+  bool isChecking = false;
+  if(!isEmpty(&objqueue)) {
+    profileTaskStart("objqueue checking");
+    isChecking = true;
+  }       // if(!isEmpty(&objqueue))
 #endif
 #endif
 
-       while(!isEmpty(&objqueue)) {
-               void * obj = NULL;
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+  while(!isEmpty(&objqueue)) {
+    void * obj = NULL;
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xf001);
+    BAMBOO_DEBUGPRINT(0xf001);
 #endif
 #ifdef PROFILE
-               //isInterrupt = false;
-#endif 
+    //isInterrupt = false;
+#endif
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xeee1);
+    BAMBOO_DEBUGPRINT(0xeee1);
 #endif
-               rflag = true;
-               objInfo = (struct transObjInfo *)getItem(&objqueue); 
-               obj = objInfo->objptr;
+    rflag = true;
+    objInfo = (struct transObjInfo *)getItem(&objqueue);
+    obj = objInfo->objptr;
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG((int)obj);
+    BAMBOO_DEBUGPRINT_REG((int)obj);
 #endif
-               // grab lock and flush the obj
-               grount = 0;
-               getwritelock_I(obj);
-               while(!lockflag) {
-                       BAMBOO_WAITING_FOR_LOCK(0);
-               } // while(!lockflag)
-               grount = lockresult;
+    // grab lock and flush the obj
+    grount = 0;
+    getwritelock_I(obj);
+    while(!lockflag) {
+      BAMBOO_WAITING_FOR_LOCK(0);
+         // check for outgoing sends
+         if (isMsgHanging) {
+               extern inline void send_hanging_msg(bool);
+               send_hanging_msg(true);
+         } 
+    }             // while(!lockflag)
+    grount = lockresult;
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(grount);
+    BAMBOO_DEBUGPRINT_REG(grount);
 #endif
 
-               lockresult = 0;
-               lockobj = 0;
-               lock2require = 0;
-               lockflag = false;
+    lockresult = 0;
+    lockobj = 0;
+    lock2require = 0;
+    lockflag = false;
 #ifndef INTERRUPT
-               reside = false;
+    reside = false;
 #endif
 
-               if(grount == 1) {
-                       int k = 0;
-                       // flush the object
+    if(grount == 1) {
+      int k = 0;
+      // flush the object
 #ifdef CACHEFLUSH
-                       BAMBOO_CACHE_FLUSH_RANGE((int)obj,sizeof(int));
-                       BAMBOO_CACHE_FLUSH_RANGE((int)obj, 
-                                       classsize[((struct ___Object___ *)obj)->type]);
-#endif
-                       // enqueue the object
-                       for(k = 0; k < objInfo->length; ++k) {
-                               int taskindex = objInfo->queues[2 * k];
-                               int paramindex = objInfo->queues[2 * k + 1];
-                               struct parameterwrapper ** queues = 
-                                       &(paramqueues[BAMBOO_NUM_OF_CORE][taskindex][paramindex]);
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT_REG(taskindex);
-                               BAMBOO_DEBUGPRINT_REG(paramindex);
-                               struct ___Object___ * tmpptr = (struct ___Object___ *)obj;
-                               tprintf("Process %x(%d): receive obj %x(%lld), ptrflag %x\n", 
-                                                               BAMBOO_NUM_OF_CORE, BAMBOO_NUM_OF_CORE, (int)obj, 
-                                                               (long)obj, tmpptr->flag);
-#endif
-                               enqueueObject_I(obj, queues, 1);
-#ifdef DEBUG                            
-                               BAMBOO_DEBUGPRINT_REG(hashsize(activetasks));
-#endif
-                       } // for(k = 0; k < objInfo->length; ++k)
-                       releasewritelock_I(obj);
-                       RUNFREE(objInfo->queues);
-                       RUNFREE(objInfo);
-               } else {
-                       // can not get lock
-                       // put it at the end of the queue if no update version in the queue
-                       struct QueueItem * qitem = getHead(&objqueue);
-                       struct QueueItem * prev = NULL;
-                       while(qitem != NULL) {
-                               struct transObjInfo * tmpinfo = 
-                                       (struct transObjInfo *)(qitem->objectptr);
-                               if(tmpinfo->objptr == obj) {
-                                       // the same object in the queue, which should be enqueued
-                                       // recently. Current one is outdate, do not re-enqueue it
-                                       RUNFREE(objInfo->queues);
-                                       RUNFREE(objInfo);
-                                       goto objqueuebreak;
-                               } else {
-                                       prev = qitem;
-                               } // if(tmpinfo->objptr == obj)
-                               qitem = getNextQueueItem(prev);
-                       } // while(qitem != NULL)
-                       // try to execute active tasks already enqueued first
-                       addNewItem_I(&objqueue, objInfo);
+      BAMBOO_CACHE_FLUSH_RANGE((int)obj,sizeof(int));
+      BAMBOO_CACHE_FLUSH_RANGE((int)obj,
+                               classsize[((struct ___Object___ *)obj)->type]);
+#endif
+      // enqueue the object
+      for(k = 0; k < objInfo->length; ++k) {
+       int taskindex = objInfo->queues[2 * k];
+       int paramindex = objInfo->queues[2 * k + 1];
+       struct parameterwrapper ** queues =
+         &(paramqueues[BAMBOO_NUM_OF_CORE][taskindex][paramindex]);
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT_REG(taskindex);
+       BAMBOO_DEBUGPRINT_REG(paramindex);
+       struct ___Object___ * tmpptr = (struct ___Object___ *)obj;
+       tprintf("Process %x(%d): receive obj %x(%lld), ptrflag %x\n",
+               BAMBOO_NUM_OF_CORE, BAMBOO_NUM_OF_CORE, (int)obj,
+               (long)obj, tmpptr->flag);
+#endif
+       enqueueObject_I(obj, queues, 1);
+#ifdef DEBUG
+       BAMBOO_DEBUGPRINT_REG(hashsize(activetasks));
+#endif
+      }                   // for(k = 0; k < objInfo->length; ++k)
+      releasewritelock_I(obj);
+      RUNFREE(objInfo->queues);
+      RUNFREE(objInfo);
+    } else {
+      // can not get lock
+      // put it at the end of the queue if no updated version is in the queue
+      struct QueueItem * qitem = getHead(&objqueue);
+      struct QueueItem * prev = NULL;
+      while(qitem != NULL) {
+                 struct transObjInfo * tmpinfo =
+                         (struct transObjInfo *)(qitem->objectptr);
+                 if(tmpinfo->objptr == obj) {
+                         // the same object is already in the queue, so it must have been
+                         // enqueued more recently. The current one is outdated; do not re-enqueue it
+                         RUNFREE(objInfo->queues);
+                         RUNFREE(objInfo);
+                         goto objqueuebreak;
+                 } else {
+                         prev = qitem;
+                 }                         // if(tmpinfo->objptr == obj)
+                 qitem = getNextQueueItem(prev);
+         }                   // while(qitem != NULL)
+                          // try to execute active tasks already enqueued first
+      addNewItem_I(&objqueue, objInfo);
 #ifdef PROFILE
-                       //isInterrupt = true;
+      //isInterrupt = true;
 #endif
 objqueuebreak:
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xf000);
+      BAMBOO_DEBUGPRINT(0xf000);
 #endif
-                       break;
-               } // if(grount == 1)
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      break;
+    }             // if(grount == 1)
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xf000);
+    BAMBOO_DEBUGPRINT(0xf000);
 #endif
-       } // while(!isEmpty(&objqueue))
+  }       // while(!isEmpty(&objqueue))
 
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-       if(isChecking) {
-               profileTaskEnd();
-       } // if(isChecking)
+  if(isChecking) {
+    profileTaskEnd();
+  }       // if(isChecking)
 #endif
 #endif
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xee02);
+  BAMBOO_DEBUGPRINT(0xee02);
 #endif
-       return rflag;
+  return rflag;
 }
 
 inline __attribute__((always_inline))
 void checkCoreStatus() {
-       bool allStall = false;
-       int i = 0;
-       int sumsendobj = 0;
-       if((!waitconfirm) || 
-                       (waitconfirm && (numconfirm == 0))) {
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xee04);
-               BAMBOO_DEBUGPRINT_REG(waitconfirm);
-#endif
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xf001);
-#endif
-               corestatus[BAMBOO_NUM_OF_CORE] = 0;
-               numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
-               numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
-               // check the status of all cores
-               allStall = true;
-#ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE);
-#endif
-               for(i = 0; i < NUMCORESACTIVE; ++i) {
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xe000 + corestatus[i]);
-#endif
-                       if(corestatus[i] != 0) {
-                               allStall = false;
-                               break;
-                       }
-               } // for(i = 0; i < NUMCORESACTIVE; ++i)
-               if(allStall) {
-                       // check if the sum of send objs and receive obj are the same
-                       // yes->check if the info is the latest; no->go on executing
-                       sumsendobj = 0;
-                       for(i = 0; i < NUMCORESACTIVE; ++i) {
-                               sumsendobj += numsendobjs[i];
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
-#endif
-                       } // for(i = 0; i < NUMCORESACTIVE; ++i)        
-                       for(i = 0; i < NUMCORESACTIVE; ++i) {
-                               sumsendobj -= numreceiveobjs[i];
-#ifdef DEBUG
-                               BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
-#endif
-                       } // for(i = 0; i < NUMCORESACTIVE; ++i)
-                       if(0 == sumsendobj) {
-                               if(!waitconfirm) {
-                                       // the first time found all cores stall
-                                       // send out status confirm msg to all other cores
-                                       // reset the corestatus array too
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xee05);
-#endif
-                                       corestatus[BAMBOO_NUM_OF_CORE] = 1;
-                                       waitconfirm = true;
-                                       numconfirm = NUMCORESACTIVE - 1;
-                                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                                       for(i = 1; i < NUMCORESACTIVE; ++i) {   
-                                               corestatus[i] = 1;
-                                               // send status confirm msg to core i
-                                               send_msg_1(i, STATUSCONFIRM);
-                                       } // for(i = 1; i < NUMCORESACTIVE; ++i)
-                                       return;
-                               } else {
-                                       // all the core status info are the latest
-                                       // terminate; for profiling mode, send request to all
-                                       // other cores to pour out profiling data
-#ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xee06);
-#endif                                           
-                        
+  bool allStall = false;
+  int i = 0;
+  int sumsendobj = 0;
+  if((!waitconfirm) ||
+     (waitconfirm && (numconfirm == 0))) {
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xee04);
+    BAMBOO_DEBUGPRINT_REG(waitconfirm);
+#endif
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xf001);
+#endif
+    corestatus[BAMBOO_NUM_OF_CORE] = 0;
+    numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
+    numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
+    // check the status of all cores
+    allStall = true;
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE);
+#endif
+    for(i = 0; i < NUMCORESACTIVE; ++i) {
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xe000 + corestatus[i]);
+#endif
+      if(corestatus[i] != 0) {
+                 allStall = false;
+                 break;
+      }
+    }             // for(i = 0; i < NUMCORESACTIVE; ++i)
+    if(allStall) {
+      // check if the sum of send objs and receive obj are the same
+      // yes->check if the info is the latest; no->go on executing
+      sumsendobj = 0;
+      for(i = 0; i < NUMCORESACTIVE; ++i) {
+                 sumsendobj += numsendobjs[i];
+#ifdef DEBUG
+                 BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
+#endif
+      }                   // for(i = 0; i < NUMCORESACTIVE; ++i)
+      for(i = 0; i < NUMCORESACTIVE; ++i) {
+                 sumsendobj -= numreceiveobjs[i];
+#ifdef DEBUG
+                 BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
+#endif
+      }                   // for(i = 0; i < NUMCORESACTIVE; ++i)
+      if(0 == sumsendobj) {
+       if(!waitconfirm) {
+         // the first time all cores are found to be stalled
+         // send out status confirm msg to all other cores
+         // reset the corestatus array too
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xee05);
+#endif
+         corestatus[BAMBOO_NUM_OF_CORE] = 1;
+         waitconfirm = true;
+         numconfirm = NUMCORESACTIVE - 1;
+         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+         for(i = 1; i < NUMCORESACTIVE; ++i) {
+           corestatus[i] = 1;
+           // send status confirm msg to core i
+           send_msg_1(i, STATUSCONFIRM);
+         }                               // for(i = 1; i < NUMCORESACTIVE; ++i)
+         return;
+       } else {
+         // all the core status info are the latest
+         // terminate; for profiling mode, send request to all
+         // other cores to pour out profiling data
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xee06);
+#endif
+
 #ifdef USEIO
-                                       totalexetime = BAMBOO_GET_EXE_TIME() - bamboo_start_time;
+         totalexetime = BAMBOO_GET_EXE_TIME() - bamboo_start_time;
 #else
+#ifdef PROFILE
+         //BAMBOO_DEBUGPRINT_REG(interrupttime);
+#endif
 
-                                       BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME() - bamboo_start_time);
-                                       //BAMBOO_DEBUGPRINT_REG(total_num_t6); // TODO for test
-                                       BAMBOO_DEBUGPRINT(0xbbbbbbbb);
+         BAMBOO_DEBUGPRINT(BAMBOO_GET_EXE_TIME() - bamboo_start_time);
+         //BAMBOO_DEBUGPRINT_REG(total_num_t6); // TODO for test
+         BAMBOO_DEBUGPRINT(0xbbbbbbbb);
 #endif
-                                       // profile mode, send msgs to other cores to request pouring
-                                       // out progiling data
+         // profile mode, send msgs to other cores to request pouring
+         // out profiling data
 #ifdef PROFILE
-                                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xf000);
+         BAMBOO_DEBUGPRINT(0xf000);
 #endif
-                                       for(i = 1; i < NUMCORESACTIVE; ++i) {
-                                               // send profile request msg to core i
-                                               send_msg_2(i, PROFILEOUTPUT, totalexetime);
-                                       } // for(i = 1; i < NUMCORESACTIVE; ++i)
-                                       // pour profiling data on startup core
-                                       outputProfileData();
-                                       while(true) {
-                                               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+         for(i = 1; i < NUMCORESACTIVE; ++i) {
+           // send profile request msg to core i
+           send_msg_2(i, PROFILEOUTPUT, totalexetime);
+         } // for(i = 1; i < NUMCORESACTIVE; ++i)
+         // pour profiling data on startup core
+         outputProfileData();
+         while(true) {
+           BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
-                                               BAMBOO_DEBUGPRINT(0xf001);
+           BAMBOO_DEBUGPRINT(0xf001);
 #endif
-                                               profilestatus[BAMBOO_NUM_OF_CORE] = 0;
-                                               // check the status of all cores
-                                               allStall = true;
+           profilestatus[BAMBOO_NUM_OF_CORE] = 0;
+           // check the status of all cores
+           allStall = true;
 #ifdef DEBUG
-                                               BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE);
-#endif 
-                                               for(i = 0; i < NUMCORESACTIVE; ++i) {
+           BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE);
+#endif
+           for(i = 0; i < NUMCORESACTIVE; ++i) {
 #ifdef DEBUG
-                                                       BAMBOO_DEBUGPRINT(0xe000 + profilestatus[i]);
+             BAMBOO_DEBUGPRINT(0xe000 + profilestatus[i]);
 #endif
-                                                       if(profilestatus[i] != 0) {
-                                                               allStall = false;
-                                                               break;
-                                                       }
-                                               }  // for(i = 0; i < NUMCORESACTIVE; ++i)
-                                               if(!allStall) {
-                                                       int halt = 100;
-                                                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+             if(profilestatus[i] != 0) {
+               allStall = false;
+               break;
+             }
+           }  // for(i = 0; i < NUMCORESACTIVE; ++i)
+           if(!allStall) {
+             int halt = 100;
+             BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
-                                                       BAMBOO_DEBUGPRINT(0xf000);
+             BAMBOO_DEBUGPRINT(0xf000);
 #endif
-                                                       while(halt--) {
-                                                       }
-                                               } else {
-                                                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                                                       break;
-                                               } // if(!allStall)
-                                       } // while(true)
+             while(halt--) {
+             }
+           } else {
+             BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+             break;
+           }                                     // if(!allStall)
+         }                               // while(true)
 #endif
 
-                                       // gc_profile mode, ourput gc prfiling data
+         // gc_profile mode, output gc profiling data
 #ifdef MULTICORE_GC
 #ifdef GC_PROFILE
-                                       gc_outputProfileData();
+         gc_outputProfileData();
 #endif // #ifdef GC_PROFILE
 #endif // #ifdef MULTICORE_GC
-                                       disruntimedata();
-                                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                                       terminate(); // All done.
-                               } // if(!waitconfirm)
-                       } else {
-                               // still some objects on the fly on the network
-                               // reset the waitconfirm and numconfirm
+         disruntimedata();
+         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+         terminate();                               // All done.
+       }                         // if(!waitconfirm)
+      } else {
+       // still some objects on the fly on the network
+       // reset the waitconfirm and numconfirm
 #ifdef DEBUG
-                                       BAMBOO_DEBUGPRINT(0xee07);
+       BAMBOO_DEBUGPRINT(0xee07);
 #endif
-                               waitconfirm = false;
-                               numconfirm = 0;
-                       } //  if(0 == sumsendobj)
-               } else {
-                       // not all cores are stall, keep on waiting
+       waitconfirm = false;
+       numconfirm = 0;
+      }                   //  if(0 == sumsendobj)
+    } else {
+      // not all cores are stalled, keep on waiting
 #ifdef DEBUG
-                       BAMBOO_DEBUGPRINT(0xee08);
+      BAMBOO_DEBUGPRINT(0xee08);
 #endif
-                       waitconfirm = false;
-                       numconfirm = 0;
-               } //  if(allStall)
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      waitconfirm = false;
+      numconfirm = 0;
+    }             //  if(allStall)
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xf000);
+    BAMBOO_DEBUGPRINT(0xf000);
 #endif
-       } // if((!waitconfirm) ||
+  }       // if((!waitconfirm) ||
 }
 
 // main function for each core
@@ -513,8 +523,8 @@ inline void run(void * arg) {
   BAMBOO_DEBUGPRINT(STARTUPCORE);
 #endif
 
-       // initialize runtime data structures
-       initruntimedata();
+  // initialize runtime data structures
+  initruntimedata();
 
   // other architecture related initialization
   initialization();
@@ -524,114 +534,114 @@ inline void run(void * arg) {
 
   // main process of the execution module
   if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-       // non-executing cores, only processing communications
+    // non-executing cores, only processing communications
     activetasks = NULL;
 /*#ifdef PROFILE
         BAMBOO_DEBUGPRINT(0xee01);
         BAMBOO_DEBUGPRINT_REG(taskInfoIndex);
         BAMBOO_DEBUGPRINT_REG(taskInfoOverflow);
-               profileTaskStart("msg handling");
+                profileTaskStart("msg handling");
         }
  #endif*/
 #ifdef PROFILE
     //isInterrupt = false;
 #endif
-               fakeExecution();
+    fakeExecution();
   } else {
-         /* Create queue of active tasks */
-         activetasks=
-                       genallocatehashtable((unsigned int(*) (void *)) &hashCodetpd,
-                           (int(*) (void *,void *)) &comparetpd);
-         
-         /* Process task information */
-         processtasks();
-         
-         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-                 /* Create startup object */
-                 createstartupobject(argc, argv);
-         }
+    /* Create queue of active tasks */
+    activetasks=
+      genallocatehashtable((unsigned int (*)(void *)) &hashCodetpd,
+                           (int (*)(void *,void *)) &comparetpd);
+
+    /* Process task information */
+    processtasks();
+
+    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+      /* Create startup object */
+      createstartupobject(argc, argv);
+    }
 
 #ifdef DEBUG
-         BAMBOO_DEBUGPRINT(0xee00);
+    BAMBOO_DEBUGPRINT(0xee00);
 #endif
 
-         while(true) {
+    while(true) {
 #ifdef MULTICORE_GC
-                       // check if need to do GC
-                       gc(NULL);
+      // check if need to do GC
+      gc(NULL);
 #endif
 
-                 // check if there are new active tasks can be executed
-                 executetasks();
-                       if(busystatus) {
-                               sendStall = false;
-                       }
+      // check if there are new active tasks that can be executed
+      executetasks();
+      if(busystatus) {
+       sendStall = false;
+      }
 
 #ifndef INTERRUPT
-                 while(receiveObject() != -1) {
-                 }
-#endif  
+      while(receiveObject() != -1) {
+      }
+#endif
 
 #ifdef DEBUG
-                 BAMBOO_DEBUGPRINT(0xee01);
-#endif  
-                 
-                 // check if there are some pending objects, 
-                       // if yes, enqueue them and executetasks again
-                 tocontinue = checkObjQueue();
+      BAMBOO_DEBUGPRINT(0xee01);
+#endif
+
+      // check if there are some pending objects,
+      // if yes, enqueue them and executetasks again
+      tocontinue = checkObjQueue();
 
-                 if(!tocontinue) {
-                         // check if stop
-                         if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-                                 if(isfirst) {
+      if(!tocontinue) {
+       // check if stop
+       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+         if(isfirst) {
 #ifdef DEBUG
-                                         BAMBOO_DEBUGPRINT(0xee03);
+           BAMBOO_DEBUGPRINT(0xee03);
 #endif
-                                         isfirst = false;
-                                 }
-                                       checkCoreStatus();
-                         } else {
-                                 if(!sendStall) {
+           isfirst = false;
+         }
+         checkCoreStatus();
+       } else {
+         if(!sendStall) {
 #ifdef DEBUG
-                                         BAMBOO_DEBUGPRINT(0xee09);
+           BAMBOO_DEBUGPRINT(0xee09);
 #endif
 #ifdef PROFILE
-                                         if(!stall) {
+           if(!stall) {
 #endif
-                                                 if(isfirst) {
-                                                         // wait for some time
-                                                         int halt = 10000;
+           if(isfirst) {
+             // wait for some time
+             int halt = 10000;
 #ifdef DEBUG
-                                                         BAMBOO_DEBUGPRINT(0xee0a);
+             BAMBOO_DEBUGPRINT(0xee0a);
 #endif
-                                                         while(halt--) {
-                                                         }
-                                                         isfirst = false;
-                                                 } else {
-                                                         // send StallMsg to startup core
+             while(halt--) {
+             }
+             isfirst = false;
+           } else {
+             // send StallMsg to startup core
 #ifdef DEBUG
-                                                         BAMBOO_DEBUGPRINT(0xee0b);
+             BAMBOO_DEBUGPRINT(0xee0b);
 #endif
-                                                         // send stall msg
-                                                               send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE, 
-                                                                                      self_numsendobjs, self_numreceiveobjs);
-                                                         sendStall = true;
-                                                         isfirst = true;
-                                                         busystatus = false;
-                                                 }
+             // send stall msg
+             send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE,
+                        self_numsendobjs, self_numreceiveobjs);
+             sendStall = true;
+             isfirst = true;
+             busystatus = false;
+           }
 #ifdef PROFILE
-                                         }
+         }
 #endif
-                                 } else {
-                                         isfirst = true;
-                                         busystatus = false;
+         } else {
+           isfirst = true;
+           busystatus = false;
 #ifdef DEBUG
-                                         BAMBOO_DEBUGPRINT(0xee0c);
+           BAMBOO_DEBUGPRINT(0xee0c);
 #endif
-                                 } // if(!sendStall)
-                         } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) 
-                 } // if(!tocontinue)
-         } // while(true) 
+         }                         // if(!sendStall)
+       }                   // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
+      }             // if(!tocontinue)
+    }       // while(true)
   } // if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)
 
 } // run()
@@ -643,24 +653,24 @@ struct ___createstartupobject____I_locals {
   struct ArrayObject * ___stringarray___;
 }; // struct ___createstartupobject____I_locals
 
-void createstartupobject(int argc, 
-                                    char ** argv) {
+void createstartupobject(int argc,
+                         char ** argv) {
   int i;
 
   /* Allocate startup object     */
 #ifdef MULTICORE_GC
-       struct ___createstartupobject____I_locals ___locals___={2, NULL, NULL, NULL};
+  struct ___createstartupobject____I_locals ___locals___={2, NULL, NULL, NULL};
   struct ___StartupObject___ *startupobject=
-               (struct ___StartupObject___*) allocate_new(&___locals___, STARTUPTYPE);
-       ___locals___.___startupobject___ = startupobject;
+    (struct ___StartupObject___*) allocate_new(&___locals___, STARTUPTYPE);
+  ___locals___.___startupobject___ = startupobject;
   struct ArrayObject * stringarray=
-               allocate_newarray(&___locals___, STRINGARRAYTYPE, argc-1);
-       ___locals___.___stringarray___ = stringarray;
+    allocate_newarray(&___locals___, STRINGARRAYTYPE, argc-1);
+  ___locals___.___stringarray___ = stringarray;
 #else
   struct ___StartupObject___ *startupobject=
-               (struct ___StartupObject___*) allocate_new(STARTUPTYPE);
+    (struct ___StartupObject___*) allocate_new(STARTUPTYPE);
   struct ArrayObject * stringarray=
-               allocate_newarray(STRINGARRAYTYPE, argc-1);
+    allocate_newarray(STRINGARRAYTYPE, argc-1);
 #endif
   /* Build array of strings */
   startupobject->___parameters___=stringarray;
@@ -672,7 +682,7 @@ void createstartupobject(int argc,
     struct ___String___ *newstring=NewString(argv[i],length);
 #endif
     ((void **)(((char *)&stringarray->___length___)+sizeof(int)))[i-1]=
-                       newstring;
+      newstring;
   }
 
   startupobject->version = 0;
@@ -695,8 +705,8 @@ int hashCodetpd(struct taskparamdescriptor *ftd) {
   return hash;
 }
 
-int comparetpd(struct taskparamdescriptor *ftd1, 
-                          struct taskparamdescriptor *ftd2) {
+int comparetpd(struct taskparamdescriptor *ftd1,
+               struct taskparamdescriptor *ftd2) {
   int i;
   if (ftd1->task!=ftd2->task)
     return 0;
@@ -708,12 +718,12 @@ int comparetpd(struct taskparamdescriptor *ftd1,
 
 /* This function sets a tag. */
 #ifdef MULTICORE_GC
-void tagset(void *ptr, 
-                       struct ___Object___ * obj, 
-                                               struct ___TagDescriptor___ * tagd) {
+void tagset(void *ptr,
+            struct ___Object___ * obj,
+            struct ___TagDescriptor___ * tagd) {
 #else
-void tagset(struct ___Object___ * obj, 
-                       struct ___TagDescriptor___ * tagd) {
+void tagset(struct ___Object___ * obj,
+            struct ___TagDescriptor___ * tagd) {
 #endif
   struct ArrayObject * ao=NULL;
   struct ___Object___ * tagptr=obj->___tags___;
@@ -729,7 +739,7 @@ void tagset(struct ___Object___ * obj,
 #ifdef MULTICORE_GC
       int ptrarray[]={2, (int) ptr, (int) obj, (int)tagd};
       struct ArrayObject * ao=
-                               allocate_newarray(&ptrarray,TAGARRAYTYPE,TAGARRAYINTERVAL);
+        allocate_newarray(&ptrarray,TAGARRAYTYPE,TAGARRAYINTERVAL);
       obj=(struct ___Object___ *)ptrarray[2];
       tagd=(struct ___TagDescriptor___ *)ptrarray[3];
       td=(struct ___TagDescriptor___ *) obj->___tags___;
@@ -747,7 +757,7 @@ void tagset(struct ___Object___ * obj,
       struct ArrayObject *ao=(struct ArrayObject *) tagptr;
       for(i=0; i<ao->___cachedCode___; i++) {
        struct ___TagDescriptor___ * td=
-               ARRAYGET(ao, struct ___TagDescriptor___*, i);
+         ARRAYGET(ao, struct ___TagDescriptor___*, i);
        if (td==tagd) {
          return;
        }
@@ -759,20 +769,20 @@ void tagset(struct ___Object___ * obj,
 #ifdef MULTICORE_GC
        int ptrarray[]={2,(int) ptr, (int) obj, (int) tagd};
        struct ArrayObject * aonew=
-               allocate_newarray(&ptrarray,TAGARRAYTYPE,
-                                             TAGARRAYINTERVAL+ao->___length___);
+         allocate_newarray(&ptrarray,TAGARRAYTYPE,
+                           TAGARRAYINTERVAL+ao->___length___);
        obj=(struct ___Object___ *)ptrarray[2];
        tagd=(struct ___TagDescriptor___ *) ptrarray[3];
        ao=(struct ArrayObject *)obj->___tags___;
 #else
        struct ArrayObject * aonew=
-               allocate_newarray(TAGARRAYTYPE,TAGARRAYINTERVAL+ao->___length___);
+         allocate_newarray(TAGARRAYTYPE,TAGARRAYINTERVAL+ao->___length___);
 #endif
 
        aonew->___cachedCode___=ao->___length___+1;
        for(i=0; i<ao->___length___; i++) {
-         ARRAYSET(aonew, struct ___TagDescriptor___*, i, 
-                                    ARRAYGET(ao, struct ___TagDescriptor___*, i));
+         ARRAYSET(aonew, struct ___TagDescriptor___*, i,
+                  ARRAYGET(ao, struct ___TagDescriptor___*, i));
        }
        ARRAYSET(aonew, struct ___TagDescriptor___ *, ao->___length___, tagd);
       }
@@ -787,12 +797,12 @@ void tagset(struct ___Object___ * obj,
 #ifdef MULTICORE_GC
       int ptrarray[]={2, (int) ptr, (int) obj, (int)tagd};
       struct ArrayObject * ao=
-                               allocate_newarray(&ptrarray,OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
+        allocate_newarray(&ptrarray,OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
       obj=(struct ___Object___ *)ptrarray[2];
       tagd=(struct ___TagDescriptor___ *)ptrarray[3];
 #else
       struct ArrayObject * ao=
-                               allocate_newarray(OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
+        allocate_newarray(OBJECTARRAYTYPE,OBJECTARRAYINTERVAL);
 #endif
       ARRAYSET(ao, struct ___Object___ *, 0, tagd->flagptr);
       ARRAYSET(ao, struct ___Object___ *, 1, obj);
@@ -807,19 +817,19 @@ void tagset(struct ___Object___ * obj,
 #ifdef MULTICORE_GC
        int ptrarray[]={2, (int) ptr, (int) obj, (int)tagd};
        struct ArrayObject * aonew=
-               allocate_newarray(&ptrarray,OBJECTARRAYTYPE,
-                                             OBJECTARRAYINTERVAL+ao->___length___);
+         allocate_newarray(&ptrarray,OBJECTARRAYTYPE,
+                           OBJECTARRAYINTERVAL+ao->___length___);
        obj=(struct ___Object___ *)ptrarray[2];
        tagd=(struct ___TagDescriptor___ *)ptrarray[3];
        ao=(struct ArrayObject *)tagd->flagptr;
 #else
        struct ArrayObject * aonew=
-               allocate_newarray(OBJECTARRAYTYPE,OBJECTARRAYINTERVAL+ao->___length___);
+         allocate_newarray(OBJECTARRAYTYPE,OBJECTARRAYINTERVAL+ao->___length___);
 #endif
        aonew->___cachedCode___=ao->___cachedCode___+1;
        for(i=0; i<ao->___length___; i++) {
-         ARRAYSET(aonew, struct ___Object___*, i, 
-                                    ARRAYGET(ao, struct ___Object___*, i));
+         ARRAYSET(aonew, struct ___Object___*, i,
+                  ARRAYGET(ao, struct ___Object___*, i));
        }
        ARRAYSET(aonew, struct ___Object___ *, ao->___cachedCode___, obj);
        tagd->flagptr=(struct ___Object___ *) aonew;
@@ -830,12 +840,12 @@ void tagset(struct ___Object___ * obj,
 
 /* This function clears a tag. */
 #ifdef MULTICORE_GC
-void tagclear(void *ptr, 
-                         struct ___Object___ * obj, 
-                                                       struct ___TagDescriptor___ * tagd) {
+void tagclear(void *ptr,
+              struct ___Object___ * obj,
+              struct ___TagDescriptor___ * tagd) {
 #else
-void tagclear(struct ___Object___ * obj, 
-                         struct ___TagDescriptor___ * tagd) {
+void tagclear(struct ___Object___ * obj,
+              struct ___TagDescriptor___ * tagd) {
 #endif
   /* We'll assume that tag is alway there.
      Need to statically check for this of course. */
@@ -849,12 +859,12 @@ void tagclear(struct ___Object___ * obj,
     int i;
     for(i=0; i<ao->___cachedCode___; i++) {
       struct ___TagDescriptor___ * td=
-                               ARRAYGET(ao, struct ___TagDescriptor___ *, i);
+        ARRAYGET(ao, struct ___TagDescriptor___ *, i);
       if (td==tagd) {
        ao->___cachedCode___--;
        if (i<ao->___cachedCode___)
-         ARRAYSET(ao, struct ___TagDescriptor___ *, i, 
-                               ARRAYGET(ao, struct ___TagDescriptor___ *, ao->___cachedCode___));
+         ARRAYSET(ao, struct ___TagDescriptor___ *, i,
+                  ARRAYGET(ao, struct ___TagDescriptor___ *, ao->___cachedCode___));
        ARRAYSET(ao, struct ___TagDescriptor___ *, ao->___cachedCode___, NULL);
        if (ao->___cachedCode___==0)
          obj->___tags___=NULL;
@@ -876,8 +886,8 @@ PROCESSCLEAR:
        if (tobj==obj) {
          ao->___cachedCode___--;
          if (i<ao->___cachedCode___)
-           ARRAYSET(ao, struct ___Object___ *, i, 
-                                       ARRAYGET(ao, struct ___Object___ *, ao->___cachedCode___));
+           ARRAYSET(ao, struct ___Object___ *, i,
+                    ARRAYGET(ao, struct ___Object___ *, ao->___cachedCode___));
          ARRAYSET(ao, struct ___Object___ *, ao->___cachedCode___, NULL);
          if (ao->___cachedCode___==0)
            tagd->flagptr=NULL;
@@ -892,11 +902,11 @@ ENDCLEAR:
 
 /* This function allocates a new tag. */
 #ifdef MULTICORE_GC
-struct ___TagDescriptor___ * allocate_tag(void *ptr, 
-                                                     int index) {
+struct ___TagDescriptor___ * allocate_tag(void *ptr,
+                                          int index) {
   struct ___TagDescriptor___ * v=
-               (struct ___TagDescriptor___ *) FREEMALLOC((struct garbagelist *) ptr, 
-                                                                     classsize[TAGTYPE]);
+    (struct ___TagDescriptor___ *) FREEMALLOC((struct garbagelist *) ptr,
+                                              classsize[TAGTYPE]);
 #else
 struct ___TagDescriptor___ * allocate_tag(int index) {
   struct ___TagDescriptor___ * v=FREEMALLOC(classsize[TAGTYPE]);
@@ -911,21 +921,21 @@ struct ___TagDescriptor___ * allocate_tag(int index) {
 /* This function updates the flag for object ptr.  It or's the flag
    with the or mask and and's it with the andmask. */
 
-void flagbody(struct ___Object___ *ptr, 
-                         int flag, 
-                                                       struct parameterwrapper ** queues, 
-                                                       int length, 
-                                                       bool isnew);
+void flagbody(struct ___Object___ *ptr,
+              int flag,
+              struct parameterwrapper ** queues,
+              int length,
+              bool isnew);
 
 int flagcomp(const int *val1, const int *val2) {
   return (*val1)-(*val2);
 }
 
-void flagorand(void * ptr, 
-                          int ormask, 
-                                                        int andmask, 
-                                                        struct parameterwrapper ** queues, 
-                                                        int length) {
+void flagorand(void * ptr,
+               int ormask,
+               int andmask,
+               struct parameterwrapper ** queues,
+               int length) {
   {
     int oldflag=((int *)ptr)[1];
     int flag=ormask|oldflag;
@@ -934,9 +944,9 @@ void flagorand(void * ptr,
   }
 }
 
-bool intflagorand(void * ptr, 
-                             int ormask, 
-                                                                       int andmask) {
+bool intflagorand(void * ptr,
+                  int ormask,
+                  int andmask) {
   {
     int oldflag=((int *)ptr)[1];
     int flag=ormask|oldflag;
@@ -950,20 +960,20 @@ bool intflagorand(void * ptr,
   }
 }
 
-void flagorandinit(void * ptr, 
-                              int ormask, 
-                                                                        int andmask) {
+void flagorandinit(void * ptr,
+                   int ormask,
+                   int andmask) {
   int oldflag=((int *)ptr)[1];
   int flag=ormask|oldflag;
   flag&=andmask;
   flagbody(ptr,flag,NULL,0,true);
 }
 
-void flagbody(struct ___Object___ *ptr, 
-                         int flag, 
-                                                       struct parameterwrapper ** vqueues, 
-                                                       int vlength, 
-                                                       bool isnew) {
+void flagbody(struct ___Object___ *ptr,
+              int flag,
+              struct parameterwrapper ** vqueues,
+              int vlength,
+              bool isnew) {
   struct parameterwrapper * flagptr = NULL;
   int i = 0;
   struct parameterwrapper ** queues = vqueues;
@@ -973,238 +983,238 @@ void flagbody(struct ___Object___ *ptr,
   int * enterflags = NULL;
   if((!isnew) && (queues == NULL)) {
     if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-               queues = objectqueues[BAMBOO_NUM_OF_CORE][ptr->type];
-               length = numqueues[BAMBOO_NUM_OF_CORE][ptr->type];
-       } else {
-               return;
-       }
+      queues = objectqueues[BAMBOO_NUM_OF_CORE][ptr->type];
+      length = numqueues[BAMBOO_NUM_OF_CORE][ptr->type];
+    } else {
+      return;
+    }
   }
   ptr->flag=flag;
 
   /*Remove object from all queues */
   for(i = 0; i < length; ++i) {
     flagptr = queues[i];
-    ObjectHashget(flagptr->objectset, (int) ptr, (int *) &next, 
-                                         (int *) &enterflags, &UNUSED, &UNUSED2);
+    ObjectHashget(flagptr->objectset, (int) ptr, (int *) &next,
+                  (int *) &enterflags, &UNUSED, &UNUSED2);
     ObjectHashremove(flagptr->objectset, (int)ptr);
     if (enterflags!=NULL)
       RUNFREE(enterflags);
   }
 }
 
-void enqueueObject(void * vptr, 
-                              struct parameterwrapper ** vqueues, 
-                                                                        int vlength) {
-       struct ___Object___ *ptr = (struct ___Object___ *)vptr;
-       
-       {
-               //struct QueueItem *tmpptr;
-               struct parameterwrapper * parameter=NULL;
-               int j;
-               int i;
-               struct parameterwrapper * prevptr=NULL;
-               struct ___Object___ *tagptr=NULL;
-               struct parameterwrapper ** queues = vqueues;
-               int length = vlength;
-               if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-                       return;
-               }
-               if(queues == NULL) {
-                       queues = objectqueues[BAMBOO_NUM_OF_CORE][ptr->type];
-                       length = numqueues[BAMBOO_NUM_OF_CORE][ptr->type];
-               }
-               tagptr=ptr->___tags___;
-
-               /* Outer loop iterates through all parameter queues an object of
-                  this type could be in.  */
-               for(j = 0; j < length; ++j) {
-                       parameter = queues[j];     
-                       /* Check tags */
-                       if (parameter->numbertags>0) {
-                               if (tagptr==NULL)
-                                       goto nextloop; //that means the object has no tag 
-                                                //but that param needs tag
-                               else if(tagptr->type==TAGTYPE) { //one tag
-                                       //struct ___TagDescriptor___ * tag=
-                                       //(struct ___TagDescriptor___*) tagptr;  
-                                       for(i=0; i<parameter->numbertags; i++) {
-                                               //slotid is parameter->tagarray[2*i];
-                                               int tagid=parameter->tagarray[2*i+1];
-                                               if (tagid!=tagptr->flag)
-                                                       goto nextloop; /*We don't have this tag */
-                                       }
-                               } else { //multiple tags
-                                       struct ArrayObject * ao=(struct ArrayObject *) tagptr;
-                                       for(i=0; i<parameter->numbertags; i++) {
-                                               //slotid is parameter->tagarray[2*i];
-                                               int tagid=parameter->tagarray[2*i+1];
-                                               int j;
-                                               for(j=0; j<ao->___cachedCode___; j++) {
-                                                       if (tagid==ARRAYGET(ao, struct ___TagDescriptor___*, j)->flag)
-                                                               goto foundtag;
-                                               }
-                                               goto nextloop;
+void enqueueObject(void * vptr,
+                   struct parameterwrapper ** vqueues,
+                   int vlength) {
+  struct ___Object___ *ptr = (struct ___Object___ *)vptr;
+
+  {
+    //struct QueueItem *tmpptr;
+    struct parameterwrapper * parameter=NULL;
+    int j;
+    int i;
+    struct parameterwrapper * prevptr=NULL;
+    struct ___Object___ *tagptr=NULL;
+    struct parameterwrapper ** queues = vqueues;
+    int length = vlength;
+    if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+      return;
+    }
+    if(queues == NULL) {
+      queues = objectqueues[BAMBOO_NUM_OF_CORE][ptr->type];
+      length = numqueues[BAMBOO_NUM_OF_CORE][ptr->type];
+    }
+    tagptr=ptr->___tags___;
+
+    /* Outer loop iterates through all parameter queues an object of
+       this type could be in.  */
+    for(j = 0; j < length; ++j) {
+      parameter = queues[j];
+      /* Check tags */
+      if (parameter->numbertags>0) {
+       if (tagptr==NULL)
+         goto nextloop;                               //that means the object has no tag
+       //but that param needs tag
+       else if(tagptr->type==TAGTYPE) {                         //one tag
+         //struct ___TagDescriptor___ * tag=
+         //(struct ___TagDescriptor___*) tagptr;
+         for(i=0; i<parameter->numbertags; i++) {
+           //slotid is parameter->tagarray[2*i];
+           int tagid=parameter->tagarray[2*i+1];
+           if (tagid!=tagptr->flag)
+             goto nextloop;                                           /*We don't have this tag */
+         }
+       } else {                         //multiple tags
+         struct ArrayObject * ao=(struct ArrayObject *) tagptr;
+         for(i=0; i<parameter->numbertags; i++) {
+           //slotid is parameter->tagarray[2*i];
+           int tagid=parameter->tagarray[2*i+1];
+           int j;
+           for(j=0; j<ao->___cachedCode___; j++) {
+             if (tagid==ARRAYGET(ao, struct ___TagDescriptor___*, j)->flag)
+               goto foundtag;
+           }
+           goto nextloop;
 foundtag:
-                                               ;
-                                       }
-                               }
-                       }
-       
-                       /* Check flags */
-                       for(i=0; i<parameter->numberofterms; i++) {
-                               int andmask=parameter->intarray[i*2];
-                               int checkmask=parameter->intarray[i*2+1];
-                               if ((ptr->flag&andmask)==checkmask) {
-                                       enqueuetasks(parameter, prevptr, ptr, NULL, 0);
-                                       prevptr=parameter;
-                                       break;
-                               }
-                       }
-nextloop:
-                       ;
-               }
+           ;
+         }
        }
+      }
+
+      /* Check flags */
+      for(i=0; i<parameter->numberofterms; i++) {
+       int andmask=parameter->intarray[i*2];
+       int checkmask=parameter->intarray[i*2+1];
+       if ((ptr->flag&andmask)==checkmask) {
+         enqueuetasks(parameter, prevptr, ptr, NULL, 0);
+         prevptr=parameter;
+         break;
+       }
+      }
+nextloop:
+      ;
+    }
+  }
 }
 
-void enqueueObject_I(void * vptr, 
-                                struct parameterwrapper ** vqueues, 
-                                                                                int vlength) {
-       struct ___Object___ *ptr = (struct ___Object___ *)vptr;
-       
-       {
-               //struct QueueItem *tmpptr;
-               struct parameterwrapper * parameter=NULL;
-               int j;
-               int i;
-               struct parameterwrapper * prevptr=NULL;
-               struct ___Object___ *tagptr=NULL;
-               struct parameterwrapper ** queues = vqueues;
-               int length = vlength;
-               if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-                       return;
-               }
-               if(queues == NULL) {
-                       queues = objectqueues[BAMBOO_NUM_OF_CORE][ptr->type];
-                       length = numqueues[BAMBOO_NUM_OF_CORE][ptr->type];
-               }
-               tagptr=ptr->___tags___;
-
-               /* Outer loop iterates through all parameter queues an object of
-                  this type could be in.  */
-               for(j = 0; j < length; ++j) {
-                       parameter = queues[j];     
-                       /* Check tags */
-                       if (parameter->numbertags>0) {
-                               if (tagptr==NULL)
-                                       goto nextloop; //that means the object has no tag 
-                                                //but that param needs tag
-                               else if(tagptr->type==TAGTYPE) { //one tag
-                                       //struct ___TagDescriptor___ * tag=(struct ___TagDescriptor___*) tagptr;         
-                                       for(i=0; i<parameter->numbertags; i++) {
-                                               //slotid is parameter->tagarray[2*i];
-                                               int tagid=parameter->tagarray[2*i+1];
-                                               if (tagid!=tagptr->flag)
-                                                       goto nextloop; /*We don't have this tag */
-                                       }
-                               } else { //multiple tags
-                                       struct ArrayObject * ao=(struct ArrayObject *) tagptr;
-                                       for(i=0; i<parameter->numbertags; i++) {
-                                               //slotid is parameter->tagarray[2*i];
-                                               int tagid=parameter->tagarray[2*i+1];
-                                               int j;
-                                               for(j=0; j<ao->___cachedCode___; j++) {
-                                                       if (tagid==ARRAYGET(ao, struct ___TagDescriptor___*, j)->flag)
-                                                               goto foundtag;
-                                               }
-                                               goto nextloop;
+void enqueueObject_I(void * vptr,
+                     struct parameterwrapper ** vqueues,
+                     int vlength) {
+  struct ___Object___ *ptr = (struct ___Object___ *)vptr;
+
+  {
+    //struct QueueItem *tmpptr;
+    struct parameterwrapper * parameter=NULL;
+    int j;
+    int i;
+    struct parameterwrapper * prevptr=NULL;
+    struct ___Object___ *tagptr=NULL;
+    struct parameterwrapper ** queues = vqueues;
+    int length = vlength;
+    if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+      return;
+    }
+    if(queues == NULL) {
+      queues = objectqueues[BAMBOO_NUM_OF_CORE][ptr->type];
+      length = numqueues[BAMBOO_NUM_OF_CORE][ptr->type];
+    }
+    tagptr=ptr->___tags___;
+
+    /* Outer loop iterates through all parameter queues an object of
+       this type could be in.  */
+    for(j = 0; j < length; ++j) {
+      parameter = queues[j];
+      /* Check tags */
+      if (parameter->numbertags>0) {
+       if (tagptr==NULL)
+         goto nextloop;                               //that means the object has no tag
+       //but that param needs tag
+       else if(tagptr->type==TAGTYPE) {                         //one tag
+         //struct ___TagDescriptor___ * tag=(struct ___TagDescriptor___*) tagptr;
+         for(i=0; i<parameter->numbertags; i++) {
+           //slotid is parameter->tagarray[2*i];
+           int tagid=parameter->tagarray[2*i+1];
+           if (tagid!=tagptr->flag)
+             goto nextloop;                                           /*We don't have this tag */
+         }
+       } else {                         //multiple tags
+         struct ArrayObject * ao=(struct ArrayObject *) tagptr;
+         for(i=0; i<parameter->numbertags; i++) {
+           //slotid is parameter->tagarray[2*i];
+           int tagid=parameter->tagarray[2*i+1];
+           int j;
+           for(j=0; j<ao->___cachedCode___; j++) {
+             if (tagid==ARRAYGET(ao, struct ___TagDescriptor___*, j)->flag)
+               goto foundtag;
+           }
+           goto nextloop;
 foundtag:
-                                               ;
-                                       }
-                               }
-                       }
-
-                       /* Check flags */
-                       for(i=0; i<parameter->numberofterms; i++) {
-                               int andmask=parameter->intarray[i*2];
-                               int checkmask=parameter->intarray[i*2+1];
-                               if ((ptr->flag&andmask)==checkmask) {
-                                       enqueuetasks_I(parameter, prevptr, ptr, NULL, 0);
-                                       prevptr=parameter;
-                                       break;
-                               }
-                       }
-nextloop:
-                       ;
-               }
+           ;
+         }
        }
+      }
+
+      /* Check flags */
+      for(i=0; i<parameter->numberofterms; i++) {
+       int andmask=parameter->intarray[i*2];
+       int checkmask=parameter->intarray[i*2+1];
+       if ((ptr->flag&andmask)==checkmask) {
+         enqueuetasks_I(parameter, prevptr, ptr, NULL, 0);
+         prevptr=parameter;
+         break;
+       }
+      }
+nextloop:
+      ;
+    }
+  }
 }
 
 
-int * getAliasLock(void ** ptrs, 
-                              int length, 
-                                                                        struct RuntimeHash * tbl) {
-       if(length == 0) {
-               return (int*)(RUNMALLOC(sizeof(int)));
+int * getAliasLock(void ** ptrs,
+                   int length,
+                   struct RuntimeHash * tbl) {
+  if(length == 0) {
+    return (int*)(RUNMALLOC(sizeof(int)));
+  } else {
+    int i = 0;
+    int locks[length];
+    int locklen = 0;
+    bool redirect = false;
+    int redirectlock = 0;
+    for(; i < length; i++) {
+      struct ___Object___ * ptr = (struct ___Object___ *)(ptrs[i]);
+      int lock = 0;
+      int j = 0;
+      if(ptr->lock == NULL) {
+       lock = (int)(ptr);
+      } else {
+       lock = (int)(ptr->lock);
+      }
+      if(redirect) {
+       if(lock != redirectlock) {
+         RuntimeHashadd(tbl, lock, redirectlock);
+       }
+      } else {
+       if(RuntimeHashcontainskey(tbl, lock)) {
+         // already redirected
+         redirect = true;
+         RuntimeHashget(tbl, lock, &redirectlock);
+         for(; j < locklen; j++) {
+           if(locks[j] != redirectlock) {
+             RuntimeHashadd(tbl, locks[j], redirectlock);
+           }
+         }
        } else {
-               int i = 0;
-               int locks[length];
-               int locklen = 0;
-               bool redirect = false;
-               int redirectlock = 0;
-               for(; i < length; i++) {
-                       struct ___Object___ * ptr = (struct ___Object___ *)(ptrs[i]);
-                       int lock = 0;
-                       int j = 0;
-                       if(ptr->lock == NULL) {
-                               lock = (int)(ptr);
-                       } else {
-                               lock = (int)(ptr->lock);
-                       }
-                       if(redirect) {
-                               if(lock != redirectlock) {
-                                       RuntimeHashadd(tbl, lock, redirectlock);
-                               }
-                       } else {
-                               if(RuntimeHashcontainskey(tbl, lock)) {
-                                       // already redirected
-                                       redirect = true;
-                                       RuntimeHashget(tbl, lock, &redirectlock);
-                                       for(; j < locklen; j++) {
-                                               if(locks[j] != redirectlock) {
-                                                       RuntimeHashadd(tbl, locks[j], redirectlock);
-                                               }
-                                       }
-                               } else {
-                                       bool insert = true;
-                                       for(j = 0; j < locklen; j++) {
-                                               if(locks[j] == lock) {
-                                                       insert = false;
-                                                       break;
-                                               } else if(locks[j] > lock) {
-                                                       break;
-                                               }
-                                       }
-                                       if(insert) {
-                                               int h = locklen;
-                                               for(; h > j; h--) {
-                                                       locks[h] = locks[h-1];
-                                               }       
-                                               locks[j] = lock;
-                                               locklen++;
-                                       }
-                               }
-                       }
-               }
-               if(redirect) {
-                       return (int *)redirectlock;
-               } else {
-                       return (int *)(locks[0]);
-               }
+         bool insert = true;
+         for(j = 0; j < locklen; j++) {
+           if(locks[j] == lock) {
+             insert = false;
+             break;
+           } else if(locks[j] > lock) {
+             break;
+           }
+         }
+         if(insert) {
+           int h = locklen;
+           for(; h > j; h--) {
+             locks[h] = locks[h-1];
+           }
+           locks[j] = lock;
+           locklen++;
+         }
        }
+      }
+    }
+    if(redirect) {
+      return (int *)redirectlock;
+    } else {
+      return (int *)(locks[0]);
+    }
+  }
 }
 
-void addAliasLock(void * ptr, 
-                             int lock) {
+void addAliasLock(void * ptr,
+                  int lock) {
   struct ___Object___ * obj = (struct ___Object___ *)ptr;
   if(((int)ptr != lock) && (obj->lock != (int*)lock)) {
     // originally no alias lock associated or have a different alias lock
@@ -1215,227 +1225,254 @@ void addAliasLock(void * ptr,
 
 #ifdef PROFILE
 inline void setTaskExitIndex(int index) {
-       taskInfoArray[taskInfoIndex]->exitIndex = index;
+  taskInfoArray[taskInfoIndex]->exitIndex = index;
 }
 
 inline void addNewObjInfo(void * nobj) {
-       if(taskInfoArray[taskInfoIndex]->newObjs == NULL) {
-               taskInfoArray[taskInfoIndex]->newObjs = createQueue();
-       }
-       addNewItem(taskInfoArray[taskInfoIndex]->newObjs, nobj);
+  if(taskInfoArray[taskInfoIndex]->newObjs == NULL) {
+    taskInfoArray[taskInfoIndex]->newObjs = createQueue();
+  }
+  addNewItem(taskInfoArray[taskInfoIndex]->newObjs, nobj);
 }
 #endif
 
 #ifdef MULTICORE_GC
 void * localmalloc_I(int coren,
-                                int isize,
-                                int * allocsize) {
-       void * mem = NULL;
-       int i = 0;
-       int j = 0;
-       int tofindb = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
-       int totest = tofindb;
-       int bound = BAMBOO_SMEM_SIZE_L;
-       int foundsmem = 0;
-       int size = 0;
-       do {
-               bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-               int nsize = bamboo_smemtbl[totest];
-               bool islocal = true;
-               if(nsize < bound) {
-                       bool tocheck = true;
-                       // have some space in the block
-                       if(totest == tofindb) {
-                               // the first partition
-                               size = bound - nsize;
-                       } else if(nsize == 0) {
-                               // an empty partition, can be appended
-                               size += bound;
-                       } else {
-                               // not an empty partition, can not be appended
-                               // the last continuous block is not big enough, go to check the next
-                               // local block
-                               islocal = true;
-                               tocheck = false;
-                       } // if(totest == tofindb) else if(nsize == 0) else ...
-                       if(tocheck) {
-                               if(size >= isize) {
-                                       // have enough space in the block, malloc
-                                       foundsmem = 1;
-                                       break;
-                               } else {
-                                       // no enough space yet, try to append next continuous block
-                                       islocal = false;
-                               } // if(size > isize) else ...
-                       } // if(tocheck)
-               } // if(nsize < bound)
-               if(islocal) {
-                       // no space in the block, go to check the next block
-                       i++;
-                       if(2==i) {
-                               i = 0;
-                               j++;
-                       }
-                       tofindb = totest = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
-               } else {
-                       totest += 1;
-               } // if(islocal) else ...
-               if(totest > gcnumblock-1-bamboo_reserved_smem) {
-                       // no more local mem, do not find suitable block
-                       foundsmem = 2;
-                       break;
-               } // if(totest > gcnumblock-1-bamboo_reserved_smem) ...
-       } while(true);
-
-       if(foundsmem == 1) {
-               // find suitable block
-               mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC)?
-                               (BAMBOO_SMEM_SIZE_L*tofindb):(BAMBOO_LARGE_SMEM_BOUND+
-                                       (tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
-               *allocsize = size;
-               // set bamboo_smemtbl
-               for(i = tofindb; i <= totest; i++) {
-                       bamboo_smemtbl[i]=(i<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
-               }
-       } else if(foundsmem == 2) {
-               // no suitable block
-               *allocsize = 0;
-       }
+                     int isize,
+                     int * allocsize) {
+  void * mem = NULL;
+  int i = 0;
+  int j = 0;
+  int tofindb = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
+  int totest = tofindb;
+  int bound = BAMBOO_SMEM_SIZE_L;
+  int foundsmem = 0;
+  int size = 0;
+  do {
+    bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+    int nsize = bamboo_smemtbl[totest];
+    bool islocal = true;
+    if(nsize < bound) {
+      bool tocheck = true;
+      // have some space in the block
+      if(totest == tofindb) {
+       // the first partition
+       size = bound - nsize;
+      } else if(nsize == 0) {
+       // an empty partition, can be appended
+       size += bound;
+      } else {
+       // not an empty partition, can not be appended
+       // the last continuous block is not big enough, go to check the next
+       // local block
+       islocal = true;
+       tocheck = false;
+      }                   // if(totest == tofindb) else if(nsize == 0) else ...
+      if(tocheck) {
+       if(size >= isize) {
+         // have enough space in the block, malloc
+         foundsmem = 1;
+         break;
+       } else {
+         // not enough space yet, try to append the next contiguous block
+         islocal = false;
+       }                         // if(size >= isize) else ...
+      }                   // if(tocheck)
+    }             // if(nsize < bound)
+    if(islocal) {
+      // no space in the block, go to check the next block
+      i++;
+      if(2==i) {
+       i = 0;
+       j++;
+      }
+      tofindb = totest = gc_core2block[2*coren+i]+(NUMCORES4GC*2)*j;
+    } else {
+      totest += 1;
+    }             // if(islocal) else ...
+    if(totest > gcnumblock-1-bamboo_reserved_smem) {
+      // no more local memory; no suitable block was found
+      foundsmem = 2;
+      break;
+    }             // if(totest > gcnumblock-1-bamboo_reserved_smem) ...
+  } while(true);
+
+  if(foundsmem == 1) {
+    // find suitable block
+    mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC) ?
+                                            (BAMBOO_SMEM_SIZE_L*tofindb) : (BAMBOO_LARGE_SMEM_BOUND+
+                                                                            (tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
+    *allocsize = size;
+    // set bamboo_smemtbl
+    for(i = tofindb; i <= totest; i++) {
+      bamboo_smemtbl[i]=(i<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+    }
+  } else if(foundsmem == 2) {
+    // no suitable block
+    *allocsize = 0;
+  }
 
-       return mem;
+  return mem;
 } // void * localmalloc_I(int, int, int *)
 
 void * globalmalloc_I(int coren,
-                                 int isize,
-                                 int * allocsize) {
-       void * mem = NULL;
-       int tofindb = bamboo_free_block; //0;
-       int totest = tofindb;
-       int bound = BAMBOO_SMEM_SIZE_L;
-       int foundsmem = 0;
-       int size = 0;
-       if(tofindb > gcnumblock-1-bamboo_reserved_smem) {
-               *allocsize = 0;
-               return NULL;
-       }
-       do {
-               bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-               int nsize = bamboo_smemtbl[totest];
-               bool isnext = false;
-               if(nsize < bound) {
-                       bool tocheck = true;
-                       // have some space in the block
-                       if(totest == tofindb) {
-                               // the first partition
-                               size = bound - nsize;
-                       } else if(nsize == 0) {
-                               // an empty partition, can be appended
-                               size += bound;
-                       } else {
-                               // not an empty partition, can not be appended
-                               // the last continuous block is not big enough, start another block
-                               isnext = true;
-                               tocheck = false;
-                       } // if(totest == tofindb) else if(nsize == 0) else ...
-                       if(tocheck) {
-                               if(size >= isize) {
-                                       // have enough space in the block, malloc
-                                       foundsmem = 1;
-                                       break;
-                               } // if(size > isize) 
-                       } // if(tocheck)
-               } else {
-                       isnext = true;
-               }// if(nsize < bound) else ...
-               totest += 1;
-               if(totest > gcnumblock-1-bamboo_reserved_smem) {
-                       // no more local mem, do not find suitable block
-                       foundsmem = 2;
-                       break;
-               } // if(totest > gcnumblock-1-bamboo_reserved_smem) ...
-               if(isnext) {
-                       // start another block
-                       tofindb = totest;
-               } // if(islocal) 
-       } while(true);
-
-       if(foundsmem == 1) {
-               // find suitable block
-               mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC)?
-                                 (BAMBOO_SMEM_SIZE_L*tofindb):(BAMBOO_LARGE_SMEM_BOUND+
-                                       (tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
-               *allocsize = size;
-               // set bamboo_smemtbl
-               for(int i = tofindb; i <= totest; i++) {
-                       bamboo_smemtbl[i]=(i<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
-               }
-               if(tofindb == bamboo_free_block) {
-                       bamboo_free_block = totest+1;
-               }
-       } else if(foundsmem == 2) {
-               // no suitable block
-               *allocsize = 0;
-               mem = NULL;
-       }
+                      int isize,
+                      int * allocsize) {
+  void * mem = NULL;
+  int tofindb = bamboo_free_block;       //0;
+  int totest = tofindb;
+  int bound = BAMBOO_SMEM_SIZE_L;
+  int foundsmem = 0;
+  int size = 0;
+  if(tofindb > gcnumblock-1-bamboo_reserved_smem) {
+    *allocsize = 0;
+    return NULL;
+  }
+  do {
+    bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+    int nsize = bamboo_smemtbl[totest];
+    bool isnext = false;
+    if(nsize < bound) {
+      bool tocheck = true;
+      // have some space in the block
+      if(totest == tofindb) {
+       // the first partition
+       size = bound - nsize;
+      } else if(nsize == 0) {
+       // an empty partition, can be appended
+       size += bound;
+      } else {
+       // not an empty partition, can not be appended
+       // the last continuous block is not big enough, start another block
+       isnext = true;
+       tocheck = false;
+      }                   // if(totest == tofindb) else if(nsize == 0) else ...
+      if(tocheck) {
+       if(size >= isize) {
+         // have enough space in the block, malloc
+         foundsmem = 1;
+         break;
+       }                         // if(size >= isize)
+      }                   // if(tocheck)
+    } else {
+      isnext = true;
+    }            // if(nsize < bound) else ...
+    totest += 1;
+    if(totest > gcnumblock-1-bamboo_reserved_smem) {
+      // ran past the last usable block; no suitable block was found
+      foundsmem = 2;
+      break;
+    }             // if(totest > gcnumblock-1-bamboo_reserved_smem) ...
+    if(isnext) {
+      // start another block
+      tofindb = totest;
+    }             // if(isnext)
+  } while(true);
+
+  if(foundsmem == 1) {
+    // find suitable block
+    mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC) ?
+                                            (BAMBOO_SMEM_SIZE_L*tofindb) : (BAMBOO_LARGE_SMEM_BOUND+
+                                                                            (tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
+    *allocsize = size;
+    // set bamboo_smemtbl
+    for(int i = tofindb; i <= totest; i++) {
+      bamboo_smemtbl[i]=(i<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+    }
+    if(tofindb == bamboo_free_block) {
+      bamboo_free_block = totest+1;
+    }
+  } else if(foundsmem == 2) {
+    // no suitable block
+    *allocsize = 0;
+    mem = NULL;
+  }
 
-       return mem;
+  return mem;
 } // void * globalmalloc_I(int, int, int *)
 #endif // #ifdef MULTICORE_GC
 
 // malloc from the shared memory
 void * smemalloc_I(int coren,
-                              int size, 
-                              int * allocsize) {
-       void * mem = NULL;
+                   int size,
+                   int * allocsize) {
+  void * mem = NULL;
 #ifdef MULTICORE_GC
-       int isize = size+(BAMBOO_CACHE_LINE_SIZE);
+  int isize = size+(BAMBOO_CACHE_LINE_SIZE);
 
-       // go through the bamboo_smemtbl for suitable partitions
-       switch(bamboo_smem_mode) {
-               case SMEMLOCAL: {
-                 mem = localmalloc_I(coren, isize, allocsize);
-                       break;
-         }
+  // go through the bamboo_smemtbl for suitable partitions
+  switch(bamboo_smem_mode) {
+  case SMEMLOCAL: {
+    mem = localmalloc_I(coren, isize, allocsize);
+    break;
+  }
 
-               case SMEMFIXED: {
-                       // TODO not supported yet
-                       BAMBOO_EXIT(0xe001);
-                       break;
-               }
-
-               case SMEMMIXED: {
-                       // TODO not supported yet
-                       BAMBOO_EXIT(0xe002);
-                       break;
-               }
-
-               case SMEMGLOBAL: {
-                       mem = globalmalloc_I(coren, isize, allocsize);
-                       break;
-               }
-
-               default:
-                       break;
-       }
+  case SMEMFIXED: {
+    // TODO not supported yet
+    BAMBOO_EXIT(0xe001);
+    break;
+  }
+
+  case SMEMMIXED: {
+    // TODO not supported yet
+    BAMBOO_EXIT(0xe002);
+    break;
+  }
 
-       if(mem == NULL) {
+  case SMEMGLOBAL: {
+    mem = globalmalloc_I(coren, isize, allocsize);
+    break;
+  }
+
+  default:
+    break;
+  }
+
+  if(mem == NULL) {
 #else
-       int toallocate = (size>(BAMBOO_SMEM_SIZE)) ? (size):(BAMBOO_SMEM_SIZE);
-       mem = mspace_calloc(bamboo_free_msp, 1, toallocate);
-       *allocsize = toallocate;
-       if(mem == NULL) {
+  // TODO
+#ifdef PROFILE
+  /*if(!interruptInfoOverflow) {
+    InterruptInfo* intInfo = RUNMALLOC_I(sizeof(struct interrupt_info));
+    interruptInfoArray[interruptInfoIndex] = intInfo;
+    intInfo->startTime = BAMBOO_GET_EXE_TIME();
+    intInfo->endTime = -1;
+  }*/
+#endif  
+  int toallocate = (size>(BAMBOO_SMEM_SIZE)) ? (size) : (BAMBOO_SMEM_SIZE);
+  //mem = mspace_calloc(bamboo_free_msp, 1, toallocate);
+  if(toallocate > bamboo_free_smem_size) {
+       // not enough memory
+       mem = NULL;
+  } else {
+       mem = (void *)bamboo_free_smemp;
+       bamboo_free_smemp = ((void*)bamboo_free_smemp) + toallocate;
+       bamboo_free_smem_size -= toallocate;
+       //BAMBOO_MEMSET_WH(mem, '\0', toallocate);
+  }
+  *allocsize = toallocate;
+#ifdef PROFILE
+  /*if(!interruptInfoOverflow) {
+    interruptInfoArray[interruptInfoIndex]->endTime=BAMBOO_GET_EXE_TIME();
+    interruptInfoIndex++;
+    if(interruptInfoIndex == INTERRUPTINFOLENGTH) {
+      interruptInfoOverflow = true;
+    }
+  }*/
 #endif
-               // no enough shared global memory
-               *allocsize = 0;
+  if(mem == NULL) {
+#endif // MULTICORE_GC
+    // not enough shared global memory
+    *allocsize = 0;
 #ifdef MULTICORE_GC
-               gcflag = true;
-               return NULL;
+    gcflag = true;
+    return NULL;
 #else
-               BAMBOO_DEBUGPRINT(0xa001);
-               BAMBOO_EXIT(0xa001);
+    BAMBOO_DEBUGPRINT(0xa001);
+    BAMBOO_EXIT(0xa001);
 #endif
-       }
-       return mem;
+  }
+  return mem;
 }  // void * smemalloc_I(int, int, int)
 
 INLINE int checkMsgLength_I(int size) {
@@ -1444,102 +1481,109 @@ INLINE int checkMsgLength_I(int size) {
   BAMBOO_DEBUGPRINT(0xcccc);
 #endif
 #endif
-       int type = msgdata[msgdataindex];
-       switch(type) {
-               case STATUSCONFIRM:
-               case TERMINATE:
+  int type = msgdata[msgdataindex];
+  switch(type) {
+  case STATUSCONFIRM:
+  case TERMINATE:
 #ifdef MULTICORE_GC
-               case GCSTARTINIT: 
-               case GCSTART: 
-               case GCSTARTFLUSH: 
-               case GCFINISH: 
-               case GCMARKCONFIRM: 
-               case GCLOBJREQUEST: 
-#endif 
-               {
-                       msglength = 1;
-                       break;
-               }
-               case PROFILEOUTPUT:
-               case PROFILEFINISH:
+  case GCSTARTINIT:
+  case GCSTART:
+  case GCSTARTFLUSH:
+  case GCFINISH:
+  case GCMARKCONFIRM:
+  case GCLOBJREQUEST:
+#endif
+    {
+      msglength = 1;
+      break;
+    }
+
+  case PROFILEOUTPUT:
+  case PROFILEFINISH:
 #ifdef MULTICORE_GC
-               case GCSTARTCOMPACT:
-               case GCFINISHINIT: 
-               case GCFINISHFLUSH: 
-               case GCMARKEDOBJ: 
-#endif
-               {
-                       msglength = 2;
-                       break;
-               }
-               case MEMREQUEST: 
-               case MEMRESPONSE:
+  case GCSTARTCOMPACT:
+  case GCFINISHINIT:
+  case GCFINISHFLUSH:
+  case GCMARKEDOBJ:
+#endif
+    {
+      msglength = 2;
+      break;
+    }
+
+  case MEMREQUEST:
+  case MEMRESPONSE:
 #ifdef MULTICORE_GC
-               case GCMAPREQUEST: 
-               case GCMAPINFO: 
-               case GCLOBJMAPPING: 
-#endif 
-               {
-                       msglength = 3;
-                       break;
-               }
-               case TRANSTALL:
-               case LOCKGROUNT:
-               case LOCKDENY:
-               case LOCKRELEASE:
-               case REDIRECTGROUNT:
-               case REDIRECTDENY:
-               case REDIRECTRELEASE:
+  case GCMAPREQUEST:
+  case GCMAPINFO:
+  case GCLOBJMAPPING:
+#endif
+    {
+      msglength = 3;
+      break;
+    }
+
+  case TRANSTALL:
+  case LOCKGROUNT:
+  case LOCKDENY:
+  case LOCKRELEASE:
+  case REDIRECTGROUNT:
+  case REDIRECTDENY:
+  case REDIRECTRELEASE:
 #ifdef MULTICORE_GC
-               case GCFINISHMARK:
-               case GCMOVESTART:
-#endif
-               { 
-                       msglength = 4;
-                       break;
-               }
-               case LOCKREQUEST:
-               case STATUSREPORT:
+  case GCFINISHMARK:
+  case GCMOVESTART:
+#endif
+    {
+      msglength = 4;
+      break;
+    }
+
+  case LOCKREQUEST:
+  case STATUSREPORT:
 #ifdef MULTICORE_GC
-               case GCFINISHCOMPACT:
-               case GCMARKREPORT: 
-#endif 
-               {
-                       msglength = 5;
-                       break;
-               }
-               case REDIRECTLOCK: 
-               {
-                       msglength = 6;
-                       break;
-               }
-               case TRANSOBJ:  // nonfixed size
+  case GCFINISHCOMPACT:
+  case GCMARKREPORT:
+#endif
+    {
+      msglength = 5;
+      break;
+    }
+
+  case REDIRECTLOCK:
+  {
+    msglength = 6;
+    break;
+  }
+
+  case TRANSOBJ:                // nonfixed size
 #ifdef MULTICORE_GC
-               case GCLOBJINFO: 
-#endif
-               { // nonfixed size 
-                       if(size > 1) {
-                               msglength = msgdata[msgdataindex+1];
-                       } else {
-                               return -1;
-                       }
-                       break;
-               }
-               default: 
-               {
-                       BAMBOO_DEBUGPRINT_REG(type);
-                       BAMBOO_DEBUGPRINT_REG(msgdataindex);
-                       int i = 6;
-                       while(i-- > 0) {
-                               BAMBOO_DEBUGPRINT(msgdata[msgdataindex+i]);
-                       }
-                       BAMBOO_EXIT(0xd005);
-                       break;
-               }
-       }
+  case GCLOBJINFO:
+#endif
+    {             // nonfixed size
+      if(size > 1) {
+       msglength = msgdata[msgdataindex+1];
+      } else {
+       return -1;
+      }
+      break;
+    }
+
+  default:
+  {
+    BAMBOO_DEBUGPRINT_REG(type);
+    BAMBOO_DEBUGPRINT_REG(msgdataindex);
+    int i = 6;
+    while(i-- > 0) {
+      BAMBOO_DEBUGPRINT(msgdata[msgdataindex+i]);
+    }
+    BAMBOO_EXIT(0xd005);
+    break;
+  }
+  }
 #ifdef DEBUG
 #ifndef TILERA
-       BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]);
+  BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]);
 #endif
 #endif
 #ifdef DEBUG
@@ -1547,819 +1591,836 @@ INLINE int checkMsgLength_I(int size) {
   BAMBOO_DEBUGPRINT(0xffff);
 #endif
 #endif
-       return msglength;
+  return msglength;
 }
 
 INLINE void processmsg_transobj_I() {
-       MSG_INDEXINC_I();
-       struct transObjInfo * transObj = RUNMALLOC_I(sizeof(struct transObjInfo));
-       int k = 0;
+  MSG_INDEXINC_I();
+  struct transObjInfo * transObj = RUNMALLOC_I(sizeof(struct transObjInfo));
+  int k = 0;
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-       BAMBOO_DEBUGPRINT(0xe880);
+  BAMBOO_DEBUGPRINT(0xe880);
 #endif
 #endif
-       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]/*[2]*/);
-#endif
-               BAMBOO_EXIT(0xa002);
-       } 
-       // store the object and its corresponding queue info, enqueue it later
-       transObj->objptr = (void *)msgdata[msgdataindex]; //[2]
-       MSG_INDEXINC_I();
-       transObj->length = (msglength - 3) / 2;
-       transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3));
-       for(k = 0; k < transObj->length; ++k) {
-               transObj->queues[2*k] = msgdata[msgdataindex]; //[3+2*k];
-               MSG_INDEXINC_I();
+    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[2]*/);
+#endif
+    BAMBOO_EXIT(0xa002);
+  }
+  // store the object and its corresponding queue info, enqueue it later
+  transObj->objptr = (void *)msgdata[msgdataindex];       //[2]
+  MSG_INDEXINC_I();
+  transObj->length = (msglength - 3) / 2;
+  transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3));
+  for(k = 0; k < transObj->length; ++k) {
+    transObj->queues[2*k] = msgdata[msgdataindex];             //[3+2*k];
+    MSG_INDEXINC_I();
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               //BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k]);
+    //BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k]);
 #endif
 #endif
-               transObj->queues[2*k+1] = msgdata[msgdataindex]; //[3+2*k+1];
-               MSG_INDEXINC_I();
+    transObj->queues[2*k+1] = msgdata[msgdataindex];             //[3+2*k+1];
+    MSG_INDEXINC_I();
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               //BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k+1]);
+    //BAMBOO_DEBUGPRINT_REG(transObj->queues[2*k+1]);
 #endif
 #endif
-       }
-       // check if there is an existing duplicate item
-       {
-               struct QueueItem * qitem = getHead(&objqueue);
-               struct QueueItem * prev = NULL;
-               while(qitem != NULL) {
-                       struct transObjInfo * tmpinfo = 
-                               (struct transObjInfo *)(qitem->objectptr);
-                       if(tmpinfo->objptr == transObj->objptr) {
-                               // the same object, remove outdate one
-                               RUNFREE(tmpinfo->queues);
-                               RUNFREE(tmpinfo);
-                               removeItem(&objqueue, qitem);
-                               //break;
-                       } else {
-                               prev = qitem;
-                       }
-                       if(prev == NULL) {
-                               qitem = getHead(&objqueue);
-                       } else {
-                               qitem = getNextQueueItem(prev);
-                       }
-               }
-               addNewItem_I(&objqueue, (void *)transObj);
-       }
-       ++(self_numreceiveobjs);
+  }
+  // check if there is an existing duplicate item
+  {
+    struct QueueItem * qitem = getHead(&objqueue);
+    struct QueueItem * prev = NULL;
+    while(qitem != NULL) {
+      struct transObjInfo * tmpinfo =
+        (struct transObjInfo *)(qitem->objectptr);
+      if(tmpinfo->objptr == transObj->objptr) {
+       // the same object, remove the outdated one
+       RUNFREE(tmpinfo->queues);
+       RUNFREE(tmpinfo);
+       removeItem(&objqueue, qitem);
+       //break;
+      } else {
+       prev = qitem;
+      }
+      if(prev == NULL) {
+       qitem = getHead(&objqueue);
+      } else {
+       qitem = getNextQueueItem(prev);
+      }
+    }
+    addNewItem_I(&objqueue, (void *)transObj);
+  }
+  ++(self_numreceiveobjs);
 }
 
 INLINE void processmsg_transtall_I() {
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-       // non startup core can not receive stall msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // non startup core can not receive stall msg
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]/*[1]*/);
+    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[1]*/);
 #endif
-               BAMBOO_EXIT(0xa003);
-       } 
-       int num_core = msgdata[msgdataindex]; //[1]
-       MSG_INDEXINC_I();
-       if(num_core < NUMCORESACTIVE) {
+    BAMBOO_EXIT(0xa003);
+  }
+  int num_core = msgdata[msgdataindex];       //[1]
+  MSG_INDEXINC_I();
+  if(num_core < NUMCORESACTIVE) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe881);
+    BAMBOO_DEBUGPRINT(0xe881);
 #endif
 #endif
-               corestatus[num_core] = 0;
-               numsendobjs[num_core] = msgdata[msgdataindex]; //[2];
-               MSG_INDEXINC_I();
-               numreceiveobjs[num_core] = msgdata[msgdataindex]; //[3];
-               MSG_INDEXINC_I();
-       }
+    corestatus[num_core] = 0;
+    numsendobjs[num_core] = msgdata[msgdataindex];             //[2];
+    MSG_INDEXINC_I();
+    numreceiveobjs[num_core] = msgdata[msgdataindex];             //[3];
+    MSG_INDEXINC_I();
+  }
 }
 
 #ifndef MULTICORE_GC
 INLINE void processmsg_lockrequest_I() {
-       // check to see if there is a lock exist for the required obj
-       // msgdata[1] -> lock type
-       int locktype = msgdata[msgdataindex]; //[1];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex]; // obj pointer
-       MSG_INDEXINC_I();
-       int data3 = msgdata[msgdataindex]; // lock
-       MSG_INDEXINC_I();
-       int data4 = msgdata[msgdataindex]; // request core
-       MSG_INDEXINC_I();
-       // -1: redirected, 0: approved, 1: denied
-       int deny = processlockrequest(locktype, data3, data2, data4, data4, true);  
-       if(deny == -1) {
-               // this lock request is redirected
-               return;
-       } else {
-               // send response msg
-               // for 32 bit machine, the size is always 4 words, cache the msg first
-               int tmp = deny==1?LOCKDENY:LOCKGROUNT;
-               //if(isMsgSending) {
-                       cache_msg_4(data4, tmp, locktype, data2, data3);
-               /*} else {
-                       send_msg_4(data4, tmp, locktype, data2, data3);
-               }*/
-       }
+  // check whether a lock already exists for the requested obj
+  // msgdata[1] -> lock type
+  int locktype = msgdata[msgdataindex];       //[1];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];       // obj pointer
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];       // lock
+  MSG_INDEXINC_I();
+  int data4 = msgdata[msgdataindex];       // request core
+  MSG_INDEXINC_I();
+  // -1: redirected, 0: approved, 1: denied
+  int deny = processlockrequest(locktype, data3, data2, data4, data4, true);
+  if(deny == -1) {
+    // this lock request is redirected
+    return;
+  } else {
+    // send response msg
+    // for 32 bit machine, the size is always 4 words, cache the msg first
+    int tmp = deny==1 ? LOCKDENY : LOCKGROUNT;
+    //if(isMsgSending) {
+    cache_msg_4(data4, tmp, locktype, data2, data3);
+    /*} else {
+            send_msg_4(data4, tmp, locktype, data2, data3);
+       }*/
+  }
 }
 
 INLINE void processmsg_lockgrount_I() {
-       MSG_INDEXINC_I();
-       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]/*[2]*/);
+    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[2]*/);
 #endif
-               BAMBOO_EXIT(0xa004);
-       } 
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data3 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       if((lockobj == data2) && (lock2require == data3)) {
+    BAMBOO_EXIT(0xa004);
+  }
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if((lockobj == data2) && (lock2require == data3)) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe882);
+    BAMBOO_DEBUGPRINT(0xe882);
 #endif
 #endif
-               lockresult = 1;
-               lockflag = true;
+    lockresult = 1;
+    lockflag = true;
 #ifndef INTERRUPT
-               reside = false;
+    reside = false;
 #endif
-       } else {
-               // conflicts on lockresults
+  } else {
+    // conflicts on lockresults
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa005);
-       }
+    BAMBOO_EXIT(0xa005);
+  }
 }
 
 INLINE void processmsg_lockdeny_I() {
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data3 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa006);
-       } 
-       if((lockobj == data2) && (lock2require == data3)) {
+    BAMBOO_EXIT(0xa006);
+  }
+  if((lockobj == data2) && (lock2require == data3)) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe883);
+    BAMBOO_DEBUGPRINT(0xe883);
 #endif
 #endif
-               lockresult = 0;
-               lockflag = true;
+    lockresult = 0;
+    lockflag = true;
 #ifndef INTERRUPT
-               reside = false;
+    reside = false;
 #endif
-               } else {
-               // conflicts on lockresults
+  } else {
+    // conflicts on lockresults
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa007);
-       }
+    BAMBOO_EXIT(0xa007);
+  }
 }
 
 INLINE void processmsg_lockrelease_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // receive lock release msg
-       processlockrelease(data1, data2, 0, false);
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // receive lock release msg
+  processlockrelease(data1, data2, 0, false);
 }
 
 INLINE void processmsg_redirectlock_I() {
-       // check to see if there is a lock exist for the required obj
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[1]; // lock type
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();//msgdata[2]; // obj pointer
-       int data3 = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[3]; // redirect lock
-       int data4 = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[4]; // root request core
-       int data5 = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[5]; // request core
-       int deny = processlockrequest(data1, data3, data2, data5, data4, true);
-       if(deny == -1) {
-               // this lock request is redirected
-               return;
-       } else {
-               // send response msg
-               // for 32 bit machine, the size is always 4 words, cache the msg first
-               //if(isMsgSending) {
-                       cache_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, 
-                                                                       data1, data2, data3);
-               /*} else {
-                       send_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT, 
-                                                                data1, data2, data3);
-               }*/
-       }
+  // check whether a lock already exists for the requested obj
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[1]; // lock type
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();      //msgdata[2]; // obj pointer
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[3]; // redirect lock
+  int data4 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[4]; // root request core
+  int data5 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[5]; // request core
+  int deny = processlockrequest(data1, data3, data2, data5, data4, true);
+  if(deny == -1) {
+    // this lock request is redirected
+    return;
+  } else {
+    // send response msg
+    // for 32 bit machine, the size is always 4 words, cache the msg first
+    //if(isMsgSending) {
+    cache_msg_4(data4, deny==1 ? REDIRECTDENY : REDIRECTGROUNT,
+                data1, data2, data3);
+    /*} else {
+            send_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT,
+                                                     data1, data2, data3);
+       }*/
+  }
 }
 
 INLINE void processmsg_redirectgrount_I() {
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa00a);
-       }
-       if(lockobj == data2) {
+    BAMBOO_EXIT(0xa00a);
+  }
+  if(lockobj == data2) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe891);
+    BAMBOO_DEBUGPRINT(0xe891);
 #endif
 #endif
-               int data3 = msgdata[msgdataindex];
-               MSG_INDEXINC_I();
-               lockresult = 1;
-               lockflag = true;
-               RuntimeHashadd_I(objRedirectLockTbl, lockobj, data3);
+    int data3 = msgdata[msgdataindex];
+    MSG_INDEXINC_I();
+    lockresult = 1;
+    lockflag = true;
+    RuntimeHashadd_I(objRedirectLockTbl, lockobj, data3);
 #ifndef INTERRUPT
-               reside = false;
+    reside = false;
 #endif
-       } else {
-               // conflicts on lockresults
+  } else {
+    // conflicts on lockresults
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa00b);
-       }
+    BAMBOO_EXIT(0xa00b);
+  }
 }
 
 INLINE void processmsg_redirectdeny_I() {
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa00c);
-       }
-       if(lockobj == data2) {
+    BAMBOO_EXIT(0xa00c);
+  }
+  if(lockobj == data2) {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe892);
+    BAMBOO_DEBUGPRINT(0xe892);
 #endif
 #endif
-               lockresult = 0;
-               lockflag = true;
+    lockresult = 0;
+    lockflag = true;
 #ifndef INTERRUPT
-               reside = false;
+    reside = false;
 #endif
-       } else {
-               // conflicts on lockresults
+  } else {
+    // conflicts on lockresults
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa00d);
-       }
+    BAMBOO_EXIT(0xa00d);
+  }
 }
 
 INLINE void processmsg_redirectrelease_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data3 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       processlockrelease(data1, data2, data3, true);
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  processlockrelease(data1, data2, data3, true);
 }
 #endif // #ifndef MULTICORE_GC
 
 #ifdef PROFILE
 INLINE void processmsg_profileoutput_I() {
-       if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
-               // startup core can not receive profile output finish msg
-               BAMBOO_EXIT(0xa008);
-       }
+  if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
+    // the startup core cannot receive a profile output msg
+    BAMBOO_EXIT(0xa008);
+  }
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-       BAMBOO_DEBUGPRINT(0xe885);
-#endif
-#endif
-       stall = true;
-       totalexetime = msgdata[msgdataindex]; //[1]
-       MSG_INDEXINC_I();
-       outputProfileData();
-       // cache the msg first
-       //if(isMsgSending) {
-               cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
-       /*} else {
-               send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
-       }*/
+  BAMBOO_DEBUGPRINT(0xe885);
+#endif
+#endif
+  stall = true;
+  totalexetime = msgdata[msgdataindex];       //[1]
+  MSG_INDEXINC_I();
+  outputProfileData();
+  // cache the msg first
+  //if(isMsgSending) {
+  cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
+  /*} else {
+          send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
+     }*/
 }
 
 INLINE void processmsg_profilefinish_I() {
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-               // non startup core can not receive profile output finish msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // non startup core can not receive profile output finish msg
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex/*1*/]);
+    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex /*1*/]);
 #endif
-               BAMBOO_EXIT(0xa009);
-       }
+    BAMBOO_EXIT(0xa009);
+  }
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-       BAMBOO_DEBUGPRINT(0xe886);
+  BAMBOO_DEBUGPRINT(0xe886);
 #endif
 #endif
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       profilestatus[data1] = 0;
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  profilestatus[data1] = 0;
 }
 #endif // #ifdef PROFILE
 
 INLINE void processmsg_statusconfirm_I() {
-       if((BAMBOO_NUM_OF_CORE == STARTUPCORE) 
-                       || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
-               // wrong core to receive such msg
-               BAMBOO_EXIT(0xa00e);
-       } else {
-               // send response msg
+  if((BAMBOO_NUM_OF_CORE == STARTUPCORE)
+     || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
+    // wrong core to receive such msg
+    BAMBOO_EXIT(0xa00e);
+  } else {
+    // send response msg
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe887);
-#endif
-#endif
-               // cache the msg first
-               //if(isMsgSending) {
-                       cache_msg_5(STARTUPCORE, STATUSREPORT, 
-                                               busystatus?1:0, BAMBOO_NUM_OF_CORE,
-                                               self_numsendobjs, self_numreceiveobjs);
-               /*} else {
-                       send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0, 
-                                                                BAMBOO_NUM_OF_CORE, self_numsendobjs, 
-                                                                self_numreceiveobjs);
-               }*/
-       }
+    BAMBOO_DEBUGPRINT(0xe887);
+#endif
+#endif
+    // cache the msg first
+    //if(isMsgSending) {
+    cache_msg_5(STARTUPCORE, STATUSREPORT,
+                busystatus ? 1 : 0, BAMBOO_NUM_OF_CORE,
+                self_numsendobjs, self_numreceiveobjs);
+    /*} else {
+            send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0,
+                                                     BAMBOO_NUM_OF_CORE, self_numsendobjs,
+                                                     self_numreceiveobjs);
+       }*/
+  }
 }
 
 INLINE void processmsg_statusreport_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data3 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data4 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // receive a status confirm info
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-               // wrong core to receive such msg
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data4 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // receive a status report msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // wrong core to receive such msg
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa00f);
-       } else {
+    BAMBOO_EXIT(0xa00f);
+  } else {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe888);
+    BAMBOO_DEBUGPRINT(0xe888);
 #endif
 #endif
-               if(waitconfirm) {
-                       numconfirm--;
-               }
-               corestatus[data2] = data1;
-               numsendobjs[data2] = data3;
-               numreceiveobjs[data2] = data4;
-       }
+    if(waitconfirm) {
+      numconfirm--;
+    }
+    corestatus[data2] = data1;
+    numsendobjs[data2] = data3;
+    numreceiveobjs[data2] = data4;
+  }
 }
 
 INLINE void processmsg_terminate_I() {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-       BAMBOO_DEBUGPRINT(0xe889);
+  BAMBOO_DEBUGPRINT(0xe889);
 #endif
 #endif
-       disruntimedata();
-       BAMBOO_EXIT_APP(0);
+  disruntimedata();
+  BAMBOO_EXIT_APP(0);
 }
 
 INLINE void processmsg_memrequest_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // receive a shared memory request msg
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-               // wrong core to receive such msg
+#ifdef PROFILE
+  if(!interruptInfoOverflow) {
+    InterruptInfo* intInfo = RUNMALLOC_I(sizeof(struct interrupt_info));
+    interruptInfoArray[interruptInfoIndex] = intInfo;
+    intInfo->startTime = BAMBOO_GET_EXE_TIME();
+    intInfo->endTime = -1;
+  }
+#endif
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // receive a shared memory request msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // wrong core to receive such msg
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xa010);
-       } else {
+    BAMBOO_EXIT(0xa010);
+  } else {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe88a);
+    BAMBOO_DEBUGPRINT(0xe88a);
 #endif
 #endif
-               int allocsize = 0;
-               void * mem = NULL;
+    int allocsize = 0;
+    void * mem = NULL;
 #ifdef MULTICORE_GC
-               if(gcprocessing) {
-                       // is currently doing gc, dump this msg
-                       if(INITPHASE == gcphase) {
-                               // if still in the initphase of gc, send a startinit msg again, 
-                               // cache the msg first
-                               //if(isMsgSending) {
-                                       cache_msg_1(data2, GCSTARTINIT);
-                               /*} else {
-                                       send_msg_1(data2, GCSTARTINIT);
-                               }*/
-                       }
-               } else { 
-#endif
-               mem = smemalloc_I(data2, data1, &allocsize);
-               if(mem != NULL) {
-                       // send the start_va to request core, cache the msg first
-                       //if(isMsgSending) {
-                               cache_msg_3(data2, MEMRESPONSE, mem, allocsize);
-                       /*} else {
-                               send_msg_3(data2, MEMRESPONSE, mem, allocsize);
-                       }*/ 
-               } // if mem == NULL, the gcflag of the startup core has been set
-                       // and the gc should be started later, then a GCSTARTINIT msg
-                       // will be sent to the requesting core to notice it to start gc
-                       // and try malloc again
+    if(gcprocessing) {
+      // is currently doing gc, dump this msg
+      if(INITPHASE == gcphase) {
+       // if still in the initphase of gc, send a startinit msg again,
+       // cache the msg first
+       //if(isMsgSending) {
+       cache_msg_1(data2, GCSTARTINIT);
+       /*} else {
+               send_msg_1(data2, GCSTARTINIT);
+          }*/
+      }
+    } else {
+#endif
+    mem = smemalloc_I(data2, data1, &allocsize);
+    if(mem != NULL) {
+      // send the start_va to request core, cache the msg first
+      //if(isMsgSending) {
+      cache_msg_3(data2, MEMRESPONSE, mem, allocsize);
+      /*} else {
+              send_msg_3(data2, MEMRESPONSE, mem, allocsize);
+         }*/
+    } // if mem == NULL, the gcflag of the startup core has been set
+    // and the gc should be started later, then a GCSTARTINIT msg
+    // will be sent to the requesting core to notify it to start gc
+    // and try malloc again
 #ifdef MULTICORE_GC
-               }
+  }
+#endif
+  }
+#ifdef PROFILE
+  if(!interruptInfoOverflow) {
+    interruptInfoArray[interruptInfoIndex]->endTime=BAMBOO_GET_EXE_TIME();
+    interruptInfoIndex++;
+    if(interruptInfoIndex == INTERRUPTINFOLENGTH) {
+      interruptInfoOverflow = true;
+    }
+  }
 #endif
-       }
 }
 
 INLINE void processmsg_memresponse_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // receive a shared memory response msg
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // receive a shared memory response msg
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-       BAMBOO_DEBUGPRINT(0xe88b);
+  BAMBOO_DEBUGPRINT(0xe88b);
 #endif
 #endif
 #ifdef MULTICORE_GC
-       // if is currently doing gc, dump this msg
-       if(!gcprocessing) {
+  // if is currently doing gc, dump this msg
+  if(!gcprocessing) {
 #endif
-       if(data2 == 0) {
-               bamboo_smem_size = 0;
-               bamboo_cur_msp = 0;
-       } else {
+  if(data2 == 0) {
+    bamboo_smem_size = 0;
+    bamboo_cur_msp = 0;
+  } else {
 #ifdef MULTICORE_GC
-               // fill header to store the size of this mem block
-               memset(data1, 0, BAMBOO_CACHE_LINE_SIZE);
-               (*((int*)data1)) = data2;
-               bamboo_smem_size = data2 - BAMBOO_CACHE_LINE_SIZE;
-               bamboo_cur_msp = data1 + BAMBOO_CACHE_LINE_SIZE;
+    // fill header to store the size of this mem block
+    memset(data1, 0, BAMBOO_CACHE_LINE_SIZE);
+    (*((int*)data1)) = data2;
+    bamboo_smem_size = data2 - BAMBOO_CACHE_LINE_SIZE;
+    bamboo_cur_msp = data1 + BAMBOO_CACHE_LINE_SIZE;
 #else
-               bamboo_smem_size = data2;
-               bamboo_cur_msp =(void*)(data1);
+    bamboo_smem_size = data2;
+    bamboo_cur_msp =(void*)(data1);
 #endif
-       }
-       smemflag = true;
+  }
+  smemflag = true;
 #ifdef MULTICORE_GC
-       }
+}
 #endif
 }
 
 #ifdef MULTICORE_GC
 INLINE void processmsg_gcstartinit_I() {
-       gcflag = true;
-       gcphase = INITPHASE;
-       if(!smemflag) {
-               // is waiting for response of mem request
-               // let it return NULL and start gc
-               bamboo_smem_size = 0;
-               bamboo_cur_msp = NULL;
-               smemflag = true;
-       }
+  gcflag = true;
+  gcphase = INITPHASE;
+  if(!smemflag) {
+    // is waiting for response of mem request
+    // let it return NULL and start gc
+    bamboo_smem_size = 0;
+    bamboo_cur_msp = NULL;
+    smemflag = true;
+  }
 }
 
 INLINE void processmsg_gcstart_I() {
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-       BAMBOO_DEBUGPRINT(0xe88c);
+  BAMBOO_DEBUGPRINT(0xe88c);
 #endif
 #endif
-       // set the GC flag
-       gcphase = MARKPHASE;
+  // set the GC flag
+  gcphase = MARKPHASE;
 }
 
 INLINE void processmsg_gcstartcompact_I() {
-       gcblock2fill = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[1];
-       gcphase = COMPACTPHASE;
+  gcblock2fill = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[1];
+  gcphase = COMPACTPHASE;
 }
 
 INLINE void processmsg_gcstartflush_I() {
-       gcphase = FLUSHPHASE;
+  gcphase = FLUSHPHASE;
 }
 
 INLINE void processmsg_gcfinishinit_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // received a init phase finish msg
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-               // non startup core can not receive this msg
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // received an init phase finish msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // non startup core can not receive this msg
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data1);
+    BAMBOO_DEBUGPRINT_REG(data1);
 #endif
-               BAMBOO_EXIT(0xb001);
-       }
+    BAMBOO_EXIT(0xb001);
+  }
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe88c);
-       BAMBOO_DEBUGPRINT_REG(data1);
+  BAMBOO_DEBUGPRINT(0xe88c);
+  BAMBOO_DEBUGPRINT_REG(data1);
 #endif
-       // All cores should do init GC
-       if(data1 < NUMCORESACTIVE) {
-               gccorestatus[data1] = 0;
-       }
+  // All cores should do init GC
+  if(data1 < NUMCORESACTIVE) {
+    gccorestatus[data1] = 0;
+  }
 }
 
 INLINE void processmsg_gcfinishmark_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data3 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // received a mark phase finish msg
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-               // non startup core can not receive this msg
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // received a mark phase finish msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // non startup core can not receive this msg
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data1);
+    BAMBOO_DEBUGPRINT_REG(data1);
 #endif
-               BAMBOO_EXIT(0xb002);
-       }
-       // all cores should do mark
-       if(data1 < NUMCORESACTIVE) {
-               gccorestatus[data1] = 0;
-               gcnumsendobjs[data1] = data2;
-               gcnumreceiveobjs[data1] = data3;
-       }
+    BAMBOO_EXIT(0xb002);
+  }
+  // all cores should do mark
+  if(data1 < NUMCORESACTIVE) {
+    gccorestatus[data1] = 0;
+    gcnumsendobjs[data1] = data2;
+    gcnumreceiveobjs[data1] = data3;
+  }
 }
 
 INLINE void processmsg_gcfinishcompact_I() {
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-               // non startup core can not receive this msg
-               // return -1
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // non startup core can not receive this msg
+    // return -1
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]/*[1]*/);
+    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[1]*/);
 #endif
-               BAMBOO_EXIT(0xb003);
-       }
-       int cnum = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[1];
-       int filledblocks = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[2];
-       int heaptop = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[3];
-       int data4 = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[4];
-       // only gc cores need to do compact
-       if(cnum < NUMCORES4GC) {
-               if(COMPACTPHASE == gcphase) {
-                       gcfilledblocks[cnum] = filledblocks;
-                       gcloads[cnum] = heaptop;
-               }
-               if(data4 > 0) {
-                       // ask for more mem
-                       int startaddr = 0;
-                       int tomove = 0;
-                       int dstcore = 0;
-                       if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) {
-                               // cache the msg first
-                               //if(isMsgSending) {
-                                       cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
-                         /*} else {
-                                       send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
-                               }*/
-                       }
-               } else {
-                       gccorestatus[cnum] = 0;
-               } // if(data4>0)
-       } // if(cnum < NUMCORES4GC)
+    BAMBOO_EXIT(0xb003);
+  }
+  int cnum = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[1];
+  int filledblocks = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[2];
+  int heaptop = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[3];
+  int data4 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[4];
+  // only gc cores need to do compact
+  if(cnum < NUMCORES4GC) {
+    if(COMPACTPHASE == gcphase) {
+      gcfilledblocks[cnum] = filledblocks;
+      gcloads[cnum] = heaptop;
+    }
+    if(data4 > 0) {
+      // ask for more mem
+      int startaddr = 0;
+      int tomove = 0;
+      int dstcore = 0;
+      if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) {
+       // cache the msg first
+       //if(isMsgSending) {
+       cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
+       /*} else {
+                     send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
+             }*/
+      }
+    } else {
+      gccorestatus[cnum] = 0;
+    }             // if(data4>0)
+  }       // if(cnum < NUMCORES4GC)
 }
 
 INLINE void processmsg_gcfinishflush_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // received a flush phase finish msg
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-               // non startup core can not receive this msg
-               // return -1
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // received a flush phase finish msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // non startup core can not receive this msg
+    // return -1
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data1);
+    BAMBOO_DEBUGPRINT_REG(data1);
 #endif
-               BAMBOO_EXIT(0xb004);
-       } 
-       // all cores should do flush
-       if(data1 < NUMCORESACTIVE) {
-               gccorestatus[data1] = 0;
-       }
+    BAMBOO_EXIT(0xb004);
+  }
+  // all cores should do flush
+  if(data1 < NUMCORESACTIVE) {
+    gccorestatus[data1] = 0;
+  }
 }
 
 INLINE void processmsg_gcmarkconfirm_I() {
-       if((BAMBOO_NUM_OF_CORE == STARTUPCORE) 
-                       || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
-               // wrong core to receive such msg
-               BAMBOO_EXIT(0xb005);
-       } else {
-               // send response msg, cahce the msg first
-               //if(isMsgSending) {
-                       cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, 
-                                                                       gcbusystatus, gcself_numsendobjs, 
-                                                                       gcself_numreceiveobjs);
-               /*} else {
-                       send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE, 
-                                                                gcbusystatus, gcself_numsendobjs, 
-                                                                gcself_numreceiveobjs);
-               }*/
-       }
+  if((BAMBOO_NUM_OF_CORE == STARTUPCORE)
+     || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
+    // wrong core to receive such msg
+    BAMBOO_EXIT(0xb005);
+  } else {
+    // send response msg, cache the msg first
+    //if(isMsgSending) {
+    cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE,
+                gcbusystatus, gcself_numsendobjs,
+                gcself_numreceiveobjs);
+    /*} else {
+            send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE,
+                                                     gcbusystatus, gcself_numsendobjs,
+                                                     gcself_numreceiveobjs);
+       }*/
+  }
 }
 
 INLINE void processmsg_gcmarkreport_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data3 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data4 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // received a marked phase finish confirm response msg
-       if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-               // wrong core to receive such msg
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data4 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // received a mark phase finish confirm response msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // wrong core to receive such msg
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xb006);
-       } else {
-               if(waitconfirm) {
-                       numconfirm--;
-               }
-               gccorestatus[data1] = data2;
-               gcnumsendobjs[data1] = data3;
-               gcnumreceiveobjs[data1] = data4;
-       }
+    BAMBOO_EXIT(0xb006);
+  } else {
+    if(waitconfirm) {
+      numconfirm--;
+    }
+    gccorestatus[data1] = data2;
+    gcnumsendobjs[data1] = data3;
+    gcnumreceiveobjs[data1] = data4;
+  }
 }
 
 INLINE void processmsg_gcmarkedobj_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       // received a markedObj msg
-       if(((int *)data1)[6] == INIT) {
-                       // this is the first time that this object is discovered,
-                       // set the flag as DISCOVERED
-                       ((int *)data1)[6] = DISCOVERED;
-                       gc_enqueue_I(data1);
-       }
-       gcself_numreceiveobjs++;
-       gcbusystatus = true;
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // received a markedObj msg
+  if(((int *)data1)[6] == INIT) {
+    // this is the first time that this object is discovered,
+    // set the flag as DISCOVERED
+    ((int *)data1)[6] = DISCOVERED;
+    gc_enqueue_I(data1);
+  }
+  gcself_numreceiveobjs++;
+  gcbusystatus = true;
 }
 
 INLINE void processmsg_gcmovestart_I() {
-       gctomove = true;
-       gcdstcore = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[1];
-       gcmovestartaddr = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[2];
-       gcblock2fill = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); //msgdata[3];
+  gctomove = true;
+  gcdstcore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[1];
+  gcmovestartaddr = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[2];
+  gcblock2fill = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       //msgdata[3];
 }
 
 INLINE void processmsg_gcmaprequest_I() {
 #ifdef GC_PROFILE
-       //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+  //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
 #endif
-       void * dstptr = NULL;
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       //dstptr = mgchashSearch(msgdata[1]);
+  void * dstptr = NULL;
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  //dstptr = mgchashSearch(msgdata[1]);
 #ifdef GC_PROFILE
-       unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+  unsigned long long ttime = BAMBOO_GET_EXE_TIME();
 #endif
-       RuntimeHashget(gcpointertbl, data1, &dstptr);
+  RuntimeHashget(gcpointertbl, data1, &dstptr);
 #ifdef GC_PROFILE
-       flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
+  flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
 #endif
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       //MGCHashget(gcpointertbl, msgdata[1], &dstptr);
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  //MGCHashget(gcpointertbl, msgdata[1], &dstptr);
 #ifdef GC_PROFILE
-       unsigned long long ttimei = BAMBOO_GET_EXE_TIME();
+  unsigned long long ttimei = BAMBOO_GET_EXE_TIME();
 #endif
-       if(NULL == dstptr) {
-               // no such pointer in this core, something is wrong
+  if(NULL == dstptr) {
+    // no such pointer in this core, something is wrong
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT_REG(data1);
-               BAMBOO_DEBUGPRINT_REG(data2);
+    BAMBOO_DEBUGPRINT_REG(data1);
+    BAMBOO_DEBUGPRINT_REG(data2);
 #endif
-               BAMBOO_EXIT(0xb007);
-               //assume that the object was not moved, use the original address
-               /*if(isMsgSending) {
-                       cache_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]);
-               } else {
-                       send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]);
-               }*/
-       } else {
-               // send back the mapping info, cache the msg first
-               //if(isMsgSending) {
-                       cache_msg_3(data2, GCMAPINFO, data1, (int)dstptr);
-               /*} else {
-                       send_msg_3(data2, GCMAPINFO, data1, (int)dstptr);
-               }*/
-       }
+    BAMBOO_EXIT(0xb007);
+    //assume that the object was not moved, use the original address
+    /*if(isMsgSending) {
+            cache_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]);
+       } else {
+            send_msg_3(msgdata[2], GCMAPINFO, msgdata[1], msgdata[1]);
+       }*/
+  } else {
+    // send back the mapping info, cache the msg first
+    //if(isMsgSending) {
+    cache_msg_3(data2, GCMAPINFO, data1, (int)dstptr);
+    /*} else {
+            send_msg_3(data2, GCMAPINFO, data1, (int)dstptr);
+       }*/
+  }
 #ifdef GC_PROFILE
-       flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimei;
-       //num_mapinforequest_i++;
+  flushstalltime_i += BAMBOO_GET_EXE_TIME()-ttimei;
+  //num_mapinforequest_i++;
 #endif
 }
 
 INLINE void processmsg_gcmapinfo_I() {
 #ifdef GC_PROFILE
-       //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
-#endif
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       if(data1 != gcobj2map) {
-                       // obj not matched, something is wrong
-#ifdef DEBUG
-                       BAMBOO_DEBUGPRINT_REG(gcobj2map);
-                       BAMBOO_DEBUGPRINT_REG(msgdata[1]);
-#endif
-                       BAMBOO_EXIT(0xb008);
-               } else {
-                       gcmappedobj = msgdata[msgdataindex]; // [2]
-      MSG_INDEXINC_I();
-                       //mgchashReplace_I(msgdata[1], msgdata[2]);
-                       //mgchashInsert_I(gcobj2map, gcmappedobj);
-                       RuntimeHashadd_I(gcpointertbl, gcobj2map, gcmappedobj);
-                       //MGCHashadd_I(gcpointertbl, gcobj2map, gcmappedobj);
-               }
-               gcismapped = true;
+  //unsigned long long ttime = BAMBOO_GET_EXE_TIME();
+#endif
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(data1 != gcobj2map) {
+    // obj not matched, something is wrong
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT_REG(gcobj2map);
+    BAMBOO_DEBUGPRINT_REG(msgdata[1]);
+#endif
+    BAMBOO_EXIT(0xb008);
+  } else {
+    gcmappedobj = msgdata[msgdataindex];                     // [2]
+    MSG_INDEXINC_I();
+    //mgchashReplace_I(msgdata[1], msgdata[2]);
+    //mgchashInsert_I(gcobj2map, gcmappedobj);
+    RuntimeHashadd_I(gcpointertbl, gcobj2map, gcmappedobj);
+    //MGCHashadd_I(gcpointertbl, gcobj2map, gcmappedobj);
+  }
+  gcismapped = true;
 #ifdef GC_PROFILE
-                       //flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
+  //flushstalltime += BAMBOO_GET_EXE_TIME() - ttime;
 #endif
 }
 
 INLINE void processmsg_gclobjinfo_I() {
-       numconfirm--;
+  numconfirm--;
 
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       if(BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1) {
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1) {
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-               BAMBOO_EXIT(0xb009);
-       } 
-       // store the mark result info 
-       int cnum = data2;
-       gcloads[cnum] = msgdata[msgdataindex];
-       MSG_INDEXINC_I(); // msgdata[3];
-       int data4 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       if(gcheaptop < data4) {
-               gcheaptop = data4;
-       }
-       // large obj info here
-       for(int k = 5; k < data1;) {
-               int lobj = msgdata[msgdataindex];
-               MSG_INDEXINC_I(); //msgdata[k++];
-               int length = msgdata[msgdataindex];
-               MSG_INDEXINC_I(); //msgdata[k++];
-               gc_lobjenqueue_I(lobj, length, cnum);
-               gcnumlobjs++;
-       } // for(int k = 5; k < msgdata[1];)
+    BAMBOO_DEBUGPRINT_REG(data2);
+#endif
+    BAMBOO_EXIT(0xb009);
+  }
+  // store the mark result info
+  int cnum = data2;
+  gcloads[cnum] = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       // msgdata[3];
+  int data4 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(gcheaptop < data4) {
+    gcheaptop = data4;
+  }
+  // large obj info here
+  for(int k = 5; k < data1; ) {
+    int lobj = msgdata[msgdataindex];
+    MSG_INDEXINC_I();             //msgdata[k++];
+    int length = msgdata[msgdataindex];
+    MSG_INDEXINC_I();             //msgdata[k++];
+    gc_lobjenqueue_I(lobj, length, cnum);
+    gcnumlobjs++;
+  }       // for(int k = 5; k < msgdata[1];)
 }
 
 INLINE void processmsg_gclobjmapping_I() {
-       int data1 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       int data2 = msgdata[msgdataindex];
-       MSG_INDEXINC_I();
-       //mgchashInsert_I(msgdata[1], msgdata[2]);
-       RuntimeHashadd_I(gcpointertbl, data1, data2);
-       //MGCHashadd_I(gcpointertbl, msgdata[1], msgdata[2]);
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  //mgchashInsert_I(msgdata[1], msgdata[2]);
+  RuntimeHashadd_I(gcpointertbl, data1, data2);
+  //MGCHashadd_I(gcpointertbl, msgdata[1], msgdata[2]);
 }
 #endif // #ifdef MULTICORE_GC
 
@@ -2379,298 +2440,298 @@ INLINE void processmsg_gclobjmapping_I() {
 //                            otherwise -- received msg type
 int receiveObject(int send_port_pending) {
 msg:
-       // get the incoming msgs
+  // get the incoming msgs
   if(receiveMsg(send_port_pending) == -1) {
-         return -1;
+    return -1;
   }
 processmsg:
-       // processing received msgs
-       int size = 0;
-       MSG_REMAINSIZE_I(&size);
+  // processing received msgs
+  int size = 0;
+  MSG_REMAINSIZE_I(&size);
   if((size == 0) || (checkMsgLength_I(size) == -1)) {
-               // not a whole msg
-               // have new coming msg
-               if(BAMBOO_MSG_AVAIL() != 0) {
-                       goto msg;
-               } else {
-                       return -1;
-               }
-       }
+    // not a whole msg
+    // have new coming msg
+    if(BAMBOO_MSG_AVAIL() != 0) {
+      goto msg;
+    } else {
+      return -1;
+    }
+  }
 
-       if(msglength <= size) {
-               // have some whole msg
-  //if(msgdataindex == msglength) {
+  if(msglength <= size) {
+    // have some whole msg
+    //if(msgdataindex == msglength) {
     // received a whole msg
     MSGTYPE type;
     type = msgdata[msgdataindex]; //[0]
-               MSG_INDEXINC_I();
-               msgdatafull = false;
-               // TODO
-               //tprintf("msg type: %x\n", type);
+    MSG_INDEXINC_I();
+    msgdatafull = false;
+    // TODO
+    //tprintf("msg type: %x\n", type);
     switch(type) {
-                       case TRANSOBJ: {
-                               // receive a object transfer msg
-                               processmsg_transobj_I();
-                               break;
-                       } // case TRANSOBJ
-
-                       case TRANSTALL: {
-                               // receive a stall msg
-                               processmsg_transtall_I();
-                               break;
-                       } // case TRANSTALL
+    case TRANSOBJ: {
+      // receive an object transfer msg
+      processmsg_transobj_I();
+      break;
+    }                     // case TRANSOBJ
+
+    case TRANSTALL: {
+      // receive a stall msg
+      processmsg_transtall_I();
+      break;
+    }                     // case TRANSTALL
 
 // GC version have no lock msgs
 #ifndef MULTICORE_GC
-                       case LOCKREQUEST: {
-                               // receive lock request msg, handle it right now
-                               processmsg_lockrequest_I();
-                               break;
-                       } // case LOCKREQUEST
-
-                       case LOCKGROUNT: {
-                               // receive lock grount msg
-                               processmsg_lockgrount_I();
-                               break;
-                       } // case LOCKGROUNT
-
-                       case LOCKDENY: {
-                               // receive lock deny msg
-                               processmsg_lockdeny_I();
-                               break;
-                       } // case LOCKDENY
-
-                       case LOCKRELEASE: {
-                               processmsg_lockrelease_I();
-                               break;
-                       } // case LOCKRELEASE
+    case LOCKREQUEST: {
+      // receive lock request msg, handle it right now
+      processmsg_lockrequest_I();
+      break;
+    }                     // case LOCKREQUEST
+
+    case LOCKGROUNT: {
+      // receive lock grant msg
+      processmsg_lockgrount_I();
+      break;
+    }                     // case LOCKGROUNT
+
+    case LOCKDENY: {
+      // receive lock deny msg
+      processmsg_lockdeny_I();
+      break;
+    }                     // case LOCKDENY
+
+    case LOCKRELEASE: {
+      processmsg_lockrelease_I();
+      break;
+    }                     // case LOCKRELEASE
 #endif // #ifndef MULTICORE_GC
 
 #ifdef PROFILE
-                       case PROFILEOUTPUT: {
-                               // receive an output profile data request msg
-                               processmsg_profileoutput_I();
-                               break;
-                       } // case PROFILEOUTPUT
-
-                       case PROFILEFINISH: {
-                               // receive a profile output finish msg
-                               processmsg_profilefinish_I();
-                               break;
-                       } // case PROFILEFINISH
+    case PROFILEOUTPUT: {
+      // receive an output profile data request msg
+      processmsg_profileoutput_I();
+      break;
+    }                     // case PROFILEOUTPUT
+
+    case PROFILEFINISH: {
+      // receive a profile output finish msg
+      processmsg_profilefinish_I();
+      break;
+    }                     // case PROFILEFINISH
 #endif // #ifdef PROFILE
 
 // GC version has no lock msgs
 #ifndef MULTICORE_GC
-                       case REDIRECTLOCK: {
-                               // receive a redirect lock request msg, handle it right now
-                               processmsg_redirectlock_I();
-                               break;
-                       } // case REDIRECTLOCK
-
-                       case REDIRECTGROUNT: {
-                               // receive a lock grant msg with redirect info
-                               processmsg_redirectgrount_I();
-                               break;
-                       } // case REDIRECTGROUNT
-                       
-                       case REDIRECTDENY: {
-                               // receive a lock deny msg with redirect info
-                               processmsg_redirectdeny_I();
-                               break;
-                       } // case REDIRECTDENY
-
-                       case REDIRECTRELEASE: {
-                               // receive a lock release msg with redirect info
-                               processmsg_redirectrelease_I();
-                               break;
-                       } // case REDIRECTRELEASE
+    case REDIRECTLOCK: {
+      // receive a redirect lock request msg, handle it right now
+      processmsg_redirectlock_I();
+      break;
+    }                     // case REDIRECTLOCK
+
+    case REDIRECTGROUNT: {
+      // receive a lock grant msg with redirect info
+      processmsg_redirectgrount_I();
+      break;
+    }                     // case REDIRECTGROUNT
+
+    case REDIRECTDENY: {
+      // receive a lock deny msg with redirect info
+      processmsg_redirectdeny_I();
+      break;
+    }                     // case REDIRECTDENY
+
+    case REDIRECTRELEASE: {
+      // receive a lock release msg with redirect info
+      processmsg_redirectrelease_I();
+      break;
+    }                     // case REDIRECTRELEASE
 #endif // #ifndef MULTICORE_GC
-       
-                       case STATUSCONFIRM: {
-                               // receive a status confirm info
-                               processmsg_statusconfirm_I();
-                               break;
-                       } // case STATUSCONFIRM
-
-                       case STATUSREPORT: {
-                               processmsg_statusreport_I();
-                               break;
-                       } // case STATUSREPORT
-
-                       case TERMINATE: {
-                               // receive a terminate msg
-                               processmsg_terminate_I();
-                               break;
-                       } // case TERMINATE
-
-                       case MEMREQUEST: {
-                               processmsg_memrequest_I();
-                               break;
-                       } // case MEMREQUEST
-
-                       case MEMRESPONSE: {
-                               processmsg_memresponse_I();
-                               break;
-                       } // case MEMRESPONSE
+
+    case STATUSCONFIRM: {
+      // receive a status confirm info
+      processmsg_statusconfirm_I();
+      break;
+    }                     // case STATUSCONFIRM
+
+    case STATUSREPORT: {
+      processmsg_statusreport_I();
+      break;
+    }                     // case STATUSREPORT
+
+    case TERMINATE: {
+      // receive a terminate msg
+      processmsg_terminate_I();
+      break;
+    }                     // case TERMINATE
+
+    case MEMREQUEST: {
+      processmsg_memrequest_I();
+      break;
+    }                     // case MEMREQUEST
+
+    case MEMRESPONSE: {
+      processmsg_memresponse_I();
+      break;
+    }                     // case MEMRESPONSE
 
 #ifdef MULTICORE_GC
-                       // GC msgs
-                       case GCSTARTINIT: {
-                               processmsg_gcstartinit_I();
-                               break;
-                       } // case GCSTARTINIT
-
-                       case GCSTART: {
-                               // receive a start GC msg
-                               processmsg_gcstart_I();
-                               break;
-                       } // case GCSTART
-
-                       case GCSTARTCOMPACT: {
-                               // a compact phase start msg
-                               processmsg_gcstartcompact_I();
-                               break;
-                       } // case GCSTARTCOMPACT
-
-                       case GCSTARTFLUSH: {
-                               // received a flush phase start msg
-                               processmsg_gcstartflush_I();
-                               break;
-                       } // case GCSTARTFLUSH
-                       
-                       case GCFINISHINIT: {
-                               processmsg_gcfinishinit_I();
-                               break;
-                       } // case GCFINISHINIT
-
-                       case GCFINISHMARK: {
-                               processmsg_gcfinishmark_I();
-                               break;
-                       } // case GCFINISHMARK
-                       
-                       case GCFINISHCOMPACT: {
-                               // received a compact phase finish msg
-                               processmsg_gcfinishcompact_I();
-                               break;
-                       } // case GCFINISHCOMPACT
-
-                       case GCFINISHFLUSH: {
-                               processmsg_gcfinishflush_I();
-                               break;
-                       } // case GCFINISHFLUSH
-
-                       case GCFINISH: {
-                               // received a GC finish msg
-                               gcphase = FINISHPHASE;
-                               break;
-                       } // case GCFINISH
-
-                       case GCMARKCONFIRM: {
-                               // received a marked phase finish confirm request msg
-                               // all cores should do mark
-                               processmsg_gcmarkconfirm_I();
-                               break;
-                       } // case GCMARKCONFIRM
-
-                       case GCMARKREPORT: {
-                               processmsg_gcmarkreport_I();
-                               break;
-                       } // case GCMARKREPORT
-
-                       case GCMARKEDOBJ: {
-                               processmsg_gcmarkedobj_I();
-                               break;
-                       } // case GCMARKEDOBJ
-
-                       case GCMOVESTART: {
-                               // received a start moving objs msg
-                               processmsg_gcmovestart_I();
-                               break;
-                       } // case GCMOVESTART
-                       
-                       case GCMAPREQUEST: {
-                               // received a mapping info request msg
-                               processmsg_gcmaprequest_I();
-                               break;
-                       } // case GCMAPREQUEST
-
-                       case GCMAPINFO: {
-                               // received a mapping info response msg
-                               processmsg_gcmapinfo_I();
-                               break;
-                       } // case GCMAPINFO
-
-                       case GCLOBJREQUEST: {
-                               // received a large objs info request msg
-                               transferMarkResults_I();
-                               break;
-                       } // case GCLOBJREQUEST
-
-                       case GCLOBJINFO: {
-                               // received a large objs info response msg
-                               processmsg_gclobjinfo_I();
-                               break;
-                       } // case GCLOBJINFO
-                       
-                       case GCLOBJMAPPING: {
-                               // received a large obj mapping info msg
-                               processmsg_gclobjmapping_I();
-                               break;
-                       } // case GCLOBJMAPPING
+    // GC msgs
+    case GCSTARTINIT: {
+      processmsg_gcstartinit_I();
+      break;
+    }                     // case GCSTARTINIT
+
+    case GCSTART: {
+      // receive a start GC msg
+      processmsg_gcstart_I();
+      break;
+    }                     // case GCSTART
+
+    case GCSTARTCOMPACT: {
+      // a compact phase start msg
+      processmsg_gcstartcompact_I();
+      break;
+    }                     // case GCSTARTCOMPACT
+
+    case GCSTARTFLUSH: {
+      // received a flush phase start msg
+      processmsg_gcstartflush_I();
+      break;
+    }                     // case GCSTARTFLUSH
+
+    case GCFINISHINIT: {
+      processmsg_gcfinishinit_I();
+      break;
+    }                     // case GCFINISHINIT
+
+    case GCFINISHMARK: {
+      processmsg_gcfinishmark_I();
+      break;
+    }                     // case GCFINISHMARK
+
+    case GCFINISHCOMPACT: {
+      // received a compact phase finish msg
+      processmsg_gcfinishcompact_I();
+      break;
+    }                     // case GCFINISHCOMPACT
+
+    case GCFINISHFLUSH: {
+      processmsg_gcfinishflush_I();
+      break;
+    }                     // case GCFINISHFLUSH
+
+    case GCFINISH: {
+      // received a GC finish msg
+      gcphase = FINISHPHASE;
+      break;
+    }                     // case GCFINISH
+
+    case GCMARKCONFIRM: {
+      // received a marked phase finish confirm request msg
+      // all cores should do mark
+      processmsg_gcmarkconfirm_I();
+      break;
+    }                     // case GCMARKCONFIRM
+
+    case GCMARKREPORT: {
+      processmsg_gcmarkreport_I();
+      break;
+    }                     // case GCMARKREPORT
+
+    case GCMARKEDOBJ: {
+      processmsg_gcmarkedobj_I();
+      break;
+    }                     // case GCMARKEDOBJ
+
+    case GCMOVESTART: {
+      // received a start moving objs msg
+      processmsg_gcmovestart_I();
+      break;
+    }                     // case GCMOVESTART
+
+    case GCMAPREQUEST: {
+      // received a mapping info request msg
+      processmsg_gcmaprequest_I();
+      break;
+    }                     // case GCMAPREQUEST
+
+    case GCMAPINFO: {
+      // received a mapping info response msg
+      processmsg_gcmapinfo_I();
+      break;
+    }                     // case GCMAPINFO
+
+    case GCLOBJREQUEST: {
+      // received a large objs info request msg
+      transferMarkResults_I();
+      break;
+    }                     // case GCLOBJREQUEST
+
+    case GCLOBJINFO: {
+      // received a large objs info response msg
+      processmsg_gclobjinfo_I();
+      break;
+    }                     // case GCLOBJINFO
+
+    case GCLOBJMAPPING: {
+      // received a large obj mapping info msg
+      processmsg_gclobjmapping_I();
+      break;
+    }                     // case GCLOBJMAPPING
 
 #endif // #ifdef MULTICORE_GC
 
-                       default:
-                               break;
-               } // switch(type)
-               //memset(msgdata, '\0', sizeof(int) * msgdataindex);
-               //msgdataindex = 0;
-               msglength = BAMBOO_MSG_BUF_LENGTH;
-               // TODO
-               //printf("++ msg: %x \n", type);
-               if(msgdataindex != msgdatalast) {
-                       // still have available msg
-                       goto processmsg;
-               }
+    default:
+      break;
+    }             // switch(type)
+                  //memset(msgdata, '\0', sizeof(int) * msgdataindex);
+                  //msgdataindex = 0;
+    msglength = BAMBOO_MSG_BUF_LENGTH;
+    // TODO
+    //printf("++ msg: %x \n", type);
+    if(msgdataindex != msgdatalast) {
+      // still have available msg
+      goto processmsg;
+    }
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe88d);
+    BAMBOO_DEBUGPRINT(0xe88d);
 #endif
 #endif
 
-               // have new coming msg
-               if(BAMBOO_MSG_AVAIL() != 0) {
-                       goto msg;
-               }
+    // another incoming msg is available, go handle it
+    if(BAMBOO_MSG_AVAIL() != 0) {
+      goto msg;
+    }
 
 #ifdef PROFILE
 /*if(isInterrupt) {
-               profileTaskEnd();
-       }*/
+                profileTaskEnd();
+        }*/
 #endif
-               return (int)type;
-       } else {
-               // not a whole msg
+    return (int)type;
+  } else {
+    // not a whole msg
 #ifdef DEBUG
 #ifndef CLOSE_PRINT
-               BAMBOO_DEBUGPRINT(0xe88e);
+    BAMBOO_DEBUGPRINT(0xe88e);
 #endif
 #endif
 #ifdef PROFILE
-       /*  if(isInterrupt) {
-                               profileTaskEnd();
-                       }*/
+    /*  if(isInterrupt) {
+                            profileTaskEnd();
+                    }*/
 #endif
     return -2;
   }
 }
 
-int enqueuetasks(struct parameterwrapper *parameter, 
-                            struct parameterwrapper *prevptr, 
-                                                                struct ___Object___ *ptr, 
-                                                                int * enterflags, 
-                                                                int numenterflags) {
+int enqueuetasks(struct parameterwrapper *parameter,
+                 struct parameterwrapper *prevptr,
+                 struct ___Object___ *ptr,
+                 int * enterflags,
+                 int numenterflags) {
   void * taskpointerarray[MAXTASKPARAMS];
   int j;
   //int numparams=parameter->task->numParameters;
@@ -2679,9 +2740,9 @@ int enqueuetasks(struct parameterwrapper *parameter,
 
   struct taskdescriptor * task=parameter->task;
 
-   //this add the object to parameterwrapper
-   ObjectHashadd(parameter->objectset, (int) ptr, 0, (int) enterflags, 
-                                  numenterflags, enterflags==NULL);
+  //this adds the object to the parameterwrapper
+  ObjectHashadd(parameter->objectset, (int) ptr, 0, (int) enterflags,
+                numenterflags, enterflags==NULL);
 
   /* Add enqueued object to parameter vector */
   taskpointerarray[parameter->slot]=ptr;
@@ -2711,19 +2772,19 @@ backtrackinit:
     /* Enqueue current state */
     //int launch = 0;
     struct taskparamdescriptor *tpd=
-                       RUNMALLOC(sizeof(struct taskparamdescriptor));
+      RUNMALLOC(sizeof(struct taskparamdescriptor));
     tpd->task=task;
     tpd->numParameters=numiterators+1;
     tpd->parameterArray=RUNMALLOC(sizeof(void *)*(numiterators+1));
 
     for(j=0; j<=numiterators; j++) {
-                       //store the actual parameters
-      tpd->parameterArray[j]=taskpointerarray[j]; 
+      //store the actual parameters
+      tpd->parameterArray[j]=taskpointerarray[j];
     }
     /* Enqueue task */
-    if ((/*!gencontains(failedtasks, tpd)&&*/ 
-                                       !gencontains(activetasks,tpd))) {
-               genputtable(activetasks, tpd, tpd);
+    if (( /*!gencontains(failedtasks, tpd)&&*/
+          !gencontains(activetasks,tpd))) {
+      genputtable(activetasks, tpd, tpd);
     } else {
       RUNFREE(tpd->parameterArray);
       RUNFREE(tpd);
@@ -2751,11 +2812,11 @@ backtrackinc:
   return retval;
 }
 
-int enqueuetasks_I(struct parameterwrapper *parameter, 
-                              struct parameterwrapper *prevptr, 
-                                                                        struct ___Object___ *ptr, 
-                                                                        int * enterflags, 
-                                                                        int numenterflags) {
+int enqueuetasks_I(struct parameterwrapper *parameter,
+                   struct parameterwrapper *prevptr,
+                   struct ___Object___ *ptr,
+                   int * enterflags,
+                   int numenterflags) {
   void * taskpointerarray[MAXTASKPARAMS];
   int j;
   //int numparams=parameter->task->numParameters;
@@ -2766,9 +2827,9 @@ int enqueuetasks_I(struct parameterwrapper *parameter,
 
   struct taskdescriptor * task=parameter->task;
 
-   //this add the object to parameterwrapper
-   ObjectHashadd_I(parameter->objectset, (int) ptr, 0, (int) enterflags, 
-                                    numenterflags, enterflags==NULL);  
+  //this adds the object to the parameterwrapper
+  ObjectHashadd_I(parameter->objectset, (int) ptr, 0, (int) enterflags,
+                  numenterflags, enterflags==NULL);
 
   /* Add enqueued object to parameter vector */
   taskpointerarray[parameter->slot]=ptr;
@@ -2798,19 +2859,19 @@ backtrackinit:
     /* Enqueue current state */
     //int launch = 0;
     struct taskparamdescriptor *tpd=
-                       RUNMALLOC_I(sizeof(struct taskparamdescriptor));
+      RUNMALLOC_I(sizeof(struct taskparamdescriptor));
     tpd->task=task;
     tpd->numParameters=numiterators+1;
     tpd->parameterArray=RUNMALLOC_I(sizeof(void *)*(numiterators+1));
 
     for(j=0; j<=numiterators; j++) {
-                       //store the actual parameters
-      tpd->parameterArray[j]=taskpointerarray[j]; 
+      //store the actual parameters
+      tpd->parameterArray[j]=taskpointerarray[j];
     }
     /* Enqueue task */
-    if ((/*!gencontains(failedtasks, tpd)&&*/ 
-                                       !gencontains(activetasks,tpd))) {
-               genputtable_I(activetasks, tpd, tpd);
+    if (( /*!gencontains(failedtasks, tpd)&&*/
+          !gencontains(activetasks,tpd))) {
+      genputtable_I(activetasks, tpd, tpd);
     } else {
       RUNFREE(tpd->parameterArray);
       RUNFREE(tpd);
@@ -2844,8 +2905,8 @@ backtrackinc:
 #define OFFSET 0
 #endif
 
-int containstag(struct ___Object___ *ptr, 
-                           struct ___TagDescriptor___ *tag);
+int containstag(struct ___Object___ *ptr,
+                struct ___TagDescriptor___ *tag);
 
 #ifndef MULTICORE_GC
 void releasewritelock_r(void * lock, void * redirectlock) {
@@ -2861,9 +2922,9 @@ void releasewritelock_r(void * lock, void * redirectlock) {
 #endif
 
   if(targetcore == BAMBOO_NUM_OF_CORE) {
-               BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xf001);
+    BAMBOO_DEBUGPRINT(0xf001);
 #endif
     // reside on this core
     if(!RuntimeHashcontainskey(locktbl, reallock)) {
@@ -2871,31 +2932,31 @@ void releasewritelock_r(void * lock, void * redirectlock) {
       BAMBOO_EXIT(0xa011);
     } else {
       int rwlock_obj = 0;
-         struct LockValue * lockvalue = NULL;
+      struct LockValue * lockvalue = NULL;
 #ifdef DEBUG
       BAMBOO_DEBUGPRINT(0xe672);
 #endif
       RuntimeHashget(locktbl, reallock, &rwlock_obj);
-         lockvalue = (struct LockValue *)rwlock_obj;
+      lockvalue = (struct LockValue *)rwlock_obj;
 #ifdef DEBUG
       BAMBOO_DEBUGPRINT_REG(lockvalue->value);
 #endif
       lockvalue->value++;
-         lockvalue->redirectlock = (int)redirectlock;
+      lockvalue->redirectlock = (int)redirectlock;
 #ifdef DEBUG
       BAMBOO_DEBUGPRINT_REG(lockvalue->value);
 #endif
     }
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xf000);
+    BAMBOO_DEBUGPRINT(0xf000);
 #endif
     return;
   } else {
-         // send lock release with redirect info msg
-         // for 32 bit machine, the size is always 4 words
-               send_msg_4(targetcore, REDIRECTRELEASE, 1, (int)lock, 
-                                      (int)redirectlock);
+    // send lock release with redirect info msg
+    // for 32 bit machine, the size is always 4 words
+    send_msg_4(targetcore, REDIRECTRELEASE, 1, (int)lock,
+               (int)redirectlock);
   }
 }
 #endif
@@ -2918,7 +2979,7 @@ void executetasks() {
 newtask:
   while(hashsize(activetasks)>0) {
 #ifdef MULTICORE_GC
-               gc(NULL);
+    gc(NULL);
 #endif
 #ifdef DEBUG
     BAMBOO_DEBUGPRINT(0xe990);
@@ -2926,349 +2987,359 @@ newtask:
 
     /* See if there are any active tasks */
     //if (hashsize(activetasks)>0) {
-      int i;
+    int i;
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-         profileTaskStart("tpd checking");
-#endif
-#endif
-         //long clock1;
-         //clock1 = BAMBOO_GET_EXE_TIME();
-
-         busystatus = true;
-               currtpd=(struct taskparamdescriptor *) getfirstkey(activetasks);
-               genfreekey(activetasks, currtpd);
-
-               numparams=currtpd->task->numParameters;
-               numtotal=currtpd->task->numTotal;
-
-         // clear the lockRedirectTbl 
-               // (TODO, this table should be empty after all locks are released)
-         // reset all locks
-         /*for(j = 0; j < MAXTASKPARAMS; j++) {
-                 runtime_locks[j].redirectlock = 0;
-                 runtime_locks[j].value = 0;
-         }*/
-         // get all required locks
-         runtime_locklen = 0;
-         // check which locks are needed
-         for(i = 0; i < numparams; i++) {
-                 void * param = currtpd->parameterArray[i];
-                 int tmplock = 0;
-                 int j = 0;
-                 bool insert = true;
-                 if(((struct ___Object___ *)param)->type == STARTUPTYPE) {
-                         islock = false;
-                         taskpointerarray[i+OFFSET]=param;
-                         goto execute;
-                 }
-                 if(((struct ___Object___ *)param)->lock == NULL) {
-                         tmplock = (int)param;
-                 } else {
-                         tmplock = (int)(((struct ___Object___ *)param)->lock);
-                 }
-                 // insert into the locks array
-                 for(j = 0; j < runtime_locklen; j++) {
-                         if(runtime_locks[j].value == tmplock) {
-                                 insert = false;
-                                 break;
-                         } else if(runtime_locks[j].value > tmplock) {
-                                 break;
-                         }
-                 }
-                 if(insert) {
-                         int h = runtime_locklen;
-                         for(; h > j; h--) {
-                                 runtime_locks[h].redirectlock = runtime_locks[h-1].redirectlock;
-                                 runtime_locks[h].value = runtime_locks[h-1].value;
-                         }
-                         runtime_locks[j].value = tmplock;
-                         runtime_locks[j].redirectlock = (int)param;
-                         runtime_locklen++;
-                 }               
-         } // line 2713: for(i = 0; i < numparams; i++) 
-         // grab these required locks
-#ifdef DEBUG
-         BAMBOO_DEBUGPRINT(0xe991);
-#endif
-         //long clock2;
-         //clock2 = BAMBOO_GET_EXE_TIME();
-
-         for(i = 0; i < runtime_locklen; i++) {
-                 int * lock = (int *)(runtime_locks[i].redirectlock);
-                 islock = true;
-                 // require locks for this parameter if it is not a startup object
-#ifdef DEBUG
-                 BAMBOO_DEBUGPRINT_REG((int)lock);
-                 BAMBOO_DEBUGPRINT_REG((int)(runtime_locks[i].value));
-#endif
-                 getwritelock(lock);
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-#ifdef DEBUG
-                 BAMBOO_DEBUGPRINT(0xf001);
+    profileTaskStart("tpd checking");
+#endif
+#endif
+    //long clock1;
+    //clock1 = BAMBOO_GET_EXE_TIME();
+
+    busystatus = true;
+    currtpd=(struct taskparamdescriptor *) getfirstkey(activetasks);
+    genfreekey(activetasks, currtpd);
+
+    numparams=currtpd->task->numParameters;
+    numtotal=currtpd->task->numTotal;
+
+    // clear the lockRedirectTbl
+    // (TODO, this table should be empty after all locks are released)
+    // reset all locks
+    /*for(j = 0; j < MAXTASKPARAMS; j++) {
+            runtime_locks[j].redirectlock = 0;
+            runtime_locks[j].value = 0;
+       }*/
+    // get all required locks
+    runtime_locklen = 0;
+    // check which locks are needed
+    for(i = 0; i < numparams; i++) {
+      void * param = currtpd->parameterArray[i];
+      int tmplock = 0;
+      int j = 0;
+      bool insert = true;
+      if(((struct ___Object___ *)param)->type == STARTUPTYPE) {
+       islock = false;
+       taskpointerarray[i+OFFSET]=param;
+       goto execute;
+      }
+      if(((struct ___Object___ *)param)->lock == NULL) {
+       tmplock = (int)param;
+      } else {
+       tmplock = (int)(((struct ___Object___ *)param)->lock);
+      }
+      // insert into the locks array
+      for(j = 0; j < runtime_locklen; j++) {
+       if(runtime_locks[j].value == tmplock) {
+         insert = false;
+         break;
+       } else if(runtime_locks[j].value > tmplock) {
+         break;
+       }
+      }
+      if(insert) {
+       int h = runtime_locklen;
+       for(; h > j; h--) {
+         runtime_locks[h].redirectlock = runtime_locks[h-1].redirectlock;
+         runtime_locks[h].value = runtime_locks[h-1].value;
+       }
+       runtime_locks[j].value = tmplock;
+       runtime_locks[j].redirectlock = (int)param;
+       runtime_locklen++;
+      }
+    }       // line 2713: for(i = 0; i < numparams; i++)
+            // grab these required locks
+#ifdef DEBUG
+    BAMBOO_DEBUGPRINT(0xe991);
+#endif
+    //long clock2;
+    //clock2 = BAMBOO_GET_EXE_TIME();
+
+    for(i = 0; i < runtime_locklen; i++) {
+      int * lock = (int *)(runtime_locks[i].redirectlock);
+      islock = true;
+      // require locks for this parameter if it is not a startup object
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT_REG((int)lock);
+      BAMBOO_DEBUGPRINT_REG((int)(runtime_locks[i].value));
+#endif
+      getwritelock(lock);
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+#ifdef DEBUG
+      BAMBOO_DEBUGPRINT(0xf001);
 #endif
 #ifdef PROFILE
-                 //isInterrupt = false;
-#endif 
-                 while(!lockflag) { 
-                         BAMBOO_WAITING_FOR_LOCK(0);
-                 }
+      //isInterrupt = false;
+#endif
+      while(!lockflag) {
+       BAMBOO_WAITING_FOR_LOCK(0);
+       // check for outgoing sends
+    if (isMsgHanging) {
+      extern inline void send_hanging_msg(bool);
+      send_hanging_msg(true);
+    } 
+         }
 #ifndef INTERRUPT
-                 if(reside) {
-                         while(BAMBOO_WAITING_FOR_LOCK() != -1) {
-                         }
-                 }
+      if(reside) {
+       while(BAMBOO_WAITING_FOR_LOCK(0) != -1) {
+         // check for outgoing sends
+         if (isMsgHanging) {
+               extern inline void send_hanging_msg(bool);
+               send_hanging_msg(true);
+         } 
+       }
+      }
 #endif
-                 grount = lockresult;
+      grount = lockresult;
 
-                 lockresult = 0;
-                 lockobj = 0;
-                 lock2require = 0;
-                 lockflag = false;
+      lockresult = 0;
+      lockobj = 0;
+      lock2require = 0;
+      lockflag = false;
 #ifndef INTERRUPT
-                 reside = false;
+      reside = false;
 #endif
 #ifdef PROFILE
-                 //isInterrupt = true;
+      //isInterrupt = true;
 #endif
-                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 #ifdef DEBUG
-                 BAMBOO_DEBUGPRINT(0xf000);
+      BAMBOO_DEBUGPRINT(0xf000);
 #endif
 
-                 if(grount == 0) {
+      if(grount == 0) {
 #ifdef DEBUG
-                         BAMBOO_DEBUGPRINT(0xe992);
-                               BAMBOO_DEBUGPRINT_REG(lock);
+       BAMBOO_DEBUGPRINT(0xe992);
+       BAMBOO_DEBUGPRINT_REG(lock);
 #endif
-                               // check if has the lock already
-                         // can not get the lock, try later
-                         // release all grabbed locks for previous parameters
-                         for(j = 0; j < i; ++j) { 
-                                 lock = (int*)(runtime_locks[j].redirectlock);
-                                 releasewritelock(lock);
-                         }
-                         genputtable(activetasks, currtpd, currtpd);
-                         if(hashsize(activetasks) == 1) {
-                                 // only one task right now, wait a little while before next try
-                                 int halt = 10000;
-                                 while(halt--) {
-                                 }
-                         }
+       // check if has the lock already
+       // can not get the lock, try later
+       // release all grabbed locks for previous parameters
+       for(j = 0; j < i; ++j) {
+         lock = (int*)(runtime_locks[j].redirectlock);
+         releasewritelock(lock);
+       }
+       genputtable(activetasks, currtpd, currtpd);
+       if(hashsize(activetasks) == 1) {
+         // only one task right now, wait a little while before next try
+         int halt = 10000;
+         while(halt--) {
+         }
+       }
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-                         // fail, set the end of the checkTaskInfo
-                         profileTaskEnd();
+       // fail, set the end of the checkTaskInfo
+       profileTaskEnd();
 #endif
 #endif
-                         goto newtask;
-                               //}
-                 }
-         } // line 2752:  for(i = 0; i < runtime_locklen; i++)
+       goto newtask;
+       //}
+      }
+    }       // line 2752:  for(i = 0; i < runtime_locklen; i++)
 
-         /*long clock3;
-         clock3 = BAMBOO_GET_EXE_TIME();
-         //tprintf("sort: %d, grab: %d \n", clock2-clock1, clock3-clock2);*/
+    /*long clock3;
+       clock3 = BAMBOO_GET_EXE_TIME();
+       //tprintf("sort: %d, grab: %d \n", clock2-clock1, clock3-clock2);*/
 
 #ifdef DEBUG
-       BAMBOO_DEBUGPRINT(0xe993);
+    BAMBOO_DEBUGPRINT(0xe993);
 #endif
-      /* Make sure that the parameters are still in the queues */
-      for(i=0; i<numparams; i++) {
-       void * parameter=currtpd->parameterArray[i];
+    /* Make sure that the parameters are still in the queues */
+    for(i=0; i<numparams; i++) {
+      void * parameter=currtpd->parameterArray[i];
 
-       // flush the object
+      // flush the object
 #ifdef CACHEFLUSH
-       BAMBOO_CACHE_FLUSH_RANGE((int)parameter, 
-                       classsize[((struct ___Object___ *)parameter)->type]);
+      BAMBOO_CACHE_FLUSH_RANGE((int)parameter,
+                               classsize[((struct ___Object___ *)parameter)->type]);
 #endif
-       tmpparam = (struct ___Object___ *)parameter;
-       pd=currtpd->task->descriptorarray[i];
-       pw=(struct parameterwrapper *) pd->queue;
-       /* Check that object is still in queue */
-       {
-         if (!ObjectHashcontainskey(pw->objectset, (int) parameter)) {
+      tmpparam = (struct ___Object___ *)parameter;
+      pd=currtpd->task->descriptorarray[i];
+      pw=(struct parameterwrapper *) pd->queue;
+      /* Check that object is still in queue */
+      {
+       if (!ObjectHashcontainskey(pw->objectset, (int) parameter)) {
 #ifdef DEBUG
-           BAMBOO_DEBUGPRINT(0xe994);
-                       BAMBOO_DEBUGPRINT_REG(parameter);
+         BAMBOO_DEBUGPRINT(0xe994);
+         BAMBOO_DEBUGPRINT_REG(parameter);
 #endif
-           // release grabbed locks
-           for(j = 0; j < runtime_locklen; ++j) {
-               int * lock = (int *)(runtime_locks[j].redirectlock);
-               releasewritelock(lock);
-           }
-           RUNFREE(currtpd->parameterArray);
-           RUNFREE(currtpd);
-                       currtpd = NULL;
-           goto newtask;
+         // release grabbed locks
+         for(j = 0; j < runtime_locklen; ++j) {
+           int * lock = (int *)(runtime_locks[j].redirectlock);
+           releasewritelock(lock);
          }
-       } // line2865
-       /* Check if the object's flags still meets requirements */
-       {
-         int tmpi = 0;
-         bool ismet = false;
-         for(tmpi = 0; tmpi < pw->numberofterms; ++tmpi) {
-           andmask=pw->intarray[tmpi*2];
-           checkmask=pw->intarray[tmpi*2+1];
-           if((((struct ___Object___ *)parameter)->flag&andmask)==checkmask) {
-             ismet = true;
-             break;
-           }
+         RUNFREE(currtpd->parameterArray);
+         RUNFREE(currtpd);
+         currtpd = NULL;
+         goto newtask;
+       }
+      }   // line2865
+          /* Check if the object's flags still meets requirements */
+      {
+       int tmpi = 0;
+       bool ismet = false;
+       for(tmpi = 0; tmpi < pw->numberofterms; ++tmpi) {
+         andmask=pw->intarray[tmpi*2];
+         checkmask=pw->intarray[tmpi*2+1];
+         if((((struct ___Object___ *)parameter)->flag&andmask)==checkmask) {
+           ismet = true;
+           break;
          }
-         if (!ismet) {
-           // flags are never suitable
-           // remove this obj from the queue
-           int next;
-           int UNUSED, UNUSED2;
-           int * enterflags;
-#ifdef DEBUG
-           BAMBOO_DEBUGPRINT(0xe995);
-                       BAMBOO_DEBUGPRINT_REG(parameter);
-#endif
-           ObjectHashget(pw->objectset, (int) parameter, (int *) &next, 
-                                                 (int *) &enterflags, &UNUSED, &UNUSED2);
-           ObjectHashremove(pw->objectset, (int)parameter);
-           if (enterflags!=NULL)
-             RUNFREE(enterflags);
-           // release grabbed locks
-           for(j = 0; j < runtime_locklen; ++j) {
-                int * lock = (int *)(runtime_locks[j].redirectlock);
-               releasewritelock(lock);
-           }
-           RUNFREE(currtpd->parameterArray);
-           RUNFREE(currtpd);
-                       currtpd = NULL;
+       }
+       if (!ismet) {
+         // flags are never suitable
+         // remove this obj from the queue
+         int next;
+         int UNUSED, UNUSED2;
+         int * enterflags;
+#ifdef DEBUG
+         BAMBOO_DEBUGPRINT(0xe995);
+         BAMBOO_DEBUGPRINT_REG(parameter);
+#endif
+         ObjectHashget(pw->objectset, (int) parameter, (int *) &next,
+                       (int *) &enterflags, &UNUSED, &UNUSED2);
+         ObjectHashremove(pw->objectset, (int)parameter);
+         if (enterflags!=NULL)
+           RUNFREE(enterflags);
+         // release grabbed locks
+         for(j = 0; j < runtime_locklen; ++j) {
+           int * lock = (int *)(runtime_locks[j].redirectlock);
+           releasewritelock(lock);
+         }
+         RUNFREE(currtpd->parameterArray);
+         RUNFREE(currtpd);
+         currtpd = NULL;
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-           // fail, set the end of the checkTaskInfo
-               profileTaskEnd();
+         // fail, set the end of the checkTaskInfo
+         profileTaskEnd();
 #endif
 #endif
-           goto newtask;
-         } // line 2878: if (!ismet)
-       } // line 2867
+         goto newtask;
+       }   // line 2878: if (!ismet)
+      }   // line 2867
 parameterpresent:
-       ;
-       /* Check that object still has necessary tags */
-       for(j=0; j<pd->numbertags; j++) {
-         int slotid=pd->tagarray[2*j]+numparams;
-         struct ___TagDescriptor___ *tagd=currtpd->parameterArray[slotid];
-         if (!containstag(parameter, tagd)) {
+      ;
+      /* Check that object still has necessary tags */
+      for(j=0; j<pd->numbertags; j++) {
+       int slotid=pd->tagarray[2*j]+numparams;
+       struct ___TagDescriptor___ *tagd=currtpd->parameterArray[slotid];
+       if (!containstag(parameter, tagd)) {
 #ifdef DEBUG
-           BAMBOO_DEBUGPRINT(0xe996);
+         BAMBOO_DEBUGPRINT(0xe996);
 #endif
-               {
-               // release grabbed locks
-               int tmpj = 0;
+         {
+           // release grabbed locks
+           int tmpj = 0;
            for(tmpj = 0; tmpj < runtime_locklen; ++tmpj) {
-                int * lock = (int *)(runtime_locks[tmpj].redirectlock);
-               releasewritelock(lock);
+             int * lock = (int *)(runtime_locks[tmpj].redirectlock);
+             releasewritelock(lock);
            }
-               }
-           RUNFREE(currtpd->parameterArray);
-           RUNFREE(currtpd);
-                       currtpd = NULL;
-           goto newtask;
-         } // line2911: if (!containstag(parameter, tagd))
-       } // line 2808: for(j=0; j<pd->numbertags; j++)
-
-       taskpointerarray[i+OFFSET]=parameter;
-      } // line 2824: for(i=0; i<numparams; i++)
-      /* Copy the tags */
-      for(; i<numtotal; i++) {
-       taskpointerarray[i+OFFSET]=currtpd->parameterArray[i];
-      }
+         }
+         RUNFREE(currtpd->parameterArray);
+         RUNFREE(currtpd);
+         currtpd = NULL;
+         goto newtask;
+       }   // line2911: if (!containstag(parameter, tagd))
+      }   // line 2808: for(j=0; j<pd->numbertags; j++)
+
+      taskpointerarray[i+OFFSET]=parameter;
+    }   // line 2824: for(i=0; i<numparams; i++)
+        /* Copy the tags */
+    for(; i<numtotal; i++) {
+      taskpointerarray[i+OFFSET]=currtpd->parameterArray[i];
+    }
 
-      {
+    {
 execute:
-         /* Actually call task */
+      /* Actually call task */
 #ifdef MULTICORE_GC
-         ((int *)taskpointerarray)[0]=currtpd->numParameters;
-         taskpointerarray[1]=NULL;
+      ((int *)taskpointerarray)[0]=currtpd->numParameters;
+      taskpointerarray[1]=NULL;
 #endif
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-         // check finish, set the end of the checkTaskInfo
-         profileTaskEnd();
+      // check finish, set the end of the checkTaskInfo
+      profileTaskEnd();
 #endif
-         profileTaskStart(currtpd->task->name);
+      profileTaskStart(currtpd->task->name);
 #endif
-         // TODO
-         //long clock4;
-         //clock4 = BAMBOO_GET_EXE_TIME();
-         //tprintf("sort: %d, grab: %d, check: %d \n", (int)(clock2-clock1), (int)(clock3-clock2), (int)(clock4-clock3));
+      // TODO
+      //long clock4;
+      //clock4 = BAMBOO_GET_EXE_TIME();
+      //tprintf("sort: %d, grab: %d, check: %d \n", (int)(clock2-clock1), (int)(clock3-clock2), (int)(clock4-clock3));
 
 #ifdef DEBUG
-               BAMBOO_DEBUGPRINT(0xe997);
+      BAMBOO_DEBUGPRINT(0xe997);
 #endif
-               ((void(*) (void **))currtpd->task->taskptr)(taskpointerarray);
-               // TODO
-               //long clock5;
-         //clock5 = BAMBOO_GET_EXE_TIME();
-        // tprintf("sort: %d, grab: %d, check: %d \n", (int)(clock2-clock1), (int)(clock3-clock2), (int)(clock4-clock3));
+      ((void (*)(void **))currtpd->task->taskptr)(taskpointerarray);
+      // TODO
+      //long clock5;
+      //clock5 = BAMBOO_GET_EXE_TIME();
+      // tprintf("sort: %d, grab: %d, check: %d \n", (int)(clock2-clock1), (int)(clock3-clock2), (int)(clock4-clock3));
 
 #ifdef PROFILE
 #ifdef ACCURATEPROFILE
-         // task finish, set the end of the checkTaskInfo
-         profileTaskEnd();
-         // new a PostTaskInfo for the post-task execution
-         profileTaskStart("post task execution");
+      // task finish, set the end of the checkTaskInfo
+      profileTaskEnd();
+      // new a PostTaskInfo for the post-task execution
+      profileTaskStart("post task execution");
 #endif
 #endif
 #ifdef DEBUG
-         BAMBOO_DEBUGPRINT(0xe998);
-         BAMBOO_DEBUGPRINT_REG(islock);
+      BAMBOO_DEBUGPRINT(0xe998);
+      BAMBOO_DEBUGPRINT_REG(islock);
 #endif
 
-         if(islock) {
+      if(islock) {
 #ifdef DEBUG
-                 BAMBOO_DEBUGPRINT(0xe999);
-#endif 
-           for(i = 0; i < runtime_locklen; ++i) {
-                               void * ptr = (void *)(runtime_locks[i].redirectlock);
-             int * lock = (int *)(runtime_locks[i].value);
+       BAMBOO_DEBUGPRINT(0xe999);
+#endif
+       for(i = 0; i < runtime_locklen; ++i) {
+         void * ptr = (void *)(runtime_locks[i].redirectlock);
+         int * lock = (int *)(runtime_locks[i].value);
 #ifdef DEBUG
-                 BAMBOO_DEBUGPRINT_REG((int)ptr);
-                 BAMBOO_DEBUGPRINT_REG((int)lock);
-                       BAMBOO_DEBUGPRINT_REG(*((int*)lock+5));
+         BAMBOO_DEBUGPRINT_REG((int)ptr);
+         BAMBOO_DEBUGPRINT_REG((int)lock);
+         BAMBOO_DEBUGPRINT_REG(*((int*)lock+5));
 #endif
 #ifndef MULTICORE_GC
-                 if(RuntimeHashcontainskey(lockRedirectTbl, (int)lock)) {
-                         int redirectlock;
-                         RuntimeHashget(lockRedirectTbl, (int)lock, &redirectlock);
-                         RuntimeHashremovekey(lockRedirectTbl, (int)lock);
-                         releasewritelock_r(lock, (int *)redirectlock);
-                 } else {
+         if(RuntimeHashcontainskey(lockRedirectTbl, (int)lock)) {
+           int redirectlock;
+           RuntimeHashget(lockRedirectTbl, (int)lock, &redirectlock);
+           RuntimeHashremovekey(lockRedirectTbl, (int)lock);
+           releasewritelock_r(lock, (int *)redirectlock);
+         } else {
 #else
-                               {
+         {
 #endif
-               releasewritelock(ptr);
-                 }
-           }
-         } // line 3015: if(islock)
+           releasewritelock(ptr);
+         }
+       }
+      }     // line 3015: if(islock)
 
-               //long clock6;
-         //clock6 = BAMBOO_GET_EXE_TIME();
-         //tprintf("sort: %d, grab: %d, check: %d \n", (int)(clock2-clock1), (int)(clock3-clock2), (int)(clock4-clock3));
+      //long clock6;
+      //clock6 = BAMBOO_GET_EXE_TIME();
+      //tprintf("sort: %d, grab: %d, check: %d \n", (int)(clock2-clock1), (int)(clock3-clock2), (int)(clock4-clock3));
 
 #ifdef PROFILE
-         // post task execution finish, set the end of the postTaskInfo
-         profileTaskEnd();
+      // post task execution finish, set the end of the postTaskInfo
+      profileTaskEnd();
 #endif
 
-         // Free up task parameter descriptor
-         RUNFREE(currtpd->parameterArray);
-         RUNFREE(currtpd);
-               currtpd = NULL;
+      // Free up task parameter descriptor
+      RUNFREE(currtpd->parameterArray);
+      RUNFREE(currtpd);
+      currtpd = NULL;
 #ifdef DEBUG
-         BAMBOO_DEBUGPRINT(0xe99a);
+      BAMBOO_DEBUGPRINT(0xe99a);
 #endif
-         //long clock7;
-         //clock7 = BAMBOO_GET_EXE_TIME();
-         //tprintf("sort: %d, grab: %d, check: %d, release: %d, other %d \n", (int)(clock2-clock1), (int)(clock3-clock2), (int)(clock4-clock3), (int)(clock6-clock5), (int)(clock7-clock6));
+      //long clock7;
+      //clock7 = BAMBOO_GET_EXE_TIME();
+      //tprintf("sort: %d, grab: %d, check: %d, release: %d, other %d \n", (int)(clock2-clock1), (int)(clock3-clock2), (int)(clock4-clock3), (int)(clock6-clock5), (int)(clock7-clock6));
 
-      } //  
-    //} //  if (hashsize(activetasks)>0)  
+    }   //
+    //} //  if (hashsize(activetasks)>0)
   } //  while(hashsize(activetasks)>0)
 #ifdef DEBUG
   BAMBOO_DEBUGPRINT(0xe99b);
@@ -3276,12 +3347,12 @@ execute:
 }
 
 /* This function processes an objects tags */
-void processtags(struct parameterdescriptor *pd, 
-                            int index, 
-                                                                struct parameterwrapper *parameter, 
-                                                                int * iteratorcount, 
-                                                                int *statusarray, 
-                                                                int numparams) {
+void processtags(struct parameterdescriptor *pd,
+                 int index,
+                 struct parameterwrapper *parameter,
+                 int * iteratorcount,
+                 int *statusarray,
+                 int numparams) {
   int i;
 
   for(i=0; i<pd->numbertags; i++) {
@@ -3300,16 +3371,16 @@ void processtags(struct parameterdescriptor *pd,
 }
 
 
-void processobject(struct parameterwrapper *parameter, 
-                              int index, 
-                                                                        struct parameterdescriptor *pd, 
-                                                                        int *iteratorcount, 
-                                                                        int * statusarray, 
-                                                                        int numparams) {
+void processobject(struct parameterwrapper *parameter,
+                   int index,
+                   struct parameterdescriptor *pd,
+                   int *iteratorcount,
+                   int * statusarray,
+                   int numparams) {
   int i;
   int tagcount=0;
   struct ObjectHash * objectset=
-               ((struct parameterwrapper *)pd->queue)->objectset;
+    ((struct parameterwrapper *)pd->queue)->objectset;
 
   parameter->iterators[*iteratorcount].istag=0;
   parameter->iterators[*iteratorcount].slot=index;
@@ -3322,7 +3393,7 @@ void processobject(struct parameterwrapper *parameter,
     if (statusarray[slotid+numparams]!=0) {
       /* This tag has already been enqueued, use it to narrow search */
       parameter->iterators[*iteratorcount].tagbindings[tagcount]=
-                               slotid+numparams;
+        slotid+numparams;
       tagcount++;
     }
   }
@@ -3333,9 +3404,9 @@ void processobject(struct parameterwrapper *parameter,
 
 /* This function builds the iterators for a task & parameter */
 
-void builditerators(struct taskdescriptor * task, 
-                               int index, 
-                                                                               struct parameterwrapper * parameter) {
+void builditerators(struct taskdescriptor * task,
+                    int index,
+                    struct parameterwrapper * parameter) {
   int statusarray[MAXTASKPARAMS];
   int i;
   int numparams=task->numParameters;
@@ -3345,8 +3416,8 @@ void builditerators(struct taskdescriptor * task,
   statusarray[index]=1; /* Initial parameter */
   /* Process tags for initial iterator */
 
-  processtags(task->descriptorarray[index], index, parameter, 
-                               &iteratorcount, statusarray, numparams);
+  processtags(task->descriptorarray[index], index, parameter,
+              &iteratorcount, statusarray, numparams);
 
   while(1) {
 loopstart:
@@ -3358,8 +3429,8 @@ loopstart:
        for(j=0; j<pd->numbertags; j++) {
          int slotid=pd->tagarray[2*j];
          if(statusarray[slotid+numparams]!=0) {
-           processobject(parameter, i, pd, &iteratorcount, statusarray, 
-                                                 numparams);
+           processobject(parameter, i, pd, &iteratorcount, statusarray,
+                         numparams);
            processtags(pd, i, parameter, &iteratorcount, statusarray, numparams);
            goto loopstart;
          }
@@ -3404,8 +3475,8 @@ void printdebug() {
   }
   for(i=0; i<numtasks[BAMBOO_NUM_OF_CORE]; i++) {
     struct taskdescriptor * task=taskarray[BAMBOO_NUM_OF_CORE][i];
-#ifndef RAW 
-       printf("%s\n", task->name);
+#ifndef RAW
+    printf("%s\n", task->name);
 #endif
     for(j=0; j<task->numParameters; j++) {
       struct parameterdescriptor *param=task->descriptorarray[j];
@@ -3413,7 +3484,7 @@ void printdebug() {
       struct ObjectHash * set=parameter->objectset;
       struct ObjectIterator objit;
 #ifndef RAW
-         printf("  Parameter %d\n", j);
+      printf("  Parameter %d\n", j);
 #endif
       ObjectHashiterator(set, &objit);
       while(ObjhasNext(&objit)) {
@@ -3439,10 +3510,10 @@ void printdebug() {
          struct ArrayObject *ao=(struct ArrayObject *)tagptr;
          for(; tagindex<ao->___cachedCode___; tagindex++) {
 #ifndef RAW
-                 printf("      tag=%lx\n",ARRAYGET(ao, struct ___TagDescriptor___*, 
-                                                tagindex));
+           printf("      tag=%lx\n",ARRAYGET(ao, struct ___TagDescriptor___*,
+                                             tagindex));
 #else
-                 ;
+           ;
 #endif
          }
        }
@@ -3491,8 +3562,8 @@ void toiReset(struct tagobjectiterator * it) {
   }
 }
 
-int toiHasNext(struct tagobjectiterator *it, 
-                          void ** objectarray OPTARG(int * failed)) {
+int toiHasNext(struct tagobjectiterator *it,
+               void ** objectarray OPTARG(int * failed)) {
   if (it->istag) {
     /* Iterate tag */
     /* Get object with tags */
@@ -3509,7 +3580,7 @@ int toiHasNext(struct tagobjectiterator *it,
       int tagindex=it->tagobjindex;
       for(; tagindex<ao->___cachedCode___; tagindex++) {
        struct ___TagDescriptor___ *td=
-               ARRAYGET(ao, struct ___TagDescriptor___ *, tagindex);
+         ARRAYGET(ao, struct ___TagDescriptor___ *, tagindex);
        if (td->flag==it->tagid) {
          it->tagobjindex=tagindex; /* Found right type of tag */
          return 1;
@@ -3537,7 +3608,7 @@ int toiHasNext(struct tagobjectiterator *it,
       struct ArrayObject *ao=(struct ArrayObject *) objptr;
       int tagindex;
       int i;
-      for(tagindex=it->tagobjindex;tagindex<ao->___cachedCode___;tagindex++) {
+      for(tagindex=it->tagobjindex; tagindex<ao->___cachedCode___; tagindex++) {
        struct ___Object___ *objptr=ARRAYGET(ao, struct ___Object___*, tagindex);
        if (!ObjectHashcontainskey(it->objectset, (int) objptr))
          continue;
@@ -3559,8 +3630,8 @@ nexttag:
   }
 }
 
-int containstag(struct ___Object___ *ptr, 
-                           struct ___TagDescriptor___ *tag) {
+int containstag(struct ___Object___ *ptr,
+                struct ___TagDescriptor___ *tag) {
   int j;
   struct ___Object___ * objptr=tag->flagptr;
   if (objptr->type==OBJECTARRAYTYPE) {
@@ -3568,16 +3639,16 @@ int containstag(struct ___Object___ *ptr,
     for(j=0; j<ao->___cachedCode___; j++) {
       if (ptr==ARRAYGET(ao, struct ___Object___*, j)) {
        return 1;
-                       }
+      }
     }
     return 0;
   } else {
     return objptr==ptr;
-       }
+  }
 }
 
-void toiNext(struct tagobjectiterator *it, 
-                        void ** objectarray OPTARG(int * failed)) {
+void toiNext(struct tagobjectiterator *it,
+             void ** objectarray OPTARG(int * failed)) {
   /* hasNext has all of the intelligence */
   if(it->istag) {
     /* Iterate tag */
@@ -3590,7 +3661,7 @@ void toiNext(struct tagobjectiterator *it,
     } else {
       struct ArrayObject *ao=(struct ArrayObject *) tagptr;
       objectarray[it->slot]=
-                               ARRAYGET(ao, struct ___TagDescriptor___ *, it->tagobjindex++);
+        ARRAYGET(ao, struct ___TagDescriptor___ *, it->tagobjindex++);
     }
   } else if (it->numtags>0) {
     /* Use tags to locate appropriate objects */
@@ -3602,7 +3673,7 @@ void toiNext(struct tagobjectiterator *it,
     } else {
       struct ArrayObject *ao=(struct ArrayObject *) objptr;
       objectarray[it->slot]=
-                               ARRAYGET(ao, struct ___Object___ *, it->tagobjindex++);
+        ARRAYGET(ao, struct ___Object___ *, it->tagobjindex++);
     }
   } else {
     /* Iterate object */
@@ -3614,24 +3685,24 @@ void toiNext(struct tagobjectiterator *it,
 #ifdef PROFILE
 inline void profileTaskStart(char * taskname) {
   if(!taskInfoOverflow) {
-         TaskInfo* taskInfo = RUNMALLOC(sizeof(struct task_info));
-         taskInfoArray[taskInfoIndex] = taskInfo;
-         taskInfo->taskName = taskname;
-         taskInfo->startTime = BAMBOO_GET_EXE_TIME();
-         taskInfo->endTime = -1;
-         taskInfo->exitIndex = -1;
-         taskInfo->newObjs = NULL;
+    TaskInfo* taskInfo = RUNMALLOC(sizeof(struct task_info));
+    taskInfoArray[taskInfoIndex] = taskInfo;
+    taskInfo->taskName = taskname;
+    taskInfo->startTime = BAMBOO_GET_EXE_TIME();
+    taskInfo->endTime = -1;
+    taskInfo->exitIndex = -1;
+    taskInfo->newObjs = NULL;
   }
 }
 
 inline void profileTaskEnd() {
   if(!taskInfoOverflow) {
-         taskInfoArray[taskInfoIndex]->endTime = BAMBOO_GET_EXE_TIME();
-         taskInfoIndex++;
-         if(taskInfoIndex == TASKINFOLENGTH) {
-                 taskInfoOverflow = true;
-                 //taskInfoIndex = 0;
-         }
+    taskInfoArray[taskInfoIndex]->endTime = BAMBOO_GET_EXE_TIME();
+    taskInfoIndex++;
+    if(taskInfoIndex == TASKINFOLENGTH) {
+      taskInfoOverflow = true;
+      //taskInfoIndex = 0;
+    }
   }
 }
 
@@ -3653,36 +3724,36 @@ void outputProfileData() {
   for(i = 0; i < taskInfoIndex; i++) {
     TaskInfo* tmpTInfo = taskInfoArray[i];
     unsigned long long duration = tmpTInfo->endTime - tmpTInfo->startTime;
-    printf("%s, %lld, %lld, %lld, %lld", 
-                       tmpTInfo->taskName, tmpTInfo->startTime, tmpTInfo->endTime, 
-                       duration, tmpTInfo->exitIndex);
-       // summarize new obj info
-       if(tmpTInfo->newObjs != NULL) {
-               struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
-               struct RuntimeIterator * iter = NULL;
-               while(0 == isEmpty(tmpTInfo->newObjs)) {
-                       char * objtype = (char *)(getItem(tmpTInfo->newObjs));
-                       if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
-                               int num = 0;
-                               RuntimeHashget(nobjtbl, (int)objtype, &num);
-                               RuntimeHashremovekey(nobjtbl, (int)objtype);
-                               num++;
-                               RuntimeHashadd(nobjtbl, (int)objtype, num);
-                       } else {
-                               RuntimeHashadd(nobjtbl, (int)objtype, 1);
-                       }
-                       //printf(stderr, "new obj!\n");
-               }
-
-               // output all new obj info
-               iter = RuntimeHashcreateiterator(nobjtbl);
-               while(RunhasNext(iter)) {
-                       char * objtype = (char *)Runkey(iter);
-                       int num = Runnext(iter);
-                       printf(", %s, %d", objtype, num);
-               }
+    printf("%s, %lld, %lld, %lld, %lld",
+           tmpTInfo->taskName, tmpTInfo->startTime, tmpTInfo->endTime,
+           duration, tmpTInfo->exitIndex);
+    // summarize new obj info
+    if(tmpTInfo->newObjs != NULL) {
+      struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
+      struct RuntimeIterator * iter = NULL;
+      while(0 == isEmpty(tmpTInfo->newObjs)) {
+       char * objtype = (char *)(getItem(tmpTInfo->newObjs));
+       if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
+         int num = 0;
+         RuntimeHashget(nobjtbl, (int)objtype, &num);
+         RuntimeHashremovekey(nobjtbl, (int)objtype);
+         num++;
+         RuntimeHashadd(nobjtbl, (int)objtype, num);
+       } else {
+         RuntimeHashadd(nobjtbl, (int)objtype, 1);
        }
-       printf("\n");
+       //printf(stderr, "new obj!\n");
+      }
+
+      // output all new obj info
+      iter = RuntimeHashcreateiterator(nobjtbl);
+      while(RunhasNext(iter)) {
+       char * objtype = (char *)Runkey(iter);
+       int num = Runnext(iter);
+       printf(", %s, %d", objtype, num);
+      }
+    }
+    printf("\n");
     if(strcmp(tmpTInfo->taskName, "tpd checking") == 0) {
       preprocessingtime += duration;
     } else if(strcmp(tmpTInfo->taskName, "post task execution") == 0) {
@@ -3704,19 +3775,22 @@ void outputProfileData() {
   averagetasktime /= tasknum;
 
   printf("\nTotal time: %lld\n", totalexetime);
-  printf("Total task execution time: %lld (%d%%)\n", totaltasktime, 
-                          (int)(((double)totaltasktime/(double)totalexetime)*100));
-  printf("Total objqueue checking time: %lld (%d%%)\n", 
-                          objqueuecheckingtime, 
-                                (int)(((double)objqueuecheckingtime/(double)totalexetime)*100));
-  printf("Total pre-processing time: %lld (%d%%)\n", preprocessingtime, 
-                          (int)(((double)preprocessingtime/(double)totalexetime)*100));
-  printf("Total post-processing time: %lld (%d%%)\n", postprocessingtime, 
-                          (int)(((double)postprocessingtime/(double)totalexetime)*100));
-  printf("Other time: %lld (%d%%)\n", other, 
-                          (int)(((double)other/(double)totalexetime)*100));
+  printf("Total task execution time: %lld (%d%%)\n", totaltasktime,
+         (int)(((double)totaltasktime/(double)totalexetime)*100));
+  printf("Total objqueue checking time: %lld (%d%%)\n",
+         objqueuecheckingtime,
+         (int)(((double)objqueuecheckingtime/(double)totalexetime)*100));
+  printf("Total pre-processing time: %lld (%d%%)\n", preprocessingtime,
+         (int)(((double)preprocessingtime/(double)totalexetime)*100));
+  printf("Total post-processing time: %lld (%d%%)\n", postprocessingtime,
+         (int)(((double)postprocessingtime/(double)totalexetime)*100));
+  printf("Other time: %lld (%d%%)\n", other,
+         (int)(((double)other/(double)totalexetime)*100));
+
 
   printf("\nAverage task execution time: %lld\n", averagetasktime);
+
+  //printf("\nTotal time spent for interruptions: %lld\n", interrupttime);
 #else
   int i = 0;
   int j = 0;
@@ -3734,37 +3808,37 @@ void outputProfileData() {
     BAMBOO_DEBUGPRINT(0xdddb);
     BAMBOO_DEBUGPRINT_REG(tmpTInfo->startTime);
     BAMBOO_DEBUGPRINT_REG(tmpTInfo->endTime);
-       BAMBOO_DEBUGPRINT_REG(tmpTInfo->exitIndex);
-       if(tmpTInfo->newObjs != NULL) {
-               struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
-               struct RuntimeIterator * iter = NULL;
-               while(0 == isEmpty(tmpTInfo->newObjs)) {
-                       char * objtype = (char *)(getItem(tmpTInfo->newObjs));
-                       if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
-                               int num = 0;
-                               RuntimeHashget(nobjtbl, (int)objtype, &num);
-                               RuntimeHashremovekey(nobjtbl, (int)objtype);
-                               num++;
-                               RuntimeHashadd(nobjtbl, (int)objtype, num);
-                       } else {
-                               RuntimeHashadd(nobjtbl, (int)objtype, 1);
-                       }
-               }
-
-               // ouput all new obj info
-               iter = RuntimeHashcreateiterator(nobjtbl);
-               while(RunhasNext(iter)) {
-                       char * objtype = (char *)Runkey(iter);
-                       int num = Runnext(iter);
-                       int nameLen = strlen(objtype);
-                       BAMBOO_DEBUGPRINT(0xddda);
-                       for(j = 0; j < nameLen; j++) {
-                               BAMBOO_DEBUGPRINT_REG(objtype[j]);
-                       }
-                       BAMBOO_DEBUGPRINT(0xdddb);
-                       BAMBOO_DEBUGPRINT_REG(num);
-               }
+    BAMBOO_DEBUGPRINT_REG(tmpTInfo->exitIndex);
+    if(tmpTInfo->newObjs != NULL) {
+      struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
+      struct RuntimeIterator * iter = NULL;
+      while(0 == isEmpty(tmpTInfo->newObjs)) {
+       char * objtype = (char *)(getItem(tmpTInfo->newObjs));
+       if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
+         int num = 0;
+         RuntimeHashget(nobjtbl, (int)objtype, &num);
+         RuntimeHashremovekey(nobjtbl, (int)objtype);
+         num++;
+         RuntimeHashadd(nobjtbl, (int)objtype, num);
+       } else {
+         RuntimeHashadd(nobjtbl, (int)objtype, 1);
+       }
+      }
+
+      // ouput all new obj info
+      iter = RuntimeHashcreateiterator(nobjtbl);
+      while(RunhasNext(iter)) {
+       char * objtype = (char *)Runkey(iter);
+       int num = Runnext(iter);
+       int nameLen = strlen(objtype);
+       BAMBOO_DEBUGPRINT(0xddda);
+       for(j = 0; j < nameLen; j++) {
+         BAMBOO_DEBUGPRINT_REG(objtype[j]);
        }
+       BAMBOO_DEBUGPRINT(0xdddb);
+       BAMBOO_DEBUGPRINT_REG(num);
+      }
+    }
     BAMBOO_DEBUGPRINT(0xdddc);
   }
 
@@ -3773,17 +3847,17 @@ void outputProfileData() {
   }
 
   // output interrupt related info
-  /*for(i = 0; i < interruptInfoIndex; i++) {
-       InterruptInfo* tmpIInfo = interruptInfoArray[i];
-       BAMBOO_DEBUGPRINT(0xddde);
-       BAMBOO_DEBUGPRINT_REG(tmpIInfo->startTime);
-       BAMBOO_DEBUGPRINT_REG(tmpIInfo->endTime);
-       BAMBOO_DEBUGPRINT(0xdddf);
-     }
-
-     if(interruptInfoOverflow) {
-       BAMBOO_DEBUGPRINT(0xefef);
-     }*/
+  for(i = 0; i < interruptInfoIndex; i++) {
+    InterruptInfo* tmpIInfo = interruptInfoArray[i];
+    BAMBOO_DEBUGPRINT(0xddde);
+    BAMBOO_DEBUGPRINT_REG(tmpIInfo->startTime);
+    BAMBOO_DEBUGPRINT_REG(tmpIInfo->endTime);
+    BAMBOO_DEBUGPRINT(0xdddf);
+  }
+
+  if(interruptInfoOverflow) {
+    BAMBOO_DEBUGPRINT(0xefef);
+  }
 
   BAMBOO_DEBUGPRINT(0xeeee);
 #endif
index d66b5678b230418f484bf2d778e1cd6992b3c100..d435eda017744bf5123d1941187c0f1bf41bb73e 100755 (executable)
@@ -34,11 +34,10 @@ echo -printscheduling print out scheduling graphs
 echo -printschedulesim print out scheduling simulator result graphs
 echo -abcclose close the array boundary check
 echo "-tilera_bme generate tilera version binary for Bare Mental Environment (should be used together with -multicore"
-echo "-tilera_zlinux generate tilera version binary for Zero-Overhead Linux (should be used together with -multicore"
+echo "-tilera_zlinux generate tilera version binary for Zero-Overhead Linux with multi-process mode (should be used together with -multicore"
 echo "-tileraconfig config tilera simulator/pci as nxm (should be used together with -tilera)"
 echo "-raw generate raw version binary (should be used together with -multicore)"
 echo "-rawconfig config raw simulator as 4xn (should be used together with -raw)"
-echo -threadsimulate generate multi-thread simulate version binary
 echo -multicoregc generate multi-core binary with garbage collection
 echo "-numcore4gc set the number of cores for gc (should be used together with -multicoregc), defaultly set as 0"
 echo -gcprofile build with gcprofile options
@@ -341,9 +340,6 @@ shift
 elif [[ $1 = '-interrupt' ]]
 then
 INTERRUPTFLAG=true
-elif [[ $1 = '-threadsimulate' ]]
-then
-THREADSIMULATEFLAG=true
 elif [[ $1 = '-abcclose' ]]
 then
 JAVAOPTS="$JAVAOPTS -abcclose"