Polish multicore code
author jzhou <jzhou>
Thu, 5 May 2011 00:57:18 +0000 (00:57 +0000)
committer jzhou <jzhou>
Thu, 5 May 2011 00:57:18 +0000 (00:57 +0000)
36 files changed:
Robust/src/Analysis/ArrayReferencees.java
Robust/src/IR/Flat/BuildCode.java
Robust/src/Runtime/ObjectHash.c
Robust/src/Runtime/SimpleHash.c
Robust/src/Runtime/bamboo/MGCHash.c
Robust/src/Runtime/bamboo/multicore.h [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicorecache.c [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicorecache.h
Robust/src/Runtime/bamboo/multicoregarbage.c
Robust/src/Runtime/bamboo/multicoregarbage.h
Robust/src/Runtime/bamboo/multicoregc.h
Robust/src/Runtime/bamboo/multicoregccompact.c [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoregccompact.h [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoregcflush.c [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoregcflush.h [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoregcmark.c [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoregcmark.h [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoregcprofile.c [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoregcprofile.h [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicorehelper.h
Robust/src/Runtime/bamboo/multicoremem.c
Robust/src/Runtime/bamboo/multicoremem.h
Robust/src/Runtime/bamboo/multicoremgc.h [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoremsg.c [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoremsg.h [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoreruntime.c
Robust/src/Runtime/bamboo/multicoreruntime.h
Robust/src/Runtime/bamboo/multicoretask.c
Robust/src/Runtime/bamboo/multicoretask.h [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoretaskprofile.c [new file with mode: 0644]
Robust/src/Runtime/bamboo/multicoretaskprofile.h [new file with mode: 0644]
Robust/src/Runtime/garbage.h
Robust/src/Runtime/mem.c
Robust/src/Runtime/mem.h
Robust/src/Runtime/task.c
Robust/src/buildscript

index cea2ec575c0b24614d6ca42f5ee1f9e95950867a..1c9cba2be363265f9f4f174630842d34a0aa4fdf 100644 (file)
@@ -70,19 +70,35 @@ public class ArrayReferencees {
   }
 
   protected void buildRelation() {
-
+    Set<MethodDescriptor> descriptorsToAnalyze = null;
+
+    if(state.TASK) {
+      // for Bristlecone and Bamboo, there are no main method,
+      // analyze all methods transitively reachable from tasks instead
+      Iterator it_sourceEntries = state.getTaskSymbolTable().getDescriptorsIterator();
+      while(it_sourceEntries.hasNext()) {
+        TaskDescriptor tdSourceEntry = (TaskDescriptor)it_sourceEntries.next();
+        if(descriptorsToAnalyze == null) {
+          descriptorsToAnalyze = callGraph.getAllMethods(tdSourceEntry);
+        } else {
+          descriptorsToAnalyze.addAll(callGraph.getAllMethods(tdSourceEntry));
+        }
+        //descriptorsToAnalyze.add( tdSourceEntry );
+      }
+    } else {
     // analyze all methods transitively reachable from main
     MethodDescriptor mdSourceEntry = typeUtil.getMain();
     FlatMethod       fmMain        = state.getMethodFlat( mdSourceEntry );
     
-    Set<MethodDescriptor> descriptorsToAnalyze = callGraph.getAllMethods( mdSourceEntry );
+    descriptorsToAnalyze = callGraph.getAllMethods( mdSourceEntry );
     descriptorsToAnalyze.add( mdSourceEntry );
+    }
     
     for( MethodDescriptor md: descriptorsToAnalyze ) {
       FlatMethod fm =  state.getMethodFlat( md );
       analyzeMethod( fm );
     }
-  }  
+  }
 
   protected void analyzeMethod( FlatMethod fm ) {
     Set<FlatNode> toVisit = new HashSet<FlatNode>();
index 3468bd5f257a237f504b7cc4adfae10828f2b8d8..0624b9962a51681642721d30e68475d32e672548 100644 (file)
@@ -1186,6 +1186,10 @@ public class BuildCode {
         if (type.isPtr())
           count++;
       }
+      if(state.TASK) {
+        // the lock field is also a pointer
+        count++;
+      }
       output.print(count);
       for(Iterator allit=cn.getFieldTable().getAllDescriptorsIterator(); allit.hasNext(); ) {
         FieldDescriptor fd=(FieldDescriptor)allit.next();
@@ -1199,6 +1203,11 @@ public class BuildCode {
                        fd.getSafeSymbol()+"))");
         }
       }
+      if(state.TASK) {
+        // output the lock field
+        output.print(", ");
+        output.print("((unsigned INTPTR)&(((struct "+cn.getSafeSymbol() +" *)0)->lock))");
+      }
       output.println("};");
     }
 
@@ -1587,7 +1596,8 @@ fldloop:
       classdefout.println("  int ___cachedCode___;");
       if((!state.MULTICORE) || (cn.getSymbol().equals("TagDescriptor"))) {
         classdefout.println("  void * flagptr;");
-      } else if (state.MULTICORE) {
+      }
+      if (state.MULTICORE) {
         classdefout.println("  int version;");
         classdefout.println("  int * lock;"); // lock entry for this obj
         classdefout.println("  int mutex;");
@@ -2742,7 +2752,7 @@ fldloop:
         output.println("global_defsprim_p->" +
                        fsfn.getField().getSafeSymbol()+"="+ generateTemp(fm,fsfn.getSrc())+";");
     } else {
-      
+
       if( state.CAPTURE_NULL_DEREFERENCES ) {
         output.println("#ifdef CAPTURE_NULL_DEREFERENCES");
         output.println("if (" + generateTemp(fm,fsfn.getDst()) + " == NULL) {");
index a7165494aa64761a94fa6b740e192f530341fc21..776c9be31fd9bb60c72f9f022d21aff36f8d399c 100755 (executable)
@@ -186,7 +186,7 @@ int ObjectHashadd_I(struct ObjectHash * thisvar,int key, int data, int data2, in
       }
     }
     thisvar->size=newsize;
-    RUNFREE(thisvar->bucket);
+    RUNFREE_I(thisvar->bucket);
     thisvar->bucket=newbucket;
   }
 
index d5a45b801c2b2bf73720137a4d199d4c21c67c72..1ebc88326e9ad34e299839026bd1ca8c1c7e35f1 100755 (executable)
@@ -245,7 +245,7 @@ int RuntimeHashadd_I(struct RuntimeHash * thisvar,int key, int data) {
       }
     }
     thisvar->size=newsize;
-    RUNFREE(thisvar->bucket);
+    RUNFREE_I(thisvar->bucket);
     thisvar->bucket=newbucket;
   }
 
index 4a07aebdb8e0ce6a1e33ebc627691ab5a8dd3ee6..205c488ea87306f6d98363ac46a35a036bc9e6a5 100644 (file)
@@ -341,7 +341,7 @@ unsigned int mgchashResize_I(mgchashtable_t * tbl, unsigned int newsize) {
       curr = next;
     } while(curr!=NULL);
   }
-  RUNFREE(ptr); //Free the memory of the old hash table
+  RUNFREE_I(ptr); //Free the memory of the old hash table
   return 0;
 }
 #endif
diff --git a/Robust/src/Runtime/bamboo/multicore.h b/Robust/src/Runtime/bamboo/multicore.h
new file mode 100644 (file)
index 0000000..04a6191
--- /dev/null
@@ -0,0 +1,16 @@
+// Common definitions for the multicore runtime; everything below is only
+// active when MULTICORE is defined.
+#ifndef BAMBOO_MULTICORE_H
+#define BAMBOO_MULTICORE_H
+#ifdef MULTICORE
+
+// INLINE: force inlining (GCC attribute) unless the build already defined it
+#ifndef INLINE
+#define INLINE    inline __attribute__((always_inline))
+#endif
+
+// Fallback boolean type.  Note that #ifndef only detects a *macro* named
+// bool (e.g. the one from <stdbool.h>), not a typedef.
+#ifndef bool
+#define bool int
+#define true 1
+#define false 0
+#endif
+
+#endif // MULTICORE
+#endif // BAMBOO_MULTICORE_H
diff --git a/Robust/src/Runtime/bamboo/multicorecache.c b/Robust/src/Runtime/bamboo/multicorecache.c
new file mode 100644 (file)
index 0000000..145136b
--- /dev/null
@@ -0,0 +1,650 @@
+#ifdef GC_CACHE_ADAPT
+#include "multicorecache.h"
+
+// Bookkeeping used by the sampling-data helpers below: virtual-address
+// range and page index of the `orig` page and of the `to` page currently
+// being tracked.  Page indices are counted in BAMBOO_PAGE_SIZE units from
+// gcbaseva.
+typedef struct gc_cache_revise_info {
+  unsigned int orig_page_start_va;  // start VA recorded for the orig page
+  unsigned int orig_page_end_va;    // end boundary VA of the orig page
+  unsigned int orig_page_index;     // page index of the orig page
+  unsigned int to_page_start_va;    // start VA recorded for the to page
+  unsigned int to_page_end_va;      // end boundary VA of the to page
+  unsigned int to_page_index;       // page index of the to page
+  // one slot per active core; not referenced by the code visible here
+  unsigned int revised_sampling[NUMCORESACTIVE];
+} gc_cache_revise_info_t;
+// the single global instance (identifier spelling "infomation" is as in
+// the rest of this file)
+gc_cache_revise_info_t gc_cache_revise_infomation;
+
+// Seeds gc_cache_revise_infomation with the start/end virtual addresses
+// and page indices of the current `to` and `orig` pages.
+// NOTE(review): this function takes no parameters yet reads `to`, `orig`
+// and `tobase`, none of which are declared in the visible part of this
+// file; multicorecache.h also declares and invokes samplingDataReviseInit()
+// rather than samplingDataInit() -- confirm the intended name/signature.
+INLINE void samplingDataInit() {
+  gc_cache_revise_infomation.to_page_start_va = (unsigned int)to->ptr;
+  // the to-page index is derived from tobase, not from to->ptr
+  unsigned int toindex = (unsigned int)(tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
+  // end boundary = start of the page that follows page `toindex`
+  gc_cache_revise_infomation.to_page_end_va = gcbaseva + 
+    (BAMBOO_PAGE_SIZE)*(toindex+1);
+  gc_cache_revise_infomation.to_page_index = toindex;
+  gc_cache_revise_infomation.orig_page_start_va = (unsigned int)orig->ptr;
+  // orig end boundary is computed from orig->ptr ...
+  gc_cache_revise_infomation.orig_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
+  *(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+  // ... whereas the orig page index is computed from orig->blockbase
+  gc_cache_revise_infomation.orig_page_index = 
+    ((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
+}
+
+// Folds the sampling count of the tracked orig page into the tracked to
+// page, once per active core.  Each orig count is weighted by the distance
+// between current_ptr and the recorded start VA of the to page.
+INLINE void samplingDataConvert(unsigned int current_ptr) {
+  unsigned int weight =
+    current_ptr - gc_cache_revise_infomation.to_page_start_va;
+  int * dst = &gccachesamplingtbl_r[gc_cache_revise_infomation.to_page_index];
+  int * src = &gccachesamplingtbl[gc_cache_revise_infomation.orig_page_index];
+
+  // advance each cursor by one local-table size per iteration; presumably
+  // that lands on the same page's slot in the next core's table
+  for(int core = 0; core < NUMCORESACTIVE; core++) {
+    *dst += (*src) * weight;
+    dst = (int *)((char *)dst + size_cachesamplingtbl_local_r);
+    src = (int *)((char *)src + size_cachesamplingtbl_local);
+  }
+}
+
+// Checks whether the tracked pointer has reached the end boundary of its
+// current page; if so, converts the sampling data gathered so far and
+// re-targets the bookkeeping in gc_cache_revise_infomation.
+//   orig        - move helper supplying the orig-side pointer
+//   to          - move helper supplying the to-side pointer
+//   current_ptr - address forwarded to samplingDataConvert()
+//   closeToPage - true: test to->ptr against the to page end;
+//                 false: test orig->ptr against the orig page end
+INLINE void completePageConvert(struct moveHelper * orig,
+                                struct moveHelper * to,
+                                unsigned int current_ptr,
+                                bool closeToPage) {
+  unsigned int ptr = 0;
+  unsigned int tocompare = 0;
+  // pick which side's pointer / page boundary is being tested
+  if(closeToPage) {
+    ptr = to->ptr;
+    tocompare = gc_cache_revise_infomation.to_page_end_va;
+  } else {
+    ptr = orig->ptr;
+    tocompare = gc_cache_revise_infomation.orig_page_end_va;
+  }
+  if((unsigned int)ptr >= (unsigned int)tocompare) {
+    // end of an orig/to page
+    // compute the impact of this page for the new page; this must run
+    // before the bookkeeping below is overwritten because it reads the
+    // old to_page_start_va and page indices
+    samplingDataConvert(current_ptr);
+    // prepare for a new orig page
+    unsigned int tmp_index = 
+      (unsigned int)((unsigned int)orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
+    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
+    gc_cache_revise_infomation.orig_page_end_va = gcbaseva + 
+      (BAMBOO_PAGE_SIZE)*(unsigned int)(tmp_index+1);
+    gc_cache_revise_infomation.orig_page_index = tmp_index;
+    // the to-side start VA is always refreshed ...
+    gc_cache_revise_infomation.to_page_start_va = to->ptr;
+    // ... but its end boundary and index only move when the to page closed
+    if(closeToPage) {
+      gc_cache_revise_infomation.to_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
+        *(((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+      gc_cache_revise_infomation.to_page_index = 
+        ((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE);
+    }
+  }
+} 
+
+// Prepare for cache adaptation:
+//   -- flush the shared heap
+//   -- clean dtlb entries
+//   -- change cache strategy
+// isgccachestage is stored verbatim into the global gccachestage flag.
+void cacheAdapt_gc(bool isgccachestage) {
+  // flush the shared heap
+  BAMBOO_CACHE_FLUSH_L2();
+
+  // clean the dtlb entries
+  BAMBOO_CLEAN_DTLB();
+
+  // change the cache strategy (done last, after the flush and dtlb clean)
+  gccachestage = isgccachestage;
+} 
+
+// The master core decides how to adapt the cache strategy for the mutator
+// according to the collected statistic data.
+
+// Policy: make all pages hash-for-home.
+// Writes one (page index, policy word) pair per page into gccachepolicytbl
+// starting at slot 1 (slot 0 is filled in by cacheAdapt_master() with the
+// entry count) and returns the number of pairs written.
+int cacheAdapt_policy_h4h(){
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int numchanged = 0;
+  int * tmp_p = gccachepolicytbl+1;
+  for(unsigned int page_index = 0; page_index < page_num; page_index++) {
+    // the page's start VA is not needed here: the table is keyed by index
+    bamboo_cache_policy_t policy = {0};
+    policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+    *tmp_p = page_index;
+    tmp_p++;
+    *tmp_p = policy.word;
+    tmp_p++;
+    numchanged++;
+  }
+
+  return numchanged;
+} 
+
+// Policy: non-cache-adaptable gc local mode.  Every page is cached on the
+// core that gc_block2core assigns to the page's block.  Emits one
+// (page index, policy word) pair per page into gccachepolicytbl starting
+// at slot 1 and returns the number of pairs written.
+int cacheAdapt_policy_local(){
+  unsigned int total_pages = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int numchanged = 0;
+  int * out = gccachepolicytbl+1;
+  for(unsigned int pg = 0; pg < total_pages; pg++) {
+    // map the page's start address to its block, then to the host core
+    VA page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * pg;
+    unsigned int block = 0;
+    BLOCKINDEX(page_sva, &block);
+    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
+
+    // NOTE: (x,y) should be changed to (x+1, y+1)!!!
+    bamboo_cache_policy_t policy = {0};
+    policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
+    policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
+    policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
+
+    out[0] = pg;
+    out[1] = policy.word;
+    out += 2;
+    numchanged++;
+  }
+
+  return numchanged;
+} 
+
+// Policy: cache each sampled page on the core with the highest revised
+// sampling count for it.  Pages whose highest count is 0 are skipped (their
+// policy is left unchanged).  Emits (page index, policy word) pairs into
+// gccachepolicytbl starting at slot 1 and returns the number of pairs.
+int cacheAdapt_policy_hotest(){
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int numchanged = 0;
+  int * tmp_p = gccachepolicytbl+1;
+  for(unsigned int page_index = 0; page_index < page_num; page_index++) {
+    bamboo_cache_policy_t policy = {0};
+    unsigned int hotestcore = 0;
+    unsigned int hotfreq = 0;
+
+    // walk this page's slot in every core's revised sampling table
+    int *local_tbl=&gccachesamplingtbl_r[page_index];
+    for(int i = 0; i < NUMCORESACTIVE; i++) {
+      int freq = *local_tbl;
+      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
+
+      // check the frequency, decide if this page is hot for the core
+      // (freq is converted to unsigned for this comparison)
+      if(hotfreq < freq) {
+        hotfreq = freq;
+        hotestcore = i;
+      }
+    }
+
+    if(hotfreq == 0) {
+      // this page has not been accessed, do not change its cache policy
+      continue;
+    }
+
+    // locally cache the page in the hottest core
+    // NOTE: (x,y) should be changed to (x+1, y+1)!!!
+    policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
+    policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
+    policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
+    *tmp_p = page_index;
+    tmp_p++;
+    *tmp_p = policy.word;
+    tmp_p++;
+    numchanged++;
+  }
+
+  return numchanged;
+} 
+
+#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  50
+// Policy: cache the page on the core that accesses it the most if that core
+// accesses it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.
+// Otherwise, h4h the page.  Pages whose highest count is 0 are skipped.
+// Emits (page index, policy word) pairs into gccachepolicytbl starting at
+// slot 1 and returns the number of pairs written.
+int cacheAdapt_policy_dominate(){
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int numchanged = 0;
+  int * tmp_p = gccachepolicytbl+1;
+  for(unsigned int page_index = 0; page_index < page_num; page_index++) {
+    bamboo_cache_policy_t policy = {0};
+    unsigned int hotestcore = 0;
+    unsigned long long totalfreq = 0;
+    unsigned int hotfreq = 0;
+
+    // walk this page's slot in every core's revised sampling table
+    int *local_tbl=&gccachesamplingtbl_r[page_index];
+    for(int i = 0; i < NUMCORESACTIVE; i++) {
+      int freq = *local_tbl;
+      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
+      totalfreq += freq;
+      // check the frequency, decide if this page is hot for the core
+      if(hotfreq < freq) {
+        hotfreq = freq;
+        hotestcore = i;
+      }
+    }
+
+    // Decide the cache strategy for this page
+    // If decide to adapt a new cache strategy, write into the shared block of
+    // the gcpolicytbl 
+    // Format: page index + cache policy
+    if(hotfreq == 0) {
+      // this page has not been accessed, do not change its cache policy
+      continue;
+    }
+    // scale both sides by the page size; totalfreq becomes the threshold
+    // share of the total that the hottest core has to reach
+    totalfreq = 
+      (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
+    hotfreq/=BAMBOO_PAGE_SIZE;
+    if(hotfreq < totalfreq) {
+      // use hfh
+      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+    } else {
+      // locally cache the page in the hottest core
+      // NOTE: (x,y) should be changed to (x+1, y+1)!!!
+      policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
+      policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
+      policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
+    }
+    *tmp_p = page_index;
+    tmp_p++;
+    *tmp_p = policy.word;    
+    tmp_p++;
+    numchanged++;
+  }
+
+  return numchanged;
+}
+
+#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
+
+// Sorts, in descending key order, a range of 3-word records stored in
+// `array`.  Record k (1-based) occupies array[3*k-2 .. 3*k]; its sort key
+// is array[3*k-offset], so `offset` selects which word of the record is
+// compared (the callers in this file pass 0 or 1).
+//   left, right - inclusive record indices of the range to sort
+// NOTE(review): every index is unsigned, so pivot-1 and rightIdx-- wrap
+// around instead of going negative; the size guard below is only false
+// when right == left-1 -- confirm callers never pass right < left-1.
+void gc_quicksort(unsigned long long *array,
+                  unsigned int left,
+                  unsigned int right,
+                  unsigned int offset) {
+  unsigned int pivot = 0;;
+  unsigned int leftIdx = left;
+  unsigned int rightIdx = right;
+  // non-empty range check (unsigned arithmetic, see note above)
+  if((right-left+1) >= 1) {
+    pivot = (left+right)/2;
+    while((leftIdx <= pivot) && (rightIdx >= pivot)) {
+      unsigned long long pivotValue = array[pivot*3-offset];
+      // skip records on the left that are already larger than the pivot
+      while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
+        leftIdx++;
+      }
+      // skip records on the right that are already smaller than the pivot
+      while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
+        rightIdx--;
+      }
+      // swap [leftIdx] & [rightIdx] (all three words of each record)
+      for(int k = 0; k < 3; k++) {
+        unsigned long long tmp = array[3*rightIdx-k];
+        array[3*rightIdx-k] = array[3*leftIdx-k];
+        array[3*leftIdx-k] = tmp;
+      }
+      leftIdx++;
+      rightIdx--;
+      // if the pivot record itself was one of the two swapped, follow it
+      if((leftIdx-1) == pivot) {
+        pivot = rightIdx = rightIdx + 1;
+      } else if((leftIdx+1) == pivot) {
+        pivot = leftIdx = leftIdx-1;
+      }
+    }
+    // recurse into the two partitions on either side of the pivot
+    gc_quicksort(array, left, pivot-1, offset);
+    gc_quicksort(array, pivot+1, right, offset);
+  }
+  return;
+}
+
+// Every page cached on the core that accesses it the most. 
+// Check to see if any core's pages total more accesses than threshold 
+// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
+// most remote accesses and hash for home them until we get below 
+// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
+// Returns the number of (page index, policy word) pairs written into
+// gccachepolicytbl (starting at slot 1).
+int cacheAdapt_policy_overload(){
+  unsigned int page_index = 0;
+  VA page_sva = 0;
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int numchanged = 0;
+  int * tmp_p = gccachepolicytbl+1;
+  // accumulated (page-size scaled) access count of the pages homed on
+  // each core
+  unsigned long long workload[NUMCORESACTIVE];
+  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
+  unsigned long long total_workload = 0;
+  // Per-core record list: word [0] is the record count; record k (1-based)
+  // occupies words [3k-2 .. 3k] = { address of the page's policy word in
+  // gccachepolicytbl, total accesses, remote accesses }.
+  // NOTE(review): this is a variable-length array sized by page_num and
+  // NUMCORESACTIVE on the stack -- confirm the stack can hold it.
+  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
+  memset(core2heavypages,0,
+      sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
+  for(page_index = 0; page_index < page_num; page_index++) {
+    // page_sva is computed but not used further in this function
+    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+    bamboo_cache_policy_t policy = {0};
+    unsigned int hotestcore = 0;
+    unsigned long long totalfreq = 0;
+    unsigned int hotfreq = 0;
+  
+    // walk this page's slot in every core's revised sampling table
+    int *local_tbl=&gccachesamplingtbl_r[page_index];
+    for(int i = 0; i < NUMCORESACTIVE; i++) {
+      int freq = *local_tbl;
+      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
+      totalfreq += freq;
+      // check the frequency, decide if this page is hot for the core
+      if(hotfreq < freq) {
+        hotfreq = freq;
+        hotestcore = i;
+      }
+    }
+    // Decide the cache strategy for this page
+    // If decide to adapt a new cache strategy, write into the shared block of
+    // the gcsharedsamplingtbl. The mem recording information that has been 
+    // written is enough to hold the information.
+    // Format: page index + cache strategy(hfh/(host core+[x,y]))
+    if(hotfreq == 0) {
+      // this page has not been accessed, do not change its cache policy
+      continue;
+    }
+
+    totalfreq/=BAMBOO_PAGE_SIZE;
+    hotfreq/=BAMBOO_PAGE_SIZE;
+    // locally cache the page in the hottest core
+    // NOTE: (x,y) should be changed to (x+1, y+1)!!!
+    policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
+    policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
+    policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
+    *tmp_p = page_index;
+    tmp_p++;
+    *tmp_p = policy.word;
+    tmp_p++;
+    numchanged++;
+    workload[hotestcore] += totalfreq;
+    total_workload += totalfreq;
+    // append a record for this page to the hottest core's list (the list
+    // is only sorted later, and only for overloaded cores)
+    unsigned long long remoteaccess = totalfreq - hotfreq;
+    unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
+    core2heavypages[hotestcore][3*index+3] = remoteaccess;
+    core2heavypages[hotestcore][3*index+2] = totalfreq;
+    // tmp_p-1 is the slot that holds the policy word just written
+    core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
+    core2heavypages[hotestcore][0]++;
+  }
+
+  // a core counts as overloaded when it carries more than
+  // 1/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD of the total workload
+  unsigned long long workload_threshold = 
+  total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
+  // Check the workload of each core
+  for(int i = 0; i < NUMCORESACTIVE; i++) {
+    // j indexes the pointer word of the record being examined
+    int j = 1;
+    unsigned int index = (unsigned int)core2heavypages[i][0];
+    if(workload[i] > workload_threshold) {
+      // sort according to the remoteaccess (descending)
+      gc_quicksort(&core2heavypages[i][0], 1, index, 0);
+      while((workload[i] > workload_threshold) && (j<index*3)) {
+        // hfh those pages with more remote accesses: overwrite the policy
+        // word emitted above through the saved address
+        bamboo_cache_policy_t policy = {0};
+        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+        *((unsigned int*)core2heavypages[i][j]) = policy.word;
+        // the demoted page no longer counts toward this core's workload
+        workload[i] -= core2heavypages[i][j+1];
+        j += 3;
+      }
+    }
+  }
+
+  return numchanged;
+}
+
+#define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
+#define GC_CACHE_ADAPT_CROWD_THRESHOLD  20
+// Every page cached on the core that accesses it the most. 
+// Check to see if any core's pages total more accesses than threshold 
+// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
+// most remote accesses and hash for home them until we get below 
+// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  
+// Sort pages based on activity.... 
+// If more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
+// core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages, 
+// then start hfh these pages(selecting the ones with the most remote 
+// accesses first or fewest local accesses) until we get below 
+// GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
+// Returns the number of (page index, policy word) pairs written into
+// gccachepolicytbl (starting at slot 1).
+int cacheAdapt_policy_crowd(){
+  unsigned int page_index = 0;
+  VA page_sva = 0;
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int numchanged = 0;
+  int * tmp_p = gccachepolicytbl+1;
+  // accumulated (page-size scaled) access count of the pages homed on
+  // each core
+  unsigned long long workload[NUMCORESACTIVE];
+  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
+  unsigned long long total_workload = 0;
+  // Per-core record list: word [0] is the record count; record k (1-based)
+  // occupies words [3k-2 .. 3k] = { address of the page's policy word in
+  // gccachepolicytbl, total accesses, remote accesses }.
+  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
+  memset(core2heavypages,0,
+    sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
+  for(page_index = 0; page_index < page_num; page_index++) {
+    // page_sva is computed but not used further in this function
+    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+    bamboo_cache_policy_t policy = {0};
+    unsigned int hotestcore = 0;
+    unsigned long long totalfreq = 0;
+    unsigned int hotfreq = 0;
+  
+    // walk this page's slot in every core's revised sampling table
+    int *local_tbl=&gccachesamplingtbl_r[page_index];
+    for(int i = 0; i < NUMCORESACTIVE; i++) {
+      int freq = *local_tbl;
+      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
+      totalfreq += freq;
+      // check the frequency, decide if this page is hot for the core
+      if(hotfreq < freq) {
+        hotfreq = freq;
+        hotestcore = i;
+      }
+    }
+    // Decide the cache strategy for this page
+    // If decide to adapt a new cache strategy, write into the shared block of
+    // the gcsharedsamplingtbl. The mem recording information that has been 
+    // written is enough to hold the information.
+    // Format: page index + cache strategy(hfh/(host core+[x,y]))
+    if(hotfreq == 0) {
+      // this page has not been accessed, do not change its cache policy
+      continue;
+    }
+    totalfreq/=BAMBOO_PAGE_SIZE;
+    hotfreq/=BAMBOO_PAGE_SIZE;
+    // locally cache the page in the hottest core
+    // NOTE: (x,y) should be changed to (x+1, y+1)!!!
+    policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
+    policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
+    policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
+    *tmp_p = page_index;
+    tmp_p++;
+    *tmp_p = policy.word;
+    tmp_p++;
+    numchanged++;
+    workload[hotestcore] += totalfreq;
+    total_workload += totalfreq;
+    // append a record for this page to the hottest core's list (sorting
+    // happens later)
+    unsigned long long remoteaccess = totalfreq - hotfreq;
+    unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
+    core2heavypages[hotestcore][3*index+3] = remoteaccess;
+    core2heavypages[hotestcore][3*index+2] = totalfreq;
+    // tmp_p-1 is the slot that holds the policy word just written
+    core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
+    core2heavypages[hotestcore][0]++;
+  }
+
+  // a core counts as overloaded when it carries more than
+  // 1/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD of the total workload
+  unsigned long long workload_threshold = 
+  total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
+  // Check the workload of each core
+  for(int i = 0; i < NUMCORESACTIVE; i++) {
+    // j indexes the pointer word of the first record not yet demoted
+    int j = 1;
+    unsigned int index = (unsigned int)core2heavypages[i][0];  
+    if(workload[i] > workload_threshold) {
+      // sort according to the remoteaccess (descending)
+      gc_quicksort(&core2heavypages[i][0], 1, index, 0);
+      while((workload[i] > workload_threshold) && (j<index*3)) {
+        // hfh those pages with more remote accesses: overwrite the policy
+        // word emitted above through the saved address
+        bamboo_cache_policy_t policy = {0};
+        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+        *((unsigned int*)core2heavypages[i][j]) = policy.word;
+        workload[i] -= core2heavypages[i][j+1];
+        j += 3;
+      }
+    }
+
+    // Check if the accesses are crowded on few pages
+    // sort according to the total access
+    // (records j/3+1 .. index are the ones not demoted so far)
+    // NOTE(review): if j has advanced past the last record, j/3+1 exceeds
+    // index and gc_quicksort's unsigned size check may not reject the
+    // range -- confirm.
+inner_crowd:
+    gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
+    unsigned long long threshold = 
+      GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
+    int num_crowded = 0;
+    unsigned long long t_workload = 0;
+    // count how many of the busiest remaining records are needed to reach
+    // the access threshold; this loop is not bounded by `index`, so its
+    // termination relies on the remaining totals adding up to threshold
+    do {
+      t_workload += core2heavypages[i][j+num_crowded*3+1];
+      num_crowded++;
+    } while(t_workload < threshold);
+    // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough 
+    // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
+    if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
+      // need to hfh these pages
+      // sort the pages according to remote access
+      gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
+      // h4h the page with the most remote accesses, then re-evaluate
+      bamboo_cache_policy_t policy = {0};
+      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+      *((unsigned int*)core2heavypages[i][j]) = policy.word;
+      workload[i] -= core2heavypages[i][j+1];
+      t_workload -= core2heavypages[i][j+1];
+      j += 3;
+      // (threshold and t_workload are recomputed after the jump anyway)
+      threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
+      goto inner_crowd;
+    }
+  }
+
+  return numchanged;
+} 
+
+// Runs the cache-adaptation policy selected at compile time
+// (GC_CACHE_ADAPT_POLICY1 .. GC_CACHE_ADAPT_POLICY6) and stores the number
+// of emitted table entries in slot 0 of gccachepolicytbl.  With no policy
+// macro defined the stored count is 0.
+void cacheAdapt_master() {
+  CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
+  unsigned int numchanged = 0;
+  // inspect the statistics and choose a new cache strategy for each page
+#if defined(GC_CACHE_ADAPT_POLICY1)
+  numchanged = cacheAdapt_policy_h4h();
+#elif defined(GC_CACHE_ADAPT_POLICY2)
+  numchanged = cacheAdapt_policy_local();
+#elif defined(GC_CACHE_ADAPT_POLICY3)
+  numchanged = cacheAdapt_policy_hotest();
+#elif defined(GC_CACHE_ADAPT_POLICY4)
+  numchanged = cacheAdapt_policy_dominate();
+#elif defined(GC_CACHE_ADAPT_POLICY5)
+  numchanged = cacheAdapt_policy_overload();
+#elif defined(GC_CACHE_ADAPT_POLICY6)
+  numchanged = cacheAdapt_policy_crowd();
+#endif
+  gccachepolicytbl[0] = numchanged;
+}
+
+// Applies the policy table to the mutator: slot 0 of gccachepolicytbl holds
+// the entry count, followed by (page index, policy word) pairs; each pair
+// is handed to bamboo_adapt_cache_policy() for the page's address range.
+void cacheAdapt_mutator() {
+  int remaining = *gccachepolicytbl;
+  int * entry = gccachepolicytbl+1;
+  for(; remaining != 0; remaining--) {
+    // decode one (page index, policy word) pair
+    int page_index = entry[0];
+    bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(entry[1]);
+    // apply it to the whole page
+    bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva, 
+        policy, BAMBOO_PAGE_SIZE);
+    entry += 2;
+  }
+}
+
+// Client-core side of the prefinish phase: waits for gcphase to become
+// PREFINISHPHASE, applies the new cache policies, reports completion to
+// STARTUPCORE and clears the local sampling tables.
+void cacheAdapt_phase_client() {
+  // spin until the prefinish phase has been announced
+  while(PREFINISHPHASE != gcphase) {
+  }
+  GC_PRINTF("Start prefinish phase\n");
+
+  // cache adapt phase
+  cacheAdapt_mutator();
+  cacheAdapt_gc(false);
+
+  // tell the core coordinator that this core is done
+  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
+  GC_PRINTF("Finish prefinish phase\n");
+
+  CACHEADAPT_SAMPING_RESET();
+  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+    // wipe both local sampling tables
+    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
+    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
+        size_cachesamplingtbl_local_r);
+  }
+}
+
+// Master-core side of the prefinish phase: switches gcphase to
+// PREFINISHPHASE, notifies cores 1..NUMCORESACTIVE-1, applies the new cache
+// policies locally, waits until gc_checkAllCoreStatus_I() reports that all
+// cores are done, then clears the sampling and policy tables.
+void cacheAdapt_phase_master() {
+  GCPROFILEITEM();
+  gcphase = PREFINISHPHASE;
+  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
+  // Note: all cores should flush their runtime data including non-gc
+  //       cores
+  // (the loop index is declared here; it was previously used undeclared)
+  for(int i = 1; i < NUMCORESACTIVE; ++i) {
+    // send start flush messages to all cores
+    gccorestatus[i] = 1;
+    send_msg_1(i, GCSTARTPREF, false);
+  }
+  GC_PRINTF("Start prefinish phase \n");
+  // cache adapt phase
+  cacheAdapt_mutator();
+  // NOTE(review): macro name is spelled "ADPAT" here -- confirm it matches
+  // the definition in multicorecache.h
+  CACHEADPAT_OUTPUT_CACHE_POLICY();
+  cacheAdapt_gc(false);
+
+  // this core is done; wait for the others
+  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+  while(PREFINISHPHASE == gcphase) {
+    // check the status of all cores
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    if(gc_checkAllCoreStatus_I()) {
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      break;
+    }
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+  }
+
+  CACHEADAPT_SAMPING_RESET();
+  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+    // zero out the sampling tables and the policy table
+    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
+    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
+        size_cachesamplingtbl_local_r);
+    BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
+  }
+}
+
+// Debug dump of gccachesamplingtbl: for every page prints its start VA,
+// page index and host core, followed by one raw sample count per active
+// core.
+void gc_output_cache_sampling() {
+  unsigned int total_pages = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  for(unsigned int page_index = 0; page_index < total_pages; page_index++) {
+    VA page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+    unsigned int block = 0;
+    BLOCKINDEX(page_sva, &block);
+    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
+    tprintf("va: %x page_index: %d host: %d\n",(int)page_sva,page_index,coren);
+    for(int core = 0; core < NUMCORESACTIVE; core++) {
+      // table of core `core` starts `core` local-table sizes into the block
+      int * core_tbl = (int *)((char *)gccachesamplingtbl
+          +size_cachesamplingtbl_local*core);
+      printf("%8d ",core_tbl[page_index]);
+    }
+    printf("\n");
+  }
+  printf("=================\n");
+} 
+
+// Debug dump of gccachesamplingtbl_r: for every page prints its start VA,
+// page index and host core, followed by one sample count per active core,
+// each divided by BAMBOO_PAGE_SIZE.
+void gc_output_cache_sampling_r() {
+  unsigned int total_pages = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  for(unsigned int page_index = 0; page_index < total_pages; page_index++) {
+    VA page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+    unsigned int block = 0;
+    BLOCKINDEX(page_sva, &block);
+    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
+    tprintf("va: %x page_index: %d host: %d\n",(int)page_sva,page_index,coren);
+    for(int core = 0; core < NUMCORESACTIVE; core++) {
+      // table of core `core` starts `core` local-table sizes into the block
+      int * core_tbl = (int *)((char *)gccachesamplingtbl_r
+          +size_cachesamplingtbl_local_r*core);
+      int freq = core_tbl[page_index]/BAMBOO_PAGE_SIZE;
+      printf("%8d ",freq);
+    }
+
+    printf("\n");
+  }
+  printf("=================\n");
+} 
+#endif // GC_CACHE_ADAPT
index 0a58e6cacc73b61343c1950a35bc97b959e32cc8..507dbeb45df8c568229f70326fa8538b888d1469 100644 (file)
@@ -1,7 +1,8 @@
 #ifndef BAMBOO_MULTICORE_CACHE_H
 #define BAMBOO_MULTICORE_CACHE_H
-
 #ifdef MULTICORE_GC
+#include "multicore.h"
+
 #ifdef GC_CACHE_ADAPT
 #define GC_CACHE_SAMPLING_UNIT 100000000
 #define GC_TILE_TIMER_EVENT_SETTING 10000000 //0  
@@ -32,7 +33,92 @@ typedef union
 #define BAMBOO_CACHE_MODE_NONE 2
 #define BAMBOO_CACHE_MODE_COORDS 3
 
+INLINE void samplingDataReviseInit(); // prototypes of the cache-adaption entry points; most are wrapped by CACHEADAPT_* macros in this header
+INLINE void samplingDataConvert(unsigned int current_ptr);
+INLINE void completePageConvert(struct moveHelper * orig,
+                                struct moveHelper * to,
+                                unsigned int current_ptr,
+                                bool closeToPage);
+void cacheAdapt_gc(bool isgccachestage);
+void cacheAdapt_master();
+void cacheAdapt_mutator(); // NOTE(review): no CACHEADAPT_* wrapper for this one is visible in this part of the header
+void cacheAdapt_phase_client();
+void cacheAdapt_phase_master();
+void gc_output_cache_sampling(); // prints per-page sampling values of every active core from gccachesamplingtbl
+void gc_output_cache_sampling_r(); // same report from gccachesamplingtbl_r, values divided by BAMBOO_PAGE_SIZE
+
+#ifdef GC_CACHE_SAMPLING
+// enable the timer interrupt: set the next tile-timer event, unmask the timer interrupt, then call bamboo_dtlb_sampling_process()
+#define CACHEADAPT_ENABLE_TIMER() \
+  { \
+    bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING); \
+    bamboo_unmask_timer_intr(); \
+    bamboo_dtlb_sampling_process(); \
+  }
+#else // !GC_CACHE_SAMPLING: enabling the timer expands to nothing
+#define CACHEADAPT_ENABLE_TIMER() 
+#endif // GC_CACHE_SAMPLING
+// disable the TILE_TIMER interrupt (defined whether or not GC_CACHE_SAMPLING is set)
+#define CACHEADAPT_DISABLE_TIMER() bamboo_mask_timer_intr() 
+
+#ifdef GC_CACHE_SAMPLING
+// reset the sampling arrays
+#define CACHEADAPT_SAMPING_RESET()  bamboo_dtlb_sampling_reset()
+#else // !GC_CACHE_SAMPLING
+#define CACHEADAPT_SAMPING_RESET() 
+#endif // GC_CACHE_SAMPLING
+
+#define CACHEADAPT_SAMPLING_DATA_REVISE_INIT() samplingDataReviseInit() // thin wrappers: each forwards to the function of the matching name
+#define CACHEADAPT_SAMPLING_DATA_CONVERT(p) samplingDataConvert((p))
+#define CACHEADAPT_COMPLETE_PAGE_CONVERT(o, t, p, b) \
+  completePageConvert((o), (t), (p), (b)); // NOTE(review): only wrapper that carries its own ';' -- kept because callers are not visible here
+// wrappers for the cache-adaption phases; the names must match the prototypes declared in this header
+#define CACHEADAPT_GC(b) cacheAdapt_gc(b)
+#define CACHEADAPT_MASTER() cacheAdapt_master()
+#define CACHEADAPT_PHASE_CLIENT() cacheAdapt_phase_client()
+#define CACHEADAPT_PHASE_MASTER() cacheAdapt_phase_master()
+
+#ifdef GC_CACHE_ADAPT_OUTPUT
+#define CACHEADAPT_OUTPUT_CACHE_SAMPLING() gc_output_cache_sampling()
+#define CACHEADAPT_OUTPUT_CACHE_SAMPLING_R() gc_output_cache_sampling_r()
+#else // !GC_CACHE_ADAPT_OUTPUT: the sampling dumps expand to nothing
+#define CACHEADAPT_OUTPUT_CACHE_SAMPLING()
+#define CACHEADAPT_OUTPUT_CACHE_SAMPLING_R() 
+#endif // GC_CACHE_ADAPT_OUTPUT
+
+#ifdef GC_CACHE_ADAPT_OUTPUT_POLICY
+#ifdef MGC_SPEC
+#define CACHEADAPT_OUTPUT_CACHE_POLICY() \
+  { \
+    if(gc_profile_flag) { \
+      bamboo_output_cache_policy(); \
+    } \
+  }
+#else // MGC_SPEC
+#define CACHEADAPT_OUTPUT_CACHE_POLICY() bamboo_output_cache_policy()
+#endif // MGC_SPEC
+#else // GC_CACHE_ADAPT_OUTPUT_POLICY
+#define CACHEADAPT_OUTPUT_CACHE_POLICY() 
+#endif // GC_CACHE_ADAPT_OUTPUT_POLICY
+
+#else // GC_CACHE_ADAPT not defined: every CACHEADAPT_* hook below expands to nothing
+#define CACHEADAPT_ENABLE_TIMER() 
+#define CACHEADAPT_DISABLE_TIMER() 
+#define CACHEADAPT_SAMPING_RESET()
+#define CACHEADAPT_SAMPLING_DATA_REVISE_INIT() 
+#define CACHEADAPT_SAMPLING_DATA_CONVERT(p) 
+#define CACHEADAPT_COMPLETE_PAGE_CONVERT(o, t, p, b) 
+#define CACHEADAPT_GC(b)
+#define CACHEADAPT_MASTER()
+#define CACHEADAPT_PHASE_CLIENT() 
+#define CACHEADAPT_PHASE_MASTER() 
+#define CACHEADAPT_OUTPUT_CACHE_SAMPLING()
+#define CACHEADAPT_OUTPUT_CACHE_SAMPLING_R() 
+#define CACHEADAPT_OUTPUT_CACHE_POLICY() 
 #endif // GC_CACHE_ADAPT
+#else // MULTICORE_GC
+#define CACHEADAPT_ENABLE_TIMER() 
+#define CACHEADAPT_DISABLE_TIMER()
 #endif // MULTICORE_GC
 
-#endif
+#endif // BAMBOO_MULTICORE_CACHE_H
index f3ae6cfbee674e96bf5caaf49c0b615faeceea73..4aeaa245667a0624a2de538998096c22d751eee9 100644 (file)
@@ -3,39 +3,11 @@
 #ifdef MULTICORE_GC
 #include "runtime.h"
 #include "multicoregarbage.h"
+#include "multicoregcmark.h"
+#include "multicoregccompact.h"
+#include "multicoregcflush.h"
 #include "multicoreruntime.h"
-#include "runtime_arch.h"
-#include "SimpleHash.h"
-#include "GenericHashtable.h"
-#include "ObjectHash.h"
-#include "GCSharedHash.h"
-
-extern int corenum;
-#ifdef TASK
-extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
-extern int numqueues[][NUMCLASSES];
-extern struct genhashtable * activetasks;
-extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
-extern struct taskparamdescriptor *currtpd;
-extern struct LockValue runtime_locks[MAXTASKPARAMS];
-extern int runtime_locklen;
-#endif
-
-extern struct global_defs_t * global_defs_p;
-
-#ifdef SMEMM
-extern unsigned int gcmem_mixed_threshold;
-extern unsigned int gcmem_mixed_usedmem;
-#endif
-
-#ifdef MGC
-extern struct lockvector bamboo_threadlocks;
-#endif
-
-struct pointerblock {
-  void * ptrs[NUMPTRS];
-  struct pointerblock *next;
-};
+#include "multicoregcprofile.h"
 
 struct pointerblock *gchead=NULL;
 int gcheadindex=0;
@@ -45,16 +17,6 @@ struct pointerblock *gctail2=NULL;
 int gctailindex2=0;
 struct pointerblock *gcspare=NULL;
 
-#define NUMLOBJPTRS 20
-
-struct lobjpointerblock {
-  void * lobjs[NUMLOBJPTRS];
-  int lengths[NUMLOBJPTRS];
-  int hosts[NUMLOBJPTRS];
-  struct lobjpointerblock *next;
-  struct lobjpointerblock *prev;
-};
-
 struct lobjpointerblock *gclobjhead=NULL;
 int gclobjheadindex=0;
 struct lobjpointerblock *gclobjtail=NULL;
@@ -63,457 +25,147 @@ struct lobjpointerblock *gclobjtail2=NULL;
 int gclobjtailindex2=0;
 struct lobjpointerblock *gclobjspare=NULL;
 
-#ifdef GC_CACHE_ADAPT
-typedef struct gc_cache_revise_info {
-  unsigned int orig_page_start_va;
-  unsigned int orig_page_end_va;
-  unsigned int orig_page_index;
-  unsigned int to_page_start_va;
-  unsigned int to_page_end_va;
-  unsigned int to_page_index;
-  unsigned int revised_sampling[NUMCORESACTIVE];
-} gc_cache_revise_info_t;
-gc_cache_revise_info_t gc_cache_revise_infomation;
-#endif// GC_CACHE_ADAPT
+#ifdef MULTICORE_GC
+#ifdef SMEMM
+extern unsigned int gcmem_mixed_threshold;
+extern unsigned int gcmem_mixed_usedmem;
+#endif // SMEMM
+#endif // MULTICORE_GC
 
 #ifdef GC_DEBUG
 // dump whole mem in blocks
-inline void dumpSMem() {
+INLINE void dumpSMem() { // GC_DEBUG helper: hex-dump the reserved area and the shared heap, 16 ints per output line
   int block = 0;
   int sblock = 0;
   unsigned int j = 0;
-  unsigned int i = 0;
+  void * i = 0; // dump cursor; the i+N expressions below rely on GNU void* arithmetic (byte steps)
   int coren = 0;
   int x = 0;
   int y = 0;
-  printf("(%x,%x) Dump shared mem: \n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
+  printf("(%x,%x) Dump shared mem: \n",udn_tile_coord_x(),udn_tile_coord_y());
   // reserved blocks for sblocktbl
-  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
-  for(i=BAMBOO_BASE_VA; i<gcbaseva; i+= 4*16) {
+  printf("(%x,%x) ++++ reserved sblocks ++++ \n", udn_tile_coord_x(),
+      udn_tile_coord_y());
+  for(i=BAMBOO_BASE_VA; (unsigned int)i<(unsigned int)gcbaseva; i+= 4*16) { // was "unsinged", which does not compile
     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
-                  udn_tile_coord_x(), udn_tile_coord_y(),
-           *((int *)(i)), *((int *)(i + 4)),
-           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
-           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
-           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
-           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
-           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
-           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
-           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
+        udn_tile_coord_x(), udn_tile_coord_y(),
+        *((int *)(i)), *((int *)(i + 4)),
+        *((int *)(i + 4*2)), *((int *)(i + 4*3)),
+        *((int *)(i + 4*4)), *((int *)(i + 4*5)),
+        *((int *)(i + 4*6)), *((int *)(i + 4*7)),
+        *((int *)(i + 4*8)), *((int *)(i + 4*9)),
+        *((int *)(i + 4*10)), *((int *)(i + 4*11)),
+        *((int *)(i + 4*12)), *((int *)(i + 4*13)),
+        *((int *)(i + 4*14)), *((int *)(i + 4*15)));
   }
   sblock = gcreservedsb;
   bool advanceblock = false;
   // remaining memory
-  for(i=gcbaseva; i<gcbaseva+BAMBOO_SHARED_MEM_SIZE; i+=4*16) {
+  for(i=gcbaseva;
+      (unsigned int)i<(unsigned int)(gcbaseva+BAMBOO_SHARED_MEM_SIZE); 
+      i+=4*16) {
     advanceblock = false;
     // computing sblock # and block #, core coordinate (x,y) also
     if(j%((BAMBOO_SMEM_SIZE)/(4*16)) == 0) {
       // finished a sblock
       if(j < ((BAMBOO_LARGE_SMEM_BOUND)/(4*16))) {
-               if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
-                 // finished a block
-                 block++;
-                 advanceblock = true;
-               }
+    if((j > 0) && (j%((BAMBOO_SMEM_SIZE_L)/(4*16)) == 0)) {
+      // finished a block
+      block++;
+      advanceblock = true;
+    }
       } else {
-               // finished a block
-               block++;
-               advanceblock = true;
+    // finished a block
+    block++;
+    advanceblock = true;
       }
       // compute core #
       if(advanceblock) {
-               coren = gc_block2core[block%(NUMCORES4GC*2)];
+    coren = gc_block2core[block%(NUMCORES4GC*2)];
       }
       // compute core coordinate
-      BAMBOO_COORDS(coren, &x, &y);
+      x = BAMBOO_COORDS_X(coren);
+      y = BAMBOO_COORDS_Y(coren);
       printf("(%x,%x) ==== %d, %d : core (%d,%d), saddr %x====\n",
-                    udn_tile_coord_x(), udn_tile_coord_y(),
+         udn_tile_coord_x(), udn_tile_coord_y(),
              block, sblock++, x, y,
              (sblock-1)*(BAMBOO_SMEM_SIZE)+gcbaseva);
     }
     j++;
     printf("(%x,%x) 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n",
-                  udn_tile_coord_x(), udn_tile_coord_y(),
-           *((int *)(i)), *((int *)(i + 4)),
-           *((int *)(i + 4*2)), *((int *)(i + 4*3)),
-           *((int *)(i + 4*4)), *((int *)(i + 4*5)),
-           *((int *)(i + 4*6)), *((int *)(i + 4*7)),
-           *((int *)(i + 4*8)), *((int *)(i + 4*9)),
-           *((int *)(i + 4*10)), *((int *)(i + 4*11)),
-           *((int *)(i + 4*12)), *((int *)(i + 4*13)),
-           *((int *)(i + 4*14)), *((int *)(i + 4*15)));
+        udn_tile_coord_x(), udn_tile_coord_y(),
+        *((int *)(i)), *((int *)(i + 4)),
+        *((int *)(i + 4*2)), *((int *)(i + 4*3)),
+        *((int *)(i + 4*4)), *((int *)(i + 4*5)),
+        *((int *)(i + 4*6)), *((int *)(i + 4*7)),
+        *((int *)(i + 4*8)), *((int *)(i + 4*9)),
+        *((int *)(i + 4*10)), *((int *)(i + 4*11)),
+        *((int *)(i + 4*12)), *((int *)(i + 4*13)),
+        *((int *)(i + 4*14)), *((int *)(i + 4*15)));
   }
   printf("(%x,%x) \n", udn_tile_coord_x(), udn_tile_coord_y());
 }
 #endif
 
-// should be invoked with interruption closed
-inline void gc_enqueue_I(void *ptr) {
-  GC_BAMBOO_DEBUGPRINT(0xe601);
-  GC_BAMBOO_DEBUGPRINT_REG(ptr);
-  if (gcheadindex==NUMPTRS) {
-    struct pointerblock * tmp;
-    if (gcspare!=NULL) {
-      tmp=gcspare;
-      gcspare=NULL;
-    } else {
-      tmp=RUNMALLOC_I(sizeof(struct pointerblock));
-    }  // if (gcspare!=NULL)
-    gchead->next=tmp;
-    gchead=tmp;
-    gcheadindex=0;
-  } // if (gcheadindex==NUMPTRS)
-  gchead->ptrs[gcheadindex++]=ptr;
-  GC_BAMBOO_DEBUGPRINT(0xe602);
-} // void gc_enqueue_I(void *ptr)
-
-// dequeue and destroy the queue
-inline void * gc_dequeue_I() {
-  if (gctailindex==NUMPTRS) {
-    struct pointerblock *tmp=gctail;
-    gctail=gctail->next;
-    gctailindex=0;
-    if (gcspare!=NULL) {
-      RUNFREE(tmp);
-    } else {
-      gcspare=tmp;
-    }  // if (gcspare!=NULL)
-  } // if (gctailindex==NUMPTRS)
-  return gctail->ptrs[gctailindex++];
-} // void * gc_dequeue()
-
-// dequeue and do not destroy the queue
-inline void * gc_dequeue2_I() {
-  if (gctailindex2==NUMPTRS) {
-    struct pointerblock *tmp=gctail2;
-    gctail2=gctail2->next;
-    gctailindex2=0;
-  } // if (gctailindex2==NUMPTRS)
-  return gctail2->ptrs[gctailindex2++];
-} // void * gc_dequeue2()
-
-inline int gc_moreItems_I() {
-  if ((gchead==gctail)&&(gctailindex==gcheadindex))
-    return 0;
-  return 1;
-} // int gc_moreItems()
-
-inline int gc_moreItems2_I() {
-  if ((gchead==gctail2)&&(gctailindex2==gcheadindex))
-    return 0;
-  return 1;
-} // int gc_moreItems2()
-
-// should be invoked with interruption closed
-// enqueue a large obj: start addr & length
-inline void gc_lobjenqueue_I(void *ptr,
-                             unsigned int length,
-                             unsigned int host) {
-  GC_BAMBOO_DEBUGPRINT(0xe901);
-  if (gclobjheadindex==NUMLOBJPTRS) {
-    struct lobjpointerblock * tmp;
-    if (gclobjspare!=NULL) {
-      tmp=gclobjspare;
-      gclobjspare=NULL;
-    } else {
-      tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
-    }  // if (gclobjspare!=NULL)
-    gclobjhead->next=tmp;
-    tmp->prev = gclobjhead;
-    gclobjhead=tmp;
-    gclobjheadindex=0;
-  } // if (gclobjheadindex==NUMLOBJPTRS)
-  gclobjhead->lobjs[gclobjheadindex]=ptr;
-  gclobjhead->lengths[gclobjheadindex]=length;
-  gclobjhead->hosts[gclobjheadindex++]=host;
-  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lobjs[gclobjheadindex-1]);
-  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->lengths[gclobjheadindex-1]);
-  GC_BAMBOO_DEBUGPRINT_REG(gclobjhead->hosts[gclobjheadindex-1]);
-} // void gc_lobjenqueue_I(void *ptr...)
-
-// dequeue and destroy the queue
-inline void * gc_lobjdequeue_I(unsigned int * length,
-                               unsigned int * host) {
-  if (gclobjtailindex==NUMLOBJPTRS) {
-    struct lobjpointerblock *tmp=gclobjtail;
-    gclobjtail=gclobjtail->next;
-    gclobjtailindex=0;
-    gclobjtail->prev = NULL;
-    if (gclobjspare!=NULL) {
-      RUNFREE(tmp);
-    } else {
-      gclobjspare=tmp;
-      tmp->next = NULL;
-      tmp->prev = NULL;
-    }  // if (gclobjspare!=NULL)
-  } // if (gclobjtailindex==NUMLOBJPTRS)
-  if(length != NULL) {
-    *length = gclobjtail->lengths[gclobjtailindex];
-  }
-  if(host != NULL) {
-    *host = (unsigned int)(gclobjtail->hosts[gclobjtailindex]);
-  }
-  return gclobjtail->lobjs[gclobjtailindex++];
-} // void * gc_lobjdequeue()
-
-inline int gc_lobjmoreItems_I() {
-  if ((gclobjhead==gclobjtail)&&(gclobjtailindex==gclobjheadindex))
-    return 0;
-  return 1;
-} // int gc_lobjmoreItems()
-
-// dequeue and don't destroy the queue
-inline void gc_lobjdequeue2_I() {
-  if (gclobjtailindex2==NUMLOBJPTRS) {
-    gclobjtail2=gclobjtail2->next;
-    gclobjtailindex2=1;
-  } else {
-    gclobjtailindex2++;
-  }  // if (gclobjtailindex2==NUMLOBJPTRS)
-} // void * gc_lobjdequeue2()
-
-inline int gc_lobjmoreItems2_I() {
-  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
-    return 0;
-  return 1;
-} // int gc_lobjmoreItems2()
-
-// 'reversly' dequeue and don't destroy the queue
-inline void gc_lobjdequeue3_I() {
-  if (gclobjtailindex2==0) {
-    gclobjtail2=gclobjtail2->prev;
-    gclobjtailindex2=NUMLOBJPTRS-1;
-  } else {
-    gclobjtailindex2--;
-  }  // if (gclobjtailindex2==NUMLOBJPTRS)
-} // void * gc_lobjdequeue3()
-
-inline int gc_lobjmoreItems3_I() {
-  if ((gclobjtail==gclobjtail2)&&(gclobjtailindex2==gclobjtailindex))
-    return 0;
-  return 1;
-} // int gc_lobjmoreItems3()
-
-inline void gc_lobjqueueinit4_I() {
-  gclobjtail2 = gclobjtail;
-  gclobjtailindex2 = gclobjtailindex;
-} // void gc_lobjqueueinit2()
-
-inline void * gc_lobjdequeue4_I(unsigned int * length,
-                                unsigned int * host) {
-  if (gclobjtailindex2==NUMLOBJPTRS) {
-    gclobjtail2=gclobjtail2->next;
-    gclobjtailindex2=0;
-  } // if (gclobjtailindex==NUMLOBJPTRS)
-  if(length != NULL) {
-    *length = gclobjtail2->lengths[gclobjtailindex2];
-  }
-  if(host != NULL) {
-    *host = (unsigned int)(gclobjtail2->hosts[gclobjtailindex2]);
-  }
-  return gclobjtail2->lobjs[gclobjtailindex2++];
-} // void * gc_lobjdequeue()
-
-inline int gc_lobjmoreItems4_I() {
-  if ((gclobjhead==gclobjtail2)&&(gclobjtailindex2==gclobjheadindex))
-    return 0;
-  return 1;
-} // int gc_lobjmoreItems(
-
-unsigned int gccurr_heapbound = 0;
-
-inline void gettype_size(void * ptr,
-                         int * ttype,
-                         unsigned int * tsize) {
-  int type = ((int *)ptr)[0];
-  unsigned int size = 0;
-  if(type < NUMCLASSES) {
-    // a normal object
-    size = classsize[type];
-  } else {
-    // an array
-    struct ArrayObject *ao=(struct ArrayObject *)ptr;
-    unsigned int elementsize=classsize[type];
-    unsigned int length=ao->___length___;
-    size=sizeof(struct ArrayObject)+length*elementsize;
-  }  // if(type < NUMCLASSES)
-  *ttype = type;
-  *tsize = size;
-}
-
-inline bool isLarge(void * ptr,
-                    int * ttype,
-                    unsigned int * tsize) {
-  GC_BAMBOO_DEBUGPRINT(0xe701);
-  GC_BAMBOO_DEBUGPRINT_REG(ptr);
-  // check if a pointer is referring to a large object
-  gettype_size(ptr, ttype, tsize);
-  GC_BAMBOO_DEBUGPRINT(*tsize);
-  unsigned int bound = (BAMBOO_SMEM_SIZE);
-  if(((unsigned int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) {
-    bound = (BAMBOO_SMEM_SIZE_L);
-  }
-  if((((unsigned int)ptr-gcbaseva)%(bound))==0) {
-    // ptr is a start of a block
-    GC_BAMBOO_DEBUGPRINT(0xe702);
-    GC_BAMBOO_DEBUGPRINT(1);
-    return true;
-  }
-  if((bound-(((unsigned int)ptr-gcbaseva)%bound)) < (*tsize)) {
-    // it acrosses the boundary of current block
-    GC_BAMBOO_DEBUGPRINT(0xe703);
-    GC_BAMBOO_DEBUGPRINT(1);
-    return true;
+INLINE void initmulticoregcdata() { // give every GC bookkeeping global its initial value
+  int i = 0;
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    // only the startup core initializes the per-core status, counter and load arrays
+    for(i = 0; i < NUMCORESACTIVE; ++i) {
+      gccorestatus[i] = 1;
+      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0; // both entries of the two-entry counter arrays start at 0
+      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
+    } 
+    for(i = 0; i < NUMCORES4GC; ++i) {
+      gcloads[i] = 0;
+      gcrequiredmems[i] = 0;
+      gcstopblock[i] = 0;
+      gcfilledblocks[i] = 0;
+    }
   }
-  GC_BAMBOO_DEBUGPRINT(0);
-  return false;
-} // bool isLarge(void * ptr, int * ttype, int * tsize)
-
-inline unsigned int hostcore(void * ptr) {
-  // check the host core of ptr
-  unsigned int host = 0;
-  RESIDECORE(ptr, &host);
-  GC_BAMBOO_DEBUGPRINT(0xedd0);
-  GC_BAMBOO_DEBUGPRINT_REG(ptr);
-  GC_BAMBOO_DEBUGPRINT_REG(host);
-  return host;
-} // int hostcore(void * ptr)
 
-inline void cpu2coords(unsigned int coren,
-                          unsigned int * x,
-                                          unsigned int * y) {
-  *x = bamboo_cpu2coords[2*coren];
-  *y = bamboo_cpu2coords[2*coren+1];
-} // void cpu2coords(...)
-
-inline bool isLocal(void * ptr) {
-  // check if a pointer is in shared heap on this core
-  return hostcore(ptr) == BAMBOO_NUM_OF_CORE;
-} // bool isLocal(void * ptr)
+  bamboo_smem_zero_top = NULL; // the assignments from here on run on every core, not only the startup core
+  gcflag = false;
+  gcprocessing = false;
+  gcphase = FINISHPHASE;
+  gcprecheck = true;
+  gccurr_heaptop = 0;
+  gcself_numsendobjs = 0;
+  gcself_numreceiveobjs = 0;
+  gcmarkedptrbound = 0;
+  gcforwardobjtbl = allocateMGCHash_I(20, 3); // released again by dismulticoregcdata() via freeMGCHash()
+  gcnumlobjs = 0;
+  gcheaptop = 0;
+  gctopcore = 0;
+  gctopblock = 0;
+  gcmovestartaddr = 0;
+  gctomove = false;
+  gcmovepending = 0;
+  gcblock2fill = 0;
+#ifdef SMEMM
+  gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE // 0.8 * (shared mem size - bamboo_reserved_smem sblocks), truncated
+               -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
+  gcmem_mixed_usedmem = 0;
+#endif
+#ifdef MGC_SPEC
+  gc_profile_flag = false;
+#endif
+#ifdef GC_FLUSH_DTLB
+  gc_num_flush_dtlb = 0;
+#endif
+  gc_localheap_s = false;
+#ifdef GC_CACHE_ADAPT
+  gccachestage = false;
+#endif 
 
-inline bool gc_checkCoreStatus_I() {
-  bool allStall = true;
-  for(int i = 0; i < NUMCORES4GC; ++i) {
-    if(gccorestatus[i] != 0) {
-      allStall = false;
-      break;
-    }  // if(gccorestatus[i] != 0)
-  }  // for(i = 0; i < NUMCORES4GC; ++i)
-  return allStall;
+  INIT_MULTICORE_GCPROFILE_DATA(); // profiling setup is delegated to this macro (defined outside this file section)
 }
 
-inline bool gc_checkAllCoreStatus_I() {
-  bool allStall = true;
-  for(int i = 0; i < NUMCORESACTIVE; ++i) {
-    if(gccorestatus[i] != 0) {
-      allStall = false;
-      break;
-    }  // if(gccorestatus[i] != 0)
-  }  // for(i = 0; i < NUMCORESACTIVE; ++i)
-  return allStall;
+INLINE void dismulticoregcdata() { // tear-down counterpart of initmulticoregcdata()
+  freeMGCHash(gcforwardobjtbl); // release the table allocated with allocateMGCHash_I() in initmulticoregcdata()
 }
 
-inline void checkMarkStatue() {
-  GC_BAMBOO_DEBUGPRINT(0xee01);
-  int i;
-  if((!waitconfirm) ||
-     (waitconfirm && (numconfirm == 0))) {
-    GC_BAMBOO_DEBUGPRINT(0xee02);
-       unsigned int entry_index = 0;
-       if(waitconfirm) {
-         // phase 2
-         entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
-       } else {
-         // phase 1
-         entry_index = gcnumsrobjs_index;
-       }
-    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
-    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
-    // check the status of all cores
-    bool allStall = gc_checkAllCoreStatus_I();
-    GC_BAMBOO_DEBUGPRINT(0xee03);
-    if(allStall) {
-      GC_BAMBOO_DEBUGPRINT(0xee04);
-      // ask for confirm
-      if(!waitconfirm) {
-               GC_BAMBOO_DEBUGPRINT(0xee05);
-               // the first time found all cores stall
-               // send out status confirm msg to all other cores
-               // reset the corestatus array too
-               gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-               waitconfirm = true;
-               numconfirm = NUMCORESACTIVE - 1;
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               for(i = 1; i < NUMCORESACTIVE; ++i) {
-                 gccorestatus[i] = 1;
-                 // send mark phase finish confirm request msg to core i
-                 send_msg_1(i, GCMARKCONFIRM, false);
-               }  // for(i = 1; i < NUMCORESACTIVE; ++i)
-      } else {
-               // Phase 2
-               // check if the sum of send objs and receive obj are the same
-               // yes->check if the info is the latest; no->go on executing
-               unsigned int sumsendobj = 0;
-               for(i = 0; i < NUMCORESACTIVE; ++i) {
-                 sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
-               }  // for(i = 0; i < NUMCORESACTIVE; ++i)
-               GC_BAMBOO_DEBUGPRINT(0xee06);
-               GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
-               for(i = 0; i < NUMCORESACTIVE; ++i) {
-                 sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
-               }  // for(i = 0; i < NUMCORESACTIVE; ++i)
-               GC_BAMBOO_DEBUGPRINT(0xee07);
-               GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
-               if(0 == sumsendobj) {
-                 // Check if there are changes of the numsendobjs or numreceiveobjs on
-                 // each core
-                 bool ischanged = false;
-                 for(i = 0; i < NUMCORESACTIVE; ++i) {
-                       if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) || 
-                               (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
-                         ischanged = true;
-                         break;
-                       }
-                 }  // for(i = 0; i < NUMCORESACTIVE; ++i)
-                 GC_BAMBOO_DEBUGPRINT(0xee08);
-                 GC_BAMBOO_DEBUGPRINT_REG(ischanged);
-                 if(!ischanged) {
-                       GC_BAMBOO_DEBUGPRINT(0xee09);
-                       // all the core status info are the latest
-                       // stop mark phase
-                       gcphase = COMPACTPHASE;
-                       // restore the gcstatus for all cores
-                       for(i = 0; i < NUMCORESACTIVE; ++i) {
-                         gccorestatus[i] = 1;
-                       }  // for(i = 0; i < NUMCORESACTIVE; ++i)
-                 } else {
-                       // There were changes between phase 1 and phase 2, can not decide 
-                       // whether the mark phase has been finished
-                       waitconfirm = false;
-                       // As it fails in phase 2, flip the entries
-                       gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
-                 } // if(!ischanged)
-               } else {
-                 // There were changes between phase 1 and phase 2, can not decide 
-                 // whether the mark phase has been finished
-                 waitconfirm = false;
-                 // As it fails in phase 2, flip the entries
-                 gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
-               } // if(0 == sumsendobj) else ...
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-      } // if(!gcwaitconfirm) else()
-    } else {
-         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    } // if(allStall)
-  }  // if((!waitconfirm)...
-  GC_BAMBOO_DEBUGPRINT(0xee0a);
-} // void checkMarkStatue()
-
-inline void initGC() {
+INLINE void initGC() {
   int i;
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
     for(i = 0; i < NUMCORES4GC; ++i) {
@@ -524,7 +176,7 @@ inline void initGC() {
       gcrequiredmems[i] = 0;
       gcfilledblocks[i] = 0;
       gcstopblock[i] = 0;
-    } // for(i = 0; i < NUMCORES4GC; ++i)
+    } 
     for(i = NUMCORES4GC; i < NUMCORESACTIVE; ++i) {
       gccorestatus[i] = 1;
       gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
@@ -533,11 +185,8 @@ inline void initGC() {
     gcheaptop = 0;
     gctopcore = 0;
     gctopblock = 0;
-#ifdef GC_TBL_DEBUG
-       // initialize the gcmappingtbl
-       BAMBOO_MEMSET_WH(gcmappingtbl, 0, bamboo_rmsp_size);
-#endif
-  } // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
+  gcnumsrobjs_index = 0;
+  } 
   gcself_numsendobjs = 0;
   gcself_numreceiveobjs = 0;
   gcmarkedptrbound = 0;
@@ -554,7 +203,7 @@ inline void initGC() {
     gcheadindex=gctailindex=gctailindex2 = 0;
     gchead=gctail=gctail2=RUNMALLOC(sizeof(struct pointerblock));
   } else {
-    gctailindex = gctailindex2 = gcheadindex;
+    gctailindex = gctailindex2 = gcheadindex = 0;
     gctail = gctail2 = gchead;
   }
 
@@ -564,7 +213,7 @@ inline void initGC() {
     gclobjtailindex=0;
     gclobjtailindex2 = 0;
     gclobjhead=gclobjtail=gclobjtail2=
-         RUNMALLOC(sizeof(struct lobjpointerblock));
+    RUNMALLOC(sizeof(struct lobjpointerblock));
   } else {
     gclobjtailindex = gclobjtailindex2 = gclobjheadindex = 0;
     gclobjtail = gclobjtail2 = gclobjhead;
@@ -574,19 +223,105 @@ inline void initGC() {
   freeMGCHash(gcforwardobjtbl);
   gcforwardobjtbl = allocateMGCHash(20, 3);
 
-#ifdef GC_PROFILE
-  gc_num_livespace = 0;
-  gc_num_freespace = 0;
-  gc_num_lobj = 0;
-  gc_num_lobjspace = 0;
-  gc_num_liveobj = 0;
-  gc_num_forwardobj = 0;
-  gc_num_profiles = NUMCORESACTIVE - 1;
-#endif
-} // void initGC()
+  GCPROFILE_INIT();
+} 
+
+// Report whether every active core has a zero entry in gccorestatus[].
+INLINE bool gc_checkAllCoreStatus_I() {
+  int core = 0;
+  // walk past the cores whose status is 0; stop at the first non-zero one
+  while((core < NUMCORESACTIVE) && (gccorestatus[core] == 0)) {
+    ++core;
+  }
+  return (core == NUMCORESACTIVE);
+}
+
+INLINE void checkMarkStatue() { // two-phase check of whether the mark phase is over; on success sets gcphase to COMPACTPHASE
+  int i;
+  if((!waitconfirm) || // act only when no confirmation round is pending, or the pending round has numconfirm == 0
+      (waitconfirm && (numconfirm == 0))) {
+    unsigned int entry_index = 0;
+    if(waitconfirm) {
+      // phase 2
+      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
+    } else {
+      // phase 1
+      entry_index = gcnumsrobjs_index;
+    }
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); // every path below leaves again through BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME()
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;  // record this core's own status and counters before checking all cores
+    gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numsendobjs;
+    gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE] = gcself_numreceiveobjs;
+    // check the status of all cores
+    bool allStall = gc_checkAllCoreStatus_I();
+    if(allStall) {
+      // ask for confirm
+      if(!waitconfirm) {
+        // the first time found all cores stall
+        // send out status confirm msg to all other cores
+        // reset the corestatus array too    
+        gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
+        waitconfirm = true;
+        numconfirm = NUMCORESACTIVE - 1; // equals the number of GCMARKCONFIRM messages sent by the loop below
+        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+        for(i = 1; i < NUMCORESACTIVE; ++i) { // starts at 1 -- presumably core 0 is the core running this check; TODO confirm
+          gccorestatus[i] = 1;
+          // send mark phase finish confirm request msg to core i
+          send_msg_1(i, GCMARKCONFIRM, false);
+        }
+      } else {
+        // Phase 2
+        // check if the sum of send objs and receive obj are the same
+        // yes->check if the info is the latest; no->go on executing
+        unsigned int sumsendobj = 0;
+        for(i = 0; i < NUMCORESACTIVE; ++i) {
+          sumsendobj += gcnumsendobjs[gcnumsrobjs_index][i];
+        } 
+        for(i = 0; i < NUMCORESACTIVE; ++i) {
+          sumsendobj -= gcnumreceiveobjs[gcnumsrobjs_index][i];
+        } 
+        if(0 == sumsendobj) {
+          // Check if there are changes of the numsendobjs or numreceiveobjs on
+          // each core
+          bool ischanged = false;
+          for(i = 0; i < NUMCORESACTIVE; ++i) {
+            if((gcnumsendobjs[0][i] != gcnumsendobjs[1][i]) || 
+                (gcnumreceiveobjs[0][i] != gcnumreceiveobjs[1][i]) ) {
+              ischanged = true;
+              break;
+            }
+          }  
+          if(!ischanged) {    
+            // all the core status info are the latest,stop mark phase
+            gcphase = COMPACTPHASE;
+            // restore the gcstatus for all cores
+            for(i = 0; i < NUMCORESACTIVE; ++i) {
+              gccorestatus[i] = 1;
+            }  
+          } else {
+            // There were changes between phase 1 and phase 2, can not decide 
+            // whether the mark phase has been finished
+            waitconfirm = false;
+            // As it fails in phase 2, flip the entries
+            gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
+          } 
+        } else {
+          // There were changes between phase 1 and phase 2, can not decide 
+          // whether the mark phase has been finished
+          waitconfirm = false;
+          // As it fails in phase 2, flip the entries
+          gcnumsrobjs_index = (gcnumsrobjs_index == 0) ? 1 : 0;
+        } 
+        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); // single exit from runtime mode for all phase-2 outcomes
+      }
+    } else {
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); // some core has a non-zero status: leave runtime mode and change nothing else
+    } 
+  } 
+} 
 
 // compute load balance for all cores
-inline int loadbalance(unsigned int * heaptop) {
+INLINE int loadbalance(unsigned int * heaptop) {
   // compute load balance
   int i;
 
@@ -597,31 +332,24 @@ inline int loadbalance(unsigned int * heaptop) {
   }
   *heaptop = gcbaseva + tloads;
 
-  GC_BAMBOO_DEBUGPRINT(0xdddd);
-  GC_BAMBOO_DEBUGPRINT_REG(tloads);
-  GC_BAMBOO_DEBUGPRINT_REG(*heaptop);
   unsigned int b = 0;
   BLOCKINDEX(*heaptop, &b);
-  unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);// num of blocks per core
-  GC_BAMBOO_DEBUGPRINT_REG(b);
-  GC_BAMBOO_DEBUGPRINT_REG(numbpc);
+  // num of blocks per core
+  unsigned int numbpc = (unsigned int)b/(unsigned int)(NUMCORES4GC);
   gctopblock = b;
   RESIDECORE(heaptop, &gctopcore);
-  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
   return numbpc;
-} // void loadbalance(int * heaptop)
+}
 
-inline bool cacheLObjs() {
-  // check the total mem size need for large objs
-  unsigned long long sumsize = 0;
-  unsigned int size = 0;
-  GC_BAMBOO_DEBUGPRINT(0xe801);
-  gclobjtail2 = gclobjtail;
-  gclobjtailindex2 = gclobjtailindex;
+// compute total mem size required and sort the lobjs in ascending order
+INLINE unsigned int sortLObjs() {
   unsigned int tmp_lobj = 0;
   unsigned int tmp_len = 0;
   unsigned int tmp_host = 0;
-  // compute total mem size required and sort the lobjs in ascending order
+  unsigned int sumsize = 0;
+
+  gclobjtail2 = gclobjtail;
+  gclobjtailindex2 = gclobjtailindex;
   // TODO USE QUICK SORT INSTEAD?
   while(gc_lobjmoreItems2_I()) {
     gc_lobjdequeue2_I();
@@ -629,76 +357,60 @@ inline bool cacheLObjs() {
     tmp_host = gclobjtail2->hosts[gclobjtailindex2-1];
     tmp_len = gclobjtail2->lengths[gclobjtailindex2 - 1];
     sumsize += tmp_len;
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
-#endif
-       gc_num_lobj++;
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2-1]);
-    GC_BAMBOO_DEBUGPRINT_REG(tmp_len);
-    GC_BAMBOO_DEBUGPRINT_REG(sumsize);
+    GCPROFILE_RECORD_LOBJ();
     unsigned int i = gclobjtailindex2-1;
     struct lobjpointerblock * tmp_block = gclobjtail2;
     // find the place to insert
     while(true) {
       if(i == 0) {
-               if(tmp_block->prev == NULL) {
-                 break;
-               }
-               if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
-                 tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
-                 tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
-                 tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
-                 tmp_block = tmp_block->prev;
-                 i = NUMLOBJPTRS-1;
-               } else {
-                 break;
-               }  // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
-         } else {
-               if(tmp_block->lobjs[i-1] > tmp_lobj) {
-                 tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
-                 tmp_block->lengths[i] = tmp_block->lengths[i-1];
-                 tmp_block->hosts[i] = tmp_block->hosts[i-1];
-                 i--;
-               } else {
-                 break;
-               }  // if(tmp_block->lobjs[i-1] < tmp_lobj)
-      }  // if(i ==0 ) else {}
-    }   // while(true)
+    if(tmp_block->prev == NULL) {
+      break;
+    }
+    if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] > tmp_lobj) {
+      tmp_block->lobjs[i] = tmp_block->prev->lobjs[NUMLOBJPTRS-1];
+      tmp_block->lengths[i] = tmp_block->prev->lengths[NUMLOBJPTRS-1];
+      tmp_block->hosts[i] = tmp_block->prev->hosts[NUMLOBJPTRS-1];
+      tmp_block = tmp_block->prev;
+      i = NUMLOBJPTRS-1;
+    } else {
+      break;
+    }  // if(tmp_block->prev->lobjs[NUMLOBJPTRS-1] < tmp_lobj)
+      } else {
+    if(tmp_block->lobjs[i-1] > tmp_lobj) {
+      tmp_block->lobjs[i] = tmp_block->lobjs[i-1];
+      tmp_block->lengths[i] = tmp_block->lengths[i-1];
+      tmp_block->hosts[i] = tmp_block->hosts[i-1];
+      i--;
+    } else {
+      break;
+    }  
+      } 
+    }  
     // insert it
     if(i != gclobjtailindex2 - 1) {
       tmp_block->lobjs[i] = tmp_lobj;
       tmp_block->lengths[i] = tmp_len;
       tmp_block->hosts[i] = tmp_host;
     }
-  }  // while(gc_lobjmoreItems2())
+  }
+  return sumsize;
+}
+
+INLINE bool cacheLObjs() {
+  // check the total mem size needed for large objs
+  unsigned long long sumsize = 0;
+  unsigned int size = 0;
+  
+  sumsize = sortLObjs();
+
+  GCPROFILE_RECORD_LOBJSPACE();
 
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
-#endif
-  gc_num_lobjspace = sumsize;
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
   // check if there is enough space to cache these large objs
   unsigned int dst = gcbaseva + (BAMBOO_SHARED_MEM_SIZE) -sumsize;
   if((unsigned long long)gcheaptop > (unsigned long long)dst) {
     // do not have enough room to cache large objs
-    GC_BAMBOO_DEBUGPRINT(0xe802);
-    GC_BAMBOO_DEBUGPRINT_REG(dst);
-    GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
-       GC_BAMBOO_DEBUGPRINT_REG(sumsize);
     return false;
   }
-  GC_BAMBOO_DEBUGPRINT(0xe803);
-  GC_BAMBOO_DEBUGPRINT_REG(dst);
-  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
 
   gcheaptop = dst; // Note: record the start of cached lobjs with gcheaptop
   // cache the largeObjs to the top of the shared heap
@@ -708,23 +420,18 @@ inline bool cacheLObjs() {
     size = gclobjtail2->lengths[gclobjtailindex2];
     // set the mark field to COMPACTED, indicating that this obj has been moved
     // and need to be flushed
-    ((int *)(gclobjtail2->lobjs[gclobjtailindex2]))[BAMBOOMARKBIT] = COMPACTED;
+    ((struct ___Object___ *)(gclobjtail2->lobjs[gclobjtailindex2]))->marked = 
+      COMPACTED;
     dst -= size;
     if((unsigned int)dst < 
-               (unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
+        (unsigned int)(gclobjtail2->lobjs[gclobjtailindex2]+size)) {
       memmove(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
     } else {
       memcpy(dst, gclobjtail2->lobjs[gclobjtailindex2], size);
     }
-    GC_BAMBOO_DEBUGPRINT(0x804);
-    GC_BAMBOO_DEBUGPRINT_REG(gclobjtail2->lobjs[gclobjtailindex2]);
-    GC_BAMBOO_DEBUGPRINT(dst);
-    GC_BAMBOO_DEBUGPRINT_REG(size);
-    GC_BAMBOO_DEBUGPRINT_REG(*((int*)gclobjtail2->lobjs[gclobjtailindex2]));
-    GC_BAMBOO_DEBUGPRINT_REG(*((int*)(dst)));
   }
   return true;
-} // void cacheLObjs()
+} 
 
 // update the bamboo_smemtbl to record current shared mem usage
 void updateSmemTbl(unsigned int coren,
@@ -741,22 +448,16 @@ void updateSmemTbl(unsigned int coren,
   unsigned int toset = 0;
   do {
     toset = gc_core2block[2*coren+i]+(unsigned int)(NUMCORES4GC*2)*j;
-#ifdef GC_TBL_DEBUG
-       if(toset >= gcnumblock) {
-         tprintf("ltopcore: %d, localtop: %x, toset: %d, gcnumblock: %d (%d, %d) \n", ltopcore, localtop, toset, gcnumblock, i, j);
-         BAMBOO_EXIT(0xb001);
-       }
-#endif
     if(toset < ltopcore) {
       bamboo_smemtbl[toset]=
         (toset<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
 #ifdef SMEMM
-         gcmem_mixed_usedmem += bamboo_smemtbl[toset];
+      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
 #endif
     } else if(toset == ltopcore) {
       bamboo_smemtbl[toset] = load;
 #ifdef SMEMM
-         gcmem_mixed_usedmem += bamboo_smemtbl[toset];
+      gcmem_mixed_usedmem += bamboo_smemtbl[toset];
 #endif
       break;
     } else {
@@ -768,54 +469,38 @@ void updateSmemTbl(unsigned int coren,
       j++;
     }
   } while(true);
-} // void updateSmemTbl(int, int)
+}
 
-inline void moveLObjs() {
-  GC_BAMBOO_DEBUGPRINT(0xea01);
-#ifdef SMEMM
-  // update the gcmem_mixed_usedmem
-  gcmem_mixed_usedmem = 0;
-#endif
-  // zero out the smemtbl
+INLINE unsigned int checkCurrHeapTop() {
+  // zero out the smemtbl before rebuilding it from gcloads
   BAMBOO_MEMSET_WH(bamboo_smemtbl, 0, sizeof(int)*gcnumblock);
-  // find current heap top
   // flush all gcloads to indicate the real heap top on one core;
   // previously each entry represented the next available ptr on a core
   if(((unsigned int)gcloads[0] > (unsigned int)(gcbaseva+BAMBOO_SMEM_SIZE_L))
      && (((unsigned int)gcloads[0]%(BAMBOO_SMEM_SIZE)) == 0)) {
     // edge of a block, check if this is exactly the heaptop
     BASEPTR(0, gcfilledblocks[0]-1, &(gcloads[0]));
-    gcloads[0]+=(gcfilledblocks[0]>1 ?
-                 (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
+    gcloads[0]+=(gcfilledblocks[0]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
   }
   updateSmemTbl(0, gcloads[0]);
-  GC_BAMBOO_DEBUGPRINT(0xea02);
-  GC_BAMBOO_DEBUGPRINT_REG(gcloads[0]);
-  GC_BAMBOO_DEBUGPRINT_REG(bamboo_smemtbl[0]);
   for(int i = 1; i < NUMCORES4GC; i++) {
     unsigned int tmptop = 0;
-    GC_BAMBOO_DEBUGPRINT(0xf000+i);
-    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
-    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[i]);
     if((gcfilledblocks[i] > 0)
        && (((unsigned int)gcloads[i] % (BAMBOO_SMEM_SIZE)) == 0)) {
       // edge of a block, check if this is exactly the heaptop
       BASEPTR(i, gcfilledblocks[i]-1, &gcloads[i]);
-      gcloads[i] += 
-               (gcfilledblocks[i]>1 ? (BAMBOO_SMEM_SIZE) : (BAMBOO_SMEM_SIZE_L));
+      gcloads[i] +=
+        (gcfilledblocks[i]>1?(BAMBOO_SMEM_SIZE):(BAMBOO_SMEM_SIZE_L));
       tmptop = gcloads[i];
     }
     updateSmemTbl(i, gcloads[i]);
-    GC_BAMBOO_DEBUGPRINT_REG(gcloads[i]);
-  } // for(int i = 1; i < NUMCORES4GC; i++) {
+  } 
 
   // find current heap top
   // TODO
   // a bug here: when using local allocation, directly moving large objects
   // to the highest free chunk might not be memory efficient
   unsigned int tmpheaptop = 0;
-  unsigned int size = 0;
-  unsigned int bound = 0;
   int i = 0;
   for(i = gcnumblock-1; i >= 0; i--) {
     if(bamboo_smemtbl[i] > 0) {
@@ -826,9 +511,20 @@ inline void moveLObjs() {
     tmpheaptop = gcbaseva;
   } else {
     tmpheaptop = gcbaseva+bamboo_smemtbl[i]+((i<NUMCORES4GC) ?
-               (BAMBOO_SMEM_SIZE_L*i) :
+        (BAMBOO_SMEM_SIZE_L*i) :
         (BAMBOO_SMEM_SIZE*(i-NUMCORES4GC)+BAMBOO_LARGE_SMEM_BOUND));
   }
+  return tmpheaptop;
+}
+
+INLINE void moveLObjs() {
+#ifdef SMEMM
+  // update the gcmem_mixed_usedmem
+  gcmem_mixed_usedmem = 0;
+#endif
+  unsigned int size = 0;
+  unsigned int bound = 0;
+  unsigned int tmpheaptop = checkCurrHeapTop();
 
   // move large objs from gcheaptop to tmpheaptop
   // write the header first
@@ -836,20 +532,17 @@ inline void moveLObjs() {
 #ifdef SMEMM
   gcmem_mixed_usedmem += tomove;
 #endif
-  GC_BAMBOO_DEBUGPRINT(0xea03);
-  GC_BAMBOO_DEBUGPRINT_REG(tomove);
-  GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
   // flush the gcsbstarttbl
   BAMBOO_MEMSET_WH(&(gcsbstarttbl[gcreservedsb]), '\0',
-         (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-(unsigned int)gcreservedsb)
-         *sizeof(unsigned int));
+    (BAMBOO_SHARED_MEM_SIZE/BAMBOO_SMEM_SIZE-(unsigned int)gcreservedsb)
+    *sizeof(unsigned int));
   if(tomove == 0) {
     gcheaptop = tmpheaptop;
   } else {
     // check how many blocks it spans
     unsigned int remain = tmpheaptop-gcbaseva;
-    unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;//number of the sblock
+    //number of the sblock
+    unsigned int sb = remain/BAMBOO_SMEM_SIZE+(unsigned int)gcreservedsb;
     unsigned int b = 0;  // number of the block
     BLOCKINDEX(tmpheaptop, &b);
     // check the remaining space in this block
@@ -859,7 +552,6 @@ inline void moveLObjs() {
     }
     remain = bound - remain%bound;
 
-    GC_BAMBOO_DEBUGPRINT(0xea04);
     size = 0;
     unsigned int isize = 0;
     unsigned int host = 0;
@@ -872,205 +564,106 @@ inline void moveLObjs() {
     while(gc_lobjmoreItems4_I()) {
       ptr = (unsigned int)(gc_lobjdequeue4_I(&size, &host));
       ALIGNSIZE(size, &isize);
-      if(remain < isize) {
-               // this object acrosses blocks
-               if(cpysize > 0) {
-                 // close current block, fill its header
-                 BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-                 *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
-                 bamboo_smemtbl[b]+=BAMBOO_CACHE_LINE_SIZE;//add the size of header
-#ifdef GC_TBL_DEBUG
-                 if(b >= gcnumblock) {
-                       BAMBOO_EXIT(0xb002);
-                 }
-#endif
-                 cpysize = 0;
-                 base = tmpheaptop;
-                 if(remain == 0) {
-                       remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
-                                        BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-                 }
-                 remain -= BAMBOO_CACHE_LINE_SIZE;
-                 tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
-                 BLOCKINDEX(tmpheaptop, &b);
-                 sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)
-                       +gcreservedsb;
-               }  // if(cpysize > 0)
-
-               // move the large obj
-               if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
-                 memmove(tmpheaptop, gcheaptop, size);
-               } else {
-                 //BAMBOO_WRITE_HINT_CACHE(tmpheaptop, size);
-                 memcpy(tmpheaptop, gcheaptop, size);
-               }
-               // fill the remaining space with -2 padding
-               BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
-               GC_BAMBOO_DEBUGPRINT(0xea05);
-               GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
-               GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-               GC_BAMBOO_DEBUGPRINT_REG(size);
-               GC_BAMBOO_DEBUGPRINT_REG(isize);
-               GC_BAMBOO_DEBUGPRINT_REG(base);
-               gcheaptop += size;
-#ifdef GC_TBL_DEBUG
-               if((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] != 3)) {
-                 tprintf("Error moveLobj: %x %x \n", 
-                         (int)ptr, ((int *)(ptr))[BAMBOOMARKBIT] );
-                 BAMBOO_EXIT(0xb003);
-               }
-#endif
-               // cache the mapping info 
-               gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] = 
-                 (unsigned int)tmpheaptop;
-#ifdef GC_TBL_DEBUG
-               if(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] == 
-                       gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)-1]) {
-                 tprintf("Error moveobj ^^ : %x, %x, %d \n", (int)ptr, 
-                         (int)tmpheaptop, OBJMAPPINGINDEX((unsigned int)ptr));
-                 BAMBOO_EXIT(0xb004);
-               }
-#endif
-               GC_BAMBOO_DEBUGPRINT(0xcdca);
-               GC_BAMBOO_DEBUGPRINT_REG(ptr);
-               GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-               tmpheaptop += isize;
+      if(remain >= isize) {
+    remain -= isize;
+    // move the large obj
+    if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
+      memmove(tmpheaptop, gcheaptop, size);
+    } else {
+      memcpy(tmpheaptop, gcheaptop, size);
+    }
+    // fill the remaining space with -2 padding
+    BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
 
-               // set the gcsbstarttbl and bamboo_smemtbl
-               unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
-               for(int k = 1; k < tmpsbs; k++) {
-                 gcsbstarttbl[sb+k] = -1;
-#ifdef GC_TBL_DEBUG
-                 if((sb+k) >= gcsbstarttbl_len) {
-                       BAMBOO_EXIT(0xb005);
-                 }
-#endif
-               }
-               sb += tmpsbs;
-               bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-               BLOCKINDEX(tmpheaptop-1, &tmpsbs);
-               for(; b < tmpsbs; b++) {
-                 bamboo_smemtbl[b] = bound;
-#ifdef GC_TBL_DEBUG
-                 if(b >= gcnumblock) {
-                       BAMBOO_EXIT(0xb006);
-                 }
-#endif
-                 if(b==NUMCORES4GC-1) {
-                       bound = BAMBOO_SMEM_SIZE;
-                 }
-               }
-               if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
-                 gcsbstarttbl[sb] = -1;
-                 remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
-                                  BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-                 bamboo_smemtbl[b] = bound;
-               } else {
-                 gcsbstarttbl[sb] = (int)tmpheaptop;
-                 remain = tmpheaptop-gcbaseva;
-                 bamboo_smemtbl[b] = remain%bound;
-                 remain = bound - bamboo_smemtbl[b];
-               } // if(((isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) else ...
-#ifdef GC_TBL_DEBUG
-               if(sb >= gcsbstarttbl_len) {
-                 BAMBOO_EXIT(0xb007);
-               }
-               if(b >= gcnumblock) {
-                 BAMBOO_EXIT(0xb008);
-               }
-#endif
+    gcheaptop += size;
+    cpysize += isize;
+    // cache the mapping info
+    gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
+    tmpheaptop += isize;
 
-               // close current block and fill the header
-               BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-               *((int*)base) = isize + BAMBOO_CACHE_LINE_SIZE;
-               cpysize = 0;
-               base = tmpheaptop;
-               if(remain == BAMBOO_CACHE_LINE_SIZE) {
-                 // fill with 0 in case
-                 BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
-               }
-               remain -= BAMBOO_CACHE_LINE_SIZE;
-               tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+    // update bamboo_smemtbl
+    bamboo_smemtbl[b] += isize;
       } else {
-               remain -= isize;
-               // move the large obj
-               if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
-                 memmove(tmpheaptop, gcheaptop, size);
-               } else {
-                 memcpy(tmpheaptop, gcheaptop, size);
-               }
-               // fill the remaining space with -2 padding
-               BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
-               GC_BAMBOO_DEBUGPRINT(0xea06);
-               GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
-               GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-               GC_BAMBOO_DEBUGPRINT_REG(size);
-               GC_BAMBOO_DEBUGPRINT_REG(isize);
-
-               gcheaptop += size;
-               cpysize += isize;
-#ifdef GC_TBL_DEBUG
-               if((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] != 3)) {
-                 tprintf("Error moveLobj: %x %x \n", (int)ptr,
-                         ((int *)(ptr))[BAMBOOMARKBIT] );
-                 BAMBOO_EXIT(0xb009);
-               }
-#endif
-               // cache the mapping info
-               gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] = 
-                 (unsigned int)tmpheaptop;
-#ifdef GC_TBL_DEBUG
-               if(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)] == 
-                       gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)-1]) {
-                 tprintf("Error moveobj ?? : %x, %x, %d \n", (int)ptr, 
-                         (int)tmpheaptop, OBJMAPPINGINDEX((unsigned int)ptr));
-                 BAMBOO_EXIT(0xb00a);
-               }
-               if(!ISSHAREDOBJ(tmpheaptop)) {
-                 tprintf("Error: %x, %x \n", (int)ptr, (int)tmpheaptop);
-                 BAMBOO_EXIT(0xb00b);
-               }
-#endif
-               GC_BAMBOO_DEBUGPRINT(0xcdcc);
-               GC_BAMBOO_DEBUGPRINT_REG(ptr);
-               GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
-               GC_BAMBOO_DEBUGPRINT_REG(*((int*)tmpheaptop));
-               tmpheaptop += isize;
+    // this object crosses block boundaries
+    if(cpysize > 0) {
+      CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
+      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
+      cpysize = 0;
+      base = tmpheaptop;
+      if(remain == 0) {
+        remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
+          BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+      }
+      remain -= BAMBOO_CACHE_LINE_SIZE;
+      tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+      BLOCKINDEX(tmpheaptop, &b);
+      sb = (unsigned int)(tmpheaptop-gcbaseva)/(BAMBOO_SMEM_SIZE)+gcreservedsb;
+    } 
+
+    // move the large obj
+    if((unsigned int)gcheaptop < (unsigned int)(tmpheaptop+size)) {
+      memmove(tmpheaptop, gcheaptop, size);
+    } else {
+      memcpy(tmpheaptop, gcheaptop, size);
+    }
+    // fill the remaining space with -2 padding
+    BAMBOO_MEMSET_WH(tmpheaptop+size, -2, isize-size);
+    gcheaptop += size;
+    // cache the mapping info 
+    gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=(unsigned int)tmpheaptop;
+    tmpheaptop += isize;
+
+    // set the gcsbstarttbl and bamboo_smemtbl
+    unsigned int tmpsbs=1+(unsigned int)(isize-remain-1)/BAMBOO_SMEM_SIZE;
+    for(int k = 1; k < tmpsbs; k++) {
+      gcsbstarttbl[sb+k] = -1;
+    }
+    sb += tmpsbs;
+    bound = (b<NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+    BLOCKINDEX(tmpheaptop-1, &tmpsbs);
+    for(; b < tmpsbs; b++) {
+      bamboo_smemtbl[b] = bound;
+      if(b==NUMCORES4GC-1) {
+        bound = BAMBOO_SMEM_SIZE;
+      }
+    }
+    if(((unsigned int)(isize-remain)%(BAMBOO_SMEM_SIZE)) == 0) {
+      gcsbstarttbl[sb] = -1;
+      remain = ((tmpheaptop-gcbaseva)<(BAMBOO_LARGE_SMEM_BOUND)) ?
+           BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+      bamboo_smemtbl[b] = bound;
+    } else {
+      gcsbstarttbl[sb] = (int)tmpheaptop;
+      remain = tmpheaptop-gcbaseva;
+      bamboo_smemtbl[b] = remain%bound;
+      remain = bound - bamboo_smemtbl[b];
+    } 
+
+    CLOSEBLOCK(base, isize+BAMBOO_CACHE_LINE_SIZE);
+    cpysize = 0;
+    base = tmpheaptop;
+    if(remain == BAMBOO_CACHE_LINE_SIZE) {
+      // fill with 0 in case
+      BAMBOO_MEMSET_WH(tmpheaptop, '\0', remain);
+    }
+    remain -= BAMBOO_CACHE_LINE_SIZE;
+    tmpheaptop += BAMBOO_CACHE_LINE_SIZE;
+      } 
+    }
 
-               // update bamboo_smemtbl
-               bamboo_smemtbl[b] += isize;
-#ifdef GC_TBL_DEBUG
-               if(b >= gcnumblock) {
-                 BAMBOO_EXIT(0xb00c);
-               }
-#endif
-         }  // if(remain < isize) else ...
-    }  // while(gc_lobjmoreItems())
     if(cpysize > 0) {
-      // close current block, fill the header
-      BAMBOO_MEMSET_WH(base, '\0', BAMBOO_CACHE_LINE_SIZE);
-      *((int*)base) = cpysize + BAMBOO_CACHE_LINE_SIZE;
-      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;//add the size of the header
-#ifdef GC_TBL_DEBUG
-         if(b >= gcnumblock) {
-               BAMBOO_EXIT(0xb00d);
-         }
-#endif
+      CLOSEBLOCK(base, cpysize+BAMBOO_CACHE_LINE_SIZE);
+      bamboo_smemtbl[b] += BAMBOO_CACHE_LINE_SIZE;
     } else {
       tmpheaptop -= BAMBOO_CACHE_LINE_SIZE;
     }
     gcheaptop = tmpheaptop;
-
-  } // if(tomove == 0)
-
-  GC_BAMBOO_DEBUGPRINT(0xea07);
-  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
+  } 
 
   bamboo_free_block = 0;
   unsigned int tbound = 0;
   do {
-    tbound = (bamboo_free_block<NUMCORES4GC) ?
-             BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
+    tbound=(bamboo_free_block<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
     if(bamboo_smemtbl[bamboo_free_block] == tbound) {
       bamboo_free_block++;
     } else {
@@ -1078,2543 +671,52 @@ inline void moveLObjs() {
       break;
     }
   } while(true);
-#ifdef GC_TBL_DEBUG
-  if(bamboo_free_block >= gcnumblock) {
-       BAMBOO_EXIT(0xb00e);
-  }
-#endif
 
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
-#endif
-  // check how many live space there are
-  gc_num_livespace = 0;
-  for(int tmpi = 0; tmpi < gcnumblock; tmpi++) {
-       gc_num_livespace += bamboo_smemtbl[tmpi];
-  }
-  gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace;
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-  GC_BAMBOO_DEBUGPRINT(0xea08);
-  GC_BAMBOO_DEBUGPRINT_REG(gcheaptop);
-} // void moveLObjs()
+  GCPROFILE_RECORD_SPACE();
+} 
 
-inline void markObj(void * objptr) {
-  if(objptr == NULL) {
-    return;
-  }
-  if(ISSHAREDOBJ(objptr)) {
-    unsigned int host = hostcore(objptr);
-    if(BAMBOO_NUM_OF_CORE == host) {
-      // on this core
-      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-      if(((int *)objptr)[BAMBOOMARKBIT] == INIT) {
-               // this is the first time that this object is discovered,
-               // set the flag as DISCOVERED
-               ((int *)objptr)[BAMBOOMARKBIT] = DISCOVERED;
-               BAMBOO_CACHE_FLUSH_LINE(objptr);
-               gc_enqueue_I(objptr);
-#ifdef GC_TBL_DEBUG
-               // for test
-               gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)]=1;
-#endif
-         }
-      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    } else {
-      GC_BAMBOO_DEBUGPRINT(0xbbbb);
-      GC_BAMBOO_DEBUGPRINT_REG(host);
-      GC_BAMBOO_DEBUGPRINT_REG(objptr);
-      // check if this obj has been forwarded
-      if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
-               // send a msg to host informing that objptr is active
-               send_msg_2(host, GCMARKEDOBJ, objptr, false);
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
-#endif
-               gc_num_forwardobj++;
-#ifdef MGC_SPEC
-       }
-#endif
-#endif // GC_PROFILE
-               gcself_numsendobjs++;
-               MGCHashadd(gcforwardobjtbl, (int)objptr);
-      }
-    }
-  } else {
-#ifdef GC_TBL_DEBUG
-       tprintf("Non shared pointer to be marked %x \n", (int)objptr);
-       BAMBOO_EXIT(0xb00f);
-#endif
-    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    gc_enqueue_I(objptr);
-    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-  }  // if(ISSHAREDOBJ(objptr))
-} // void markObj(void * objptr)
+INLINE void gc_collect(struct garbagelist * stackptr) {
+  gcprocessing = true;
+  tprintf("gc \n");
+  // inform the master that this core is at a gc safe point and is ready to 
+  // do gc
+  send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs, 
+    self_numreceiveobjs, false);
 
-// enqueue root objs
-inline void tomark(struct garbagelist * stackptr) {
-  if(MARKPHASE != gcphase) {
-    GC_BAMBOO_DEBUGPRINT_REG(gcphase);
-    BAMBOO_EXIT(0xb010);
+  // core collector routine
+  while(true) {
+    if(INITPHASE == gcphase) {
+      break;
+    }
   }
-  gcbusystatus = true;
-  gcnumlobjs = 0;
-
+  GC_PRINTF("Do initGC\n");
+  initGC();
+  CACHEADAPT_GC(true);
+  //send init finish msg to core coordinator
+  send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
 
-  int i,j;
-  // enqueue current stack
-  while(stackptr!=NULL) {
-    GC_BAMBOO_DEBUGPRINT(0xe501);
-    GC_BAMBOO_DEBUGPRINT_REG(stackptr->size);
-    GC_BAMBOO_DEBUGPRINT_REG(stackptr->next);
-    GC_BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
-    for(i=0; i<stackptr->size; i++) {
-      if(stackptr->array[i] != NULL) {
-               markObj(stackptr->array[i]);
-      }
+  while(true) {
+    if(MARKPHASE == gcphase) {
+      break;
     }
-    stackptr=stackptr->next;
   }
-  GC_BAMBOO_DEBUGPRINT(0xe502);
-
-  // enqueue static pointers global_defs_p
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-       struct garbagelist * staticptr=(struct garbagelist *)global_defs_p;
-       while(staticptr != NULL) {
-         for(i=0; i<staticptr->size; i++) {
-               if(staticptr->array[i] != NULL) {
-                 markObj(staticptr->array[i]);
-               }
-         }
-         staticptr = staticptr->next;
-       }
-  }
-  GC_BAMBOO_DEBUGPRINT(0xe503);
-
-#ifdef TASK
-  // enqueue objectsets
-  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-    for(i=0; i<NUMCLASSES; i++) {
-      struct parameterwrapper ** queues =
-        objectqueues[BAMBOO_NUM_OF_CORE][i];
-      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
-      for(j = 0; j < length; ++j) {
-               struct parameterwrapper * parameter = queues[j];
-               struct ObjectHash * set=parameter->objectset;
-               struct ObjectNode * ptr=set->listhead;
-               while(ptr!=NULL) {
-                 markObj((void *)ptr->key);
-                 ptr=ptr->lnext;
-               }
-      }
-    }
-  }
-
-  // euqueue current task descriptor
-  if(currtpd != NULL) {
-    GC_BAMBOO_DEBUGPRINT(0xe504);
-    for(i=0; i<currtpd->numParameters; i++) {
-      markObj(currtpd->parameterArray[i]);
-    }
-  }
-
-  GC_BAMBOO_DEBUGPRINT(0xe505);
-  // euqueue active tasks
-  if(activetasks != NULL) {
-    struct genpointerlist * ptr=activetasks->list;
-    while(ptr!=NULL) {
-      struct taskparamdescriptor *tpd=ptr->src;
-      int i;
-      for(i=0; i<tpd->numParameters; i++) {
-               markObj(tpd->parameterArray[i]);
-      }
-      ptr=ptr->inext;
-    }
-  }
-
-  GC_BAMBOO_DEBUGPRINT(0xe506);
-  // enqueue cached transferred obj
-  struct QueueItem * tmpobjptr =  getHead(&objqueue);
-  while(tmpobjptr != NULL) {
-    struct transObjInfo * objInfo =
-      (struct transObjInfo *)(tmpobjptr->objectptr);
-    markObj(objInfo->objptr);
-    tmpobjptr = getNextQueueItem(tmpobjptr);
-  }
-
-  GC_BAMBOO_DEBUGPRINT(0xe507);
-  // enqueue cached objs to be transferred
-  struct QueueItem * item = getHead(totransobjqueue);
-  while(item != NULL) {
-    struct transObjInfo * totransobj =
-      (struct transObjInfo *)(item->objectptr);
-    markObj(totransobj->objptr);
-    item = getNextQueueItem(item);
-  } // while(item != NULL)
-
-  GC_BAMBOO_DEBUGPRINT(0xe508);
-  // enqueue lock related info
-  for(i = 0; i < runtime_locklen; ++i) {
-    markObj((void *)(runtime_locks[i].redirectlock));
-    if(runtime_locks[i].value != NULL) {
-      markObj((void *)(runtime_locks[i].value));
-    }
-  }
-  GC_BAMBOO_DEBUGPRINT(0xe509);
-#endif 
-
-#ifdef MGC
-  // enqueue global thread queue
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-       lockthreadqueue();
-       unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
-       if(thread_counter > 0) {
-         unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
-         for(i = thread_counter; i > 0; i--) {
-               markObj((void *)bamboo_thread_queue[4+start]);
-               start = (start+1)&bamboo_max_thread_num_mask;
-         }
-       }
-  }
-
-  // enqueue the bamboo_threadlocks
-  for(i = 0; i < bamboo_threadlocks.index; i++) {
-       markObj((void *)(bamboo_threadlocks.locks[i].object));
-  }
-
-  // enqueue the bamboo_current_thread
-  if(bamboo_current_thread != 0) {
-       markObj((void *)bamboo_current_thread);
-  }
-
-  GC_BAMBOO_DEBUGPRINT(0xe50a);
-#endif
-} // void tomark(struct garbagelist * stackptr)
-
-inline void mark(bool isfirst,
-                 struct garbagelist * stackptr) {
-  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed01);
-  if(isfirst) {
-    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed02);
-    // enqueue root objs
-    tomark(stackptr);
-    gccurr_heaptop = 0; // record the size of all active objs in this core
-                        // aligned but does not consider block boundaries
-    gcmarkedptrbound = 0;
-  }
-  if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed03);
-  unsigned int isize = 0;
-  bool checkfield = true;
-  bool sendStall = false;
-  // mark phase
-  while(MARKPHASE == gcphase) {
-    if(BAMBOO_NUM_OF_CORE == 0) GC_BAMBOO_DEBUGPRINT(0xed04);
-    while(true) {
-      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-      bool hasItems = gc_moreItems2_I();
-      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-      GC_BAMBOO_DEBUGPRINT(0xed05);
-      if(!hasItems) {
-               break;
-      }
-      sendStall = false;
-      gcbusystatus = true;
-      checkfield = true;
-      void * ptr = gc_dequeue2_I();
-
-      GC_BAMBOO_DEBUGPRINT_REG(ptr);
-      unsigned int size = 0;
-      unsigned int isize = 0;
-      unsigned int type = 0;
-      // check if it is a shared obj
-      if(ISSHAREDOBJ(ptr)) {
-               // a shared obj, check if it is a local obj on this core
-               unsigned int host = hostcore(ptr);
-               bool islocal = (host == BAMBOO_NUM_OF_CORE);
-               if(islocal) {
-                 bool isnotmarked = (((int *)ptr)[BAMBOOMARKBIT] == DISCOVERED);
-                 if(isLarge(ptr, &type, &size) && isnotmarked) {
-                       // ptr is a large object and not marked or enqueued
-                       GC_BAMBOO_DEBUGPRINT(0xecec);
-                       GC_BAMBOO_DEBUGPRINT_REG(ptr);
-                       GC_BAMBOO_DEBUGPRINT_REG(*((int*)ptr));
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
-                       gcnumlobjs++;
-                       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                       // mark this obj
-                       ((int *)ptr)[BAMBOOMARKBIT] = MARKED;
-                       BAMBOO_CACHE_FLUSH_LINE(ptr);
-#ifdef GC_TBL_DEBUG
-                       // for test
-                       gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=3;
-#endif
-                 } else if(isnotmarked) {
-                       // ptr is an unmarked active object on this core
-                       ALIGNSIZE(size, &isize);
-                       gccurr_heaptop += isize;
-                       GC_BAMBOO_DEBUGPRINT(0xaaaa);
-                       GC_BAMBOO_DEBUGPRINT_REG(ptr);
-                       GC_BAMBOO_DEBUGPRINT_REG(isize);
-                       GC_BAMBOO_DEBUGPRINT(((int *)(ptr))[0]);
-                       // mark this obj
-                       ((int *)ptr)[BAMBOOMARKBIT] = MARKED;
-                       BAMBOO_CACHE_FLUSH_LINE(ptr);
-#ifdef GC_TBL_DEBUG
-                       // for test
-                       gcmappingtbl[OBJMAPPINGINDEX((unsigned int)ptr)]=2;
-#endif
-                 
-                       if((unsigned int)(ptr + size) > (unsigned int)gcmarkedptrbound) {
-                         gcmarkedptrbound = (unsigned int)(ptr + size);
-                       } // if(ptr + size > gcmarkedptrbound)
-                 } else {
-                       // ptr is not an active obj or has been marked
-                       checkfield = false;
-                 } // if(isLarge(ptr, &type, &size)) else ...
-               } 
-#ifdef GC_TBL_DEBUG
-               else {
-                 tprintf("Error mark: %x, %d, %d \n", (int)ptr, BAMBOO_NUM_OF_CORE, 
-                         hostcore(ptr));
-                 BAMBOO_EXIT(0xb011);
-               }
-#endif /* can never reach here
-               else {
-                 // check if this obj has been forwarded
-                 if(!MGCHashcontains(gcforwardobjtbl, (int)ptr)) {
-                       // send a msg to host informing that ptr is active
-                       send_msg_2(host, GCMARKEDOBJ, ptr, false);
-                       gcself_numsendobjs++;
-                       MGCHashadd(gcforwardobjtbl, (int)ptr);
-                 }
-                       checkfield = false;
-               }// if(isLocal(ptr)) else ...*/
-         }   // if(ISSHAREDOBJ(ptr))
-      GC_BAMBOO_DEBUGPRINT(0xed06);
-
-      if(checkfield) {
-               // scan all pointers in ptr
-               unsigned int * pointer;
-               pointer=pointerarray[type];
-               if (pointer==0) {
-                 /* Array of primitives */
-                 /* Do nothing */
-               } else if (((unsigned int)pointer)==1) {
-                 /* Array of pointers */
-                 struct ArrayObject *ao=(struct ArrayObject *) ptr;
-                 int length=ao->___length___;
-                 int j;
-                 for(j=0; j<length; j++) {
-                       void *objptr =
-                         ((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
-                       markObj(objptr);
-                 }
-               } else {
-                 unsigned int size=pointer[0];
-                 int i;
-                 for(i=1; i<=size; i++) {
-                       unsigned int offset=pointer[i];
-                       void * objptr=*((void **)(((char *)ptr)+offset));
-                       markObj(objptr);
-                 }
-               }     // if (pointer==0) else if ... else ...
-               {
-                 pointer=pointerarray[OBJECTTYPE];
-                 //handle object class
-                 unsigned int size=pointer[0];
-                 int i;
-                 for(i=1; i<=size; i++) {
-                       unsigned int offset=pointer[i];
-                       void * objptr=*((void **)(((char *)ptr)+offset));
-                       markObj(objptr);
-                 }
-               }
-      }   // if(checkfield)
-    }     // while(gc_moreItems2())
-    GC_BAMBOO_DEBUGPRINT(0xed07);
-       gcbusystatus = false;
-    // send mark finish msg to core coordinator
-    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-      GC_BAMBOO_DEBUGPRINT(0xed08);
-         int entry_index = 0;
-         if(waitconfirm)  {
-               // phase 2
-               entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
-         } else {
-               // phase 1
-               entry_index = gcnumsrobjs_index;
-         }
-      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-      gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
-      gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE]=gcself_numreceiveobjs;
-      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
-    } else {
-      if(!sendStall) {
-               GC_BAMBOO_DEBUGPRINT(0xed09);
-               send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
-                                  gcself_numsendobjs, gcself_numreceiveobjs, false);
-               sendStall = true;
-      }
-    }  // if(STARTUPCORE == BAMBOO_NUM_OF_CORE) ...
-    GC_BAMBOO_DEBUGPRINT(0xed0a);
-
-    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
-      GC_BAMBOO_DEBUGPRINT(0xed0b);
-      return;
-    }
-  } // while(MARKPHASE == gcphase)
-
-  BAMBOO_CACHE_MF();
-} // mark()
-
-// Serves the pending move request of core `coren` from the current heap top.
-//   coren     - core whose request is served
-//   p         - in/out: start address handed to the mover
-//   numblocks - in/out: block count; the mover is told *numblocks + 1
-//   remain    - in/out: bytes still free in the block that contains *p
-// The move is started first (directly on the startup core, through a
-// GCMOVESTART message otherwise); only then is the top-of-heap bookkeeping
-// advanced.  Callers in this file invoke it between
-// BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT() and
-// BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME().
-inline void compact2Heaptophelper_I(unsigned int coren,
-                                    unsigned int* p,
-                                    unsigned int* numblocks,
-                                    unsigned int* remain) {
-  // the outstanding request of this core plus one cache line
-  unsigned int needed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
-  unsigned int blkidx;
-
-  if(STARTUPCORE != coren) {
-    // remote core: tell it where to move to
-    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
-  } else {
-    // this is the startup core itself: record the move locally
-    gctomove = true;
-    gcmovestartaddr = *p;
-    gcdstcore = gctopcore;
-    gcblock2fill = *numblocks + 1;
-  }
-  GC_BAMBOO_DEBUGPRINT_REG(coren);
-  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
-  GC_BAMBOO_DEBUGPRINT_REG(*p);
-  GC_BAMBOO_DEBUGPRINT_REG(*numblocks+1);
-
-  if(needed < *remain) {
-    // the whole request fits into the current top block
-    GC_BAMBOO_DEBUGPRINT(0xd104);
-    *p += needed;
-    gcrequiredmems[coren] = 0;
-    gcloads[gctopcore] += needed;
-    *remain -= needed;
-    gcmovepending--;
-    return;
-  }
-
-  // the current top block is used up: close it and switch to the next one
-  GC_BAMBOO_DEBUGPRINT(0xd105);
-  *p += *remain;
-  gcfilledblocks[gctopcore]++;
-  unsigned int nextbase = 0;
-  BASEPTR(gctopcore, gcfilledblocks[gctopcore], &nextbase);
-  gcloads[gctopcore] = nextbase;
-  // what is still owed to coren after it got the rest of this block
-  gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
-  gcstopblock[gctopcore] += 1;
-  gctopcore = NEXTTOPCORE(gctopblock);
-  gctopblock += 1;
-  // re-seed the in/out arguments from the new top core
-  *numblocks = gcstopblock[gctopcore];
-  *p = gcloads[gctopcore];
-  BLOCKINDEX(*p, &blkidx);
-  if(blkidx < NUMCORES4GC) {
-    *remain = (BAMBOO_SMEM_SIZE_L)-((*p)%(BAMBOO_SMEM_SIZE_L));
-  } else {
-    *remain = (BAMBOO_SMEM_SIZE)-((*p)%(BAMBOO_SMEM_SIZE));
-  }
-  GC_BAMBOO_DEBUGPRINT(0xd106);
-  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
-  GC_BAMBOO_DEBUGPRINT_REG(*p);
-  GC_BAMBOO_DEBUGPRINT_REG(blkidx);
-  GC_BAMBOO_DEBUGPRINT_REG(*remain);
-  gcmovepending--;
-} // void compact2Heaptophelper_I(unsigned int, unsigned int*, ...)
-
-// Used when no core has spare memory but some cores are blocked on a pending
-// move: locates the current heap top and lets the blocked cores move there.
-// If the top core itself is still working it is served first and nothing
-// else is done; otherwise every unfinished core with outstanding required
-// memory is served in core order until the (possibly changed) top core turns
-// out to be busy.
-inline void compact2Heaptop() {
-  unsigned int filled = gcfilledblocks[gctopcore];
-  unsigned int topaddr = gcloads[gctopcore];
-  unsigned int blkidx;
-  BLOCKINDEX(topaddr, &blkidx);
-  // free bytes left in the block that holds the heap top
-  unsigned int left;
-  if(blkidx < NUMCORES4GC) {
-    left = (BAMBOO_SMEM_SIZE_L)-(topaddr%(BAMBOO_SMEM_SIZE_L));
-  } else {
-    left = (BAMBOO_SMEM_SIZE)-(topaddr%(BAMBOO_SMEM_SIZE));
-  }
-
-  // check if the top core finishes
-  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-  if(gccorestatus[gctopcore] != 0) {
-    GC_BAMBOO_DEBUGPRINT(0xd101);
-    GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
-    // the top core has to finish its own work first
-    compact2Heaptophelper_I(gctopcore, &topaddr, &filled, &left);
-    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    return;
-  }
-  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-
-  GC_BAMBOO_DEBUGPRINT(0xd102);
-  GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
-  GC_BAMBOO_DEBUGPRINT_REG(topaddr);
-  GC_BAMBOO_DEBUGPRINT_REG(blkidx);
-  GC_BAMBOO_DEBUGPRINT_REG(left);
-
-  int core = 0;
-  while(core < NUMCORES4GC) {
-    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    bool blocked = (gccorestatus[core] != 0) && (gcrequiredmems[core] > 0);
-    if(blocked) {
-      GC_BAMBOO_DEBUGPRINT(0xd103);
-      compact2Heaptophelper_I(core, &topaddr, &filled, &left);
-      if(gccorestatus[gctopcore] != 0) {
-        // the top core is not free now
-        GC_BAMBOO_DEBUGPRINT(0xd101);
-        GC_BAMBOO_DEBUGPRINT_REG(gctopcore);
-        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-        return;
-      }
-    }
-    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    core++;
-  }
-  GC_BAMBOO_DEBUGPRINT(0xd106);
-} // void compact2Heaptop()
-
-// Pairs cores that finished with unused blocks (gcfilledblocks < gcstopblock)
-// with unfinished cores that filled all their blocks and still require
-// memory.  Two scan positions walk the core table in lock step; each match
-// hands the spare memory of the source core to the waiting core through
-// assignSpareMem_I() and starts the move.  If the scan ends without seeing a
-// running core and without any match, the collector switches to
-// SUBTLECOMPACTPHASE and calls compact2Heaptop().
-inline void resolvePendingMoveRequest() {
-  GC_BAMBOO_DEBUGPRINT(0xeb01);
-  GC_BAMBOO_DEBUGPRINT(0xeeee);
-  for(int core = 0; core < NUMCORES4GC; core++) {
-    GC_BAMBOO_DEBUGPRINT(0xf000+core);
-    GC_BAMBOO_DEBUGPRINT_REG(gccorestatus[core]);
-    GC_BAMBOO_DEBUGPRINT_REG(gcloads[core]);
-    GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[core]);
-    GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[core]);
-  }
-  GC_BAMBOO_DEBUGPRINT(0xffff);
-
-  int src = 0;                    // scan position: cores with spare mem
-  int dst = 0;                    // scan position: cores with pending moves
-  bool hasspare = false;          // a source core is currently selected
-  bool haspending = false;        // a destination core is currently selected
-  bool hasrunning = false;        // saw an unfinished core that is not blocked
-  bool matched = false;           // at least one request was resolved
-  unsigned int dstcore = 0;       // the core who need spare mem
-  unsigned int sourcecore = 0;    // the core who has spare mem
-
-  while((src < NUMCORES4GC) && (dst < NUMCORES4GC)) {
-    if(!hasspare) {
-      // a finished core that did not fill all of its blocks has spare mem
-      if((gccorestatus[src] == 0) && (gcfilledblocks[src] < gcstopblock[src])) {
-        hasspare = true;
-        sourcecore = src;
-      }
-      src++;
-    }
-    if(!haspending) {
-      if(gccorestatus[dst] != 0) {
-        // not finished: either blocked on a pending move or still running
-        if((gcfilledblocks[dst]==gcstopblock[dst])&&(gcrequiredmems[dst]>0)) {
-          dstcore = dst;
-          haspending = true;
-        } else {
-          hasrunning = true;
-        }
-      }
-      dst++;
-    }
-    if(!(hasspare && haspending)) {
-      continue;
-    }
-
-    // found a match
-    unsigned int tomove = 0;
-    unsigned int startaddr = 0;
-    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
-                                               gcrequiredmems[dstcore],
-                                               &tomove,
-                                               &startaddr);
-    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    GC_BAMBOO_DEBUGPRINT(0xeb02);
-    GC_BAMBOO_DEBUGPRINT_REG(sourcecore);
-    GC_BAMBOO_DEBUGPRINT_REG(dstcore);
-    GC_BAMBOO_DEBUGPRINT_REG(startaddr);
-    GC_BAMBOO_DEBUGPRINT_REG(tomove);
-    if(STARTUPCORE != dstcore) {
-      GC_BAMBOO_DEBUGPRINT(0xeb04);
-      send_msg_4(dstcore, GCMOVESTART, sourcecore,
-                 startaddr, tomove, false);
-    } else {
-      GC_BAMBOO_DEBUGPRINT(0xeb03);
-      gcdstcore = sourcecore;
-      gctomove = true;
-      gcmovestartaddr = startaddr;
-      gcblock2fill = tomove;
-    }
-    gcmovepending--;
-    // both selections are consumed; keep scanning for the next pair
-    hasspare = false;
-    haspending = false;
-    matched = true;
-  }
-  GC_BAMBOO_DEBUGPRINT(0xcccc);
-  GC_BAMBOO_DEBUGPRINT_REG(hasrunning);
-  GC_BAMBOO_DEBUGPRINT_REG(haspending);
-  GC_BAMBOO_DEBUGPRINT_REG(matched);
-
-  if(hasrunning || matched) {
-    return;
-  }
-  gcphase = SUBTLECOMPACTPHASE;
-  compact2Heaptop();
-} // void resolvePendingMoveRequest()
-/* Cursor over this core's part of the shared heap.  The compaction helpers
- * in this file (nextSBlock, initOrig_Dst, nextBlock, moveobj) use one
- * instance for the source position ("orig") and one for the destination
- * position ("to").
- * NOTE(review): initOrig_Dst sets to->bound to a size (BAMBOO_SMEM_SIZE_L)
- * but orig->bound to an address (base + bamboo_smemtbl entry), so top/bound
- * appear to be interpreted differently for the two cursors -- confirm.
- */
-struct moveHelper {
-  unsigned int numblocks;       // block num for heap (passed to BASEPTR)
-  unsigned int base;       // base virtual address of current heap block
-  unsigned int ptr;       // virtual address of current position / heap top
-  unsigned int offset;       // offset in current heap block
-  unsigned int blockbase;   // virtual address of current small block to check
-  unsigned int blockbound;     // bound virtual address of current small block
-  unsigned int sblockindex;       // index of the small block (into gcsbstarttbl)
-  unsigned int top;       // real size of current heap block to check
-  unsigned int bound;       // bound size of current heap block to check
-}; // struct moveHelper
-
-// If out of boundary of valid shared memory, return false, else return true
-inline bool nextSBlock(struct moveHelper * orig) {
-  orig->blockbase = orig->blockbound;
-
-  bool sbchanged = false;
-  unsigned int origptr = orig->ptr;
-  unsigned int blockbase = orig->blockbase;
-  unsigned int blockbound = orig->blockbound;
-  unsigned int bound = orig->bound;
-  GC_BAMBOO_DEBUGPRINT(0xecc0);
-  GC_BAMBOO_DEBUGPRINT_REG(blockbase);
-  GC_BAMBOO_DEBUGPRINT_REG(blockbound);
-  GC_BAMBOO_DEBUGPRINT_REG(bound);
-  GC_BAMBOO_DEBUGPRINT_REG(origptr);
-outernextSBlock:
-  // check if across a big block
-  // TODO now do not zero out the whole memory, maybe the last two conditions
-  // are useless now
-  if((blockbase>=bound)||(origptr>=bound)
-         ||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
-innernextSBlock:
-    // end of current heap block, jump to next one
-    orig->numblocks++;
-    GC_BAMBOO_DEBUGPRINT(0xecc1);
-    GC_BAMBOO_DEBUGPRINT_REG(orig->numblocks);
-    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
-    GC_BAMBOO_DEBUGPRINT(orig->base);
-    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
-      // out of boundary
-      orig->ptr = orig->base; // set current ptr to out of boundary too
-      return false;
-    }
-    orig->blockbase = orig->base;
-    orig->sblockindex = 
-         (unsigned int)(orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
-    sbchanged = true;
-    unsigned int blocknum = 0;
-    BLOCKINDEX(orig->base, &blocknum);
-    if(bamboo_smemtbl[blocknum] == 0) {
-#ifdef GC_TBL_DEBUG
-         if(blocknum >= gcnumblock) {
-               BAMBOO_EXIT(0xb012);
-         }
-#endif
-      // goto next block
-      goto innernextSBlock;
-    }
-       // check the bamboo_smemtbl to decide the real bound
-       orig->bound = orig->base + bamboo_smemtbl[blocknum];
-  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
-    orig->sblockindex += 1;
-    sbchanged = true;
-  }  // if((orig->blockbase >= orig->bound) || (orig->ptr >= orig->bound)...
-
-  // check if this sblock should be skipped or have special start point
-  int sbstart = gcsbstarttbl[orig->sblockindex];
-#ifdef GC_TBL_DEBUG
-  if((orig->sblockindex) >= gcsbstarttbl_len) {
-       BAMBOO_EXIT(0xb013);
-  }
-#endif
-  if(sbstart == -1) {
-    // goto next sblock
-    GC_BAMBOO_DEBUGPRINT(0xecc2);
-    orig->sblockindex += 1;
-    orig->blockbase += BAMBOO_SMEM_SIZE;
-    goto outernextSBlock;
-  } else if((sbstart != 0) && (sbchanged)) {
-    // the first time to access this SBlock
-    GC_BAMBOO_DEBUGPRINT(0xecc3);
-    // not start from the very beginning
-    orig->blockbase = sbstart;
-  }  // if(gcsbstarttbl[orig->sblockindex] == -1) else ...
-
-  // setup information for this sblock
-  orig->blockbound = orig->blockbase+(unsigned int)*((int*)(orig->blockbase));
-  orig->offset = BAMBOO_CACHE_LINE_SIZE;
-  orig->ptr = orig->blockbase + orig->offset;
-  GC_BAMBOO_DEBUGPRINT(0xecc4);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->base);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbound);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->blockbase);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->offset);
-  if(orig->ptr >= orig->bound) {
-    // met a lobj, move to next block
-    goto innernextSBlock;
-  }
-
-  return true;
-} // bool nextSBlock(struct moveHelper * orig)
-
-// return false if there are no available data to compact
-inline bool initOrig_Dst(struct moveHelper * orig,
-                         struct moveHelper * to) {
-  // init the dst ptr
-  to->numblocks = 0;
-  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
-  to->bound = BAMBOO_SMEM_SIZE_L;
-  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
-
-  GC_BAMBOO_DEBUGPRINT(0xef01);
-  GC_BAMBOO_DEBUGPRINT_REG(to->base);
-  unsigned int tobase = to->base;
-  to->ptr = tobase + to->offset;
-#ifdef GC_CACHE_ADAPT
-  // initialize the gc_cache_revise_information
-  gc_cache_revise_infomation.to_page_start_va = to->ptr;
-  unsigned int toindex = (unsigned int)(tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
-  gc_cache_revise_infomation.to_page_end_va = (BAMBOO_PAGE_SIZE)*
-       (toindex+1);
-  gc_cache_revise_infomation.to_page_index = toindex;
-  gc_cache_revise_infomation.orig_page_start_va = -1;
-#endif // GC_CACHE_ADAPT
-
-  // init the orig ptr
-  orig->numblocks = 0;
-  orig->base = tobase;
-  unsigned int blocknum = 0;
-  BLOCKINDEX(orig->base, &blocknum);
-  unsigned int origbase = orig->base;
-  // check the bamboo_smemtbl to decide the real bound
-  orig->bound = origbase + (unsigned int)bamboo_smemtbl[blocknum];
-#ifdef GC_TBL_DEBUG
-  if((orig->sblockindex) >= gcsbstarttbl_len) {
-       BAMBOO_EXIT(0xb014);
-  }
-#endif
-  orig->blockbase = origbase;
-  orig->sblockindex = (unsigned int)(origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
-  GC_BAMBOO_DEBUGPRINT(0xef02);
-  GC_BAMBOO_DEBUGPRINT_REG(origbase);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->sblockindex);
-  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl);
-  GC_BAMBOO_DEBUGPRINT_REG(gcsbstarttbl[orig->sblockindex]);
-
-  int sbstart = gcsbstarttbl[orig->sblockindex];
-#ifdef GC_TBL_DEBUG
-  if((orig->sblockindex) >= gcsbstarttbl_len) {
-       BAMBOO_EXIT(0xb015);
-  }
-#endif
-  if(sbstart == -1) {
-    GC_BAMBOO_DEBUGPRINT(0xef03);
-    // goto next sblock
-    orig->blockbound =
-      gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
-    return nextSBlock(orig);
-  } else if(sbstart != 0) {
-    GC_BAMBOO_DEBUGPRINT(0xef04);
-    orig->blockbase = sbstart;
-  }
-  GC_BAMBOO_DEBUGPRINT(0xef05);
-  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
-  orig->offset = BAMBOO_CACHE_LINE_SIZE;
-  orig->ptr = orig->blockbase + orig->offset;
-  GC_BAMBOO_DEBUGPRINT(0xef06);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->base);
-
-  return true;
-} // bool initOrig_Dst(struct moveHelper * orig, struct moveHelper * to)
-
-// Moves the destination cursor `to` on to the next heap block of this core:
-// bumps the block count, reloads the block base through BASEPTR and places
-// the cursor one cache line behind that base.  top/bound grow by one cache
-// line (the header) and by BAMBOO_SMEM_SIZE relative to the previous bound.
-inline void nextBlock(struct moveHelper * to) {
-  const unsigned int prevbound = to->bound;
-
-  to->numblocks += 1;
-  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
-
-  to->top = prevbound + BAMBOO_CACHE_LINE_SIZE; // header!
-  to->bound = prevbound + BAMBOO_SMEM_SIZE;
-
-  to->offset = BAMBOO_CACHE_LINE_SIZE;
-  to->ptr = to->base + to->offset;
-} // void nextBlock(struct moveHelper * to)
-
-#ifdef GC_CACHE_ADAPT
-// Adds the sampling counters of the current orig page to those of the
-// current to page, once per active core.  Each counter is weighted by
-// current_ptr - to_page_start_va; consecutive cores are
-// size_cachesamplingtbl_local(_r) bytes apart in the two tables.
-inline void samplingDataConvert(unsigned int current_ptr) {
-  unsigned int weight =
-    current_ptr-gc_cache_revise_infomation.to_page_start_va;
-  unsigned int dstpage = gc_cache_revise_infomation.to_page_index;
-  unsigned int srcpage = gc_cache_revise_infomation.orig_page_index;
-  // byte cursors over the per-core slices of the revised / sampled tables
-  char * dst = (char *)&gccachesamplingtbl_r[dstpage];
-  char * src = (char *)&gccachesamplingtbl[srcpage];
-
-  int core = 0;
-  while(core < NUMCORESACTIVE) {
-    int * dstslot = (int *)dst;
-    int * srcslot = (int *)src;
-    *dstslot += (*srcslot)*weight;
-    dst += size_cachesamplingtbl_local_r;
-    src += size_cachesamplingtbl_local;
-    core++;
-  }
-} // inline void samplingDataConvert(unsigned int)
-
-// Closes the current page of the cache-adaption bookkeeping once a cursor
-// has reached the end of that page.
-//   orig, to    - source / destination cursors
-//   current_ptr - address handed to samplingDataConvert() as the end of the
-//                 range that is accounted to the current to page
-//   closeToPage - true: test to->ptr against to_page_end_va and also open a
-//                 new to page; false: test orig->ptr against orig_page_end_va
-// When the page end is reached the sampling data is folded over, a new orig
-// page is opened at orig->ptr and to_page_start_va is reset to to->ptr.
-inline void completePageConvert(struct moveHelper * orig,
-                                struct moveHelper * to,
-                                unsigned int current_ptr,
-                                bool closeToPage) {
-  unsigned int cursor = closeToPage ? to->ptr : orig->ptr;
-  unsigned int pageend = closeToPage ?
-    gc_cache_revise_infomation.to_page_end_va
-    : gc_cache_revise_infomation.orig_page_end_va;
-
-  if(cursor < pageend) {
-    // still inside the current page, nothing to do
-    return;
-  }
-
-  // end of an orig/to page
-  // compute the impact of this page for the new page
-  samplingDataConvert(current_ptr);
-
-  // prepare for an new orig page
-  unsigned int origindex =
-    (unsigned int)((unsigned int)orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
-  gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
-  gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
-    (BAMBOO_PAGE_SIZE)*(unsigned int)(origindex+1);
-  gc_cache_revise_infomation.orig_page_index = origindex;
-  gc_cache_revise_infomation.to_page_start_va = to->ptr;
-
-  if(!closeToPage) {
-    return;
-  }
-  // the to page was closed as well: open the page that contains to->ptr
-  gc_cache_revise_infomation.to_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
-    *(((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
-  gc_cache_revise_infomation.to_page_index =
-    ((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE);
-} // inline void completePageConvert(...)
-#endif // GC_CACHE_ADAPT
-
-// endaddr does not contain spaces for headers
-inline bool moveobj(struct moveHelper * orig,
-                    struct moveHelper * to,
-                    unsigned int stopblock) {
-  if(stopblock == 0) {
-    return true;
-  }
-
-  GC_BAMBOO_DEBUGPRINT(0xe201);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
-  GC_BAMBOO_DEBUGPRINT_REG(to->ptr);
-#ifdef GC_TBL_DEBUG
-  unsigned int bkptr = (unsigned int)(orig->ptr);
-
-  if((unsigned int)(to->ptr) > (unsigned int)(orig->ptr)) {
-       tprintf("Error to->ptr > orig->ptr: %x, %x \n", (int)(to->ptr), (int)(orig->ptr));
-       BAMBOO_EXIT(0xb016);
-  }
-#endif
-
-  int type = 0;
-  unsigned int size = 0;
-  unsigned int isize = 0;
-innermoveobj:
-  /*while((*((char*)(orig->ptr))) == (char)(-2)) {
-       orig->ptr = (unsigned int)((void*)(orig->ptr) + 1);
-  }*/
-#ifdef GC_CACHE_ADAPT
-  completePageConvert(orig, to, to->ptr, false);
-#endif
-  unsigned int origptr = (unsigned int)(orig->ptr);
-  unsigned int origbound = (unsigned int)orig->bound;
-  unsigned int origblockbound = (unsigned int)orig->blockbound;
-  if((origptr >= origbound) || (origptr == origblockbound)) {
-    if(!nextSBlock(orig)) {
-      // finished, no more data
-#ifdef GC_TBL_DEBUG
-         tprintf("AAAA %x \n", (int)(orig->ptr));
-#endif
-      return true;
-    }
-    goto innermoveobj;
-  }
-  GC_BAMBOO_DEBUGPRINT(0xe202);
-  GC_BAMBOO_DEBUGPRINT_REG(origptr);
-  GC_BAMBOO_DEBUGPRINT(((int *)(origptr))[0]);
-  // check the obj's type, size and mark flag
-  type = ((int *)(origptr))[0];
-  size = 0;
-  if(type == 0) {
-       // end of this block, go to next one
-    if(!nextSBlock(orig)) {
-      // finished, no more data
-#ifdef GC_TBL_DEBUG
-         tprintf("BBBB %x \n", (int)(orig->ptr));
-#endif
-      return true;
-    }
-    goto innermoveobj;
-  } else if(type < NUMCLASSES) {
-    // a normal object
-    size = classsize[type];
-  } else {
-    // an array
-    struct ArrayObject *ao=(struct ArrayObject *)(origptr);
-    unsigned int elementsize=classsize[type];
-    unsigned int length=ao->___length___;
-    size=(unsigned int)sizeof(struct ArrayObject)
-         +(unsigned int)(length*elementsize);
-  }
-  GC_BAMBOO_DEBUGPRINT(0xe203);
-  GC_BAMBOO_DEBUGPRINT_REG(origptr);
-  GC_BAMBOO_DEBUGPRINT_REG(size);
-  ALIGNSIZE(size, &isize);       // no matter is the obj marked or not
-                                 // should be able to across
-#ifdef GC_TBL_DEBUG
-  int sindex = OBJMAPPINGINDEX((unsigned int)bkptr);
-  int eindex = OBJMAPPINGINDEX((unsigned int)(origptr));
-  for(int tmpi = sindex+1; tmpi < eindex; tmpi++) {
-       if((gcmappingtbl[tmpi] != 0) && 
-               (hostcore(gcbaseva+bamboo_baseobjsize*tmpi)==BAMBOO_NUM_OF_CORE) && 
-               (hostcore(gcbaseva+bamboo_baseobjsize*(tmpi+1))==BAMBOO_NUM_OF_CORE)) {
-         tprintf("Error moveobj --: %x, %x, %x, %d, %x \n", (int)bkptr, 
-                 (int)origptr, (int)(gcbaseva+bamboo_baseobjsize*tmpi), 
-                 (int)gcmappingtbl[tmpi], (int)(*((char*)(bkptr))));
-         BAMBOO_EXIT(0xb017);
-       }
-  }
-#endif
-  if(((int *)(origptr))[BAMBOOMARKBIT] == MARKED) {
-       unsigned int totop = (unsigned int)to->top;
-       unsigned int tobound = (unsigned int)to->bound;
-    GC_BAMBOO_DEBUGPRINT(0xe204);
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if((STARTUPCORE != BAMBOO_NUM_OF_CORE) || gc_profile_flag) {
-#endif
-       gc_num_liveobj++;
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-    // marked obj, copy it to current heap top
-    // check to see if remaining space is enough
-    if((unsigned int)(totop + isize) > tobound) {
-      // fill 0 indicating the end of this block
-      BAMBOO_MEMSET_WH(to->ptr,  '\0', tobound - totop);
-      // fill the header of this block and then go to next block
-      to->offset += tobound - totop;
-      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
-      (*((int*)(to->base))) = to->offset;
-#ifdef GC_CACHE_ADAPT
-         unsigned int tmp_ptr = to->ptr;
-#endif // GC_CACHE_ADAPT
-      nextBlock(to);
-#ifdef GC_CACHE_ADAPT
-         completePageConvert(orig, to, tmp_ptr, true);
-#endif // GC_CACHE_ADAPT
-      if(stopblock == to->numblocks) {
-               // already fulfilled the block
-#ifdef GC_TBL_DEBUG
-               tprintf("CCCC %x \n", (int)(orig->ptr));
-#endif
-               return true;
-      }   // if(stopblock == to->numblocks)
-    }   // if(to->top + isize > to->bound)
-    // set the mark field to 2, indicating that this obj has been moved
-    // and need to be flushed
-    ((int *)(origptr))[BAMBOOMARKBIT] = COMPACTED;
-       unsigned int toptr = (unsigned int)to->ptr;
-#ifdef GC_TBL_DEBUG
-       {
-         // scan all pointers in ptr
-         unsigned int * tt_pointer;
-         tt_pointer=pointerarray[type];
-         if (tt_pointer==0) {
-               /* Array of primitives */
-               /* Do nothing */
-         } else if (((unsigned int)tt_pointer)==1) {
-               /* Array of pointers */
-               struct ArrayObject *ao=(struct ArrayObject *)(origptr);
-               int tt_length=ao->___length___;
-               int tt_j;
-               for(tt_j=0; tt_j<tt_length; tt_j++) {
-                 void *objptr =
-                       ((void **)(((char *)&ao->___length___)+sizeof(int)))[tt_j];
-                 if((objptr != 0) && 
-                         ((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0) || 
-                          (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 1))) {
-                       tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n", 
-                               (int)origptr, (int)objptr, __LINE__, tt_j, 
-                               ((int *)(origptr))[0], ((int *)(objptr))[0], 
-                               ((int *)(objptr))[BAMBOOMARKBIT], 
-                               gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)], 
-                               hostcore(objptr), BAMBOO_NUM_OF_CORE);
-                       BAMBOO_EXIT(0xb018);
-                 }
-               }
-         } else {
-               unsigned int tt_size=tt_pointer[0];
-               int tt_i;
-               for(tt_i=1; tt_i<=tt_size; tt_i++) {
-                 unsigned int tt_offset=tt_pointer[tt_i];
-                 void * objptr=*((void **)(((char *)origptr)+tt_offset));
-                 if((objptr!= 0) && 
-                         ((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0) || 
-                          (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 1))) {
-                       tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n", 
-                               (int)origptr, (int)objptr, __LINE__, tt_i,
-                               ((int *)(origptr))[0], ((int *)(objptr))[0],
-                               ((int *)(objptr))[BAMBOOMARKBIT], 
-                               gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)], 
-                               hostcore(objptr), BAMBOO_NUM_OF_CORE);
-                       BAMBOO_EXIT(0xb019);
-                 }
-               }
-         }     // if (pointer==0) else if ... else ...
-         {
-                 tt_pointer=pointerarray[OBJECTTYPE];
-                 //handle object class
-                 unsigned int tt_size=tt_pointer[0];
-                 int tt_i;
-                 for(tt_i=1; tt_i<=tt_size; tt_i++) {
-                       unsigned int tt_offset=tt_pointer[tt_i];
-                       void * objptr=*((void **)(((char *)origptr)+tt_offset));
-                       if((objptr!= 0) && 
-                         ((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0) || 
-                          (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 1))) {
-                         tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n", 
-                                 (int)origptr, (int)objptr, __LINE__, tt_i,
-                                 ((int *)(origptr))[0], ((int *)(objptr))[0],
-                                 ((int *)(objptr))[BAMBOOMARKBIT], 
-                                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)],
-                                 hostcore(objptr), BAMBOO_NUM_OF_CORE);
-                         BAMBOO_EXIT(0xb01a);
-                       }
-                 }
-         }
-       }
-       if((unsigned int)(toptr) > (unsigned int)(origptr)) {
-         tprintf("Error to->ptr > orig->ptr: %x, %x \n", (int)(toptr), 
-                 (int)(origptr));
-         BAMBOO_EXIT(0xb01b);
-       }
-#endif
-    if(toptr != origptr) {
-      if((unsigned int)(origptr) < (unsigned int)(toptr+size)) {
-               memmove(toptr, origptr, size);
-      } else {
-               memcpy(toptr, origptr, size);
-      }
-      // fill the remaining space with -2
-      BAMBOO_MEMSET_WH((unsigned int)(toptr+size), -2, isize-size);
-    }
-#ifdef GC_TBL_DEBUG
-       if((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)] != 2)) {
-         tprintf("Error moveobj: %x, %x, %d \n", (int)origptr, 
-                 ((int *)(origptr))[BAMBOOMARKBIT], 
-                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)]);
-         BAMBOO_EXIT(0xb01c);
-       }
-#endif
-    // store mapping info
-       gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)]=(unsigned int)toptr;
-#ifdef GC_TBL_DEBUG
-       if(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)] == 
-               gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)-1]) {
-         tprintf("Error moveobj ++ : %x, %x, %d \n", (int)origptr, (int)toptr, 
-                 OBJMAPPINGINDEX((unsigned int)origptr));
-         BAMBOO_EXIT(0xb01d);
-       }
-       // scan all pointers in ptr
-       unsigned int * tt_pointer;
-       tt_pointer=pointerarray[type];
-       if (tt_pointer==0) {
-         /* Array of primitives */
-         /* Do nothing */
-       } else if (((unsigned int)tt_pointer)==1) {
-         /* Array of pointers */
-         struct ArrayObject *ao=(struct ArrayObject *)(toptr);
-         int tt_length=ao->___length___;
-         int tt_j;
-         for(tt_j=0; tt_j<tt_length; tt_j++) {
-               void *objptr =
-                 ((void **)(((char *)&ao->___length___)+sizeof(int)))[tt_j];
-               if((objptr != 0) && 
-                       (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0)) {
-                 tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n", 
-                         (int)origptr, (int)objptr, __LINE__, tt_j, 
-                         ((int *)(origptr))[0], ((int *)(objptr))[0], 
-                         ((int *)(objptr))[BAMBOOMARKBIT], 
-                         gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)], 
-                         hostcore(objptr), BAMBOO_NUM_OF_CORE);
-                 BAMBOO_EXIT(0xb01e);
-               }
-         }
-       } else {
-         unsigned int tt_size=tt_pointer[0];
-         int tt_i;
-         for(tt_i=1; tt_i<=tt_size; tt_i++) {
-               unsigned int tt_offset=tt_pointer[tt_i];
-               void * objptr=*((void **)(((char *)toptr)+tt_offset));
-               if((objptr != 0) && 
-                       (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0)) {
-                 tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n", 
-                         (int)origptr, (int)objptr, __LINE__, tt_i, 
-                         ((int *)(origptr))[0], ((int *)(objptr))[0], 
-                         ((int *)(objptr))[BAMBOOMARKBIT], 
-                         gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)], 
-                         hostcore(objptr), BAMBOO_NUM_OF_CORE);
-                 BAMBOO_EXIT(0xb01f);
-               }
-         }
-       }     // if (pointer==0) else if ... else ...
-       {
-                 tt_pointer=pointerarray[OBJECTTYPE];
-                 //handle object class
-                 unsigned int tt_size=tt_pointer[0];
-                 int tt_i;
-                 for(tt_i=1; tt_i<=tt_size; tt_i++) {
-                       unsigned int tt_offset=tt_pointer[tt_i];
-                       void * objptr=*((void **)(((char *)origptr)+tt_offset));
-                       if((objptr!= 0) && 
-                         ((gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 0) || 
-                          (gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)] == 1))) {
-                         tprintf("Error moveobj, missing live obj ++: %x, %x, %d, %d, %d, %d, %d, %d, %d, %d \n", 
-                                 (int)origptr, (int)objptr, __LINE__, tt_i,
-                                 ((int *)(origptr))[0], ((int *)(objptr))[0],
-                                 ((int *)(objptr))[BAMBOOMARKBIT], 
-                                 gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)],
-                                 hostcore(objptr), BAMBOO_NUM_OF_CORE);
-                         BAMBOO_EXIT(0xb020);
-                       }
-                 }
-         }
-       if(!ISSHAREDOBJ(toptr)) {
-         tprintf("Error: %x, %x \n", (int)origptr, (int)toptr);
-         BAMBOO_EXIT(0xb021);
-       }
-#endif
-       GC_BAMBOO_DEBUGPRINT(0xcdce);
-    GC_BAMBOO_DEBUGPRINT_REG(origptr);
-    GC_BAMBOO_DEBUGPRINT_REG(toptr);
-    GC_BAMBOO_DEBUGPRINT_REG(isize);
-    gccurr_heaptop -= isize;
-    to->ptr += isize;
-    to->offset += isize;
-    to->top += isize;
-#ifdef GC_CACHE_ADAPT
-       unsigned int tmp_ptr = to->ptr;
-#endif // GC_CACHE_ADAPT
-    if(to->top == to->bound) {
-      // fill the header of this block and then go to next block
-      BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
-      (*((int*)(to->base))) = to->offset;
-      nextBlock(to);
-    }
-#ifdef GC_CACHE_ADAPT
-       completePageConvert(orig, to, tmp_ptr, true);
-#endif // GC_CACHE_ADAPT
-  } // if(mark == 1)
-#ifdef GC_TBL_DEBUG
-  else {
-       // skip the whole obj
-       int sindex = OBJMAPPINGINDEX((unsigned int)origptr);
-       int eindex = OBJMAPPINGINDEX((unsigned int)(origptr+size));
-       for(int tmpi = sindex; tmpi < eindex; tmpi++) {
-         if((gcmappingtbl[tmpi] != 0) && 
-                 (hostcore(gcbaseva+bamboo_baseobjsize*tmpi)==BAMBOO_NUM_OF_CORE) && 
-                 (hostcore(gcbaseva+bamboo_baseobjsize*(tmpi+1))==BAMBOO_NUM_OF_CORE))
-         {
-               tprintf("Error moveobj **: %x, %x, %x, %d, (%d, %d, %x) \n", 
-                       (int)origptr, (int)(origptr+isize), 
-                       (int)(gcbaseva+bamboo_baseobjsize*tmpi), gcmappingtbl[tmpi], type,
-                       isize, ((int *)(origptr))[BAMBOOMARKBIT]);
-               BAMBOO_EXIT(0xb022);
-         }
-       }
-  }
-#endif
-  GC_BAMBOO_DEBUGPRINT(0xe205);
-  
-  // move to next obj
-  orig->ptr += isize; // size;
-
-#ifdef GC_TBL_DEBUG
-  if(!ISSHAREDOBJ(orig->ptr) || !ISSHAREDOBJ(to->ptr)) {
-       tprintf("Error moveobj out of boundary: %x, %x, %d, %d \n", 
-               (int)(orig->ptr), (int)(to->ptr), size, isize);
-       BAMBOO_EXIT(0x2022);
-  }
-#endif
-
-  GC_BAMBOO_DEBUGPRINT_REG(isize);
-  GC_BAMBOO_DEBUGPRINT_REG(size);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->bound);
-  if(((unsigned int)(orig->ptr) > (unsigned int)(orig->bound))
-         || ((unsigned int)(orig->ptr) == (unsigned int)(orig->blockbound))) {
-    GC_BAMBOO_DEBUGPRINT(0xe206);
-    if(!nextSBlock(orig)) {
-      // finished, no more data
-#ifdef GC_TBL_DEBUG
-         tprintf("DDDD %x \n", (int)(orig->ptr));
-#endif
-      return true;
-    }
-  }
-  GC_BAMBOO_DEBUGPRINT(0xe207);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
-  return false;
-} //bool moveobj(struct moveHelper* orig,struct moveHelper* to,int* endaddr)
-
-// Reserve room for a compaction request on core `sourcecore`.
-// Should be invoked with interrupts closed.
-//
-//   sourcecore  - core whose heap top (gcloads) / filled-block count
-//                 (gcfilledblocks) are consulted and updated
-//   requiredmem - number of bytes requested.  The parameter is declared as a
-//                 pointer, but the caller in gcfindSpareMem_I passes a plain
-//                 byte count, so it is converted to an integer exactly once
-//                 here; using it directly in arithmetic would be scaled by
-//                 sizeof(unsigned int).
-//   tomove      - out: gcfilledblocks[sourcecore] + 1 (before any update)
-//   startaddr   - out: gcloads[sourcecore] (before any update)
-//
-// Returns 0 when the request plus one cache-line header fits in what is left
-// of the current block; otherwise moves the core on to its next block and
-// returns requiredmem minus the space that was left.
-inline int assignSpareMem_I(unsigned int sourcecore,
-                            unsigned int * requiredmem,
-                            unsigned int * tomove,
-                            unsigned int * startaddr) {
-  const unsigned int reqbytes = (unsigned int)requiredmem;
-  unsigned int b = 0;
-  BLOCKINDEX(gcloads[sourcecore], &b);
-  unsigned int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
-                : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
-  unsigned int remain = boundptr - gcloads[sourcecore];
-  // the request also needs one cache line for the block header
-  unsigned int memneed = reqbytes + BAMBOO_CACHE_LINE_SIZE;
-  *startaddr = gcloads[sourcecore];
-  *tomove = gcfilledblocks[sourcecore] + 1;
-  if(memneed < remain) {
-    gcloads[sourcecore] += memneed;
-    return 0;
-  } else {
-    // next available block
-    gcfilledblocks[sourcecore] += 1;
-    unsigned int newbase = 0;
-    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
-    gcloads[sourcecore] = newbase;
-    return reqbytes-remain;
-  }
-} // int assignSpareMem_I(unsigned int, unsigned int *, unsigned int *, unsigned int *)
-
-// Caller must have interrupts closed.
-// Walks the GC cores looking for one that has stopped (gccorestatus == 0)
-// and has filled fewer blocks than its gcstopblock.  The first such core
-// gets the request via assignSpareMem_I and is reported through *dstcore;
-// the function then returns true.  If no core qualifies, the request size is
-// parked in gcrequiredmems[requiredcore], gcmovepending is incremented and
-// false is returned.
-inline bool gcfindSpareMem_I(unsigned int * startaddr,
-                             unsigned int * tomove,
-                             unsigned int * dstcore,
-                             unsigned int requiredmem,
-                             unsigned int requiredcore) {
-  for(int core = 0; core < NUMCORES4GC; core++) {
-    if(gccorestatus[core] != 0) {
-      // this core has not stopped yet
-      continue;
-    }
-    if(!(gcfilledblocks[core] < gcstopblock[core])) {
-      // stopped, but no block left to hand out
-      continue;
-    }
-    assignSpareMem_I(core, requiredmem, tomove, startaddr);
-    *dstcore = core;
-    return true;
-  }
-
-  // nothing available right now: hold the request
-  gcrequiredmems[requiredcore] = requiredmem;
-  gcmovepending++;
-  return false;
-} //bool gcfindSpareMem_I(...)
-
-inline bool compacthelper(struct moveHelper * orig,    // source cursor, advanced by moveobj
-                          struct moveHelper * to,      // destination cursor being filled
-                          int * filledblocks,          // out: to->numblocks, written while *localcompact is true
-                          unsigned int * heaptopptr,   // out: to->ptr, written while *localcompact is true
-                          bool * localcompact) {       // in/out: set below to (gcdstcore == this core)
-  // Scan over all objs from orig->ptr on and compact the marked objs into "to".
-  // The loop stops when orig->ptr reaches gcmarkedptrbound or moveobj reports stop
-  // (gcstopblock fulfilled). Returns false only when the startup core finds no spare mem.
-  GC_BAMBOO_DEBUGPRINT(0xe101);
-  GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
-  GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
-innercompact:
-  while((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
-    bool stop = moveobj(orig, to, gcblock2fill);
-    if(stop) {
-      break;
-    }
-  }
-#ifdef GC_TBL_DEBUG
-  //tprintf("finish mark %x \n", (int)gcmarkedptrbound);
-#endif
-#ifdef GC_CACHE_ADAPT
-  // end of a "to" page, wrap up its information
-  samplingDataConvert(to->ptr);
-#endif // GC_CACHE_ADAPT
-  // if no objs have been compacted into this block, undo the header reservation;
-  // otherwise, zero the header and store the block's used size in it
-  if(to->offset > (unsigned int)BAMBOO_CACHE_LINE_SIZE) {
-    BAMBOO_MEMSET_WH(to->base, '\0', BAMBOO_CACHE_LINE_SIZE);
-    (*((int*)(to->base))) = to->offset;
-  } else {
-    to->offset = 0;
-    to->ptr = to->base;
-    to->top -= BAMBOO_CACHE_LINE_SIZE;
-  }  // if(to->offset > BAMBOO_CACHE_LINE_SIZE) else ...
-  if(*localcompact) {
-    *heaptopptr = to->ptr;
-    *filledblocks = to->numblocks;
-  }
-  GC_BAMBOO_DEBUGPRINT(0xe102);
-  GC_BAMBOO_DEBUGPRINT_REG(orig->ptr);
-  GC_BAMBOO_DEBUGPRINT_REG(gcmarkedptrbound);
-  GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
-  GC_BAMBOO_DEBUGPRINT_REG(*filledblocks);
-  GC_BAMBOO_DEBUGPRINT_REG(gccurr_heaptop);
-
-  // report the outcome of this round: the startup core updates the shared
-  // tables directly, every other core sends GCFINISHCOMPACT to the startup core
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-    gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
-    gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
-    if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
-      GC_BAMBOO_DEBUGPRINT(0xe103);
-      // ask for more mem
-      gctomove = false;
-      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-      if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
-                          gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
-               GC_BAMBOO_DEBUGPRINT(0xe104);
-               gctomove = true;
-      } else {
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               GC_BAMBOO_DEBUGPRINT(0xe105);
-               return false;
-      }
-      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    } else {
-      GC_BAMBOO_DEBUGPRINT(0xe106);
-      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-      gctomove = false;
-      return true;
-    }
-  } else {
-    if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
-      GC_BAMBOO_DEBUGPRINT(0xe107);
-      // ask for more mem: the message carries gccurr_heaptop as the amount still needed
-      gctomove = false;
-      send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-                 *filledblocks, *heaptopptr, gccurr_heaptop, false);
-    } else {
-      GC_BAMBOO_DEBUGPRINT(0xe108);
-      GC_BAMBOO_DEBUGPRINT_REG(*heaptopptr);
-      // finish compacting: same message with a required size of 0
-      send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-                 *filledblocks, *heaptopptr, 0, false);
-    }
-  }       // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
-
-  if(orig->ptr < gcmarkedptrbound) {
-    GC_BAMBOO_DEBUGPRINT(0xe109);
-    // still have unpacked obj
-    while(true) {  // busy-wait until gctomove is set
-      if(gctomove) {  // NOTE(review): on the non-startup path nothing here sets it - presumably a message handler does; confirm
-               break;
-      }
-    }
-    ;
-       gctomove = false;
-    GC_BAMBOO_DEBUGPRINT(0xe10a);
-
-    to->ptr = gcmovestartaddr;  // re-target "to" at the granted spare memory
-    to->numblocks = gcblock2fill - 1;
-    to->bound = (to->numblocks==0) ?
-                BAMBOO_SMEM_SIZE_L :
-                BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
-    BASEPTR(gcdstcore, to->numblocks, &(to->base));
-    to->offset = to->ptr - to->base;
-    to->top = (to->numblocks==0) ?
-              (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
-    to->base = to->ptr;  // the new region starts at the granted address
-    to->offset = BAMBOO_CACHE_LINE_SIZE;
-    to->ptr += to->offset;   // for header
-    to->top += to->offset;
-    if(gcdstcore == BAMBOO_NUM_OF_CORE) {
-      *localcompact = true;
-    } else {
-      *localcompact = false;
-    }
-#ifdef GC_CACHE_ADAPT
-       // initialize the gc_cache_revise_information
-       gc_cache_revise_infomation.to_page_start_va = (unsigned int)to->ptr;
-       gc_cache_revise_infomation.to_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
-         *(((unsigned int)(to->base)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
-       gc_cache_revise_infomation.to_page_index = 
-         ((unsigned int)(to->base)-gcbaseva)/(BAMBOO_PAGE_SIZE);
-       gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
-       gc_cache_revise_infomation.orig_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
-         *(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
-       gc_cache_revise_infomation.orig_page_index = 
-         ((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
-#endif // GC_CACHE_ADAPT
-    goto innercompact;  // continue compacting into the new destination
-  }
-  GC_BAMBOO_DEBUGPRINT(0xe10b);
-  return true;
-} // bool compacthelper()
-
-inline void compact() {
-  if(COMPACTPHASE != gcphase) {
-    BAMBOO_EXIT(0xb023);
-  }
-
-  // initialize pointers for comapcting
-  struct moveHelper * orig =
-    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
-  struct moveHelper * to =
-    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
-  if(!initOrig_Dst(orig, to)) {
-    // no available data to compact
-    // send compact finish msg to STARTUP core
-    GC_BAMBOO_DEBUGPRINT(0xe001);
-    GC_BAMBOO_DEBUGPRINT_REG(to->base);
-    send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
-               0, to->base, 0, false);
-    RUNFREE(orig);
-    RUNFREE(to);
-    return;
-  }
-#ifdef GC_CACHE_ADAPT
-  gc_cache_revise_infomation.orig_page_start_va = (unsigned int)orig->ptr;
-  gc_cache_revise_infomation.orig_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
-       *(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
-  gc_cache_revise_infomation.orig_page_index = 
-       ((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
-#endif // GC_CACHE_ADAPT
-
-  unsigned int filledblocks = 0;
-  unsigned int heaptopptr = 0;
-  bool localcompact = true;
-  compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
-  RUNFREE(orig);
-  RUNFREE(to);
-} // compact()
-
-// Translate an object pointer to its post-compaction address using
-// gcmappingtbl.
-//
-// Returns NULL when
-//   1. objptr is NULL
-//   2. objptr is not a shared obj
-// in these cases keeping the original value is OK.
-// When objptr is shared but its table entry is not a shared address, the
-// runtime is terminated (0xb025 under GC_TBL_DEBUG, 0xb026 otherwise).
-// Under GC_TBL_DEBUG the extra arguments (caller line, referencing pointer,
-// slot index) are only used in diagnostics, and a non-shared objptr is
-// treated as fatal (0xb027).
-#ifdef GC_TBL_DEBUG
-inline void * flushObj(void * objptr, int linenum, void * ptr, int tt) {
-#else
-inline void * flushObj(void * objptr) {
-#endif
-  GC_BAMBOO_DEBUGPRINT(0xe401);
-  if(objptr == NULL) {
-    return NULL;
-  }
-  void * dstptr = NULL;
-  if(ISSHAREDOBJ(objptr)) {
-    GC_BAMBOO_DEBUGPRINT(0xe402);
-    GC_BAMBOO_DEBUGPRINT_REG(objptr);
-    // a shared obj ptr, change to new address
-    dstptr = (void *)gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)];
-    GC_BAMBOO_DEBUGPRINT_REG(dstptr);
-#ifdef GC_TBL_DEBUG
-    // the first word of the destination must be a valid type id
-    if(ISSHAREDOBJ(dstptr) && ((unsigned int)(((int*)dstptr)[0]) >= (unsigned int)NUMTYPES)) {
-      tprintf("Error flushObj  ** : %x, %x, %d, %d, %d, %d, %x, %x, %x, %d, %x, %d %d \n", 
-              (int)objptr, (int)dstptr, ((int*)dstptr)[0], hostcore(objptr), 
-              hostcore(objptr)==BAMBOO_NUM_OF_CORE, 
-              OBJMAPPINGINDEX((unsigned int)objptr), (int)gcmappingtbl, 
-              &(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)]), 
-              (int)gcbaseva, linenum, (int)ptr, ((int*)ptr)[0], tt);
-      BAMBOO_EXIT(0xb024);
-    }
-#endif
-
-    if(!ISSHAREDOBJ(dstptr)) {
-#ifdef GC_TBL_DEBUG
-      tprintf("Error flushObj  ++ : %x, %x, %d, %d, %d, %x, %x, %x, %d, %x, %d %d \n", 
-              (int)objptr, (int)dstptr, hostcore(objptr), 
-              hostcore(objptr)==BAMBOO_NUM_OF_CORE, 
-              OBJMAPPINGINDEX((unsigned int)objptr), (int)gcmappingtbl, 
-              &(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)]), 
-              (int)gcbaseva, linenum, (int)ptr, ((int*)ptr)[0], tt);
-      tprintf("gcmappingtbl: \n");
-      // dump 100 table entries around the failing one, ten per row.  The
-      // indices are spelled out: several tmp++ in a single argument list
-      // are unsequenced and therefore undefined behaviour.
-      int tmp = OBJMAPPINGINDEX((unsigned int)objptr) - 50;
-      if(tmp < 0) {
-        // do not read in front of the table
-        tmp = 0;
-      }
-      for(int jj = 0; jj < 100; jj+=10) {
-        // the trailing %d is the index of the first entry of the row
-        tprintf("%8x, %8x, %8x, %8x, %8x, %8x, %8x, %8x, %8x, %8x, %d \n", 
-                (int)gcmappingtbl[tmp], (int)gcmappingtbl[tmp+1], 
-                (int)gcmappingtbl[tmp+2], (int)gcmappingtbl[tmp+3], 
-                (int)gcmappingtbl[tmp+4], (int)gcmappingtbl[tmp+5], 
-                (int)gcmappingtbl[tmp+6], (int)gcmappingtbl[tmp+7], 
-                (int)gcmappingtbl[tmp+8], (int)gcmappingtbl[tmp+9], tmp);
-        tmp += 10;
-      }
-      BAMBOO_EXIT(0xb025);
-#else
-      // no mapping info
-      GC_BAMBOO_DEBUGPRINT(0xe403);
-      GC_BAMBOO_DEBUGPRINT_REG(objptr);
-      GC_BAMBOO_DEBUGPRINT_REG(hostcore(objptr));
-      // error! the obj is right on this core, but cannot find it
-      GC_BAMBOO_DEBUGPRINT_REG(objptr);
-      tprintf("Error flushObj  ++ : %x, %x, %d, %d, %x, %x, %x, %x\n", 
-              (int)objptr, (int)dstptr, hostcore(objptr), 
-              hostcore(objptr)==BAMBOO_NUM_OF_CORE, 
-              OBJMAPPINGINDEX((unsigned int)objptr), (int)gcmappingtbl, 
-              &(gcmappingtbl[OBJMAPPINGINDEX((unsigned int)objptr)]), 
-              (int)gcbaseva);
-      BAMBOO_EXIT(0xb026);
-#endif
-    }  // if(!ISSHAREDOBJ(dstptr))
-  }   // if(ISSHAREDOBJ(objptr))
-#ifdef GC_TBL_DEBUG
-  else {
-    tprintf("Error flushObj: %x \n", (int)objptr);
-    BAMBOO_EXIT(0xb027);
-  }
-#endif
-  // if not a shared obj, return NULL to indicate no need to flush
-  GC_BAMBOO_DEBUGPRINT(0xe404);
-  return dstptr;
-} // void * flushObj(void * objptr)
-
-// Maps one root slot through flushObj, hiding the extra diagnostic
-// arguments flushObj takes under GC_TBL_DEBUG (the root itself is reported
-// as the referencing pointer, idx as the slot index).
-#ifdef GC_TBL_DEBUG
-#define GC_FLUSH_ROOT(p, idx) \
-  flushObj((void *)(p), __LINE__, (void *)(p), (idx))
-#else
-#define GC_FLUSH_ROOT(p, idx) flushObj((void *)(p))
-#endif
-
-// Redirect every pointer held by the runtime itself to the post-compaction
-// address of its object: the stack garbage lists starting at stackptr, the
-// static pointers in global_defs_p (startup core only), and - depending on
-// TASK / MGC - object sets, task descriptors, transfer queues, lock records,
-// thread locks, the current thread and the global thread queue.
-// A root is rewritten only when flushObj returns non-NULL; a NULL result
-// means the original value must be kept.
-inline void flushRuntimeObj(struct garbagelist * stackptr) {
-  int i,j;
-  // flush current stack
-  while(stackptr!=NULL) {
-    for(i=0; i<stackptr->size; i++) {
-      if(stackptr->array[i] != NULL) {
-        void * dst = GC_FLUSH_ROOT(stackptr->array[i], i);
-        if(dst != NULL) {
-          stackptr->array[i] = dst;
-        }
-      }
-    }
-    stackptr=stackptr->next;
-  }
-
-  // flush static pointers global_defs_p
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-    struct garbagelist * staticptr=(struct garbagelist *)global_defs_p;
-    for(i=0; i<staticptr->size; i++) {
-      if(staticptr->array[i] != NULL) {
-        void * dst = GC_FLUSH_ROOT(staticptr->array[i], i);
-        if(dst != NULL) {
-          staticptr->array[i] = dst;
-        }
-      }
-    }
-  }
-
-#ifdef TASK
-  // flush objectsets
-  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-    for(i=0; i<NUMCLASSES; i++) {
-      struct parameterwrapper ** queues =
-        objectqueues[BAMBOO_NUM_OF_CORE][i];
-      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
-      for(j = 0; j < length; ++j) {
-        struct parameterwrapper * parameter = queues[j];
-        struct ObjectHash * set=parameter->objectset;
-        struct ObjectNode * ptr=set->listhead;
-        while(ptr!=NULL) {
-          void * dst = GC_FLUSH_ROOT(ptr->key, 0);
-          if(dst != NULL) {
-            ptr->key = dst;
-          }
-          ptr=ptr->lnext;
-        }
-        // keys changed, so the set is rehashed
-        ObjectHashrehash(set);
-      }
-    }
-  }
-
-  // flush current task descriptor
-  if(currtpd != NULL) {
-    for(i=0; i<currtpd->numParameters; i++) {
-      void * dst = GC_FLUSH_ROOT(currtpd->parameterArray[i], i);
-      if(dst != NULL) {
-        currtpd->parameterArray[i] = dst;
-      }
-    }
-  }
-
-  // flush active tasks
-  if(activetasks != NULL) {
-    struct genpointerlist * ptr=activetasks->list;
-    while(ptr!=NULL) {
-      struct taskparamdescriptor *tpd=ptr->src;
-      int k;
-      for(k=0; k<tpd->numParameters; k++) {
-        void * dst = GC_FLUSH_ROOT(tpd->parameterArray[k], k);
-        if(dst != NULL) {
-          tpd->parameterArray[k] = dst;
-        }
-      }
-      ptr=ptr->inext;
-    }
-    genrehash(activetasks);
-  }
-
-  // flush cached transferred obj
-  struct QueueItem * tmpobjptr =  getHead(&objqueue);
-  while(tmpobjptr != NULL) {
-    struct transObjInfo * objInfo =
-      (struct transObjInfo *)(tmpobjptr->objectptr);
-    void * dst = GC_FLUSH_ROOT(objInfo->objptr, 0);
-    if(dst != NULL) {
-      objInfo->objptr = dst;
-    }
-    tmpobjptr = getNextQueueItem(tmpobjptr);
-  }
-
-  // flush cached objs to be transferred
-  struct QueueItem * item = getHead(totransobjqueue);
-  while(item != NULL) {
-    struct transObjInfo * totransobj =
-      (struct transObjInfo *)(item->objectptr);
-    void * dst = GC_FLUSH_ROOT(totransobj->objptr, 0);
-    if(dst != NULL) {
-      totransobj->objptr = dst;
-    }
-    item = getNextQueueItem(item);
-  }  // while(item != NULL)
-
-  // flush lock related info
-  for(i = 0; i < runtime_locklen; ++i) {
-    void * newlock = GC_FLUSH_ROOT(runtime_locks[i].redirectlock, i);
-    if(newlock != NULL) {
-      runtime_locks[i].redirectlock = (int)newlock;
-    }
-    if(runtime_locks[i].value != NULL) {
-      void * newvalue = GC_FLUSH_ROOT(runtime_locks[i].value, i);
-      if(newvalue != NULL) {
-        runtime_locks[i].value = (int)newvalue;
-      }
-    }
-  }
-#endif
-
-#ifdef MGC
-  // flush the bamboo_threadlocks
-  for(i = 0; i < bamboo_threadlocks.index; i++) {
-    void * dst = GC_FLUSH_ROOT(bamboo_threadlocks.locks[i].object, i);
-    if(dst != NULL) {
-      bamboo_threadlocks.locks[i].object = (struct ___Object___ *)dst;
-    }
-  }
-
-  // flush the bamboo_current_thread; like every other root it keeps its
-  // value when flushObj returns NULL instead of being overwritten with 0
-  if(bamboo_current_thread != 0) {
-    void * dst = GC_FLUSH_ROOT(bamboo_current_thread, 0);
-    if(dst != NULL) {
-      bamboo_current_thread = (unsigned int)dst;
-    }
-  }
-
-  // flush global thread queue
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-    // word 1 of the queue is read as the entry count, word 2 as the start
-    // index, and the entries themselves start at word 4
-    unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
-    if(thread_counter > 0) {
-      unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
-      for(i = thread_counter; i > 0; i--) {
-        void * dst = GC_FLUSH_ROOT(bamboo_thread_queue[4+start], 0);
-        if(dst != NULL) {
-          bamboo_thread_queue[4+start] = (INTPTR)dst;
-        }
-        start = (start+1)&bamboo_max_thread_num_mask;
-      }
-    }
-    // NOTE(review): no matching lock call is visible in this function -
-    // presumably the queue is locked by an earlier GC phase; confirm.
-    unlockthreadqueue();
-  }
-#endif
-} // void flushRuntimeObj(struct garbagelist * stackptr)
-#undef GC_FLUSH_ROOT
-
-// Maps a reference through flushObj, hiding the extra diagnostic arguments
-// flushObj takes under GC_TBL_DEBUG (holder = object containing the
-// reference, idx = slot index).
-#ifdef GC_TBL_DEBUG
-#define GC_FLUSH_REF(obj, holder, idx) \
-  flushObj((obj), __LINE__, (holder), (idx))
-#else
-#define GC_FLUSH_REF(obj, holder, idx) flushObj((obj))
-#endif
-
-// Rewrite every reference stored in the object at ptr (whose type id is
-// `type`) to the address flushObj reports; a reference is left untouched
-// when flushObj returns NULL.  pointerarray[type] selects the layout:
-// 0 = array of primitives, 1 = array of pointers, otherwise a table whose
-// entry 0 is the number of pointer fields and whose following entries are
-// their byte offsets.  The pointer fields listed for OBJECTTYPE are handled
-// afterwards for every object.  dbgcode is the first of five consecutive
-// trace codes emitted through GC_BAMBOO_DEBUGPRINT.
-static inline void flushObjRefs(void * ptr, int type, unsigned int dbgcode) {
-  unsigned int * pointer = pointerarray[type];
-  GC_BAMBOO_DEBUGPRINT(dbgcode);
-  GC_BAMBOO_DEBUGPRINT_REG(pointer);
-  if (pointer==0) {
-    /* Array of primitives */
-    /* Do nothing */
-  } else if (((unsigned int)pointer)==1) {
-    GC_BAMBOO_DEBUGPRINT(dbgcode+1);
-    /* Array of pointers */
-    struct ArrayObject *ao=(struct ArrayObject *) ptr;
-    int length=ao->___length___;
-    // the elements start right after the length field
-    void ** elems = (void **)(((char *)&ao->___length___)+sizeof(int));
-    for(int j=0; j<length; j++) {
-      GC_BAMBOO_DEBUGPRINT(dbgcode+2);
-      void * objptr = elems[j];
-      GC_BAMBOO_DEBUGPRINT_REG(objptr);
-      if(objptr != NULL) {
-        void * dst = GC_FLUSH_REF(objptr, ptr, j);
-        if(dst != NULL) {
-          elems[j] = dst;
-        }
-      }
-    }
-  } else {
-    GC_BAMBOO_DEBUGPRINT(dbgcode+3);
-    unsigned int size=pointer[0];
-    for(unsigned int i=1; i<=size; i++) {
-      GC_BAMBOO_DEBUGPRINT(dbgcode+4);
-      void ** field = (void **)(((char *)ptr)+pointer[i]);
-      void * objptr = *field;
-      GC_BAMBOO_DEBUGPRINT_REG(objptr);
-      if(objptr != NULL) {
-        void * dst = GC_FLUSH_REF(objptr, ptr, (int)i);
-        if(dst != NULL) {
-          *field = dst;
-        }
-      }
-    } // for(i=1; i<=size; i++)
-  }  // if (pointer==0) else if (((INTPTR)pointer)==1) else ()
-  {
-    //handle object class
-    pointer=pointerarray[OBJECTTYPE];
-    unsigned int size=pointer[0];
-    for(unsigned int i=1; i<=size; i++) {
-      void ** field = (void **)(((char *)ptr)+pointer[i]);
-      void * objptr = *field;
-      if(objptr != NULL) {
-        void * dst = GC_FLUSH_REF(objptr, ptr, (int)i);
-        if(dst != NULL) {
-          *field = dst;
-        }
-      }
-    }
-  }
-}
-
-// Flush phase of the collector on this core: redirects the runtime roots
-// (flushRuntimeObj), then drains the gc queue and the large-object queue,
-// rewriting the references inside every dequeued object and resetting its
-// mark field from COMPACTED to INIT.  Finally reports completion: the
-// startup core clears its own gccorestatus entry, every other core sends
-// GCFINISHFLUSH to the startup core.
-inline void flush(struct garbagelist * stackptr) {
-
-  flushRuntimeObj(stackptr);
-
-  while(true) {
-    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    bool hasItems = gc_moreItems_I();
-    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    if(!hasItems) {
-      break;
-    }
-
-    GC_BAMBOO_DEBUGPRINT(0xe301);
-    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    void * ptr = gc_dequeue_I();
-#ifdef GC_TBL_DEBUG
-    unsigned int bkptr = (unsigned int)ptr;
-#endif
-    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    if(ISSHAREDOBJ(ptr)) {
-      // should be a local shared obj and should have mapping info
-      ptr = GC_FLUSH_REF(ptr, ptr, 0);
-      GC_BAMBOO_DEBUGPRINT(0xe302);
-      GC_BAMBOO_DEBUGPRINT_REG(ptr);
-      if(ptr == NULL) {
-        BAMBOO_EXIT(0xb028);
-      }
-    } // if(ISSHAREDOBJ(ptr))
-    if((!ISSHAREDOBJ(ptr))||(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED)) {
-      int type = ((int *)(ptr))[0];
-#ifdef GC_TBL_DEBUG
-      if((unsigned int)type >= (unsigned int)NUMTYPES) {
-        tprintf("Error flushObj  %x, %x, %d, %d \n", bkptr, (int)ptr, type, 
-                ((int *)(ptr))[BAMBOOMARKBIT]);
-        BAMBOO_EXIT(0xb029);
-      }
-#endif
-      // scan all pointers in ptr (trace codes 0xe303..0xe307)
-      flushObjRefs(ptr, type, 0xe303);
-      // restore the mark field, indicating that this obj has been flushed
-      if(ISSHAREDOBJ(ptr)) {
-        ((int *)(ptr))[BAMBOOMARKBIT] = INIT;
-      }
-    }  //if((!ISSHAREDOBJ(ptr))||(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED))
-  }   // while(gc_moreItems())
-  GC_BAMBOO_DEBUGPRINT(0xe308);
-
-  // TODO bug here: the startup core contains all lobjs' info, thus all the
-  // lobjs are flushed in sequence.
-  // flush lobjs
-  while(gc_lobjmoreItems_I()) {
-    GC_BAMBOO_DEBUGPRINT(0xe309);
-    void * ptr = gc_lobjdequeue_I(NULL, NULL);
-    ptr = GC_FLUSH_REF(ptr, ptr, 0);
-    GC_BAMBOO_DEBUGPRINT(0xe30a);
-    GC_BAMBOO_DEBUGPRINT_REG(ptr);
-    // check for a missing mapping before the object header is read, so an
-    // enabled debug print cannot dereference NULL
-    if(ptr == NULL) {
-      BAMBOO_EXIT(0xb02a);
-    }
-    GC_BAMBOO_DEBUGPRINT_REG(((int *)(ptr))[0]);
-    if(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED) {
-      int type = ((int *)(ptr))[0];
-      // scan all pointers in ptr (trace codes 0xe30b..0xe30f)
-      flushObjRefs(ptr, type, 0xe30b);
-      // restore the mark field, indicating that this obj has been flushed
-      ((int *)(ptr))[BAMBOOMARKBIT] = INIT;
-    }     // if(((int *)(ptr))[BAMBOOMARKBIT] == COMPACTED)
-  }     // while(gc_lobjmoreItems())
-  GC_BAMBOO_DEBUGPRINT(0xe310);
-
-  // send flush finish message to core coordinator
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-  } else {
-    send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
-  }
-  GC_BAMBOO_DEBUGPRINT(0xe311);
-} // flush()
-#undef GC_FLUSH_REF
-
-#ifdef GC_CACHE_ADAPT
-// Prepare this core for a cache adaption step; the three actions run in this order:
-//   -- flush the shared heap out of the cache (BAMBOO_CACHE_FLUSH_L2)
-//   -- clean the dtlb entries (BAMBOO_CLEAN_DTLB)
-//   -- record the new cache stage: isgccachestage is stored into gccachestage
-void cacheAdapt_gc(bool isgccachestage) {
-  // flush the shared heap
-  BAMBOO_CACHE_FLUSH_L2();
-
-  // clean the dtlb entries
-  BAMBOO_CLEAN_DTLB();
-
-  // change the cache strategy (only the global flag is updated here)
-  gccachestage = isgccachestage;
-} // cacheAdapt_gc(bool isgccachestage)
-
-// The master core decides how to adapt the cache strategy for the mutator
-// according to the collected statistical data.
-
-// make all pages hfh
-int cacheAdapt_policy_h4h(){
-  unsigned int page_index = 0;
-  VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
-  unsigned int numchanged = 0;
-  int * tmp_p = gccachepolicytbl+1;
-  for(page_index = 0; page_index < page_num; page_index++) {
-       page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
-       bamboo_cache_policy_t policy = {0};
-       policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
-       *tmp_p = page_index;
-       tmp_p++;
-       *tmp_p = policy.word;
-       tmp_p++;
-       numchanged++;
-  }
-
-  return numchanged;
-} // int cacheAdapt_policy_h4h()
-
-// make all pages local as non-cache-adaptable gc local mode
-int cacheAdapt_policy_local(){
-  unsigned int page_index = 0;
-  VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
-  unsigned int numchanged = 0;
-  int * tmp_p = gccachepolicytbl+1;
-  for(page_index = 0; page_index < page_num; page_index++) {
-       page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
-       bamboo_cache_policy_t policy = {0};
-       unsigned int block = 0;
-       BLOCKINDEX(page_sva, &block);
-       unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-       // locally cache the page in the hotest core
-       // NOTE: (x,y) should be changed to (x+1, y+1)!!!
-       policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
-       policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
-       policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
-       *tmp_p = page_index;
-       tmp_p++;
-       *tmp_p = policy.word;
-       tmp_p++;
-       numchanged++;
-  }
-
-  return numchanged;
-} // int cacheAdapt_policy_local()
-
-int cacheAdapt_policy_hotest(){
-  unsigned int page_index = 0;
-  VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
-  unsigned int numchanged = 0;
-  int * tmp_p = gccachepolicytbl+1;
-  for(page_index = 0; page_index < page_num; page_index++) {
-       page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
-       bamboo_cache_policy_t policy = {0};
-       unsigned int hotestcore = 0;
-       unsigned int hotfreq = 0;
-
-       int *local_tbl=&gccachesamplingtbl_r[page_index];
-       for(int i = 0; i < NUMCORESACTIVE; i++) {
-         int freq = *local_tbl;
-         local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
-
-         // check the frequency, decide if this page is hot for the core
-         if(hotfreq < freq) {
-               hotfreq = freq;
-               hotestcore = i;
-         }
-       }
-       // TODO
-       // Decide the cache strategy for this page
-       // If decide to adapt a new cache strategy, write into the shared block of
-       // the gcsharedsamplingtbl. The mem recording information that has been 
-       // written is enough to hold the information.
-       // Format: page start va + cache strategy(hfh/(host core+[x,y]))
-       if(hotfreq == 0) {
-         // this page has not been accessed, do not change its cache policy
-         continue;
-       } else {
-         // locally cache the page in the hotest core
-         // NOTE: (x,y) should be changed to (x+1, y+1)!!!
-         policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
-         policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
-         policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
-         *tmp_p = page_index;
-         tmp_p++;
-         *tmp_p = policy.word;
-         tmp_p++;
-         numchanged++;
-       }
-  }
-
-  return numchanged;
-} // int cacheAdapt_policy_hotest()
-
-#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  50
-// cache the page on the core that accesses it the most if that core accesses 
-// it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
-// h4h the page.
-int cacheAdapt_policy_dominate(){
-  unsigned int page_index = 0;
-  VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
-  unsigned int numchanged = 0;
-  int * tmp_p = gccachepolicytbl+1;
-  for(page_index = 0; page_index < page_num; page_index++) {
-       page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
-       bamboo_cache_policy_t policy = {0};
-       unsigned int hotestcore = 0;
-       unsigned long long totalfreq = 0;
-       unsigned int hotfreq = 0;
-       
-       int *local_tbl=&gccachesamplingtbl_r[page_index];
-       for(int i = 0; i < NUMCORESACTIVE; i++) {
-         int freq = *local_tbl;
-         local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
-         totalfreq += freq;
-         // check the frequency, decide if this page is hot for the core
-         if(hotfreq < freq) {
-               hotfreq = freq;
-               hotestcore = i;
-         }
-       }
-
-       // Decide the cache strategy for this page
-       // If decide to adapt a new cache strategy, write into the shared block of
-       // the gcpolicytbl 
-       // Format: page start va + cache policy
-       if(hotfreq == 0) {
-         // this page has not been accessed, do not change its cache policy
-         continue;
-       }
-       totalfreq = 
-         (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
-       hotfreq/=BAMBOO_PAGE_SIZE;
-       if(hotfreq < totalfreq) {
-         // use hfh
-         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
-       } else {
-         // locally cache the page in the hotest core
-         // NOTE: (x,y) should be changed to (x+1, y+1)!!!
-         policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
-         policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
-         policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
-       }
-       *tmp_p = page_index;
-       tmp_p++;
-       *tmp_p = policy.word;
-       tmp_p++;
-       numchanged++;
-  }
-
-  return numchanged;
-} // int cacheAdapt_policy_dominate()
-
-#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
-
-void gc_quicksort(unsigned long long *array, 
-                     unsigned int left,
-                                 unsigned int right,
-                                 unsigned int offset) {
-  unsigned int pivot = 0;;
-  unsigned int leftIdx = left;
-  unsigned int rightIdx = right;
-  if((right-left+1) >= 1) {
-       pivot = (left+right)/2;
-       while((leftIdx <= pivot) && (rightIdx >= pivot)) {
-         unsigned long long pivotValue = array[pivot*3-offset];
-         while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
-               leftIdx++;
-         }
-         while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
-               rightIdx--;
-         }
-         // swap [leftIdx] & [rightIdx]
-         for(int k = 0; k < 3; k++) {
-               unsigned long long tmp = array[3*rightIdx-k];
-               array[3*rightIdx-k] = array[3*leftIdx-k];
-               array[3*leftIdx-k] = tmp;
-         }
-         leftIdx++;
-         rightIdx--;
-         if((leftIdx-1) == pivot) {
-               pivot = rightIdx = rightIdx + 1;
-         } else if((leftIdx+1) == pivot) {
-               pivot = leftIdx = leftIdx-1;
-         }
-       }
-       gc_quicksort(array, left, pivot-1, offset);
-       gc_quicksort(array, pivot+1, right, offset);
-  }
-  return;
-} // void gc_quicksort(...)
-
-// Every page cached on the core that accesses it the most. 
-// Check to see if any core's pages total more accesses than threshold 
-// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
-// most remote accesses and hash for home them until we get below 
-// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
-int cacheAdapt_policy_overload(){
-  unsigned int page_index = 0;
-  VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
-  unsigned int numchanged = 0;
-  int * tmp_p = gccachepolicytbl+1;
-  unsigned long long workload[NUMCORESACTIVE];
-  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
-  unsigned long long total_workload = 0;
-  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
-  memset(core2heavypages,0,
-         sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
-  for(page_index = 0; page_index < page_num; page_index++) {
-       page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
-       bamboo_cache_policy_t policy = {0};
-       unsigned int hotestcore = 0;
-       unsigned long long totalfreq = 0;
-       unsigned int hotfreq = 0;
-       
-       int *local_tbl=&gccachesamplingtbl_r[page_index];
-       for(int i = 0; i < NUMCORESACTIVE; i++) {
-         int freq = *local_tbl;
-         local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
-         totalfreq += freq;
-         // check the frequency, decide if this page is hot for the core
-         if(hotfreq < freq) {
-               hotfreq = freq;
-               hotestcore = i;
-         }
-       }
-       // Decide the cache strategy for this page
-       // If decide to adapt a new cache strategy, write into the shared block of
-       // the gcsharedsamplingtbl. The mem recording information that has been 
-       // written is enough to hold the information.
-       // Format: page start va + cache strategy(hfh/(host core+[x,y]))
-       if(hotfreq == 0) {
-         // this page has not been accessed, do not change its cache policy
-         continue;
-       }
-
-       totalfreq/=BAMBOO_PAGE_SIZE;
-       hotfreq/=BAMBOO_PAGE_SIZE;
-       // locally cache the page in the hotest core
-       // NOTE: (x,y) should be changed to (x+1, y+1)!!!
-       policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
-       policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
-       policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
-       *tmp_p = page_index;
-       tmp_p++;
-       *tmp_p = policy.word;
-       tmp_p++;
-       numchanged++;
-       workload[hotestcore] += totalfreq;
-       total_workload += totalfreq;
-       // append to core2heavypages; each core's list is sorted later (gc_quicksort)
-       unsigned long long remoteaccess = totalfreq - hotfreq;
-       unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
-       core2heavypages[hotestcore][3*index+3] = remoteaccess;
-       core2heavypages[hotestcore][3*index+2] = totalfreq;
-       core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
-       core2heavypages[hotestcore][0]++;
-  }
-
-  unsigned long long workload_threshold = 
-       total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
-  // Check the workload of each core
-  for(int i = 0; i < NUMCORESACTIVE; i++) {
-       int j = 1;
-       unsigned int index = (unsigned int)core2heavypages[i][0];
-       if(workload[i] > workload_threshold) {
-         // sort according to the remoteaccess
-         gc_quicksort(&core2heavypages[i][0], 1, index, 0);
-         while((workload[i] > workload_threshold) && (j<index*3)) {
-               // hfh those pages with more remote accesses 
-               bamboo_cache_policy_t policy = {0};
-               policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
-               *((unsigned int*)core2heavypages[i][j]) = policy.word;
-               workload[i] -= core2heavypages[i][j+1];
-               j += 3;
-         }
-       }
-  }
-
-  return numchanged;
-} // int cacheAdapt_policy_overload()
-
-#define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
-#define GC_CACHE_ADAPT_CROWD_THRESHOLD  20
-// Every page cached on the core that accesses it the most. 
-// Check to see if any core's pages total more accesses than threshold 
-// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
-// most remote accesses and hash for home them until we get below 
-// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  
-// Sort pages based on activity.... 
-// If more than GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
-// core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages, 
-// then start hfh these pages(selecting the ones with the most remote 
-// accesses first or fewest local accesses) until we get below 
-// GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
-int cacheAdapt_policy_crowd(){
-  unsigned int page_index = 0;
-  VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
-  unsigned int numchanged = 0;
-  int * tmp_p = gccachepolicytbl+1;
-  unsigned long long workload[NUMCORESACTIVE];
-  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
-  unsigned long long total_workload = 0;
-  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
-  memset(core2heavypages,0,
-         sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
-  for(page_index = 0; page_index < page_num; page_index++) {
-       page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
-       bamboo_cache_policy_t policy = {0};
-       unsigned int hotestcore = 0;
-       unsigned long long totalfreq = 0;
-       unsigned int hotfreq = 0;
-       
-       int *local_tbl=&gccachesamplingtbl_r[page_index];
-       for(int i = 0; i < NUMCORESACTIVE; i++) {
-         int freq = *local_tbl;
-         local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
-         totalfreq += freq;
-         // check the frequency, decide if this page is hot for the core
-         if(hotfreq < freq) {
-               hotfreq = freq;
-               hotestcore = i;
-         }
-       }
-       // Decide the cache strategy for this page
-       // If decide to adapt a new cache strategy, write into the shared block of
-       // the gcsharedsamplingtbl. The mem recording information that has been 
-       // written is enough to hold the information.
-       // Format: page start va + cache strategy(hfh/(host core+[x,y]))
-       if(hotfreq == 0) {
-         // this page has not been accessed, do not change its cache policy
-         continue;
-       }
-       totalfreq/=BAMBOO_PAGE_SIZE;
-       hotfreq/=BAMBOO_PAGE_SIZE;
-       // locally cache the page in the hotest core
-       // NOTE: (x,y) should be changed to (x+1, y+1)!!!
-       policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
-       policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
-       policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
-       *tmp_p = page_index;
-       tmp_p++;
-       *tmp_p = policy.word;
-       tmp_p++;
-       numchanged++;
-       workload[hotestcore] += totalfreq;
-       total_workload += totalfreq;
-       // append to core2heavypages; each core's list is sorted later (gc_quicksort)
-       unsigned long long remoteaccess = totalfreq - hotfreq;
-       unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
-       core2heavypages[hotestcore][3*index+3] = remoteaccess;
-       core2heavypages[hotestcore][3*index+2] = totalfreq;
-       core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
-       core2heavypages[hotestcore][0]++;
-  }
-
-  unsigned long long workload_threshold = 
-       total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
-  // Check the workload of each core
-  for(int i = 0; i < NUMCORESACTIVE; i++) {
-       int j = 1;
-       unsigned int index = (unsigned int)core2heavypages[i][0];
-       if(workload[i] > workload_threshold) {
-         // sort according to the remoteaccess
-         gc_quicksort(&core2heavypages[i][0], 1, index, 0);
-         while((workload[i] > workload_threshold) && (j<index*3)) {
-               // hfh those pages with more remote accesses 
-               bamboo_cache_policy_t policy = {0};
-               policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
-               *((unsigned int*)core2heavypages[i][j]) = policy.word;
-               workload[i] -= core2heavypages[i][j+1];
-               j += 3;
-         }
-       }
-
-       // Check if the accesses are crowded on few pages
-       // sort according to the total access
-inner_crowd:
-       gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
-       unsigned long long threshold = 
-         GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
-       int num_crowded = 0;
-       unsigned long long t_workload = 0;
-       do {
-         t_workload += core2heavypages[i][j+num_crowded*3+1];
-         num_crowded++;
-       } while(t_workload < threshold);
-       // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough 
-       // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
-       if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
-         // need to hfh these pages
-         // sort the pages according to remote access
-         gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
-         // h4h those pages with more remote accesses 
-         bamboo_cache_policy_t policy = {0};
-         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
-         *((unsigned int*)core2heavypages[i][j]) = policy.word;
-         workload[i] -= core2heavypages[i][j+1];
-         t_workload -= core2heavypages[i][j+1];
-         j += 3;
-         threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
-         goto inner_crowd;
-       }
-  }
-
-  return numchanged;
-} // int cacheAdapt_policy_crowd()
-
-void cacheAdapt_master() {
-#ifdef GC_CACHE_ADAPT_SAMPLING_OUTPUT
-  gc_output_cache_sampling_r();
-#endif // GC_CACHE_ADAPT_SAMPLING_OUTPUT
-  unsigned int numchanged = 0;
-  // check the statistic data
-  // for each page, decide the new cache strategy
-#ifdef GC_CACHE_ADAPT_POLICY1
-  numchanged = cacheAdapt_policy_h4h();
-#elif defined GC_CACHE_ADAPT_POLICY2
-  numchanged = cacheAdapt_policy_local();
-#elif defined GC_CACHE_ADAPT_POLICY3
-  numchanged = cacheAdapt_policy_hotest();
-#elif defined GC_CACHE_ADAPT_POLICY4
-  numchanged = cacheAdapt_policy_dominate();
-#elif defined GC_CACHE_ADAPT_POLICY5
-  numchanged = cacheAdapt_policy_overload();
-#elif defined GC_CACHE_ADAPT_POLICY6
-  numchanged = cacheAdapt_policy_crowd();
-#endif
-  *gccachepolicytbl = numchanged;
-}
-
-// adapt the cache strategy for the mutator
-void cacheAdapt_mutator() {
-  int numchanged = *gccachepolicytbl;
-  // check the changes and adapt them
-  int * tmp_p = gccachepolicytbl+1;
-  while(numchanged--) {
-       // read out the policy
-       int page_index = *tmp_p;
-       bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
-       // adapt the policy
-       bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva, 
-               policy, BAMBOO_PAGE_SIZE);
-
-       tmp_p += 2;
-  }
-}
-
-void gc_output_cache_sampling() {
-  unsigned int page_index = 0;
-  VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
-  for(page_index = 0; page_index < page_num; page_index++) {
-       page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
-       unsigned int block = 0;
-       BLOCKINDEX(page_sva, &block);
-       unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-       tprintf("va: %x page_index: %d host: %d\n", 
-               (int)page_sva, page_index, coren);
-       for(int i = 0; i < NUMCORESACTIVE; i++) {
-         int * local_tbl = (int *)((void *)gccachesamplingtbl
-                 +size_cachesamplingtbl_local*i);
-         int freq = local_tbl[page_index];
-         printf("%8d ",freq);
-       }
-       printf("\n");
-  }
-  printf("=================\n");
-} // gc_output_cache_sampling
-
-void gc_output_cache_sampling_r() {
-  unsigned int page_index = 0;
-  VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
-  for(page_index = 0; page_index < page_num; page_index++) {
-       page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
-       unsigned int block = 0;
-       BLOCKINDEX(page_sva, &block);
-       unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-       tprintf("va: %x page_index: %d host: %d\n", 
-               (int)page_sva, page_index, coren);
-       for(int i = 0; i < NUMCORESACTIVE; i++) {
-         int * local_tbl = (int *)((void *)gccachesamplingtbl_r
-                 +size_cachesamplingtbl_local_r*i);
-         int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
-         printf("%8d ",freq);
-       }
-       printf("\n");
-  }
-  printf("=================\n");
-} // gc_output_cache_sampling_r
-#endif // GC_CACHE_ADAPT
-
-inline void gc_collect(struct garbagelist * stackptr) {
-  // inform the master that this core is at a gc safe point and is ready to 
-  // do gc
-  send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs, 
-         self_numreceiveobjs, false);
-
-  // core collector routine
-  while(true) {
-    if(INITPHASE == gcphase) {
-      break;
-    }
-  }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%X,%X) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
-#endif
-  initGC();
-#ifdef GC_CACHE_ADAPT
-  // prepare for cache adaption:
-  cacheAdapt_gc(true);
-#endif // GC_CACHE_ADAPT
-  //send init finish msg to core coordinator
-  send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
-
-  while(true) {
-    if(MARKPHASE == gcphase) {
-      break;
-    }
-  }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
-#endif
-  mark(true, stackptr);
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish mark phase, start compact phase\n", 
-            udn_tile_coord_x(), udn_tile_coord_y());
-#endif
-  compact();
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish compact phase\n", udn_tile_coord_x(),
-            udn_tile_coord_y());
-#endif
+  GC_PRINTF("Start mark phase\n");
+  mark(true, stackptr);
+  GC_PRINTF("Finish mark phase, start compact phase\n");
+  compact();
+  GC_PRINTF("Finish compact phase\n");
 
   while(true) {
     if(FLUSHPHASE == gcphase) {
       break;
     }
   }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
-#endif
-#ifdef GC_PROFILE
-  // send the num of obj/liveobj/forwardobj to the startupcore
-  if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
-       send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj, 
-               gc_num_liveobj, gc_num_forwardobj, false);
-  }
-  gc_num_obj = 0;
-#endif // GC_PROFILE
+  GC_PRINTF("Start flush phase\n");
+  GCPROFILE_INFO_2_MASTER();
   flush(stackptr);
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(),
-            udn_tile_coord_y());
-#endif
+  GC_PRINTF("Finish flush phase\n");
 
-#ifdef GC_CACHE_ADAPT
-  while(true) {
-    if(PREFINISHPHASE == gcphase) {
-      break;
-    }
-  }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
-#endif
-  // cache adapt phase
-  cacheAdapt_mutator();
-  cacheAdapt_gc(false);
-  //send prefinish-phase finish msg to core coordinator
-  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
-            udn_tile_coord_y());
-#endif
-#ifdef GC_CACHE_SAMPLING
-  // reset the sampling arrays
-  bamboo_dtlb_sampling_reset();
-#endif // GC_CACHE_SAMPLING
-  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-       // zero out the gccachesamplingtbl
-       BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
-       BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
-               size_cachesamplingtbl_local_r);
-  }
-#endif // GC_CACHE_ADAPT
+  CACHEADAPT_PHASE_CLIENT();
 
   // invalidate all shared mem pointers
   bamboo_cur_msp = NULL;
@@ -3628,30 +730,25 @@ inline void gc_collect(struct garbagelist * stackptr) {
     }
   }
 
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
-#endif
-} // void gc_collect(struct garbagelist * stackptr)
+  GC_PRINTF("Finish gc! \n");
+} 
 
-inline void gc_nocollect(struct garbagelist * stackptr) {
+INLINE void gc_nocollect(struct garbagelist * stackptr) {
+  gcprocessing = true;
+  tprintf("gc \n");
   // inform the master that this core is at a gc safe point and is ready to 
   // do gc
   send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, self_numsendobjs, 
-         self_numreceiveobjs, false);
+    self_numreceiveobjs, false);
   
   while(true) {
     if(INITPHASE == gcphase) {
       break;
     }
   }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Do initGC\n", udn_tile_coord_x(), udn_tile_coord_y());
-#endif
+  GC_PRINTF("Do initGC\n");
   initGC();
-#ifdef GC_CACHE_ADAPT
-  // prepare for cache adaption:
-  cacheAdapt_gc(true);
-#endif // GC_CACHE_ADAPT
+  CACHEADAPT_GC(true);
   //send init finish msg to core coordinator
   send_msg_2(STARTUPCORE, GCFINISHINIT, BAMBOO_NUM_OF_CORE, false);
 
@@ -3660,15 +757,9 @@ inline void gc_nocollect(struct garbagelist * stackptr) {
       break;
     }
   }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start mark phase\n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
-#endif
+  GC_PRINTF("Start mark phase\n"); 
   mark(true, stackptr);
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish mark phase, wait for flush\n", 
-            udn_tile_coord_x(), udn_tile_coord_y());
-#endif
+  GC_PRINTF("Finish mark phase, wait for flush\n");
 
   // non-gc core collector routine
   while(true) {
@@ -3676,53 +767,12 @@ inline void gc_nocollect(struct garbagelist * stackptr) {
       break;
     }
   }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start flush phase\n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
-#endif
-#ifdef GC_PROFILE
-  if(STARTUPCORE != BAMBOO_NUM_OF_CORE) {
-       send_msg_4(STARTUPCORE, GCPROFILES, gc_num_obj, 
-               gc_num_liveobj, gc_num_forwardobj, false);
-  }
-  gc_num_obj = 0;
-#endif // GC_PROFILE
+  GC_PRINTF("Start flush phase\n");
+  GCPROFILE_INFO_2_MASTER();
   flush(stackptr);
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish flush phase\n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
-#endif
+  GC_PRINTF("Finish flush phase\n"); 
 
-#ifdef GC_CACHE_ADAPT
-  while(true) {
-    if(PREFINISHPHASE == gcphase) {
-      break;
-    }
-  }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start prefinish phase\n", udn_tile_coord_x(), 
-            udn_tile_coord_y());
-#endif
-  // cache adapt phase
-  cacheAdapt_mutator();
-  cacheAdapt_gc(false);
-  //send prefinish-phase finish msg to core coordinator
-  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish prefinish phase\n", udn_tile_coord_x(),
-            udn_tile_coord_y());
-#endif
-#ifdef GC_CACHE_SAMPLING
-  // reset the sampling arrays
-  bamboo_dtlb_sampling_reset();
-#endif // GC_CACHE_SAMPLING
-  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-       // zero out the gccachesamplingtbl
-       BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
-       BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
-               size_cachesamplingtbl_local_r);
-  }
-#endif // GC_CACHE_ADAPT
+  CACHEADAPT_PHASE_CLIENT();
 
   // invalidate all shared mem pointers
   bamboo_cur_msp = NULL;
@@ -3735,12 +785,11 @@ inline void gc_nocollect(struct garbagelist * stackptr) {
       break;
     }
   }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
-#endif
-} // void gc_nocollect(struct garbagelist * stackptr)
+  GC_PRINTF("Finish gc! \n");
+}
 
-inline void gc_master(struct garbagelist * stackptr) {
+INLINE void gc_master(struct garbagelist * stackptr) {
+  gcprocessing = true;
   tprintf("start GC !!!!!!!!!!!!! \n");
 
   gcphase = INITPHASE;
@@ -3748,369 +797,118 @@ inline void gc_master(struct garbagelist * stackptr) {
   waitconfirm = false;
   numconfirm = 0;
   initGC();
-
-  // Note: all cores need to init gc including non-gc cores
-  for(i = 1; i < NUMCORESACTIVE /*NUMCORES4GC*/; i++) {
-       // send GC init messages to all cores
-       send_msg_1(i, GCSTARTINIT, false);
-  }
-  bool isfirst = true;
-  bool allStall = false;
-
-#ifdef GC_CACHE_ADAPT
-  // prepare for cache adaption:
-  cacheAdapt_gc(true);
-#endif // GC_CACHE_ADAPT
-
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Check core status \n", udn_tile_coord_x(), 
-                udn_tile_coord_y());
-#endif
-
-  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-  while(true) {
-       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-       if(gc_checkAllCoreStatus_I()) {
-         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-         break;
-       }
-       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-  }
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if(gc_profile_flag) {
-#endif
-  gc_profileItem();
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-#ifdef GC_CACHE_ADAPT_POLICY_OUTPUT
-  gc_output_cache_sampling();
-#endif // GC_CACHE_ADAPT
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start mark phase \n", udn_tile_coord_x(), 
-                udn_tile_coord_y());
-#endif
-  // restore the gcstatus of all cores
-  // Note: all cores have to do mark including non-gc cores
-  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-  for(i = 1; i < NUMCORESACTIVE; ++i) {
-       gccorestatus[i] = 1;
-       // send GC start messages to all cores
-       send_msg_1(i, GCSTART, false);
-  }
-
+  GC_SEND_MSG_1_TO_CLIENT(GCSTARTINIT);
+  CACHEADAPT_GC(true);
+  GC_PRINTF("Check core status \n");
+  GC_CHECK_ALL_CORE_STATUS(true);
+  GCPROFILE_ITEM();
+  CACHEADAPT_OUTPUT_CACHE_SAMPLING();
+
+  GC_PRINTF("Start mark phase \n");
+  GC_SEND_MSG_1_TO_CLIENT(GCSTART);
   gcphase = MARKPHASE;
   // mark phase
+  bool isfirst = true;
   while(MARKPHASE == gcphase) {
-       mark(isfirst, stackptr);
-       if(isfirst) {
-         isfirst = false;
-       }
+    mark(isfirst, stackptr);
+    if(isfirst) {
+      isfirst = false;
+    }
+
+    // check gcstatus
+    checkMarkStatue();
+  }
 
-       // check gcstatus
-       checkMarkStatue();
-  }   // while(MARKPHASE == gcphase)
   // send msgs to all cores requiring large objs info
   // Note: only need to ask gc cores, non-gc cores do not host any objs
   numconfirm = NUMCORES4GC - 1;
   for(i = 1; i < NUMCORES4GC; ++i) {
-       send_msg_1(i, GCLOBJREQUEST, false);
+    send_msg_1(i, GCLOBJREQUEST, false);
   }
   gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
   while(true) {
-       if(numconfirm==0) {
-         break;
-       }
+    if(numconfirm==0) {
+      break;
+    }
   }   // wait for responses
   // check the heaptop
   if(gcheaptop < gcmarkedptrbound) {
-       gcheaptop = gcmarkedptrbound;
+    gcheaptop = gcmarkedptrbound;
   }
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if(gc_profile_flag) {
-#endif
-  gc_profileItem();
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) prepare to cache large objs \n", udn_tile_coord_x(),
-                udn_tile_coord_y());
-#endif
+  GCPROFILE_ITEM();
+  GC_PRINTF("prepare to cache large objs \n");
   // cache all large objs
   if(!cacheLObjs()) {
-       // no enough space to cache large objs
-       BAMBOO_EXIT(0xb02b);
+    // not enough space to cache large objs
+    BAMBOO_EXIT(0xb02e);
   }
   // predict number of blocks to fill for each core
   unsigned int tmpheaptop = 0;
   int numpbc = loadbalance(&tmpheaptop);
   // TODO
   numpbc = (BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_SMEM_SIZE);
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) mark phase finished \n", udn_tile_coord_x(), 
-                udn_tile_coord_y());
-#endif
+  GC_PRINTF("mark phase finished \n");
+
   //int tmptopptr = 0;
   //BASEPTR(gctopcore, 0, &tmptopptr);
   // TODO
   //tmptopptr = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
   tmpheaptop = gcbaseva + (BAMBOO_SHARED_MEM_SIZE);
-  GC_BAMBOO_DEBUGPRINT(0xabab);
-  GC_BAMBOO_DEBUGPRINT_REG(tmpheaptop);
   for(i = 0; i < NUMCORES4GC; ++i) {
-       unsigned int tmpcoreptr = 0;
-       BASEPTR(i, numpbc, &tmpcoreptr);
-       // init some data strutures for compact phase
-       gcloads[i] = 0;
-       gcfilledblocks[i] = 0;
-       gcrequiredmems[i] = 0;
-       gccorestatus[i] = 1;
-       //send start compact messages to all cores
-       //TODO bug here, do not know if the direction is positive or negtive?
-       if (tmpcoreptr < tmpheaptop) {
-         gcstopblock[i] = numpbc + 1;
-         if(i != STARTUPCORE) {
-               send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
-         } else {
-               gcblock2fill = numpbc+1;
-         }   // if(i != STARTUPCORE)
-       } else {
-         gcstopblock[i] = numpbc;
-         if(i != STARTUPCORE) {
-               send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
-         } else {
-               gcblock2fill = numpbc;
-         }  // if(i != STARTUPCORE)
-       }
-       GC_BAMBOO_DEBUGPRINT(0xf000+i);
-       GC_BAMBOO_DEBUGPRINT_REG(tmpcoreptr);
-       GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[i]);
+    unsigned int tmpcoreptr = 0;
+    BASEPTR(i, numpbc, &tmpcoreptr);
+    // init some data structures for the compact phase
+    gcloads[i] = 0;
+    gcfilledblocks[i] = 0;
+    gcrequiredmems[i] = 0;
+    gccorestatus[i] = 1;
+    //send start compact messages to all cores
+    //TODO possible bug here: unclear whether the direction is positive or negative
+    if (tmpcoreptr < tmpheaptop) {
+      gcstopblock[i] = numpbc + 1;
+      if(i != STARTUPCORE) {
+        send_msg_2(i, GCSTARTCOMPACT, numpbc+1, false);
+      } else {
+        gcblock2fill = numpbc+1;
+      }
+    } else {
+      gcstopblock[i] = numpbc;
+      if(i != STARTUPCORE) {
+        send_msg_2(i, GCSTARTCOMPACT, numpbc, false);
+      } else {
+        gcblock2fill = numpbc;
+      }
+    }
   }
-
   BAMBOO_CACHE_MF();
-
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if(gc_profile_flag) {
-#endif
-  gc_profileItem();
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-
+  GCPROFILE_ITEM();
   // compact phase
-  bool finalcompact = false;
-  // initialize pointers for comapcting
   struct moveHelper * orig =
-       (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
   struct moveHelper * to =
-       (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
-  initOrig_Dst(orig, to);
-  int filledblocks = 0;
-  unsigned int heaptopptr = 0;
-  bool finishcompact = false;
-  bool iscontinue = true;
-  bool localcompact = true;
-  while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
-       if((!finishcompact) && iscontinue) {
-         GC_BAMBOO_DEBUGPRINT(0xeaa01);
-         GC_BAMBOO_DEBUGPRINT_REG(numpbc);
-         GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
-         finishcompact = compacthelper(orig, to, &filledblocks,
-                                                                       &heaptopptr, &localcompact);
-         GC_BAMBOO_DEBUGPRINT(0xeaa02);
-         GC_BAMBOO_DEBUGPRINT_REG(finishcompact);
-         GC_BAMBOO_DEBUGPRINT_REG(gctomove);
-         GC_BAMBOO_DEBUGPRINT_REG(gcrequiredmems[0]);
-         GC_BAMBOO_DEBUGPRINT_REG(gcfilledblocks[0]);
-         GC_BAMBOO_DEBUGPRINT_REG(gcstopblock[0]);
-       }
-
-       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-       if(gc_checkCoreStatus_I()) {
-         // all cores have finished compacting
-         // restore the gcstatus of all cores
-         for(i = 0; i < NUMCORES4GC; ++i) {
-               gccorestatus[i] = 1;
-         }
-         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-         break;
-       } else {
-         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-         // check if there are spare mem for pending move requires
-         if(COMPACTPHASE == gcphase) {
-               GC_BAMBOO_DEBUGPRINT(0xeaa03);
-               resolvePendingMoveRequest();
-               GC_BAMBOO_DEBUGPRINT_REG(gctomove);
-         } else {
-               GC_BAMBOO_DEBUGPRINT(0xeaa04);
-               compact2Heaptop();
-         }
-       }   // if(gc_checkCoreStatus_I()) else ...
-
-       if(gctomove) {
-         GC_BAMBOO_DEBUGPRINT(0xeaa05);
-         GC_BAMBOO_DEBUGPRINT_REG(gcmovestartaddr);
-         GC_BAMBOO_DEBUGPRINT_REG(gcblock2fill);
-         GC_BAMBOO_DEBUGPRINT_REG(gctomove);
-         to->ptr = gcmovestartaddr;
-         to->numblocks = gcblock2fill - 1;
-         to->bound = (to->numblocks==0) ?
-                                 BAMBOO_SMEM_SIZE_L :
-                                 BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
-         BASEPTR(gcdstcore, to->numblocks, &(to->base));
-         to->offset = to->ptr - to->base;
-         to->top = (to->numblocks==0) ?
-                               (to->offset) : (to->bound-BAMBOO_SMEM_SIZE+to->offset);
-         to->base = to->ptr;
-         to->offset = BAMBOO_CACHE_LINE_SIZE;
-         to->ptr += to->offset;                         // for header
-         to->top += to->offset;
-         if(gcdstcore == BAMBOO_NUM_OF_CORE) {
-               localcompact = true;
-         } else {
-               localcompact = false;
-         }
-         gctomove = false;
-         iscontinue = true;
-       } else if(!finishcompact) {
-         // still pending
-         iscontinue = false;
-       }  // if(gctomove)
-  }  // while(COMPACTPHASE == gcphase)
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if(gc_profile_flag) {
-#endif
-  gc_profileItem();
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) prepare to move large objs \n", udn_tile_coord_x(),
-                udn_tile_coord_y());
-#endif
+    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+  compact_master(orig, to); 
+  GCPROFILE_ITEM();
+  GC_PRINTF("prepare to move large objs \n");
   // move largeObjs
   moveLObjs();
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) compact phase finished \n", udn_tile_coord_x(), 
-                udn_tile_coord_y());
-#endif
+  GC_PRINTF("compact phase finished \n");
   RUNFREE(orig);
   RUNFREE(to);
   orig = to = NULL;
 
   gcphase = FLUSHPHASE;
-  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-  // Note: all cores should flush their runtime data including non-gc
-  //       cores
-  for(i = 1; i < NUMCORESACTIVE; ++i) {
-       // send start flush messages to all cores
-       gccorestatus[i] = 1;
-       send_msg_1(i, GCSTARTFLUSH, false);
-  }
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if(gc_profile_flag) {
-#endif
-  gc_profileItem();
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start flush phase \n", udn_tile_coord_x(), 
-                udn_tile_coord_y());
-#endif
+  GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
+  GCPROFILE_ITEM();
+  GC_PRINTF("Start flush phase \n");
   // flush phase
   flush(stackptr);
-
-#ifdef GC_CACHE_ADAPT
   // now the master core need to decide the new cache strategy
-  cacheAdapt_master();
-#endif // GC_CACHE_ADAPT
-
-  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-  while(FLUSHPHASE == gcphase) {
-       // check the status of all cores
-       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-       if(gc_checkAllCoreStatus_I()) {
-         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-         break;
-       }
-       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-  }  // while(FLUSHPHASE == gcphase)
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Finish flush phase \n", udn_tile_coord_x(), 
-                udn_tile_coord_y());
-#endif
-
-#ifdef GC_CACHE_ADAPT
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if(gc_profile_flag) {
-#endif
-  gc_profileItem();
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-  gcphase = PREFINISHPHASE;
-  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-  // Note: all cores should flush their runtime data including non-gc
-  //       cores
-  for(i = 1; i < NUMCORESACTIVE; ++i) {
-       // send start flush messages to all cores
-       gccorestatus[i] = 1;
-       send_msg_1(i, GCSTARTPREF, false);
-  }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) Start prefinish phase \n", udn_tile_coord_x(), 
-                udn_tile_coord_y());
-#endif
-  // cache adapt phase
-  cacheAdapt_mutator();
-#ifdef MGC_SPEC
-  if(gc_profile_flag) {
-#endif
-#ifdef GC_CACHE_ADAPT_OUTPUT
-  bamboo_output_cache_policy();
-#endif
-#ifdef MGC_SPEC
-  }
-#endif
-  cacheAdapt_gc(false);
+  CACHEADAPT_MASTER();
+  GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gcphase);
+  GC_PRINTF("Finish flush phase \n");
 
-  gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-  while(PREFINISHPHASE == gcphase) {
-       // check the status of all cores
-       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-       if(gc_checkAllCoreStatus_I()) {
-         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-         break;
-       }
-       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-  }  // while(PREFINISHPHASE == gcphase)
-
-#ifdef GC_CACHE_SAMPLING
-  // reset the sampling arrays
-  bamboo_dtlb_sampling_reset();
-#endif // GC_CACHE_SAMPLING
-  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-       // zero out the gccachesamplingtbl
-       BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
-       BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
-               size_cachesamplingtbl_local_r);
-       BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
-  }
-#endif // GC_CACHE_ADAPT
+  CACHEADAPT_PHASE_MASTER();
 
   gcphase = FINISHPHASE;
 
@@ -4122,306 +920,125 @@ inline void gc_master(struct garbagelist * stackptr) {
   bamboo_smem_size = 0;
   bamboo_smem_zero_top = NULL;
 
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if(gc_profile_flag) {
-#endif
-  gc_profileEnd();
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
+  GCPROFILE_ITEM(); // NOTE(review): the removed code called gc_profileEnd() here (extra stats + gc_infoIndex++) -- confirm an ITEM record is intended
   gcflag = false;
-  gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
-  for(i = 1; i < NUMCORESACTIVE; ++i) {
-       // send gc finish messages to all cores
-       send_msg_1(i, GCFINISH, false);
-       gccorestatus[i] = 1;
-  }
+  GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
 
   gcprocessing = false;
   if(gcflag) {
-       // inform other cores to stop and wait for gc
-       gcprecheck = true;
-       for(int i = 0; i < NUMCORESACTIVE; i++) {
-         // reuse the gcnumsendobjs & gcnumreceiveobjs
-         gccorestatus[i] = 1;
-         gcnumsendobjs[0][i] = 0;
-         gcnumreceiveobjs[0][i] = 0;
-       }
-       for(int i = 0; i < NUMCORESACTIVE; i++) {
-         if(i != BAMBOO_NUM_OF_CORE) {
-               send_msg_1(i, GCSTARTPRE, false);
-         }
-       }
+    // inform other cores to stop and wait for gc
+    gcprecheck = true;
+    for(int i = 0; i < NUMCORESACTIVE; i++) {
+      // reuse the gcnumsendobjs & gcnumreceiveobjs
+      gcnumsendobjs[0][i] = 0;
+      gcnumreceiveobjs[0][i] = 0;
+    }
+    GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
   }
-#ifdef RAWPATH // TODO GC_DEBUG
-  printf("(%x,%x) gc finished   \n", udn_tile_coord_x(), 
-                udn_tile_coord_y());
-#endif
-  tprintf("finish GC ! \n");
-} // void gc_master(struct garbagelist * stackptr)
+  GC_PRINTF("gc finished   \n");
+  tprintf("finish GC ! %d \n", gcflag);
+} 
 
-inline bool gc(struct garbagelist * stackptr) {
-  // check if do gc
-  if(!gcflag) {
-    gcprocessing = false;
-    return false;
+INLINE void pregccheck_I() {
+  while(true) {
+    gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
+    gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
+    int sumsendobj = 0;
+    int i = 0;
+    for(i = 0; i < NUMCORESACTIVE; ++i) {
+      sumsendobj += gcnumsendobjs[0][i];
+    }  
+    for(i = 0; i < NUMCORESACTIVE; ++i) {
+      sumsendobj -= gcnumreceiveobjs[0][i];
+    } 
+    if(0 != sumsendobj) {
+      // some msgs are still in flight: wait until updated pregc
+      // information arrives, then check again
+      gcprecheck = false;
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      while(true) {
+        if(gcprecheck) {
+          break;
+        }
+      }
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    } else {
+      return;
+    }
   }
+}
 
+INLINE void pregcprocessing() {
 #ifdef GC_CACHE_ADAPT
 #ifdef GC_CACHE_SAMPLING
-    // disable the timer interrupt
-    bamboo_mask_timer_intr();
+  // disable the timer interrupt
+  bamboo_mask_timer_intr();
 #endif 
 #endif
-  // core coordinator routine
-  if(0 == BAMBOO_NUM_OF_CORE) {
-#ifdef GC_DEBUG
-    printf("(%x,%x) Check if can do gc or not\n", udn_tile_coord_x(),
-                  udn_tile_coord_y());
-#endif
-       bool isallstall = true;
-       gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
-       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-       int ti = 0;
-       for(ti = 0; ti < NUMCORESACTIVE; ++ti) {
-         if(gccorestatus[ti] != 0) {
-               isallstall = false;
-               break;
-         }
-       }
-       if(!isallstall) {
-         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-         // some of the cores are still executing the mutator and did not reach
-         // some gc safe point, therefore it is not ready to do gc
-         gcflag = true;
-         return false;
-       } else {
-#ifdef GC_PROFILE
-#ifdef MGC_SPEC
-       if(gc_profile_flag) {
-#endif
-    gc_profileStart();
-#ifdef MGC_SPEC
-       }
-#endif
-#endif
-pregccheck:
-         gcnumsendobjs[0][BAMBOO_NUM_OF_CORE] = self_numsendobjs;
-         gcnumreceiveobjs[0][BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
-         int sumsendobj = 0;
-         GC_BAMBOO_DEBUGPRINT(0xec04);
-         for(int i = 0; i < NUMCORESACTIVE; ++i) {
-               sumsendobj += gcnumsendobjs[0][i];
-               GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumsendobjs[0][i]);
-         }  // for(i = 1; i < NUMCORESACTIVE; ++i)
-         GC_BAMBOO_DEBUGPRINT(0xec05);
-         GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
-         for(int i = 0; i < NUMCORESACTIVE; ++i) {
-               sumsendobj -= gcnumreceiveobjs[0][i];
-               GC_BAMBOO_DEBUGPRINT(0xf000 + gcnumreceiveobjs[i]);
-         }  // for(i = 1; i < NUMCORESACTIVE; ++i)
-         GC_BAMBOO_DEBUGPRINT(0xec06);
-         GC_BAMBOO_DEBUGPRINT_REG(sumsendobj);
-         if(0 != sumsendobj) {
-               // there were still some msgs on the fly, wait until there 
-               // are some update pregc information coming and check it again
-               gcprecheck = false;
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-               while(true) {
-                 if(gcprecheck) {
-                       break;
-                 }
-               }
-               goto pregccheck;
-         } else {
-               BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-         }
-       }
-#ifdef RAWPATH // TODO GC_DEBUG
-    printf("(%x,%x) start gc! \n", udn_tile_coord_x(), udn_tile_coord_y());
-#endif
-       // Zero out the remaining bamboo_cur_msp 
-       // Only zero out the first 4 bytes of the remaining memory
-       // Move the operation here because for the GC_CACHE_ADAPT version,
-       // we need to make sure during the gcinit phase the shared heap is not 
-       // touched. Otherwise, there would be problem when adapt the cache 
-       // strategy.
-       if((bamboo_cur_msp != 0) 
-               && (bamboo_smem_zero_top == bamboo_cur_msp) 
-               && (bamboo_smem_size > 0)) {
-         *((int *)bamboo_cur_msp) = 0;
-       }
+  // Zero out the remaining memory here because, for the GC_CACHE_ADAPT version,
+  // we need to make sure the shared heap is not touched during the gcinit
+  // phase. Otherwise, there would be problems when adapting the cache strategy.
+  BAMBOO_CLOSE_CUR_MSP();
 #ifdef GC_FLUSH_DTLB
-       if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
-         BAMBOO_CLEAN_DTLB();
-         gc_num_flush_dtlb++;
-       }
+  if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
+    BAMBOO_CLEAN_DTLB();
+    gc_num_flush_dtlb++;
+  }
 #endif
 #ifdef GC_CACHE_ADAPT
 #ifdef GC_CACHE_SAMPLING
-    // get the sampling data 
-    bamboo_output_dtlb_sampling();
-#endif // GC_CACHE_SAMPLING
-#endif // GC_CACHE_ADAPT
-       gcprocessing = true;
-       gc_master(stackptr);
-  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
-       // Zero out the remaining bamboo_cur_msp 
-       // Only zero out the first 4 bytes of the remaining memory
-       // Move the operation here because for the GC_CACHE_ADAPT version,
-       // we need to make sure during the gcinit phase the shared heap is not 
-       // touched. Otherwise, there would be problem when adapt the cache 
-       // strategy.
-       if((bamboo_cur_msp != 0) 
-               && (bamboo_smem_zero_top == bamboo_cur_msp) 
-               && (bamboo_smem_size > 0)) {
-         *((int *)bamboo_cur_msp) = 0;
-       }
-#ifdef GC_FLUSH_DTLB
-       if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
-         BAMBOO_CLEAN_DTLB();
-         gc_num_flush_dtlb++;
-       }
+  // get the sampling data 
+  bamboo_output_dtlb_sampling();
 #endif
-#ifdef GC_CACHE_ADAPT
-#ifdef GC_CACHE_SAMPLING
-       if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-         // get the sampling data 
-         bamboo_output_dtlb_sampling();
-       }
-#endif // GC_CACHE_SAMPLING
-#endif // GC_CACHE_ADAPT
-    gcprocessing = true;
-    gc_collect(stackptr);
-  } else {
-       // Zero out the remaining bamboo_cur_msp 
-       // Only zero out the first 4 bytes of the remaining memory
-       // Move the operation here because for the GC_CACHE_ADAPT version,
-       // we need to make sure during the gcinit phase the shared heap is not 
-       // touched. Otherwise, there would be problem when adapt the cache 
-       // strategy.
-       if((bamboo_cur_msp != 0) 
-               && (bamboo_smem_zero_top == bamboo_cur_msp) 
-               && (bamboo_smem_size > 0)) {
-         *((int *)bamboo_cur_msp) = 0;
-       }
-#ifdef GC_FLUSH_DTLB
-       if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
-         BAMBOO_CLEAN_DTLB();
-         gc_num_flush_dtlb++;
-       }
 #endif
-#ifdef GC_CACHE_ADAPT
-#ifdef GC_CACHE_SAMPLING
-       if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
-         // get the sampling data 
-         bamboo_output_dtlb_sampling();
-       }
-#endif // GC_CACHE_SAMPLING
-#endif // GC_CACHE_ADAPT
-    // not a gc core, should wait for gcfinish msg
-    gcprocessing = true;
-    gc_nocollect(stackptr);
-  }
+}
+
+INLINE void postgcprocessing() {
 #ifdef GC_CACHE_ADAPT
 #ifdef GC_CACHE_SAMPLING
   // enable the timer interrupt
   bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING); 
   bamboo_unmask_timer_intr();
-#endif // GC_CACHE_SAMPLING
-#endif // GC_CACHE_ADAPT
-
-  return true;
-} // void gc(struct garbagelist * stackptr)
-
-#ifdef GC_PROFILE
-inline void gc_profileStart(void) {
-  if(!gc_infoOverflow) {
-    GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info));
-    gc_infoArray[gc_infoIndex] = gcInfo;
-    gcInfo->index = 1;
-    gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
-  }
-}
-
-inline void gc_profileItem(void) {
-  if(!gc_infoOverflow) {
-    GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
-    gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
-  }
+#endif
+#endif
 }
 
-inline void gc_profileEnd(void) {
-  if(!gc_infoOverflow) {
-    GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
-    gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
-       gcInfo->time[gcInfo->index++] = gc_num_livespace;
-       gcInfo->time[gcInfo->index++] = gc_num_freespace;
-       gcInfo->time[gcInfo->index++] = gc_num_lobj;
-       gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
-       gcInfo->time[gcInfo->index++] = gc_num_obj;
-       gcInfo->time[gcInfo->index++] = gc_num_liveobj;
-       gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
-    gc_infoIndex++;
-    if(gc_infoIndex == GCINFOLENGTH) {
-      gc_infoOverflow = true;
-      //taskInfoIndex = 0;
-    }
+INLINE bool gc(struct garbagelist * stackptr) {
+  // check if do gc
+  if(!gcflag) {
+    gcprocessing = false;
+    return false;
   }
-}
-
-// output the profiling data
-void gc_outputProfileData() {
-  int i = 0;
-  int j = 0;
-  unsigned long long totalgc = 0;
 
-#ifndef BAMBOO_MEMPROF
-  BAMBOO_PRINT(0xdddd);
-#endif
-  // output task related info
-  for(i= 0; i < gc_infoIndex; i++) {
-    GCInfo * gcInfo = gc_infoArray[i];
-#ifdef BAMBOO_MEMPROF
-    unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; //0;
-#else
-       unsigned long long tmp = 0;
-    BAMBOO_PRINT(0xddda);
-    for(j = 0; j < gcInfo->index - 7; j++) {
-      BAMBOO_PRINT(gcInfo->time[j]);
-      BAMBOO_PRINT(gcInfo->time[j]-tmp);
-      BAMBOO_PRINT(0xdddb);
-      tmp = gcInfo->time[j];
+  // core coordinator routine
+  if(0 == BAMBOO_NUM_OF_CORE) {
+    GC_PRINTF("Check if we can do gc or not\n");
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    if(!gc_checkAllCoreStatus_I()) {
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      // some of the cores are still executing the mutator and did not reach
+      // some gc safe point, therefore it is not ready to do gc
+      gcflag = true;
+      return false;
+    } else {
+      GCPROFILE_START();
+      pregccheck_I();
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
     }
-    tmp = (tmp-gcInfo->time[0]);
-    BAMBOO_PRINT_REG(tmp);
-       BAMBOO_PRINT(0xdddc);
-       BAMBOO_PRINT(gcInfo->time[gcInfo->index - 7]);
-       BAMBOO_PRINT(gcInfo->time[gcInfo->index - 6]);
-       BAMBOO_PRINT(gcInfo->time[gcInfo->index - 5]);
-       BAMBOO_PRINT(gcInfo->time[gcInfo->index - 4]);
-       BAMBOO_PRINT(gcInfo->time[gcInfo->index - 3]);
-       BAMBOO_PRINT(gcInfo->time[gcInfo->index - 2]);
-       BAMBOO_PRINT(gcInfo->time[gcInfo->index - 1]);
-    BAMBOO_PRINT(0xddde);
-#endif
-    totalgc += tmp;
-  }
-#ifndef BAMBOO_MEMPROF
-  BAMBOO_PRINT(0xdddf);
-#endif
-  BAMBOO_PRINT_REG(totalgc);
-
-  if(gc_infoOverflow) {
-    BAMBOO_PRINT(0xefee);
+    GC_PRINTF("start gc! \n");
+    pregcprocessing();
+    gc_master(stackptr);
+  } else if(BAMBOO_NUM_OF_CORE < NUMCORES4GC) {
+    pregcprocessing();
+    gc_collect(stackptr);
+  } else {
+    pregcprocessing();
+    gc_nocollect(stackptr);
   }
+  postgcprocessing();
 
-#ifndef BAMBOO_MEMPROF
-  BAMBOO_PRINT(0xeeee);
-#endif
-}
-#endif  // #ifdef GC_PROFILE
+  return true;
+} 
 
 #endif
index 27fe89feb9f9709f15421717ed86926f51061857..97ee59abc0c356420206f7cf7c5d9232be24bd6b 100644 (file)
@@ -1,23 +1,18 @@
-#ifndef MULTICORE_GARBAGE_H
-#define MULTICORE_GARBAGE_H
+#ifndef BAMBOO_MULTICORE_GARBAGE_H
+#define BAMBOO_MULTICORE_GARBAGE_H
+#ifdef MULTICORE_GC
+#include "multicore.h"
 #include "multicoregc.h"
 #include "multicorehelper.h"  // for mappings between core # and block #
 #include "structdefs.h"
-#include "MGCHash.h"
-#include "GCSharedHash.h"
-#ifdef GC_CACHE_ADAPT
+#include "multicoregcprofile.h"
 #include "multicorecache.h"
-#endif // GC_CACHE_ADAPT
 
-#ifndef bool
-#define bool int
-#endif
-
-#ifdef TASK
-#define BAMBOOMARKBIT 8
-#elif defined MGC
-#define BAMBOOMARKBIT 5
-#endif // TASK
+#ifdef GC_DEBUG
+#define GC_PRINTF tprintf
+#else
+#define GC_PRINTF if(1) {} else tprintf /* if/else form: a caller's else cannot bind to this if */
+#endif 
 
 // data structures for GC
 #define BAMBOO_SMEM_SIZE_L (BAMBOO_SMEM_SIZE * 2)
 unsigned int gc_num_flush_dtlb;
 #endif
 
-#define NUMPTRS 120
-
-// for GC profile
-#ifdef GC_PROFILE
-#define GCINFOLENGTH 100
-
-#ifdef GC_CACHE_ADAPT
-#define GC_PROFILE_NUM_FIELD 15
-#else
-#define GC_PROFILE_NUM_FIELD 14
-#endif // GC_CACHE_ADAPT
-
-typedef struct gc_info {
-  unsigned long long time[GC_PROFILE_NUM_FIELD];
-  unsigned int index;
-} GCInfo;
-
-GCInfo * gc_infoArray[GCINFOLENGTH];
-unsigned int gc_infoIndex;
-bool gc_infoOverflow;
-unsigned long long gc_num_livespace;
-unsigned long long gc_num_freespace;
-unsigned long long gc_num_lobjspace;
-unsigned int gc_num_lobj;
-
-unsigned int gc_num_liveobj;
-unsigned int gc_num_obj;
-unsigned int gc_num_forwardobj;
-unsigned int gc_num_profiles;
-
-#ifdef MGC_SPEC
-volatile bool gc_profile_flag;
-#endif
-
-#endif // GC_PROFILE
-
 typedef enum {
   INIT = 0,           // 0
   DISCOVERED = 2,     // 2
-//  REMOTEM = 4,        // 4
-  MARKED = 4,         // 8
-  COMPACTED = 8,     // 16
-  //FLUSHED = 32,       // 32
-  END = 9            // 33
+  MARKED = 4,         // 4
+  COMPACTED = 8,      // 8
+  END = 9             // 9
 } GCOBJFLAG;
 
 typedef enum {
@@ -85,7 +42,7 @@ typedef enum {
   FLUSHPHASE,              // 0x5
 #ifdef GC_CACHE_ADAPT
   PREFINISHPHASE,          // 0x6
-#endif // GC_CACHE_ADAPT
+#endif 
   FINISHPHASE              // 0x6/0x7
 } GCPHASETYPE;
 
@@ -100,15 +57,15 @@ unsigned int gccurr_heaptop;
 struct MGCHash * gcforwardobjtbl; // cache forwarded objs in mark phase
 // for mark phase termination
 volatile unsigned int gccorestatus[NUMCORESACTIVE];//records status of each core
-                                           // 1: running gc
-                                           // 0: stall
+                                                   // 1: running gc
+                                                   // 0: stall
 volatile unsigned int gcnumsendobjs[2][NUMCORESACTIVE];//# of objects sent out
 volatile unsigned int gcnumreceiveobjs[2][NUMCORESACTIVE];//# of objects received
-volatile unsigned int gcnumsrobjs_index;//indicates which entry to record the info 
-                                               // received before phase 1 of the mark finish 
+volatile unsigned int gcnumsrobjs_index;//indicates which entry to record the  
+                                       // info received before phase 1 of the mark finish 
                                                                // checking process
-                                                                       // the info received in phase 2 must be 
-                                                                       // recorded in the other entry
+                                                                           // the info received in phase 2 must be 
+                                                                           // recorded in the other entry
 volatile bool gcbusystatus;
 unsigned int gcself_numsendobjs;
 unsigned int gcself_numreceiveobjs;
@@ -175,7 +132,7 @@ unsigned int size_cachepolicytbl;
   ((((unsigned int)p)>=gcbaseva)&&(((unsigned int)p)<(gcbaseva+(BAMBOO_SHARED_MEM_SIZE))))
 
 #define ALIGNSIZE(s, as) \
-  (*((unsigned int*)as)) = (((s) & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE))
+  (*((unsigned int*)(as)))=((((unsigned int)((s)-1))&(~(BAMBOO_CACHE_LINE_MASK)))+(BAMBOO_CACHE_LINE_SIZE))
 
 // mapping of pointer to block # (start from 0), here the block # is
 // the global index
@@ -217,11 +174,12 @@ unsigned int size_cachepolicytbl;
   if(s < BAMBOO_SMEM_SIZE_L) { \
     (*((unsigned int*)(o))) = (s); \
   } else { \
-    (*((unsigned int*)(o))) = ((s) - (BAMBOO_SMEM_SIZE_L)) % (BAMBOO_SMEM_SIZE); \
+    (*((unsigned int*)(o))) = ((s)-(BAMBOO_SMEM_SIZE_L))%(BAMBOO_SMEM_SIZE); \
   }
 
 // mapping of (core #, index of the block) to the global block index
-#define BLOCKINDEX2(c, n) (gc_core2block[(2*(c))+((n)%2)]+((NUMCORES4GC*2)*((n)/2)))
+#define BLOCKINDEX2(c, n) \
+  (gc_core2block[(2*(c))+((n)%2)]+((NUMCORES4GC*2)*((n)/2)))
 
 // mapping of (core #, number of the block) to the base pointer of the block
 #define BASEPTR(c, n, p) \
@@ -238,28 +196,57 @@ unsigned int size_cachepolicytbl;
 // the next core in the top of the heap
 #define NEXTTOPCORE(b) (gc_block2core[((b)+1)%(NUMCORES4GC*2)])
 
-inline bool gc(struct garbagelist * stackptr); // core coordinator routine
-inline void gc_collect(struct garbagelist* stackptr); //core collector routine
-inline void gc_nocollect(struct garbagelist* stackptr); //non-gc core collector routine
-inline void transferMarkResults_I();
-inline void gc_enqueue_I(void *ptr);
-inline void gc_lobjenqueue_I(void *ptr, unsigned int length, unsigned int host);
-inline bool gcfindSpareMem_I(unsigned int * startaddr,
+// close current block, fill the header:
+// fills the first BAMBOO_CACHE_LINE_SIZE bytes at base with '\0' via
+// BAMBOO_MEMSET_WH, then stores size in the first int at base
+// NOTE(review): expands to a bare { } block, not do { } while(0) -- take care
+// when using it as the body of an if that has an else
+#define CLOSEBLOCK(base, size) \
+  { \
+    BAMBOO_MEMSET_WH((base), '\0', BAMBOO_CACHE_LINE_SIZE); \
+    *((int*)(base)) = (size); \
+  }
+
+// check if all cores are stalled now
+// Clears this core's own entry in gccorestatus, then, while the condition
+// expression f holds, repeatedly calls gc_checkAllCoreStatus_I() in runtime
+// mode and leaves the loop as soon as it returns true.
+#define GC_CHECK_ALL_CORE_STATUS(f) \
+  { \
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0; \
+    while(f) { \
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); \
+      if(gc_checkAllCoreStatus_I()) { \
+        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); \
+        break; \
+      } \
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); \
+    } \
+  }
+
+// send a 1-word msg to all clients
+// Sends message m to every core in [0, NUMCORESACTIVE) except this one, and
+// sets gccorestatus[i] to 1 for every core in that range, this one included.
+#define GC_SEND_MSG_1_TO_CLIENT(m) \
+  { \
+    for(int i = 0; i < NUMCORESACTIVE; ++i) { \
+      if(BAMBOO_NUM_OF_CORE != i) { \
+        send_msg_1(i, (m), false); \
+      } \
+      gccorestatus[i] = 1; \
+    } \
+  }
+
+// true when hostcore(p) is this core (BAMBOO_NUM_OF_CORE)
+#define ISLOCAL(p) (hostcore(p)==BAMBOO_NUM_OF_CORE)
+
+INLINE void initmulticoregcdata();
+INLINE void dismulticoregcdata();
+INLINE bool gc_checkAllCoreStatus_I();
+INLINE bool gc(struct garbagelist * stackptr); // core coordinator routine
+INLINE void gc_collect(struct garbagelist* stackptr); //core collector routine
+INLINE void gc_nocollect(struct garbagelist* stackptr); //non-gc core collector routine
+INLINE void transferMarkResults_I();
+INLINE bool gcfindSpareMem_I(unsigned int * startaddr,
                              unsigned int * tomove,
                              unsigned int * dstcore,
                              unsigned int requiredmem,
                              unsigned int requiredcore);
 
-inline void * gc_lobjdequeue4(unsigned int * length, unsigned int * host);
-inline int gc_lobjmoreItems4();
-inline void gc_lobjqueueinit4();
-
-#ifdef GC_PROFILE
-INLINE void gc_profileStart(void);
-INLINE void gc_profileItem(void);
-INLINE void gc_profileEnd(void);
-void gc_outputProfileData();
-#endif
-
-#endif
-
+#define INITMULTICOREGCDATA() initmulticoregcdata()
+#define DISMULTICOREGCDATA() dismulticoregcdata()
+#else // MULTICORE_GC
+#define INITMULTICOREGCDATA()
+#define DISMULTICOREGCDATA()
+#endif // MULTICORE_GC
+#endif // BAMBOO_MULTICORE_GARBAGE_H
index 0f7ddc4cac516986a7e4a2d3c5d4a999d50ed3eb..929fa0e93de48dfbef2a0dae46aeaaa1666be506 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef MULTICORE_GC_H
-#define MULTICORE_GC_H
+#ifndef BAMBOO_MULTICORE_GC_H
+#define BAMBOO_MULTICORE_GC_H
 
 struct garbagelist {
   int size;
@@ -13,4 +13,4 @@ struct listitem {
   struct garbagelist * stackptr;
 };
 
-#endif // MULTICORE_GC_H
+#endif // BAMBOO_MULTICORE_GC_H
diff --git a/Robust/src/Runtime/bamboo/multicoregccompact.c b/Robust/src/Runtime/bamboo/multicoregccompact.c
new file mode 100644 (file)
index 0000000..46a6466
--- /dev/null
@@ -0,0 +1,631 @@
+#ifdef MULTICORE_GC
+#include "multicoregccompact.h"
+#include "runtime_arch.h"
+#include "multicoreruntime.h"
+
+extern int corenum;
+
+// Returns true only when the gccorestatus entry of every one of the
+// NUMCORES4GC GC cores is 0, false as soon as a non-zero entry is seen.
+// Presumably, like the other _I routines in this file, it is meant to be
+// called with interrupts closed.
+INLINE bool gc_checkCoreStatus_I() {
+  for(int core = 0; core < NUMCORES4GC; core++) {
+    if(gccorestatus[core] != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Tells core coren to start moving objects to the current heap top and
+// updates the coordinator's bookkeeping for the top core accordingly.
+//   coren     - the core whose pending move request is being served
+//   p         - in/out: current heap-top address handed out as move start
+//   numblocks - in/out: block count reported to the moving core (sent as +1)
+//   remain    - in/out: bytes remaining in the current top block
+// Always decrements gcmovepending.
+INLINE void compact2Heaptophelper_I(unsigned int coren,
+                                    unsigned int* p,
+                                    unsigned int* numblocks,
+                                    unsigned int* remain) {
+  unsigned int b;
+  // requested memory plus one cache line
+  // (presumably for the block header -- confirm)
+  unsigned int memneed = gcrequiredmems[coren] + BAMBOO_CACHE_LINE_SIZE;
+  if(STARTUPCORE == coren) {
+    // the coordinator itself is the mover: set its move globals directly
+    gctomove = true;
+    gcmovestartaddr = *p;
+    gcdstcore = gctopcore;
+    gcblock2fill = *numblocks + 1;
+  } else {
+    send_msg_4(coren, GCMOVESTART, gctopcore, *p, (*numblocks) + 1, false);
+  }
+  if(memneed < *remain) {
+    // the request fits in the current top block
+    *p = *p + memneed;
+    gcrequiredmems[coren] = 0;
+    gcloads[gctopcore] += memneed;
+    *remain = *remain - memneed;
+  } else {
+    // next available block
+    // NOTE(review): this store to *p is overwritten below before being read
+    *p = *p + *remain;
+    gcfilledblocks[gctopcore] += 1;
+    unsigned int newbase = 0;
+    BASEPTR(gctopcore, gcfilledblocks[gctopcore], &newbase);
+    gcloads[gctopcore] = newbase;
+    // only part of the request was satisfied; keep the rest pending
+    gcrequiredmems[coren] -= *remain - BAMBOO_CACHE_LINE_SIZE;
+    gcstopblock[gctopcore]++;
+    // advance the heap top to the next core/block and reload the cursors
+    gctopcore = NEXTTOPCORE(gctopblock);
+    gctopblock++;
+    *numblocks = gcstopblock[gctopcore];
+    *p = gcloads[gctopcore];
+    BLOCKINDEX(*p, &b);
+    *remain=GC_BLOCK_REMAIN_SIZE(b, (*p));
+  }  
+  gcmovepending--;
+} 
+
+// Used when no core has spare memory but some cores are blocked on pending
+// move requests: directs those cores to move their objects to the current
+// heap top (gctopcore / gcloads[gctopcore]).
+INLINE void compact2Heaptop() {
+  // no cores with spare mem and some cores are blocked with pending move
+  // find the current heap top and make them move to the heap top
+  unsigned int p;
+  unsigned int numblocks = gcfilledblocks[gctopcore];
+  p = gcloads[gctopcore];
+  unsigned int b;
+  BLOCKINDEX(p, &b);
+  unsigned int remain=GC_BLOCK_REMAIN_SIZE(b, p);
+  // check if the top core has finished
+  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+  if(gccorestatus[gctopcore] != 0) {
+    // let the top core finish its own work first
+    compact2Heaptophelper_I(gctopcore, &p, &numblocks, &remain);
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    return;
+  }
+  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+
+  // serve every unfinished core that still requires memory; runtime mode is
+  // entered and left once per core rather than around the whole loop
+  for(int i = 0; i < NUMCORES4GC; i++) {
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    if((gccorestatus[i] != 0) && (gcrequiredmems[i] > 0)) {
+      compact2Heaptophelper_I(i, &p, &numblocks, &remain);
+      // the helper may have advanced gctopcore; stop if the new top core is
+      // still busy
+      if(gccorestatus[gctopcore] != 0) {
+        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+        // the top core is not free now
+        return;
+      }
+    }  
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+  } 
+}
+
+// Pairs cores that have finished compacting but still own unfilled blocks
+// with cores that are blocked on a pending move request, and tells each
+// blocked core where to continue compacting. If no core is still running and
+// no pair could be formed, switches to SUBTLECOMPACTPHASE and falls back to
+// compact2Heaptop().
+INLINE void resolvePendingMoveRequest() {
+  int src = 0;                  // scan cursor for cores with spare mem
+  int dst = 0;                  // scan cursor for cores with pending moves
+  bool havespare = false;
+  bool haspending = false;
+  bool hasrunning = false;
+  bool matched = false;
+  unsigned int dstcore = 0;     // the core that needs spare mem
+  unsigned int sourcecore = 0;  // the core that has spare mem
+
+  while((src < NUMCORES4GC) && (dst < NUMCORES4GC)) {
+    if(!havespare) {
+      // a finished core that filled fewer blocks than its stop block still
+      // has spare mem
+      if((gccorestatus[src] == 0)
+         && (gcfilledblocks[src] < gcstopblock[src])) {
+        havespare = true;
+        sourcecore = src;
+      }
+      src++;
+    }
+    if(!haspending) {
+      if(gccorestatus[dst] != 0) {
+        // not finished: either blocked on a pending move request or running
+        if((gcfilledblocks[dst]==gcstopblock[dst])&&(gcrequiredmems[dst]>0)) {
+          dstcore = dst;
+          haspending = true;
+        } else {
+          hasrunning = true;
+        }
+      }
+      dst++;
+    }
+    if(havespare && haspending) {
+      // found a match
+      unsigned int tomove = 0;
+      unsigned int startaddr = 0;
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      gcrequiredmems[dstcore] = assignSpareMem_I(sourcecore,
+                                                 gcrequiredmems[dstcore],
+                                                 &tomove,
+                                                 &startaddr);
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      if(STARTUPCORE != dstcore) {
+        send_msg_4(dstcore, GCMOVESTART, sourcecore,startaddr, tomove, false);
+      } else {
+        // the coordinator itself is the blocked core
+        gcdstcore = sourcecore;
+        gctomove = true;
+        gcmovestartaddr = startaddr;
+        gcblock2fill = tomove;
+      }
+      gcmovepending--;
+      // look for the next pair
+      havespare = false;
+      haspending = false;
+      matched = true;
+    }
+  }
+
+  if(!(hasrunning || matched)) {
+    gcphase = SUBTLECOMPACTPHASE;
+    compact2Heaptop();
+  }
+}
+
+// Advances orig to the next small block (sblock) that may contain objects.
+// If out of boundary of valid shared memory, return false, else return true.
+// On success orig->blockbase/blockbound/offset/ptr describe the new sblock.
+INLINE bool nextSBlock(struct moveHelper * orig) {
+  orig->blockbase = orig->blockbound;
+
+  bool sbchanged = false;
+  unsigned int origptr;
+  unsigned int blockbase;
+  unsigned int bound;
+outernextSBlock:
+  // Re-read the cursor on every pass: the skip path below changes
+  // orig->blockbase (and an earlier pass may have changed orig->bound)
+  // before jumping back here, so values cached once at function entry
+  // would make this check test stale state.
+  origptr = orig->ptr;
+  blockbase = orig->blockbase;
+  bound = orig->bound;
+  // check if across a big block
+  // TODO now do not zero out the whole memory, maybe the last two conditions
+  // are useless now
+  if((blockbase>=bound)||(origptr>=bound)
+    ||((origptr!=NULL)&&(*((int*)origptr))==0)||((*((int*)blockbase))==0)) {
+innernextSBlock:
+    // end of current heap block, jump to next one
+    orig->numblocks++;
+    BASEPTR(BAMBOO_NUM_OF_CORE, orig->numblocks, &(orig->base));
+    if(orig->base >= gcbaseva + BAMBOO_SHARED_MEM_SIZE) {
+      // out of boundary
+      orig->ptr = orig->base; // set current ptr to out of boundary too
+      return false;
+    }
+    orig->blockbase = orig->base;
+    orig->sblockindex = 
+    (unsigned int)(orig->blockbase-gcbaseva)/BAMBOO_SMEM_SIZE;
+    sbchanged = true;
+    unsigned int blocknum = 0;
+    BLOCKINDEX(orig->base, &blocknum);
+    if(bamboo_smemtbl[blocknum] == 0) {
+      // goto next block
+      goto innernextSBlock;
+    }
+    // check the bamboo_smemtbl to decide the real bound
+    orig->bound = orig->base + bamboo_smemtbl[blocknum];
+  } else if(0 == (orig->blockbase%BAMBOO_SMEM_SIZE)) {
+    orig->sblockindex += 1;
+    sbchanged = true;
+  }  
+
+  // check if this sblock should be skipped or have special start point
+  int sbstart = gcsbstarttbl[orig->sblockindex];
+  if(sbstart == -1) {
+    // goto next sblock
+    orig->sblockindex += 1;
+    orig->blockbase += BAMBOO_SMEM_SIZE;
+    goto outernextSBlock;
+  } else if((sbstart != 0) && (sbchanged)) {
+    // the first time to access this SBlock
+    // not start from the very beginning
+    orig->blockbase = sbstart;
+  } 
+
+  // setup information for this sblock; the first int at blockbase holds the
+  // sblock's size (it is what CLOSEBLOCK-style headers store)
+  orig->blockbound = orig->blockbase+(unsigned int)*((int*)(orig->blockbase));
+  orig->offset = BAMBOO_CACHE_LINE_SIZE;
+  orig->ptr = orig->blockbase + orig->offset;
+  if(orig->ptr >= orig->bound) {
+    // met a lobj, move to next block
+    goto innernextSBlock;
+  }
+
+  return true;
+} 
+
+// Initializes the destination cursor (to) and the source cursor (orig) to
+// the first heap block of this core.
+// return false if there are no available data to compact
+INLINE bool initOrig_Dst(struct moveHelper * orig,
+                         struct moveHelper * to) {
+  // init the dst ptr: block 0 of this core, positioned just past the
+  // BAMBOO_CACHE_LINE_SIZE header
+  to->numblocks = 0;
+  to->top = to->offset = BAMBOO_CACHE_LINE_SIZE;
+  to->bound = BAMBOO_SMEM_SIZE_L;
+  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
+
+  unsigned int tobase = to->base;
+  to->ptr = tobase + to->offset;
+
+  // init the orig ptr: it starts at the same block as the destination
+  orig->numblocks = 0;
+  orig->base = tobase;
+  unsigned int blocknum = 0;
+  BLOCKINDEX(orig->base, &blocknum);
+  unsigned int origbase = orig->base;
+  // check the bamboo_smemtbl to decide the real bound
+  orig->bound = origbase + (unsigned int)bamboo_smemtbl[blocknum];
+  orig->blockbase = origbase;
+  orig->sblockindex = (unsigned int)(origbase - gcbaseva) / BAMBOO_SMEM_SIZE;
+
+  // gcsbstarttbl entry: -1 means skip this sblock, 0 means start at its
+  // beginning, any other value is used as the start address
+  int sbstart = gcsbstarttbl[orig->sblockindex];
+  if(sbstart == -1) {
+    // goto next sblock
+    orig->blockbound=gcbaseva+BAMBOO_SMEM_SIZE*(orig->sblockindex+1);
+    return nextSBlock(orig);
+  } else if(sbstart != 0) {
+    orig->blockbase = sbstart;
+  }
+  // the first int at blockbase is read as the sblock's size
+  orig->blockbound = orig->blockbase + *((int*)(orig->blockbase));
+  orig->offset = BAMBOO_CACHE_LINE_SIZE;
+  orig->ptr = orig->blockbase + orig->offset;
+
+  return true;
+}
+
+// Moves the destination cursor to this core's next heap block and positions
+// it just past the block header (BAMBOO_CACHE_LINE_SIZE bytes).
+INLINE void nextBlock(struct moveHelper * to) {
+  // top restarts at the old bound plus room for the new block's header
+  to->top = to->bound + BAMBOO_CACHE_LINE_SIZE;
+  to->bound = to->bound + BAMBOO_SMEM_SIZE;
+  to->numblocks += 1;
+  BASEPTR(BAMBOO_NUM_OF_CORE, to->numblocks, &(to->base));
+  to->offset = BAMBOO_CACHE_LINE_SIZE;
+  to->ptr = to->base + BAMBOO_CACHE_LINE_SIZE;
+}
+
+// Positions orig on the next object and returns that object's size in bytes,
+// storing its type id in *type. Returns -1 (converted to unsigned int) when
+// nextSBlock() reports that no more data is available.
+// The size is classsize[type] for a normal object and
+// sizeof(struct ArrayObject) + length*classsize[type] for an array.
+INLINE unsigned int findValidObj(struct moveHelper * orig,
+                                 struct moveHelper * to,
+                                 int * type) {
+  unsigned int size = 0;
+  while(true) {
+    CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, to->ptr, false);
+    unsigned int origptr = (unsigned int)(orig->ptr);
+    unsigned int origbound = (unsigned int)orig->bound;
+    unsigned int origblockbound = (unsigned int)orig->blockbound;
+    // reached the end of the heap block or of the current sblock
+    if((origptr >= origbound) || (origptr == origblockbound)) {
+      if(!nextSBlock(orig)) {
+        // finished, no more data
+        return -1;
+      }
+      continue;
+    }
+    // check the obj's type, size and mark flag
+    *type = ((int *)(origptr))[0];
+    size = 0;
+    if(*type == 0) {
+      // end of this block, go to next one
+      if(!nextSBlock(orig)) {
+        // finished, no more data
+        return -1;
+      }
+      continue;
+    } else if(*type < NUMCLASSES) {
+      // a normal object
+      size = classsize[*type];
+    } else {
+      // an array
+      struct ArrayObject *ao=(struct ArrayObject *)(origptr);
+      unsigned int elementsize=classsize[*type];
+      unsigned int length=ao->___length___;
+      size=(unsigned int)sizeof(struct ArrayObject)
+        +(unsigned int)(length*elementsize);
+    }
+    return size;
+  }
+}
+
+// Processes the object at orig->ptr: if it is marked, copies it to the
+// destination cursor and records the old->new address in gcmappingtbl;
+// then advances orig past the object.
+//   stopblock - block count at which the destination is considered full
+// Returns true when compaction of this round should stop (stopblock is 0,
+// no more source data, or the destination reached stopblock), false when the
+// caller should continue with the next object.
+INLINE bool moveobj(struct moveHelper * orig,
+                    struct moveHelper * to,
+                    unsigned int stopblock) {
+  if(stopblock == 0) {
+    return true;
+  }
+
+  int type = 0;
+  unsigned int size = 0;
+  unsigned int isize = 0;
+  size = findValidObj(orig, to, &type);
+  // findValidObj returns -1 converted to unsigned int when out of data
+  if(size == -1) {
+    // finished, no more data
+    return true;
+  }
+  // isize is the aligned size; it is computed whether or not the obj is
+  // marked because orig must step over the object either way
+  ALIGNSIZE(size, &isize);       // no matter is the obj marked or not
+                                 // should be able to across
+  unsigned int origptr = (unsigned int)(orig->ptr);
+  if(((struct ___Object___ *)origptr)->marked == MARKED) {
+    unsigned int totop = (unsigned int)to->top;
+    unsigned int tobound = (unsigned int)to->bound;
+    GCPROFILE_RECORD_LIVE_OBJ();
+    // marked obj, copy it to current heap top
+    // check to see if remaining space is enough
+    if((unsigned int)(totop + isize) > tobound) {
+      // fill 0 indicating the end of this block
+      BAMBOO_MEMSET_WH(to->ptr,  '\0', tobound - totop);
+      // fill the header of this block and then go to next block
+      to->offset += tobound - totop;
+      CLOSEBLOCK(to->base, to->offset);
+#ifdef GC_CACHE_ADAPT
+      unsigned int tmp_ptr = to->ptr;
+#endif 
+      nextBlock(to);
+#ifdef GC_CACHE_ADAPT
+      CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, tmp_ptr, true);
+#endif 
+      if(stopblock == to->numblocks) {
+        // already fulfilled the block
+        // (orig is not advanced, so this object is revisited next round)
+        return true;
+      }  
+    } 
+    // set the mark field to COMPACTED, indicating that this obj has been
+    // moved and needs to be flushed
+    ((struct ___Object___ *)origptr)->marked = COMPACTED;
+    unsigned int toptr = (unsigned int)to->ptr;
+    if(toptr != origptr) {
+      // source and destination ranges may overlap when the source starts
+      // before the end of the destination; use memmove in that case
+      if((unsigned int)(origptr) < (unsigned int)(toptr+size)) {
+        memmove(toptr, origptr, size);
+      } else {
+        memcpy(toptr, origptr, size);
+      }
+      // fill the remaining (alignment padding) space with -2
+      BAMBOO_MEMSET_WH((unsigned int)(toptr+size), -2, isize-size);
+    }
+    // store mapping info
+    gcmappingtbl[OBJMAPPINGINDEX((unsigned int)origptr)]=(unsigned int)toptr;
+    gccurr_heaptop -= isize;
+    to->ptr += isize;
+    to->offset += isize;
+    to->top += isize;
+#ifdef GC_CACHE_ADAPT
+    unsigned int tmp_ptr = to->ptr;
+#endif // GC_CACHE_ADAPT
+    // the destination block is exactly full: close it and open the next one
+    if(to->top == to->bound) {
+      CLOSEBLOCK(to->base, to->offset);
+      nextBlock(to);
+    }
+#ifdef GC_CACHE_ADAPT
+    CACHEADAPT_COMPLETE_PAGE_CONVERT(orig, to, tmp_ptr, true);
+#endif
+  } 
+  
+  // move to next obj
+  orig->ptr += isize; 
+
+  // NOTE(review): this uses > against orig->bound while findValidObj uses
+  // >=; the equal case is picked up by findValidObj on the next call
+  if(((unsigned int)(orig->ptr) > (unsigned int)(orig->bound))
+    || ((unsigned int)(orig->ptr) == (unsigned int)(orig->blockbound))) {
+    if(!nextSBlock(orig)) {
+      // finished, no more data
+      return true;
+    }
+  }
+  return false;
+} 
+
+// should be invoked with interrupt closed
+// Hands out spare memory of core sourcecore to a core that requested
+// requiredmem bytes.
+//   sourcecore  - the core that owns the spare memory
+//   requiredmem - number of bytes requested (a byte count, not a pointer:
+//                 both callers pass an unsigned int value)
+//   tomove      - out: gcfilledblocks[sourcecore] + 1 at the time of the call
+//   startaddr   - out: gcloads[sourcecore] at the time of the call
+// Returns 0 when the request (plus one cache line) fits in the current block
+// of sourcecore; otherwise advances sourcecore to its next block and returns
+// requiredmem minus the space that was left in the current block.
+INLINE int assignSpareMem_I(unsigned int sourcecore,
+                            unsigned int requiredmem,
+                            unsigned int * tomove,
+                            unsigned int * startaddr) {
+  unsigned int b = 0;
+  BLOCKINDEX(gcloads[sourcecore], &b);
+  unsigned int boundptr = (b<NUMCORES4GC) ? ((b+1)*BAMBOO_SMEM_SIZE_L)
+     : (BAMBOO_LARGE_SMEM_BOUND+(b-NUMCORES4GC+1)*BAMBOO_SMEM_SIZE);
+  unsigned int remain = boundptr - gcloads[sourcecore];
+  // one extra cache line on top of the request
+  // (presumably for the block header -- confirm)
+  unsigned int memneed = requiredmem + BAMBOO_CACHE_LINE_SIZE;
+  *startaddr = gcloads[sourcecore];
+  *tomove = gcfilledblocks[sourcecore] + 1;
+  if(memneed < remain) {
+    gcloads[sourcecore] += memneed;
+    return 0;
+  } else {
+    // next available block
+    gcfilledblocks[sourcecore] += 1;
+    unsigned int newbase = 0;
+    BASEPTR(sourcecore, gcfilledblocks[sourcecore], &newbase);
+    gcloads[sourcecore] = newbase;
+    return requiredmem-remain;
+  }
+} 
+
+// should be invoked with interrupt closed
+// Looks for a finished core (gccorestatus == 0) that filled fewer blocks
+// than its stop block and assigns its spare memory to the requester.
+// On success fills *startaddr, *tomove and *dstcore and returns true.
+// Otherwise records the request in gcrequiredmems[requiredcore], increments
+// gcmovepending and returns false.
+INLINE bool gcfindSpareMem_I(unsigned int * startaddr,
+                             unsigned int * tomove,
+                             unsigned int * dstcore,
+                             unsigned int requiredmem,
+                             unsigned int requiredcore) {
+  int core = 0;
+  while(core < NUMCORES4GC) {
+    bool finished = (gccorestatus[core] == 0);
+    if(finished && (gcfilledblocks[core] < gcstopblock[core])) {
+      // this stopped core still has spare mem; the amount left over after
+      // the assignment is not inspected here
+      assignSpareMem_I(core, requiredmem, tomove, startaddr);
+      *dstcore = core;
+      return true;
+    }
+    core++;
+  }
+  // can not find spare mem right now, hold the request
+  gcrequiredmems[requiredcore] = requiredmem;
+  gcmovepending++;
+  return false;
+} 
+
+// Runs one or more compaction rounds for this core.
+//   orig, to     - source and destination cursors
+//   filledblocks - in/out: number of blocks filled locally, reported to the
+//                  coordinator
+//   heaptopptr   - in/out: local heap-top address, reported to the
+//                  coordinator
+//   localcompact - in/out: whether the current destination is on this core
+// Returns false only on the startup core when it still has objects to move
+// and no spare memory could be found; returns true otherwise.
+INLINE bool compacthelper(struct moveHelper * orig,
+                          struct moveHelper * to,
+                          int * filledblocks,
+                          unsigned int * heaptopptr,
+                          bool * localcompact) {
+  // scan over all objs in this block, compact the marked objs
+  // loop stop when finishing either scanning all active objs or
+  // fulfilled the gcstopblock
+innercompact:
+  while((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
+    if(moveobj(orig, to, gcblock2fill)) {
+      break;
+    }
+  }
+  CACHEADAPT_SAMPLING_DATA_CONVERT(to->ptr);
+  // if no objs have been compact, do nothing,
+  // otherwise, fill the header of this block
+  if(to->offset > (unsigned int)BAMBOO_CACHE_LINE_SIZE) {
+    CLOSEBLOCK(to->base, to->offset);
+  } else {
+    // nothing was written past the header: give the header space back
+    to->offset = 0;
+    to->ptr = to->base;
+    to->top -= BAMBOO_CACHE_LINE_SIZE;
+  }  
+  // only a local destination updates the values reported for this core
+  if(*localcompact) {
+    *heaptopptr = to->ptr;
+    *filledblocks = to->numblocks;
+  }
+
+  // send msgs to core coordinator indicating that the compact is finishing
+  // send compact finish message to core coordinator
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    // this core is the coordinator: update the tables directly
+    gcfilledblocks[BAMBOO_NUM_OF_CORE] = *filledblocks;
+    gcloads[BAMBOO_NUM_OF_CORE] = *heaptopptr;
+    if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
+      // ask for more mem
+      gctomove = false;
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      if(gcfindSpareMem_I(&gcmovestartaddr, &gcblock2fill, &gcdstcore,
+            gccurr_heaptop, BAMBOO_NUM_OF_CORE)) {
+        gctomove = true;
+      } else {
+        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+        return false;
+      }
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    } else {
+      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+      gctomove = false;
+      return true;
+    }
+  } else {
+    if((unsigned int)(orig->ptr) < (unsigned int)gcmarkedptrbound) {
+      // ask for more mem
+      gctomove = false;
+      send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
+                 *filledblocks, *heaptopptr, gccurr_heaptop, false);
+    } else {
+      // finish compacting
+      send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
+                 *filledblocks, *heaptopptr, 0, false);
+    }
+  } 
+
+  if(orig->ptr < gcmarkedptrbound) {
+    // still have unpacked obj
+    // busy-wait until gctomove is set
+    // (presumably by the handler of the GCMOVESTART reply -- confirm)
+    while(true) {
+      if(gctomove) {
+        break;
+      }
+    }
+    ;
+    gctomove = false;
+
+    // re-target the destination cursor at the memory that was granted
+    to->ptr = gcmovestartaddr;
+    to->numblocks = gcblock2fill - 1;
+    // NOTE(review): both branches of this ternary are BAMBOO_SMEM_SIZE_L
+    to->bound = ((to->numblocks==0)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE_L)
+      +BAMBOO_SMEM_SIZE*to->numblocks;
+    BASEPTR(gcdstcore, to->numblocks, &(to->base));
+    to->offset = to->ptr - to->base;
+    to->top = (to->numblocks==0)?(to->offset)
+      :(to->bound-BAMBOO_SMEM_SIZE+to->offset);
+    // a new header starts at the granted address
+    to->base = to->ptr;
+    to->offset = BAMBOO_CACHE_LINE_SIZE;
+    to->ptr += to->offset;   // for header
+    to->top += to->offset;
+    if(gcdstcore == BAMBOO_NUM_OF_CORE) {
+      *localcompact = true;
+    } else {
+      *localcompact = false;
+    }
+    CACHEADAPT_SAMPLING_DATA_REVISE_INIT();
+    goto innercompact;
+  }
+  return true;
+}
+
+// Compaction entry point for a non-coordinator core.
+// Exits through BAMBOO_EXIT(0xb025) when gcphase is not COMPACTPHASE.
+// If there is nothing to compact, reports completion to STARTUPCORE with a
+// GCFINISHCOMPACT message; otherwise delegates to compacthelper().
+INLINE void compact() {
+  if(COMPACTPHASE != gcphase) {
+    BAMBOO_EXIT(0xb025);
+  }
+
+  // initialize pointers for compacting
+  struct moveHelper * orig = 
+    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+  struct moveHelper * to =
+    (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
+  if(!initOrig_Dst(orig, to)) {
+    // no available data to compact
+    // send compact finish msg to STARTUP core
+    send_msg_5(STARTUPCORE, GCFINISHCOMPACT, BAMBOO_NUM_OF_CORE,
+               0, to->base, 0, false);
+    RUNFREE(orig);
+    RUNFREE(to);
+    return;
+  }
+  CACHEADAPT_SAMPLING_DATA_REVISE_INIT();
+
+  // int, to match compacthelper's "int * filledblocks" parameter
+  int filledblocks = 0;
+  unsigned int heaptopptr = 0;
+  bool localcompact = true;
+  compacthelper(orig, to, &filledblocks, &heaptopptr, &localcompact);
+  RUNFREE(orig);
+  RUNFREE(to);
+} 
+
+// Compaction loop of the coordinator core. While gcphase is COMPACTPHASE or
+// SUBTLECOMPACTPHASE it alternates between compacting its own objects and
+// serving pending move requests of other cores, until
+// gc_checkCoreStatus_I() reports that all GC cores have finished.
+//   orig, to - caller-provided source and destination cursors
+INLINE void compact_master(struct moveHelper * orig,
+                           struct moveHelper * to) {
+  // initialize pointers for compacting
+  initOrig_Dst(orig, to);
+  CACHEADAPT_SAMPLING_DATA_REVISE_INIT();
+  int filledblocks = 0;
+  unsigned int heaptopptr = 0;
+  bool finishcompact = false;
+  bool iscontinue = true;
+  bool localcompact = true;
+  while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
+    // compact locally unless done or blocked waiting for memory
+    if((!finishcompact) && iscontinue) {
+      finishcompact =
+        compacthelper(orig,to,&filledblocks,&heaptopptr,&localcompact);
+    }
+
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    if(gc_checkCoreStatus_I()) {
+      // all cores have finished compacting
+      // restore the gcstatus of all cores
+      for(int i = 0; i < NUMCORES4GC; ++i) {
+        gccorestatus[i] = 1;
+      }
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      break;
+    } else {
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      // check if there are spare mem for pending move requires
+      if(COMPACTPHASE == gcphase) {
+        resolvePendingMoveRequest();
+      } else {
+        compact2Heaptop();
+      }
+    } 
+
+    if(gctomove) {
+      // memory was granted to this core: re-target the destination cursor
+      to->ptr = gcmovestartaddr;
+      to->numblocks = gcblock2fill - 1;
+      to->bound = (to->numblocks==0) ? BAMBOO_SMEM_SIZE_L :
+        BAMBOO_SMEM_SIZE_L+BAMBOO_SMEM_SIZE*to->numblocks;
+      BASEPTR(gcdstcore, to->numblocks, &(to->base));
+      to->offset = to->ptr - to->base;
+      to->top = (to->numblocks==0)?(to->offset):
+        (to->bound-BAMBOO_SMEM_SIZE+to->offset);
+      // a new header starts at the granted address
+      to->base = to->ptr;
+      to->offset = BAMBOO_CACHE_LINE_SIZE;
+      to->ptr += to->offset;  // for header
+      to->top += to->offset;
+      if(gcdstcore == BAMBOO_NUM_OF_CORE) {
+        localcompact = true;
+      } else {
+        localcompact = false;
+      }
+      gctomove = false;
+      iscontinue = true;
+    } else if(!finishcompact) {
+      // still pending
+      iscontinue = false;
+    }
+  }
+}
+
+#endif // MULTICORE_GC
diff --git a/Robust/src/Runtime/bamboo/multicoregccompact.h b/Robust/src/Runtime/bamboo/multicoregccompact.h
new file mode 100644 (file)
index 0000000..e930030
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef BAMBOO_MULTICORE_GC_COMPACT_H
+#define BAMBOO_MULTICORE_GC_COMPACT_H
+
+#ifdef MULTICORE_GC
+#include "multicore.h"
+
+// Cursor over the shared heap used while compacting; one instance tracks the
+// source position and another the destination position.
+struct moveHelper {
+  unsigned int numblocks;       // block num for heap
+  unsigned int base;       // base virtual address of current heap block
+  unsigned int ptr;       // virtual address of current heap top
+  unsigned int offset;       // offset in current heap block
+  unsigned int blockbase;   // virtual address of current small block to check
+  unsigned int blockbound;     // bound virtual address of current small block
+  unsigned int sblockindex;       // index of the small blocks
+  unsigned int top;       // real size of current heap block to check
+  unsigned int bound;       // bound size of current heap block to check
+};
+
+// compute the remaining size of block #b
+// p--relative position to the bottom of the shared heap
+// Blocks with index below NUMCORES4GC are BAMBOO_SMEM_SIZE_L bytes large,
+// all others BAMBOO_SMEM_SIZE bytes.
+// The whole expansion is parenthesized so that operators next to the macro
+// invocation cannot bind into the conditional expression.
+#define GC_BLOCK_REMAIN_SIZE(b, p) \
+  (((b)<NUMCORES4GC)?((BAMBOO_SMEM_SIZE_L)-((p)%(BAMBOO_SMEM_SIZE_L))):((BAMBOO_SMEM_SIZE)-((p)%(BAMBOO_SMEM_SIZE))))
+
+INLINE bool gcfindSpareMem_I(unsigned int * startaddr,
+                             unsigned int * tomove,
+                             unsigned int * dstcore,
+                             unsigned int requiredmem,
+                             unsigned int requiredcore);
+INLINE void compact();
+INLINE void compact_master(struct moveHelper * orig,
+                           struct moveHelper * to);
+#endif // MULTICORE_GC
+
+#endif // BAMBOO_MULTICORE_GC_COMPACT_H
diff --git a/Robust/src/Runtime/bamboo/multicoregcflush.c b/Robust/src/Runtime/bamboo/multicoregcflush.c
new file mode 100644 (file)
index 0000000..ae22542
--- /dev/null
@@ -0,0 +1,293 @@
+#ifdef MULTICORE_GC
+#include "multicoregcflush.h"
+#include "multicoreruntime.h"
+#include "ObjectHash.h"
+#include "GenericHashtable.h"
+
+extern int corenum;
+#ifdef TASK
+extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
+extern int numqueues[][NUMCLASSES];
+extern struct genhashtable * activetasks;
+extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
+extern struct taskparamdescriptor *currtpd;
+extern struct LockValue runtime_locks[MAXTASKPARAMS];
+extern int runtime_locklen;
+#endif
+
+extern struct global_defs_t * global_defs_p;
+
+#ifdef SMEMM
+extern unsigned int gcmem_mixed_threshold;
+extern unsigned int gcmem_mixed_usedmem;
+#endif
+
+#ifdef MGC
+extern struct lockvector bamboo_threadlocks;
+#endif
+
+extern struct pointerblock *gchead;
+extern int gcheadindex;
+extern struct pointerblock *gctail;
+extern int gctailindex;
+extern struct pointerblock *gctail2;
+extern int gctailindex2;
+extern struct pointerblock *gcspare;
+
+extern struct lobjpointerblock *gclobjhead;
+extern int gclobjheadindex;
+extern struct lobjpointerblock *gclobjtail;
+extern int gclobjtailindex;
+extern struct lobjpointerblock *gclobjtail2;
+extern int gclobjtailindex2;
+extern struct lobjpointerblock *gclobjspare;
+
+// Looks up the post-compaction address recorded for objptr in gcmappingtbl.
+// NOTE: objptr must be neither NULL nor a non-shared pointer.
+// linenum, ptr and tt are not used by the lookup.
+INLINE void * flushObj(void * objptr, int linenum, void * ptr, int tt) {
+  unsigned int slot = OBJMAPPINGINDEX((unsigned int)objptr);
+  void * moved = gcmappingtbl[slot];
+  return moved;
+}
+
+// Rewrites every object reference held by the runtime itself (stack roots,
+// static roots, task queues, lock tables, thread structures) to the
+// address returned by flushObj().
+//   stackptr - head of the garbagelist chain describing the current stack
+INLINE void flushRuntimeObj(struct garbagelist * stackptr) {
+  int i,j;
+  // flush current stack
+  while(stackptr!=NULL) {
+    for(i=0; i<stackptr->size; i++) {
+      if(stackptr->array[i] != NULL) {
+        stackptr->array[i] = 
+          flushObj(stackptr->array[i], __LINE__, stackptr->array[i], i);
+      }
+    }
+    stackptr=stackptr->next;
+  }
+
+  // flush static pointers global_defs_p (only done on the startup core)
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    struct garbagelist * staticptr=(struct garbagelist *)global_defs_p;
+    for(i=0; i<staticptr->size; i++) {
+      if(staticptr->array[i] != NULL) {
+        staticptr->array[i] = 
+          flushObj(staticptr->array[i], __LINE__, staticptr->array[i], i);
+      }
+    }
+  }
+
+#ifdef TASK
+  // flush objectsets
+  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+    for(i=0; i<NUMCLASSES; i++) {
+      struct parameterwrapper ** queues = objectqueues[BAMBOO_NUM_OF_CORE][i];
+      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
+      for(j = 0; j < length; ++j) {
+        struct parameterwrapper * parameter = queues[j];
+        struct ObjectHash * set=parameter->objectset;
+        struct ObjectNode * ptr=set->listhead;
+        while(ptr!=NULL) {
+          ptr->key = flushObj((void *)ptr->key, __LINE__, (void *)ptr->key, 0);
+          ptr=ptr->lnext;
+        }
+        // keys changed, so the hash must be rebuilt
+        ObjectHashrehash(set);
+      }
+    }
+  }
+
+  // flush current task descriptor
+  if(currtpd != NULL) {
+    for(i=0; i<currtpd->numParameters; i++) {
+      // the parameter can not be NULL
+      currtpd->parameterArray[i] = flushObj(currtpd->parameterArray[i], 
+          __LINE__, currtpd->parameterArray[i], i);
+    }
+  }
+
+  // flush active tasks
+  if(activetasks != NULL) {
+    struct genpointerlist * ptr=activetasks->list;
+    while(ptr!=NULL) {
+      struct taskparamdescriptor *tpd=ptr->src;
+      // NOTE(review): this i shadows the function-level i
+      int i;
+      for(i=0; i<tpd->numParameters; i++) {
+        // the parameter can not be NULL
+        tpd->parameterArray[i] = 
+          flushObj(tpd->parameterArray[i], __LINE__, tpd->parameterArray[i], i);
+      }
+      ptr=ptr->inext;
+    }
+    // parameter addresses changed, so the hash must be rebuilt
+    genrehash(activetasks);
+  }
+
+  // flush cached transferred obj
+  struct QueueItem * tmpobjptr =  getHead(&objqueue);
+  while(tmpobjptr != NULL) {
+    struct transObjInfo * objInfo=(struct transObjInfo *)(tmpobjptr->objectptr);
+    // the obj can not be NULL
+    objInfo->objptr = flushObj(objInfo->objptr, __LINE__, objInfo->objptr, 0);
+    tmpobjptr = getNextQueueItem(tmpobjptr);
+  }
+
+  // flush cached objs to be transferred
+  struct QueueItem * item = getHead(totransobjqueue);
+  while(item != NULL) {
+    struct transObjInfo * totransobj = (struct transObjInfo *)(item->objectptr);
+    // the obj can not be NULL
+    totransobj->objptr = 
+      flushObj(totransobj->objptr, __LINE__, totransobj->objptr, 0);
+    item = getNextQueueItem(item);
+  }  
+
+  // flush lock related info
+  for(i = 0; i < runtime_locklen; ++i) {
+    if(runtime_locks[i].redirectlock != NULL) {
+      runtime_locks[i].redirectlock = flushObj(runtime_locks[i].redirectlock, 
+          __LINE__, runtime_locks[i].redirectlock, i);
+    }
+    if(runtime_locks[i].value != NULL) {
+      runtime_locks[i].value = flushObj(runtime_locks[i].value, 
+          __LINE__, runtime_locks[i].value, i);
+    }
+  }
+#endif
+
+#ifdef MGC
+  // flush the bamboo_threadlocks
+  for(i = 0; i < bamboo_threadlocks.index; i++) {
+    // the locked obj can not be NULL
+    bamboo_threadlocks.locks[i].object = 
+      flushObj((void *)(bamboo_threadlocks.locks[i].object), 
+          __LINE__, (void *)(bamboo_threadlocks.locks[i].object), i);
+  }
+
+  // flush the bamboo_current_thread
+  if(bamboo_current_thread != 0) {
+    bamboo_current_thread = 
+      (unsigned int)(flushObj((void *)bamboo_current_thread, 
+            __LINE__, (void *)bamboo_current_thread, 0));
+  }
+
+  // flush global thread queue (only done on the startup core)
+  // queue layout as used here: word 1 = number of threads, word 2 = start
+  // index, entries begin at word 4
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
+    if(thread_counter > 0) {
+      unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
+      for(i = thread_counter; i > 0; i--) {
+        // the thread obj can not be NULL
+        bamboo_thread_queue[4+start] = 
+          (INTPTR)(flushObj((void *)bamboo_thread_queue[4+start], 
+                __LINE__, (void *)bamboo_thread_queue, 0));
+        start = (start+1)&bamboo_max_thread_num_mask;
+      }
+    }
+    // NOTE(review): no matching lock call is visible in this function --
+    // confirm the queue lock is taken by the caller or an earlier GC phase
+    unlockthreadqueue();
+  }
+#endif
+}
+
+// Rewrites, via flushObj(), every non-NULL reference field of the object at
+// ptr whose byte offset is listed in offsets; offsets[0] is the number of
+// entries and offsets[1..offsets[0]] are the field offsets.
+static void flushPtrFields(void * ptr, unsigned int * offsets) {
+  unsigned int size=offsets[0];
+  int i;
+  for(i=1; i<=size; i++) {
+    unsigned int offset=offsets[i];
+    void ** field=(void **)(((char *)ptr)+offset);
+    void * objptr=*field;
+    if(objptr != NULL) {
+      *field = flushObj(objptr, __LINE__, ptr, i);
+    }
+  }
+}
+
+// Updates all references stored inside the object at ptr to the addresses
+// returned by flushObj(). The layout is taken from pointerarray[type]:
+//   0     - array of primitives: only the Object-class fields are flushed
+//   1     - array of pointers: every element, then the Object-class fields
+//   other - an offset table for the object's own reference fields
+INLINE void flushPtrsInObj(void * ptr) {
+  int type = ((int *)(ptr))[0];
+  // scan all pointers in ptr
+  unsigned int * pointer;
+  pointer=pointerarray[type];
+  if (pointer==0) {
+    /* Array of primitives */
+    //handle object class
+    flushPtrFields(ptr, pointerarray[OBJECTTYPE]);
+  } else if (((unsigned int)pointer)==1) {
+    /* Array of pointers */
+    struct ArrayObject *ao=(struct ArrayObject *) ptr;
+    int length=ao->___length___;
+    // the elements start right after the ___length___ field
+    void ** elements=(void **)(((char *)&ao->___length___)+sizeof(int));
+    int j;
+    for(j=0; j<length; j++) {
+      void *objptr=elements[j];
+      if(objptr != NULL) {
+        elements[j] = flushObj(objptr, __LINE__, ptr, j);
+      }
+    }
+    //handle object class
+    flushPtrFields(ptr, pointerarray[OBJECTTYPE]);
+  } else {
+    flushPtrFields(ptr, pointer);
+  }  
+}
+
+// Flush phase of the collector on this core: rewrites runtime roots, then
+// the references inside every queued object and large object, and finally
+// reports completion to the coordinator.
+//   stackptr - head of the garbagelist chain describing the current stack
+INLINE void flush(struct garbagelist * stackptr) {
+
+  BAMBOO_CACHE_MF();
+
+  flushRuntimeObj(stackptr);
+
+  // drain the mark queue; the queue itself is only touched in runtime mode
+  while(true) {
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    if(!gc_moreItems_I()) {
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      break;
+    }
+
+    unsigned int ptr = gc_dequeue_I();
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+    // should be a local shared obj and should have mapping info
+    ptr = flushObj(ptr, __LINE__, ptr, 0);
+    if(ptr == NULL) {
+      BAMBOO_EXIT(0xb02a);
+    }
+    // only objects still in COMPACTED state need their fields rewritten
+    if(((struct ___Object___ *)ptr)->marked == COMPACTED) {
+      flushPtrsInObj((void *)ptr);
+      // restore the mark field, indicating that this obj has been flushed
+      ((struct ___Object___ *)ptr)->marked = INIT;
+    }
+  } 
+
+  // TODO bug here: the startup core contains all lobjs' info, thus all the
+  // lobjs are flushed in sequence.
+  // flush lobjs
+  // NOTE(review): unlike the loop above, this queue is accessed without
+  // entering runtime mode -- confirm this is intended
+  while(gc_lobjmoreItems_I()) {
+    unsigned int ptr = gc_lobjdequeue_I(NULL, NULL);
+    ptr = flushObj(ptr, __LINE__, ptr, 0);
+    if(ptr == NULL) {
+      BAMBOO_EXIT(0xb02d);
+    }
+    if(((struct ___Object___ *)ptr)->marked == COMPACTED) {
+      flushPtrsInObj((void *)ptr);
+      // restore the mark field, indicating that this obj has been flushed
+      ((struct ___Object___ *)ptr)->marked = INIT;
+    }     
+  } 
+
+  // send flush finish message to core coordinator
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+  } else {
+    send_msg_2(STARTUPCORE, GCFINISHFLUSH, BAMBOO_NUM_OF_CORE, false);
+  }
+} 
+
+#endif // MULTICORE_GC
diff --git a/Robust/src/Runtime/bamboo/multicoregcflush.h b/Robust/src/Runtime/bamboo/multicoregcflush.h
new file mode 100644 (file)
index 0000000..c13336d
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef BAMBOO_MULTICORE_GC_FLUSH_H
+#define BAMBOO_MULTICORE_GC_FLUSH_H
+// Declares flush(); the contents are compiled only when MULTICORE_GC is defined.
+#ifdef MULTICORE_GC
+#include "multicore.h"
+#include "runtime.h"
+// stackptr is handed on to flushRuntimeObj() by the implementation
+INLINE void flush(struct garbagelist * stackptr);
+#endif // MULTICORE_GC
+#endif // BAMBOO_MULTICORE_GC_FLUSH_H
diff --git a/Robust/src/Runtime/bamboo/multicoregcmark.c b/Robust/src/Runtime/bamboo/multicoregcmark.c
new file mode 100644 (file)
index 0000000..9752f5f
--- /dev/null
@@ -0,0 +1,546 @@
+#ifdef MULTICORE_GC
+#include "multicoregcmark.h"
+#include "runtime.h"
+#include "multicoreruntime.h"
+#include "GenericHashtable.h"
+
+extern int corenum;
+#ifdef TASK
+extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
+extern int numqueues[][NUMCLASSES];
+extern struct genhashtable * activetasks;
+extern struct parameterwrapper ** objectqueues[][NUMCLASSES];
+extern struct taskparamdescriptor *currtpd;
+extern struct LockValue runtime_locks[MAXTASKPARAMS];
+extern int runtime_locklen;
+#endif
+
+extern struct global_defs_t * global_defs_p;
+
+#ifdef SMEMM
+extern unsigned int gcmem_mixed_threshold;
+extern unsigned int gcmem_mixed_usedmem;
+#endif
+
+#ifdef MGC
+extern struct lockvector bamboo_threadlocks;
+#endif
+
+extern struct pointerblock *gchead;
+extern int gcheadindex;
+extern struct pointerblock *gctail;
+extern int gctailindex;
+extern struct pointerblock *gctail2;
+extern int gctailindex2;
+extern struct pointerblock *gcspare;
+
+extern struct lobjpointerblock *gclobjhead;
+extern int gclobjheadindex;
+extern struct lobjpointerblock *gclobjtail;
+extern int gclobjtailindex;
+extern struct lobjpointerblock *gclobjtail2;
+extern int gclobjtailindex2;
+extern struct lobjpointerblock *gclobjspare;
+
+// Append ptr at the head of the GC pointer queue; a full head block (NUMPTRS entries) is followed by gcspare if set, else by a RUNMALLOC_I'd block. Should be invoked with interruption closed.
+INLINE void gc_enqueue_I(unsigned int ptr) {
+  if (gcheadindex==NUMPTRS) {
+    struct pointerblock * tmp;
+    if (gcspare!=NULL) {
+      tmp=gcspare;
+      gcspare=NULL;
+      tmp->next = NULL;
+    } else {
+      tmp=RUNMALLOC_I(sizeof(struct pointerblock)); // NOTE(review): next is not set here -- presumably RUNMALLOC_I returns zeroed memory; confirm
+    } 
+    gchead->next=tmp;
+    gchead=tmp;
+    gcheadindex=0;
+  } 
+  gchead->ptrs[gcheadindex++]=ptr;
+} 
+
+// Pop the oldest entry of the GC pointer queue and consume the queue: an exhausted tail block becomes gcspare if that slot is empty, otherwise it is freed.
+INLINE unsigned int gc_dequeue_I() {
+  if (gctailindex==NUMPTRS) {
+    struct pointerblock *tmp=gctail;
+    gctail=gctail->next;
+    gctailindex=0;
+    if (gcspare!=NULL) {
+      RUNFREE_I(tmp);
+    } else {
+      gcspare=tmp;
+      gcspare->next = NULL;
+    } 
+  } 
+  // no emptiness check is made here; flush() tests gc_moreItems_I() before calling
+  return gctail->ptrs[gctailindex++];
+} 
+
+// Read the next entry through the second cursor (gctail2/gctailindex2); no block is freed or recycled, so the queue stays intact.
+INLINE unsigned int gc_dequeue2_I() {
+  if (gctailindex2==NUMPTRS) {
+    // current block exhausted: step the cursor to the next block, which stays linked in the queue
+    gctail2=gctail2->next;
+    gctailindex2=0;
+  } 
+  // no emptiness check is made here; mark() tests gc_moreItems2_I() before calling
+  return gctail2->ptrs[gctailindex2++];
+}
+
+INLINE int gc_moreItems_I() { // 1 while the consuming cursor (gctail/gctailindex) has not reached the head, else 0
+  return (gchead!=gctail)||(gctailindex!=gcheadindex);
+} 
+
+INLINE int gc_moreItems2_I() { // 1 while the non-consuming cursor (gctail2/gctailindex2) has not reached the head, else 0
+  return (gchead!=gctail2)||(gctailindex2!=gcheadindex);
+} 
+
+// Append a large object (start addr, length, host core) at the head of the large-object queue; should be invoked with interruption closed.
+// A full head block (NUMLOBJPTRS entries) is followed by gclobjspare if set, else by a RUNMALLOC_I'd block, doubly linked via next/prev.
+INLINE void gc_lobjenqueue_I(unsigned int ptr,
+                             unsigned int length,
+                             unsigned int host) {
+  if (gclobjheadindex==NUMLOBJPTRS) {
+    struct lobjpointerblock * tmp;
+    if (gclobjspare!=NULL) {
+      tmp=gclobjspare;
+      gclobjspare=NULL;
+      tmp->next = NULL;
+      tmp->prev = NULL;
+    } else {
+      tmp=RUNMALLOC_I(sizeof(struct lobjpointerblock));
+    }  
+    gclobjhead->next=tmp;
+    tmp->prev = gclobjhead;
+    gclobjhead=tmp;
+    gclobjheadindex=0;
+  } 
+  gclobjhead->lobjs[gclobjheadindex]=ptr;
+  gclobjhead->lengths[gclobjheadindex]=length;
+  gclobjhead->hosts[gclobjheadindex++]=host;
+} 
+
+// Pop the oldest large object and consume the queue: an exhausted tail block becomes gclobjspare if that slot is empty, otherwise it is freed. length and host are optional outputs (may be NULL).
+INLINE unsigned int gc_lobjdequeue_I(unsigned int * length,
+                                     unsigned int * host) {
+  if (gclobjtailindex==NUMLOBJPTRS) {
+    struct lobjpointerblock *tmp=gclobjtail;
+    gclobjtail=gclobjtail->next;
+    gclobjtailindex=0;
+    gclobjtail->prev = NULL;
+    if (gclobjspare!=NULL) {
+      RUNFREE_I(tmp);
+    } else {
+      gclobjspare=tmp;
+      tmp->next = NULL;
+      tmp->prev = NULL;
+    }  
+  } 
+  if(length != NULL) {
+    *length = gclobjtail->lengths[gclobjtailindex];
+  }
+  if(host != NULL) {
+    *host = (unsigned int)(gclobjtail->hosts[gclobjtailindex]);
+  }
+  return gclobjtail->lobjs[gclobjtailindex++];
+} 
+
+INLINE int gc_lobjmoreItems_I() { // 1 while the consuming cursor (gclobjtail/gclobjtailindex) has not reached the head, else 0
+  return (gclobjhead!=gclobjtail)||(gclobjtailindex!=gclobjheadindex);
+} 
+
+// Advance the second cursor (gclobjtail2/gclobjtailindex2) by one entry without returning it and without destroying the queue.
+INLINE void gc_lobjdequeue2_I() {
+  if (gclobjtailindex2==NUMLOBJPTRS) {
+    gclobjtail2=gclobjtail2->next;
+    gclobjtailindex2=1; // same as moving to slot 0 of the next block and stepping past it
+  } else {
+    gclobjtailindex2++;
+  }  
+}
+
+INLINE int gc_lobjmoreItems2_I() { // 1 while the second cursor (gclobjtail2/gclobjtailindex2) has not reached the head, else 0
+  return (gclobjhead!=gclobjtail2)||(gclobjtailindex2!=gclobjheadindex);
+} 
+
+// Step the second cursor (gclobjtail2/gclobjtailindex2) one entry backwards, following prev links; the queue is not destroyed.
+INLINE void gc_lobjdequeue3_I() {
+  if (gclobjtailindex2==0) {
+    gclobjtail2=gclobjtail2->prev;
+    gclobjtailindex2=NUMLOBJPTRS-1; // last slot of the previous block
+  } else {
+    gclobjtailindex2--;
+  }  
+}
+
+INLINE int gc_lobjmoreItems3_I() { // 1 while the second cursor differs from the consuming tail cursor, else 0
+  return (gclobjtail!=gclobjtail2)||(gclobjtailindex2!=gclobjtailindex);
+} 
+
+INLINE void gc_lobjqueueinit4_I() { // rewind the second cursor to the current tail position
+  gclobjtailindex2 = gclobjtailindex;
+  gclobjtail2 = gclobjtail;
+} 
+
+INLINE unsigned int gc_lobjdequeue4_I(unsigned int * length, // optional output (may be NULL): length of the returned entry
+                                      unsigned int * host) { // optional output (may be NULL): host of the returned entry
+  if (gclobjtailindex2==NUMLOBJPTRS) { // second cursor ran off its block: continue at slot 0 of the next one
+    gclobjtail2=gclobjtail2->next;
+    gclobjtailindex2=0;
+  } 
+  if(length != NULL) {
+    *length = gclobjtail2->lengths[gclobjtailindex2];
+  }
+  if(host != NULL) {
+    *host = (unsigned int)(gclobjtail2->hosts[gclobjtailindex2]);
+  }
+  return gclobjtail2->lobjs[gclobjtailindex2++]; // read through the second cursor only; no block is freed
+} 
+
+INLINE int gc_lobjmoreItems4_I() { // 1 while the second cursor (gclobjtail2/gclobjtailindex2) has not reached the head, else 0
+  return (gclobjhead!=gclobjtail2)||(gclobjtailindex2!=gclobjheadindex);
+}
+
+INLINE void gettype_size(void * ptr, // object whose first int is read as its type id
+                         int * ttype, // output: the type id
+                         unsigned int * tsize) { // output: the computed size
+  int type = ((int *)ptr)[0];
+  unsigned int size = 0;
+  if(type < NUMCLASSES) {
+    // a normal object: its size comes straight from classsize[]
+    size = classsize[type];
+  } else {
+    // an array: header plus ___length___ elements, with classsize[type] used as the element size
+    struct ArrayObject *ao=(struct ArrayObject *)ptr;
+    unsigned int elementsize=classsize[type];
+    unsigned int length=ao->___length___;
+    size=sizeof(struct ArrayObject)+length*elementsize;
+  } 
+  *ttype = type;
+  *tsize = size;
+}
+
+INLINE bool isLarge(void * ptr,
+                    int * ttype,
+                    unsigned int * tsize) {
+  // returns true when ptr is treated as a large object; also stores the object's type and size through ttype/tsize
+  gettype_size(ptr, ttype, tsize);
+  unsigned int bound = (BAMBOO_SMEM_SIZE);
+  if(((unsigned int)ptr-gcbaseva) < (BAMBOO_LARGE_SMEM_BOUND)) { // offsets below this bound use the BAMBOO_SMEM_SIZE_L block size
+    bound = (BAMBOO_SMEM_SIZE_L);
+  }
+  // ptr is at the start of a block OR the object crosses the boundary of its block
+  return (((((unsigned int)ptr-gcbaseva)%(bound))==0)||
+      ((bound-(((unsigned int)ptr-gcbaseva)%bound)) < (*tsize)));
+} 
+
+INLINE unsigned int hostcore(void * ptr) {
+  // return the host core of ptr as computed by the RESIDECORE macro (defined outside this file section)
+  unsigned int host = 0;
+  RESIDECORE(ptr, &host);
+  return host;
+} 
+
+// NOTE: the objptr should not be NULL and should be a shared obj. linenum, ptr and ii are not used in this body.
+INLINE void markObj(void * objptr, int linenum, void * ptr, int ii) {
+  unsigned int host = hostcore(objptr);
+  if(BAMBOO_NUM_OF_CORE == host) {
+    // on this core: the mark-field test and the enqueue are done in runtime mode
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    if(((struct ___Object___ *)objptr)->marked == INIT) {
+      // this is the first time that this object is discovered,
+      // set the flag as DISCOVERED
+      ((struct ___Object___ *)objptr)->marked = DISCOVERED;
+      BAMBOO_CACHE_FLUSH_LINE(objptr);
+      gc_enqueue_I(objptr);
+    }
+    BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+  } else {
+    // hosted on another core: check if this obj has been forwarded (gcforwardobjtbl holds the ones already sent)
+    if(!MGCHashcontains(gcforwardobjtbl, (int)objptr)) {
+      // send a msg to host informing that objptr is active
+      send_msg_2(host, GCMARKEDOBJ, objptr, false);
+      GCPROFILE_RECORD_FORWARD_OBJ();
+      gcself_numsendobjs++;
+      MGCHashadd(gcforwardobjtbl, (int)objptr);
+    }
+  }
+} 
+
+// enqueue root objs: calls markObj() on every root visible to this core; exits with 0xb010 if gcphase is not MARKPHASE
+INLINE void tomark(struct garbagelist * stackptr) {
+  if(MARKPHASE != gcphase) {
+    BAMBOO_EXIT(0xb010);
+  }
+  gcbusystatus = true;
+  gcnumlobjs = 0;
+
+  int i,j;
+  // enqueue current stack: walk the garbagelist chain and mark every non-NULL slot
+  while(stackptr!=NULL) {
+    for(i=0; i<stackptr->size; i++) {
+      if(stackptr->array[i] != NULL) {
+        markObj(stackptr->array[i], __LINE__, stackptr->array[i], i);
+      }
+    }
+    stackptr=stackptr->next;
+  }
+
+  // enqueue static pointers global_defs_p (startup core only); global_defs_p is walked as a garbagelist chain
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    struct garbagelist * staticptr=(struct garbagelist *)global_defs_p;
+    while(staticptr != NULL) {
+      for(i=0; i<staticptr->size; i++) {
+        if(staticptr->array[i] != NULL) {
+          markObj(staticptr->array[i], __LINE__, staticptr->array[i], i);
+        }
+      }
+      staticptr = staticptr->next;
+    }
+  }
+
+#ifdef TASK
+  // enqueue objectsets: every key in every parameter queue of this core, for each class
+  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
+    for(i=0; i<NUMCLASSES; i++) {
+      struct parameterwrapper ** queues = objectqueues[BAMBOO_NUM_OF_CORE][i];
+      int length = numqueues[BAMBOO_NUM_OF_CORE][i];
+      for(j = 0; j < length; ++j) {
+        struct parameterwrapper * parameter = queues[j];
+        struct ObjectHash * set=parameter->objectset;
+        struct ObjectNode * ptr=set->listhead;
+        while(ptr!=NULL) {
+          markObj((void *)ptr->key, __LINE__, ptr, 0);
+          ptr=ptr->lnext;
+        }
+      }
+    }
+  }
+
+  // enqueue current task descriptor
+  if(currtpd != NULL) {
+    for(i=0; i<currtpd->numParameters; i++) {
+      // currtpd->parameterArray[i] can not be NULL
+      markObj(currtpd->parameterArray[i],__LINE__,currtpd->parameterArray[i],i);
+    }
+  }
+
+  // enqueue active tasks
+  if(activetasks != NULL) {
+    struct genpointerlist * ptr=activetasks->list;
+    while(ptr!=NULL) {
+      struct taskparamdescriptor *tpd=ptr->src;
+      int i;
+      for(i=0; i<tpd->numParameters; i++) {
+        // the tpd->parameterArray[i] can not be NULL
+        markObj(tpd->parameterArray[i], __LINE__, tpd->parameterArray[i], i);
+      }
+      ptr=ptr->inext;
+    }
+  }
+
+  // enqueue cached transferred obj
+  struct QueueItem * tmpobjptr =  getHead(&objqueue);
+  while(tmpobjptr != NULL) {
+    struct transObjInfo * objInfo=(struct transObjInfo *)(tmpobjptr->objectptr);
+    // the objptr can not be NULL
+    markObj(objInfo->objptr, __LINE__, objInfo->objptr, 0);
+    tmpobjptr = getNextQueueItem(tmpobjptr);
+  }
+
+  // enqueue cached objs to be transferred
+  struct QueueItem * item = getHead(totransobjqueue);
+  while(item != NULL) {
+    struct transObjInfo * totransobj=(struct transObjInfo *)(item->objectptr);
+    // the objptr can not be NULL
+    markObj(totransobj->objptr, __LINE__, totransobj->objptr, 0);
+    item = getNextQueueItem(item);
+  } // while(item != NULL)
+
+  // enqueue lock related info: both the redirectlock and the value of each runtime lock, when non-NULL
+  for(i = 0; i < runtime_locklen; ++i) {
+    if(runtime_locks[i].redirectlock != NULL) {
+      markObj((void *)(runtime_locks[i].redirectlock), __LINE__, 
+          (void *)(runtime_locks[i].redirectlock), 0);
+    }
+    if(runtime_locks[i].value != NULL) {
+      markObj((void *)(runtime_locks[i].value), __LINE__, 
+          (void *)(runtime_locks[i].value), i);
+    }
+  }
+#endif 
+
+#ifdef MGC
+  // enqueue global thread queue (startup core only): word 1 is read as the thread count, word 2 as the start index, entries start at word 4
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    lockthreadqueue(); // NOTE(review): no matching unlock is visible in this function -- confirm where the queue is released
+    unsigned int thread_counter = *((unsigned int*)(bamboo_thread_queue+1));
+    if(thread_counter > 0) {
+      unsigned int start = *((unsigned int*)(bamboo_thread_queue+2));
+      for(i = thread_counter; i > 0; i--) {
+        // the thread obj can not be NULL
+        markObj((void *)bamboo_thread_queue[4+start], __LINE__,
+            (void *)bamboo_thread_queue[4+start], 0);
+        start = (start+1)&bamboo_max_thread_num_mask;
+      }
+    }
+  }
+
+  // enqueue the bamboo_threadlocks
+  for(i = 0; i < bamboo_threadlocks.index; i++) {
+    // the locks can not be NULL
+    markObj((void *)(bamboo_threadlocks.locks[i].object), __LINE__,
+        (void *)(bamboo_threadlocks.locks[i].object), i);
+  }
+
+  // enqueue the bamboo_current_thread
+  if(bamboo_current_thread != 0) {
+    markObj((void *)bamboo_current_thread, __LINE__, 
+        (void *)bamboo_current_thread, 0);
+  }
+#endif
+}
+
+INLINE void scanPtrsInObj(void * ptr,
+                          int type) {
+  // scan all pointers in ptr and call markObj() on each non-NULL one; pointerarray[type] selects one of three layouts
+  unsigned int * pointer;
+  pointer=pointerarray[type];
+  if (pointer==0) {
+    /* Array of primitives: only the fields listed for OBJECTTYPE are scanned */
+    pointer=pointerarray[OBJECTTYPE];
+    //handle object class: pointer[0] is the field count, pointer[1..count] are byte offsets into the object
+    unsigned int size=pointer[0];
+    int i;
+    for(i=1; i<=size; i++) {
+      unsigned int offset=pointer[i];
+      void * objptr=*((void **)(((char *)ptr)+offset));
+      if(objptr != NULL) {
+        markObj(objptr, __LINE__, ptr, i);
+      }
+    }
+  } else if (((unsigned int)pointer)==1) {
+    /* Array of pointers: the elements are read starting sizeof(int) bytes after ___length___ */
+    struct ArrayObject *ao=(struct ArrayObject *) ptr;
+    int length=ao->___length___;
+    int j;
+    for(j=0; j<length; j++) {
+      void *objptr=((void **)(((char *)&ao->___length___)+sizeof(int)))[j];
+      if(objptr != NULL) {
+        markObj(objptr, __LINE__, ptr, j);
+      }
+    }
+    {
+      pointer=pointerarray[OBJECTTYPE];
+      //handle object class: the OBJECTTYPE fields are scanned as well, after the elements
+      unsigned int size=pointer[0];
+      int i;
+      for(i=1; i<=size; i++) {
+        unsigned int offset=pointer[i];
+        void * objptr=*((void **)(((char *)ptr)+offset));
+        if(objptr != NULL) {
+          markObj(objptr, __LINE__, ptr, i);
+        }
+     }
+    }
+  } else {
+    unsigned int size=pointer[0]; // ordinary object: pointer[0] is the field count, pointer[1..count] are byte offsets
+    int i;
+    for(i=1; i<=size; i++) {
+      unsigned int offset=pointer[i];
+      void * objptr=*((void **)(((char *)ptr)+offset));
+      if(objptr != NULL) {
+        markObj(objptr, __LINE__, ptr, i);
+      }
+    }
+  }
+}
+
+INLINE void mark(bool isfirst, // true on the first call of a collection: roots are enqueued and the per-core totals reset
+                 struct garbagelist * stackptr) { // passed on to tomark() when isfirst is set
+  if(isfirst) {
+    // enqueue root objs
+    tomark(stackptr);
+    gccurr_heaptop = 0; // record the size of all active objs in this core
+                        // aligned but does not consider block boundaries
+    gcmarkedptrbound = 0;
+  }
+  // sendStall: GCFINISHMARK has been sent since the last object was processed, so it is not resent
+  bool sendStall = false;
+  // mark phase: drain the pointer queue, report, and repeat while gcphase stays MARKPHASE (the startup core returns after one pass)
+  while(MARKPHASE == gcphase) {
+    while(true) {
+      BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+      if(!gc_moreItems2_I()) {
+        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+        break;
+      }
+      sendStall = false;
+      gcbusystatus = true;
+      unsigned int ptr = gc_dequeue2_I();
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+
+      unsigned int size = 0;
+      unsigned int isize = 0;
+      int type = 0; // int, matching isLarge(..., int *, ...) and scanPtrsInObj(..., int)
+      // check if it is a local obj on this core
+      if(((struct ___Object___ *)ptr)->marked!=DISCOVERED) {
+        // ptr has been marked
+        continue;
+      } else if(isLarge(ptr, &type, &size)) {
+        // ptr is a large object and not marked or enqueued
+        BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+        gc_lobjenqueue_I(ptr, size, BAMBOO_NUM_OF_CORE);
+        gcnumlobjs++;
+        BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+        // mark this obj
+        ((struct ___Object___ *)ptr)->marked = MARKED;
+        BAMBOO_CACHE_FLUSH_LINE(ptr);
+      } else {
+        // ptr is an unmarked active object on this core
+        ALIGNSIZE(size, &isize);
+        gccurr_heaptop += isize;
+        // mark this obj
+        ((struct ___Object___ *)ptr)->marked = MARKED;
+        BAMBOO_CACHE_FLUSH_LINE(ptr);
+
+        if((unsigned int)(ptr + size) > (unsigned int)gcmarkedptrbound) {
+          gcmarkedptrbound = (unsigned int)(ptr + size);
+        }
+      }
+
+      scanPtrsInObj(ptr, type);      
+    }   
+    gcbusystatus = false;
+    // send mark finish msg to core coordinator
+    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+      int entry_index = 0;
+      if(waitconfirm)  {
+        // phase 2
+        entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
+      } else {
+        // phase 1
+        entry_index = gcnumsrobjs_index;
+      }
+      gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
+      gcnumsendobjs[entry_index][BAMBOO_NUM_OF_CORE]=gcself_numsendobjs;
+      gcnumreceiveobjs[entry_index][BAMBOO_NUM_OF_CORE]=gcself_numreceiveobjs;
+      gcloads[BAMBOO_NUM_OF_CORE] = gccurr_heaptop;
+    } else {
+      if(!sendStall) {
+        send_msg_4(STARTUPCORE, GCFINISHMARK, BAMBOO_NUM_OF_CORE,
+            gcself_numsendobjs, gcself_numreceiveobjs, false);
+        sendStall = true;
+      }
+    }
+
+    if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
+      return;
+    }
+  } 
+
+  BAMBOO_CACHE_MF();
+} 
+
+#endif // MULTICORE_GC
diff --git a/Robust/src/Runtime/bamboo/multicoregcmark.h b/Robust/src/Runtime/bamboo/multicoregcmark.h
new file mode 100644 (file)
index 0000000..f858819
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef BAMBOO_MULTICORE_GC_MARK_H
+#define BAMBOO_MULTICORE_GC_MARK_H
+#ifdef MULTICORE_GC
+#include "multicore.h"
+// number of entries held by one block of the GC pointer queue
+#define NUMPTRS 120
+
+struct pointerblock {
+  unsigned int ptrs[NUMPTRS];
+  struct pointerblock *next;
+};
+// number of entries held by one block of the large-object queue
+#define NUMLOBJPTRS 20
+// one block of the large-object queue: parallel arrays of start address, length and host, doubly linked
+struct lobjpointerblock {
+  unsigned int lobjs[NUMLOBJPTRS];
+  int lengths[NUMLOBJPTRS];
+  int hosts[NUMLOBJPTRS];
+  struct lobjpointerblock *next;
+  struct lobjpointerblock *prev;
+};
+// queue operations implemented in multicoregcmark.c; the names carry the _I suffix used by their definitions
+INLINE void gc_enqueue_I(unsigned int ptr);
+INLINE unsigned int gc_dequeue_I();
+INLINE void gc_lobjenqueue_I(unsigned int ptr, 
+                             unsigned int length, 
+                             unsigned int host);
+INLINE int gc_lobjmoreItems_I();
+INLINE void gc_lobjdequeue2_I();
+INLINE int gc_lobjmoreItems2_I();
+INLINE void gc_lobjdequeue3_I();
+INLINE int gc_lobjmoreItems3_I();
+INLINE void gc_lobjqueueinit4_I();
+INLINE unsigned int gc_lobjdequeue4_I(unsigned int * length, 
+                                      unsigned int * host);
+INLINE int gc_lobjmoreItems4_I();
+
+#endif // MULTICORE_GC
+#endif // BAMBOO_MULTICORE_GC_MARK_H
diff --git a/Robust/src/Runtime/bamboo/multicoregcprofile.c b/Robust/src/Runtime/bamboo/multicoregcprofile.c
new file mode 100644 (file)
index 0000000..6391199
--- /dev/null
@@ -0,0 +1,119 @@
+#ifdef MULTICORE_GC
+#ifdef GC_PROFILE
+#include "multicoregcprofile.h"
+#include "runtime_arch.h"
+#include "structdefs.h"
+#include "mem.h"
+
+extern int corenum;
+
+INLINE void initmulticoregcprofiledata() { // reset the GC profiling globals
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    // only the startup core resets the record table state and the live/free space totals
+    gc_infoIndex = 0;
+    gc_infoOverflow = false;
+    gc_num_livespace = 0;
+    gc_num_freespace = 0;
+  }
+  gc_num_obj = 0;
+  gc_num_liveobj = 0;
+  gc_num_forwardobj = 0;
+  gc_num_profiles = NUMCORESACTIVE - 1;
+}
+
+INLINE void gc_profileInit() { // reset the counters listed below; gc_num_obj is not touched here
+  gc_num_livespace = 0;
+  gc_num_freespace = 0;
+  gc_num_lobj = 0;
+  gc_num_lobjspace = 0;
+  gc_num_liveobj = 0;
+  gc_num_forwardobj = 0;
+  gc_num_profiles = NUMCORESACTIVE - 1;
+}
+
+INLINE void gc_profileStart(void) { // open a new GCInfo record at gc_infoArray[gc_infoIndex] unless the table has overflowed
+  if(!gc_infoOverflow) {
+    GCInfo* gcInfo = RUNMALLOC(sizeof(struct gc_info)); // NOTE(review): the result is used unchecked
+    gc_infoArray[gc_infoIndex] = gcInfo;
+    gcInfo->index = 1; // slot 0 receives the start time below, so the next free slot is 1
+    gcInfo->time[0] = BAMBOO_GET_EXE_TIME();
+  }
+}
+
+INLINE void gc_profileItem(void) { // append the current time to the open record unless the table has overflowed
+  if(!gc_infoOverflow) {
+    GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
+    gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME(); // NOTE(review): index is not checked against GC_PROFILE_NUM_FIELD
+  }
+}
+
+INLINE void gc_profileEnd(void) { // close the open record: append the end time and seven counters, then advance gc_infoIndex
+  if(!gc_infoOverflow) {
+    GCInfo* gcInfo = gc_infoArray[gc_infoIndex];
+    gcInfo->time[gcInfo->index++] = BAMBOO_GET_EXE_TIME();
+    gcInfo->time[gcInfo->index++] = gc_num_livespace;
+    gcInfo->time[gcInfo->index++] = gc_num_freespace;
+    gcInfo->time[gcInfo->index++] = gc_num_lobj;
+    gcInfo->time[gcInfo->index++] = gc_num_lobjspace;
+    gcInfo->time[gcInfo->index++] = gc_num_obj;
+    gcInfo->time[gcInfo->index++] = gc_num_liveobj;
+    gcInfo->time[gcInfo->index++] = gc_num_forwardobj;
+    gc_infoIndex++;
+    if(gc_infoIndex == GCINFOLENGTH) { // table full: later Start/Item/End calls become no-ops
+      gc_infoOverflow = true;
+    }
+  }
+}
+
+// output the profiling data: one entry per recorded collection, then the summed GC time and an overflow marker if records were dropped
+void gc_outputProfileData() {
+  int i = 0;
+  int j = 0;
+  unsigned long long totalgc = 0;
+
+#ifndef BAMBOO_MEMPROF
+  BAMBOO_PRINT(0xdddd);
+#endif
+  // output the GC records
+  for(i= 0; i < gc_infoIndex; i++) {
+    GCInfo * gcInfo = gc_infoArray[i];
+#ifdef BAMBOO_MEMPROF
+    unsigned long long tmp=gcInfo->time[gcInfo->index-8]-gcInfo->time[0]; // duration: slot index-8 (the end time written by gc_profileEnd) minus the start time
+#else
+    unsigned long long tmp = 0;
+    BAMBOO_PRINT(0xddda);
+    for(j = 0; j < gcInfo->index - 7; j++) { // the timestamps; the last 7 slots are the counters printed below
+      BAMBOO_PRINT(gcInfo->time[j]);
+      BAMBOO_PRINT(gcInfo->time[j]-tmp);
+      BAMBOO_PRINT(0xdddb);
+      tmp = gcInfo->time[j];
+    }
+    tmp = (tmp-gcInfo->time[0]);
+    BAMBOO_PRINT_REG(tmp);
+    BAMBOO_PRINT(0xdddc);
+    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 7]);
+    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 6]);
+    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 5]);
+    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 4]);
+    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 3]);
+    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 2]);
+    BAMBOO_PRINT(gcInfo->time[gcInfo->index - 1]);
+    BAMBOO_PRINT(0xddde);
+#endif
+    totalgc += tmp;
+  }
+#ifndef BAMBOO_MEMPROF
+  BAMBOO_PRINT(0xdddf);
+#endif
+  BAMBOO_PRINT_REG(totalgc);
+
+  if(gc_infoOverflow) {
+    BAMBOO_PRINT(0xefee);
+  }
+
+#ifndef BAMBOO_MEMPROF
+  BAMBOO_PRINT(0xeeee);
+#endif
+}
+#endif  // GC_PROFILE
+#endif // MULTICORE_GC
diff --git a/Robust/src/Runtime/bamboo/multicoregcprofile.h b/Robust/src/Runtime/bamboo/multicoregcprofile.h
new file mode 100644 (file)
index 0000000..5cb5f0c
--- /dev/null
@@ -0,0 +1,143 @@
+#ifndef BAMBOO_MULTICORE_GC_PROFILE_H
+#define BAMBOO_MULTICORE_GC_PROFILE_H
+#ifdef MULTICORE_GC
+#include "multicore.h"
+
+#ifdef GC_PROFILE
+#define GCINFOLENGTH 100 // capacity of gc_infoArray
+// number of slots in GCInfo.time[] (one more with GC_CACHE_ADAPT)
+#ifdef GC_CACHE_ADAPT
+#define GC_PROFILE_NUM_FIELD 15
+#else
+#define GC_PROFILE_NUM_FIELD 14
+#endif // GC_CACHE_ADAPT
+// one profiling record: time[] holds timestamps and counters, index is the next free slot
+typedef struct gc_info {
+  unsigned long long time[GC_PROFILE_NUM_FIELD];
+  unsigned int index;
+} GCInfo;
+// NOTE(review): the variables below are defined (not declared extern) in this header -- confirm this yields a single definition when several .c files include it
+GCInfo * gc_infoArray[GCINFOLENGTH];
+unsigned int gc_infoIndex;
+bool gc_infoOverflow;
+unsigned long long gc_num_livespace;
+unsigned long long gc_num_freespace;
+unsigned long long gc_num_lobjspace;
+unsigned int gc_num_lobj;
+
+unsigned int gc_num_liveobj;
+unsigned int gc_num_obj;
+unsigned int gc_num_forwardobj;
+unsigned int gc_num_profiles;
+// with MGC_SPEC, gc_profile_flag gates the recording macros further down
+#ifdef MGC_SPEC
+volatile bool gc_profile_flag;
+#endif
+
+
+INLINE void initmulticoregcprofiledata(void);
+INLINE void gc_profileInit(void);
+INLINE void gc_profileStart(void);
+INLINE void gc_profileItem(void);
+INLINE void gc_profileEnd(void);
+void gc_outputProfileData();
+
+#define INIT_MULTICORE_GCPROFILE_DATA() initmulticoregcprofiledata()
+#define GC_OUTPUT_PROFILE_DATA() gc_outputProfileData()
+// send the num of obj/liveobj/forwardobj to the startupcore (non-startup cores only), then reset gc_num_obj
+#define GCPROFILE_INFO_2_MASTER() \
+  { \
+    if(STARTUPCORE != BAMBOO_NUM_OF_CORE) { \
+      send_msg_4(STARTUPCORE,GCPROFILES,gc_num_obj,gc_num_liveobj,gc_num_forwardobj, false); \
+    }\
+    gc_num_obj = 0; \
+  }
+#ifdef MGC_SPEC
+// record lobj info (only while gc_profile_flag is set)
+#define GCPROFILE_RECORD_LOBJ() \
+  { \
+    if(gc_profile_flag) { \
+      gc_num_lobj++; \
+    } \
+  }
+// record lobj space info; the expansion reads a variable named sumsize that must be in scope at the use site
+#define GCPROFILE_RECORD_LOBJSPACE() \
+  { \
+    if(gc_profile_flag) { \
+      gc_num_lobjspace = sumsize; \
+    } \
+  }
+// check the live/free space info: live space is the sum of bamboo_smemtbl[0..gcnumblock-1], free space is the rest of BAMBOO_SHARED_MEM_SIZE
+#define GCPROFILE_RECORD_SPACE() \
+  { \
+    if(gc_profile_flag) { \
+      gc_num_livespace = 0; \
+      for(int tmpi = 0; tmpi < gcnumblock; tmpi++) { \
+        gc_num_livespace += bamboo_smemtbl[tmpi]; \
+      } \
+      gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace; \
+    } \
+  }
+// record forward obj info
+#define GCPROFILE_RECORD_FORWARD_OBJ() \
+  { \
+    if(gc_profile_flag) { \
+      gc_num_forwardobj++; \
+    } \
+  }
+// record live obj info
+#define GCPROFILE_RECORD_LIVE_OBJ() \
+  { \
+    if(gc_profile_flag) { \
+      gc_num_liveobj++; \
+    } \
+  }
+#define GCPROFILE_START() \
+  { \
+    if(gc_profile_flag) { \
+      gc_profileStart(); \
+    } \
+  }
+#define GCPROFILE_ITEM() \
+  { \
+    if(gc_profile_flag) { \
+      gc_profileItem(); \
+    } \
+  }
+#else // MGC_SPEC
+#define GCPROFILE_RECORD_LOBJ() (gc_num_lobj++)
+#define GCPROFILE_RECORD_LOBJSPACE() (gc_num_lobjspace = sumsize)
+#define GCPROFILE_RECORD_SPACE() \
+  { \
+    gc_num_livespace = 0; \
+    for(int tmpi = 0; tmpi < gcnumblock; tmpi++) { \
+      gc_num_livespace += bamboo_smemtbl[tmpi]; \
+    } \
+    gc_num_freespace = (BAMBOO_SHARED_MEM_SIZE) - gc_num_livespace; \
+  }
+#define GCPROFILE_RECORD_FORWARD_OBJ() (gc_num_forwardobj++)
+#define GCPROFILE_RECORD_LIVE_OBJ() (gc_num_liveobj++)
+#define GCPROFILE_START() gc_profileStart()
+#define GCPROFILE_ITEM() gc_profileItem()
+#endif // MGC_SPEC
+
+#define GCPROFILE_END() gc_profileEnd()
+#define GCPROFILE_INIT() gc_profileInit()
+
+#else // GC_PROFILE: every profiling macro expands to nothing
+#define INIT_MULTICORE_GCPROFILE_DATA()
+#define GC_OUTPUT_PROFILE_DATA() 
+#define GCPROFILE_INFO_2_MASTER() 
+#define GCPROFILE_RECORD_LOBJ()
+#define GCPROFILE_RECORD_LOBJSPACE()
+#define GCPROFILE_RECORD_SPACE()
+#define GCPROFILE_RECORD_FORWARD_OBJ() 
+#define GCPROFILE_RECORD_LIVE_OBJ() 
+#define GCPROFILE_START()
+#define GCPROFILE_ITEM()
+#define GCPROFILE_END()
+#define GCPROFILE_INIT()
+#endif // GC_PROFILE
+
+#endif // MULTICORE_GC
+#endif // BAMBOO_MULTICORE_GC_PROFILE_H
index 5ecf1658e201a4abcfbfa74be8d888c3527e6f00..2d8c7f403eb8c81136460fde811d73cf4c1b9eb9 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef MULTICORE_HELPER_H
-#define MULTICORE_HELPER_H
+#ifndef BAMBOO_MULTICORE_HELPER_H
+#define BAMBOO_MULTICORE_HELPER_H
 
 #ifdef GC_1
 // NUMCORES4GC = 1
@@ -163,4 +163,4 @@ static unsigned int gc_block2core[124] = {
 };
 #endif
 
-#endif // MULTICORE_HELPER_H
+#endif // BAMBOO_MULTICORE_HELPER_H
index 2ec5fda38692b962e547e4843091eca4e06ba0dc..e55e9090b48af8e45d61c9e173f82a7b1d0a5a06 100644 (file)
@@ -440,89 +440,128 @@ INLINE void setupsmemmode(void) {
 #else
   // defaultly using local mode
   bamboo_smem_mode = SMEMLOCAL;
-#endif // SMEML
-} // void setupsmemmode(void)
+#endif 
+} 
 
-// Only allocate local mem chunks to each core.
-// If a core has used up its local shared memory, start gc.
-void * localmalloc_I(int coren,
-                     int isize,
-                     int * allocsize) {
+INLINE void * mallocmem(int tofindb, // first block of the run being handed out
+                        int totest, // last block of the run (inclusive)
+                        int size, // byte count reported back through *allocsize
+                        int * allocsize) {
   void * mem = NULL;
-  int gccorenum = (coren < NUMCORES4GC) ? (coren) : (coren % NUMCORES4GC);
-  int i = 0;
-  int j = 0;
-  int tofindb = gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j;
-  int totest = tofindb;
-  int bound = BAMBOO_SMEM_SIZE_L;
-  int foundsmem = 0;
+  // find suitable block: heap base + bytes already used in block tofindb + that block's start offset
+  mem=gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC)?
+      (BAMBOO_SMEM_SIZE_L*tofindb):
+      (BAMBOO_LARGE_SMEM_BOUND+(tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
+  *allocsize = size;
+  // set bamboo_smemtbl: every block in [tofindb, totest] is recorded as completely used
+  for(int i = tofindb; i <= totest; i++) {
+    bamboo_smemtbl[i]=(i<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
+  }
+  if(tofindb == bamboo_free_block) { // the run started at the recorded free block: move that marker past the run
+    bamboo_free_block = totest+1;
+  }
+  return mem;
+}
+
+INLINE void * searchBlock4Mem(int* tofindb, // in/out: first block of the candidate run; restarts at gccorenum's next block (via gc_core2block) when a run is blocked
+                              int* totest,
+                              int gccorenum,
+                              int isize,
+                              int * allocsize) {
+  int i=0;
+  int j=0;
   int size = 0;
-  do {
-    bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-    int nsize = bamboo_smemtbl[totest];
-    bool islocal = true;
-    if(nsize < bound) {
-      bool tocheck = true;
-      // have some space in the block
-      if(totest == tofindb) {
-               // the first partition
-               size = bound - nsize;
-      } else if(nsize == 0) {
-               // an empty partition, can be appended
-               size += bound;
-      } else {
-               // not an empty partition, can not be appended
-               // the last continuous block is not big enough, go to check the next
-               // local block
-               islocal = true;
-               tocheck = false;
-      } // if(totest == tofindb) else if(nsize == 0) else ...
-      if(tocheck) {
-               if(size >= isize) {
-                 // have enough space in the block, malloc
-                 foundsmem = 1;
-                 break;
-               } else {
-                 // no enough space yet, try to append next continuous block
-                 islocal = false;
-               }  // if(size > isize) else ...
-      }  // if(tocheck)
-    } // if(nsize < bound)
-    if(islocal) {
-      // no space in the block, go to check the next block
+  int bound = BAMBOO_SMEM_SIZE_L;
+  while(*totest<(gcnumblock-bamboo_reserved_smem)) {
+    bound = (*totest<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
+    int nsize = bamboo_smemtbl[*totest];
+    if((nsize==bound)||((nsize != 0)&&(*totest != *tofindb))) {
+      // a fully/partially occupied partition, can not be appended 
+      //the last continuous block is not big enough,check the next local block
       i++;
       if(2==i) {
-               i = 0;
-               j++;
+        i = 0;
+        j++;
       }
-      tofindb = totest = gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j;
+      *tofindb=*totest=gc_core2block[2*gccorenum+i]+(NUMCORES4GC*2)*j;
     } else {
-      totest += 1;
-    }  // if(islocal) else ...
-    if(totest > gcnumblock-1-bamboo_reserved_smem) {
-      // no more local mem, do not find suitable block
-      foundsmem = 2;
-      break;
-    }  // if(totest > gcnumblock-1-bamboo_reserved_smem) ...
-  } while(true);
+      // an empty block or a partially occupied block that can be set as the 
+      // first block
+      if(*totest == *tofindb) {
+        // the first partition
+        size = bound - nsize;
+      } else if(nsize == 0) {
+        // an empty partition, can be appended
+        size += bound;
+      } 
+      if(size >= isize) {
+        // have enough space in the block, malloc
+        return mallocmem(*tofindb, *totest, size, allocsize);
+        // the return above leaves the loop; nothing further runs on this path
+      } else {
+        // no enough space yet, try to append next continuous block
+        *totest = *totest + 1;
+      }  
+    }
+  }
+  return NULL;
+}
 
-  if(foundsmem == 1) {
-    // find suitable block
-    mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC) ?
-          (BAMBOO_SMEM_SIZE_L*tofindb) : (BAMBOO_LARGE_SMEM_BOUND+
-          (tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
-    *allocsize = size;
-    // set bamboo_smemtbl
-    for(i = tofindb; i <= totest; i++) {
-      bamboo_smemtbl[i]=(i<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
+INLINE void * searchBlock4Mem_global(int* tofindb, 
+                                     int* totest,
+                                     int isize,
+                                     int * allocsize) {
+  // Scan blocks in increasing order from *totest for a run with at least isize free bytes;
+  // on success the run is handed to mallocmem(), otherwise NULL is returned.
+  int size = 0;
+  int bound = BAMBOO_SMEM_SIZE_L;
+  while(*totest<(gcnumblock-bamboo_reserved_smem)) {
+    bound = (*totest<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
+    int nsize = bamboo_smemtbl[*totest];
+    if((nsize==bound)||((nsize != 0)&&(*totest != *tofindb))) {
+      // a fully/partially occupied partition, can not be appended 
+      // set the next block as a new start
+      *totest = *totest+1;
+      *tofindb = *totest;
+    } else {
+      // an empty block or a partially occupied block that can be set as the 
+      // first block
+      if(*totest == *tofindb) {
+        // the first partition
+        size = bound - nsize;
+      } else if(nsize == 0) {
+        // an empty partition, can be appended
+        size += bound;
+      } 
+      if(size >= isize) {
+        // have enough space in the block, malloc
+        return mallocmem(*tofindb, *totest, size, allocsize);
+        // the return above leaves the loop; nothing further runs on this path
+      } else {
+        // no enough space yet, try to append next continuous block
+        *totest = *totest + 1;
+      }  
     }
-  } else if(foundsmem == 2) {
-    // no suitable block
-    *allocsize = 0;
   }
+  return NULL;
+}
 
+// Only allocate local mem chunks to each core: the search starts at gc_core2block[2*gccorenum] and is done by searchBlock4Mem().
+// If a core has used up its local shared memory, start gc. This function itself only returns NULL with *allocsize set to 0 in that case.
+void * localmalloc_I(int coren,
+                     int isize,
+                     int * allocsize) {
+  void * mem = NULL;
+  int gccorenum = (coren<NUMCORES4GC)?(coren):(coren%NUMCORES4GC);
+  int tofindb = gc_core2block[2*gccorenum];
+  int totest = tofindb;
+  mem = searchBlock4Mem(&tofindb, &totest, gccorenum, isize, allocsize);
+  if(mem == NULL) {
+    // no more local mem, do not find suitable block
+    *allocsize = 0;
+  }
   return mem;
-} // void * localmalloc_I(int, int, int *)
+} 
 
 #ifdef SMEMF
 // Allocate the local shared memory to each core with the highest priority,
@@ -531,98 +570,30 @@ void * localmalloc_I(int coren,
 void * fixedmalloc_I(int coren,
                      int isize,
                      int * allocsize) {
-  void * mem = NULL;
-  int i = 0;
-  int j = 0;
-  int k = 0;
-  int gccorenum = (coren < NUMCORES4GC) ? (coren) : (coren % NUMCORES4GC);
-  int ii = 1;
-  int tofindb = gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j;
-  int totest = tofindb;
+  void * mem;
+  int k;
+  int gccorenum = (coren<NUMCORES4GC)?(coren):(coren%NUMCORES4GC);
+  int totest, tofindb;
   int bound = BAMBOO_SMEM_SIZE_L;
   int foundsmem = 0;
   int size = 0;
-  do {
-    bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-    int nsize = bamboo_smemtbl[totest];
-    bool islocal = true;
-    if(nsize < bound) {
-      bool tocheck = true;
-      // have some space in the block
-      if(totest == tofindb) {
-               // the first partition
-               size = bound - nsize;
-      } else if(nsize == 0) {
-               // an empty partition, can be appended
-               size += bound;
-      } else {
-               // not an empty partition, can not be appended
-               // the last continuous block is not big enough, go to check the next
-               // local block
-               islocal = true;
-               tocheck = false;
-      } // if(totest == tofindb) else if(nsize == 0) else ...
-      if(tocheck) {
-               if(size >= isize) {
-                 // have enough space in the block, malloc
-                 foundsmem = 1;
-                 break;
-               } else {
-                 // no enough space yet, try to append next continuous block
-                 // TODO may consider to go to next local block?
-                 islocal = false;
-               }  // if(size > isize) else ...
-      }  // if(tocheck)
-    } // if(nsize < bound)
-    if(islocal) {
-      // no space in the block, go to check the next block
-      i++;
-      if(2==i) {
-               i = 0;
-               j++;
-      }
-      tofindb=totest=
-               gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j;
-    } else {
-      totest += 1;
-    }  // if(islocal) else ...
-    if(totest > gcnumblock-1-bamboo_reserved_smem) {
-      // no more local mem, do not find suitable block on local mem
-         // try to malloc shared memory assigned to the neighbour cores
-         do{
-               k++;
-               if(k >= NUM_CORES2TEST) {
-                 // no more memory available on either coren or its neighbour cores
-                 foundsmem = 2;
-                 goto memsearchresult;
-               }
-         } while(core2test[gccorenum][k] == -1);
-         i = 0;
-         j = 0;
-         tofindb=totest=
-               gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j;
-    }  // if(totest > gcnumblock-1-bamboo_reserved_smem) ...
-  } while(true);
-
-memsearchresult:
-  if(foundsmem == 1) {
-    // find suitable block
-    mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC) ?
-          (BAMBOO_SMEM_SIZE_L*tofindb) : (BAMBOO_LARGE_SMEM_BOUND+
-          (tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
-    *allocsize = size;
-    // set bamboo_smemtbl
-    for(i = tofindb; i <= totest; i++) {
-      bamboo_smemtbl[i]=(i<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
+  for(k=0; k<NUM_CORES2TEST; k++) {
+    if(core2test[gccorenum][k] == -1) {
+      // try next neighbour
+      continue;
+    }
+    tofindb=totest=gc_core2block[2*core2test[gccorenum][k]];
+    mem=searchBlock4Mem(&tofindb,&totest,core2test[gccorenum][k],
+        isize,allocsize);
+    if(mem != NULL) {
+      return mem;
     }
-  } else if(foundsmem == 2) {
-    // no suitable block
-    *allocsize = 0;
   }
-
-  return mem;
-} // void * fixedmalloc_I(int, int, int *)
-#endif // #ifdef SMEMF
+  // no more memory available on either coren or its neighbour cores
+  *allocsize = 0;
+  return NULL;
+} 
+#endif 
 
 #ifdef SMEMM
 // Allocate the local shared memory to each core with the highest priority,
@@ -634,108 +605,40 @@ memsearchresult:
 void * mixedmalloc_I(int coren,
                      int isize,
                      int * allocsize) {
-  void * mem = NULL;
-  int i = 0;
-  int j = 0;
-  int k = 0;
+  void * mem;
+  int k;
   int gccorenum = (coren < NUMCORES4GC) ? (coren) : (coren % NUMCORES4GC);
-  int ii = 1;
-  int tofindb = gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j;
-  int totest = tofindb;
+  int totest,tofindb;
   int bound = BAMBOO_SMEM_SIZE_L;
   int foundsmem = 0;
   int size = 0;
-  do {
-    bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-    int nsize = bamboo_smemtbl[totest];
-    bool islocal = true;
-    if(nsize < bound) {
-      bool tocheck = true;
-      // have some space in the block
-      if(totest == tofindb) {
-               // the first partition
-               size = bound - nsize;
-      } else if(nsize == 0) {
-               // an empty partition, can be appended
-               size += bound;
-      } else {
-               // not an empty partition, can not be appended
-               // the last continuous block is not big enough, go to check the next
-               // local block
-               islocal = true;
-               tocheck = false;
-      } // if(totest == tofindb) else if(nsize == 0) else ...
-      if(tocheck) {
-               if(size >= isize) {
-                 // have enough space in the block, malloc
-                 foundsmem = 1;
-                 break;
-               } else {
-                 // no enough space yet, try to append next continuous block
-                 // TODO may consider to go to next local block?
-                 islocal = false;
-               }  // if(size > isize) else ...
-      }  // if(tocheck)
-    } // if(nsize < bound)
-    if(islocal) {
-      // no space in the block, go to check the next block
-      i++;
-      if(2==i) {
-               i = 0;
-               j++;
-      }
-      tofindb=totest=
-               gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j;
-    } else {
-      totest += 1;
-    }  // if(islocal) else ...
-    if(totest > gcnumblock-1-bamboo_reserved_smem) {
-      // no more local mem, do not find suitable block on local mem
-         // try to malloc shared memory assigned to the neighbour cores
-         do{
-               k++;
-               if(k >= NUM_CORES2TEST) {
-                 if(gcmem_mixed_usedmem >= gcmem_mixed_threshold) {
-                       // no more memory available on either coren or its neighbour cores
-                       foundsmem = 2;
-                       goto memmixedsearchresult;
-                 } else {
-                       // try allocate globally
-                       mem = globalmalloc_I(coren, isize, allocsize);
-                       return mem;
-                 }
-               }
-         } while(core2test[gccorenum][k] == -1);
-         i = 0;
-         j = 0;
-         tofindb=totest=
-               gc_core2block[2*core2test[gccorenum][k]+i]+(NUMCORES4GC*2)*j;
-    }  // if(totest > gcnumblock-1-bamboo_reserved_smem) ...
-  } while(true);
-
-memmixedsearchresult:
-  if(foundsmem == 1) {
-    // find suitable block
-    mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC) ?
-          (BAMBOO_SMEM_SIZE_L*tofindb) : (BAMBOO_LARGE_SMEM_BOUND+
-          (tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
-    *allocsize = size;
-    // set bamboo_smemtbl
-    for(i = tofindb; i <= totest; i++) {
-      bamboo_smemtbl[i]=(i<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
+  // First try the blocks owned by coren itself and by its neighbour cores,
+  // in the order given by core2test[gccorenum][].
+  for(k=0; k<NUM_CORES2TEST; k++) {
+    if(core2test[gccorenum][k] == -1) {
+      // try next neighbour
+      continue;
     }
-       gcmem_mixed_usedmem += size;
-       if(tofindb == bamboo_free_block) {
-      bamboo_free_block = totest+1;
+    tofindb=totest=gc_core2block[2*core2test[gccorenum][k]];
+    mem=searchBlock4Mem(&tofindb,&totest,core2test[gccorenum][k],
+        isize,allocsize);
+    if(mem != NULL) {
+      // Account for the memory actually handed out. The local 'size' is 
+      // never updated in this function (it stays 0), so the granted size 
+      // has to be taken from *allocsize.
+      // NOTE(review): assumes mallocmem() stores the granted size in 
+      // *allocsize -- confirm.
+      gcmem_mixed_usedmem += *allocsize;
+      return mem;
     }
-  } else if(foundsmem == 2) {
-    // no suitable block
+  }
+  if(gcmem_mixed_usedmem >= gcmem_mixed_threshold) {
+    // no more memory available on either coren or its neighbour cores
     *allocsize = 0;
+    return NULL; 
+  } else {
+    // try allocate globally
+    mem = globalmalloc_I(coren, isize, allocsize);
+    if(mem != NULL) {
+      // same accounting as above: use the granted size, not the unused 
+      // local 'size'
+      gcmem_mixed_usedmem += *allocsize;
+    }
+    return mem;
   }
-
-  return mem;
-} // void * mixedmalloc_I(int, int, int *)
-#endif // #ifdef SMEMM
+} 
+#endif 
 
 // Allocate all the memory chunks globally, do not consider the host cores
 // When all the shared memory are used up, start gc.
@@ -743,86 +646,28 @@ void * globalmalloc_I(int coren,
                       int isize,
                       int * allocsize) {
   void * mem = NULL;
-  int tofindb = bamboo_free_block;       //0;
+  int tofindb = bamboo_free_block;
   int totest = tofindb;
   int bound = BAMBOO_SMEM_SIZE_L;
   int foundsmem = 0;
   int size = 0;
   if(tofindb > gcnumblock-1-bamboo_reserved_smem) {
-       // Out of shared memory
+    // Out of shared memory
     *allocsize = 0;
     return NULL;
   }
-  do {
-    bound = (totest < NUMCORES4GC) ? BAMBOO_SMEM_SIZE_L : BAMBOO_SMEM_SIZE;
-    int nsize = bamboo_smemtbl[totest];
-    bool isnext = false;
-    if(nsize < bound) {
-      bool tocheck = true;
-      // have some space in the block
-      if(totest == tofindb) {
-               // the first partition
-               size = bound - nsize;
-      } else if(nsize == 0) {
-               // an empty partition, can be appended
-               size += bound;
-      } else {
-               // not an empty partition, can not be appended
-               // the last continuous block is not big enough, start another block
-               isnext = true;
-               tocheck = false;
-      }  // if(totest == tofindb) else if(nsize == 0) else ...
-      if(tocheck) {
-               if(size >= isize) {
-                 // have enough space in the block, malloc
-                 foundsmem = 1;
-                 break;
-               }  // if(size > isize)
-      }   // if(tocheck)
-    } else {
-      isnext = true;
-    }  // if(nsize < bound) else ...
-    totest += 1;
-    if(totest > gcnumblock-1-bamboo_reserved_smem) {
-      // no more local mem, do not find suitable block
-      foundsmem = 2;
-      break;
-    }  // if(totest > gcnumblock-1-bamboo_reserved_smem) ...
-    if(isnext) {
-      // start another block
-      tofindb = totest;
-    } // if(islocal)
-  } while(true);
-
-  if(foundsmem == 1) {
-    // find suitable block
-    mem = gcbaseva+bamboo_smemtbl[tofindb]+((tofindb<NUMCORES4GC) ?
-          (BAMBOO_SMEM_SIZE_L*tofindb) : (BAMBOO_LARGE_SMEM_BOUND+
-          (tofindb-NUMCORES4GC)*BAMBOO_SMEM_SIZE));
-    *allocsize = size;
-    // set bamboo_smemtbl
-    for(int i = tofindb; i <= totest; i++) {
-      bamboo_smemtbl[i]=(i<NUMCORES4GC)?BAMBOO_SMEM_SIZE_L:BAMBOO_SMEM_SIZE;
-    }
-    if(tofindb == bamboo_free_block) {
-      bamboo_free_block = totest+1;
-    }
-  } else if(foundsmem == 2) {
-    // no suitable block
+  mem=searchBlock4Mem_global(&tofindb, &totest, isize, allocsize);
+  if(mem == NULL) {
     *allocsize = 0;
-    mem = NULL;
   }
-
   return mem;
-} // void * globalmalloc_I(int, int, int *)
-#endif // MULTICORE_GC
+} 
 
 // malloc from the shared memory
 void * smemalloc_I(int coren,
                    int size,
                    int * allocsize) {
   void * mem = NULL;
-#ifdef MULTICORE_GC
   int isize = size+(BAMBOO_CACHE_LINE_SIZE);
 
   // go through the bamboo_smemtbl for suitable partitions
@@ -862,50 +707,48 @@ void * smemalloc_I(int coren,
   }
 
   if(mem == NULL) {
-#else 
-  int toallocate = (size>(BAMBOO_SMEM_SIZE)) ? (size) : (BAMBOO_SMEM_SIZE);
-  if(toallocate > bamboo_free_smem_size) {
-       // no enough mem
-       mem = NULL;
-  } else {
-       mem = (void *)bamboo_free_smemp;
-       bamboo_free_smemp = ((void*)bamboo_free_smemp) + toallocate;
-       bamboo_free_smem_size -= toallocate;
-  }
-  *allocsize = toallocate;
-  if(mem == NULL) {
-#endif // MULTICORE_GC
     // no enough shared global memory
     *allocsize = 0;
-#ifdef MULTICORE_GC
        if(!gcflag) {
          gcflag = true;
          if(!gcprocessing) {
-               // inform other cores to stop and wait for gc
-               gcprecheck = true;
-               for(int i = 0; i < NUMCORESACTIVE; i++) {
-                 // reuse the gcnumsendobjs & gcnumreceiveobjs
-                 gccorestatus[i] = 1;
-                 gcnumsendobjs[0][i] = 0;
-                 gcnumreceiveobjs[0][i] = 0;
-               }
-               for(int i = 0; i < NUMCORESACTIVE; i++) {
-                 if(i != BAMBOO_NUM_OF_CORE) {
-                       if(BAMBOO_CHECK_SEND_MODE()) {
-                         cache_msg_1(i, GCSTARTPRE);
-                       } else {
-                         send_msg_1(i, GCSTARTPRE, true);
-                       }
-                 }
-               }
+      // inform other cores to stop and wait for gc
+      gcprecheck = true;
+      for(int i = 0; i < NUMCORESACTIVE; i++) {
+        // reuse the gcnumsendobjs & gcnumreceiveobjs
+        gcnumsendobjs[0][i] = 0;
+        gcnumreceiveobjs[0][i] = 0;
+      }
+      GC_SEND_MSG_1_TO_CLIENT(GCSTARTPRE);
          }
        }
        return NULL;
+  }
+  return mem;
+}
 #else
+// malloc from the shared memory
+void * smemalloc_I(int coren,
+                   int size,
+                   int * allocsize) {
+  void * mem = NULL;
+  int toallocate = (size>(BAMBOO_SMEM_SIZE)) ? (size) : (BAMBOO_SMEM_SIZE);
+  if(toallocate > bamboo_free_smem_size) {
+    // no enough mem
+    mem = NULL;
+  } else {
+    mem = (void *)bamboo_free_smemp;
+    bamboo_free_smemp = ((void*)bamboo_free_smemp) + toallocate;
+    bamboo_free_smem_size -= toallocate;
+  }
+  *allocsize = toallocate;
+  if(mem == NULL) {
+    // no enough shared global memory
+    *allocsize = 0;
     BAMBOO_EXIT(0xe103);
-#endif
   }
   return mem;
-}  // void * smemalloc_I(int, int, int)
+} 
+#endif // MULTICORE_GC
 
 #endif // MULTICORE
index 173957f66f0c218cb189f7eea8d239fa25b77b48..4bd0c80da8103d5d23ae9d1f3fc5fc7001b61996 100644 (file)
@@ -1,14 +1,9 @@
-#ifndef MULTICORE_MEM_H
-#define MULTICORE_MEM_H
+// Include guard: the macro tested here must be the one defined on the next 
+// line, otherwise a second inclusion of this header is not suppressed.
+#ifndef BAMBOO_MULTICORE_MEM_H
+#define BAMBOO_MULTICORE_MEM_H
+#include "multicore.h"
 #include "Queue.h"
 #include "SimpleHash.h"
 
-#ifndef bool
-#define bool int
-#define true 1
-#define false 0
-#endif
-
 // data structures for shared memory allocation
 #ifdef TILERA_BME
 #ifdef MGC
 #else
 #define GC_BAMBOO_NUMCORES 62
 #endif
-/*#elif defined GC_2
-#define GC_BAMBOO_NUMCORES 3
-#elif defined GC_4
-#define GC_BAMBOO_NUMCORES 4
-#elif defined GC_8
-#define GC_BAMBOO_NUMCORES 8
-#elif defined GC_16
-#define GC_BAMBOO_NUMCORES 16
-#elif defined GC_32
-#define GC_BAMBOO_NUMCORES 32
-#elif defined GC_50
-#define GC_BAMBOO_NUMCORES 50
-#elif defined GC_62
-#define GC_BAMBOO_NUMCORES 62*/
 #endif
 
 #ifdef GC_DEBUG
 #endif // GC_DEBUG
 
 #ifdef MULTICORE_GC
+#ifdef GC_SMALLPAGESIZE
+// memory for globals
+#define BAMBOO_GLOBAL_DEFS_SIZE (1024 * 1024)
+#define BAMBOO_GLOBAL_DEFS_PRIM_SIZE (1024 * 512)
+// memory for thread queue
+#define BAMBOO_THREAD_QUEUE_SIZE (1024 * 1024)
+#else
+// memory for globals
+#define BAMBOO_GLOBAL_DEFS_SIZE (BAMBOO_SMEM_SIZE)
+#define BAMBOO_GLOBAL_DEFS_PRIM_SIZE (BAMBOO_SMEM_SIZE/2)
+// memory for thread queue
+#define BAMBOO_THREAD_QUEUE_SIZE (BAMBOO_SMEM_SIZE) // (45 * 16 * 1024)
+#endif // GC_SMALLPAGESIZE
+
 volatile bool gc_localheap_s;
 #include "multicoregarbage.h"
 
@@ -140,6 +135,16 @@ struct freeMemList {
                                    // only maintain 1 freemMemItem
 };
 
+// Close the chunk currently pointed to by bamboo_cur_msp by writing a single 
+// int 0 at its current position (only the first int of the remaining memory 
+// is zeroed, not the whole remainder).
+// The write happens only when all of the following hold:
+//   - bamboo_cur_msp is non-zero,
+//   - bamboo_smem_zero_top is equal to bamboo_cur_msp,
+//   - bamboo_smem_size is greater than 0.
+// Otherwise the macro does nothing.
+#define BAMBOO_CLOSE_CUR_MSP() \
+  { \
+    if((bamboo_cur_msp!=0)&&(bamboo_smem_zero_top==bamboo_cur_msp) \
+        &&(bamboo_smem_size>0)) { \
+      *((int *)bamboo_cur_msp) = 0; \
+    } \
+  }
+
 // table recording the number of allocated bytes on each block
 // Note: this table resides on the bottom of the shared heap for all cores
 //       to access
@@ -162,4 +167,4 @@ volatile bool smemflag;
 volatile unsigned int bamboo_cur_msp;
 volatile int bamboo_smem_size;
 
-#endif
+#endif // BAMBOO_MULTICORE_MEM_H
diff --git a/Robust/src/Runtime/bamboo/multicoremgc.h b/Robust/src/Runtime/bamboo/multicoremgc.h
new file mode 100644 (file)
index 0000000..d041020
--- /dev/null
@@ -0,0 +1,22 @@
+#ifndef BAMBOO_MULTICORE_MGC_H
+#define BAMBOO_MULTICORE_MGC_H
+#ifdef MGC
+// shared memory pointer for global thread queue
+// In MGC version, this block of memory is located at the very bottom of the 
+// shared memory with the base address as BAMBOO_BASE_VA.
+// The bottom of the shared memory = global thread queue + sbstart tbl 
+//                                  + smemtbl + NUMCORES4GC bamboo_rmsp
+// This queue always resides at the bottom of the shared memory.  It is 
+// treated as a runtime structure: during gc it is scanned in the mark and 
+// flush phases but it is never compacted.
+//
+// This is a loop array and the first 4 int fields of the queue are:
+//     mutex + thread counter + start pointer + end pointer
+// data structures for threads
+unsigned int * bamboo_thread_queue;
+unsigned int bamboo_max_thread_num_mask;
+unsigned int bamboo_current_thread;
+
+//extern int corenum;
+#endif // MGC
+#endif // BAMBOO_MULTICORE_MGC_H
diff --git a/Robust/src/Runtime/bamboo/multicoremsg.c b/Robust/src/Runtime/bamboo/multicoremsg.c
new file mode 100644 (file)
index 0000000..1e8d85a
--- /dev/null
@@ -0,0 +1,1119 @@
+#ifdef MULTICORE
+#include "multicoremsg.h"
+#include "runtime.h"
+#include "multicoreruntime.h"
+#include "multicoretaskprofile.h"
+
+// Determine the length of the message whose type word is stored at 
+// msgdata[msgdataindex].
+// The length is stored in the global msglength and also returned.
+// For fixed-size message types the length is a constant selected by type.
+// For variable-size types (TRANSOBJ, GCLOBJINFO) the length is read from the 
+// word following the type word; this is only done when size > 1, otherwise 
+// -1 is returned and msglength is left untouched.
+// An unknown type ends in BAMBOO_EXIT(0xe001).
+// NOTE(review): 'size' presumably is the number of message words received 
+// so far -- confirm against the caller.
+INLINE int checkMsgLength_I(int size) {
+  int type = msgdata[msgdataindex];
+  switch(type) {
+  // message types with length 1
+  case STATUSCONFIRM:
+  case TERMINATE:
+#ifdef MULTICORE_GC
+  case GCSTARTPRE:
+  case GCSTART:
+  case GCSTARTINIT:
+  case GCSTARTFLUSH:
+  case GCFINISH:
+  case GCMARKCONFIRM:
+  case GCLOBJREQUEST:
+#ifdef GC_CACHE_ADAPT
+  case GCSTARTPREF:
+#endif 
+#endif 
+  {
+    msglength = 1;
+    break;
+  }
+
+  // message types with length 2
+#ifdef TASK
+  case PROFILEOUTPUT:
+  case PROFILEFINISH:
+#endif
+#ifdef MULTICORE_GC
+  case GCSTARTCOMPACT:
+  case GCMARKEDOBJ:
+  case GCFINISHINIT:
+  case GCFINISHFLUSH:
+#ifdef GC_CACHE_ADAPT
+  case GCFINISHPREF:
+#endif 
+#endif 
+  {
+    msglength = 2;
+    break;
+  }
+
+  // message types with length 3
+  case MEMREQUEST:
+  case MEMRESPONSE:
+  {
+    msglength = 3;
+    break;
+  }
+
+  // message types with length 4
+  case TRANSTALL:
+#ifdef TASK
+  case LOCKGROUNT:
+  case LOCKDENY:
+  case LOCKRELEASE:
+  case REDIRECTGROUNT:
+  case REDIRECTDENY:
+  case REDIRECTRELEASE:
+#endif
+#ifdef MULTICORE_GC
+  case GCFINISHPRE:
+  case GCFINISHMARK:
+  case GCMOVESTART:
+#ifdef GC_PROFILE
+  case GCPROFILES:
+#endif
+#endif
+  {
+    msglength = 4;
+    break;
+  }
+
+  // message types with length 5
+#ifdef TASK
+  case LOCKREQUEST:
+#endif
+  case STATUSREPORT:
+#ifdef MULTICORE_GC
+  case GCFINISHCOMPACT:
+  case GCMARKREPORT:
+#endif
+  {
+    msglength = 5;
+    break;
+  }
+
+  // message type with length 6
+#ifdef TASK
+  case REDIRECTLOCK:
+  {
+    msglength = 6;
+    break;
+  }
+#endif
+
+  // variable-size message types: the length is carried in the second word
+#ifdef TASK
+  case TRANSOBJ:   // nonfixed size
+#endif
+#ifdef MULTICORE_GC
+  case GCLOBJINFO:
+#endif
+  {  // nonfixed size
+    if(size > 1) {
+      // index is masked with BAMBOO_MSG_BUF_MASK when stepping to the 
+      // second word
+      msglength = msgdata[(msgdataindex+1)&(BAMBOO_MSG_BUF_MASK)];
+    } else {
+      // the length word is not available yet
+      return -1;
+    }
+    break;
+  }
+
+  default:
+  {
+    // unknown message type
+    BAMBOO_EXIT(0xe001);
+    break;
+  }
+  }
+  return msglength;
+}
+
+// Handle a TRANSOBJ message: build a transObjInfo record from the message 
+// words, drop any record already in objqueue that refers to the same object, 
+// append the new record to objqueue and increment self_numreceiveobjs.
+// When built with MULTICORE_GC and gcprocessing is set, the pre-gc check is 
+// re-armed: the startup core sets gcprecheck, any other core sends a 
+// GCFINISHPRE message with its send/receive counters to the startup core.
+// A core number above NUMCORESACTIVE - 1 ends in BAMBOO_EXIT(0xe201).
+INLINE void processmsg_transobj_I() {
+  // skip one message word before reading the object pointer
+  // NOTE(review): presumably this is the length word of the message -- 
+  // confirm against the sender
+  MSG_INDEXINC_I();
+  struct transObjInfo * transObj=RUNMALLOC_I(sizeof(struct transObjInfo));
+  int k = 0;
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+    BAMBOO_EXIT(0xe201);
+  }
+  // store the object and its corresponding queue info, enqueue it later
+  transObj->objptr = (void *)msgdata[msgdataindex]; 
+  MSG_INDEXINC_I();
+  // the remaining (msglength - 3) words are stored pairwise in queues[]
+  transObj->length = (msglength - 3) / 2;
+  transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3));
+  for(k = 0; k < transObj->length; ++k) {
+    transObj->queues[2*k] = msgdata[msgdataindex];  
+    MSG_INDEXINC_I();
+    transObj->queues[2*k+1] = msgdata[msgdataindex]; 
+    MSG_INDEXINC_I();
+  }
+  // check if there is an existing duplicate item
+  {
+    struct QueueItem * qitem = getHead(&objqueue);
+    // prev is the last item that was kept; it is used to continue the walk 
+    // after qitem has been removed from the queue
+    struct QueueItem * prev = NULL;
+    while(qitem != NULL) {
+      struct transObjInfo * tmpinfo =
+        (struct transObjInfo *)(qitem->objectptr);
+      if(tmpinfo->objptr == transObj->objptr) {
+               // the same object, remove outdate one
+               RUNFREE_I(tmpinfo->queues);
+               RUNFREE_I(tmpinfo);
+               removeItem(&objqueue, qitem);
+               //break;
+      } else {
+               prev = qitem;
+      }
+      // advance: restart from the head if nothing has been kept so far, 
+      // otherwise continue behind the last kept item
+      if(prev == NULL) {
+               qitem = getHead(&objqueue);
+      } else {
+               qitem = getNextQueueItem(prev);
+      }
+    }
+    addNewItem_I(&objqueue, (void *)transObj);
+  }
+  ++(self_numreceiveobjs);
+#ifdef MULTICORE_GC
+  if(gcprocessing) {
+    if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+      // set the gcprecheck to enable checking again
+      gcprecheck = true;
+    } else {
+      // send a update pregc information msg to the master core
+      if(BAMBOO_CHECK_SEND_MODE()) {
+        cache_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, 
+            self_numsendobjs, self_numreceiveobjs);
+      } else {
+        send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, 
+            self_numsendobjs, self_numreceiveobjs, true);
+      }
+    }
+  }
+#endif 
+}
+
+// Handle a TRANSTALL message on the startup core.
+// Payload words, in order: core number, value stored into numsendobjs[], 
+// value stored into numreceiveobjs[].  The core's entry in corestatus[] is 
+// cleared.  Core numbers that are not below NUMCORESACTIVE are ignored.
+// Any core other than the startup core ends in BAMBOO_EXIT(0xe002).
+INLINE void processmsg_transtall_I() {
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // only the startup core may receive a stall msg
+    BAMBOO_EXIT(0xe002);
+  }
+  int srccore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int nsend = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int nreceive = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(!(srccore < NUMCORESACTIVE)) {
+    // not an active core, nothing to record
+    return;
+  }
+  corestatus[srccore] = 0;
+  numsendobjs[srccore] = nsend;
+  numreceiveobjs[srccore] = nreceive;
+}
+
+#ifndef MULTICORE_GC
+// Handle a LOCKREQUEST message.
+// Payload words, in order: lock type, obj pointer, lock, request core.
+// The request is handed to processlockrequest(); unless it was redirected 
+// (result -1) a LOCKDENY (result 1) or LOCKGROUNT reply carrying the lock 
+// type, obj pointer and lock is sent back to the request core.
+INLINE void processmsg_lockrequest_I() {
+  int locktype = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int obj = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int lock = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int reqcore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // -1: redirected, 0: approved, 1: denied
+  int result = processlockrequest(locktype, lock, obj, reqcore, reqcore, true);
+  if(result == -1) {
+    // this lock request is redirected, no reply from here
+    return;
+  }
+  // pick the reply type
+  int reply;
+  if(result == 1) {
+    reply = LOCKDENY;
+  } else {
+    reply = LOCKGROUNT;
+  }
+  // for 32 bit machine, the size is always 4 words, cache the msg first
+  if(BAMBOO_CHECK_SEND_MODE()) {
+    cache_msg_4(reqcore, reply, locktype, obj, lock);
+  } else {
+    send_msg_4(reqcore, reply, locktype, obj, lock, true);
+  }
+}
+
+// Handle a LOCKGROUNT message: the pending lock request was approved.
+// One payload word is skipped, the next two are compared against lockobj 
+// and lock2require.  On a match lockresult is set to 1 and lockflag to true; 
+// a mismatch ends in BAMBOO_EXIT(0xe203).
+// A core number above NUMCORESACTIVE - 1 ends in BAMBOO_EXIT(0xe202).
+INLINE void processmsg_lockgrount_I() {
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+    BAMBOO_EXIT(0xe202);
+  }
+  int obj = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int lock = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if((lockobj != obj) || (lock2require != lock)) {
+    // conflicts on lockresults
+    BAMBOO_EXIT(0xe203);
+    return;
+  }
+  lockresult = 1;
+  lockflag = true;
+#ifndef INTERRUPT
+  reside = false;
+#endif
+}
+
+// Handle a LOCKDENY message: the pending lock request was denied.
+// One payload word is skipped, the next two are compared against lockobj 
+// and lock2require.  On a match lockresult is set to 0 and lockflag to true; 
+// a mismatch ends in BAMBOO_EXIT(0xe205).
+// A core number above NUMCORESACTIVE - 1 ends in BAMBOO_EXIT(0xe204).
+INLINE void processmsg_lockdeny_I() {
+  MSG_INDEXINC_I();
+  int obj = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int lock = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+    BAMBOO_EXIT(0xe204);
+  }
+  if((lockobj != obj) || (lock2require != lock)) {
+    // conflicts on lockresults
+    BAMBOO_EXIT(0xe205);
+    return;
+  }
+  lockresult = 0;
+  lockflag = true;
+#ifndef INTERRUPT
+  reside = false;
+#endif
+}
+
+// Handle a LOCKRELEASE message.
+// Three payload words are consumed; the first two are forwarded to 
+// processlockrelease() together with the constants 0 and false.  The third 
+// word is read but not used.
+INLINE void processmsg_lockrelease_I() {
+  int word1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int word2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int word3 = msgdata[msgdataindex];   // consumed only
+  MSG_INDEXINC_I();
+  // receive lock release msg
+  processlockrelease(word1, word2, 0, false);
+}
+
+// Handle a REDIRECTLOCK message.
+// Payload words, in order: lock type, obj pointer, redirect lock, root 
+// request core, request core.
+// The request is handed to processlockrequest(); unless it was redirected 
+// again (result -1) a REDIRECTDENY (result 1) or REDIRECTGROUNT reply is 
+// sent to the root request core.
+INLINE void processmsg_redirectlock_I() {
+  int locktype = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int obj = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int redirectlock = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int rootcore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int reqcore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int result = processlockrequest(locktype, redirectlock, obj, reqcore,
+      rootcore, true);
+  if(result == -1) {
+    // this lock request is redirected, no reply from here
+    return;
+  }
+  // send response msg
+  // for 32 bit machine, the size is always 4 words, cache the msg first
+  if(BAMBOO_CHECK_SEND_MODE()) {
+    cache_msg_4(rootcore, (result == 1) ? REDIRECTDENY : REDIRECTGROUNT,
+        locktype, obj, redirectlock);
+  } else {
+    send_msg_4(rootcore, (result == 1) ? REDIRECTDENY : REDIRECTGROUNT,
+        locktype, obj, redirectlock, true);
+  }
+}
+
+// Handle a REDIRECTGROUNT message: the pending redirected lock request was 
+// approved.
+// One payload word is skipped and the next one is compared against lockobj. 
+// On a match a further word is read and recorded for lockobj in 
+// objRedirectLockTbl, lockresult is set to 1 and lockflag to true; a 
+// mismatch ends in BAMBOO_EXIT(0xe207).
+// A core number above NUMCORESACTIVE - 1 ends in BAMBOO_EXIT(0xe206).
+INLINE void processmsg_redirectgrount_I() {
+  MSG_INDEXINC_I();
+  int obj = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+    BAMBOO_EXIT(0xe206);
+  }
+  if(lockobj != obj) {
+    // conflicts on lockresults
+    BAMBOO_EXIT(0xe207);
+    return;
+  }
+  int redirectlock = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  lockresult = 1;
+  lockflag = true;
+  RuntimeHashadd_I(objRedirectLockTbl, lockobj, redirectlock);
+#ifndef INTERRUPT
+  reside = false;
+#endif
+}
+
+// Handle a REDIRECTDENY message: the pending redirected lock request was 
+// denied.
+// One payload word is skipped and two more are consumed; the first of them 
+// is compared against lockobj, the second is not used.  On a match 
+// lockresult is set to 0 and lockflag to true; a mismatch ends in 
+// BAMBOO_EXIT(0xe209).
+// A core number above NUMCORESACTIVE - 1 ends in BAMBOO_EXIT(0xe208).
+INLINE void processmsg_redirectdeny_I() {
+  MSG_INDEXINC_I();
+  int obj = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int word3 = msgdata[msgdataindex];   // consumed only
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
+    BAMBOO_EXIT(0xe208);
+  }
+  if(lockobj != obj) {
+    // conflicts on lockresults
+    BAMBOO_EXIT(0xe209);
+    return;
+  }
+  lockresult = 0;
+  lockflag = true;
+#ifndef INTERRUPT
+  reside = false;
+#endif
+}
+
+// Handle a REDIRECTRELEASE message.
+// Three payload words are consumed and forwarded, in message order, to 
+// processlockrelease() with the last argument set to true.
+INLINE void processmsg_redirectrelease_I() {
+  int word1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int word2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int word3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  processlockrelease(word1, word2, word3, true);
+}
+#endif // #ifndef MULTICORE_GC
+
+#ifdef PROFILE
+// Handle a PROFILEOUTPUT message on a non-startup core.
+// Sets stall, stores the payload word in totalexetime, outputs the profile 
+// data (unless built with RT_TEST) and reports back to the startup core 
+// with a PROFILEFINISH message carrying this core's number.
+// The startup core itself ends in BAMBOO_EXIT(0xe20a).
+INLINE void processmsg_profileoutput_I() {
+  if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
+    // the startup core must not receive this msg
+    BAMBOO_EXIT(0xe20a);
+  }
+  stall = true;
+  totalexetime = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+#ifndef RT_TEST
+  outputProfileData();
+#endif
+  // cache the msg first
+  if(BAMBOO_CHECK_SEND_MODE()) {
+    cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
+  } else {
+    send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE, true);
+  }
+}
+
+// Handle a PROFILEFINISH message on the startup core: clear the 
+// profilestatus[] entry of the core named in the payload word.
+// Any core other than the startup core ends in BAMBOO_EXIT(0xe20b).
+INLINE void processmsg_profilefinish_I() {
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // only the startup core may receive this msg
+    BAMBOO_EXIT(0xe20b);
+  }
+  int srccore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  profilestatus[srccore] = 0;
+}
+#endif // PROFILE
+
+// Handle a STATUSCONFIRM message: answer the startup core with a 
+// STATUSREPORT message carrying the busy flag (1 or 0), this core's number 
+// and its self_numsendobjs / self_numreceiveobjs counters.
+// The startup core itself, or a core number above NUMCORESACTIVE - 1, ends 
+// in BAMBOO_EXIT(0xe003).
+INLINE void processmsg_statusconfirm_I() {
+  if((BAMBOO_NUM_OF_CORE == STARTUPCORE)
+     || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
+    // wrong core to receive such msg
+    BAMBOO_EXIT(0xe003);
+    return;
+  }
+  // send response msg, cache the msg first
+  if(BAMBOO_CHECK_SEND_MODE()) {
+    cache_msg_5(STARTUPCORE, STATUSREPORT, busystatus ? 1 : 0,
+        BAMBOO_NUM_OF_CORE, self_numsendobjs, self_numreceiveobjs);
+  } else {
+    send_msg_5(STARTUPCORE, STATUSREPORT, busystatus ? 1 : 0,
+        BAMBOO_NUM_OF_CORE, self_numsendobjs, self_numreceiveobjs, true);
+  }
+}
+
+// Handle a STATUSREPORT message on the startup core.
+// Payload words, in order: status of the reporting core, its core number, 
+// its send counter, its receive counter.  The values are stored in 
+// corestatus[], numsendobjs[] and numreceiveobjs[].  While waitconfirm is 
+// set, numconfirm is decremented for every report.
+// A core number outside [0, NUMCORESACTIVE) is not used as an index; the 
+// same bound guards these arrays in processmsg_transtall_I.
+// Any core other than the startup core ends in BAMBOO_EXIT(0xe004).
+INLINE void processmsg_statusreport_I() {
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data4 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // receive a status confirm info
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // wrong core to receive such msg
+    BAMBOO_EXIT(0xe004);
+  } else {
+    if(waitconfirm) {
+      numconfirm--;
+    }
+    // never index the status tables with a core number taken from the 
+    // message unless it is in range
+    if((data2 >= 0) && (data2 < NUMCORESACTIVE)) {
+      corestatus[data2] = data1;
+      numsendobjs[data2] = data3;
+      numreceiveobjs[data2] = data4;
+    }
+  }
+}
+
+// Handle a TERMINATE message: call disruntimedata(), mask the timer 
+// interrupt when built with both MULTICORE_GC and GC_CACHE_ADAPT, then leave 
+// the application through BAMBOO_EXIT_APP(0).
+INLINE void processmsg_terminate_I() {
+  disruntimedata();
+#if defined(MULTICORE_GC) && defined(GC_CACHE_ADAPT)
+  // disable the TILE_TIMER interrupt
+  bamboo_mask_timer_intr();
+#endif
+  BAMBOO_EXIT_APP(0);
+}
+
+// Handle a MEMREQUEST message on the startup core.
+// Payload words, in order: requested size (data1), requesting core (data2); 
+// they are passed to smemalloc_I() as its size and coren arguments.
+// On success a MEMRESPONSE message with the start address and the allocated 
+// size is sent to the requesting core.  When smemalloc_I() returns NULL no 
+// reply is sent.
+// With MULTICORE_GC, nothing is allocated while both gcprocessing and gcflag 
+// are set.
+// Any core other than the startup core ends in BAMBOO_EXIT(0xe005).
+INLINE void processmsg_memrequest_I() {
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // receive a shared memory request msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // wrong core to receive such msg
+    BAMBOO_EXIT(0xe005);
+  } else {
+    int allocsize = 0;
+    void * mem = NULL;
+#ifdef MULTICORE_GC
+    if(gcprocessing && gcflag) {
+      // is currently doing GC and the master core did not decide to stop GC
+    } else {
+      // either not doing GC or the master core has decided to stop GC but 
+      // is still sending msgs to other cores to inform them to stop the GC
+#endif
+      mem = smemalloc_I(data2, data1, &allocsize);
+      if(mem != NULL) {
+        // send the start_va to request core, cache the msg first
+        if(BAMBOO_CHECK_SEND_MODE()) {
+          cache_msg_3(data2, MEMRESPONSE, mem, allocsize);
+        } else {
+          send_msg_3(data2, MEMRESPONSE, mem, allocsize, true);
+        }
+      } //else if mem == NULL, the gcflag of the startup core has been set
+        // and all the other cores have been informed to start gc
+#ifdef MULTICORE_GC
+    }
+#endif
+  }
+}
+
+// Handle a MEMRESPONSE message.
+// Payload words, in order: start address of the granted chunk (data1), its 
+// size (data2).
+// A size of 0 resets bamboo_smem_size and bamboo_cur_msp to 0; with 
+// MULTICORE_GC the current chunk is closed first via BAMBOO_CLOSE_CUR_MSP() 
+// and bamboo_smem_zero_top is cleared.
+// A non-zero size installs the chunk as the current one; with MULTICORE_GC 
+// CLOSEBLOCK() is applied to it and the first BAMBOO_CACHE_LINE_SIZE bytes 
+// are excluded from the usable range.
+// smemflag is set to true in both cases.
+// With MULTICORE_GC the payload is consumed but otherwise ignored while 
+// gcprocessing is set.
+INLINE void processmsg_memresponse_I() {
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // receive a shared memory response msg
+#ifdef MULTICORE_GC
+  // if is currently doing gc, dump this msg
+  if(!gcprocessing) {
+#endif
+  if(data2 == 0) {
+#ifdef MULTICORE_GC
+    // Zero out the remaining memory here because for the GC_CACHE_ADAPT 
+    // version, we need to make sure during the gcinit phase the shared heap 
+    // is not touched. Otherwise, there would be problem when adapt the cache 
+    // strategy.
+    BAMBOO_CLOSE_CUR_MSP();
+    bamboo_smem_zero_top = NULL;
+#endif
+    bamboo_smem_size = 0;
+    bamboo_cur_msp = 0;
+  } else {
+#ifdef MULTICORE_GC
+    CLOSEBLOCK(data1, data2);
+    // skip the first BAMBOO_CACHE_LINE_SIZE bytes of the chunk
+    bamboo_smem_size = data2 - BAMBOO_CACHE_LINE_SIZE;
+    bamboo_cur_msp = data1 + BAMBOO_CACHE_LINE_SIZE;
+    bamboo_smem_zero_top = bamboo_cur_msp;
+#else
+    bamboo_smem_size = data2;
+    bamboo_cur_msp =(void*)(data1);
+#endif
+  }
+  smemflag = true;
+#ifdef MULTICORE_GC
+  }
+#endif
+}
+
+#ifdef MULTICORE_GC
+// Handle a GCSTARTPRE message: print "pre msg" via tprintf() and set gcflag.
+// If smemflag is not set, the current chunk is closed via 
+// BAMBOO_CLOSE_CUR_MSP(), bamboo_smem_size, bamboo_cur_msp and 
+// bamboo_smem_zero_top are reset and smemflag is set to true.
+// NOTE(review): smemflag being clear presumably means a memory request is 
+// still outstanding -- confirm against the requesting code.
+INLINE void processmsg_gcstartpre_I() {
+       // the first time to be informed to start gc
+  tprintf("pre msg \n");
+       gcflag = true;
+       if(!smemflag) {
+    // Zero out the remaining memory here because for the GC_CACHE_ADAPT 
+    // version, we need to make sure during the gcinit phase the shared heap 
+    // is not touched. Otherwise, there would be problem when adapt the cache 
+    // strategy.
+    BAMBOO_CLOSE_CUR_MSP();
+         bamboo_smem_size = 0;
+         bamboo_cur_msp = NULL;
+         smemflag = true;
+         bamboo_smem_zero_top = NULL;
+       }
+}
+
+// Handles a GCSTARTINIT msg (no payload): switches gcphase to INITPHASE.
+INLINE void processmsg_gcstartinit_I() {
+  gcphase = INITPHASE;
+}
+
+// Handles a GCSTART msg (no payload): switches gcphase to MARKPHASE.
+INLINE void processmsg_gcstart_I() {
+  // enter the mark phase
+  gcphase = MARKPHASE;
+}
+
+// Handles a GCSTARTCOMPACT msg: stores the single payload word in
+// gcblock2fill, then switches gcphase to COMPACTPHASE.
+INLINE void processmsg_gcstartcompact_I() {
+  gcblock2fill = msgdata[msgdataindex];
+  MSG_INDEXINC_I();  
+  gcphase = COMPACTPHASE;
+}
+
+// Handles a GCSTARTFLUSH msg (no payload): switches gcphase to FLUSHPHASE.
+INLINE void processmsg_gcstartflush_I() {
+  gcphase = FLUSHPHASE;
+}
+
+// Handles a GCFINISHPRE msg. The three payload words are a core number and
+// the two counters stored for that core in gcnumsendobjs[0] and
+// gcnumreceiveobjs[0]. Only the startup core may receive this msg.
+INLINE void processmsg_gcfinishpre_I() {
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // received a pre phase finish msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // non startup core can not receive this msg
+    BAMBOO_EXIT(0xe006);
+  }
+  // remember that at least one core has reported in
+  gcprecheck = true;
+  // Apply the same core-number range check as the other GCFINISH* handlers
+  // before indexing the per-core tables with a value taken from the msg.
+  if(data1 < NUMCORESACTIVE) {
+    gccorestatus[data1] = 0;
+    gcnumsendobjs[0][data1] = data2;
+    gcnumreceiveobjs[0][data1] = data3;
+  }
+}
+
+// Handles a GCFINISHINIT msg whose single payload word is a core number:
+// clears that core's gccorestatus entry. Only the startup core may receive
+// this msg.
+INLINE void processmsg_gcfinishinit_I() {
+  int srccore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // an init phase finish msg must never arrive on a non startup core
+    BAMBOO_EXIT(0xe007);
+  }
+  // all active cores do init GC; other core numbers are ignored
+  if(srccore < NUMCORESACTIVE) {
+    gccorestatus[srccore] = 0;
+  }
+}
+
+// Handles a GCFINISHMARK msg. The three payload words are a core number and
+// the two counters stored for that core in gcnumsendobjs / gcnumreceiveobjs.
+// Only the startup core may receive this msg.
+INLINE void processmsg_gcfinishmark_I() {
+  int srccore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int nsent = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int nreceived = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // a mark phase finish msg must never arrive on a non startup core
+    BAMBOO_EXIT(0xe008);
+  }
+  // all active cores do mark; other core numbers are ignored
+  if(srccore >= NUMCORESACTIVE) {
+    return;
+  }
+  gccorestatus[srccore] = 0;
+  // phase 2 (waitconfirm set) writes the slot that is NOT gcnumsrobjs_index,
+  // phase 1 writes the slot gcnumsrobjs_index itself
+  int slot = waitconfirm ? ((gcnumsrobjs_index == 0) ? 1 : 0)
+                         : gcnumsrobjs_index;
+  gcnumsendobjs[slot][srccore] = nsent;
+  gcnumreceiveobjs[slot][srccore] = nreceived;
+}
+
+// Handles a GCFINISHCOMPACT msg. Payload: core number, filled-block count,
+// heap top, and a fourth word that is > 0 when the sender asks for more
+// memory. Only the startup core may receive this msg.
+INLINE void processmsg_gcfinishcompact_I() {
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // a compact phase finish msg must never arrive on a non startup core
+    BAMBOO_EXIT(0xe009);
+  }
+  int srccore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int nfilled = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int top = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int memneeded = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // only gc cores need to do compact
+  if(srccore >= NUMCORES4GC) {
+    return;
+  }
+  if(COMPACTPHASE == gcphase) {
+    gcfilledblocks[srccore] = nfilled;
+    gcloads[srccore] = top;
+  }
+  if(memneeded <= 0) {
+    // no further memory requested: mark the core as done
+    gccorestatus[srccore] = 0;
+    return;
+  }
+  // the core asks for more mem: look for a spare region to hand out
+  int startaddr = 0;
+  int tomove = 0;
+  int dstcore = 0;
+  if(!gcfindSpareMem_I(&startaddr, &tomove, &dstcore, memneeded, srccore)) {
+    return;
+  }
+  // answer with GCMOVESTART; cache the msg if a send is already in progress
+  if(BAMBOO_CHECK_SEND_MODE()) {
+    cache_msg_4(srccore, GCMOVESTART, dstcore, startaddr, tomove);
+  } else {
+    send_msg_4(srccore, GCMOVESTART, dstcore, startaddr, tomove, true);
+  }
+}
+
+// Handles a GCFINISHFLUSH msg whose single payload word is a core number:
+// clears that core's gccorestatus entry. Only the startup core may receive
+// this msg.
+INLINE void processmsg_gcfinishflush_I() {
+  int srccore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // a flush phase finish msg must never arrive on a non startup core
+    BAMBOO_EXIT(0xe00a);
+  }
+  // all active cores do flush; other core numbers are ignored
+  if(srccore < NUMCORESACTIVE) {
+    gccorestatus[srccore] = 0;
+  }
+}
+
+// Handles a GCFINISH msg (no payload): switches gcphase to FINISHPHASE and
+// clears gcprocessing.
+INLINE void processmsg_gcfinish_I() {
+  // received a GC finish msg
+  gcphase = FINISHPHASE;
+  gcprocessing = false;
+}
+
+// Handles a GCMARKCONFIRM msg (no payload): refreshes gcbusystatus from
+// gc_moreItems2_I() and reports it, together with this core's number and its
+// send/receive counters, to the startup core in a GCMARKREPORT msg.
+// Valid only on active cores other than the startup core.
+INLINE void processmsg_gcmarkconfirm_I() {
+  if((BAMBOO_NUM_OF_CORE==STARTUPCORE)||(BAMBOO_NUM_OF_CORE>NUMCORESACTIVE-1)){
+    // wrong core to receive such msg
+    BAMBOO_EXIT(0xe00b);
+    return;
+  }
+  gcbusystatus = gc_moreItems2_I();
+  // send the response msg; cache it if a send is already in progress
+  if(BAMBOO_CHECK_SEND_MODE()) {
+    cache_msg_5(STARTUPCORE,GCMARKREPORT,BAMBOO_NUM_OF_CORE,gcbusystatus,
+        gcself_numsendobjs,gcself_numreceiveobjs);
+  } else {
+    send_msg_5(STARTUPCORE,GCMARKREPORT,BAMBOO_NUM_OF_CORE,gcbusystatus,
+        gcself_numsendobjs,gcself_numreceiveobjs, true);
+  }
+}
+
+// Handles a GCMARKREPORT msg, the reply to GCMARKCONFIRM. Payload: core
+// number, that core's busy status, its send counter and its receive counter.
+// Only the startup core may receive this msg, and only while waitconfirm is
+// set.
+INLINE void processmsg_gcmarkreport_I() {
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data2 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data3 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int data4 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  // received a marked phase finish confirm response msg
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // wrong core to receive such msg
+    BAMBOO_EXIT(0xe00c);
+  } else {
+    int entry_index = 0;
+    if(waitconfirm) {
+      // phase 2: one fewer reply outstanding; record into the slot that is
+      // not gcnumsrobjs_index
+      numconfirm--;
+      entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
+    } else {
+      // can never reach here
+      BAMBOO_EXIT(0xe00d);
+    }
+    // Apply the same core-number range check as the GCFINISH* handlers
+    // before indexing the per-core tables with a value taken from the msg.
+    // numconfirm has already been decremented above, so the reply is still
+    // counted even if its core number is rejected here.
+    if(data1 < NUMCORESACTIVE) {
+      gccorestatus[data1] = data2;
+      gcnumsendobjs[entry_index][data1] = data3;
+      gcnumreceiveobjs[entry_index][data1] = data4;
+    }
+  }
+}
+
+// Handles a GCMARKEDOBJ msg whose payload word is the address of an object.
+// An object still in state INIT is set to DISCOVERED and enqueued. In every
+// case gcself_numreceiveobjs is incremented and gcbusystatus is set.
+INLINE void processmsg_gcmarkedobj_I() {
+  int data1 = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(!ISSHAREDOBJ(data1)) {
+    // the address failed the ISSHAREDOBJ check; the exit code carries it
+    BAMBOO_EXIT(0xa0000000+(int)data1);
+  }
+  struct ___Object___ * obj = (struct ___Object___ *)data1;
+  if(obj->marked == INIT) {
+    // first time this object is discovered
+    obj->marked = DISCOVERED;
+    gc_enqueue_I(data1);
+  }
+  gcself_numreceiveobjs++;
+  gcbusystatus = true;
+}
+
+// Handles a GCMOVESTART msg: sets gctomove and copies the three payload
+// words, in order, into gcdstcore, gcmovestartaddr and gcblock2fill.
+// (The sender in processmsg_gcfinishcompact_I passes them as
+// dstcore, startaddr, tomove.)
+INLINE void processmsg_gcmovestart_I() {
+  gctomove = true;
+  gcdstcore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();       
+  gcmovestartaddr = msgdata[msgdataindex];
+  MSG_INDEXINC_I();     
+  gcblock2fill = msgdata[msgdataindex];
+  MSG_INDEXINC_I();     
+}
+
+// Handles a GCLOBJINFO msg. Payload: total msg size in words, sender core
+// number, the value stored in gcloads for that core, a heap top, and then
+// (address, length) pairs for large objects until the msg size is reached.
+INLINE void processmsg_gclobjinfo_I() {
+  // one fewer reply outstanding
+  numconfirm--;
+
+  int msgsize = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int cnum = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1) {
+    // this core's number is outside the gc core range
+    BAMBOO_EXIT(0xe00e);
+  }
+  // store the mark result info
+  gcloads[cnum] = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int heaptop = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(gcheaptop < heaptop) {
+    gcheaptop = heaptop;
+  }
+  // large obj info: five words are consumed before the first pair, then two
+  // more per pair
+  int k = 5;
+  while(k < msgsize) {
+    int lobj = msgdata[msgdataindex];
+    MSG_INDEXINC_I();
+    int length = msgdata[msgdataindex];
+    MSG_INDEXINC_I();
+    gc_lobjenqueue_I(lobj, length, cnum);
+    gcnumlobjs++;
+    k += 2;
+  }
+}
+
+#ifdef GC_PROFILE
+// Handles a GCPROFILES msg: adds the three payload words to gc_num_obj,
+// gc_num_liveobj and gc_num_forwardobj (under MGC_SPEC only while
+// gc_profile_flag is set) and decrements gc_num_profiles.
+INLINE void processmsg_gcprofiles_I() {
+  int nobj = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int nlive = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  int nforward = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+#ifdef MGC_SPEC
+  if(gc_profile_flag)
+#endif
+  {
+    gc_num_obj += nobj;
+    gc_num_liveobj += nlive;
+    gc_num_forwardobj += nforward;
+  }
+  // the msg is counted whether or not its numbers were accumulated
+  gc_num_profiles--;
+}
+#endif // GC_PROFILE
+
+#ifdef GC_CACHE_ADAPT
+// Handles a GCSTARTPREF msg (no payload): switches gcphase to
+// PREFINISHPHASE.
+INLINE void processmsg_gcstartpref_I() {
+  gcphase = PREFINISHPHASE;
+}
+
+// Handles a GCFINISHPREF msg whose single payload word is a core number:
+// clears that core's gccorestatus entry. Only the startup core may receive
+// this msg.
+INLINE void processmsg_gcfinishpref_I() {
+  int srccore = msgdata[msgdataindex];
+  MSG_INDEXINC_I();
+  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
+    // this finish msg must never arrive on a non startup core
+    BAMBOO_EXIT(0xe00f);
+  }
+  // core numbers outside the active range are ignored
+  if(srccore < NUMCORESACTIVE) {
+    gccorestatus[srccore] = 0;
+  }
+}
+#endif // GC_CACHE_ADAPT
+#endif // #ifdef MULTICORE_GC
+
+// receive object transferred from other cores
+// or the terminate message from other cores
+// Should be invoked in critical sections!!
+// NOTICE: following format is for threadsimulate version only
+//         RAW version please see previous description
+// format: type + object
+// type: -1--stall msg
+//      !-1--object
+// return value: 0--received an object
+//               1--received nothing
+//               2--received a Stall Msg
+//               3--received a lock Msg
+//               RAW version: -1 -- received nothing
+//                            otherwise -- received msg type
+int receiveObject(int send_port_pending) {
+  PROFILE_INTERRUPT_START(); 
+msg:
+  // get the incoming msgs
+  if(receiveMsg(send_port_pending) == -1) {
+    return -1;
+  }
+processmsg:
+  // processing received msgs
+  int size = 0;
+  MSG_REMAINSIZE_I(&size);
+  if((size == 0) || (checkMsgLength_I(size) == -1)) {
+    // not a whole msg
+    // have new coming msg
+    if((BAMBOO_MSG_AVAIL() != 0) && !msgdatafull) {
+      goto msg;
+    } else {
+      return -1;
+    }
+  }
+
+  if(msglength <= size) {
+    // have some whole msg
+    MSGTYPE type;
+    type = msgdata[msgdataindex]; //[0]
+    MSG_INDEXINC_I();
+    msgdatafull = false;
+    switch(type) {
+#ifdef TASK
+    case TRANSOBJ: {
+      // receive a object transfer msg
+      processmsg_transobj_I();
+      break;
+    }  
+#endif 
+
+    case TRANSTALL: {
+      // receive a stall msg
+      processmsg_transtall_I();
+      break;
+    }   
+
+#ifdef TASK
+    // GC version have no lock msgs
+#ifndef MULTICORE_GC
+    case LOCKREQUEST: {
+      // receive lock request msg, handle it right now
+      processmsg_lockrequest_I();
+      break;
+    }   
+
+    case LOCKGROUNT: {
+      // receive lock grount msg
+      processmsg_lockgrount_I();
+      break;
+    } 
+
+    case LOCKDENY: {
+      // receive lock deny msg
+      processmsg_lockdeny_I();
+      break;
+    }  
+
+    case LOCKRELEASE: {
+      processmsg_lockrelease_I();
+      break;
+    }   
+#endif
+
+#ifdef PROFILE
+    case PROFILEOUTPUT: {
+      // receive an output profile data request msg
+      processmsg_profileoutput_I();
+      break;
+    }   
+
+    case PROFILEFINISH: {
+      // receive a profile output finish msg
+      processmsg_profilefinish_I();
+      break;
+    }  
+#endif 
+
+    // GC version has no lock msgs
+#ifndef MULTICORE_GC
+    case REDIRECTLOCK: {
+      // receive a redirect lock request msg, handle it right now
+      processmsg_redirectlock_I();
+      break;
+    }  
+
+    case REDIRECTGROUNT: {
+      // receive a lock grant msg with redirect info
+      processmsg_redirectgrount_I();
+      break;
+    } 
+
+    case REDIRECTDENY: {
+      // receive a lock deny msg with redirect info
+      processmsg_redirectdeny_I();
+      break;
+    }   
+
+    case REDIRECTRELEASE: {
+      // receive a lock release msg with redirect info
+      processmsg_redirectrelease_I();
+      break;
+    }   // case REDIRECTRELEASE
+#endif
+#endif 
+
+    case STATUSCONFIRM: {
+      // receive a status confirm info
+      processmsg_statusconfirm_I();
+      break;
+    }  
+
+    case STATUSREPORT: {
+      processmsg_statusreport_I();
+      break;
+    } 
+
+    case TERMINATE: {
+      // receive a terminate msg
+      processmsg_terminate_I();
+      break;
+    } 
+
+    case MEMREQUEST: {
+      processmsg_memrequest_I();
+      break;
+    }
+
+    case MEMRESPONSE: {
+      processmsg_memresponse_I();
+      break;
+    }
+
+#ifdef MULTICORE_GC
+    // GC msgs
+    case GCSTARTPRE: {
+      processmsg_gcstartpre_I();
+      break;
+    }
+       
+    case GCSTARTINIT: {
+      processmsg_gcstartinit_I();
+      break;
+    }
+
+    case GCSTART: {
+      // receive a start GC msg
+      processmsg_gcstart_I();
+      break;
+    }
+
+    case GCSTARTCOMPACT: {
+      // a compact phase start msg
+      processmsg_gcstartcompact_I();
+      break;
+    }
+
+    case GCSTARTFLUSH: {
+      // received a flush phase start msg
+      processmsg_gcstartflush_I();
+      break;
+    }
+
+    case GCFINISHPRE: {
+      processmsg_gcfinishpre_I();
+      break;
+    }
+       
+    case GCFINISHINIT: {
+      processmsg_gcfinishinit_I();
+      break;
+    }
+
+    case GCFINISHMARK: {
+      processmsg_gcfinishmark_I();
+      break;
+    }
+
+    case GCFINISHCOMPACT: {
+      // received a compact phase finish msg
+      processmsg_gcfinishcompact_I();
+      break;
+    }
+
+    case GCFINISHFLUSH: {
+      processmsg_gcfinishflush_I();
+      break;
+    }  
+
+    case GCFINISH: {
+      processmsg_gcfinish_I();
+      break;
+    } 
+
+    case GCMARKCONFIRM: {
+      // received a marked phase finish confirm request msg
+      // all cores should do mark
+      processmsg_gcmarkconfirm_I();
+      break;
+    } 
+
+    case GCMARKREPORT: {
+      processmsg_gcmarkreport_I();
+      break;
+    } 
+
+    case GCMARKEDOBJ: {
+      processmsg_gcmarkedobj_I();
+      break;
+    } 
+
+    case GCMOVESTART: {
+      // received a start moving objs msg
+      processmsg_gcmovestart_I();
+      break;
+    } 
+
+    case GCLOBJREQUEST: {
+      // received a large objs info request msg
+      transferMarkResults_I();
+      break;
+    } 
+
+    case GCLOBJINFO: {
+      // received a large objs info response msg
+      processmsg_gclobjinfo_I();
+      break;
+    } 
+
+#ifdef GC_PROFILE
+    case GCPROFILES: {
+      // received a gcprofiles msg
+      processmsg_gcprofiles_I();
+      break;
+    }
+#endif // GC_PROFILE
+
+#ifdef GC_CACHE_ADAPT
+    case GCSTARTPREF: {
+      // received a gcstartpref msg
+      processmsg_gcstartpref_I();
+      break;
+    }
+
+    case GCFINISHPREF: {
+      // received a gcfinishpref msg
+      processmsg_gcfinishpref_I();
+      break;
+    }
+#endif
+#endif 
+
+    default:
+      break;
+    }
+    msglength = BAMBOO_MSG_BUF_LENGTH;
+
+    if((msgdataindex != msgdatalast) || (msgdatafull)) {
+      // still have available msg
+      goto processmsg;
+    }
+
+    // have new coming msg
+    if(BAMBOO_MSG_AVAIL() != 0) {
+      goto msg;
+    } 
+
+    PROFILE_INTERRUPT_END();
+    return (int)type;
+  } else {
+    // not a whole msg
+    return -2;
+  }
+}
+#endif // MULTICORE
diff --git a/Robust/src/Runtime/bamboo/multicoremsg.h b/Robust/src/Runtime/bamboo/multicoremsg.h
new file mode 100644 (file)
index 0000000..5054f2a
--- /dev/null
@@ -0,0 +1,280 @@
+#ifndef BAMBOO_MULTICORE_MSG_H
+#define BAMBOO_MULTICORE_MSG_H
+#include "multicore.h"
+#ifdef MULTICORE
+// data structures for msgs
+// Both buffers are used as rings: an index is advanced with
+// (index + 1) & MASK, so each MASK must stay equal to LENGTH - 1.
+// NOTE(review): the objects below are defined (not declared extern) in a
+// header, so every translation unit including it carries its own tentative
+// definition. Confirm the toolchain merges them as intended.
+#define BAMBOO_OUT_BUF_LENGTH 2048
+#define BAMBOO_OUT_BUF_MASK (0x7FF)
+#define BAMBOO_MSG_BUF_LENGTH 2048
+#define BAMBOO_MSG_BUF_MASK (0x7FF)
+// incoming msg words
+int msgdata[BAMBOO_MSG_BUF_LENGTH];
+// read position in msgdata (advanced by MSG_INDEXINC_I)
+volatile int msgdataindex;
+// write position in msgdata (advanced by MSG_LASTINDEXINC_I)
+volatile int msgdatalast;
+// length of the msg currently being received; receiveObject compares it
+// with the number of unread words
+int msglength;
+// tells a full buffer from an empty one when msgdataindex == msgdatalast
+volatile bool msgdatafull;
+// outgoing msg words
+int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
+// read position in outmsgdata (advanced by OUTMSG_INDEXINC)
+int outmsgindex;
+// write position in outmsgdata (advanced by OUTMSG_LASTINDEXINC)
+int outmsglast;
+// presumably the number of words of the current outgoing msg still to be
+// sent -- TODO confirm against the send code
+int outmsgleft;
+// presumably set while a cached outgoing msg is still waiting to be sent --
+// TODO confirm against send_hanging_msg()
+volatile bool isMsgHanging;
+
+#define MSG_INDEXINC_I() \
+  msgdataindex = (msgdataindex + 1) & (BAMBOO_MSG_BUF_MASK) 
+
+#define MSG_LASTINDEXINC_I() \
+  msgdatalast = (msgdatalast + 1) & (BAMBOO_MSG_BUF_MASK)
+
+#define MSG_CACHE_I(n) \
+  msgdata[msgdatalast] = (n); \
+  MSG_LASTINDEXINC_I()
+
+// NOTE: if msgdataindex == msgdatalast, it always means that the buffer if
+//       full. In the case that the buffer is empty, should never call this
+//       MACRO
+#define MSG_REMAINSIZE_I(s) \
+  if(msgdataindex < msgdatalast) { \
+    (*(int*)s) = msgdatalast - msgdataindex; \
+  } else if((msgdataindex == msgdatalast) && (!msgdatafull)) { \
+    (*(int*)s) = 0; \
+  } else { \
+    (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) - msgdataindex + msgdatalast; \
+  }
+
+#define OUTMSG_INDEXINC() \
+  outmsgindex = (outmsgindex + 1) & (BAMBOO_OUT_BUF_MASK)
+
+#define OUTMSG_LASTINDEXINC() \
+  outmsglast = (outmsglast + 1) & (BAMBOO_OUT_BUF_MASK); \
+  if(outmsglast == outmsgindex) { \
+    BAMBOO_EXIT(0xd101); \
+  }
+
+#define OUTMSG_CACHE(n) \
+  outmsgdata[outmsglast] = (n); \
+  OUTMSG_LASTINDEXINC();
+
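+/* Message format:
+ *      type + Msgbody
+ * NOTE: the type codes listed below are offsets from MSGSTART in the MSGTYPE
+ *       enum. The GC codes (12 and up) do not account for GCSTARTPRE and
+ *       GCFINISHPRE and no longer line up with the enum; treat the enum as
+ *       authoritative.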
+ * type: 1 -- transfer object
+ *       2 -- transfer stall msg
+ *       3 -- lock request
+ *       4 -- lock grount
+ *       5 -- lock deny
+ *       6 -- lock release
+ *       // add for profile info
+ *       7 -- transfer profile output msg
+ *       8 -- transfer profile output finish msg
+ *       // add for alias lock strategy
+ *       9 -- redirect lock request
+ *       a -- lock grant with redirect info
+ *       b -- lock deny with redirect info
+ *       c -- lock release with redirect info
+ *       d -- status confirm request
+ *       e -- status report msg
+ *       f -- terminate
+ *      10 -- requiring for new memory
+ *      11 -- response for new memory request
+ *      12 -- GC init phase start
+ *      13 -- GC start
+ *      14 -- compact phase start
+ *      15 -- flush phase start
+ *      16 -- init phase finish
+ *      17 -- mark phase finish
+ *      18 -- compact phase finish
+ *      19 -- flush phase finish
+ *      1a -- GC finish
+ *      1b -- marked phase finish confirm request
+ *      1c -- marked phase finish confirm response
+ *      1d -- markedObj msg
+ *      1e -- start moving objs msg
+ *      1f -- ask for mapping info of a markedObj
+ *      20 -- mapping info of a markedObj
+ *      21 -- large objs info request
+ *      22 -- large objs info response
+ *      23 -- large objs mapping info
+ *
+ * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
+ * StallMsg: 2 + corenum + sendobjs + receiveobjs
+ *             (size is always 4 * sizeof(int))
+ * LockMsg: 3 + lock type + obj pointer + lock + request core
+ *            (size is always 5 * sizeof(int))
+ *          4/5/6 + lock type + obj pointer + lock
+ *            (size is always 4 * sizeof(int))
+ *          9 + lock type + obj pointer +  redirect lock + root request core
+ *            + request core
+ *            (size is always 6 * sizeof(int))
+ *          a/b + lock type + obj pointer + redirect lock
+ *              (size is always 4 * sizeof(int))
+ *          c + lock type + lock + redirect lock
+ *            (size is always 4 * sizeof(int))
+ *          lock type: 0 -- read; 1 -- write
+ * ProfileMsg: 7 + totalexetime
+ *               (size is always 2 * sizeof(int))
+ *             8 + corenum
+ *               (size is always 2 * sizeof(int))
+ * StatusMsg: d (size is always 1 * sizeof(int))
+ *            e + status + corenum + sendobjs + receiveobjs
+ *              (size is always 5 * sizeof(int))
+ *            status: 0 -- stall; 1 -- busy
+ * TerminateMsg: f (size is always 1 * sizeof(int))
+ * MemoryMsg: 10 + size + corenum
+ *              (size is always 3 * sizeof(int))
+ *           11 + base_va + size
+ *              (size is always 3 * sizeof(int))
+ * GCMsg: 12/13 (size is always 1 * sizeof(int))
+ *        14 + size of msg + (num of objs to move + (start address
+ *           + end address + dst core + start dst)+)?
+ *           + (num of incoming objs + (start dst + orig core)+)?
+ *           + (num of large obj lists + (start address + length
+ *           + start dst)+)?
+ *        15 (size is always 1 * sizeof(int))
+ *        16 + corenum
+ *           (size is always 2 * sizeof(int))
+ *        17 + corenum + gcsendobjs + gcreceiveobjs
+ *           (size is always 4 * sizeof(int))
+ *        18 + corenum + fulfilled blocks num + (finish compact(1) + current
+ *           heap top)/(need mem(0) + mem need)
+ *           (size is always 5 * sizeof(int))
+ *        19 + corenum
+ *              (size is always 2 * sizeof(int))
+ *        1a (size is always 1 * sizeof(int))
+ *        1b (size is always 1 * sizeof(int))
+ *        1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
+ *           (size is always 5 * sizeof(int))
+ *        1d + obj's address + request core
+ *           (size is always 3 * sizeof(int))
+ *        1e + corenum + start addr + end addr
+ *           (size is always 4 * sizeof(int))
+ *        1f + obj's address + corenum
+ *           (size is always 3 * sizeof(int))
+ *        20 + obj's address + dst address
+ *           (size is always 3 * sizeof(int))
+ *        21 (size is always 1 * sizeof(int))
+ *        22 + size of msg + corenum + current heap size
+ *           + (num of large obj lists + (start address + length)+)?
+ *        23 + orig large obj ptr + new large obj ptr
+ *            (size is always 3 * sizeof(int))
+ */
+// Type word that starts every inter-core msg. Values count up from MSGSTART.
+// NOTE: the hex values in the comments below hold only when MULTICORE_GC,
+//       GC_PROFILE and GC_CACHE_ADAPT are all defined; with any of them
+//       undefined the enumerators after the missing group shift down.
+typedef enum {
+  MSGSTART = 0xD0,       // 0xD0
+  TRANSOBJ,              // 0xD1
+  TRANSTALL,             // 0xD2
+  LOCKREQUEST,           // 0xD3
+  LOCKGROUNT,            // 0xD4
+  LOCKDENY,              // 0xD5
+  LOCKRELEASE,           // 0xD6
+  PROFILEOUTPUT,         // 0xD7
+  PROFILEFINISH,         // 0xD8
+  REDIRECTLOCK,          // 0xD9
+  REDIRECTGROUNT,        // 0xDa
+  REDIRECTDENY,          // 0xDb
+  REDIRECTRELEASE,       // 0xDc
+  STATUSCONFIRM,         // 0xDd
+  STATUSREPORT,          // 0xDe
+  TERMINATE,             // 0xDf
+  MEMREQUEST,            // 0xE0
+  MEMRESPONSE,           // 0xE1
+#ifdef MULTICORE_GC
+  GCSTARTPRE,            // 0xE2
+  GCSTARTINIT,           // 0xE3
+  GCSTART,               // 0xE4
+  GCSTARTCOMPACT,        // 0xE5
+  GCSTARTFLUSH,          // 0xE6
+  GCFINISHPRE,           // 0xE7
+  GCFINISHINIT,          // 0xE8
+  GCFINISHMARK,          // 0xE9
+  GCFINISHCOMPACT,       // 0xEa
+  GCFINISHFLUSH,         // 0xEb
+  GCFINISH,              // 0xEc
+  GCMARKCONFIRM,         // 0xEd
+  GCMARKREPORT,          // 0xEe
+  GCMARKEDOBJ,           // 0xEf
+  GCMOVESTART,           // 0xF0
+  GCLOBJREQUEST,         // 0xF1   
+  GCLOBJINFO,            // 0xF2
+#ifdef GC_PROFILE
+  GCPROFILES,            // 0xF3
+#endif // GC_PROFILE
+#ifdef GC_CACHE_ADAPT
+  // NOTE(review): receiveObject has no case for GCSTARTPOSTINIT or
+  //               GCFINISHPOSTINIT; confirm they are handled elsewhere.
+  GCSTARTPOSTINIT,       // 0xF4
+  GCSTARTPREF,           // 0xF5
+  GCFINISHPOSTINIT,      // 0xF6
+  GCFINISHPREF,          // 0xF7
+#endif // GC_CACHE_ADAPT
+#endif // MULTICORE_GC
+  MSGEND
+} MSGTYPE;
+
+// msg related functions
+// send_msg_N / cache_msg_N take the target core followed by N msg words
+// n0..n(N-1). At the call sites in multicoremsg.c n0 is the MSGTYPE and the
+// remaining words are the payload. The handlers call cache_msg_N when
+// BAMBOO_CHECK_SEND_MODE() is true and send_msg_N (with isInterrupt == true)
+// otherwise.
+INLINE void send_hanging_msg(bool isInterrupt);
+INLINE void send_msg_1(int targetcore,
+                       unsigned long n0,
+                       bool isInterrupt);
+INLINE void send_msg_2(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       bool isInterrupt);
+INLINE void send_msg_3(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       unsigned long n2,
+                       bool isInterrupt);
+INLINE void send_msg_4(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       unsigned long n2,
+                       unsigned long n3,
+                       bool isInterrupt);
+INLINE void send_msg_5(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       unsigned long n2,
+                       unsigned long n3,
+                       unsigned long n4,
+                       bool isInterrupt);
+INLINE void send_msg_6(int targetcore,
+                       unsigned long n0,
+                       unsigned long n1,
+                       unsigned long n2,
+                       unsigned long n3,
+                       unsigned long n4,
+                       unsigned long n5,
+                       bool isInterrupt);
+INLINE void cache_msg_1(int targetcore,
+                        unsigned long n0);
+INLINE void cache_msg_2(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1);
+INLINE void cache_msg_3(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1,
+                        unsigned long n2);
+INLINE void cache_msg_4(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1,
+                        unsigned long n2,
+                        unsigned long n3);
+INLINE void cache_msg_5(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1,
+                        unsigned long n2,
+                        unsigned long n3,
+                        unsigned long n4);
+INLINE void cache_msg_6(int targetcore,
+                        unsigned long n0,
+                        unsigned long n1,
+                        unsigned long n2,
+                        unsigned long n3,
+                        unsigned long n4,
+                        unsigned long n5);
+// Fetches incoming msg words; receiveObject treats a return value of -1 as
+// "nothing received".
+INLINE int receiveMsg(unsigned int send_port_pending);
+#ifdef TASK
+INLINE void transferObject(struct transObjInfo * transObj);
+#endif
+
+#ifdef MULTICORE_GC
+// NOTE(review): receiveObject calls transferMarkResults_I() for
+//               GCLOBJREQUEST, which is not declared here; confirm which
+//               name the definition actually uses.
+INLINE void transferMarkResults();
+#endif 
+
+#endif // MULTICORE
+#endif // BAMBOO_MULTICORE_MSG_H
index 8594308da2749109d485a74dca6eeac24def0b09..6c4b5f01315b9d18066524b18d9255e541a00d76 100644 (file)
@@ -2,19 +2,7 @@
 
 #include "runtime.h"
 #include "multicoreruntime.h"
-#include "runtime_arch.h"
-#include "GenericHashtable.h"
-#include "structdefs.h"
 #include "methodheaders.h"
-#include "mem.h"
-#ifndef RAW
-#include <stdio.h>
-#include <stdlib.h>
-#endif
-
-#ifndef INLINE
-#define INLINE    inline __attribute__((always_inline))
-#endif // #ifndef INLINE
 
 extern int classsize[];
 extern int typearray[];
@@ -24,12 +12,6 @@ extern int* supertypes[];
 #ifdef TASK
 extern struct genhashtable * activetasks;
 #endif
-#ifdef MULTICORE_GC
-#ifdef SMEMM
-extern unsigned int gcmem_mixed_threshold;
-extern unsigned int gcmem_mixed_usedmem;
-#endif // SMEMM
-#endif // MULTICORE_GC
 
 int debugtask=0;
 #ifdef MGC
@@ -38,33 +20,33 @@ int corenum = 0;
 
 int instanceofif(int otype, int type) {
   if(otype == type) {
-       return 1;
+    return 1;
   }
   if(otype == -1) {
-       return 0;
+    return 0;
   }
   int num = supertypes[otype][0];
   for(int i = 1; i < num + 1; i++) {
-       int t = supertypes[otype][i];
-       if(instanceofif(t, type) == 1) {
-         return 1;
-       }
+    int t = supertypes[otype][i];
+    if(instanceofif(t, type) == 1) {
+      return 1;
+    }
   }
   return 0;
 }
 
 int instanceof(struct ___Object___ *ptr, int type) {
   if(ptr == NULL) {
-       return 0;
+    return 0;
   }
   int i=ptr->type;
   if(instanceofif(i, type) == 1) {
-       return 1;
+    return 1;
   }
   if (i>NUMCLASSES) {
     do {
       if (i==type)
-       return 1;
+        return 1;
       i=typearray2[i-NUMCLASSES];
     } while(i!=-1);
   }
@@ -75,14 +57,14 @@ void initializeexithandler() {
 }
 
 /* This function inject failures */
-
 void injectinstructionfailure() {
   // not supported in MULTICORE version
   return;
 }
 
 #ifdef D___Double______nativeparsedouble____L___String___
-double CALL01(___Double______nativeparsedouble____L___String___,struct ___String___ * ___str___) {
+double CALL01(___Double______nativeparsedouble____L___String___,
+              struct ___String___ * ___str___) {
   int length=VAR(___str___)->___count___;
   int maxlength=(length>60) ? 60 : length;
   char str[maxlength+1];
@@ -90,7 +72,8 @@ double CALL01(___Double______nativeparsedouble____L___String___,struct ___String
   int i;
   int offset=VAR(___str___)->___offset___;
   for(i=0; i<maxlength; i++) {
-    str[i]=((short *)(((char *)&chararray->___length___)+sizeof(int)))[i+offset];
+    str[i]=
+      ((short *)(((char *)&chararray->___length___)+sizeof(int)))[i+offset];
   }
   str[i]=0;
   double d=0.0; //atof(str); TODO Unimplemented nativeparsedoulbe
@@ -99,7 +82,12 @@ double CALL01(___Double______nativeparsedouble____L___String___,struct ___String
 #endif
 
 #ifdef D___Double______nativeparsedouble_____AR_B_I_I 
-double CALL23(___Double______nativeparsedouble_____AR_B_I_I, int start, int length,int start,int length,struct ArrayObject * ___str___) {
+double CALL23(___Double______nativeparsedouble_____AR_B_I_I, 
+              int start, 
+              int length,
+              int start,
+              int length,
+              struct ArrayObject * ___str___) {
   int maxlength=(length>60)?60:length;
   char str[maxlength+1];
   struct ArrayObject * bytearray=VAR(___str___);
@@ -113,8 +101,7 @@ double CALL23(___Double______nativeparsedouble_____AR_B_I_I, int start, int leng
 }
 #endif
 
-typedef union jvalue
-{
+typedef union jvalue {
   bool z;
   char    c;
   short   s;
@@ -125,7 +112,9 @@ typedef union jvalue
 } jvalue;
 
 #ifdef D___Double______doubleToRawLongBits____D 
-long long CALL11(___Double______doubleToRawLongBits____D, double ___value___, double ___value___) {
+long long CALL11(___Double______doubleToRawLongBits____D, 
+                 double ___value___, 
+                 double ___value___) {
   jvalue val;
   val.d = ___value___;
 
@@ -133,9 +122,7 @@ long long CALL11(___Double______doubleToRawLongBits____D, double ___value___, do
   /* On little endian ARM processors when using FPA, word order of
      doubles is still big endian. So take that into account here. When
      using VFP, word order of doubles follows byte order. */
-
 #define SWAP_DOUBLE(a)    (((a) << 32) | (((a) >> 32) & 0x00000000ffffffff))
-
   val.j = SWAP_DOUBLE(val.j);
 #endif
 
@@ -144,7 +131,9 @@ long long CALL11(___Double______doubleToRawLongBits____D, double ___value___, do
 #endif
 
 #ifdef D___Double______longBitsToDouble____J 
-double CALL11(___Double______longBitsToDouble____J, long long ___bits___, long long ___bits___) {
+double CALL11(___Double______longBitsToDouble____J, 
+              long long ___bits___, 
+              long long ___bits___) {
   jvalue val;
   val.j = ___bits___;
 
@@ -160,7 +149,10 @@ double CALL11(___Double______longBitsToDouble____J, long long ___bits___, long l
 #endif
 
 #ifdef D___String______convertdoubletochar____D__AR_C
-int CALL12(___String______convertdoubletochar____D__AR_C, double ___val___, double ___val___, struct ArrayObject * ___chararray___) {
+int CALL12(___String______convertdoubletochar____D__AR_C, 
+           double ___val___, 
+           double ___val___, 
+           struct ArrayObject * ___chararray___) {
   int length=VAR(___chararray___)->___length___;
   char str[length];
   int i;
@@ -168,18 +160,23 @@ int CALL12(___String______convertdoubletochar____D__AR_C, double ___val___, doub
   if (num>=length)
     num=length-1;
   for(i=0; i<length; i++) {
-    ((short *)(((char *)&VAR(___chararray___)->___length___)+sizeof(int)))[i]=(short)str[i];
+    ((short *)(((char *)&VAR(___chararray___)->___length___)+sizeof(int)))[i]=
+      (short)str[i];
   }
   return num;
 }
 #else
-int CALL12(___String______convertdoubletochar____D__AR_C, double ___val___, double ___val___, struct ArrayObject ___chararray___) {
+int CALL12(___String______convertdoubletochar____D__AR_C, 
+           double ___val___, 
+           double ___val___, 
+           struct ArrayObject ___chararray___) {
   return 0;
 }
 #endif
 
 #ifdef D___System______deepArrayCopy____L___Object____L___Object___
-void deepArrayCopy(struct ___Object___ * dst, struct ___Object___ * src) {
+void deepArrayCopy(struct ___Object___ * dst, 
+                   struct ___Object___ * src) {
   int dsttype=((int *)dst)[0];
   int srctype=((int *)src)[0];
   if (dsttype<NUMCLASSES||srctype<NUMCLASSES||srctype!=dsttype)
@@ -195,31 +192,41 @@ void deepArrayCopy(struct ___Object___ * dst, struct ___Object___ * src) {
     int elementsize=classsize[srctype];
     int size=srclength*elementsize;
     //primitives
-    memcpy(((char *)&aodst->___length___)+sizeof(int) , ((char *)&aosrc->___length___)+sizeof(int), size);
+    memcpy(((char *)&aodst->___length___)+sizeof(int) , 
+        ((char *)&aosrc->___length___)+sizeof(int), size);
   } else {
     //objects
     int i;
     for(i=0;i<srclength;i++) {
-      struct ___Object___ * ptr=((struct ___Object___**)(((char*) &aosrc->___length___)+sizeof(int)))[i];
+      struct ___Object___ * ptr=
+        ((struct ___Object___**)(((char*)&aosrc->___length___)+sizeof(int)))[i];
       int ptrtype=((int *)ptr)[0];
       if (ptrtype>=NUMCLASSES) {
-       struct ___Object___ * dstptr=((struct ___Object___**)(((char*) &aodst->___length___)+sizeof(int)))[i];
-       deepArrayCopy(dstptr,ptr);
+        struct ___Object___ * dstptr=((struct ___Object___**)
+            (((char*)&aodst->___length___)+sizeof(int)))[i];
+        deepArrayCopy(dstptr,ptr);
       } else {
-       //hit an object
-       ((struct ___Object___ **)(((char*) &aodst->___length___)+sizeof(int)))[i]=ptr;
+        //hit an object
+        ((struct ___Object___ **)
+         (((char*) &aodst->___length___)+sizeof(int)))[i]=ptr;
       }
     }
   }
 }
 
-void CALL02(___System______deepArrayCopy____L___Object____L___Object___, struct ___Object___ * ___dst___, struct ___Object___ * ___src___) {
+void CALL02(___System______deepArrayCopy____L___Object____L___Object___, 
+            struct ___Object___ * ___dst___, 
+            struct ___Object___ * ___src___) {
   deepArrayCopy(VAR(___dst___), VAR(___src___));
 }
 #endif
 
 #ifdef D___System______arraycopy____L___Object____I_L___Object____I_I
-void arraycopy(struct ___Object___ *src, int srcPos, struct ___Object___ *dst, int destPos, int length) {
+void arraycopy(struct ___Object___ *src, 
+               int srcPos, 
+               struct ___Object___ *dst, 
+               int destPos, 
+               int length) {
   int dsttype=((int *)dst)[0];
   int srctype=((int *)src)[0];
 
@@ -244,51 +251,70 @@ void arraycopy(struct ___Object___ *src, int srcPos, struct ___Object___ *dst, i
     int elementsize=classsize[srctype];
     int size=length*elementsize;
     //primitives
-    memcpy(((char *)&aodst->___length___)+sizeof(int)+destPos*elementsize, ((char *)&aosrc->___length___)+sizeof(int)+srcPos*elementsize, size);
+    memcpy(((char *)&aodst->___length___)+sizeof(int)+destPos*elementsize, 
+        ((char *)&aosrc->___length___)+sizeof(int)+srcPos*elementsize, size);
   } else {
     //objects
     int i;
     for(i=0;i<length;i++) {
-      struct ___Object___ * ptr=((struct ___Object___**)(((char*) &aosrc->___length___)+sizeof(int)))[i+srcPos];
+      struct ___Object___ * ptr=((struct ___Object___**)
+          (((char*)&aosrc->___length___)+sizeof(int)))[i+srcPos];
       int ptrtype=((int *)ptr)[0];
       //hit an object
-      ((struct ___Object___ **)(((char*) &aodst->___length___)+sizeof(int)))[i+destPos]=ptr;
+      ((struct ___Object___ **)
+       (((char*) &aodst->___length___)+sizeof(int)))[i+destPos]=ptr;
     }
   }
 }
 
-void CALL35(___System______arraycopy____L___Object____I_L___Object____I_I, int ___srcPos___, int ___destPos___, int ___length___, struct ___Object___ * ___src___, int ___srcPos___, struct ___Object___ * ___dst___, int  ___destPos___, int ___length___) {
-  arraycopy(VAR(___src___), ___srcPos___, VAR(___dst___), ___destPos___, ___length___);
+void CALL35(___System______arraycopy____L___Object____I_L___Object____I_I, 
+            int ___srcPos___, 
+            int ___destPos___, 
+            int ___length___, 
+            struct ___Object___ * ___src___, 
+            int ___srcPos___, 
+            struct ___Object___ * ___dst___, 
+            int  ___destPos___, 
+            int ___length___) {
+  arraycopy(VAR(___src___), ___srcPos___, VAR(___dst___), ___destPos___, 
+      ___length___);
 }
 #endif
 
-void CALL11(___System______exit____I,int ___status___, int ___status___) {
+void CALL11(___System______exit____I,
+            int ___status___, 
+            int ___status___) {
 // gc_profile mode, output gc prfiling data
 #ifdef MULTICORE_GC
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-       BAMBOO_PRINT(BAMBOO_GET_EXE_TIME());
-       BAMBOO_PRINT(0xbbbbbbbb);
-#ifdef GC_CACHE_ADAPT
-       bamboo_mask_timer_intr(); // disable the TILE_TIMER interrupt
-#endif // GC_CACHE_ADAPT
-#ifdef GC_PROFILE
-       gc_outputProfileData();
-#endif // #ifdef GC_PROFILE
+    BAMBOO_PRINT(BAMBOO_GET_EXE_TIME());
+    BAMBOO_PRINT(0xbbbbbbbb);
+    CACHEADAPT_DISABLE_TIMER();
+    GC_OUTPUT_PROFILE_DATA();
   }
-#endif // #ifdef MULTICORE_GC
+#endif 
   BAMBOO_EXIT(___status___);
 }
 
 #ifdef D___Vector______removeElement_____AR_L___Object____I_I
-void CALL23(___Vector______removeElement_____AR_L___Object____I_I, int ___index___, int ___size___, struct ArrayObject * ___array___, int ___index___, int ___size___) {
-  char* offset=((char *)(&VAR(___array___)->___length___))+sizeof(unsigned int)+sizeof(void *)*___index___;
-  memmove(offset, offset+sizeof(void *),(___size___-___index___-1)*sizeof(void *));
+void CALL23(___Vector______removeElement_____AR_L___Object____I_I, 
+            int ___index___, 
+            int ___size___, 
+            struct ArrayObject * ___array___, 
+            int ___index___, 
+            int ___size___) {
+  char* offset=((char *)(&VAR(___array___)->___length___))
+    +sizeof(unsigned int)+sizeof(void *)*___index___;
+  memmove(offset, offset+sizeof(void *),
+      (___size___-___index___-1)*sizeof(void *));
 }
 #endif
 
-void CALL11(___System______printI____I,int ___status___, int ___status___) {
-  BAMBOO_DEBUGPRINT(0x1111);
-  BAMBOO_DEBUGPRINT_REG(___status___);
+void CALL11(___System______printI____I,
+            int ___status___, 
+            int ___status___) {
+  BAMBOO_PRINT(0x1111);
+  BAMBOO_PRINT_REG(___status___);
 }
 
 long long CALL00(___System______currentTimeMillis____) {
@@ -314,7 +340,8 @@ void CALL00(___System______resetgcprofileflag____) {
 #endif
 }
 
-void CALL01(___System______printString____L___String___,struct ___String___ * ___s___) {
+void CALL01(___System______printString____L___String___,
+            struct ___String___ * ___s___) {
 #ifdef MGC
 #ifdef TILERA_BME
   struct ArrayObject * chararray=VAR(___s___)->___value___;
@@ -322,8 +349,8 @@ void CALL01(___System______printString____L___String___,struct ___String___ * __
   int offset=VAR(___s___)->___offset___;
   tprintf("");
   for(i=0; i<VAR(___s___)->___count___; i++) {
-       short sc=
-         ((short *)(((char *)&chararray->___length___)+sizeof(int)))[i+offset];
+    short sc=
+      ((short *)(((char *)&chararray->___length___)+sizeof(int)))[i+offset];
     printf("%c", sc);
   }
 #endif // TILERA_BME
@@ -333,9 +360,10 @@ void CALL01(___System______printString____L___String___,struct ___String___ * __
 /* Object allocation function */
 
 #ifdef MULTICORE_GC
-void * allocate_new(void * ptr, int type) {
+void * allocate_new(void * ptr, 
+                    int type) {
   struct ___Object___ * v=
-       (struct ___Object___*)FREEMALLOC((struct garbagelist*) ptr,classsize[type]);
+    (struct ___Object___*)FREEMALLOC((struct garbagelist*) ptr,classsize[type]);
   v->type=type;
 #ifdef TASK
   v->version = 0;
@@ -352,10 +380,12 @@ void * allocate_new(void * ptr, int type) {
 
 /* Array allocation function */
 
-struct ArrayObject * allocate_newarray(void * ptr, int type, int length) {
-  struct ArrayObject * v=(struct ArrayObject *)
-       FREEMALLOC((struct garbagelist*)ptr,
-               sizeof(struct ArrayObject)+length*classsize[type]);
+struct ArrayObject * allocate_newarray(void * ptr, 
+                                       int type, 
+                                       int length) {
+  struct ArrayObject * v=(struct ArrayObject *)FREEMALLOC(
+      (struct garbagelist*)ptr,
+      sizeof(struct ArrayObject)+length*classsize[type]);
   v->type=type;
 #ifdef TASK
   v->version = 0;
@@ -387,9 +417,10 @@ void * allocate_new(int type) {
 
 /* Array allocation function */
 
-struct ArrayObject * allocate_newarray(int type, int length) {
-  struct ArrayObject * v=
-       FREEMALLOC(sizeof(struct ArrayObject)+length*classsize[type]);
+struct ArrayObject * allocate_newarray(int type, 
+                                       int length) {
+  struct ArrayObject * v=FREEMALLOC(
+      sizeof(struct ArrayObject)+length*classsize[type]);
   v->type=type;
 #ifdef TASK
   v->version = 0;
@@ -404,19 +435,19 @@ struct ArrayObject * allocate_newarray(int type, int length) {
 /* Converts C character arrays into Java strings */
 #ifdef MULTICORE_GC
 __attribute__((malloc)) struct ___String___ * NewStringShort(void * ptr, 
-                                                                const short *str,
-                                                                                                                        int length) {
+                                                             const short *str,
+                                                             int length) {
 #else
 __attribute__((malloc)) struct ___String___ * NewStringShort(const short *str,
-                                                                int length) {
+                                                             int length) {
 #endif
   int i;
 #ifdef MULTICORE_GC
   struct ArrayObject * chararray=
-       allocate_newarray((struct garbagelist *)ptr, CHARARRAYTYPE, length);
+    allocate_newarray((struct garbagelist *)ptr, CHARARRAYTYPE, length);
   INTPTR ptrarray[]={1, (INTPTR) ptr, (INTPTR) chararray};
   struct ___String___ * strobj=
-       allocate_new((struct garbagelist *) &ptrarray, STRINGTYPE);
+    allocate_new((struct garbagelist *) &ptrarray, STRINGTYPE);
   chararray=(struct ArrayObject *) ptrarray[2];
 #else
   struct ArrayObject * chararray=allocate_newarray(CHARARRAYTYPE, length);
@@ -434,17 +465,20 @@ __attribute__((malloc)) struct ___String___ * NewStringShort(const short *str,
 
 /* Converts C character arrays into Java strings */
 #ifdef MULTICORE_GC
-struct ___String___ * NewString(void * ptr, const char *str,int length) {
+struct ___String___ * NewString(void * ptr, 
+                                const char *str,
+                                int length) {
 #else
-struct ___String___ * NewString(const char *str,int length) {
+struct ___String___ * NewString(const char *str,
+                                int length) {
 #endif
   int i;
 #ifdef MULTICORE_GC
   struct ArrayObject * chararray=
-       allocate_newarray((struct garbagelist *)ptr, CHARARRAYTYPE, length);
+    allocate_newarray((struct garbagelist *)ptr, CHARARRAYTYPE, length);
   int ptrarray[]={1, (int) ptr, (int) chararray};
   struct ___String___ * strobj=
-       allocate_new((struct garbagelist *) &ptrarray, STRINGTYPE);
+    allocate_new((struct garbagelist *) &ptrarray, STRINGTYPE);
   chararray=(struct ArrayObject *) ptrarray[2];
 #else
   struct ArrayObject * chararray=allocate_newarray(CHARARRAYTYPE, length);
@@ -491,19 +525,15 @@ void failednullptr(void * ptr) {
   j = 0;
   struct garbagelist * stackptr = (struct garbagelist *)ptr;
   while(stackptr!=NULL) {
-    GC_BAMBOO_DEBUGPRINT(0xa501);
-    GC_BAMBOO_DEBUGPRINT_REG(stackptr->size);
-    GC_BAMBOO_DEBUGPRINT_REG(stackptr->next);
-    GC_BAMBOO_DEBUGPRINT_REG(stackptr->array[0]);
-       tprintf("Stack %d: \n\t", j);
+    tprintf("Stack %d: \n\t", j);
     for(i=0; i<stackptr->size; i++) {
       if(stackptr->array[i] != NULL) {
-               tprintf("%x, ", stackptr->array[i]);
+        tprintf("%x, ", stackptr->array[i]);
       } else {
-               tprintf("NULL, ");
-         }
+        tprintf("NULL, ");
+      }
     }
-       tprintf("\n");
+    tprintf("\n");
     stackptr=stackptr->next;
   }
 #endif
@@ -549,26 +579,7 @@ INLINE void initruntimedata() {
       corestatus[i] = 1;
       numsendobjs[i] = 0;
       numreceiveobjs[i] = 0;
-#ifdef MULTICORE_GC
-      gccorestatus[i] = 1;
-      gcnumsendobjs[0][i] = gcnumsendobjs[1][i] = 0;
-      gcnumreceiveobjs[0][i] = gcnumreceiveobjs[1][i] = 0;
-#endif
-    } // for(i = 0; i < NUMCORESACTIVE; ++i)
-#ifdef MULTICORE_GC
-    for(i = 0; i < NUMCORES4GC; ++i) {
-      gcloads[i] = 0;
-      gcrequiredmems[i] = 0;
-      gcstopblock[i] = 0;
-      gcfilledblocks[i] = 0;
-    } // for(i = 0; i < NUMCORES4GC; ++i)
-#ifdef GC_PROFILE
-    gc_infoIndex = 0;
-    gc_infoOverflow = false;
-       gc_num_livespace = 0;
-       gc_num_freespace = 0;
-#endif
-#endif
+    } 
     numconfirm = 0;
     waitconfirm = false;
   }
@@ -592,214 +603,145 @@ INLINE void initruntimedata() {
   outmsgleft = 0;
   isMsgHanging = false;
 
-  smemflag = false;
+  smemflag = true;
   bamboo_cur_msp = NULL;
   bamboo_smem_size = 0;
-
-#ifdef MULTICORE_GC
-  bamboo_smem_zero_top = NULL;
-  gcflag = false;
-  gcprocessing = false;
-  gcphase = FINISHPHASE;
-  gcprecheck = true;
-  gccurr_heaptop = 0;
-  gcself_numsendobjs = 0;
-  gcself_numreceiveobjs = 0;
-  gcmarkedptrbound = 0;
-  gcforwardobjtbl = allocateMGCHash_I(20, 3);
-  gcnumlobjs = 0;
-  gcheaptop = 0;
-  gctopcore = 0;
-  gctopblock = 0;
-  gcmovestartaddr = 0;
-  gctomove = false;
-  gcmovepending = 0;
-  gcblock2fill = 0;
-#ifdef SMEMM
-  gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE
-               -bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
-  gcmem_mixed_usedmem = 0;
-#endif
-#ifdef GC_PROFILE
-  gc_num_obj = 0;
-  gc_num_liveobj = 0;
-  gc_num_forwardobj = 0;
-  gc_num_profiles = NUMCORESACTIVE - 1;
-#endif
-#ifdef MGC_SPEC
-  gc_profile_flag = false;
-#endif
-#ifdef GC_FLUSH_DTLB
-  gc_num_flush_dtlb = 0;
-#endif
-  gc_localheap_s = false;
-#ifdef GC_CACHE_ADAPT
-  gccachestage = false;
-#endif // GC_CACHE_ADAPT
-#endif // MULTICORE_GC
 #ifndef INTERRUPT
   reside = false;
 #endif
 
+  INITMULTICOREGCDATA();
+
 #ifdef MGC
   initializethreads();
   bamboo_current_thread = 0;
 #endif // MGC
 
-#ifdef TASK
-  inittaskdata();
-#endif
+  INITTASKDATA();
 }
 
 INLINE void disruntimedata() {
-#ifdef MULTICORE_GC
-  freeMGCHash(gcforwardobjtbl);
-#endif // MULTICORE_GC
-#ifdef TASK
-  distaskdata()
-#endif // TASK
+  DISMULTICOREGCDATA();
+  DISTASKDATA();
   BAMBOO_LOCAL_MEM_CLOSE();
   BAMBOO_SHARE_MEM_CLOSE();
 }
 
+INLINE void recordtotalexetime() { // report time elapsed since bamboo_start_time
+#ifdef USEIO
+  totalexetime = BAMBOO_GET_EXE_TIME()-bamboo_start_time; // stored, not printed
+#else // USEIO not defined: print the elapsed time instead of storing it
+  BAMBOO_PRINT(BAMBOO_GET_EXE_TIME()-bamboo_start_time);
+#ifdef GC_FLUSH_DTLB
+  BAMBOO_PRINT_REG(gc_num_flush_dtlb); // also print the gc_num_flush_dtlb counter
+#endif
+#ifndef BAMBOO_MEMPROF
+  BAMBOO_PRINT(0xbbbbbbbb); // marker value, skipped when BAMBOO_MEMPROF is set
+#endif
+#endif // USEIO
+}
+
+INLINE void getprofiledata() { // PROFILE: ask other cores to pour out profiling data
+#ifdef PROFILE
+  int i = 0; // core index used by both loops below
+  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+  for(i = 1; i < NUMCORESACTIVE; ++i) {
+    // send profile request msg to core i
+    send_msg_2(i, PROFILEOUTPUT, totalexetime, false);
+  }
+#ifndef RT_TEST
+  // pour profiling data on startup core
+  outputProfileData();
+#endif
+  while(true) {
+    BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
+    profilestatus[BAMBOO_NUM_OF_CORE] = 0;
+    // find the first core whose profilestatus entry is still non-zero
+    for(i = 0; i < NUMCORESACTIVE; ++i) {
+      if(profilestatus[i] != 0) {
+        break;
+      }
+    }
+    if(i != NUMCORESACTIVE) {
+      int halt = 100; // an entry is still non-zero: spin briefly, then re-check
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      while(halt--) {
+      }
+    } else {
+      BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+      break; // every profilestatus entry is zero: stop polling
+    }
+  }
+#endif
+}
+
 INLINE void checkCoreStatus() {
   bool allStall = false;
   int i = 0;
   int sumsendobj = 0;
   if((!waitconfirm) ||
      (waitconfirm && (numconfirm == 0))) {
-    BAMBOO_DEBUGPRINT(0xee04);
-    BAMBOO_DEBUGPRINT_REG(waitconfirm);
     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    BAMBOO_DEBUGPRINT(0xf001);
     corestatus[BAMBOO_NUM_OF_CORE] = 0;
     numsendobjs[BAMBOO_NUM_OF_CORE] = self_numsendobjs;
     numreceiveobjs[BAMBOO_NUM_OF_CORE] = self_numreceiveobjs;
     // check the status of all cores
     allStall = true;
-    BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE);
     for(i = 0; i < NUMCORESACTIVE; ++i) {
-      BAMBOO_DEBUGPRINT(0xe000 + corestatus[i]);
       if(corestatus[i] != 0) {
-               allStall = false;
-               break;
+        allStall = false;
+        break;
       }
-    }  // for(i = 0; i < NUMCORESACTIVE; ++i)
+    } 
     if(allStall) {
       // check if the sum of send objs and receive obj are the same
       // yes->check if the info is the latest; no->go on executing
       sumsendobj = 0;
       for(i = 0; i < NUMCORESACTIVE; ++i) {
-               sumsendobj += numsendobjs[i];
-               BAMBOO_DEBUGPRINT(0xf000 + numsendobjs[i]);
-      }  // for(i = 0; i < NUMCORESACTIVE; ++i)
+        sumsendobj += numsendobjs[i];
+      } 
       for(i = 0; i < NUMCORESACTIVE; ++i) {
-               sumsendobj -= numreceiveobjs[i];
-               BAMBOO_DEBUGPRINT(0xf000 + numreceiveobjs[i]);
-      }  // for(i = 0; i < NUMCORESACTIVE; ++i)
+        sumsendobj -= numreceiveobjs[i];
+      }  
       if(0 == sumsendobj) {
-               if(!waitconfirm) {
-                 // the first time found all cores stall
-                 // send out status confirm msg to all other cores
-                 // reset the corestatus array too
-                 BAMBOO_DEBUGPRINT(0xee05);
-                 corestatus[BAMBOO_NUM_OF_CORE] = 1;
-                 waitconfirm = true;
-                 numconfirm = NUMCORESACTIVE - 1;
-                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                 for(i = 1; i < NUMCORESACTIVE; ++i) {
-                       corestatus[i] = 1;
-                       // send status confirm msg to core i
-                       send_msg_1(i, STATUSCONFIRM, false);
-                 }   // for(i = 1; i < NUMCORESACTIVE; ++i)
-                 return;
-               } else {
-                 // all the core status info are the latest
-                 // terminate; for profiling mode, send request to all
-                 // other cores to pour out profiling data
-                 BAMBOO_DEBUGPRINT(0xee06);
-
-#ifdef USEIO
-                 totalexetime = BAMBOO_GET_EXE_TIME() - bamboo_start_time;
-#else
-
-                 BAMBOO_PRINT(BAMBOO_GET_EXE_TIME() - bamboo_start_time);
-                 //BAMBOO_DEBUGPRINT_REG(total_num_t6); // TODO for test
-#ifdef GC_FLUSH_DTLB
-                 BAMBOO_PRINT_REG(gc_num_flush_dtlb);
-#endif
-#ifndef BAMBOO_MEMPROF
-                 BAMBOO_PRINT(0xbbbbbbbb);
-#endif
-#endif
-                 // profile mode, send msgs to other cores to request pouring
-                 // out progiling data
-#ifdef PROFILE
-                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                 BAMBOO_DEBUGPRINT(0xf000);
-                 for(i = 1; i < NUMCORESACTIVE; ++i) {
-                       // send profile request msg to core i
-                       send_msg_2(i, PROFILEOUTPUT, totalexetime, false);
-                 } // for(i = 1; i < NUMCORESACTIVE; ++i)
-#ifndef RT_TEST
-                 // pour profiling data on startup core
-                 outputProfileData();
-#endif
-                 while(true) {
-                       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-                       BAMBOO_DEBUGPRINT(0xf001);
-                       profilestatus[BAMBOO_NUM_OF_CORE] = 0;
-                       // check the status of all cores
-                       allStall = true;
-                       BAMBOO_DEBUGPRINT_REG(NUMCORESACTIVE);
-                       for(i = 0; i < NUMCORESACTIVE; ++i) {
-                         BAMBOO_DEBUGPRINT(0xe000 + profilestatus[i]);
-                         if(profilestatus[i] != 0) {
-                               allStall = false;
-                               break;
-                         }
-                       }  // for(i = 0; i < NUMCORESACTIVE; ++i)
-                       if(!allStall) {
-                         int halt = 100;
-                         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                         BAMBOO_DEBUGPRINT(0xf000);
-                         while(halt--) {
-                         }
-                       } else {
-                         BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                         break;
-                       }  // if(!allStall)
-                 }  // while(true)
-#endif
-
-                 // gc_profile mode, output gc prfiling data
-#ifdef MULTICORE_GC
-#ifdef GC_CACHE_ADAPT
-                 bamboo_mask_timer_intr(); // disable the TILE_TIMER interrupt
-#endif // GC_CACHE_ADAPT
-#ifdef GC_PROFILE
-                 gc_outputProfileData();
-#endif // #ifdef GC_PROFILE
-#endif // #ifdef MULTICORE_GC
-                 disruntimedata();
-                 BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-                 terminate();  // All done.
-               }  // if(!waitconfirm)
-      } else {
-               // still some objects on the fly on the network
-               // reset the waitconfirm and numconfirm
-               BAMBOO_DEBUGPRINT(0xee07);
-               waitconfirm = false;
-               numconfirm = 0;
-         }  //  if(0 == sumsendobj)
+        if(!waitconfirm) {
+          // the first time found all cores stall
+          // send out status confirm msg to all other cores
+          // reset the corestatus array too
+          corestatus[BAMBOO_NUM_OF_CORE] = 1;
+          waitconfirm = true;
+          numconfirm = NUMCORESACTIVE - 1;
+          BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+          for(i = 1; i < NUMCORESACTIVE; ++i) {
+            corestatus[i] = 1;
+            // send status confirm msg to core i
+            send_msg_1(i, STATUSCONFIRM, false);
+          }   // for(i = 1; i < NUMCORESACTIVE; ++i)
+          return;
+        } else {
+          // all the core status info are the latest
+          // terminate; for profiling mode, send request to all
+          // other cores to pour out profiling data
+          recordtotalexetime();
+          getprofiledata();
+          CACHEADAPT_DISABLE_TIMER();
+          GC_OUTPUT_PROFILE_DATA();
+          disruntimedata();
+          BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
+          terminate();  // All done.
+        }  // if(!waitconfirm)
+      } else {         
+        // still some objects on the fly on the network
+        // reset the waitconfirm and numconfirm
+        waitconfirm = false;
+        numconfirm = 0;
+      }  //  if(0 == sumsendobj)
     } else {
       // not all cores are stall, keep on waiting
-      BAMBOO_DEBUGPRINT(0xee08);
       waitconfirm = false;
       numconfirm = 0;
     }  //  if(allStall)
     BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
-    BAMBOO_DEBUGPRINT(0xf000);
   }  // if((!waitconfirm) ||
 }
 
@@ -811,9 +753,6 @@ inline void run(int argc, char** argv) {
   bool tocontinue = false;
 
   corenum = BAMBOO_GET_NUM_OF_CORE();
-  BAMBOO_DEBUGPRINT(0xeeee);
-  BAMBOO_DEBUGPRINT_REG(corenum);
-  BAMBOO_DEBUGPRINT(STARTUPCORE);
 
   // initialize runtime data structures
   initruntimedata();
@@ -822,14 +761,7 @@ inline void run(int argc, char** argv) {
   initialization();
   initCommunication();
 
-#ifdef GC_CACHE_ADAPT
-// enable the timer interrupt
-#ifdef GC_CACHE_SAMPLING
-  bamboo_tile_timer_set_next_event(GC_TILE_TIMER_EVENT_SETTING); // TODO
-  bamboo_unmask_timer_intr();
-  bamboo_dtlb_sampling_process();
-#endif // GC_CACHE_SAMPLING
-#endif // GC_CACHE_ADAPT
+  CACHEADAPT_ENABLE_TIMER();
 
   initializeexithandler();
 
@@ -843,9 +775,8 @@ inline void run(int argc, char** argv) {
   } else {
 #ifdef TASK
     /* Create queue of active tasks */
-    activetasks=
-      genallocatehashtable((unsigned int (*)(void *)) &hashCodetpd,
-                           (int (*)(void *,void *)) &comparetpd);
+    activetasks= genallocatehashtable((unsigned int (*)(void *)) &hashCodetpd,
+        (int (*)(void *,void *)) &comparetpd);
 
     /* Process task information */
     processtasks();
@@ -854,8 +785,6 @@ inline void run(int argc, char** argv) {
       /* Create startup object */
       createstartupobject(argc, argv);
     }
-
-    BAMBOO_DEBUGPRINT(0xee00);
 #endif
 
        if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
@@ -870,1040 +799,65 @@ inline void run(int argc, char** argv) {
        }
 
     while(true) {
-
-#ifdef MULTICORE_GC
-      // check if need to do GC
-      if(gcflag) {
-               gc(NULL);
-         }
-#endif // MULTICORE_GC
-
+      GCCHECK(NULL);
 #ifdef TASK
       // check if there are new active tasks can be executed
       executetasks();
       if(busystatus) {
-               sendStall = false;
+        sendStall = false;
       }
-
 #ifndef INTERRUPT
       while(receiveObject() != -1) {
       }
 #endif
-
-      BAMBOO_DEBUGPRINT(0xee01);
-
       // check if there are some pending objects,
       // if yes, enqueue them and executetasks again
       tocontinue = checkObjQueue();
 #elif defined MGC
-         tocontinue = trystartthread();
-         if(tocontinue) {
-               sendStall = false;
-         }
+      tocontinue = trystartthread();
+      if(tocontinue) {
+        sendStall = false;
+      }
 #endif
 
       if(!tocontinue) {
-               // check if stop
-               if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-                 if(isfirst) {
-                       BAMBOO_DEBUGPRINT(0xee03);
-                       isfirst = false;
-                 }
-                 checkCoreStatus();
-               } else {
-                 if(!sendStall) {
-                       BAMBOO_DEBUGPRINT(0xee09);
+        // check if stop
+        if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+          if(isfirst) {
+            isfirst = false;
+          }
+          checkCoreStatus();
+        } else {
+          if(!sendStall) {
 #ifdef PROFILE
-                       if(!stall) {
-#endif
-                       if(isfirst) {
-                         // wait for some time
-                         int halt = 10000;
-                         BAMBOO_DEBUGPRINT(0xee0a);
-                         while(halt--) {
-                         }
-                         isfirst = false;
-                       } else {
-                         // send StallMsg to startup core
-                         BAMBOO_DEBUGPRINT(0xee0b);
-                         // send stall msg
-                         send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE,
-                                                self_numsendobjs, self_numreceiveobjs, false);
-                         sendStall = true;
-                         isfirst = true;
-                         busystatus = false;
-                       }
+            if(!stall) {
+#endif
+            if(isfirst) {
+              // wait for some time
+              int halt = 10000;
+              while(halt--) {
+              }
+              isfirst = false;
+            } else {
+              // send StallMsg to startup core
+              // send stall msg
+              send_msg_4(STARTUPCORE, TRANSTALL, BAMBOO_NUM_OF_CORE,
+                  self_numsendobjs, self_numreceiveobjs, false);
+              sendStall = true;
+              isfirst = true;
+              busystatus = false;
+            }
 #ifdef PROFILE
-                 }
-#endif
-                 } else {
-                       isfirst = true;
-                       busystatus = false;
-                       BAMBOO_DEBUGPRINT(0xee0c);
-                 }   // if(!sendStall)
-               }   // if(STARTUPCORE == BAMBOO_NUM_OF_CORE)
-      }  // if(!tocontinue)
-    }  // while(true)
-  } // if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)
-
-} // run()
-
-INLINE int checkMsgLength_I(int size) {
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT(0xcccc);
-#endif
-  int type = msgdata[msgdataindex];
-  switch(type) {
-  case STATUSCONFIRM:
-  case TERMINATE:
-#ifdef MULTICORE_GC
-  case GCSTARTPRE:
-  case GCSTART:
-  case GCSTARTINIT:
-  case GCSTARTFLUSH:
-  case GCFINISH:
-  case GCMARKCONFIRM:
-  case GCLOBJREQUEST:
-#ifdef GC_CACHE_ADAPT
-  case GCSTARTPREF:
-#endif // GC_CACHE_ADAPT
-#endif // MULTICORE_GC
-  {
-       msglength = 1;
-       break;
-  }
-
-#ifdef TASK
-  case PROFILEOUTPUT:
-  case PROFILEFINISH:
-#endif
-#ifdef MULTICORE_GC
-  case GCSTARTCOMPACT:
-  case GCMARKEDOBJ:
-  case GCFINISHINIT:
-  case GCFINISHFLUSH:
-#ifdef GC_CACHE_ADAPT
-  case GCFINISHPREF:
-#endif // GC_CACHE_ADAPT
-#endif // MULTICORE_GC
-  {
-       msglength = 2;
-       break;
-  }
-
-  case MEMREQUEST:
-  case MEMRESPONSE:
-  {
-       msglength = 3;
-       break;
-  }
-
-  case TRANSTALL:
-#ifdef TASK
-  case LOCKGROUNT:
-  case LOCKDENY:
-  case LOCKRELEASE:
-  case REDIRECTGROUNT:
-  case REDIRECTDENY:
-  case REDIRECTRELEASE:
-#endif
-#ifdef MULTICORE_GC
-  case GCFINISHPRE:
-  case GCFINISHMARK:
-  case GCMOVESTART:
-#ifdef GC_PROFILE
-  case GCPROFILES:
-#endif
-#endif
-  {
-       msglength = 4;
-       break;
-  }
-
-#ifdef TASK
-  case LOCKREQUEST:
-#endif
-  case STATUSREPORT:
-#ifdef MULTICORE_GC
-  case GCFINISHCOMPACT:
-  case GCMARKREPORT:
-#endif
-  {
-       msglength = 5;
-       break;
-  }
-
-#ifdef TASK
-  case REDIRECTLOCK:
-  {
-    msglength = 6;
-    break;
-  }
-#endif
-
-#ifdef TASK
-  case TRANSOBJ:   // nonfixed size
+            }
 #endif
-#ifdef MULTICORE_GC
-  case GCLOBJINFO:
-#endif
-  {  // nonfixed size
-       if(size > 1) {
-         msglength = msgdata[(msgdataindex+1)&(BAMBOO_MSG_BUF_MASK)];
-       } else {
-         return -1;
-       }
-       break;
-  }
-
-  default:
-  {
-    BAMBOO_DEBUGPRINT_REG(type);
-       BAMBOO_DEBUGPRINT_REG(size);
-    BAMBOO_DEBUGPRINT_REG(msgdataindex);
-       BAMBOO_DEBUGPRINT_REG(msgdatalast);
-       BAMBOO_DEBUGPRINT_REG(msgdatafull);
-    int i = 6;
-    while(i-- > 0) {
-      BAMBOO_DEBUGPRINT(msgdata[msgdataindex+i]);
-    }
-    BAMBOO_EXIT(0xe001);
-    break;
-  }
-  }
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex]);
-  BAMBOO_DEBUGPRINT(0xffff);
-#endif
-  return msglength;
-}
-
-INLINE void processmsg_transtall_I() {
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // non startup core can not receive stall msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[1]*/);
-#endif
-    BAMBOO_EXIT(0xe002);
-  }
-  int num_core = msgdata[msgdataindex]; //[1]
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex]; //[2];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex]; //[3];
-  MSG_INDEXINC_I();
-  if(num_core < NUMCORESACTIVE) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe881);
-#endif
-    corestatus[num_core] = 0;
-    numsendobjs[num_core] = data2; //[2];
-    numreceiveobjs[num_core] = data3; //[3];
-  }
-}
-
-INLINE void processmsg_statusconfirm_I() {
-  if((BAMBOO_NUM_OF_CORE == STARTUPCORE)
-     || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
-    // wrong core to receive such msg
-    BAMBOO_EXIT(0xe003);
-  } else {
-    // send response msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe887);
-#endif
-    // cache the msg first
-    if(BAMBOO_CHECK_SEND_MODE()) {
-         cache_msg_5(STARTUPCORE, STATUSREPORT,
-                                 busystatus ? 1 : 0, BAMBOO_NUM_OF_CORE,
-                                 self_numsendobjs, self_numreceiveobjs);
-    } else {
-         send_msg_5(STARTUPCORE, STATUSREPORT, busystatus?1:0,
-                                BAMBOO_NUM_OF_CORE, self_numsendobjs,
-                                self_numreceiveobjs, true);
-    }
-  }
-}
-
-INLINE void processmsg_statusreport_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data4 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // receive a status confirm info
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // wrong core to receive such msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe004);
-  } else {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe888);
-#endif
-    if(waitconfirm) {
-      numconfirm--;
-    }
-    corestatus[data2] = data1;
-    numsendobjs[data2] = data3;
-    numreceiveobjs[data2] = data4;
-  }
-}
-
-INLINE void processmsg_terminate_I() {
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT(0xe889);
-#endif
-  disruntimedata();
-#ifdef MULTICORE_GC
-#ifdef GC_CACHE_ADAPT
-  bamboo_mask_timer_intr(); // disable the TILE_TIMER interrupt
-#endif // GC_CACHE_ADAPT
-#endif // MULTICORE_GC
-  BAMBOO_EXIT_APP(0);
-}
-
-INLINE void processmsg_memrequest_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // receive a shared memory request msg
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // wrong core to receive such msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe005);
-  } else {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe88a);
-#endif
-    int allocsize = 0;
-    void * mem = NULL;
-#ifdef MULTICORE_GC
-    if(gcprocessing && gcflag) {
-         // is currently doing GC and the master core does not decide to stop GC 
-         // yet
-    } else {
-         // either not doing GC or the master core has decided to stop GC but 
-         // still sending msgs to other cores to inform them to stop the GC
-#endif
-    mem = smemalloc_I(data2, data1, &allocsize);
-    if(mem != NULL) {
-      // send the start_va to request core, cache the msg first
-      if(BAMBOO_CHECK_SEND_MODE()) {
-               cache_msg_3(data2, MEMRESPONSE, mem, allocsize);
-      } else {
-               send_msg_3(data2, MEMRESPONSE, mem, allocsize, true);
-         }
-    } //else 
-         // if mem == NULL, the gcflag of the startup core has been set
-         // and all the other cores have been informed to start gc
-#ifdef MULTICORE_GC
-  }
-#endif
-  }
-}
-
-INLINE void processmsg_memresponse_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // receive a shared memory response msg
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT(0xe88b);
-#endif
-#ifdef MULTICORE_GC
-  // if is currently doing gc, dump this msg
-  if(!gcprocessing) {
-#endif
-  if(data2 == 0) {
-    bamboo_smem_size = 0;
-    bamboo_cur_msp = 0;
-#ifdef MULTICORE_GC
-       bamboo_smem_zero_top = 0;
-#endif
-  } else {
-#ifdef MULTICORE_GC
-    // fill header to store the size of this mem block
-    BAMBOO_MEMSET_WH(data1, '\0', BAMBOO_CACHE_LINE_SIZE); 
-    (*((int*)data1)) = data2;
-    bamboo_smem_size = data2 - BAMBOO_CACHE_LINE_SIZE;
-    bamboo_cur_msp = data1 + BAMBOO_CACHE_LINE_SIZE;
-       bamboo_smem_zero_top = bamboo_cur_msp;
-#else
-    bamboo_smem_size = data2;
-    bamboo_cur_msp =(void*)(data1);
-#endif
-  }
-  smemflag = true;
-#ifdef MULTICORE_GC
-  }
-#endif
-}
-
-#ifdef MULTICORE_GC
-INLINE void processmsg_gcstartpre_I() {
-  if(gcprocessing && gcflag) {
-       // already stall for gc
-  } else {
-       // the first time to be informed to start gc
-       gcflag = true;
-       if(!smemflag) {
-         // is waiting for response of mem request
-         // let it return NULL and start gc
-         bamboo_smem_size = 0;
-         bamboo_cur_msp = NULL;
-         smemflag = true;
-         bamboo_smem_zero_top = NULL;
-       }
-  }
-}
-
-INLINE void processmsg_gcstartinit_I() {
-  gcphase = INITPHASE;
-}
-
-INLINE void processmsg_gcstart_I() {
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT(0xe88c);
-#endif
-  // set the GC flag
-  gcphase = MARKPHASE;
-}
-
-INLINE void processmsg_gcstartcompact_I() {
-  gcblock2fill = msgdata[msgdataindex];
-  MSG_INDEXINC_I();  //msgdata[1];
-  gcphase = COMPACTPHASE;
-}
-
-INLINE void processmsg_gcstartflush_I() {
-  gcphase = FLUSHPHASE;
-}
-
-INLINE void processmsg_gcfinishpre_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // received a init phase finish msg
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // non startup core can not receive this msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data1);
-#endif
-    BAMBOO_EXIT(0xe006);
-  }
-  // All cores should do init GC
-  if(!gcprecheck) {
-       gcprecheck = true;
-  }
-  gccorestatus[data1] = 0;
-  gcnumsendobjs[0][data1] = data2;
-  gcnumreceiveobjs[0][data1] = data3;
-}
-
-INLINE void processmsg_gcfinishinit_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // received a init phase finish msg
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // non startup core can not receive this msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data1);
-#endif
-    BAMBOO_EXIT(0xe007);
-  }
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT(0xe88c);
-  BAMBOO_DEBUGPRINT_REG(data1);
-#endif
-  // All cores should do init GC
-  if(data1 < NUMCORESACTIVE) {
-    gccorestatus[data1] = 0;
-  }
-}
-
-INLINE void processmsg_gcfinishmark_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // received a mark phase finish msg
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // non startup core can not receive this msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data1);
-#endif
-    BAMBOO_EXIT(0xe008);
-  }
-  // all cores should do mark
-  if(data1 < NUMCORESACTIVE) {
-    gccorestatus[data1] = 0;
-       int entry_index = 0;
-       if(waitconfirm)  {
-         // phase 2
-         entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
-       } else {
-         // phase 1
-         entry_index = gcnumsrobjs_index;
-       }
-    gcnumsendobjs[entry_index][data1] = data2;
-    gcnumreceiveobjs[entry_index][data1] = data3;
-  }
-}
-
-INLINE void processmsg_gcfinishcompact_I() {
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // non startup core can not receive this msg
-    // return -1
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[1]*/);
-#endif
-    BAMBOO_EXIT(0xe009);
-  }
-  int cnum = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       //msgdata[1];
-  int filledblocks = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       //msgdata[2];
-  int heaptop = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       //msgdata[3];
-  int data4 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       //msgdata[4];
-  // only gc cores need to do compact
-  if(cnum < NUMCORES4GC) {
-    if(COMPACTPHASE == gcphase) {
-      gcfilledblocks[cnum] = filledblocks;
-      gcloads[cnum] = heaptop;
-    }
-    if(data4 > 0) {
-      // ask for more mem
-      int startaddr = 0;
-      int tomove = 0;
-      int dstcore = 0;
-      if(gcfindSpareMem_I(&startaddr, &tomove, &dstcore, data4, cnum)) {
-               // cache the msg first
-               if(BAMBOO_CHECK_SEND_MODE()) {
-                 cache_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove);
-               } else {
-                 send_msg_4(cnum, GCMOVESTART, dstcore, startaddr, tomove, true);
-               }
+          } else {
+            isfirst = true;
+            busystatus = false;
+          }
+        }
       }
-    } else {
-      gccorestatus[cnum] = 0;
-    }  // if(data4>0)
-  }  // if(cnum < NUMCORES4GC)
-}
-
-INLINE void processmsg_gcfinishflush_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // received a flush phase finish msg
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // non startup core can not receive this msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data1);
-#endif
-    BAMBOO_EXIT(0xe00a);
-  }
-  // all cores should do flush
-  if(data1 < NUMCORESACTIVE) {
-    gccorestatus[data1] = 0;
-  }
-}
-
-INLINE void processmsg_gcmarkconfirm_I() {
-  if((BAMBOO_NUM_OF_CORE == STARTUPCORE)
-     || (BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1)) {
-    // wrong core to receive such msg
-    BAMBOO_EXIT(0xe00b);
-  } else {
-       gcbusystatus = gc_moreItems2_I();
-    // send response msg, cahce the msg first
-    if(BAMBOO_CHECK_SEND_MODE()) {
-         cache_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE,
-                                 gcbusystatus, gcself_numsendobjs,
-                                 gcself_numreceiveobjs);
-    } else {
-         send_msg_5(STARTUPCORE, GCMARKREPORT, BAMBOO_NUM_OF_CORE,
-                                gcbusystatus, gcself_numsendobjs,
-                                gcself_numreceiveobjs, true);
     }
   }
-}
-
-INLINE void processmsg_gcmarkreport_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data4 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // received a marked phase finish confirm response msg
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // wrong core to receive such msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe00c);
-  } else {
-       int entry_index = 0;
-    if(waitconfirm) {
-         // phse 2
-      numconfirm--;
-         entry_index = (gcnumsrobjs_index == 0) ? 1 : 0;
-    } else {
-         // can never reach here
-         BAMBOO_EXIT(0xe00d);
-       }
-    gccorestatus[data1] = data2;
-    gcnumsendobjs[entry_index][data1] = data3;
-    gcnumreceiveobjs[entry_index][data1] = data4;
-  }
-}
-
-INLINE void processmsg_gcmarkedobj_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // received a markedObj msg
-  if(((int *)data1)[BAMBOOMARKBIT] == INIT) {
-    // this is the first time that this object is discovered,
-    // set the flag as DISCOVERED
-    ((int *)data1)[BAMBOOMARKBIT] = DISCOVERED;
-    gc_enqueue_I(data1);
-#ifdef GC_TBL_DEBUG
-       // for test
-       gcmappingtbl[OBJMAPPINGINDEX((unsigned int)data1)]=1;
-  } else if((((int *)data1)[BAMBOOMARKBIT] != DISCOVERED) && 
-         (((int *)data1)[BAMBOOMARKBIT] != MARKED)){
-       BAMBOO_EXIT(0xb0000000+((int *)data1)[0]);
-#endif
-  }
-  gcself_numreceiveobjs++;
-  gcbusystatus = true;
-}
-
-INLINE void processmsg_gcmovestart_I() {
-  gctomove = true;
-  gcdstcore = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       //msgdata[1];
-  gcmovestartaddr = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       //msgdata[2];
-  gcblock2fill = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       //msgdata[3];
-}
-
-INLINE void processmsg_gclobjinfo_I() {
-  numconfirm--;
-
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  if(BAMBOO_NUM_OF_CORE > NUMCORES4GC - 1) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe00e);
-  }
-  // store the mark result info
-  int cnum = data2;
-  gcloads[cnum] = msgdata[msgdataindex];
-  MSG_INDEXINC_I();       // msgdata[3];
-  int data4 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  if(gcheaptop < data4) {
-    gcheaptop = data4;
-  }
-  // large obj info here
-  for(int k = 5; k < data1; k+=2) {
-    int lobj = msgdata[msgdataindex];
-    MSG_INDEXINC_I();   //msgdata[k++];
-    int length = msgdata[msgdataindex];
-    MSG_INDEXINC_I();   //msgdata[k++];
-    gc_lobjenqueue_I(lobj, length, cnum);
-    gcnumlobjs++;
-  }  // for(int k = 5; k < msgdata[1];)
-}
-
-#ifdef GC_PROFILE
-INLINE void processmsg_gcprofiles_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-#ifdef MGC_SPEC
-  if(gc_profile_flag) {
-#endif
-  gc_num_obj += data1;
-  gc_num_liveobj += data2;
-  gc_num_forwardobj += data3;
-#ifdef MGC_SPEC
-  }
-#endif
-  gc_num_profiles--;
-}
-#endif // GC_PROFILE
-
-#ifdef GC_CACHE_ADAPT
-INLINE void processmsg_gcstartpref_I() {
-  gcphase = PREFINISHPHASE;
-}
-
-INLINE void processmsg_gcfinishpref_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // received a flush phase finish msg
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // non startup core can not receive this msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data1);
-#endif
-    BAMBOO_EXIT(0xe00f);
-  }
-  // all cores should do flush
-  if(data1 < NUMCORESACTIVE) {
-    gccorestatus[data1] = 0;
-  }
-}
-#endif // GC_CACHE_ADAPT
-#endif // #ifdef MULTICORE_GC
-
-// receive object transferred from other cores
-// or the terminate message from other cores
-// Should be invoked in critical sections!!
-// NOTICE: following format is for threadsimulate version only
-//         RAW version please see previous description
-// format: type + object
-// type: -1--stall msg
-//      !-1--object
-// return value: 0--received an object
-//               1--received nothing
-//               2--received a Stall Msg
-//               3--received a lock Msg
-//               RAW version: -1 -- received nothing
-//                            otherwise -- received msg type
-int receiveObject(int send_port_pending) {
-#ifdef TASK
-#ifdef PROFILE_INTERRUPT
-  if(!interruptInfoOverflow) {
-    InterruptInfo* intInfo = RUNMALLOC_I(sizeof(struct interrupt_info));
-    interruptInfoArray[interruptInfoIndex] = intInfo;
-    intInfo->startTime = BAMBOO_GET_EXE_TIME();
-    intInfo->endTime = -1;
-  }
-#endif // PROFILE_INTERRUPT
-#endif // TASK
-msg:
-  // get the incoming msgs
-  if(receiveMsg(send_port_pending) == -1) {
-    return -1;
-  }
-processmsg:
-  // processing received msgs
-  int size = 0;
-  MSG_REMAINSIZE_I(&size);
-  if((size == 0) || (checkMsgLength_I(size) == -1)) {
-    // not a whole msg
-    // have new coming msg
-    if((BAMBOO_MSG_AVAIL() != 0) && !msgdatafull) {
-      goto msg;
-    } else {
-      return -1;
-    }
-  }
-
-  if(msglength <= size) {
-    // have some whole msg
-    MSGTYPE type;
-    type = msgdata[msgdataindex]; //[0]
-    MSG_INDEXINC_I();
-    msgdatafull = false;
-    switch(type) {
-#ifdef TASK
-    case TRANSOBJ: {
-      // receive a object transfer msg
-      processmsg_transobj_I();
-      break;
-    }   // case TRANSOBJ
-#endif // TASK
-
-    case TRANSTALL: {
-      // receive a stall msg
-      processmsg_transtall_I();
-      break;
-    }   // case TRANSTALL
-
-#ifdef TASK
-// GC version have no lock msgs
-#ifndef MULTICORE_GC
-    case LOCKREQUEST: {
-      // receive lock request msg, handle it right now
-      processmsg_lockrequest_I();
-      break;
-    }   // case LOCKREQUEST
-
-    case LOCKGROUNT: {
-      // receive lock grount msg
-      processmsg_lockgrount_I();
-      break;
-    }   // case LOCKGROUNT
-
-    case LOCKDENY: {
-      // receive lock deny msg
-      processmsg_lockdeny_I();
-      break;
-    }   // case LOCKDENY
-
-    case LOCKRELEASE: {
-      processmsg_lockrelease_I();
-      break;
-    }   // case LOCKRELEASE
-#endif // #ifndef MULTICORE_GC
-
-#ifdef PROFILE
-    case PROFILEOUTPUT: {
-      // receive an output profile data request msg
-      processmsg_profileoutput_I();
-      break;
-    }   // case PROFILEOUTPUT
-
-    case PROFILEFINISH: {
-      // receive a profile output finish msg
-      processmsg_profilefinish_I();
-      break;
-    }   // case PROFILEFINISH
-#endif // #ifdef PROFILE
-
-// GC version has no lock msgs
-#ifndef MULTICORE_GC
-    case REDIRECTLOCK: {
-      // receive a redirect lock request msg, handle it right now
-      processmsg_redirectlock_I();
-      break;
-    }   // case REDIRECTLOCK
-
-    case REDIRECTGROUNT: {
-      // receive a lock grant msg with redirect info
-      processmsg_redirectgrount_I();
-      break;
-    }   // case REDIRECTGROUNT
-
-    case REDIRECTDENY: {
-      // receive a lock deny msg with redirect info
-      processmsg_redirectdeny_I();
-      break;
-    }   // case REDIRECTDENY
-
-    case REDIRECTRELEASE: {
-      // receive a lock release msg with redirect info
-      processmsg_redirectrelease_I();
-      break;
-    }   // case REDIRECTRELEASE
-#endif // #ifndef MULTICORE_GC
-#endif // TASK
-
-    case STATUSCONFIRM: {
-      // receive a status confirm info
-      processmsg_statusconfirm_I();
-      break;
-    }   // case STATUSCONFIRM
-
-    case STATUSREPORT: {
-      processmsg_statusreport_I();
-      break;
-    }   // case STATUSREPORT
-
-    case TERMINATE: {
-      // receive a terminate msg
-      processmsg_terminate_I();
-      break;
-    }   // case TERMINATE
-
-    case MEMREQUEST: {
-      processmsg_memrequest_I();
-      break;
-    }   // case MEMREQUEST
-
-    case MEMRESPONSE: {
-      processmsg_memresponse_I();
-      break;
-    }   // case MEMRESPONSE
-
-#ifdef MULTICORE_GC
-    // GC msgs
-    case GCSTARTPRE: {
-      processmsg_gcstartpre_I();
-      break;
-    }   // case GCSTARTPRE
-       
-       case GCSTARTINIT: {
-      processmsg_gcstartinit_I();
-      break;
-    }   // case GCSTARTINIT
-
-    case GCSTART: {
-      // receive a start GC msg
-      processmsg_gcstart_I();
-      break;
-    }   // case GCSTART
-
-    case GCSTARTCOMPACT: {
-      // a compact phase start msg
-      processmsg_gcstartcompact_I();
-      break;
-    }   // case GCSTARTCOMPACT
-
-    case GCSTARTFLUSH: {
-      // received a flush phase start msg
-      processmsg_gcstartflush_I();
-      break;
-    }   // case GCSTARTFLUSH
-
-    case GCFINISHPRE: {
-      processmsg_gcfinishpre_I();
-      break;
-    }   // case GCFINISHPRE
-       
-       case GCFINISHINIT: {
-      processmsg_gcfinishinit_I();
-      break;
-    }   // case GCFINISHINIT
-
-    case GCFINISHMARK: {
-      processmsg_gcfinishmark_I();
-      break;
-    }   // case GCFINISHMARK
-
-    case GCFINISHCOMPACT: {
-      // received a compact phase finish msg
-      processmsg_gcfinishcompact_I();
-      break;
-    }   // case GCFINISHCOMPACT
-
-    case GCFINISHFLUSH: {
-      processmsg_gcfinishflush_I();
-      break;
-    }   // case GCFINISHFLUSH
-
-    case GCFINISH: {
-      // received a GC finish msg
-      gcphase = FINISHPHASE;
-         smemflag = false; // TODO
-         gcprocessing = false;
-      break;
-    }   // case GCFINISH
-
-    case GCMARKCONFIRM: {
-      // received a marked phase finish confirm request msg
-      // all cores should do mark
-      processmsg_gcmarkconfirm_I();
-      break;
-    }   // case GCMARKCONFIRM
-
-    case GCMARKREPORT: {
-      processmsg_gcmarkreport_I();
-      break;
-    }   // case GCMARKREPORT
-
-    case GCMARKEDOBJ: {
-      processmsg_gcmarkedobj_I();
-      break;
-    }   // case GCMARKEDOBJ
-
-    case GCMOVESTART: {
-      // received a start moving objs msg
-      processmsg_gcmovestart_I();
-      break;
-    }   // case GCMOVESTART
-
-       case GCLOBJREQUEST: {
-      // received a large objs info request msg
-      transferMarkResults_I();
-      break;
-    }   // case GCLOBJREQUEST
-
-    case GCLOBJINFO: {
-      // received a large objs info response msg
-      processmsg_gclobjinfo_I();
-      break;
-    }   // case GCLOBJINFO
-
-#ifdef GC_PROFILE
-       case GCPROFILES: {
-      // received a gcprofiles msg
-      processmsg_gcprofiles_I();
-      break;
-    }
-#endif // GC_PROFILE
-
-#ifdef GC_CACHE_ADAPT
-       case GCSTARTPREF: {
-      // received a gcstartpref msg
-      processmsg_gcstartpref_I();
-      break;
-    }
-
-       case GCFINISHPREF: {
-      // received a gcfinishpref msg
-      processmsg_gcfinishpref_I();
-      break;
-    }
-#endif // GC_CACHE_ADAPT
-#endif // #ifdef MULTICORE_GC
-
-    default:
-      break;
-    }  // switch(type)
-    msglength = BAMBOO_MSG_BUF_LENGTH;
-
-    if((msgdataindex != msgdatalast) || (msgdatafull)) {
-      // still have available msg
-      goto processmsg;
-    }
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe88d);
-#endif
-
-    // have new coming msg
-    if(BAMBOO_MSG_AVAIL() != 0) {
-      goto msg;
-    } // TODO
-
-#ifdef TASK
-#ifdef PROFILE_INTERRUPT
-  if(!interruptInfoOverflow) {
-    interruptInfoArray[interruptInfoIndex]->endTime=BAMBOO_GET_EXE_TIME();
-    interruptInfoIndex++;
-    if(interruptInfoIndex == INTERRUPTINFOLENGTH) {
-      interruptInfoOverflow = true;
-    }
-  }
-#endif
-#endif // TASK
-    return (int)type;
-  } else {
-    // not a whole msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe88e);
-#endif
-    return -2;
-  }
-}
+} // run()
 
 #endif // MULTICORE
index c6d562a7c8ac5635c3516a626d811330f79b52a5..a4cfc158c73b8af512566c805785c1e49c4e1f3c 100644 (file)
@@ -1,21 +1,23 @@
-#ifndef MULTICORE_RUNTIME
-#define MULTICORE_RUNTIME
+#ifndef BAMBOO_MULTICORE_RUNTIME_H
+#define BAMBOO_MULTICORE_RUNTIME_H
+#ifdef MULTICORE
 #include "structdefs.h"
+#include "multicore.h"
+#include "multicoremsg.h"
+#include "multicoremem.h"
+#include "multicoretask.h"
+#include "multicoremgc.h"
 
-#ifndef INLINE
-#define INLINE    inline __attribute__((always_inline))
-#endif
-
-#ifndef bool
-#define bool int
-#define true 1
-#define false 0
-#endif
+#ifdef MULTICORE_GC
+#define GCCHECK(p) \
+  if(gcflag) gc(p)
+#else
+#define GCCHECK(p)
+#endif // MULTICORE_GC
 
 ////////////////////////////////////////////////////////////////
 // global variables                                          //
 ///////////////////////////////////////////////////////////////
-
 // record the starting time
 unsigned long long bamboo_start_time;
 bool stall;
@@ -24,246 +26,6 @@ int totalexetime;
 bool reside;
 #endif
 
-#ifdef MULTICORE
-#ifdef GC_SMALLPAGESIZE
-#define BAMBOO_GLOBAL_DEFS_SIZE (1024 * 1024)
-#define BAMBOO_GLOBAL_DEFS_PRIM_SIZE (1024 * 512)
-#else
-#define BAMBOO_GLOBAL_DEFS_SIZE (BAMBOO_SMEM_SIZE)
-#define BAMBOO_GLOBAL_DEFS_PRIM_SIZE (BAMBOO_SMEM_SIZE/2)
-#endif // GC_SMALLPAGESIZE
-#endif // MULTICORE
-
-#ifdef MGC
-// shared memory pointer for global thread queue
-// In MGC version, this block of memory is located at the very bottom of the 
-// shared memory with the base address as BAMBOO_BASE_VA.
-// The bottom of the shared memory = global thread queue + sbstart tbl 
-//                                  + smemtbl + NUMCORES4GC bamboo_rmsp
-// This queue is always reside at the bottom of the shared memory.  It is 
-// considered as runtime structure, during gc, it is scanned for mark and flush 
-// phase but never been compacted.
-//
-// This is a loop array and the first 4 int fields of the queue are:
-//     mutex + thread counter + start pointer + end pointer
-#ifdef GC_SMALLPAGESIZE
-#define BAMBOO_THREAD_QUEUE_SIZE (1024 * 1024)
-#else
-#define BAMBOO_THREAD_QUEUE_SIZE (BAMBOO_SMEM_SIZE) // (45 * 16 * 1024)
-#endif
-// data structures for threads
-unsigned int * bamboo_thread_queue;
-unsigned int bamboo_max_thread_num_mask;
-unsigned int bamboo_current_thread;
-
-extern int corenum;
-#endif // MGC
-
-// data structures for msgs
-#define BAMBOO_OUT_BUF_LENGTH 2048
-#define BAMBOO_OUT_BUF_MASK (0x7FF)
-#define BAMBOO_MSG_BUF_LENGTH 2048
-#define BAMBOO_MSG_BUF_MASK (0x7FF)
-int msgdata[BAMBOO_MSG_BUF_LENGTH];
-volatile int msgdataindex;
-volatile int msgdatalast;
-int msglength;
-volatile bool msgdatafull;
-int outmsgdata[BAMBOO_OUT_BUF_LENGTH];
-int outmsgindex;
-int outmsglast;
-int outmsgleft;
-volatile bool isMsgHanging;
-
-#define MSG_INDEXINC_I() \
-  msgdataindex = (msgdataindex + 1) & (BAMBOO_MSG_BUF_MASK) 
-
-#define MSG_LASTINDEXINC_I() \
-  msgdatalast = (msgdatalast + 1) & (BAMBOO_MSG_BUF_MASK)
-
-#define MSG_CACHE_I(n) \
-  msgdata[msgdatalast] = (n); \
-  MSG_LASTINDEXINC_I()
-
-// NOTE: if msgdataindex == msgdatalast, it always means that the buffer is
-//       full. In the case that the buffer is empty, this macro should never
-//       be called
-#define MSG_REMAINSIZE_I(s) \
-  if(msgdataindex < msgdatalast) { \
-    (*(int*)s) = msgdatalast - msgdataindex; \
-  } else if((msgdataindex == msgdatalast) && (!msgdatafull)) { \
-    (*(int*)s) = 0; \
-  } else { \
-    (*(int*)s) = (BAMBOO_MSG_BUF_LENGTH) - msgdataindex + msgdatalast; \
-  }
-
-#define OUTMSG_INDEXINC() \
-  outmsgindex = (outmsgindex + 1) & (BAMBOO_OUT_BUF_MASK)
-
-#define OUTMSG_LASTINDEXINC() \
-  outmsglast = (outmsglast + 1) & (BAMBOO_OUT_BUF_MASK); \
-  if(outmsglast == outmsgindex) { \
-    BAMBOO_EXIT(0xd101); \
-  }
-
-#define OUTMSG_CACHE(n) \
-  outmsgdata[outmsglast] = (n); \
-  OUTMSG_LASTINDEXINC();
-
-#define MAX_PACKET_WORDS 5
-
-/* Message format:
- *      type + Msgbody
- * type: 1 -- transfer object
- *       2 -- transfer stall msg
- *       3 -- lock request
- *       4 -- lock grount
- *       5 -- lock deny
- *       6 -- lock release
- *       // add for profile info
- *       7 -- transfer profile output msg
- *       8 -- transfer profile output finish msg
- *       // add for alias lock strategy
- *       9 -- redirect lock request
- *       a -- lock grant with redirect info
- *       b -- lock deny with redirect info
- *       c -- lock release with redirect info
- *       d -- status confirm request
- *       e -- status report msg
- *       f -- terminate
- *      10 -- requiring for new memory
- *      11 -- response for new memory request
- *      12 -- GC init phase start
- *      13 -- GC start
- *      14 -- compact phase start
- *      15 -- flush phase start
- *      16 -- init phase finish
- *      17 -- mark phase finish
- *      18 -- compact phase finish
- *      19 -- flush phase finish
- *      1a -- GC finish
- *      1b -- marked phase finish confirm request
- *      1c -- marked phase finish confirm response
- *      1d -- markedObj msg
- *      1e -- start moving objs msg
- *      1f -- ask for mapping info of a markedObj
- *      20 -- mapping info of a markedObj
- *      21 -- large objs info request
- *      22 -- large objs info response
- *      23 -- large objs mapping info
- *
- * ObjMsg: 1 + size of msg + obj's address + (task index + param index)+
- * StallMsg: 2 + corenum + sendobjs + receiveobjs
- *             (size is always 4 * sizeof(int))
- * LockMsg: 3 + lock type + obj pointer + lock + request core
- *            (size is always 5 * sizeof(int))
- *          4/5/6 + lock type + obj pointer + lock
- *            (size is always 4 * sizeof(int))
- *          9 + lock type + obj pointer +  redirect lock + root request core
- *            + request core
- *            (size is always 6 * sizeof(int))
- *          a/b + lock type + obj pointer + redirect lock
- *              (size is always 4 * sizeof(int))
- *          c + lock type + lock + redirect lock
- *            (size is always 4 * sizeof(int))
- *          lock type: 0 -- read; 1 -- write
- * ProfileMsg: 7 + totalexetime
- *               (size is always 2 * sizeof(int))
- *             8 + corenum
- *               (size is always 2 * sizeof(int))
- * StatusMsg: d (size is always 1 * sizeof(int))
- *            e + status + corenum + sendobjs + receiveobjs
- *              (size is always 5 * sizeof(int))
- *            status: 0 -- stall; 1 -- busy
- * TerminateMsg: f (size is always 1 * sizeof(int))
- * MemoryMsg: 10 + size + corenum
- *              (size is always 3 * sizeof(int))
- *           11 + base_va + size
- *              (size is always 3 * sizeof(int))
- * GCMsg: 12/13 (size is always 1 * sizeof(int))
- *        14 + size of msg + (num of objs to move + (start address
- *           + end address + dst core + start dst)+)?
- *           + (num of incoming objs + (start dst + orig core)+)?
- *           + (num of large obj lists + (start address + length
- *           + start dst)+)?
- *        15 (size is always 1 * sizeof(int))
- *        16 + corenum
- *           (size is always 2 * sizeof(int))
- *        17 + corenum + gcsendobjs + gcreceiveobjs
- *           (size is always 4 * sizeof(int))
- *        18 + corenum + fulfilled blocks num + (finish compact(1) + current
- *           heap top)/(need mem(0) + mem need)
- *           (size is always 5 * sizeof(int))
- *        19 + corenum
- *              (size is always 2 * sizeof(int))
- *        1a (size is always 1 * sizeof(int))
- *        1b (size is always 1 * sizeof(int))
- *        1c + size of msg + corenum + gcsendobjs + gcreceiveobjs
- *           (size is always 5 * sizeof(int))
- *        1d + obj's address + request core
- *           (size is always 3 * sizeof(int))
- *        1e + corenum + start addr + end addr
- *           (size is always 4 * sizeof(int))
- *        1f + obj's address + corenum
- *           (size is always 3 * sizeof(int))
- *        20 + obj's address + dst address
- *           (size is always 3 * sizeof(int))
- *        21 (size is always 1 * sizeof(int))
- *        22 + size of msg + corenum + current heap size
- *           + (num of large obj lists + (start address + length)+)?
- *        23 + orig large obj ptr + new large obj ptr
- *            (size is always 3 * sizeof(int))
- */
-typedef enum {
-  MSGSTART = 0xD0,       // 0xD0
-  TRANSOBJ,              // 0xD1
-  TRANSTALL,             // 0xD2
-  LOCKREQUEST,           // 0xD3
-  LOCKGROUNT,            // 0xD4
-  LOCKDENY,              // 0xD5
-  LOCKRELEASE,           // 0xD6
-  PROFILEOUTPUT,         // 0xD7
-  PROFILEFINISH,         // 0xD8
-  REDIRECTLOCK,          // 0xD9
-  REDIRECTGROUNT,        // 0xDa
-  REDIRECTDENY,          // 0xDb
-  REDIRECTRELEASE,       // 0xDc
-  STATUSCONFIRM,         // 0xDd
-  STATUSREPORT,          // 0xDe
-  TERMINATE,             // 0xDf
-  MEMREQUEST,            // 0xE0
-  MEMRESPONSE,           // 0xE1
-#ifdef MULTICORE_GC
-  GCSTARTPRE,            // 0xE2
-  GCSTARTINIT,           // 0xE3
-  GCSTART,               // 0xE4
-  GCSTARTCOMPACT,        // 0xE5
-  GCSTARTFLUSH,          // 0xE6
-  GCFINISHPRE,           // 0xE7
-  GCFINISHINIT,          // 0xE8
-  GCFINISHMARK,          // 0xE9
-  GCFINISHCOMPACT,       // 0xEa
-  GCFINISHFLUSH,         // 0xEb
-  GCFINISH,              // 0xEc
-  GCMARKCONFIRM,         // 0xEd
-  GCMARKREPORT,          // 0xEe
-  GCMARKEDOBJ,           // 0xEf
-  GCMOVESTART,           // 0xF0
-  GCLOBJREQUEST,         // 0xF1   
-  GCLOBJINFO,            // 0xF2
-#ifdef GC_PROFILE
-  GCPROFILES,            // 0xF3
-#endif // GC_PROFILE
-#ifdef GC_CACHE_ADAPT
-  GCSTARTPOSTINIT,       // 0xF4
-  GCSTARTPREF,           // 0xF5
-  GCFINISHPOSTINIT,      // 0xF6
-  GCFINISHPREF,          // 0xF7
-#endif // GC_CACHE_ADAPT
-#endif // MULTICORE_GC
-  MSGEND
-} MSGTYPE;
-
 /////////////////////////////////////////////////////////////////////////////////
 // NOTE: BAMBOO_TOTALCORE -- number of the available cores in the processor.
 //                           No greater than the number of all the cores in
@@ -292,76 +54,12 @@ bool busystatus;
 int self_numsendobjs;
 int self_numreceiveobjs;
 
-// TASK specific data structures
-#ifdef TASK
-// get rid of lock msgs for GC version
-#ifndef MULTICORE_GC
-// data structures for locking
-struct RuntimeHash locktable;
-static struct RuntimeHash* locktbl = &locktable;
-struct RuntimeHash * lockRedirectTbl;
-struct RuntimeHash * objRedirectLockTbl;
-#endif // ifndef MULTICORE_GC
-struct LockValue {
-  int redirectlock;
-  int value;
-};
-int lockobj;
-int lock2require;
-int lockresult;
-bool lockflag;
-
-// data structures for waiting objs
-struct Queue objqueue;
-struct Queue * totransobjqueue; // queue to hold objs to be transferred
-                                // should be cleared whenever enter a task
-                                       
-// for test TODO
-int total_num_t6;
-
-// data structures for profile mode
-#ifdef PROFILE
-#define TASKINFOLENGTH 3000 // 0
-#ifdef PROFILE_INTERRUPT
-#define INTERRUPTINFOLENGTH 50 //0
-#endif // PROFILE_INTERRUPT
-
-typedef struct task_info {
-  char* taskName;
-  unsigned long long startTime;
-  unsigned long long endTime;
-  unsigned long long exitIndex;
-  struct Queue * newObjs;
-} TaskInfo;
-
-TaskInfo * taskInfoArray[TASKINFOLENGTH];
-int taskInfoIndex;
-bool taskInfoOverflow;
-#ifdef PROFILE_INTERRUPT
-typedef struct interrupt_info {
-  unsigned long long startTime;
-  unsigned long long endTime;
-} InterruptInfo;
-
-InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
-int interruptInfoIndex;
-bool interruptInfoOverflow;
-#endif // PROFILE_INTERUPT
-volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
-                                            // 1: running tasks
-                                            // 0: stall
-#endif // #ifdef PROFILE
-#endif // TASK
-
-#include "multicoremem.h"
-
 /////////////////////////////////////////////////////////////
 
 ////////////////////////////////////////////////////////////
 // these functions should be implemented in the           //
 // multicore runtime for any multicore processors         //
 ////////////////////////////////////////////////////////////
-#ifdef MULTICORE
 INLINE void initialization(void);
 INLINE void initCommunication(void);
 INLINE void fakeExecution(void);
@@ -371,109 +69,6 @@ INLINE void initlock(struct ___Object___ * v);
 INLINE void terminatememprof(void);
 #endif // BAMBOO_MEMPROF
 
-// msg related functions
-INLINE void send_hanging_msg(bool isInterrupt);
-INLINE void send_msg_1(int targetcore,
-                       unsigned long n0,
-                                          bool isInterrupt);
-INLINE void send_msg_2(int targetcore,
-                       unsigned long n0,
-                       unsigned long n1,
-                                          bool isInterrupt);
-INLINE void send_msg_3(int targetcore,
-                       unsigned long n0,
-                       unsigned long n1,
-                       unsigned long n2,
-                                          bool isInterrupt);
-INLINE void send_msg_4(int targetcore,
-                       unsigned long n0,
-                       unsigned long n1,
-                       unsigned long n2,
-                       unsigned long n3,
-                                          bool isInterrupt);
-INLINE void send_msg_5(int targetcore,
-                       unsigned long n0,
-                       unsigned long n1,
-                       unsigned long n2,
-                       unsigned long n3,
-                       unsigned long n4,
-                                          bool isInterrupt);
-INLINE void send_msg_6(int targetcore,
-                       unsigned long n0,
-                       unsigned long n1,
-                       unsigned long n2,
-                       unsigned long n3,
-                       unsigned long n4,
-                       unsigned long n5,
-                                          bool isInterrupt);
-INLINE void cache_msg_1(int targetcore,
-                        unsigned long n0);
-INLINE void cache_msg_2(int targetcore,
-                        unsigned long n0,
-                        unsigned long n1);
-INLINE void cache_msg_3(int targetcore,
-                        unsigned long n0,
-                        unsigned long n1,
-                        unsigned long n2);
-INLINE void cache_msg_4(int targetcore,
-                        unsigned long n0,
-                        unsigned long n1,
-                        unsigned long n2,
-                        unsigned long n3);
-INLINE void cache_msg_5(int targetcore,
-                        unsigned long n0,
-                        unsigned long n1,
-                        unsigned long n2,
-                        unsigned long n3,
-                        unsigned long n4);
-INLINE void cache_msg_6(int targetcore,
-                        unsigned long n0,
-                        unsigned long n1,
-                        unsigned long n2,
-                        unsigned long n3,
-                        unsigned long n4,
-                        unsigned long n5);
-INLINE int receiveMsg(unsigned int send_port_pending);
-
-#ifdef MULTICORE_GC
-INLINE void transferMarkResults();
-#endif // MULTICORE_GC
-
-#ifdef TASK
-// lock related functions
-bool getreadlock(void* ptr);
-void releasereadlock(void* ptr);
-bool getwritelock(void* ptr);
-void releasewritelock(void* ptr);
-bool getwritelock_I(void* ptr);
-void releasewritelock_I(void * ptr);
-#ifndef MULTICORE_GC
-void releasewritelock_r(void * lock, void * redirectlock);
-#endif // ifndef MULTICORE_GC
-/* this function is to process lock requests.
- * can only be invoked in receiveObject() */
-// if return -1: the lock request is redirected
-//            0: the lock request is approved
-//            1: the lock request is denied
-INLINE int processlockrequest(int locktype,
-                              int lock,
-                              int obj,
-                              int requestcore,
-                              int rootrequestcore,
-                              bool cache);
-INLINE void processlockrelease(int locktype,
-                               int lock,
-                               int redirectlock,
-                               bool redirect);
-
-// msg related functions
-INLINE void transferObject(struct transObjInfo * transObj);
-
-#ifdef PROFILE
-INLINE void profileTaskStart(char * taskname);
-INLINE void profileTaskEnd(void);
-void outputProfileData();
-#endif  // #ifdef PROFILE
 ///////////////////////////////////////////////////////////
 
 /////////////////////////////////////////////////////////////////////////////
@@ -482,6 +77,8 @@ void outputProfileData();
 // BAMBOO_NUM_OF_CORE: the # of current residing core                      //
 // BAMBOO_GET_NUM_OF_CORE(): compute the # of current residing core        //
 // BAMBOO_COORDS(c, x, y): convert the cpu # to coords (*x, *y)            //
+// BAMBOO_COORDS_X(c): convert the cpu # to coords x                       //
+// BAMBOO_COORDS_Y(c): convert the cpu # to coords y                       //
 // BAMBOO_DEBUGPRINT(x): print out integer x                               //
 // BAMBOO_DEBUGPRINT_REG(x): print out value of variable x                 //
 // BAMBOO_EXIT_APP(x): exit the whole application                          //
@@ -537,6 +134,5 @@ void outputProfileData();
 //                                      stores to incoherent memory        //
 /////////////////////////////////////////////////////////////////////////////
 
-#endif  // #ifdef TASK
 #endif  // #ifdef MULTICORE
-#endif  // #ifndef MULTICORE_RUNTIME
+#endif  // BAMBOO_MULTICORE_RUNTIME_H
index 363df0263d413dfd22b6584e1ccce75ae141267c..837ef6ced4f708f3e713ab57ded3da2deaea0aca 100644 (file)
@@ -1,12 +1,12 @@
 #ifdef TASK
 #include "runtime.h"
 #include "multicoreruntime.h"
-#include "runtime_arch.h"
-#include "GenericHashtable.h"
+#include "multicoretaskprofile.h"
+#include "multicoretask.h"
 
 #ifndef INLINE
 #define INLINE    inline __attribute__((always_inline))
-#endif // #ifndef INLINE
+#endif 
 
 //  data structures for task invocation
 struct genhashtable * activetasks;
@@ -24,43 +24,10 @@ int enqueuetasks_I(struct parameterwrapper *parameter,
                    int * enterflags,
                    int numenterflags);
 
-INLINE void inittaskdata() {
-  int i = 0;
-  
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-    // startup core to initialize corestatus[]
-    for(i = 0; i < NUMCORESACTIVE; ++i) {
-#ifdef PROFILE
-      // initialize the profile data arrays
-      profilestatus[i] = 1;
-#endif // PROFILE
-    } // for(i = 0; i < NUMCORESACTIVE; ++i)
-    total_num_t6 = 0; // TODO for test
-  }
-  totransobjqueue = createQueue_I();
-  objqueue.head = NULL;
-  objqueue.tail = NULL;
-
-  currtpd = NULL;
-
-#ifdef PROFILE
-  stall = false;
-  totalexetime = -1;
-  taskInfoIndex = 0;
-  taskInfoOverflow = false;
-#ifdef PROFILE_INTERRUPT
-  interruptInfoIndex = 0;
-  interruptInfoOverflow = false;
-#endif // PROFILE_INTERRUPT
-#endif // PROFILE
-
-  for(i = 0; i < MAXTASKPARAMS; i++) {
-    runtime_locks[i].redirectlock = 0;
-    runtime_locks[i].value = 0;
-  }
-  runtime_locklen = 0;
-
-#ifndef MULTICORE_GC
+INLINE void initlocktable() {
+#ifdef MULTICORE_GC
+  // do nothing
+#else
   // create the lock table, lockresult table and obj queue
   locktable.size = 20;
   locktable.bucket =
@@ -79,6 +46,34 @@ INLINE void inittaskdata() {
 #endif
 }
 
+INLINE void dislocktable() {
+#ifdef MULTICORE_GC
+  // do nothing
+#else
+  freeRuntimeHash(lockRedirectTbl);
+  freeRuntimeHash(objRedirectLockTbl);
+  RUNFREE(locktable.bucket);
+#endif
+}
+
+INLINE void inittaskdata() {
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    // only the startup core resets the test counter
+    total_num_t6 = 0; // TODO for test
+  }
+  totransobjqueue = createQueue_I();
+  objqueue.head = NULL;
+  objqueue.tail = NULL;
+  currtpd = NULL;
+  for(int i = 0; i < MAXTASKPARAMS; i++) {
+    runtime_locks[i].redirectlock = 0;
+    runtime_locks[i].value = 0;
+  }
+  runtime_locklen = 0;
+  initlocktable();
+  INIT_TASKPROFILE_DATA();
+}
+
 INLINE void distaskdata() {
   if(activetasks != NULL) {
     genfreehashtable(activetasks);
@@ -88,11 +83,7 @@ INLINE void distaskdata() {
     RUNFREE(currtpd);
     currtpd = NULL;
   }
-#ifndef MULTICORE_GC
-  freeRuntimeHash(lockRedirectTbl);
-  freeRuntimeHash(objRedirectLockTbl);
-  RUNFREE(locktable.bucket);
-#endif
+  dislocktable();
 }
 
 INLINE bool checkObjQueue() {
@@ -100,14 +91,8 @@ INLINE bool checkObjQueue() {
   struct transObjInfo * objInfo = NULL;
   int grount = 0;
 
-#ifdef PROFILE
 #ifdef ACCURATEPROFILE
-  bool isChecking = false;
-  if(!isEmpty(&objqueue)) {
-    profileTaskStart("objqueue checking");
-    isChecking = true;
-  }  // if(!isEmpty(&objqueue))
-#endif
+  PROFILE_TASK_START("objqueue checking");
 #endif
 
   while(!isEmpty(&objqueue)) {
@@ -121,14 +106,14 @@ INLINE bool checkObjQueue() {
     BAMBOO_DEBUGPRINT_REG((int)obj);
     // grab lock and flush the obj
     grount = 0;
-       struct ___Object___ * tmpobj = (struct ___Object___ *)obj;
-       while(tmpobj->lock != NULL) {
-         tmpobj = (struct ___Object___ *)(tmpobj->lock);
-       }
+    struct ___Object___ * tmpobj = (struct ___Object___ *)obj;
+    while(tmpobj->lock != NULL) {
+      tmpobj = (struct ___Object___ *)(tmpobj->lock);
+    }
     getwritelock_I(tmpobj);
     while(!lockflag) {
       BAMBOO_WAITING_FOR_LOCK(0);
-    }   // while(!lockflag)
+    } 
     grount = lockresult;
     BAMBOO_DEBUGPRINT_REG(grount);
 
@@ -143,11 +128,9 @@ INLINE bool checkObjQueue() {
     if(grount == 1) {
       int k = 0;
       // flush the object
-#ifdef CACHEFLUSH
       BAMBOO_CACHE_FLUSH_RANGE((int)obj,sizeof(int));
-      BAMBOO_CACHE_FLUSH_RANGE((int)obj,
-                 classsize[((struct ___Object___ *)obj)->type]);
-#endif
+      BAMBOO_CACHE_FLUSH_RANGE((int)obj, 
+          classsize[((struct ___Object___ *)obj)->type]);
       // enqueue the object
       for(k = 0; k < objInfo->length; ++k) {
                int taskindex = objInfo->queues[2 * k];
@@ -158,10 +141,10 @@ INLINE bool checkObjQueue() {
                BAMBOO_DEBUGPRINT_REG(paramindex);
                enqueueObject_I(obj, queues, 1);
                BAMBOO_DEBUGPRINT_REG(hashsize(activetasks));
-      }  // for(k = 0; k < objInfo->length; ++k)
+      } 
       releasewritelock_I(tmpobj);
-      RUNFREE(objInfo->queues);
-      RUNFREE(objInfo);
+      RUNFREE_I(objInfo->queues);
+      RUNFREE_I(objInfo);
     } else {
       // can not get lock
       // put it at the end of the queue if no update version in the queue
@@ -173,31 +156,27 @@ INLINE bool checkObjQueue() {
                if(tmpinfo->objptr == obj) {
                  // the same object in the queue, which should be enqueued
                  // recently. Current one is outdate, do not re-enqueue it
-                 RUNFREE(objInfo->queues);
-                 RUNFREE(objInfo);
+                 RUNFREE_I(objInfo->queues);
+                 RUNFREE_I(objInfo);
                  goto objqueuebreak;
                } else {
                  prev = qitem;
-               }  // if(tmpinfo->objptr == obj)
+               } 
                qitem = getNextQueueItem(prev);
-         }  // while(qitem != NULL)
+         } 
       // try to execute active tasks already enqueued first
       addNewItem_I(&objqueue, objInfo);
 objqueuebreak:
       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
       BAMBOO_DEBUGPRINT(0xf000);
       break;
-    }  // if(grount == 1)
+    } 
     BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
     BAMBOO_DEBUGPRINT(0xf000);
-  }  // while(!isEmpty(&objqueue))
+  }
 
-#ifdef PROFILE
 #ifdef ACCURATEPROFILE
-  if(isChecking) {
-    profileTaskEnd();
-  }  // if(isChecking)
-#endif
+  PROFILE_TASK_END();
 #endif
 
   BAMBOO_DEBUGPRINT(0xee02);
@@ -209,7 +188,7 @@ struct ___createstartupobject____I_locals {
   void * next;
   struct  ___StartupObject___ * ___startupobject___;
   struct ArrayObject * ___stringarray___;
-}; // struct ___createstartupobject____I_locals
+};
 
 void createstartupobject(int argc,
                          char ** argv) {
@@ -250,9 +229,7 @@ void createstartupobject(int argc,
   /* Set initialized flag for startup object */
   flagorandinit(startupobject,1,0xFFFFFFFF);
   enqueueObject(startupobject, NULL, 0);
-#ifdef CACHEFLUSH
   BAMBOO_CACHE_FLUSH_ALL();
-#endif
 }
 
 int hashCodetpd(struct taskparamdescriptor *ftd) {
@@ -493,34 +470,30 @@ void flagorand(void * ptr,
                int andmask,
                struct parameterwrapper ** queues,
                int length) {
-  {
-    int oldflag=((int *)ptr)[1];
-    int flag=ormask|oldflag;
-    flag&=andmask;
-    flagbody(ptr, flag, queues, length, false);
-  }
+  int oldflag=((int *)ptr)[2]; // the flag field is now the third one
+  int flag=ormask|oldflag;
+  flag&=andmask;
+  flagbody(ptr, flag, queues, length, false);
 }
 
 bool intflagorand(void * ptr,
                   int ormask,
                   int andmask) {
-  {
-    int oldflag=((int *)ptr)[1];
-    int flag=ormask|oldflag;
-    flag&=andmask;
-    if (flag==oldflag)   /* Don't do anything */
-      return false;
-    else {
-      flagbody(ptr, flag, NULL, 0, false);
-      return true;
-    }
+  int oldflag=((int *)ptr)[2]; // the flag field is the third one
+  int flag=ormask|oldflag;
+  flag&=andmask;
+  if (flag==oldflag)   /* Don't do anything */
+    return false;
+  else {
+    flagbody(ptr, flag, NULL, 0, false);
+    return true;
   }
 }
 
 void flagorandinit(void * ptr,
                    int ormask,
                    int andmask) {
-  int oldflag=((int *)ptr)[1];
+  int oldflag=((int *)ptr)[2]; // the flag field is the third one
   int flag=ormask|oldflag;
   flag&=andmask;
   flagbody(ptr,flag,NULL,0,true);
@@ -815,339 +788,15 @@ void addAliasLock(void * ptr,
     // originally no alias lock associated or have a different alias lock
     // flush it as the new one
 #ifdef TILERA_BME
-       while(obj->lock != NULL) {
-         // previously have alias lock, trace the 'root' obj and redirect it
-         obj = (struct ___Object___ *)(obj->lock);
-       
+    while(obj->lock != NULL) {
+      // previously have alias lock, trace the 'root' obj and redirect it
+      obj = (struct ___Object___ *)(obj->lock);
+    } 
 #endif // TILERA_BME
     obj->lock = (int *)lock;
   }
 }
 
-#ifdef PROFILE
-inline void setTaskExitIndex(int index) {
-  taskInfoArray[taskInfoIndex]->exitIndex = index;
-}
-
-inline void addNewObjInfo(void * nobj) {
-  if(taskInfoArray[taskInfoIndex]->newObjs == NULL) {
-    taskInfoArray[taskInfoIndex]->newObjs = createQueue();
-  }
-  addNewItem(taskInfoArray[taskInfoIndex]->newObjs, nobj);
-}
-#endif
-
-INLINE void processmsg_transobj_I() {
-  MSG_INDEXINC_I();
-  struct transObjInfo * transObj=RUNMALLOC_I(sizeof(struct transObjInfo));
-  int k = 0;
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT(0xe880);
-#endif
-  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[2]*/);
-#endif
-    BAMBOO_EXIT(0xe201);
-  }
-  // store the object and its corresponding queue info, enqueue it later
-  transObj->objptr = (void *)msgdata[msgdataindex];  //[2]
-  MSG_INDEXINC_I();
-  transObj->length = (msglength - 3) / 2;
-  transObj->queues = RUNMALLOC_I(sizeof(int)*(msglength - 3));
-  for(k = 0; k < transObj->length; ++k) {
-    transObj->queues[2*k] = msgdata[msgdataindex];   //[3+2*k];
-    MSG_INDEXINC_I();
-    transObj->queues[2*k+1] = msgdata[msgdataindex]; //[3+2*k+1];
-    MSG_INDEXINC_I();
-  }
-  // check if there is an existing duplicate item
-  {
-    struct QueueItem * qitem = getHead(&objqueue);
-    struct QueueItem * prev = NULL;
-    while(qitem != NULL) {
-      struct transObjInfo * tmpinfo =
-        (struct transObjInfo *)(qitem->objectptr);
-      if(tmpinfo->objptr == transObj->objptr) {
-               // the same object, remove outdate one
-               RUNFREE(tmpinfo->queues);
-               RUNFREE(tmpinfo);
-               removeItem(&objqueue, qitem);
-               //break;
-      } else {
-               prev = qitem;
-      }
-      if(prev == NULL) {
-               qitem = getHead(&objqueue);
-      } else {
-               qitem = getNextQueueItem(prev);
-      }
-    }
-    addNewItem_I(&objqueue, (void *)transObj);
-  }
-  ++(self_numreceiveobjs);
-#ifdef MULTICORE_GC
-  if(gcprocessing) {
-       if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-         // set the gcprecheck to enable checking again
-         gcprecheck = true;
-       } else {
-         // send a update pregc information msg to the master core
-         if(BAMBOO_CHECK_SEND_MODE()) {
-               cache_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, 
-                       self_numsendobjs, self_numreceiveobjs);
-         } else {
-               send_msg_4(STARTUPCORE, GCFINISHPRE, BAMBOO_NUM_OF_CORE, 
-                       self_numsendobjs, self_numreceiveobjs, true);
-         }
-       }
-  }
-#endif 
-}
-
-#ifndef MULTICORE_GC
-INLINE void processmsg_lockrequest_I() {
-  // check to see if there is a lock exist for the required obj
-  // msgdata[1] -> lock type
-  int locktype = msgdata[msgdataindex]; //[1];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];  // obj pointer
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];  // lock
-  MSG_INDEXINC_I();
-  int data4 = msgdata[msgdataindex];  // request core
-  MSG_INDEXINC_I();
-  // -1: redirected, 0: approved, 1: denied
-  int deny=processlockrequest(locktype, data3, data2, data4, data4, true);
-  if(deny == -1) {
-    // this lock request is redirected
-    return;
-  } else {
-    // send response msg
-    // for 32 bit machine, the size is always 4 words, cache the msg first
-    int tmp = deny==1 ? LOCKDENY : LOCKGROUNT;
-    if(BAMBOO_CHECK_SEND_MODE()) {
-         cache_msg_4(data4, tmp, locktype, data2, data3);
-    } else {
-         send_msg_4(data4, tmp, locktype, data2, data3, true);
-    }
-  }
-}
-
-INLINE void processmsg_lockgrount_I() {
-  MSG_INDEXINC_I();
-  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex] /*[2]*/);
-#endif
-    BAMBOO_EXIT(0xe202);
-  }
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  if((lockobj == data2) && (lock2require == data3)) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe882);
-#endif
-    lockresult = 1;
-    lockflag = true;
-#ifndef INTERRUPT
-    reside = false;
-#endif
-  } else {
-    // conflicts on lockresults
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe203);
-  }
-}
-
-INLINE void processmsg_lockdeny_I() {
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe204);
-  }
-  if((lockobj == data2) && (lock2require == data3)) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe883);
-#endif
-    lockresult = 0;
-    lockflag = true;
-#ifndef INTERRUPT
-    reside = false;
-#endif
-  } else {
-    // conflicts on lockresults
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe205);
-  }
-}
-
-INLINE void processmsg_lockrelease_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  // receive lock release msg
-  processlockrelease(data1, data2, 0, false);
-}
-
-INLINE void processmsg_redirectlock_I() {
-  // check to see if there is a lock exist for the required obj
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();    //msgdata[1]; // lock type
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();    //msgdata[2]; // obj pointer
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();    //msgdata[3]; // redirect lock
-  int data4 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();    //msgdata[4]; // root request core
-  int data5 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();    //msgdata[5]; // request core
-  int deny = processlockrequest(data1, data3, data2, data5, data4, true);
-  if(deny == -1) {
-    // this lock request is redirected
-    return;
-  } else {
-    // send response msg
-    // for 32 bit machine, the size is always 4 words, cache the msg first
-    if(BAMBOO_CHECK_SEND_MODE()) {
-         cache_msg_4(data4, deny==1 ? REDIRECTDENY : REDIRECTGROUNT,
-                                 data1, data2, data3);
-    } else {
-         send_msg_4(data4, deny==1?REDIRECTDENY:REDIRECTGROUNT,
-                                data1, data2, data3, true);
-    }
-  }
-}
-
-INLINE void processmsg_redirectgrount_I() {
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe206);
-  }
-  if(lockobj == data2) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe891);
-#endif
-    int data3 = msgdata[msgdataindex];
-    MSG_INDEXINC_I();
-    lockresult = 1;
-    lockflag = true;
-    RuntimeHashadd_I(objRedirectLockTbl, lockobj, data3);
-#ifndef INTERRUPT
-    reside = false;
-#endif
-  } else {
-    // conflicts on lockresults
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe207);
-  }
-}
-
-INLINE void processmsg_redirectdeny_I() {
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe208);
-  }
-  if(lockobj == data2) {
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT(0xe892);
-#endif
-    lockresult = 0;
-    lockflag = true;
-#ifndef INTERRUPT
-    reside = false;
-#endif
-  } else {
-    // conflicts on lockresults
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(data2);
-#endif
-    BAMBOO_EXIT(0xe209);
-  }
-}
-
-INLINE void processmsg_redirectrelease_I() {
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data2 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  int data3 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  processlockrelease(data1, data2, data3, true);
-}
-#endif // #ifndef MULTICORE_GC
-
-#ifdef PROFILE
-INLINE void processmsg_profileoutput_I() {
-  if(BAMBOO_NUM_OF_CORE == STARTUPCORE) {
-    // startup core can not receive profile output finish msg
-    BAMBOO_EXIT(0xe20a);
-  }
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT(0xe885);
-#endif
-  stall = true;
-  totalexetime = msgdata[msgdataindex];  //[1]
-  MSG_INDEXINC_I();
-#ifdef RT_TEST
-  BAMBOO_DEBUGPRINT_REG(dot_num);
-#else
-  outputProfileData();
-#endif
-  // cache the msg first
-  if(BAMBOO_CHECK_SEND_MODE()) {
-       cache_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE);
-  } else {
-       send_msg_2(STARTUPCORE, PROFILEFINISH, BAMBOO_NUM_OF_CORE, true);
-  }
-}
-
-INLINE void processmsg_profilefinish_I() {
-  if(BAMBOO_NUM_OF_CORE != STARTUPCORE) {
-    // non startup core can not receive profile output finish msg
-#ifndef CLOSE_PRINT
-    BAMBOO_DEBUGPRINT_REG(msgdata[msgdataindex /*1*/]);
-#endif
-    BAMBOO_EXIT(0xe20b);
-  }
-#ifndef CLOSE_PRINT
-  BAMBOO_DEBUGPRINT(0xe886);
-#endif
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  profilestatus[data1] = 0;
-}
-#endif // #ifdef PROFILE
-
 int enqueuetasks(struct parameterwrapper *parameter,
                  struct parameterwrapper *prevptr,
                  struct ___Object___ *ptr,
@@ -1162,7 +811,7 @@ int enqueuetasks(struct parameterwrapper *parameter,
 
   //this add the object to parameterwrapper
   ObjectHashadd(parameter->objectset, (int) ptr, 0, (int) enterflags,
-                numenterflags, enterflags==NULL);
+      numenterflags, enterflags==NULL);
 
   /* Add enqueued object to parameter vector */
   taskpointerarray[parameter->slot]=ptr;
@@ -1215,17 +864,16 @@ backtrackinit:
 
     for(j=numiterators-1; j<numiterators; j++) {
 backtrackinc:
-      if(toiHasNext(
-                       &parameter->iterators[j],taskpointerarray OPTARG(failed)))
-               toiNext(&parameter->iterators[j], taskpointerarray OPTARG(failed));
+      if(toiHasNext(&parameter->iterators[j],taskpointerarray OPTARG(failed)))
+        toiNext(&parameter->iterators[j], taskpointerarray OPTARG(failed));
       else if (j>0) {
-               /* Need to backtrack */
-               toiReset(&parameter->iterators[j]);
-               j--;
-               goto backtrackinc;
+        /* Need to backtrack */
+        toiReset(&parameter->iterators[j]);
+        j--;
+        goto backtrackinc;
       } else {
-               /* Nothing more to enqueue */
-               return retval;
+        /* Nothing more to enqueue */
+        return retval;
       }
     }
   }
@@ -1289,8 +937,8 @@ backtrackinit:
     if (!gencontains(activetasks,tpd)) {
       genputtable_I(activetasks, tpd, tpd);
     } else {
-      RUNFREE(tpd->parameterArray);
-      RUNFREE(tpd);
+      RUNFREE_I(tpd->parameterArray);
+      RUNFREE_I(tpd);
     }
 
     /* This loop iterates to the next parameter combination */
@@ -1299,17 +947,16 @@ backtrackinit:
 
     for(j=numiterators-1; j<numiterators; j++) {
 backtrackinc:
-      if(toiHasNext(
-                       &parameter->iterators[j], taskpointerarray OPTARG(failed)))
-               toiNext(&parameter->iterators[j], taskpointerarray OPTARG(failed));
+      if(toiHasNext(&parameter->iterators[j], taskpointerarray OPTARG(failed)))
+        toiNext(&parameter->iterators[j], taskpointerarray OPTARG(failed));
       else if (j>0) {
-               /* Need to backtrack */
-               toiReset(&parameter->iterators[j]);
-               j--;
-               goto backtrackinc;
+        /* Need to backtrack */
+        toiReset(&parameter->iterators[j]);
+        j--;
+        goto backtrackinc;
       } else {
-               /* Nothing more to enqueue */
-               return retval;
+        /* Nothing more to enqueue */
+        return retval;
       }
     }
   }
@@ -1383,17 +1030,13 @@ void executetasks() {
 
 newtask:
   while(hashsize(activetasks)>0) {
-#ifdef MULTICORE_GC
-    if(gcflag) gc(NULL);
-#endif
+    GCCHECK(NULL);
     BAMBOO_DEBUGPRINT(0xe990);
 
     /* See if there are any active tasks */
     int i;
-#ifdef PROFILE
 #ifdef ACCURATEPROFILE
-    profileTaskStart("tpd checking");
-#endif
+    PROFILE_TASK_START("tpd checking");
 #endif
 
     busystatus = true;
@@ -1414,49 +1057,40 @@ newtask:
       int j = 0;
       bool insert = true;
       if(((struct ___Object___ *)param)->type == STARTUPTYPE) {
-               islock = false;
-               taskpointerarray[i+OFFSET]=param;
-               goto execute;
+        islock = false;
+        taskpointerarray[i+OFFSET]=param;
+        goto execute;
       }
-      /*if(((struct ___Object___ *)param)->lock == NULL) {
-               tmplock = (int)param;
-      } else {
-               struct ___Object___ * obj = (struct ___Object___ *)param;
-               while(obj->lock != NULL) {
-                 obj = (struct ___Object___ *)(obj->lock);
-               }
-               tmplock = (int)(obj);
-      }*/
-         struct ___Object___ * obj = (struct ___Object___ *)param;
-         while(obj->lock != NULL) {
-               obj = (struct ___Object___ *)(obj->lock);
-         }
-         tmplock = (int)(obj);
+      struct ___Object___ * obj = (struct ___Object___ *)param;
+      while(obj->lock != NULL) {
+        obj = (struct ___Object___ *)(obj->lock);
+      }
+      tmplock = (int)(obj);
       // insert into the locks array
       for(j = 0; j < runtime_locklen; j++) {
-               if(runtime_locks[j].value == tmplock) {
-                 insert = false;
-                 break;
-               } else if(runtime_locks[j].value > tmplock) {
-                 break;
-               }
+        if(runtime_locks[j].value == tmplock) {
+          insert = false;
+          break;
+        } else if(runtime_locks[j].value > tmplock) {
+          break;
+        }
       }
       if(insert) {
-               int h = runtime_locklen;
-               for(; h > j; h--) {
-                 runtime_locks[h].redirectlock = runtime_locks[h-1].redirectlock;
-                 runtime_locks[h].value = runtime_locks[h-1].value;
-               }
-               runtime_locks[j].value = tmplock;
-               runtime_locks[j].redirectlock = (int)param;
-               runtime_locklen++;
+        int h = runtime_locklen;
+        for(; h > j; h--) {
+          runtime_locks[h].redirectlock = runtime_locks[h-1].redirectlock;
+          runtime_locks[h].value = runtime_locks[h-1].value;
+        }
+        runtime_locks[j].value = tmplock;
+        runtime_locks[j].redirectlock = (int)param;
+        runtime_locklen++;
       }
-    }  // line 2713: for(i = 0; i < numparams; i++)
+    }  // for(i = 0; i < numparams; i++)
     // grab these required locks
     BAMBOO_DEBUGPRINT(0xe991);
 
     for(i = 0; i < runtime_locklen; i++) {
-      int * lock = (int *)(runtime_locks[i].value);//(runtime_locks[i].redirectlock);
+      int * lock = (int *)(runtime_locks[i].value);
       islock = true;
       // require locks for this parameter if it is not a startup object
       BAMBOO_DEBUGPRINT_REG((int)lock);
@@ -1465,12 +1099,12 @@ newtask:
       BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
       BAMBOO_DEBUGPRINT(0xf001);
       while(!lockflag) {
-               BAMBOO_WAITING_FOR_LOCK(0);
-         }
+        BAMBOO_WAITING_FOR_LOCK(0);
+      }
 #ifndef INTERRUPT
       if(reside) {
-               while(BAMBOO_WAITING_FOR_LOCK(0) != -1) {
-               }
+        while(BAMBOO_WAITING_FOR_LOCK(0) != -1) {
+        }
       }
 #endif
       grount = lockresult;
@@ -1486,29 +1120,27 @@ newtask:
       BAMBOO_DEBUGPRINT(0xf000);
 
       if(grount == 0) {
-               BAMBOO_DEBUGPRINT(0xe992);
-               BAMBOO_DEBUGPRINT_REG(lock);
-               // check if has the lock already
-               // can not get the lock, try later
-               // release all grabbed locks for previous parameters
-               for(j = 0; j < i; ++j) {
-                 lock = (int*)(runtime_locks[j].value/*redirectlock*/);
-                 releasewritelock(lock);
-               }
-               genputtable(activetasks, currtpd, currtpd);
-               if(hashsize(activetasks) == 1) {
-                 // only one task right now, wait a little while before next try
-                 int halt = 10000;
-                 while(halt--) {
-                 }
-               }
-#ifdef PROFILE
+        BAMBOO_DEBUGPRINT(0xe992);
+        BAMBOO_DEBUGPRINT_REG(lock);
+        // check if has the lock already
+        // can not get the lock, try later
+        // release all grabbed locks for previous parameters           
+        for(j = 0; j < i; ++j) {
+          lock = (int*)(runtime_locks[j].value/*redirectlock*/);
+          releasewritelock(lock);
+        }
+        genputtable(activetasks, currtpd, currtpd);
+        if(hashsize(activetasks) == 1) {
+          // only one task right now, wait a little while before next try
+          int halt = 10000;
+          while(halt--) {
+          }
+        }
 #ifdef ACCURATEPROFILE
-               // fail, set the end of the checkTaskInfo
-               profileTaskEnd();
+        // fail, set the end of the checkTaskInfo
+        PROFILE_TASK_END();
 #endif
-#endif
-               goto newtask;
+        goto newtask;
       }
     }   // line 2752:  for(i = 0; i < runtime_locklen; i++)
 
@@ -1518,96 +1150,92 @@ newtask:
       void * parameter=currtpd->parameterArray[i];
 
       // flush the object
-#ifdef CACHEFLUSH
       BAMBOO_CACHE_FLUSH_RANGE((int)parameter,
-                 classsize[((struct ___Object___ *)parameter)->type]);
-#endif
+          classsize[((struct ___Object___ *)parameter)->type]);
       tmpparam = (struct ___Object___ *)parameter;
       pd=currtpd->task->descriptorarray[i];
       pw=(struct parameterwrapper *) pd->queue;
       /* Check that object is still in queue */
       {
-               if (!ObjectHashcontainskey(pw->objectset, (int) parameter)) {
-                 BAMBOO_DEBUGPRINT(0xe994);
-                 BAMBOO_DEBUGPRINT_REG(parameter);
-                 // release grabbed locks
-                 for(j = 0; j < runtime_locklen; ++j) {
-                       int * lock = (int *)(runtime_locks[j].value/*redirectlock*/);
-                       releasewritelock(lock);
-                 }
-                 RUNFREE(currtpd->parameterArray);
-                 RUNFREE(currtpd);
-                 currtpd = NULL;
-                 goto newtask;
-               }
-      }   // line2865
+        if (!ObjectHashcontainskey(pw->objectset, (int) parameter)) {
+          BAMBOO_DEBUGPRINT(0xe994);
+          BAMBOO_DEBUGPRINT_REG(parameter);
+          // release grabbed locks
+          for(j = 0; j < runtime_locklen; ++j) {
+            int * lock = (int *)(runtime_locks[j].value);
+            releasewritelock(lock);
+          }
+          RUNFREE(currtpd->parameterArray);
+          RUNFREE(currtpd);
+          currtpd = NULL;
+          goto newtask;
+        }
+      } 
       /* Check if the object's flags still meets requirements */
       {
-               int tmpi = 0;
-               bool ismet = false;
-               for(tmpi = 0; tmpi < pw->numberofterms; ++tmpi) {
-                 andmask=pw->intarray[tmpi*2];
-                 checkmask=pw->intarray[tmpi*2+1];
-                 if((((struct ___Object___ *)parameter)->flag&andmask)==checkmask) {
-                       ismet = true;
-                       break;
-                 }
-               }
-               if (!ismet) {
-                 // flags are never suitable
-                 // remove this obj from the queue
-                 int next;
-                 int UNUSED, UNUSED2;
-                 int * enterflags;
-                 BAMBOO_DEBUGPRINT(0xe995);
-                 BAMBOO_DEBUGPRINT_REG(parameter);
-                 ObjectHashget(pw->objectset, (int) parameter, (int *) &next,
-                                               (int *) &enterflags, &UNUSED, &UNUSED2);
-                 ObjectHashremove(pw->objectset, (int)parameter);
-                 if (enterflags!=NULL)
-                       RUNFREE(enterflags);
-                 // release grabbed locks
-                 for(j = 0; j < runtime_locklen; ++j) {
-                       int * lock = (int *)(runtime_locks[j].value/*redirectlock*/);
-                       releasewritelock(lock);
-                 }
-                 RUNFREE(currtpd->parameterArray);
-                 RUNFREE(currtpd);
-                 currtpd = NULL;
-#ifdef PROFILE
+        int tmpi = 0;
+        bool ismet = false;
+        for(tmpi = 0; tmpi < pw->numberofterms; ++tmpi) {
+          andmask=pw->intarray[tmpi*2];
+          checkmask=pw->intarray[tmpi*2+1];
+          if((((struct ___Object___ *)parameter)->flag&andmask)==checkmask) {
+            ismet = true;
+            break;
+          }
+        }
+        if (!ismet) {
+          // flags are never suitable
+          // remove this obj from the queue
+          int next;
+          int UNUSED, UNUSED2;
+          int * enterflags;
+          BAMBOO_DEBUGPRINT(0xe995);
+          BAMBOO_DEBUGPRINT_REG(parameter);
+          ObjectHashget(pw->objectset, (int) parameter, (int *) &next,
+              (int *) &enterflags, &UNUSED, &UNUSED2);
+          ObjectHashremove(pw->objectset, (int)parameter);
+          if (enterflags!=NULL)
+            RUNFREE(enterflags);
+          // release grabbed locks
+          for(j = 0; j < runtime_locklen; ++j) {
+            int * lock = (int *)(runtime_locks[j].value/*redirectlock*/);
+            releasewritelock(lock);
+          }
+          RUNFREE(currtpd->parameterArray);
+          RUNFREE(currtpd);
+          currtpd = NULL;
 #ifdef ACCURATEPROFILE
-                 // fail, set the end of the checkTaskInfo
-                 profileTaskEnd();
-#endif
+          // fail, set the end of the checkTaskInfo
+          PROFILE_TASK_END();
 #endif
-                 goto newtask;
-               }   // line 2878: if (!ismet)
-      }   // line 2867
+          goto newtask;
+        }   //if (!ismet)
+      } 
 parameterpresent:
       ;
       /* Check that object still has necessary tags */
       for(j=0; j<pd->numbertags; j++) {
-               int slotid=pd->tagarray[2*j]+numparams;
-               struct ___TagDescriptor___ *tagd=currtpd->parameterArray[slotid];
-               if (!containstag(parameter, tagd)) {
-                 BAMBOO_DEBUGPRINT(0xe996);
-                 {
-                       // release grabbed locks
-                       int tmpj = 0;
-                       for(tmpj = 0; tmpj < runtime_locklen; ++tmpj) {
-                         int * lock = (int *)(runtime_locks[tmpj].value/*redirectlock*/);
-                         releasewritelock(lock);
-                       }
-                 }
-                 RUNFREE(currtpd->parameterArray);
-                 RUNFREE(currtpd);
-                 currtpd = NULL;
-                 goto newtask;
-               }   // line2911: if (!containstag(parameter, tagd))
-      }   // line 2808: for(j=0; j<pd->numbertags; j++)
+        int slotid=pd->tagarray[2*j]+numparams;
+        struct ___TagDescriptor___ *tagd=currtpd->parameterArray[slotid];
+        if (!containstag(parameter, tagd)) {
+          BAMBOO_DEBUGPRINT(0xe996);
+          {
+            // release grabbed locks
+            int tmpj = 0;
+            for(tmpj = 0; tmpj < runtime_locklen; ++tmpj) {
+              int * lock = (int *)(runtime_locks[tmpj].value/*redirectlock*/);
+              releasewritelock(lock);
+            }
+          }
+          RUNFREE(currtpd->parameterArray);
+          RUNFREE(currtpd);
+          currtpd = NULL;
+          goto newtask;                
+        }   
+      }   
 
       taskpointerarray[i+OFFSET]=parameter;
-    }   // line 2824: for(i=0; i<numparams; i++)
+    }   // for(i=0; i<numparams; i++)
     /* Copy the tags */
     for(; i<numtotal; i++) {
       taskpointerarray[i+OFFSET]=currtpd->parameterArray[i];
@@ -1620,67 +1248,60 @@ execute:
       ((int *)taskpointerarray)[0]=currtpd->numParameters;
       taskpointerarray[1]=NULL;
 #endif
-#ifdef PROFILE
 #ifdef ACCURATEPROFILE
       // check finish, set the end of the checkTaskInfo
-      profileTaskEnd();
-#endif
-      profileTaskStart(currtpd->task->name);
+      PROFILE_TASK_END();
 #endif
+      PROFILE_TASK_START(currtpd->task->name);
 
       BAMBOO_DEBUGPRINT(0xe997);
       ((void (*)(void **))currtpd->task->taskptr)(taskpointerarray);
 
-#ifdef PROFILE
 #ifdef ACCURATEPROFILE
       // task finish, set the end of the checkTaskInfo
-      profileTaskEnd();
+      PROFILE_TASK_END();
       // new a PostTaskInfo for the post-task execution
-      profileTaskStart("post task execution");
-#endif
+      PROFILE_TASK_START("post task execution");
 #endif
       BAMBOO_DEBUGPRINT(0xe998);
       BAMBOO_DEBUGPRINT_REG(islock);
 
       if(islock) {
-               BAMBOO_DEBUGPRINT(0xe999);
-               for(i = runtime_locklen; i>0; i--) {
-                 void * ptr = (void *)(runtime_locks[i-1].redirectlock);
-                 int * lock = (int *)(runtime_locks[i-1].value);
-                 BAMBOO_DEBUGPRINT_REG((int)ptr);
-                 BAMBOO_DEBUGPRINT_REG((int)lock);
-                 BAMBOO_DEBUGPRINT_REG(*((int*)lock+5));
+        BAMBOO_DEBUGPRINT(0xe999);
+        for(i = runtime_locklen; i>0; i--) {
+          void * ptr = (void *)(runtime_locks[i-1].redirectlock);
+          int * lock = (int *)(runtime_locks[i-1].value);
+          BAMBOO_DEBUGPRINT_REG((int)ptr);
+          BAMBOO_DEBUGPRINT_REG((int)lock);
+          BAMBOO_DEBUGPRINT_REG(*((int*)lock+5));
 #ifndef MULTICORE_GC
 #ifndef TILERA_BME
-                 if(RuntimeHashcontainskey(lockRedirectTbl, (int)lock)) {
-                       int redirectlock;
-                       RuntimeHashget(lockRedirectTbl, (int)lock, &redirectlock);
-                       RuntimeHashremovekey(lockRedirectTbl, (int)lock);
-                       releasewritelock_r(lock, (int *)redirectlock);
-                 } else -1{
+          if(RuntimeHashcontainskey(lockRedirectTbl, (int)lock)) {
+            int redirectlock;
+            RuntimeHashget(lockRedirectTbl, (int)lock, &redirectlock);
+            RuntimeHashremovekey(lockRedirectTbl, (int)lock);
+            releasewritelock_r(lock, (int *)redirectlock);
+          } else {
 #else
-                 {
+          {
 #endif
 #else
-                 {
+          {
 #endif
-                       releasewritelock(lock); // ptr
-                 }
-               }
-      }     // line 3015: if(islock)
+            releasewritelock(lock); // ptr
+          }
+        }
+      }     // if(islock)
 
-#ifdef PROFILE
       // post task execution finish, set the end of the postTaskInfo
-      profileTaskEnd();
-#endif
+      PROFILE_TASK_END();
 
       // Free up task parameter descriptor
       RUNFREE(currtpd->parameterArray);
       RUNFREE(currtpd);
       currtpd = NULL;
       BAMBOO_DEBUGPRINT(0xe99a);
-    }   //
-    //} //  if (hashsize(activetasks)>0)
+    }   
   } //  while(hashsize(activetasks)>0)
   BAMBOO_DEBUGPRINT(0xe99b);
 }
@@ -1709,7 +1330,6 @@ void processtags(struct parameterdescriptor *pd,
   }
 }
 
-
 void processobject(struct parameterwrapper *parameter,
                    int index,
                    struct parameterdescriptor *pd,
@@ -1860,10 +1480,8 @@ void printdebug() {
   }
 }
 
-
 /* This function processes the task information to create queues for
    each parameter type. */
-
 void processtasks() {
   int i;
   if(BAMBOO_NUM_OF_CORE > NUMCORESACTIVE - 1) {
@@ -2020,186 +1638,4 @@ void toiNext(struct tagobjectiterator *it,
     Objnext(&it->it);
   }
 }
-
-#ifdef PROFILE
-inline void profileTaskStart(char * taskname) {
-  if(!taskInfoOverflow) {
-    TaskInfo* taskInfo = RUNMALLOC(sizeof(struct task_info));
-    taskInfoArray[taskInfoIndex] = taskInfo;
-    taskInfo->taskName = taskname;
-    taskInfo->startTime = BAMBOO_GET_EXE_TIME();
-    taskInfo->endTime = -1;
-    taskInfo->exitIndex = -1;
-    taskInfo->newObjs = NULL;
-  }
-}
-
-inline void profileTaskEnd() {
-  if(!taskInfoOverflow) {
-    taskInfoArray[taskInfoIndex]->endTime = BAMBOO_GET_EXE_TIME();
-    taskInfoIndex++;
-    if(taskInfoIndex == TASKINFOLENGTH) {
-      taskInfoOverflow = true;
-    }
-  }
-}
-
-// output the profiling data
-void outputProfileData() {
-#ifdef USEIO
-  int i;
-  unsigned long long totaltasktime = 0;
-  unsigned long long preprocessingtime = 0;
-  unsigned long long objqueuecheckingtime = 0;
-  unsigned long long postprocessingtime = 0;
-  unsigned long long other = 0;
-  unsigned long long averagetasktime = 0;
-  int tasknum = 0;
-
-  printf("Task Name, Start Time, End Time, Duration, Exit Index(, NewObj Name, Num)+\n");
-  // output task related info
-  for(i = 0; i < taskInfoIndex; i++) {
-    TaskInfo* tmpTInfo = taskInfoArray[i];
-    unsigned long long duration = tmpTInfo->endTime - tmpTInfo->startTime;
-    printf("%s, %lld, %lld, %lld, %lld",
-           tmpTInfo->taskName, tmpTInfo->startTime, tmpTInfo->endTime,
-           duration, tmpTInfo->exitIndex);
-    // summarize new obj info
-    if(tmpTInfo->newObjs != NULL) {
-      struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
-      struct RuntimeIterator * iter = NULL;
-      while(0 == isEmpty(tmpTInfo->newObjs)) {
-               char * objtype = (char *)(getItem(tmpTInfo->newObjs));
-               if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
-                 int num = 0;
-                 RuntimeHashget(nobjtbl, (int)objtype, &num);
-                 RuntimeHashremovekey(nobjtbl, (int)objtype);
-                 num++;
-                 RuntimeHashadd(nobjtbl, (int)objtype, num);
-               } else {
-                 RuntimeHashadd(nobjtbl, (int)objtype, 1);
-               }
-               //printf(stderr, "new obj!\n");
-      }
-
-      // output all new obj info
-      iter = RuntimeHashcreateiterator(nobjtbl);
-      while(RunhasNext(iter)) {
-               char * objtype = (char *)Runkey(iter);
-               int num = Runnext(iter);
-               printf(", %s, %d", objtype, num);
-      }
-    }
-    printf("\n");
-    if(strcmp(tmpTInfo->taskName, "tpd checking") == 0) {
-      preprocessingtime += duration;
-    } else if(strcmp(tmpTInfo->taskName, "post task execution") == 0) {
-      postprocessingtime += duration;
-    } else if(strcmp(tmpTInfo->taskName, "objqueue checking") == 0) {
-      objqueuecheckingtime += duration;
-    } else {
-      totaltasktime += duration;
-      averagetasktime += duration;
-      tasknum++;
-    }
-  }
-
-  if(taskInfoOverflow) {
-    printf("Caution: task info overflow!\n");
-  }
-
-  other = totalexetime-totaltasktime-preprocessingtime-postprocessingtime;
-  averagetasktime /= tasknum;
-
-  printf("\nTotal time: %lld\n", totalexetime);
-  printf("Total task execution time: %lld (%d%%)\n", totaltasktime,
-         (int)(((double)totaltasktime/(double)totalexetime)*100));
-  printf("Total objqueue checking time: %lld (%d%%)\n",
-         objqueuecheckingtime,
-         (int)(((double)objqueuecheckingtime/(double)totalexetime)*100));
-  printf("Total pre-processing time: %lld (%d%%)\n", preprocessingtime,
-         (int)(((double)preprocessingtime/(double)totalexetime)*100));
-  printf("Total post-processing time: %lld (%d%%)\n", postprocessingtime,
-         (int)(((double)postprocessingtime/(double)totalexetime)*100));
-  printf("Other time: %lld (%d%%)\n", other,
-         (int)(((double)other/(double)totalexetime)*100));
-
-
-  printf("\nAverage task execution time: %lld\n", averagetasktime);
-
-#else
-  int i = 0;
-  int j = 0;
-
-  BAMBOO_PRINT(0xdddd);
-  // output task related info
-  for(i= 0; i < taskInfoIndex; i++) {
-    TaskInfo* tmpTInfo = taskInfoArray[i];
-    char* tmpName = tmpTInfo->taskName;
-    int nameLen = strlen(tmpName);
-    BAMBOO_PRINT(0xddda);
-    for(j = 0; j < nameLen; j++) {
-      BAMBOO_PRINT_REG(tmpName[j]);
-    }
-    BAMBOO_PRINT(0xdddb);
-    BAMBOO_PRINT_REG(tmpTInfo->startTime);
-    BAMBOO_PRINT_REG(tmpTInfo->endTime);
-    BAMBOO_PRINT_REG(tmpTInfo->exitIndex);
-    if(tmpTInfo->newObjs != NULL) {
-      struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
-      struct RuntimeIterator * iter = NULL;
-      while(0 == isEmpty(tmpTInfo->newObjs)) {
-               char * objtype = (char *)(getItem(tmpTInfo->newObjs));
-               if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
-                 int num = 0;
-                 RuntimeHashget(nobjtbl, (int)objtype, &num);
-                 RuntimeHashremovekey(nobjtbl, (int)objtype);
-                 num++;
-                 RuntimeHashadd(nobjtbl, (int)objtype, num);
-               } else {
-                 RuntimeHashadd(nobjtbl, (int)objtype, 1);
-               }
-      }
-
-      // ouput all new obj info
-      iter = RuntimeHashcreateiterator(nobjtbl);
-      while(RunhasNext(iter)) {
-               char * objtype = (char *)Runkey(iter);
-               int num = Runnext(iter);
-               int nameLen = strlen(objtype);
-               BAMBOO_PRINT(0xddda);
-               for(j = 0; j < nameLen; j++) {
-                 BAMBOO_PRINT_REG(objtype[j]);
-               }
-               BAMBOO_PRINT(0xdddb);
-               BAMBOO_PRINT_REG(num);
-         }
-       }
-       BAMBOO_PRINT(0xdddc);
-  }
-
-  if(taskInfoOverflow) {
-       BAMBOO_PRINT(0xefee);
-  }
-
-#ifdef PROFILE_INTERRUPT
-  // output interrupt related info
-  for(i = 0; i < interruptInfoIndex; i++) {
-       InterruptInfo* tmpIInfo = interruptInfoArray[i];
-       BAMBOO_PRINT(0xddde);
-       BAMBOO_PRINT_REG(tmpIInfo->startTime);
-       BAMBOO_PRINT_REG(tmpIInfo->endTime);
-       BAMBOO_PRINT(0xdddf);
-  }
-
-  if(interruptInfoOverflow) {
-       BAMBOO_PRINT(0xefef);
-  }
-#endif // PROFILE_INTERRUPT
-
-  BAMBOO_PRINT(0xeeee);
-#endif
-}
-#endif  // #ifdef PROFILE
-
 #endif
diff --git a/Robust/src/Runtime/bamboo/multicoretask.h b/Robust/src/Runtime/bamboo/multicoretask.h
new file mode 100644 (file)
index 0000000..396e57b
--- /dev/null
@@ -0,0 +1,66 @@
+#ifndef BAMBOO_MULTICORE_TASK_H
+#define BAMBOO_MULTICORE_TASK_H
+#ifdef TASK
+#include "multicore.h"
+// TASK specific data structures
+// get rid of lock msgs for GC version
+#ifndef MULTICORE_GC
+// data structures for locking
+struct RuntimeHash locktable;
+static struct RuntimeHash* locktbl = &locktable;
+struct RuntimeHash * lockRedirectTbl;
+struct RuntimeHash * objRedirectLockTbl;
+#endif // ifndef MULTICORE_GC
+struct LockValue {
+  int redirectlock;  // read back as a void* by the task-execution code; NOTE(review): presumably the redirected lock target - confirm
+  int value;         // lock address stored as an int; cast to int* before releasewritelock()
+};
+int lockobj;
+int lock2require;
+int lockresult;
+bool lockflag;
+
+// data structures for waiting objs
+struct Queue objqueue;
+struct Queue * totransobjqueue; // queue to hold objs to be transferred
+                                // should be cleared whenever enter a task
+                                       
+// for test TODO
+int total_num_t6;
+
+// lock related functions
+bool getreadlock(void* ptr);
+void releasereadlock(void* ptr);
+bool getwritelock(void* ptr);
+void releasewritelock(void* ptr);
+bool getwritelock_I(void* ptr);
+void releasewritelock_I(void * ptr);
+#ifndef MULTICORE_GC
+void releasewritelock_r(void * lock, void * redirectlock);
+#endif // ifndef MULTICORE_GC
+/* this function is to process lock requests.
+ * can only be invoked in receiveObject() */
+// if return -1: the lock request is redirected
+//            0: the lock request is approved
+//            1: the lock request is denied
+INLINE int processlockrequest(int locktype,
+                              int lock,
+                              int obj,
+                              int requestcore,
+                              int rootrequestcore,
+                              bool cache);
+INLINE void processlockrelease(int locktype,
+                               int lock,
+                               int redirectlock,
+                               bool redirect);
+
+INLINE void inittaskdata();
+INLINE void distaskdata();
+
+#define INITTASKDATA() inittaskdata()
+#define DISTASKDATA() distaskdata()
+#else // TASK
+#define INITTASKDATA()
+#define DISTASKDATA()
+#endif // TASK
+#endif // BAMBOO_MULTICORE_TASK_H
diff --git a/Robust/src/Runtime/bamboo/multicoretaskprofile.c b/Robust/src/Runtime/bamboo/multicoretaskprofile.c
new file mode 100644 (file)
index 0000000..fa86c02
--- /dev/null
@@ -0,0 +1,232 @@
+#ifdef PROFILE
+
+#include "multicoretaskprofile.h"
+
+// Resets the task-profiling state of this core. On the startup core it also
+// marks every active core as "running tasks" (1) in profilestatus[].
+INLINE void inittaskprofiledata() {
+  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
+    // only the startup core initializes profilestatus[]
+    int i;  // the loop index was previously used without a declaration
+    for(i = 0; i < NUMCORESACTIVE; ++i) {
+      profilestatus[i] = 1;
+    }
+  }
+
+  stall = false;
+  totalexetime = -1;
+  // start recording task info from the first slot again
+  taskInfoIndex = 0;
+  taskInfoOverflow = false;
+#ifdef PROFILE_INTERRUPT
+  interruptInfoIndex = 0;
+  interruptInfoOverflow = false;
+#endif // PROFILE_INTERRUPT
+}
+
+// Stores the exit index in the profiling record of the current task.
+// Does nothing once the profile buffer has overflowed: taskInfoIndex then
+// equals TASKINFOLENGTH, which is one past the end of taskInfoArray.
+inline void setTaskExitIndex(int index) {
+  if(taskInfoOverflow) {
+    return;
+  }
+  taskInfoArray[taskInfoIndex]->exitIndex = index;
+}
+
+// Appends nobj to the new-object queue of the current task's profiling
+// record, creating the queue on first use.
+// Does nothing once the profile buffer has overflowed: taskInfoIndex then
+// equals TASKINFOLENGTH, which is one past the end of taskInfoArray.
+inline void addNewObjInfo(void * nobj) {
+  if(taskInfoOverflow) {
+    return;
+  }
+  if(taskInfoArray[taskInfoIndex]->newObjs == NULL) {
+    taskInfoArray[taskInfoIndex]->newObjs = createQueue();
+  }
+  addNewItem(taskInfoArray[taskInfoIndex]->newObjs, nobj);
+}
+
+// Opens a profiling record for taskname in the current taskInfoArray slot and
+// stamps its start time. No record is created after the buffer overflowed.
+inline void profileTaskStart(char * taskname) {
+  TaskInfo* record;
+
+  if(taskInfoOverflow) {
+    return;
+  }
+
+  record = RUNMALLOC(sizeof(struct task_info));
+  taskInfoArray[taskInfoIndex] = record;
+  record->taskName = taskname;
+  record->startTime = BAMBOO_GET_EXE_TIME();
+  // -1 / NULL mark the fields that are filled in later
+  record->endTime = -1;
+  record->exitIndex = -1;
+  record->newObjs = NULL;
+}
+
+// Stamps the end time of the current profiling record and advances to the
+// next slot; flags overflow when the last slot has just been used.
+inline void profileTaskEnd() {
+  if(taskInfoOverflow) {
+    return;
+  }
+
+  taskInfoArray[taskInfoIndex]->endTime = BAMBOO_GET_EXE_TIME();
+  taskInfoIndex += 1;
+  if(TASKINFOLENGTH == taskInfoIndex) {
+    taskInfoOverflow = true;
+  }
+}
+
+#ifdef PROFILE_INTERRUPT
+// Opens an interrupt profiling record in the current interruptInfoArray slot
+// and stamps its start time. No record is created after overflow.
+INLINE void profileInterruptStart_I(void) {
+  InterruptInfo* record;
+
+  if(interruptInfoOverflow) {
+    return;
+  }
+
+  record = RUNMALLOC_I(sizeof(struct interrupt_info));
+  interruptInfoArray[interruptInfoIndex] = record;
+  record->startTime = BAMBOO_GET_EXE_TIME();
+  record->endTime = -1;  // filled in by profileInterruptEnd_I()
+}
+
+// Stamps the end time of the current interrupt record and advances to the
+// next slot; flags overflow when the last slot has just been used.
+INLINE void profileInterruptEnd_I(void) {
+  if(interruptInfoOverflow) {
+    return;
+  }
+
+  interruptInfoArray[interruptInfoIndex]->endTime = BAMBOO_GET_EXE_TIME();
+  interruptInfoIndex += 1;
+  if(INTERRUPTINFOLENGTH == interruptInfoIndex) {
+    interruptInfoOverflow = true;
+  }
+}
+#endif // PROFILE_INTERRUPT
+
+// Outputs the collected profiling data.
+// With USEIO: prints one CSV line per task record (plus per-type counts of
+// the objects queued in newObjs) followed by a summary of the time spent in
+// each category.
+// Without USEIO: emits the same records through BAMBOO_PRINT /
+// BAMBOO_PRINT_REG, framed by marker words (0xdddd at the start, 0xeeee at
+// the end).
+void outputProfileData() {
+#ifdef USEIO
+  int i;
+  unsigned long long totaltasktime = 0;
+  unsigned long long preprocessingtime = 0;
+  unsigned long long objqueuecheckingtime = 0;
+  unsigned long long postprocessingtime = 0;
+  unsigned long long other = 0;
+  unsigned long long averagetasktime = 0;
+  int tasknum = 0;
+
+  printf("Task Name, Start Time, End Time, Duration, Exit Index(, NewObj Name, Num)+\n");
+  // output task related info
+  for(i = 0; i < taskInfoIndex; i++) {
+    TaskInfo* tmpTInfo = taskInfoArray[i];
+    unsigned long long duration = tmpTInfo->endTime - tmpTInfo->startTime;
+    printf("%s, %lld, %lld, %lld, %lld", tmpTInfo->taskName, 
+        tmpTInfo->startTime, tmpTInfo->endTime, duration, tmpTInfo->exitIndex);
+    // summarize new obj info: count how often each type name was queued
+    if(tmpTInfo->newObjs != NULL) {
+      struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
+      struct RuntimeIterator * iter = NULL;
+      while(0 == isEmpty(tmpTInfo->newObjs)) {
+        char * objtype = (char *)(getItem(tmpTInfo->newObjs));
+        if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
+          // already seen: replace the entry with an incremented count
+          int num = 0;
+          RuntimeHashget(nobjtbl, (int)objtype, &num);
+          RuntimeHashremovekey(nobjtbl, (int)objtype);
+          num++;
+          RuntimeHashadd(nobjtbl, (int)objtype, num);
+        } else {
+          RuntimeHashadd(nobjtbl, (int)objtype, 1);
+        }
+      }
+
+      // output all new obj info
+      iter = RuntimeHashcreateiterator(nobjtbl);
+      while(RunhasNext(iter)) {
+        char * objtype = (char *)Runkey(iter);
+        int num = Runnext(iter);
+        printf(", %s, %d", objtype, num);
+      }
+    }
+    printf("\n");
+    // runtime bookkeeping phases are recorded under fixed names; every other
+    // record counts as a regular task
+    if(strcmp(tmpTInfo->taskName, "tpd checking") == 0) {
+      preprocessingtime += duration;
+    } else if(strcmp(tmpTInfo->taskName, "post task execution") == 0) {
+      postprocessingtime += duration;
+    } else if(strcmp(tmpTInfo->taskName, "objqueue checking") == 0) {
+      objqueuecheckingtime += duration;
+    } else {
+      totaltasktime += duration;
+      averagetasktime += duration;
+      tasknum++;
+    }
+  }
+
+  if(taskInfoOverflow) {
+    printf("Caution: task info overflow!\n");
+  }
+
+  other = totalexetime-totaltasktime-preprocessingtime-postprocessingtime;
+  // when no regular task was recorded the average stays 0; dividing by
+  // tasknum == 0 would be undefined behavior
+  if(tasknum > 0) {
+    averagetasktime /= tasknum;
+  }
+
+  printf("\nTotal time: %lld\n", totalexetime);
+  printf("Total task execution time: %lld (%d%%)\n", totaltasktime,
+         (int)(((double)totaltasktime/(double)totalexetime)*100));
+  printf("Total objqueue checking time: %lld (%d%%)\n",
+         objqueuecheckingtime,
+         (int)(((double)objqueuecheckingtime/(double)totalexetime)*100));
+  printf("Total pre-processing time: %lld (%d%%)\n", preprocessingtime,
+         (int)(((double)preprocessingtime/(double)totalexetime)*100));
+  printf("Total post-processing time: %lld (%d%%)\n", postprocessingtime,
+         (int)(((double)postprocessingtime/(double)totalexetime)*100));
+  printf("Other time: %lld (%d%%)\n", other,
+         (int)(((double)other/(double)totalexetime)*100));
+
+  printf("\nAverage task execution time: %lld\n", averagetasktime);
+
+#else
+  int i = 0;
+  int j = 0;
+
+  BAMBOO_PRINT(0xdddd);
+  // output task related info
+  for(i= 0; i < taskInfoIndex; i++) {
+    TaskInfo* tmpTInfo = taskInfoArray[i];
+    char* tmpName = tmpTInfo->taskName;
+    int nameLen = strlen(tmpName);
+    // the name is emitted one character at a time between 0xddda and 0xdddb
+    BAMBOO_PRINT(0xddda);
+    for(j = 0; j < nameLen; j++) {
+      BAMBOO_PRINT_REG(tmpName[j]);
+    }
+    BAMBOO_PRINT(0xdddb);
+    BAMBOO_PRINT_REG(tmpTInfo->startTime);
+    BAMBOO_PRINT_REG(tmpTInfo->endTime);
+    BAMBOO_PRINT_REG(tmpTInfo->exitIndex);
+    // summarize new obj info: count how often each type name was queued
+    if(tmpTInfo->newObjs != NULL) {
+      struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
+      struct RuntimeIterator * iter = NULL;
+      while(0 == isEmpty(tmpTInfo->newObjs)) {
+        char * objtype = (char *)(getItem(tmpTInfo->newObjs));
+        if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
+          // already seen: replace the entry with an incremented count
+          int num = 0;
+          RuntimeHashget(nobjtbl, (int)objtype, &num);
+          RuntimeHashremovekey(nobjtbl, (int)objtype);
+          num++;
+          RuntimeHashadd(nobjtbl, (int)objtype, num);
+        } else {
+          RuntimeHashadd(nobjtbl, (int)objtype, 1);
+        }
+      }
+
+      // output all new obj info
+      iter = RuntimeHashcreateiterator(nobjtbl);
+      while(RunhasNext(iter)) {
+        char * objtype = (char *)Runkey(iter);
+        int num = Runnext(iter);
+        int nameLen = strlen(objtype);
+        BAMBOO_PRINT(0xddda);
+        for(j = 0; j < nameLen; j++) {
+          BAMBOO_PRINT_REG(objtype[j]);
+        }
+        BAMBOO_PRINT(0xdddb);
+        BAMBOO_PRINT_REG(num);
+      }
+    }
+    // end of this task record
+    BAMBOO_PRINT(0xdddc);
+  }
+
+  if(taskInfoOverflow) {
+    BAMBOO_PRINT(0xefee);
+  }
+
+#ifdef PROFILE_INTERRUPT
+  // output interrupt related info
+  for(i = 0; i < interruptInfoIndex; i++) {
+    InterruptInfo* tmpIInfo = interruptInfoArray[i];
+    BAMBOO_PRINT(0xddde);
+    BAMBOO_PRINT_REG(tmpIInfo->startTime);
+    BAMBOO_PRINT_REG(tmpIInfo->endTime);
+    BAMBOO_PRINT(0xdddf);
+  }
+
+  if(interruptInfoOverflow) {
+    BAMBOO_PRINT(0xefef);
+  }
+#endif // PROFILE_INTERRUPT
+
+  BAMBOO_PRINT(0xeeee);
+#endif
+}
+
+#endif // PROFILE
diff --git a/Robust/src/Runtime/bamboo/multicoretaskprofile.h b/Robust/src/Runtime/bamboo/multicoretaskprofile.h
new file mode 100644 (file)
index 0000000..908fee3
--- /dev/null
@@ -0,0 +1,68 @@
+#ifndef BAMBOO_MULTICORE_TASK_PROFILE_H
+#define BAMBOO_MULTICORE_TASK_PROFILE_H
+#include "multicore.h"
+
+#ifdef TASK
+// data structures for profile mode
+#ifdef PROFILE
+#define TASKINFOLENGTH 3000
+
+typedef struct task_info {
+  char* taskName;                // name passed to profileTaskStart()
+  unsigned long long startTime;  // BAMBOO_GET_EXE_TIME() taken when the record is opened
+  unsigned long long endTime;    // set to -1 at start; stamped by profileTaskEnd()
+  unsigned long long exitIndex;  // set to -1 at start; stored by setTaskExitIndex()
+  struct Queue * newObjs;        // NULL until addNewObjInfo() creates the queue
+} TaskInfo;
+
+TaskInfo * taskInfoArray[TASKINFOLENGTH];
+int taskInfoIndex;
+bool taskInfoOverflow;
+volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
+                                            // 1: running tasks
+                                            // 0: stall
+#ifdef PROFILE_INTERRUPT
+#define INTERRUPTINFOLENGTH 50
+typedef struct interrupt_info {
+  unsigned long long startTime;  // BAMBOO_GET_EXE_TIME() taken in profileInterruptStart_I()
+  unsigned long long endTime;    // set to -1 at start; stamped by profileInterruptEnd_I()
+} InterruptInfo;
+
+InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
+int interruptInfoIndex;
+bool interruptInfoOverflow;
+#endif
+
+INLINE void profileTaskStart(char * taskname);
+INLINE void profileTaskEnd(void);
+void outputProfileData();
+INLINE void inittaskprofiledata();
+
+#define INIT_TASKPROFILE_DATA() inittaskprofiledata()
+#define PROFILE_TASK_START(s) profileTaskStart(s)
+#define PROFILE_TASK_END() profileTaskEnd()
+#ifdef PROFILE_INTERRUPT
+INLINE void profileInterruptStart_I(void);
+INLINE void profileInterruptEnd_I(void);
+
+#define PROFILE_INTERRUPT_START() profileInterruptStart_I()
+#define PROFILE_INTERRUPT_END() profileInterruptEnd_I()
+#else
+#define PROFILE_INTERRUPT_START() 
+#define PROFILE_INTERRUPT_END() 
+#endif
+#else // PROFILE
+#define INIT_TASKPROFILE_DATA() 
+#define PROFILE_TASK_START(s)
+#define PROFILE_TASK_END()
+#define PROFILE_INTERRUPT_START() 
+#define PROFILE_INTERRUPT_END() 
+#endif // PROFILE
+#else // TASK
+#define INIT_TASKPROFILE_DATA() 
+#define PROFILE_TASK_START(s)
+#define PROFILE_TASK_END()
+#define PROFILE_INTERRUPT_START() 
+#define PROFILE_INTERRUPT_END()
+#endif // TASK
+#endif // BAMBOO_MULTICORE_TASK_PROFILE_H
index 8728f37b1a8a1971291a3f2cd2091da5085dca94..3390a10901a82d6f96248d733a2dadb1a82cefed 100644 (file)
@@ -21,8 +21,8 @@ struct pointerblock {
   if ((!(((unsigned int)orig)&0x1))) { \
     if (orig>=curr_heapbase&&orig<curr_heaptop) { \
       void *copy; \
-      if (gc_createcopy(orig,&copy))                                                                                                                                                                                          \
-        enqueue(copy);                                                                                                                   \
+      if (gc_createcopy(orig,&copy))                                                                                                                                                                                                                                                                    \
+        enqueue(copy);                                                                                                                                                                 \
       dst=copy; \
     } \
   }
@@ -30,30 +30,30 @@ struct pointerblock {
 #define ENQUEUE(orig, dst) \
   if (orig>=curr_heapbase&&orig<curr_heaptop) { \
     void *copy; \
-    if (gc_createcopy(orig,&copy))                                                                                                                                                                                \
-      enqueue(copy);                                                                                                         \
+    if (gc_createcopy(orig,&copy))                                                                                                                                                                                                                                                      \
+      enqueue(copy);                                                                                                                                                   \
     dst=copy; \
   }
 #define SENQUEUE(orig, dst) \
   { \
     void *copy; \
-    if (gc_createcopy(orig,&copy))                                                                                                                                                                                \
-      enqueue(copy);                                                                                                         \
+    if (gc_createcopy(orig,&copy))                                                                                                                                                                                                                                                      \
+      enqueue(copy);                                                                                                                                                   \
     dst=copy; \
   }
 #elif defined(FASTCHECK)
 #define ENQUEUE(orig, dst) \
   if (((unsigned int)orig)!=1) { \
     void *copy; \
-    if (gc_createcopy(orig,&copy))                                                                                                                                                                                \
-      enqueue(copy);                                                                                                         \
+    if (gc_createcopy(orig,&copy))                                                                                                                                                                                                                                                      \
+      enqueue(copy);                                                                                                                                                   \
     dst=copy; }
 #else
 #define ENQUEUE(orig, dst) \
   if (orig!=NULL) { \
     void *copy; \
-    if (gc_createcopy(orig,&copy))                                                                                                                                                                                \
-      enqueue(copy);                                                                                                          \
+    if (gc_createcopy(orig,&copy))                                                                                                                                                                                                                                                      \
+      enqueue(copy);                                                                                                                                                    \
     dst=copy; \
   }
 #endif
index 8dd5c892b00c16ab084af998837e2098b8bd1e7d..70c802962a58545190e8bbcf932e6e489aa796fc 100644 (file)
@@ -11,7 +11,7 @@ void * mycalloc_share(struct garbagelist * stackptr,
                       int size) {
   void * p = NULL;
   //int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
-  int isize = (size & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE);
+  int isize = ((size-1)&(~(BAMBOO_CACHE_LINE_MASK)))+(BAMBOO_CACHE_LINE_SIZE);
   int hasgc = 0;
 memalloc:
   BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
@@ -50,7 +50,7 @@ void * mycalloc_share(int m,
                       int size) {
   void * p = NULL;
   //int isize = 2*BAMBOO_CACHE_LINE_SIZE-4+(size-1)&(~BAMBOO_CACHE_LINE_MASK);
-  int isize = (size & (~(BAMBOO_CACHE_LINE_MASK))) + (BAMBOO_CACHE_LINE_SIZE);
+  int isize = ((size-1)&(~(BAMBOO_CACHE_LINE_MASK)))+(BAMBOO_CACHE_LINE_SIZE);
   BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
   p = BAMBOO_SHARE_MEM_CALLOC_I(m, isize); // calloc(m, isize);
   if(p == NULL) {
@@ -114,13 +114,28 @@ inermycalloc_i:
       goto inermycalloc_i;
     }
 #endif
-    tprintf("macalloc_i %s %d \n", file, line);
+    tprintf("mycalloc_i %s %d \n", file, line);
     BAMBOO_EXIT(0xc004);
   }
   return p;
 }
 
 void myfree(void * ptr) {
+  BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT(); // free ptr between a runtime-mode enter/exit pair; myfree_i has the same free logic without this pair
+#ifdef MULTICORE_GC
+  if(ptr >= BAMBOO_LOCAL_HEAP_START_VA ) { // address at or above the first threshold: use BAMBOO_LOCAL_MEM_FREE
+#endif
+  BAMBOO_LOCAL_MEM_FREE(ptr); // unconditional when MULTICORE_GC is not defined
+#ifdef MULTICORE_GC
+} else if(ptr >= BAMBOO_LOCAL_HEAP_START_VA_S) { // otherwise, at or above the _S threshold: use the _S free variant
+  BAMBOO_LOCAL_MEM_FREE_S(ptr);
+} // a pointer below both thresholds is not freed at all
+#endif
+  BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME(); // balances the ENTER_RUNTIME_MODE call at the top
+  return;
+}
+
+void myfree_i(void * ptr) {
 #ifdef MULTICORE_GC
   if(ptr >= BAMBOO_LOCAL_HEAP_START_VA ) {
 #endif
index 9a9d55c1c7574ce2be00ca2fd825bcc0e20e6841..25087a318fe6651d52a0c10bf3ecf98c22f63859 100644 (file)
 void * mycalloc(int m, int size, char * file, int line);
 void * mycalloc_i(int m, int size, char * file, int line);
 void myfree(void * ptr);
+void myfree_i(void * ptr); // counterpart of myfree(); wrapped by RUNFREE_I below
 #define RUNMALLOC(x) mycalloc(1,x,__FILE__,__LINE__) // handle interruption inside
 #define RUNMALLOC_I(x) mycalloc_i(1,x,__FILE__,__LINE__) //with interruption blocked beforehand
 #define RUNFREE(x) myfree(x)
+#define RUNFREE_I(x) myfree_i(x) // NOTE(review): presumably for callers with interruption already blocked, mirroring RUNMALLOC_I - confirm
 #ifdef MULTICORE_GC
 #include "multicoregc.h"
 void * mycalloc_share(struct garbagelist * stackptr, int m, int size);
index a3027dbc65797a0a5735b1faf3d69c1421911e3d..d7048adda394c7fe757a7e4b31b4bbd31de3a2a5 100644 (file)
@@ -323,7 +323,7 @@ void flagorand(void * ptr, int ormask, int andmask) {
   } else
 #endif
   {
-    int oldflag=((int *)ptr)[1];
+    int oldflag=((struct ___Object___ *)ptr)->flag;
     int flag=ormask|oldflag;
     flag&=andmask;
     flagbody(ptr, flag);
@@ -349,7 +349,7 @@ bool intflagorand(void * ptr, int ormask, int andmask) {
   } else
 #endif
   {
-    int oldflag=((int *)ptr)[1];
+    int oldflag=((struct ___Object___ *)ptr)->flag;
     int flag=ormask|oldflag;
     flag&=andmask;
     if (flag==oldflag)   /* Don't do anything */
@@ -362,7 +362,7 @@ bool intflagorand(void * ptr, int ormask, int andmask) {
 }
 
 void flagorandinit(void * ptr, int ormask, int andmask) {
-  int oldflag=((int *)ptr)[1];
+  int oldflag=((struct ___Object___ *)ptr)->flag;
   int flag=ormask|oldflag;
   flag&=andmask;
   flagbody(ptr,flag);
index 0b9b8cbac64b50ea99cdc3ada028632f9bc22d4e..b6c03fd8b83c708ed022d99a394b3fc1cf986e71 100755 (executable)
@@ -67,7 +67,6 @@ echo "-disall  execute to collect whole distribution"
 echo "-disstart specify the start number of distribution information collection"
 echo -multicore generate multi-core version binary
 echo "-numcore set the number of cores (should be used together with -multicore), defaultly set as 1"
-echo "-cacheflush enable cache flush in raw version binary (should be used togethere with -raw)"
 echo "-interrupt generate raw version binary with interruption (should be used togethere with -raw)"
 echo "-rawpath print out execute path information for raw version (should be used together with -raw)"
 echo "-useprofile use profiling data for scheduling (should be used together with -raw)"
@@ -193,7 +192,6 @@ TILERAZLINUXFLAG=false
 TILERAMEMPROFFLAG=false
 TILERACONFIG=''
 TILERACORES=''
-CACHEFLUSHFLAG=false
 RAWCONFIG=''
 DEBUGFLAG=false
 RAWPATHFLAG=false
@@ -469,9 +467,6 @@ elif [[ $1 = '-tileraconfig' ]]
 then
 TILERACONFIG="$2"
 shift
-elif [[ $1 = '-cacheflush' ]]
-then
-CACHEFLUSHFLAG=true
 elif [[ $1 = '-rawconfig' ]]
 then
 RAWCONFIG="$2"
@@ -922,11 +917,6 @@ rm ./*
 
 export RAWRGCCFLAGS="-DTASK -DMULTICORE -DRAW"
 
-if $CACHEFLUSHFLAG
-then # print path
-RAWRGCCFLAGS="${RAWRGCCFLAGS} -DCACHEFLUSH"
-fi
-
 if $RAWPATHFLAG
 then # print path
 RAWRGCCFLAGS="${RAWRGCCFLAGS} -DRAWPATH"
@@ -1036,11 +1026,6 @@ then # TILERAZLINUXFLAG
 TILERACFLAGS="${TILERACFLAGS} -DTILERA_ZLINUX"
 fi
 
-if $CACHEFLUSHFLAG
-then # print path
-TILERACFLAGS="${TILERACFLAGS} -DCACHEFLUSH"
-fi
-
 if $RAWPATHFLAG
 then # print path
 TILERACFLAGS="${TILERACFLAGS} -DRAWPATH"
@@ -1220,20 +1205,8 @@ cp ../Runtime/ObjectHash.h ./
 cp ../Runtime/Queue.h ./
 cp ../Runtime/runtime.h ./
 cp ../Runtime/SimpleHash.h ./
-cp $BAMBOORUNTIME/multicoretask.c ./
-cp $BAMBOORUNTIME/multicoreruntime.c ./
-cp $BAMBOORUNTIME/multicoremem.c ./
-cp $BAMBOORUNTIME/multicoregarbage.c ./
-cp $BAMBOORUNTIME/GCSharedHash.c ./
-cp $BAMBOORUNTIME/MGCHash.c ./
-cp $BAMBOORUNTIME/multicoreruntime.h ./
-cp $BAMBOORUNTIME/multicoremem.h ./
-cp $BAMBOORUNTIME/multicoregarbage.h ./
-cp $BAMBOORUNTIME/multicorecache.h ./
-cp $BAMBOORUNTIME/multicoregc.h ./
-cp $BAMBOORUNTIME/multicorehelper.h ./
-cp $BAMBOORUNTIME/GCSharedHash.h ./
-cp $BAMBOORUNTIME/MGCHash.h ./
+cp $BAMBOORUNTIME/*.c ./
+cp $BAMBOORUNTIME/*.h ./
 cp ../Tilera/Runtime/*.c ./
 cp ../Tilera/Runtime/*.h ./
 cp ../Tilera/Runtime/$TILERA_INDIR/*.c ./