From ad0739f5d076c9f8a2e43bd2643ef7eb1de5e299 Mon Sep 17 00:00:00 2001
From: jzhou <jzhou>
Date: Thu, 12 May 2011 00:00:46 +0000
Subject: [PATCH] Some code cleanup and make the cache adapt version compile

---
 Robust/src/IR/Flat/BuildCode.java             |   6 +-
 Robust/src/IR/Flat/BuildCodeMultiCore.java    |   2 +-
 Robust/src/Runtime/bamboo/multicorecache.c    |  41 ++--
 Robust/src/Runtime/bamboo/multicorecache.h    |  82 ++++----
 Robust/src/Runtime/bamboo/multicoregarbage.c  |  48 ++---
 Robust/src/Runtime/bamboo/multicoregarbage.h  |  25 ++-
 .../src/Runtime/bamboo/multicoregccompact.c   |  15 +-
 Robust/src/Runtime/bamboo/multicoregcmark.c   |  10 +-
 Robust/src/Runtime/bamboo/multicoremem.c      |   3 +-
 Robust/src/Runtime/bamboo/multicoremem.h      |   3 +-
 Robust/src/Runtime/bamboo/multicoremsg.c      |  41 ++--
 Robust/src/Runtime/bamboo/multicoremsg.h      |   4 +-
 Robust/src/Runtime/bamboo/multicoreruntime.c  |  18 +-
 Robust/src/Runtime/bamboo/multicoreruntime.h  |   3 +-
 Robust/src/Runtime/bamboo/multicoretask.c     |   4 +-
 Robust/src/Runtime/bamboo/multicoretask.h     |  16 +-
 .../src/Runtime/bamboo/multicoretaskprofile.c | 183 +++++++++++-------
 .../src/Runtime/bamboo/multicoretaskprofile.h |  84 ++++----
 18 files changed, 307 insertions(+), 281 deletions(-)
diff --git a/Robust/src/IR/Flat/BuildCode.java b/Robust/src/IR/Flat/BuildCode.java
index d61f5b55..94c78da9 100644
--- a/Robust/src/IR/Flat/BuildCode.java
+++ b/Robust/src/IR/Flat/BuildCode.java
@@ -2038,7 +2038,7 @@ fldloop:
         //Don't bother if we aren't in recursive methods...The loops case will catch it
         if (callgraph.getAllMethods(md).contains(md)) {
           if (this.state.MULTICOREGC) {
-            output.println("if(gcflag) gc("+localsprefixaddr+");");
+            output.println("GCCHECK("+localsprefixaddr+");");
           } else {
             output.println("if (unlikely(needtocollect)) checkcollect("+localsprefixaddr+");");
           }
@@ -2097,7 +2097,7 @@ fldloop:
       //Don't bother if we aren't in recursive methods...The loops case will catch it
       if (callgraph.getAllMethods(md).contains(md)) {
         if (this.state.MULTICOREGC) {
-          output.println("if(gcflag) gc("+localsprefixaddr+");");
+          output.println("GCCHECK("+localsprefixaddr+");");
         } else {
           output.println("if (unlikely(needtocollect)) checkcollect("+localsprefixaddr+");");
         }
@@ -2402,7 +2402,7 @@ fldloop:
     if (((state.OOOJAVA||state.THREAD)&&GENERATEPRECISEGC)
         || (this.state.MULTICOREGC)) {
       if(this.state.MULTICOREGC) {
-        output.println("if (gcflag) gc("+localsprefixaddr+");");
+        output.println("GCCHECK("+localsprefixaddr+");");
       } else {
         output.println("if (unlikely(needtocollect)) checkcollect("+localsprefixaddr+");");
       }
diff --git a/Robust/src/IR/Flat/BuildCodeMultiCore.java b/Robust/src/IR/Flat/BuildCodeMultiCore.java
index 87fe2d16..16dc93b1 100644
--- a/Robust/src/IR/Flat/BuildCodeMultiCore.java
+++ b/Robust/src/IR/Flat/BuildCodeMultiCore.java
@@ -697,7 +697,7 @@ public class BuildCodeMultiCore extends BuildCode {
     /* Check to see if we need to do a GC if this is a
      * multi-threaded program...*/
     if(this.state.MULTICOREGC) {
-      output.println("if(gcflag) gc("+localsprefixaddr+");");
+      output.println("GCCHECK("+localsprefixaddr+");");
     }
 
     /* Create queues to store objects need to be transferred to other cores and their destination*/
diff --git a/Robust/src/Runtime/bamboo/multicorecache.c b/Robust/src/Runtime/bamboo/multicorecache.c
index 391ac3c6..9f050024 100644
--- a/Robust/src/Runtime/bamboo/multicorecache.c
+++ b/Robust/src/Runtime/bamboo/multicorecache.c
@@ -1,16 +1,9 @@
 #ifdef GC_CACHE_ADAPT
 #include "multicorecache.h"
+#include "multicoremsg.h"
+#include "multicoregcprofile.h"
 
-typedef struct gc_cache_revise_info {
-  unsigned int orig_page_start_va;
-  unsigned int orig_page_end_va;
-  unsigned int orig_page_index;
-  unsigned int to_page_start_va;
-  unsigned int to_page_end_va;
-  unsigned int to_page_index;
-  unsigned int revised_sampling[NUMCORESACTIVE];
-} gc_cache_revise_info_t;
-gc_cache_revise_info_t gc_cache_revise_infomation;
+gc_cache_revise_info_t gc_cache_revise_information;
 
 // prepare for cache adaption:
 //   -- flush the shared heap
@@ -40,9 +33,9 @@ void cacheAdapt_gc(bool isgccachestage) {
     for(int i = 0; i < NUMCORESACTIVE; i++) { \
       int freq = *local_tbl; \
       local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
-      if(*((unsigned int)(hotfreq)) < freq) { \
-        *((unsigned int)(hotfreq)) = freq; \
-        *((unsigned int)(hotestcore)) = i; \
+      if(*((unsigned int *)(hotfreq)) < freq) { \
+        *((unsigned int *)(hotfreq)) = freq; \
+        *((unsigned int *)(hotestcore)) = i; \
       } \
     } \
   }
@@ -54,10 +47,10 @@ void cacheAdapt_gc(bool isgccachestage) {
     for(int i = 0; i < NUMCORESACTIVE; i++) { \
       int freq = *local_tbl; \
       local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
-      *((unsigned int)(totalfreq)) = *((unsigned int)(totalfreq)) + freq; \
-      if(*((unsigned int)(hotfreq)) < freq) { \
-        *((unsigned int)(hotfreq)) = freq; \
-        *((unsigned int)(hotestcore)) = i; \
+      *((unsigned int *)(totalfreq)) = *((unsigned int *)(totalfreq)) + freq; \
+      if(*((unsigned int *)(hotfreq)) < freq) { \
+        *((unsigned int *)(hotfreq)) = freq; \
+        *((unsigned int *)(hotestcore)) = i; \
       } \
     } \
   }
@@ -235,7 +228,7 @@ void gc_quicksort(unsigned long long *array,unsigned int left,unsigned int right
   return;
 }
 
-INLINE void cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold,unsigned long long ** core2heavypages, unsigned long long * workload,int i) {
+INLINE int cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold,unsigned long long ** core2heavypages, unsigned long long * workload,int i) {
   int j = 1;
   unsigned int index = (unsigned int)core2heavypages[i][0];
   if(workload[i] > workload_threshold) {
@@ -250,6 +243,7 @@ INLINE void cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold
       j += 3;
     }
   }
+  return j;
 }
 
 // Every page cached on the core that accesses it the most. 
@@ -353,7 +347,8 @@ int cacheAdapt_policy_crowd(){
   unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
   // Check the workload of each core
   for(int i = 0; i < NUMCORESACTIVE; i++) {
-    cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
+    unsigned int index=(unsigned int)core2heavypages[i][0];
+    int j=cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
     // Check if the accesses are crowded on few pages
     // sort according to the total access
 inner_crowd:
@@ -441,17 +436,17 @@ void cacheAdapt_phase_client() {
 }
 
 void cacheAdapt_phase_master() {
-  GCPROFILEITEM();
-  gcphase = PREFINISHPHASE;
+  GCPROFILE_ITEM();
+  gc_status_info.gcphase = PREFINISHPHASE;
   // Note: all cores should flush their runtime data including non-gc cores
   GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
   GC_PRINTF("Start prefinish phase \n");
   // cache adapt phase
   cacheAdapt_mutator();
-  CACHEADPAT_OUTPUT_CACHE_POLICY();
+  CACHEADAPT_OUTPUT_CACHE_POLICY();
   cacheAdapt_gc(false);
 
-  GC_CHECK_ALL_CORE_STATUS(PREFINISHPHASE == gcphase);
+  GC_CHECK_ALL_CORE_STATUS(PREFINISHPHASE == gc_status_info.gcphase);
 
   CACHEADAPT_SAMPING_RESET();
   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
diff --git a/Robust/src/Runtime/bamboo/multicorecache.h b/Robust/src/Runtime/bamboo/multicorecache.h
index 5bba898e..e9aeda38 100644
--- a/Robust/src/Runtime/bamboo/multicorecache.h
+++ b/Robust/src/Runtime/bamboo/multicorecache.h
@@ -2,6 +2,9 @@
 #define BAMBOO_MULTICORE_CACHE_H
 #ifdef MULTICORE_GC
 #include "multicore.h"
+#include "multicoremem.h"
+#include "multicoregccompact.h"
+#include "multicoregarbage.h"
 
 #ifdef GC_CACHE_ADAPT
 #define GC_CACHE_SAMPLING_UNIT 100000000
@@ -33,61 +36,65 @@ typedef union
 #define BAMBOO_CACHE_MODE_NONE 2
 #define BAMBOO_CACHE_MODE_COORDS 3
 
-INLINE static void samplingDataInit() {
-  gc_cache_revise_infomation.to_page_start_va = (unsigned int)to->ptr;
-  unsigned int toindex = (unsigned int)(tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
-  gc_cache_revise_infomation.to_page_end_va = gcbaseva + 
-    (BAMBOO_PAGE_SIZE)*(toindex+1);
-  gc_cache_revise_infomation.to_page_index = toindex;
-  gc_cache_revise_infomation.orig_page_start_va = (unsigned int)orig->ptr;
-  gc_cache_revise_infomation.orig_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
-  *(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
-  gc_cache_revise_infomation.orig_page_index = 
-    ((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
+typedef struct gc_cache_revise_info {
+  unsigned int orig_page_start_va;
+  unsigned int orig_page_end_va;
+  unsigned int orig_page_index;
+  unsigned int to_page_start_va;
+  unsigned int to_page_end_va;
+  unsigned int to_page_index;
+  unsigned int revised_sampling[NUMCORESACTIVE];
+} gc_cache_revise_info_t;
+
+extern gc_cache_revise_info_t gc_cache_revise_information;
+
+INLINE static void samplingDataReviseInit(struct moveHelper * orig,struct moveHelper * to) {
+  gc_cache_revise_information.to_page_start_va=(unsigned int)to->ptr;
+  unsigned int toindex=(unsigned int)(to->base-gcbaseva)/(BAMBOO_PAGE_SIZE);
+  gc_cache_revise_information.to_page_end_va=gcbaseva+(BAMBOO_PAGE_SIZE)*(toindex+1);
+  gc_cache_revise_information.to_page_index=toindex;
+  gc_cache_revise_information.orig_page_start_va=(unsigned int)orig->ptr;
+  gc_cache_revise_information.orig_page_end_va=gcbaseva+(BAMBOO_PAGE_SIZE)*(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+  gc_cache_revise_information.orig_page_index=((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
 }
 
 INLINE static void samplingDataConvert(unsigned int current_ptr) {
-  unsigned int tmp_factor = 
-  current_ptr-gc_cache_revise_infomation.to_page_start_va;
-  unsigned int topage=gc_cache_revise_infomation.to_page_index;
-  unsigned int oldpage = gc_cache_revise_infomation.orig_page_index;
+  unsigned int tmp_factor=current_ptr-gc_cache_revise_information.to_page_start_va;
+  unsigned int topage=gc_cache_revise_information.to_page_index;
+  unsigned int oldpage=gc_cache_revise_information.orig_page_index;
   int * newtable=&gccachesamplingtbl_r[topage];
   int * oldtable=&gccachesamplingtbl[oldpage];
   
   for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-    (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
+    (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
     newtable=(int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
     oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
   }
 } 
 
 INLINE static void completePageConvert(struct moveHelper * orig,struct moveHelper * to,unsigned int current_ptr,bool closeToPage) {
-  unsigned int ptr = 0;
-  unsigned int tocompare = 0;
+  unsigned int ptr=0;
+  unsigned int tocompare=0;
   if(closeToPage) {
-    ptr = to->ptr;
-    tocompare = gc_cache_revise_infomation.to_page_end_va;
+    ptr=to->ptr;
+    tocompare=gc_cache_revise_information.to_page_end_va;
   } else {
-    ptr = orig->ptr;
-    tocompare = gc_cache_revise_infomation.orig_page_end_va;
+    ptr=orig->ptr;
+    tocompare=gc_cache_revise_information.orig_page_end_va;
   }
-  if((unsigned int)ptr >= (unsigned int)tocompare) {
+  if((unsigned int)ptr>=(unsigned int)tocompare) {
     // end of an orig/to page
     // compute the impact of this page for the new page
     samplingDataConvert(current_ptr);
     // prepare for an new orig page
-    unsigned int tmp_index = 
-      (unsigned int)((unsigned int)orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
-    gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
-    gc_cache_revise_infomation.orig_page_end_va = gcbaseva + 
-      (BAMBOO_PAGE_SIZE)*(unsigned int)(tmp_index+1);
-    gc_cache_revise_infomation.orig_page_index = tmp_index;
-    gc_cache_revise_infomation.to_page_start_va = to->ptr;
+    unsigned int tmp_index=(unsigned int)((unsigned int)orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
+    gc_cache_revise_information.orig_page_start_va=orig->ptr;
+    gc_cache_revise_information.orig_page_end_va=gcbaseva+(BAMBOO_PAGE_SIZE)*(unsigned int)(tmp_index+1);
+    gc_cache_revise_information.orig_page_index=tmp_index;
+    gc_cache_revise_information.to_page_start_va=to->ptr;
     if(closeToPage) {
-      gc_cache_revise_infomation.to_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
-        *(((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
-      gc_cache_revise_infomation.to_page_index = 
-        ((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE);
+      gc_cache_revise_information.to_page_end_va=gcbaseva+(BAMBOO_PAGE_SIZE)*(((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
+      gc_cache_revise_information.to_page_index=((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE);
     }
   }
 } 
@@ -121,14 +128,15 @@ void gc_output_cache_sampling_r();
 #define CACHEADAPT_SAMPING_RESET() 
 #endif
 
-#define CACHEADAPT_SAMPLING_DATA_REVISE_INIT() samplingDataReviseInit()
+#define CACHEADAPT_SAMPLING_DATA_REVISE_INIT(o,t) \
+  samplingDataReviseInit((o),(t))
 #define CACHEADAPT_SAMPLING_DATA_CONVERT(p) samplingDataConvert((p))
 #define CACHEADAPT_COMPLETE_PAGE_CONVERT(o, t, p, b) \
   completePageConvert((o), (t), (p), (b));
 
 #define CACHEADAPT_GC(b) cacheAdapt_gc(b)
 #define CACHEADAPT_MASTER() cacheAdapt_master()
-#define CACHEADAPT_PHASE_CLIENT() cacheAdpat_phase_client()
+#define CACHEADAPT_PHASE_CLIENT() cacheAdapt_phase_client()
 #define CACHEADAPT_PHASE_MASTER() cacheAdapt_phase_master()
 
 #ifdef GC_CACHE_ADAPT_OUTPUT
@@ -158,7 +166,7 @@ void gc_output_cache_sampling_r();
 #define CACHEADAPT_ENABLE_TIMER() 
 #define CACHEADAPT_DISABLE_TIMER() 
 #define CACHEADAPT_SAMPING_RESET()
-#define CACHEADAPT_SAMPLING_DATA_REVISE_INIT() 
+#define CACHEADAPT_SAMPLING_DATA_REVISE_INIT(o,t) 
 #define CACHEADAPT_SAMPLING_DATA_CONVERT(p) 
 #define CACHEADAPT_COMPLETE_PAGE_CONVERT(o, t, p, b) 
 #define CACHEADAPT_GC(b)
diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.c b/Robust/src/Runtime/bamboo/multicoregarbage.c
index 53f33c30..88f1936e 100644
--- a/Robust/src/Runtime/bamboo/multicoregarbage.c
+++ b/Robust/src/Runtime/bamboo/multicoregarbage.c
@@ -1,12 +1,12 @@
 // TODO: DO NOT support tag!!!
 #ifdef MULTICORE_GC
 #include "runtime.h"
+#include "multicoreruntime.h"
 #include "multicoregarbage.h"
 #include "multicoregcmark.h"
 #include "gcqueue.h"
 #include "multicoregccompact.h"
 #include "multicoregcflush.h"
-#include "multicoreruntime.h"
 #include "multicoregcprofile.h"
 #include "gcqueue.h"
 
@@ -15,6 +15,9 @@ extern unsigned int gcmem_mixed_threshold;
 extern unsigned int gcmem_mixed_usedmem;
 #endif // SMEMM
 
+volatile bool gcflag;
+gc_status_t gc_status_info;
+
 #ifdef GC_DEBUG
 // dump whole mem in blocks
 void dumpSMem() {
@@ -105,8 +108,9 @@ void initmulticoregcdata() {
 
   bamboo_smem_zero_top = NULL;
   gcflag = false;
-  gcprocessing = false;
-  gcphase = FINISHPHASE;
+  gc_status_info.gcprocessing = false;
+  gc_status_info.gcphase = FINISHPHASE;
+
   gcprecheck = true;
   gccurr_heaptop = 0;
   gcself_numsendobjs = 0;
@@ -122,15 +126,11 @@ void initmulticoregcdata() {
   gcmovepending = 0;
   gcblock2fill = 0;
 #ifdef SMEMM
-  gcmem_mixed_threshold = (unsigned int)((BAMBOO_SHARED_MEM_SIZE
-		-bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
+  gcmem_mixed_threshold=(unsigned int)((BAMBOO_SHARED_MEM_SIZE-bamboo_reserved_smem*BAMBOO_SMEM_SIZE)*0.8);
   gcmem_mixed_usedmem = 0;
 #endif
 #ifdef MGC_SPEC
   gc_profile_flag = false;
-#endif
-#ifdef GC_FLUSH_DTLB
-  gc_num_flush_dtlb = 0;
 #endif
   gc_localheap_s = false;
 #ifdef GC_CACHE_ADAPT
@@ -227,7 +227,7 @@ INLINE void checkMarkStatus_p2() {
     }  
     if(i == NUMCORESACTIVE) {    
       // all the core status info are the latest,stop mark phase
-      gcphase = COMPACTPHASE;
+      gc_status_info.gcphase = COMPACTPHASE;
       // restore the gcstatus for all cores
       for(int i = 0; i < NUMCORESACTIVE; i++) {
         gccorestatus[i] = 1;
@@ -606,7 +606,7 @@ INLINE void moveLObjs() {
 } 
 
 void gc_collect(struct garbagelist * stackptr) {
-  gcprocessing = true;
+  gc_status_info.gcprocessing = true;
   // inform the master that this core is at a gc safe point and is ready to 
   // do gc
   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
@@ -651,7 +651,7 @@ void gc_collect(struct garbagelist * stackptr) {
 } 
 
 void gc_nocollect(struct garbagelist * stackptr) {
-  gcprocessing = true;
+  gc_status_info.gcprocessing = true;
   // inform the master that this core is at a gc safe point and is ready to 
   // do gc
   send_msg_4(STARTUPCORE,GCFINISHPRE,BAMBOO_NUM_OF_CORE,self_numsendobjs,self_numreceiveobjs);
@@ -696,10 +696,10 @@ void master_mark(struct garbagelist *stackptr) {
 
   GC_PRINTF("Start mark phase \n");
   GC_SEND_MSG_1_TO_CLIENT(GCSTART);
-  gcphase = MARKPHASE;
+  gc_status_info.gcphase = MARKPHASE;
   // mark phase
 
-  while(MARKPHASE == gcphase) {
+  while(MARKPHASE == gc_status_info.gcphase) {
     mark(isfirst, stackptr);
     isfirst=false;
     // check gcstatus
@@ -780,7 +780,7 @@ void master_compact() {
 }
 
 void master_updaterefs(struct garbagelist * stackptr) {
-  gcphase = FLUSHPHASE;
+  gc_status_info.gcphase = FLUSHPHASE;
   GC_SEND_MSG_1_TO_CLIENT(GCSTARTFLUSH);
   GCPROFILE_ITEM();
   GC_PRINTF("Start flush phase \n");
@@ -788,12 +788,12 @@ void master_updaterefs(struct garbagelist * stackptr) {
   flush(stackptr);
   // now the master core need to decide the new cache strategy
   CACHEADAPT_MASTER();
-  GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gcphase);
+  GC_CHECK_ALL_CORE_STATUS(FLUSHPHASE==gc_status_info.gcphase);
   GC_PRINTF("Finish flush phase \n");
 }
 
 void master_finish() {
-  gcphase = FINISHPHASE;
+  gc_status_info.gcphase = FINISHPHASE;
   
   // invalidate all shared mem pointers
   // put it here as it takes time to inform all the other cores to
@@ -807,7 +807,7 @@ void master_finish() {
   gcflag = false;
   GC_SEND_MSG_1_TO_CLIENT(GCFINISH);
   
-  gcprocessing = false;
+  gc_status_info.gcprocessing = false;
   if(gcflag) {
     // inform other cores to stop and wait for gc
     gcprecheck = true;
@@ -822,8 +822,8 @@ void master_finish() {
 
 void gc_master(struct garbagelist * stackptr) {
   tprintf("start GC !!!!!!!!!!!!! \n");
-  gcprocessing = true;
-  gcphase = INITPHASE;
+  gc_status_info.gcprocessing = true;
+  gc_status_info.gcphase = INITPHASE;
 
   waitconfirm = false;
   numconfirm = 0;
@@ -854,7 +854,7 @@ void gc_master(struct garbagelist * stackptr) {
   master_finish();
 
   GC_PRINTF("gc finished   \n");
-  tprintf("finish GC ! %d \n", gcflag);
+  tprintf("finish GC ! %d \n",gcflag);
 } 
 
 void pregccheck() {
@@ -892,12 +892,6 @@ void pregcprocessing() {
   // we need to make sure during the gcinit phase the shared heap is not 
   // touched. Otherwise, there would be problem when adapt the cache strategy.
   BAMBOO_CLOSE_CUR_MSP();
-#ifdef GC_FLUSH_DTLB
-  if(gc_num_flush_dtlb < GC_NUM_FLUSH_DTLB) {
-    BAMBOO_CLEAN_DTLB();
-    gc_num_flush_dtlb++;
-  }
-#endif
 #if defined(GC_CACHE_ADAPT)&&defined(GC_CACHE_SAMPLING)
   // get the sampling data 
   bamboo_output_dtlb_sampling();
@@ -915,7 +909,7 @@ void postgcprocessing() {
 bool gc(struct garbagelist * stackptr) {
   // check if do gc
   if(!gcflag) {
-    gcprocessing = false;
+    gc_status_info.gcprocessing = false;
     return false;
   }
 
diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.h b/Robust/src/Runtime/bamboo/multicoregarbage.h
index 23e006aa..4ba7aae8 100644
--- a/Robust/src/Runtime/bamboo/multicoregarbage.h
+++ b/Robust/src/Runtime/bamboo/multicoregarbage.h
@@ -6,7 +6,6 @@
 #include "multicorehelper.h"  // for mappings between core # and block #
 #include "structdefs.h"
 #include "multicoregcprofile.h"
-#include "multicorecache.h"
 
 #ifdef GC_DEBUG
 #define GC_PRINTF tprintf
@@ -20,11 +19,6 @@
 // let each gc core to have one big block, this is very important
 // for the computation of NUMBLOCKS(s, n), DO NOT change this!
 
-#ifdef GC_FLUSH_DTLB
-#define GC_NUM_FLUSH_DTLB 1
-unsigned int gc_num_flush_dtlb;
-#endif
-
 typedef enum {
   INIT = 0,           // 0
   DISCOVERED = 2,     // 2
@@ -46,13 +40,14 @@ typedef enum {
   FINISHPHASE              // 0x6/0x7
 } GCPHASETYPE;
 
-volatile bool gcflag;
-volatile bool gcprocessing;
-volatile GCPHASETYPE gcphase; // indicating GC phase
-
-#define WAITFORGCPHASE(phase) while(gcphase != phase) ;
+typedef struct gc_status {
+  volatile bool gcprocessing;
+  volatile GCPHASETYPE gcphase; // indicating GC phase
+  volatile bool gcbusystatus;
+} gc_status_t;
 
-volatile bool gcpreinform; // counter for stopped cores
+extern volatile bool gcflag;
+extern gc_status_t gc_status_info;
 volatile bool gcprecheck; // indicates if there are updated pregc information
 
 unsigned int gccurr_heaptop;
@@ -68,7 +63,7 @@ volatile unsigned int gcnumsrobjs_index;//indicates which entry to record the
 						                // checking process
 								            // the info received in phase 2 must be 
 								            // recorded in the other entry
-volatile bool gcbusystatus;
+
 unsigned int gcself_numsendobjs;
 unsigned int gcself_numreceiveobjs;
 
@@ -126,7 +121,9 @@ int * gccachesamplingtbl_local_r;
 unsigned int size_cachesamplingtbl_local_r;
 int * gccachepolicytbl;
 unsigned int size_cachepolicytbl;
-#endif // GC_CACHE_ADAPT
+#endif
+
+#define WAITFORGCPHASE(phase) while(gc_status_info.gcphase != phase) ;
 
 #define OBJMAPPINGINDEX(p) (((unsigned int)p-gcbaseva)/bamboo_baseobjsize)
 
diff --git a/Robust/src/Runtime/bamboo/multicoregccompact.c b/Robust/src/Runtime/bamboo/multicoregccompact.c
index 3e397532..f71d457e 100644
--- a/Robust/src/Runtime/bamboo/multicoregccompact.c
+++ b/Robust/src/Runtime/bamboo/multicoregccompact.c
@@ -2,6 +2,7 @@
 #include "multicoregccompact.h"
 #include "runtime_arch.h"
 #include "multicoreruntime.h"
+#include "multicoregarbage.h"
 
 INLINE bool gc_checkCoreStatus() {
   BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
@@ -196,7 +197,7 @@ INLINE void resolvePendingMoveRequest() {
   }  
   
   if(!hasrunning && !noblock) {
-    gcphase = SUBTLECOMPACTPHASE;
+    gc_status_info.gcphase = SUBTLECOMPACTPHASE;
     compact2Heaptop();
   }
 } 
@@ -533,14 +534,14 @@ innercompact:
     to->ptr += to->offset;   // for header
     to->top += to->offset;
     *localcompact = (gcdstcore == BAMBOO_NUM_OF_CORE);
-    CACHEADAPT_SAMPLING_DATA_REVISE_INIT();
+    CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
     goto innercompact;
   }
   return true;
 }
 
 void compact() {
-  BAMBOO_ASSERT(COMPACTPHASE == gcphase);
+  BAMBOO_ASSERT(COMPACTPHASE == gc_status_info.gcphase);
   
   // initialize pointers for comapcting
   struct moveHelper * orig = (struct moveHelper *)RUNMALLOC(sizeof(struct moveHelper));
@@ -552,7 +553,7 @@ void compact() {
     RUNFREE(orig);
     RUNFREE(to);
   } else {
-    CACHEADAPT_SAMPLING_DATA_REVISE_INIT();
+    CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
 
     unsigned int filledblocks = 0;
     unsigned int heaptopptr = 0;
@@ -566,13 +567,13 @@ void compact() {
 void compact_master(struct moveHelper * orig, struct moveHelper * to) {
   // initialize pointers for comapcting
   initOrig_Dst(orig, to);
-  CACHEADAPT_SAMPLING_DATA_REVISE_INIT();
+  CACHEADAPT_SAMPLING_DATA_REVISE_INIT(orig, to);
   int filledblocks = 0;
   unsigned int heaptopptr = 0;
   bool finishcompact = false;
   bool iscontinue = true;
   bool localcompact = true;
-  while((COMPACTPHASE == gcphase) || (SUBTLECOMPACTPHASE == gcphase)) {
+  while((COMPACTPHASE == gc_status_info.gcphase) || (SUBTLECOMPACTPHASE == gc_status_info.gcphase)) {
     if((!finishcompact) && iscontinue) {
       finishcompact = compacthelper(orig,to,&filledblocks,&heaptopptr,&localcompact);
     }
@@ -583,7 +584,7 @@ void compact_master(struct moveHelper * orig, struct moveHelper * to) {
       break;
     } else {
       // check if there are spare mem for pending move requires
-      if(COMPACTPHASE == gcphase) {
+      if(COMPACTPHASE == gc_status_info.gcphase) {
         resolvePendingMoveRequest();
       } else {
         compact2Heaptop();
diff --git a/Robust/src/Runtime/bamboo/multicoregcmark.c b/Robust/src/Runtime/bamboo/multicoregcmark.c
index 419eb0ec..95596dca 100644
--- a/Robust/src/Runtime/bamboo/multicoregcmark.c
+++ b/Robust/src/Runtime/bamboo/multicoregcmark.c
@@ -106,9 +106,9 @@ INLINE void markgarbagelist(struct garbagelist * listptr) {
 
 // enqueue root objs
 INLINE void tomark(struct garbagelist * stackptr) {
-  BAMBOO_ASSERT(MARKPHASE == gcphase);
+  BAMBOO_ASSERT(MARKPHASE == gc_status_info.gcphase);
 
-  gcbusystatus = true;
+  gc_status_info.gcbusystatus = true;
   gcnumlobjs = 0;
 
   // enqueue current stack
@@ -260,11 +260,11 @@ INLINE void mark(bool isfirst, struct garbagelist * stackptr) {
   unsigned int isize = 0;
   bool sendStall = false;
   // mark phase
-  while(MARKPHASE == gcphase) {
+  while(MARKPHASE == gc_status_info.gcphase) {
     int counter = 0;
     while(gc_moreItems2()) {
       sendStall = false;
-      gcbusystatus = true;
+      gc_status_info.gcbusystatus = true;
       unsigned int ptr = gc_dequeue2();
 
       unsigned int size = 0;
@@ -293,7 +293,7 @@ INLINE void mark(bool isfirst, struct garbagelist * stackptr) {
       // scan the pointers in object
       scanPtrsInObj(ptr, type);      
     }   
-    gcbusystatus = false;
+    gc_status_info.gcbusystatus = false;
     // send mark finish msg to core coordinator
     if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
       int entry_index = waitconfirm ? (gcnumsrobjs_index==0) : gcnumsrobjs_index;
diff --git a/Robust/src/Runtime/bamboo/multicoremem.c b/Robust/src/Runtime/bamboo/multicoremem.c
index 98cde60e..3ddfd0bf 100644
--- a/Robust/src/Runtime/bamboo/multicoremem.c
+++ b/Robust/src/Runtime/bamboo/multicoremem.c
@@ -3,6 +3,7 @@
 #include "multicoreruntime.h"
 
 #ifdef MULTICORE_GC
+#include "multicoregarbage.h"
 #include "multicorehelper.h"
 #include "multicoremem_helper.h"
 
@@ -300,7 +301,7 @@ void * smemalloc_I(int coren,
     *allocsize = 0;
     if(!gcflag) {
       gcflag = true;
-      if(!gcprocessing) {
+      if(!gc_status_info.gcprocessing) {
         // inform other cores to stop and wait for gc
         gcprecheck = true;
         for(int i = 0; i < NUMCORESACTIVE; i++) {
diff --git a/Robust/src/Runtime/bamboo/multicoremem.h b/Robust/src/Runtime/bamboo/multicoremem.h
index 0b29f7c9..088eef49 100644
--- a/Robust/src/Runtime/bamboo/multicoremem.h
+++ b/Robust/src/Runtime/bamboo/multicoremem.h
@@ -1,4 +1,4 @@
-#ifndef BABMOO_MULTICORE_MEM_H
+#ifndef BAMBOO_MULTICORE_MEM_H
 #define BAMBOO_MULTICORE_MEM_H
 #include "multicore.h"
 #include "Queue.h"
@@ -107,7 +107,6 @@
 #endif // GC_SMALLPAGESIZE
 
 volatile bool gc_localheap_s;
-#include "multicoregarbage.h"
 
 typedef enum {
   SMEMLOCAL = 0x0,// 0x0, using local mem only
diff --git a/Robust/src/Runtime/bamboo/multicoremsg.c b/Robust/src/Runtime/bamboo/multicoremsg.c
index 9df331c3..2ba1532f 100644
--- a/Robust/src/Runtime/bamboo/multicoremsg.c
+++ b/Robust/src/Runtime/bamboo/multicoremsg.c
@@ -2,6 +2,7 @@
 #include "multicoremsg.h"
 #include "runtime.h"
 #include "multicoreruntime.h"
+#include "multicoregarbage.h"
 #include "multicoretaskprofile.h"
 #include "gcqueue.h"
 
@@ -14,7 +15,7 @@ int msgsizearray[] = {
   4, //LOCKDENY,              // 0xD5
   4, //LOCKRELEASE,           // 0xD6
   2, //PROFILEOUTPUT,         // 0xD7
-  2, //PROFILEFINISH,         // 0xD8
+  1, //PROFILEFINISH,         // 0xD8
   6, //REDIRECTLOCK,          // 0xD9
   4, //REDIRECTGROUNT,        // 0xDa
   4, //REDIRECTDENY,          // 0xDb
@@ -110,7 +111,7 @@ INLINE void processmsg_transobj_I(int msglength) {
   
   self_numreceiveobjs++;
 #ifdef MULTICORE_GC
-  if(gcprocessing) {
+  if(gc_status_info.gcprocessing) {
     if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
       // set the gcprecheck to enable checking again
       gcprecheck = true;
@@ -288,17 +289,15 @@ INLINE void processmsg_profileoutput_I() {
 #endif
   // cache the msg first
   if(BAMBOO_CHECK_SEND_MODE()) {
-    cache_msg_2_I(STARTUPCORE,PROFILEFINISH,BAMBOO_NUM_OF_CORE);
+    cache_msg_1_I(STARTUPCORE,PROFILEFINISH);
   } else {
-    send_msg_2_I(STARTUPCORE,PROFILEFINISH,BAMBOO_NUM_OF_CORE);
+    send_msg_1_I(STARTUPCORE,PROFILEFINISH);
   }
 }
 
 INLINE void processmsg_profilefinish_I() {
   BAMBOO_ASSERT(BAMBOO_NUM_OF_CORE == STARTUPCORE);
-  int data1 = msgdata[msgdataindex];
-  MSG_INDEXINC_I();
-  profilestatus[data1] = 0;
+  numconfirm--;
 }
 #endif // PROFILE
 
@@ -352,7 +351,7 @@ INLINE void processmsg_memrequest_I() {
   int allocsize = 0;
   void * mem = NULL;
 #ifdef MULTICORE_GC
-  if(!gcprocessing || !gcflag) {
+  if(!gc_status_info.gcprocessing || !gcflag) {
     // either not doing GC or the master core has decided to stop GC but 
     // // still sending msgs to other cores to inform them to stop the GC
 #endif
@@ -379,7 +378,7 @@ INLINE void processmsg_memresponse_I() {
   // receive a shared memory response msg
 #ifdef MULTICORE_GC
   // if is currently doing gc, dump this msg
-  if(!gcprocessing) {
+  if(!gc_status_info.gcprocessing) {
 #endif
   if(data2 == 0) {
 #ifdef MULTICORE_GC
@@ -427,22 +426,22 @@ INLINE void processmsg_gcstartpre_I() {
 }
 
 INLINE void processmsg_gcstartinit_I() {
-  gcphase = INITPHASE;
+  gc_status_info.gcphase = INITPHASE;
 }
 
 INLINE void processmsg_gcstart_I() {
   // set the GC flag
-  gcphase = MARKPHASE;
+  gc_status_info.gcphase = MARKPHASE;
 }
 
 INLINE void processmsg_gcstartcompact_I() {
   gcblock2fill = msgdata[msgdataindex];
   MSG_INDEXINC_I();  
-  gcphase = COMPACTPHASE;
+  gc_status_info.gcphase = COMPACTPHASE;
 }
 
 INLINE void processmsg_gcstartflush_I() {
-  gcphase = FLUSHPHASE;
+  gc_status_info.gcphase = FLUSHPHASE;
 }
 
 INLINE void processmsg_gcfinishpre_I() {
@@ -513,7 +512,7 @@ INLINE void processmsg_gcfinishcompact_I() {
   MSG_INDEXINC_I(); 
   // only gc cores need to do compact
   if(cnum < NUMCORES4GC) {
-    if(COMPACTPHASE == gcphase) {
+    if(COMPACTPHASE == gc_status_info.gcphase) {
       gcfilledblocks[cnum] = filledblocks;
       gcloads[cnum] = heaptop;
     }
@@ -550,18 +549,18 @@ INLINE void processmsg_gcfinishflush_I() {
 
 INLINE void processmsg_gcfinish_I() {
   // received a GC finish msg
-  gcphase = FINISHPHASE;
-  gcprocessing = false;
+  gc_status_info.gcphase = FINISHPHASE;
+  gc_status_info.gcprocessing = false;
 }
 
 INLINE void processmsg_gcmarkconfirm_I() {
   BAMBOO_ASSERT(((BAMBOO_NUM_OF_CORE!=STARTUPCORE)&&(BAMBOO_NUM_OF_CORE<=NUMCORESACTIVE-1)));
-  gcbusystatus = gc_moreItems2_I();
+  gc_status_info.gcbusystatus = gc_moreItems2_I();
   // send response msg, cahce the msg first
   if(BAMBOO_CHECK_SEND_MODE()) {
-    cache_msg_5_I(STARTUPCORE,GCMARKREPORT,BAMBOO_NUM_OF_CORE,gcbusystatus,gcself_numsendobjs,gcself_numreceiveobjs);
+    cache_msg_5_I(STARTUPCORE,GCMARKREPORT,BAMBOO_NUM_OF_CORE,gc_status_info.gcbusystatus,gcself_numsendobjs,gcself_numreceiveobjs);
   } else {
-    send_msg_5_I(STARTUPCORE,GCMARKREPORT,BAMBOO_NUM_OF_CORE,gcbusystatus,gcself_numsendobjs,gcself_numreceiveobjs);
+    send_msg_5_I(STARTUPCORE,GCMARKREPORT,BAMBOO_NUM_OF_CORE,gc_status_info.gcbusystatus,gcself_numsendobjs,gcself_numreceiveobjs);
   }
 }
 
@@ -601,7 +600,7 @@ INLINE void processmsg_gcmarkedobj_I() {
     gc_enqueue_I(data1);
   }
   gcself_numreceiveobjs++;
-  gcbusystatus = true;
+  gc_status_info.gcbusystatus = true;
 }
 
 INLINE void processmsg_gcmovestart_I() {
@@ -663,7 +662,7 @@ INLINE void processmsg_gcprofiles_I() {
 
 #ifdef GC_CACHE_ADAPT
 INLINE void processmsg_gcstartpref_I() {
-  gcphase = PREFINISHPHASE;
+  gc_status_info.gcphase = PREFINISHPHASE; // switch to the pre-finish phase (this handler only exists under GC_CACHE_ADAPT)
 }
 
 INLINE void processmsg_gcfinishpref_I() {
diff --git a/Robust/src/Runtime/bamboo/multicoremsg.h b/Robust/src/Runtime/bamboo/multicoremsg.h
index 56bc67e1..401a6956 100644
--- a/Robust/src/Runtime/bamboo/multicoremsg.h
+++ b/Robust/src/Runtime/bamboo/multicoremsg.h
@@ -106,8 +106,8 @@ volatile bool isMsgHanging;
  *          lock type: 0 -- read; 1 -- write
  * ProfileMsg: 7 + totalexetime
  *               (size is always 2 * sizeof(int))
- *             8 + corenum
- *               (size is always 2 * sizeof(int))
+ *             8 
+ *               (size is always sizeof(int))
  * StatusMsg: d (size is always 1 * sizeof(int))
  *            e + status + corenum + sendobjs + receiveobjs
  *              (size is always 5 * sizeof(int))
diff --git a/Robust/src/Runtime/bamboo/multicoreruntime.c b/Robust/src/Runtime/bamboo/multicoreruntime.c
index ad40a02b..88f7d527 100644
--- a/Robust/src/Runtime/bamboo/multicoreruntime.c
+++ b/Robust/src/Runtime/bamboo/multicoreruntime.c
@@ -3,6 +3,7 @@
 #include "runtime.h"
 #include "multicoreruntime.h"
 #include "methodheaders.h"
+#include "multicoregarbage.h"
 
 extern int classsize[];
 extern int typearray[];
@@ -629,9 +630,6 @@ INLINE void recordtotalexetime() {
   totalexetime = BAMBOO_GET_EXE_TIME()-bamboo_start_time;
 #else // USEIO
   BAMBOO_PRINT(BAMBOO_GET_EXE_TIME()-bamboo_start_time);
-#ifdef GC_FLUSH_DTLB
-  BAMBOO_PRINT_REG(gc_num_flush_dtlb);
-#endif
 #ifndef BAMBOO_MEMPROF
   BAMBOO_PRINT(0xbbbbbbbb);
 #endif
@@ -641,6 +639,11 @@ INLINE void recordtotalexetime() {
 INLINE void getprofiledata_I() {
   //profile mode, send msgs to other cores to request pouring out progiling data
 #ifdef PROFILE
+  // Use numconfirm to check whether all cores have finished outputting their
+  // task profiling information. This is safe because, by the time execution
+  // reaches this phase, there should be no other msgs except the PROFILEFINISH
+  // msg, and there should be no gc either.
+  numconfirm=NUMCORESACTIVE-1;
   BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
   for(i = 1; i < NUMCORESACTIVE; ++i) {
     // send profile request msg to core i
@@ -652,14 +655,7 @@ INLINE void getprofiledata_I() {
 #endif
   while(true) {
     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
-    profilestatus[BAMBOO_NUM_OF_CORE] = 0;
-    // check the status of all cores
-    for(i = 0; i < NUMCORESACTIVE; ++i) {
-      if(profilestatus[i] != 0) {
-        break;
-      }
-    }  
-    if(i != NUMCORESACTIVE) {
+    if(numconfirm != 0) {
       int halt = 100;
       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
       while(halt--) {
diff --git a/Robust/src/Runtime/bamboo/multicoreruntime.h b/Robust/src/Runtime/bamboo/multicoreruntime.h
index 9041d74a..31ba7d3c 100644
--- a/Robust/src/Runtime/bamboo/multicoreruntime.h
+++ b/Robust/src/Runtime/bamboo/multicoreruntime.h
@@ -7,12 +7,13 @@
 #include "multicoremem.h"
 #include "multicoretask.h"
 #include "multicoremgc.h"
+#include "multicorecache.h"
 
 //Define the following line if the base object type has pointers
 //#define OBJECTHASPOINTERS
 
-
 #ifdef MULTICORE_GC
+extern volatile bool gcflag;
 #define GCCHECK(p) \
   if(gcflag) gc(p)
 #else
diff --git a/Robust/src/Runtime/bamboo/multicoretask.c b/Robust/src/Runtime/bamboo/multicoretask.c
index 77e89921..2a3c7b27 100644
--- a/Robust/src/Runtime/bamboo/multicoretask.c
+++ b/Robust/src/Runtime/bamboo/multicoretask.c
@@ -53,7 +53,7 @@ INLINE void dislocktable() {
 #endif
 }
 
-INLINE void inittaskdata() {
+void inittaskdata() {
   if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
     // startup core to initialize corestatus[]
     total_num_t6 = 0; // TODO for test
@@ -71,7 +71,7 @@ INLINE void inittaskdata() {
   INIT_TASKPROFILE_DATA();
 }
 
-INLINE void distaskdata() {
+void distaskdata() {
   if(activetasks != NULL) {
     genfreehashtable(activetasks);
   }
diff --git a/Robust/src/Runtime/bamboo/multicoretask.h b/Robust/src/Runtime/bamboo/multicoretask.h
index e6f1c43d..10dc586e 100644
--- a/Robust/src/Runtime/bamboo/multicoretask.h
+++ b/Robust/src/Runtime/bamboo/multicoretask.h
@@ -43,19 +43,11 @@ void releasewritelock_r(void * lock, void * redirectlock);
 // if return -1: the lock request is redirected
 //            0: the lock request is approved
 //            1: the lock request is denied
-INLINE int processlockrequest_I(int locktype,
-                                int lock,
-                                int obj,
-                                int requestcore,
-                                int rootrequestcore,
-                                bool cache);
-INLINE void processlockrelease_I(int locktype,
-                                 int lock,
-                                 int redirectlock,
-                                 bool redirect);
+int processlockrequest_I(int locktype,int lock,int obj,int requestcore,int rootrequestcore,bool cache);
+void processlockrelease_I(int locktype,int lock,int redirectlock,bool redirect);
 
-INLINE void inittaskdata();
-INLINE void distaskdata();
+void inittaskdata();
+void distaskdata();
 
 #define INITTASKDATA() inittaskdata()
 #define DISTASKDATA() distaskdata()
diff --git a/Robust/src/Runtime/bamboo/multicoretaskprofile.c b/Robust/src/Runtime/bamboo/multicoretaskprofile.c
index 5fbc9abe..9d666622 100644
--- a/Robust/src/Runtime/bamboo/multicoretaskprofile.c
+++ b/Robust/src/Runtime/bamboo/multicoretaskprofile.c
@@ -1,24 +1,71 @@
+#ifdef TASK
 #ifdef PROFILE
-
 #include "multicoretaskprofile.h"
 
-void inittaskprofiledata() {
-  if(STARTUPCORE == BAMBOO_NUM_OF_CORE) {
-    // startup core to initialize corestatus[]
-    for(i = 0; i < NUMCORESACTIVE; ++i) {
-      // initialize the profile data arrays
-      profilestatus[i] = 1;
-    } // for(i = 0; i < NUMCORESACTIVE; ++i)
-  }
+int profilestatus[NUMCORESACTIVE]; // records status of each core
+                                   // 1: running tasks
+                                   // 0: stall
+///////////////////////////////////////////////////////////////////////////////
+// This global variable records the profiling information of tasks, including
+// when each task starts and ends, the exit path of each execution of the task,
+// and how many new objs are created as well as the type of the new objs. It
+// also holds an index indicating how many tasks have been recorded and a flag
+// indicating whether the buffer has overflowed. This profile information is
+// supposed to be dumped out before the execution of the program terminates.
+//
+// Maintaining protocols: 
+//     INIT_TASKPROFILE_DATA() initializes this variable and should be invoked 
+//     before executing any tasks.
+//
+//     PROFILE_TASK_START() creates a new item to record a task's execution. It 
+//     should be invoked right before starting a new task as it also records 
+//     the start time of a task's execution.
+//
+//     PROFILE_TASK_END() records the stop time of a task's execution and closes
+//     the task record item. It should be invoked immediately after a task
+//     finishes execution.
+// 
+//   The following functions record corresponding task information during the 
+//   execution of a task and should be wrapped with a 
+//   PROFILE_TASK_START()/PROFILE_TASK_END() pair.
+//     setTaskExitIndex() records the exit path of the execution.
+//     addNewObjInfo() records the information of new objs created by the task.
+//
+//   This variable can only be updated with the functions/MACROs listed above! 
+///////////////////////////////////////////////////////////////////////////////
+TaskProfile_t taskProfileInfo;
 
+#ifdef PROFILE_INTERRUPT
+///////////////////////////////////////////////////////////////////////////////
+// This global variable records the profiling information of the interrupts
+// that happened during the execution of a program. It records when an interrupt
+// happened and when it returned to normal program execution.
+//
+// Maintaining protocols: 
+//     INIT_TASKPROFILE_DATA() initializes this variable and should be invoked 
+//     before executing any tasks.
+//
+//     PROFILE_INTERRUPT_START() creates a new item to record the information 
+//     of an interrupt. It should be invoked at the very beginning of an
+//     interrupt handler.
+//
+//     PROFILE_INTERRUPT_END() records when an interrupt returns from its 
+//     handler.  It should be invoked right before an interrupt handler returns.
+// 
+//   This variable can only be updated with the functions/MACROs listed above! 
+///////////////////////////////////////////////////////////////////////////////
+InterruptProfile_t interruptProfileInfo;
+#endif
+
+void inittaskprofiledata() { // reset all task (and, if enabled, interrupt) profiling state
   stall = false;
   totalexetime = -1;
-  taskInfoIndex = 0;
-  taskInfoOverflow = false;
+  taskProfileInfo.taskInfoIndex = 0; // no task records stored yet
+  taskProfileInfo.taskInfoOverflow = false; // the task record buffer has not overflowed
 #ifdef PROFILE_INTERRUPT
-  interruptInfoIndex = 0;
-  interruptInfoOverflow = false;
-#endif // PROFILE_INTERRUPT
+  interruptProfileInfo.interruptInfoIndex = 0; // no interrupt records stored yet
+  interruptProfileInfo.interruptInfoOverflow = false; // the interrupt record buffer has not overflowed
+#endif // PROFILE_INTERRUPT
 }
 
 // output the profiling data
@@ -35,34 +82,33 @@ void outputProfileData() {
 
   printf("Task Name, Start Time, End Time, Duration, Exit Index(, NewObj Name, Num)+\n");
   // output task related info
-  for(i = 0; i < taskInfoIndex; i++) {
-    TaskInfo* tmpTInfo = taskInfoArray[i];
+  for(i = 0; i < taskProfileInfo.taskInfoIndex; i++) {
+    TaskInfo* tmpTInfo = taskProfileInfo.taskInfoArray[i];
     unsigned long long duration = tmpTInfo->endTime - tmpTInfo->startTime;
-    printf("%s, %lld, %lld, %lld, %lld", tmpTInfo->taskName, 
-        tmpTInfo->startTime, tmpTInfo->endTime, duration, tmpTInfo->exitIndex);
+    printf("%s, %lld, %lld, %lld, %lld",tmpTInfo->taskName,tmpTInfo->startTime,tmpTInfo->endTime,duration,tmpTInfo->exitIndex);
     // summarize new obj info
     if(tmpTInfo->newObjs != NULL) {
       struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
       struct RuntimeIterator * iter = NULL;
       while(0 == isEmpty(tmpTInfo->newObjs)) {
-		char * objtype = (char *)(getItem(tmpTInfo->newObjs));
-		if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
-		  int num = 0;
-		  RuntimeHashget(nobjtbl, (int)objtype, &num);
-		  RuntimeHashremovekey(nobjtbl, (int)objtype);
-		  num++;
-		  RuntimeHashadd(nobjtbl, (int)objtype, num);
-		} else {
-		  RuntimeHashadd(nobjtbl, (int)objtype, 1);
-		}
+        char * objtype = (char *)(getItem(tmpTInfo->newObjs));
+        if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
+          int num = 0;
+          RuntimeHashget(nobjtbl, (int)objtype, &num);
+          RuntimeHashremovekey(nobjtbl, (int)objtype);
+          num++;
+          RuntimeHashadd(nobjtbl, (int)objtype, num);
+        } else {
+          RuntimeHashadd(nobjtbl, (int)objtype, 1);
+        }
       }
 
       // output all new obj info
       iter = RuntimeHashcreateiterator(nobjtbl);
       while(RunhasNext(iter)) {
-		char * objtype = (char *)Runkey(iter);
-		int num = Runnext(iter);
-		printf(", %s, %d", objtype, num);
+        char * objtype = (char *)Runkey(iter);
+        int num = Runnext(iter);
+        printf(", %s, %d", objtype, num);
       }
     }
     printf("\n");
@@ -79,7 +125,7 @@ void outputProfileData() {
     }
   }
 
-  if(taskInfoOverflow) {
+  if(taskProfileInfo.taskInfoOverflow) {
     printf("Caution: task info overflow!\n");
   }
 
@@ -87,17 +133,11 @@ void outputProfileData() {
   averagetasktime /= tasknum;
 
   printf("\nTotal time: %lld\n", totalexetime);
-  printf("Total task execution time: %lld (%d%%)\n", totaltasktime,
-         (int)(((double)totaltasktime/(double)totalexetime)*100));
-  printf("Total objqueue checking time: %lld (%d%%)\n",
-         objqueuecheckingtime,
-         (int)(((double)objqueuecheckingtime/(double)totalexetime)*100));
-  printf("Total pre-processing time: %lld (%d%%)\n", preprocessingtime,
-         (int)(((double)preprocessingtime/(double)totalexetime)*100));
-  printf("Total post-processing time: %lld (%d%%)\n", postprocessingtime,
-         (int)(((double)postprocessingtime/(double)totalexetime)*100));
-  printf("Other time: %lld (%d%%)\n", other,
-         (int)(((double)other/(double)totalexetime)*100));
+  printf("Total task execution time: %lld (%d%%)\n",totaltasktime,(int)(((double)totaltasktime/(double)totalexetime)*100));
+  printf("Total objqueue checking time: %lld (%d%%)\n",objqueuecheckingtime,(int)(((double)objqueuecheckingtime/(double)totalexetime)*100));
+  printf("Total pre-processing time: %lld (%d%%)\n", preprocessingtime,(int)(((double)preprocessingtime/(double)totalexetime)*100));
+  printf("Total post-processing time: %lld (%d%%)\n", postprocessingtime,(int)(((double)postprocessingtime/(double)totalexetime)*100));
+  printf("Other time: %lld (%d%%)\n", other,(int)(((double)other/(double)totalexetime)*100));
 
   printf("\nAverage task execution time: %lld\n", averagetasktime);
 
@@ -107,7 +147,7 @@ void outputProfileData() {
 
   BAMBOO_PRINT(0xdddd);
   // output task related info
-  for(i= 0; i < taskInfoIndex; i++) {
+  for(i= 0; i < taskProfileInfo.taskInfoIndex; i++) {
     TaskInfo* tmpTInfo = taskInfoArray[i];
     char* tmpName = tmpTInfo->taskName;
     int nameLen = strlen(tmpName);
@@ -123,56 +163,57 @@ void outputProfileData() {
       struct RuntimeHash * nobjtbl = allocateRuntimeHash(5);
       struct RuntimeIterator * iter = NULL;
       while(0 == isEmpty(tmpTInfo->newObjs)) {
-		char * objtype = (char *)(getItem(tmpTInfo->newObjs));
-		if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
-		  int num = 0;
-		  RuntimeHashget(nobjtbl, (int)objtype, &num);
-		  RuntimeHashremovekey(nobjtbl, (int)objtype);
-		  num++;
-		  RuntimeHashadd(nobjtbl, (int)objtype, num);
-		} else {
-		  RuntimeHashadd(nobjtbl, (int)objtype, 1);
-		}
+        char * objtype = (char *)(getItem(tmpTInfo->newObjs));
+        if(RuntimeHashcontainskey(nobjtbl, (int)(objtype))) {
+          int num = 0;
+          RuntimeHashget(nobjtbl, (int)objtype, &num);
+          RuntimeHashremovekey(nobjtbl, (int)objtype);
+          num++;
+          RuntimeHashadd(nobjtbl, (int)objtype, num);
+        } else {
+          RuntimeHashadd(nobjtbl, (int)objtype, 1);
+        }
       }
 
       // ouput all new obj info
       iter = RuntimeHashcreateiterator(nobjtbl);
       while(RunhasNext(iter)) {
-		char * objtype = (char *)Runkey(iter);
-		int num = Runnext(iter);
-		int nameLen = strlen(objtype);
-		BAMBOO_PRINT(0xddda);
-		for(j = 0; j < nameLen; j++) {
-		  BAMBOO_PRINT_REG(objtype[j]);
-		}
-		BAMBOO_PRINT(0xdddb);
-		BAMBOO_PRINT_REG(num);
-	  }
+        char * objtype = (char *)Runkey(iter);
+        int num = Runnext(iter);
+        int nameLen = strlen(objtype);
+        BAMBOO_PRINT(0xddda);
+        for(j = 0; j < nameLen; j++) {
+          BAMBOO_PRINT_REG(objtype[j]);
+        }
+        BAMBOO_PRINT(0xdddb);
+        BAMBOO_PRINT_REG(num);
+      }
     }
     BAMBOO_PRINT(0xdddc);
   }
 
-  if(taskInfoOverflow) {
-	BAMBOO_PRINT(0xefee);
+  if(taskProfileInfo.taskInfoOverflow) {
+    BAMBOO_PRINT(0xefee);
   }
 
 #ifdef PROFILE_INTERRUPT
   // output interrupt related info
-  for(i = 0; i < interruptInfoIndex; i++) {
-    InterruptInfo* tmpIInfo = interruptInfoArray[i];
+  for(i = 0; i < interruptProfileInfo.interruptInfoIndex; i++) {
+    InterruptInfo* tmpIInfo = interruptProfileInfo.interruptInfoArray[i];
     BAMBOO_PRINT(0xddde);
     BAMBOO_PRINT_REG(tmpIInfo->startTime);
     BAMBOO_PRINT_REG(tmpIInfo->endTime);
     BAMBOO_PRINT(0xdddf);
   }
 
-  if(interruptInfoOverflow) {
+  if(interruptProfileInfo.interruptInfoOverflow) {
     BAMBOO_PRINT(0xefef);
   }
-#endif // PROFILE_INTERRUPT
+#endif 
 
   BAMBOO_PRINT(0xeeee);
 #endif
 }
 
-#endif // PROFILE
+#endif // PROFILE 
+#endif // TASK
diff --git a/Robust/src/Runtime/bamboo/multicoretaskprofile.h b/Robust/src/Runtime/bamboo/multicoretaskprofile.h
index 20c37e1e..eb463714 100644
--- a/Robust/src/Runtime/bamboo/multicoretaskprofile.h
+++ b/Robust/src/Runtime/bamboo/multicoretaskprofile.h
@@ -13,78 +13,83 @@ typedef struct task_info {
   unsigned long long endTime;
   unsigned long long exitIndex;
   struct Queue * newObjs;
-} TaskInfo;
+} TaskInfo_t;
 
-TaskInfo * taskInfoArray[TASKINFOLENGTH];
-int taskInfoIndex;
-bool taskInfoOverflow;
-volatile int profilestatus[NUMCORESACTIVE]; // records status of each core
-                                            // 1: running tasks
-                                            // 0: stall
+typedef struct task_profile {
+  TaskInfo_t * taskInfoArray[TASKINFOLENGTH];
+  int taskInfoIndex;
+  bool taskInfoOverflow;
+} TaskProfile_t;
+
+extern TaskProfile_t taskProfileInfo;
 #ifdef PROFILE_INTERRUPT
 #define INTERRUPTINFOLENGTH 50
 typedef struct interrupt_info {
   unsigned long long startTime;
   unsigned long long endTime;
-} InterruptInfo;
+} InterruptInfo_t;
+
+typedef struct interrupt_profile {
+  InterruptInfo_t * interruptInfoArray[INTERRUPTINFOLENGTH];
+  int interruptInfoIndex;
+  bool interruptInfoOverflow;
+} InterruptProfile_t;
 
-InterruptInfo * interruptInfoArray[INTERRUPTINFOLENGTH];
-int interruptInfoIndex;
-bool interruptInfoOverflow;
+extern InterruptProfile_t interruptProfileInfo;
 #endif
 
 void outputProfileData();
 void inittaskprofiledata();
 
 INLINE static void setTaskExitIndex(int index) {
-  taskInfoArray[taskInfoIndex]->exitIndex = index;
+  if(!taskProfileInfo.taskInfoOverflow) taskProfileInfo.taskInfoArray[taskProfileInfo.taskInfoIndex]->exitIndex = index; // record the exit path on the open task record; after overflow taskInfoIndex==TASKINFOLENGTH, which is past the end of taskInfoArray, so skip
 }
 
 INLINE static void addNewObjInfo(void * nobj) {
-  if(taskInfoArray[taskInfoIndex]->newObjs == NULL) {
-    taskInfoArray[taskInfoIndex]->newObjs = createQueue();
+  if(!taskProfileInfo.taskInfoOverflow) { // after overflow taskInfoIndex==TASKINFOLENGTH, which is past the end of taskInfoArray, so there is no open record to update
+    if(taskProfileInfo.taskInfoArray[taskProfileInfo.taskInfoIndex]->newObjs==NULL) taskProfileInfo.taskInfoArray[taskProfileInfo.taskInfoIndex]->newObjs=createQueue(); // create the per-task queue lazily on the first new object
+    addNewItem(taskProfileInfo.taskInfoArray[taskProfileInfo.taskInfoIndex]->newObjs, nobj); // append nobj to the open task record's new-object queue
   }
-  addNewItem(taskInfoArray[taskInfoIndex]->newObjs, nobj);
 }
 
 INLINE static void profileTaskStart(char * taskname) {
-  if(!taskInfoOverflow) {
-    TaskInfo* taskInfo = RUNMALLOC(sizeof(struct task_info));
-    taskInfoArray[taskInfoIndex] = taskInfo;
-    taskInfo->taskName = taskname;
-    taskInfo->startTime = BAMBOO_GET_EXE_TIME();
-    taskInfo->endTime = -1;
-    taskInfo->exitIndex = -1;
-    taskInfo->newObjs = NULL;
+  if(!taskProfileInfo.taskInfoOverflow) { // stop recording once the record buffer is full
+    TaskInfo_t* taskInfo=RUNMALLOC(sizeof(struct task_info)); // the typedef is TaskInfo_t; the old TaskInfo name no longer exists
+    taskProfileInfo.taskInfoArray[taskProfileInfo.taskInfoIndex]=taskInfo; // open a new record in the current slot
+    taskInfo->taskName=taskname;
+    taskInfo->startTime=BAMBOO_GET_EXE_TIME();
+    taskInfo->endTime=-1; // placeholder until profileTaskEnd() stores the real end time
+    taskInfo->exitIndex=-1; // placeholder until setTaskExitIndex() is called
+    taskInfo->newObjs=NULL; // queue is created lazily by addNewObjInfo()
   }
 }
 
 INLINE staitc void profileTaskEnd() {
-  if(!taskInfoOverflow) {
-    taskInfoArray[taskInfoIndex]->endTime = BAMBOO_GET_EXE_TIME();
-    taskInfoIndex++;
-    if(taskInfoIndex == TASKINFOLENGTH) {
-      taskInfoOverflow = true;
+  if(!taskProfileInfo.taskInfoOverflow) {
+    taskProfileInfo.taskInfoArray[taskProfileInfo.taskInfoIndex]->endTime=BAMBOO_GET_EXE_TIME();
+    taskProfileInfo.taskInfoIndex++;
+    if(taskProfileInfo.taskInfoIndex == TASKINFOLENGTH) {
+      taskProfileInfo.taskInfoOverflow=true;
     }
   }
 }
 
 #ifdef PROFILE_INTERRUPT
 INLINE static void profileInterruptStart_I(void) {
-  if(!interruptInfoOverflow) {
-    InterruptInfo* intInfo = RUNMALLOC_I(sizeof(struct interrupt_info));
-    interruptInfoArray[interruptInfoIndex] = intInfo;
-    intInfo->startTime = BAMBOO_GET_EXE_TIME();
-    intInfo->endTime = -1;
+  if(!interruptProfileInfo.interruptInfoOverflow) { // stop recording once the record buffer is full
+    InterruptInfo_t* intInfo=RUNMALLOC_I(sizeof(struct interrupt_info)); // the typedef is InterruptInfo_t; the old InterruptInfo name no longer exists
+    interruptProfileInfo.interruptInfoArray[interruptProfileInfo.interruptInfoIndex]=intInfo; // open a new record in the current slot
+    intInfo->startTime=BAMBOO_GET_EXE_TIME();
+    intInfo->endTime=-1; // placeholder until profileInterruptEnd_I() stores the real end time
   }
 }
 
 INLINE static void profileInterruptEnd_I(void) {
-  if(!interruptInfoOverflow) {
-    interruptInfoArray[interruptInfoIndex]->endTime=BAMBOO_GET_EXE_TIME();
-    interruptInfoIndex++;
-    if(interruptInfoIndex == INTERRUPTINFOLENGTH) {
-      interruptInfoOverflow = true;
+  if(!interruptProfileInfo.interruptInfoOverflow) { // nothing to close once the record buffer is full
+    interruptProfileInfo.interruptInfoArray[interruptProfileInfo.interruptInfoIndex]->endTime=BAMBOO_GET_EXE_TIME(); // stamp the end time on the open interrupt record
+    interruptProfileInfo.interruptInfoIndex++; // advance to the next free slot
+    if(interruptProfileInfo.interruptInfoIndex==INTERRUPTINFOLENGTH) {
+      interruptProfileInfo.interruptInfoOverflow=true; // the index reached INTERRUPTINFOLENGTH: stop recording further interrupts
     }
   }
 }
@@ -94,9 +99,6 @@ INLINE static void profileInterruptEnd_I(void) {
 #define PROFILE_TASK_START(s) profileTaskStart(s)
 #define PROFILE_TASK_END() profileTaskEnd()
 #ifdef PROFILE_INTERRUPT
-INLINE void profileInterruptStart_I(void);
-INLINE void profileInterruptEnd_I(void);
-
 #define PROFILE_INTERRUPT_START() profileInterruptStart_I()
 #define PROFILE_INTERRUPT_END() profileInterruptEnd_I()
 #else
-- 
2.34.1