For a page that is not used, set its host core as the home core that caches it
[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
index df38728d1ac1708020ce79ba395ba717e12dee29..c9d44e8edb0f9bd578aab735d13ddd82a4180784 100644 (file)
 #ifdef GC_CACHE_ADAPT
 #include "multicorecache.h"
 #include "multicoremsg.h"
+#include "multicoregc.h"
 #include "multicoregcprofile.h"
 
-gc_cache_revise_info_t gc_cache_revise_information;
-
-/* This function initialize the gc_cache_revise_information. It should be 
- * invoked before we start compaction.
- */
-void samplingDataReviseInit(struct moveHelper * orig,struct moveHelper * to) {
-  // initialize the destination page info
-  gc_cache_revise_information.to_page_start_va=to->ptr;
-  unsigned int toindex=(unsigned INTPTR)(to->base-gcbaseva)/BAMBOO_PAGE_SIZE;
-  gc_cache_revise_information.to_page_end_va=gcbaseva+BAMBOO_PAGE_SIZE*(toindex+1);
-  gc_cache_revise_information.to_page_index=toindex;
-  // initilaize the original page info
-  unsigned int origindex=((unsigned INTPTR)(orig->base-gcbaseva))/BAMBOO_PAGE_SIZE;
-  gc_cache_revise_information.orig_page_start_va=orig->ptr;
-  gc_cache_revise_information.orig_page_end_va=gcbaseva+BAMBOO_PAGE_SIZE*(origindex+1);
-  gc_cache_revise_information.orig_page_index=origindex;
-}
+void cacheadapt_finish_compact(void *toptr) {
+  unsigned int dstpage=((unsigned INTPTR)(toptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
+  unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
 
-/* This function computes the revised profiling data of the first closed destination 
- * page of an object that acrosses multiple pages
- */
-void firstPageConvert(bool origclosefirst, unsigned INTPTR main_factor, unsigned INTPTR delta_factor) {
-  unsigned INTPTR topage=gc_cache_revise_information.to_page_index*NUMCORESACTIVE;
-  unsigned INTPTR oldpage=gc_cache_revise_information.orig_page_index*NUMCORESACTIVE;
-  int * newtable=&gccachesamplingtbl_r[topage];
-  int * oldtable=&gccachesamplingtbl[oldpage];
-  // compute the revised profiling info for the start destination page
-  if(origclosefirst) {
-    // the start original page closes first, now compute the revised profiling
-    // info for the start destination page.
-    // The start destination page = the rest of the start original page + 
-    //                              delta_fator from the next original page
-    int * oldtable_next=&gccachesamplingtbl[oldpage+NUMCORESACTIVE];
-    for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-      (*newtable)=((*newtable)+(*oldtable)*main_factor+(*oldtable_next)*delta_factor);
-      newtable++;
-      oldtable++;
-      oldtable_next++;
-    }
-    // close the start original page 
-    gc_cache_revise_information.orig_page_start_va+=main_factor+delta_factor;
-    gc_cache_revise_information.orig_page_end_va+=BAMBOO_PAGE_SIZE;
-    gc_cache_revise_information.orig_page_index++;
-  } else {
-    // the start destination page closes first, now compute the revised 
-    // profiling info for it.
-    for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-      (*newtable)=((*newtable)+(*oldtable)*main_factor);
-      newtable++;
-      oldtable++;
-    }
-    // record the new start of the original page
-    gc_cache_revise_information.orig_page_start_va+=main_factor;
-  }
-  // close the start original page and destination page
-  gc_cache_revise_information.to_page_start_va=gc_cache_revise_information.to_page_end_va;
-  gc_cache_revise_information.to_page_end_va+=BAMBOO_PAGE_SIZE;
-  gc_cache_revise_information.to_page_index++;
+  for(int core = 0; core < NUMCORESACTIVE; core++) {
+    (*newtable)=(*newtable)>>6;
+    newtable++;
+  }  
 }
 
-/* This function computes the revised profiling info for closed destination 
- * pages that are occupied by one object that acrosses multiple pages.
- * the destination page = main_factor from the first unclosed original page 
- *                       + delta_factor from the next unclosed original page
- */
-void restClosedPageConvert(void * current_ptr, unsigned INTPTR main_factor, unsigned INTPTR delta_factor) {
-  while(gc_cache_revise_information.to_page_end_va<=current_ptr) {
-    unsigned INTPTR topage=gc_cache_revise_information.to_page_index*NUMCORESACTIVE;
-    unsigned INTPTR oldpage=gc_cache_revise_information.orig_page_index*NUMCORESACTIVE;
-    int *newtable=&gccachesamplingtbl_r[topage];
-    int *oldtable=&gccachesamplingtbl[oldpage];
-    int *oldtable_next=&gccachesamplingtbl[oldpage+NUMCORESACTIVE];
-
-    for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-      (*newtable)=((*newtable)+(*oldtable)*main_factor+(*oldtable_next)*delta_factor);
-      newtable++;
-      oldtable++;
-      oldtable_next++;
-    }
-
-    // close the original page and the destination page
-    gc_cache_revise_information.orig_page_start_va+=BAMBOO_PAGE_SIZE;
-    gc_cache_revise_information.orig_page_end_va+=BAMBOO_PAGE_SIZE;
-    gc_cache_revise_information.orig_page_index++;
-    gc_cache_revise_information.to_page_start_va=gc_cache_revise_information.to_page_end_va;
-    gc_cache_revise_information.to_page_end_va+=BAMBOO_PAGE_SIZE;
-    gc_cache_revise_information.to_page_index++;
-  }
-}
+void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
+  unsigned int srcpage=((unsigned INTPTR)(srcptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
+  unsigned int dstpage=((unsigned INTPTR)(tostart-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
+  unsigned int numbytes=tofinish-tostart;
+  
+  unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
+  unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
+  
+  unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
 
-/* This function computes the revised profiling info for the last
- * destination page of an object that acrosses multiple pages.
- */
-void lastPageConvert(void * current_ptr) {
-  unsigned INTPTR to_factor=current_ptr-gc_cache_revise_information.to_page_start_va;
-  unsigned INTPTR topage=gc_cache_revise_information.to_page_index*NUMCORESACTIVE;
-  unsigned INTPTR oldpage=gc_cache_revise_information.orig_page_index*NUMCORESACTIVE;
-  int *newtable=&gccachesamplingtbl_r[topage];
-  int *oldtable=&gccachesamplingtbl[oldpage];
-
-  for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-    (*newtable)=((*newtable)+(*oldtable)*to_factor);
+  for(int core = 0; core < NUMCORESACTIVE; core++) {
+    (*newtable)+=page64th*(*oldtable);
     newtable++;
     oldtable++;
-  }
-  // do not need to set gc_cache_revise_information here for the last 
-  // original/destination page as it will be set in completePageConvert()
+  }  
 }
 
-/* This function converts multiple original pages profiling data to multiple 
- * destination pages' profiling data
- */
-void samplingDataConvertMultiple(void * current_ptr) {
-  // first decide which page close first: original or destination?
-  unsigned INTPTR to_factor=(unsigned INTPTR)(gc_cache_revise_information.to_page_end_va-gc_cache_revise_information.to_page_start_va);
-  unsigned INTPTR orig_factor=(unsigned INTPTR)(gc_cache_revise_information.orig_page_end_va-gc_cache_revise_information.orig_page_start_va);
-  bool origclosefirst=to_factor>orig_factor;
-  unsigned INTPTR delta_factor=(origclosefirst)?(to_factor-orig_factor):(orig_factor-to_factor);
-  unsigned INTPTR main_factor=(origclosefirst)?orig_factor:to_factor;
-
-  // compute the revised profiling info for the start destination page
-  firstPageConvert(origclosefirst, main_factor, delta_factor);
-  // update main_factor/delta_factor
-  if(origclosefirst) {
-    // for the following destination pages that are fully used:
-    // the destination page = (page_size-delta_factor) from the 
-    //                        first unclosed original page + delta_factor 
-    //                        from the next unclosed original page
-    // we always use main_factor to represent the factor from the first 
-    // unclosed original page
-    main_factor=BAMBOO_PAGE_SIZE-delta_factor;
-  } else {
-    // for the following destination pages that are fully used:
-    // the destination page = delta_factor from the first unclosed original    
-    //                        page + (page_size-delta_factor) from the next 
-    //                        unclosed original page
-    // we always use main_factor to represent the factor from the first
-    // unclosed original page
-    main_factor=delta_factor;
-    delta_factor=BAMBOO_PAGE_SIZE-delta_factor;
-  }
-
-  // compute the revised profiling info for the following closed destination
-  // pages
-  restClosedPageConvert(current_ptr, main_factor, delta_factor);
+/* bytesneeded == 0 is a special case: it means that we should just finish (close out) the dst page. */
 
-  // compute the revised profiling info for the last destination page if needed
-  lastPageConvert(current_ptr);
-}
+void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
+  unsigned int numbytes=toptr-tostart;
 
-/* This function converts originial pages' profiling data to destination pages'
- * profiling data.
- * The parameter current_ptr indicates the current position in the destination 
- * pages.
- * Note that there could be objects that across pages. In such cases, there are 
- * multiple orig/to pages are closed and all these to pages' profiling data 
- * should be properly updated.
- */
-void samplingDataConvert(void * current_ptr) {
-  if(gc_cache_revise_information.to_page_end_va<current_ptr) {
-    // multiple pages are closed
-    samplingDataConvertMultiple(current_ptr);
-  } else {
-    unsigned INTPTR tmp_factor=(unsigned INTPTR)(current_ptr-gc_cache_revise_information.to_page_start_va);
-    if(tmp_factor) {
-      unsigned INTPTR topage=gc_cache_revise_information.to_page_index*NUMCORESACTIVE;
-      unsigned INTPTR oldpage=gc_cache_revise_information.orig_page_index*NUMCORESACTIVE;
-      int * newtable=&gccachesamplingtbl_r[topage];
-      int * oldtable=&gccachesamplingtbl[oldpage];
+  void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
+  void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
   
-      for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
-        (*newtable)=((*newtable)+(*oldtable)*tmp_factor);
-        newtable++;
-        oldtable++;
-      }
-    }
-  }
-} 
+  unsigned int topage=((unsigned INTPTR)(toptr-1-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; 
+  unsigned int origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
+
+  unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
+  unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
 
-/* This function computes the impact of an original page on a destination page
- * in terms of profiling data. It can only be invoked when there is an original 
- * page that is closed or a destination page that is closed. When finished 
- * computing the revised profiling info of the current destination page, it 
- * sets up the gc_cache_revise_information to the latest position in the 
- * original page and the destination page.
- */
-void completePageConvert(void * origptr, void * toptr, void * current_ptr) {
-  bool closeToPage=(unsigned int)(toptr)>=(unsigned int)(gc_cache_revise_information.to_page_end_va);
-  bool closeOrigPage=(unsigned int)(origptr)>=(unsigned int)(gc_cache_revise_information.orig_page_end_va);
-  if(closeToPage||closeOrigPage) {
-    // end of one or more orig/to page
-    // compute the impact of the original page(s) for the desitination page(s)
-    samplingDataConvert(current_ptr);
-    // prepare for an new orig page
-    unsigned INTPTR tmp_index=((unsigned INTPTR)(origptr-gcbaseva))/BAMBOO_PAGE_SIZE;
-    gc_cache_revise_information.orig_page_start_va=origptr;
-    gc_cache_revise_information.orig_page_end_va=gcbaseva+BAMBOO_PAGE_SIZE*(tmp_index+1);
-    gc_cache_revise_information.orig_page_index=tmp_index;
-    gc_cache_revise_information.to_page_start_va=toptr;
-    if(closeToPage) {
-      unsigned INTPTR to_index=((unsigned INTPTR)(toptr-gcbaseva))/BAMBOO_PAGE_SIZE;
-      gc_cache_revise_information.to_page_end_va=gcbaseva+BAMBOO_PAGE_SIZE*(to_index+1);
-      gc_cache_revise_information.to_page_index=to_index;
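+  // handle the special case: when bytesneeded is 0, treat the to page as having no bytes remaining so that it gets closed out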
+  unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
+  unsigned int remainorigbytes=origbound-origptr;
+
+  do {
+    // clamp the remaining source bytes to bytesneeded -- we don't want to close out a page if it is not necessary
+    remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
+
+    if (remaintobytes<=remainorigbytes) {
+      //Need to close out to page
+
+      numbytes+=remaintobytes;
+      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
+
+      for(int core = 0; core < NUMCORESACTIVE; core++) {
+       (*totable)=(*totable+page64th*(*origtable))>>6;
+       totable++;
+       origtable++;
+      }
+      toptr+=remaintobytes;
+      origptr+=remaintobytes;
+      bytesneeded-=remaintobytes;
+      topage++;//to page is definitely done
+      tobound+=BAMBOO_PAGE_SIZE;
+      origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
+      origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
+    } else {
+      //Finishing off orig page
+
+      numbytes+=remainorigbytes;
+      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
+      
+      for(int core = 0; core < NUMCORESACTIVE; core++) {
+       (*totable)+=page64th*(*origtable);
+       totable++;
+       origtable++;
+      }
+      toptr+=remainorigbytes;
+      origptr+=remainorigbytes;
+      bytesneeded-=remainorigbytes;
+      origpage++;//just orig page is done
+      origbound+=BAMBOO_PAGE_SIZE;
     }
-  }
+    totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
+    origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
+    
+    remaintobytes=tobound-toptr;
+    remainorigbytes=origbound-origptr;
+    
+    numbytes=0;
+  } while(bytesneeded!=0);
 }
 
 // prepare for cache adaption:
@@ -221,8 +109,11 @@ void cacheAdapt_gc(bool isgccachestage) {
   // clean the dtlb entries
   BAMBOO_CLEAN_DTLB();
 
-  // change the cache strategy
-  gccachestage = isgccachestage;
+  if(isgccachestage) {
+    bamboo_install_dtlb_handler_for_gc();
+  } else {
+    bamboo_install_dtlb_handler_for_mutator();
+  }
 } 
 
 // the master core decides how to adapt cache strategy for the mutator 
@@ -231,7 +122,7 @@ void cacheAdapt_gc(bool isgccachestage) {
 // find the core that accesses the page #page_index most
 #define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
   { \
-    int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE]; \
+    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];  \
     for(int i = 0; i < NUMCORESACTIVE; i++) { \
       int freq = *local_tbl; \
       local_tbl++; \
@@ -245,7 +136,7 @@ void cacheAdapt_gc(bool isgccachestage) {
 // access time of the page at the same time
 #define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
   { \
-    int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE]; \
+    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];  \
     for(int i = 0; i < NUMCORESACTIVE; i++) { \
       int freq = *local_tbl; \
       local_tbl++; \
@@ -272,12 +163,12 @@ void cacheAdapt_gc(bool isgccachestage) {
 
 // make all pages h4h (hash-for-home)
 void cacheAdapt_policy_h4h(int coren){
-  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
   unsigned int page_gap=page_num/NUMCORESACTIVE;
   unsigned int page_index=page_gap*coren;
   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
   for(; page_index < page_index_end; page_index++) {
     bamboo_cache_policy_t policy = {0};
     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
@@ -288,12 +179,12 @@ void cacheAdapt_policy_h4h(int coren){
 
 // make all pages local as non-cache-adaptable gc local mode
 void cacheAdapt_policy_local(int coren){
-  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
   unsigned int page_gap=page_num/NUMCORESACTIVE;
   unsigned int page_index=page_gap*coren;
   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
   for(; page_index < page_index_end; page_index++) {
     bamboo_cache_policy_t policy = {0};
     unsigned int block = 0;
@@ -306,12 +197,12 @@ void cacheAdapt_policy_local(int coren){
 } 
 
 void cacheAdapt_policy_hottest(int coren){
-  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
   unsigned int page_gap=page_num/NUMCORESACTIVE;
   unsigned int page_index=page_gap*coren;
   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
   for(; page_index < page_index_end; page_index++) {
     bamboo_cache_policy_t policy = {0};
     unsigned int hottestcore = 0;
@@ -326,6 +217,12 @@ void cacheAdapt_policy_hottest(int coren){
     if(hotfreq != 0) {
       // locally cache the page in the hottest core
       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
+    } else {
+      // reset it to be homed by its host core
+      unsigned int block = 0;
+      BLOCKINDEX(block, (void *) page_sva);
+      unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
+      CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
     }
     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
     page_sva += BAMBOO_PAGE_SIZE;
@@ -337,12 +234,12 @@ void cacheAdapt_policy_hottest(int coren){
 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
 // h4h the page.
 void cacheAdapt_policy_dominate(int coren){
-  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
   unsigned int page_gap=page_num/NUMCORESACTIVE;
   unsigned int page_index=page_gap*coren;
   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
   for(; page_index < page_index_end; page_index++) {
     bamboo_cache_policy_t policy = {0};
     unsigned int hottestcore = 0;
@@ -357,15 +254,21 @@ void cacheAdapt_policy_dominate(int coren){
       totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
       if((unsigned int)hotfreq < (unsigned int)totalfreq) {
         // use hfh
-        //policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
-        unsigned int block = 0;
+        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+        /*unsigned int block = 0;
         BLOCKINDEX(block, (void *) page_sva);
         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-        CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
+        CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
       } else {
         // locally cache the page in the hottest core
         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
       }     
+    } else {
+      // reset it to be homed by its host core
+      unsigned int block = 0;
+      BLOCKINDEX(block, (void *) page_sva);
+      unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
+      CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
     }
     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
     page_sva += BAMBOO_PAGE_SIZE;
@@ -391,7 +294,7 @@ unsigned int cacheAdapt_decision(int coren) {
 void cacheAdapt_mutator() {
   BAMBOO_CACHE_MF();
   // check the changes and adapt them
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
   unsigned int page_sva = gcbaseva;
   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
     // read out the policy
@@ -421,7 +324,7 @@ void cacheAdapt_phase_client() {
   //send init finish msg to core coordinator
   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
   GC_PRINTF("Finish prefinish phase\n");
-  CACHEADAPT_SAMPING_RESET();
+  CACHEADAPT_SAMPLING_RESET();
   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
     // zero out the gccachesamplingtbl
     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
@@ -456,8 +359,8 @@ void cacheAdapt_phase_master() {
   cacheAdapt_mutator();
   cacheAdapt_gc(false);
   GC_CHECK_ALL_CORE_STATUS();
-
-  CACHEADAPT_SAMPING_RESET();
+  
+  CACHEADAPT_SAMPLING_RESET();
   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
     // zero out the gccachesamplingtbl
     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
@@ -468,34 +371,46 @@ void cacheAdapt_phase_master() {
 
 // output original cache sampling data for each page
 void gc_output_cache_sampling() {
-  //extern volatile bool gc_profile_flag;
-  //if(!gc_profile_flag) return;
+  extern volatile bool gc_profile_flag;
+  if(!gc_profile_flag) return;
   unsigned int page_index = 0;
   VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
   for(page_index = 0; page_index < page_num; page_index++) {
     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
     unsigned int block = 0;
     BLOCKINDEX(block, (void *) page_sva);
     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-    printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
-    int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
+    //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
+    unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
+    int accesscore = 0;
     for(int i = 0; i < NUMCORESACTIVE; i++) {
       int freq = *local_tbl;
       local_tbl++;
-      //if(freq != 0) {
-        printf("%d,  ", freq);
-      //}
+      if(freq != 0) {
+        accesscore++;
+        //printf("%d,  ", freq);
+      }
     }
-    printf("\n");
+    if(accesscore!=0) {
+      printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
+      unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
+      for(int i = 0; i < NUMCORESACTIVE; i++) {
+        unsigned int freq = *local_tbl;
+        local_tbl++;
+        printf("%u,  ", freq);
+      }
+      printf("\n");
+    }
+    //printf("\n");
   }
   printf("=================\n");
 } 
 
 // output revised cache sampling data for each page after compaction
 void gc_output_cache_sampling_r() {
-  //extern volatile bool gc_profile_flag;
-  //if(!gc_profile_flag) return;
+  extern volatile bool gc_profile_flag;
+  if(!gc_profile_flag) return;
   // TODO summary data
   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
   for(int i = 0; i < NUMCORESACTIVE; i++) {
@@ -506,39 +421,42 @@ void gc_output_cache_sampling_r() {
   tprintf("cache sampling_r \n");
   unsigned int page_index = 0;
   VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
   for(page_index = 0; page_index < page_num; page_index++) {
     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
     unsigned int block = 0;
     BLOCKINDEX(block, (void *)page_sva);
     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-    printf(" %x,  %d,  %d,  ",(int)page_sva,page_index,coren);
+    //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
     int accesscore = 0; // TODO
-    int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
+    unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
     for(int i = 0; i < NUMCORESACTIVE; i++) {
-      int freq = *local_tbl; ///BAMBOO_PAGE_SIZE;
-      printf("%d,  ", freq);
+      unsigned int freq = *local_tbl; 
+      //printf("%d,  ", freq);
       if(freq != 0) {
         accesscore++;// TODO
       }
       local_tbl++;
     }
     if(accesscore!=0) {
-      int * local_tbl = &gccachesamplingtbl_r[page_index];
+      printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
+      unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
       for(int i = 0; i < NUMCORESACTIVE; i++) {
-        int freq = *local_tbl; ///BAMBOO_PAGE_SIZE;
+        unsigned int freq = *local_tbl;
+        printf("%u,  ", freq);
         sumdata[accesscore-1][i]+=freq;
         local_tbl++;
       }
-    }
-  
-    printf("\n");
+      printf("\n");
+    }  
+    //printf("\n");
   }
+  printf("+++++\n");
   // TODO printout the summary data
   for(int i = 0; i < NUMCORESACTIVE; i++) {
     printf("%d  ", i);
     for(int j = 0; j < NUMCORESACTIVE; j++) {
-      printf(" %d  ", sumdata[j][i]);
+      printf(" %u  ", sumdata[j][i]);
     }
     printf("\n");
   }