Robust/src/Runtime/bamboo/multicorecache.c

   1 #ifdef GC_CACHE_ADAPT
   2 #include "multicorecache.h"
   3 #include "multicoremsg.h"
   4 #include "multicoregcprofile.h"
   5
   6 gc_cache_revise_info_t gc_cache_revise_information;
   7
   8 // prepare for cache adaption:
   9 //   -- flush the shared heap
  10 //   -- clean dtlb entries
  11 //   -- change cache strategy
  12 void cacheAdapt_gc(bool isgccachestage) {
  13   // flush the shared heap
  14   BAMBOO_CACHE_FLUSH_L2();
  15
  16   // clean the dtlb entries
  17   BAMBOO_CLEAN_DTLB();
  18
  19   // change the cache strategy
  20   gccachestage = isgccachestage;
  21 }
  22
  23 // the master core decides how to adapt cache strategy for the mutator
  24 // according to collected statistic data
  25
  26 // find the core that accesses the page #page_index most
  27 #define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  28   { \
  29     int *local_tbl=&gccachesamplingtbl_r[page_index]; \
  30     for(int i = 0; i < NUMCORESACTIVE; i++) { \
  31       int freq = *local_tbl; \
  32       local_tbl=(int *)(((void *)local_tbl)+size_cachesamplingtbl_local_r); \
  33       if(hotfreq < freq) { \
  34         hotfreq = freq; \
  35         hottestcore = i; \
  36       } \
  37     } \
  38   }
  39 // find the core that accesses the page #page_index most and comput the total
  40 // access time of the page at the same time
  41 #define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  42   { \
  43     int *local_tbl=&gccachesamplingtbl_r[page_index]; \
  44     for(int i = 0; i < NUMCORESACTIVE; i++) { \
  45       int freq = *local_tbl; \
  46       local_tbl=(int *)(((void *)local_tbl)+size_cachesamplingtbl_local_r); \
  47       totalfreq += freq; \
  48       if(hotfreq < freq) { \
  49         hotfreq = freq; \
  50         hottestcore = i; \
  51       } \
  52     } \
  53   }
  54 // Set the policy as hosted by coren
  55 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
  56 #define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  57   { \
  58     (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
  59     (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
  60     (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  61   }
  62 // store the new policy information at tmp_p in gccachepolicytbl
  63 #define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  64   { \
  65     ((int*)(tmp_p))[page_index] = (policy).word; \
  66   }
  67
  68 // make all pages hfh
  69 void cacheAdapt_policy_h4h(int coren){
  70   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
  71   unsigned int page_gap=page_num/NUMCORESACTIVE;
  72   unsigned int page_index=page_gap*coren;
  73   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  74   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  75   int * tmp_p = gccachepolicytbl;
  76   for(; page_index < page_index_end; page_index++) {
  77     bamboo_cache_policy_t policy = {0};
  78     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
  79     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
  80     page_sva += BAMBOO_PAGE_SIZE;
  81   }
  82 }
  83
  84 // make all pages local as non-cache-adaptable gc local mode
  85 void cacheAdapt_policy_local(int coren){
  86   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
  87   unsigned int page_gap=page_num/NUMCORESACTIVE;
  88   unsigned int page_index=page_gap*coren;
  89   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  90   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  91   int * tmp_p = gccachepolicytbl;
  92   for(; page_index < page_index_end; page_index++) {
  93     bamboo_cache_policy_t policy = {0};
  94     unsigned int block = 0;
  95     BLOCKINDEX(block, (void *) page_sva);
  96     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
  97     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
  98     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
  99     page_sva += BAMBOO_PAGE_SIZE;
 100   }
 101 }
 102
 103 void cacheAdapt_policy_hottest(int coren){
 104   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
 105   unsigned int page_gap=page_num/NUMCORESACTIVE;
 106   unsigned int page_index=page_gap*coren;
 107   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
 108   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
 109   int * tmp_p = gccachepolicytbl;
 110   for(; page_index < page_index_end; page_index++) {
 111     bamboo_cache_policy_t policy = {0};
 112     unsigned int hottestcore = 0;
 113     unsigned int hotfreq = 0;
 114     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
 115     // TODO
 116     // Decide the cache strategy for this page
 117     // If decide to adapt a new cache strategy, write into the shared block of
 118     // the gcsharedsamplingtbl. The mem recording information that has been
 119     // written is enough to hold the information.
 120     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
 121     if(hotfreq != 0) {
 122       // locally cache the page in the hottest core
 123       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
 124     }
 125     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
 126     page_sva += BAMBOO_PAGE_SIZE;
 127   }
 128 }
 129
 130 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
 131 // cache the page on the core that accesses it the most if that core accesses
 132 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
 133 // h4h the page.
 134 void cacheAdapt_policy_dominate(int coren){
 135   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
 136   unsigned int page_gap=page_num/NUMCORESACTIVE;
 137   unsigned int page_index=page_gap*coren;
 138   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
 139   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
 140   int * tmp_p = gccachepolicytbl;
 141   for(; page_index < page_index_end; page_index++) {
 142     bamboo_cache_policy_t policy = {0};
 143     unsigned int hottestcore = 0;
 144     unsigned int totalfreq = 0;
 145     unsigned int hotfreq = 0;
 146     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
 147     // Decide the cache strategy for this page
 148     // If decide to adapt a new cache strategy, write into the shared block of
 149     // the gcpolicytbl
 150     // Format: page start va + cache policy
 151     if(hotfreq != 0) {
 152       totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
 153       if((unsigned int)hotfreq < (unsigned int)totalfreq) {
 154         // use hfh
 155         //policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
 156         unsigned int block = 0;
 157         BLOCKINDEX(page_sva, &block);
 158         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 159         CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
 160       } else {
 161         // locally cache the page in the hottest core
 162         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
 163       }
 164     }
 165     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
 166     page_sva += BAMBOO_PAGE_SIZE;
 167   }
 168 }
 169
 170 #if 0
  171 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
  172 // record the workload of the hottestcore into core2heavypages
  // Expands to a block that uses `workload`, `total_workload` and
  // `core2heavypages` from the scope of the call site.
  // core2heavypages[core][0] holds the number of records stored for that core;
  // record number `index` (0-based) occupies three slots:
  //   [3*index+1] -- (tmp_p)-1 converted to unsigned long long
  //   [3*index+2] -- totalfreq
  //   [3*index+3] -- totalfreq - hotfreq
  // NOTE: the `remoteaccess` parameter is used as the name of a variable that
  // the macro itself declares, so the argument has to be a plain identifier.
  173 #define CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p) \
  174   { \
  175     workload[hottestcore] += (totalfreq); \
  176     total_workload += (totalfreq); \
  177     unsigned long long remoteaccess = (totalfreq) - (hotfreq); \
  178     unsigned int index = (unsigned int)core2heavypages[hottestcore][0]; \
  179     core2heavypages[hottestcore][3*index+3] = (remoteaccess); \
  180     core2heavypages[hottestcore][3*index+2] = (totalfreq); \
  181     core2heavypages[hottestcore][3*index+1] = (unsigned long long)((tmp_p)-1); \
  182     core2heavypages[hottestcore][0]++; \
  183   }
 184
 185 void gc_quicksort(unsigned long long *array,unsigned int left,unsigned int right,unsigned int offset) {
 186   unsigned int pivot = 0;;
 187   unsigned int leftIdx = left;
 188   unsigned int rightIdx = right;
 189   if((right-left+1) >= 1) {
 190     pivot = (left+right)/2;
 191     while((leftIdx <= pivot) && (rightIdx >= pivot)) {
 192       unsigned long long pivotValue = array[pivot*3-offset];
 193       while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
 194         leftIdx++;
 195       }
 196       while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
 197         rightIdx--;
 198       }
 199       // swap [leftIdx] & [rightIdx]
 200       for(int k = 0; k < 3; k++) {
 201         unsigned long long tmp = array[3*rightIdx-k];
 202         array[3*rightIdx-k] = array[3*leftIdx-k];
 203         array[3*leftIdx-k] = tmp;
 204       }
 205       leftIdx++;
 206       rightIdx--;
 207       if((leftIdx-1) == pivot) {
 208         pivot = rightIdx = rightIdx + 1;
 209       } else if((leftIdx+1) == pivot) {
 210         pivot = leftIdx = leftIdx-1;
 211       }
 212     }
 213     gc_quicksort(array, left, pivot-1, offset);
 214     gc_quicksort(array, pivot+1, right, offset);
 215   }
 216   return;
 217 }
 218
 219 INLINE int cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold,unsigned long long ** core2heavypages, unsigned long long * workload,int i) {
 220   int j = 1;
 221   unsigned int index = (unsigned int)core2heavypages[i][0];
 222   if(workload[i] > workload_threshold) {
 223     // sort according to the remoteaccess
 224     gc_quicksort(&core2heavypages[i][0], 1, index, 0);
 225     while((workload[i] > workload_threshold) && (j<index*3)) {
 226       // hfh those pages with more remote accesses
 227       bamboo_cache_policy_t policy = {0};
 228       policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
 229       *((unsigned int*)core2heavypages[i][j]) = policy.word;
 230       workload[i] -= core2heavypages[i][j+1];
 231       j += 3;
 232     }
 233   }
 234   return j;
 235 }
 236
  237 // Overload policy: every sampled page is first cached on the core that
  238 // accesses it the most.  Then, for every core whose accumulated workload is
  239 // above total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD, the pages with the
  240 // most remote accesses are switched to hash-for-home until the workload is
  241 // no longer above that limit or the core's recorded pages run out.
  // Returns numchanged.
  // NOTE(review): this function is compiled out (#if 0) and is stale against
  // the macros defined earlier in this file: CACHEADAPT_CHANGE_POLICY_4_PAGE
  // is invoked with four arguments but is defined with three parameters, and
  // numchanged is never modified apart from being handed to that invocation.
  // NOTE(review): core2heavypages is a two-dimensional variable-length array
  // here, while cacheAdapt_h4h_remote_accesses declares the corresponding
  // parameter as `unsigned long long **` -- confirm before re-enabling.
  242 int cacheAdapt_policy_overload(int coren){
  243   unsigned int page_index = 0;
  244   VA page_sva = gcbaseva;
  245   unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
  246   unsigned int numchanged = 0;
  247   int * tmp_p = gccachepolicytbl+1;
  // per-core sum of the (scaled) total accesses of the pages hosted there
  248   unsigned long long workload[NUMCORESACTIVE];
  249   memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  250   unsigned long long total_workload = 0;
  // per-core record table: slot 0 is the record count, then three slots per page
  251   unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  252   memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  // walk all pages from gcbaseva up to gctopva
  253   for(page_index = 0; page_sva < gctopva; page_index++) {
  254     bamboo_cache_policy_t policy = {0};
  255     unsigned int hottestcore = 0;
  256     unsigned long long totalfreq = 0;
  257     unsigned int hotfreq = 0;
  258     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
  259     // Decide the cache strategy for this page
  260     // If decide to adapt a new cache strategy, write into the shared block of
  261     // the gcsharedsamplingtbl. The mem recording information that has been
  262     // written is enough to hold the information.
  263     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
  264     if(hotfreq != 0) {
  // both counts are scaled down by BAMBOO_PAGE_SIZE before they are recorded
  265       totalfreq/=BAMBOO_PAGE_SIZE;
  266       hotfreq/=BAMBOO_PAGE_SIZE;
  267       // locally cache the page in the hottest core
  268       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
  269       CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
  270       CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
  271     }
  272     page_sva += BAMBOO_PAGE_SIZE;
  273   }
  274
  // a core is overloaded when it carries more than this part of the total
  275   unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  276   // Check the workload of each core
  277   for(int i = 0; i < NUMCORESACTIVE; i++) {
  278     cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
  279   }
  280
  281   return numchanged;
  282 }
 283
  // percentage of a core's workload used as the access threshold below
  284 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
  // number of pages above which that share of the workload counts as crowded
  285 #define GC_CACHE_ADAPT_CROWD_THRESHOLD  20
  286 // Crowd policy: every sampled page is first cached on the core that
  287 // accesses it the most.  As in the overload policy, a core whose workload
  288 // is above total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD first has the
  289 // pages with the most remote accesses switched to hash-for-home until it
  290 // is no longer above that limit.
  291 // Then the remaining pages are sorted by total accesses.
  292 // If more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages have to be counted to
  293 // reach GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the core's workload,
  294 // then start hfh these pages (selecting the ones with the most remote
  295 // accesses first) one at a time and repeat, until no more than
  296 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages are needed.
  // Returns numchanged.
  // NOTE(review): this function is compiled out (#if 0) and is stale against
  // the macros defined earlier in this file: CACHEADAPT_CHANGE_POLICY_4_PAGE
  // is invoked with four arguments but is defined with three parameters, and
  // numchanged is never modified apart from being handed to that invocation.
  // NOTE(review): core2heavypages is a two-dimensional variable-length array
  // here, while cacheAdapt_h4h_remote_accesses declares the corresponding
  // parameter as `unsigned long long **` -- confirm before re-enabling.
  297 int cacheAdapt_policy_crowd(int coren){
  298   unsigned int page_index = 0;
  299   VA page_sva = gcbaseva;
  300   unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
  301   unsigned int numchanged = 0;
  302   int * tmp_p = gccachepolicytbl+1;
  // per-core sum of the (scaled) total accesses of the pages hosted there
  303   unsigned long long workload[NUMCORESACTIVE];
  304   memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  305   unsigned long long total_workload = 0;
  // per-core record table: slot 0 is the record count, then three slots per page
  306   unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  307   memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  // walk all pages from gcbaseva up to gctopva
  308   for(page_index = 0; page_sva < gctopva; page_index++) {
  309     bamboo_cache_policy_t policy = {0};
  310     unsigned int hottestcore = 0;
  311     unsigned long long totalfreq = 0;
  312     unsigned int hotfreq = 0;
  313     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
  314     // Decide the cache strategy for this page
  315     // If decide to adapt a new cache strategy, write into the shared block of
  316     // the gcsharedsamplingtbl. The mem recording information that has been
  317     // written is enough to hold the information.
  318     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
  319     if(hotfreq != 0) {
  // both counts are scaled down by BAMBOO_PAGE_SIZE before they are recorded
  320       totalfreq/=BAMBOO_PAGE_SIZE;
  321       hotfreq/=BAMBOO_PAGE_SIZE;
  322       // locally cache the page in the hottest core
  323       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
  324       CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
  325       CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
  326     }
  327     page_sva += BAMBOO_PAGE_SIZE;
  328   }
  329
  330   unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  331   // Check the workload of each core
  332   for(int i = 0; i < NUMCORESACTIVE; i++) {
  333     unsigned int index=(unsigned int)core2heavypages[i][0];
  // j is the slot index of the first record that is still locally cached
  334     int j=cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
  335     // Check if the accesses are crowded on few pages
  336     // sort according to the total access
  337 inner_crowd:
  338     gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
  339     unsigned long long threshold=GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
  340     int num_crowded = 0;
  341     unsigned long long t_workload = 0;
  // count how many records are needed to reach the threshold.
  // NOTE(review): this loop does not compare num_crowded against the number
  // of recorded pages (index).
  342     do {
  343       t_workload += core2heavypages[i][j+num_crowded*3+1];
  344       num_crowded++;
  345     } while(t_workload < threshold);
  346     // NOTE(review): the original remark here claimed that num_crowded is
  347     // always <= GC_CACHE_ADAPT_CROWD_THRESHOLD; the test below checks for >.
  348     if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
  349       // need to hfh these pages
  350       // sort the pages according to remote access
  351       gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
  352       // h4h the first of those pages, then re-evaluate from inner_crowd
  353       bamboo_cache_policy_t policy = {0};
  354       policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
  355       *((unsigned int*)core2heavypages[i][j]) = policy.word;
  356       workload[i] -= core2heavypages[i][j+1];
  357       t_workload -= core2heavypages[i][j+1];
  358       j += 3;
  359       threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
  360       goto inner_crowd;
  361     }
  362   }
  363
  364   return numchanged;
  365 }
 366 #endif
 367
 368 unsigned int cacheAdapt_decision(int coren) {
 369   BAMBOO_CACHE_MF();
 370   // check the statistic data
 371   // for each page, decide the new cache strategy
 372 #ifdef GC_CACHE_ADAPT_POLICY1
 373   cacheAdapt_policy_h4h(coren);
 374 #elif defined GC_CACHE_ADAPT_POLICY2
 375   cacheAdapt_policy_local(coren);
 376 #elif defined GC_CACHE_ADAPT_POLICY3
 377   cacheAdapt_policy_hottest(coren);
 378 #elif defined GC_CACHE_ADAPT_POLICY4
 379   cacheAdapt_policy_dominate(coren);
 380 //#elif defined GC_CACHE_ADAPT_POLICY5
 381 //  cacheAdapt_policy_overload(coren);
 382 //#elif defined GC_CACHE_ADAPT_POLICY6
 383 //  cacheAdapt_policy_crowd(coren);
 384 #endif
 385 }
 386
 387 // adapt the cache strategy for the mutator
 388 void cacheAdapt_mutator() {
 389   BAMBOO_CACHE_MF();
 390   // check the changes and adapt them
 391   int * tmp_p = gccachepolicytbl;
 392   unsigned int page_sva = gcbaseva;
 393   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
 394     // read out the policy
 395     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
 396     // adapt the policy
 397     if(policy.word != 0) {
 398       bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
 399     }
 400     tmp_p += 1;
 401   }
 402 }
 403
  // Client-core side of the cache adaption.  It runs two steps, each started
  // by WAITFORGCPHASE (presumably blocks until that phase is announced --
  // confirm against its definition):
  //   CACHEPOLICYPHASE -- decide the policies for this core's pages and report
  //                       GCFINISHCACHEPOLICY to STARTUPCORE
  //   PREFINISHPHASE   -- apply the policies, call cacheAdapt_gc(false) and
  //                       report GCFINISHPREF to STARTUPCORE
  // Afterwards the sampling state is reset.
  404 void cacheAdapt_phase_client() {
  405   WAITFORGCPHASE(CACHEPOLICYPHASE);
  406   GC_PRINTF("Start cachepolicy phase\n");
  407   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  408   // tell the startup core that this core finished the cache policy phase
  409   send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
  410   GC_PRINTF("Finish cachepolicy phase\n");
  411
  412   WAITFORGCPHASE(PREFINISHPHASE);
  413   GC_PRINTF("Start prefinish phase\n");
  414   // cache adapt phase
  415   cacheAdapt_mutator();
  416   cacheAdapt_gc(false);
  417   // tell the startup core that this core finished the prefinish phase
  418   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
  419   GC_PRINTF("Finish prefinish phase\n");
  420   CACHEADAPT_SAMPING_RESET();
  // only cores with an index below NUMCORESACTIVE clear their sampling tables
  421   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
  422     // zero out the gccachesamplingtbl
  423     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
  424     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
  425   }
  426 }
 427
 428 extern unsigned long long gc_output_cache_policy_time;
 429
  // Master-core side of the cache adaption.  It outputs the revised sampling
  // data (the time spent doing so is added to gc_output_cache_policy_time),
  // then drives two phases: in each one it sets gc_status_info.gcphase, sends
  // the start message to the clients, does its own share of the work and
  // runs GC_CHECK_ALL_CORE_STATUS before going on.
  //   CACHEPOLICYPHASE -- every core decides the policies for its pages
  //   PREFINISHPHASE   -- every core applies the policies
  // Afterwards the sampling state is reset; unlike the client, the master also
  // clears gccachepolicytbl.
  430 void cacheAdapt_phase_master() {
  431   GCPROFILE_ITEM();
  432   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
  433   CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
  434   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
  435   // let all cores process the revised profile data in parallel and decide
  436   // the cache policy for each page
  437   gc_status_info.gcphase = CACHEPOLICYPHASE;
  438   GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
  439   GC_PRINTF("Start cachepolicy phase \n");
  440   // cache adapt phase
  441   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  442   GC_CHECK_ALL_CORE_STATUS(CACHEPOLICYPHASE==gc_status_info.gcphase);
  443   BAMBOO_CACHE_MF();
  444
  445   // let all cores adopt the new policies
  446   gc_status_info.gcphase = PREFINISHPHASE;
  447   // Note: all cores should flush their runtime data including non-gc cores
  448   GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
  449   GC_PRINTF("Start prefinish phase \n");
  450   // cache adapt phase
  451   cacheAdapt_mutator();
  452   cacheAdapt_gc(false);
  453   GC_CHECK_ALL_CORE_STATUS(PREFINISHPHASE==gc_status_info.gcphase);
  454
  455   CACHEADAPT_SAMPING_RESET();
  456   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
  457     // zero out the sampling tables and the policy table
  458     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
  459     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
  460     BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
  461   }
  462 }
 463
 464 void gc_output_cache_sampling() {
 465   //extern volatile bool gc_profile_flag;
 466   //if(!gc_profile_flag) return;
 467   unsigned int page_index = 0;
 468   VA page_sva = 0;
 469   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 470   for(page_index = 0; page_index < page_num; page_index++) {
 471     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 472     unsigned int block = 0;
 473     BLOCKINDEX(block, (void *) page_sva);
 474     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 475     printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
 476     for(int i = 0; i < NUMCORESACTIVE; i++) {
 477       int * local_tbl = (int *)((void *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
 478       int freq = local_tbl[page_index];
 479       //if(freq != 0) {
 480         printf("%d,  ", freq);
 481       //}
 482     }
 483     printf("\n");
 484   }
 485   printf("=================\n");
 486 }
 487
 488 void gc_output_cache_sampling_r() {
 489   //extern volatile bool gc_profile_flag;
 490   //if(!gc_profile_flag) return;
 491   // TODO summary data
 492   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
 493   for(int i = 0; i < NUMCORESACTIVE; i++) {
 494     for(int j = 0; j < NUMCORESACTIVE; j++) {
 495       sumdata[i][j] = 0;
 496     }
 497   }
 498   tprintf("cache sampling_r \n");
 499   unsigned int page_index = 0;
 500   VA page_sva = 0;
 501   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 502   for(page_index = 0; page_index < page_num; page_index++) {
 503     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 504     unsigned int block = 0;
 505     BLOCKINDEX(block, (void *)page_sva);
 506     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 507     printf(" %x,  %d,  %d,  ",(int)page_sva,page_index,coren);
 508     int accesscore = 0; // TODO
 509     for(int i = 0; i < NUMCORESACTIVE; i++) {
 510       int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
 511       int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
 512       printf("%d,  ", freq);
 513       if(freq != 0) {
 514         accesscore++;// TODO
 515       }
 516     }
 517     if(accesscore!=0) {
 518       for(int i = 0; i < NUMCORESACTIVE; i++) {
 519         int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
 520         int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
 521         sumdata[accesscore-1][i]+=freq;
 522       }
 523     }
 524
 525     printf("\n");
 526   }
 527   // TODO printout the summary data
 528   for(int i = 0; i < NUMCORESACTIVE; i++) {
 529     printf("%d  ", i);
 530     for(int j = 0; j < NUMCORESACTIVE; j++) {
 531       printf(" %d  ", sumdata[j][i]);
 532     }
 533     printf("\n");
 534   }
 535   printf("=================\n");
 536 }
 537 #endif // GC_CACHE_ADAPT