3e78638a22830636b89f49744e57b69057a0bb30
[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
1 #ifdef GC_CACHE_ADAPT
2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregcprofile.h"
5
6 gc_cache_revise_info_t gc_cache_revise_information;
7
8 // prepare for cache adaption:
9 //   -- flush the shared heap
10 //   -- clean dtlb entries
11 //   -- change cache strategy
12 void cacheAdapt_gc(bool isgccachestage) {
13   // flush the shared heap
14   BAMBOO_CACHE_FLUSH_L2();
15
16   // clean the dtlb entries
17   BAMBOO_CLEAN_DTLB();
18
19   // change the cache strategy
20   gccachestage = isgccachestage;
21
22
23 // the master core decides how to adapt cache strategy for the mutator 
24 // according to collected statistic data
25
// find the core that accesses the page #page_index most.
// Reads one sample per active core out of gccachesamplingtbl_r; consecutive
// cores' rows are size_cachesamplingtbl_local_r bytes apart.  Only updates
// hottestcore/hotfreq when a strictly larger frequency is seen, so callers
// must pre-initialize both.
// Wrapped in do/while(0) so the macro behaves as a single statement.
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  do { \
    int *local_tbl=&gccachesamplingtbl_r[page_index]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  } while(0)
// find the core that accesses the page #page_index most and compute the total
// access count of the page at the same time.  Same table layout and caller
// pre-initialization requirements as CACHEADAPT_FIND_HOTTEST_CORE; totalfreq
// is accumulated (not reset) so callers must zero it first.
// Wrapped in do/while(0) so the macro behaves as a single statement.
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  do { \
    int *local_tbl=&gccachesamplingtbl_r[page_index]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
      totalfreq += freq; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  } while(0)
// Set the policy as hosted by core #coren: mode BAMBOO_CACHE_MODE_COORDS with
// the core's tile coordinates.
// NOTE: (x,y) should be changed to (x+1, y+1)!!!
// Fixed: the original had trailing whitespace after the first backslash,
// which breaks the line continuation.  Wrapped in do/while(0) so the macro
// behaves as a single statement.
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  do { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  } while(0)
// store the new policy word for page #page_index at tmp_p in gccachepolicytbl
// (one int slot per shared page).
// Wrapped in do/while(0) so the macro behaves as a single statement.
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  do { \
    ((int*)(tmp_p))[page_index] = (policy).word; \
  } while(0)
67
68 // make all pages hfh
69 void cacheAdapt_policy_h4h(int coren){
70   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
71   unsigned int page_gap=page_num/NUMCORESACTIVE;
72   unsigned int page_index=page_gap*coren;
73   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
74   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
75   int * tmp_p = gccachepolicytbl;
76   for(; page_index < page_index_end; page_index++) {
77     bamboo_cache_policy_t policy = {0};
78     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
79     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
80     page_sva += BAMBOO_PAGE_SIZE;
81   }
82
83
84 // make all pages local as non-cache-adaptable gc local mode
85 void cacheAdapt_policy_local(int coren){
86   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
87   unsigned int page_gap=page_num/NUMCORESACTIVE;
88   unsigned int page_index=page_gap*coren;
89   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
90   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
91   int * tmp_p = gccachepolicytbl;
92   for(; page_index < page_index_end; page_index++) {
93     bamboo_cache_policy_t policy = {0};
94     unsigned int block = 0;
95     BLOCKINDEX(block, (void *) page_sva);
96     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
97     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
98     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
99     page_sva += BAMBOO_PAGE_SIZE;
100   }
101
102
103 void cacheAdapt_policy_hottest(int coren){
104   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
105   unsigned int page_gap=page_num/NUMCORESACTIVE;
106   unsigned int page_index=page_gap*coren;
107   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
108   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
109   int * tmp_p = gccachepolicytbl;
110   for(; page_index < page_index_end; page_index++) {
111     bamboo_cache_policy_t policy = {0};
112     unsigned int hottestcore = 0;
113     unsigned int hotfreq = 0;
114     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
115     // TODO
116     // Decide the cache strategy for this page
117     // If decide to adapt a new cache strategy, write into the shared block of
118     // the gcsharedsamplingtbl. The mem recording information that has been 
119     // written is enough to hold the information.
120     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
121     if(hotfreq != 0) {
122       // locally cache the page in the hottest core
123       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
124     }
125     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
126     page_sva += BAMBOO_PAGE_SIZE;
127   }
128
129
130 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  64
131 // cache the page on the core that accesses it the most if that core accesses 
132 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
133 // h4h the page.
134 void cacheAdapt_policy_dominate(int coren){
135   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
136   unsigned int page_gap=page_num/NUMCORESACTIVE;
137   unsigned int page_index=page_gap*coren;
138   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
139   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
140   int * tmp_p = gccachepolicytbl;
141   for(; page_index < page_index_end; page_index++) {
142     bamboo_cache_policy_t policy = {0};
143     unsigned int hottestcore = 0;
144     unsigned long long totalfreq = 0;
145     unsigned int hotfreq = 0;
146     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
147     // Decide the cache strategy for this page
148     // If decide to adapt a new cache strategy, write into the shared block of
149     // the gcpolicytbl 
150     // Format: page start va + cache policy
151     if(hotfreq != 0) {
152       totalfreq=(totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)>>7;
153       if(hotfreq < totalfreq) {
154         // use hfh
155         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
156       } else {
157         // locally cache the page in the hottest core
158         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
159       }     
160     }
161     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
162     page_sva += BAMBOO_PAGE_SIZE;
163   }
164 }
165
#if 0
// NOTE(review): everything from here to the matching #endif is compiled out.
#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
// record the workload of the hottestcore into core2heavypages: accumulate the
// page's total accesses into the core's workload and append a
// (page-ptr, totalfreq, remoteaccess) triple; slot [0] holds the triple count.
// NOTE(review): the 'remoteaccess' macro parameter is shadowed by the local
// declaration below, so whatever argument callers pass is ignored.
#define CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p) \
  { \
    workload[hottestcore] += (totalfreq); \
    total_workload += (totalfreq); \
    unsigned long long remoteaccess = (totalfreq) - (hotfreq); \
    unsigned int index = (unsigned int)core2heavypages[hottestcore][0]; \
    core2heavypages[hottestcore][3*index+3] = (remoteaccess); \
    core2heavypages[hottestcore][3*index+2] = (totalfreq); \
    core2heavypages[hottestcore][3*index+1] = (unsigned long long)((tmp_p)-1); \
    core2heavypages[hottestcore][0]++; \
  }
180
// Descending quicksort over triples stored in 'array': the sort key for slot
// i is array[i*3-offset].  Used on the core2heavypages tables, where entries
// are (page-ptr, totalfreq, remoteaccess) triples starting at index 1.
// NOTE(review): compiled out (#if 0).  The partition scheme is non-standard
// (pivot fixed at the middle index with manual pivot tracking), and the
// guard (right-left+1) >= 1 relies on unsigned wrap-around when right < left
// (e.g. right == left-2 wraps to a huge value) -- verify termination and
// correctness before re-enabling.
void gc_quicksort(unsigned long long *array,unsigned int left,unsigned int right,unsigned int offset) {
  unsigned int pivot = 0;;
  unsigned int leftIdx = left;
  unsigned int rightIdx = right;
  if((right-left+1) >= 1) {
    pivot = (left+right)/2;
    while((leftIdx <= pivot) && (rightIdx >= pivot)) {
      unsigned long long pivotValue = array[pivot*3-offset];
      // advance leftIdx past entries already >= pivot (descending order)
      while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
        leftIdx++;
      }
      // retreat rightIdx past entries already <= pivot
      while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
        rightIdx--;
      }
      // swap [leftIdx] & [rightIdx] -- all three fields of each triple
      for(int k = 0; k < 3; k++) {
        unsigned long long tmp = array[3*rightIdx-k];
        array[3*rightIdx-k] = array[3*leftIdx-k];
        array[3*leftIdx-k] = tmp;
      }
      leftIdx++;
      rightIdx--;
      // track the pivot's new position after the swap
      if((leftIdx-1) == pivot) {
        pivot = rightIdx = rightIdx + 1;
      } else if((leftIdx+1) == pivot) {
        pivot = leftIdx = leftIdx-1;
      }
    }
    // recurse on both partitions around the pivot
    gc_quicksort(array, left, pivot-1, offset);
    gc_quicksort(array, pivot+1, right, offset);
  }
  return;
}
214
// If core i's accumulated workload exceeds workload_threshold, sort its
// recorded pages by remote-access count (descending) and hash-for-home the
// heaviest ones until the workload drops below the threshold.  Returns the
// index just past the last triple that was converted to h4h.
// NOTE(review): compiled out (#if 0).  core2heavypages[i][j] holds a pointer
// value stored as unsigned long long (written by
// CACHEADAPT_RECORD_PAGE_WORKLOAD) and is cast back to a pointer here --
// verify the round-trip on the target before re-enabling.  Also note the
// signed/unsigned comparison j < index*3.
INLINE int cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold,unsigned long long ** core2heavypages, unsigned long long * workload,int i) {
  int j = 1;
  unsigned int index = (unsigned int)core2heavypages[i][0];
  if(workload[i] > workload_threshold) {
    // sort according to the remoteaccess
    gc_quicksort(&core2heavypages[i][0], 1, index, 0);
    while((workload[i] > workload_threshold) && (j<index*3)) {
      // hfh those pages with more remote accesses 
      bamboo_cache_policy_t policy = {0};
      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
      *((unsigned int*)core2heavypages[i][j]) = policy.word;
      workload[i] -= core2heavypages[i][j+1];
      j += 3;
    }
  }
  return j;
}
232
// Every page cached on the core that accesses it the most. 
// Check to see if any core's pages total more accesses than threshold 
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
// most remote accesses and hash for home them until we get below 
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
// NOTE(review): compiled out (#if 0) and stale: this call site passes 4
// arguments to CACHEADAPT_CHANGE_POLICY_4_PAGE, which now takes 3, and
// passes a 2-D VLA where cacheAdapt_h4h_remote_accesses expects
// unsigned long long ** -- it will not compile if re-enabled as-is.
int cacheAdapt_policy_overload(int coren){
  unsigned int page_index = 0;
  VA page_sva = gcbaseva;
  unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
  unsigned int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  // per-core accumulated workload and per-core heavy-page records
  unsigned long long workload[NUMCORESACTIVE];
  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  unsigned long long total_workload = 0;
  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  for(page_index = 0; page_sva < gctopva; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int hottestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
    // Decide the cache strategy for this page
    // If decide to adapt a new cache strategy, write into the shared block of
    // the gcsharedsamplingtbl. The mem recording information that has been 
    // written is enough to hold the information.
    // Format: page start va + cache strategy(hfh/(host core+[x,y]))
    if(hotfreq != 0) {
      totalfreq/=BAMBOO_PAGE_SIZE;
      hotfreq/=BAMBOO_PAGE_SIZE;
      // locally cache the page in the hottest core
      CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
      CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
      CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);    
    }
    page_sva += BAMBOO_PAGE_SIZE;
  }

  unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  // Check the workload of each core
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
  }

  return numchanged;
}
279
#define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
#define GC_CACHE_ADAPT_CROWD_THRESHOLD  20
// Every page cached on the core that accesses it the most. 
// Check to see if any core's pages total more accesses than threshold 
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
// most remote accesses and hash for home them until we get below 
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  
// Sort pages based on activity.... 
// If more then GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
// core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages, 
// then start hfh these pages(selecting the ones with the most remote 
// accesses first or fewest local accesses) until we get below 
// GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
// NOTE(review): compiled out (#if 0) and stale like
// cacheAdapt_policy_overload (4-arg CACHEADAPT_CHANGE_POLICY_4_PAGE call,
// VLA passed as unsigned long long **), and the function's closing brace is
// missing before the #endif -- restore it before re-enabling.
int cacheAdapt_policy_crowd(int coren){
  unsigned int page_index = 0;
  VA page_sva = gcbaseva;
  unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
  unsigned int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  // per-core accumulated workload and per-core heavy-page records
  unsigned long long workload[NUMCORESACTIVE];
  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  unsigned long long total_workload = 0;
  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  for(page_index = 0; page_sva < gctopva; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int hottestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
    // Decide the cache strategy for this page
    // If decide to adapt a new cache strategy, write into the shared block of
    // the gcsharedsamplingtbl. The mem recording information that has been 
    // written is enough to hold the information.
    // Format: page start va + cache strategy(hfh/(host core+[x,y]))
    if(hotfreq != 0) {
      totalfreq/=BAMBOO_PAGE_SIZE;
      hotfreq/=BAMBOO_PAGE_SIZE;
      // locally cache the page in the hottest core
      CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
      CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
      CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
    }
    page_sva += BAMBOO_PAGE_SIZE;
  }

  unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  // Check the workload of each core
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    unsigned int index=(unsigned int)core2heavypages[i][0];
    int j=cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
    // Check if the accesses are crowded on few pages
    // sort according to the total access
inner_crowd:
    gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
    unsigned long long threshold=GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
    int num_crowded = 0;
    unsigned long long t_workload = 0;
    do {
      t_workload += core2heavypages[i][j+num_crowded*3+1];
      num_crowded++;
    } while(t_workload < threshold);
    // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough 
    // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
    if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
      // need to hfh these pages
      // sort the pages according to remote access
      gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
      // h4h those pages with more remote accesses 
      bamboo_cache_policy_t policy = {0};
      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
      *((unsigned int*)core2heavypages[i][j]) = policy.word;
      workload[i] -= core2heavypages[i][j+1];
      t_workload -= core2heavypages[i][j+1];
      j += 3;
      threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
      goto inner_crowd;
    }
  }

  return numchanged;
362 #endif
363
// Decide the new per-page cache policy for this core's slice of shared pages,
// dispatching to whichever policy the build selected via
// GC_CACHE_ADAPT_POLICY1..4.  Results are written into gccachepolicytbl by
// the policy functions.
// Fixed: the function is declared to return unsigned int but had no return
// statement (undefined behavior if a caller ever uses the value); all active
// policies report nothing, so return 0.
unsigned int cacheAdapt_decision(int coren) {
  BAMBOO_CACHE_MF();
  // check the statistic data
  // for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
//#elif defined GC_CACHE_ADAPT_POLICY5
//  cacheAdapt_policy_overload(coren);
//#elif defined GC_CACHE_ADAPT_POLICY6
//  cacheAdapt_policy_crowd(coren);
#endif
  return 0;
}
382
383 // adapt the cache strategy for the mutator
384 void cacheAdapt_mutator() {
385   BAMBOO_CACHE_MF();
386   // check the changes and adapt them
387   int * tmp_p = gccachepolicytbl;
388   unsigned int page_sva = gcbaseva;
389   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
390     // read out the policy
391     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
392     // adapt the policy
393     if(policy.word != 0) {
394       bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
395     }
396     tmp_p += 1;
397   }
398 }
399
// Client-side driver for the two cache-adaption GC phases: wait for the
// master's CACHEPOLICYPHASE, decide this core's per-page policies, report
// back, then wait for PREFINISHPHASE, apply the policies, report back, and
// reset the sampling state.  The statement order follows the master/client
// message protocol and must not be rearranged.
void cacheAdapt_phase_client() {
  WAITFORGCPHASE(CACHEPOLICYPHASE);
  GC_PRINTF("Start cachepolicy phase\n");
  // decide the new policy for this core's slice of the shared pages
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  //send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish cachepolicy phase\n");

  WAITFORGCPHASE(PREFINISHPHASE);
  GC_PRINTF("Start prefinish phase\n");
  // cache adapt phase: apply the decided policies, then leave gc cache mode
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  //send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish prefinish phase\n");
  CACHEADAPT_SAMPING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbl for the next sampling period
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
  }
}
423
424 extern unsigned long long gc_output_cache_policy_time;
425
// Master-side driver for the cache-adaption GC phases: dump the revised
// sampling data, broadcast CACHEPOLICYPHASE so all cores decide per-page
// policies in parallel, then broadcast PREFINISHPHASE so all cores apply
// them, and finally reset the sampling state.  The statement order follows
// the master/client message protocol and must not be rearranged.
void cacheAdapt_phase_master() {
  GCPROFILE_ITEM();
  // account the time spent dumping the revised sampling data separately
  unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
  CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
  gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
  // let all cores to parallelly process the revised profile data and decide 
  // the cache policy for each page
  gc_status_info.gcphase = CACHEPOLICYPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
  GC_PRINTF("Start cachepolicy phase \n");
  // cache adapt phase: master decides its own slice too
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  GC_CHECK_ALL_CORE_STATUS(CACHEPOLICYPHASE==gc_status_info.gcphase);
  BAMBOO_CACHE_MF();

  // let all cores to adopt new policies
  gc_status_info.gcphase = PREFINISHPHASE;
  // Note: all cores should flush their runtime data including non-gc cores
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
  GC_PRINTF("Start prefinish phase \n");
  // cache adapt phase: apply the decided policies, then leave gc cache mode
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  GC_CHECK_ALL_CORE_STATUS(PREFINISHPHASE==gc_status_info.gcphase);

  CACHEADAPT_SAMPING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbl for the next sampling period
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
    BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
  }
}
459
460 void gc_output_cache_sampling() {
461   unsigned int page_index = 0;
462   VA page_sva = 0;
463   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
464   for(page_index = 0; page_index < page_num; page_index++) {
465     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
466     unsigned int block = 0;
467     BLOCKINDEX(block, (void *) page_sva);
468     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
469     printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
470     for(int i = 0; i < NUMCORESACTIVE; i++) {
471       int * local_tbl = (int *)((void *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
472       int freq = local_tbl[page_index];
473       //if(freq != 0) {
474         printf("%d,  ", freq);
475       //}
476     }
477     printf("\n");
478   }
479   printf("=================\n");
480
481
482 void gc_output_cache_sampling_r() {
483   // TODO summary data
484   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
485   for(int i = 0; i < NUMCORESACTIVE; i++) {
486     for(int j = 0; j < NUMCORESACTIVE; j++) {
487       sumdata[i][j] = 0;
488     }
489   }
490   tprintf("cache sampling_r \n");
491   unsigned int page_index = 0;
492   VA page_sva = 0;
493   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
494   for(page_index = 0; page_index < page_num; page_index++) {
495     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
496     unsigned int block = 0;
497     BLOCKINDEX(block, (void *)page_sva);
498     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
499     printf(" %x,  %d,  %d,  ",(int)page_sva,page_index,coren);
500     int accesscore = 0; // TODO
501     for(int i = 0; i < NUMCORESACTIVE; i++) {
502       int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
503       int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
504       printf("%d,  ", freq);
505       if(freq != 0) {
506         accesscore++;// TODO
507       }
508     }
509     if(accesscore!=0) {
510       for(int i = 0; i < NUMCORESACTIVE; i++) {
511         int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
512         int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
513         sumdata[accesscore-1][i]+=freq;
514       }
515     }
516   
517     printf("\n");
518   }
519   // TODO printout the summary data
520   for(int i = 0; i < NUMCORESACTIVE; i++) {
521     printf("%d  ", i);
522     for(int j = 0; j < NUMCORESACTIVE; j++) {
523       printf(" %d  ", sumdata[j][i]);
524     }
525     printf("\n");
526   }
527   printf("=================\n");
528
529 #endif // GC_CACHE_ADAPT