Robust/src/Runtime/bamboo/multicorecache.c

   1 #ifdef GC_CACHE_ADAPT
   2 #include "multicorecache.h"
   3
   4 typedef struct gc_cache_revise_info {
   5   unsigned int orig_page_start_va;
   6   unsigned int orig_page_end_va;
   7   unsigned int orig_page_index;
   8   unsigned int to_page_start_va;
   9   unsigned int to_page_end_va;
  10   unsigned int to_page_index;
  11   unsigned int revised_sampling[NUMCORESACTIVE];
  12 } gc_cache_revise_info_t;
  13 gc_cache_revise_info_t gc_cache_revise_infomation;
  14
  15 INLINE void samplingDataInit() {
  16   gc_cache_revise_infomation.to_page_start_va = (unsigned int)to->ptr;
  17   unsigned int toindex = (unsigned int)(tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
  18   gc_cache_revise_infomation.to_page_end_va = gcbaseva +
  19     (BAMBOO_PAGE_SIZE)*(toindex+1);
  20   gc_cache_revise_infomation.to_page_index = toindex;
  21   gc_cache_revise_infomation.orig_page_start_va = (unsigned int)orig->ptr;
  22   gc_cache_revise_infomation.orig_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
  23   *(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
  24   gc_cache_revise_infomation.orig_page_index =
  25     ((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
  26 }
  27
  28 INLINE void samplingDataConvert(unsigned int current_ptr) {
  29   unsigned int tmp_factor =
  30   current_ptr-gc_cache_revise_infomation.to_page_start_va;
  31   unsigned int topage=gc_cache_revise_infomation.to_page_index;
  32   unsigned int oldpage = gc_cache_revise_infomation.orig_page_index;
  33   int * newtable=&gccachesamplingtbl_r[topage];
  34   int * oldtable=&gccachesamplingtbl[oldpage];
  35
  36   for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
  37     (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
  38     newtable=(int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
  39     oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
  40   }
  41 }
  42
  43 INLINE void completePageConvert(struct moveHelper * orig,
  44                                 struct moveHelper * to,
  45                                 unsigned int current_ptr,
  46                                 bool closeToPage) {
  47   unsigned int ptr = 0;
  48   unsigned int tocompare = 0;
  49   if(closeToPage) {
  50     ptr = to->ptr;
  51     tocompare = gc_cache_revise_infomation.to_page_end_va;
  52   } else {
  53     ptr = orig->ptr;
  54     tocompare = gc_cache_revise_infomation.orig_page_end_va;
  55   }
  56   if((unsigned int)ptr >= (unsigned int)tocompare) {
  57     // end of an orig/to page
  58     // compute the impact of this page for the new page
  59     samplingDataConvert(current_ptr);
  60     // prepare for an new orig page
  61     unsigned int tmp_index =
  62       (unsigned int)((unsigned int)orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
  63     gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
  64     gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
  65       (BAMBOO_PAGE_SIZE)*(unsigned int)(tmp_index+1);
  66     gc_cache_revise_infomation.orig_page_index = tmp_index;
  67     gc_cache_revise_infomation.to_page_start_va = to->ptr;
  68     if(closeToPage) {
  69       gc_cache_revise_infomation.to_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
  70         *(((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
  71       gc_cache_revise_infomation.to_page_index =
  72         ((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE);
  73     }
  74   }
  75 }
  76
  77 // prepare for cache adaption:
  78 //   -- flush the shared heap
  79 //   -- clean dtlb entries
  80 //   -- change cache strategy
  81 void cacheAdapt_gc(bool isgccachestage) {
  82   // flush the shared heap
  83   BAMBOO_CACHE_FLUSH_L2();
  84
  85   // clean the dtlb entries
  86   BAMBOO_CLEAN_DTLB();
  87
  88   // change the cache strategy
  89   gccachestage = isgccachestage;
  90 }
  91
// The master core decides how to adapt the cache strategy for the mutator
// according to the collected statistical data.
  94
  95 // make all pages hfh
  96 int cacheAdapt_policy_h4h(){
  97   unsigned int page_index = 0;
  98   VA page_sva = 0;
  99   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 100   unsigned int numchanged = 0;
 101   int * tmp_p = gccachepolicytbl+1;
 102   for(page_index = 0; page_index < page_num; page_index++) {
 103     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 104     bamboo_cache_policy_t policy = {0};
 105     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
 106     *tmp_p = page_index;
 107     tmp_p++;
 108     *tmp_p = policy.word;
 109     tmp_p++;
 110     numchanged++;
 111   }
 112
 113   return numchanged;
 114 }
 115
 116 // make all pages local as non-cache-adaptable gc local mode
 117 int cacheAdapt_policy_local(){
 118   unsigned int page_index = 0;
 119   VA page_sva = 0;
 120   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 121   unsigned int numchanged = 0;
 122   int * tmp_p = gccachepolicytbl+1;
 123   for(page_index = 0; page_index < page_num; page_index++) {
 124     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 125     bamboo_cache_policy_t policy = {0};
 126     unsigned int block = 0;
 127     BLOCKINDEX(page_sva, &block);
 128     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 129     // locally cache the page in the hotest core
 130     // NOTE: (x,y) should be changed to (x+1, y+1)!!!
 131     policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
 132     policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
 133     policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
 134     *tmp_p = page_index;
 135     tmp_p++;
 136     *tmp_p = policy.word;
 137     tmp_p++;
 138     numchanged++;
 139   }
 140
 141   return numchanged;
 142 }
 143
 144 int cacheAdapt_policy_hotest(){
 145   unsigned int page_index = 0;
 146   VA page_sva = 0;
 147   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 148   unsigned int numchanged = 0;
 149   int * tmp_p = gccachepolicytbl+1;
 150   for(page_index = 0; page_index < page_num; page_index++) {
 151     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 152     bamboo_cache_policy_t policy = {0};
 153     unsigned int hotestcore = 0;
 154     unsigned int hotfreq = 0;
 155
 156     int *local_tbl=&gccachesamplingtbl_r[page_index];
 157     for(int i = 0; i < NUMCORESACTIVE; i++) {
 158       int freq = *local_tbl;
 159       local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
 160
 161       // check the freqency, decide if this page is hot for the core
 162       if(hotfreq < freq) {
 163         hotfreq = freq;
 164         hotestcore = i;
 165       }
 166     }
 167     // TODO
 168     // Decide the cache strategy for this page
 169     // If decide to adapt a new cache strategy, write into the shared block of
 170     // the gcsharedsamplingtbl. The mem recording information that has been
 171     // written is enough to hold the information.
 172     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
 173     if(hotfreq == 0) {
 174       // this page has not been accessed, do not change its cache policy
 175       continue;
 176     } else {
 177       // locally cache the page in the hotest core
 178       // NOTE: (x,y) should be changed to (x+1, y+1)!!!
 179       policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
 180       policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
 181       policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
 182       *tmp_p = page_index;
 183       tmp_p++;
 184       *tmp_p = policy.word;
 185       tmp_p++;
 186       numchanged++;
 187     }
 188   }
 189
 190   return numchanged;
 191 }
 192
 193 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  50
 194 // cache the page on the core that accesses it the most if that core accesses
 195 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
 196 // h4h the page.
 197 int cacheAdapt_policy_dominate(){
 198   unsigned int page_index = 0;
 199   VA page_sva = 0;
 200   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 201   unsigned int numchanged = 0;
 202   int * tmp_p = gccachepolicytbl+1;
 203   for(page_index = 0; page_index < page_num; page_index++) {
 204     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 205     bamboo_cache_policy_t policy = {0};
 206     unsigned int hotestcore = 0;
 207     unsigned long long totalfreq = 0;
 208     unsigned int hotfreq = 0;
 209
 210     int *local_tbl=&gccachesamplingtbl_r[page_index];
 211     for(int i = 0; i < NUMCORESACTIVE; i++) {
 212       int freq = *local_tbl;
 213       local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
 214       totalfreq += freq;
 215       // check the freqency, decide if this page is hot for the core
 216       if(hotfreq < freq) {
 217         hotfreq = freq;
 218         hotestcore = i;
 219       }
 220     }
 221
 222     // Decide the cache strategy for this page
 223     // If decide to adapt a new cache strategy, write into the shared block of
 224     // the gcpolicytbl
 225     // Format: page start va + cache policy
 226     if(hotfreq == 0) {
 227       // this page has not been accessed, do not change its cache policy
 228       continue;
 229     }
 230     totalfreq =
 231       (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
 232     hotfreq/=BAMBOO_PAGE_SIZE;
 233     if(hotfreq < totalfreq) {
 234       // use hfh
 235       policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
 236     } else {
 237       // locally cache the page in the hotest core
 238       // NOTE: (x,y) should be changed to (x+1, y+1)!!!
 239       policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
 240       policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
 241       policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
 242     }
 243     *tmp_p = page_index;
 244     tmp_p++;
 245     *tmp_p = policy.word;
 246     tmp_p++;
 247     numchanged++;
 248   }
 249
 250   return numchanged;
 251 }
 252
 253 #define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
 254
 255 void gc_quicksort(unsigned long long *array,
 256                   unsigned int left,
 257                   unsigned int right,
 258                   unsigned int offset) {
 259   unsigned int pivot = 0;;
 260   unsigned int leftIdx = left;
 261   unsigned int rightIdx = right;
 262   if((right-left+1) >= 1) {
 263     pivot = (left+right)/2;
 264     while((leftIdx <= pivot) && (rightIdx >= pivot)) {
 265       unsigned long long pivotValue = array[pivot*3-offset];
 266       while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
 267         leftIdx++;
 268       }
 269       while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
 270         rightIdx--;
 271       }
 272       // swap [leftIdx] & [rightIdx]
 273       for(int k = 0; k < 3; k++) {
 274         unsigned long long tmp = array[3*rightIdx-k];
 275         array[3*rightIdx-k] = array[3*leftIdx-k];
 276         array[3*leftIdx-k] = tmp;
 277       }
 278       leftIdx++;
 279       rightIdx--;
 280       if((leftIdx-1) == pivot) {
 281         pivot = rightIdx = rightIdx + 1;
 282       } else if((leftIdx+1) == pivot) {
 283         pivot = leftIdx = leftIdx-1;
 284       }
 285     }
 286     gc_quicksort(array, left, pivot-1, offset);
 287     gc_quicksort(array, pivot+1, right, offset);
 288   }
 289   return;
 290 }
 291
 292 // Every page cached on the core that accesses it the most.
 293 // Check to see if any core's pages total more accesses than threshold
 294 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the
 295 // most remote accesses and hash for home them until we get below
 296 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
 297 int cacheAdapt_policy_overload(){
 298   unsigned int page_index = 0;
 299   VA page_sva = 0;
 300   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 301   unsigned int numchanged = 0;
 302   int * tmp_p = gccachepolicytbl+1;
 303   unsigned long long workload[NUMCORESACTIVE];
 304   memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
 305   unsigned long long total_workload = 0;
 306   unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
 307   memset(core2heavypages,0,
 308       sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
 309   for(page_index = 0; page_index < page_num; page_index++) {
 310     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 311     bamboo_cache_policy_t policy = {0};
 312     unsigned int hotestcore = 0;
 313     unsigned long long totalfreq = 0;
 314     unsigned int hotfreq = 0;
 315
 316     int *local_tbl=&gccachesamplingtbl_r[page_index];
 317     for(int i = 0; i < NUMCORESACTIVE; i++) {
 318       int freq = *local_tbl;
 319       local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
 320       totalfreq += freq;
 321       // check the freqency, decide if this page is hot for the core
 322       if(hotfreq < freq) {
 323         hotfreq = freq;
 324         hotestcore = i;
 325       }
 326     }
 327     // Decide the cache strategy for this page
 328     // If decide to adapt a new cache strategy, write into the shared block of
 329     // the gcsharedsamplingtbl. The mem recording information that has been
 330     // written is enough to hold the information.
 331     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
 332     if(hotfreq == 0) {
 333       // this page has not been accessed, do not change its cache policy
 334       continue;
 335     }
 336
 337     totalfreq/=BAMBOO_PAGE_SIZE;
 338     hotfreq/=BAMBOO_PAGE_SIZE;
 339     // locally cache the page in the hotest core
 340     // NOTE: (x,y) should be changed to (x+1, y+1)!!!
 341     policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
 342     policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
 343     policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
 344     *tmp_p = page_index;
 345     tmp_p++;
 346     *tmp_p = policy.word;
 347     tmp_p++;
 348     numchanged++;
 349     workload[hotestcore] += totalfreq;
 350     total_workload += totalfreq;
 351     // insert into core2heavypages using quicksort
 352     unsigned long long remoteaccess = totalfreq - hotfreq;
 353     unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
 354     core2heavypages[hotestcore][3*index+3] = remoteaccess;
 355     core2heavypages[hotestcore][3*index+2] = totalfreq;
 356     core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
 357     core2heavypages[hotestcore][0]++;
 358   }
 359
 360   unsigned long long workload_threshold =
 361   total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
 362   // Check the workload of each core
 363   for(int i = 0; i < NUMCORESACTIVE; i++) {
 364     int j = 1;
 365     unsigned int index = (unsigned int)core2heavypages[i][0];
 366     if(workload[i] > workload_threshold) {
 367       // sort according to the remoteaccess
 368       gc_quicksort(&core2heavypages[i][0], 1, index, 0);
 369       while((workload[i] > workload_threshold) && (j<index*3)) {
 370         // hfh those pages with more remote accesses
 371         bamboo_cache_policy_t policy = {0};
 372         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
 373         *((unsigned int*)core2heavypages[i][j]) = policy.word;
 374         workload[i] -= core2heavypages[i][j+1];
 375         j += 3;
 376       }
 377     }
 378   }
 379
 380   return numchanged;
 381 }
 382
 383 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
 384 #define GC_CACHE_ADAPT_CROWD_THRESHOLD  20
 385 // Every page cached on the core that accesses it the most.
 386 // Check to see if any core's pages total more accesses than threshold
 387 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the
 388 // most remote accesses and hash for home them until we get below
 389 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
 390 // Sort pages based on activity....
 391 // If more then GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
 392 // core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
 393 // then start hfh these pages(selecting the ones with the most remote
 394 // accesses first or fewest local accesses) until we get below
 395 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
 396 int cacheAdapt_policy_crowd(){
 397   unsigned int page_index = 0;
 398   VA page_sva = 0;
 399   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 400   unsigned int numchanged = 0;
 401   int * tmp_p = gccachepolicytbl+1;
 402   unsigned long long workload[NUMCORESACTIVE];
 403   memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
 404   unsigned long long total_workload = 0;
 405   unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
 406   memset(core2heavypages,0,
 407     sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
 408   for(page_index = 0; page_index < page_num; page_index++) {
 409     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 410     bamboo_cache_policy_t policy = {0};
 411     unsigned int hotestcore = 0;
 412     unsigned long long totalfreq = 0;
 413     unsigned int hotfreq = 0;
 414
 415     int *local_tbl=&gccachesamplingtbl_r[page_index];
 416     for(int i = 0; i < NUMCORESACTIVE; i++) {
 417       int freq = *local_tbl;
 418       local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
 419       totalfreq += freq;
 420       // check the freqency, decide if this page is hot for the core
 421       if(hotfreq < freq) {
 422         hotfreq = freq;
 423         hotestcore = i;
 424       }
 425     }
 426     // Decide the cache strategy for this page
 427     // If decide to adapt a new cache strategy, write into the shared block of
 428     // the gcsharedsamplingtbl. The mem recording information that has been
 429     // written is enough to hold the information.
 430     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
 431     if(hotfreq == 0) {
 432       // this page has not been accessed, do not change its cache policy
 433       continue;
 434     }
 435     totalfreq/=BAMBOO_PAGE_SIZE;
 436     hotfreq/=BAMBOO_PAGE_SIZE;
 437     // locally cache the page in the hotest core
 438     // NOTE: (x,y) should be changed to (x+1, y+1)!!!
 439     policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
 440     policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
 441     policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
 442     *tmp_p = page_index;
 443     tmp_p++;
 444     *tmp_p = policy.word;
 445     tmp_p++;
 446     numchanged++;
 447     workload[hotestcore] += totalfreq;
 448     total_workload += totalfreq;
 449     // insert into core2heavypages using quicksort
 450     unsigned long long remoteaccess = totalfreq - hotfreq;
 451     unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
 452     core2heavypages[hotestcore][3*index+3] = remoteaccess;
 453     core2heavypages[hotestcore][3*index+2] = totalfreq;
 454     core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
 455     core2heavypages[hotestcore][0]++;
 456   }
 457
 458   unsigned long long workload_threshold =
 459   total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
 460   // Check the workload of each core
 461   for(int i = 0; i < NUMCORESACTIVE; i++) {
 462     int j = 1;
 463     unsigned int index = (unsigned int)core2heavypages[i][0];
 464     if(workload[i] > workload_threshold) {
 465       // sort according to the remoteaccess
 466       gc_quicksort(&core2heavypages[i][0], 1, index, 0);
 467       while((workload[i] > workload_threshold) && (j<index*3)) {
 468         // hfh those pages with more remote accesses
 469         bamboo_cache_policy_t policy = {0};
 470         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
 471         *((unsigned int*)core2heavypages[i][j]) = policy.word;
 472         workload[i] -= core2heavypages[i][j+1];
 473         j += 3;
 474       }
 475     }
 476
 477     // Check if the accesses are crowded on few pages
 478     // sort according to the total access
 479 inner_crowd:
 480     gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
 481     unsigned long long threshold =
 482       GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
 483     int num_crowded = 0;
 484     unsigned long long t_workload = 0;
 485     do {
 486       t_workload += core2heavypages[i][j+num_crowded*3+1];
 487       num_crowded++;
 488     } while(t_workload < threshold);
 489     // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough
 490     // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
 491     if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
 492       // need to hfh these pages
 493       // sort the pages according to remote access
 494       gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
 495       // h4h those pages with more remote accesses
 496       bamboo_cache_policy_t policy = {0};
 497       policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
 498       *((unsigned int*)core2heavypages[i][j]) = policy.word;
 499       workload[i] -= core2heavypages[i][j+1];
 500       t_workload -= core2heavypages[i][j+1];
 501       j += 3;
 502       threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
 503       goto inner_crowd;
 504     }
 505   }
 506
 507   return numchanged;
 508 }
 509
 510 void cacheAdapt_master() {
 511   CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
 512   unsigned int numchanged = 0;
 513   // check the statistic data
 514   // for each page, decide the new cache strategy
 515 #ifdef GC_CACHE_ADAPT_POLICY1
 516   numchanged = cacheAdapt_policy_h4h();
 517 #elif defined GC_CACHE_ADAPT_POLICY2
 518   numchanged = cacheAdapt_policy_local();
 519 #elif defined GC_CACHE_ADAPT_POLICY3
 520   numchanged = cacheAdapt_policy_hotest();
 521 #elif defined GC_CACHE_ADAPT_POLICY4
 522   numchanged = cacheAdapt_policy_dominate();
 523 #elif defined GC_CACHE_ADAPT_POLICY5
 524   numchanged = cacheAdapt_policy_overload();
 525 #elif defined GC_CACHE_ADAPT_POLICY6
 526   numchanged = cacheAdapt_policy_crowd();
 527 #endif
 528   *gccachepolicytbl = numchanged;
 529 }
 530
 531 // adapt the cache strategy for the mutator
 532 void cacheAdapt_mutator() {
 533   int numchanged = *gccachepolicytbl;
 534   // check the changes and adapt them
 535   int * tmp_p = gccachepolicytbl+1;
 536   while(numchanged--) {
 537     // read out the policy
 538     int page_index = *tmp_p;
 539     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
 540     // adapt the policy
 541     bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
 542         policy, BAMBOO_PAGE_SIZE);
 543
 544     tmp_p += 2;
 545   }
 546 }
 547
 548 void cacheAdapt_phase_client() {
 549   while(true) {
 550     if(PREFINISHPHASE == gcphase) {
 551       break;
 552     }
 553   }
 554   GC_PRINTF("Start prefinish phase\n");
 555   // cache adapt phase
 556   cacheAdapt_mutator();
 557   cacheAdapt_gc(false);
 558   //send init finish msg to core coordinator
 559   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
 560   GC_PRINTF("Finish prefinish phase\n");
 561   CACHEADAPT_SAMPING_RESET();
 562   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
 563     // zero out the gccachesamplingtbl
 564     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
 565     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
 566         size_cachesamplingtbl_local_r);
 567   }
 568 }
 569
 570 void cacheAdapt_phase_master() {
 571   GCPROFILEITEM();
 572   gcphase = PREFINISHPHASE;
 573   gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
 574   // Note: all cores should flush their runtime data including non-gc
 575   //       cores
 576   for(i = 1; i < NUMCORESACTIVE; ++i) {
 577     // send start flush messages to all cores
 578     gccorestatus[i] = 1;
 579     send_msg_1(i, GCSTARTPREF, false);
 580   }
 581   GC_PRINTF("Start prefinish phase \n");
 582   // cache adapt phase
 583   cacheAdapt_mutator();
 584   CACHEADPAT_OUTPUT_CACHE_POLICY();
 585   cacheAdapt_gc(false);
 586
 587   gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
 588   while(PREFINISHPHASE == gcphase) {
 589     // check the status of all cores
 590     BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
 591     if(gc_checkAllCoreStatus_I()) {
 592       BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 593       break;
 594     }
 595     BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
 596   }
 597
 598   CACHEADAPT_SAMPING_RESET();
 599   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
 600     // zero out the gccachesamplingtbl
 601     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
 602     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
 603         size_cachesamplingtbl_local_r);
 604     BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
 605   }
 606 }
 607
 608 void gc_output_cache_sampling() {
 609   unsigned int page_index = 0;
 610   VA page_sva = 0;
 611   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 612   for(page_index = 0; page_index < page_num; page_index++) {
 613     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 614     unsigned int block = 0;
 615     BLOCKINDEX(page_sva, &block);
 616     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 617     tprintf("va: %x page_index: %d host: %d\n",(int)page_sva,page_index,coren);
 618     for(int i = 0; i < NUMCORESACTIVE; i++) {
 619       int * local_tbl = (int *)((void *)gccachesamplingtbl
 620           +size_cachesamplingtbl_local*i);
 621       int freq = local_tbl[page_index];
 622       printf("%8d ",freq);
 623     }
 624     printf("\n");
 625   }
 626   printf("=================\n");
 627 }
 628
 629 void gc_output_cache_sampling_r() {
 630   unsigned int page_index = 0;
 631   VA page_sva = 0;
 632   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 633   for(page_index = 0; page_index < page_num; page_index++) {
 634     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 635     unsigned int block = 0;
 636     BLOCKINDEX(page_sva, &block);
 637     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 638     tprintf("va: %x page_index: %d host: %d\n",(int)page_sva,page_index,coren);
 639     for(int i = 0; i < NUMCORESACTIVE; i++) {
 640       int * local_tbl = (int *)((void *)gccachesamplingtbl_r
 641           +size_cachesamplingtbl_local_r*i);
 642       int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
 643       printf("%8d ",freq);
 644     }
 645
 646     printf("\n");
 647   }
 648   printf("=================\n");
 649 }
 650 #endif // GC_CACHE_ADAPT