Robust/src/Runtime/bamboo/multicorecache.c

   1 #ifdef GC_CACHE_ADAPT
   2 #include "multicorecache.h"
   3 #include "multicoremsg.h"
   4 #include "multicoregcprofile.h"
   5
   6 gc_cache_revise_info_t gc_cache_revise_information;
   7
   8 // prepare for cache adaption:
   9 //   -- flush the shared heap
  10 //   -- clean dtlb entries
  11 //   -- change cache strategy
  12 void cacheAdapt_gc(bool isgccachestage) {
  13   // flush the shared heap
  14   BAMBOO_CACHE_FLUSH_L2();
  15
  16   // clean the dtlb entries
  17   BAMBOO_CLEAN_DTLB();
  18
  19   // change the cache strategy
  20   gccachestage = isgccachestage;
  21 }
  22
  23 // the master core decides how to adapt cache strategy for the mutator
  24 // according to collected statistic data
  25
  26 // find the core that accesses the page #page_index most
  27 #define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  28   { \
  29     int *local_tbl=&gccachesamplingtbl_r[page_index]; \
  30     for(int i = 0; i < NUMCORESACTIVE; i++) { \
  31       int freq = *local_tbl; \
  32       local_tbl=(int *)(((void *)local_tbl)+size_cachesamplingtbl_local_r); \
  33       if(hotfreq < freq) { \
  34         hotfreq = freq; \
  35         hottestcore = i; \
  36       } \
  37     } \
  38   }
  39 // find the core that accesses the page #page_index most and comput the total
  40 // access time of the page at the same time
  41 #define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  42   { \
  43     int *local_tbl=&gccachesamplingtbl_r[page_index]; \
  44     for(int i = 0; i < NUMCORESACTIVE; i++) { \
  45       int freq = *local_tbl; \
  46       local_tbl=(int *)(((void *)local_tbl)+size_cachesamplingtbl_local_r); \
  47       totalfreq += freq; \
  48       if(hotfreq < freq) { \
  49         hotfreq = freq; \
  50         hottestcore = i; \
  51       } \
  52     } \
  53   }
  54 // Set the policy as hosted by coren
  55 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
  56 #define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  57   { \
  58     (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
  59     (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
  60     (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  61   }
  62 // store the new policy information at tmp_p in gccachepolicytbl
  63 #define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  64   { \
  65     ((int*)(tmp_p))[page_index] = (policy).word; \
  66   }
  67
  68 // make all pages hfh
  69 void cacheAdapt_policy_h4h(int coren){
  70   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
  71   unsigned int page_gap=page_num/NUMCORESACTIVE;
  72   unsigned int page_index=page_gap*coren;
  73   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  74   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  75   int * tmp_p = gccachepolicytbl;
  76   for(; page_index < page_index_end; page_index++) {
  77     bamboo_cache_policy_t policy = {0};
  78     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
  79     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
  80     page_sva += BAMBOO_PAGE_SIZE;
  81   }
  82 }
  83
  84 // make all pages local as non-cache-adaptable gc local mode
  85 void cacheAdapt_policy_local(int coren){
  86   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
  87   unsigned int page_gap=page_num/NUMCORESACTIVE;
  88   unsigned int page_index=page_gap*coren;
  89   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  90   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  91   int * tmp_p = gccachepolicytbl;
  92   for(; page_index < page_index_end; page_index++) {
  93     bamboo_cache_policy_t policy = {0};
  94     unsigned int block = 0;
  95     BLOCKINDEX(block, (void *) page_sva);
  96     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
  97     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
  98     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
  99     page_sva += BAMBOO_PAGE_SIZE;
 100   }
 101 }
 102
 103 void cacheAdapt_policy_hottest(int coren){
 104   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
 105   unsigned int page_gap=page_num/NUMCORESACTIVE;
 106   unsigned int page_index=page_gap*coren;
 107   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
 108   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
 109   int * tmp_p = gccachepolicytbl;
 110   for(; page_index < page_index_end; page_index++) {
 111     bamboo_cache_policy_t policy = {0};
 112     unsigned int hottestcore = 0;
 113     unsigned int hotfreq = 0;
 114     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
 115     // TODO
 116     // Decide the cache strategy for this page
 117     // If decide to adapt a new cache strategy, write into the shared block of
 118     // the gcsharedsamplingtbl. The mem recording information that has been
 119     // written is enough to hold the information.
 120     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
 121     if(hotfreq != 0) {
 122       // locally cache the page in the hottest core
 123       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
 124     }
 125     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
 126     page_sva += BAMBOO_PAGE_SIZE;
 127   }
 128 }
 129
 130 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
 131 // cache the page on the core that accesses it the most if that core accesses
 132 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
 133 // h4h the page.
 134 void cacheAdapt_policy_dominate(int coren){
 135   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
 136   unsigned int page_gap=page_num/NUMCORESACTIVE;
 137   unsigned int page_index=page_gap*coren;
 138   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
 139   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
 140   int * tmp_p = gccachepolicytbl;
 141   for(; page_index < page_index_end; page_index++) {
 142     bamboo_cache_policy_t policy = {0};
 143     unsigned int hottestcore = 0;
 144     unsigned int totalfreq = 0;
 145     unsigned int hotfreq = 0;
 146     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
 147     // Decide the cache strategy for this page
 148     // If decide to adapt a new cache strategy, write into the shared block of
 149     // the gcpolicytbl
 150     // Format: page start va + cache policy
 151     if(hotfreq != 0) {
 152       totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
 153       if((unsigned int)hotfreq < (unsigned int)totalfreq) {
 154         // use hfh
 155         //policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
 156         unsigned int block = 0;
 157         BLOCKINDEX(block, (void *) page_sva);
 158         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 159         CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
 160       } else {
 161         // locally cache the page in the hottest core
 162         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
 163       }
 164     }
 165     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
 166     page_sva += BAMBOO_PAGE_SIZE;
 167   }
 168 }
 169
 170 unsigned int cacheAdapt_decision(int coren) {
 171   BAMBOO_CACHE_MF();
 172   // check the statistic data
 173   // for each page, decide the new cache strategy
 174 #ifdef GC_CACHE_ADAPT_POLICY1
 175   cacheAdapt_policy_h4h(coren);
 176 #elif defined GC_CACHE_ADAPT_POLICY2
 177   cacheAdapt_policy_local(coren);
 178 #elif defined GC_CACHE_ADAPT_POLICY3
 179   cacheAdapt_policy_hottest(coren);
 180 #elif defined GC_CACHE_ADAPT_POLICY4
 181   cacheAdapt_policy_dominate(coren);
 182 #endif
 183 }
 184
 185 // adapt the cache strategy for the mutator
 186 void cacheAdapt_mutator() {
 187   BAMBOO_CACHE_MF();
 188   // check the changes and adapt them
 189   int * tmp_p = gccachepolicytbl;
 190   unsigned int page_sva = gcbaseva;
 191   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
 192     // read out the policy
 193     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
 194     // adapt the policy
 195     if(policy.word != 0) {
 196       bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
 197     }
 198     tmp_p += 1;
 199   }
 200 }
 201
 202 // Cache adapt phase process for clients
 203 void cacheAdapt_phase_client() {
 204   WAITFORGCPHASE(CACHEPOLICYPHASE);
 205   GC_PRINTF("Start cachepolicy phase\n");
 206   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
 207   //send init finish msg to core coordinator
 208   send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
 209   GC_PRINTF("Finish cachepolicy phase\n");
 210
 211   WAITFORGCPHASE(PREFINISHPHASE);
 212   GC_PRINTF("Start prefinish phase\n");
 213   // cache adapt phase
 214   cacheAdapt_mutator();
 215   cacheAdapt_gc(false);
 216   //send init finish msg to core coordinator
 217   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
 218   GC_PRINTF("Finish prefinish phase\n");
 219   CACHEADAPT_SAMPING_RESET();
 220   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
 221     // zero out the gccachesamplingtbl
 222     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
 223     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
 224   }
 225 }
 226
 227 extern unsigned long long gc_output_cache_policy_time;
 228
 229 // Cache adpat phase process for the master
 230 void cacheAdapt_phase_master() {
 231   GCPROFILE_ITEM();
 232   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
 233   CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
 234   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
 235   // let all cores to parallelly process the revised profile data and decide
 236   // the cache policy for each page
 237   gc_status_info.gcphase = CACHEPOLICYPHASE;
 238   GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
 239   GC_PRINTF("Start cachepolicy phase \n");
 240   // cache adapt phase
 241   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
 242   GC_CHECK_ALL_CORE_STATUS();
 243   BAMBOO_CACHE_MF();
 244
 245   // let all cores to adopt new policies
 246   gc_status_info.gcphase = PREFINISHPHASE;
 247   // Note: all cores should flush their runtime data including non-gc cores
 248   GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
 249   GC_PRINTF("Start prefinish phase \n");
 250   // cache adapt phase
 251   cacheAdapt_mutator();
 252   cacheAdapt_gc(false);
 253   GC_CHECK_ALL_CORE_STATUS();
 254
 255   CACHEADAPT_SAMPING_RESET();
 256   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
 257     // zero out the gccachesamplingtbl
 258     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
 259     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
 260     BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
 261   }
 262 }
 263
 264 // output original cache sampling data for each page
 265 void gc_output_cache_sampling() {
 266   //extern volatile bool gc_profile_flag;
 267   //if(!gc_profile_flag) return;
 268   unsigned int page_index = 0;
 269   VA page_sva = 0;
 270   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 271   for(page_index = 0; page_index < page_num; page_index++) {
 272     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 273     unsigned int block = 0;
 274     BLOCKINDEX(block, (void *) page_sva);
 275     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 276     printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
 277     for(int i = 0; i < NUMCORESACTIVE; i++) {
 278       int * local_tbl = (int *)((void *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
 279       int freq = local_tbl[page_index];
 280       //if(freq != 0) {
 281         printf("%d,  ", freq);
 282       //}
 283     }
 284     printf("\n");
 285   }
 286   printf("=================\n");
 287 }
 288
 289 // output revised cache sampling data for each page after compaction
 290 void gc_output_cache_sampling_r() {
 291   //extern volatile bool gc_profile_flag;
 292   //if(!gc_profile_flag) return;
 293   // TODO summary data
 294   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
 295   for(int i = 0; i < NUMCORESACTIVE; i++) {
 296     for(int j = 0; j < NUMCORESACTIVE; j++) {
 297       sumdata[i][j] = 0;
 298     }
 299   }
 300   tprintf("cache sampling_r \n");
 301   unsigned int page_index = 0;
 302   VA page_sva = 0;
 303   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
 304   for(page_index = 0; page_index < page_num; page_index++) {
 305     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
 306     unsigned int block = 0;
 307     BLOCKINDEX(block, (void *)page_sva);
 308     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
 309     printf(" %x,  %d,  %d,  ",(int)page_sva,page_index,coren);
 310     int accesscore = 0; // TODO
 311     for(int i = 0; i < NUMCORESACTIVE; i++) {
 312       int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
 313       int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
 314       printf("%d,  ", freq);
 315       if(freq != 0) {
 316         accesscore++;// TODO
 317       }
 318     }
 319     if(accesscore!=0) {
 320       for(int i = 0; i < NUMCORESACTIVE; i++) {
 321         int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
 322         int freq = local_tbl[page_index]; ///BAMBOO_PAGE_SIZE;
 323         sumdata[accesscore-1][i]+=freq;
 324       }
 325     }
 326
 327     printf("\n");
 328   }
 329   // TODO printout the summary data
 330   for(int i = 0; i < NUMCORESACTIVE; i++) {
 331     printf("%d  ", i);
 332     for(int j = 0; j < NUMCORESACTIVE; j++) {
 333       printf(" %d  ", sumdata[j][i]);
 334     }
 335     printf("\n");
 336   }
 337   printf("=================\n");
 338 }
 339 #endif // GC_CACHE_ADAPT