bug fix...it assumes ints and then the shifts go bad...
[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
1 #ifdef GC_CACHE_ADAPT
2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregc.h"
5 #include "multicoregcprofile.h"
6
7 void cacheadapt_finish_compact(void *toptr) {
8   unsigned int dstpage=((unsigned INTPTR)(toptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
9   unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
10
11   for(int core = 0; core < NUMCORESACTIVE; core++) {
12     (*newtable)=(*newtable)>>6;
13     newtable++;
14   }  
15 }
16
17 void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
18   unsigned int srcpage=((unsigned INTPTR)(srcptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
19   unsigned int dstpage=((unsigned INTPTR)(tostart-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
20   unsigned int numbytes=tofinish-tostart;
21   
22   unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
23   unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
24   
25   unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
26
27   for(int core = 0; core < NUMCORESACTIVE; core++) {
28     (*newtable)+=page64th*(*oldtable);
29     newtable++;
30     oldtable++;
31   }  
32 }
33
34 /* Bytes needed equal to zero is a special case...  It means that we should finish the dst page */
35
36 void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
37   unsigned int numbytes=toptr-tostart;
38
39   void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
40   void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
41   
42   unsigned int topage=((unsigned INTPTR)(toptr-1-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; 
43   unsigned int origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
44
45   unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
46   unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
47
48   //handler
49   unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
50   unsigned int remainorigbytes=origbound-origptr;
51
52   do {
53     //round source bytes down....don't want to close out page if not necessary
54     remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
55
56     if (remaintobytes<=remainorigbytes) {
57       //Need to close out to page
58
59       numbytes+=remaintobytes;
60       unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
61
62       for(int core = 0; core < NUMCORESACTIVE; core++) {
63         (*totable)=(*totable+page64th*(*origtable))>>6;
64         totable++;
65         origtable++;
66       }
67       toptr+=remaintobytes;
68       origptr+=remaintobytes;
69       bytesneeded-=remaintobytes;
70       topage++;//to page is definitely done
71       tobound+=BAMBOO_PAGE_SIZE;
72       origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
73       origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
74     } else {
75       //Finishing off orig page
76
77       numbytes+=remainorigbytes;
78       unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
79       
80       for(int core = 0; core < NUMCORESACTIVE; core++) {
81         (*totable)+=page64th*(*origtable);
82         totable++;
83         origtable++;
84       }
85       toptr+=remainorigbytes;
86       origptr+=remainorigbytes;
87       bytesneeded-=remainorigbytes;
88       origpage++;//just orig page is done
89       origbound+=BAMBOO_PAGE_SIZE;
90     }
91     totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
92     origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
93     
94     remaintobytes=tobound-toptr;
95     remainorigbytes=origbound-origptr;
96     
97     numbytes=0;
98   } while(bytesneeded!=0);
99 }
100
// prepare for cache adaption:
//   -- flush the shared heap
//   -- clean dtlb entries
//   -- change cache strategy
// isgccachestage selects which dtlb handler to install: the GC handler
// during collection, the mutator handler otherwise.
void cacheAdapt_gc(bool isgccachestage) {
  // flush the shared heap
  BAMBOO_CACHE_FLUSH_L2();

  // clean the dtlb entries
  BAMBOO_CLEAN_DTLB();

  if(isgccachestage) {
    bamboo_install_dtlb_handler_for_gc();
  } else {
    bamboo_install_dtlb_handler_for_mutator();
  }
}
119 // the master core decides how to adapt cache strategy for the mutator 
120 // according to collected statistic data
121
// find the core that accesses the page #page_index most.
// Scans this page's NUMCORESACTIVE entries of gccachesamplingtbl_r and
// updates hottestcore/hotfreq in place.  Wrapped in do/while(0) so it is
// safe as a single statement; freq is unsigned to match the table type.
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  do { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[(page_index)*NUMCORESACTIVE]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      unsigned int freq = *local_tbl; \
      local_tbl++; \
      if((hotfreq) < freq) { \
        (hotfreq) = freq; \
        (hottestcore) = i; \
      } \
    } \
  } while(0)
// find the core that accesses the page #page_index most and compute the
// total access count of the page at the same time.
// Updates hottestcore/hotfreq/totalfreq in place; do/while(0) makes the
// macro a single statement, and freq is unsigned to match the table type.
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  do { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[(page_index)*NUMCORESACTIVE]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      unsigned int freq = *local_tbl; \
      local_tbl++; \
      (totalfreq) += freq; \
      if((hotfreq) < freq) { \
        (hotfreq) = freq; \
        (hottestcore) = i; \
      } \
    } \
  } while(0)
// Set the policy as hosted by core #coren.
// NOTE: tile coordinates (x,y) are 1-based in the policy word, hence the +1.
// Fixed: the original first line had trailing whitespace after the '\',
// which breaks the line continuation; also wrapped in do/while(0).
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  do { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  } while(0)
// store the new policy word for page #page_index at tmp_p (gccachepolicytbl)
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  do { \
    ((int*)(tmp_p))[(page_index)] = (policy).word; \
  } while(0)
163
164 // make all pages hfh
165 void cacheAdapt_policy_h4h(int coren){
166   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
167   unsigned int page_gap=page_num/NUMCORESACTIVE;
168   unsigned int page_index=page_gap*coren;
169   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
170   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
171   unsigned int * tmp_p = gccachepolicytbl;
172   for(; page_index < page_index_end; page_index++) {
173     bamboo_cache_policy_t policy = {0};
174     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
175     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
176     page_sva += BAMBOO_PAGE_SIZE;
177   }
178
179
180 // make all pages local as non-cache-adaptable gc local mode
181 void cacheAdapt_policy_local(int coren){
182   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
183   unsigned int page_gap=page_num/NUMCORESACTIVE;
184   unsigned int page_index=page_gap*coren;
185   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
186   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
187   unsigned int * tmp_p = gccachepolicytbl;
188   for(; page_index < page_index_end; page_index++) {
189     bamboo_cache_policy_t policy = {0};
190     unsigned int block = 0;
191     BLOCKINDEX(block, (void *) page_sva);
192     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
193     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
194     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
195     page_sva += BAMBOO_PAGE_SIZE;
196   }
197
198
199 void cacheAdapt_policy_hottest(int coren){
200   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
201   unsigned int page_gap=page_num/NUMCORESACTIVE;
202   unsigned int page_index=page_gap*coren;
203   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
204   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
205   unsigned int * tmp_p = gccachepolicytbl;
206   for(; page_index < page_index_end; page_index++) {
207     bamboo_cache_policy_t policy = {0};
208     unsigned int hottestcore = 0;
209     unsigned int hotfreq = 0;
210     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
211     // TODO
212     // Decide the cache strategy for this page
213     // If decide to adapt a new cache strategy, write into the shared block of
214     // the gcsharedsamplingtbl. The mem recording information that has been 
215     // written is enough to hold the information.
216     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
217     if(hotfreq != 0) {
218       // locally cache the page in the hottest core
219       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
220     }
221     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
222     page_sva += BAMBOO_PAGE_SIZE;
223   }
224
225
226 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
227 // cache the page on the core that accesses it the most if that core accesses 
228 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
229 // h4h the page.
230 void cacheAdapt_policy_dominate(int coren){
231   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
232   unsigned int page_gap=page_num/NUMCORESACTIVE;
233   unsigned int page_index=page_gap*coren;
234   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
235   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
236   unsigned int * tmp_p = gccachepolicytbl;
237   for(; page_index < page_index_end; page_index++) {
238     bamboo_cache_policy_t policy = {0};
239     unsigned int hottestcore = 0;
240     unsigned int totalfreq = 0;
241     unsigned int hotfreq = 0;
242     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
243     // Decide the cache strategy for this page
244     // If decide to adapt a new cache strategy, write into the shared block of
245     // the gcpolicytbl 
246     // Format: page start va + cache policy
247     if(hotfreq != 0) {
248       totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
249       if((unsigned int)hotfreq < (unsigned int)totalfreq) {
250         // use hfh
251         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
252         /*unsigned int block = 0;
253         BLOCKINDEX(block, (void *) page_sva);
254         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
255         CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
256       } else {
257         // locally cache the page in the hottest core
258         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
259       }     
260     }
261     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
262     page_sva += BAMBOO_PAGE_SIZE;
263   }
264 }
265
// Decide the new per-page cache policy for this core's slice of pages,
// dispatching on the compile-time policy selection.
// Returns 0 (fixed: the function is declared to return unsigned int but
// previously fell off the end without a return — UB if the value is used).
unsigned int cacheAdapt_decision(int coren) {
  BAMBOO_CACHE_MF();
  // check the statistic data
  // for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
#endif
  return 0;
}
280
281 // adapt the cache strategy for the mutator
282 void cacheAdapt_mutator() {
283   BAMBOO_CACHE_MF();
284   // check the changes and adapt them
285   unsigned int * tmp_p = gccachepolicytbl;
286   unsigned int page_sva = gcbaseva;
287   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
288     // read out the policy
289     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
290     // adapt the policy
291     if(policy.word != 0) {
292       bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
293     }
294     tmp_p += 1;
295   }
296 }
297
298 // Cache adapt phase process for clients
299 void cacheAdapt_phase_client() {
300   WAITFORGCPHASE(CACHEPOLICYPHASE);
301   GC_PRINTF("Start cachepolicy phase\n");
302   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
303   //send init finish msg to core coordinator
304   send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
305   GC_PRINTF("Finish cachepolicy phase\n");
306
307   WAITFORGCPHASE(PREFINISHPHASE);
308   GC_PRINTF("Start prefinish phase\n");
309   // cache adapt phase
310   cacheAdapt_mutator();
311   cacheAdapt_gc(false);
312   //send init finish msg to core coordinator
313   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
314   GC_PRINTF("Finish prefinish phase\n");
315   CACHEADAPT_SAMPLING_RESET();
316   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
317     // zero out the gccachesamplingtbl
318     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
319     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
320   }
321 }
322
323 extern unsigned long long gc_output_cache_policy_time;
324
325 // Cache adpat phase process for the master
326 void cacheAdapt_phase_master() {
327   GCPROFILE_ITEM();
328   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
329   CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
330   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
331   // let all cores to parallelly process the revised profile data and decide 
332   // the cache policy for each page
333   gc_status_info.gcphase = CACHEPOLICYPHASE;
334   GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
335   GC_PRINTF("Start cachepolicy phase \n");
336   // cache adapt phase
337   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
338   GC_CHECK_ALL_CORE_STATUS();
339   BAMBOO_CACHE_MF();
340
341   // let all cores to adopt new policies
342   gc_status_info.gcphase = PREFINISHPHASE;
343   // Note: all cores should flush their runtime data including non-gc cores
344   GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
345   GC_PRINTF("Start prefinish phase \n");
346   // cache adapt phase
347   cacheAdapt_mutator();
348   cacheAdapt_gc(false);
349   GC_CHECK_ALL_CORE_STATUS();
350   
351   CACHEADAPT_SAMPLING_RESET();
352   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
353     // zero out the gccachesamplingtbl
354     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
355     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
356     BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
357   }
358 }
359
360 // output original cache sampling data for each page
361 void gc_output_cache_sampling() {
362   extern volatile bool gc_profile_flag;
363   if(!gc_profile_flag) return;
364   unsigned int page_index = 0;
365   VA page_sva = 0;
366   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
367   for(page_index = 0; page_index < page_num; page_index++) {
368     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
369     unsigned int block = 0;
370     BLOCKINDEX(block, (void *) page_sva);
371     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
372     //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
373     unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
374     int accesscore = 0;
375     for(int i = 0; i < NUMCORESACTIVE; i++) {
376       int freq = *local_tbl;
377       local_tbl++;
378       if(freq != 0) {
379         accesscore++;
380         //printf("%d,  ", freq);
381       }
382     }
383     if(accesscore!=0) {
384       printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
385       unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
386       for(int i = 0; i < NUMCORESACTIVE; i++) {
387         unsigned int freq = *local_tbl;
388         local_tbl++;
389         printf("%u,  ", freq);
390       }
391       printf("\n");
392     }
393     //printf("\n");
394   }
395   printf("=================\n");
396
397
398 // output revised cache sampling data for each page after compaction
399 void gc_output_cache_sampling_r() {
400   extern volatile bool gc_profile_flag;
401   if(!gc_profile_flag) return;
402   // TODO summary data
403   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
404   for(int i = 0; i < NUMCORESACTIVE; i++) {
405     for(int j = 0; j < NUMCORESACTIVE; j++) {
406       sumdata[i][j] = 0;
407     }
408   }
409   tprintf("cache sampling_r \n");
410   unsigned int page_index = 0;
411   VA page_sva = 0;
412   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
413   for(page_index = 0; page_index < page_num; page_index++) {
414     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
415     unsigned int block = 0;
416     BLOCKINDEX(block, (void *)page_sva);
417     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
418     //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
419     int accesscore = 0; // TODO
420     unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
421     for(int i = 0; i < NUMCORESACTIVE; i++) {
422       unsigned int freq = *local_tbl; 
423       //printf("%d,  ", freq);
424       if(freq != 0) {
425         accesscore++;// TODO
426       }
427       local_tbl++;
428     }
429     if(accesscore!=0) {
430       printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
431       unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
432       for(int i = 0; i < NUMCORESACTIVE; i++) {
433         unsigned int freq = *local_tbl;
434         printf("%u,  ", freq);
435         sumdata[accesscore-1][i]+=freq;
436         local_tbl++;
437       }
438       printf("\n");
439     }  
440     //printf("\n");
441   }
442   printf("+++++\n");
443   // TODO printout the summary data
444   for(int i = 0; i < NUMCORESACTIVE; i++) {
445     printf("%d  ", i);
446     for(int j = 0; j < NUMCORESACTIVE; j++) {
447       printf(" %u  ", sumdata[j][i]);
448     }
449     printf("\n");
450   }
451   printf("=================\n");
452
453 #endif // GC_CACHE_ADAPT