// Robust/src/Runtime/bamboo/multicorecache.c
#ifdef GC_CACHE_ADAPT
#include "multicorecache.h"
#include "multicoremsg.h"
#include "multicoregcprofile.h"

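/* The cache sampling tables hold one access count per (page, core) pair:
   gccachesamplingtbl is indexed by the original (pre-compaction) pages and
   gccachesamplingtbl_r by the revised (post-compaction) pages.  During
   compaction the helpers below credit a source page's counts to the
   destination page(s) its data is copied into, weighted by how many 64ths of
   a page the copied bytes amount to.  Once a destination page is complete,
   the accumulated sum is divided by 64 (>>6) to bring it back to the scale of
   a plain per-page count. */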
void cacheadapt_finish_compact(void *toptr) {
  unsigned int dstpage=(toptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
  unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];

  for(int core = 0; core < NUMCORESACTIVE; core++) {
    (*newtable)=(*newtable)>>6;
    newtable++;
  }
}

void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
  unsigned int srcpage=(srcptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
  unsigned int dstpage=(tostart-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
  unsigned int numbytes=tofinish-tostart;

  unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
  unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];

  unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);

  for(int core = 0; core < NUMCORESACTIVE; core++) {
    (*newtable)+=page64th*(*oldtable);
    newtable++;
    oldtable++;
  }
}
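
/* For illustration only: if BAMBOO_PAGE_SIZE_BITS were 16 (64KB pages; the
   real value is defined in the headers), then page64th = numbytes >> 10, i.e.
   the number of complete 1KB (page/64) chunks in numbytes.  Copying 24KB from
   a source page would add 24*(*oldtable) to each destination entry, which the
   final >>6 normalization turns into 24/64 of the source page's per-core
   count. */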

/* A bytesneeded value of zero is a special case: it means that we should just
   finish (close out) the destination page. */
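/* The loop below advances two cursors in lockstep: origptr through the source
   (orig) pages still to be copied and toptr through the destination (to)
   pages being filled.  On each iteration it closes out whichever page
   boundary comes first, folding the source page's per-core counts into the
   destination page's entry (weighted in 64ths of a page) and normalizing with
   >>6 whenever a destination page is completed. */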
void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
  unsigned int numbytes=toptr-tostart;

  void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
  void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);

  unsigned int topage=(toptr-1-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
  unsigned int origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;

  unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
  unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];

  // handle the bytesneeded==0 case: force the destination page to be closed out immediately
  unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
  unsigned int remainorigbytes=origbound-origptr;

  do {
    // round source bytes down...don't want to close out the source page if not necessary
    remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;

    if (remaintobytes<=remainorigbytes) {
      // Need to close out the destination (to) page

      numbytes+=remaintobytes;
      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);

      for(int core = 0; core < NUMCORESACTIVE; core++) {
        (*totable)=(*totable+page64th*(*origtable))>>6;
        totable++;
        origtable++;
      }
      toptr+=remaintobytes;
      origptr+=remaintobytes;
      bytesneeded-=remaintobytes;
      topage++;//to page is definitely done
      tobound+=BAMBOO_PAGE_SIZE;
      origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
      origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
    } else {
      // Finishing off the source (orig) page

      numbytes+=remainorigbytes;
      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);

      for(int core = 0; core < NUMCORESACTIVE; core++) {
        (*totable)+=page64th*(*origtable);
        totable++;
        origtable++;
      }
      toptr+=remainorigbytes;
      origptr+=remainorigbytes;
      bytesneeded-=remainorigbytes;
      origpage++;//just the orig page is done
      origbound+=BAMBOO_PAGE_SIZE;
    }
    totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
    origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];

    remaintobytes=tobound-toptr;
    remainorigbytes=origbound-origptr;

    numbytes=0;
  } while(bytesneeded!=0);
}

// prepare for cache adaptation:
//   -- flush the shared heap
//   -- clean dtlb entries
//   -- change cache strategy
void cacheAdapt_gc(bool isgccachestage) {
  // flush the shared heap
  BAMBOO_CACHE_FLUSH_L2();

  // clean the dtlb entries
  BAMBOO_CLEAN_DTLB();

  // install the dtlb handler that matches the current stage
  if(isgccachestage) {
    bamboo_install_dtlb_handler_for_gc();
  } else {
    bamboo_install_dtlb_handler_for_mutator();
  }
}

// decide how to adapt the cache strategy for the mutator according to the
// collected statistics

// find the core that accesses page #page_index the most
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];   \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl++; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  }
// find the core that accesses page #page_index the most and compute the
// total number of accesses to the page at the same time
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];   \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl++; \
      totalfreq += freq; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  }
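// Both FIND_HOTTEST macros above only update hottestcore/hotfreq when they see
// a larger count, so callers zero-initialize hottestcore, hotfreq (and
// totalfreq) before invoking them, as the policy functions below do.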
// Set the policy to be hosted by core coren
// NOTE: (x,y) should be changed to (x+1, y+1)!!!
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  }
// store the new policy information for page page_index at tmp_p in gccachepolicytbl
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  { \
    ((int*)(tmp_p))[page_index] = (policy).word; \
  }

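// Each cacheAdapt_policy_* function below runs on every active core in
// parallel: core coren handles the contiguous slice of pages starting at
// coren*page_gap, and the last core also absorbs the remainder when the page
// count is not evenly divisible by NUMCORESACTIVE.
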
// make every page hash-for-home (h4h)
void cacheAdapt_policy_h4h(int coren){
  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
  unsigned int page_gap=page_num/NUMCORESACTIVE;
  unsigned int page_index=page_gap*coren;
  unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  unsigned int * tmp_p = gccachepolicytbl;
  for(; page_index < page_index_end; page_index++) {
    bamboo_cache_policy_t policy = {0};
    policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
    page_sva += BAMBOO_PAGE_SIZE;
  }
}

// cache every page locally on its host core, matching the non-cache-adaptive
// GC local mode
void cacheAdapt_policy_local(int coren){
  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
  unsigned int page_gap=page_num/NUMCORESACTIVE;
  unsigned int page_index=page_gap*coren;
  unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  unsigned int * tmp_p = gccachepolicytbl;
  for(; page_index < page_index_end; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int block = 0;
    BLOCKINDEX(block, (void *) page_sva);
    unsigned int hostcore = gc_block2core[block%(NUMCORES4GC*2)];
    CACHEADAPT_POLICY_SET_HOST_CORE(policy, hostcore);
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
    page_sva += BAMBOO_PAGE_SIZE;
  }
}

void cacheAdapt_policy_hottest(int coren){
  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
  unsigned int page_gap=page_num/NUMCORESACTIVE;
  unsigned int page_index=page_gap*coren;
  unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  unsigned int * tmp_p = gccachepolicytbl;
  for(; page_index < page_index_end; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int hottestcore = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
    // Decide the cache strategy for this page.  If a new strategy is chosen,
    // it is recorded in the shared gccachepolicytbl, which reserves enough
    // space to hold one policy word per page.
    // Format: page start va + cache strategy (h4h / host core [x,y])
    if(hotfreq != 0) {
      // locally cache the page on the hottest core
      CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
    }
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
    page_sva += BAMBOO_PAGE_SIZE;
  }
}

#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
// cache the page on the core that accesses it the most if that core accounts
// for at least 1/(2^GC_CACHE_ADAPT_DOMINATE_THRESHOLD) of the page's total
// accesses (i.e. at least half with the current setting); otherwise make the
// page hash-for-home.
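// Example with the current threshold of 1: if a page's accesses total 1000
// and the hottest core contributed 600, then 600 >= (1000 >> 1) = 500, so the
// page is homed on that core; had the hottest core contributed only 400, the
// page would be hashed-for-home instead.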
void cacheAdapt_policy_dominate(int coren){
  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
  unsigned int page_gap=page_num/NUMCORESACTIVE;
  unsigned int page_index=page_gap*coren;
  unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  unsigned int * tmp_p = gccachepolicytbl;
  for(; page_index < page_index_end; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int hottestcore = 0;
    unsigned int totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
    // Decide the cache strategy for this page.  If a new strategy is chosen,
    // it is recorded in the shared gccachepolicytbl.
    // Format: page start va + cache policy
    if(hotfreq != 0) {
      totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
      if((unsigned int)hotfreq < (unsigned int)totalfreq) {
        // no single core dominates: use hash-for-home
        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
        /*unsigned int block = 0;
        BLOCKINDEX(block, (void *) page_sva);
        unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
        CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
      } else {
        // locally cache the page on the hottest core
        CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
      }
    }
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
    page_sva += BAMBOO_PAGE_SIZE;
  }
}

unsigned int cacheAdapt_decision(int coren) {
  BAMBOO_CACHE_MF();
  // examine the statistics and decide the new cache strategy for each page
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
#endif
  // the declared return value is not used by the callers in this file
  return 0;
}

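// gccachepolicytbl holds one bamboo_cache_policy_t word per shared-heap page;
// a zero word means the page's current cache policy is left unchanged.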
// adapt the cache strategy for the mutator
void cacheAdapt_mutator() {
  BAMBOO_CACHE_MF();
  // check the changes and adapt them
  unsigned int * tmp_p = gccachepolicytbl;
  unsigned int page_sva = gcbaseva;
  for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
    // read out the policy
    bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
    // adapt the policy
    if(policy.word != 0) {
      bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
    }
    tmp_p += 1;
  }
}

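// Phase protocol: the master (cacheAdapt_phase_master) drives two phases.  In
// CACHEPOLICYPHASE every core runs cacheAdapt_decision over its slice of pages
// and reports back with GCFINISHCACHEPOLICY; in PREFINISHPHASE every core
// applies the new policies (cacheAdapt_mutator), flushes caches and reinstalls
// the mutator dtlb handler (cacheAdapt_gc(false)), and reports back with
// GCFINISHPREF.
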
// Cache adapt phase processing for client cores
void cacheAdapt_phase_client() {
  WAITFORGCPHASE(CACHEPOLICYPHASE);
  GC_PRINTF("Start cachepolicy phase\n");
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  // send the cachepolicy-finish msg to the master core
  send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish cachepolicy phase\n");

  WAITFORGCPHASE(PREFINISHPHASE);
  GC_PRINTF("Start prefinish phase\n");
  // cache adapt phase
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  // send the prefinish-finish msg to the master core
  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish prefinish phase\n");
  CACHEADAPT_SAMPLING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the local cache sampling tables
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
  }
}

extern unsigned long long gc_output_cache_policy_time;

// Cache adapt phase processing for the master core
void cacheAdapt_phase_master() {
  GCPROFILE_ITEM();
  unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
  CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
  gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
  // let all cores process the revised profile data in parallel and decide
  // the cache policy for each page
  gc_status_info.gcphase = CACHEPOLICYPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
  GC_PRINTF("Start cachepolicy phase \n");
  // cache adapt phase
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  GC_CHECK_ALL_CORE_STATUS();
  BAMBOO_CACHE_MF();

  // let all cores adopt the new policies
  gc_status_info.gcphase = PREFINISHPHASE;
  // Note: all cores, including non-gc cores, should flush their runtime data
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
  GC_PRINTF("Start prefinish phase \n");
  // cache adapt phase
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  GC_CHECK_ALL_CORE_STATUS();

  CACHEADAPT_SAMPLING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the local cache sampling tables and the policy table
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
    BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
  }
}

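// Both gc_output_* routines below print, for every page with at least one
// recorded access: the page's start address, its page index, its host core
// (from gc_block2core), followed by the per-core access counts.
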
// output the original cache sampling data for each page
void gc_output_cache_sampling() {
  extern volatile bool gc_profile_flag;
  if(!gc_profile_flag) return;
  unsigned int page_index = 0;
  VA page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    unsigned int block = 0;
    BLOCKINDEX(block, (void *) page_sva);
    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
    //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
    unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
    int accesscore = 0;
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      int freq = *local_tbl;
      local_tbl++;
      if(freq != 0) {
        accesscore++;
        //printf("%d,  ", freq);
      }
    }
    if(accesscore!=0) {
      printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
      unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
      for(int i = 0; i < NUMCORESACTIVE; i++) {
        unsigned int freq = *local_tbl;
        local_tbl++;
        printf("%u,  ", freq);
      }
      printf("\n");
    }
    //printf("\n");
  }
  printf("=================\n");
}

// output the revised cache sampling data for each page after compaction
void gc_output_cache_sampling_r() {
  extern volatile bool gc_profile_flag;
  if(!gc_profile_flag) return;
  // summary data, indexed by [sharing degree - 1][core]
  unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    for(int j = 0; j < NUMCORESACTIVE; j++) {
      sumdata[i][j] = 0;
    }
  }
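  // sumdata[k][c] accumulates the access counts contributed by core c to pages
  // touched by exactly k+1 distinct cores, so the summary printout below has
  // one row per core and one column per sharing degree.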
  tprintf("cache sampling_r \n");
  unsigned int page_index = 0;
  VA page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    unsigned int block = 0;
    BLOCKINDEX(block, (void *)page_sva);
    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
    //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
    int accesscore = 0;
    unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      unsigned int freq = *local_tbl;
      //printf("%d,  ", freq);
      if(freq != 0) {
        accesscore++;
      }
      local_tbl++;
    }
    if(accesscore!=0) {
      printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
      unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
      for(int i = 0; i < NUMCORESACTIVE; i++) {
        unsigned int freq = *local_tbl;
        printf("%u,  ", freq);
        sumdata[accesscore-1][i]+=freq;
        local_tbl++;
      }
      printf("\n");
    }
    //printf("\n");
  }
  printf("+++++\n");
  // print the summary data
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    printf("%d  ", i);
    for(int j = 0; j < NUMCORESACTIVE; j++) {
      printf(" %u  ", sumdata[j][i]);
    }
    printf("\n");
  }
  printf("=================\n");
}

#endif // GC_CACHE_ADAPT