2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregc.h"
5 #include "multicoregcprofile.h"
// Close out the destination page the compactor just finished writing at
// toptr: scale every core's accumulated sample counter for that page down
// by 64 (>>6).  gccachesamplingtbl_r holds NUMCORESACTIVE counters per
// page, laid out [page*NUMCORESACTIVE + core].
// NOTE(review): this copy appears truncated -- the per-iteration pointer
// advance (presumably newtable++) and the closing braces of the loop and
// function are not visible; confirm against the canonical source.
7 void cacheadapt_finish_compact(void *toptr) {
// page index of toptr relative to the shared-heap base gcbaseva
8 unsigned int dstpage=((unsigned INTPTR)(toptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
9 unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
// divide each active core's counter for this page by 64
11 for(int core = 0; core < NUMCORESACTIVE; core++) {
12 (*newtable)=(*newtable)>>6;
// Fold the sampling data of a source page being evacuated (srcptr) into the
// revised table entry of the destination page (tostart..tofinish is the
// range just copied).  Each core's destination counter is incremented by the
// source counter weighted by how many 64ths of a page were moved.
// NOTE(review): the per-iteration pointer advances (oldtable++/newtable++)
// and the closing braces are missing from this copy -- confirm upstream.
17 void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
// page indices of the source object and its new destination
18 unsigned int srcpage=((unsigned INTPTR)(srcptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
19 unsigned int dstpage=((unsigned INTPTR)(tostart-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
20 unsigned int numbytes=tofinish-tostart;
// per-page, per-core counters: original table and revised (post-compaction) table
22 unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
23 unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
// numbytes expressed in 64ths of a page (BAMBOO_PAGE_SIZE_BITS-6 shift)
25 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
// weight each core's old count by the fraction of the page moved
27 for(int core = 0; core < NUMCORESACTIVE; core++) {
28 (*newtable)+=page64th*(*oldtable);
// Walk forward over destination pages (toptr side) and source pages
// (origptr side) simultaneously, closing out whichever page boundary comes
// first and folding weighted source sample counts into the revised table.
// bytesneeded is the number of source bytes still to account for.
34 /* Bytes needed equal to zero is a special case... It means that we should finish the dst page */
// NOTE(review): this copy is missing several physical lines -- at minimum
// the `do {` opening the loop, the `} else {` between the two branches, the
// per-core pointer advances inside both for-loops, and the final closing
// brace.  The comments below describe only what is visible.
36 void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
// bytes already written into the current destination page
37 unsigned int numbytes=toptr-tostart;
// end addresses of the current destination / source pages (round up to page)
39 void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
40 void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
// toptr-1 so that a pointer sitting exactly on a page boundary still maps
// to the page just filled
42 unsigned int topage=((unsigned INTPTR)(toptr-1-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
43 unsigned int origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
45 unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
46 unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
// bytesneeded==0 means "just finish the dst page": treat it as already full
49 unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
50 unsigned int remainorigbytes=origbound-origptr;
53 //round source bytes down....don't want to close out page if not necessary
54 remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
// destination page boundary is reached first (or simultaneously)
56 if (remaintobytes<=remainorigbytes) {
57 //Need to close out to page
59 numbytes+=remaintobytes;
// fraction of a page processed, in 64ths
60 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
// fold weighted source counts in and normalize the finished dst page (>>6)
62 for(int core = 0; core < NUMCORESACTIVE; core++) {
63 (*totable)=(*totable+page64th*(*origtable))>>6;
// NOTE(review): a `toptr+=remaintobytes;` advance is expected here but is
// not visible in this copy
68 origptr+=remaintobytes;
69 bytesneeded-=remaintobytes;
70 topage++;//to page is definitely done
71 tobound+=BAMBOO_PAGE_SIZE;
72 origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
73 origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
// (else branch: source page boundary reached first)
75 //Finishing off orig page
77 numbytes+=remainorigbytes;
78 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
// only accumulate -- dst page is not finished yet, so no >>6 normalization
80 for(int core = 0; core < NUMCORESACTIVE; core++) {
81 (*totable)+=page64th*(*origtable);
85 toptr+=remainorigbytes;
86 origptr+=remainorigbytes;
87 bytesneeded-=remainorigbytes;
88 origpage++;//just orig page is done
89 origbound+=BAMBOO_PAGE_SIZE;
// re-derive table cursors for the (possibly advanced) pages
91 totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
92 origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
94 remaintobytes=tobound-toptr;
95 remainorigbytes=origbound-origptr;
// loop until all requested source bytes have been accounted for
98 } while(bytesneeded!=0);
101 // prepare for cache adaption:
102 // -- flush the shared heap
103 // -- clean dtlb entries
104 // -- change cache strategy
// isgccachestage presumably selects between the GC-time and mutator-time
// dtlb handlers below -- the conditional itself is missing from this copy,
// so both install calls appear unguarded here; verify against the original.
105 void cacheAdapt_gc(bool isgccachestage) {
106 // flush the shared heap
107 BAMBOO_CACHE_FLUSH_L2();
109 // clean the dtlb entries
113 bamboo_install_dtlb_handler_for_gc();
115 bamboo_install_dtlb_handler_for_mutator();
119 // the master core decides how to adapt cache strategy for the mutator
120 // according to collected statistic data
122 // find the core that accesses the page #page_index most
// Scans the NUMCORESACTIVE revised sample counters for the page and leaves
// the winning core id in `hottestcore` and its count in `hotfreq` (both are
// output l-values; callers must initialize hotfreq to 0 beforehand).
// NOTE(review): the macro tail (hottestcore assignment, local_tbl advance,
// closing braces) is missing from this copy -- do not edit in place without
// the canonical source.
123 #define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
125 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE]; \
126 for(int i = 0; i < NUMCORESACTIVE; i++) { \
127 int freq = *local_tbl; \
129 if(hotfreq < freq) { \
135 // find the core that accesses the page #page_index most and compute the total
136 // access count of the page at the same time
// Same scan as CACHEADAPT_FIND_HOTTEST_CORE but additionally accumulates
// every core's count into `totalfreq` (caller initializes all outputs to 0).
// NOTE(review): the accumulation into totalfreq and the macro tail are
// missing from this copy of the macro body.
137 #define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
139 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE]; \
140 for(int i = 0; i < NUMCORESACTIVE; i++) { \
141 int freq = *local_tbl; \
144 if(hotfreq < freq) { \
150 // Set the policy as hosted by coren
151 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
// Fills a bamboo_cache_policy_t so the page is cached ("homed") on physical
// core `coren`: coordinate mode plus the core's (x,y) mesh coordinates from
// bamboo_cpu2coords, shifted by +1 per the hardware convention noted above.
152 #define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
154 (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
155 (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
156 (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
158 // store the new policy information at tmp_p in gccachepolicytbl
// Records the packed policy word for `page_index`; tmp_p points into
// gccachepolicytbl, which cacheAdapt_mutator later reads back per page.
159 #define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
161 ((int*)(tmp_p))[page_index] = (policy).word; \
164 // make all pages hfh
// Policy 1: mark every page in this core's slice as hash-for-home
// (BAMBOO_CACHE_MODE_HASH).  Each active core processes a contiguous
// page_gap-sized slice; the last core also takes the remainder.
// NOTE(review): the loop/function closing braces are missing from this
// copy; page_sva is advanced but otherwise unused in the visible body.
165 void cacheAdapt_policy_h4h(int coren){
166 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
167 unsigned int page_gap=page_num/NUMCORESACTIVE;
168 unsigned int page_index=page_gap*coren;
// last core absorbs the rounding remainder of page_num/NUMCORESACTIVE
169 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
170 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
171 unsigned int * tmp_p = gccachepolicytbl;
172 for(; page_index < page_index_end; page_index++) {
173 bamboo_cache_policy_t policy = {0};
174 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
175 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
176 page_sva += BAMBOO_PAGE_SIZE;
180 // make all pages local as non-cache-adaptable gc local mode
// Policy 2: home every page in this core's slice on the core that owns the
// block containing it (per gc_block2core), i.e. the static GC layout.
// NOTE(review): the inner `coren` shadows the parameter `coren` -- legal C
// but confusing; also the loop/function closing braces are missing here.
181 void cacheAdapt_policy_local(int coren){
182 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
183 unsigned int page_gap=page_num/NUMCORESACTIVE;
184 unsigned int page_index=page_gap*coren;
185 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
186 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
187 unsigned int * tmp_p = gccachepolicytbl;
188 for(; page_index < page_index_end; page_index++) {
189 bamboo_cache_policy_t policy = {0};
190 unsigned int block = 0;
// map the page's address to its GC block, then to that block's host core
191 BLOCKINDEX(block, (void *) page_sva);
192 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
193 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
194 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
195 page_sva += BAMBOO_PAGE_SIZE;
// Policy 3: home each page on the core that accessed it most according to
// the revised sampling table; fall back to the page's static host core.
// NOTE(review): the if/else skeleton that chooses between the two
// CACHEADAPT_POLICY_SET_HOST_CORE calls (presumably testing hotfreq!=0) is
// missing from this copy, as are the closing braces -- as shown, both calls
// would run and the second would win.  Confirm against the original.
199 void cacheAdapt_policy_hottest(int coren){
200 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
201 unsigned int page_gap=page_num/NUMCORESACTIVE;
202 unsigned int page_index=page_gap*coren;
203 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
204 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
205 unsigned int * tmp_p = gccachepolicytbl;
206 for(; page_index < page_index_end; page_index++) {
207 bamboo_cache_policy_t policy = {0};
208 unsigned int hottestcore = 0;
209 unsigned int hotfreq = 0;
210 CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
212 // Decide the cache strategy for this page
213 // If decide to adapt a new cache strategy, write into the shared block of
214 // the gcsharedsamplingtbl. The mem recording information that has been
215 // written is enough to hold the information.
216 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
218 // locally cache the page in the hottest core
219 CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
221 // reset it to be homed by its host core
222 unsigned int block = 0;
223 BLOCKINDEX(block, (void *) page_sva);
224 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
225 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
227 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
228 page_sva += BAMBOO_PAGE_SIZE;
// NOTE(review): the threshold is applied as a right shift (totalfreq>>2,
// i.e. 1/4 of total accesses), not as a percentage as the comment below
// suggests -- reconcile the comment or the code upstream.
232 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 2
233 // cache the page on the core that accesses it the most if that core accesses
234 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
// (comment line continuing the sentence above is missing from this copy;
// the visible fallback sets hash-for-home)
// Policy 4: home a page on its hottest core only when that core dominates
// (hotfreq >= totalfreq/4); otherwise hash-for-home it.
// NOTE(review): the else/if skeleton between the three policy assignments
// and the closing braces are missing from this copy -- as shown the later
// assignments would unconditionally overwrite the earlier ones.
236 void cacheAdapt_policy_dominate(int coren){
237 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
238 unsigned int page_gap=page_num/NUMCORESACTIVE;
239 unsigned int page_index=page_gap*coren;
240 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
241 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
242 unsigned int * tmp_p = gccachepolicytbl;
243 for(; page_index < page_index_end; page_index++) {
244 bamboo_cache_policy_t policy = {0};
245 unsigned int hottestcore = 0;
246 unsigned int totalfreq = 0;
247 unsigned int hotfreq = 0;
248 CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
249 // Decide the cache strategy for this page
250 // If decide to adapt a new cache strategy, write into the shared block of
252 // Format: page start va + cache policy
// totalfreq becomes the dominance threshold: total/2^THRESHOLD = total/4
254 totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
255 if(hotfreq < totalfreq) {
// no single core dominates -> spread the page hash-for-home
257 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
258 /*unsigned int block = 0;
259 BLOCKINDEX(block, (void *) page_sva);
260 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
261 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
263 // locally cache the page in the hottest core
264 CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
267 // reset it to be homed by its host core
268 unsigned int block = 0;
269 BLOCKINDEX(block, (void *) page_sva);
270 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
271 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
273 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
274 page_sva += BAMBOO_PAGE_SIZE;
// Dispatch to the compile-time-selected cache policy for this core's slice
// of pages.  Only POLICY4 (dominate) is active; the others are commented out.
// NOTE(review): the `#endif`, the function's return statement, and the
// closing brace are missing from this copy -- the declared unsigned int
// return value cannot be documented from what is visible.
278 unsigned int cacheAdapt_decision(int coren) {
280 // check the statistic data
281 // for each page, decide the new cache strategy
282 #ifdef GC_CACHE_ADAPT_POLICY1
283 // cacheAdapt_policy_h4h(coren);
284 #elif defined(GC_CACHE_ADAPT_POLICY2)
285 //cacheAdapt_policy_local(coren);
286 #elif defined(GC_CACHE_ADAPT_POLICY3)
287 //cacheAdapt_policy_hottest(coren);
288 #elif defined(GC_CACHE_ADAPT_POLICY4)
289 cacheAdapt_policy_dominate(coren);
293 // adapt the cache strategy for the mutator
// Walk gccachepolicytbl page by page and apply each nonzero policy word to
// the corresponding shared-heap page via bamboo_adapt_cache_policy().
// NOTE(review): the advance of tmp_p to the next table slot, the loop and
// function closing braces, and the matching #endif are missing from this
// copy -- as shown, tmp_p would re-read the same slot every iteration.
294 void cacheAdapt_mutator() {
295 #if defined(GC_CACHE_ADAPT_POLICY4)
297 // check the changes and adapt them
298 unsigned int * tmp_p = gccachepolicytbl;
299 unsigned int page_sva = gcbaseva;
300 for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
301 // read out the policy
302 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
// a zero word means "no change requested for this page"
304 if(policy.word != 0) {
305 bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
312 // Cache adapt phase process for clients
// Client-side protocol for the two cache-adaptation GC phases:
//  1. CACHEPOLICYPHASE: compute this core's slice of page policies, ack the
//     coordinator (STARTUPCORE) with GCFINISHCACHEPOLICY.
//  2. PREFINISHPHASE: apply the policies to the mutator's pages, restore
//     non-GC cache state, ack with GCFINISHPREF.
// Finally reset the sampling state for the next cycle (POLICY4 builds only).
// NOTE(review): the #endif and closing braces at the end of the function
// are missing from this copy.
313 void cacheAdapt_phase_client() {
314 WAITFORGCPHASE(CACHEPOLICYPHASE);
315 GC_PRINTF("Start cachepolicy phase\n");
316 cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
317 //send init finish msg to core coordinator
318 send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
319 GC_PRINTF("Finish cachepolicy phase\n");
321 WAITFORGCPHASE(PREFINISHPHASE);
322 GC_PRINTF("Start prefinish phase\n");
// apply the new per-page policies, then leave GC cache mode
324 cacheAdapt_mutator();
325 cacheAdapt_gc(false);
326 //send init finish msg to core coordinator
327 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
328 GC_PRINTF("Finish prefinish phase\n");
330 #if defined(GC_CACHE_ADAPT_POLICY4)
331 CACHEADAPT_SAMPLING_RESET();
// only cores that participate in sampling have local tables to clear
332 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
333 // zero out the gccachesamplingtbl
334 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
335 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
// cumulative time spent dumping cache-policy profiling output (defined elsewhere)
340 extern unsigned long long gc_output_cache_policy_time;
342 // Cache adpat phase process for the master
// Master-side counterpart of cacheAdapt_phase_client: drives both phases,
// broadcasting GCSTARTCACHEPOLICY / GCSTARTPREF to the clients, doing its
// own share of the work, and waiting for all cores to report back before
// advancing.  Also resets sampling state at the end (POLICY4 builds).
// NOTE(review): the #endif and closing braces are missing from this copy.
343 void cacheAdapt_phase_master() {
344 GCPROFILE_ITEM_MASTER();
// time the (profiling-only) dump of the revised sampling table
345 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
346 CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
347 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
348 // let all cores to parallelly process the revised profile data and decide
349 // the cache policy for each page
350 gc_status_info.gcphase = CACHEPOLICYPHASE;
351 GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
352 GC_PRINTF("Start cachepolicy phase \n");
// master computes its own slice, then waits for every client's ack
354 cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
355 GC_CHECK_ALL_CORE_STATUS();
358 // let all cores to adopt new policies
359 gc_status_info.gcphase = PREFINISHPHASE;
360 // Note: all cores should flush their runtime data including non-gc cores
361 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
362 GC_PRINTF("Start prefinish phase \n");
// apply policies locally and leave GC cache mode, then wait for clients
364 cacheAdapt_mutator();
365 cacheAdapt_gc(false);
366 GC_CHECK_ALL_CORE_STATUS();
368 #if defined(GC_CACHE_ADAPT_POLICY4)
369 CACHEADAPT_SAMPLING_RESET();
370 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
371 // zero out the gccachesamplingtbl
372 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
373 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
// master additionally clears the shared policy table for the next cycle
374 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
379 // output original cache sampling data for each page
// Profiling dump: for every shared-heap page, print the page address, its
// static host core, and each active core's sample count from the original
// (pre-compaction) table gccachesamplingtbl.  No-op unless gc_profile_flag.
// NOTE(review): this copy is missing the declaration of page_sva (expected
// around the dropped line 384) and several braces; the two similar loops
// below (one with printing commented out, one printing) would redeclare
// local_tbl in the same scope as shown -- the dropped lines presumably
// separate or guard them.  Confirm against the canonical source.
380 void gc_output_cache_sampling() {
381 extern volatile bool gc_profile_flag;
382 if(!gc_profile_flag) return;
383 unsigned int page_index = 0;
385 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
386 for(page_index = 0; page_index < page_num; page_index++) {
387 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
388 unsigned int block = 0;
// static host core of the block containing this page
389 BLOCKINDEX(block, (void *) page_sva);
390 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
391 //printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
392 unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
394 for(int i = 0; i < NUMCORESACTIVE; i++) {
395 int freq = *local_tbl;
399 //printf("%d, ", freq);
// second pass actually prints the row: page address, index, host core, counts
403 printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
404 unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
405 for(int i = 0; i < NUMCORESACTIVE; i++) {
406 unsigned int freq = *local_tbl;
408 printf("%u, ", freq);
414 printf("=================\n");
417 // output revised cache sampling data for each page after compaction
// Profiling dump of the revised table gccachesamplingtbl_r, plus a
// cross-core summary matrix sumdata[accessing-core][sampled-core].
// No-op unless gc_profile_flag.
// NOTE(review): this copy is missing the body that zeroes sumdata (dropped
// lines 425-427), the declaration of page_sva, the computation of
// accesscore (still marked TODO below -- sumdata[accesscore-1][i] would
// index out of bounds while accesscore==0 as shown), and assorted braces.
// Treat the visible text as a skeleton; confirm against the original.
418 void gc_output_cache_sampling_r() {
419 extern volatile bool gc_profile_flag;
420 if(!gc_profile_flag) return;
// per-(accessing core, sampled core) totals; initialization body not visible
422 unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
423 for(int i = 0; i < NUMCORESACTIVE; i++) {
424 for(int j = 0; j < NUMCORESACTIVE; j++) {
428 tprintf("cache sampling_r \n");
429 unsigned int page_index = 0;
431 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
432 for(page_index = 0; page_index < page_num; page_index++) {
433 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
434 unsigned int block = 0;
435 BLOCKINDEX(block, (void *)page_sva);
436 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
437 //printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
438 int accesscore = 0; // TODO
439 unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
440 for(int i = 0; i < NUMCORESACTIVE; i++) {
441 unsigned int freq = *local_tbl;
442 //printf("%d, ", freq);
// printing pass: row of per-core revised counts, folded into sumdata
449 printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
450 unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
451 for(int i = 0; i < NUMCORESACTIVE; i++) {
452 unsigned int freq = *local_tbl;
453 printf("%u, ", freq);
454 sumdata[accesscore-1][i]+=freq;
462 // TODO printout the summary data
463 for(int i = 0; i < NUMCORESACTIVE; i++) {
465 for(int j = 0; j < NUMCORESACTIVE; j++) {
466 printf(" %u ", sumdata[j][i]);
470 printf("=================\n");
472 #endif // GC_CACHE_ADAPT