Missing changes

[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
diff --git a/Robust/src/Runtime/bamboo/multicorecache.c b/Robust/src/Runtime/bamboo/multicorecache.c

index b3bfba1e4e980b6c02c20f99d6afd94d71eff209..6c41b9f49adfecd46f5d7cee91c04dfd12fb7c66 100644 (file)
--- a/Robust/src/Runtime/bamboo/multicorecache.c
+++ b/Robust/src/Runtime/bamboo/multicorecache.c
@@ -1,23 +1,132 @@
  #ifdef GC_CACHE_ADAPT
  #include "multicorecache.h"
  #include "multicoremsg.h"
+#include "multicoregc.h"
  #include "multicoregcprofile.h"
  
-gc_cache_revise_info_t gc_cache_revise_information;
+void cacheadapt_finish_compact(void *toptr) { // Divide by 64 each per-core counter in the gccachesamplingtbl_r row of the page that contains toptr.
+  unsigned int dstpage=((unsigned INTPTR)(toptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; // page index of toptr, counted from gcbaseva
+  unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE]; // each page owns NUMCORESACTIVE consecutive counters
+
+  for(int core = 0; core < NUMCORESACTIVE; core++) {
+    (*newtable)=(*newtable)>>6; // NOTE(review): presumably removes the 1/64-page weighting applied when the counters were accumulated -- confirm
+    newtable++;
+  }  
+}
+
+void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) { // Add the source page's per-core counters, weighted by the size of [tostart, tofinish), to the destination page's row in gccachesamplingtbl_r.
+  unsigned int srcpage=((unsigned INTPTR)(srcptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; // page index of srcptr, counted from gcbaseva
+  unsigned int dstpage=((unsigned INTPTR)(tostart-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; // page index of tostart, counted from gcbaseva
+  unsigned int numbytes=tofinish-tostart; // length of the destination range in bytes
+  
+  unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE]; // source row: one counter per core
+  unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE]; // destination row: one counter per core
+  
+  unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6); // numbytes in units of 1/64 page, rounded down (assumes BAMBOO_PAGE_SIZE == 1<<BAMBOO_PAGE_SIZE_BITS -- confirm)
+
+  for(int core = 0; core < NUMCORESACTIVE; core++) {
+    (*newtable)+=page64th*(*oldtable); // accumulate this core's weighted count; no division by 64 happens here
+    newtable++;
+    oldtable++;
+  }  
+}
+
+/* bytesneeded == 0 is a special case: no further bytes are accounted for and the current destination page is simply closed out. */
+
+void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) { // Fold source-page counters (gccachesamplingtbl) into destination-page rows (gccachesamplingtbl_r) for bytesneeded bytes, stepping across page boundaries on either side.
+  unsigned int numbytes=toptr-tostart; // bytes in [tostart, toptr); they are weighted together with the first chunk handled below
+
+  void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE); // end of the page containing toptr-1 (equals toptr when toptr is page aligned; assumes BAMBOO_PAGE_SIZE is a power of two)
+  void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE); // first page boundary strictly above origptr
+  
+  unsigned int topage=((unsigned INTPTR)(toptr-1-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; // destination page index, derived from toptr-1
+  unsigned int origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; // source page index
+
+  unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE]; // destination row: one counter per core
+  unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE]; // source row: one counter per core
+
+  //bytes left before each side hits its next page boundary; bytesneeded==0 forces remaintobytes to 0 so the first iteration takes the close-out branch
+  unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
+  unsigned int remainorigbytes=origbound-origptr;
+
+  do {
+    //clamp the source-side remainder to bytesneeded so a source page is not finished unless the data really reaches its end
+    remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
+
+    if (remaintobytes<=remainorigbytes) {
+      //The destination page fills up first (or at the same time): close it out
+
+      numbytes+=remaintobytes;
+      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6); // byte count in units of 1/64 page, rounded down
+
+      for(int core = 0; core < NUMCORESACTIVE; core++) {
+       (*totable)=(*totable+page64th*(*origtable))>>6; // add the last weighted share, then divide the accumulated entry by 64
+       totable++;
+       origtable++;
+      }
+      toptr+=remaintobytes;
+      origptr+=remaintobytes;
+      bytesneeded-=remaintobytes;
+      topage++;//destination page is complete
+      tobound+=BAMBOO_PAGE_SIZE;
+      origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;//recomputed because origptr may have landed exactly on a source page boundary
+      origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
+    } else {
+      //The source side runs out first: account for it and move to the next source page
+
+      numbytes+=remainorigbytes;
+      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6); // byte count in units of 1/64 page, rounded down
+      
+      for(int core = 0; core < NUMCORESACTIVE; core++) {
+       (*totable)+=page64th*(*origtable); // accumulate only; the division by 64 happens when the destination page is closed out
+       totable++;
+       origtable++;
+      }
+      toptr+=remainorigbytes;
+      origptr+=remainorigbytes;
+      bytesneeded-=remainorigbytes;
+      origpage++;//only the source page advances; the destination page stays open
+      origbound+=BAMBOO_PAGE_SIZE;
+    }
+    totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE]; // re-point the row cursor: the per-core loop advanced it past the row
+    origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE]; // same for the source row cursor
+    
+    remaintobytes=tobound-toptr;
+    remainorigbytes=origbound-origptr;
+    
+    numbytes=0; // the carried-in [tostart, toptr) bytes are counted in the first iteration only
+  } while(bytesneeded!=0);
+}
  
  // prepare for cache adaption:
  //   -- flush the shared heap
  //   -- clean dtlb entries
  //   -- change cache strategy
  void cacheAdapt_gc(bool isgccachestage) {
+#ifdef GC_CACHE_COHERENT_ON // with this macro defined, nothing is done when isgccachestage is true
+  if(!isgccachestage) {
+    // leaving the GC cache stage
+#if (defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3)) // flush/clean only for policies 3 and 4
+    // flush the shared heap
+    BAMBOO_CACHE_FLUSH_L2();
+
+    // clean the dtlb entries
+    BAMBOO_CLEAN_DTLB();
+#endif // GC_CACHE_ADAPT_POLICY4 || GC_CACHE_ADAPT_POLICY3
+  } 
+#else // !GC_CACHE_COHERENT_ON: always flush and clean, then switch the dtlb handler
    // flush the shared heap
    BAMBOO_CACHE_FLUSH_L2();
  
    // clean the dtlb entries
    BAMBOO_CLEAN_DTLB();
  
-  // change the cache strategy
-  gccachestage = isgccachestage;
+  if(isgccachestage) { // the handler is chosen by the requested stage
+    bamboo_install_dtlb_handler_for_gc();
+  } else {
+    bamboo_install_dtlb_handler_for_mutator();
+  }
+#endif // GC_CACHE_COHERENT_ON
  } 
  
  // the master core decides how to adapt cache strategy for the mutator 
@@ -26,10 +135,10 @@ void cacheAdapt_gc(bool isgccachestage) {
  // find the core that accesses the page #page_index most
  #define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
    { \
-    int *local_tbl=&gccachesamplingtbl_r[page_index]; \
+    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];  \
      for(int i = 0; i < NUMCORESACTIVE; i++) { \
        int freq = *local_tbl; \
-      local_tbl=(int *)(((void *)local_tbl)+size_cachesamplingtbl_local_r); \
+      local_tbl++; \
        if(hotfreq < freq) { \
          hotfreq = freq; \
          hottestcore = i; \
@@ -40,10 +149,10 @@ void cacheAdapt_gc(bool isgccachestage) {
  // access time of the page at the same time
  #define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
    { \
-    int *local_tbl=&gccachesamplingtbl_r[page_index]; \
+    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];  \
      for(int i = 0; i < NUMCORESACTIVE; i++) { \
        int freq = *local_tbl; \
-      local_tbl=(int *)(((void *)local_tbl)+size_cachesamplingtbl_local_r); \
+      local_tbl++; \
        totalfreq += freq; \
        if(hotfreq < freq) { \
          hotfreq = freq; \
@@ -67,12 +176,12 @@ void cacheAdapt_gc(bool isgccachestage) {
  
  // make all pages hfh
  void cacheAdapt_policy_h4h(int coren){
-  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
    unsigned int page_gap=page_num/NUMCORESACTIVE;
    unsigned int page_index=page_gap*coren;
    unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
    VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
    for(; page_index < page_index_end; page_index++) {
      bamboo_cache_policy_t policy = {0};
      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
@@ -83,12 +192,12 @@ void cacheAdapt_policy_h4h(int coren){
  
  // make all pages local as non-cache-adaptable gc local mode
  void cacheAdapt_policy_local(int coren){
-  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
    unsigned int page_gap=page_num/NUMCORESACTIVE;
    unsigned int page_index=page_gap*coren;
    unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
    VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
    for(; page_index < page_index_end; page_index++) {
      bamboo_cache_policy_t policy = {0};
      unsigned int block = 0;
@@ -101,12 +210,12 @@ void cacheAdapt_policy_local(int coren){
  } 
  
  void cacheAdapt_policy_hottest(int coren){
-  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
    unsigned int page_gap=page_num/NUMCORESACTIVE;
    unsigned int page_index=page_gap*coren;
    unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
    VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
    for(; page_index < page_index_end; page_index++) {
      bamboo_cache_policy_t policy = {0};
      unsigned int hottestcore = 0;
@@ -121,23 +230,29 @@ void cacheAdapt_policy_hottest(int coren){
      if(hotfreq != 0) {
        // locally cache the page in the hottest core
        CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
+    } else {
+      // reset it to be homed by its host core
+      unsigned int block = 0;
+      BLOCKINDEX(block, (void *) page_sva);
+      unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
+      CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
      }
      CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
      page_sva += BAMBOO_PAGE_SIZE;
    }
  } 
  
-#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
+#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  2 // right-shift count, not a percentage: cacheAdapt_policy_dominate compares hotfreq with totalfreq>>2, i.e. a quarter of the total
  // cache the page on the core that accesses it the most if that core accesses 
  // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
  // h4h the page.
  void cacheAdapt_policy_dominate(int coren){
-  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
    unsigned int page_gap=page_num/NUMCORESACTIVE;
    unsigned int page_index=page_gap*coren;
    unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
    VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
    for(; page_index < page_index_end; page_index++) {
      bamboo_cache_policy_t policy = {0};
      unsigned int hottestcore = 0;
@@ -150,17 +265,23 @@ void cacheAdapt_policy_dominate(int coren){
      // Format: page start va + cache policy
      if(hotfreq != 0) {
        totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
-      if((unsigned int)hotfreq < (unsigned int)totalfreq) {
+      if(hotfreq < totalfreq) {
          // use hfh
-        //policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
-        unsigned int block = 0;
+        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+        /*unsigned int block = 0;
          BLOCKINDEX(block, (void *) page_sva);
          unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-        CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
+        CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
        } else {
          // locally cache the page in the hottest core
          CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
        }     
+    } else {
+      // reset it to be homed by its host core
+      unsigned int block = 0;
+      BLOCKINDEX(block, (void *) page_sva);
+      unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
+      CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
      }
      CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
      page_sva += BAMBOO_PAGE_SIZE;
@@ -172,21 +293,22 @@ unsigned int cacheAdapt_decision(int coren) {
    // check the statistic data
    // for each page, decide the new cache strategy
  #ifdef GC_CACHE_ADAPT_POLICY1
-  cacheAdapt_policy_h4h(coren);
-#elif defined GC_CACHE_ADAPT_POLICY2
-  cacheAdapt_policy_local(coren);
-#elif defined GC_CACHE_ADAPT_POLICY3
+  //  cacheAdapt_policy_h4h(coren);
+#elif defined(GC_CACHE_ADAPT_POLICY2)
+  //cacheAdapt_policy_local(coren);
+#elif defined(GC_CACHE_ADAPT_POLICY3)
    cacheAdapt_policy_hottest(coren);
-#elif defined GC_CACHE_ADAPT_POLICY4
+#elif defined(GC_CACHE_ADAPT_POLICY4)
    cacheAdapt_policy_dominate(coren);
  #endif
  }
  
  // adapt the cache strategy for the mutator
  void cacheAdapt_mutator() {
+#if (defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
    BAMBOO_CACHE_MF();
    // check the changes and adapt them
-  int * tmp_p = gccachepolicytbl;
+  unsigned int * tmp_p = gccachepolicytbl;
    unsigned int page_sva = gcbaseva;
    for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
      // read out the policy
@@ -197,6 +319,7 @@ void cacheAdapt_mutator() {
      }
      tmp_p += 1;
    }
+#endif
  }
  
  // Cache adapt phase process for clients
@@ -216,19 +339,22 @@ void cacheAdapt_phase_client() {
    //send init finish msg to core coordinator
    send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
    GC_PRINTF("Finish prefinish phase\n");
-  CACHEADAPT_SAMPING_RESET();
+
+#if (defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
+  CACHEADAPT_SAMPLING_RESET();
    if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
      // zero out the gccachesamplingtbl
      BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
      BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
    }
+#endif
  }
  
  extern unsigned long long gc_output_cache_policy_time;
  
  // Cache adpat phase process for the master
  void cacheAdapt_phase_master() {
-  GCPROFILE_ITEM();
+  GCPROFILE_ITEM_MASTER();
    unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
    CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
    gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
@@ -251,45 +377,60 @@ void cacheAdapt_phase_master() {
    cacheAdapt_mutator();
    cacheAdapt_gc(false);
    GC_CHECK_ALL_CORE_STATUS();
-
-  CACHEADAPT_SAMPING_RESET();
+  
+#if (defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
+  CACHEADAPT_SAMPLING_RESET();
    if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
      // zero out the gccachesamplingtbl
      BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
      BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
      BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
    }
+#endif
  }
  
  // output original cache sampling data for each page
  void gc_output_cache_sampling() {
-  //extern volatile bool gc_profile_flag;
-  //if(!gc_profile_flag) return;
+  extern volatile bool gc_profile_flag;
+  if(!gc_profile_flag) return; // print nothing unless gc_profile_flag is set
    unsigned int page_index = 0;
    VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS); // number of pages walked by the loop below
    for(page_index = 0; page_index < page_num; page_index++) {
      page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
      unsigned int block = 0;
      BLOCKINDEX(block, (void *) page_sva);
      unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-    printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
+    //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
+    unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE]; // this page's row: NUMCORESACTIVE consecutive per-core counters
+    int accesscore = 0; // number of cores whose counter for this page is non-zero
      for(int i = 0; i < NUMCORESACTIVE; i++) {
-      int * local_tbl = (int *)((void *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
-      int freq = local_tbl[page_index];
-      //if(freq != 0) {
-        printf("%d,  ", freq);
-      //}
+      int freq = *local_tbl; // first pass only counts; printing happens in the second pass
+      local_tbl++;
+      if(freq != 0) {
+        accesscore++;
+        //printf("%d,  ", freq);
+      }
      }
-    printf("\n");
+    if(accesscore!=0) { // pages with all-zero counters produce no output line
+      printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren); // line prefix: page address, page index, core looked up via gc_block2core
+      unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE]; // restart at the beginning of the row for the printing pass
+      for(int i = 0; i < NUMCORESACTIVE; i++) {
+        unsigned int freq = *local_tbl;
+        local_tbl++;
+        printf("%u,  ", freq); // one column per core, including zero entries
+      }
+      printf("\n");
+    }
+    //printf("\n");
    }
    printf("=================\n");
  } 
  
  // output revised cache sampling data for each page after compaction
  void gc_output_cache_sampling_r() {
-  //extern volatile bool gc_profile_flag;
-  //if(!gc_profile_flag) return;
+  extern volatile bool gc_profile_flag;
+  if(!gc_profile_flag) return;
    // TODO summary data
    unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
    for(int i = 0; i < NUMCORESACTIVE; i++) {
@@ -300,37 +441,42 @@ void gc_output_cache_sampling_r() {
    tprintf("cache sampling_r \n");
    unsigned int page_index = 0;
    VA page_sva = 0;
-  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
    for(page_index = 0; page_index < page_num; page_index++) {
      page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
      unsigned int block = 0;
      BLOCKINDEX(block, (void *)page_sva);
      unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
-    printf(" %x,  %d,  %d,  ",(int)page_sva,page_index,coren);
+    //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
      int accesscore = 0; // TODO
+    unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
      for(int i = 0; i < NUMCORESACTIVE; i++) {
-      int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
-      int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
-      printf("%d,  ", freq);
+      unsigned int freq = *local_tbl; 
+      //printf("%d,  ", freq);
        if(freq != 0) {
          accesscore++;// TODO
        }
+      local_tbl++;
      }
      if(accesscore!=0) {
+      printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
+      unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
        for(int i = 0; i < NUMCORESACTIVE; i++) {
-        int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
-        int freq = local_tbl[page_index]; ///BAMBOO_PAGE_SIZE;
+        unsigned int freq = *local_tbl;
+        printf("%u,  ", freq);
          sumdata[accesscore-1][i]+=freq;
+        local_tbl++;
        }
-    }
-  
-    printf("\n");
+      printf("\n");
+    }  
+    //printf("\n");
    }
+  printf("+++++\n");
    // TODO printout the summary data
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      printf("%d  ", i);
      for(int j = 0; j < NUMCORESACTIVE; j++) {
-      printf(" %d  ", sumdata[j][i]);
+      printf(" %u  ", sumdata[j][i]);
      }
      printf("\n");
    }