#ifdef GC_CACHE_ADAPT
#include "multicorecache.h"
+#include "multicoremsg.h"
+#include "multicoregcprofile.h"
-typedef struct gc_cache_revise_info {
- unsigned int orig_page_start_va;
- unsigned int orig_page_end_va;
- unsigned int orig_page_index;
- unsigned int to_page_start_va;
- unsigned int to_page_end_va;
- unsigned int to_page_index;
- unsigned int revised_sampling[NUMCORESACTIVE];
-} gc_cache_revise_info_t;
-gc_cache_revise_info_t gc_cache_revise_infomation;
-
-INLINE void samplingDataInit() {
- gc_cache_revise_infomation.to_page_start_va = (unsigned int)to->ptr;
- unsigned int toindex = (unsigned int)(tobase-gcbaseva)/(BAMBOO_PAGE_SIZE);
- gc_cache_revise_infomation.to_page_end_va = gcbaseva +
- (BAMBOO_PAGE_SIZE)*(toindex+1);
- gc_cache_revise_infomation.to_page_index = toindex;
- gc_cache_revise_infomation.orig_page_start_va = (unsigned int)orig->ptr;
- gc_cache_revise_infomation.orig_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
- *(((unsigned int)(orig->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
- gc_cache_revise_infomation.orig_page_index =
- ((unsigned int)(orig->blockbase)-gcbaseva)/(BAMBOO_PAGE_SIZE);
-}
-
-INLINE void samplingDataConvert(unsigned int current_ptr) {
- unsigned int tmp_factor =
- current_ptr-gc_cache_revise_infomation.to_page_start_va;
- unsigned int topage=gc_cache_revise_infomation.to_page_index;
- unsigned int oldpage = gc_cache_revise_infomation.orig_page_index;
- int * newtable=&gccachesamplingtbl_r[topage];
- int * oldtable=&gccachesamplingtbl[oldpage];
-
- for(int tt = 0; tt < NUMCORESACTIVE; tt++) {
- (*newtable) = ((*newtable)+(*oldtable)*tmp_factor);
- newtable=(int*)(((char *)newtable)+size_cachesamplingtbl_local_r);
- oldtable=(int*) (((char *)oldtable)+size_cachesamplingtbl_local);
- }
-}
-
-INLINE void completePageConvert(struct moveHelper * orig,
- struct moveHelper * to,
- unsigned int current_ptr,
- bool closeToPage) {
- unsigned int ptr = 0;
- unsigned int tocompare = 0;
- if(closeToPage) {
- ptr = to->ptr;
- tocompare = gc_cache_revise_infomation.to_page_end_va;
- } else {
- ptr = orig->ptr;
- tocompare = gc_cache_revise_infomation.orig_page_end_va;
- }
- if((unsigned int)ptr >= (unsigned int)tocompare) {
- // end of an orig/to page
- // compute the impact of this page for the new page
- samplingDataConvert(current_ptr);
- // prepare for an new orig page
- unsigned int tmp_index =
- (unsigned int)((unsigned int)orig->ptr-gcbaseva)/(BAMBOO_PAGE_SIZE);
- gc_cache_revise_infomation.orig_page_start_va = orig->ptr;
- gc_cache_revise_infomation.orig_page_end_va = gcbaseva +
- (BAMBOO_PAGE_SIZE)*(unsigned int)(tmp_index+1);
- gc_cache_revise_infomation.orig_page_index = tmp_index;
- gc_cache_revise_infomation.to_page_start_va = to->ptr;
- if(closeToPage) {
- gc_cache_revise_infomation.to_page_end_va = gcbaseva+(BAMBOO_PAGE_SIZE)
- *(((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE)+1);
- gc_cache_revise_infomation.to_page_index =
- ((unsigned int)(to->ptr)-gcbaseva)/(BAMBOO_PAGE_SIZE);
- }
- }
-}
+gc_cache_revise_info_t gc_cache_revise_information;
// prepare for cache adaption:
// -- flush the shared heap
// the master core decides how to adapt cache strategy for the mutator
// according to collected statistic data
+// Find the core that accesses page #page_index most.
+// hottestcore and hotfreq are lvalues updated in place: every per-core counter
+// larger than the running hotfreq replaces it and records that core's index.
+// The callers in this file initialise both to 0 before invoking the macro.
+// The counter of the next core is reached by advancing
+// size_cachesamplingtbl_local_r bytes; the stride goes through char* because
+// arithmetic on void* is a GNU extension, not ISO C.
+// Wrapped in do { } while(0) so that "MACRO(...);" is a single statement.
+#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
+ do { \
+ int *local_tbl=&gccachesamplingtbl_r[(page_index)]; \
+ for(int i = 0; i < NUMCORESACTIVE; i++) { \
+ int freq = *local_tbl; \
+ local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
+ if((hotfreq) < freq) { \
+ (hotfreq) = freq; \
+ (hottestcore) = i; \
+ } \
+ } \
+ } while(0)
+// Find the core that accesses page #page_index most and compute the total
+// access count of the page at the same time.
+// hottestcore, hotfreq and totalfreq are lvalues updated in place: totalfreq
+// accumulates every per-core counter, hotfreq/hottestcore track the largest.
+// The callers in this file initialise all three to 0 before invoking the macro.
+// The per-core stride (size_cachesamplingtbl_local_r bytes) is applied through
+// char* because arithmetic on void* is a GNU extension, not ISO C.
+// Wrapped in do { } while(0) so that "MACRO(...);" is a single statement.
+#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
+ do { \
+ int *local_tbl=&gccachesamplingtbl_r[(page_index)]; \
+ for(int i = 0; i < NUMCORESACTIVE; i++) { \
+ int freq = *local_tbl; \
+ local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
+ (totalfreq) += freq; \
+ if((hotfreq) < freq) { \
+ (hotfreq) = freq; \
+ (hottestcore) = i; \
+ } \
+ } \
+ } while(0)
+// Set the policy as hosted by coren: switch it to BAMBOO_CACHE_MODE_COORDS and
+// store the core's coordinates, read from bamboo_cpu2coords[2*coren] (x) and
+// bamboo_cpu2coords[2*coren+1] (y).
+// NOTE: (x,y) should be changed to (x+1, y+1)!!!
+// Wrapped in do { } while(0) so that "MACRO(...);" is a single statement.
+#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
+ do { \
+ (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
+ (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
+ (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
+ } while(0)
+// Store the new policy word for page #page_index in the table starting at
+// tmp_p (one int slot per page, indexed directly by page number).
+// Wrapped in do { } while(0) so that "MACRO(...);" is a single statement.
+#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
+ do { \
+ ((int*)(tmp_p))[(page_index)] = (policy).word; \
+ } while(0)
+
// make all pages hfh
+// Each core fills its own slice of gccachepolicytbl: page_num/NUMCORESACTIVE
+// consecutive pages starting at page_gap*coren. The last active core also
+// takes the pages left over by the integer division.
+// coren: index of the calling core, used only to pick the slice.
-int cacheAdapt_policy_h4h(){
- unsigned int page_index = 0;
- VA page_sva = 0;
- unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
- unsigned int numchanged = 0;
- int * tmp_p = gccachepolicytbl+1;
- for(page_index = 0; page_index < page_num; page_index++) {
- page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+void cacheAdapt_policy_h4h(int coren){
+ unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+ unsigned int page_gap=page_num/NUMCORESACTIVE;
+ unsigned int page_index=page_gap*coren;
+ unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
+ int * tmp_p = gccachepolicytbl;
+ // the page's virtual address is not needed here: the table is indexed by
+ // page number and every page gets the same hash-for-home policy
+ for(; page_index < page_index_end; page_index++) {
bamboo_cache_policy_t policy = {0};
policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
- *tmp_p = page_index;
- tmp_p++;
- *tmp_p = policy.word;
- tmp_p++;
- numchanged++;
+ CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
}
-
- return numchanged;
}
// make all pages local as non-cache-adaptable gc local mode
+// Each core fills its own slice of gccachepolicytbl (page_num/NUMCORESACTIVE
+// pages starting at page_gap*coren, the last active core also taking the
+// remainder). Every page is hosted on the core that gc_block2core maps the
+// page's block to.
+// coren: index of the calling core, used only to pick the slice.
-int cacheAdapt_policy_local(){
- unsigned int page_index = 0;
- VA page_sva = 0;
- unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
- unsigned int numchanged = 0;
- int * tmp_p = gccachepolicytbl+1;
- for(page_index = 0; page_index < page_num; page_index++) {
- page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+void cacheAdapt_policy_local(int coren){
+ unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+ unsigned int page_gap=page_num/NUMCORESACTIVE;
+ unsigned int page_index=page_gap*coren;
+ unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
+ VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
+ int * tmp_p = gccachepolicytbl;
+ for(; page_index < page_index_end; page_index++) {
bamboo_cache_policy_t policy = {0};
unsigned int block = 0;
- BLOCKINDEX(page_sva, &block);
+ BLOCKINDEX(block, (void *) page_sva);
- unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
+ // named hostcore so it does not shadow the coren parameter
+ unsigned int hostcore = gc_block2core[block%(NUMCORES4GC*2)];
- // locally cache the page in the hotest core
- // NOTE: (x,y) should be changed to (x+1, y+1)!!!
- policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
- policy.lotar_x = bamboo_cpu2coords[2*coren]+1;
- policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1;
- *tmp_p = page_index;
- tmp_p++;
- *tmp_p = policy.word;
- tmp_p++;
- numchanged++;
+ CACHEADAPT_POLICY_SET_HOST_CORE(policy, hostcore);
+ CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
+ page_sva += BAMBOO_PAGE_SIZE;
}
-
- return numchanged;
}
+// Host every accessed page on the core that accesses it the most.
+// Each core fills its own slice of gccachepolicytbl (page_num/NUMCORESACTIVE
+// pages starting at page_gap*coren, the last active core also taking the
+// remainder). A page with no recorded accesses gets an all-zero policy word.
+// coren: index of the calling core, used only to pick the slice.
-int cacheAdapt_policy_hotest(){
- unsigned int page_index = 0;
- VA page_sva = 0;
- unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
- unsigned int numchanged = 0;
- int * tmp_p = gccachepolicytbl+1;
- for(page_index = 0; page_index < page_num; page_index++) {
- page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+void cacheAdapt_policy_hottest(int coren){
+ unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+ unsigned int page_gap=page_num/NUMCORESACTIVE;
+ unsigned int page_index=page_gap*coren;
+ unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
+ int * tmp_p = gccachepolicytbl;
+ // the page's virtual address is not needed here: both the sampling table
+ // and the policy table are indexed by page number
+ for(; page_index < page_index_end; page_index++) {
bamboo_cache_policy_t policy = {0};
- unsigned int hotestcore = 0;
+ unsigned int hottestcore = 0;
unsigned int hotfreq = 0;
-
- int *local_tbl=&gccachesamplingtbl_r[page_index];
- for(int i = 0; i < NUMCORESACTIVE; i++) {
- int freq = *local_tbl;
- local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
-
- // check the freqency, decide if this page is hot for the core
- if(hotfreq < freq) {
- hotfreq = freq;
- hotestcore = i;
- }
- }
+ CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
// TODO
// Decide the cache strategy for this page
// If decide to adapt a new cache strategy, write into the shared block of
// the gcsharedsamplingtbl. The mem recording information that has been
// written is enough to hold the information.
// Format: page start va + cache strategy(hfh/(host core+[x,y]))
- if(hotfreq == 0) {
- // this page has not been accessed, do not change its cache policy
- continue;
- } else {
- // locally cache the page in the hotest core
- // NOTE: (x,y) should be changed to (x+1, y+1)!!!
- policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
- policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
- policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
- *tmp_p = page_index;
- tmp_p++;
- *tmp_p = policy.word;
- tmp_p++;
- numchanged++;
+ if(hotfreq != 0) {
+ // locally cache the page in the hottest core
+ CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
}
+ // the slot is written even for an untouched page (policy stays all zero)
+ CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
}
-
- return numchanged;
}
-#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
+// GC_CACHE_ADAPT_DOMINATE_THRESHOLD is a right-shift count applied to a page's
+// total access count (1 => half of the total).
+#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 1
-// cache the page on the core that accesses it the most if that core accesses
-// it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
-// h4h the page.
+// Cache the page on the core that accesses it the most when that core's count
+// is at least the shifted total (total >> GC_CACHE_ADAPT_DOMINATE_THRESHOLD).
+// Otherwise host the page on the core that gc_block2core maps its block to.
+// A page with no recorded accesses gets an all-zero policy word.
+// Each core fills its own slice of gccachepolicytbl (page_num/NUMCORESACTIVE
+// pages starting at page_gap*coren, the last active core also taking the
+// remainder).
+// coren: index of the calling core, used only to pick the slice.
-int cacheAdapt_policy_dominate(){
- unsigned int page_index = 0;
- VA page_sva = 0;
- unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
- unsigned int numchanged = 0;
- int * tmp_p = gccachepolicytbl+1;
- for(page_index = 0; page_index < page_num; page_index++) {
- page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+void cacheAdapt_policy_dominate(int coren){
+ unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
+ unsigned int page_gap=page_num/NUMCORESACTIVE;
+ unsigned int page_index=page_gap*coren;
+ unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
+ VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
+ int * tmp_p = gccachepolicytbl;
+ for(; page_index < page_index_end; page_index++) {
bamboo_cache_policy_t policy = {0};
- unsigned int hotestcore = 0;
- unsigned long long totalfreq = 0;
+ unsigned int hottestcore = 0;
+ unsigned int totalfreq = 0;
unsigned int hotfreq = 0;
-
- int *local_tbl=&gccachesamplingtbl_r[page_index];
- for(int i = 0; i < NUMCORESACTIVE; i++) {
- int freq = *local_tbl;
- local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
- totalfreq += freq;
- // check the freqency, decide if this page is hot for the core
- if(hotfreq < freq) {
- hotfreq = freq;
- hotestcore = i;
- }
- }
-
+ CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
// Decide the cache strategy for this page
// If decide to adapt a new cache strategy, write into the shared block of
// the gcpolicytbl
// Format: page start va + cache policy
- if(hotfreq == 0) {
- // this page has not been accessed, do not change its cache policy
- continue;
+ if(hotfreq != 0) {
+ totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
+ if((unsigned int)hotfreq < (unsigned int)totalfreq) {
+ // no dominating core: host the page on the core owning its block
+ unsigned int block = 0;
+ // same argument order as the other BLOCKINDEX calls in this file
+ BLOCKINDEX(block, (void *) page_sva);
+ // named hostcore so it does not shadow the coren parameter
+ unsigned int hostcore = gc_block2core[block%(NUMCORES4GC*2)];
+ CACHEADAPT_POLICY_SET_HOST_CORE(policy, hostcore);
+ } else {
+ // locally cache the page in the hottest core
+ CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
+ }
}
- totalfreq =
- (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
- hotfreq/=BAMBOO_PAGE_SIZE;
- if(hotfreq < totalfreq) {
- // use hfh
- policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
- } else {
- // locally cache the page in the hotest core
- // NOTE: (x,y) should be changed to (x+1, y+1)!!!
- policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
- policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
- policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
- }
- *tmp_p = page_index;
- tmp_p++;
- *tmp_p = policy.word;
- tmp_p++;
- numchanged++;
+ CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
+ page_sva += BAMBOO_PAGE_SIZE;
}
-
- return numchanged;
}
+#if 0
#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
+// record the workload of the hottestcore into core2heavypages
+#define CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p) \
+ { \
+ workload[hottestcore] += (totalfreq); \
+ total_workload += (totalfreq); \
+ unsigned long long remoteaccess = (totalfreq) - (hotfreq); \
+ unsigned int index = (unsigned int)core2heavypages[hottestcore][0]; \
+ core2heavypages[hottestcore][3*index+3] = (remoteaccess); \
+ core2heavypages[hottestcore][3*index+2] = (totalfreq); \
+ core2heavypages[hottestcore][3*index+1] = (unsigned long long)((tmp_p)-1); \
+ core2heavypages[hottestcore][0]++; \
+ }
-void gc_quicksort(unsigned long long *array,
- unsigned int left,
- unsigned int right,
- unsigned int offset) {
+void gc_quicksort(unsigned long long *array,unsigned int left,unsigned int right,unsigned int offset) {
unsigned int pivot = 0;;
unsigned int leftIdx = left;
unsigned int rightIdx = right;
return;
}
+// For core i: while its workload exceeds workload_threshold, walk the core's
+// records in core2heavypages[i] three slots at a time (starting at slot 1),
+// overwrite the policy word each record points at with a hash-for-home policy
+// and subtract the record's slot j+1 from workload[i].
+// core2heavypages[i][0] holds the number of records for core i.
+// Returns the slot index the walk stopped at (1 if nothing was changed).
+// NOTE(review): the callers in this disabled region pass a two-dimensional
+// array declared as core2heavypages[NUMCORESACTIVE][page_num*3+1], which is
+// not an "unsigned long long **" -- confirm the parameter type before this
+// code is re-enabled.
+INLINE int cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold,unsigned long long ** core2heavypages, unsigned long long * workload,int i) {
+ int j = 1;
+ unsigned int index = (unsigned int)core2heavypages[i][0];
+ if(workload[i] > workload_threshold) {
+ // sort according to the remoteaccess
+ gc_quicksort(&core2heavypages[i][0], 1, index, 0);
+ while((workload[i] > workload_threshold) && (j<index*3)) {
+ // hfh those pages with more remote accesses
+ bamboo_cache_policy_t policy = {0};
+ policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
+ *((unsigned int*)core2heavypages[i][j]) = policy.word;
+ workload[i] -= core2heavypages[i][j+1];
+ j += 3;
+ }
+ }
+ return j;
+}
+
// Every page cached on the core that accesses it the most.
// Check to see if any core's pages total more accesses than threshold
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
// most remote accesses and hash for home them until we get below
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
-int cacheAdapt_policy_overload(){
+int cacheAdapt_policy_overload(int coren){
unsigned int page_index = 0;
- VA page_sva = 0;
- unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+ VA page_sva = gcbaseva;
+ unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
unsigned int numchanged = 0;
int * tmp_p = gccachepolicytbl+1;
unsigned long long workload[NUMCORESACTIVE];
memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
unsigned long long total_workload = 0;
unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
- memset(core2heavypages,0,
- sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
- for(page_index = 0; page_index < page_num; page_index++) {
- page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+ memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
+ for(page_index = 0; page_sva < gctopva; page_index++) {
bamboo_cache_policy_t policy = {0};
- unsigned int hotestcore = 0;
+ unsigned int hottestcore = 0;
unsigned long long totalfreq = 0;
unsigned int hotfreq = 0;
-
- int *local_tbl=&gccachesamplingtbl_r[page_index];
- for(int i = 0; i < NUMCORESACTIVE; i++) {
- int freq = *local_tbl;
- local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
- totalfreq += freq;
- // check the freqency, decide if this page is hot for the core
- if(hotfreq < freq) {
- hotfreq = freq;
- hotestcore = i;
- }
- }
+ CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
// Decide the cache strategy for this page
// If decide to adapt a new cache strategy, write into the shared block of
// the gcsharedsamplingtbl. The mem recording information that has been
// written is enough to hold the information.
// Format: page start va + cache strategy(hfh/(host core+[x,y]))
- if(hotfreq == 0) {
- // this page has not been accessed, do not change its cache policy
- continue;
+ if(hotfreq != 0) {
+ totalfreq/=BAMBOO_PAGE_SIZE;
+ hotfreq/=BAMBOO_PAGE_SIZE;
+ // locally cache the page in the hottest core
+ CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
+ CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
+ CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
}
-
- totalfreq/=BAMBOO_PAGE_SIZE;
- hotfreq/=BAMBOO_PAGE_SIZE;
- // locally cache the page in the hotest core
- // NOTE: (x,y) should be changed to (x+1, y+1)!!!
- policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
- policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
- policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
- *tmp_p = page_index;
- tmp_p++;
- *tmp_p = policy.word;
- tmp_p++;
- numchanged++;
- workload[hotestcore] += totalfreq;
- total_workload += totalfreq;
- // insert into core2heavypages using quicksort
- unsigned long long remoteaccess = totalfreq - hotfreq;
- unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
- core2heavypages[hotestcore][3*index+3] = remoteaccess;
- core2heavypages[hotestcore][3*index+2] = totalfreq;
- core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
- core2heavypages[hotestcore][0]++;
+ page_sva += BAMBOO_PAGE_SIZE;
}
- unsigned long long workload_threshold =
- total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
+ unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
// Check the workload of each core
for(int i = 0; i < NUMCORESACTIVE; i++) {
- int j = 1;
- unsigned int index = (unsigned int)core2heavypages[i][0];
- if(workload[i] > workload_threshold) {
- // sort according to the remoteaccess
- gc_quicksort(&core2heavypages[i][0], 1, index, 0);
- while((workload[i] > workload_threshold) && (j<index*3)) {
- // hfh those pages with more remote accesses
- bamboo_cache_policy_t policy = {0};
- policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
- *((unsigned int*)core2heavypages[i][j]) = policy.word;
- workload[i] -= core2heavypages[i][j+1];
- j += 3;
- }
- }
+ cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
}
return numchanged;
// then start hfh these pages(selecting the ones with the most remote
// accesses first or fewest local accesses) until we get below
// GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
-int cacheAdapt_policy_crowd(){
+int cacheAdapt_policy_crowd(int coren){
unsigned int page_index = 0;
- VA page_sva = 0;
- unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
+ VA page_sva = gcbaseva;
+ unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
unsigned int numchanged = 0;
int * tmp_p = gccachepolicytbl+1;
unsigned long long workload[NUMCORESACTIVE];
memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
unsigned long long total_workload = 0;
unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
- memset(core2heavypages,0,
- sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
- for(page_index = 0; page_index < page_num; page_index++) {
- page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
+ memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
+ for(page_index = 0; page_sva < gctopva; page_index++) {
bamboo_cache_policy_t policy = {0};
- unsigned int hotestcore = 0;
+ unsigned int hottestcore = 0;
unsigned long long totalfreq = 0;
unsigned int hotfreq = 0;
-
- int *local_tbl=&gccachesamplingtbl_r[page_index];
- for(int i = 0; i < NUMCORESACTIVE; i++) {
- int freq = *local_tbl;
- local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r);
- totalfreq += freq;
- // check the freqency, decide if this page is hot for the core
- if(hotfreq < freq) {
- hotfreq = freq;
- hotestcore = i;
- }
- }
+ CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
// Decide the cache strategy for this page
// If decide to adapt a new cache strategy, write into the shared block of
// the gcsharedsamplingtbl. The mem recording information that has been
// written is enough to hold the information.
// Format: page start va + cache strategy(hfh/(host core+[x,y]))
- if(hotfreq == 0) {
- // this page has not been accessed, do not change its cache policy
- continue;
+ if(hotfreq != 0) {
+ totalfreq/=BAMBOO_PAGE_SIZE;
+ hotfreq/=BAMBOO_PAGE_SIZE;
+ // locally cache the page in the hottest core
+ CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
+ CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
+ CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
}
- totalfreq/=BAMBOO_PAGE_SIZE;
- hotfreq/=BAMBOO_PAGE_SIZE;
- // locally cache the page in the hotest core
- // NOTE: (x,y) should be changed to (x+1, y+1)!!!
- policy.cache_mode = BAMBOO_CACHE_MODE_COORDS;
- policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1;
- policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1;
- *tmp_p = page_index;
- tmp_p++;
- *tmp_p = policy.word;
- tmp_p++;
- numchanged++;
- workload[hotestcore] += totalfreq;
- total_workload += totalfreq;
- // insert into core2heavypages using quicksort
- unsigned long long remoteaccess = totalfreq - hotfreq;
- unsigned int index = (unsigned int)core2heavypages[hotestcore][0];
- core2heavypages[hotestcore][3*index+3] = remoteaccess;
- core2heavypages[hotestcore][3*index+2] = totalfreq;
- core2heavypages[hotestcore][3*index+1] = (unsigned long long)(tmp_p-1);
- core2heavypages[hotestcore][0]++;
+ page_sva += BAMBOO_PAGE_SIZE;
}
- unsigned long long workload_threshold =
- total_workload / GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
+ unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
// Check the workload of each core
for(int i = 0; i < NUMCORESACTIVE; i++) {
- int j = 1;
- unsigned int index = (unsigned int)core2heavypages[i][0];
- if(workload[i] > workload_threshold) {
- // sort according to the remoteaccess
- gc_quicksort(&core2heavypages[i][0], 1, index, 0);
- while((workload[i] > workload_threshold) && (j<index*3)) {
- // hfh those pages with more remote accesses
- bamboo_cache_policy_t policy = {0};
- policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
- *((unsigned int*)core2heavypages[i][j]) = policy.word;
- workload[i] -= core2heavypages[i][j+1];
- j += 3;
- }
- }
-
+ unsigned int index=(unsigned int)core2heavypages[i][0];
+ int j=cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
// Check if the accesses are crowded on few pages
// sort according to the total access
inner_crowd:
gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
- unsigned long long threshold =
- GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
+ unsigned long long threshold=GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
int num_crowded = 0;
unsigned long long t_workload = 0;
do {
return numchanged;
}
+#endif
-void cacheAdapt_master() {
- CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
- unsigned int numchanged = 0;
+// Decide the cache policy for the slice of pages owned by core coren, using
+// whichever GC_CACHE_ADAPT_POLICYn was selected at compile time (no policy is
+// computed when none is defined). The results go into gccachepolicytbl.
+// Always returns 0: the policy routines no longer report a change count and no
+// caller in this file reads the value, but the declared return type is kept.
+unsigned int cacheAdapt_decision(int coren) {
+ BAMBOO_CACHE_MF();
// check the statistic data
// for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
- numchanged = cacheAdapt_policy_h4h();
+ cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
- numchanged = cacheAdapt_policy_local();
+ cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
- numchanged = cacheAdapt_policy_hotest();
+ cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
- numchanged = cacheAdapt_policy_dominate();
-#elif defined GC_CACHE_ADAPT_POLICY5
- numchanged = cacheAdapt_policy_overload();
-#elif defined GC_CACHE_ADAPT_POLICY6
- numchanged = cacheAdapt_policy_crowd();
+ cacheAdapt_policy_dominate(coren);
+//#elif defined GC_CACHE_ADAPT_POLICY5
+// cacheAdapt_policy_overload(coren);
+//#elif defined GC_CACHE_ADAPT_POLICY6
+// cacheAdapt_policy_crowd(coren);
#endif
- *gccachepolicytbl = numchanged;
+ // a non-void function must not fall off its end
+ return 0;
}
// adapt the cache strategy for the mutator
+// Walks gccachepolicytbl one int slot per page, for the pages from gcbaseva up
+// to (not including) gctopva, and applies every non-zero policy word to its
+// page through bamboo_adapt_cache_policy. A zero word leaves the page alone.
void cacheAdapt_mutator() {
- int numchanged = *gccachepolicytbl;
+ BAMBOO_CACHE_MF();
// check the changes and adapt them
- int * tmp_p = gccachepolicytbl+1;
- while(numchanged--) {
+ int * tmp_p = gccachepolicytbl;
+ unsigned int page_sva = gcbaseva;
+ for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
// read out the policy
- int page_index = *tmp_p;
- bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
+ bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
// adapt the policy
- bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,
- policy, BAMBOO_PAGE_SIZE);
-
- tmp_p += 2;
+ if(policy.word != 0) {
+ bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
+ }
+ // advance to the slot of the next page
+ tmp_p += 1;
}
}
+// Client-side cache adaptation, in two steps:
+//  1. CACHEPOLICYPHASE: compute this core's slice of the policy table and
+//     report GCFINISHCACHEPOLICY to STARTUPCORE;
+//  2. PREFINISHPHASE: apply the policies, call cacheAdapt_gc(false), report
+//     GCFINISHPREF to STARTUPCORE and reset the sampling state.
+// NOTE(review): WAITFORGCPHASE presumably blocks until the given phase has
+// been announced -- its definition is not in this file.
void cacheAdapt_phase_client() {
- WAITFORGCPHASE(PREFINISHPHASE);
+ WAITFORGCPHASE(CACHEPOLICYPHASE);
+ GC_PRINTF("Start cachepolicy phase\n");
+ cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
+ //send init finish msg to core coordinator
+ send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
+ GC_PRINTF("Finish cachepolicy phase\n");
+ WAITFORGCPHASE(PREFINISHPHASE);
GC_PRINTF("Start prefinish phase\n");
// cache adapt phase
cacheAdapt_mutator();
cacheAdapt_gc(false);
//send init finish msg to core coordinator
- send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE, false);
+ send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
GC_PRINTF("Finish prefinish phase\n");
CACHEADAPT_SAMPING_RESET();
if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
// zero out the gccachesamplingtbl
BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
- BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
- size_cachesamplingtbl_local_r);
+ BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
}
}
+// accumulates the time spent in CACHEADAPT_OUTPUT_CACHE_SAMPLING_R() below;
+// defined in another translation unit
+extern unsigned long long gc_output_cache_policy_time;
+
+// Master-side cache adaptation:
+//  1. invoke CACHEADAPT_OUTPUT_CACHE_SAMPLING_R() and add its duration to
+//     gc_output_cache_policy_time;
+//  2. enter CACHEPOLICYPHASE, notify the clients (GCSTARTCACHEPOLICY) and
+//     compute this core's slice of the policy table;
+//  3. enter PREFINISHPHASE, notify the clients (GCSTARTPREF), apply the
+//     policies and call cacheAdapt_gc(false);
+//  4. reset the sampling state and zero the sampling and policy tables.
+// NOTE(review): GC_CHECK_ALL_CORE_STATUS presumably waits until every core has
+// reported back (it replaces the removed gc_checkAllCoreStatus_I polling
+// loop) -- its definition is not in this file.
void cacheAdapt_phase_master() {
- GCPROFILEITEM();
- gcphase = PREFINISHPHASE;
- gccorestatus[BAMBOO_NUM_OF_CORE] = 1;
- // Note: all cores should flush their runtime data including non-gc
- // cores
- for(i = 1; i < NUMCORESACTIVE; ++i) {
- // send start flush messages to all cores
- gccorestatus[i] = 1;
- send_msg_1(i, GCSTARTPREF, false);
- }
+ GCPROFILE_ITEM();
+ unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
+ CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
+ gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
+ // let all cores to parallelly process the revised profile data and decide
+ // the cache policy for each page
+ gc_status_info.gcphase = CACHEPOLICYPHASE;
+ GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
+ GC_PRINTF("Start cachepolicy phase \n");
+ // cache adapt phase
+ cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
+ GC_CHECK_ALL_CORE_STATUS(CACHEPOLICYPHASE==gc_status_info.gcphase);
+ BAMBOO_CACHE_MF();
+
+ // let all cores to adopt new policies
+ gc_status_info.gcphase = PREFINISHPHASE;
+ // Note: all cores should flush their runtime data including non-gc cores
+ GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
GC_PRINTF("Start prefinish phase \n");
// cache adapt phase
cacheAdapt_mutator();
- CACHEADPAT_OUTPUT_CACHE_POLICY();
cacheAdapt_gc(false);
-
- gccorestatus[BAMBOO_NUM_OF_CORE] = 0;
- while(PREFINISHPHASE == gcphase) {
- // check the status of all cores
- BAMBOO_ENTER_RUNTIME_MODE_FROM_CLIENT();
- if(gc_checkAllCoreStatus_I()) {
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- break;
- }
- BAMBOO_ENTER_CLIENT_MODE_FROM_RUNTIME();
- }
+ GC_CHECK_ALL_CORE_STATUS(PREFINISHPHASE==gc_status_info.gcphase);
CACHEADAPT_SAMPING_RESET();
if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
// zero out the gccachesamplingtbl
BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
- BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,
- size_cachesamplingtbl_local_r);
+ BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
}
}
+// Print the raw sampling table, one line per page of the shared heap:
+// page start va (hex), page index, host core of the page's block, then the
+// counter of every active core, all comma separated.
void gc_output_cache_sampling() {
unsigned int page_index = 0;
VA page_sva = 0;
unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
for(page_index = 0; page_index < page_num; page_index++) {
page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
unsigned int block = 0;
- BLOCKINDEX(page_sva, &block);
+ BLOCKINDEX(block, (void *) page_sva);
unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
- tprintf("va: %x page_index: %d host: %d\n",(int)page_sva,page_index,coren);
+ printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
for(int i = 0; i < NUMCORESACTIVE; i++) {
- int * local_tbl = (int *)((void *)gccachesamplingtbl
- +size_cachesamplingtbl_local*i);
+ // core i's table starts size_cachesamplingtbl_local*i bytes in; the offset
+ // goes through char* because void* arithmetic is a GNU extension
+ int * local_tbl = (int *)((char *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
int freq = local_tbl[page_index];
- printf("%8d ",freq);
+ printf("%d, ", freq);
}
printf("\n");
}
}
+// Print the revised sampling table, one line per page of the shared heap:
+// page start va (hex), page index, host core of the page's block, then every
+// active core's counter divided by BAMBOO_PAGE_SIZE, all comma separated.
+// Afterwards print a summary matrix: sumdata[k-1][c] adds up core c's scaled
+// counters over all pages that were accessed by exactly k cores.
void gc_output_cache_sampling_r() {
+ // TODO summary data
+ unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
+ for(int i = 0; i < NUMCORESACTIVE; i++) {
+ for(int j = 0; j < NUMCORESACTIVE; j++) {
+ sumdata[i][j] = 0;
+ }
+ }
+ tprintf("cache sampling_r \n");
unsigned int page_index = 0;
VA page_sva = 0;
unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
for(page_index = 0; page_index < page_num; page_index++) {
page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
unsigned int block = 0;
- BLOCKINDEX(page_sva, &block);
+ BLOCKINDEX(block, (void *)page_sva);
unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
- tprintf("va: %x page_index: %d host: %d\n",(int)page_sva,page_index,coren);
+ printf(" %x, %d, %d, ",(int)page_sva,page_index,coren);
+ int accesscore = 0; // TODO
for(int i = 0; i < NUMCORESACTIVE; i++) {
- int * local_tbl = (int *)((void *)gccachesamplingtbl_r
- +size_cachesamplingtbl_local_r*i);
+ // core i's table starts size_cachesamplingtbl_local_r*i bytes in; the
+ // offset goes through char* because void* arithmetic is a GNU extension
+ int * local_tbl = (int *)((char *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
- printf("%8d ",freq);
+ printf("%d, ", freq);
+ if(freq != 0) {
+ accesscore++;// TODO
+ }
+ }
+ if(accesscore!=0) {
+ for(int i = 0; i < NUMCORESACTIVE; i++) {
+ int * local_tbl = (int *)((char *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
+ int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
+ sumdata[accesscore-1][i]+=freq;
+ }
}
printf("\n");
}
+ // TODO printout the summary data
+ for(int i = 0; i < NUMCORESACTIVE; i++) {
+ printf("%d ", i);
+ for(int j = 0; j < NUMCORESACTIVE; j++) {
+ // sumdata is unsigned int, so it is printed with %u
+ printf(" %u ", sumdata[j][i]);
+ }
+ printf("\n");
+ }
printf("=================\n");
}
#endif // GC_CACHE_ADAPT