From: jzhou Date: Sat, 18 Sep 2010 20:08:58 +0000 (+0000) Subject: Initial version of cache adaption for mutator X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=40fa6282af72c3ad7102d111d4b91acd6ac063f1;p=IRC.git Initial version of cache adaption for mutator --- diff --git a/Robust/src/Runtime/bamboo/multicorecache.h b/Robust/src/Runtime/bamboo/multicorecache.h index 5d4af667..2c56825b 100644 --- a/Robust/src/Runtime/bamboo/multicorecache.h +++ b/Robust/src/Runtime/bamboo/multicorecache.h @@ -6,8 +6,6 @@ #define GC_CACHE_SAMPLING_UNIT 100000000 #define GC_TILE_TIMER_EVENT_SETTING 100000000 #define GC_NUM_SAMPLING 24 -#define GC_CACHE_ADAPT_HOTPAGE_THRESHOLD 1000 -#define GC_CACHE_ADAPT_ACCESS_THRESHOLD 30 // should be consistent with multicoreruntime.h typedef union diff --git a/Robust/src/Runtime/bamboo/multicoregarbage.c b/Robust/src/Runtime/bamboo/multicoregarbage.c index d9f67288..b5897919 100644 --- a/Robust/src/Runtime/bamboo/multicoregarbage.c +++ b/Robust/src/Runtime/bamboo/multicoregarbage.c @@ -2878,71 +2878,93 @@ void cacheAdapt_gc(bool isgccachestage) { // according to collected statistic data extern int gc_num_sampling; -bool cacheAdapt_policy_d(VA page_sva, - bamboo_cache_policy_t* policy, - int page_num, - int page_index){ - int hottestcore = 0; - int num_hotcore = 0; - int hotfreq = 0; - - for(int i = 0; i < NUMCORESACTIVE; i++) { - int * local_tbl = (int *)((void *)gccachesamplingtbl_r - +page_num*sizeof(float)*i); - int freq = local_tbl[page_index]; - // TODO -/* if(page_sva == 0xd180000) { - tprintf("%x %d %d\n", (int)page_sva, i, (int)(freq*100000)); - }*/ - // TODO - // check the freqency, decide if this page is hot for the core - if(hotfreq < freq) { - hotfreq = freq; - hottestcore = i; - } - if(freq > GC_CACHE_ADAPT_HOTPAGE_THRESHOLD) { - num_hotcore++; - } +// make all pages hfh +int cacheAdapt_policy_h4h(){ + unsigned int page_index = 0; + VA page_sva = 0; + unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE); + int numchanged = 0; + int * tmp_p = gccachepolicytbl+1; + for(page_index = 0; page_index < page_num; page_index++) { + page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index; + bamboo_cache_policy_t policy = {0}; + policy.cache_mode = BAMBOO_CACHE_MODE_HASH; + *tmp_p = page_index; + tmp_p++; + *tmp_p = policy.word; + tmp_p++; + numchanged++; } - // TODO - // Decide the cache strategy for this page - // If decide to adapt a new cache strategy, write into the shared block of - // the gcsharedsamplingtbl. The mem recording information that has been - // written is enough to hold the information. - // Format: page start va + cache strategy(hfh/(host core+[x,y])) - if(hotfreq == 0) { - // this page has not been accessed, do not change its cache policy - return false; - } - if(num_hotcore > GC_CACHE_ADAPT_ACCESS_THRESHOLD) { - // use hfh - policy->cache_mode = BAMBOO_CACHE_MODE_HASH; - } else { - // locally cache the page in the hottest core + + return numchanged; +} // int cacheAdapt_policy_hfh() + +// make all pages local as non-cache-adaptable gc local mode +int cacheAdapt_policy_local(){ + unsigned int page_index = 0; + VA page_sva = 0; + unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE); + int numchanged = 0; + int * tmp_p = gccachepolicytbl+1; + for(page_index = 0; page_index < page_num; page_index++) { + page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index; + bamboo_cache_policy_t policy = {0}; + int block = 0; + BLOCKINDEX(page_sva, &block); + int coren = gc_block2core[block%(NUMCORES4GC*2)]; + // locally cache the page in the hotest core // NOTE: (x,y) should be changed to (x+1, y+1)!!! - policy->cache_mode = BAMBOO_CACHE_MODE_COORDS; - policy->lotar_x = bamboo_cpu2coords[2*hottestcore]+1; - policy->lotar_y = bamboo_cpu2coords[2*hottestcore+1]+1; + policy.cache_mode = BAMBOO_CACHE_MODE_COORDS; + policy.lotar_x = bamboo_cpu2coords[2*coren]+1; + policy.lotar_y = bamboo_cpu2coords[2*coren+1]+1; + *tmp_p = page_index; + tmp_p++; + *tmp_p = policy.word; + tmp_p++; + numchanged++; } - return true; -} -void cacheAdapt_master() { - // check the statistic data - // for each page, decide the new cache strategy + return numchanged; +} // int cacheAdapt_policy_local() + +int cacheAdapt_policy_hotest(){ unsigned int page_index = 0; VA page_sva = 0; unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE); int numchanged = 0; int * tmp_p = gccachepolicytbl+1; - int hottestcore = 0; - int num_hotcore = 0; - int hotfreq = 0; for(page_index = 0; page_index < page_num; page_index++) { page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index; bamboo_cache_policy_t policy = {0}; - bool ischange=cacheAdapt_policy_d(page_sva, &policy, page_num, page_index); - if(ischange) { + int hotestcore = 0; + int hotfreq = 0; + + for(int i = 0; i < NUMCORESACTIVE; i++) { + int * local_tbl = (int *)((void *)gccachesamplingtbl_r + +page_num*sizeof(float)*i); + int freq = local_tbl[page_index]; + // TODO + // check the freqency, decide if this page is hot for the core + if(hotfreq < freq) { + hotfreq = freq; + hotestcore = i; + } + } + // TODO + // Decide the cache strategy for this page + // If decide to adapt a new cache strategy, write into the shared block of + // the gcsharedsamplingtbl. The mem recording information that has been + // written is enough to hold the information. + // Format: page start va + cache strategy(hfh/(host core+[x,y])) + if(hotfreq == 0) { + // this page has not been accessed, do not change its cache policy + continue; + } else { + // locally cache the page in the hotest core + // NOTE: (x,y) should be changed to (x+1, y+1)!!! + policy.cache_mode = BAMBOO_CACHE_MODE_COORDS; + policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1; + policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1; *tmp_p = page_index; tmp_p++; *tmp_p = policy.word; @@ -2950,7 +2972,338 @@ void cacheAdapt_master() { numchanged++; } } + + return numchanged; +} // int cacheAdapt_policy_hotest() + +#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50 +// cache the page on the core that accesses it the most if that core accesses +// it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise, +// h4h the page. +int cacheAdapt_policy_dominate(){ + unsigned int page_index = 0; + VA page_sva = 0; + unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE); + int numchanged = 0; + int * tmp_p = gccachepolicytbl+1; + for(page_index = 0; page_index < page_num; page_index++) { + page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index; + bamboo_cache_policy_t policy = {0}; + int hotestcore = 0; + int totalfreq = 0; + int hotfreq = 0; + + for(int i = 0; i < NUMCORESACTIVE; i++) { + int * local_tbl = (int *)((void *)gccachesamplingtbl_r + +page_num*sizeof(float)*i); + int freq = local_tbl[page_index]; + totalfreq += freq; + // TODO + // check the freqency, decide if this page is hot for the core + if(hotfreq < freq) { + hotfreq = freq; + hotestcore = i; + } + } + // Decide the cache strategy for this page + // If decide to adapt a new cache strategy, write into the shared block of + // the gcpolicytbl + // Format: page start va + cache policy + if(hotfreq == 0) { + // this page has not been accessed, do not change its cache policy + continue; + } + totalfreq = (totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100; + if(hotfreq < totalfreq) { + // use hfh + policy.cache_mode = BAMBOO_CACHE_MODE_HASH; + } else { + // locally cache the page in the hotest core + // NOTE: (x,y) should be changed to (x+1, y+1)!!! + policy.cache_mode = BAMBOO_CACHE_MODE_COORDS; + policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1; + policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1; + } + *tmp_p = page_index; + tmp_p++; + *tmp_p = policy.word; + tmp_p++; + numchanged++; + } + + return numchanged; +} // int cacheAdapt_policy_dominate() + +#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 1000 + +void gc_quicksort(int *array, + int left, + int right, + int offset) { + int pivot = 0;; + int leftIdx = left; + int rightIdx = right; + if((right-left+1) >= 1) { + pivot = (left+right)/2; + while((leftIdx <= pivot) && (rightIdx >= pivot)) { + int pivotValue = array[pivot*3-offset]; + while((array[leftIdx*3-offset] < pivotValue) && (leftIdx <= pivot)) { + leftIdx++; + } + while((array[rightIdx*3-offset] > pivotValue) && (rightIdx >= pivot)) { + rightIdx--; + } + // swap [leftIdx] & [rightIdx] + for(int k = 0; k < 3; k++) { + int tmp = array[3*rightIdx-k]; + array[3*rightIdx-k] = array[3*leftIdx-k]; + array[3*leftIdx-k] = tmp; + } + leftIdx++; + rightIdx--; + if((leftIdx-1) == pivot) { + pivot = rightIdx = rightIdx + 1; + } else if((leftIdx+1) == pivot) { + pivot = leftIdx = leftIdx-1; + } + } + gc_quicksort(array, left, pivot-1, offset); + gc_quicksort(array, pivot+1, right, offset); + } + return; +} // void gc_quicksort(...) + +// Every page cached on the core that accesses it the most. +// Check to see if any core's pages total more accesses than threshold +// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the +// most remote accesses and hash for home them until we get below +// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD +int cacheAdapt_policy_overload(){ + unsigned int page_index = 0; + VA page_sva = 0; + unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE); + int numchanged = 0; + int * tmp_p = gccachepolicytbl+1; + int workload[NUMCORESACTIVE]; + memset(workload, 0, NUMCORESACTIVE*sizeof(int)); + int core2heavypages[NUMCORESACTIVE][page_num*3+1]; + memset(core2heavypages, 0, sizeof(int)*(page_num*3+1)*NUMCORESACTIVE); + for(page_index = 0; page_index < page_num; page_index++) { + page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index; + bamboo_cache_policy_t policy = {0}; + int hotestcore = 0; + int totalfreq = 0; + int hotfreq = 0; + + for(int i = 0; i < NUMCORESACTIVE; i++) { + int * local_tbl = (int *)((void *)gccachesamplingtbl_r + +page_num*sizeof(float)*i); + int freq = local_tbl[page_index]; + totalfreq += freq; + // TODO + // check the freqency, decide if this page is hot for the core + if(hotfreq < freq) { + hotfreq = freq; + hotestcore = i; + } + } + // TODO + // Decide the cache strategy for this page + // If decide to adapt a new cache strategy, write into the shared block of + // the gcsharedsamplingtbl. The mem recording information that has been + // written is enough to hold the information. + // Format: page start va + cache strategy(hfh/(host core+[x,y])) + if(hotfreq == 0) { + // this page has not been accessed, do not change its cache policy + continue; + } + // locally cache the page in the hotest core + // NOTE: (x,y) should be changed to (x+1, y+1)!!! + policy.cache_mode = BAMBOO_CACHE_MODE_COORDS; + policy.lotar_x = bamboo_cpu2coords[2*hotestcore]+1; + policy.lotar_y = bamboo_cpu2coords[2*hotestcore+1]+1; + *tmp_p = page_index; + tmp_p++; + *tmp_p = policy.word; + tmp_p++; + numchanged++; + workload[hotestcore] += totalfreq; + // insert into core2heavypages using quicksort + int remoteaccess = totalfreq - hotfreq; + int index = core2heavypages[hotestcore][0]; + core2heavypages[hotestcore][3*index+3] = remoteaccess; + core2heavypages[hotestcore][3*index+2] = totalfreq; + core2heavypages[hotestcore][3*index+1] = tmp_p-1; + core2heavypages[hotestcore][0]++; + } + + // Check the workload of each core + for(int i = 0; i < NUMCORESACTIVE; i++) { + int j = 1; + int index = core2heavypages[i][0]; + if(workload[i] > GC_CACHE_ADAPT_OVERLOAD_THRESHOLD) { + // sort according to the remoteaccess + gc_quicksort(&core2heavypages[i][0], 1, index, 0); + while((workload[i] > GC_CACHE_ADAPT_OVERLOAD_THRESHOLD) && (j GC_CACHE_ADAPT_OVERLOAD_THRESHOLD) { + // sort according to the remote access + gc_quicksort(&core2heavypages[i][0], 1, index, 0); + while((workload[i] > GC_CACHE_ADAPT_OVERLOAD_THRESHOLD) && (j threshold) { +inner_crowd: + // need to hfh these pages + // sort the pages according to remote access + gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0); + while((num_crowded--) && (j < index*3)) { + // h4h those pages with more remote accesses + bamboo_cache_policy_t policy = {0}; + policy.cache_mode = BAMBOO_CACHE_MODE_HASH; + *((int*)core2heavypages[i][j]) = policy.word; + workload[i] -= core2heavypages[i][j+1]; + t_workload -= core2heavypages[i][j+1]; + if((j/3+GC_CACHE_ADAPT_CROWD_THRESHOLD) < index) { + t_workload += + core2heavypages[i][j+GC_CACHE_ADAPT_CROWD_THRESHOLD*3+1]; + } + j += 3; + threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100; + if(t_workload <= threshold) { + break; + } + } + if((j < index*3) && (t_workload > threshold)) { + num_crowded = ((index-j/3) > GC_CACHE_ADAPT_CROWD_THRESHOLD) ? + (GC_CACHE_ADAPT_CROWD_THRESHOLD) : (index-j/3); + goto inner_crowd; + } + } + } + + return numchanged; +} // int cacheAdapt_policy_overload() + +void cacheAdapt_master() { + // check the statistic data + // for each page, decide the new cache strategy + //int numchanged = cacheAdapt_policy_h4h(); + //int numchanged = cacheAdapt_policy_local(); + //int numchanged = cacheAdapt_policy_hotest(); + //int numchanged = cacheAdapt_policy_dominate(); + int numchanged = cacheAdapt_policy_overload(); + //int numchanged = cacheAdapt_policy_crowd(); *gccachepolicytbl = numchanged; + // TODO + //if(numchanged > 0) tprintf("=================\n"); } // adapt the cache strategy for the mutator @@ -2962,12 +3315,19 @@ void cacheAdapt_mutator() { // read out the policy int page_index = *tmp_p; bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1)); + // TODO + /*if(BAMBOO_NUM_OF_CORE == 0) { + tprintf("va: %x, policy: %d (%d,%d) \n", + (int)(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva), policy.cache_mode, + policy.lotar_x, policy.lotar_y); + }*/ // adapt the policy bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva, policy, BAMBOO_PAGE_SIZE); tmp_p += 2; } + //if(BAMBOO_NUM_OF_CORE == 0) tprintf("=================\n"); // TODO } #endif // GC_CACHE_ADAPT diff --git a/Robust/src/Runtime/bamboo/multicoreruntime.h b/Robust/src/Runtime/bamboo/multicoreruntime.h index b86a5c06..85d7e476 100644 --- a/Robust/src/Runtime/bamboo/multicoreruntime.h +++ b/Robust/src/Runtime/bamboo/multicoreruntime.h @@ -295,50 +295,48 @@ struct Queue * totransobjqueue; // queue to hold objs to be transferred #ifdef GC_DEBUG #include "structdefs.h" -#define BAMBOO_NUM_PAGES (NUMCORES4GC*(2+1)+3) +#define BAMBOO_NUM_BLOCKS (NUMCORES4GC*(2+1)+3) #define BAMBOO_PAGE_SIZE (64 * 64) -#define BAMBOO_SMEM_SIZE (64 * 64) // (BAMBOO_PAGE_SIZE) -#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) *(BAMBOO_NUM_PAGES)) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) +#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) *(BAMBOO_NUM_BLOCKS)) #elif defined GC_CACHE_ADAPT -#define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+14)) +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+14)) +#define BAMBOO_PAGE_SIZE (64 * 1024) // 64K #ifdef GC_LARGEPAGESIZE -#define BAMBOO_PAGE_SIZE (1024 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (1024 * 1024) +#define BAMBOO_SMEM_SIZE (16 * (BAMBOO_PAGE_SIZE)) #elif defined GC_SMALLPAGESIZE -#define BAMBOO_PAGE_SIZE (64 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (64 * 1024) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) #elif defined GC_SMALLPAGESIZE2 #define BAMBOO_PAGE_SIZE (16 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (16 * 1024) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) #else -#define BAMBOO_PAGE_SIZE (256 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (256 * 1024) +#define BAMBOO_SMEM_SIZE (4 * (BAMBOO_PAGE_SIZE)) #endif // GC_LARGEPAGESIZE -#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES)) +#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) * (BAMBOO_NUM_BLOCKS)) #else // GC_DEBUG #ifdef GC_LARGESHAREDHEAP -#define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+2)) +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+2)) #elif defined GC_LARGESHAREDHEAP2 -#define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+2)) +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+2)) #else -#define BAMBOO_NUM_PAGES ((GC_BAMBOO_NUMCORES)*(2+3)) //(15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G +#define BAMBOO_NUM_BLOCKS ((GC_BAMBOO_NUMCORES)*(2+3)) //(15 * 1024) //(64 * 4 * 0.75) //(1024 * 1024 * 3.5) 3G #endif #ifdef GC_LARGEPAGESIZE #define BAMBOO_PAGE_SIZE (4 * 1024 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (4 * 1024 * 1024) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) #elif defined GC_SMALLPAGESIZE #define BAMBOO_PAGE_SIZE (256 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (256 * 1024) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) #elif defined GC_SMALLPAGESIZE2 #define BAMBOO_PAGE_SIZE (64 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (64 * 1024) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) #else #define BAMBOO_PAGE_SIZE (1024 * 1024) // (4096) -#define BAMBOO_SMEM_SIZE (1024 * 1024) +#define BAMBOO_SMEM_SIZE (BAMBOO_PAGE_SIZE) #endif // GC_LARGEPAGESIZE -#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_PAGE_SIZE) * (BAMBOO_NUM_PAGES)) //(1024 * 1024 * 240) +#define BAMBOO_SHARED_MEM_SIZE ((BAMBOO_SMEM_SIZE) * (BAMBOO_NUM_BLOCKS)) //(1024 * 1024 * 240) //((unsigned long long int)(3.0 * 1024 * 1024 * 1024)) // 3G #endif // GC_DEBUG