2 #include "multicorecache.h"
// Bookkeeping used when the collector moves a page: the original page's
// address range and index, the destination page's address range and index,
// and the per-core sampling counts carried over to the destination.
typedef struct gc_cache_revise_info {
  unsigned int orig_page_start_va;  // start VA of the page being moved
  unsigned int orig_page_end_va;    // end VA of the page being moved
  unsigned int orig_page_index;     // index of the original page
  unsigned int to_page_start_va;    // start VA of the destination page
  unsigned int to_page_end_va;      // end VA of the destination page
  unsigned int to_page_index;       // index of the destination page
  unsigned int revised_sampling[NUMCORESACTIVE]; // revised per-core access counts
} gc_cache_revise_info_t;
// Single global instance. NOTE(review): "infomation" is a typo, but the name
// is part of the public interface and cannot be changed here.
gc_cache_revise_info_t gc_cache_revise_infomation;
15 // prepare for cache adaption:
16 // -- flush the shared heap
17 // -- clean dtlb entries
18 // -- change cache strategy
19 void cacheAdapt_gc(bool isgccachestage) {
20 // flush the shared heap
21 BAMBOO_CACHE_FLUSH_L2();
23 // clean the dtlb entries
26 // change the cache strategy
27 gccachestage = isgccachestage;
30 // the master core decides how to adapt cache strategy for the mutator
31 // according to collected statistic data
// Virtual start address of page #page_index in the shared heap:
// heap base plus page_index whole pages.
#define CACHEADAPT_PAGE_START_ADDRESS(page_index) \
  (gcbaseva + ((page_index) * (BAMBOO_PAGE_SIZE)))
// find the core that accesses the page #page_index most
// hotestcore/hotfreq are out-parameters passed by pointer (unsigned int *).
// Per-core sampling tables are laid out back to back, each
// size_cachesamplingtbl_local_r bytes apart; walk one slot per core.
// FIX(review): the excerpt cast the out-parameters to (unsigned int) and then
// dereferenced the integer -- the casts must be to (unsigned int *). The
// enclosing braces lost in extraction are also restored.
#define CACHEADAPT_FIND_HOTEST_CORE(page_index,hotestcore,hotfreq) \
  { \
    int *local_tbl=&gccachesamplingtbl_r[page_index]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
      if(*((unsigned int *)(hotfreq)) < freq) { \
        *((unsigned int *)(hotfreq)) = freq; \
        *((unsigned int *)(hotestcore)) = i; \
      } \
    } \
  }
// find the core that accesses the page #page_index most and compute the total
// access count of the page at the same time.
// hotestcore/hotfreq are (unsigned int *) out-parameters; totalfreq is an
// (unsigned long long *) out-parameter (all call sites pass
// &<unsigned long long>).
// FIX(review): the excerpt cast the out-parameters to plain integers before
// dereferencing; the casts must be pointer casts, and totalfreq must be
// accessed as unsigned long long to match its declaration at the call sites.
// The enclosing braces lost in extraction are restored.
#define CACHEADAPT_FIND_HOTEST_CORE_W_TOTALFREQ(page_index,hotestcore,hotfreq,totalfreq) \
  { \
    int *local_tbl=&gccachesamplingtbl_r[page_index]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
      *((unsigned long long *)(totalfreq)) = *((unsigned long long *)(totalfreq)) + freq; \
      if(*((unsigned int *)(hotfreq)) < freq) { \
        *((unsigned int *)(hotfreq)) = freq; \
        *((unsigned int *)(hotestcore)) = i; \
      } \
    } \
  }
// Set the policy as hosted by coren
// NOTE: (x,y) should be changed to (x+1, y+1)!!!
// bamboo_cpu2coords stores (x,y) pairs per cpu, hence the 2*(coren) indexing.
// FIX(review): the macro's enclosing block was lost in extraction; wrapped in
// do { } while(0) so the three statements behave as one statement at every
// call site (safe under unbraced if/else).
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  do { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  } while(0)
// store the new policy information at tmp_p in gccachepolicytbl:
// writes the pair (page_index, policy word), advances tmp_p past both slots,
// and counts the change in numchanged. This pair layout is what
// cacheAdapt_mutator() reads back (*tmp_p and *(tmp_p+1)).
// FIX(review): the pointer advances, counter increment, and enclosing braces
// were lost in extraction and are restored here -- confirm against upstream.
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged) \
  { \
    *((int*)(tmp_p)) = (page_index); \
    (tmp_p)++; \
    *((int*)(tmp_p)) = (policy).word; \
    (tmp_p)++; \
    (numchanged)++; \
  }
// Adaptation policy "h4h": mark every shared-heap page hash-for-home.
// Returns the number of page policies written (via numchanged).
int cacheAdapt_policy_h4h(){
  unsigned int page_index = 0;
  // NOTE(review): the declaration of page_sva (assigned below) is not visible
  // in this excerpt -- a line appears to have been lost here.
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  unsigned int numchanged = 0;       // number of page policies written
  int * tmp_p = gccachepolicytbl+1;  // slot 0 of the table holds the count
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = CACHEADAPT_PAGE_START_ADDRESS(page_index);
    bamboo_cache_policy_t policy = {0};
    policy.cache_mode = BAMBOO_CACHE_MODE_HASH;  // hash-for-home every page
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
  // NOTE(review): the loop close and the final "return numchanged;" are
  // missing from this excerpt (truncated).
// make all pages local as non-cache-adaptable gc local mode:
// each page is cached on the core that owns the heap block containing it.
int cacheAdapt_policy_local(){
  unsigned int page_index = 0;
  // NOTE(review): the declaration of page_sva (assigned below) is not visible
  // in this excerpt.
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  unsigned int numchanged = 0;       // number of page policies written
  int * tmp_p = gccachepolicytbl+1;  // slot 0 of the table holds the count
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = CACHEADAPT_PAGE_START_ADDRESS(page_index);
    bamboo_cache_policy_t policy = {0};
    unsigned int block = 0;
    BLOCKINDEX(page_sva, &block);    // heap block containing this page
    // owning core of the block, per the block-to-core mapping table
    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
    CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
  // NOTE(review): the loop close and the final "return numchanged;" are
  // missing from this excerpt (truncated).
// Adaptation policy "hotest": cache each page locally on the single core
// that accessed it the most during sampling.
int cacheAdapt_policy_hotest(){
  unsigned int page_index = 0;
  // NOTE(review): the declaration of page_sva (assigned below) is not visible
  // in this excerpt.
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  unsigned int numchanged = 0;       // number of page policies written
  int * tmp_p = gccachepolicytbl+1;  // slot 0 of the table holds the count
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = CACHEADAPT_PAGE_START_ADDRESS(page_index);
    bamboo_cache_policy_t policy = {0};
    unsigned int hotestcore = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTEST_CORE(page_index,&hotestcore,&hotfreq);
    // Decide the cache strategy for this page
    // If decide to adapt a new cache strategy, write into the shared block of
    // the gcsharedsamplingtbl. The mem recording information that has been
    // written is enough to hold the information.
    // Format: page start va + cache strategy(hfh/(host core+[x,y]))
    // NOTE(review): the guard that skips unaccessed pages (presumably
    // "if(hotfreq == 0) ... else ...") is missing from this excerpt; only its
    // comments remain.
    // this page has not been accessed, do not change its cache policy
    // locally cache the page in the hotest core
    CACHEADAPT_POLICY_SET_HOST_CORE(policy, hotestcore);
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
  // NOTE(review): the loop close and the final "return numchanged;" are
  // missing from this excerpt (truncated).
// Percentage of total accesses a single core must exceed to "own" a page.
#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 50
// cache the page on the core that accesses it the most if that core accesses
// it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
// NOTE(review): the tail of this comment (the fallback case, which the code
// below shows is hash-for-home) is missing from this excerpt.
int cacheAdapt_policy_dominate(){
  unsigned int page_index = 0;
  // NOTE(review): the declaration of page_sva (assigned below) is not visible
  // in this excerpt.
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  unsigned int numchanged = 0;       // number of page policies written
  int * tmp_p = gccachepolicytbl+1;  // slot 0 of the table holds the count
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = CACHEADAPT_PAGE_START_ADDRESS(page_index);
    bamboo_cache_policy_t policy = {0};
    unsigned int hotestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTEST_CORE_W_TOTALFREQ(page_index,&hotestcore,&hotfreq,&totalfreq);
    // Decide the cache strategy for this page
    // If decide to adapt a new cache strategy, write into the shared block of
    // Format: page start va + cache policy
    // NOTE(review): the hotfreq == 0 early-skip appears truncated here.
    // this page has not been accessed, do not change its cache policy
    // Scale: totalfreq becomes THRESHOLD% of the total, hotfreq stays the
    // hottest core's share; both divided by page size before comparison.
    totalfreq=(totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)/100/BAMBOO_PAGE_SIZE;
    hotfreq/=BAMBOO_PAGE_SIZE;
    if(hotfreq < totalfreq) {
      // no single core dominates: fall back to hash-for-home
      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
      // NOTE(review): the "} else {" line is missing from this excerpt.
      // locally cache the page in the hotest core
      CACHEADAPT_POLICY_SET_HOST_CORE(policy, hotestcore);
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
  // NOTE(review): branch/loop closes and the final "return numchanged;" are
  // missing from this excerpt (truncated).
// A core is "overloaded" when its pages draw more than 1/OVERLOAD_THRESHOLD
// of the total sampled accesses.
#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
// record the workload of the hotestcore into core2heavypages.
// Layout per core: [0] = number of recorded pages; page k then occupies
// three slots: [3k+1] = address of its policy slot in gccachepolicytbl,
// [3k+2] = total access count, [3k+3] = remote (non-hotest-core) accesses.
// Relies on workload[] and total_workload being in scope at the call site.
// FIX(review): the macro declares locals, so the enclosing braces lost in
// extraction are restored to keep the declarations block-scoped and the
// macro usable as a single statement.
#define CACHEADAPT_RECORD_PAGE_WORKLOAD(hotestcore,totalfreq,hotfreq,remoteaccess,tmp_p) \
  { \
    workload[hotestcore] += (totalfreq); \
    total_workload += (totalfreq); \
    unsigned long long remoteaccess = (totalfreq) - (hotfreq); \
    unsigned int index = (unsigned int)core2heavypages[hotestcore][0]; \
    core2heavypages[hotestcore][3*index+3] = (remoteaccess); \
    core2heavypages[hotestcore][3*index+2] = (totalfreq); \
    core2heavypages[hotestcore][3*index+1] = (unsigned long long)((tmp_p)-1); \
    core2heavypages[hotestcore][0]++; \
  }
// Recursive quicksort over the 3-slot page records in core2heavypages;
// offset selects which slot of each triple is the sort key (the comparisons
// below put larger keys toward the left, i.e. descending by key).
// NOTE(review): several lines are missing from this excerpt (the inner
// index advances, brace closes, and any guard around the recursion) -- do
// not treat this body as complete.
void gc_quicksort(unsigned long long *array,unsigned int left,unsigned int right,unsigned int offset) {
  unsigned int pivot = 0;;
  unsigned int leftIdx = left;
  unsigned int rightIdx = right;
  if((right-left+1) >= 1) {
    pivot = (left+right)/2;
    while((leftIdx <= pivot) && (rightIdx >= pivot)) {
      unsigned long long pivotValue = array[pivot*3-offset];
      while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
        // NOTE(review): the leftIdx advance is missing from this excerpt
      while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
        // NOTE(review): the rightIdx retreat is missing from this excerpt
      // swap [leftIdx] & [rightIdx]
      for(int k = 0; k < 3; k++) {
        unsigned long long tmp = array[3*rightIdx-k];
        array[3*rightIdx-k] = array[3*leftIdx-k];
        array[3*leftIdx-k] = tmp;
      // re-anchor the pivot when an index crosses it
      if((leftIdx-1) == pivot) {
        pivot = rightIdx = rightIdx + 1;
      } else if((leftIdx+1) == pivot) {
        pivot = leftIdx = leftIdx-1;
  // recurse into both partitions around the pivot
  gc_quicksort(array, left, pivot-1, offset);
  gc_quicksort(array, pivot+1, right, offset);
// For core i: if its aggregated page workload exceeds workload_threshold,
// sort its recorded pages by remote-access count and switch the heaviest
// ones to hash-for-home until the core drops back below the threshold.
// NOTE(review): callers pass a 2-D VLA (unsigned long long [N][...]) for a
// parameter declared unsigned long long ** -- these types are not
// interchangeable in C; confirm how the upstream code reconciles this.
INLINE void cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold,unsigned long long ** core2heavypages, unsigned long long * workload,int i) {
  // NOTE(review): the declaration/initialization of j (used below) is
  // missing from this excerpt.
  unsigned int index = (unsigned int)core2heavypages[i][0];  // #pages recorded
  if(workload[i] > workload_threshold) {
    // sort according to the remoteaccess
    gc_quicksort(&core2heavypages[i][0], 1, index, 0);
    while((workload[i] > workload_threshold) && (j<index*3)) {
      // hfh those pages with more remote accesses
      bamboo_cache_policy_t policy = {0};
      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
      // slot [j] appears to hold the address of the page's policy entry in
      // gccachepolicytbl (see CACHEADAPT_RECORD_PAGE_WORKLOAD); overwrite it
      *((unsigned int*)core2heavypages[i][j]) = policy.word;
      workload[i] -= core2heavypages[i][j+1];
      // NOTE(review): the advance of j and the loop/function closes are
      // missing from this excerpt.
// Every page cached on the core that accesses it the most.
// Check to see if any core's pages total more accesses than threshold
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
// most remote accesses and hash for home them until we get below
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
int cacheAdapt_policy_overload(){
  unsigned int page_index = 0;
  // NOTE(review): the declaration of page_sva (assigned below) is not visible
  // in this excerpt.
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  unsigned int numchanged = 0;       // number of page policies written
  int * tmp_p = gccachepolicytbl+1;  // slot 0 of the table holds the count
  unsigned long long workload[NUMCORESACTIVE];  // per-core access totals
  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  unsigned long long total_workload = 0;
  // per-core page records: [0]=count, then (policy-slot addr, total, remote)
  // triples -- see CACHEADAPT_RECORD_PAGE_WORKLOAD
  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = CACHEADAPT_PAGE_START_ADDRESS(page_index);
    bamboo_cache_policy_t policy = {0};
    unsigned int hotestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTEST_CORE_W_TOTALFREQ(page_index,&hotestcore,&hotfreq,&totalfreq);
    // Decide the cache strategy for this page
    // If decide to adapt a new cache strategy, write into the shared block of
    // the gcsharedsamplingtbl. The mem recording information that has been
    // written is enough to hold the information.
    // Format: page start va + cache strategy(hfh/(host core+[x,y]))
    // NOTE(review): the hotfreq == 0 early-skip appears truncated here.
    // this page has not been accessed, do not change its cache policy
    totalfreq/=BAMBOO_PAGE_SIZE;  // normalize counts to per-page units
    hotfreq/=BAMBOO_PAGE_SIZE;
    // locally cache the page in the hotest core
    CACHEADAPT_POLICY_SET_HOST_CORE(policy, hotestcore);
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
    CACHEADAPT_RECORD_PAGE_WORKLOAD(hotestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
  // NOTE(review): the loop close is missing from this excerpt.
  unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  // Check the workload of each core
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
  // NOTE(review): the loop close and the final "return numchanged;" are
  // missing from this excerpt (truncated).
// Percentage of a core's workload that counts as "crowded" ...
#define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
// ... when concentrated on more than this many pages.
#define GC_CACHE_ADAPT_CROWD_THRESHOLD 20
// Every page cached on the core that accesses it the most.
// Check to see if any core's pages total more accesses than threshold
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD. If so, find the pages with the
// most remote accesses and hash for home them until we get below
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.
// Sort pages based on activity....
// If more then GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
// core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages,
// then start hfh these pages(selecting the ones with the most remote
// accesses first or fewest local accesses) until we get below
// GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
int cacheAdapt_policy_crowd(){
  unsigned int page_index = 0;
  // NOTE(review): the declaration of page_sva (assigned below) is not visible
  // in this excerpt.
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  unsigned int numchanged = 0;       // number of page policies written
  int * tmp_p = gccachepolicytbl+1;  // slot 0 of the table holds the count
  unsigned long long workload[NUMCORESACTIVE];  // per-core access totals
  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  unsigned long long total_workload = 0;
  // per-core page records: [0]=count, then (policy-slot addr, total, remote)
  // triples -- see CACHEADAPT_RECORD_PAGE_WORKLOAD
  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = CACHEADAPT_PAGE_START_ADDRESS(page_index);
    bamboo_cache_policy_t policy = {0};
    unsigned int hotestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTEST_CORE_W_TOTALFREQ(page_index,&hotestcore,&hotfreq,&totalfreq);
    // Decide the cache strategy for this page
    // If decide to adapt a new cache strategy, write into the shared block of
    // the gcsharedsamplingtbl. The mem recording information that has been
    // written is enough to hold the information.
    // Format: page start va + cache strategy(hfh/(host core+[x,y]))
    // NOTE(review): the hotfreq == 0 early-skip appears truncated here.
    // this page has not been accessed, do not change its cache policy
    totalfreq/=BAMBOO_PAGE_SIZE;  // normalize counts to per-page units
    hotfreq/=BAMBOO_PAGE_SIZE;
    // locally cache the page in the hotest core
    CACHEADAPT_POLICY_SET_HOST_CORE(policy, hotestcore);
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
    CACHEADAPT_RECORD_PAGE_WORKLOAD(hotestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
  // NOTE(review): the loop close is missing from this excerpt.
  unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  // Check the workload of each core
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    // first relieve plainly overloaded cores
    cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
    // Check if the accesses are crowded on few pages
    // sort according to the total access
    // NOTE(review): the declarations of j, index and num_crowded, and the
    // opening of the do-loop below, are missing from this excerpt.
    gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
    unsigned long long threshold=GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
    unsigned long long t_workload = 0;
    // accumulate the top pages' workload until it reaches threshold
    t_workload += core2heavypages[i][j+num_crowded*3+1];
  } while(t_workload < threshold);
  // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough
  // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
  if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
    // need to hfh these pages
    // sort the pages according to remote access
    gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
    // h4h those pages with more remote accesses
    bamboo_cache_policy_t policy = {0};
    policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
    // slot [j] holds the address of the page's policy entry; overwrite it
    *((unsigned int*)core2heavypages[i][j]) = policy.word;
    workload[i] -= core2heavypages[i][j+1];
    t_workload -= core2heavypages[i][j+1];
    // NOTE(review): the advance of j/num_crowded between these lines is
    // missing from this excerpt.
    threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
  // NOTE(review): the remaining closes and the final "return numchanged;"
  // are missing from this excerpt (truncated).
// The master core decides how to adapt the cache strategy for the mutator
// according to the collected statistic data: run the compile-time-selected
// policy, then publish the change count in slot 0 of gccachepolicytbl for
// cacheAdapt_mutator() to consume.
void cacheAdapt_master() {
  CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
  unsigned int numchanged = 0;
  // check the statistic data
  // for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
  numchanged = cacheAdapt_policy_h4h();
#elif defined GC_CACHE_ADAPT_POLICY2
  numchanged = cacheAdapt_policy_local();
#elif defined GC_CACHE_ADAPT_POLICY3
  numchanged = cacheAdapt_policy_hotest();
#elif defined GC_CACHE_ADAPT_POLICY4
  numchanged = cacheAdapt_policy_dominate();
#elif defined GC_CACHE_ADAPT_POLICY5
  numchanged = cacheAdapt_policy_overload();
#elif defined GC_CACHE_ADAPT_POLICY6
  numchanged = cacheAdapt_policy_crowd();
  // NOTE(review): the #endif closing this conditional chain is missing from
  // this excerpt.
  *gccachepolicytbl = numchanged;  // publish count for cacheAdapt_mutator()
  // NOTE(review): the function's closing brace is missing from this excerpt.
// adapt the cache strategy for the mutator:
// read the (page_index, policy-word) pairs published by cacheAdapt_master()
// in gccachepolicytbl and apply each policy to its page's address range.
void cacheAdapt_mutator() {
  int numchanged = *gccachepolicytbl;  // slot 0 = number of pairs that follow
  // check the changes and adapt them
  int * tmp_p = gccachepolicytbl+1;
  while(numchanged--) {
    // read out the policy
    int page_index = *tmp_p;
    bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p+1));
    // apply the policy over the whole page
    bamboo_adapt_cache_policy(page_index*(BAMBOO_PAGE_SIZE)+gcbaseva,policy,BAMBOO_PAGE_SIZE);
    // NOTE(review): the advance of tmp_p past the consumed pair and the
    // loop/function closes are missing from this excerpt.
// Client-side prefinish phase: wait for the master's signal, apply the new
// cache policies, leave GC cache mode, ack the master, and reset sampling
// state for the next cycle.
void cacheAdapt_phase_client() {
  WAITFORGCPHASE(PREFINISHPHASE);
  GC_PRINTF("Start prefinish phase\n");
  // apply the policies the master published, then drop out of GC cache mode
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  //send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish prefinish phase\n");
  CACHEADAPT_SAMPING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbl
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
  // NOTE(review): the if/function closes are missing from this excerpt.
// Master-side prefinish phase: signal all clients, apply the new policies on
// this core, wait for every core to finish, then reset sampling state and
// clear the policy table for the next cycle.
void cacheAdapt_phase_master() {
  gcphase = PREFINISHPHASE;
  // Note: all cores should flush their runtime data including non-gc cores
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
  GC_PRINTF("Start prefinish phase \n");
  // apply the published policies locally and leave GC cache mode
  cacheAdapt_mutator();
  CACHEADPAT_OUTPUT_CACHE_POLICY();
  cacheAdapt_gc(false);
  // wait until every core has acknowledged the prefinish phase
  GC_CHECK_ALL_CORE_STATUS(PREFINISHPHASE == gcphase);
  CACHEADAPT_SAMPING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbl
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
    BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
  // NOTE(review): the if/function closes are missing from this excerpt.
// Debug dump of the first cache-sampling table: for each page print its VA,
// index, and host core, then read each core's access count for the page.
void gc_output_cache_sampling() {
  unsigned int page_index = 0;
  // NOTE(review): the declaration of page_sva (assigned below) is not visible
  // in this excerpt.
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    unsigned int block = 0;
    BLOCKINDEX(page_sva, &block);
    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
    tprintf("va: %x page_index: %d host: %d\n",(int)page_sva,page_index,coren);
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      // core i's table is the i-th contiguous slice of gccachesamplingtbl
      int * local_tbl = (int *)((void *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
      int freq = local_tbl[page_index];
      // NOTE(review): the print of freq and the loop closes are missing from
      // this excerpt.
  printf("=================\n");
// Debug dump of the second-round cache-sampling table (the _r tables used by
// the adaptation policies): per page, print VA/index/host core and read each
// core's access count normalized to per-page units.
void gc_output_cache_sampling_r() {
  unsigned int page_index = 0;
  // NOTE(review): the declaration of page_sva (assigned below) is not visible
  // in this excerpt.
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    unsigned int block = 0;
    BLOCKINDEX(page_sva, &block);
    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
    tprintf("va: %x page_index: %d host: %d\n",(int)page_sva,page_index,coren);
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      // core i's table is the i-th contiguous slice of gccachesamplingtbl_r
      int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
      int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;  // per-page units
      // NOTE(review): the print of freq and the loop closes are missing from
      // this excerpt.
  printf("=================\n");
505 #endif // GC_CACHE_ADAPT