Bug fix for cache adaption
[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
1 #ifdef GC_CACHE_ADAPT
2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregcprofile.h"
5
6 gc_cache_revise_info_t gc_cache_revise_information;
7
8 // prepare for cache adaption:
9 //   -- flush the shared heap
10 //   -- clean dtlb entries
11 //   -- change cache strategy
12 void cacheAdapt_gc(bool isgccachestage) {
13   // flush the shared heap
14   BAMBOO_CACHE_FLUSH_L2();
15
16   // clean the dtlb entries
17   BAMBOO_CLEAN_DTLB();
18
19   // change the cache strategy
20   gccachestage = isgccachestage;
21
22
23 // the master core decides how to adapt cache strategy for the mutator 
24 // according to collected statistic data
25
// Find the core that accesses page #page_index most.
//   page_index  -- index of the page inside one core's sampling table
//   hottestcore -- lvalue, set to the index of the core with the highest count
//   hotfreq     -- lvalue, raised to the highest count seen; a core only wins
//                  when its count is strictly greater than the current value,
//                  so ties go to the lowest-numbered core
// The counter of the same page on core #n is read n *
// size_cachesamplingtbl_local_r bytes after the entry in gccachesamplingtbl_r.
// Wrapped in do/while(0) so the macro behaves as one statement.
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  do { \
    char *base_=(char *)&gccachesamplingtbl_r[page_index]; \
    for(int core_ = 0; core_ < NUMCORESACTIVE; core_++) { \
      int freq_ = *(int *)(base_+size_cachesamplingtbl_local_r*core_); \
      if((hotfreq) < freq_) { \
        (hotfreq) = freq_; \
        (hottestcore) = core_; \
      } \
    } \
  } while(0)
// Find the core that accesses page #page_index most and compute the total
// access count of the page at the same time.
//   page_index  -- index of the page inside one core's sampling table
//   hottestcore -- lvalue, set to the index of the core with the highest count
//   hotfreq     -- lvalue, raised to the highest count seen (strictly greater
//                  wins, so ties go to the lowest-numbered core)
//   totalfreq   -- lvalue, the counts of all cores are added to it
// The counter of the same page on core #n is read n *
// size_cachesamplingtbl_local_r bytes after the entry in gccachesamplingtbl_r.
// Wrapped in do/while(0) so the macro behaves as one statement.
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  do { \
    char *base_=(char *)&gccachesamplingtbl_r[page_index]; \
    for(int core_ = 0; core_ < NUMCORESACTIVE; core_++) { \
      int freq_ = *(int *)(base_+size_cachesamplingtbl_local_r*core_); \
      (totalfreq) += freq_; \
      if((hotfreq) < freq_) { \
        (hotfreq) = freq_; \
        (hottestcore) = core_; \
      } \
    } \
  } while(0)
// Set the policy so that the page is hosted by core #coren.
// NOTE: the coordinates (x,y) read from bamboo_cpu2coords are stored as
// (x+1, y+1).
// Every continuation backslash must be the last character on its line.
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  do { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  } while(0)
// Store the word of `policy` as entry #page_index of the int table tmp_p.
// Wrapped in do/while(0) so the macro behaves as one statement, including
// when it is the body of an unbraced if/else.
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  do { \
    ((int*)(tmp_p))[(page_index)] = (policy).word; \
  } while(0)
67
68 // make all pages hfh
69 void cacheAdapt_policy_h4h(int coren){
70   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
71   unsigned int page_gap=page_num/NUMCORESACTIVE;
72   unsigned int page_index=page_gap*coren;
73   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
74   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
75   int * tmp_p = gccachepolicytbl;
76   for(; page_index < page_index_end; page_index++) {
77     bamboo_cache_policy_t policy = {0};
78     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
79     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
80     page_sva += BAMBOO_PAGE_SIZE;
81   }
82
83
84 // make all pages local as non-cache-adaptable gc local mode
85 void cacheAdapt_policy_local(int coren){
86   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
87   unsigned int page_gap=page_num/NUMCORESACTIVE;
88   unsigned int page_index=page_gap*coren;
89   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
90   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
91   int * tmp_p = gccachepolicytbl;
92   for(; page_index < page_index_end; page_index++) {
93     bamboo_cache_policy_t policy = {0};
94     unsigned int block = 0;
95     BLOCKINDEX(block, (void *) page_sva);
96     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
97     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
98     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
99     page_sva += BAMBOO_PAGE_SIZE;
100   }
101
102
103 void cacheAdapt_policy_hottest(int coren){
104   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
105   unsigned int page_gap=page_num/NUMCORESACTIVE;
106   unsigned int page_index=page_gap*coren;
107   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
108   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
109   int * tmp_p = gccachepolicytbl;
110   for(; page_index < page_index_end; page_index++) {
111     bamboo_cache_policy_t policy = {0};
112     unsigned int hottestcore = 0;
113     unsigned int hotfreq = 0;
114     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
115     // TODO
116     // Decide the cache strategy for this page
117     // If decide to adapt a new cache strategy, write into the shared block of
118     // the gcsharedsamplingtbl. The mem recording information that has been 
119     // written is enough to hold the information.
120     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
121     if(hotfreq != 0) {
122       // locally cache the page in the hottest core
123       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
124     }
125     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
126     page_sva += BAMBOO_PAGE_SIZE;
127   }
128
129
130 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
131 // cache the page on the core that accesses it the most if that core accesses 
132 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
133 // h4h the page.
134 void cacheAdapt_policy_dominate(int coren){
135   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
136   unsigned int page_gap=page_num/NUMCORESACTIVE;
137   unsigned int page_index=page_gap*coren;
138   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
139   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
140   int * tmp_p = gccachepolicytbl;
141   for(; page_index < page_index_end; page_index++) {
142     bamboo_cache_policy_t policy = {0};
143     unsigned int hottestcore = 0;
144     unsigned int totalfreq = 0;
145     unsigned int hotfreq = 0;
146     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
147     // Decide the cache strategy for this page
148     // If decide to adapt a new cache strategy, write into the shared block of
149     // the gcpolicytbl 
150     // Format: page start va + cache policy
151     if(hotfreq != 0) {
152       totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
153       if((unsigned int)hotfreq < (unsigned int)totalfreq) {
154         // use hfh
155         //policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
156         unsigned int block = 0;
157         BLOCKINDEX(page_sva, &block);
158         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
159         CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
160       } else {
161         // locally cache the page in the hottest core
162         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
163       }     
164     }
165     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
166     page_sva += BAMBOO_PAGE_SIZE;
167   }
168 }
169
170 #if 0
#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
// Record the workload of a page for its hottest core.
// Uses workload[], total_workload and core2heavypages[][] from the scope of
// the invocation.  core2heavypages[core][0] counts the records of that core;
// record #k (k counted from 0) occupies
//   [3*k+1] -- the value (tmp_p)-1 converted to an integer
//   [3*k+2] -- totalfreq
//   [3*k+3] -- totalfreq - hotfreq (the remote accesses)
// The remoteaccess parameter is kept only so existing invocations keep
// compiling; it is not used.
#define CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p) \
  do { \
    workload[hottestcore] += (totalfreq); \
    total_workload += (totalfreq); \
    unsigned long long remote_ = (totalfreq) - (hotfreq); \
    unsigned int slot_ = (unsigned int)core2heavypages[hottestcore][0]; \
    core2heavypages[hottestcore][3*slot_+3] = remote_; \
    core2heavypages[hottestcore][3*slot_+2] = (totalfreq); \
    core2heavypages[hottestcore][3*slot_+1] = (unsigned long long)((tmp_p)-1); \
    core2heavypages[hottestcore][0]++; \
  } while(0)
184
// Sort the records #left..#right of array into descending key order.
// Record #r occupies array[3*r-2], array[3*r-1] and array[3*r]; its key is
// array[3*r-offset].  Records are moved as whole triples.
// Ranges holding fewer than two records (right <= left) are left untouched.
void gc_quicksort(unsigned long long *array,unsigned int left,unsigned int right,unsigned int offset) {
  if(right <= left) {
    return;
  }
  unsigned int pivot = (left+right)/2;
  unsigned int leftIdx = left;
  unsigned int rightIdx = right;
  while((leftIdx <= pivot) && (rightIdx >= pivot)) {
    unsigned long long pivotValue = array[pivot*3-offset];
    while((leftIdx <= pivot) && (array[leftIdx*3-offset] > pivotValue)) {
      leftIdx++;
    }
    while((rightIdx >= pivot) && (array[rightIdx*3-offset] < pivotValue)) {
      rightIdx--;
    }
    // swap [leftIdx] & [rightIdx]
    for(int k = 0; k < 3; k++) {
      unsigned long long tmp = array[3*rightIdx-k];
      array[3*rightIdx-k] = array[3*leftIdx-k];
      array[3*leftIdx-k] = tmp;
    }
    leftIdx++;
    rightIdx--;
    // If one side was standing on the pivot, the swap moved the pivot record
    // to the other side's position: follow it and re-examine that position.
    if((leftIdx-1) == pivot) {
      pivot = rightIdx = rightIdx + 1;
    } else if((rightIdx+1) == pivot) {
      pivot = leftIdx = leftIdx-1;
    }
  }
  // the guards keep pivot-1 / pivot+1 from running outside [left,right]
  if(pivot > left) {
    gc_quicksort(array, left, pivot-1, offset);
  }
  if(pivot < right) {
    gc_quicksort(array, pivot+1, right, offset);
  }
}
218
219 INLINE int cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold,unsigned long long ** core2heavypages, unsigned long long * workload,int i) {
220   int j = 1;
221   unsigned int index = (unsigned int)core2heavypages[i][0];
222   if(workload[i] > workload_threshold) {
223     // sort according to the remoteaccess
224     gc_quicksort(&core2heavypages[i][0], 1, index, 0);
225     while((workload[i] > workload_threshold) && (j<index*3)) {
226       // hfh those pages with more remote accesses 
227       bamboo_cache_policy_t policy = {0};
228       policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
229       *((unsigned int*)core2heavypages[i][j]) = policy.word;
230       workload[i] -= core2heavypages[i][j+1];
231       j += 3;
232     }
233   }
234   return j;
235 }
236
237 // Every page cached on the core that accesses it the most. 
238 // Check to see if any core's pages total more accesses than threshold 
239 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
240 // most remote accesses and hash for home them until we get below 
241 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
242 int cacheAdapt_policy_overload(int coren){
243   unsigned int page_index = 0;
244   VA page_sva = gcbaseva;
245   unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
246   unsigned int numchanged = 0;
247   int * tmp_p = gccachepolicytbl+1;
248   unsigned long long workload[NUMCORESACTIVE];
249   memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
250   unsigned long long total_workload = 0;
251   unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
252   memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
253   for(page_index = 0; page_sva < gctopva; page_index++) {
254     bamboo_cache_policy_t policy = {0};
255     unsigned int hottestcore = 0;
256     unsigned long long totalfreq = 0;
257     unsigned int hotfreq = 0;
258     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
259     // Decide the cache strategy for this page
260     // If decide to adapt a new cache strategy, write into the shared block of
261     // the gcsharedsamplingtbl. The mem recording information that has been 
262     // written is enough to hold the information.
263     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
264     if(hotfreq != 0) {
265       totalfreq/=BAMBOO_PAGE_SIZE;
266       hotfreq/=BAMBOO_PAGE_SIZE;
267       // locally cache the page in the hottest core
268       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
269       CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
270       CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);    
271     }
272     page_sva += BAMBOO_PAGE_SIZE;
273   }
274
275   unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
276   // Check the workload of each core
277   for(int i = 0; i < NUMCORESACTIVE; i++) {
278     cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
279   }
280
281   return numchanged;
282 }
283
284 #define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
285 #define GC_CACHE_ADAPT_CROWD_THRESHOLD  20
286 // Every page cached on the core that accesses it the most. 
287 // Check to see if any core's pages total more accesses than threshold 
288 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
289 // most remote accesses and hash for home them until we get below 
290 // GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  
291 // Sort pages based on activity.... 
292 // If more then GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
293 // core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages, 
294 // then start hfh these pages(selecting the ones with the most remote 
295 // accesses first or fewest local accesses) until we get below 
296 // GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
297 int cacheAdapt_policy_crowd(int coren){
298   unsigned int page_index = 0;
299   VA page_sva = gcbaseva;
300   unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
301   unsigned int numchanged = 0;
302   int * tmp_p = gccachepolicytbl+1;
303   unsigned long long workload[NUMCORESACTIVE];
304   memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
305   unsigned long long total_workload = 0;
306   unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
307   memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
308   for(page_index = 0; page_sva < gctopva; page_index++) {
309     bamboo_cache_policy_t policy = {0};
310     unsigned int hottestcore = 0;
311     unsigned long long totalfreq = 0;
312     unsigned int hotfreq = 0;
313     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
314     // Decide the cache strategy for this page
315     // If decide to adapt a new cache strategy, write into the shared block of
316     // the gcsharedsamplingtbl. The mem recording information that has been 
317     // written is enough to hold the information.
318     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
319     if(hotfreq != 0) {
320       totalfreq/=BAMBOO_PAGE_SIZE;
321       hotfreq/=BAMBOO_PAGE_SIZE;
322       // locally cache the page in the hottest core
323       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
324       CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
325       CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
326     }
327     page_sva += BAMBOO_PAGE_SIZE;
328   }
329
330   unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
331   // Check the workload of each core
332   for(int i = 0; i < NUMCORESACTIVE; i++) {
333     unsigned int index=(unsigned int)core2heavypages[i][0];
334     int j=cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
335     // Check if the accesses are crowded on few pages
336     // sort according to the total access
337 inner_crowd:
338     gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
339     unsigned long long threshold=GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
340     int num_crowded = 0;
341     unsigned long long t_workload = 0;
342     do {
343       t_workload += core2heavypages[i][j+num_crowded*3+1];
344       num_crowded++;
345     } while(t_workload < threshold);
346     // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough 
347     // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
348     if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
349       // need to hfh these pages
350       // sort the pages according to remote access
351       gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
352       // h4h those pages with more remote accesses 
353       bamboo_cache_policy_t policy = {0};
354       policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
355       *((unsigned int*)core2heavypages[i][j]) = policy.word;
356       workload[i] -= core2heavypages[i][j+1];
357       t_workload -= core2heavypages[i][j+1];
358       j += 3;
359       threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
360       goto inner_crowd;
361     }
362   }
363
364   return numchanged;
365
366 #endif
367
// Decide the new cache policy for the pages handled by core #coren, using the
// policy selected at compile time through GC_CACHE_ADAPT_POLICY1..4.  When
// none of them is defined only the BAMBOO_CACHE_MF() call is made.
// Returns 0: none of the active policies reports a count, and a function
// declared unsigned int must not fall off its end.
unsigned int cacheAdapt_decision(int coren) {
  BAMBOO_CACHE_MF();
  // check the statistic data
  // for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
//#elif defined GC_CACHE_ADAPT_POLICY5
//  cacheAdapt_policy_overload(coren);
//#elif defined GC_CACHE_ADAPT_POLICY6
//  cacheAdapt_policy_crowd(coren);
#endif
  return 0;
}
386
387 // adapt the cache strategy for the mutator
388 void cacheAdapt_mutator() {
389   BAMBOO_CACHE_MF();
390   // check the changes and adapt them
391   int * tmp_p = gccachepolicytbl;
392   unsigned int page_sva = gcbaseva;
393   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
394     // read out the policy
395     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
396     // adapt the policy
397     if(policy.word != 0) {
398       bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
399     }
400     tmp_p += 1;
401   }
402 }
403
404 void cacheAdapt_phase_client() {
405   WAITFORGCPHASE(CACHEPOLICYPHASE);
406   GC_PRINTF("Start cachepolicy phase\n");
407   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
408   //send init finish msg to core coordinator
409   send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
410   GC_PRINTF("Finish cachepolicy phase\n");
411
412   WAITFORGCPHASE(PREFINISHPHASE);
413   GC_PRINTF("Start prefinish phase\n");
414   // cache adapt phase
415   cacheAdapt_mutator();
416   cacheAdapt_gc(false);
417   //send init finish msg to core coordinator
418   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
419   GC_PRINTF("Finish prefinish phase\n");
420   CACHEADAPT_SAMPING_RESET();
421   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
422     // zero out the gccachesamplingtbl
423     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
424     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
425   }
426 }
427
428 extern unsigned long long gc_output_cache_policy_time;
429
430 void cacheAdapt_phase_master() {
431   GCPROFILE_ITEM();
432   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
433   CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
434   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
435   // let all cores to parallelly process the revised profile data and decide 
436   // the cache policy for each page
437   gc_status_info.gcphase = CACHEPOLICYPHASE;
438   GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
439   GC_PRINTF("Start cachepolicy phase \n");
440   // cache adapt phase
441   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
442   GC_CHECK_ALL_CORE_STATUS(CACHEPOLICYPHASE==gc_status_info.gcphase);
443   BAMBOO_CACHE_MF();
444
445   // let all cores to adopt new policies
446   gc_status_info.gcphase = PREFINISHPHASE;
447   // Note: all cores should flush their runtime data including non-gc cores
448   GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
449   GC_PRINTF("Start prefinish phase \n");
450   // cache adapt phase
451   cacheAdapt_mutator();
452   cacheAdapt_gc(false);
453   GC_CHECK_ALL_CORE_STATUS(PREFINISHPHASE==gc_status_info.gcphase);
454
455   CACHEADAPT_SAMPING_RESET();
456   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
457     // zero out the gccachesamplingtbl
458     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
459     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
460     BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
461   }
462 }
463
464 void gc_output_cache_sampling() {
465   //extern volatile bool gc_profile_flag;
466   //if(!gc_profile_flag) return;
467   unsigned int page_index = 0;
468   VA page_sva = 0;
469   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
470   for(page_index = 0; page_index < page_num; page_index++) {
471     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
472     unsigned int block = 0;
473     BLOCKINDEX(block, (void *) page_sva);
474     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
475     printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
476     for(int i = 0; i < NUMCORESACTIVE; i++) {
477       int * local_tbl = (int *)((void *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
478       int freq = local_tbl[page_index];
479       //if(freq != 0) {
480         printf("%d,  ", freq);
481       //}
482     }
483     printf("\n");
484   }
485   printf("=================\n");
486
487
488 void gc_output_cache_sampling_r() {
489   //extern volatile bool gc_profile_flag;
490   //if(!gc_profile_flag) return;
491   // TODO summary data
492   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
493   for(int i = 0; i < NUMCORESACTIVE; i++) {
494     for(int j = 0; j < NUMCORESACTIVE; j++) {
495       sumdata[i][j] = 0;
496     }
497   }
498   tprintf("cache sampling_r \n");
499   unsigned int page_index = 0;
500   VA page_sva = 0;
501   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
502   for(page_index = 0; page_index < page_num; page_index++) {
503     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
504     unsigned int block = 0;
505     BLOCKINDEX(block, (void *)page_sva);
506     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
507     printf(" %x,  %d,  %d,  ",(int)page_sva,page_index,coren);
508     int accesscore = 0; // TODO
509     for(int i = 0; i < NUMCORESACTIVE; i++) {
510       int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
511       int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
512       printf("%d,  ", freq);
513       if(freq != 0) {
514         accesscore++;// TODO
515       }
516     }
517     if(accesscore!=0) {
518       for(int i = 0; i < NUMCORESACTIVE; i++) {
519         int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
520         int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
521         sumdata[accesscore-1][i]+=freq;
522       }
523     }
524   
525     printf("\n");
526   }
527   // TODO printout the summary data
528   for(int i = 0; i < NUMCORESACTIVE; i++) {
529     printf("%d  ", i);
530     for(int j = 0; j < NUMCORESACTIVE; j++) {
531       printf(" %d  ", sumdata[j][i]);
532     }
533     printf("\n");
534   }
535   printf("=================\n");
536
537 #endif // GC_CACHE_ADAPT