3e78638a22830636b89f49744e57b69057a0bb30
[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
1 #ifdef GC_CACHE_ADAPT
2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregcprofile.h"
5
6 gc_cache_revise_info_t gc_cache_revise_information;
7
8 // prepare for cache adaption:
9 //   -- flush the shared heap
10 //   -- clean dtlb entries
11 //   -- change cache strategy
12 void cacheAdapt_gc(bool isgccachestage) {
13   // flush the shared heap
14   BAMBOO_CACHE_FLUSH_L2();
15
16   // clean the dtlb entries
17   BAMBOO_CLEAN_DTLB();
18
19   // change the cache strategy
20   gccachestage = isgccachestage;
21
22
23 // the master core decides how to adapt cache strategy for the mutator 
24 // according to collected statistic data
25
// find the core that accesses the page #page_index most.
// Reads one sample per active core out of gccachesamplingtbl_r; consecutive
// cores' rows are size_cachesamplingtbl_local_r bytes apart.  Only updates
// hottestcore/hotfreq when a strictly larger frequency is seen, so callers
// must pre-initialize both.
// Wrapped in do/while(0) so the macro behaves as a single statement.
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  do { \
    int *local_tbl=&gccachesamplingtbl_r[page_index]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  } while(0)
// find the core that accesses the page #page_index most and compute the total
// access count of the page at the same time.  Same table layout and caller
// pre-initialization requirements as CACHEADAPT_FIND_HOTTEST_CORE; totalfreq
// is accumulated (not reset) so callers must zero it first.
// Wrapped in do/while(0) so the macro behaves as a single statement.
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  do { \
    int *local_tbl=&gccachesamplingtbl_r[page_index]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl=(int *)(((char *)local_tbl)+size_cachesamplingtbl_local_r); \
      totalfreq += freq; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  } while(0)
// Set the policy as hosted by core #coren: mode BAMBOO_CACHE_MODE_COORDS with
// the core's tile coordinates.
// NOTE: (x,y) should be changed to (x+1, y+1)!!!
// Fixed: the original had trailing whitespace after the first backslash,
// which breaks the line continuation.  Wrapped in do/while(0) so the macro
// behaves as a single statement.
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  do { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  } while(0)
// store the new policy word for page #page_index at tmp_p in gccachepolicytbl
// (one int slot per shared page).
// Wrapped in do/while(0) so the macro behaves as a single statement.
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  do { \
    ((int*)(tmp_p))[page_index] = (policy).word; \
  } while(0)
67
68 // make all pages hfh
69 void cacheAdapt_policy_h4h(int coren){
70   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
71   unsigned int page_gap=page_num/NUMCORESACTIVE;
72   unsigned int page_index=page_gap*coren;
73   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
74   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
75   int * tmp_p = gccachepolicytbl;
76   for(; page_index < page_index_end; page_index++) {
77     bamboo_cache_policy_t policy = {0};
78     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
79     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
80     page_sva += BAMBOO_PAGE_SIZE;
81   }
82
83
84 // make all pages local as non-cache-adaptable gc local mode
85 void cacheAdapt_policy_local(int coren){
86   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
87   unsigned int page_gap=page_num/NUMCORESACTIVE;
88   unsigned int page_index=page_gap*coren;
89   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
90   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
91   int * tmp_p = gccachepolicytbl;
92   for(; page_index < page_index_end; page_index++) {
93     bamboo_cache_policy_t policy = {0};
94     unsigned int block = 0;
95     BLOCKINDEX(block, (void *) page_sva);
96     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
97     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
98     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
99     page_sva += BAMBOO_PAGE_SIZE;
100   }
101
102
103 void cacheAdapt_policy_hottest(int coren){
104   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
105   unsigned int page_gap=page_num/NUMCORESACTIVE;
106   unsigned int page_index=page_gap*coren;
107   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
108   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
109   int * tmp_p = gccachepolicytbl;
110   for(; page_index < page_index_end; page_index++) {
111     bamboo_cache_policy_t policy = {0};
112     unsigned int hottestcore = 0;
113     unsigned int hotfreq = 0;
114     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
115     // TODO
116     // Decide the cache strategy for this page
117     // If decide to adapt a new cache strategy, write into the shared block of
118     // the gcsharedsamplingtbl. The mem recording information that has been 
119     // written is enough to hold the information.
120     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
121     if(hotfreq != 0) {
122       // locally cache the page in the hottest core
123       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
124     }
125     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
126     page_sva += BAMBOO_PAGE_SIZE;
127   }
128
129
130 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  64
131 // cache the page on the core that accesses it the most if that core accesses 
132 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
133 // h4h the page.
134 void cacheAdapt_policy_dominate(int coren){
135   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)/(BAMBOO_PAGE_SIZE);
136   unsigned int page_gap=page_num/NUMCORESACTIVE;
137   unsigned int page_index=page_gap*coren;
138   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
139   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
140   int * tmp_p = gccachepolicytbl;
141   for(; page_index < page_index_end; page_index++) {
142     bamboo_cache_policy_t policy = {0};
143     unsigned int hottestcore = 0;
144     unsigned long long totalfreq = 0;
145     unsigned int hotfreq = 0;
146     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
147     // Decide the cache strategy for this page
148     // If decide to adapt a new cache strategy, write into the shared block of
149     // the gcpolicytbl 
150     // Format: page start va + cache policy
151     if(hotfreq != 0) {
152       totalfreq=(totalfreq*GC_CACHE_ADAPT_DOMINATE_THRESHOLD)>>7;
153       if(hotfreq < totalfreq) {
154         // use hfh
155         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
156       } else {
157         // locally cache the page in the hottest core
158         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
159       }     
160     }
161     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
162     page_sva += BAMBOO_PAGE_SIZE;
163   }
164 }
165
#if 0
// NOTE(review): everything from here to the matching #endif is compiled out.
#define GC_CACHE_ADAPT_OVERLOAD_THRESHOLD 10
// record the workload of the hottestcore into core2heavypages: accumulate the
// page's total accesses into the core's workload and append a
// (page-ptr, totalfreq, remoteaccess) triple; slot [0] holds the triple count.
// NOTE(review): the 'remoteaccess' macro parameter is shadowed by the local
// declaration below, so whatever argument callers pass is ignored.
#define CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p) \
  { \
    workload[hottestcore] += (totalfreq); \
    total_workload += (totalfreq); \
    unsigned long long remoteaccess = (totalfreq) - (hotfreq); \
    unsigned int index = (unsigned int)core2heavypages[hottestcore][0]; \
    core2heavypages[hottestcore][3*index+3] = (remoteaccess); \
    core2heavypages[hottestcore][3*index+2] = (totalfreq); \
    core2heavypages[hottestcore][3*index+1] = (unsigned long long)((tmp_p)-1); \
    core2heavypages[hottestcore][0]++; \
  }
180
// Descending quicksort over triples stored in 'array': the sort key for slot
// i is array[i*3-offset].  Used on the core2heavypages tables, where entries
// are (page-ptr, totalfreq, remoteaccess) triples starting at index 1.
// NOTE(review): compiled out (#if 0).  The partition scheme is non-standard
// (pivot fixed at the middle index with manual pivot tracking), and the
// guard (right-left+1) >= 1 relies on unsigned wrap-around when right < left
// (e.g. right == left-2 wraps to a huge value) -- verify termination and
// correctness before re-enabling.
void gc_quicksort(unsigned long long *array,unsigned int left,unsigned int right,unsigned int offset) {
  unsigned int pivot = 0;;
  unsigned int leftIdx = left;
  unsigned int rightIdx = right;
  if((right-left+1) >= 1) {
    pivot = (left+right)/2;
    while((leftIdx <= pivot) && (rightIdx >= pivot)) {
      unsigned long long pivotValue = array[pivot*3-offset];
      // advance leftIdx past entries already >= pivot (descending order)
      while((array[leftIdx*3-offset] > pivotValue) && (leftIdx <= pivot)) {
        leftIdx++;
      }
      // retreat rightIdx past entries already <= pivot
      while((array[rightIdx*3-offset] < pivotValue) && (rightIdx >= pivot)) {
        rightIdx--;
      }
      // swap [leftIdx] & [rightIdx] -- all three fields of each triple
      for(int k = 0; k < 3; k++) {
        unsigned long long tmp = array[3*rightIdx-k];
        array[3*rightIdx-k] = array[3*leftIdx-k];
        array[3*leftIdx-k] = tmp;
      }
      leftIdx++;
      rightIdx--;
      // track the pivot's new position after the swap
      if((leftIdx-1) == pivot) {
        pivot = rightIdx = rightIdx + 1;
      } else if((leftIdx+1) == pivot) {
        pivot = leftIdx = leftIdx-1;
      }
    }
    // recurse on both partitions around the pivot
    gc_quicksort(array, left, pivot-1, offset);
    gc_quicksort(array, pivot+1, right, offset);
  }
  return;
}
214
// If core i's accumulated workload exceeds workload_threshold, sort its
// recorded pages by remote-access count (descending) and hash-for-home the
// heaviest ones until the workload drops below the threshold.  Returns the
// index just past the last triple that was converted to h4h.
// NOTE(review): compiled out (#if 0).  core2heavypages[i][j] holds a pointer
// value stored as unsigned long long (written by
// CACHEADAPT_RECORD_PAGE_WORKLOAD) and is cast back to a pointer here --
// verify the round-trip on the target before re-enabling.  Also note the
// signed/unsigned comparison j < index*3.
INLINE int cacheAdapt_h4h_remote_accesses(unsigned long long workload_threshold,unsigned long long ** core2heavypages, unsigned long long * workload,int i) {
  int j = 1;
  unsigned int index = (unsigned int)core2heavypages[i][0];
  if(workload[i] > workload_threshold) {
    // sort according to the remoteaccess
    gc_quicksort(&core2heavypages[i][0], 1, index, 0);
    while((workload[i] > workload_threshold) && (j<index*3)) {
      // hfh those pages with more remote accesses 
      bamboo_cache_policy_t policy = {0};
      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
      *((unsigned int*)core2heavypages[i][j]) = policy.word;
      workload[i] -= core2heavypages[i][j+1];
      j += 3;
    }
  }
  return j;
}
232
// Every page cached on the core that accesses it the most. 
// Check to see if any core's pages total more accesses than threshold 
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
// most remote accesses and hash for home them until we get below 
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD
// NOTE(review): compiled out (#if 0) and stale: this call site passes 4
// arguments to CACHEADAPT_CHANGE_POLICY_4_PAGE, which now takes 3, and
// passes a 2-D VLA where cacheAdapt_h4h_remote_accesses expects
// unsigned long long ** -- it will not compile if re-enabled as-is.
int cacheAdapt_policy_overload(int coren){
  unsigned int page_index = 0;
  VA page_sva = gcbaseva;
  unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
  unsigned int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  // per-core accumulated workload and per-core heavy-page records
  unsigned long long workload[NUMCORESACTIVE];
  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  unsigned long long total_workload = 0;
  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  for(page_index = 0; page_sva < gctopva; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int hottestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
    // Decide the cache strategy for this page
    // If decide to adapt a new cache strategy, write into the shared block of
    // the gcsharedsamplingtbl. The mem recording information that has been 
    // written is enough to hold the information.
    // Format: page start va + cache strategy(hfh/(host core+[x,y]))
    if(hotfreq != 0) {
      totalfreq/=BAMBOO_PAGE_SIZE;
      hotfreq/=BAMBOO_PAGE_SIZE;
      // locally cache the page in the hottest core
      CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
      CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
      CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);    
    }
    page_sva += BAMBOO_PAGE_SIZE;
  }

  unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  // Check the workload of each core
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
  }

  return numchanged;
}
279
#define GC_CACHE_ADAPT_ACCESS_THRESHOLD 70
#define GC_CACHE_ADAPT_CROWD_THRESHOLD  20
// Every page cached on the core that accesses it the most. 
// Check to see if any core's pages total more accesses than threshold 
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  If so, find the pages with the 
// most remote accesses and hash for home them until we get below 
// GC_CACHE_ADAPT_OVERLOAD_THRESHOLD.  
// Sort pages based on activity.... 
// If more then GC_CACHE_ADAPT_ACCESS_THRESHOLD% of the accesses for a
// core's pages are from more than GC_CACHE_ADAPT_CROWD_THRESHOLD pages, 
// then start hfh these pages(selecting the ones with the most remote 
// accesses first or fewest local accesses) until we get below 
// GC_CACHE_ADAPT_CROWD_THRESHOLD pages.
// NOTE(review): compiled out (#if 0) and stale like
// cacheAdapt_policy_overload (4-arg CACHEADAPT_CHANGE_POLICY_4_PAGE call,
// VLA passed as unsigned long long **), and the function's closing brace is
// missing before the #endif -- restore it before re-enabling.
int cacheAdapt_policy_crowd(int coren){
  unsigned int page_index = 0;
  VA page_sva = gcbaseva;
  unsigned int page_num = BAMBOO_SHARED_MEM_SIZE/BAMBOO_PAGE_SIZE;
  unsigned int numchanged = 0;
  int * tmp_p = gccachepolicytbl+1;
  // per-core accumulated workload and per-core heavy-page records
  unsigned long long workload[NUMCORESACTIVE];
  memset(workload, 0, NUMCORESACTIVE*sizeof(unsigned long long));
  unsigned long long total_workload = 0;
  unsigned long long core2heavypages[NUMCORESACTIVE][page_num*3+1];
  memset(core2heavypages,0,sizeof(unsigned long long)*(page_num*3+1)*NUMCORESACTIVE);
  for(page_index = 0; page_sva < gctopva; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int hottestcore = 0;
    unsigned long long totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
    // Decide the cache strategy for this page
    // If decide to adapt a new cache strategy, write into the shared block of
    // the gcsharedsamplingtbl. The mem recording information that has been 
    // written is enough to hold the information.
    // Format: page start va + cache strategy(hfh/(host core+[x,y]))
    if(hotfreq != 0) {
      totalfreq/=BAMBOO_PAGE_SIZE;
      hotfreq/=BAMBOO_PAGE_SIZE;
      // locally cache the page in the hottest core
      CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
      CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy,numchanged);
      CACHEADAPT_RECORD_PAGE_WORKLOAD(hottestcore,totalfreq,hotfreq,remoteaccess,tmp_p);
    }
    page_sva += BAMBOO_PAGE_SIZE;
  }

  unsigned long long workload_threshold=total_workload/GC_CACHE_ADAPT_OVERLOAD_THRESHOLD;
  // Check the workload of each core
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    unsigned int index=(unsigned int)core2heavypages[i][0];
    int j=cacheAdapt_h4h_remote_accesses(workload_threshold,core2heavypages,workload,i);
    // Check if the accesses are crowded on few pages
    // sort according to the total access
inner_crowd:
    gc_quicksort(&core2heavypages[i][0], j/3+1, index, 1);
    unsigned long long threshold=GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
    int num_crowded = 0;
    unsigned long long t_workload = 0;
    do {
      t_workload += core2heavypages[i][j+num_crowded*3+1];
      num_crowded++;
    } while(t_workload < threshold);
    // num_crowded <= GC_CACHE_ADAPT_CROWD_THRESHOLD and if there are enough 
    // items, it is always == GC_CACHE_ADAPT_CROWD_THRESHOLD
    if(num_crowded > GC_CACHE_ADAPT_CROWD_THRESHOLD) {
      // need to hfh these pages
      // sort the pages according to remote access
      gc_quicksort(&core2heavypages[i][0], j/3+1, j/3+num_crowded, 0);
      // h4h those pages with more remote accesses 
      bamboo_cache_policy_t policy = {0};
      policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
      *((unsigned int*)core2heavypages[i][j]) = policy.word;
      workload[i] -= core2heavypages[i][j+1];
      t_workload -= core2heavypages[i][j+1];
      j += 3;
      threshold = GC_CACHE_ADAPT_ACCESS_THRESHOLD*workload[i]/100;
      goto inner_crowd;
    }
  }

  return numchanged;
362 #endif
363
// Decide the new per-page cache policy for this core's slice of shared pages,
// dispatching to whichever policy the build selected via
// GC_CACHE_ADAPT_POLICY1..4.  Results are written into gccachepolicytbl by
// the policy functions.
// Fixed: the function is declared to return unsigned int but had no return
// statement (undefined behavior if a caller ever uses the value); all active
// policies report nothing, so return 0.
unsigned int cacheAdapt_decision(int coren) {
  BAMBOO_CACHE_MF();
  // check the statistic data
  // for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
//#elif defined GC_CACHE_ADAPT_POLICY5
//  cacheAdapt_policy_overload(coren);
//#elif defined GC_CACHE_ADAPT_POLICY6
//  cacheAdapt_policy_crowd(coren);
#endif
  return 0;
}
382
383 // adapt the cache strategy for the mutator
384 void cacheAdapt_mutator() {
385   BAMBOO_CACHE_MF();
386   // check the changes and adapt them
387   int * tmp_p = gccachepolicytbl;
388   unsigned int page_sva = gcbaseva;
389   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
390     // read out the policy
391     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
392     // adapt the policy
393     if(policy.word != 0) {
394       bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
395     }
396     tmp_p += 1;
397   }
398 }
399
// Client-side driver for the two cache-adaption GC phases: wait for the
// master's CACHEPOLICYPHASE, decide this core's per-page policies, report
// back, then wait for PREFINISHPHASE, apply the policies, report back, and
// reset the sampling state.  The statement order follows the master/client
// message protocol and must not be rearranged.
void cacheAdapt_phase_client() {
  WAITFORGCPHASE(CACHEPOLICYPHASE);
  GC_PRINTF("Start cachepolicy phase\n");
  // decide the new policy for this core's slice of the shared pages
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  //send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish cachepolicy phase\n");

  WAITFORGCPHASE(PREFINISHPHASE);
  GC_PRINTF("Start prefinish phase\n");
  // cache adapt phase: apply the decided policies, then leave gc cache mode
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  //send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish prefinish phase\n");
  CACHEADAPT_SAMPING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbl for the next sampling period
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
  }
}
423
424 extern unsigned long long gc_output_cache_policy_time;
425
// Master-side driver for the cache-adaption GC phases: dump the revised
// sampling data, broadcast CACHEPOLICYPHASE so all cores decide per-page
// policies in parallel, then broadcast PREFINISHPHASE so all cores apply
// them, and finally reset the sampling state.  The statement order follows
// the master/client message protocol and must not be rearranged.
void cacheAdapt_phase_master() {
  GCPROFILE_ITEM();
  // account the time spent dumping the revised sampling data separately
  unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
  CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
  gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
  // let all cores to parallelly process the revised profile data and decide 
  // the cache policy for each page
  gc_status_info.gcphase = CACHEPOLICYPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
  GC_PRINTF("Start cachepolicy phase \n");
  // cache adapt phase: master decides its own slice too
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  GC_CHECK_ALL_CORE_STATUS(CACHEPOLICYPHASE==gc_status_info.gcphase);
  BAMBOO_CACHE_MF();

  // let all cores to adopt new policies
  gc_status_info.gcphase = PREFINISHPHASE;
  // Note: all cores should flush their runtime data including non-gc cores
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
  GC_PRINTF("Start prefinish phase \n");
  // cache adapt phase: apply the decided policies, then leave gc cache mode
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  GC_CHECK_ALL_CORE_STATUS(PREFINISHPHASE==gc_status_info.gcphase);

  CACHEADAPT_SAMPING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbl for the next sampling period
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
    BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
  }
}
459
460 void gc_output_cache_sampling() {
461   unsigned int page_index = 0;
462   VA page_sva = 0;
463   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
464   for(page_index = 0; page_index < page_num; page_index++) {
465     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
466     unsigned int block = 0;
467     BLOCKINDEX(block, (void *) page_sva);
468     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
469     printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
470     for(int i = 0; i < NUMCORESACTIVE; i++) {
471       int * local_tbl = (int *)((void *)gccachesamplingtbl+size_cachesamplingtbl_local*i);
472       int freq = local_tbl[page_index];
473       //if(freq != 0) {
474         printf("%d,  ", freq);
475       //}
476     }
477     printf("\n");
478   }
479   printf("=================\n");
480
481
482 void gc_output_cache_sampling_r() {
483   // TODO summary data
484   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
485   for(int i = 0; i < NUMCORESACTIVE; i++) {
486     for(int j = 0; j < NUMCORESACTIVE; j++) {
487       sumdata[i][j] = 0;
488     }
489   }
490   tprintf("cache sampling_r \n");
491   unsigned int page_index = 0;
492   VA page_sva = 0;
493   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) / (BAMBOO_PAGE_SIZE);
494   for(page_index = 0; page_index < page_num; page_index++) {
495     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
496     unsigned int block = 0;
497     BLOCKINDEX(block, (void *)page_sva);
498     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
499     printf(" %x,  %d,  %d,  ",(int)page_sva,page_index,coren);
500     int accesscore = 0; // TODO
501     for(int i = 0; i < NUMCORESACTIVE; i++) {
502       int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
503       int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
504       printf("%d,  ", freq);
505       if(freq != 0) {
506         accesscore++;// TODO
507       }
508     }
509     if(accesscore!=0) {
510       for(int i = 0; i < NUMCORESACTIVE; i++) {
511         int * local_tbl = (int *)((void *)gccachesamplingtbl_r+size_cachesamplingtbl_local_r*i);
512         int freq = local_tbl[page_index]/BAMBOO_PAGE_SIZE;
513         sumdata[accesscore-1][i]+=freq;
514       }
515     }
516   
517     printf("\n");
518   }
519   // TODO printout the summary data
520   for(int i = 0; i < NUMCORESACTIVE; i++) {
521     printf("%d  ", i);
522     for(int j = 0; j < NUMCORESACTIVE; j++) {
523       printf(" %d  ", sumdata[j][i]);
524     }
525     printf("\n");
526   }
527   printf("=================\n");
528
529 #endif // GC_CACHE_ADAPT