52a725a715f6a35732e940e956af1e924fbc1a6f
[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
1 #ifdef GC_CACHE_ADAPT
2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregc.h"
5 #include "multicoregcprofile.h"
6
7 void cacheadapt_finish_compact(void *toptr) {
8   unsigned int dstpage=((unsigned INTPTR)(toptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
9   unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
10
11   for(int core = 0; core < NUMCORESACTIVE; core++) {
12     (*newtable)=(*newtable)>>6;
13     newtable++;
14   }  
15 }
16
17 void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
18   unsigned int srcpage=((unsigned INTPTR)(srcptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
19   unsigned int dstpage=((unsigned INTPTR)(tostart-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
20   unsigned int numbytes=tofinish-tostart;
21   
22   unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
23   unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
24   
25   unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
26
27   for(int core = 0; core < NUMCORESACTIVE; core++) {
28     (*newtable)+=page64th*(*oldtable);
29     newtable++;
30     oldtable++;
31   }  
32 }
33
/* Bytes needed equal to zero is a special case...  It means that we should finish the dst page */

// Walk the original (pre-compaction) pages and the destination pages in
// lockstep, merging the per-core sampling counts of each orig page into the
// revised-table entry of the dst page it lands in.  Whichever page boundary
// comes first is closed out; closing a dst page also scales its counters
// down by 64 (>>6), matching cacheadapt_finish_compact.
void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
  // bytes already placed in the current dst page
  unsigned int numbytes=toptr-tostart;

  // one-past-the-end addresses of the current dst/orig pages
  void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
  void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
  
  unsigned int topage=((unsigned INTPTR)(toptr-1-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; 
  unsigned int origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;

  unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
  unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];

  //handler
  // bytesneeded==0 means "just close out the dst page": force the dst-page
  // branch on the first iteration by making its remaining byte count zero
  unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
  unsigned int remainorigbytes=origbound-origptr;

  do {
    //round source bytes down....don't want to close out page if not necessary
    remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;

    if (remaintobytes<=remainorigbytes) {
      //Need to close out to page

      numbytes+=remaintobytes;
      // weight: bytes accounted so far, in 64ths of a page
      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);

      for(int core = 0; core < NUMCORESACTIVE; core++) {
        // add the weighted contribution, then scale the finished dst page by 1/64
        (*totable)=(*totable+page64th*(*origtable))>>6;
        totable++;
        origtable++;
      }
      toptr+=remaintobytes;
      origptr+=remaintobytes;
      bytesneeded-=remaintobytes;
      topage++;//to page is definitely done
      tobound+=BAMBOO_PAGE_SIZE;
      origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
      origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
    } else {
      //Finishing off orig page

      numbytes+=remainorigbytes;
      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
      
      for(int core = 0; core < NUMCORESACTIVE; core++) {
        // accumulate only; the dst page is not finished yet, so no >>6 here
        (*totable)+=page64th*(*origtable);
        totable++;
        origtable++;
      }
      toptr+=remainorigbytes;
      origptr+=remainorigbytes;
      bytesneeded-=remainorigbytes;
      origpage++;//just orig page is done
      origbound+=BAMBOO_PAGE_SIZE;
    }
    // re-derive the table cursors and remaining byte counts for the new pages
    totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
    origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
    
    remaintobytes=tobound-toptr;
    remainorigbytes=origbound-origptr;
    
    numbytes=0;
  } while(bytesneeded!=0);
}
100
101 // prepare for cache adaption:
102 //   -- flush the shared heap
103 //   -- clean dtlb entries
104 //   -- change cache strategy
105 void cacheAdapt_gc(bool isgccachestage) {
106 #ifdef GC_CACHE_COHERENT_ON
107   if(!isgccachestage) {
108     // get out of GC
109 #if defined(GC_CACHE_ADAPT_POLICY3)&&defined(GC_CACHE_ADAPT_POLICY4)
110     // flush the shared heap
111     BAMBOO_CACHE_FLUSH_L2();
112
113     // clean the dtlb entries
114     BAMBOO_CLEAN_DTLB();
115 #endif
116   } 
117 #else
118   // flush the shared heap
119   BAMBOO_CACHE_FLUSH_L2();
120
121   // clean the dtlb entries
122   BAMBOO_CLEAN_DTLB();
123
124   if(isgccachestage) {
125     bamboo_install_dtlb_handler_for_gc();
126   } else {
127     bamboo_install_dtlb_handler_for_mutator();
128   }
129 #endif
130
131
// the master core decides how to adapt cache strategy for the mutator 
// according to collected statistic data

// find the core that accesses the page #page_index most
// Scans the revised sampling table (gccachesamplingtbl_r) row for the page;
// hotfreq/hottestcore carry the running maximum in and out, so callers must
// initialize them (typically to 0) before invoking.
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];   \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl++; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  }
// find the core that accesses the page #page_index most and compute the total
// access count of the page at the same time
// Same contract as CACHEADAPT_FIND_HOTTEST_CORE; additionally accumulates
// every core's frequency into totalfreq (caller initializes it to 0).
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];   \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl++; \
      totalfreq += freq; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  }
// Set the policy as hosted by coren
// NOTE: (x,y) should be changed to (x+1, y+1)!!!
// Fix: the original had stray whitespace after a line-continuation
// backslash, which breaks the macro definition.  Also wrapped in
// do { } while(0) so the macro expands to a single statement.
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  do { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  } while(0)
// store the new policy information for page #page_index in gccachepolicytbl
// tmp_p is the table base pointer; entries are indexed by absolute page
// number, so per-core slices can write their ranges independently.
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  { \
    ((int*)(tmp_p))[page_index] = (policy).word; \
  }
176
177 // make all pages hfh
178 void cacheAdapt_policy_h4h(int coren){
179   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
180   unsigned int page_gap=page_num/NUMCORESACTIVE;
181   unsigned int page_index=page_gap*coren;
182   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
183   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
184   unsigned int * tmp_p = gccachepolicytbl;
185   for(; page_index < page_index_end; page_index++) {
186     bamboo_cache_policy_t policy = {0};
187     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
188     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
189     page_sva += BAMBOO_PAGE_SIZE;
190   }
191
192
193 // make all pages local as non-cache-adaptable gc local mode
194 void cacheAdapt_policy_local(int coren){
195   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
196   unsigned int page_gap=page_num/NUMCORESACTIVE;
197   unsigned int page_index=page_gap*coren;
198   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
199   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
200   unsigned int * tmp_p = gccachepolicytbl;
201   for(; page_index < page_index_end; page_index++) {
202     bamboo_cache_policy_t policy = {0};
203     unsigned int block = 0;
204     BLOCKINDEX(block, (void *) page_sva);
205     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
206     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
207     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
208     page_sva += BAMBOO_PAGE_SIZE;
209   }
210
211
212 void cacheAdapt_policy_hottest(int coren){
213   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
214   unsigned int page_gap=page_num/NUMCORESACTIVE;
215   unsigned int page_index=page_gap*coren;
216   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
217   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
218   unsigned int * tmp_p = gccachepolicytbl;
219   for(; page_index < page_index_end; page_index++) {
220     bamboo_cache_policy_t policy = {0};
221     unsigned int hottestcore = 0;
222     unsigned int hotfreq = 0;
223     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
224     // TODO
225     // Decide the cache strategy for this page
226     // If decide to adapt a new cache strategy, write into the shared block of
227     // the gcsharedsamplingtbl. The mem recording information that has been 
228     // written is enough to hold the information.
229     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
230     if(hotfreq != 0) {
231       // locally cache the page in the hottest core
232       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
233     } else {
234       // reset it to be homed by its host core
235       unsigned int block = 0;
236       BLOCKINDEX(block, (void *) page_sva);
237       unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
238       CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
239     }
240     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
241     page_sva += BAMBOO_PAGE_SIZE;
242   }
243
244
245 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  2
246 // cache the page on the core that accesses it the most if that core accesses 
247 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
248 // h4h the page.
249 void cacheAdapt_policy_dominate(int coren){
250   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
251   unsigned int page_gap=page_num/NUMCORESACTIVE;
252   unsigned int page_index=page_gap*coren;
253   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
254   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
255   unsigned int * tmp_p = gccachepolicytbl;
256   for(; page_index < page_index_end; page_index++) {
257     bamboo_cache_policy_t policy = {0};
258     unsigned int hottestcore = 0;
259     unsigned int totalfreq = 0;
260     unsigned int hotfreq = 0;
261     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
262     // Decide the cache strategy for this page
263     // If decide to adapt a new cache strategy, write into the shared block of
264     // the gcpolicytbl 
265     // Format: page start va + cache policy
266     if(hotfreq != 0) {
267       totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
268       if(hotfreq < totalfreq) {
269         // use hfh
270         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
271         /*unsigned int block = 0;
272         BLOCKINDEX(block, (void *) page_sva);
273         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
274         CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
275       } else {
276         // locally cache the page in the hottest core
277         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
278       }     
279     } else {
280       // reset it to be homed by its host core
281       unsigned int block = 0;
282       BLOCKINDEX(block, (void *) page_sva);
283       unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
284       CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
285     }
286     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
287     page_sva += BAMBOO_PAGE_SIZE;
288   }
289 }
290
// For each page in this core's slice, decide the new cache strategy from the
// collected sampling data; only the policy selected at build time runs.
// Returns 0.  (The original fell off the end of a non-void function, which
// is undefined behavior if a caller uses the value; the visible callers
// ignore it, so the signature is kept and an explicit return added.)
unsigned int cacheAdapt_decision(int coren) {
  BAMBOO_CACHE_MF();
  // check the statistic data
  // for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
  //  cacheAdapt_policy_h4h(coren);
#elif defined(GC_CACHE_ADAPT_POLICY2)
  //cacheAdapt_policy_local(coren);
#elif defined(GC_CACHE_ADAPT_POLICY3)
  //cacheAdapt_policy_hottest(coren);
#elif defined(GC_CACHE_ADAPT_POLICY4)
  cacheAdapt_policy_dominate(coren);
#endif
  return 0;
}
305
// adapt the cache strategy for the mutator
// Walks gccachepolicytbl (one 32-bit word per shared-heap page, from gcbaseva
// to gctopva) and applies every non-zero policy word to its page.
void cacheAdapt_mutator() {
#if (defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
  BAMBOO_CACHE_MF();
  // check the changes and adapt them
  unsigned int * tmp_p = gccachepolicytbl;
  unsigned int page_sva = gcbaseva;
  for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
    // read out the policy (cast from the stored word; GCC union-cast extension)
    bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
    // adapt the policy; a zero word means "no change" for this page
    if(policy.word != 0) {
      bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
    }
    tmp_p += 1;
  }
#endif
}
324
// Cache adapt phase process for clients
// 1. CACHEPOLICYPHASE: run this core's slice of the policy decision, then
//    report completion to the startup (master) core.
// 2. PREFINISHPHASE: apply the new per-page policies, leave the GC cache
//    stage, and report completion.
// Finally, reset local sampling state for the next collection.
void cacheAdapt_phase_client() {
  WAITFORGCPHASE(CACHEPOLICYPHASE);
  GC_PRINTF("Start cachepolicy phase\n");
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  //send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish cachepolicy phase\n");

  WAITFORGCPHASE(PREFINISHPHASE);
  GC_PRINTF("Start prefinish phase\n");
  // cache adapt phase
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  //send init finish msg to core coordinator
  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish prefinish phase\n");

#if (defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
  CACHEADAPT_SAMPLING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the gccachesamplingtbl
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
  }
#endif
}
352
extern unsigned long long gc_output_cache_policy_time;

// Cache adapt phase process for the master
// Drives both phases: broadcasts GCSTARTCACHEPOLICY, participates in the
// policy decision, waits for all cores, then broadcasts GCSTARTPREF, applies
// the policies locally, and waits again.  The time spent dumping the revised
// sampling data is accumulated into gc_output_cache_policy_time.
void cacheAdapt_phase_master() {
  GCPROFILE_ITEM_MASTER();
  unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
  CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
  gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
  // let all cores to parallelly process the revised profile data and decide 
  // the cache policy for each page
  gc_status_info.gcphase = CACHEPOLICYPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
  GC_PRINTF("Start cachepolicy phase \n");
  // cache adapt phase
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  GC_CHECK_ALL_CORE_STATUS();
  BAMBOO_CACHE_MF();

  // let all cores to adopt new policies
  gc_status_info.gcphase = PREFINISHPHASE;
  // Note: all cores should flush their runtime data including non-gc cores
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
  GC_PRINTF("Start prefinish phase \n");
  // cache adapt phase
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  GC_CHECK_ALL_CORE_STATUS();
  
#if (defined(GC_CACHE_ADAPT_POLICY4)||defined(GC_CACHE_ADAPT_POLICY3))
  CACHEADAPT_SAMPLING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the sampling tables and the policy table for the next cycle
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
    BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
  }
#endif
}
391
392 // output original cache sampling data for each page
393 void gc_output_cache_sampling() {
394   extern volatile bool gc_profile_flag;
395   if(!gc_profile_flag) return;
396   unsigned int page_index = 0;
397   VA page_sva = 0;
398   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
399   for(page_index = 0; page_index < page_num; page_index++) {
400     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
401     unsigned int block = 0;
402     BLOCKINDEX(block, (void *) page_sva);
403     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
404     //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
405     unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
406     int accesscore = 0;
407     for(int i = 0; i < NUMCORESACTIVE; i++) {
408       int freq = *local_tbl;
409       local_tbl++;
410       if(freq != 0) {
411         accesscore++;
412         //printf("%d,  ", freq);
413       }
414     }
415     if(accesscore!=0) {
416       printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
417       unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
418       for(int i = 0; i < NUMCORESACTIVE; i++) {
419         unsigned int freq = *local_tbl;
420         local_tbl++;
421         printf("%u,  ", freq);
422       }
423       printf("\n");
424     }
425     //printf("\n");
426   }
427   printf("=================\n");
428
429
430 // output revised cache sampling data for each page after compaction
431 void gc_output_cache_sampling_r() {
432   extern volatile bool gc_profile_flag;
433   if(!gc_profile_flag) return;
434   // TODO summary data
435   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
436   for(int i = 0; i < NUMCORESACTIVE; i++) {
437     for(int j = 0; j < NUMCORESACTIVE; j++) {
438       sumdata[i][j] = 0;
439     }
440   }
441   tprintf("cache sampling_r \n");
442   unsigned int page_index = 0;
443   VA page_sva = 0;
444   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
445   for(page_index = 0; page_index < page_num; page_index++) {
446     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
447     unsigned int block = 0;
448     BLOCKINDEX(block, (void *)page_sva);
449     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
450     //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
451     int accesscore = 0; // TODO
452     unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
453     for(int i = 0; i < NUMCORESACTIVE; i++) {
454       unsigned int freq = *local_tbl; 
455       //printf("%d,  ", freq);
456       if(freq != 0) {
457         accesscore++;// TODO
458       }
459       local_tbl++;
460     }
461     if(accesscore!=0) {
462       printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
463       unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
464       for(int i = 0; i < NUMCORESACTIVE; i++) {
465         unsigned int freq = *local_tbl;
466         printf("%u,  ", freq);
467         sumdata[accesscore-1][i]+=freq;
468         local_tbl++;
469       }
470       printf("\n");
471     }  
472     //printf("\n");
473   }
474   printf("+++++\n");
475   // TODO printout the summary data
476   for(int i = 0; i < NUMCORESACTIVE; i++) {
477     printf("%d  ", i);
478     for(int j = 0; j < NUMCORESACTIVE; j++) {
479       printf(" %u  ", sumdata[j][i]);
480     }
481     printf("\n");
482   }
483   printf("=================\n");
484
485 #endif // GC_CACHE_ADAPT