// Robust/src/Runtime/bamboo/multicorecache.c
#ifdef GC_CACHE_ADAPT
#include "multicorecache.h"
#include "multicoremsg.h"
#include "multicoregcprofile.h"

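/* The cache sampling tables hold one access count per (page, core) pair:
   gccachesamplingtbl is indexed by the original (pre-compaction) pages and
   gccachesamplingtbl_r by the revised (post-compaction) pages.  During
   compaction the helpers below credit a source page's counts to the
   destination page(s) its data is copied into, weighted by how many 64ths of
   a page the copied bytes amount to.  Once a destination page is complete,
   the accumulated sum is divided by 64 (>>6) to bring it back to the scale of
   a plain per-page count. */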
void cacheadapt_finish_compact(void *toptr) {
  unsigned int dstpage=(toptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
  unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];

  for(int core = 0; core < NUMCORESACTIVE; core++) {
    (*newtable)=(*newtable)>>6;
    newtable++;
  }
}

void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
  unsigned int srcpage=(srcptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
  unsigned int dstpage=(tostart-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
  unsigned int numbytes=tofinish-tostart;

  unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
  unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];

  unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);

  for(int core = 0; core < NUMCORESACTIVE; core++) {
    (*newtable)+=page64th*(*oldtable);
    newtable++;
    oldtable++;
  }
}
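
/* For illustration only: if BAMBOO_PAGE_SIZE_BITS were 16 (64KB pages; the
   real value is defined in the headers), then page64th = numbytes >> 10, i.e.
   the number of complete 1KB (page/64) chunks in numbytes.  Copying 24KB from
   a source page would add 24*(*oldtable) to each destination entry, which the
   final >>6 normalization turns into 24/64 of the source page's per-core
   count. */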

/* A bytesneeded value of zero is a special case: it means that we should just
   finish (close out) the destination page. */
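/* The loop below advances two cursors in lockstep: origptr through the source
   (orig) pages still to be copied and toptr through the destination (to)
   pages being filled.  On each iteration it closes out whichever page
   boundary comes first, folding the source page's per-core counts into the
   destination page's entry (weighted in 64ths of a page) and normalizing with
   >>6 whenever a destination page is completed. */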
void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
  unsigned int numbytes=toptr-tostart;

  void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
  void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);

  unsigned int topage=(toptr-1-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
  unsigned int origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;

  unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
  unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];

  // handle the bytesneeded==0 case: force the destination page to be closed out immediately
  unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
  unsigned int remainorigbytes=origbound-origptr;

  do {
    // round source bytes down...don't want to close out the source page if not necessary
    remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;

    if (remaintobytes<=remainorigbytes) {
      // Need to close out the destination (to) page

      numbytes+=remaintobytes;
      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);

      for(int core = 0; core < NUMCORESACTIVE; core++) {
        (*totable)=(*totable+page64th*(*origtable))>>6;
        totable++;
        origtable++;
      }
      toptr+=remaintobytes;
      origptr+=remaintobytes;
      bytesneeded-=remaintobytes;
      topage++;//to page is definitely done
      tobound+=BAMBOO_PAGE_SIZE;
      origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
      origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
    } else {
      // Finishing off the source (orig) page

      numbytes+=remainorigbytes;
      unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);

      for(int core = 0; core < NUMCORESACTIVE; core++) {
        (*totable)+=page64th*(*origtable);
        totable++;
        origtable++;
      }
      toptr+=remainorigbytes;
      origptr+=remainorigbytes;
      bytesneeded-=remainorigbytes;
      origpage++;//just the orig page is done
      origbound+=BAMBOO_PAGE_SIZE;
    }
    totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
    origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];

    remaintobytes=tobound-toptr;
    remainorigbytes=origbound-origptr;

    numbytes=0;
  } while(bytesneeded!=0);
}

// prepare for cache adaptation:
//   -- flush the shared heap
//   -- clean dtlb entries
//   -- change cache strategy
void cacheAdapt_gc(bool isgccachestage) {
  // flush the shared heap
  BAMBOO_CACHE_FLUSH_L2();

  // clean the dtlb entries
  BAMBOO_CLEAN_DTLB();

  // install the dtlb handler that matches the current stage
  if(isgccachestage) {
    bamboo_install_dtlb_handler_for_gc();
  } else {
    bamboo_install_dtlb_handler_for_mutator();
  }
}

// decide how to adapt the cache strategy for the mutator according to the
// collected statistics

// find the core that accesses page #page_index the most
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];   \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl++; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  }
// find the core that accesses page #page_index the most and compute the
// total number of accesses to the page at the same time
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE];   \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      int freq = *local_tbl; \
      local_tbl++; \
      totalfreq += freq; \
      if(hotfreq < freq) { \
        hotfreq = freq; \
        hottestcore = i; \
      } \
    } \
  }
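// Both FIND_HOTTEST macros above only update hottestcore/hotfreq when they see
// a larger count, so callers zero-initialize hottestcore, hotfreq (and
// totalfreq) before invoking them, as the policy functions below do.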
// Set the policy to be hosted by core coren
// NOTE: (x,y) should be changed to (x+1, y+1)!!!
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  }
// store the new policy information for page page_index at tmp_p in gccachepolicytbl
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  { \
    ((int*)(tmp_p))[page_index] = (policy).word; \
  }

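// Each cacheAdapt_policy_* function below runs on every active core in
// parallel: core coren handles the contiguous slice of pages starting at
// coren*page_gap, and the last core also absorbs the remainder when the page
// count is not evenly divisible by NUMCORESACTIVE.
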
// make every page hash-for-home (h4h)
void cacheAdapt_policy_h4h(int coren){
  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
  unsigned int page_gap=page_num/NUMCORESACTIVE;
  unsigned int page_index=page_gap*coren;
  unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  unsigned int * tmp_p = gccachepolicytbl;
  for(; page_index < page_index_end; page_index++) {
    bamboo_cache_policy_t policy = {0};
    policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
    page_sva += BAMBOO_PAGE_SIZE;
  }
}

// cache every page locally on its host core, matching the non-cache-adaptive
// GC local mode
void cacheAdapt_policy_local(int coren){
  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
  unsigned int page_gap=page_num/NUMCORESACTIVE;
  unsigned int page_index=page_gap*coren;
  unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  unsigned int * tmp_p = gccachepolicytbl;
  for(; page_index < page_index_end; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int block = 0;
    BLOCKINDEX(block, (void *) page_sva);
    unsigned int hostcore = gc_block2core[block%(NUMCORES4GC*2)];
    CACHEADAPT_POLICY_SET_HOST_CORE(policy, hostcore);
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
    page_sva += BAMBOO_PAGE_SIZE;
  }
}

void cacheAdapt_policy_hottest(int coren){
  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
  unsigned int page_gap=page_num/NUMCORESACTIVE;
  unsigned int page_index=page_gap*coren;
  unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  unsigned int * tmp_p = gccachepolicytbl;
  for(; page_index < page_index_end; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int hottestcore = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
    // Decide the cache strategy for this page.  If a new strategy is chosen,
    // it is recorded in the shared gccachepolicytbl, which reserves enough
    // space to hold one policy word per page.
    // Format: page start va + cache strategy (h4h / host core [x,y])
    if(hotfreq != 0) {
      // locally cache the page on the hottest core
      CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
    }
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
    page_sva += BAMBOO_PAGE_SIZE;
  }
}

#define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
// cache the page on the core that accesses it the most if that core accounts
// for at least 1/(2^GC_CACHE_ADAPT_DOMINATE_THRESHOLD) of the page's total
// accesses (i.e. at least half with the current setting); otherwise make the
// page hash-for-home.
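// Example with the current threshold of 1: if a page's accesses total 1000
// and the hottest core contributed 600, then 600 >= (1000 >> 1) = 500, so the
// page is homed on that core; had the hottest core contributed only 400, the
// page would be hashed-for-home instead.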
void cacheAdapt_policy_dominate(int coren){
  unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
  unsigned int page_gap=page_num/NUMCORESACTIVE;
  unsigned int page_index=page_gap*coren;
  unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
  VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
  unsigned int * tmp_p = gccachepolicytbl;
  for(; page_index < page_index_end; page_index++) {
    bamboo_cache_policy_t policy = {0};
    unsigned int hottestcore = 0;
    unsigned int totalfreq = 0;
    unsigned int hotfreq = 0;
    CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
    // Decide the cache strategy for this page.  If a new strategy is chosen,
    // it is recorded in the shared gccachepolicytbl.
    // Format: page start va + cache policy
    if(hotfreq != 0) {
      totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
      if((unsigned int)hotfreq < (unsigned int)totalfreq) {
        // no single core dominates: use hash-for-home
        policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
        /*unsigned int block = 0;
        BLOCKINDEX(block, (void *) page_sva);
        unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
        CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
      } else {
        // locally cache the page on the hottest core
        CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
      }
    }
    CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
    page_sva += BAMBOO_PAGE_SIZE;
  }
}

unsigned int cacheAdapt_decision(int coren) {
  BAMBOO_CACHE_MF();
  // examine the statistics and decide the new cache strategy for each page
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
#endif
  // the declared return value is not used by the callers in this file
  return 0;
}

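// gccachepolicytbl holds one bamboo_cache_policy_t word per shared-heap page;
// a zero word means the page's current cache policy is left unchanged.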
// adapt the cache strategy for the mutator
void cacheAdapt_mutator() {
  BAMBOO_CACHE_MF();
  // check the changes and adapt them
  unsigned int * tmp_p = gccachepolicytbl;
  unsigned int page_sva = gcbaseva;
  for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
    // read out the policy
    bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
    // adapt the policy
    if(policy.word != 0) {
      bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
    }
    tmp_p += 1;
  }
}

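// Phase protocol: the master (cacheAdapt_phase_master) drives two phases.  In
// CACHEPOLICYPHASE every core runs cacheAdapt_decision over its slice of pages
// and reports back with GCFINISHCACHEPOLICY; in PREFINISHPHASE every core
// applies the new policies (cacheAdapt_mutator), flushes caches and reinstalls
// the mutator dtlb handler (cacheAdapt_gc(false)), and reports back with
// GCFINISHPREF.
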
// Cache adapt phase processing for client cores
void cacheAdapt_phase_client() {
  WAITFORGCPHASE(CACHEPOLICYPHASE);
  GC_PRINTF("Start cachepolicy phase\n");
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  // send the cachepolicy-finish msg to the master core
  send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish cachepolicy phase\n");

  WAITFORGCPHASE(PREFINISHPHASE);
  GC_PRINTF("Start prefinish phase\n");
  // cache adapt phase
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  // send the prefinish-finish msg to the master core
  send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
  GC_PRINTF("Finish prefinish phase\n");
  CACHEADAPT_SAMPLING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the local cache sampling tables
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
  }
}

extern unsigned long long gc_output_cache_policy_time;

// Cache adapt phase processing for the master core
void cacheAdapt_phase_master() {
  GCPROFILE_ITEM();
  unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
  CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
  gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
  // let all cores process the revised profile data in parallel and decide
  // the cache policy for each page
  gc_status_info.gcphase = CACHEPOLICYPHASE;
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
  GC_PRINTF("Start cachepolicy phase \n");
  // cache adapt phase
  cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
  GC_CHECK_ALL_CORE_STATUS();
  BAMBOO_CACHE_MF();

  // let all cores adopt the new policies
  gc_status_info.gcphase = PREFINISHPHASE;
  // Note: all cores, including non-gc cores, should flush their runtime data
  GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
  GC_PRINTF("Start prefinish phase \n");
  // cache adapt phase
  cacheAdapt_mutator();
  cacheAdapt_gc(false);
  GC_CHECK_ALL_CORE_STATUS();

  CACHEADAPT_SAMPLING_RESET();
  if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
    // zero out the local cache sampling tables and the policy table
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
    BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
    BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
  }
}

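// Both gc_output_* routines below print, for every page with at least one
// recorded access: the page's start address, its page index, its host core
// (from gc_block2core), followed by the per-core access counts.
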
// output the original cache sampling data for each page
void gc_output_cache_sampling() {
  extern volatile bool gc_profile_flag;
  if(!gc_profile_flag) return;
  unsigned int page_index = 0;
  VA page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    unsigned int block = 0;
    BLOCKINDEX(block, (void *) page_sva);
    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
    //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
    unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
    int accesscore = 0;
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      int freq = *local_tbl;
      local_tbl++;
      if(freq != 0) {
        accesscore++;
        //printf("%d,  ", freq);
      }
    }
    if(accesscore!=0) {
      printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
      unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
      for(int i = 0; i < NUMCORESACTIVE; i++) {
        unsigned int freq = *local_tbl;
        local_tbl++;
        printf("%u,  ", freq);
      }
      printf("\n");
    }
    //printf("\n");
  }
  printf("=================\n");
}

// output the revised cache sampling data for each page after compaction
void gc_output_cache_sampling_r() {
  extern volatile bool gc_profile_flag;
  if(!gc_profile_flag) return;
  // summary data, indexed by [sharing degree - 1][core]
  unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    for(int j = 0; j < NUMCORESACTIVE; j++) {
      sumdata[i][j] = 0;
    }
  }
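  // sumdata[k][c] accumulates the access counts contributed by core c to pages
  // touched by exactly k+1 distinct cores, so the summary printout below has
  // one row per core and one column per sharing degree.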
  tprintf("cache sampling_r \n");
  unsigned int page_index = 0;
  VA page_sva = 0;
  unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
  for(page_index = 0; page_index < page_num; page_index++) {
    page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
    unsigned int block = 0;
    BLOCKINDEX(block, (void *)page_sva);
    unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
    //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
    int accesscore = 0;
    unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
    for(int i = 0; i < NUMCORESACTIVE; i++) {
      unsigned int freq = *local_tbl;
      //printf("%d,  ", freq);
      if(freq != 0) {
        accesscore++;
      }
      local_tbl++;
    }
    if(accesscore!=0) {
      printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
      unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
      for(int i = 0; i < NUMCORESACTIVE; i++) {
        unsigned int freq = *local_tbl;
        printf("%u,  ", freq);
        sumdata[accesscore-1][i]+=freq;
        local_tbl++;
      }
      printf("\n");
    }
    //printf("\n");
  }
  printf("+++++\n");
  // print the summary data
  for(int i = 0; i < NUMCORESACTIVE; i++) {
    printf("%d  ", i);
    for(int j = 0; j < NUMCORESACTIVE; j++) {
      printf(" %u  ", sumdata[j][i]);
    }
    printf("\n");
  }
  printf("=================\n");
}

#endif // GC_CACHE_ADAPT