bug fix...it assumes ints and then the shifts go bad...
[IRC.git] / Robust / src / Runtime / bamboo / multicorecache.c
1 #ifdef GC_CACHE_ADAPT
2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregc.h"
5 #include "multicoregcprofile.h"
6
7 void cacheadapt_finish_compact(void *toptr) {
8   unsigned int dstpage=((unsigned INTPTR)(toptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
9   unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
10
11   for(int core = 0; core < NUMCORESACTIVE; core++) {
12     (*newtable)=(*newtable)>>6;
13     newtable++;
14   }  
15 }
16
17 void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
18   unsigned int srcpage=((unsigned INTPTR)(srcptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
19   unsigned int dstpage=((unsigned INTPTR)(tostart-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
20   unsigned int numbytes=tofinish-tostart;
21   
22   unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
23   unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
24   
25   unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
26
27   for(int core = 0; core < NUMCORESACTIVE; core++) {
28     (*newtable)+=page64th*(*oldtable);
29     newtable++;
30     oldtable++;
31   }  
32 }
33
34 /* Bytes needed equal to zero is a special case...  It means that we should finish the dst page */
35
36 void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
37   unsigned int numbytes=toptr-tostart;
38
39   void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
40   void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
41   
42   unsigned int topage=((unsigned INTPTR)(toptr-1-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS; 
43   unsigned int origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
44
45   unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
46   unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
47
48   //handler
49   unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
50   unsigned int remainorigbytes=origbound-origptr;
51
52   do {
53     //round source bytes down....don't want to close out page if not necessary
54     remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
55
56     if (remaintobytes<=remainorigbytes) {
57       //Need to close out to page
58
59       numbytes+=remaintobytes;
60       unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
61
62       for(int core = 0; core < NUMCORESACTIVE; core++) {
63         (*totable)=(*totable+page64th*(*origtable))>>6;
64         totable++;
65         origtable++;
66       }
67       toptr+=remaintobytes;
68       origptr+=remaintobytes;
69       bytesneeded-=remaintobytes;
70       topage++;//to page is definitely done
71       tobound+=BAMBOO_PAGE_SIZE;
72       origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
73       origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
74     } else {
75       //Finishing off orig page
76
77       numbytes+=remainorigbytes;
78       unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
79       
80       for(int core = 0; core < NUMCORESACTIVE; core++) {
81         (*totable)+=page64th*(*origtable);
82         totable++;
83         origtable++;
84       }
85       toptr+=remainorigbytes;
86       origptr+=remainorigbytes;
87       bytesneeded-=remainorigbytes;
88       origpage++;//just orig page is done
89       origbound+=BAMBOO_PAGE_SIZE;
90     }
91     totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
92     origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
93     
94     remaintobytes=tobound-toptr;
95     remainorigbytes=origbound-origptr;
96     
97     numbytes=0;
98   } while(bytesneeded!=0);
99 }
100
// prepare for cache adaption:
//   -- flush the shared heap
//   -- clean dtlb entries
//   -- change cache strategy
// isgccachestage selects which dtlb handler to install: the GC handler
// during collection, the mutator handler otherwise.
void cacheAdapt_gc(bool isgccachestage) {
  // flush the shared heap
  BAMBOO_CACHE_FLUSH_L2();

  // clean the dtlb entries
  BAMBOO_CLEAN_DTLB();

  if(isgccachestage) {
    bamboo_install_dtlb_handler_for_gc();
  } else {
    bamboo_install_dtlb_handler_for_mutator();
  }
}
119 // the master core decides how to adapt cache strategy for the mutator 
120 // according to collected statistic data
121
// find the core that accesses the page #page_index most.
// Scans this page's NUMCORESACTIVE entries of gccachesamplingtbl_r and
// updates hottestcore/hotfreq in place.  Wrapped in do/while(0) so it is
// safe as a single statement; freq is unsigned to match the table type.
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  do { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[(page_index)*NUMCORESACTIVE]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      unsigned int freq = *local_tbl; \
      local_tbl++; \
      if((hotfreq) < freq) { \
        (hotfreq) = freq; \
        (hottestcore) = i; \
      } \
    } \
  } while(0)
// find the core that accesses the page #page_index most and compute the
// total access count of the page at the same time.
// Updates hottestcore/hotfreq/totalfreq in place; do/while(0) makes the
// macro a single statement, and freq is unsigned to match the table type.
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  do { \
    unsigned int *local_tbl=&gccachesamplingtbl_r[(page_index)*NUMCORESACTIVE]; \
    for(int i = 0; i < NUMCORESACTIVE; i++) { \
      unsigned int freq = *local_tbl; \
      local_tbl++; \
      (totalfreq) += freq; \
      if((hotfreq) < freq) { \
        (hotfreq) = freq; \
        (hottestcore) = i; \
      } \
    } \
  } while(0)
// Set the policy as hosted by core #coren.
// NOTE: tile coordinates (x,y) are 1-based in the policy word, hence the +1.
// Fixed: the original first line had trailing whitespace after the '\',
// which breaks the line continuation; also wrapped in do/while(0).
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  do { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  } while(0)
// store the new policy word for page #page_index at tmp_p (gccachepolicytbl)
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  do { \
    ((int*)(tmp_p))[(page_index)] = (policy).word; \
  } while(0)
163
164 // make all pages hfh
165 void cacheAdapt_policy_h4h(int coren){
166   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
167   unsigned int page_gap=page_num/NUMCORESACTIVE;
168   unsigned int page_index=page_gap*coren;
169   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
170   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
171   unsigned int * tmp_p = gccachepolicytbl;
172   for(; page_index < page_index_end; page_index++) {
173     bamboo_cache_policy_t policy = {0};
174     policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
175     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
176     page_sva += BAMBOO_PAGE_SIZE;
177   }
178
179
180 // make all pages local as non-cache-adaptable gc local mode
181 void cacheAdapt_policy_local(int coren){
182   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
183   unsigned int page_gap=page_num/NUMCORESACTIVE;
184   unsigned int page_index=page_gap*coren;
185   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
186   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
187   unsigned int * tmp_p = gccachepolicytbl;
188   for(; page_index < page_index_end; page_index++) {
189     bamboo_cache_policy_t policy = {0};
190     unsigned int block = 0;
191     BLOCKINDEX(block, (void *) page_sva);
192     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
193     CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
194     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
195     page_sva += BAMBOO_PAGE_SIZE;
196   }
197
198
199 void cacheAdapt_policy_hottest(int coren){
200   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
201   unsigned int page_gap=page_num/NUMCORESACTIVE;
202   unsigned int page_index=page_gap*coren;
203   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
204   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
205   unsigned int * tmp_p = gccachepolicytbl;
206   for(; page_index < page_index_end; page_index++) {
207     bamboo_cache_policy_t policy = {0};
208     unsigned int hottestcore = 0;
209     unsigned int hotfreq = 0;
210     CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
211     // TODO
212     // Decide the cache strategy for this page
213     // If decide to adapt a new cache strategy, write into the shared block of
214     // the gcsharedsamplingtbl. The mem recording information that has been 
215     // written is enough to hold the information.
216     // Format: page start va + cache strategy(hfh/(host core+[x,y]))
217     if(hotfreq != 0) {
218       // locally cache the page in the hottest core
219       CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
220     }
221     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
222     page_sva += BAMBOO_PAGE_SIZE;
223   }
224
225
226 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD  1
227 // cache the page on the core that accesses it the most if that core accesses 
228 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total.  Otherwise,
229 // h4h the page.
230 void cacheAdapt_policy_dominate(int coren){
231   unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
232   unsigned int page_gap=page_num/NUMCORESACTIVE;
233   unsigned int page_index=page_gap*coren;
234   unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
235   VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
236   unsigned int * tmp_p = gccachepolicytbl;
237   for(; page_index < page_index_end; page_index++) {
238     bamboo_cache_policy_t policy = {0};
239     unsigned int hottestcore = 0;
240     unsigned int totalfreq = 0;
241     unsigned int hotfreq = 0;
242     CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
243     // Decide the cache strategy for this page
244     // If decide to adapt a new cache strategy, write into the shared block of
245     // the gcpolicytbl 
246     // Format: page start va + cache policy
247     if(hotfreq != 0) {
248       totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
249       if((unsigned int)hotfreq < (unsigned int)totalfreq) {
250         // use hfh
251         policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
252         /*unsigned int block = 0;
253         BLOCKINDEX(block, (void *) page_sva);
254         unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
255         CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
256       } else {
257         // locally cache the page in the hottest core
258         CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
259       }     
260     }
261     CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
262     page_sva += BAMBOO_PAGE_SIZE;
263   }
264 }
265
// Decide the new per-page cache policy for this core's slice of pages,
// dispatching on the compile-time policy selection.
// Returns 0 (fixed: the function is declared to return unsigned int but
// previously fell off the end without a return — UB if the value is used).
unsigned int cacheAdapt_decision(int coren) {
  BAMBOO_CACHE_MF();
  // check the statistic data
  // for each page, decide the new cache strategy
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
#endif
  return 0;
}
280
281 // adapt the cache strategy for the mutator
282 void cacheAdapt_mutator() {
283   BAMBOO_CACHE_MF();
284   // check the changes and adapt them
285   unsigned int * tmp_p = gccachepolicytbl;
286   unsigned int page_sva = gcbaseva;
287   for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
288     // read out the policy
289     bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
290     // adapt the policy
291     if(policy.word != 0) {
292       bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
293     }
294     tmp_p += 1;
295   }
296 }
297
298 // Cache adapt phase process for clients
299 void cacheAdapt_phase_client() {
300   WAITFORGCPHASE(CACHEPOLICYPHASE);
301   GC_PRINTF("Start cachepolicy phase\n");
302   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
303   //send init finish msg to core coordinator
304   send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
305   GC_PRINTF("Finish cachepolicy phase\n");
306
307   WAITFORGCPHASE(PREFINISHPHASE);
308   GC_PRINTF("Start prefinish phase\n");
309   // cache adapt phase
310   cacheAdapt_mutator();
311   cacheAdapt_gc(false);
312   //send init finish msg to core coordinator
313   send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
314   GC_PRINTF("Finish prefinish phase\n");
315   CACHEADAPT_SAMPLING_RESET();
316   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
317     // zero out the gccachesamplingtbl
318     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);  
319     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
320   }
321 }
322
323 extern unsigned long long gc_output_cache_policy_time;
324
325 // Cache adpat phase process for the master
326 void cacheAdapt_phase_master() {
327   GCPROFILE_ITEM();
328   unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
329   CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
330   gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
331   // let all cores to parallelly process the revised profile data and decide 
332   // the cache policy for each page
333   gc_status_info.gcphase = CACHEPOLICYPHASE;
334   GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
335   GC_PRINTF("Start cachepolicy phase \n");
336   // cache adapt phase
337   cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
338   GC_CHECK_ALL_CORE_STATUS();
339   BAMBOO_CACHE_MF();
340
341   // let all cores to adopt new policies
342   gc_status_info.gcphase = PREFINISHPHASE;
343   // Note: all cores should flush their runtime data including non-gc cores
344   GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
345   GC_PRINTF("Start prefinish phase \n");
346   // cache adapt phase
347   cacheAdapt_mutator();
348   cacheAdapt_gc(false);
349   GC_CHECK_ALL_CORE_STATUS();
350   
351   CACHEADAPT_SAMPLING_RESET();
352   if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
353     // zero out the gccachesamplingtbl
354     BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
355     BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
356     BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
357   }
358 }
359
360 // output original cache sampling data for each page
361 void gc_output_cache_sampling() {
362   extern volatile bool gc_profile_flag;
363   if(!gc_profile_flag) return;
364   unsigned int page_index = 0;
365   VA page_sva = 0;
366   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
367   for(page_index = 0; page_index < page_num; page_index++) {
368     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
369     unsigned int block = 0;
370     BLOCKINDEX(block, (void *) page_sva);
371     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
372     //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
373     unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
374     int accesscore = 0;
375     for(int i = 0; i < NUMCORESACTIVE; i++) {
376       int freq = *local_tbl;
377       local_tbl++;
378       if(freq != 0) {
379         accesscore++;
380         //printf("%d,  ", freq);
381       }
382     }
383     if(accesscore!=0) {
384       printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
385       unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
386       for(int i = 0; i < NUMCORESACTIVE; i++) {
387         unsigned int freq = *local_tbl;
388         local_tbl++;
389         printf("%u,  ", freq);
390       }
391       printf("\n");
392     }
393     //printf("\n");
394   }
395   printf("=================\n");
396
397
398 // output revised cache sampling data for each page after compaction
399 void gc_output_cache_sampling_r() {
400   extern volatile bool gc_profile_flag;
401   if(!gc_profile_flag) return;
402   // TODO summary data
403   unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
404   for(int i = 0; i < NUMCORESACTIVE; i++) {
405     for(int j = 0; j < NUMCORESACTIVE; j++) {
406       sumdata[i][j] = 0;
407     }
408   }
409   tprintf("cache sampling_r \n");
410   unsigned int page_index = 0;
411   VA page_sva = 0;
412   unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
413   for(page_index = 0; page_index < page_num; page_index++) {
414     page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
415     unsigned int block = 0;
416     BLOCKINDEX(block, (void *)page_sva);
417     unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
418     //printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
419     int accesscore = 0; // TODO
420     unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
421     for(int i = 0; i < NUMCORESACTIVE; i++) {
422       unsigned int freq = *local_tbl; 
423       //printf("%d,  ", freq);
424       if(freq != 0) {
425         accesscore++;// TODO
426       }
427       local_tbl++;
428     }
429     if(accesscore!=0) {
430       printf("%x,  %d,  %d,  ",(int)page_sva,page_index,coren);
431       unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
432       for(int i = 0; i < NUMCORESACTIVE; i++) {
433         unsigned int freq = *local_tbl;
434         printf("%u,  ", freq);
435         sumdata[accesscore-1][i]+=freq;
436         local_tbl++;
437       }
438       printf("\n");
439     }  
440     //printf("\n");
441   }
442   printf("+++++\n");
443   // TODO printout the summary data
444   for(int i = 0; i < NUMCORESACTIVE; i++) {
445     printf("%d  ", i);
446     for(int j = 0; j < NUMCORESACTIVE; j++) {
447       printf(" %u  ", sumdata[j][i]);
448     }
449     printf("\n");
450   }
451   printf("=================\n");
452
453 #endif // GC_CACHE_ADAPT