2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregc.h"
5 #include "multicoregcprofile.h"
// Close out the destination page the compactor just finished writing at
// toptr: scale every core's accumulated sample counter for that page down
// by 64 (>>6).  gccachesamplingtbl_r holds NUMCORESACTIVE counters per
// page, laid out [page*NUMCORESACTIVE + core].
// NOTE(review): this copy appears truncated -- the per-iteration pointer
// advance (presumably newtable++) and the closing braces of the loop and
// function are not visible; confirm against the canonical source.
7 void cacheadapt_finish_compact(void *toptr) {
// page index of toptr relative to the shared-heap base gcbaseva
8 unsigned int dstpage=((unsigned INTPTR)(toptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
9 unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
// divide each active core's counter for this page by 64
11 for(int core = 0; core < NUMCORESACTIVE; core++) {
12 (*newtable)=(*newtable)>>6;
// Fold the sampling data of a source page being evacuated (srcptr) into the
// revised table entry of the destination page (tostart..tofinish is the
// range just copied).  Each core's destination counter is incremented by the
// source counter weighted by how many 64ths of a page were moved.
// NOTE(review): the per-iteration pointer advances (oldtable++/newtable++)
// and the closing braces are missing from this copy -- confirm upstream.
17 void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
// page indices of the source object and its new destination
18 unsigned int srcpage=((unsigned INTPTR)(srcptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
19 unsigned int dstpage=((unsigned INTPTR)(tostart-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
20 unsigned int numbytes=tofinish-tostart;
// per-page, per-core counters: original table and revised (post-compaction) table
22 unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
23 unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
// numbytes expressed in 64ths of a page (BAMBOO_PAGE_SIZE_BITS-6 shift)
25 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
// weight each core's old count by the fraction of the page moved
27 for(int core = 0; core < NUMCORESACTIVE; core++) {
28 (*newtable)+=page64th*(*oldtable);
// Walk forward over destination pages (toptr side) and source pages
// (origptr side) simultaneously, closing out whichever page boundary comes
// first and folding weighted source sample counts into the revised table.
// bytesneeded is the number of source bytes still to account for.
34 /* Bytes needed equal to zero is a special case... It means that we should finish the dst page */
// NOTE(review): this copy is missing several physical lines -- at minimum
// the `do {` opening the loop, the `} else {` between the two branches, the
// per-core pointer advances inside both for-loops, and the final closing
// brace.  The comments below describe only what is visible.
36 void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
// bytes already written into the current destination page
37 unsigned int numbytes=toptr-tostart;
// end addresses of the current destination / source pages (round up to page)
39 void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
40 void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
// toptr-1 so that a pointer sitting exactly on a page boundary still maps
// to the page just filled
42 unsigned int topage=((unsigned INTPTR)(toptr-1-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
43 unsigned int origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;
45 unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
46 unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
// bytesneeded==0 means "just finish the dst page": treat it as already full
49 unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
50 unsigned int remainorigbytes=origbound-origptr;
53 //round source bytes down....don't want to close out page if not necessary
54 remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
// destination page boundary is reached first (or simultaneously)
56 if (remaintobytes<=remainorigbytes) {
57 //Need to close out to page
59 numbytes+=remaintobytes;
// fraction of a page processed, in 64ths
60 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
// fold weighted source counts in and normalize the finished dst page (>>6)
62 for(int core = 0; core < NUMCORESACTIVE; core++) {
63 (*totable)=(*totable+page64th*(*origtable))>>6;
// NOTE(review): a `toptr+=remaintobytes;` advance is expected here but is
// not visible in this copy
68 origptr+=remaintobytes;
69 bytesneeded-=remaintobytes;
70 topage++;//to page is definitely done
71 tobound+=BAMBOO_PAGE_SIZE;
72 origpage=((unsigned INTPTR)(origptr-gcbaseva))>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
73 origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
// (else branch: source page boundary reached first)
75 //Finishing off orig page
77 numbytes+=remainorigbytes;
78 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
// only accumulate -- dst page is not finished yet, so no >>6 normalization
80 for(int core = 0; core < NUMCORESACTIVE; core++) {
81 (*totable)+=page64th*(*origtable);
85 toptr+=remainorigbytes;
86 origptr+=remainorigbytes;
87 bytesneeded-=remainorigbytes;
88 origpage++;//just orig page is done
89 origbound+=BAMBOO_PAGE_SIZE;
// re-derive table cursors for the (possibly advanced) pages
91 totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
92 origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
94 remaintobytes=tobound-toptr;
95 remainorigbytes=origbound-origptr;
// loop until all requested source bytes have been accounted for
98 } while(bytesneeded!=0);
101 // prepare for cache adaption:
102 // -- flush the shared heap
103 // -- clean dtlb entries
104 // -- change cache strategy
// isgccachestage presumably selects between the GC-time and mutator-time
// dtlb handlers below -- the conditional itself is missing from this copy,
// so both install calls appear unguarded here; verify against the original.
105 void cacheAdapt_gc(bool isgccachestage) {
106 // flush the shared heap
107 BAMBOO_CACHE_FLUSH_L2();
109 // clean the dtlb entries
113 bamboo_install_dtlb_handler_for_gc();
115 bamboo_install_dtlb_handler_for_mutator();
119 // the master core decides how to adapt cache strategy for the mutator
120 // according to collected statistic data
122 // find the core that accesses the page #page_index most
// Scans the NUMCORESACTIVE revised sample counters for the page and leaves
// the winning core id in `hottestcore` and its count in `hotfreq` (both are
// output l-values; callers must initialize hotfreq to 0 beforehand).
// NOTE(review): the macro tail (hottestcore assignment, local_tbl advance,
// closing braces) is missing from this copy -- do not edit in place without
// the canonical source.
123 #define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
125 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE]; \
126 for(int i = 0; i < NUMCORESACTIVE; i++) { \
127 int freq = *local_tbl; \
129 if(hotfreq < freq) { \
135 // find the core that accesses the page #page_index most and compute the total
136 // access count of the page at the same time
// Same scan as CACHEADAPT_FIND_HOTTEST_CORE but additionally accumulates
// every core's count into `totalfreq` (caller initializes all outputs to 0).
// NOTE(review): the accumulation into totalfreq and the macro tail are
// missing from this copy of the macro body.
137 #define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
139 unsigned int *local_tbl=&gccachesamplingtbl_r[page_index*NUMCORESACTIVE]; \
140 for(int i = 0; i < NUMCORESACTIVE; i++) { \
141 int freq = *local_tbl; \
144 if(hotfreq < freq) { \
150 // Set the policy as hosted by coren
151 // NOTE: (x,y) should be changed to (x+1, y+1)!!!
// Fills a bamboo_cache_policy_t so the page is cached ("homed") on physical
// core `coren`: coordinate mode plus the core's (x,y) mesh coordinates from
// bamboo_cpu2coords, shifted by +1 per the hardware convention noted above.
152 #define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
154 (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
155 (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
156 (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
158 // store the new policy information at tmp_p in gccachepolicytbl
// Records the packed policy word for `page_index`; tmp_p points into
// gccachepolicytbl, which cacheAdapt_mutator later reads back per page.
159 #define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
161 ((int*)(tmp_p))[page_index] = (policy).word; \
164 // make all pages hfh
// Policy 1: mark every page in this core's slice as hash-for-home
// (BAMBOO_CACHE_MODE_HASH).  Each active core processes a contiguous
// page_gap-sized slice; the last core also takes the remainder.
// NOTE(review): the loop/function closing braces are missing from this
// copy; page_sva is advanced but otherwise unused in the visible body.
165 void cacheAdapt_policy_h4h(int coren){
166 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
167 unsigned int page_gap=page_num/NUMCORESACTIVE;
168 unsigned int page_index=page_gap*coren;
// last core absorbs the rounding remainder of page_num/NUMCORESACTIVE
169 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
170 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
171 unsigned int * tmp_p = gccachepolicytbl;
172 for(; page_index < page_index_end; page_index++) {
173 bamboo_cache_policy_t policy = {0};
174 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
175 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
176 page_sva += BAMBOO_PAGE_SIZE;
180 // make all pages local as non-cache-adaptable gc local mode
// Policy 2: home every page in this core's slice on the core that owns the
// block containing it (per gc_block2core), i.e. the static GC layout.
// NOTE(review): the inner `coren` shadows the parameter `coren` -- legal C
// but confusing; also the loop/function closing braces are missing here.
181 void cacheAdapt_policy_local(int coren){
182 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
183 unsigned int page_gap=page_num/NUMCORESACTIVE;
184 unsigned int page_index=page_gap*coren;
185 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
186 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
187 unsigned int * tmp_p = gccachepolicytbl;
188 for(; page_index < page_index_end; page_index++) {
189 bamboo_cache_policy_t policy = {0};
190 unsigned int block = 0;
// map the page's address to its GC block, then to that block's host core
191 BLOCKINDEX(block, (void *) page_sva);
192 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
193 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
194 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
195 page_sva += BAMBOO_PAGE_SIZE;
// Policy 3: home each page on the core that accessed it most according to
// the revised sampling table; fall back to the page's static host core.
// NOTE(review): the if/else skeleton that chooses between the two
// CACHEADAPT_POLICY_SET_HOST_CORE calls (presumably testing hotfreq!=0) is
// missing from this copy, as are the closing braces -- as shown, both calls
// would run and the second would win.  Confirm against the original.
199 void cacheAdapt_policy_hottest(int coren){
200 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
201 unsigned int page_gap=page_num/NUMCORESACTIVE;
202 unsigned int page_index=page_gap*coren;
203 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
204 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
205 unsigned int * tmp_p = gccachepolicytbl;
206 for(; page_index < page_index_end; page_index++) {
207 bamboo_cache_policy_t policy = {0};
208 unsigned int hottestcore = 0;
209 unsigned int hotfreq = 0;
210 CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
212 // Decide the cache strategy for this page
213 // If decide to adapt a new cache strategy, write into the shared block of
214 // the gcsharedsamplingtbl. The mem recording information that has been
215 // written is enough to hold the information.
216 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
218 // locally cache the page in the hottest core
219 CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
221 // reset it to be homed by its host core
222 unsigned int block = 0;
223 BLOCKINDEX(block, (void *) page_sva);
224 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
225 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
227 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
228 page_sva += BAMBOO_PAGE_SIZE;
// NOTE(review): the threshold is applied as a right shift (totalfreq>>2,
// i.e. 1/4 of total accesses), not as a percentage as the comment below
// suggests -- reconcile the comment or the code upstream.
232 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 2
233 // cache the page on the core that accesses it the most if that core accesses
234 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
// (comment line continuing the sentence above is missing from this copy;
// the visible fallback sets hash-for-home)
// Policy 4: home a page on its hottest core only when that core dominates
// (hotfreq >= totalfreq/4); otherwise hash-for-home it.
// NOTE(review): the else/if skeleton between the three policy assignments
// and the closing braces are missing from this copy -- as shown the later
// assignments would unconditionally overwrite the earlier ones.
236 void cacheAdapt_policy_dominate(int coren){
237 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
238 unsigned int page_gap=page_num/NUMCORESACTIVE;
239 unsigned int page_index=page_gap*coren;
240 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
241 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
242 unsigned int * tmp_p = gccachepolicytbl;
243 for(; page_index < page_index_end; page_index++) {
244 bamboo_cache_policy_t policy = {0};
245 unsigned int hottestcore = 0;
246 unsigned int totalfreq = 0;
247 unsigned int hotfreq = 0;
248 CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
249 // Decide the cache strategy for this page
250 // If decide to adapt a new cache strategy, write into the shared block of
252 // Format: page start va + cache policy
// totalfreq becomes the dominance threshold: total/2^THRESHOLD = total/4
254 totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
255 if(hotfreq < totalfreq) {
// no single core dominates -> spread the page hash-for-home
257 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
258 /*unsigned int block = 0;
259 BLOCKINDEX(block, (void *) page_sva);
260 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
261 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
263 // locally cache the page in the hottest core
264 CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
267 // reset it to be homed by its host core
268 unsigned int block = 0;
269 BLOCKINDEX(block, (void *) page_sva);
270 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
271 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
273 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
274 page_sva += BAMBOO_PAGE_SIZE;
// Dispatch to the compile-time-selected cache policy for this core's slice
// of pages.  Only POLICY4 (dominate) is active; the others are commented out.
// NOTE(review): the `#endif`, the function's return statement, and the
// closing brace are missing from this copy -- the declared unsigned int
// return value cannot be documented from what is visible.
278 unsigned int cacheAdapt_decision(int coren) {
280 // check the statistic data
281 // for each page, decide the new cache strategy
282 #ifdef GC_CACHE_ADAPT_POLICY1
283 // cacheAdapt_policy_h4h(coren);
284 #elif defined(GC_CACHE_ADAPT_POLICY2)
285 //cacheAdapt_policy_local(coren);
286 #elif defined(GC_CACHE_ADAPT_POLICY3)
287 //cacheAdapt_policy_hottest(coren);
288 #elif defined(GC_CACHE_ADAPT_POLICY4)
289 cacheAdapt_policy_dominate(coren);
293 // adapt the cache strategy for the mutator
// Walk gccachepolicytbl page by page and apply each nonzero policy word to
// the corresponding shared-heap page via bamboo_adapt_cache_policy().
// NOTE(review): the advance of tmp_p to the next table slot, the loop and
// function closing braces, and the matching #endif are missing from this
// copy -- as shown, tmp_p would re-read the same slot every iteration.
294 void cacheAdapt_mutator() {
295 #if defined(GC_CACHE_ADAPT_POLICY4)
297 // check the changes and adapt them
298 unsigned int * tmp_p = gccachepolicytbl;
299 unsigned int page_sva = gcbaseva;
300 for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
301 // read out the policy
302 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
// a zero word means "no change requested for this page"
304 if(policy.word != 0) {
305 bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
312 // Cache adapt phase process for clients
// Client-side protocol for the two cache-adaptation GC phases:
//  1. CACHEPOLICYPHASE: compute this core's slice of page policies, ack the
//     coordinator (STARTUPCORE) with GCFINISHCACHEPOLICY.
//  2. PREFINISHPHASE: apply the policies to the mutator's pages, restore
//     non-GC cache state, ack with GCFINISHPREF.
// Finally reset the sampling state for the next cycle (POLICY4 builds only).
// NOTE(review): the #endif and closing braces at the end of the function
// are missing from this copy.
313 void cacheAdapt_phase_client() {
314 WAITFORGCPHASE(CACHEPOLICYPHASE);
315 GC_PRINTF("Start cachepolicy phase\n");
316 cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
317 //send init finish msg to core coordinator
318 send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
319 GC_PRINTF("Finish cachepolicy phase\n");
321 WAITFORGCPHASE(PREFINISHPHASE);
322 GC_PRINTF("Start prefinish phase\n");
// apply the new per-page policies, then leave GC cache mode
324 cacheAdapt_mutator();
325 cacheAdapt_gc(false);
326 //send init finish msg to core coordinator
327 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
328 GC_PRINTF("Finish prefinish phase\n");
330 #if defined(GC_CACHE_ADAPT_POLICY4)
331 CACHEADAPT_SAMPLING_RESET();
// only cores that participate in sampling have local tables to clear
332 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
333 // zero out the gccachesamplingtbl
334 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
335 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
// cumulative time spent dumping cache-policy profiling output (defined elsewhere)
340 extern unsigned long long gc_output_cache_policy_time;
342 // Cache adpat phase process for the master
// Master-side counterpart of cacheAdapt_phase_client: drives both phases,
// broadcasting GCSTARTCACHEPOLICY / GCSTARTPREF to the clients, doing its
// own share of the work, and waiting for all cores to report back before
// advancing.  Also resets sampling state at the end (POLICY4 builds).
// NOTE(review): the #endif and closing braces are missing from this copy.
343 void cacheAdapt_phase_master() {
344 GCPROFILE_ITEM_MASTER();
// time the (profiling-only) dump of the revised sampling table
345 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
346 CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
347 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
348 // let all cores to parallelly process the revised profile data and decide
349 // the cache policy for each page
350 gc_status_info.gcphase = CACHEPOLICYPHASE;
351 GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
352 GC_PRINTF("Start cachepolicy phase \n");
// master computes its own slice, then waits for every client's ack
354 cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
355 GC_CHECK_ALL_CORE_STATUS();
358 // let all cores to adopt new policies
359 gc_status_info.gcphase = PREFINISHPHASE;
360 // Note: all cores should flush their runtime data including non-gc cores
361 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
362 GC_PRINTF("Start prefinish phase \n");
// apply policies locally and leave GC cache mode, then wait for clients
364 cacheAdapt_mutator();
365 cacheAdapt_gc(false);
366 GC_CHECK_ALL_CORE_STATUS();
368 #if defined(GC_CACHE_ADAPT_POLICY4)
369 CACHEADAPT_SAMPLING_RESET();
370 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
371 // zero out the gccachesamplingtbl
372 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
373 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
// master additionally clears the shared policy table for the next cycle
374 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
379 // output original cache sampling data for each page
// Profiling dump: for every shared-heap page, print the page address, its
// static host core, and each active core's sample count from the original
// (pre-compaction) table gccachesamplingtbl.  No-op unless gc_profile_flag.
// NOTE(review): this copy is missing the declaration of page_sva (expected
// around the dropped line 384) and several braces; the two similar loops
// below (one with printing commented out, one printing) would redeclare
// local_tbl in the same scope as shown -- the dropped lines presumably
// separate or guard them.  Confirm against the canonical source.
380 void gc_output_cache_sampling() {
381 extern volatile bool gc_profile_flag;
382 if(!gc_profile_flag) return;
383 unsigned int page_index = 0;
385 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
386 for(page_index = 0; page_index < page_num; page_index++) {
387 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
388 unsigned int block = 0;
// static host core of the block containing this page
389 BLOCKINDEX(block, (void *) page_sva);
390 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
391 //printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
392 unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
394 for(int i = 0; i < NUMCORESACTIVE; i++) {
395 int freq = *local_tbl;
399 //printf("%d, ", freq);
// second pass actually prints the row: page address, index, host core, counts
403 printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
404 unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
405 for(int i = 0; i < NUMCORESACTIVE; i++) {
406 unsigned int freq = *local_tbl;
408 printf("%u, ", freq);
414 printf("=================\n");
417 // output revised cache sampling data for each page after compaction
// Profiling dump of the revised table gccachesamplingtbl_r, plus a
// cross-core summary matrix sumdata[accessing-core][sampled-core].
// No-op unless gc_profile_flag.
// NOTE(review): this copy is missing the body that zeroes sumdata (dropped
// lines 425-427), the declaration of page_sva, the computation of
// accesscore (still marked TODO below -- sumdata[accesscore-1][i] would
// index out of bounds while accesscore==0 as shown), and assorted braces.
// Treat the visible text as a skeleton; confirm against the original.
418 void gc_output_cache_sampling_r() {
419 extern volatile bool gc_profile_flag;
420 if(!gc_profile_flag) return;
// per-(accessing core, sampled core) totals; initialization body not visible
422 unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
423 for(int i = 0; i < NUMCORESACTIVE; i++) {
424 for(int j = 0; j < NUMCORESACTIVE; j++) {
428 tprintf("cache sampling_r \n");
429 unsigned int page_index = 0;
431 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
432 for(page_index = 0; page_index < page_num; page_index++) {
433 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
434 unsigned int block = 0;
435 BLOCKINDEX(block, (void *)page_sva);
436 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
437 //printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
438 int accesscore = 0; // TODO
439 unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
440 for(int i = 0; i < NUMCORESACTIVE; i++) {
441 unsigned int freq = *local_tbl;
442 //printf("%d, ", freq);
// printing pass: row of per-core revised counts, folded into sumdata
449 printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
450 unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
451 for(int i = 0; i < NUMCORESACTIVE; i++) {
452 unsigned int freq = *local_tbl;
453 printf("%u, ", freq);
454 sumdata[accesscore-1][i]+=freq;
462 // TODO printout the summary data
463 for(int i = 0; i < NUMCORESACTIVE; i++) {
465 for(int j = 0; j < NUMCORESACTIVE; j++) {
466 printf(" %u ", sumdata[j][i]);
470 printf("=================\n");
472 #endif // GC_CACHE_ADAPT