2 #include "multicorecache.h"
3 #include "multicoremsg.h"
4 #include "multicoregcprofile.h"
6 void cacheadapt_finish_compact(void *toptr) {
7 unsigned int dstpage=(toptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
8 unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
10 for(int core = 0; core < NUMCORESACTIVE; core++) {
11 (*newtable)=(*newtable)>>6;
16 void cacheadapt_finish_src_page(void *srcptr, void *tostart, void *tofinish) {
17 unsigned int srcpage=(srcptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
18 unsigned int dstpage=(tostart-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
19 unsigned int numbytes=tofinish-tostart;
21 unsigned int * oldtable=&gccachesamplingtbl[srcpage*NUMCORESACTIVE];
22 unsigned int * newtable=&gccachesamplingtbl_r[dstpage*NUMCORESACTIVE];
24 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
26 for(int core = 0; core < NUMCORESACTIVE; core++) {
27 (*newtable)+=page64th*(*oldtable);
// Fold the sampling counters of the source pages starting at origptr
// (gccachesamplingtbl) into the revised counters of the destination pages
// ending at toptr (gccachesamplingtbl_r), walking both sides page by page
// until bytesneeded bytes have been accounted for.
//
//  origptr     - current position in the source (original) heap
//  tostart     - where the current run on the destination page started
//  toptr       - current position in the destination heap
//  bytesneeded - number of bytes still to be accounted for
//
// NOTE(review): this block looks truncated. The closing `} while(...)` has no
// visible `do {`, the two branches below have no visible `else` or closing
// braces, and neither per-core loop advances totable/origtable. The statements
// are left exactly as found; restore the missing lines from the upstream file
// before relying on this function.
33 /* Bytes needed equal to zero is a special case... It means that we should finish the dst page */
35 void cacheadapt_finish_dst_page(void *origptr, void *tostart, void *toptr, unsigned int bytesneeded) {
// Bytes already placed on the current destination page by this run.
36 unsigned int numbytes=toptr-tostart;
// End of the page containing the last written destination byte (toptr-1).
38 void *tobound=(void *)((((unsigned INTPTR)toptr-1)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
// Start of the page that follows the one containing origptr.
39 void *origbound=(void *)((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
// Page indices relative to gcbaseva; the destination uses toptr-1 for the
// same reason as tobound above.
41 unsigned int topage=(toptr-1-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
42 unsigned int origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;
// First per-core counter of the destination page / source page.
44 unsigned int * totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
45 unsigned int * origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
// bytesneeded==0 forces remaintobytes to 0, which selects the "close out the
// destination page" branch below.
48 unsigned int remaintobytes=(bytesneeded==0)?0:(tobound-toptr);
49 unsigned int remainorigbytes=origbound-origptr;
52 //round source bytes down....don't want to close out page if not necessary
53 remainorigbytes=(remainorigbytes>bytesneeded)?bytesneeded:remainorigbytes;
// The destination page fills up no later than the source page runs out.
55 if (remaintobytes<=remainorigbytes) {
56 //Need to close out to page
58 numbytes+=remaintobytes;
// Weight of this contribution in 64ths of a page.
59 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
61 for(int core = 0; core < NUMCORESACTIVE; core++) {
// Add the last weighted contribution and divide the sum by 64.
62 (*totable)=(*totable+page64th*(*origtable))>>6;
67 origptr+=remaintobytes;
68 bytesneeded-=remaintobytes;
69 topage++;//to page is definitely done
70 tobound+=BAMBOO_PAGE_SIZE;
71 origpage=(origptr-gcbaseva)>>BAMBOO_PAGE_SIZE_BITS;//handle exact match case
72 origbound=(void *) ((((unsigned INTPTR)origptr)&~(BAMBOO_PAGE_SIZE-1))+BAMBOO_PAGE_SIZE);
// NOTE(review): an `} else {` presumably belongs here -- confirm.
74 //Finishing off orig page
76 numbytes+=remainorigbytes;
77 unsigned int page64th=numbytes>>(BAMBOO_PAGE_SIZE_BITS-6);
79 for(int core = 0; core < NUMCORESACTIVE; core++) {
// Accumulate only; the destination page is not finished yet, so the
// division by 64 is deferred.
80 (*totable)+=page64th*(*origtable);
84 toptr+=remainorigbytes;
85 origptr+=remainorigbytes;
86 bytesneeded-=remainorigbytes;
87 origpage++;//just orig page is done
88 origbound+=BAMBOO_PAGE_SIZE;
// Re-derive the table cursors and remaining byte counts for the next round.
90 totable=&gccachesamplingtbl_r[topage*NUMCORESACTIVE];
91 origtable=&gccachesamplingtbl[origpage*NUMCORESACTIVE];
93 remaintobytes=tobound-toptr;
94 remainorigbytes=origbound-origptr;
97 } while(bytesneeded!=0);
// Prepare this core for a change of cache strategy:
//  -- flush the shared heap
//  -- install the DTLB handler that matches the upcoming stage
//
// isgccachestage: true installs the GC-stage DTLB handler, false installs the
//                 mutator one.
void cacheAdapt_gc(bool isgccachestage) {
  // flush the shared heap
  BAMBOO_CACHE_FLUSH_L2();

  // NOTE(review): the original comments announce "clean the dtlb entries"
  // here, but no such call is visible in this block -- confirm whether a
  // statement was lost and restore it if so.

  // Exactly one handler may end up installed. Installing both back to back
  // would always leave the mutator handler active and ignore the argument.
  if(isgccachestage) {
    bamboo_install_dtlb_handler_for_gc();
  } else {
    bamboo_install_dtlb_handler_for_mutator();
  }
}
118 // the master core decides how to adapt cache strategy for the mutator
119 // according to collected statistic data
// Find the core that accesses page #page_index the most.
//  page_index  - page whose NUMCORESACTIVE counters in gccachesamplingtbl_r
//                are scanned (may be any expression)
//  hottestcore - lvalue, set to the index of the core with the largest counter
//  hotfreq     - lvalue, in: frequency to beat; out: the largest counter found
// The comparison is strict, so ties keep the lowest-numbered core, and both
// outputs stay untouched when no counter exceeds the incoming hotfreq.
// Wrapped in do { } while(0) so it behaves as one statement.
#define CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq) \
  do { \
    const unsigned int *hc_tbl_=&gccachesamplingtbl_r[(page_index)*NUMCORESACTIVE]; \
    for(int hc_i_ = 0; hc_i_ < NUMCORESACTIVE; hc_i_++) { \
      unsigned int hc_freq_ = hc_tbl_[hc_i_]; \
      if((hotfreq) < hc_freq_) { \
        (hotfreq) = hc_freq_; \
        (hottestcore) = hc_i_; \
      } \
    } \
  } while(0)
// Find the core that accesses page #page_index the most and compute the total
// access count of the page in the same pass.
//  page_index  - page whose NUMCORESACTIVE counters in gccachesamplingtbl_r
//                are scanned (may be any expression)
//  hottestcore - lvalue, set to the index of the core with the largest counter
//  hotfreq     - lvalue, in: frequency to beat; out: the largest counter found
//  totalfreq   - lvalue, incremented by every core's counter
// The comparison is strict, so ties keep the lowest-numbered core.
// Wrapped in do { } while(0) so it behaves as one statement.
#define CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq) \
  do { \
    const unsigned int *hc_tbl_=&gccachesamplingtbl_r[(page_index)*NUMCORESACTIVE]; \
    for(int hc_i_ = 0; hc_i_ < NUMCORESACTIVE; hc_i_++) { \
      unsigned int hc_freq_ = hc_tbl_[hc_i_]; \
      (totalfreq) += hc_freq_; \
      if((hotfreq) < hc_freq_) { \
        (hotfreq) = hc_freq_; \
        (hottestcore) = hc_i_; \
      } \
    } \
  } while(0)
// Set the policy as hosted by core coren.
//  policy - lvalue with cache_mode, lotar_x and lotar_y members
//  coren  - core number; its coordinates are read from bamboo_cpu2coords,
//           which stores x at [2*coren] and y at [2*coren+1]
// NOTE: the stored coordinates are (x+1, y+1), not (x, y).
// Wrapped in do { } while(0) so it behaves as one statement.
#define CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren) \
  do { \
    (policy).cache_mode = BAMBOO_CACHE_MODE_COORDS; \
    (policy).lotar_x = bamboo_cpu2coords[2*(coren)]+1; \
    (policy).lotar_y = bamboo_cpu2coords[2*(coren)+1]+1; \
  } while(0)
// Store the new policy word for page #page_index in the policy table tmp_p.
//  tmp_p      - base of the policy table (one word per page)
//  page_index - page whose entry is written (may be any expression)
//  policy     - object whose .word member is stored
// Wrapped in do { } while(0) so it behaves as one statement.
#define CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy) \
  do { \
    ((int*)(tmp_p))[(page_index)] = (policy).word; \
  } while(0)
163 // make all pages hfh
164 void cacheAdapt_policy_h4h(int coren){
165 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
166 unsigned int page_gap=page_num/NUMCORESACTIVE;
167 unsigned int page_index=page_gap*coren;
168 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
169 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
170 unsigned int * tmp_p = gccachepolicytbl;
171 for(; page_index < page_index_end; page_index++) {
172 bamboo_cache_policy_t policy = {0};
173 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
174 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
175 page_sva += BAMBOO_PAGE_SIZE;
179 // make all pages local as non-cache-adaptable gc local mode
180 void cacheAdapt_policy_local(int coren){
181 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
182 unsigned int page_gap=page_num/NUMCORESACTIVE;
183 unsigned int page_index=page_gap*coren;
184 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
185 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
186 unsigned int * tmp_p = gccachepolicytbl;
187 for(; page_index < page_index_end; page_index++) {
188 bamboo_cache_policy_t policy = {0};
189 unsigned int block = 0;
190 BLOCKINDEX(block, (void *) page_sva);
191 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
192 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);
193 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
194 page_sva += BAMBOO_PAGE_SIZE;
198 void cacheAdapt_policy_hottest(int coren){
199 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
200 unsigned int page_gap=page_num/NUMCORESACTIVE;
201 unsigned int page_index=page_gap*coren;
202 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
203 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
204 unsigned int * tmp_p = gccachepolicytbl;
205 for(; page_index < page_index_end; page_index++) {
206 bamboo_cache_policy_t policy = {0};
207 unsigned int hottestcore = 0;
208 unsigned int hotfreq = 0;
209 CACHEADAPT_FIND_HOTTEST_CORE(page_index,hottestcore,hotfreq);
211 // Decide the cache strategy for this page
212 // If decide to adapt a new cache strategy, write into the shared block of
213 // the gcsharedsamplingtbl. The mem recording information that has been
214 // written is enough to hold the information.
215 // Format: page start va + cache strategy(hfh/(host core+[x,y]))
217 // locally cache the page in the hottest core
218 CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
220 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
221 page_sva += BAMBOO_PAGE_SIZE;
225 #define GC_CACHE_ADAPT_DOMINATE_THRESHOLD 1
226 // cache the page on the core that accesses it the most if that core accesses
227 // it more than (GC_CACHE_ADAPT_DOMINATE_THRESHOLD)% of the total. Otherwise,
229 void cacheAdapt_policy_dominate(int coren){
230 unsigned int page_num=(BAMBOO_SHARED_MEM_SIZE)>>(BAMBOO_PAGE_SIZE_BITS);
231 unsigned int page_gap=page_num/NUMCORESACTIVE;
232 unsigned int page_index=page_gap*coren;
233 unsigned int page_index_end=(coren==NUMCORESACTIVE-1)?page_num:(page_index+page_gap);
234 VA page_sva = gcbaseva+(BAMBOO_PAGE_SIZE)*page_index;
235 unsigned int * tmp_p = gccachepolicytbl;
236 for(; page_index < page_index_end; page_index++) {
237 bamboo_cache_policy_t policy = {0};
238 unsigned int hottestcore = 0;
239 unsigned int totalfreq = 0;
240 unsigned int hotfreq = 0;
241 CACHEADAPT_FIND_HOTTEST_CORE_W_TOTALFREQ(page_index,hottestcore,hotfreq,totalfreq);
242 // Decide the cache strategy for this page
243 // If decide to adapt a new cache strategy, write into the shared block of
245 // Format: page start va + cache policy
247 totalfreq=totalfreq>>GC_CACHE_ADAPT_DOMINATE_THRESHOLD;
248 if((unsigned int)hotfreq < (unsigned int)totalfreq) {
250 policy.cache_mode = BAMBOO_CACHE_MODE_HASH;
251 /*unsigned int block = 0;
252 BLOCKINDEX(block, (void *) page_sva);
253 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
254 CACHEADAPT_POLICY_SET_HOST_CORE(policy, coren);*/
256 // locally cache the page in the hottest core
257 CACHEADAPT_POLICY_SET_HOST_CORE(policy, hottestcore);
260 CACHEADAPT_CHANGE_POLICY_4_PAGE(tmp_p,page_index,policy);
261 page_sva += BAMBOO_PAGE_SIZE;
// Check the statistic data and decide the new cache strategy for each page
// of this core's share.
//
// coren: index of the calling core, forwarded to the selected policy routine.
//
// The policy is selected at compile time through GC_CACHE_ADAPT_POLICY1..4;
// when none is defined no policy entry is written.
//
// Returns 0. NOTE(review): no return statement was visible in this block and
// the callers in this file ignore the result -- confirm the intended value.
unsigned int cacheAdapt_decision(int coren) {
#ifdef GC_CACHE_ADAPT_POLICY1
  cacheAdapt_policy_h4h(coren);
#elif defined GC_CACHE_ADAPT_POLICY2
  cacheAdapt_policy_local(coren);
#elif defined GC_CACHE_ADAPT_POLICY3
  cacheAdapt_policy_hottest(coren);
#elif defined GC_CACHE_ADAPT_POLICY4
  cacheAdapt_policy_dominate(coren);
#else
  (void)coren;  // no policy compiled in
#endif
  return 0;
}
280 // adapt the cache strategy for the mutator
281 void cacheAdapt_mutator() {
283 // check the changes and adapt them
284 unsigned int * tmp_p = gccachepolicytbl;
285 unsigned int page_sva = gcbaseva;
286 for(; page_sva<gctopva; page_sva+=BAMBOO_PAGE_SIZE) {
287 // read out the policy
288 bamboo_cache_policy_t policy = (bamboo_cache_policy_t)(*(tmp_p));
290 if(policy.word != 0) {
291 bamboo_adapt_cache_policy(page_sva,policy,BAMBOO_PAGE_SIZE);
297 // Cache adapt phase process for clients
298 void cacheAdapt_phase_client() {
299 WAITFORGCPHASE(CACHEPOLICYPHASE);
300 GC_PRINTF("Start cachepolicy phase\n");
301 cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
302 //send init finish msg to core coordinator
303 send_msg_2(STARTUPCORE, GCFINISHCACHEPOLICY, BAMBOO_NUM_OF_CORE);
304 GC_PRINTF("Finish cachepolicy phase\n");
306 WAITFORGCPHASE(PREFINISHPHASE);
307 GC_PRINTF("Start prefinish phase\n");
309 cacheAdapt_mutator();
310 cacheAdapt_gc(false);
311 //send init finish msg to core coordinator
312 send_msg_2(STARTUPCORE, GCFINISHPREF, BAMBOO_NUM_OF_CORE);
313 GC_PRINTF("Finish prefinish phase\n");
314 CACHEADAPT_SAMPLING_RESET();
315 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
316 // zero out the gccachesamplingtbl
317 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
318 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
322 extern unsigned long long gc_output_cache_policy_time;
324 // Cache adpat phase process for the master
325 void cacheAdapt_phase_master() {
327 unsigned long long tmpt = BAMBOO_GET_EXE_TIME();
328 CACHEADAPT_OUTPUT_CACHE_SAMPLING_R();
329 gc_output_cache_policy_time += (BAMBOO_GET_EXE_TIME()-tmpt);
330 // let all cores to parallelly process the revised profile data and decide
331 // the cache policy for each page
332 gc_status_info.gcphase = CACHEPOLICYPHASE;
333 GC_SEND_MSG_1_TO_CLIENT(GCSTARTCACHEPOLICY);
334 GC_PRINTF("Start cachepolicy phase \n");
336 cacheAdapt_decision(BAMBOO_NUM_OF_CORE);
337 GC_CHECK_ALL_CORE_STATUS();
340 // let all cores to adopt new policies
341 gc_status_info.gcphase = PREFINISHPHASE;
342 // Note: all cores should flush their runtime data including non-gc cores
343 GC_SEND_MSG_1_TO_CLIENT(GCSTARTPREF);
344 GC_PRINTF("Start prefinish phase \n");
346 cacheAdapt_mutator();
347 cacheAdapt_gc(false);
348 GC_CHECK_ALL_CORE_STATUS();
350 CACHEADAPT_SAMPLING_RESET();
351 if(BAMBOO_NUM_OF_CORE < NUMCORESACTIVE) {
352 // zero out the gccachesamplingtbl
353 BAMBOO_MEMSET_WH(gccachesamplingtbl_local,0,size_cachesamplingtbl_local);
354 BAMBOO_MEMSET_WH(gccachesamplingtbl_local_r,0,size_cachesamplingtbl_local_r);
355 BAMBOO_MEMSET_WH(gccachepolicytbl,0,size_cachepolicytbl);
// Print the original (pre-compaction) sampling counters, one row per page:
// page start address, page index, the core gc_block2core maps the page's
// block to, then the counter of every active core from gccachesamplingtbl.
// Does nothing unless gc_profile_flag is set.
//
// NOTE(review): this block looks truncated. page_sva is used without a
// visible declaration, local_tbl is declared twice with no visible nested
// scope, neither loop advances local_tbl, and no closing braces or row
// terminator are visible. The statements are left exactly as found; restore
// the missing lines from the upstream file before relying on this function.
359 // output original cache sampling data for each page
360 void gc_output_cache_sampling() {
361 extern volatile bool gc_profile_flag;
// Profiling output is optional.
362 if(!gc_profile_flag) return;
363 unsigned int page_index = 0;
// Number of pages in the shared heap.
365 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
366 for(page_index = 0; page_index < page_num; page_index++) {
// Start address of the current page.
367 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
368 unsigned int block = 0;
369 BLOCKINDEX(block, (void *) page_sva);
// Core associated with the block that contains this page.
370 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
371 //printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
// First pass over the per-core counters of this page.
// NOTE(review): its visible body only reads the counter; presumably it
// decided whether the page is worth printing -- confirm.
372 unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
374 for(int i = 0; i < NUMCORESACTIVE; i++) {
375 int freq = *local_tbl;
379 //printf("%d, ", freq);
// Second pass: print the row header followed by every core's counter.
383 printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
384 unsigned int * local_tbl = &gccachesamplingtbl[page_index*NUMCORESACTIVE];
385 for(int i = 0; i < NUMCORESACTIVE; i++) {
386 unsigned int freq = *local_tbl;
388 printf("%u, ", freq);
// End-of-dump marker.
394 printf("=================\n");
// Print the revised (post-compaction) sampling counters from
// gccachesamplingtbl_r, one row per page in the same layout as
// gc_output_cache_sampling(), then a summary matrix accumulated in sumdata.
// Does nothing unless gc_profile_flag is set.
//
// NOTE(review): this block looks truncated. page_sva is used without a
// visible declaration, the sumdata initialisation loop has no visible body,
// accesscore is never visibly updated before it is used as
// sumdata[accesscore-1] (an out-of-bounds index while it is 0), local_tbl is
// declared twice and never advanced, and no closing braces are visible. The
// statements are left exactly as found; restore the missing lines from the
// upstream file before relying on this function.
397 // output revised cache sampling data for each page after compaction
398 void gc_output_cache_sampling_r() {
399 extern volatile bool gc_profile_flag;
// Profiling output is optional.
400 if(!gc_profile_flag) return;
// Summary accumulator: the first index is accesscore-1, the second is the
// core whose counter is added.
402 unsigned int sumdata[NUMCORESACTIVE][NUMCORESACTIVE];
403 for(int i = 0; i < NUMCORESACTIVE; i++) {
404 for(int j = 0; j < NUMCORESACTIVE; j++) {
408 tprintf("cache sampling_r \n");
409 unsigned int page_index = 0;
// Number of pages in the shared heap.
411 unsigned int page_num = (BAMBOO_SHARED_MEM_SIZE) >> (BAMBOO_PAGE_SIZE_BITS);
412 for(page_index = 0; page_index < page_num; page_index++) {
// Start address of the current page.
413 page_sva = gcbaseva + (BAMBOO_PAGE_SIZE) * page_index;
414 unsigned int block = 0;
415 BLOCKINDEX(block, (void *)page_sva);
// Core associated with the block that contains this page.
416 unsigned int coren = gc_block2core[block%(NUMCORES4GC*2)];
417 //printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
// NOTE(review): presumably the number of cores with a non-zero counter for
// this page; the statements updating it are not visible -- confirm.
418 int accesscore = 0; // TODO
// First pass over the per-core counters of this page.
419 unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
420 for(int i = 0; i < NUMCORESACTIVE; i++) {
421 unsigned int freq = *local_tbl;
422 //printf("%d, ", freq);
// Second pass: print the row and add each counter to the summary.
429 printf("%x, %d, %d, ",(int)page_sva,page_index,coren);
430 unsigned int * local_tbl = &gccachesamplingtbl_r[page_index*NUMCORESACTIVE];
431 for(int i = 0; i < NUMCORESACTIVE; i++) {
432 unsigned int freq = *local_tbl;
433 printf("%u, ", freq);
434 sumdata[accesscore-1][i]+=freq;
442 // TODO printout the summary data
// Print the summary with i as the second sumdata index and j as the first.
443 for(int i = 0; i < NUMCORESACTIVE; i++) {
445 for(int j = 0; j < NUMCORESACTIVE; j++) {
446 printf(" %u ", sumdata[j][i]);
// End-of-dump marker.
450 printf("=================\n");
452 #endif // GC_CACHE_ADAPT